From 5aae8ccce48ee22068f7497d33a7af3ae47cdd1e Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Mon, 18 Nov 2024 12:45:41 +0100
Subject: [PATCH 01/41] Reduce WMOPS by approx. 300 for test case in issue
 #1010

---
 lib_com/basop_util.c   |  45 +++++-
 lib_dec/ivas_svd_dec.c | 326 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 368 insertions(+), 3 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index c465428fc..a0624a6b4 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1038,9 +1038,52 @@ Word32 div_w( Word32 L_num, Word32 L_den )
     }
 }
 
+#define OPT_BASOP_Util_Divide3232_Scale_cadence
+
+#ifdef OPT_BASOP_Util_Divide3232_Scale_cadence
+static
+Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e)
+{
+    Word16 sign, shift;
+
+    sign = 0;
+    move16();
+    if (x < 0) {
+        sign = 1;
+    }
+    if (sign) {
+        x = L_negate(x);
+    }
+
+    shift = norm_l(x);
+    x = L_shl(x, shift);
+    *px_e = 0;
+    move16();
+    x = ISqrt32norm(x, px_e);
+    x = Mpy_32_32(x, x);
+    *px_e = add(shl(*px_e, 1), shift);
+    move16();
+
+    if (sign) {
+        x = L_negate(x);
+    }
+    return x;
+}
+#endif
+
 Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
 {
     Word32 z;
+
+#ifdef OPT_BASOP_Util_Divide3232_Scale_cadence
+    Word16 shift, s2;
+    z = BASOP_Util_Inv32(y, &s2);
+    shift = norm_l(x);
+    z = Mpy_32_32_r(L_shl(x, shift), z);
+    *s = sub(s2, shift);
+    move16();
+#else
+
     Word16 sx;
     Word16 sy;
     Word32 sign;
@@ -1086,7 +1129,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
     {
         z = L_negate( z );
     }
-
+#endif
     return z;
 }
 
diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 801bb9e2b..a89e2ee3d 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -62,6 +62,17 @@
 #define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 )
 #define CONVERGENCE_FACTOR_FX       214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */
 #endif
+
+#if 1
+#define OPT_DIV
+#define OPT_DIV_NORM /* 5 dB SNR precision improvement */
+#define OPT_SUM /* Very little WMOPS savings */
+#define OPT_SUM2 /* Very little WMOPS savings */
+
+#define OPT_GIVENS
+#define OPT_GIVENS_INV
+#endif
+
 /*-----------------------------------------------------------------------*
  * Local function prototypes
  *-----------------------------------------------------------------------*/
@@ -197,6 +208,18 @@ static void ApplyRotation_fx(
     const Word16 nChannels      /* Q0 */
 );
 
+#ifdef OPT_GIVENS_INV
+static void GivensRotation2_fx(
+    const Word32 x, /* exp(x_e) */
+    const Word16 x_e,
+    const Word32 z, /* exp(z_e) */
+    const Word16 z_e,
+    Word32 *result,
+    Word32 *resultInv,
+    Word16 *out_e,
+    Word16 *outInv_e );
+#endif
+
 static Word32 GivensRotation_fx(
     const Word32 x, /* exp(x_e) */
     const Word16 x_e,
@@ -617,6 +640,9 @@ static Word16 BidagonalDiagonalisation_fx(
     Word16 convergence, iteration, found_split;
     Word16 error = 0;
     move16();
+#ifdef OPT_GIVENS_INV
+    Word32 temp;
+#endif
     Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS], secDiag_new_e[MAX_OUTPUT_CHANNELS];
     Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS );
     set16_fx( secDiag_new_e, *secDiag_fx_e, MAX_OUTPUT_CHANNELS );
@@ -687,17 +713,28 @@ static Word16 BidagonalDiagonalisation_fx(
 
                     c = singularValues_fx[kCh]; /* exp(singularValues_new_e) */
                     c_e = singularValues_new_e[kCh];
+#ifdef OPT_GIVENS_INV
+                    GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */
+                    c = Mpy_32_32( c, temp );
+                    c_e = add(c_e, temp_exp);
+#else
                     singularValues_fx[kCh] = GivensRotation_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_new_e[kCh] ); /* exp(singularValues_new_e) */
                     c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp );                                   /* exp(temp_exp + (c_e - singularValues_new_e)) */
                     c_e = add( temp_exp, sub( c_e, singularValues_new_e[kCh] ) );
+#endif
                     IF( c_e > 0 )
                     {
                         c = L_shl_sat( c, c_e ); // Q31
                         c_e = 0;
                         move16();
                     }
+#ifdef OPT_GIVENS_INV
+                    s = Mpy_32_32( -g, temp );
+                    s_e = add( g_e, temp_exp );
+#else
                     s = BASOP_Util_Divide3232_Scale_cadence( -g, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp ); /* exp(temp_exp + (g_e - singularValues_new_e))*/
                     s_e = add( temp_exp, sub( g_e, singularValues_new_e[kCh] ) );
+#endif
                     IF( s_e > 0 )
                     {
                         s = L_shl_sat( s, s_e ); // Q31
@@ -905,6 +942,10 @@ static void ApplyQRTransform_fx(
     const Word16 nChannelsC    /* i  : number of columns in the matrix to be decomposed			Q0*/
 )
 {
+#ifdef OPT_GIVENS_INV
+    Word32 temp;
+    Word16 temp_e;
+#endif
     Word16 ch, split;
     Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0;
     move32();
@@ -1007,18 +1048,29 @@ static void ApplyQRTransform_fx(
         g = Mpy_32_32( c, secDiag[ch + 1] ); /* exp(c_e + secDiag_e) */
         g_e = add( c_e, secDiag_e[ch + 1] );
 
+#ifdef OPT_GIVENS_INV
+        GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */
+        c = Mpy_32_32( d, temp);
+        c_e = add(temp_e, d_e);
+#else
         secDiag[ch] = GivensRotation_fx( d, d_e, r, r_e, &secDiag_e[ch] ); /* exp(secDiag_e) */
         move32();
         c = BASOP_Util_Divide3232_Scale_cadence( d, maxWithSign_fx( secDiag[ch] ), &c_e ); /* exp(c_e + (d_e + secDiag_e)) */
         c_e = add( c_e, sub( d_e, secDiag_e[ch] ) );
+#endif
         IF( c_e > 0 )
         {
             c = L_shl_sat( c, c_e ); // Q31
             c_e = 0;
             move16();
         }
+#ifdef OPT_GIVENS_INV
+        s = Mpy_32_32( r, temp );
+        s_e = add(r_e, temp_e);
+#else
         s = BASOP_Util_Divide3232_Scale_cadence( r, maxWithSign_fx( secDiag[ch] ), &s_e ); /* exp(s_e + (r_e - sec_Diag_e))*/
         s_e = add( s_e, sub( r_e, secDiag_e[ch] ) );
+#endif
         IF( s_e > 0 )
         {
             s = L_shl_sat( s, s_e ); // Q31
@@ -1039,12 +1091,18 @@ static void ApplyQRTransform_fx(
         // ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC);
         ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC );
 
+#ifdef OPT_GIVENS_INV
+        GivensRotation2_fx( d, d_e, r, r_e, &singularValues[ch], &aux, &singularValues_e[ch], &aux_e ); /* exp(singularValues_e) */
+#else
         singularValues[ch] = GivensRotation_fx( d, d_e, r, r_e, &singularValues_e[ch] ); /* exp(singularValues_e) */
         move32();
+#endif
         IF( GT_32( L_abs( singularValues[ch] ), Mpy_32_32( CONVERGENCE_FACTOR_FX, L_abs( singularValues[ch] ) ) ) )
         {
+#ifndef OPT_GIVENS_INV
             aux = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, singularValues[ch], &aux_e ); /* exp(aux_e + (1 - singularValues_e)) */
             aux_e = add( aux_e, sub( 1, singularValues_e[ch] ) );
+#endif
 
             c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */
             c_e = add( d_e, aux_e );
@@ -1317,6 +1375,98 @@ static void HouseholderReduction(
  *
  *-------------------------------------------------------------------------*/
 
+#ifdef OPT_DIV
+static
+Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e)
+{
+    Word16 sign, shift;
+
+    sign = 0;
+    move16();
+    if (x < 0) {
+        sign = 1;
+    }
+    if (sign) {
+        x = L_negate(x);
+    }
+
+    shift = norm_l(x);
+    x = L_shl(x, shift);
+    *px_e = 0;
+    move16();
+    x = ISqrt32norm(x, px_e);
+    x = Mpy_32_32(x, x);
+    *px_e = add(shl(*px_e, 1), shift);
+    move16();
+
+    if (sign) {
+        x = L_negate(x);
+    }
+    return x;
+}
+#endif
+
+#ifdef OPT_SUM
+static
+Word32 BASOP_Util_Accu_Mant32Exp /* o  : normalized result mantissa */
+    ( Word32 a_m,                /* i  : Mantissa of 1st operand a  */
+      Word16 *p_a_e,               /* i/o  : Exponent of 1st operand a  */
+      Word32 b_m,                /* i  : Mantissa of 2nd operand b  */
+      Word16 b_e                 /* i  : Exponent of 2nd operand b  */
+    )
+{
+    Word32 L_tmp;
+    Word16 shift, a_e = *p_a_e;
+
+    /* Compare exponents: the difference is limited to +/- 30
+       The Word32 mantissa of the operand with lower exponent is shifted right by the exponent difference.
+       Then, the unshifted mantissa of the operand with the higher exponent is added. The addition result
+       is normalized and the result represents the mantissa to return. The returned exponent takes into
+       account all shift operations.
+    */
+
+#if 0
+    if ( !a_m )
+        a_e = add( b_e, 0 );
+#endif
+    if ( !b_m )
+        b_e = add( a_e, 0 );
+
+    shift = sub( a_e, b_e );
+#if 0
+    shift = s_max( -31, shift );
+    shift = s_min( 31, shift );
+#endif
+    if ( shift < 0 )
+    {
+        /* exponent of b is greater than exponent of a, shr a_m */
+        a_m = L_shl( a_m, shift );
+    }
+    if ( shift > 0 )
+    {
+        /* exponent of a is greater than exponent of b */
+        b_m = L_shr( b_m, shift );
+    }
+    a_e = add( s_max( a_e, b_e ), 1 );
+    L_tmp = L_add( L_shr( a_m, 1 ), L_shr( b_m, 1 ) );
+#if 1
+    shift = norm_l( L_tmp );
+    if ( shift )
+        L_tmp = L_shl( L_tmp, shift );
+#if 0
+    if ( L_tmp == 0 )
+        a_e = add( 0, 0 );
+#endif
+    if ( L_tmp != 0 )
+        a_e = sub( a_e, shift );
+#endif
+    *p_a_e = a_e;
+
+    return ( L_tmp );
+}
+
+#endif
+
 #ifdef IVAS_FLOAT_FIXED
 static void biDiagonalReductionLeft_fx(
     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
@@ -1373,27 +1523,66 @@ move32();
 
 IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 {
+#ifdef OPT_SUM2
+    *sig_x_e = -31;
+    move16();
+#endif
+
     idx = currChannel;
     move16();
 
     FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     {
+#ifdef OPT_SUM2
+        ( *sig_x ) = BASOP_Util_Accu_Mant32Exp( *sig_x, sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e ); /* exp(sig_x_e) */
+#else
         ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */
+#endif
     }
 
     IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
     {
+#ifdef OPT_DIV
+        Word16 invVal_e, temp_e;
+        Word32 invVal = BASOP_Util_Inv32(maxWithSign_fx( *sig_x ), &invVal_e);
+#ifdef OPT_DIV_NORM
+        temp_e = norm_l(invVal);
+        invVal = L_shl(invVal, temp_e);
+        invVal_e = sub(invVal_e, temp_e);
+#endif
+#endif
         norm_x = 0;
         move32();
+#ifdef OPT_SUM2
+        norm_x_e = -31;
+#else
         norm_x_e = 0;
+#endif
         move16();
         FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
         {
+#ifndef OPT_DIV
             singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
+#else
+            temp_e = norm_l(singularVectors[jCh][currChannel]);
+            singularVectors[jCh][currChannel] = L_shl(singularVectors[jCh][currChannel], temp_e);
+            singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
+            sing_exp[jCh] = sub(invVal_e, temp_e);
+#ifdef OPT_DIV_NORM
+            temp_e = norm_l(singularVectors[jCh][currChannel]);
+            singularVectors[jCh][currChannel] = L_shl(singularVectors[jCh][currChannel], temp_e);
+            sing_exp[jCh] = sub(sing_exp[jCh], temp_e);
+#endif
+            move16();
+#endif
             move32();
             sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
             move16();
+#ifdef OPT_SUM2
+            norm_x = BASOP_Util_Accu_Mant32Exp( norm_x, &norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ) ); /* exp(norm_x_e) */
+#else
             norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+#endif
         }
         IF( GT_16( norm_x_e, 0 ) )
         {
@@ -1426,19 +1615,46 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #endif
         move32();
 
+#ifdef OPT_DIV
+        invVal = BASOP_Util_Inv32(maxWithSign_fx( r ), &invVal_e);
+#ifdef OPT_DIV_NORM
+        temp_e = norm_l(invVal);
+        invVal = L_shl(invVal, temp_e);
+        invVal_e = sub(invVal_e, temp_e);
+#endif
+#endif
+
         FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
         {
             norm_x = 0;
             move32();
+#ifdef OPT_SUM2
+            norm_x_e = -31;
+#else
             norm_x_e = 0;
+#endif
             move16();
             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
             {
+#ifdef OPT_SUM2
+                norm_x = BASOP_Util_Accu_Mant32Exp( norm_x, &norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e )); /* exp(norm_x_e) */
+#else
                 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e ), &norm_x_e ); /* exp(norm_x_e) */
+#endif
             }
 
+#ifndef OPT_DIV
             f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */
             f_e = add( f_e, sub( norm_x_e, r_e ) );
+#else
+            f = Mpy_32_32(norm_x, invVal); /* invVal_e + (norm_x_e - r_e) */
+            f_e = add(invVal_e, sub( norm_x_e, r_e ) );
+#ifdef OPT_DIV_NORM
+            temp_e = norm_l(f);
+            f = L_shl(f, temp_e);
+            f_e = sub(f_e, temp_e);
+#endif
+#endif
 
             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
             {
@@ -1605,9 +1821,18 @@ static void biDiagonalReductionRight_fx(
     {
         idx = add( currChannel, 1 ); /* Q0 */
 
+#ifdef OPT_SUM
+        *sig_x_e = -31;
+        move16();
+#endif
+
         FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
         {
+#ifdef OPT_SUM
+            ( *sig_x ) = BASOP_Util_Accu_Mant32Exp( *sig_x, sig_x_e, L_abs( singularVectors[currChannel][jCh] ), *singularVectors_e ); /* exp(sig_x_e) */
+#else
             ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */
+#endif
         }
 
         IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
@@ -1617,9 +1842,31 @@ static void biDiagonalReductionRight_fx(
             norm_x_e = 0;
             move16();
 
+#ifdef OPT_DIV
+            Word16 invVal_e, temp_e;
+            Word32 invVal = BASOP_Util_Inv32(maxWithSign_fx( *sig_x ), &invVal_e);
+#ifdef OPT_DIV_NORM
+            temp_e = norm_l(invVal);
+            invVal = L_shl(invVal, temp_e);
+            invVal_e = sub(invVal_e, temp_e);
+#endif
+#endif
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
             {
+#ifndef OPT_DIV
                 singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
+#else
+                temp_e = norm_l(singularVectors[currChannel][jCh]);
+                singularVectors[currChannel][jCh] = L_shl(singularVectors[currChannel][jCh], temp_e);
+                singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
+                sing_exp[jCh] = sub(invVal_e, temp_e);
+                move16();
+#ifdef OPT_DIV_NORM
+                temp_e = norm_l(singularVectors[currChannel][jCh]);
+                singularVectors[currChannel][jCh] = L_shl(singularVectors[currChannel][jCh], temp_e);
+                sing_exp[jCh] = sub(sing_exp[jCh], temp_e);
+#endif
+#endif
                 move32();
                 sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
                 move16();
@@ -1651,9 +1898,31 @@ static void biDiagonalReductionRight_fx(
             singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[idx], -( *g ), 0, &sing_exp[idx] ); /* exp(sing_exp) */
             move32();
 
+#ifdef OPT_DIV
+            invVal = BASOP_Util_Inv32(maxWithSign_fx( r ), &invVal_e);
+#ifdef OPT_DIV_NORM
+            temp_e = norm_l(invVal);
+            invVal = L_shl(invVal, temp_e);
+            invVal_e = sub(invVal_e, temp_e);
+#endif
+#endif
+
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
             {
+#ifndef OPT_DIV
                 secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */
+#else
+                temp_e = norm_l(singularVectors[currChannel][jCh]);
+                secDiag[jCh] = L_shl(singularVectors[currChannel][jCh], temp_e);
+                secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
+                secDiag_exp[jCh] = sub(invVal_e, temp_e);
+#ifdef OPT_DIV_NORM
+                temp_e = norm_l(secDiag[jCh]);
+                secDiag[jCh] = L_shl(secDiag[jCh], temp_e);
+                secDiag_exp[jCh] = sub(secDiag_exp[jCh], temp_e);
+#endif
+                move16();
+#endif
                 move32();
                 secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) );
                 move32();
@@ -1663,11 +1932,19 @@ static void biDiagonalReductionRight_fx(
             {
                 norm_x = 0;
                 move32();
+#ifdef OPT_SUM2
+                norm_x_e = -31;
+#else
                 norm_x_e = 0;
+#endif
                 move16();
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
                 {
+#ifdef OPT_SUM2
+                    norm_x = BASOP_Util_Accu_Mant32Exp( norm_x, &norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( *singularVectors_e, sing_exp[jCh] ) ); /* exp(sig_x_e) */
+#else
                     norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( *singularVectors_e, sing_exp[jCh] ), &norm_x_e ); /* exp(norm_x_e) */
+#endif
                 }
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
@@ -1837,8 +2114,13 @@ static void singularVectorsAccumulationLeft_fx(
 
         IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
         {
+#ifdef OPT_DIV
+            t_ii = BASOP_Util_Inv32(maxWithSign_fx(t_ii), &temp_exp);
+            t_ii_e = sub(temp_exp, t_ii_e);
+#else
             t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */
             t_ii_e = add( 1, sub( temp_exp, t_ii_e ) );
+#endif
             // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) );
             FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
             {
@@ -1850,9 +2132,14 @@ static void singularVectorsAccumulationLeft_fx(
                 {
                     norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
                 }
+#ifdef OPT_DIV
+                t_jj = BASOP_Util_Inv32(maxWithSign_fx(singularVectors_Left[nCh][nCh]), &temp_exp);
+                t_jj = Mpy_32_32(Mpy_32_32( t_ii, norm_y ), t_jj);
+                t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
+#else
                 t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,
                 t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
-
+#endif
                 FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
                 {
                     singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], sing_exp2[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, sing_exp2[k][nCh] ), &sing_exp2[k][iCh] ); /* exp(sing_exp2) */
@@ -2106,6 +2393,32 @@ static void singularVectorsAccumulationRight(
  *-------------------------------------------------------------------------*/
 
 #ifdef IVAS_FLOAT_FIXED
+
+#ifdef OPT_GIVENS_INV
+static void GivensRotation2_fx(
+    const Word32 x, /* exp(x_e) */
+    const Word16 x_e,
+    const Word32 z, /* exp(z_e) */
+    const Word16 z_e,
+    Word32 *result,
+    Word32 *resultInv,
+    Word16 *out_e,
+    Word16 *outInv_e )
+{
+    Word32 r;
+
+    r = BASOP_Util_Add_Mant32Exp( Mpy_32_32(z, z), shl(z_e, 1), Mpy_32_32(x, x), shl(x_e, 1), out_e );
+    r = L_max(r, 1);
+    *outInv_e = *out_e;
+    move16();
+    *result = Sqrt32(r, out_e);
+    move32();
+
+    *resultInv = ISqrt32(r, outInv_e);
+    move32();
+}
+#endif
+
 static Word32 GivensRotation_fx(
     const Word32 x, /* exp(x_e) */
     const Word16 x_e,
@@ -2113,10 +2426,19 @@ static Word32 GivensRotation_fx(
     const Word16 z_e,
     Word16 *out_e )
 {
+#ifdef OPT_GIVENS
+    Word32 r;
+#else
     Word32 x_abs, z_abs;
     Word32 cotan, tan, r;
     Word16 temp_exp;
     Word32 L_temp;
+#endif
+
+#ifdef OPT_GIVENS
+    r = BASOP_Util_Add_Mant32Exp( Mpy_32_32(z, z), shl(z_e, 1), Mpy_32_32(x, x), shl(x_e, 1), out_e );
+    r = Sqrt32(r, out_e);
+#else
     x_abs = L_abs( x );
     z_abs = L_abs( z );
     test();
@@ -2159,7 +2481,7 @@ static Word32 GivensRotation_fx(
             *out_e = add( z_e, temp_exp );
         }
     }
-
+#endif
     return ( r );
 }
 #else
-- 
GitLab


From ca43c4f56e9d4757259284776c994502b8bce90e Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Mon, 18 Nov 2024 16:50:54 +0100
Subject: [PATCH 02/41] Issue #1010 : Add output normalization and x==0
 handling to BASOP_Util_Inv32 to prevent instabilities for some operating
 points.

---
 lib_com/basop_util.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index a0624a6b4..2aba88d15 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1044,7 +1044,14 @@ Word32 div_w( Word32 L_num, Word32 L_den )
 static
 Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e)
 {
-    Word16 sign, shift;
+    Word16 sign, shift, shift2;
+
+    /* Avoid result 0 with inconvenient exponent returned. */
+    IF( x == (Word32) 0 )
+    {
+        *px_e = 0;
+        return ( (Word32) 0 );
+    }
 
     sign = 0;
     move16();
@@ -1061,7 +1068,9 @@ Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e)
     move16();
     x = ISqrt32norm(x, px_e);
     x = Mpy_32_32(x, x);
-    *px_e = add(shl(*px_e, 1), shift);
+    shift2 = norm_l(x);
+    x = L_shl(x, shift2);
+    *px_e = add(shl(*px_e, 1), sub(shift, shift2));
     move16();
 
     if (sign) {
-- 
GitLab


From c41a883c2eaf587965e42fb4a0b7e3386f01c219 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Mon, 18 Nov 2024 16:50:54 +0100
Subject: [PATCH 03/41] Issue #1010 : Return MAX_32 with exponent 31 from
 BASOP_Util_Inv32 for x==0 and add x==0 early-out to
 BASOP_Util_Divide3232_Scale_cadence.

---
 lib_com/basop_util.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index 2aba88d15..852b4d225 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1049,8 +1049,8 @@ Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e)
     /* Avoid result 0 with inconvenient exponent returned. */
     IF( x == (Word32) 0 )
     {
-        *px_e = 0;
-        return ( (Word32) 0 );
+        *px_e = 31;
+        return ( (Word32) MAX_32 );
     }
 
     sign = 0;
@@ -1086,6 +1086,13 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
 
 #ifdef OPT_BASOP_Util_Divide3232_Scale_cadence
     Word16 shift, s2;
+
+    IF( x == (Word32) 0 )
+    {
+        *s = 0;
+        return ( (Word32) 0 );
+    }
+
     z = BASOP_Util_Inv32(y, &s2);
     shift = norm_l(x);
     z = Mpy_32_32_r(L_shl(x, shift), z);
-- 
GitLab


From dc90a03f1490460ddc8bf23124805701eada21c2 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Tue, 26 Nov 2024 14:59:49 +0100
Subject: [PATCH 04/41] Remove attempts to optimize normalized additions.

---
 lib_dec/ivas_svd_dec.c | 110 ++---------------------------------------
 1 file changed, 5 insertions(+), 105 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index a89e2ee3d..9c8ae6ad4 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -66,8 +66,6 @@
 #if 1
 #define OPT_DIV
 #define OPT_DIV_NORM /* 5 dB SNR precision improvement */
-#define OPT_SUM /* Very little WMOPS savings */
-#define OPT_SUM2 /* Very little WMOPS savings */
 
 #define OPT_GIVENS
 #define OPT_GIVENS_INV
@@ -1406,67 +1404,6 @@ Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e)
 }
 #endif
 
-#ifdef OPT_SUM
-static
-Word32 BASOP_Util_Accu_Mant32Exp /* o  : normalized result mantissa */
-    ( Word32 a_m,                /* i  : Mantissa of 1st operand a  */
-      Word16 *p_a_e,               /* i/o  : Exponent of 1st operand a  */
-      Word32 b_m,                /* i  : Mantissa of 2nd operand b  */
-      Word16 b_e                 /* i  : Exponent of 2nd operand b  */
-    )
-{
-    Word32 L_tmp;
-    Word16 shift, a_e = *p_a_e;
-
-    /* Compare exponents: the difference is limited to +/- 30
-       The Word32 mantissa of the operand with lower exponent is shifted right by the exponent difference.
-       Then, the unshifted mantissa of the operand with the higher exponent is added. The addition result
-       is normalized and the result represents the mantissa to return. The returned exponent takes into
-       account all shift operations.
-    */
-
-#if 0
-    if ( !a_m )
-        a_e = add( b_e, 0 );
-#endif
-    if ( !b_m )
-        b_e = add( a_e, 0 );
-
-    shift = sub( a_e, b_e );
-#if 0
-    shift = s_max( -31, shift );
-    shift = s_min( 31, shift );
-#endif
-    if ( shift < 0 )
-    {
-        /* exponent of b is greater than exponent of a, shr a_m */
-        a_m = L_shl( a_m, shift );
-    }
-    if ( shift > 0 )
-    {
-        /* exponent of a is greater than exponent of b */
-        b_m = L_shr( b_m, shift );
-    }
-    a_e = add( s_max( a_e, b_e ), 1 );
-    L_tmp = L_add( L_shr( a_m, 1 ), L_shr( b_m, 1 ) );
-#if 1
-    shift = norm_l( L_tmp );
-    if ( shift )
-        L_tmp = L_shl( L_tmp, shift );
-#if 0
-    if ( L_tmp == 0 )
-        a_e = add( 0, 0 );
-#endif
-    if ( L_tmp != 0 )
-        a_e = sub( a_e, shift );
-#endif
-    *p_a_e = a_e;
-
-    return ( L_tmp );
-}
-
-#endif
-
 #ifdef IVAS_FLOAT_FIXED
 static void biDiagonalReductionLeft_fx(
     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
@@ -1523,21 +1460,12 @@ move32();
 
 IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 {
-#ifdef OPT_SUM2
-    *sig_x_e = -31;
-    move16();
-#endif
-
     idx = currChannel;
     move16();
 
     FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     {
-#ifdef OPT_SUM2
-        ( *sig_x ) = BASOP_Util_Accu_Mant32Exp( *sig_x, sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e ); /* exp(sig_x_e) */
-#else
         ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */
-#endif
     }
 
     IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
@@ -1553,11 +1481,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #endif
         norm_x = 0;
         move32();
-#ifdef OPT_SUM2
-        norm_x_e = -31;
-#else
         norm_x_e = 0;
-#endif
         move16();
         FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
         {
@@ -1578,11 +1502,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             move32();
             sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
             move16();
-#ifdef OPT_SUM2
-            norm_x = BASOP_Util_Accu_Mant32Exp( norm_x, &norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ) ); /* exp(norm_x_e) */
-#else
             norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
-#endif
         }
         IF( GT_16( norm_x_e, 0 ) )
         {
@@ -1628,19 +1548,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         {
             norm_x = 0;
             move32();
-#ifdef OPT_SUM2
-            norm_x_e = -31;
-#else
             norm_x_e = 0;
-#endif
             move16();
             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
             {
-#ifdef OPT_SUM2
-                norm_x = BASOP_Util_Accu_Mant32Exp( norm_x, &norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e )); /* exp(norm_x_e) */
-#else
                 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e ), &norm_x_e ); /* exp(norm_x_e) */
-#endif
             }
 
 #ifndef OPT_DIV
@@ -1821,18 +1733,9 @@ static void biDiagonalReductionRight_fx(
     {
         idx = add( currChannel, 1 ); /* Q0 */
 
-#ifdef OPT_SUM
-        *sig_x_e = -31;
-        move16();
-#endif
-
         FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
         {
-#ifdef OPT_SUM
-            ( *sig_x ) = BASOP_Util_Accu_Mant32Exp( *sig_x, sig_x_e, L_abs( singularVectors[currChannel][jCh] ), *singularVectors_e ); /* exp(sig_x_e) */
-#else
             ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */
-#endif
         }
 
         IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
@@ -1932,19 +1835,11 @@ static void biDiagonalReductionRight_fx(
             {
                 norm_x = 0;
                 move32();
-#ifdef OPT_SUM2
-                norm_x_e = -31;
-#else
                 norm_x_e = 0;
-#endif
                 move16();
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
                 {
-#ifdef OPT_SUM2
-                    norm_x = BASOP_Util_Accu_Mant32Exp( norm_x, &norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( *singularVectors_e, sing_exp[jCh] ) ); /* exp(sig_x_e) */
-#else
                     norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( *singularVectors_e, sing_exp[jCh] ), &norm_x_e ); /* exp(norm_x_e) */
-#endif
                 }
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
@@ -2280,8 +2175,13 @@ static void singularVectorsAccumulationRight_fx(
 
                 FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/
                 {
+#ifdef OPT_DIVno
+                    ratio_float = BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */
+                    singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
+#else
                     ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */
                     singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
+#endif
                     move32();
                     sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e ) );
                     move16();
-- 
GitLab


From ea2c4fda65a13494fbb3ba346ee639eeb47d1646 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Tue, 26 Nov 2024 17:07:47 +0100
Subject: [PATCH 05/41] Revert OPT_BASOP_Util_Divide3232_Scale_cadence
 optimization attempt.

---
 lib_com/basop_util.c | 61 +-------------------------------------------
 1 file changed, 1 insertion(+), 60 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index 852b4d225..c465428fc 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1038,68 +1038,9 @@ Word32 div_w( Word32 L_num, Word32 L_den )
     }
 }
 
-#define OPT_BASOP_Util_Divide3232_Scale_cadence
-
-#ifdef OPT_BASOP_Util_Divide3232_Scale_cadence
-static
-Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e)
-{
-    Word16 sign, shift, shift2;
-
-    /* Avoid result 0 with inconvenient exponent returned. */
-    IF( x == (Word32) 0 )
-    {
-        *px_e = 31;
-        return ( (Word32) MAX_32 );
-    }
-
-    sign = 0;
-    move16();
-    if (x < 0) {
-        sign = 1;
-    }
-    if (sign) {
-        x = L_negate(x);
-    }
-
-    shift = norm_l(x);
-    x = L_shl(x, shift);
-    *px_e = 0;
-    move16();
-    x = ISqrt32norm(x, px_e);
-    x = Mpy_32_32(x, x);
-    shift2 = norm_l(x);
-    x = L_shl(x, shift2);
-    *px_e = add(shl(*px_e, 1), sub(shift, shift2));
-    move16();
-
-    if (sign) {
-        x = L_negate(x);
-    }
-    return x;
-}
-#endif
-
 Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
 {
     Word32 z;
-
-#ifdef OPT_BASOP_Util_Divide3232_Scale_cadence
-    Word16 shift, s2;
-
-    IF( x == (Word32) 0 )
-    {
-        *s = 0;
-        return ( (Word32) 0 );
-    }
-
-    z = BASOP_Util_Inv32(y, &s2);
-    shift = norm_l(x);
-    z = Mpy_32_32_r(L_shl(x, shift), z);
-    *s = sub(s2, shift);
-    move16();
-#else
-
     Word16 sx;
     Word16 sy;
     Word32 sign;
@@ -1145,7 +1086,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
     {
         z = L_negate( z );
     }
-#endif
+
     return z;
 }
 
-- 
GitLab


From e643e42ec48317f55068318bc431a3549d822ed0 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Wed, 27 Nov 2024 09:40:25 +0100
Subject: [PATCH 06/41] Correct and activate optimization using
 BASOP_Util_Divide3232_Scale.

---
 lib_dec/ivas_svd_dec.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 9c8ae6ad4..8e526d283 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -2175,9 +2175,9 @@ static void singularVectorsAccumulationRight_fx(
 
                 FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/
                 {
-#ifdef OPT_DIVno
-                    ratio_float = BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */
-                    singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
+#ifdef OPT_DIV
+                    ratio_float = L_deposit_h(BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 )); /* exp(temp_exp1) */
+                    singularVectors_Right[iCh][nCh] = L_deposit_h(BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ));               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
 #else
                     ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */
                     singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
-- 
GitLab


From 5b3c28cc10907ad673f6267882bcfa9f00764fcd Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Wed, 27 Nov 2024 10:59:47 +0100
Subject: [PATCH 07/41] Use correct macro naming scheme.

---
 lib_dec/ivas_svd_dec.c | 72 +++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 8e526d283..339bf0d5d 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -64,11 +64,11 @@
 #endif
 
 #if 1
-#define OPT_DIV
-#define OPT_DIV_NORM /* 5 dB SNR precision improvement */
+#define FIX_1010_OPT_DIV
+#define FIX_1010_OPT_DIV_NORM /* precision improvement */
 
-#define OPT_GIVENS
-#define OPT_GIVENS_INV
+#define FIX_1010_OPT_GIVENS
+#define FIX_1010_OPT_GIVENS_INV
 #endif
 
 /*-----------------------------------------------------------------------*
@@ -206,7 +206,7 @@ static void ApplyRotation_fx(
     const Word16 nChannels      /* Q0 */
 );
 
-#ifdef OPT_GIVENS_INV
+#ifdef FIX_1010_OPT_GIVENS_INV
 static void GivensRotation2_fx(
     const Word32 x, /* exp(x_e) */
     const Word16 x_e,
@@ -638,7 +638,7 @@ static Word16 BidagonalDiagonalisation_fx(
     Word16 convergence, iteration, found_split;
     Word16 error = 0;
     move16();
-#ifdef OPT_GIVENS_INV
+#ifdef FIX_1010_OPT_GIVENS_INV
     Word32 temp;
 #endif
     Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS], secDiag_new_e[MAX_OUTPUT_CHANNELS];
@@ -711,7 +711,7 @@ static Word16 BidagonalDiagonalisation_fx(
 
                     c = singularValues_fx[kCh]; /* exp(singularValues_new_e) */
                     c_e = singularValues_new_e[kCh];
-#ifdef OPT_GIVENS_INV
+#ifdef FIX_1010_OPT_GIVENS_INV
                     GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */
                     c = Mpy_32_32( c, temp );
                     c_e = add(c_e, temp_exp);
@@ -726,7 +726,7 @@ static Word16 BidagonalDiagonalisation_fx(
                         c_e = 0;
                         move16();
                     }
-#ifdef OPT_GIVENS_INV
+#ifdef FIX_1010_OPT_GIVENS_INV
                     s = Mpy_32_32( -g, temp );
                     s_e = add( g_e, temp_exp );
 #else
@@ -940,7 +940,7 @@ static void ApplyQRTransform_fx(
     const Word16 nChannelsC    /* i  : number of columns in the matrix to be decomposed			Q0*/
 )
 {
-#ifdef OPT_GIVENS_INV
+#ifdef FIX_1010_OPT_GIVENS_INV
     Word32 temp;
     Word16 temp_e;
 #endif
@@ -1046,7 +1046,7 @@ static void ApplyQRTransform_fx(
         g = Mpy_32_32( c, secDiag[ch + 1] ); /* exp(c_e + secDiag_e) */
         g_e = add( c_e, secDiag_e[ch + 1] );
 
-#ifdef OPT_GIVENS_INV
+#ifdef FIX_1010_OPT_GIVENS_INV
         GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */
         c = Mpy_32_32( d, temp);
         c_e = add(temp_e, d_e);
@@ -1062,7 +1062,7 @@ static void ApplyQRTransform_fx(
             c_e = 0;
             move16();
         }
-#ifdef OPT_GIVENS_INV
+#ifdef FIX_1010_OPT_GIVENS_INV
         s = Mpy_32_32( r, temp );
         s_e = add(r_e, temp_e);
 #else
@@ -1089,7 +1089,7 @@ static void ApplyQRTransform_fx(
         // ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC);
         ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC );
 
-#ifdef OPT_GIVENS_INV
+#ifdef FIX_1010_OPT_GIVENS_INV
         GivensRotation2_fx( d, d_e, r, r_e, &singularValues[ch], &aux, &singularValues_e[ch], &aux_e ); /* exp(singularValues_e) */
 #else
         singularValues[ch] = GivensRotation_fx( d, d_e, r, r_e, &singularValues_e[ch] ); /* exp(singularValues_e) */
@@ -1097,7 +1097,7 @@ static void ApplyQRTransform_fx(
 #endif
         IF( GT_32( L_abs( singularValues[ch] ), Mpy_32_32( CONVERGENCE_FACTOR_FX, L_abs( singularValues[ch] ) ) ) )
         {
-#ifndef OPT_GIVENS_INV
+#ifndef FIX_1010_OPT_GIVENS_INV
             aux = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, singularValues[ch], &aux_e ); /* exp(aux_e + (1 - singularValues_e)) */
             aux_e = add( aux_e, sub( 1, singularValues_e[ch] ) );
 #endif
@@ -1373,7 +1373,7 @@ static void HouseholderReduction(
  *
  *-------------------------------------------------------------------------*/
 
-#ifdef OPT_DIV
+#ifdef FIX_1010_OPT_DIV
 static
 Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e)
 {
@@ -1470,10 +1470,10 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 
     IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
     {
-#ifdef OPT_DIV
+#ifdef FIX_1010_OPT_DIV
         Word16 invVal_e, temp_e;
         Word32 invVal = BASOP_Util_Inv32(maxWithSign_fx( *sig_x ), &invVal_e);
-#ifdef OPT_DIV_NORM
+#ifdef FIX_1010_OPT_DIV_NORM
         temp_e = norm_l(invVal);
         invVal = L_shl(invVal, temp_e);
         invVal_e = sub(invVal_e, temp_e);
@@ -1485,14 +1485,14 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         move16();
         FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
         {
-#ifndef OPT_DIV
+#ifndef FIX_1010_OPT_DIV
             singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
 #else
             temp_e = norm_l(singularVectors[jCh][currChannel]);
             singularVectors[jCh][currChannel] = L_shl(singularVectors[jCh][currChannel], temp_e);
             singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
             sing_exp[jCh] = sub(invVal_e, temp_e);
-#ifdef OPT_DIV_NORM
+#ifdef FIX_1010_OPT_DIV_NORM
             temp_e = norm_l(singularVectors[jCh][currChannel]);
             singularVectors[jCh][currChannel] = L_shl(singularVectors[jCh][currChannel], temp_e);
             sing_exp[jCh] = sub(sing_exp[jCh], temp_e);
@@ -1535,9 +1535,9 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #endif
         move32();
 
-#ifdef OPT_DIV
+#ifdef FIX_1010_OPT_DIV
         invVal = BASOP_Util_Inv32(maxWithSign_fx( r ), &invVal_e);
-#ifdef OPT_DIV_NORM
+#ifdef FIX_1010_OPT_DIV_NORM
         temp_e = norm_l(invVal);
         invVal = L_shl(invVal, temp_e);
         invVal_e = sub(invVal_e, temp_e);
@@ -1555,13 +1555,13 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
                 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e ), &norm_x_e ); /* exp(norm_x_e) */
             }
 
-#ifndef OPT_DIV
+#ifndef FIX_1010_OPT_DIV
             f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */
             f_e = add( f_e, sub( norm_x_e, r_e ) );
 #else
             f = Mpy_32_32(norm_x, invVal); /* invVal_e + (norm_x_e - r_e) */
             f_e = add(invVal_e, sub( norm_x_e, r_e ) );
-#ifdef OPT_DIV_NORM
+#ifdef FIX_1010_OPT_DIV_NORM
             temp_e = norm_l(f);
             f = L_shl(f, temp_e);
             f_e = sub(f_e, temp_e);
@@ -1745,10 +1745,10 @@ static void biDiagonalReductionRight_fx(
             norm_x_e = 0;
             move16();
 
-#ifdef OPT_DIV
+#ifdef FIX_1010_OPT_DIV
             Word16 invVal_e, temp_e;
             Word32 invVal = BASOP_Util_Inv32(maxWithSign_fx( *sig_x ), &invVal_e);
-#ifdef OPT_DIV_NORM
+#ifdef FIX_1010_OPT_DIV_NORM
             temp_e = norm_l(invVal);
             invVal = L_shl(invVal, temp_e);
             invVal_e = sub(invVal_e, temp_e);
@@ -1756,7 +1756,7 @@ static void biDiagonalReductionRight_fx(
 #endif
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
             {
-#ifndef OPT_DIV
+#ifndef FIX_1010_OPT_DIV
                 singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
 #else
                 temp_e = norm_l(singularVectors[currChannel][jCh]);
@@ -1764,7 +1764,7 @@ static void biDiagonalReductionRight_fx(
                 singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                 sing_exp[jCh] = sub(invVal_e, temp_e);
                 move16();
-#ifdef OPT_DIV_NORM
+#ifdef FIX_1010_OPT_DIV_NORM
                 temp_e = norm_l(singularVectors[currChannel][jCh]);
                 singularVectors[currChannel][jCh] = L_shl(singularVectors[currChannel][jCh], temp_e);
                 sing_exp[jCh] = sub(sing_exp[jCh], temp_e);
@@ -1801,9 +1801,9 @@ static void biDiagonalReductionRight_fx(
             singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[idx], -( *g ), 0, &sing_exp[idx] ); /* exp(sing_exp) */
             move32();
 
-#ifdef OPT_DIV
+#ifdef FIX_1010_OPT_DIV
             invVal = BASOP_Util_Inv32(maxWithSign_fx( r ), &invVal_e);
-#ifdef OPT_DIV_NORM
+#ifdef FIX_1010_OPT_DIV_NORM
             temp_e = norm_l(invVal);
             invVal = L_shl(invVal, temp_e);
             invVal_e = sub(invVal_e, temp_e);
@@ -1812,14 +1812,14 @@ static void biDiagonalReductionRight_fx(
 
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
             {
-#ifndef OPT_DIV
+#ifndef FIX_1010_OPT_DIV
                 secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */
 #else
                 temp_e = norm_l(singularVectors[currChannel][jCh]);
                 secDiag[jCh] = L_shl(singularVectors[currChannel][jCh], temp_e);
                 secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                 secDiag_exp[jCh] = sub(invVal_e, temp_e);
-#ifdef OPT_DIV_NORM
+#ifdef FIX_1010_OPT_DIV_NORM
                 temp_e = norm_l(secDiag[jCh]);
                 secDiag[jCh] = L_shl(secDiag[jCh], temp_e);
                 secDiag_exp[jCh] = sub(secDiag_exp[jCh], temp_e);
@@ -2009,7 +2009,7 @@ static void singularVectorsAccumulationLeft_fx(
 
         IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
         {
-#ifdef OPT_DIV
+#ifdef FIX_1010_OPT_DIV
             t_ii = BASOP_Util_Inv32(maxWithSign_fx(t_ii), &temp_exp);
             t_ii_e = sub(temp_exp, t_ii_e);
 #else
@@ -2027,7 +2027,7 @@ static void singularVectorsAccumulationLeft_fx(
                 {
                     norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
                 }
-#ifdef OPT_DIV
+#ifdef FIX_1010_OPT_DIV
                 t_jj = BASOP_Util_Inv32(maxWithSign_fx(singularVectors_Left[nCh][nCh]), &temp_exp);
                 t_jj = Mpy_32_32(Mpy_32_32( t_ii, norm_y ), t_jj);
                 t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
@@ -2175,7 +2175,7 @@ static void singularVectorsAccumulationRight_fx(
 
                 FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/
                 {
-#ifdef OPT_DIV
+#ifdef FIX_1010_OPT_DIV
                     ratio_float = L_deposit_h(BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 )); /* exp(temp_exp1) */
                     singularVectors_Right[iCh][nCh] = L_deposit_h(BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ));               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
 #else
@@ -2294,7 +2294,7 @@ static void singularVectorsAccumulationRight(
 
 #ifdef IVAS_FLOAT_FIXED
 
-#ifdef OPT_GIVENS_INV
+#ifdef FIX_1010_OPT_GIVENS_INV
 static void GivensRotation2_fx(
     const Word32 x, /* exp(x_e) */
     const Word16 x_e,
@@ -2326,7 +2326,7 @@ static Word32 GivensRotation_fx(
     const Word16 z_e,
     Word16 *out_e )
 {
-#ifdef OPT_GIVENS
+#ifdef FIX_1010_OPT_GIVENS
     Word32 r;
 #else
     Word32 x_abs, z_abs;
@@ -2335,7 +2335,7 @@ static Word32 GivensRotation_fx(
     Word32 L_temp;
 #endif
 
-#ifdef OPT_GIVENS
+#ifdef FIX_1010_OPT_GIVENS
     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32(z, z), shl(z_e, 1), Mpy_32_32(x, x), shl(x_e, 1), out_e );
     r = Sqrt32(r, out_e);
 #else
-- 
GitLab


From 8f9f193c013cdc5e7472c9ef7ec61ba30cb233f0 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Wed, 27 Nov 2024 11:47:22 +0100
Subject: [PATCH 08/41] Merge format check patch from merge request pipeline.

---
 lib_dec/ivas_svd_dec.c | 142 +++++++++++++++++++++--------------------
 1 file changed, 72 insertions(+), 70 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 339bf0d5d..67a38f9e3 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -714,7 +714,7 @@ static Word16 BidagonalDiagonalisation_fx(
 #ifdef FIX_1010_OPT_GIVENS_INV
                     GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */
                     c = Mpy_32_32( c, temp );
-                    c_e = add(c_e, temp_exp);
+                    c_e = add( c_e, temp_exp );
 #else
                     singularValues_fx[kCh] = GivensRotation_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_new_e[kCh] ); /* exp(singularValues_new_e) */
                     c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp );                                   /* exp(temp_exp + (c_e - singularValues_new_e)) */
@@ -1048,8 +1048,8 @@ static void ApplyQRTransform_fx(
 
 #ifdef FIX_1010_OPT_GIVENS_INV
         GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */
-        c = Mpy_32_32( d, temp);
-        c_e = add(temp_e, d_e);
+        c = Mpy_32_32( d, temp );
+        c_e = add( temp_e, d_e );
 #else
         secDiag[ch] = GivensRotation_fx( d, d_e, r, r_e, &secDiag_e[ch] ); /* exp(secDiag_e) */
         move32();
@@ -1064,7 +1064,7 @@ static void ApplyQRTransform_fx(
         }
 #ifdef FIX_1010_OPT_GIVENS_INV
         s = Mpy_32_32( r, temp );
-        s_e = add(r_e, temp_e);
+        s_e = add( r_e, temp_e );
 #else
         s = BASOP_Util_Divide3232_Scale_cadence( r, maxWithSign_fx( secDiag[ch] ), &s_e ); /* exp(s_e + (r_e - sec_Diag_e))*/
         s_e = add( s_e, sub( r_e, secDiag_e[ch] ) );
@@ -1374,31 +1374,33 @@ static void HouseholderReduction(
  *-------------------------------------------------------------------------*/
 
 #ifdef FIX_1010_OPT_DIV
-static
-Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e)
+static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e )
 {
     Word16 sign, shift;
 
     sign = 0;
     move16();
-    if (x < 0) {
+    if ( x < 0 )
+    {
         sign = 1;
     }
-    if (sign) {
-        x = L_negate(x);
+    if ( sign )
+    {
+        x = L_negate( x );
     }
 
-    shift = norm_l(x);
-    x = L_shl(x, shift);
+    shift = norm_l( x );
+    x = L_shl( x, shift );
     *px_e = 0;
     move16();
-    x = ISqrt32norm(x, px_e);
-    x = Mpy_32_32(x, x);
-    *px_e = add(shl(*px_e, 1), shift);
+    x = ISqrt32norm( x, px_e );
+    x = Mpy_32_32( x, x );
+    *px_e = add( shl( *px_e, 1 ), shift );
     move16();
 
-    if (sign) {
-        x = L_negate(x);
+    if ( sign )
+    {
+        x = L_negate( x );
     }
     return x;
 }
@@ -1472,11 +1474,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
     {
 #ifdef FIX_1010_OPT_DIV
         Word16 invVal_e, temp_e;
-        Word32 invVal = BASOP_Util_Inv32(maxWithSign_fx( *sig_x ), &invVal_e);
+        Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
 #ifdef FIX_1010_OPT_DIV_NORM
-        temp_e = norm_l(invVal);
-        invVal = L_shl(invVal, temp_e);
-        invVal_e = sub(invVal_e, temp_e);
+        temp_e = norm_l( invVal );
+        invVal = L_shl( invVal, temp_e );
+        invVal_e = sub( invVal_e, temp_e );
 #endif
 #endif
         norm_x = 0;
@@ -1488,14 +1490,14 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifndef FIX_1010_OPT_DIV
             singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
 #else
-            temp_e = norm_l(singularVectors[jCh][currChannel]);
-            singularVectors[jCh][currChannel] = L_shl(singularVectors[jCh][currChannel], temp_e);
-            singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
-            sing_exp[jCh] = sub(invVal_e, temp_e);
+            temp_e = norm_l( singularVectors[jCh][currChannel] );
+            singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e );
+            singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
+            sing_exp[jCh] = sub( invVal_e, temp_e );
 #ifdef FIX_1010_OPT_DIV_NORM
-            temp_e = norm_l(singularVectors[jCh][currChannel]);
-            singularVectors[jCh][currChannel] = L_shl(singularVectors[jCh][currChannel], temp_e);
-            sing_exp[jCh] = sub(sing_exp[jCh], temp_e);
+            temp_e = norm_l( singularVectors[jCh][currChannel] );
+            singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e );
+            sing_exp[jCh] = sub( sing_exp[jCh], temp_e );
 #endif
             move16();
 #endif
@@ -1536,11 +1538,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         move32();
 
 #ifdef FIX_1010_OPT_DIV
-        invVal = BASOP_Util_Inv32(maxWithSign_fx( r ), &invVal_e);
+        invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
 #ifdef FIX_1010_OPT_DIV_NORM
-        temp_e = norm_l(invVal);
-        invVal = L_shl(invVal, temp_e);
-        invVal_e = sub(invVal_e, temp_e);
+        temp_e = norm_l( invVal );
+        invVal = L_shl( invVal, temp_e );
+        invVal_e = sub( invVal_e, temp_e );
 #endif
 #endif
 
@@ -1559,12 +1561,12 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */
             f_e = add( f_e, sub( norm_x_e, r_e ) );
 #else
-            f = Mpy_32_32(norm_x, invVal); /* invVal_e + (norm_x_e - r_e) */
-            f_e = add(invVal_e, sub( norm_x_e, r_e ) );
+            f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
+            f_e = add( invVal_e, sub( norm_x_e, r_e ) );
 #ifdef FIX_1010_OPT_DIV_NORM
-            temp_e = norm_l(f);
-            f = L_shl(f, temp_e);
-            f_e = sub(f_e, temp_e);
+            temp_e = norm_l( f );
+            f = L_shl( f, temp_e );
+            f_e = sub( f_e, temp_e );
 #endif
 #endif
 
@@ -1747,11 +1749,11 @@ static void biDiagonalReductionRight_fx(
 
 #ifdef FIX_1010_OPT_DIV
             Word16 invVal_e, temp_e;
-            Word32 invVal = BASOP_Util_Inv32(maxWithSign_fx( *sig_x ), &invVal_e);
+            Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
 #ifdef FIX_1010_OPT_DIV_NORM
-            temp_e = norm_l(invVal);
-            invVal = L_shl(invVal, temp_e);
-            invVal_e = sub(invVal_e, temp_e);
+            temp_e = norm_l( invVal );
+            invVal = L_shl( invVal, temp_e );
+            invVal_e = sub( invVal_e, temp_e );
 #endif
 #endif
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
@@ -1759,15 +1761,15 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_DIV
                 singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
 #else
-                temp_e = norm_l(singularVectors[currChannel][jCh]);
-                singularVectors[currChannel][jCh] = L_shl(singularVectors[currChannel][jCh], temp_e);
-                singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
-                sing_exp[jCh] = sub(invVal_e, temp_e);
+                temp_e = norm_l( singularVectors[currChannel][jCh] );
+                singularVectors[currChannel][jCh] = L_shl( singularVectors[currChannel][jCh], temp_e );
+                singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
+                sing_exp[jCh] = sub( invVal_e, temp_e );
                 move16();
 #ifdef FIX_1010_OPT_DIV_NORM
-                temp_e = norm_l(singularVectors[currChannel][jCh]);
-                singularVectors[currChannel][jCh] = L_shl(singularVectors[currChannel][jCh], temp_e);
-                sing_exp[jCh] = sub(sing_exp[jCh], temp_e);
+                temp_e = norm_l( singularVectors[currChannel][jCh] );
+                singularVectors[currChannel][jCh] = L_shl( singularVectors[currChannel][jCh], temp_e );
+                sing_exp[jCh] = sub( sing_exp[jCh], temp_e );
 #endif
 #endif
                 move32();
@@ -1802,11 +1804,11 @@ static void biDiagonalReductionRight_fx(
             move32();
 
 #ifdef FIX_1010_OPT_DIV
-            invVal = BASOP_Util_Inv32(maxWithSign_fx( r ), &invVal_e);
+            invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
 #ifdef FIX_1010_OPT_DIV_NORM
-            temp_e = norm_l(invVal);
-            invVal = L_shl(invVal, temp_e);
-            invVal_e = sub(invVal_e, temp_e);
+            temp_e = norm_l( invVal );
+            invVal = L_shl( invVal, temp_e );
+            invVal_e = sub( invVal_e, temp_e );
 #endif
 #endif
 
@@ -1815,14 +1817,14 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_DIV
                 secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */
 #else
-                temp_e = norm_l(singularVectors[currChannel][jCh]);
-                secDiag[jCh] = L_shl(singularVectors[currChannel][jCh], temp_e);
-                secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
-                secDiag_exp[jCh] = sub(invVal_e, temp_e);
+                temp_e = norm_l( singularVectors[currChannel][jCh] );
+                secDiag[jCh] = L_shl( singularVectors[currChannel][jCh], temp_e );
+                secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
+                secDiag_exp[jCh] = sub( invVal_e, temp_e );
 #ifdef FIX_1010_OPT_DIV_NORM
-                temp_e = norm_l(secDiag[jCh]);
-                secDiag[jCh] = L_shl(secDiag[jCh], temp_e);
-                secDiag_exp[jCh] = sub(secDiag_exp[jCh], temp_e);
+                temp_e = norm_l( secDiag[jCh] );
+                secDiag[jCh] = L_shl( secDiag[jCh], temp_e );
+                secDiag_exp[jCh] = sub( secDiag_exp[jCh], temp_e );
 #endif
                 move16();
 #endif
@@ -2010,8 +2012,8 @@ static void singularVectorsAccumulationLeft_fx(
         IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
         {
 #ifdef FIX_1010_OPT_DIV
-            t_ii = BASOP_Util_Inv32(maxWithSign_fx(t_ii), &temp_exp);
-            t_ii_e = sub(temp_exp, t_ii_e);
+            t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp );
+            t_ii_e = sub( temp_exp, t_ii_e );
 #else
             t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */
             t_ii_e = add( 1, sub( temp_exp, t_ii_e ) );
@@ -2028,8 +2030,8 @@ static void singularVectorsAccumulationLeft_fx(
                     norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
                 }
 #ifdef FIX_1010_OPT_DIV
-                t_jj = BASOP_Util_Inv32(maxWithSign_fx(singularVectors_Left[nCh][nCh]), &temp_exp);
-                t_jj = Mpy_32_32(Mpy_32_32( t_ii, norm_y ), t_jj);
+                t_jj = BASOP_Util_Inv32( maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp );
+                t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj );
                 t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
 #else
                 t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,
@@ -2176,8 +2178,8 @@ static void singularVectorsAccumulationRight_fx(
                 FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/
                 {
 #ifdef FIX_1010_OPT_DIV
-                    ratio_float = L_deposit_h(BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 )); /* exp(temp_exp1) */
-                    singularVectors_Right[iCh][nCh] = L_deposit_h(BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ));               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
+                    ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */
+                    singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
 #else
                     ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */
                     singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
@@ -2307,14 +2309,14 @@ static void GivensRotation2_fx(
 {
     Word32 r;
 
-    r = BASOP_Util_Add_Mant32Exp( Mpy_32_32(z, z), shl(z_e, 1), Mpy_32_32(x, x), shl(x_e, 1), out_e );
-    r = L_max(r, 1);
+    r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
+    r = L_max( r, 1 );
     *outInv_e = *out_e;
     move16();
-    *result = Sqrt32(r, out_e);
+    *result = Sqrt32( r, out_e );
     move32();
 
-    *resultInv = ISqrt32(r, outInv_e);
+    *resultInv = ISqrt32( r, outInv_e );
     move32();
 }
 #endif
@@ -2336,8 +2338,8 @@ static Word32 GivensRotation_fx(
 #endif
 
 #ifdef FIX_1010_OPT_GIVENS
-    r = BASOP_Util_Add_Mant32Exp( Mpy_32_32(z, z), shl(z_e, 1), Mpy_32_32(x, x), shl(x_e, 1), out_e );
-    r = Sqrt32(r, out_e);
+    r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
+    r = Sqrt32( r, out_e );
 #else
     x_abs = L_abs( x );
     z_abs = L_abs( z );
-- 
GitLab


From 578055a56aff640c37008251a37a772bd4bc371e Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Thu, 5 Dec 2024 18:40:13 +0100
Subject: [PATCH 09/41] Use alpha max plus beta min approximation for Givens
 Rotation. This algorithm requires neither squaring nor a square root and is
 hopefully numerically more stable, but requires a data table whose size
 determines the precision. The pipeline result will tell if this has any
 future.

---
 lib_dec/ivas_svd_dec.c | 296 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 279 insertions(+), 17 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 67a38f9e3..6386bf82d 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -65,10 +65,10 @@
 
 #if 1
 #define FIX_1010_OPT_DIV
-#define FIX_1010_OPT_DIV_NORM /* precision improvement */
 
 #define FIX_1010_OPT_GIVENS
 #define FIX_1010_OPT_GIVENS_INV
+#define FIX_1010_OPT_GIVENS_AMAX_BMIN
 #endif
 
 /*-----------------------------------------------------------------------*
@@ -409,6 +409,204 @@ void svdMat2mat(
 }
 #endif
 
+//#define MORE_DEBUG
+
+#ifdef MORE_DEBUG
+
+#if (MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS)
+#define MAX_MATRIX MAX_INPUT_CHANNELS
+#else
+#define MAX_MATRIX MAX_OUTPUT_CHANNELS
+#endif
+
+static void matrixFx2Fl(
+    float r[][MAX_MATRIX],
+    const Word32 a[][MAX_MATRIX],
+    const Word16 a_e[MAX_MATRIX],
+    const int adim1,
+    const int adim2)
+{
+    for (int i1=0; i1<adim1; i1++) {
+        for (int i2=0; i2<adim2; i2++) {
+            r[i1][i2] = (float)a[i1][i2] * powf(2.f, a_e[i2]-31);
+        }
+    }
+}
+
+static void matrixProduct(
+    float r[][MAX_MATRIX],
+    const float a[][MAX_MATRIX],
+    const float b[][MAX_MATRIX],
+    const int adim1,
+    const int adim2,
+    const int bdim1,
+    const int bdim2)
+{
+    assert(adim2 == bdim1);
+
+    for (int i1=0; i1<adim1; i1++) {
+        for (int i2=0; i2<bdim2; i2++) {
+            r[i1][i2] = 0.f;
+            for (int i3=0; i3<bdim1; i3++) {
+                r[i1][i2] += a[i1][i3] * b[i3][i2];
+            }
+        }
+    }
+}
+
+static void matrixTranspose(
+    float r[][MAX_MATRIX],
+    const float a[][MAX_MATRIX],
+    const int adim1,
+    const int adim2)
+{
+    for (int i1=0; i1<adim1; i1++) {
+        for (int i2=0; i2<adim2; i2++) {
+            r[i2][i1] = a[i1][i2];
+        }
+    }
+}
+
+static void matrixDiagonal(
+    float r[][MAX_MATRIX],
+    const float a[MAX_MATRIX],
+    const int dim)
+{
+    for (int i1=0; i1<dim; i1++) {
+        for (int i2=0; i2<dim; i2++) {
+            r[i1][i2] = 0;
+        }
+        r[i1][i1] = a[i1];
+    }
+}
+
+static float matrixDifference(
+    const float a[][MAX_MATRIX],
+    const float b[][MAX_MATRIX],
+    const int dim1,
+    const int dim2)
+{
+    float r = 0.f;
+
+    for (int i1=0; i1<dim1; i1++) {
+        for (int i2=0; i2<dim2; i2++) {
+            r += fabsf((b[i1][i2] - a[i1][i2])/a[i1][i2]);
+        }
+    }
+
+    return r/(float)(dim1*dim2);
+}
+
+static void matrixPrint(
+    const float a[][MAX_MATRIX],
+    const int dim1,
+    const int dim2,
+    const char *name)
+{
+    printf("Matrix %s[%d][%d] = \n", name, dim1, dim2);
+    for (int i1=0; i1<dim1; i1++) {
+        for (int i2=0; i2<dim2; i2++) {
+            printf("%f, ", a[i1][i2]);
+        }
+        printf("\n");
+    }
+}
+
+static float matrixTestIdentity(
+    const float a[][MAX_MATRIX],
+    const int dim)
+{
+    float r = 0.f;
+
+    for (int i1=0; i1<dim; i1++) {
+        for (int i2=0; i2<dim; i2++) {
+            if (i1 == i2) {
+                r += fabsf(1.f - a[i1][i2]);
+            } else {
+                r += fabsf(0.f - a[i1][i2]);
+            }
+        }
+    }
+
+    return r;
+}
+
+static void svd_accuracy_test_fx(
+    Word32 InputMatrixFx[][MAX_OUTPUT_CHANNELS], /* i  : matrix to be decomposed (M)            InputMatrix_e*/
+    Word16 InputMatrixFx_e,
+    Word32 singularVectors_LeftFx[][MAX_OUTPUT_CHANNELS],  /* o  : left singular vectors (U)			Q31 */
+    Word32 singularValuesFx[MAX_OUTPUT_CHANNELS],          /* o  : singular values vector (S)         singularValues_fx_e*/
+    Word32 singularVectors_RightFx[][MAX_OUTPUT_CHANNELS], /* o  : right singular vectors (V)			Q31 */
+    Word16 singularValuesFx_e[MAX_OUTPUT_CHANNELS],
+    const Word16 nChannelsL, /* i  : number of rows in the matrix to be decomposed		Q0*/
+    const Word16 nChannelsC  /* i  : number of columns in the matrix to be decomposed	Q0*/
+    )
+{
+    float tmp1[MAX_MATRIX][MAX_MATRIX];
+    float tmp2[MAX_MATRIX][MAX_MATRIX];
+    float tmp3[MAX_MATRIX][MAX_MATRIX];
+    float InputMatrix[MAX_MATRIX][MAX_MATRIX];
+
+    Word16 singularValuesFx2_e[MAX_OUTPUT_CHANNELS];
+
+    float singularVectors_Left[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+    float singularValues[MAX_MATRIX];
+    float singularValuesMatrix[MAX_MATRIX][MAX_MATRIX];
+    float singularVectors_Right[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+    float result;
+    int dimSingular;
+
+        /* Convert to float and Create singular values matrix from signular values vector */
+        for (int x=0; x<MAX_MATRIX; x++) singularValuesFx2_e[x] = InputMatrixFx_e;
+        matrixFx2Fl(InputMatrix, InputMatrixFx, singularValuesFx2_e, nChannelsL, nChannelsC);
+        dimSingular = min(nChannelsL, nChannelsC);
+        matrixFx2Fl(singularValues, singularValuesFx, singularValuesFx_e, 1, nChannelsC);
+        for (int x=0; x<MAX_MATRIX; x++) singularValuesFx2_e[x] = 0 ;
+        matrixFx2Fl(singularVectors_Left, singularVectors_LeftFx, singularValuesFx2_e, nChannelsL, nChannelsC);
+        matrixFx2Fl(singularVectors_Right, singularVectors_RightFx, singularValuesFx2_e, nChannelsC, nChannelsC);
+        matrixDiagonal(singularValuesMatrix, singularValues, dimSingular); /* CxC */
+
+#ifdef MORE_DEBUG
+        matrixPrint(InputMatrix, nChannelsL, nChannelsC, "A");
+        printf("Result of svd() \n");
+        matrixPrint(singularVectors_Left, nChannelsL, nChannelsC, "U");
+        matrixPrint(singularValuesMatrix, nChannelsC, nChannelsC, "S");
+        matrixPrint(singularVectors_Right, nChannelsC, nChannelsC, "V");
+#endif
+
+        printf("\nResult quality tests\n\n");
+
+        /* Test U' * U == I */
+        matrixTranspose(tmp1, singularVectors_Left, nChannelsL, nChannelsC); /* CxL */
+        matrixProduct(tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC); /* CxC */
+        result = matrixTestIdentity(tmp2, nChannelsC);
+#ifdef MORE_DEBUG
+        matrixPrint(tmp2, nChannelsC, nChannelsC, "U\'*U");
+#endif
+        printf("U' * U difference to I is %f\n", result);
+
+        /* Test V * V' == I */
+        matrixTranspose(tmp1, singularVectors_Right, nChannelsC, nChannelsC); /* CxC */
+        matrixProduct(tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC); /* CxC */
+        result = matrixTestIdentity(tmp2, nChannelsC);
+#ifdef MORE_DEBUG
+        matrixPrint(tmp2, nChannelsC, nChannelsC, "V*V\'");
+#endif
+        printf("V * V' difference to I is %f\n", result);
+
+        /* Test InputMatrix == U * S * V' */
+        matrixProduct(tmp1, singularVectors_Left, singularValuesMatrix, nChannelsL, nChannelsC, dimSingular, dimSingular); /* LxC */
+        matrixTranspose(tmp3, singularVectors_Right, nChannelsC, nChannelsC); /* CxC */
+        matrixProduct(tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC); /* LxC */
+        result = matrixDifference(tmp2, InputMatrix, nChannelsL, nChannelsC);
+#ifdef MORE_DEBUG
+        matrixPrint(tmp2, nChannelsL, nChannelsC, "U*S*V\'");
+#endif
+        printf("U * S * V' difference to M is %f\n", result);
+
+}
+#endif
+
 /*-------------------------------------------------------------------------
  * svd()
  *
@@ -518,6 +716,18 @@ Word16 svd_fx(
     WHILE( EQ_16( condition, 1 ) );
 
     pop_wmops();
+#ifdef MORE_DEBUG
+    svd_accuracy_test_fx(
+        InputMatrix,
+        InputMatrix_e,
+        singularVectors_Left_fx,
+        singularValues_fx,
+        singularVectors_Right_fx,
+        singularValues_fx_e,
+        nChannelsL,
+        nChannelsC
+    );
+#endif
     return ( errorMessage );
 }
 
@@ -1475,11 +1685,9 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifdef FIX_1010_OPT_DIV
         Word16 invVal_e, temp_e;
         Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
-#ifdef FIX_1010_OPT_DIV_NORM
         temp_e = norm_l( invVal );
         invVal = L_shl( invVal, temp_e );
         invVal_e = sub( invVal_e, temp_e );
-#endif
 #endif
         norm_x = 0;
         move32();
@@ -1494,11 +1702,9 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e );
             singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
             sing_exp[jCh] = sub( invVal_e, temp_e );
-#ifdef FIX_1010_OPT_DIV_NORM
             temp_e = norm_l( singularVectors[jCh][currChannel] );
             singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e );
             sing_exp[jCh] = sub( sing_exp[jCh], temp_e );
-#endif
             move16();
 #endif
             move32();
@@ -1539,11 +1745,9 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 
 #ifdef FIX_1010_OPT_DIV
         invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
-#ifdef FIX_1010_OPT_DIV_NORM
         temp_e = norm_l( invVal );
         invVal = L_shl( invVal, temp_e );
         invVal_e = sub( invVal_e, temp_e );
-#endif
 #endif
 
         FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
@@ -1563,11 +1767,9 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #else
             f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
             f_e = add( invVal_e, sub( norm_x_e, r_e ) );
-#ifdef FIX_1010_OPT_DIV_NORM
             temp_e = norm_l( f );
             f = L_shl( f, temp_e );
             f_e = sub( f_e, temp_e );
-#endif
 #endif
 
             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
@@ -1750,11 +1952,9 @@ static void biDiagonalReductionRight_fx(
 #ifdef FIX_1010_OPT_DIV
             Word16 invVal_e, temp_e;
             Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
-#ifdef FIX_1010_OPT_DIV_NORM
             temp_e = norm_l( invVal );
             invVal = L_shl( invVal, temp_e );
             invVal_e = sub( invVal_e, temp_e );
-#endif
 #endif
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
             {
@@ -1766,11 +1966,9 @@ static void biDiagonalReductionRight_fx(
                 singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                 sing_exp[jCh] = sub( invVal_e, temp_e );
                 move16();
-#ifdef FIX_1010_OPT_DIV_NORM
                 temp_e = norm_l( singularVectors[currChannel][jCh] );
                 singularVectors[currChannel][jCh] = L_shl( singularVectors[currChannel][jCh], temp_e );
                 sing_exp[jCh] = sub( sing_exp[jCh], temp_e );
-#endif
 #endif
                 move32();
                 sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
@@ -1805,11 +2003,9 @@ static void biDiagonalReductionRight_fx(
 
 #ifdef FIX_1010_OPT_DIV
             invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
-#ifdef FIX_1010_OPT_DIV_NORM
             temp_e = norm_l( invVal );
             invVal = L_shl( invVal, temp_e );
             invVal_e = sub( invVal_e, temp_e );
-#endif
 #endif
 
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
@@ -1821,11 +2017,9 @@ static void biDiagonalReductionRight_fx(
                 secDiag[jCh] = L_shl( singularVectors[currChannel][jCh], temp_e );
                 secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                 secDiag_exp[jCh] = sub( invVal_e, temp_e );
-#ifdef FIX_1010_OPT_DIV_NORM
                 temp_e = norm_l( secDiag[jCh] );
                 secDiag[jCh] = L_shl( secDiag[jCh], temp_e );
                 secDiag_exp[jCh] = sub( secDiag_exp[jCh], temp_e );
-#endif
                 move16();
 #endif
                 move32();
@@ -2296,6 +2490,46 @@ static void singularVectorsAccumulationRight(
 
 #ifdef IVAS_FLOAT_FIXED
 
+#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
+#define NUM_REGIONS 1024
+static Word32 alphaBeta[NUM_REGIONS][2];
+static void get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta)
+{
+    static int init = 0;
+
+    if (init == 0) {
+        for (int i=0; i<NUM_REGIONS; i++) {
+            double thetaS, thetaE, thetaM;
+
+            thetaS = M_PI/4. * (double)i/(double)NUM_REGIONS;
+            thetaE = M_PI/4. * (double)(i+1)/(double)NUM_REGIONS;
+            thetaM = M_PI/4. * ((double)i+0.5)/(double)NUM_REGIONS;
+            //alphaBeta[i][0] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM)));
+            //alphaBeta[i][1] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM)) * tan((thetaS+thetaE)/2.));
+            alphaBeta[i][0] = FL2WORD32(2./( ((sin(thetaM) + sin(thetaS))*tan((thetaS+thetaE)/2.)) + cos(thetaM) + cos(thetaS)));
+            alphaBeta[i][1] = FL2WORD32(2./( ((sin(thetaM) + sin(thetaS))*tan((thetaS+thetaE)/2.)) + cos(thetaM) + cos(thetaS)) * tan((thetaS+thetaE)/2.) );
+        }
+        init = 1;
+    }
+    Word16 r, shift;
+#if 0
+    float pf, qf;
+    pf = (float)p * powf(2.f, p_e-31);
+    qf = (float)q * powf(2.f, q_e-31);
+    r = floor((double)NUM_REGIONS * 4. * atan2f(qf, pf)/M_PI);
+#else
+    shift = sub(p_e, q_e);
+    r = mult_r( atan2_fx(L_shr(q, s_max(0, shift)), L_shr(p, s_max(0, negate(shift)))), FL2WORD16_SCALE((float)NUM_REGIONS*4./M_PI, 14));
+#endif
+    if (r == NUM_REGIONS) {
+        r =  NUM_REGIONS-1;
+    }
+    assert((r >= 0) && (r < NUM_REGIONS));
+    *alpha = alphaBeta[r][0];
+    *beta = alphaBeta[r][1];
+}
+#endif
+
 #ifdef FIX_1010_OPT_GIVENS_INV
 static void GivensRotation2_fx(
     const Word32 x, /* exp(x_e) */
@@ -2308,7 +2542,32 @@ static void GivensRotation2_fx(
     Word16 *outInv_e )
 {
     Word32 r;
+#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
+    Word32 az, ax, a, b;
+
+    ax = L_abs(x);
+    az = L_abs(z);
+    IF (BASOP_Util_Cmp_Mant32Exp(ax, x_e, az, z_e) > 0) {
+        get_alpha_beta(ax, x_e, az, z_e, &a, &b);
+        r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(ax, a), x_e, Mpy_32_32(az, b), z_e, out_e);
+    } ELSE {
+        get_alpha_beta(az, z_e, ax, x_e, &a, &b);
+        r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(az, a), z_e, Mpy_32_32(ax, b), x_e, out_e);
+    }
+    *result = r;
+    move32();
+#if 1
+    *outInv_e = shl(*out_e, 1);
+    *resultInv = ISqrt32( Mpy_32_32(r, r), outInv_e );
+    move32();
+#else
+    *resultInv = L_deposit_h(BASOP_Util_Divide3232_Scale(MAX_32, r, outInv_e));
+    move32();
+    *outInv_e = sub(*outInv_e, *out_e);
+    move16();
+#endif
 
+#else
     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
     r = L_max( r, 1 );
     *outInv_e = *out_e;
@@ -2318,6 +2577,9 @@ static void GivensRotation2_fx(
 
     *resultInv = ISqrt32( r, outInv_e );
     move32();
+#endif
+
+    pop_wmops();
 }
 #endif
 
-- 
GitLab


From b324bfbc47d77ad6236c24d25cc888f34310b283 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Thu, 5 Dec 2024 18:47:37 +0100
Subject: [PATCH 10/41] Fix missing include and second Givens Rotation case.

---
 lib_dec/ivas_svd_dec.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 6386bf82d..d64901834 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -2491,6 +2491,7 @@ static void singularVectorsAccumulationRight(
 #ifdef IVAS_FLOAT_FIXED
 
 #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
+#include <math.h> /* for M_PI */
 #define NUM_REGIONS 1024
 static Word32 alphaBeta[NUM_REGIONS][2];
 static void get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta)
@@ -2600,8 +2601,22 @@ static Word32 GivensRotation_fx(
 #endif
 
 #ifdef FIX_1010_OPT_GIVENS
+#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
+    Word32 az, ax, a, b;
+
+    ax = L_abs(x);
+    az = L_abs(z);
+    IF (BASOP_Util_Cmp_Mant32Exp(ax, x_e, az, z_e) > 0) {
+        get_alpha_beta(ax, x_e, az, z_e, &a, &b);
+        r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(ax, a), x_e, Mpy_32_32(az, b), z_e, out_e);
+    } ELSE {
+        get_alpha_beta(az, z_e, ax, x_e, &a, &b);
+        r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(az, a), z_e, Mpy_32_32(ax, b), x_e, out_e);
+    }
+#else
     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
     r = Sqrt32( r, out_e );
+#endif
 #else
     x_abs = L_abs( x );
     z_abs = L_abs( z );
-- 
GitLab


From d46a16dbffd5301cd8ab0f0d06bb4b59b486f774 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Thu, 5 Dec 2024 18:56:19 +0100
Subject: [PATCH 11/41] define M_PI for the time being.

---
 lib_dec/ivas_svd_dec.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index d64901834..b9ab85205 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -2491,7 +2491,9 @@ static void singularVectorsAccumulationRight(
 #ifdef IVAS_FLOAT_FIXED
 
 #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
-#include <math.h> /* for M_PI */
+#ifndef M_PI
+#define M_PI 3.141592653589793 
+#endif
 #define NUM_REGIONS 1024
 static Word32 alphaBeta[NUM_REGIONS][2];
 static void get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta)
-- 
GitLab


From 95b2b53084d111db3d9957e2c36bda3bfdd3ecb9 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Mon, 9 Dec 2024 12:51:28 +0100
Subject: [PATCH 12/41] Fix: remove stray pop_wmops

---
 lib_dec/ivas_svd_dec.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 1d9e1732e..2712042c0 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -1860,8 +1860,6 @@ static void GivensRotation2_fx(
     *resultInv = ISqrt32( r, outInv_e );
     move32();
 #endif
-
-    pop_wmops();
 }
 #endif
 
-- 
GitLab


From ee9a52242e0a6427501673b99df94b0087510a71 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Tue, 10 Dec 2024 16:36:50 +0100
Subject: [PATCH 13/41] Disable FIX_1010_OPT_DIV for testing.

---
 lib_dec/ivas_svd_dec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 2712042c0..b1b8dbe70 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -52,7 +52,7 @@
 #define CONVERGENCE_FACTOR_FX       214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */
 
 #if 1
-#define FIX_1010_OPT_DIV
+//#define FIX_1010_OPT_DIV
 
 #define FIX_1010_OPT_GIVENS
 #define FIX_1010_OPT_GIVENS_INV
-- 
GitLab


From 707aa4289029202111b30c3204c1d6c8efbed650 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Wed, 11 Dec 2024 17:44:57 +0100
Subject: [PATCH 14/41] Activate division optimizations except one which for
 some reason causes more error in the test set. Optimize get_alpha_beta()
 index calculation for more precision and fewer WMOPS.

---
 lib_dec/ivas_svd_dec.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index b1b8dbe70..07c2b3100 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -52,7 +52,7 @@
 #define CONVERGENCE_FACTOR_FX       214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */
 
 #if 1
-//#define FIX_1010_OPT_DIV
+#define FIX_1010_OPT_DIV
 
 #define FIX_1010_OPT_GIVENS
 #define FIX_1010_OPT_GIVENS_INV
@@ -1430,7 +1430,7 @@ static void biDiagonalReductionRight_fx(
 #endif
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
             {
-#ifndef FIX_1010_OPT_DIV
+#ifndef FIX_1010_OPT_DIV_no
                 singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
 #else
                 temp_e = norm_l( singularVectors[currChannel][jCh] );
@@ -1773,7 +1773,7 @@ static void singularVectorsAccumulationRight_fx(
 #ifndef M_PI
 #define M_PI 3.141592653589793 
 #endif
-#define NUM_REGIONS 1024
+#define NUM_REGIONS 32
 static Word32 alphaBeta[NUM_REGIONS][2];
 static void get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta)
 {
@@ -1799,13 +1799,22 @@ static void get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *a
     pf = (float)p * powf(2.f, p_e-31);
     qf = (float)q * powf(2.f, q_e-31);
     r = floor((double)NUM_REGIONS * 4. * atan2f(qf, pf)/M_PI);
-#else
-    shift = sub(p_e, q_e);
-    r = mult_r( atan2_fx(L_shr(q, s_max(0, shift)), L_shr(p, s_max(0, negate(shift)))), FL2WORD16_SCALE((float)NUM_REGIONS*4./M_PI, 14));
-#endif
-    if (r == NUM_REGIONS) {
+    if (r >= NUM_REGIONS) {
         r =  NUM_REGIONS-1;
     }
+#elif 1
+    shift = sub(norm_l(q),1);
+    q = L_shl(q, shift);
+    q_e = sub(q_e, shift);
+    shift = norm_l(p);
+    p = L_shl(p, shift);
+    p_e = sub(p_e, shift);
+    shift = sub(q_e, p_e);
+    r = shl(div_s(extract_h(q), extract_h(p)), shift);
+    /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */
+    r = add(add(mult(mult(r,r), FL2WORD16_SCALE(-3.672563685340096e-01, 3)), mult(r, FL2WORD16_SCALE(1.375369641423651e+00, 3))), FL2WORD16_SCALE(-6.529424378422714e-03, 3));
+    r = s_min(s_max(0, shr(r, 4+3)), NUM_REGIONS-1);
+#endif
     assert((r >= 0) && (r < NUM_REGIONS));
     *alpha = alphaBeta[r][0];
     *beta = alphaBeta[r][1];
@@ -1840,7 +1849,7 @@ static void GivensRotation2_fx(
     move32();
 #if 1
     *outInv_e = shl(*out_e, 1);
-    *resultInv = ISqrt32( Mpy_32_32(r, r), outInv_e );
+    *resultInv = ISqrt32( L_max(1, Mpy_32_32(r, r)), outInv_e );
     move32();
 #else
     *resultInv = L_deposit_h(BASOP_Util_Divide3232_Scale(MAX_32, r, outInv_e));
-- 
GitLab


From adacb91ce9de16704331eecda0f3e09ea3c1600a Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Thu, 12 Dec 2024 13:46:11 +0100
Subject: [PATCH 15/41] Apply clang-format. Disable
 FIX_1010_OPT_GIVENS_AMAX_BMIN and reactivate all FIX_1010_OPT_DIV for
 testing.

---
 lib_dec/ivas_svd_dec.c | 258 +++++++++++++++++++++++------------------
 1 file changed, 142 insertions(+), 116 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 07c2b3100..da9140857 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -56,7 +56,7 @@
 
 #define FIX_1010_OPT_GIVENS
 #define FIX_1010_OPT_GIVENS_INV
-#define FIX_1010_OPT_GIVENS_AMAX_BMIN
+//#define FIX_1010_OPT_GIVENS_AMAX_BMIN
 #endif
 
 /*-----------------------------------------------------------------------*
@@ -274,11 +274,11 @@ void svdMat2mat_fx(
     return;
 }
 
-//#define MORE_DEBUG
+// #define MORE_DEBUG
 
 #ifdef MORE_DEBUG
 
-#if (MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS)
+#if ( MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS )
 #define MAX_MATRIX MAX_INPUT_CHANNELS
 #else
 #define MAX_MATRIX MAX_OUTPUT_CHANNELS
@@ -289,11 +289,13 @@ static void matrixFx2Fl(
     const Word32 a[][MAX_MATRIX],
     const Word16 a_e[MAX_MATRIX],
     const int adim1,
-    const int adim2)
+    const int adim2 )
 {
-    for (int i1=0; i1<adim1; i1++) {
-        for (int i2=0; i2<adim2; i2++) {
-            r[i1][i2] = (float)a[i1][i2] * powf(2.f, a_e[i2]-31);
+    for ( int i1 = 0; i1 < adim1; i1++ )
+    {
+        for ( int i2 = 0; i2 < adim2; i2++ )
+        {
+            r[i1][i2] = (float) a[i1][i2] * powf( 2.f, a_e[i2] - 31 );
         }
     }
 }
@@ -305,14 +307,17 @@ static void matrixProduct(
     const int adim1,
     const int adim2,
     const int bdim1,
-    const int bdim2)
+    const int bdim2 )
 {
-    assert(adim2 == bdim1);
+    assert( adim2 == bdim1 );
 
-    for (int i1=0; i1<adim1; i1++) {
-        for (int i2=0; i2<bdim2; i2++) {
+    for ( int i1 = 0; i1 < adim1; i1++ )
+    {
+        for ( int i2 = 0; i2 < bdim2; i2++ )
+        {
             r[i1][i2] = 0.f;
-            for (int i3=0; i3<bdim1; i3++) {
+            for ( int i3 = 0; i3 < bdim1; i3++ )
+            {
                 r[i1][i2] += a[i1][i3] * b[i3][i2];
             }
         }
@@ -323,10 +328,12 @@ static void matrixTranspose(
     float r[][MAX_MATRIX],
     const float a[][MAX_MATRIX],
     const int adim1,
-    const int adim2)
+    const int adim2 )
 {
-    for (int i1=0; i1<adim1; i1++) {
-        for (int i2=0; i2<adim2; i2++) {
+    for ( int i1 = 0; i1 < adim1; i1++ )
+    {
+        for ( int i2 = 0; i2 < adim2; i2++ )
+        {
             r[i2][i1] = a[i1][i2];
         }
     }
@@ -335,10 +342,12 @@ static void matrixTranspose(
 static void matrixDiagonal(
     float r[][MAX_MATRIX],
     const float a[MAX_MATRIX],
-    const int dim)
+    const int dim )
 {
-    for (int i1=0; i1<dim; i1++) {
-        for (int i2=0; i2<dim; i2++) {
+    for ( int i1 = 0; i1 < dim; i1++ )
+    {
+        for ( int i2 = 0; i2 < dim; i2++ )
+        {
             r[i1][i2] = 0;
         }
         r[i1][i1] = a[i1];
@@ -349,46 +358,55 @@ static float matrixDifference(
     const float a[][MAX_MATRIX],
     const float b[][MAX_MATRIX],
     const int dim1,
-    const int dim2)
+    const int dim2 )
 {
     float r = 0.f;
 
-    for (int i1=0; i1<dim1; i1++) {
-        for (int i2=0; i2<dim2; i2++) {
-            r += fabsf((b[i1][i2] - a[i1][i2])/a[i1][i2]);
+    for ( int i1 = 0; i1 < dim1; i1++ )
+    {
+        for ( int i2 = 0; i2 < dim2; i2++ )
+        {
+            r += fabsf( ( b[i1][i2] - a[i1][i2] ) / a[i1][i2] );
         }
     }
 
-    return r/(float)(dim1*dim2);
+    return r / (float) ( dim1 * dim2 );
 }
 
 static void matrixPrint(
     const float a[][MAX_MATRIX],
     const int dim1,
     const int dim2,
-    const char *name)
+    const char *name )
 {
-    printf("Matrix %s[%d][%d] = \n", name, dim1, dim2);
-    for (int i1=0; i1<dim1; i1++) {
-        for (int i2=0; i2<dim2; i2++) {
-            printf("%f, ", a[i1][i2]);
+    printf( "Matrix %s[%d][%d] = \n", name, dim1, dim2 );
+    for ( int i1 = 0; i1 < dim1; i1++ )
+    {
+        for ( int i2 = 0; i2 < dim2; i2++ )
+        {
+            printf( "%f, ", a[i1][i2] );
         }
-        printf("\n");
+        printf( "\n" );
     }
 }
 
 static float matrixTestIdentity(
     const float a[][MAX_MATRIX],
-    const int dim)
+    const int dim )
 {
     float r = 0.f;
 
-    for (int i1=0; i1<dim; i1++) {
-        for (int i2=0; i2<dim; i2++) {
-            if (i1 == i2) {
-                r += fabsf(1.f - a[i1][i2]);
-            } else {
-                r += fabsf(0.f - a[i1][i2]);
+    for ( int i1 = 0; i1 < dim; i1++ )
+    {
+        for ( int i2 = 0; i2 < dim; i2++ )
+        {
+            if ( i1 == i2 )
+            {
+                r += fabsf( 1.f - a[i1][i2] );
+            }
+            else
+            {
+                r += fabsf( 0.f - a[i1][i2] );
             }
         }
     }
@@ -405,7 +423,7 @@ static void svd_accuracy_test_fx(
     Word16 singularValuesFx_e[MAX_OUTPUT_CHANNELS],
     const Word16 nChannelsL, /* i  : number of rows in the matrix to be decomposed		Q0*/
     const Word16 nChannelsC  /* i  : number of columns in the matrix to be decomposed	Q0*/
-    )
+)
 {
     float tmp1[MAX_MATRIX][MAX_MATRIX];
     float tmp2[MAX_MATRIX][MAX_MATRIX];
@@ -421,54 +439,55 @@ static void svd_accuracy_test_fx(
     float result;
     int dimSingular;
 
-        /* Convert to float and Create singular values matrix from signular values vector */
-        for (int x=0; x<MAX_MATRIX; x++) singularValuesFx2_e[x] = InputMatrixFx_e;
-        matrixFx2Fl(InputMatrix, InputMatrixFx, singularValuesFx2_e, nChannelsL, nChannelsC);
-        dimSingular = min(nChannelsL, nChannelsC);
-        matrixFx2Fl(singularValues, singularValuesFx, singularValuesFx_e, 1, nChannelsC);
-        for (int x=0; x<MAX_MATRIX; x++) singularValuesFx2_e[x] = 0 ;
-        matrixFx2Fl(singularVectors_Left, singularVectors_LeftFx, singularValuesFx2_e, nChannelsL, nChannelsC);
-        matrixFx2Fl(singularVectors_Right, singularVectors_RightFx, singularValuesFx2_e, nChannelsC, nChannelsC);
-        matrixDiagonal(singularValuesMatrix, singularValues, dimSingular); /* CxC */
+    /* Convert to float and Create singular values matrix from signular values vector */
+    for ( int x = 0; x < MAX_MATRIX; x++ )
+        singularValuesFx2_e[x] = InputMatrixFx_e;
+    matrixFx2Fl( InputMatrix, InputMatrixFx, singularValuesFx2_e, nChannelsL, nChannelsC );
+    dimSingular = min( nChannelsL, nChannelsC );
+    matrixFx2Fl( singularValues, singularValuesFx, singularValuesFx_e, 1, nChannelsC );
+    for ( int x = 0; x < MAX_MATRIX; x++ )
+        singularValuesFx2_e[x] = 0;
+    matrixFx2Fl( singularVectors_Left, singularVectors_LeftFx, singularValuesFx2_e, nChannelsL, nChannelsC );
+    matrixFx2Fl( singularVectors_Right, singularVectors_RightFx, singularValuesFx2_e, nChannelsC, nChannelsC );
+    matrixDiagonal( singularValuesMatrix, singularValues, dimSingular ); /* CxC */
 
 #ifdef MORE_DEBUG
-        matrixPrint(InputMatrix, nChannelsL, nChannelsC, "A");
-        printf("Result of svd() \n");
-        matrixPrint(singularVectors_Left, nChannelsL, nChannelsC, "U");
-        matrixPrint(singularValuesMatrix, nChannelsC, nChannelsC, "S");
-        matrixPrint(singularVectors_Right, nChannelsC, nChannelsC, "V");
+    matrixPrint( InputMatrix, nChannelsL, nChannelsC, "A" );
+    printf( "Result of svd() \n" );
+    matrixPrint( singularVectors_Left, nChannelsL, nChannelsC, "U" );
+    matrixPrint( singularValuesMatrix, nChannelsC, nChannelsC, "S" );
+    matrixPrint( singularVectors_Right, nChannelsC, nChannelsC, "V" );
 #endif
 
-        printf("\nResult quality tests\n\n");
+    printf( "\nResult quality tests\n\n" );
 
-        /* Test U' * U == I */
-        matrixTranspose(tmp1, singularVectors_Left, nChannelsL, nChannelsC); /* CxL */
-        matrixProduct(tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC); /* CxC */
-        result = matrixTestIdentity(tmp2, nChannelsC);
+    /* Test U' * U == I */
+    matrixTranspose( tmp1, singularVectors_Left, nChannelsL, nChannelsC );                             /* CxL */
+    matrixProduct( tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC ); /* CxC */
+    result = matrixTestIdentity( tmp2, nChannelsC );
 #ifdef MORE_DEBUG
-        matrixPrint(tmp2, nChannelsC, nChannelsC, "U\'*U");
+    matrixPrint( tmp2, nChannelsC, nChannelsC, "U\'*U" );
 #endif
-        printf("U' * U difference to I is %f\n", result);
+    printf( "U' * U difference to I is %f\n", result );
 
-        /* Test V * V' == I */
-        matrixTranspose(tmp1, singularVectors_Right, nChannelsC, nChannelsC); /* CxC */
-        matrixProduct(tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC); /* CxC */
-        result = matrixTestIdentity(tmp2, nChannelsC);
+    /* Test V * V' == I */
+    matrixTranspose( tmp1, singularVectors_Right, nChannelsC, nChannelsC );                             /* CxC */
+    matrixProduct( tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC ); /* CxC */
+    result = matrixTestIdentity( tmp2, nChannelsC );
 #ifdef MORE_DEBUG
-        matrixPrint(tmp2, nChannelsC, nChannelsC, "V*V\'");
+    matrixPrint( tmp2, nChannelsC, nChannelsC, "V*V\'" );
 #endif
-        printf("V * V' difference to I is %f\n", result);
+    printf( "V * V' difference to I is %f\n", result );
 
-        /* Test InputMatrix == U * S * V' */
-        matrixProduct(tmp1, singularVectors_Left, singularValuesMatrix, nChannelsL, nChannelsC, dimSingular, dimSingular); /* LxC */
-        matrixTranspose(tmp3, singularVectors_Right, nChannelsC, nChannelsC); /* CxC */
-        matrixProduct(tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC); /* LxC */
-        result = matrixDifference(tmp2, InputMatrix, nChannelsL, nChannelsC);
+    /* Test InputMatrix == U * S * V' */
+    matrixProduct( tmp1, singularVectors_Left, singularValuesMatrix, nChannelsL, nChannelsC, dimSingular, dimSingular ); /* LxC */
+    matrixTranspose( tmp3, singularVectors_Right, nChannelsC, nChannelsC );                                              /* CxC */
+    matrixProduct( tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC );                                  /* LxC */
+    result = matrixDifference( tmp2, InputMatrix, nChannelsL, nChannelsC );
 #ifdef MORE_DEBUG
-        matrixPrint(tmp2, nChannelsL, nChannelsC, "U*S*V\'");
+    matrixPrint( tmp2, nChannelsL, nChannelsC, "U*S*V\'" );
 #endif
-        printf("U * S * V' difference to M is %f\n", result);
-
+    printf( "U * S * V' difference to M is %f\n", result );
 }
 #endif
 
@@ -589,8 +608,7 @@ Word16 svd_fx(
         singularVectors_Right_fx,
         singularValues_fx_e,
         nChannelsL,
-        nChannelsC
-    );
+        nChannelsC );
 #endif
     return ( errorMessage );
 }
@@ -1771,25 +1789,27 @@ static void singularVectorsAccumulationRight_fx(
 
 #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
 #ifndef M_PI
-#define M_PI 3.141592653589793 
+#define M_PI 3.141592653589793
 #endif
 #define NUM_REGIONS 32
 static Word32 alphaBeta[NUM_REGIONS][2];
-static void get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta)
+static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta )
 {
     static int init = 0;
 
-    if (init == 0) {
-        for (int i=0; i<NUM_REGIONS; i++) {
+    if ( init == 0 )
+    {
+        for ( int i = 0; i < NUM_REGIONS; i++ )
+        {
             double thetaS, thetaE, thetaM;
 
-            thetaS = M_PI/4. * (double)i/(double)NUM_REGIONS;
-            thetaE = M_PI/4. * (double)(i+1)/(double)NUM_REGIONS;
-            thetaM = M_PI/4. * ((double)i+0.5)/(double)NUM_REGIONS;
-            //alphaBeta[i][0] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM)));
-            //alphaBeta[i][1] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM)) * tan((thetaS+thetaE)/2.));
-            alphaBeta[i][0] = FL2WORD32(2./( ((sin(thetaM) + sin(thetaS))*tan((thetaS+thetaE)/2.)) + cos(thetaM) + cos(thetaS)));
-            alphaBeta[i][1] = FL2WORD32(2./( ((sin(thetaM) + sin(thetaS))*tan((thetaS+thetaE)/2.)) + cos(thetaM) + cos(thetaS)) * tan((thetaS+thetaE)/2.) );
+            thetaS = M_PI / 4. * (double) i / (double) NUM_REGIONS;
+            thetaE = M_PI / 4. * (double) ( i + 1 ) / (double) NUM_REGIONS;
+            thetaM = M_PI / 4. * ( (double) i + 0.5 ) / (double) NUM_REGIONS;
+            // alphaBeta[i][0] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM)));
+            // alphaBeta[i][1] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM)) * tan((thetaS+thetaE)/2.));
+            alphaBeta[i][0] = FL2WORD32( 2. / ( ( ( sin( thetaM ) + sin( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ) + cos( thetaM ) + cos( thetaS ) ) );
+            alphaBeta[i][1] = FL2WORD32( 2. / ( ( ( sin( thetaM ) + sin( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ) + cos( thetaM ) + cos( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) );
         }
         init = 1;
     }
@@ -1803,19 +1823,19 @@ static void get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *a
         r =  NUM_REGIONS-1;
     }
 #elif 1
-    shift = sub(norm_l(q),1);
-    q = L_shl(q, shift);
-    q_e = sub(q_e, shift);
-    shift = norm_l(p);
-    p = L_shl(p, shift);
-    p_e = sub(p_e, shift);
-    shift = sub(q_e, p_e);
-    r = shl(div_s(extract_h(q), extract_h(p)), shift);
+    shift = sub( norm_l( q ), 1 );
+    q = L_shl( q, shift );
+    q_e = sub( q_e, shift );
+    shift = norm_l( p );
+    p = L_shl( p, shift );
+    p_e = sub( p_e, shift );
+    shift = sub( q_e, p_e );
+    r = shl( div_s( extract_h( q ), extract_h( p ) ), shift );
     /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */
-    r = add(add(mult(mult(r,r), FL2WORD16_SCALE(-3.672563685340096e-01, 3)), mult(r, FL2WORD16_SCALE(1.375369641423651e+00, 3))), FL2WORD16_SCALE(-6.529424378422714e-03, 3));
-    r = s_min(s_max(0, shr(r, 4+3)), NUM_REGIONS-1);
+    r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) );
+    r = s_min( s_max( 0, shr( r, 4 + 3 ) ), NUM_REGIONS - 1 );
 #endif
-    assert((r >= 0) && (r < NUM_REGIONS));
+    assert( ( r >= 0 ) && ( r < NUM_REGIONS ) );
     *alpha = alphaBeta[r][0];
     *beta = alphaBeta[r][1];
 }
@@ -1836,25 +1856,28 @@ static void GivensRotation2_fx(
 #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
     Word32 az, ax, a, b;
 
-    ax = L_abs(x);
-    az = L_abs(z);
-    IF (BASOP_Util_Cmp_Mant32Exp(ax, x_e, az, z_e) > 0) {
-        get_alpha_beta(ax, x_e, az, z_e, &a, &b);
-        r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(ax, a), x_e, Mpy_32_32(az, b), z_e, out_e);
-    } ELSE {
-        get_alpha_beta(az, z_e, ax, x_e, &a, &b);
-        r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(az, a), z_e, Mpy_32_32(ax, b), x_e, out_e);
+    ax = L_abs( x );
+    az = L_abs( z );
+    IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 )
+    {
+        get_alpha_beta( ax, x_e, az, z_e, &a, &b );
+        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e );
+    }
+    ELSE
+    {
+        get_alpha_beta( az, z_e, ax, x_e, &a, &b );
+        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e );
     }
     *result = r;
     move32();
 #if 1
-    *outInv_e = shl(*out_e, 1);
-    *resultInv = ISqrt32( L_max(1, Mpy_32_32(r, r)), outInv_e );
+    *outInv_e = shl( *out_e, 1 );
+    *resultInv = ISqrt32( L_max( 1, Mpy_32_32( r, r ) ), outInv_e );
     move32();
 #else
-    *resultInv = L_deposit_h(BASOP_Util_Divide3232_Scale(MAX_32, r, outInv_e));
+    *resultInv = L_deposit_h( BASOP_Util_Divide3232_Scale( MAX_32, r, outInv_e ) );
     move32();
-    *outInv_e = sub(*outInv_e, *out_e);
+    *outInv_e = sub( *outInv_e, *out_e );
     move16();
 #endif
 
@@ -1892,14 +1915,17 @@ static Word32 GivensRotation_fx(
 #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
     Word32 az, ax, a, b;
 
-    ax = L_abs(x);
-    az = L_abs(z);
-    IF (BASOP_Util_Cmp_Mant32Exp(ax, x_e, az, z_e) > 0) {
-        get_alpha_beta(ax, x_e, az, z_e, &a, &b);
-        r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(ax, a), x_e, Mpy_32_32(az, b), z_e, out_e);
-    } ELSE {
-        get_alpha_beta(az, z_e, ax, x_e, &a, &b);
-        r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(az, a), z_e, Mpy_32_32(ax, b), x_e, out_e);
+    ax = L_abs( x );
+    az = L_abs( z );
+    IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 )
+    {
+        get_alpha_beta( ax, x_e, az, z_e, &a, &b );
+        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e );
+    }
+    ELSE
+    {
+        get_alpha_beta( az, z_e, ax, x_e, &a, &b );
+        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e );
     }
 #else
     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
-- 
GitLab


From 75d25b051f515ffe1d54da4c54f16e0d52f01691 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Mon, 16 Dec 2024 17:47:26 +0100
Subject: [PATCH 16/41] Tune normalizations under the scope of
 FIX_1010_OPT_DIV. Increase AMAXBMIN interval count to better match reference.

---
 lib_dec/ivas_svd_dec.c | 42 ++++++++++--------------------------------
 1 file changed, 10 insertions(+), 32 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index da9140857..1b8b36c0c 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -56,7 +56,7 @@
 
 #define FIX_1010_OPT_GIVENS
 #define FIX_1010_OPT_GIVENS_INV
-//#define FIX_1010_OPT_GIVENS_AMAX_BMIN
+#define FIX_1010_OPT_GIVENS_AMAX_BMIN
 #endif
 
 /*-----------------------------------------------------------------------*
@@ -997,7 +997,7 @@ static void ApplyQRTransform_fx(
         singularValues[ch] = GivensRotation_fx( d, d_e, r, r_e, &singularValues_e[ch] ); /* exp(singularValues_e) */
         move32();
 #endif
-        IF( GT_32( L_abs( singularValues[ch] ), Mpy_32_32( CONVERGENCE_FACTOR_FX, L_abs( singularValues[ch] ) ) ) )
+        IF (singularValues[ch] != 0)
         {
 #ifndef FIX_1010_OPT_GIVENS_INV
             aux = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, singularValues[ch], &aux_e ); /* exp(aux_e + (1 - singularValues_e)) */
@@ -1154,7 +1154,7 @@ static void HouseholderReduction_fx(
 #ifdef FIX_1010_OPT_DIV
 static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e )
 {
-    Word16 sign, shift;
+    Word16 sign, shift, shift2;
 
     sign = 0;
     move16();
@@ -1173,7 +1173,9 @@ static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e )
     move16();
     x = ISqrt32norm( x, px_e );
     x = Mpy_32_32( x, x );
-    *px_e = add( shl( *px_e, 1 ), shift );
+    shift2 = norm_l( x );
+    x = L_shl( x, shift2 );
+    *px_e = add( shl( *px_e, 1 ), sub(shift, shift2) );
     move16();
 
     if ( sign )
@@ -1252,9 +1254,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifdef FIX_1010_OPT_DIV
         Word16 invVal_e, temp_e;
         Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
-        temp_e = norm_l( invVal );
-        invVal = L_shl( invVal, temp_e );
-        invVal_e = sub( invVal_e, temp_e );
 #endif
         norm_x = 0;
         move32();
@@ -1269,9 +1268,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e );
             singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
             sing_exp[jCh] = sub( invVal_e, temp_e );
-            temp_e = norm_l( singularVectors[jCh][currChannel] );
-            singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e );
-            sing_exp[jCh] = sub( sing_exp[jCh], temp_e );
             move16();
 #endif
             move32();
@@ -1308,9 +1304,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 
 #ifdef FIX_1010_OPT_DIV
         invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
-        temp_e = norm_l( invVal );
-        invVal = L_shl( invVal, temp_e );
-        invVal_e = sub( invVal_e, temp_e );
 #endif
 
         FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
@@ -1330,9 +1323,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #else
             f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
             f_e = add( invVal_e, sub( norm_x_e, r_e ) );
-            temp_e = norm_l( f );
-            f = L_shl( f, temp_e );
-            f_e = sub( f_e, temp_e );
 #endif
 
             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
@@ -1442,13 +1432,10 @@ static void biDiagonalReductionRight_fx(
 #ifdef FIX_1010_OPT_DIV
             Word16 invVal_e, temp_e;
             Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
-            temp_e = norm_l( invVal );
-            invVal = L_shl( invVal, temp_e );
-            invVal_e = sub( invVal_e, temp_e );
 #endif
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
             {
-#ifndef FIX_1010_OPT_DIV_no
+#ifndef FIX_1010_OPT_DIV
                 singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
 #else
                 temp_e = norm_l( singularVectors[currChannel][jCh] );
@@ -1456,9 +1443,6 @@ static void biDiagonalReductionRight_fx(
                 singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                 sing_exp[jCh] = sub( invVal_e, temp_e );
                 move16();
-                temp_e = norm_l( singularVectors[currChannel][jCh] );
-                singularVectors[currChannel][jCh] = L_shl( singularVectors[currChannel][jCh], temp_e );
-                sing_exp[jCh] = sub( sing_exp[jCh], temp_e );
 #endif
                 move32();
                 sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
@@ -1493,9 +1477,6 @@ static void biDiagonalReductionRight_fx(
 
 #ifdef FIX_1010_OPT_DIV
             invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
-            temp_e = norm_l( invVal );
-            invVal = L_shl( invVal, temp_e );
-            invVal_e = sub( invVal_e, temp_e );
 #endif
 
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
@@ -1507,9 +1488,6 @@ static void biDiagonalReductionRight_fx(
                 secDiag[jCh] = L_shl( singularVectors[currChannel][jCh], temp_e );
                 secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                 secDiag_exp[jCh] = sub( invVal_e, temp_e );
-                temp_e = norm_l( secDiag[jCh] );
-                secDiag[jCh] = L_shl( secDiag[jCh], temp_e );
-                secDiag_exp[jCh] = sub( secDiag_exp[jCh], temp_e );
                 move16();
 #endif
                 move32();
@@ -1791,7 +1769,7 @@ static void singularVectorsAccumulationRight_fx(
 #ifndef M_PI
 #define M_PI 3.141592653589793
 #endif
-#define NUM_REGIONS 32
+#define NUM_REGIONS 128
 static Word32 alphaBeta[NUM_REGIONS][2];
 static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta )
 {
@@ -1830,10 +1808,10 @@ static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *
     p = L_shl( p, shift );
     p_e = sub( p_e, shift );
     shift = sub( q_e, p_e );
-    r = shl( div_s( extract_h( q ), extract_h( p ) ), shift );
+    r = shl_sat( div_s( extract_h( q ), extract_h( p ) ), shift );
     /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */
     r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) );
-    r = s_min( s_max( 0, shr( r, 4 + 3 ) ), NUM_REGIONS - 1 );
+    r = s_min( s_max( 0, shr( r, WORD16_BITS-1-7-3 ) ), NUM_REGIONS - 1 );
 #endif
     assert( ( r >= 0 ) && ( r < NUM_REGIONS ) );
     *alpha = alphaBeta[r][0];
-- 
GitLab


From 326588ce8cd0536c0b29e42e7ae86b77061e6b80 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Mon, 16 Dec 2024 17:50:27 +0100
Subject: [PATCH 17/41] Fix clang format.

---
 lib_dec/ivas_svd_dec.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 1b8b36c0c..1b397e7fb 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -997,7 +997,7 @@ static void ApplyQRTransform_fx(
         singularValues[ch] = GivensRotation_fx( d, d_e, r, r_e, &singularValues_e[ch] ); /* exp(singularValues_e) */
         move32();
 #endif
-        IF (singularValues[ch] != 0)
+        IF( singularValues[ch] != 0 )
         {
 #ifndef FIX_1010_OPT_GIVENS_INV
             aux = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, singularValues[ch], &aux_e ); /* exp(aux_e + (1 - singularValues_e)) */
@@ -1175,7 +1175,7 @@ static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e )
     x = Mpy_32_32( x, x );
     shift2 = norm_l( x );
     x = L_shl( x, shift2 );
-    *px_e = add( shl( *px_e, 1 ), sub(shift, shift2) );
+    *px_e = add( shl( *px_e, 1 ), sub( shift, shift2 ) );
     move16();
 
     if ( sign )
@@ -1811,7 +1811,7 @@ static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *
     r = shl_sat( div_s( extract_h( q ), extract_h( p ) ), shift );
     /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */
     r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) );
-    r = s_min( s_max( 0, shr( r, WORD16_BITS-1-7-3 ) ), NUM_REGIONS - 1 );
+    r = s_min( s_max( 0, shr( r, WORD16_BITS - 1 - 7 - 3 ) ), NUM_REGIONS - 1 );
 #endif
     assert( ( r >= 0 ) && ( r < NUM_REGIONS ) );
     *alpha = alphaBeta[r][0];
-- 
GitLab


From 964c80d54138b141a14143639c4c1767246847ba Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Tue, 17 Dec 2024 17:06:51 +0100
Subject: [PATCH 18/41] Remove normalization; improves test case
 stv4ISM48n.wav_4_ISM_with_and_without_extended_metadata_bitrate_switching_from_24_4_kbps_to_256_kbps_48_kHz

---
 lib_dec/ivas_svd_dec.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 1b397e7fb..035272caa 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -1252,7 +1252,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
     IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
     {
 #ifdef FIX_1010_OPT_DIV
-        Word16 invVal_e, temp_e;
+        Word16 invVal_e;
         Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
 #endif
         norm_x = 0;
@@ -1263,16 +1263,15 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         {
 #ifndef FIX_1010_OPT_DIV
             singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
+            move32();
+            sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
+            move16();
 #else
-            temp_e = norm_l( singularVectors[jCh][currChannel] );
-            singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e );
             singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
-            sing_exp[jCh] = sub( invVal_e, temp_e );
-            move16();
-#endif
             move32();
-            sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
+            sing_exp[jCh] = add( invVal_e, sub( *singularVectors_e, *sig_x_e ) );
             move16();
+#endif
             norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
         }
         IF( GT_16( norm_x_e, 0 ) )
-- 
GitLab


From 5c30dabdd630ade7687cfc8b2de7829a48dd3f99 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Wed, 18 Dec 2024 11:45:21 +0100
Subject: [PATCH 19/41] Add normalization under FIX_1010_OPT_DIV again, but
 without overwriting source data whose format should not be changed.

---
 lib_dec/ivas_svd_dec.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 035272caa..68b2aaeea 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -1252,7 +1252,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
     IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
     {
 #ifdef FIX_1010_OPT_DIV
-        Word16 invVal_e;
+        Word16 invVal_e, temp_e;
         Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
 #endif
         norm_x = 0;
@@ -1267,9 +1267,10 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
             move16();
 #else
-            singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
+            temp_e = norm_l( singularVectors[jCh][currChannel] );
+            singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
             move32();
-            sing_exp[jCh] = add( invVal_e, sub( *singularVectors_e, *sig_x_e ) );
+            sing_exp[jCh] = add( sub(invVal_e, temp_e), sub( *singularVectors_e, *sig_x_e ) );
             move16();
 #endif
             norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
@@ -1438,8 +1439,7 @@ static void biDiagonalReductionRight_fx(
                 singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
 #else
                 temp_e = norm_l( singularVectors[currChannel][jCh] );
-                singularVectors[currChannel][jCh] = L_shl( singularVectors[currChannel][jCh], temp_e );
-                singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
+                singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                 sing_exp[jCh] = sub( invVal_e, temp_e );
                 move16();
 #endif
@@ -1484,8 +1484,7 @@ static void biDiagonalReductionRight_fx(
                 secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */
 #else
                 temp_e = norm_l( singularVectors[currChannel][jCh] );
-                secDiag[jCh] = L_shl( singularVectors[currChannel][jCh], temp_e );
-                secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
+                secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                 secDiag_exp[jCh] = sub( invVal_e, temp_e );
                 move16();
 #endif
@@ -1603,7 +1602,7 @@ static void singularVectorsAccumulationLeft_fx(
 
         IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
         {
-#ifdef FIX_1010_OPT_DIV
+#ifdef  FIX_1010_OPT_DIV
             t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp );
             t_ii_e = sub( temp_exp, t_ii_e );
 #else
@@ -1622,9 +1621,10 @@ static void singularVectorsAccumulationLeft_fx(
                     norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
                 }
 #ifdef FIX_1010_OPT_DIV
-                t_jj = BASOP_Util_Inv32( maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp );
+                Word16 temp_e = norm_l(singularVectors_Left[nCh][nCh]);
+                t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl(singularVectors_Left[nCh][nCh], temp_e) ), &temp_exp );
                 t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj );
-                t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
+                t_jj_e = add( add(temp_exp, temp_e), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
 #else
                 t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,
                 t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
-- 
GitLab


From a728d3882b0672565d6b5460add7bb02681f1055 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Wed, 18 Dec 2024 11:50:33 +0100
Subject: [PATCH 20/41] clang-format

---
 lib_dec/ivas_svd_dec.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 68b2aaeea..a8a8cd265 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -1270,7 +1270,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             temp_e = norm_l( singularVectors[jCh][currChannel] );
             singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
             move32();
-            sing_exp[jCh] = add( sub(invVal_e, temp_e), sub( *singularVectors_e, *sig_x_e ) );
+            sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) );
             move16();
 #endif
             norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
@@ -1602,7 +1602,7 @@ static void singularVectorsAccumulationLeft_fx(
 
         IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
         {
-#ifdef  FIX_1010_OPT_DIV
+#ifdef FIX_1010_OPT_DIV
             t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp );
             t_ii_e = sub( temp_exp, t_ii_e );
 #else
@@ -1621,10 +1621,10 @@ static void singularVectorsAccumulationLeft_fx(
                     norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
                 }
 #ifdef FIX_1010_OPT_DIV
-                Word16 temp_e = norm_l(singularVectors_Left[nCh][nCh]);
-                t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl(singularVectors_Left[nCh][nCh], temp_e) ), &temp_exp );
+                Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] );
+                t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp );
                 t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj );
-                t_jj_e = add( add(temp_exp, temp_e), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
+                t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
 #else
                 t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,
                 t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
-- 
GitLab


From e3899b161bebc2598c4110f21a178184655b39c2 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Wed, 18 Dec 2024 14:27:01 +0100
Subject: [PATCH 21/41] Disable one FIX_1010_OPT_DIV case.

---
 lib_dec/ivas_svd_dec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index a8a8cd265..31d9f9660 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -1261,7 +1261,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         move16();
         FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
         {
-#ifndef FIX_1010_OPT_DIV
+#ifndef FIX_1010_OPT_DIVno
             singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
             move32();
             sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
-- 
GitLab


From 8ac486d2188051b401203d1ceac613a8c6461016 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Wed, 18 Dec 2024 14:32:36 +0100
Subject: [PATCH 22/41] Fix warning.

---
 lib_dec/ivas_svd_dec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 31d9f9660..a5ccdce3a 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -1252,7 +1252,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
     IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
     {
 #ifdef FIX_1010_OPT_DIV
-        Word16 invVal_e, temp_e;
+        Word16 invVal_e;
         Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
 #endif
         norm_x = 0;
-- 
GitLab


From c87442fc3a60b1bb830eb9658ef67762891ab78d Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Wed, 18 Dec 2024 16:44:54 +0100
Subject: [PATCH 23/41] Enable FIX_1010_OPT_DIV case again because of crash,
 disable FIX_1010_OPT_GIVENS_AMAX_BMIN.

---
 lib_dec/ivas_svd_dec.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index a5ccdce3a..eb3757b5b 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -53,10 +53,9 @@
 
 #if 1
 #define FIX_1010_OPT_DIV
-
 #define FIX_1010_OPT_GIVENS
 #define FIX_1010_OPT_GIVENS_INV
-#define FIX_1010_OPT_GIVENS_AMAX_BMIN
+//#define FIX_1010_OPT_GIVENS_AMAX_BMIN
 #endif
 
 /*-----------------------------------------------------------------------*
@@ -1261,13 +1260,13 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         move16();
         FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
         {
-#ifndef FIX_1010_OPT_DIVno
+#ifndef FIX_1010_OPT_DIV
             singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
             move32();
             sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
             move16();
 #else
-            temp_e = norm_l( singularVectors[jCh][currChannel] );
+            Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
             singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
             move32();
             sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) );
@@ -1807,7 +1806,7 @@ static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *
     p = L_shl( p, shift );
     p_e = sub( p_e, shift );
     shift = sub( q_e, p_e );
-    r = shl_sat( div_s( extract_h( q ), extract_h( p ) ), shift );
+    r = shl_sat( div_s( extract_h( q ), s_max(1, extract_h( p ) ) ), shift );
     /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */
     r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) );
     r = s_min( s_max( 0, shr( r, WORD16_BITS - 1 - 7 - 3 ) ), NUM_REGIONS - 1 );
-- 
GitLab


From a18b9b9631a998f8a788358702d07406e72148a4 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Thu, 19 Dec 2024 15:46:46 +0100
Subject: [PATCH 24/41] Apply bug fix from issue 1139 and add wmops/precision
 improvement macro FIX_1010_OPT_SINGLE_RESCALE.

---
 lib_dec/ivas_svd_dec.c | 170 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 152 insertions(+), 18 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index eb3757b5b..730bb3d42 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -53,9 +53,10 @@
 
 #if 1
 #define FIX_1010_OPT_DIV
+#define FIX_1010_OPT_SINGLE_RESCALE
 #define FIX_1010_OPT_GIVENS
 #define FIX_1010_OPT_GIVENS_INV
-//#define FIX_1010_OPT_GIVENS_AMAX_BMIN
+// #define FIX_1010_OPT_GIVENS_AMAX_BMIN
 #endif
 
 /*-----------------------------------------------------------------------*
@@ -79,7 +80,11 @@ static void biDiagonalReductionLeft_fx(
     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
     Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
     Word16 *singularVectors_e,
+#else
+    Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
+#endif
     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     Word16 *secDiag_e,
     const Word16 nChannelsL,  /* Q0 */
@@ -93,7 +98,11 @@ static void biDiagonalReductionLeft_fx(
 static void biDiagonalReductionRight_fx(
     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
     Word16 *singularVectors_e,
+#else
+    Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
+#endif
     Word16 *secDiag_e,
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
@@ -1119,11 +1128,29 @@ static void HouseholderReduction_fx(
     Word16 sig_x_fx_e = 0;
     move16();
 
+#ifdef FIX_1010_OPT_SINGLE_RESCALE
+    Word16 iCh, jCh;
+    Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
+    {
+        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
+        {
+            singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e;
+            move32();
+        }
+    }
+#endif
+
     /* Bidiagonal Reduction for every channel */
     FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
     {
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
         biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, &singularVectors_Left_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
         biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, &singularVectors_Left_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
+#else
+        biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, singularVectors_Left_fx_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
+        biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
+#endif
 
         Word16 L_temp_e;
         Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), *secDiag_fx_e, &L_temp_e ); /* exp(L_temp_e) */
@@ -1136,6 +1163,30 @@ static void HouseholderReduction_fx(
         }
     }
 
+#ifdef FIX_1010_OPT_SINGLE_RESCALE
+    // rescaling block
+    Word16 exp_max = 0;
+    move16();
+    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
+    {
+        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
+        {
+            exp_max = s_max( exp_max, singularVectors_Left_fx_e[jCh][iCh] );
+        }
+    }
+
+    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
+    {
+        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
+        {
+            singularVectors_Left_fx[jCh][iCh] = L_shr_r( singularVectors_Left_fx[jCh][iCh], sub( exp_max, singularVectors_Left_fx_e[jCh][iCh] ) ); /* exp(exp_max) */
+            move32();
+        }
+    }
+    singularVectors_Left_e = exp_max;
+    move16();
+#endif
+
     /* SingularVecotr Accumulation */
     singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC );
 
@@ -1189,7 +1240,11 @@ static void biDiagonalReductionLeft_fx(
     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
     Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
     Word16 *singularVectors_e,
+#else
+    Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
+#endif
     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     Word16 *secDiag_e,
     const Word16 nChannelsL,  /* Q0 */
@@ -1203,14 +1258,16 @@ static void biDiagonalReductionLeft_fx(
     Word16 iCh, jCh, idx;
     Word32 norm_x, f, r;
     Word16 norm_x_e, f_e, r_e;
-    Word16 sing_exp[MAX_OUTPUT_CHANNELS];
-    Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
     Word32 L_temp;
     Word16 L_temp_e;
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
+    Word16 sing_exp[MAX_OUTPUT_CHANNELS];
+    Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
     FOR( jCh = 0; jCh < MAX_OUTPUT_CHANNELS; jCh++ )
     {
         set16_fx( sing_exp2[jCh], *singularVectors_e, MAX_OUTPUT_CHANNELS );
     }
+#endif
 
     secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); /* exp(sig_x_e) */
     move32();
@@ -1245,14 +1302,20 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 
     FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     {
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
         ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */
+#else
+        ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */
+#endif
     }
 
     IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
     {
 #ifdef FIX_1010_OPT_DIV
         Word16 invVal_e;
-        Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
+        Word32 invVal;
+        /* BASOP_Util_Inv32 is not accurate enogh in this case. */
+        invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
 #endif
         norm_x = 0;
         move32();
@@ -1265,14 +1328,21 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             move32();
             sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
             move16();
+            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
 #else
             Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
             singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
             move32();
-            sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) );
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
+            sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e );
             move16();
-#endif
             norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+#else
+            singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
+            move16();
+            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+#endif
+#endif
         }
         IF( GT_16( norm_x_e, 0 ) )
         {
@@ -1297,8 +1367,13 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             move32();
         }
 
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
         r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e );                       /* exp(r_e) */
         singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */
+#else
+        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
+        singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
+#endif
         move32();
 
 #ifdef FIX_1010_OPT_DIV
@@ -1313,7 +1388,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             move16();
             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
             {
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
                 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e ), &norm_x_e ); /* exp(norm_x_e) */
+#else
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */
+#endif
             }
 
 #ifndef FIX_1010_OPT_DIV
@@ -1326,7 +1405,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 
             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
             {
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
                 singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], *singularVectors_e, Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, sing_exp[jCh] ), &sing_exp2[jCh][iCh] ); /* exp( sing_exp2) */
+#else
+                singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] );
+#endif
                 move32();
             }
         }
@@ -1336,10 +1419,15 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         {
             singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], ( *sig_x ) ); /* sing_exp + sig_x_e */
             move32();
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
             sing_exp2[jCh][currChannel] = add( sing_exp[jCh], *sig_x_e );
+#else
+            singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e );
+#endif
             move16();
         }
 
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
         // rescaling block
         Word16 exp_max = *singularVectors_e;
         move16();
@@ -1361,6 +1449,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         }
         *singularVectors_e = exp_max;
         move16();
+#endif
     }
 
     // rescaling block
@@ -1382,7 +1471,11 @@ return;
 static void biDiagonalReductionRight_fx(
     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
     Word16 *singularVectors_e,
+#else
+    Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
+#endif
     Word16 *secDiag_e,
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
@@ -1395,15 +1488,17 @@ static void biDiagonalReductionRight_fx(
     Word16 iCh, jCh, idx;
     Word32 norm_x, r;
     Word16 norm_x_e, r_e;
-    Word16 sing_exp[MAX_OUTPUT_CHANNELS];
     Word16 secDiag_exp[MAX_OUTPUT_CHANNELS];
-    Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
     Word32 L_temp;
     Word16 L_temp_e;
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
+    Word16 sing_exp[MAX_OUTPUT_CHANNELS];
+    Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
     FOR( jCh = 0; jCh < MAX_OUTPUT_CHANNELS; jCh++ )
     {
         set16_fx( sing_exp2[jCh], *singularVectors_e, MAX_OUTPUT_CHANNELS );
     }
+#endif
     set16_fx( secDiag_exp, *secDiag_e, MAX_OUTPUT_CHANNELS );
 
     /* Setting values to 0 */
@@ -1418,7 +1513,11 @@ static void biDiagonalReductionRight_fx(
 
         FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
         {
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
             ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */
+#else
+            ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), singularVectors2_e[currChannel][jCh], sig_x_e ); /* exp(sig_x_e) */
+#endif
         }
 
         IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
@@ -1436,16 +1535,25 @@ static void biDiagonalReductionRight_fx(
             {
 #ifndef FIX_1010_OPT_DIV
                 singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
+                move32();
+                sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
+                move16();
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
 #else
                 temp_e = norm_l( singularVectors[currChannel][jCh] );
                 singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
-                sing_exp[jCh] = sub( invVal_e, temp_e );
-                move16();
-#endif
                 move32();
-                sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
+
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
+                sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) );
                 move16();
                 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+#else
+                singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
+                move16();
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+#endif
+#endif
             }
             IF( GT_16( norm_x_e, 0 ) )
             {
@@ -1469,8 +1577,13 @@ static void biDiagonalReductionRight_fx(
                 move32();
             }
 
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
             r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[idx], -norm_x, norm_x_e, &r_e );               /* exp(r_e) */
             singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[idx], -( *g ), 0, &sing_exp[idx] ); /* exp(sing_exp) */
+#else
+            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
+            singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* exp(sing_exp) */
+#endif
             move32();
 
 #ifdef FIX_1010_OPT_DIV
@@ -1481,15 +1594,20 @@ static void biDiagonalReductionRight_fx(
             {
 #ifndef FIX_1010_OPT_DIV
                 secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */
+                move32();
+                secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) );
+                move32();
 #else
                 temp_e = norm_l( singularVectors[currChannel][jCh] );
                 secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
-                secDiag_exp[jCh] = sub( invVal_e, temp_e );
+                move32();
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
+                secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( sing_exp[jCh], r_e ) );
+#else
+                secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], r_e ) );
+#endif
                 move16();
 #endif
-                move32();
-                secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) );
-                move32();
             }
 
             FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /*  nChannelsL */
@@ -1500,12 +1618,20 @@ static void biDiagonalReductionRight_fx(
                 move16();
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
                 {
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
                     norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( *singularVectors_e, sing_exp[jCh] ), &norm_x_e ); /* exp(norm_x_e) */
+#else
+                    norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
+#endif
                 }
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
                 {
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */
+#else
+                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
+#endif
                     move32();
                 }
             }
@@ -1514,10 +1640,15 @@ static void biDiagonalReductionRight_fx(
             {
                 singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], ( *sig_x ) ); /* exp(sing_exp + sig_x_e) */
                 move32();
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
                 sing_exp2[currChannel][jCh] = add( sing_exp[jCh], *sig_x_e );
+#else
+                singularVectors2_e[currChannel][jCh] = add( singularVectors2_e[currChannel][jCh], *sig_x_e );
+#endif
                 move16();
             }
 
+
             /*rescaling block*/
             Word16 exp_max = *secDiag_e;
             move16();
@@ -1530,8 +1661,10 @@ static void biDiagonalReductionRight_fx(
                 secDiag[jCh] = L_shr_r( secDiag[jCh], sub( exp_max, secDiag_exp[jCh] ) ); /* exp(exp_max) */
                 move32();
             }
+            *secDiag_e = exp_max;
+            move16();
 
-
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
             exp_max = *singularVectors_e;
             move16();
             FOR( iCh = 0; iCh < nChannelsL; iCh++ )
@@ -1552,6 +1685,7 @@ static void biDiagonalReductionRight_fx(
             }
             *singularVectors_e = exp_max;
             move16();
+#endif
         }
     }
 
@@ -1806,7 +1940,7 @@ static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *
     p = L_shl( p, shift );
     p_e = sub( p_e, shift );
     shift = sub( q_e, p_e );
-    r = shl_sat( div_s( extract_h( q ), s_max(1, extract_h( p ) ) ), shift );
+    r = shl_sat( div_s( extract_h( q ), s_max( 1, extract_h( p ) ) ), shift );
     /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */
     r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) );
     r = s_min( s_max( 0, shr( r, WORD16_BITS - 1 - 7 - 3 ) ), NUM_REGIONS - 1 );
-- 
GitLab


From 2815f37b9726594427001807f3bca6fff814f5c2 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Thu, 19 Dec 2024 15:49:32 +0100
Subject: [PATCH 25/41] clang format

---
 lib_dec/ivas_svd_dec.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 730bb3d42..d7245a8d4 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -1336,7 +1336,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
             sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e );
             move16();
-            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e );                                               /* exp(norm_x_e) */
 #else
             singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
             move16();
@@ -1371,8 +1371,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e );                       /* exp(r_e) */
         singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */
 #else
-        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
-        singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
+        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                                                          /* exp(r_e) */
+        singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] );                                     /* sing_exp */
 #endif
         move32();
 
@@ -1399,7 +1399,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */
             f_e = add( f_e, sub( norm_x_e, r_e ) );
 #else
-            f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
+            f = Mpy_32_32( norm_x, invVal );                                                                                                                                                                                            /* invVal_e + (norm_x_e - r_e) */
             f_e = add( invVal_e, sub( norm_x_e, r_e ) );
 #endif
 
@@ -1547,7 +1547,7 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) );
                 move16();
-                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e );           /* exp(norm_x_e) */
 #else
                 singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
                 move16();
@@ -1630,7 +1630,7 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */
 #else
-                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
+                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] );   /* exp(sing_exp2) */
 #endif
                     move32();
                 }
-- 
GitLab


From aff979224d95cf9ad587b5ce7a1c10ee3efec6c8 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Thu, 19 Dec 2024 17:54:44 +0100
Subject: [PATCH 26/41] Couple use of BASOP_Util_Inv32 to macro
 FIX_1010_OPT_INV_USING_INVSQRT and disable it to improve accuracy.

---
 lib_dec/ivas_svd_dec.c | 65 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 54 insertions(+), 11 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index d7245a8d4..c70de847e 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -53,6 +53,7 @@
 
 #if 1
 #define FIX_1010_OPT_DIV
+// #define FIX_1010_OPT_INV_USING_INVSQRT
 #define FIX_1010_OPT_SINGLE_RESCALE
 #define FIX_1010_OPT_GIVENS
 #define FIX_1010_OPT_GIVENS_INV
@@ -1201,7 +1202,7 @@ static void HouseholderReduction_fx(
  *
  *-------------------------------------------------------------------------*/
 
-#ifdef FIX_1010_OPT_DIV
+#ifdef FIX_1010_OPT_INV_USING_INVSQRT
 static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e )
 {
     Word16 sign, shift, shift2;
@@ -1314,8 +1315,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifdef FIX_1010_OPT_DIV
         Word16 invVal_e;
         Word32 invVal;
-        /* BASOP_Util_Inv32 is not accurate enogh in this case. */
+#ifdef FIX_1010_OPT_INV_USING_INVSQRT
+        invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
+#else
         invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
+#endif
 #endif
         norm_x = 0;
         move32();
@@ -1324,11 +1328,19 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
         {
 #ifndef FIX_1010_OPT_DIV
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
             singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
             move32();
             sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
             move16();
             norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+#else
+            singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
+            move32();
+            singularVectors2_e[jCh][currChannel] = add( L_temp_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) );
+            move16();
+            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+#endif
 #else
             Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
             singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
@@ -1336,7 +1348,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
             sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e );
             move16();
-            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e );                                               /* exp(norm_x_e) */
+            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
 #else
             singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
             move16();
@@ -1371,13 +1383,17 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e );                       /* exp(r_e) */
         singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */
 #else
-        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                                                          /* exp(r_e) */
-        singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] );                                     /* sing_exp */
+        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
+        singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
 #endif
         move32();
 
 #ifdef FIX_1010_OPT_DIV
+#ifdef FIX_1010_OPT_INV_USING_INVSQRT
         invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
+#else
+        invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
+#endif
 #endif
 
         FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
@@ -1399,7 +1415,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */
             f_e = add( f_e, sub( norm_x_e, r_e ) );
 #else
-            f = Mpy_32_32( norm_x, invVal );                                                                                                                                                                                            /* invVal_e + (norm_x_e - r_e) */
+            f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
             f_e = add( invVal_e, sub( norm_x_e, r_e ) );
 #endif
 
@@ -1529,25 +1545,37 @@ static void biDiagonalReductionRight_fx(
 
 #ifdef FIX_1010_OPT_DIV
             Word16 invVal_e, temp_e;
-            Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
+            Word32 invVal;
+#ifdef FIX_1010_OPT_INV_USING_INVSQRT
+            invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
+#else
+            invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
+#endif
 #endif
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
             {
 #ifndef FIX_1010_OPT_DIV
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
                 singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
                 move32();
                 sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
                 move16();
                 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+#else
+                singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
+                move32();
+                singularVectors2_e[currChannel][jCh] = add( L_temp_e, sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
+                move16();
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+#endif
 #else
                 temp_e = norm_l( singularVectors[currChannel][jCh] );
                 singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                 move32();
-
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) );
                 move16();
-                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e );           /* exp(norm_x_e) */
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
 #else
                 singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
                 move16();
@@ -1587,16 +1615,27 @@ static void biDiagonalReductionRight_fx(
             move32();
 
 #ifdef FIX_1010_OPT_DIV
+#ifdef FIX_1010_OPT_INV_USING_INVSQRT
             invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
+#else
+            invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
+#endif
 #endif
 
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
             {
 #ifndef FIX_1010_OPT_DIV
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
                 secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */
                 move32();
                 secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) );
                 move32();
+#else
+                secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */
+                move32();
+                secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( singularVectors2_e[currChannel][jCh], r_e ) );
+                move32();
+#endif
 #else
                 temp_e = norm_l( singularVectors[currChannel][jCh] );
                 secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
@@ -1630,7 +1669,7 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */
 #else
-                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] );   /* exp(sing_exp2) */
+                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
 #endif
                     move32();
                 }
@@ -1736,7 +1775,11 @@ static void singularVectorsAccumulationLeft_fx(
         IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
         {
 #ifdef FIX_1010_OPT_DIV
+#ifdef FIX_1010_OPT_INV_USING_INVSQRT
             t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp );
+#else
+            t_ii = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp );
+#endif
             t_ii_e = sub( temp_exp, t_ii_e );
 #else
             t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */
@@ -1753,7 +1796,7 @@ static void singularVectorsAccumulationLeft_fx(
                 {
                     norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
                 }
-#ifdef FIX_1010_OPT_DIV
+#ifdef FIX_1010_OPT_INV_USING_INVSQRT
                 Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] );
                 t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp );
                 t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj );
-- 
GitLab


From 1b32a0f0daa4fa5151860e972d6813db59051631 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Tue, 7 Jan 2025 09:45:08 +0100
Subject: [PATCH 27/41] clang format fix

---
 lib_dec/ivas_svd_dec.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index c70de847e..33de89493 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -1348,7 +1348,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
             sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e );
             move16();
-            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e );                                               /* exp(norm_x_e) */
 #else
             singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
             move16();
@@ -1383,8 +1383,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e );                       /* exp(r_e) */
         singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */
 #else
-        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
-        singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
+        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                                                          /* exp(r_e) */
+        singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] );                                     /* sing_exp */
 #endif
         move32();
 
@@ -1415,7 +1415,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */
             f_e = add( f_e, sub( norm_x_e, r_e ) );
 #else
-            f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
+            f = Mpy_32_32( norm_x, invVal );                                                                                                                                                                                            /* invVal_e + (norm_x_e - r_e) */
             f_e = add( invVal_e, sub( norm_x_e, r_e ) );
 #endif
 
@@ -1575,7 +1575,7 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) );
                 move16();
-                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e );           /* exp(norm_x_e) */
 #else
                 singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
                 move16();
@@ -1669,7 +1669,7 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */
 #else
-                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
+                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] );   /* exp(sing_exp2) */
 #endif
                     move32();
                 }
-- 
GitLab


From 1c6d7b7f34beefe99cb38d622ca34e139b14d8db Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Thu, 9 Jan 2025 08:59:13 +0100
Subject: [PATCH 28/41] Extend dynamic scale to
 singularVectorsAccumulationRight_fx and singularVectorsAccumulationLeft_fx
 which use dynamic scale internally anyway, to fix MLD failure. This reduces
 intermediate denormalizations, improves precision and lowers complexity.

---
 lib_dec/ivas_svd_dec.c | 134 +++++++++++++++++++++++++++++------------
 1 file changed, 95 insertions(+), 39 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 33de89493..e280abb42 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -116,7 +116,11 @@ static void biDiagonalReductionRight_fx(
 static void singularVectorsAccumulationLeft_fx(
     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
     Word32 singularValues[MAX_OUTPUT_CHANNELS],         /* exp(singularValues_e) */
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
     Word16 singularVectors_e,
+#else
+    Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
+#endif
     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     const Word16 nChannelsL, /* Q0 */
     const Word16 nChannelsC  /* Q0 */
@@ -126,7 +130,11 @@ static void singularVectorsAccumulationRight_fx(
     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* singularVectors_e */
     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_e) */
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
     Word16 singularVectors_e,
+#else
+    Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
+#endif
     Word16 secDiag_e,
     const Word16 nChannelsC /* Q0 */
 );
@@ -283,9 +291,12 @@ void svdMat2mat_fx(
     return;
 }
 
+#ifndef DEBUG_SVD_TEST
+#define DEBUG_SVD_PRECISION
+#endif
 // #define MORE_DEBUG
 
-#ifdef MORE_DEBUG
+#if defined( DEBUG_SVD_PRECISION ) || defined( MORE_DEBUG )
 
 #if ( MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS )
 #define MAX_MATRIX MAX_INPUT_CHANNELS
@@ -375,7 +386,14 @@ static float matrixDifference(
     {
         for ( int i2 = 0; i2 < dim2; i2++ )
         {
-            r += fabsf( ( b[i1][i2] - a[i1][i2] ) / a[i1][i2] );
+            if ( a[i1][i2] != 0.f )
+            {
+                r += fabsf( ( b[i1][i2] - a[i1][i2] ) / a[i1][i2] );
+            }
+            else
+            {
+                r += fabsf( b[i1][i2] - a[i1][i2] );
+            }
         }
     }
 
@@ -447,6 +465,7 @@ static void svd_accuracy_test_fx(
     float singularVectors_Right[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
     float result;
     int dimSingular;
+    int problematic = 0;
 
     /* Convert to float and Create singular values matrix from signular values vector */
     for ( int x = 0; x < MAX_MATRIX; x++ )
@@ -474,6 +493,10 @@ static void svd_accuracy_test_fx(
     matrixTranspose( tmp1, singularVectors_Left, nChannelsL, nChannelsC );                             /* CxL */
     matrixProduct( tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC ); /* CxC */
     result = matrixTestIdentity( tmp2, nChannelsC );
+    if ( result >= 1.0 )
+    {
+        problematic = 1;
+    }
 #ifdef MORE_DEBUG
     matrixPrint( tmp2, nChannelsC, nChannelsC, "U\'*U" );
 #endif
@@ -483,6 +506,10 @@ static void svd_accuracy_test_fx(
     matrixTranspose( tmp1, singularVectors_Right, nChannelsC, nChannelsC );                             /* CxC */
     matrixProduct( tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC ); /* CxC */
     result = matrixTestIdentity( tmp2, nChannelsC );
+    if ( result >= 1.0 )
+    {
+        problematic = 1;
+    }
 #ifdef MORE_DEBUG
     matrixPrint( tmp2, nChannelsC, nChannelsC, "V*V\'" );
 #endif
@@ -493,10 +520,19 @@ static void svd_accuracy_test_fx(
     matrixTranspose( tmp3, singularVectors_Right, nChannelsC, nChannelsC );                                              /* CxC */
     matrixProduct( tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC );                                  /* LxC */
     result = matrixDifference( tmp2, InputMatrix, nChannelsL, nChannelsC );
+    if ( result >= 1.0 )
+    {
+        problematic = 1;
+    }
 #ifdef MORE_DEBUG
     matrixPrint( tmp2, nChannelsL, nChannelsC, "U*S*V\'" );
 #endif
     printf( "U * S * V' difference to M is %f\n", result );
+
+    if ( problematic )
+    {
+        matrixPrint( InputMatrix, nChannelsL, nChannelsC, "Problematic Input" );
+    }
 }
 #endif
 
@@ -608,7 +644,7 @@ Word16 svd_fx(
     WHILE( EQ_16( condition, 1 ) );
 
     pop_wmops();
-#ifdef MORE_DEBUG
+#ifdef DEBUG_SVD_PRECISION
     svd_accuracy_test_fx(
         InputMatrix,
         InputMatrix_e,
@@ -1137,7 +1173,7 @@ static void HouseholderReduction_fx(
         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
         {
             singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e;
-            move32();
+            move16();
         }
     }
 #endif
@@ -1164,34 +1200,14 @@ static void HouseholderReduction_fx(
         }
     }
 
-#ifdef FIX_1010_OPT_SINGLE_RESCALE
-    // rescaling block
-    Word16 exp_max = 0;
-    move16();
-    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
-    {
-        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
-        {
-            exp_max = s_max( exp_max, singularVectors_Left_fx_e[jCh][iCh] );
-        }
-    }
-
-    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
-    {
-        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
-        {
-            singularVectors_Left_fx[jCh][iCh] = L_shr_r( singularVectors_Left_fx[jCh][iCh], sub( exp_max, singularVectors_Left_fx_e[jCh][iCh] ) ); /* exp(exp_max) */
-            move32();
-        }
-    }
-    singularVectors_Left_e = exp_max;
-    move16();
-#endif
-
     /* SingularVecotr Accumulation */
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
     singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC );
-
     singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_e, singularValues_fx_e, nChannelsL, nChannelsC );
+#else
+    singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, *secDiag_fx_e, nChannelsC );
+    singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
+#endif
 
     return;
 }
@@ -1348,7 +1364,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
             sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e );
             move16();
-            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e );                                               /* exp(norm_x_e) */
+            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
 #else
             singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
             move16();
@@ -1383,8 +1399,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e );                       /* exp(r_e) */
         singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */
 #else
-        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                                                          /* exp(r_e) */
-        singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] );                                     /* sing_exp */
+        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
+        singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
 #endif
         move32();
 
@@ -1415,7 +1431,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */
             f_e = add( f_e, sub( norm_x_e, r_e ) );
 #else
-            f = Mpy_32_32( norm_x, invVal );                                                                                                                                                                                            /* invVal_e + (norm_x_e - r_e) */
+            f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
             f_e = add( invVal_e, sub( norm_x_e, r_e ) );
 #endif
 
@@ -1575,7 +1591,7 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) );
                 move16();
-                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e );           /* exp(norm_x_e) */
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
 #else
                 singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
                 move16();
@@ -1669,7 +1685,7 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */
 #else
-                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] );   /* exp(sing_exp2) */
+                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
 #endif
                     move32();
                 }
@@ -1738,9 +1754,13 @@ static void biDiagonalReductionRight_fx(
  *-------------------------------------------------------------------------*/
 
 static void singularVectorsAccumulationLeft_fx(
-    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
+    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
     Word32 singularValues[MAX_OUTPUT_CHANNELS],         /* exp(singularValues_e) */
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
     Word16 singularVectors_e,
+#else
+    Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
+#endif
     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     const Word16 nChannelsL, /* Q0 */
     const Word16 nChannelsC  /* Q0 */
@@ -1750,11 +1770,13 @@ static void singularVectorsAccumulationLeft_fx(
     Word16 nChannels;
     Word32 norm_y, t_jj, t_ii;
     Word16 norm_y_e, t_jj_e, t_ii_e, temp_exp;
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
     Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
     FOR( nCh = 0; nCh < MAX_OUTPUT_CHANNELS; nCh++ )
     {
         set16_fx( sing_exp2[nCh], singularVectors_e, MAX_OUTPUT_CHANNELS );
     }
+#endif
 
     /* Processing */
     nChannels = s_min( nChannelsL, nChannelsC ); /* min(nChannelsL,ChannelsC) Q0*/
@@ -1794,7 +1816,11 @@ static void singularVectorsAccumulationLeft_fx(
                 move16();
                 FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
                 {
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
                     norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
+#else
+                    norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
+#endif
                 }
 #ifdef FIX_1010_OPT_INV_USING_INVSQRT
                 Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] );
@@ -1803,11 +1829,19 @@ static void singularVectorsAccumulationLeft_fx(
                 t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
 #else
                 t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
                 t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
+#else
+                t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) );
+#endif
 #endif
                 FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
                 {
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
                     singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], sing_exp2[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, sing_exp2[k][nCh] ), &sing_exp2[k][iCh] ); /* exp(sing_exp2) */
+#else
+                    singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], singularVectors_Left_e[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, singularVectors_Left_e[k][nCh] ), &singularVectors_Left_e[k][iCh] ); /* exp(sing_exp2) */
+#endif
                     move32();
                 }
             }
@@ -1816,7 +1850,11 @@ static void singularVectorsAccumulationLeft_fx(
             {
                 singularVectors_Left[iCh][nCh] = Mpy_32_32( singularVectors_Left[iCh][nCh], t_ii ); /* exp(sing_exp2 + t_ii_e) */
                 move32();
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
                 sing_exp2[iCh][nCh] = add( sing_exp2[iCh][nCh], t_ii_e );
+#else
+                singularVectors_Left_e[iCh][nCh] = add( singularVectors_Left_e[iCh][nCh], t_ii_e );
+#endif
                 move16();
             }
         }
@@ -1828,8 +1866,11 @@ static void singularVectorsAccumulationLeft_fx(
                 move32();
             }
         }
-
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
         singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], sing_exp2[nCh][nCh], ONE_IN_Q30, 1, &sing_exp2[nCh][nCh] ); /* exp(sing_exp2) */
+#else
+        singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], singularVectors_Left_e[nCh][nCh], ONE_IN_Q30, 1, &singularVectors_Left_e[nCh][nCh] ); /* exp(sing_exp2) */
+#endif
         move32();
     }
     // fclose(fp);
@@ -1837,7 +1878,11 @@ static void singularVectorsAccumulationLeft_fx(
     {
         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
         {
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
             singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], sing_exp2[nCh][iCh] ); /* Q31 */
+#else
+            singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */
+#endif
             move32();
         }
     }
@@ -1852,10 +1897,14 @@ static void singularVectorsAccumulationLeft_fx(
  *-------------------------------------------------------------------------*/
 
 static void singularVectorsAccumulationRight_fx(
-    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_e) */
-    Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
+    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
+    Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_e) */
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
     Word16 singularVectors_e,
+#else
+    Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
+#endif
     Word16 secDiag_e,
     const Word16 nChannelsC /* Q0 */
 )
@@ -1888,6 +1937,9 @@ static void singularVectorsAccumulationRight_fx(
 #else
                     ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */
                     singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
+#endif
+#ifdef FIX_1010_OPT_SINGLE_RESCALE
+                    temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) );
 #endif
                     move32();
                     sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e ) );
@@ -1904,7 +1956,11 @@ static void singularVectorsAccumulationRight_fx(
 
                     FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
                     {
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
                         norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_e, sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
+#else
+                        norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_Left_e[nCh][k], sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
+#endif
                     }
 
                     FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
-- 
GitLab


From 34dd3d7664b2fc34cbf5799f672d63f850931fed Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Thu, 9 Jan 2025 09:44:05 +0100
Subject: [PATCH 29/41] Clang format fix (trailing comment alignment behaves
 differently in my local clang-format version).

---
 lib_dec/ivas_svd_dec.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index e280abb42..bcd3670f3 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -1364,7 +1364,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
             sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e );
             move16();
-            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e );                                               /* exp(norm_x_e) */
 #else
             singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
             move16();
@@ -1399,8 +1399,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e );                       /* exp(r_e) */
         singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */
 #else
-        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
-        singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
+        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                                                          /* exp(r_e) */
+        singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] );                                     /* sing_exp */
 #endif
         move32();
 
@@ -1431,7 +1431,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */
             f_e = add( f_e, sub( norm_x_e, r_e ) );
 #else
-            f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
+            f = Mpy_32_32( norm_x, invVal );                                                                                                                                                                                            /* invVal_e + (norm_x_e - r_e) */
             f_e = add( invVal_e, sub( norm_x_e, r_e ) );
 #endif
 
@@ -1591,7 +1591,7 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) );
                 move16();
-                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e );           /* exp(norm_x_e) */
 #else
                 singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
                 move16();
@@ -1685,7 +1685,7 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */
 #else
-                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
+                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] );   /* exp(sing_exp2) */
 #endif
                     move32();
                 }
@@ -1828,7 +1828,7 @@ static void singularVectorsAccumulationLeft_fx(
                 t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj );
                 t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
 #else
-                t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,
+                t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp );                                                                                 // t_ii_e+norm_y_e-*singularVectors_e,
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
 #else
@@ -1881,7 +1881,7 @@ static void singularVectorsAccumulationLeft_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
             singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], sing_exp2[nCh][iCh] ); /* Q31 */
 #else
-            singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */
+            singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] );                                                              /* Q31 */
 #endif
             move32();
         }
@@ -1935,8 +1935,8 @@ static void singularVectorsAccumulationRight_fx(
                     ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */
                     singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
 #else
-                    ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */
-                    singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
+                    ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 );                                                         /* exp(temp_exp1) */
+                    singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] );                                                                       /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
 #endif
 #ifdef FIX_1010_OPT_SINGLE_RESCALE
                     temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) );
-- 
GitLab


From f4471291a58f8694105cf3dd111376d3f7bffc5a Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Thu, 9 Jan 2025 09:58:45 +0100
Subject: [PATCH 30/41] Deactivate debug code.

---
 lib_dec/ivas_svd_dec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index bcd3670f3..72caa9b1b 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -292,7 +292,7 @@ void svdMat2mat_fx(
 }
 
 #ifndef DEBUG_SVD_TEST
-#define DEBUG_SVD_PRECISION
+// #define DEBUG_SVD_PRECISION
 #endif
 // #define MORE_DEBUG
 
-- 
GitLab


From 80f7175e36abdf2564fe6f919360c8350a698d7a Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Thu, 9 Jan 2025 14:58:23 +0100
Subject: [PATCH 31/41] Fix compile error for FIX_1010_OPT_INV_USING_INVSQRT,
 but keep it disabled because of regressions. Reduce threshold for SVD problem
 debug code (disabled by default).

---
 lib_dec/ivas_svd_dec.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 72caa9b1b..8da6e2f4c 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -441,6 +441,7 @@ static float matrixTestIdentity(
     return r;
 }
 
+#define PROBLEMATIC_THRESHOLD 0.5f
 static void svd_accuracy_test_fx(
     Word32 InputMatrixFx[][MAX_OUTPUT_CHANNELS], /* i  : matrix to be decomposed (M)            InputMatrix_e*/
     Word16 InputMatrixFx_e,
@@ -493,7 +494,7 @@ static void svd_accuracy_test_fx(
     matrixTranspose( tmp1, singularVectors_Left, nChannelsL, nChannelsC );                             /* CxL */
     matrixProduct( tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC ); /* CxC */
     result = matrixTestIdentity( tmp2, nChannelsC );
-    if ( result >= 1.0 )
+    if ( result >= PROBLEMATIC_THRESHOLD )
     {
         problematic = 1;
     }
@@ -506,7 +507,7 @@ static void svd_accuracy_test_fx(
     matrixTranspose( tmp1, singularVectors_Right, nChannelsC, nChannelsC );                             /* CxC */
     matrixProduct( tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC ); /* CxC */
     result = matrixTestIdentity( tmp2, nChannelsC );
-    if ( result >= 1.0 )
+    if ( result >= PROBLEMATIC_THRESHOLD )
     {
         problematic = 1;
     }
@@ -520,7 +521,7 @@ static void svd_accuracy_test_fx(
     matrixTranspose( tmp3, singularVectors_Right, nChannelsC, nChannelsC );                                              /* CxC */
     matrixProduct( tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC );                                  /* LxC */
     result = matrixDifference( tmp2, InputMatrix, nChannelsL, nChannelsC );
-    if ( result >= 1.0 )
+    if ( result >= PROBLEMATIC_THRESHOLD )
     {
         problematic = 1;
     }
@@ -1826,7 +1827,11 @@ static void singularVectorsAccumulationLeft_fx(
                 Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] );
                 t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp );
                 t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj );
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
                 t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
+#else
+                t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) );
+#endif
 #else
                 t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp );                                                                                 // t_ii_e+norm_y_e-*singularVectors_e,
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
-- 
GitLab


From 5b946e69d566e713b62b1448083812b85ac9bacc Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Thu, 9 Jan 2025 15:14:05 +0530
Subject: [PATCH 32/41] Bug fix in StableHighPitchDetect_ivas_fx, LTV crash
 fixes for MASA in original and -10dB scaled inputs

---
 lib_enc/ivas_stereo_icbwe_enc.c | 98 +++++++++++++++------------------
 lib_enc/pitch_ol2.c             |  2 +-
 2 files changed, 44 insertions(+), 56 deletions(-)

diff --git a/lib_enc/ivas_stereo_icbwe_enc.c b/lib_enc/ivas_stereo_icbwe_enc.c
index 90d6da69e..aed080b12 100644
--- a/lib_enc/ivas_stereo_icbwe_enc.c
+++ b/lib_enc/ivas_stereo_icbwe_enc.c
@@ -115,11 +115,12 @@ static Word16 ic_bwe_enc_specMapping_ivas_fx(
     Word16 Txx1_fx = 0, Txx2_fx = 0, Txx3_fx = 0, T_desired_fx = 0;
     Word16 Txx1_e = 0, Txx2_e = 0, Txx3_e = 0, T_desired_e = 0;
     Word16 T_nonref_target_fx, temp_fx;
+    Word32 temp00_fx, temp11_fx;
+    Word16 temp00_exp, temp11_exp;
     Word32 temp0_fx, temp1_fx, temp2_fx, temp3_fx;
     Word16 a_fx, b_fx, c_fx, a_e, b_e, c_e;
     Word16 u_fx, u1_fx, u2_fx, u_e = 0, u1_e, u2_e;
     Word16 temp0_exp, temp1_exp, temp2_exp, temp3_exp, exp, T_nonref_target_e;
-    Word16 exp_buf[6];
 
     move16();
     move16();
@@ -133,35 +134,8 @@ static Word16 ic_bwe_enc_specMapping_ivas_fx(
 
     /* Calculate rxx(1)/rxx(0) of the non ref target */
 
-    temp0_fx = dotp_fixed_ivas_fx( shb_frame_target_fx, shb_frame_target_e, shb_frame_target_fx, shb_frame_target_e, L_FRAME16k - 1, &temp0_exp );     /* Q31-temp0_exp */
-    temp1_fx = dotp_fixed_ivas_fx( shb_frame_target_fx, shb_frame_target_e, shb_frame_target_fx + 1, shb_frame_target_e, L_FRAME16k - 1, &temp1_exp ); /* Q31-temp1_exp */
-
-    /* Smoothing */
-    temp0_fx = L_shr( temp0_fx, 1 );
-    temp1_fx = L_shr( temp1_fx, 1 );
-
-
-    memShbSpecXcorr_fx[0] = temp0_fx;                                 // tem0_exp
-    memShbSpecXcorr_fx[1] = L_shr( temp1_fx, temp0_exp - temp1_exp ); // temp0_exp
-    exp = sub( temp0_exp, temp1_exp );
-    exp_buf[0] = exp;
-    exp_buf[1] = exp;
-    move32();
-    move32();
-    move16();
-    move16();
-
-    IF( temp0_fx != 0 )
-    {
-        T_nonref_target_fx = BASOP_Util_Divide3232_Scale( temp1_fx, temp0_fx, &T_nonref_target_e ); // exp
-    }
-    ELSE
-    {
-        T_nonref_target_fx = 0;
-        T_nonref_target_e = 31;
-        move32();
-        move16();
-    }
+    temp00_fx = dotp_fixed_ivas_fx( shb_frame_target_fx, shb_frame_target_e, shb_frame_target_fx, shb_frame_target_e, L_FRAME16k - 1, &temp00_exp );     /* Q31-temp0_exp */
+    temp11_fx = dotp_fixed_ivas_fx( shb_frame_target_fx, shb_frame_target_e, shb_frame_target_fx + 1, shb_frame_target_e, L_FRAME16k - 1, &temp11_exp ); /* Q31-temp1_exp */
 
     /* Calculate rxx(1)/rxx(0) of the non ref synth */
     temp0_fx = dotp_fixed_ivas_fx( shb_synth_nonref_fx, shb_synth_nonref_e, shb_synth_nonref_fx, shb_synth_nonref_e, L_FRAME16k - 3, &temp0_exp );     /* Q31-temp0_exp */
@@ -169,41 +143,55 @@ static Word16 ic_bwe_enc_specMapping_ivas_fx(
     temp2_fx = dotp_fixed_ivas_fx( shb_synth_nonref_fx, shb_synth_nonref_e, shb_synth_nonref_fx + 2, shb_synth_nonref_e, L_FRAME16k - 3, &temp2_exp ); /* Q31-temp2_exp */
     temp3_fx = dotp_fixed_ivas_fx( shb_synth_nonref_fx, shb_synth_nonref_e, shb_synth_nonref_fx + 3, shb_synth_nonref_e, L_FRAME16k - 3, &temp3_exp ); /* Q31-temp3_exp */
 
+    exp = s_max( *memShbSpecXcorr_e, s_max( s_max( s_max( temp00_exp, temp11_exp ), s_max( temp0_exp, temp1_exp ) ), s_max( temp2_exp, temp3_exp ) ) );
+
+    temp00_fx = L_shr( temp00_fx, sub( exp, temp00_exp ) );
+    temp11_fx = L_shr( temp11_fx, sub( exp, temp11_exp ) );
+    temp0_fx = L_shr( temp0_fx, sub( exp, temp0_exp ) );
+    temp1_fx = L_shr( temp1_fx, sub( exp, temp1_exp ) );
+    temp2_fx = L_shr( temp2_fx, sub( exp, temp2_exp ) );
+    temp3_fx = L_shr( temp3_fx, sub( exp, temp3_exp ) );
+
     /* Smoothing */
-    temp0_fx = L_shr( temp0_fx, 1 );
-    temp1_fx = L_shr( temp1_fx, 1 );
-    temp2_fx = L_shr( temp2_fx, 1 );
-    temp3_fx = L_shr( temp3_fx, 1 );
-
-    exp_buf[2] = temp0_exp;
-    exp_buf[3] = temp1_exp;
-    exp_buf[4] = temp2_exp;
-    exp_buf[5] = temp3_exp;
-    exp = exp_buf[0];
-    move16();
-    move16();
-    move16();
-    move16();
-    move16();
+
     FOR( Word16 i = 0; i < 6; i++ )
     {
-        IF( LT_16( exp, exp_buf[i] ) )
-        {
-            exp = exp_buf[i];
-            move16();
-        }
+        memShbSpecXcorr_fx[i] = L_shl( memShbSpecXcorr_fx[i], sub( *memShbSpecXcorr_e, exp ) );
+        move32();
     }
-    memShbSpecXcorr_fx[2] = L_shr( temp0_fx, sub( exp, temp0_exp ) ); /* Q31-exp */
-    memShbSpecXcorr_fx[3] = L_shr( temp1_fx, sub( exp, temp1_exp ) ); /* Q31-exp */
-    memShbSpecXcorr_fx[4] = L_shr( temp2_fx, sub( exp, temp2_exp ) ); /* Q31-exp */
-    memShbSpecXcorr_fx[5] = L_shr( temp3_fx, sub( exp, temp3_exp ) ); /* Q31-exp */
     *memShbSpecXcorr_e = exp;
+    move16();
+
+    temp00_fx = L_add( L_shr( temp00_fx, 1 ), L_shr( memShbSpecXcorr_fx[0], 1 ) );
+    temp11_fx = L_add( L_shr( temp11_fx, 1 ), L_shr( memShbSpecXcorr_fx[1], 1 ) );
+    temp0_fx = L_add( L_shr( temp0_fx, 1 ), L_shr( memShbSpecXcorr_fx[2], 1 ) );
+    temp1_fx = L_add( L_shr( temp1_fx, 1 ), L_shr( memShbSpecXcorr_fx[3], 1 ) );
+    temp2_fx = L_add( L_shr( temp2_fx, 1 ), L_shr( memShbSpecXcorr_fx[4], 1 ) );
+    temp3_fx = L_add( L_shr( temp3_fx, 1 ), L_shr( memShbSpecXcorr_fx[5], 1 ) );
 
+    T_nonref_target_fx = 0;
     move32();
+    T_nonref_target_e = 31;
+    move16();
+
+    IF( temp00_fx != 0 )
+    {
+        T_nonref_target_fx = BASOP_Util_Divide3232_Scale( temp11_fx, temp00_fx, &T_nonref_target_e );
+    }
+
+
+    memShbSpecXcorr_fx[0] = temp00_fx;
     move32();
+    memShbSpecXcorr_fx[1] = temp11_fx;
     move32();
+    memShbSpecXcorr_fx[2] = temp0_fx; /* Q31-exp */
+    move32();
+    memShbSpecXcorr_fx[3] = temp1_fx; /* Q31-exp */
+    move32();
+    memShbSpecXcorr_fx[4] = temp2_fx; /* Q31-exp */
+    move32();
+    memShbSpecXcorr_fx[5] = temp3_fx; /* Q31-exp */
     move32();
-    move16();
 
 
     IF( temp0_fx != 0 )
diff --git a/lib_enc/pitch_ol2.c b/lib_enc/pitch_ol2.c
index 4e4277eef..f3087f1b9 100644
--- a/lib_enc/pitch_ol2.c
+++ b/lib_enc/pitch_ol2.c
@@ -285,7 +285,7 @@ void StableHighPitchDetect_ivas_fx(
     *flag_spitch = 0;
     move16();
     IF( ( EQ_16( localVAD, 1 ) ) && ( EQ_16( *predecision_flag, 1 ) ) &&
-        ( GT_16( *voicing0_sm, 16384 ) ) && ( GT_16( *voicing0_sm, mult_r( *voicing_sm, 21299 ) ) ) )
+        ( GT_16( *voicing0_sm, 21299 ) ) && ( GT_16( *voicing0_sm, mult_r( *voicing_sm, 22938 ) ) ) )
     {
         *flag_spitch = 1;
         move16();
-- 
GitLab


From 0d35d20f17da5b332bf1cf8ed2f8484b2548492c Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Thu, 9 Jan 2025 15:20:03 +0530
Subject: [PATCH 33/41] Fix for 3GPP issue 1148: crash at 32kbps

[x] link #1148
[x] When an unvoiced frame is coded at 32 kbps, bwe_exc_fx is not defined and hence should not be updated. A check is added so that the update happens only when hBWE_TD is not NULL.
---
 lib_com/options.h   | 1 +
 lib_enc/enc_uv_fx.c | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/lib_com/options.h b/lib_com/options.h
index 5aa205a23..343515281 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -107,4 +107,5 @@
 #define FIX_ISSUE_1122                          /* Ittiam: Fix issue 1122: corrected incorrect scaling of a buffer leading to incorrect metadata bits */
 #define FIX_1132_STACK_CORRUPTION               /* Stack corruption issue due of extending index access*/
 #define FIX_ISSUE_1092                          /* Ittiam: Fix for Issue 1092: BASOP asserts in stereo fx encoder for selection test inputs*/
+#define FIX_ISSUE_1148
 #endif
diff --git a/lib_enc/enc_uv_fx.c b/lib_enc/enc_uv_fx.c
index d2d8c9570..cd147c6ce 100644
--- a/lib_enc/enc_uv_fx.c
+++ b/lib_enc/enc_uv_fx.c
@@ -480,7 +480,14 @@ void encod_unvoiced_ivas_fx(
         voice_factors_fx[i_subfr / L_SUBFR] = 0;
         move16();
 
+#ifdef FIX_ISSUE_1148
+        if ( st_fx->hBWE_TD != NULL )
+        {
+            interp_code_5over2_fx( &exc_fx[i_subfr], &bwe_exc_fx[i_subfr * HIBND_ACB_L_FAC], L_SUBFR );
+        }
+#else
         interp_code_5over2_fx( &exc_fx[i_subfr], &bwe_exc_fx[i_subfr * HIBND_ACB_L_FAC], L_SUBFR );
+#endif
 
         /*-----------------------------------------------------------------*
          * Synthesize speech to update mem_syn[].
-- 
GitLab


From cf45fa0fac7bb2516690511c940af9064af9c88f Mon Sep 17 00:00:00 2001
From: Jan Kiene <jan.kiene@iis.fraunhofer.de>
Date: Thu, 9 Jan 2025 08:54:36 +0100
Subject: [PATCH 34/41] get ref complexity numbers from ivas-float-update
 branch

---
 .gitlab-ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index c2ca28465..e1fde534c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1215,8 +1215,8 @@ voip-be-on-merge-request:
   - rm artifacts.zip
   - rm -rf $public_dir
 
-  ### 1.5.part: get the corresponding measurement from ivas-float-update-cmplx
-  - job_id=$(python3 ci/get_id_of_last_job_occurence.py ivas-float-update-cmplx $CI_JOB_NAME $CI_PROJECT_ID)
+  ### 1.5.part: get the corresponding measurement from ivas-float-update
+  - job_id=$(python3 ci/get_id_of_last_job_occurence.py ivas-float-update $CI_JOB_NAME $CI_PROJECT_ID)
   - echo $job_id
   - curl --request GET "https://forge.3gpp.org/rep/api/v4/projects/$CI_PROJECT_ID/jobs/$job_id/artifacts" --output artifacts_ref.zip
   - unzip -j artifacts_ref.zip "*latest_WMOPS.csv"
-- 
GitLab


From 0f364fcbb5045165f79400e4eb8dda8e17904b42 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Wed, 15 Jan 2025 12:31:29 +0100
Subject: [PATCH 35/41] Add FIX_1010_OPT_NORM_NOSAT (do not saturate
 intermediate results) and FIX_1010_OPT_SEC_SINGLE_RESCALE (do not rescale
 secDiag data repeatedly). Improves accuracy and reduces workload but makes
 dependency on dynamic scaling bigger.

---
 lib_dec/ivas_svd_dec.c | 174 ++++++++++++++++++++++++++++++++---------
 1 file changed, 135 insertions(+), 39 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 8da6e2f4c..647b203af 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -58,6 +58,8 @@
 #define FIX_1010_OPT_GIVENS
 #define FIX_1010_OPT_GIVENS_INV
 // #define FIX_1010_OPT_GIVENS_AMAX_BMIN
+#define FIX_1010_OPT_NORM_NOSAT
+#define FIX_1010_OPT_SEC_SINGLE_RESCALE
 #endif
 
 /*-----------------------------------------------------------------------*
@@ -135,7 +137,11 @@ static void singularVectorsAccumulationRight_fx(
 #else
     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
 #endif
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
     Word16 secDiag_e,
+#else
+    Word16 *secDiag_e,
+#endif
     const Word16 nChannelsC /* Q0 */
 );
 
@@ -560,7 +566,11 @@ Word16 svd_fx(
     Word16 errorMessage, condition;
     // int16_t max_length = ((nChannelsL > nChannelsC) ? nChannelsL : nChannelsC);
     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS];
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
     Word16 secDiag_fx_e = 0;
+#else
+    Word16 secDiag_fx_e[MAX_OUTPUT_CHANNELS];
+#endif
     move16();
     Word32 eps_x_fx = 0, temp_fx;
     move16();
@@ -569,7 +579,10 @@ Word16 svd_fx(
     Word16 temp_fx_e;
     push_wmops( "svd_fx" );
 
+#if 1
     set32_fx( secDiag_fx, 0, MAX_OUTPUT_CHANNELS );
+    set16_fx( secDiag_fx_e, 0, MAX_OUTPUT_CHANNELS );
+#endif
 
     /* Collecting Values */
     FOR( iCh = 0; iCh < nChannelsL; iCh++ )
@@ -584,16 +597,22 @@ Word16 svd_fx(
     set16_fx( singularValues_fx_e, 0, MAX_OUTPUT_CHANNELS );
 
     /* Householder reduction */
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
     HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, &secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e );
-
+#else
+    HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e );
+#endif
     /* Set extremely small values to zero if needed */
     // flushToZeroArray(singularValues, max_length);
     // flushToZeroMat(singularVectors_Left, nChannelsL, nChannelsL);
     // flushToZeroMat(singularVectors_Right, nChannelsC, nChannelsC);
 
     /* BidagonalDiagonalisation */
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
     errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, &secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */
-
+#else
+    errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */
+#endif
     /* Sort the singular values descending order */
     lengthSingularValues = s_min( nChannelsL, nChannelsC ); /* Q0 */
 
@@ -676,11 +695,15 @@ static Word16 BidagonalDiagonalisation_fx(
     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)		   singularValues_fx_e*/
     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* i/o:                                           secDiag_fx_e*/
     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],        /* i/o: singular values vector (S)							  */
-    Word16 *secDiag_fx_e,                                   /* i/o:														  */
-    const Word16 nChannelsL,                                /* i  : number of rows in the matrix to be decomposed		Q0*/
-    const Word16 nChannelsC,                                /* i  : number of columns in the matrix to be decomposed	Q0*/
-    const Word32 eps_x,                                     /* i  :                                                eps_x_e*/
-    const Word16 eps_x_e                                    /* i  :                                                       */
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
+    Word16 *secDiag_fx_e, /* i/o:														  */
+#else
+    Word16 *secDiag_new_e, /* i/o:														  */
+#endif
+    const Word16 nChannelsL, /* i  : number of rows in the matrix to be decomposed		Q0*/
+    const Word16 nChannelsC, /* i  : number of columns in the matrix to be decomposed	Q0*/
+    const Word32 eps_x,      /* i  :                                                eps_x_e*/
+    const Word16 eps_x_e     /* i  :                                                       */
 )
 {
     Word16 kCh, nCh, iCh, jCh, split;
@@ -690,6 +713,9 @@ static Word16 BidagonalDiagonalisation_fx(
     move16();
     move16();
     Word16 temp_exp;
+#ifdef FIX_1010_OPT_NORM_NOSAT
+    Word16 temp_exp2;
+#endif
     Word32 g = 0;
     move16();
     Word16 g_e = 0;
@@ -700,9 +726,12 @@ static Word16 BidagonalDiagonalisation_fx(
 #ifdef FIX_1010_OPT_GIVENS_INV
     Word32 temp;
 #endif
-    Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS], secDiag_new_e[MAX_OUTPUT_CHANNELS];
-    Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS );
+    Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS];
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
+    Word16 secDiag_new_e[MAX_OUTPUT_CHANNELS];
     set16_fx( secDiag_new_e, *secDiag_fx_e, MAX_OUTPUT_CHANNELS );
+#endif
+    Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS );
 
     FOR( iCh = nChannelsC - 1; iCh >= 0; iCh-- ) /* nChannelsC */
     {
@@ -779,12 +808,18 @@ static Word16 BidagonalDiagonalisation_fx(
                     c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp );                                   /* exp(temp_exp + (c_e - singularValues_new_e)) */
                     c_e = add( temp_exp, sub( c_e, singularValues_new_e[kCh] ) );
 #endif
+#ifndef FIX_1010_OPT_NORM_NOSAT
                     IF( c_e > 0 )
                     {
                         c = L_shl_sat( c, c_e ); // Q31
                         c_e = 0;
                         move16();
                     }
+#else
+                    temp_exp2 = norm_l( c );
+                    c = L_shl( c, temp_exp2 );
+                    c_e = sub( c_e, temp_exp2 );
+#endif
 #ifdef FIX_1010_OPT_GIVENS_INV
                     s = Mpy_32_32( -g, temp );
                     s_e = add( g_e, temp_exp );
@@ -792,13 +827,18 @@ static Word16 BidagonalDiagonalisation_fx(
                     s = BASOP_Util_Divide3232_Scale_cadence( -g, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp ); /* exp(temp_exp + (g_e - singularValues_new_e))*/
                     s_e = add( temp_exp, sub( g_e, singularValues_new_e[kCh] ) );
 #endif
+#ifndef FIX_1010_OPT_NORM_NOSAT
                     IF( s_e > 0 )
                     {
                         s = L_shl_sat( s, s_e ); // Q31
                         s_e = 0;
                         move16();
                     }
-
+#else
+                    temp_exp2 = norm_l( s );
+                    s = L_shl( s, temp_exp2 );
+                    s_e = sub( s_e, temp_exp2 );
+#endif
                     ApplyRotation_fx( singularVectors_Left_fx, c, c_e, s, s_e, 0, x11_e, 0, x12_e, &f1, &f1_e, &f2, &f2_e, kCh, split, nChannelsL ); /* nChannelsL */
                 }
             }
@@ -849,6 +889,7 @@ static Word16 BidagonalDiagonalisation_fx(
     // rescaling block
     Copy( singularValues_new_e, singularValues_fx_e, MAX_OUTPUT_CHANNELS );
 
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
     Word16 max_exp = -31;
     move16();
     FOR( iCh = 0; iCh < nChannelsC; iCh++ )
@@ -865,6 +906,7 @@ static Word16 BidagonalDiagonalisation_fx(
         secDiag_fx[iCh] = L_shr_r( secDiag_fx[iCh], sub( *secDiag_fx_e, secDiag_new_e[iCh] ) ); /* exp(secDiag_fx_e) */
         move32();
     }
+#endif
 
     return ( error );
 }
@@ -891,6 +933,9 @@ static void ApplyQRTransform_fx(
 #ifdef FIX_1010_OPT_GIVENS_INV
     Word32 temp;
     Word16 temp_e;
+#endif
+#ifdef FIX_1010_OPT_NORM_NOSAT
+    Word16 temp_norm_e;
 #endif
     Word16 ch, split;
     Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0;
@@ -1004,12 +1049,18 @@ static void ApplyQRTransform_fx(
         c = BASOP_Util_Divide3232_Scale_cadence( d, maxWithSign_fx( secDiag[ch] ), &c_e ); /* exp(c_e + (d_e + secDiag_e)) */
         c_e = add( c_e, sub( d_e, secDiag_e[ch] ) );
 #endif
+#ifndef FIX_1010_OPT_NORM_NOSAT
         IF( c_e > 0 )
         {
             c = L_shl_sat( c, c_e ); // Q31
             c_e = 0;
             move16();
         }
+#else
+        temp_norm_e = norm_l( c );
+        c = L_shl( c, temp_norm_e );
+        c_e = sub( c_e, temp_norm_e );
+#endif
 #ifdef FIX_1010_OPT_GIVENS_INV
         s = Mpy_32_32( r, temp );
         s_e = add( r_e, temp_e );
@@ -1017,13 +1068,18 @@ static void ApplyQRTransform_fx(
         s = BASOP_Util_Divide3232_Scale_cadence( r, maxWithSign_fx( secDiag[ch] ), &s_e ); /* exp(s_e + (r_e - sec_Diag_e))*/
         s_e = add( s_e, sub( r_e, secDiag_e[ch] ) );
 #endif
+#ifndef FIX_1010_OPT_NORM_NOSAT
         IF( s_e > 0 )
         {
             s = L_shl_sat( s, s_e ); // Q31
             s_e = 0;
             move16();
         }
-
+#else
+        temp_norm_e = norm_l( s );
+        s = L_shl( s, temp_norm_e );
+        s_e = sub( s_e, temp_norm_e );
+#endif
         r = Mpy_32_32( s, singularValues[ch + 1] ); /* exp(r_e + secDiag_e) */
         r_e = add( s_e, singularValues_e[ch + 1] );
         x_split = Mpy_32_32( c, singularValues[ch + 1] ); /* exp(c_e + secDiag_e) */
@@ -1052,21 +1108,33 @@ static void ApplyQRTransform_fx(
 
             c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */
             c_e = add( d_e, aux_e );
+#ifndef FIX_1010_OPT_NORM_NOSAT
             IF( c_e > 0 )
             {
                 c = L_shl_sat( c, c_e ); // Q31
                 c_e = 0;
                 move16();
             }
+#else
+            temp_norm_e = norm_l( c );
+            c = L_shl( c, temp_norm_e );
+            c_e = sub( c_e, temp_norm_e );
+#endif
 
             s = Mpy_32_32( r, aux ); /* exp(r_e + aux_e) */
             s_e = add( r_e, aux_e );
+#ifndef FIX_1010_OPT_NORM_NOSAT
             IF( s_e > 0 )
             {
                 s = L_shl_sat( s, s_e ); // Q31
                 s_e = 0;
                 move16();
             }
+#else
+            temp_norm_e = norm_l( s );
+            s = L_shl( s, temp_norm_e );
+            s_e = sub( s_e, temp_norm_e );
+#endif
         }
 
         // ApplyRotation(singularVectors_Left, c, s, g, x_split, &d, &x_ii, ch + 1, ch, nChannelsL);
@@ -1191,7 +1259,11 @@ static void HouseholderReduction_fx(
 #endif
 
         Word16 L_temp_e;
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
         Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), *secDiag_fx_e, &L_temp_e ); /* exp(L_temp_e) */
+#else
+        Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
+#endif
         IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
         {
             *eps_x_fx = L_temp; /* exp(L_temp_e) */
@@ -1206,7 +1278,11 @@ static void HouseholderReduction_fx(
     singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC );
     singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_e, singularValues_fx_e, nChannelsL, nChannelsC );
 #else
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
     singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, *secDiag_fx_e, nChannelsC );
+#else
+    singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
+#endif
     singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
 #endif
 
@@ -1289,6 +1365,7 @@ static void biDiagonalReductionLeft_fx(
 
     secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); /* exp(sig_x_e) */
     move32();
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
     // rescaling block
     IF( GT_16( *sig_x_e, *secDiag_e ) )
     {
@@ -1306,7 +1383,10 @@ ELSE IF( LT_16( *sig_x_e, *secDiag_e ) )
     secDiag[currChannel] = L_shr_r( secDiag[currChannel], sub( *secDiag_e, *sig_x_e ) ); /* exp(secDiag_e) */
     move32();
 }
-
+#else
+    secDiag_e[currChannel] = *sig_x_e;
+    move16();
+#endif
 /* Setting values to 0 */
 ( *sig_x ) = 0;
 move32();
@@ -1323,7 +1403,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
         ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */
 #else
-        ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */
+            ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */
 #endif
     }
 
@@ -1359,17 +1439,17 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
 #endif
 #else
-            Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
-            singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
-            move32();
+                Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
+                singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
+                move32();
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
-            sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e );
-            move16();
-            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e );                                               /* exp(norm_x_e) */
+                sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e );
+                move16();
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
 #else
-            singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
-            move16();
-            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+                singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
+                move16();
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
 #endif
 #endif
         }
@@ -1400,8 +1480,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e );                       /* exp(r_e) */
         singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */
 #else
-        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                                                          /* exp(r_e) */
-        singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] );                                     /* sing_exp */
+            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
+            singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
 #endif
         move32();
 
@@ -1424,7 +1504,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e ), &norm_x_e ); /* exp(norm_x_e) */
 #else
-                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */
+                    norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */
 #endif
             }
 
@@ -1432,8 +1512,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */
             f_e = add( f_e, sub( norm_x_e, r_e ) );
 #else
-            f = Mpy_32_32( norm_x, invVal );                                                                                                                                                                                            /* invVal_e + (norm_x_e - r_e) */
-            f_e = add( invVal_e, sub( norm_x_e, r_e ) );
+                f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
+                f_e = add( invVal_e, sub( norm_x_e, r_e ) );
 #endif
 
             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
@@ -1441,7 +1521,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], *singularVectors_e, Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, sing_exp[jCh] ), &sing_exp2[jCh][iCh] ); /* exp( sing_exp2) */
 #else
-                singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] );
+                    singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] );
 #endif
                 move32();
             }
@@ -1455,7 +1535,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
             sing_exp2[jCh][currChannel] = add( sing_exp[jCh], *sig_x_e );
 #else
-            singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e );
+                singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e );
 #endif
             move16();
         }
@@ -1509,7 +1589,11 @@ static void biDiagonalReductionRight_fx(
 #else
     Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
 #endif
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
     Word16 *secDiag_e,
+#else
+    Word16 *secDiag_exp,
+#endif
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
@@ -1521,7 +1605,9 @@ static void biDiagonalReductionRight_fx(
     Word16 iCh, jCh, idx;
     Word32 norm_x, r;
     Word16 norm_x_e, r_e;
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
     Word16 secDiag_exp[MAX_OUTPUT_CHANNELS];
+#endif
     Word32 L_temp;
     Word16 L_temp_e;
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
@@ -1532,7 +1618,9 @@ static void biDiagonalReductionRight_fx(
         set16_fx( sing_exp2[jCh], *singularVectors_e, MAX_OUTPUT_CHANNELS );
     }
 #endif
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
     set16_fx( secDiag_exp, *secDiag_e, MAX_OUTPUT_CHANNELS );
+#endif
 
     /* Setting values to 0 */
     ( *sig_x ) = 0;
@@ -1592,7 +1680,7 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) );
                 move16();
-                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e );           /* exp(norm_x_e) */
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
 #else
                 singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
                 move16();
@@ -1686,7 +1774,7 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */
 #else
-                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] );   /* exp(sing_exp2) */
+                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
 #endif
                     move32();
                 }
@@ -1704,7 +1792,7 @@ static void biDiagonalReductionRight_fx(
                 move16();
             }
 
-
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
             /*rescaling block*/
             Word16 exp_max = *secDiag_e;
             move16();
@@ -1719,7 +1807,7 @@ static void biDiagonalReductionRight_fx(
             }
             *secDiag_e = exp_max;
             move16();
-
+#endif
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
             exp_max = *singularVectors_e;
             move16();
@@ -1833,7 +1921,7 @@ static void singularVectorsAccumulationLeft_fx(
                 t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) );
 #endif
 #else
-                t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp );                                                                                 // t_ii_e+norm_y_e-*singularVectors_e,
+                t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
 #else
@@ -1886,7 +1974,7 @@ static void singularVectorsAccumulationLeft_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
             singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], sing_exp2[nCh][iCh] ); /* Q31 */
 #else
-            singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] );                                                              /* Q31 */
+            singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */
 #endif
             move32();
         }
@@ -1910,7 +1998,11 @@ static void singularVectorsAccumulationRight_fx(
 #else
     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
 #endif
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
     Word16 secDiag_e,
+#else
+    Word16 *secDiag_e,
+#endif
     const Word16 nChannelsC /* Q0 */
 )
 {
@@ -1923,7 +2015,7 @@ static void singularVectorsAccumulationRight_fx(
     nChannels = nChannelsC; /* nChannelsC	Q0*/
 
     /* avoid compiler warning */
-    t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e) */
+    t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e[nChannels - 1]) */
     move32();
 
     FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* nChannelsC, min(nChannelsLmnChannelsC) otherwise */
@@ -1940,14 +2032,18 @@ static void singularVectorsAccumulationRight_fx(
                     ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */
                     singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
 #else
-                    ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 );                                                         /* exp(temp_exp1) */
-                    singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] );                                                                       /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
+                    ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */
+                    singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
 #endif
 #ifdef FIX_1010_OPT_SINGLE_RESCALE
                     temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) );
 #endif
                     move32();
+#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
                     sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e ) );
+#else
+                    sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e[nCh + 1] ) );
+#endif
                     move16();
                     // singularVectors_Right[iCh][nCh] = L_shl_sat( singularVectors_Right[iCh][nCh], temp_exp2 );
                 }
@@ -1989,7 +2085,7 @@ static void singularVectorsAccumulationRight_fx(
         }
         singularVectors_Right[nCh][nCh] = MAX_32;
         move32();
-        t_ii = secDiag[nCh]; /* exp(secDiag_e) */
+        t_ii = secDiag[nCh]; /* exp(secDiag_e[nCh]) */
         move32();
     }
     return;
-- 
GitLab


From daead0fd5cc02e32a2bec9f6e8430e3226cac625 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Wed, 15 Jan 2025 12:33:42 +0100
Subject: [PATCH 36/41] clang-format

---
 lib_dec/ivas_svd_dec.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 647b203af..1b113e036 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -698,7 +698,7 @@ static Word16 BidagonalDiagonalisation_fx(
 #ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
     Word16 *secDiag_fx_e, /* i/o:														  */
 #else
-    Word16 *secDiag_new_e, /* i/o:														  */
+    Word16 *secDiag_new_e,                                                                                                                                                                                           /* i/o:														  */
 #endif
     const Word16 nChannelsL, /* i  : number of rows in the matrix to be decomposed		Q0*/
     const Word16 nChannelsC, /* i  : number of columns in the matrix to be decomposed	Q0*/
@@ -804,8 +804,8 @@ static Word16 BidagonalDiagonalisation_fx(
                     c = Mpy_32_32( c, temp );
                     c_e = add( c_e, temp_exp );
 #else
-                    singularValues_fx[kCh] = GivensRotation_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_new_e[kCh] ); /* exp(singularValues_new_e) */
-                    c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp );                                   /* exp(temp_exp + (c_e - singularValues_new_e)) */
+                    singularValues_fx[kCh] = GivensRotation_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_new_e[kCh] );                                                             /* exp(singularValues_new_e) */
+                    c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp );                                                                                               /* exp(temp_exp + (c_e - singularValues_new_e)) */
                     c_e = add( temp_exp, sub( c_e, singularValues_new_e[kCh] ) );
 #endif
 #ifndef FIX_1010_OPT_NORM_NOSAT
@@ -1445,7 +1445,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e );
                 move16();
-                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e );                                               /* exp(norm_x_e) */
 #else
                 singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
                 move16();
@@ -1480,8 +1480,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e );                       /* exp(r_e) */
         singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */
 #else
-            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
-            singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
+            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                                                          /* exp(r_e) */
+            singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] );                                     /* sing_exp */
 #endif
         move32();
 
@@ -1512,7 +1512,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
             f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */
             f_e = add( f_e, sub( norm_x_e, r_e ) );
 #else
-                f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
+                f = Mpy_32_32( norm_x, invVal );                                                                                                                                                                                            /* invVal_e + (norm_x_e - r_e) */
                 f_e = add( invVal_e, sub( norm_x_e, r_e ) );
 #endif
 
@@ -1680,7 +1680,7 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) );
                 move16();
-                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
+                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e );           /* exp(norm_x_e) */
 #else
                 singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
                 move16();
@@ -1774,7 +1774,7 @@ static void biDiagonalReductionRight_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */
 #else
-                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
+                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] );   /* exp(sing_exp2) */
 #endif
                     move32();
                 }
@@ -1921,7 +1921,7 @@ static void singularVectorsAccumulationLeft_fx(
                 t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) );
 #endif
 #else
-                t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,
+                t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp );                                                                                 // t_ii_e+norm_y_e-*singularVectors_e,
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
 #else
@@ -1974,7 +1974,7 @@ static void singularVectorsAccumulationLeft_fx(
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
             singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], sing_exp2[nCh][iCh] ); /* Q31 */
 #else
-            singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */
+            singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] );                                                              /* Q31 */
 #endif
             move32();
         }
-- 
GitLab


From 21134e489a374a1b042dc970350ec1fa33fe81f5 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Mon, 27 Jan 2025 08:16:14 +0100
Subject: [PATCH 37/41] Remove unnecessary set16_fx() and add logging code for
 later reference for objective precision assessment.

---
 lib_dec/ivas_svd_dec.c | 100 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 95 insertions(+), 5 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 1b113e036..a084a9024 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -297,6 +297,42 @@ void svdMat2mat_fx(
     return;
 }
 
+#ifdef MORE_DEBUG2
+static void matrixFx2Fl2(
+    float r[][MAX_OUTPUT_CHANNELS],
+    const Word32 a[][MAX_OUTPUT_CHANNELS],
+    const Word16 a_e[][MAX_OUTPUT_CHANNELS],
+    const int adim1,
+    const int adim2 )
+{
+    for ( int i1 = 0; i1 < adim1; i1++ )
+    {
+        for ( int i2 = 0; i2 < adim2; i2++ )
+        {
+            r[i1][i2] = (float) a[i1][i2] * powf( 2.f, a_e[i1][i2] - 31 );
+        }
+    }
+}
+
+static void matrixPrint2(
+    const float a[][MAX_OUTPUT_CHANNELS],
+    const int dim1,
+    const int dim2,
+    const char *name )
+{
+    printf( "Matrix %s[%d][%d] = \n", name, dim1, dim2 );
+    for ( int i1 = 0; i1 < dim1; i1++ )
+    {
+        printf( " { " );
+        for ( int i2 = 0; i2 < dim2; i2++ )
+        {
+            printf( "%.10e, ", a[i1][i2] );
+        }
+        printf( " },\n" );
+    }
+}
+#endif
+
 #ifndef DEBUG_SVD_TEST
 // #define DEBUG_SVD_PRECISION
 #endif
@@ -417,7 +453,7 @@ static void matrixPrint(
     {
         for ( int i2 = 0; i2 < dim2; i2++ )
         {
-            printf( "%f, ", a[i1][i2] );
+            printf( "%.10e, ", a[i1][i2] );
         }
         printf( "\n" );
     }
@@ -479,7 +515,7 @@ static void svd_accuracy_test_fx(
         singularValuesFx2_e[x] = InputMatrixFx_e;
     matrixFx2Fl( InputMatrix, InputMatrixFx, singularValuesFx2_e, nChannelsL, nChannelsC );
     dimSingular = min( nChannelsL, nChannelsC );
-    matrixFx2Fl( singularValues, singularValuesFx, singularValuesFx_e, 1, nChannelsC );
+    matrixFx2Fl( &singularValues, (Word32(*)[MAX_MATRIX])singularValuesFx, singularValuesFx_e, 1, nChannelsC );
     for ( int x = 0; x < MAX_MATRIX; x++ )
         singularValuesFx2_e[x] = 0;
     matrixFx2Fl( singularVectors_Left, singularVectors_LeftFx, singularValuesFx2_e, nChannelsL, nChannelsC );
@@ -579,9 +615,25 @@ Word16 svd_fx(
     Word16 temp_fx_e;
     push_wmops( "svd_fx" );
 
-#if 1
+#ifdef MORE_DEBUG2
+{
+    float input[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+    Word16 exp_matrix[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+
+    for (int ii=0; ii<MAX_OUTPUT_CHANNELS; ii++)
+        for (int iii=0; iii<MAX_OUTPUT_CHANNELS; iii++)
+            exp_matrix[ii][iii] = InputMatrix_e;
+
+    matrixFx2Fl2(input, InputMatrix, exp_matrix, nChannelsL, nChannelsC);
+    matrixPrint2(input, nChannelsL, nChannelsC, "  input  ");
+}
+#endif
+
+#ifndef FIX_1010_OPT_SINGLE_RESCALE
     set32_fx( secDiag_fx, 0, MAX_OUTPUT_CHANNELS );
     set16_fx( secDiag_fx_e, 0, MAX_OUTPUT_CHANNELS );
+
+    set16_fx( singularValues_fx_e, 0, MAX_OUTPUT_CHANNELS );
 #endif
 
     /* Collecting Values */
@@ -594,8 +646,6 @@ Word16 svd_fx(
         }
     }
 
-    set16_fx( singularValues_fx_e, 0, MAX_OUTPUT_CHANNELS );
-
     /* Householder reduction */
 #ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
     HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, &secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e );
@@ -1273,6 +1323,18 @@ static void HouseholderReduction_fx(
         }
     }
 
+#ifdef MORE_DEBUG2
+{
+    float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+    float secDiag[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+
+    matrixFx2Fl2(singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC);
+    matrixFx2Fl2(secDiag, (Word32(*)[MAX_OUTPUT_CHANNELS])secDiag_fx, (Word16(*)[MAX_OUTPUT_CHANNELS])secDiag_fx_e, 1, nChannelsC);
+    matrixPrint2(singularVectors_Left, nChannelsL, nChannelsC, "left");
+    matrixPrint2(secDiag, 1, nChannelsC, "secDiag");
+}
+#endif
+
     /* SingularVecotr Accumulation */
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
     singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC );
@@ -1283,9 +1345,37 @@ static void HouseholderReduction_fx(
 #else
     singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
 #endif
+
+#ifdef MORE_DEBUG2
+{
+    float singularVectors_Right[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+    Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+
+    for (int ii=0; ii<MAX_OUTPUT_CHANNELS; ii++)
+        for (int iii=0; iii<MAX_OUTPUT_CHANNELS; iii++)
+            singularVectors_Left_fx_e[ii][iii] = 0;
+
+    matrixFx2Fl2(singularVectors_Right, singularVectors_Right_fx, singularVectors_Left_fx_e, nChannelsC, nChannelsC);
+    matrixPrint2(singularVectors_Right, nChannelsC, nChannelsC, "right2");
+}
+#endif
+
     singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
 #endif
 
+#ifdef MORE_DEBUG2
+{
+    float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+    Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+
+    for (int ii=0; ii<MAX_OUTPUT_CHANNELS; ii++)
+        for (int iii=0; iii<MAX_OUTPUT_CHANNELS; iii++)
+            singularVectors_Left_fx_e[ii][iii] = 0;
+
+    matrixFx2Fl2(singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC);
+    matrixPrint2(singularVectors_Left, nChannelsL, nChannelsC, "left2");
+}
+#endif
     return;
 }
 
-- 
GitLab


From bb0333d82eea2d4ce60b1f4e6ddc56998ae4a564 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Mon, 27 Jan 2025 08:38:03 +0100
Subject: [PATCH 38/41] format fix

---
 lib_dec/ivas_svd_dec.c | 72 +++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index a084a9024..aa9238639 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -515,7 +515,7 @@ static void svd_accuracy_test_fx(
         singularValuesFx2_e[x] = InputMatrixFx_e;
     matrixFx2Fl( InputMatrix, InputMatrixFx, singularValuesFx2_e, nChannelsL, nChannelsC );
     dimSingular = min( nChannelsL, nChannelsC );
-    matrixFx2Fl( &singularValues, (Word32(*)[MAX_MATRIX])singularValuesFx, singularValuesFx_e, 1, nChannelsC );
+    matrixFx2Fl( &singularValues, (Word32( * )[MAX_MATRIX]) singularValuesFx, singularValuesFx_e, 1, nChannelsC );
     for ( int x = 0; x < MAX_MATRIX; x++ )
         singularValuesFx2_e[x] = 0;
     matrixFx2Fl( singularVectors_Left, singularVectors_LeftFx, singularValuesFx2_e, nChannelsL, nChannelsC );
@@ -616,17 +616,17 @@ Word16 svd_fx(
     push_wmops( "svd_fx" );
 
 #ifdef MORE_DEBUG2
-{
-    float input[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-    Word16 exp_matrix[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+    {
+        float input[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+        Word16 exp_matrix[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
 
-    for (int ii=0; ii<MAX_OUTPUT_CHANNELS; ii++)
-        for (int iii=0; iii<MAX_OUTPUT_CHANNELS; iii++)
-            exp_matrix[ii][iii] = InputMatrix_e;
+        for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ )
+            for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ )
+                exp_matrix[ii][iii] = InputMatrix_e;
 
-    matrixFx2Fl2(input, InputMatrix, exp_matrix, nChannelsL, nChannelsC);
-    matrixPrint2(input, nChannelsL, nChannelsC, "  input  ");
-}
+        matrixFx2Fl2( input, InputMatrix, exp_matrix, nChannelsL, nChannelsC );
+        matrixPrint2( input, nChannelsL, nChannelsC, "  input  " );
+    }
 #endif
 
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
@@ -1324,15 +1324,15 @@ static void HouseholderReduction_fx(
     }
 
 #ifdef MORE_DEBUG2
-{
-    float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-    float secDiag[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+    {
+        float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+        float secDiag[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
 
-    matrixFx2Fl2(singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC);
-    matrixFx2Fl2(secDiag, (Word32(*)[MAX_OUTPUT_CHANNELS])secDiag_fx, (Word16(*)[MAX_OUTPUT_CHANNELS])secDiag_fx_e, 1, nChannelsC);
-    matrixPrint2(singularVectors_Left, nChannelsL, nChannelsC, "left");
-    matrixPrint2(secDiag, 1, nChannelsC, "secDiag");
-}
+        matrixFx2Fl2( singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC );
+        matrixFx2Fl2( secDiag, (Word32( * )[MAX_OUTPUT_CHANNELS]) secDiag_fx, (Word16( * )[MAX_OUTPUT_CHANNELS]) secDiag_fx_e, 1, nChannelsC );
+        matrixPrint2( singularVectors_Left, nChannelsL, nChannelsC, "left" );
+        matrixPrint2( secDiag, 1, nChannelsC, "secDiag" );
+    }
 #endif
 
     /* SingularVecotr Accumulation */
@@ -1347,34 +1347,34 @@ static void HouseholderReduction_fx(
 #endif
 
 #ifdef MORE_DEBUG2
-{
-    float singularVectors_Right[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-    Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+    {
+        float singularVectors_Right[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+        Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
 
-    for (int ii=0; ii<MAX_OUTPUT_CHANNELS; ii++)
-        for (int iii=0; iii<MAX_OUTPUT_CHANNELS; iii++)
-            singularVectors_Left_fx_e[ii][iii] = 0;
+        for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ )
+            for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ )
+                singularVectors_Left_fx_e[ii][iii] = 0;
 
-    matrixFx2Fl2(singularVectors_Right, singularVectors_Right_fx, singularVectors_Left_fx_e, nChannelsC, nChannelsC);
-    matrixPrint2(singularVectors_Right, nChannelsC, nChannelsC, "right2");
-}
+        matrixFx2Fl2( singularVectors_Right, singularVectors_Right_fx, singularVectors_Left_fx_e, nChannelsC, nChannelsC );
+        matrixPrint2( singularVectors_Right, nChannelsC, nChannelsC, "right2" );
+    }
 #endif
 
     singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
 #endif
 
 #ifdef MORE_DEBUG2
-{
-    float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-    Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+    {
+        float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+        Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
 
-    for (int ii=0; ii<MAX_OUTPUT_CHANNELS; ii++)
-        for (int iii=0; iii<MAX_OUTPUT_CHANNELS; iii++)
-            singularVectors_Left_fx_e[ii][iii] = 0;
+        for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ )
+            for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ )
+                singularVectors_Left_fx_e[ii][iii] = 0;
 
-    matrixFx2Fl2(singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC);
-    matrixPrint2(singularVectors_Left, nChannelsL, nChannelsC, "left2");
-}
+        matrixFx2Fl2( singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC );
+        matrixPrint2( singularVectors_Left, nChannelsL, nChannelsC, "left2" );
+    }
 #endif
     return;
 }
-- 
GitLab


From 9c300c2b4b43c57988f9b00f7de6ba2a8d79c298 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Mon, 27 Jan 2025 11:53:59 +0100
Subject: [PATCH 39/41] Remove FIX_1010_OPT_INV_USING_INVSQRT and
 FIX_1010_OPT_GIVENS_AMAX_BMIN. Remove all debug/measurement code. Preparation
 for merge to main.

---
 lib_dec/ivas_svd_dec.c | 519 +----------------------------------------
 1 file changed, 4 insertions(+), 515 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index aa9238639..d3377ebba 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -53,11 +53,9 @@
 
 #if 1
 #define FIX_1010_OPT_DIV
-// #define FIX_1010_OPT_INV_USING_INVSQRT
 #define FIX_1010_OPT_SINGLE_RESCALE
 #define FIX_1010_OPT_GIVENS
 #define FIX_1010_OPT_GIVENS_INV
-// #define FIX_1010_OPT_GIVENS_AMAX_BMIN
 #define FIX_1010_OPT_NORM_NOSAT
 #define FIX_1010_OPT_SEC_SINGLE_RESCALE
 #endif
@@ -297,288 +295,6 @@ void svdMat2mat_fx(
     return;
 }
 
-#ifdef MORE_DEBUG2
-static void matrixFx2Fl2(
-    float r[][MAX_OUTPUT_CHANNELS],
-    const Word32 a[][MAX_OUTPUT_CHANNELS],
-    const Word16 a_e[][MAX_OUTPUT_CHANNELS],
-    const int adim1,
-    const int adim2 )
-{
-    for ( int i1 = 0; i1 < adim1; i1++ )
-    {
-        for ( int i2 = 0; i2 < adim2; i2++ )
-        {
-            r[i1][i2] = (float) a[i1][i2] * powf( 2.f, a_e[i1][i2] - 31 );
-        }
-    }
-}
-
-static void matrixPrint2(
-    const float a[][MAX_OUTPUT_CHANNELS],
-    const int dim1,
-    const int dim2,
-    const char *name )
-{
-    printf( "Matrix %s[%d][%d] = \n", name, dim1, dim2 );
-    for ( int i1 = 0; i1 < dim1; i1++ )
-    {
-        printf( " { " );
-        for ( int i2 = 0; i2 < dim2; i2++ )
-        {
-            printf( "%.10e, ", a[i1][i2] );
-        }
-        printf( " },\n" );
-    }
-}
-#endif
-
-#ifndef DEBUG_SVD_TEST
-// #define DEBUG_SVD_PRECISION
-#endif
-// #define MORE_DEBUG
-
-#if defined( DEBUG_SVD_PRECISION ) || defined( MORE_DEBUG )
-
-#if ( MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS )
-#define MAX_MATRIX MAX_INPUT_CHANNELS
-#else
-#define MAX_MATRIX MAX_OUTPUT_CHANNELS
-#endif
-
-static void matrixFx2Fl(
-    float r[][MAX_MATRIX],
-    const Word32 a[][MAX_MATRIX],
-    const Word16 a_e[MAX_MATRIX],
-    const int adim1,
-    const int adim2 )
-{
-    for ( int i1 = 0; i1 < adim1; i1++ )
-    {
-        for ( int i2 = 0; i2 < adim2; i2++ )
-        {
-            r[i1][i2] = (float) a[i1][i2] * powf( 2.f, a_e[i2] - 31 );
-        }
-    }
-}
-
-static void matrixProduct(
-    float r[][MAX_MATRIX],
-    const float a[][MAX_MATRIX],
-    const float b[][MAX_MATRIX],
-    const int adim1,
-    const int adim2,
-    const int bdim1,
-    const int bdim2 )
-{
-    assert( adim2 == bdim1 );
-
-    for ( int i1 = 0; i1 < adim1; i1++ )
-    {
-        for ( int i2 = 0; i2 < bdim2; i2++ )
-        {
-            r[i1][i2] = 0.f;
-            for ( int i3 = 0; i3 < bdim1; i3++ )
-            {
-                r[i1][i2] += a[i1][i3] * b[i3][i2];
-            }
-        }
-    }
-}
-
-static void matrixTranspose(
-    float r[][MAX_MATRIX],
-    const float a[][MAX_MATRIX],
-    const int adim1,
-    const int adim2 )
-{
-    for ( int i1 = 0; i1 < adim1; i1++ )
-    {
-        for ( int i2 = 0; i2 < adim2; i2++ )
-        {
-            r[i2][i1] = a[i1][i2];
-        }
-    }
-}
-
-static void matrixDiagonal(
-    float r[][MAX_MATRIX],
-    const float a[MAX_MATRIX],
-    const int dim )
-{
-    for ( int i1 = 0; i1 < dim; i1++ )
-    {
-        for ( int i2 = 0; i2 < dim; i2++ )
-        {
-            r[i1][i2] = 0;
-        }
-        r[i1][i1] = a[i1];
-    }
-}
-
-static float matrixDifference(
-    const float a[][MAX_MATRIX],
-    const float b[][MAX_MATRIX],
-    const int dim1,
-    const int dim2 )
-{
-    float r = 0.f;
-
-    for ( int i1 = 0; i1 < dim1; i1++ )
-    {
-        for ( int i2 = 0; i2 < dim2; i2++ )
-        {
-            if ( a[i1][i2] != 0.f )
-            {
-                r += fabsf( ( b[i1][i2] - a[i1][i2] ) / a[i1][i2] );
-            }
-            else
-            {
-                r += fabsf( b[i1][i2] - a[i1][i2] );
-            }
-        }
-    }
-
-    return r / (float) ( dim1 * dim2 );
-}
-
-static void matrixPrint(
-    const float a[][MAX_MATRIX],
-    const int dim1,
-    const int dim2,
-    const char *name )
-{
-    printf( "Matrix %s[%d][%d] = \n", name, dim1, dim2 );
-    for ( int i1 = 0; i1 < dim1; i1++ )
-    {
-        for ( int i2 = 0; i2 < dim2; i2++ )
-        {
-            printf( "%.10e, ", a[i1][i2] );
-        }
-        printf( "\n" );
-    }
-}
-
-static float matrixTestIdentity(
-    const float a[][MAX_MATRIX],
-    const int dim )
-{
-    float r = 0.f;
-
-    for ( int i1 = 0; i1 < dim; i1++ )
-    {
-        for ( int i2 = 0; i2 < dim; i2++ )
-        {
-            if ( i1 == i2 )
-            {
-                r += fabsf( 1.f - a[i1][i2] );
-            }
-            else
-            {
-                r += fabsf( 0.f - a[i1][i2] );
-            }
-        }
-    }
-
-    return r;
-}
-
-#define PROBLEMATIC_THRESHOLD 0.5f
-static void svd_accuracy_test_fx(
-    Word32 InputMatrixFx[][MAX_OUTPUT_CHANNELS], /* i  : matrix to be decomposed (M)            InputMatrix_e*/
-    Word16 InputMatrixFx_e,
-    Word32 singularVectors_LeftFx[][MAX_OUTPUT_CHANNELS],  /* o  : left singular vectors (U)			Q31 */
-    Word32 singularValuesFx[MAX_OUTPUT_CHANNELS],          /* o  : singular values vector (S)         singularValues_fx_e*/
-    Word32 singularVectors_RightFx[][MAX_OUTPUT_CHANNELS], /* o  : right singular vectors (V)			Q31 */
-    Word16 singularValuesFx_e[MAX_OUTPUT_CHANNELS],
-    const Word16 nChannelsL, /* i  : number of rows in the matrix to be decomposed		Q0*/
-    const Word16 nChannelsC  /* i  : number of columns in the matrix to be decomposed	Q0*/
-)
-{
-    float tmp1[MAX_MATRIX][MAX_MATRIX];
-    float tmp2[MAX_MATRIX][MAX_MATRIX];
-    float tmp3[MAX_MATRIX][MAX_MATRIX];
-    float InputMatrix[MAX_MATRIX][MAX_MATRIX];
-
-    Word16 singularValuesFx2_e[MAX_OUTPUT_CHANNELS];
-
-    float singularVectors_Left[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-    float singularValues[MAX_MATRIX];
-    float singularValuesMatrix[MAX_MATRIX][MAX_MATRIX];
-    float singularVectors_Right[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-    float result;
-    int dimSingular;
-    int problematic = 0;
-
-    /* Convert to float and Create singular values matrix from signular values vector */
-    for ( int x = 0; x < MAX_MATRIX; x++ )
-        singularValuesFx2_e[x] = InputMatrixFx_e;
-    matrixFx2Fl( InputMatrix, InputMatrixFx, singularValuesFx2_e, nChannelsL, nChannelsC );
-    dimSingular = min( nChannelsL, nChannelsC );
-    matrixFx2Fl( &singularValues, (Word32( * )[MAX_MATRIX]) singularValuesFx, singularValuesFx_e, 1, nChannelsC );
-    for ( int x = 0; x < MAX_MATRIX; x++ )
-        singularValuesFx2_e[x] = 0;
-    matrixFx2Fl( singularVectors_Left, singularVectors_LeftFx, singularValuesFx2_e, nChannelsL, nChannelsC );
-    matrixFx2Fl( singularVectors_Right, singularVectors_RightFx, singularValuesFx2_e, nChannelsC, nChannelsC );
-    matrixDiagonal( singularValuesMatrix, singularValues, dimSingular ); /* CxC */
-
-#ifdef MORE_DEBUG
-    matrixPrint( InputMatrix, nChannelsL, nChannelsC, "A" );
-    printf( "Result of svd() \n" );
-    matrixPrint( singularVectors_Left, nChannelsL, nChannelsC, "U" );
-    matrixPrint( singularValuesMatrix, nChannelsC, nChannelsC, "S" );
-    matrixPrint( singularVectors_Right, nChannelsC, nChannelsC, "V" );
-#endif
-
-    printf( "\nResult quality tests\n\n" );
-
-    /* Test U' * U == I */
-    matrixTranspose( tmp1, singularVectors_Left, nChannelsL, nChannelsC );                             /* CxL */
-    matrixProduct( tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC ); /* CxC */
-    result = matrixTestIdentity( tmp2, nChannelsC );
-    if ( result >= PROBLEMATIC_THRESHOLD )
-    {
-        problematic = 1;
-    }
-#ifdef MORE_DEBUG
-    matrixPrint( tmp2, nChannelsC, nChannelsC, "U\'*U" );
-#endif
-    printf( "U' * U difference to I is %f\n", result );
-
-    /* Test V * V' == I */
-    matrixTranspose( tmp1, singularVectors_Right, nChannelsC, nChannelsC );                             /* CxC */
-    matrixProduct( tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC ); /* CxC */
-    result = matrixTestIdentity( tmp2, nChannelsC );
-    if ( result >= PROBLEMATIC_THRESHOLD )
-    {
-        problematic = 1;
-    }
-#ifdef MORE_DEBUG
-    matrixPrint( tmp2, nChannelsC, nChannelsC, "V*V\'" );
-#endif
-    printf( "V * V' difference to I is %f\n", result );
-
-    /* Test InputMatrix == U * S * V' */
-    matrixProduct( tmp1, singularVectors_Left, singularValuesMatrix, nChannelsL, nChannelsC, dimSingular, dimSingular ); /* LxC */
-    matrixTranspose( tmp3, singularVectors_Right, nChannelsC, nChannelsC );                                              /* CxC */
-    matrixProduct( tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC );                                  /* LxC */
-    result = matrixDifference( tmp2, InputMatrix, nChannelsL, nChannelsC );
-    if ( result >= PROBLEMATIC_THRESHOLD )
-    {
-        problematic = 1;
-    }
-#ifdef MORE_DEBUG
-    matrixPrint( tmp2, nChannelsL, nChannelsC, "U*S*V\'" );
-#endif
-    printf( "U * S * V' difference to M is %f\n", result );
-
-    if ( problematic )
-    {
-        matrixPrint( InputMatrix, nChannelsL, nChannelsC, "Problematic Input" );
-    }
-}
-#endif
-
 /*-------------------------------------------------------------------------
  * svd()
  *
@@ -615,24 +331,9 @@ Word16 svd_fx(
     Word16 temp_fx_e;
     push_wmops( "svd_fx" );
 
-#ifdef MORE_DEBUG2
-    {
-        float input[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-        Word16 exp_matrix[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-
-        for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ )
-            for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ )
-                exp_matrix[ii][iii] = InputMatrix_e;
-
-        matrixFx2Fl2( input, InputMatrix, exp_matrix, nChannelsL, nChannelsC );
-        matrixPrint2( input, nChannelsL, nChannelsC, "  input  " );
-    }
-#endif
 
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
     set32_fx( secDiag_fx, 0, MAX_OUTPUT_CHANNELS );
-    set16_fx( secDiag_fx_e, 0, MAX_OUTPUT_CHANNELS );
-
     set16_fx( singularValues_fx_e, 0, MAX_OUTPUT_CHANNELS );
 #endif
 
@@ -714,17 +415,6 @@ Word16 svd_fx(
     WHILE( EQ_16( condition, 1 ) );
 
     pop_wmops();
-#ifdef DEBUG_SVD_PRECISION
-    svd_accuracy_test_fx(
-        InputMatrix,
-        InputMatrix_e,
-        singularVectors_Left_fx,
-        singularValues_fx,
-        singularVectors_Right_fx,
-        singularValues_fx_e,
-        nChannelsL,
-        nChannelsC );
-#endif
     return ( errorMessage );
 }
 
@@ -1323,18 +1013,6 @@ static void HouseholderReduction_fx(
         }
     }
 
-#ifdef MORE_DEBUG2
-    {
-        float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-        float secDiag[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-
-        matrixFx2Fl2( singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC );
-        matrixFx2Fl2( secDiag, (Word32( * )[MAX_OUTPUT_CHANNELS]) secDiag_fx, (Word16( * )[MAX_OUTPUT_CHANNELS]) secDiag_fx_e, 1, nChannelsC );
-        matrixPrint2( singularVectors_Left, nChannelsL, nChannelsC, "left" );
-        matrixPrint2( secDiag, 1, nChannelsC, "secDiag" );
-    }
-#endif
-
     /* SingularVecotr Accumulation */
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
     singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC );
@@ -1346,36 +1024,10 @@ static void HouseholderReduction_fx(
     singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
 #endif
 
-#ifdef MORE_DEBUG2
-    {
-        float singularVectors_Right[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-        Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-
-        for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ )
-            for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ )
-                singularVectors_Left_fx_e[ii][iii] = 0;
-
-        matrixFx2Fl2( singularVectors_Right, singularVectors_Right_fx, singularVectors_Left_fx_e, nChannelsC, nChannelsC );
-        matrixPrint2( singularVectors_Right, nChannelsC, nChannelsC, "right2" );
-    }
-#endif
 
     singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
 #endif
 
-#ifdef MORE_DEBUG2
-    {
-        float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-        Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-
-        for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ )
-            for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ )
-                singularVectors_Left_fx_e[ii][iii] = 0;
-
-        matrixFx2Fl2( singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC );
-        matrixPrint2( singularVectors_Left, nChannelsL, nChannelsC, "left2" );
-    }
-#endif
     return;
 }
 
@@ -1385,41 +1037,6 @@ static void HouseholderReduction_fx(
  *
  *-------------------------------------------------------------------------*/
 
-#ifdef FIX_1010_OPT_INV_USING_INVSQRT
-static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e )
-{
-    Word16 sign, shift, shift2;
-
-    sign = 0;
-    move16();
-    if ( x < 0 )
-    {
-        sign = 1;
-    }
-    if ( sign )
-    {
-        x = L_negate( x );
-    }
-
-    shift = norm_l( x );
-    x = L_shl( x, shift );
-    *px_e = 0;
-    move16();
-    x = ISqrt32norm( x, px_e );
-    x = Mpy_32_32( x, x );
-    shift2 = norm_l( x );
-    x = L_shl( x, shift2 );
-    *px_e = add( shl( *px_e, 1 ), sub( shift, shift2 ) );
-    move16();
-
-    if ( sign )
-    {
-        x = L_negate( x );
-    }
-    return x;
-}
-#endif
-
 static void biDiagonalReductionLeft_fx(
     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
     Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
@@ -1477,6 +1094,7 @@ ELSE IF( LT_16( *sig_x_e, *secDiag_e ) )
     secDiag_e[currChannel] = *sig_x_e;
     move16();
 #endif
+
 /* Setting values to 0 */
 ( *sig_x ) = 0;
 move32();
@@ -1502,11 +1120,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
 #ifdef FIX_1010_OPT_DIV
         Word16 invVal_e;
         Word32 invVal;
-#ifdef FIX_1010_OPT_INV_USING_INVSQRT
-        invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
-#else
         invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
-#endif
 #endif
         norm_x = 0;
         move32();
@@ -1576,11 +1190,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
         move32();
 
 #ifdef FIX_1010_OPT_DIV
-#ifdef FIX_1010_OPT_INV_USING_INVSQRT
-        invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
-#else
         invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
-#endif
 #endif
 
         FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
@@ -1673,7 +1283,7 @@ return;
 
 static void biDiagonalReductionRight_fx(
     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
-    Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
+    Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_exp[]) */
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
     Word16 *singularVectors_e,
 #else
@@ -1741,11 +1351,7 @@ static void biDiagonalReductionRight_fx(
 #ifdef FIX_1010_OPT_DIV
             Word16 invVal_e, temp_e;
             Word32 invVal;
-#ifdef FIX_1010_OPT_INV_USING_INVSQRT
-            invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
-#else
             invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
-#endif
 #endif
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
             {
@@ -1810,11 +1416,7 @@ static void biDiagonalReductionRight_fx(
             move32();
 
 #ifdef FIX_1010_OPT_DIV
-#ifdef FIX_1010_OPT_INV_USING_INVSQRT
-            invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
-#else
             invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
-#endif
 #endif
 
             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
@@ -1898,6 +1500,8 @@ static void biDiagonalReductionRight_fx(
             *secDiag_e = exp_max;
             move16();
 #endif
+
+
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
             exp_max = *singularVectors_e;
             move16();
@@ -1976,11 +1580,7 @@ static void singularVectorsAccumulationLeft_fx(
         IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
         {
 #ifdef FIX_1010_OPT_DIV
-#ifdef FIX_1010_OPT_INV_USING_INVSQRT
-            t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp );
-#else
             t_ii = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp );
-#endif
             t_ii_e = sub( temp_exp, t_ii_e );
 #else
             t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */
@@ -2001,22 +1601,11 @@ static void singularVectorsAccumulationLeft_fx(
                     norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
 #endif
                 }
-#ifdef FIX_1010_OPT_INV_USING_INVSQRT
-                Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] );
-                t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp );
-                t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj );
-#ifndef FIX_1010_OPT_SINGLE_RESCALE
-                t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
-#else
-                t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) );
-#endif
-#else
                 t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp );                                                                                 // t_ii_e+norm_y_e-*singularVectors_e,
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
 #else
                 t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) );
-#endif
 #endif
                 FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
                 {
@@ -2187,59 +1776,6 @@ static void singularVectorsAccumulationRight_fx(
  *
  *-------------------------------------------------------------------------*/
 
-#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
-#ifndef M_PI
-#define M_PI 3.141592653589793
-#endif
-#define NUM_REGIONS 128
-static Word32 alphaBeta[NUM_REGIONS][2];
-static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta )
-{
-    static int init = 0;
-
-    if ( init == 0 )
-    {
-        for ( int i = 0; i < NUM_REGIONS; i++ )
-        {
-            double thetaS, thetaE, thetaM;
-
-            thetaS = M_PI / 4. * (double) i / (double) NUM_REGIONS;
-            thetaE = M_PI / 4. * (double) ( i + 1 ) / (double) NUM_REGIONS;
-            thetaM = M_PI / 4. * ( (double) i + 0.5 ) / (double) NUM_REGIONS;
-            // alphaBeta[i][0] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM)));
-            // alphaBeta[i][1] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM)) * tan((thetaS+thetaE)/2.));
-            alphaBeta[i][0] = FL2WORD32( 2. / ( ( ( sin( thetaM ) + sin( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ) + cos( thetaM ) + cos( thetaS ) ) );
-            alphaBeta[i][1] = FL2WORD32( 2. / ( ( ( sin( thetaM ) + sin( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ) + cos( thetaM ) + cos( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) );
-        }
-        init = 1;
-    }
-    Word16 r, shift;
-#if 0
-    float pf, qf;
-    pf = (float)p * powf(2.f, p_e-31);
-    qf = (float)q * powf(2.f, q_e-31);
-    r = floor((double)NUM_REGIONS * 4. * atan2f(qf, pf)/M_PI);
-    if (r >= NUM_REGIONS) {
-        r =  NUM_REGIONS-1;
-    }
-#elif 1
-    shift = sub( norm_l( q ), 1 );
-    q = L_shl( q, shift );
-    q_e = sub( q_e, shift );
-    shift = norm_l( p );
-    p = L_shl( p, shift );
-    p_e = sub( p_e, shift );
-    shift = sub( q_e, p_e );
-    r = shl_sat( div_s( extract_h( q ), s_max( 1, extract_h( p ) ) ), shift );
-    /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */
-    r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) );
-    r = s_min( s_max( 0, shr( r, WORD16_BITS - 1 - 7 - 3 ) ), NUM_REGIONS - 1 );
-#endif
-    assert( ( r >= 0 ) && ( r < NUM_REGIONS ) );
-    *alpha = alphaBeta[r][0];
-    *beta = alphaBeta[r][1];
-}
-#endif
 
 #ifdef FIX_1010_OPT_GIVENS_INV
 static void GivensRotation2_fx(
@@ -2253,35 +1789,6 @@ static void GivensRotation2_fx(
     Word16 *outInv_e )
 {
     Word32 r;
-#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
-    Word32 az, ax, a, b;
-
-    ax = L_abs( x );
-    az = L_abs( z );
-    IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 )
-    {
-        get_alpha_beta( ax, x_e, az, z_e, &a, &b );
-        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e );
-    }
-    ELSE
-    {
-        get_alpha_beta( az, z_e, ax, x_e, &a, &b );
-        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e );
-    }
-    *result = r;
-    move32();
-#if 1
-    *outInv_e = shl( *out_e, 1 );
-    *resultInv = ISqrt32( L_max( 1, Mpy_32_32( r, r ) ), outInv_e );
-    move32();
-#else
-    *resultInv = L_deposit_h( BASOP_Util_Divide3232_Scale( MAX_32, r, outInv_e ) );
-    move32();
-    *outInv_e = sub( *outInv_e, *out_e );
-    move16();
-#endif
-
-#else
     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
     r = L_max( r, 1 );
     *outInv_e = *out_e;
@@ -2291,7 +1798,6 @@ static void GivensRotation2_fx(
 
     *resultInv = ISqrt32( r, outInv_e );
     move32();
-#endif
 }
 #endif
 
@@ -2312,25 +1818,8 @@ static Word32 GivensRotation_fx(
 #endif
 
 #ifdef FIX_1010_OPT_GIVENS
-#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
-    Word32 az, ax, a, b;
-
-    ax = L_abs( x );
-    az = L_abs( z );
-    IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 )
-    {
-        get_alpha_beta( ax, x_e, az, z_e, &a, &b );
-        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e );
-    }
-    ELSE
-    {
-        get_alpha_beta( az, z_e, ax, x_e, &a, &b );
-        r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e );
-    }
-#else
     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
     r = Sqrt32( r, out_e );
-#endif
 #else
     x_abs = L_abs( x );
     z_abs = L_abs( z );
-- 
GitLab


From 6894dd5e81fc265d1f959b7fd4271ca755e92905 Mon Sep 17 00:00:00 2001
From: Manuel Jander <manuel.jander@iis.fraunhofer.de>
Date: Mon, 27 Jan 2025 11:58:50 +0100
Subject: [PATCH 40/41] clang format

---
 lib_dec/ivas_svd_dec.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index d3377ebba..328a67a46 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -1363,7 +1363,7 @@ static void biDiagonalReductionRight_fx(
                 move16();
                 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
 #else
-                singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
+                singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &L_temp_e );                                               /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
                 move32();
                 singularVectors2_e[currChannel][jCh] = add( L_temp_e, sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
                 move16();
@@ -1428,7 +1428,7 @@ static void biDiagonalReductionRight_fx(
                 secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) );
                 move32();
 #else
-                secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */
+                secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] );                                                                     /* exp(secDiag_exp + (sing_exp - r_e) */
                 move32();
                 secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( singularVectors2_e[currChannel][jCh], r_e ) );
                 move32();
@@ -1601,7 +1601,7 @@ static void singularVectorsAccumulationLeft_fx(
                     norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
 #endif
                 }
-                t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp );                                                                                 // t_ii_e+norm_y_e-*singularVectors_e,
+                t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
 #else
-- 
GitLab


From eebf398b476c5e9d96dd6aa2506354de0a344aec Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Tue, 28 Jan 2025 13:28:32 +0100
Subject: [PATCH 41/41] move switches to options.h

---
 lib_com/options.h      | 6 ++++++
 lib_dec/ivas_svd_dec.c | 9 ---------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/lib_com/options.h b/lib_com/options.h
index 208175b46..7e8d63c30 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -128,4 +128,10 @@
 #define FIX_ISSUE_1209                          /* Ittiam: Fix for issue 1209: Assertion exit in BASOP encoder (stereo_dmx_evs)*/
 #define IVAS_ISSUE_1188_EVS_CRASH               /* Ittiam: Fix for issue 1188: Issue due to ASAN */
 #define FIX_ISSUE_1155                          /* Ittiam: Fix for issue 1155: Encoder crash for Stereo at 32kbps in PostShortTerm_ivas_enc_fx()*/
+#define FIX_1010_OPT_DIV                        /* FhG: SVD complexity optimizations (non-BE, i.e. not bit-exact) */
+#define FIX_1010_OPT_SINGLE_RESCALE             /* FhG: SVD complexity optimizations (non-BE, i.e. not bit-exact) */
+#define FIX_1010_OPT_GIVENS                     /* FhG: SVD complexity optimizations (non-BE, i.e. not bit-exact) */
+#define FIX_1010_OPT_GIVENS_INV                 /* FhG: SVD complexity optimizations (non-BE, i.e. not bit-exact) */
+#define FIX_1010_OPT_NORM_NOSAT                 /* FhG: SVD complexity optimizations (non-BE, i.e. not bit-exact) */
+#define FIX_1010_OPT_SEC_SINGLE_RESCALE         /* FhG: SVD complexity optimizations (non-BE, i.e. not bit-exact) */
 #endif
diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 328a67a46..dc1965a5b 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -51,15 +51,6 @@
 #define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 )
 #define CONVERGENCE_FACTOR_FX       214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */
 
-#if 1
-#define FIX_1010_OPT_DIV
-#define FIX_1010_OPT_SINGLE_RESCALE
-#define FIX_1010_OPT_GIVENS
-#define FIX_1010_OPT_GIVENS_INV
-#define FIX_1010_OPT_NORM_NOSAT
-#define FIX_1010_OPT_SEC_SINGLE_RESCALE
-#endif
-
 /*-----------------------------------------------------------------------*
  * Local function prototypes
  *-----------------------------------------------------------------------*/
-- 
GitLab