From 40a0b4795baa5f3bd96c862ef35c8c4526b3a23c Mon Sep 17 00:00:00 2001
From: Arthur <Arthur.tritthart@iis.fraunhofer.de>
Date: Thu, 6 Feb 2025 15:57:58 +0100
Subject: [PATCH 1/2] Tune matrix_product_mant_exp_fx (regular case) to save
 about 36.2 WMOPS; bit-exact to previous version

---
 lib_com/ivas_tools.c | 38 ++++++++++++++++++++++++++++++++++++++
 lib_com/options.h    |  2 ++
 2 files changed, 40 insertions(+)

diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c
index 8d486df04..664418cd2 100644
--- a/lib_com/ivas_tools.c
+++ b/lib_com/ivas_tools.c
@@ -1040,6 +1040,7 @@ Word16 matrix_product_mant_exp_fx(
     }
     ELSE /* Regular case */
     {
+#ifndef FIX_TUNE_IVAS_TOOLS_MATRIX_PRODUCT_WMOPS_MR1010X
         IF( NE_16( colsX, rowsY ) )
         {
             return EXIT_FAILURE;
@@ -1068,6 +1069,43 @@ Word16 matrix_product_mant_exp_fx(
         move16();
         col = rowsX; /*Q0*/
         move16();
+#else
+        assert( colsX == rowsY );
+        assert ( colsX >= 1 );
+
+        Word16 XY_fx_e = add( X_fx_e, Y_fx_e );
+
+        FOR( j = 0; j < colsY; ++j )
+        {
+            Word16 j_rowsY = imult1616( j, rowsY );
+            FOR( i = 0; i < rowsX; ++i )
+            {
+                Word32 Z_fx_m;
+                Word16 Z_fx_e;
+
+                x_idx = add( i, 0 );
+                y_idx = add( 0, j_rowsY );
+                Z_fx_m = Mpy_32_32( X_fx[x_idx], Y_fx[y_idx]);
+                Z_fx_e = XY_fx_e;
+                FOR( k = 1; k < colsX; ++k )
+                {
+                    x_idx = add( x_idx, rowsX );
+                    y_idx = add( 1, y_idx );
+                    Z_fx_m = BASOP_Util_Add_Mant32Exp( Z_fx_m, Z_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), XY_fx_e, &Z_fx_e ); /*Q31 - Zp_fx_e*/
+                }
+                *Zp_fx++ = Z_fx_m;
+                move32();
+                *Zp_fx_e++ = Z_fx_e;
+                move16();
+            }
+        }
+
+
+        row = colsY; /*Q0*/
+        move16();
+        col = rowsX; /*Q0*/
+        move16();
+#endif
     }
     Zp_fx = Z_fx; /*Q31 - Zp_fx_e*/
     Zp_fx_e = out_e;
diff --git a/lib_com/options.h b/lib_com/options.h
index fa1fd8f4e..ae1742c6f 100755
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -153,3 +153,5 @@
 #define FIX_881_HILBERT_FILTER                  /* VA: improve the precision of the Hilbert filter to remove 2kHz unwanted tone */
 #endif
 #define FIX_MINOR_SVD_WMOPS_MR1010X             /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */
+#define FIX_TUNE_IVAS_TOOLS_MATRIX_PRODUCT_WMOPS_MR1010X /* FhG: Major WMOPS tuning in HQ matrix multiplication, bit-exact to previous version, saves about 37 WMOPS for MR1010 */
+
-- 
GitLab


From a5f4d99e96418b9dfa890cc57a0feaf6967fc30f Mon Sep 17 00:00:00 2001
From: Arthur <Arthur.tritthart@iis.fraunhofer.de>
Date: Thu, 6 Feb 2025 16:05:52 +0100
Subject: [PATCH 2/2] fix clang-format issues

---
 lib_com/ivas_tools.c | 4 ++--
 lib_com/options.h    | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c
index 664418cd2..017300ea1 100644
--- a/lib_com/ivas_tools.c
+++ b/lib_com/ivas_tools.c
@@ -1071,7 +1071,7 @@ Word16 matrix_product_mant_exp_fx(
         move16();
 #else
         assert( colsX == rowsY );
-        assert ( colsX >= 1 );
+        assert( colsX >= 1 );
 
         Word16 XY_fx_e = add( X_fx_e, Y_fx_e );
 
@@ -1085,7 +1085,7 @@ Word16 matrix_product_mant_exp_fx(
 
                 x_idx = add( i, 0 );
                 y_idx = add( 0, j_rowsY );
-                Z_fx_m = Mpy_32_32( X_fx[x_idx], Y_fx[y_idx]);
+                Z_fx_m = Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] );
                 Z_fx_e = XY_fx_e;
                 FOR( k = 1; k < colsX; ++k )
                 {
diff --git a/lib_com/options.h b/lib_com/options.h
index ae1742c6f..9d034fcc3 100755
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -154,4 +154,3 @@
 #endif
 #define FIX_MINOR_SVD_WMOPS_MR1010X             /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */
 #define FIX_TUNE_IVAS_TOOLS_MATRIX_PRODUCT_WMOPS_MR1010X /* FhG: Major WMOPS tuning in HQ matrix multiplication, bit-exact to previous version, saves about 37 WMOPS for MR1010 */
-
-- 
GitLab