From 40a0b4795baa5f3bd96c862ef35c8c4526b3a23c Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 6 Feb 2025 15:57:58 +0100 Subject: [PATCH 1/2] tuned function matrix_product_mant_exp_fx (regular case) in WMOPS (about -36.2), all bit-exact to previous version --- lib_com/ivas_tools.c | 38 ++++++++++++++++++++++++++++++++++++++ lib_com/options.h | 2 ++ 2 files changed, 40 insertions(+) diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index 8d486df04..664418cd2 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -1040,6 +1040,7 @@ Word16 matrix_product_mant_exp_fx( } ELSE /* Regular case */ { +#ifndef FIX_TUNE_IVAS_TOOLS_MATRIX_PRODUCT_WMOPS_MR1010X IF( NE_16( colsX, rowsY ) ) { return EXIT_FAILURE; @@ -1068,6 +1069,43 @@ Word16 matrix_product_mant_exp_fx( move16(); col = rowsX; /*Q0*/ move16(); +#else + assert( colsX == rowsY ); + assert ( colsX >= 1 ); + + Word16 XY_fx_e = add( X_fx_e, Y_fx_e ); + + FOR( j = 0; j < colsY; ++j ) + { + Word16 j_rowsY = imult1616( j, rowsY ); + FOR( i = 0; i < rowsX; ++i ) + { + Word32 Z_fx_m; + Word16 Z_fx_e; + + x_idx = add( i, 0 ); + y_idx = add( 0, j_rowsY ); + Z_fx_m = Mpy_32_32( X_fx[x_idx], Y_fx[y_idx]); + Z_fx_e = XY_fx_e; + FOR( k = 1; k < colsX; ++k ) + { + x_idx = add( x_idx, rowsX ); + y_idx = add( 1, y_idx ); + Z_fx_m = BASOP_Util_Add_Mant32Exp( Z_fx_m, Z_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), XY_fx_e, &Z_fx_e ); /*Q31 - Zp_fx_e*/ + } + *Zp_fx++ = Z_fx_m; + move32(); + *Zp_fx_e++ = Z_fx_e; + move16(); + } + } + + + row = colsY; /*Q0*/ + move16(); + col = rowsX; /*Q0*/ + move16(); +#endif } Zp_fx = Z_fx; /*Q31 - Zp_fx_e*/ Zp_fx_e = out_e; diff --git a/lib_com/options.h b/lib_com/options.h index fa1fd8f4e..ae1742c6f 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -153,3 +153,5 @@ #define FIX_881_HILBERT_FILTER /* VA: improve the precision of the Hilbert filter to remove 2kHz unwanted tone */ #endif #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ +#define FIX_TUNE_IVAS_TOOLS_MATRIX_PRODUCT_WMOPS_MR1010X /* FhG: Major WMOPS tuning in HQ matrix multiplication, bit-exact to previous version, saves about 37 WMOPS for MR1010 */ + -- GitLab From a5f4d99e96418b9dfa890cc57a0feaf6967fc30f Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 6 Feb 2025 16:05:52 +0100 Subject: [PATCH 2/2] fix clang-format issues --- lib_com/ivas_tools.c | 4 ++-- lib_com/options.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index 664418cd2..017300ea1 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -1071,7 +1071,7 @@ Word16 matrix_product_mant_exp_fx( move16(); #else assert( colsX == rowsY ); - assert ( colsX >= 1 ); + assert( colsX >= 1 ); Word16 XY_fx_e = add( X_fx_e, Y_fx_e ); @@ -1085,7 +1085,7 @@ Word16 matrix_product_mant_exp_fx( x_idx = add( i, 0 ); y_idx = add( 0, j_rowsY ); - Z_fx_m = Mpy_32_32( X_fx[x_idx], Y_fx[y_idx]); + Z_fx_m = Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); Z_fx_e = XY_fx_e; FOR( k = 1; k < colsX; ++k ) { diff --git a/lib_com/options.h b/lib_com/options.h index ae1742c6f..9d034fcc3 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -154,4 +154,3 @@ #endif #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ #define FIX_TUNE_IVAS_TOOLS_MATRIX_PRODUCT_WMOPS_MR1010X /* FhG: Major WMOPS tuning in HQ matrix multiplication, bit-exact to previous version, saves about 37 WMOPS for MR1010 */ - -- GitLab