From 6ea437edcce69accf83794a92f0791bfe4b25438 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Fri, 18 Jul 2025 12:08:52 +0200 Subject: [PATCH 01/33] initial revision: Added two new functions biDiagonalReductionLeft_64() and biDiagonalReductionRight_64() to replace biDiagonalReductionLeft_fx() and biDiagonalReductionRight_fx(). --- lib_dec/ivas_svd_dec_fx.c | 325 +++++++++++++++++++++++++++++++++++++- 1 file changed, 324 insertions(+), 1 deletion(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index ba65c4b7b..d6d2480ee 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -29,7 +29,7 @@ the United Nations Convention on Contracts on the International Sales of Goods. *******************************************************************************************************/ - +#define MYCHANGES #include #include "options.h" #include "prot_fx.h" @@ -65,7 +65,27 @@ static void HouseholderReduction_fx( const Word16 nChannelsC, /* Q0 */ Word32 *eps_x_fx, /* exp(eps_x_fx_e) */ Word16 *eps_x_fx_e ); +#ifdef MYCHANGES +static void biDiagonalReductionLeft_64( + Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], + Word16 bitwindow, + Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ + Word16 singularValues_e[MAX_OUTPUT_CHANNELS], + const Word16 nChannelsL, /* Q0 */ + const Word16 nChannelsC, /* Q0 */ + const Word16 currChannel /* Q0 */ +); +static void biDiagonalReductionRight_64( + Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], + Word16 bitwindow, + const Word16 nChannelsL, /* Q0 */ + const Word16 nChannelsC, /* Q0 */ + const Word16 currChannel, /* Q0 */ + Word32 *g, /* Q31 */ + Word16 *g_e +); +#else static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ @@ -93,6 +113,7 @@ static void biDiagonalReductionRight_fx( Word16 *sig_x_e, 
Word32 *g /* Q31 */ ); // Q31 +#endif static void singularVectorsAccumulationLeft_fx( Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */ @@ -853,15 +874,88 @@ static void HouseholderReduction_fx( Word16 *eps_x_fx_e ) { Word16 nCh; +#ifdef MYCHANGES + + Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + Word32 g_fx = 0; + Word16 g_e = 0; + move32(); + move16(); + +#else + // float g = 0.0f, sig_x = 0.0f;// to be removed Word32 g_fx = 0, sig_x_fx = 0; move32(); move32(); Word16 sig_x_fx_e = 0; move16(); +#endif Word16 iCh, jCh; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; +#ifdef MYCHANGES + push_wmops("HouseholderReduction_fx 64"); + FOR( jCh = 0; jCh < nChannelsL; jCh++ ) + { + FOR( iCh = 0; iCh < nChannelsC; iCh++ ) + { + singularVectors_Left_64[jCh][iCh] = W_shr(W_deposit32_h(singularVectors_Left_fx[jCh][iCh]),32); + } + } + for (nCh=0;nCh Date: Fri, 18 Jul 2025 12:20:17 +0200 Subject: [PATCH 02/33] Brought the code to the same level as the svd_optimization_branch. 
--- lib_dec/ivas_svd_dec_fx.c | 67 ++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index d6d2480ee..bbb4e83df 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -69,11 +69,11 @@ static void HouseholderReduction_fx( static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 bitwindow, - Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ - Word16 singularValues_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ - const Word16 currChannel /* Q0 */ + const Word16 currChannel, /* Q0 */ + Word32 *g, /* Q31 */ + Word16 *g_e ); static void biDiagonalReductionRight_64( @@ -876,9 +876,13 @@ static void HouseholderReduction_fx( Word16 nCh; #ifdef MYCHANGES - Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - Word32 g_fx = 0; - Word16 g_e = 0; + Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + Word32 g_left_fx = 0; + Word16 g_left_e = 0; + move32(); + move16(); + Word32 g_right_fx = 0; + Word16 g_right_e = 0; move32(); move16(); @@ -895,7 +899,6 @@ static void HouseholderReduction_fx( Word16 iCh, jCh; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; #ifdef MYCHANGES - push_wmops("HouseholderReduction_fx 64"); FOR( jCh = 0; jCh < nChannelsL; jCh++ ) { FOR( iCh = 0; iCh < nChannelsC; iCh++ ) @@ -909,23 +912,26 @@ static void HouseholderReduction_fx( bitwindow=1; biDiagonalReductionLeft_64( singularVectors_Left_64,bitwindow, - singularValues_fx,singularValues_fx_e, nChannelsL, nChannelsC, - nCh + nCh, + &g_left_fx, + &g_left_e ); - singularValues_fx_e[nCh]=add(singularVectors_Left_e,singularValues_fx_e[nCh]); - secDiag_fx[nCh]=g_fx; + singularValues_fx[nCh]=g_left_fx; + move32(); + singularValues_fx_e[nCh]=add(singularVectors_Left_e,g_left_e); + 
secDiag_fx[nCh]=g_right_fx; /* from the previous channel */ move32(); - secDiag_fx_e[nCh]=add(singularVectors_Left_e,g_e); + secDiag_fx_e[nCh]=add(singularVectors_Left_e,g_right_e); bitwindow=2; biDiagonalReductionRight_64( singularVectors_Left_64,bitwindow, nChannelsL, nChannelsC, nCh, - &g_fx, - &g_e + &g_right_fx, + &g_right_e ); { Word16 L_temp_e; @@ -953,7 +959,6 @@ static void HouseholderReduction_fx( } } } - pop_wmops(); #else FOR( jCh = 0; jCh < nChannelsL; jCh++ ) @@ -1002,22 +1007,22 @@ static void HouseholderReduction_fx( static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 bitwindow, - Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ - Word16 singularValues_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ - const Word16 currChannel /* Q0 */ + const Word16 currChannel, /* Q0 */ + Word32 *g, + Word16 *g_e ) { Word16 iCh, jCh; - Word32 norm_x, g; - Word16 norm_x_e, g_e; + Word32 norm_x; + Word16 norm_x_e; Word64 norm_64; - g=0; - g_e=0; + ( *g )=0; + ( *g_e) =0; move32(); move16(); norm_x=0; @@ -1047,16 +1052,16 @@ static void biDiagonalReductionLeft_64( Word32 r, invVal; Word16 r_e, invVal_e; - g_e = norm_x_e; + ( *g_e ) = norm_x_e; move16(); - g = Sqrt32( norm_x, &g_e); + ( *g ) = Sqrt32( norm_x, g_e); IF ( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) { - g = L_negate( g ); + ( *g ) = L_negate( *g ); } factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], bitwindow) ); - tmp_e=sub( g_e, bitwindow) ; - tmpmul=W_mult0_32_32( g, factor2); + tmp_e=sub( ( *g_e ), bitwindow) ; + tmpmul=W_mult0_32_32( ( *g ), factor2); tmpmul=W_shl(tmpmul, tmp_e); r_64=W_sub(tmpmul, norm_64); r_e=W_norm(r_64 ); @@ -1068,8 +1073,8 @@ static void biDiagonalReductionLeft_64( invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e); - tmp_e = add(31, sub(bitwindow, g_e ) ); - 
singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( g), tmp_e) ); // here, the exponent goes up. + tmp_e = add(31, sub(bitwindow, *g_e ) ); + singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e) ); // here, the exponent goes up. bitwindow=add(bitwindow, 1); // so does the bit window FOR ( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++) @@ -1098,10 +1103,6 @@ static void biDiagonalReductionLeft_64( } } } - singularValues[currChannel] = g; - singularValues_e[currChannel] = g_e; - move32(); - move16(); } /*------------------------------------------------------------------------- -- GitLab From e3a5ad688dccbc68eedaa65a5fdd244ae7f0dfd1 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Fri, 18 Jul 2025 12:30:18 +0200 Subject: [PATCH 03/33] removed a (no longer needed) helper variable "bitwindow" and replaced it with macros. 
--- lib_dec/ivas_svd_dec_fx.c | 53 +++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index bbb4e83df..603b38732 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -68,7 +68,6 @@ static void HouseholderReduction_fx( #ifdef MYCHANGES static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], - Word16 bitwindow, const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ @@ -78,7 +77,6 @@ static void biDiagonalReductionLeft_64( static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], - Word16 bitwindow, const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ @@ -908,10 +906,8 @@ static void HouseholderReduction_fx( } for (nCh=0;nCh Date: Fri, 18 Jul 2025 13:06:26 +0200 Subject: [PATCH 04/33] applied the clang patch and renamed the MYCHANGES define as MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE --- lib_com/options.h | 1 + lib_dec/ivas_svd_dec_fx.c | 271 +++++++++++++++++++------------------- 2 files changed, 133 insertions(+), 139 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 14b1c5eb8..fe2791bb3 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -148,4 +148,5 @@ #define FIX_1824 #define FIX_1822 +#define MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE /* FhG: reduce WMOPS of HouseHolderReduction() in ivas_svd_dec.c() by removing redundant mathematics and using 64 bit additions.*/ #endif diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 603b38732..d9e619ed8 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -29,7 +29,6 @@ the United Nations Convention on Contracts on the International Sales of Goods. 
*******************************************************************************************************/ -#define MYCHANGES #include #include "options.h" #include "prot_fx.h" @@ -65,24 +64,22 @@ static void HouseholderReduction_fx( const Word16 nChannelsC, /* Q0 */ Word32 *eps_x_fx, /* exp(eps_x_fx_e) */ Word16 *eps_x_fx_e ); -#ifdef MYCHANGES +#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ - Word32 *g, /* Q31 */ - Word16 *g_e -); + Word32 *g, /* Q31 */ + Word16 *g_e ); static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ - Word32 *g, /* Q31 */ - Word16 *g_e -); + Word32 *g, /* Q31 */ + Word16 *g_e ); #else static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ @@ -872,7 +869,7 @@ static void HouseholderReduction_fx( Word16 *eps_x_fx_e ) { Word16 nCh; -#ifdef MYCHANGES +#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word32 g_left_fx = 0; @@ -896,62 +893,60 @@ static void HouseholderReduction_fx( Word16 iCh, jCh; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; -#ifdef MYCHANGES +#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE FOR( jCh = 0; jCh < nChannelsL; jCh++ ) { FOR( iCh = 0; iCh < nChannelsC; iCh++ ) { - singularVectors_Left_64[jCh][iCh] = W_shr(W_deposit32_h(singularVectors_Left_fx[jCh][iCh]),32); + singularVectors_Left_64[jCh][iCh] = W_shr( W_deposit32_h( singularVectors_Left_fx[jCh][iCh] ), 32 ); } } - for (nCh=0;nCh Date: Mon, 21 Jul 2025 17:16:35 +0200 Subject: [PATCH 05/33] Code cleanup in 
biDiagonalReductionLeft_64() and biDiagonalReductionRight_64() revealed some accuracy issues. --- lib_dec/ivas_svd_dec_fx.c | 43 ++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index d9e619ed8..f98020eb8 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1005,7 +1005,6 @@ static void biDiagonalReductionLeft_64( #define HEADROOM_LEFT_1 1 #define HEADROOM_LEFT_2 ( HEADROOM_LEFT_1 + 1 ) - Word16 iCh, jCh; Word32 norm_x; Word16 norm_x_e; @@ -1029,7 +1028,6 @@ static void biDiagonalReductionLeft_64( } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); - norm_x_e = add( sub( ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ), norm_x_e ), 1 ); } IF( norm_x ) { @@ -1041,7 +1039,7 @@ static void biDiagonalReductionLeft_64( Word32 r, invVal; Word16 r_e, invVal_e; - ( *g_e ) = norm_x_e; + ( *g_e ) = add( sub( ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ), norm_x_e ), 1 ); move16(); ( *g ) = Sqrt32( norm_x, g_e ); IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) @@ -1049,16 +1047,15 @@ static void biDiagonalReductionLeft_64( ( *g ) = L_negate( *g ); } factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_1 ) ); - tmp_e = sub( ( *g_e ), HEADROOM_LEFT_1 ); + tmp_e = shr(sub(norm_x_e,1),1); tmpmul = W_mult0_32_32( ( *g ), factor2 ); - tmpmul = W_shl( tmpmul, tmp_e ); + tmpmul = W_shr( tmpmul, tmp_e ); r_64 = W_sub( tmpmul, norm_64 ); r_e = W_norm( r_64 ); r = W_extract_h( W_shl( r_64, r_e ) ); - r_e = sub( add( 1, ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ) ), r_e ); - invVal_e = r_e; + invVal_e = sub( add( 1, ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ) ), r_e ); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e ); @@ -1067,6 +1064,7 @@ static void biDiagonalReductionLeft_64( FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ ) { + Word16 magic_shift; Word32 
factor1; Word32 factor2; Word32 f; // = norm / r @@ -1082,11 +1080,10 @@ static void biDiagonalReductionLeft_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); + magic_shift=31+norm_x_e-r_e; FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - Word16 magic_shift; - magic_shift = add( add( norm_x_e, 23 ), r_e ); - factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2 ) ); + factor1 = W_extract_l( singularVectors_Left_64[jCh][currChannel] ); singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) ); } } @@ -1113,8 +1110,9 @@ static void biDiagonalReductionRight_64( Word64 norm_64; Word16 idx; -#define HEADROOM_RIGHT_1 2 +#define HEADROOM_RIGHT_1 1 #define HEADROOM_RIGHT_2 ( HEADROOM_RIGHT_1 + 1 ) +#define HEADROOM_RIGHT_3 3 ( *g ) = 0; @@ -1134,7 +1132,6 @@ static void biDiagonalReductionRight_64( } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); - norm_x_e = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 ); move16(); IF( norm_x ) @@ -1143,8 +1140,6 @@ static void biDiagonalReductionRight_64( Word32 factor2; Word16 tmp_e; Word64 tmpmul; - Word32 tmp_g; - Word16 tmp_g_e; Word16 magic_shift; Word64 r_64; Word32 r; @@ -1153,20 +1148,18 @@ static void biDiagonalReductionRight_64( Word32 invVal; Word16 invVal_e; - tmp_g_e = norm_x_e; + ( *g_e ) = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 ); move16(); - tmp_g = Sqrt32( norm_x, &tmp_g_e ); + ( *g ) = Sqrt32( norm_x, g_e ); IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) ) { - tmp_g = L_negate( tmp_g ); + ( *g ) = L_negate( *g ); } - *g = tmp_g; - *g_e = tmp_g_e; move32(); move16(); factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_1 ) ); - tmp_e = sub( tmp_g_e, HEADROOM_RIGHT_1 ); - tmpmul = W_mult0_32_32( tmp_g, factor2 ); + 
tmp_e = sub( *g_e, HEADROOM_RIGHT_1 ); + tmpmul = W_mult0_32_32( *g, factor2 ); tmpmul = W_shl( tmpmul, tmp_e ); r_64 = W_sub( tmpmul, norm_64 ); r_e = W_norm( r_64 ); @@ -1176,8 +1169,8 @@ static void biDiagonalReductionRight_64( move16(); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); - magic_shift = 32 - tmp_g_e; - singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g ), magic_shift ) ); // here, the exponent goes up + magic_shift = 32 - *g_e; + singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), magic_shift ) ); // here, the exponent goes up FOR( iCh = idx; iCh < nChannelsL; iCh++ ) { @@ -1194,11 +1187,11 @@ static void biDiagonalReductionRight_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = 25 + norm_x_e - r_e; // FIXME: Why does this work? + magic_shift = 25 + norm_x_e - r_e; // headroom 3 FIXME: Why does this work? FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { - factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2 ) ); + factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_3 ) ); singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) ); } } -- GitLab From 9adf3a9c5e18f7ac6d865f6e6ad8374cd0b0f144 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Mon, 21 Jul 2025 17:20:21 +0200 Subject: [PATCH 06/33] applied the clang patch. 
--- lib_dec/ivas_svd_dec_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index f98020eb8..e8c36b541 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1047,7 +1047,7 @@ static void biDiagonalReductionLeft_64( ( *g ) = L_negate( *g ); } factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_1 ) ); - tmp_e = shr(sub(norm_x_e,1),1); + tmp_e = shr( sub( norm_x_e, 1 ), 1 ); tmpmul = W_mult0_32_32( ( *g ), factor2 ); tmpmul = W_shr( tmpmul, tmp_e ); r_64 = W_sub( tmpmul, norm_64 ); @@ -1080,7 +1080,7 @@ static void biDiagonalReductionLeft_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift=31+norm_x_e-r_e; + magic_shift = 31 + norm_x_e - r_e; FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { factor1 = W_extract_l( singularVectors_Left_64[jCh][currChannel] ); -- GitLab From f5737f6c947119bd84e7ca02da06e4d115e6fba6 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Tue, 22 Jul 2025 08:56:05 +0200 Subject: [PATCH 07/33] rolled back changes that caused a bit-exactness regression. 
--- lib_dec/ivas_svd_dec_fx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index e8c36b541..4eb7477e4 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1005,6 +1005,7 @@ static void biDiagonalReductionLeft_64( #define HEADROOM_LEFT_1 1 #define HEADROOM_LEFT_2 ( HEADROOM_LEFT_1 + 1 ) +#define HEADROOM_LEFT_3 2 Word16 iCh, jCh; Word32 norm_x; Word16 norm_x_e; @@ -1080,10 +1081,10 @@ static void biDiagonalReductionLeft_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = 31 + norm_x_e - r_e; + magic_shift = 31 - HEADROOM_LEFT_3 + norm_x_e - r_e; FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - factor1 = W_extract_l( singularVectors_Left_64[jCh][currChannel] ); + factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_3 ) ); singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) ); } } @@ -1110,7 +1111,7 @@ static void biDiagonalReductionRight_64( Word64 norm_64; Word16 idx; -#define HEADROOM_RIGHT_1 1 +#define HEADROOM_RIGHT_1 2 #define HEADROOM_RIGHT_2 ( HEADROOM_RIGHT_1 + 1 ) #define HEADROOM_RIGHT_3 3 -- GitLab From f47fd3d8af32895b4e9c5fc6354c87c2f10d0b63 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Tue, 22 Jul 2025 13:14:26 +0200 Subject: [PATCH 08/33] More deterministic "magic_shifts" in biDiagonalReductionLeft_64() and biDiagonalReductionRight_64(). 
--- lib_dec/ivas_svd_dec_fx.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 4eb7477e4..4588b4e19 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1003,9 +1003,10 @@ static void biDiagonalReductionLeft_64( Word16 *g_e ) { -#define HEADROOM_LEFT_1 1 -#define HEADROOM_LEFT_2 ( HEADROOM_LEFT_1 + 1 ) +#define HEADROOM_LEFT_1 1 +#define HEADROOM_LEFT_2 2 #define HEADROOM_LEFT_3 2 + Word16 iCh, jCh; Word32 norm_x; Word16 norm_x_e; @@ -1048,28 +1049,28 @@ static void biDiagonalReductionLeft_64( ( *g ) = L_negate( *g ); } factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_1 ) ); - tmp_e = shr( sub( norm_x_e, 1 ), 1 ); + tmp_e = sub( HEADROOM_LEFT_1, ( *g_e ) ); tmpmul = W_mult0_32_32( ( *g ), factor2 ); tmpmul = W_shr( tmpmul, tmp_e ); r_64 = W_sub( tmpmul, norm_64 ); r_e = W_norm( r_64 ); r = W_extract_h( W_shl( r_64, r_e ) ); - - invVal_e = sub( add( 1, ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ) ), r_e ); - invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e ); + invVal_e = 0; + move16(); + invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); - tmp_e = add( 31, sub( HEADROOM_LEFT_1, *g_e ) ); + tmp_e = sub( 32, *g_e ); singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up. 
FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ ) { - Word16 magic_shift; Word32 factor1; Word32 factor2; Word32 f; // = norm / r // Word16 f_e; // not really needed + Word16 magic_shift; norm_64 = 0; for ( jCh = currChannel; jCh < nChannelsL; jCh++ ) @@ -1081,10 +1082,10 @@ static void biDiagonalReductionLeft_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = 31 - HEADROOM_LEFT_3 + norm_x_e - r_e; + magic_shift = norm_x_e-r_e+(28-HEADROOM_LEFT_3); // works with headroom_left: 1,2,2 FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_3 ) ); + factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32-HEADROOM_LEFT_3 ) ); singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) ); } } @@ -1112,8 +1113,8 @@ static void biDiagonalReductionRight_64( Word16 idx; #define HEADROOM_RIGHT_1 2 -#define HEADROOM_RIGHT_2 ( HEADROOM_RIGHT_1 + 1 ) -#define HEADROOM_RIGHT_3 3 +#define HEADROOM_RIGHT_2 2 +#define HEADROOM_RIGHT_3 2 ( *g ) = 0; @@ -1159,9 +1160,9 @@ static void biDiagonalReductionRight_64( move32(); move16(); factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_1 ) ); - tmp_e = sub( *g_e, HEADROOM_RIGHT_1 ); + tmp_e = sub( HEADROOM_RIGHT_1, *g_e ); tmpmul = W_mult0_32_32( *g, factor2 ); - tmpmul = W_shl( tmpmul, tmp_e ); + tmpmul = W_shr( tmpmul, tmp_e ); r_64 = W_sub( tmpmul, norm_64 ); r_e = W_norm( r_64 ); r = W_extract_h( W_shl( r_64, r_e ) ); @@ -1170,8 +1171,8 @@ static void biDiagonalReductionRight_64( move16(); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); - magic_shift = 32 - *g_e; - singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), 
magic_shift ) ); // here, the exponent goes up + tmp_e = sub( 32, *g_e ); + singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up FOR( iCh = idx; iCh < nChannelsL; iCh++ ) { @@ -1188,11 +1189,11 @@ static void biDiagonalReductionRight_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = 25 + norm_x_e - r_e; // headroom 3 FIXME: Why does this work? + magic_shift = norm_x_e-r_e +29-(HEADROOM_RIGHT_3); // works with headroom_right: 2,2,2 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { - factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_3 ) ); + factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32-HEADROOM_RIGHT_3 ) ); singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) ); } } -- GitLab From bab21b8c63d5cfce30b5f76f04302d0d7cfed123 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Tue, 22 Jul 2025 13:26:55 +0200 Subject: [PATCH 09/33] applied the clang patch. 
--- lib_dec/ivas_svd_dec_fx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 4588b4e19..14f449caf 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1003,7 +1003,7 @@ static void biDiagonalReductionLeft_64( Word16 *g_e ) { -#define HEADROOM_LEFT_1 1 +#define HEADROOM_LEFT_1 1 #define HEADROOM_LEFT_2 2 #define HEADROOM_LEFT_3 2 @@ -1082,10 +1082,10 @@ static void biDiagonalReductionLeft_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = norm_x_e-r_e+(28-HEADROOM_LEFT_3); // works with headroom_left: 1,2,2 + magic_shift = norm_x_e - r_e + ( 28 - HEADROOM_LEFT_3 ); // works with headroom_left: 1,2,2 FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32-HEADROOM_LEFT_3 ) ); + factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) ); singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) ); } } @@ -1189,11 +1189,11 @@ static void biDiagonalReductionRight_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = norm_x_e-r_e +29-(HEADROOM_RIGHT_3); // works with headroom_right: 2,2,2 + magic_shift = norm_x_e - r_e + 29 - ( HEADROOM_RIGHT_3 ); // works with headroom_right: 2,2,2 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { - factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32-HEADROOM_RIGHT_3 ) ); + factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) ); singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) ); } } -- GitLab From 9f38e9b1fe0b8f906f0e0da676c3893bf4fbd325 Mon Sep 
17 00:00:00 2001 From: Thomas Dettbarn Date: Wed, 23 Jul 2025 11:26:38 +0200 Subject: [PATCH 10/33] broke down the headroom shifts a little bit more. --- lib_dec/ivas_svd_dec_fx.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 14f449caf..9cca6aa33 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1003,9 +1003,10 @@ static void biDiagonalReductionLeft_64( Word16 *g_e ) { -#define HEADROOM_LEFT_1 1 +#define HEADROOM_LEFT_1 2 #define HEADROOM_LEFT_2 2 -#define HEADROOM_LEFT_3 2 +#define HEADROOM_LEFT_3 15 +#define HEADROOM_LEFT_4 15 Word16 iCh, jCh; Word32 norm_x; @@ -1075,17 +1076,17 @@ static void biDiagonalReductionLeft_64( norm_64 = 0; for ( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2 ) ); - factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], HEADROOM_LEFT_2 ) ); + factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_3 ) ); + factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], HEADROOM_LEFT_3 ) ); norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = norm_x_e - r_e + ( 28 - HEADROOM_LEFT_3 ); // works with headroom_left: 1,2,2 + magic_shift = norm_x_e - r_e + ( 32 + 2*HEADROOM_LEFT_1 - HEADROOM_LEFT_2 -2*HEADROOM_LEFT_3 - HEADROOM_LEFT_4 ); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) ); + factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) ); singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) ); } } @@ -1114,7 +1115,8 
@@ static void biDiagonalReductionRight_64( #define HEADROOM_RIGHT_1 2 #define HEADROOM_RIGHT_2 2 -#define HEADROOM_RIGHT_3 2 +#define HEADROOM_RIGHT_3 15 +#define HEADROOM_RIGHT_4 15 ( *g ) = 0; @@ -1159,8 +1161,8 @@ static void biDiagonalReductionRight_64( } move32(); move16(); - factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_1 ) ); - tmp_e = sub( HEADROOM_RIGHT_1, *g_e ); + factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) ); + tmp_e = sub( HEADROOM_RIGHT_2, *g_e ); tmpmul = W_mult0_32_32( *g, factor2 ); tmpmul = W_shr( tmpmul, tmp_e ); r_64 = W_sub( tmpmul, norm_64 ); @@ -1181,19 +1183,19 @@ static void biDiagonalReductionRight_64( move64(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { - factor1 = W_extract_l( W_shr( singularVectors_Left_64[iCh][jCh], HEADROOM_RIGHT_2 ) ); - factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2 ) ); + factor1 = W_extract_l( W_shr( singularVectors_Left_64[iCh][jCh], HEADROOM_RIGHT_3 ) ); + factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_3 ) ); norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = norm_x_e - r_e + 29 - ( HEADROOM_RIGHT_3 ); // works with headroom_right: 2,2,2 + magic_shift = norm_x_e - r_e + ( 32 + 2*HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2 -2*HEADROOM_RIGHT_3 - HEADROOM_RIGHT_4 ); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { - factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) ); + factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_4 ) ); singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) ); } } -- GitLab From f62a0613bd90d4a1be71f0e423159d493d66cada Mon Sep 17 
00:00:00 2001 From: Thomas Dettbarn Date: Wed, 23 Jul 2025 11:31:43 +0200 Subject: [PATCH 11/33] applied the clang patch. --- lib_dec/ivas_svd_dec_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 9cca6aa33..16ccada30 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1083,7 +1083,7 @@ static void biDiagonalReductionLeft_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = norm_x_e - r_e + ( 32 + 2*HEADROOM_LEFT_1 - HEADROOM_LEFT_2 -2*HEADROOM_LEFT_3 - HEADROOM_LEFT_4 ); + magic_shift = norm_x_e - r_e + ( 32 + 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2 - 2 * HEADROOM_LEFT_3 - HEADROOM_LEFT_4 ); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) ); @@ -1191,7 +1191,7 @@ static void biDiagonalReductionRight_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = norm_x_e - r_e + ( 32 + 2*HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2 -2*HEADROOM_RIGHT_3 - HEADROOM_RIGHT_4 ); + magic_shift = norm_x_e - r_e + ( 32 + 2 * HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2 - 2 * HEADROOM_RIGHT_3 - HEADROOM_RIGHT_4 ); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { -- GitLab From 1732636a8edb599ed519c131ba30aadc55d192ed Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Wed, 23 Jul 2025 19:24:12 +0200 Subject: [PATCH 12/33] more fine tuning in left_64() and right_64() (to run the regression test) --- lib_dec/ivas_svd_dec_fx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 16ccada30..54ab787bc 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1004,8 +1004,8 @@ static void biDiagonalReductionLeft_64( { #define HEADROOM_LEFT_1 
2 -#define HEADROOM_LEFT_2 2 -#define HEADROOM_LEFT_3 15 +#define HEADROOM_LEFT_2 12 +#define HEADROOM_LEFT_3 14 #define HEADROOM_LEFT_4 15 Word16 iCh, jCh; @@ -1049,8 +1049,8 @@ static void biDiagonalReductionLeft_64( { ( *g ) = L_negate( *g ); } - factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_1 ) ); - tmp_e = sub( HEADROOM_LEFT_1, ( *g_e ) ); + factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_2 ) ); + tmp_e = sub( 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2, ( *g_e ) ); tmpmul = W_mult0_32_32( ( *g ), factor2 ); tmpmul = W_shr( tmpmul, tmp_e ); r_64 = W_sub( tmpmul, norm_64 ); @@ -1083,7 +1083,7 @@ static void biDiagonalReductionLeft_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = norm_x_e - r_e + ( 32 + 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2 - 2 * HEADROOM_LEFT_3 - HEADROOM_LEFT_4 ); + magic_shift = norm_x_e - r_e + ( 30 + 2 * HEADROOM_LEFT_1 - 2 * HEADROOM_LEFT_3 - HEADROOM_LEFT_4 ); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) ); @@ -1114,8 +1114,8 @@ static void biDiagonalReductionRight_64( Word16 idx; #define HEADROOM_RIGHT_1 2 -#define HEADROOM_RIGHT_2 2 -#define HEADROOM_RIGHT_3 15 +#define HEADROOM_RIGHT_2 12 +#define HEADROOM_RIGHT_3 14 #define HEADROOM_RIGHT_4 15 @@ -1162,7 +1162,7 @@ static void biDiagonalReductionRight_64( move32(); move16(); factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) ); - tmp_e = sub( HEADROOM_RIGHT_2, *g_e ); + tmp_e = sub( 2 * HEADROOM_RIGHT_1-HEADROOM_RIGHT_2, *g_e ); tmpmul = W_mult0_32_32( *g, factor2 ); tmpmul = W_shr( tmpmul, tmp_e ); r_64 = W_sub( tmpmul, norm_64 ); @@ -1191,7 +1191,7 @@ static void biDiagonalReductionRight_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) 
); f = Mpy_32_32( norm_x, invVal ); - magic_shift = norm_x_e - r_e + ( 32 + 2 * HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2 - 2 * HEADROOM_RIGHT_3 - HEADROOM_RIGHT_4 ); + magic_shift = norm_x_e - r_e + ( 30 + 2 * HEADROOM_RIGHT_1 - 2 * HEADROOM_RIGHT_3 - HEADROOM_RIGHT_4 ); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { -- GitLab From 80c7142d82c87bf16f37de4dcc89bacc994c085a Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Wed, 23 Jul 2025 19:29:31 +0200 Subject: [PATCH 13/33] applied the clang patch. --- lib_dec/ivas_svd_dec_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 54ab787bc..9304ee03c 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1162,7 +1162,7 @@ static void biDiagonalReductionRight_64( move32(); move16(); factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) ); - tmp_e = sub( 2 * HEADROOM_RIGHT_1-HEADROOM_RIGHT_2, *g_e ); + tmp_e = sub( 2 * HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2, *g_e ); tmpmul = W_mult0_32_32( *g, factor2 ); tmpmul = W_shr( tmpmul, tmp_e ); r_64 = W_sub( tmpmul, norm_64 ); -- GitLab From eb7410daeefd9c226ab7b419ddd8cdcd1aba8e4f Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Thu, 24 Jul 2025 12:37:18 +0200 Subject: [PATCH 14/33] another experiment: Making the headroom shifts dependent on the input values.
--- lib_dec/ivas_svd_dec_fx.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 9304ee03c..1cae38228 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1004,13 +1004,14 @@ static void biDiagonalReductionLeft_64( { #define HEADROOM_LEFT_1 2 -#define HEADROOM_LEFT_2 12 -#define HEADROOM_LEFT_3 14 -#define HEADROOM_LEFT_4 15 +#define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 ) // 12 +#define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 ) // 14 +#define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 ) // 15 Word16 iCh, jCh; Word32 norm_x; Word16 norm_x_e; + Word16 norm_x_e0; Word64 norm_64; ( *g ) = 0; ( *g_e ) = 0; @@ -1030,6 +1031,7 @@ static void biDiagonalReductionLeft_64( norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); } norm_x_e = W_norm( norm_64 ); + norm_x_e0 = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); } IF( norm_x ) @@ -1076,8 +1078,8 @@ static void biDiagonalReductionLeft_64( norm_64 = 0; for ( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_3 ) ); - factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], HEADROOM_LEFT_3 ) ); + factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) ); + factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - HEADROOM_LEFT_3 ) ); norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); } norm_x_e = W_norm( norm_64 ); @@ -1110,13 +1112,14 @@ static void biDiagonalReductionRight_64( Word16 iCh, jCh; Word32 norm_x; Word16 norm_x_e; + Word16 norm_x_e0; Word64 norm_64; Word16 idx; #define HEADROOM_RIGHT_1 2 -#define HEADROOM_RIGHT_2 12 -#define HEADROOM_RIGHT_3 14 -#define HEADROOM_RIGHT_4 15 +#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) //12 +#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) //14 +#define HEADROOM_RIGHT_4 ( 16 - 
norm_x_e0 / 4 ) //15 ( *g ) = 0; @@ -1135,6 +1138,7 @@ static void biDiagonalReductionRight_64( norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); } norm_x_e = W_norm( norm_64 ); + norm_x_e0 = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); move16(); @@ -1183,8 +1187,8 @@ static void biDiagonalReductionRight_64( move64(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { - factor1 = W_extract_l( W_shr( singularVectors_Left_64[iCh][jCh], HEADROOM_RIGHT_3 ) ); - factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_3 ) ); + factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - HEADROOM_RIGHT_3 ) ); + factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) ); norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); } -- GitLab From 31a19c209d4853f6839629a9166cd2d3fba5fae0 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Thu, 24 Jul 2025 12:45:04 +0200 Subject: [PATCH 15/33] applied the clang patch. 
--- lib_dec/ivas_svd_dec_fx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 1cae38228..78d0b90a3 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1004,9 +1004,9 @@ static void biDiagonalReductionLeft_64( { #define HEADROOM_LEFT_1 2 -#define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 ) // 12 -#define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 ) // 14 -#define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 ) // 15 +#define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 ) // 12 +#define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 ) // 14 +#define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 ) // 15 Word16 iCh, jCh; Word32 norm_x; @@ -1117,9 +1117,9 @@ static void biDiagonalReductionRight_64( Word16 idx; #define HEADROOM_RIGHT_1 2 -#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) //12 -#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) //14 -#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 ) //15 +#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) // 12 +#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) // 14 +#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 ) // 15 ( *g ) = 0; -- GitLab From d0617a4faf48227ad21e7f75477efe45b9671479 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Mon, 28 Jul 2025 09:10:01 +0200 Subject: [PATCH 16/33] clean up of the "magic shift" definition. 
--- lib_dec/ivas_svd_dec_fx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 78d0b90a3..42edc1c11 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1004,9 +1004,9 @@ static void biDiagonalReductionLeft_64( { #define HEADROOM_LEFT_1 2 -#define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 ) // 12 -#define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 ) // 14 -#define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 ) // 15 +#define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 ) // 10 +#define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 ) // 4 +#define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 ) // 14 Word16 iCh, jCh; Word32 norm_x; @@ -1085,7 +1085,7 @@ static void biDiagonalReductionLeft_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = norm_x_e - r_e + ( 30 + 2 * HEADROOM_LEFT_1 - 2 * HEADROOM_LEFT_3 - HEADROOM_LEFT_4 ); + magic_shift = ( norm_x_e - 2 * HEADROOM_LEFT_3 ) - ( r_e - HEADROOM_LEFT_1 ) + ( 32 - HEADROOM_LEFT_4 ); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) ); @@ -1117,9 +1117,9 @@ static void biDiagonalReductionRight_64( Word16 idx; #define HEADROOM_RIGHT_1 2 -#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) // 12 -#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) // 14 -#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 ) // 15 +#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) // 10 +#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) // 4 +#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 ) // 14 ( *g ) = 0; @@ -1195,7 +1195,7 @@ static void biDiagonalReductionRight_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = norm_x_e - r_e + ( 30 + 2 * HEADROOM_RIGHT_1 - 2 * HEADROOM_RIGHT_3 - HEADROOM_RIGHT_4 ); + magic_shift = ( 
norm_x_e - 2 * HEADROOM_RIGHT_3 ) - ( r_e - HEADROOM_RIGHT_1 ) + ( 32 - HEADROOM_RIGHT_4 ); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { -- GitLab From e8cef013595da88370196c3fd74e44b3f3e1a93e Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Tue, 29 Jul 2025 17:29:06 +0200 Subject: [PATCH 17/33] added invVal_e to the magic_shift. --- lib_dec/ivas_svd_dec_fx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 42edc1c11..d40ec0f33 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -901,7 +901,7 @@ static void HouseholderReduction_fx( singularVectors_Left_64[jCh][iCh] = W_shr( W_deposit32_h( singularVectors_Left_fx[jCh][iCh] ), 32 ); } } - for ( nCh = 0; nCh < nChannelsC; nCh++ ) + FOR ( nCh = 0; nCh < nChannelsC; nCh++ ) { biDiagonalReductionLeft_64( singularVectors_Left_64, @@ -1085,7 +1085,7 @@ static void biDiagonalReductionLeft_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = ( norm_x_e - 2 * HEADROOM_LEFT_3 ) - ( r_e - HEADROOM_LEFT_1 ) + ( 32 - HEADROOM_LEFT_4 ); + magic_shift = ( norm_x_e - 2 * HEADROOM_LEFT_3 ) - ( r_e - 2 * HEADROOM_LEFT_1 ) + ( 32 - HEADROOM_LEFT_4 ) - 2 * invVal_e; FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) ); @@ -1195,7 +1195,7 @@ static void biDiagonalReductionRight_64( norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); f = Mpy_32_32( norm_x, invVal ); - magic_shift = ( norm_x_e - 2 * HEADROOM_RIGHT_3 ) - ( r_e - HEADROOM_RIGHT_1 ) + ( 32 - HEADROOM_RIGHT_4 ); + magic_shift = ( norm_x_e - 2 * HEADROOM_RIGHT_3 ) - ( r_e - 2 * HEADROOM_RIGHT_1 ) + ( 32 - HEADROOM_RIGHT_4 ) - 2 * invVal_e; FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { -- GitLab From 7d9a7a487a2fd2144ed931e804a59377367df221 Mon Sep 17 00:00:00 2001 
From: Thomas Dettbarn Date: Tue, 29 Jul 2025 17:35:02 +0200 Subject: [PATCH 18/33] applied the clang patch --- lib_dec/ivas_svd_dec_fx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index d40ec0f33..302bdb32a 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -901,7 +901,7 @@ static void HouseholderReduction_fx( singularVectors_Left_64[jCh][iCh] = W_shr( W_deposit32_h( singularVectors_Left_fx[jCh][iCh] ), 32 ); } } - FOR ( nCh = 0; nCh < nChannelsC; nCh++ ) + FOR( nCh = 0; nCh < nChannelsC; nCh++ ) { biDiagonalReductionLeft_64( singularVectors_Left_64, @@ -1117,9 +1117,9 @@ static void biDiagonalReductionRight_64( Word16 idx; #define HEADROOM_RIGHT_1 2 -#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) // 10 -#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) // 4 -#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 ) // 14 +#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) // 10 +#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) // 4 +#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 ) // 14 ( *g ) = 0; -- GitLab From 617c0edfd995e2b16d6df28d5a3d767a5b179d01 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Wed, 30 Jul 2025 13:37:20 +0200 Subject: [PATCH 19/33] added comments to track the Q and exponents. 
--- lib_dec/ivas_svd_dec_fx.c | 74 +++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 302bdb32a..9b92d6a57 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -995,7 +995,7 @@ static void HouseholderReduction_fx( *-------------------------------------------------------------------------*/ static void biDiagonalReductionLeft_64( - Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], + Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], // q(sing) exp(sing) const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ @@ -1027,12 +1027,12 @@ static void biDiagonalReductionLeft_64( move64(); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { - tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_1 ) ); - norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); + tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_1 ) ); // q(sing)-H1 // exp(sing)+H1 + norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } - norm_x_e = W_norm( norm_64 ); + norm_x_e = W_norm( norm_64 ); norm_x_e0 = W_norm( norm_64 ); - norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); + norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) exp(norm_x)=exp(norm)-32 } IF( norm_x ) { @@ -1040,51 +1040,50 @@ static void biDiagonalReductionLeft_64( Word16 tmp_e; Word64 tmpmul; - Word64 r_64; // = sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm OR -sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm + Word64 r_64; Word32 r, invVal; Word16 r_e, invVal_e; - ( *g_e ) = add( sub( ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ), norm_x_e ), 1 ); + ( *g_e ) = add( sub( add( HEADROOM_LEFT_1, HEADROOM_LEFT_1 ), norm_x_e 
), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) move16(); - ( *g ) = Sqrt32( norm_x, g_e ); + ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) { ( *g ) = L_negate( *g ); } - factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_2 ) ); - tmp_e = sub( 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2, ( *g_e ) ); - tmpmul = W_mult0_32_32( ( *g ), factor2 ); - tmpmul = W_shr( tmpmul, tmp_e ); - r_64 = W_sub( tmpmul, norm_64 ); + factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 + tmp_e = sub( 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2, ( *g_e ) ); + tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) + tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) + r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) r_e = W_norm( r_64 ); r = W_extract_h( W_shl( r_64, r_e ) ); invVal_e = 0; move16(); - invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); + invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r) tmp_e = sub( 32, *g_e ); - singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up. 
+ singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r)) FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ ) { Word32 factor1; Word32 factor2; Word32 f; // = norm / r - // Word16 f_e; // not really needed Word16 magic_shift; norm_64 = 0; for ( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) ); - factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - HEADROOM_LEFT_3 ) ); - norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); + factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) ); // q(factor1) = q(sing)-H3 + factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - HEADROOM_LEFT_3 ) ); // q(factor2) = q(sing)-H3 + norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); - norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); - f = Mpy_32_32( norm_x, invVal ); + norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm + f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) magic_shift = ( norm_x_e - 2 * HEADROOM_LEFT_3 ) - ( r_e - 2 * HEADROOM_LEFT_1 ) + ( 32 - HEADROOM_LEFT_4 ) - 2 * invVal_e; FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { @@ -1134,12 +1133,12 @@ static void biDiagonalReductionRight_64( FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { Word32 tmp; - tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1 ) ); - norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); + tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1 ) ); // q(sing)-H1 // exp(sing)+H1 + norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } norm_x_e = 
W_norm( norm_64 ); norm_x_e0 = W_norm( norm_64 ); - norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); + norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) exp(norm_x)=exp(norm)-32 move16(); IF( norm_x ) @@ -1156,29 +1155,28 @@ static void biDiagonalReductionRight_64( Word32 invVal; Word16 invVal_e; - ( *g_e ) = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 ); + ( *g_e ) = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) move16(); - ( *g ) = Sqrt32( norm_x, g_e ); + ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) ) { ( *g ) = L_negate( *g ); } move32(); move16(); - factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) ); + factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2, *g_e ); - tmpmul = W_mult0_32_32( *g, factor2 ); - tmpmul = W_shr( tmpmul, tmp_e ); - r_64 = W_sub( tmpmul, norm_64 ); + tmpmul = W_mult0_32_32( *g, factor2 ); // q(tmpmul)=q(g)+q(factor2) + tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) + r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) r_e = W_norm( r_64 ); r = W_extract_h( W_shl( r_64, r_e ) ); invVal_e = 0; move16(); - invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); - + invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r) tmp_e = sub( 32, *g_e ); - singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up + singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( 
W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r))) FOR( iCh = idx; iCh < nChannelsL; iCh++ ) { @@ -1187,14 +1185,14 @@ static void biDiagonalReductionRight_64( move64(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - HEADROOM_RIGHT_3 ) ); - factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) ); - norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); + factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - HEADROOM_RIGHT_3 ) ); // q(factor1) = q(sing)-H3 + factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) ); // q(factor2) = q(sing)-H3 + norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); - norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); - f = Mpy_32_32( norm_x, invVal ); + norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm + f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) magic_shift = ( norm_x_e - 2 * HEADROOM_RIGHT_3 ) - ( r_e - 2 * HEADROOM_RIGHT_1 ) + ( 32 - HEADROOM_RIGHT_4 ) - 2 * invVal_e; FOR( jCh = idx; jCh < nChannelsC; jCh++ ) -- GitLab From 8cd5a1cec0f1ea81ab45f6ab534541b1b630f44a Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Wed, 30 Jul 2025 17:23:27 +0200 Subject: [PATCH 20/33] renamed the macros for the headroom shifts. 
--- lib_dec/ivas_svd_dec_fx.c | 48 ++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 9b92d6a57..384f05f09 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1002,11 +1002,11 @@ static void biDiagonalReductionLeft_64( Word32 *g, Word16 *g_e ) { - -#define HEADROOM_LEFT_1 2 -#define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 ) // 10 -#define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 ) // 4 -#define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 ) // 14 +/* TODO: For some reason, this is optimal. But why? why not ( 32 - 2 * MAGIC_HEADROOM_1 - norm_x_e0 + 1 ) , for example? */ +#define MAGIC_HEADROOM_1 2 +#define MAGIC_HEADROOM_2 ( 16 - norm_x_e0 / 4 ) +#define MAGIC_HEADROOM_3 ( 16 - norm_x_e0 / 4 ) +#define MAGIC_HEADROOM_4 ( 16 - norm_x_e0 / 4 ) Word16 iCh, jCh; Word32 norm_x; @@ -1027,7 +1027,7 @@ static void biDiagonalReductionLeft_64( move64(); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { - tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_1 ) ); // q(sing)-H1 // exp(sing)+H1 + tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } norm_x_e = W_norm( norm_64 ); @@ -1044,16 +1044,16 @@ static void biDiagonalReductionLeft_64( Word32 r, invVal; Word16 r_e, invVal_e; - ( *g_e ) = add( sub( add( HEADROOM_LEFT_1, HEADROOM_LEFT_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) + ( *g_e ) = add( sub( add( MAGIC_HEADROOM_1, MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) move16(); ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) { ( *g ) = L_negate( *g ); } - factor2 = W_extract_l( W_shr( 
singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 - tmp_e = sub( 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2, ( *g_e ) ); - tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) + factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 + tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, ( *g_e ) ); + tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) --> q(tmpmul) ~= q(norm) tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) r_e = W_norm( r_64 ); @@ -1077,17 +1077,17 @@ static void biDiagonalReductionLeft_64( norm_64 = 0; for ( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) ); // q(factor1) = q(sing)-H3 - factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - HEADROOM_LEFT_3 ) ); // q(factor2) = q(sing)-H3 + factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3 + factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) - magic_shift = ( norm_x_e - 2 * HEADROOM_LEFT_3 ) - ( r_e - 2 * HEADROOM_LEFT_1 ) + ( 32 - HEADROOM_LEFT_4 ) - 2 * invVal_e; + magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - factor1 = W_extract_h( W_shl( 
singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) ); + factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_4 ) ); singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) ); } } @@ -1115,10 +1115,6 @@ static void biDiagonalReductionRight_64( Word64 norm_64; Word16 idx; -#define HEADROOM_RIGHT_1 2 -#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) // 10 -#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) // 4 -#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 ) // 14 ( *g ) = 0; @@ -1133,7 +1129,7 @@ static void biDiagonalReductionRight_64( FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { Word32 tmp; - tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1 ) ); // q(sing)-H1 // exp(sing)+H1 + tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } norm_x_e = W_norm( norm_64 ); @@ -1155,7 +1151,7 @@ static void biDiagonalReductionRight_64( Word32 invVal; Word16 invVal_e; - ( *g_e ) = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) + ( *g_e ) = add( sub( ( MAGIC_HEADROOM_1 + MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) move16(); ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) ) @@ -1164,8 +1160,8 @@ static void biDiagonalReductionRight_64( } move32(); move16(); - factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 - tmp_e = sub( 2 * HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2, *g_e ); + factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 
exp(factor2)=exp(qsing)+H2 + tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, *g_e ); tmpmul = W_mult0_32_32( *g, factor2 ); // q(tmpmul)=q(g)+q(factor2) tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) @@ -1185,19 +1181,19 @@ static void biDiagonalReductionRight_64( move64(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - HEADROOM_RIGHT_3 ) ); // q(factor1) = q(sing)-H3 - factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) ); // q(factor2) = q(sing)-H3 + factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3 + factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) - magic_shift = ( norm_x_e - 2 * HEADROOM_RIGHT_3 ) - ( r_e - 2 * HEADROOM_RIGHT_1 ) + ( 32 - HEADROOM_RIGHT_4 ) - 2 * invVal_e; + magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { - factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_4 ) ); + factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_4 ) ); singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) ); } } -- GitLab From 31bfe505634537016b63f249c21500b8c5ba7906 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Wed, 30 Jul 2025 17:31:29 +0200 Subject: [PATCH 21/33] applied 
the clang patch --- lib_dec/ivas_svd_dec_fx.c | 77 +++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 384f05f09..1c5eec520 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -995,10 +995,10 @@ static void HouseholderReduction_fx( *-------------------------------------------------------------------------*/ static void biDiagonalReductionLeft_64( - Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], // q(sing) exp(sing) - const Word16 nChannelsL, /* Q0 */ - const Word16 nChannelsC, /* Q0 */ - const Word16 currChannel, /* Q0 */ + Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], // q(sing) exp(sing) + const Word16 nChannelsL, /* Q0 */ + const Word16 nChannelsC, /* Q0 */ + const Word16 currChannel, /* Q0 */ Word32 *g, Word16 *g_e ) { @@ -1027,12 +1027,12 @@ static void biDiagonalReductionLeft_64( move64(); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { - tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 - norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 + tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 + norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } - norm_x_e = W_norm( norm_64 ); + norm_x_e = W_norm( norm_64 ); norm_x_e0 = W_norm( norm_64 ); - norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) exp(norm_x)=exp(norm)-32 + norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) exp(norm_x)=exp(norm)-32 } IF( norm_x ) { @@ -1040,28 +1040,28 @@ static void biDiagonalReductionLeft_64( Word16 tmp_e; Word64 tmpmul; - Word64 r_64; + Word64 r_64; Word32 r, 
invVal; Word16 r_e, invVal_e; - ( *g_e ) = add( sub( add( MAGIC_HEADROOM_1, MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) + ( *g_e ) = add( sub( add( MAGIC_HEADROOM_1, MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) move16(); - ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) + ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) { ( *g ) = L_negate( *g ); } - factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 - tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, ( *g_e ) ); - tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) --> q(tmpmul) ~= q(norm) - tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) - r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) + factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 + tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, ( *g_e ) ); + tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) --> q(tmpmul) ~= q(norm) + tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) + r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) r_e = W_norm( r_64 ); r = W_extract_h( W_shl( r_64, r_e ) ); invVal_e = 0; move16(); - invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r) + invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r) tmp_e = sub( 32, *g_e ); @@ -1077,13 +1077,13 @@ static void biDiagonalReductionLeft_64( norm_64 = 0; for ( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - factor1 = W_extract_h( W_shl( 
singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3 - factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3 - norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 + factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3 + factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3 + norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); - norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm - f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) + norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm + f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { @@ -1116,7 +1116,6 @@ static void biDiagonalReductionRight_64( Word16 idx; - ( *g ) = 0; ( *g_e ) = 0; move32(); @@ -1129,12 +1128,12 @@ static void biDiagonalReductionRight_64( FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { Word32 tmp; - tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 - norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 + tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 + norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } norm_x_e = W_norm( norm_64 ); norm_x_e0 = W_norm( norm_64 ); - norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) 
exp(norm_x)=exp(norm)-32 + norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) exp(norm_x)=exp(norm)-32 move16(); IF( norm_x ) @@ -1151,28 +1150,28 @@ static void biDiagonalReductionRight_64( Word32 invVal; Word16 invVal_e; - ( *g_e ) = add( sub( ( MAGIC_HEADROOM_1 + MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) + ( *g_e ) = add( sub( ( MAGIC_HEADROOM_1 + MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) move16(); - ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) + ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) ) { ( *g ) = L_negate( *g ); } move32(); move16(); - factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 + factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, *g_e ); - tmpmul = W_mult0_32_32( *g, factor2 ); // q(tmpmul)=q(g)+q(factor2) - tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) - r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) + tmpmul = W_mult0_32_32( *g, factor2 ); // q(tmpmul)=q(g)+q(factor2) + tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) + r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) r_e = W_norm( r_64 ); r = W_extract_h( W_shl( r_64, r_e ) ); invVal_e = 0; move16(); - invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r) + invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r) tmp_e = sub( 32, *g_e ); - singularVectors_Left_64[currChannel][idx] = W_sub( 
singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r))) + singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r))) FOR( iCh = idx; iCh < nChannelsL; iCh++ ) { @@ -1181,14 +1180,14 @@ static void biDiagonalReductionRight_64( move64(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3 - factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3 - norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 + factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3 + factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3 + norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); - norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm - f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) + norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm + f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; FOR( jCh = idx; jCh < nChannelsC; jCh++ ) -- GitLab From 8e5931aa10aff5ee562e97c1eba9604698f590e1 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Fri, 1 Aug 2025 09:49:10 +0200 Subject: [PATCH 22/33] biDiagonalReductionLeft_64() and biDiagonalReductionRight_64() are ready for a non-draft merge request. 
--- lib_dec/ivas_svd_dec_fx.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 1c5eec520..7a408b396 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1004,9 +1004,9 @@ static void biDiagonalReductionLeft_64( { /* TODO: For some reason, this is optimal. But why? why not ( 32 - 2 * MAGIC_HEADROOM_1 - norm_x_e0 + 1 ) , for example? */ #define MAGIC_HEADROOM_1 2 -#define MAGIC_HEADROOM_2 ( 16 - norm_x_e0 / 4 ) -#define MAGIC_HEADROOM_3 ( 16 - norm_x_e0 / 4 ) -#define MAGIC_HEADROOM_4 ( 16 - norm_x_e0 / 4 ) +#define MAGIC_HEADROOM_2 ( sub( 16, shr( norm_x_e0, 2 ) ) ) +#define MAGIC_HEADROOM_3 ( sub( 16, shr( norm_x_e0, 2 ) ) ) +#define MAGIC_HEADROOM_4 ( sub( 16, shr( norm_x_e0, 2 ) ) ) Word16 iCh, jCh; Word32 norm_x; @@ -1077,17 +1077,21 @@ static void biDiagonalReductionLeft_64( norm_64 = 0; for ( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3 - factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3 + factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32 , MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3 + factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) - magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; +// magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - 
MAGIC_HEADROOM_4 ) - 2 * invVal_e; + magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1) ); + magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) ); + magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) ); + magic_shift = sub( magic_shift, shl( invVal_e, 1 ) ); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_4 ) ); + factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32, MAGIC_HEADROOM_4 ) ) ); singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) ); } } @@ -1180,19 +1184,23 @@ static void biDiagonalReductionRight_64( move64(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3 - factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3 + factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3 + factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) - magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; + // magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; + magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1) ); + magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) ); + magic_shift = 
add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) ); + magic_shift = sub( magic_shift, shl( invVal_e, 1 ) ); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) { - factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_4 ) ); + factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_4 ) ) ); singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) ); } } -- GitLab From 6256011f886f0d0f8be8ef4fb330e3a72cd0b8ed Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Fri, 1 Aug 2025 10:20:07 +0200 Subject: [PATCH 23/33] applied the clang patch. --- lib_dec/ivas_svd_dec_fx.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 7a408b396..9b5f905e1 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1004,15 +1004,18 @@ static void biDiagonalReductionLeft_64( { /* TODO: For some reason, this is optimal. But why? why not ( 32 - 2 * MAGIC_HEADROOM_1 - norm_x_e0 + 1 ) , for example? 
*/ #define MAGIC_HEADROOM_1 2 -#define MAGIC_HEADROOM_2 ( sub( 16, shr( norm_x_e0, 2 ) ) ) -#define MAGIC_HEADROOM_3 ( sub( 16, shr( norm_x_e0, 2 ) ) ) -#define MAGIC_HEADROOM_4 ( sub( 16, shr( norm_x_e0, 2 ) ) ) +//#define MAGIC_HEADROOM_2 ( sub( 16, shr( norm_x_e0, 2 ) ) ) +//#define MAGIC_HEADROOM_3 ( sub( 16, shr( norm_x_e0, 2 ) ) ) +//#define MAGIC_HEADROOM_4 ( sub( 16, shr( norm_x_e0, 2 ) ) ) +#define MAGIC_HEADROOM_2 magic_headroom +#define MAGIC_HEADROOM_3 magic_headroom +#define MAGIC_HEADROOM_4 magic_headroom Word16 iCh, jCh; Word32 norm_x; Word16 norm_x_e; - Word16 norm_x_e0; Word64 norm_64; + Word16 magic_headroom; ( *g ) = 0; ( *g_e ) = 0; move32(); @@ -1031,7 +1034,7 @@ static void biDiagonalReductionLeft_64( norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } norm_x_e = W_norm( norm_64 ); - norm_x_e0 = W_norm( norm_64 ); + magic_headroom = sub( 16, shr( norm_x_e, 2 ) ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) exp(norm_x)=exp(norm)-32 } IF( norm_x ) @@ -1077,15 +1080,15 @@ static void biDiagonalReductionLeft_64( norm_64 = 0; for ( jCh = currChannel; jCh < nChannelsL; jCh++ ) { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32 , MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3 + factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3 - norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 + norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) -// magic_shift = ( norm_x_e - 2 * 
MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; - magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1) ); + // magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; + magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1 ) ); magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) ); magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) ); magic_shift = sub( magic_shift, shl( invVal_e, 1 ) ); @@ -1115,9 +1118,9 @@ static void biDiagonalReductionRight_64( Word16 iCh, jCh; Word32 norm_x; Word16 norm_x_e; - Word16 norm_x_e0; Word64 norm_64; Word16 idx; + Word16 magic_headroom; ( *g ) = 0; @@ -1136,7 +1139,7 @@ static void biDiagonalReductionRight_64( norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } norm_x_e = W_norm( norm_64 ); - norm_x_e0 = W_norm( norm_64 ); + magic_headroom = sub( 16, shr( norm_x_e, 2 ) ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) exp(norm_x)=exp(norm)-32 move16(); @@ -1186,14 +1189,14 @@ static void biDiagonalReductionRight_64( { factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3 - norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 + norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) // magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; - magic_shift = sub( norm_x_e, shl( 
MAGIC_HEADROOM_3, 1) ); + magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1 ) ); magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) ); magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) ); magic_shift = sub( magic_shift, shl( invVal_e, 1 ) ); -- GitLab From 5aaf2643b90c6692c1524d2c583b1edb042fd0f1 Mon Sep 17 00:00:00 2001 From: naghibza Date: Wed, 6 Aug 2025 18:47:09 +0200 Subject: [PATCH 24/33] Rewrite biDiagonalReductionLeft_fx() and biDiagonalReductionRight_fx() according to optimized float version. --- lib_dec/ivas_svd_dec_fx.c | 350 ++++++++++++++++++-------------------- 1 file changed, 161 insertions(+), 189 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 1f50870e0..7f341d76a 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -65,16 +65,19 @@ static void HouseholderReduction_fx( Word32 *eps_x_fx, /* exp(eps_x_fx_e) */ Word16 *eps_x_fx_e ); #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE -static void biDiagonalReductionLeft_64( - Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], - const Word16 nChannelsL, /* Q0 */ + +static void biDiagonalReductionLeft_fx( + Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ + Word16 singularValues_e[][MAX_OUTPUT_CHANNELS], /* Q0 */ + const Word16 nChannelsL, const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ - Word32 *g, /* Q31 */ + Word32 *g, Word16 *g_e ); -static void biDiagonalReductionRight_64( - Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], +static void biDiagonalReductionRight_fx( + Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ + Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ @@ -840,7 +843,6 @@ static void HouseholderReduction_fx( Word16 nCh; #ifdef 
MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE - Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word32 g_left_fx = 0; Word16 g_left_e = 0; move32(); @@ -862,62 +864,57 @@ static void HouseholderReduction_fx( Word16 iCh, jCh; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE + FOR( jCh = 0; jCh < nChannelsL; jCh++ ) { FOR( iCh = 0; iCh < nChannelsC; iCh++ ) { - singularVectors_Left_64[jCh][iCh] = W_shr( W_deposit32_h( singularVectors_Left_fx[jCh][iCh] ), 32 ); + singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e; + move16(); } } + FOR( nCh = 0; nCh < nChannelsC; nCh++ ) { - biDiagonalReductionLeft_64( - singularVectors_Left_64, + biDiagonalReductionLeft_fx( + singularVectors_Left_fx, + singularVectors_Left_fx_e, nChannelsL, nChannelsC, nCh, &g_left_fx, &g_left_e ); + singularValues_fx[nCh] = g_left_fx; move32(); - singularValues_fx_e[nCh] = add( singularVectors_Left_e, g_left_e ); + singularValues_fx_e[nCh] = g_left_e; + secDiag_fx[nCh] = g_right_fx; /* from the previous channel */ move32(); - secDiag_fx_e[nCh] = add( singularVectors_Left_e, g_right_e ); - biDiagonalReductionRight_64( - singularVectors_Left_64, + secDiag_fx_e[nCh] = g_right_e; + + biDiagonalReductionRight_fx( + singularVectors_Left_fx, + singularVectors_Left_fx_e, nChannelsL, nChannelsC, nCh, &g_right_fx, &g_right_e ); + + Word16 L_temp_e; + Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */ + IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) ) { - Word16 L_temp_e; - Word32 L_temp; - L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */ - IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, 
*eps_x_fx_e ), 1 ) ) - { - *eps_x_fx = L_temp; /* exp(L_temp_e) */ - move32(); - *eps_x_fx_e = L_temp_e; - move32(); - } - } - } - { - int i, j; - for ( j = 0; j < nChannelsL; j++ ) - { - for ( i = 0; i < nChannelsC; i++ ) - { - Word16 n; - n = W_norm( singularVectors_Left_64[j][i] ); - singularVectors_Left_fx[j][i] = W_extract_h( W_shl( singularVectors_Left_64[j][i], n ) ); - singularVectors_Left_fx_e[j][i] = sub( add( 32, singularVectors_Left_e ), n ); - } + *eps_x_fx = L_temp; /* exp(L_temp_e) */ + move32(); + *eps_x_fx_e = L_temp_e; + move32(); } } + #else FOR( jCh = 0; jCh < nChannelsL; jCh++ ) @@ -962,222 +959,197 @@ static void HouseholderReduction_fx( * * *-------------------------------------------------------------------------*/ - -static void biDiagonalReductionLeft_64( - Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], // q(sing) exp(sing) - const Word16 nChannelsL, /* Q0 */ +static void biDiagonalReductionLeft_fx( + Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ + Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], /* Q0 */ + const Word16 nChannelsL, const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Word32 *g, Word16 *g_e ) { -/* TODO: For some reason, this is optimal. But why? why not ( 32 - 2 * MAGIC_HEADROOM_1 - norm_x_e0 + 1 ) , for example? 
*/ -#define MAGIC_HEADROOM_1 2 -//#define MAGIC_HEADROOM_2 ( sub( 16, shr( norm_x_e0, 2 ) ) ) -//#define MAGIC_HEADROOM_3 ( sub( 16, shr( norm_x_e0, 2 ) ) ) -//#define MAGIC_HEADROOM_4 ( sub( 16, shr( norm_x_e0, 2 ) ) ) -#define MAGIC_HEADROOM_2 magic_headroom -#define MAGIC_HEADROOM_3 magic_headroom -#define MAGIC_HEADROOM_4 magic_headroom - Word16 iCh, jCh; - Word32 norm_x; - Word16 norm_x_e; - Word64 norm_64; - Word16 magic_headroom; + Word32 norm_x, f, r; + Word16 norm_x_e, f_e, r_e; + Word32 L_temp; + Word16 L_temp_e; + + /* Setting values to 0 */ ( *g ) = 0; ( *g_e ) = 0; move32(); move16(); - norm_x = 0; - move32(); + IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ { - - Word32 tmp; - norm_64 = 0; + Word64 temp = 0; move64(); + norm_x = 0; + norm_x_e = 0; + Word16 max_e = MIN_16; + move16(); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { - tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 - norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 + max_e = s_max( max_e, singularVectors_e[jCh][currChannel] ); } - norm_x_e = W_norm( norm_64 ); - magic_headroom = sub( 16, shr( norm_x_e, 2 ) ); - norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) exp(norm_x)=exp(norm)-32 - } - IF( norm_x ) - { - Word32 factor2; - Word16 tmp_e; - Word64 tmpmul; - - Word64 r_64; - Word32 r, invVal; - Word16 r_e, invVal_e; - ( *g_e ) = add( sub( add( MAGIC_HEADROOM_1, MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) - move16(); - ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) - IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) + FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { - ( *g ) = L_negate( *g ); + temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( 
max_e, singularVectors_e[jCh][currChannel] ), 1 ) ) ); } - factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 - tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, ( *g_e ) ); - tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) --> q(tmpmul) ~= q(norm) - tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) - r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) - r_e = W_norm( r_64 ); - r = W_extract_h( W_shl( r_64, r_e ) ); - - invVal_e = 0; - move16(); - invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r) - - tmp_e = sub( 32, *g_e ); - singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r)) + Word16 nrm = W_norm( temp ); + nrm = sub( nrm, 32 ); + norm_x = W_shl_sat_l( temp, nrm ); + norm_x_e = sub( add( max_e, max_e ), nrm ); - FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ ) + IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { - Word32 factor1; - Word32 factor2; - Word32 f; // = norm / r - Word16 magic_shift; + Word16 invVal_e; + Word32 invVal; - norm_64 = 0; - for ( jCh = currChannel; jCh < nChannelsL; jCh++ ) + L_temp_e = norm_x_e; + move16(); + if (0) + L_temp = ISqrt32( norm_x, &L_temp_e ); + else + L_temp = Sqrt32( norm_x, &L_temp_e ); + //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? 
L_temp : L_negate( L_temp ) ); + if ( singularVectors[currChannel][currChannel] >= 0 ) { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3 - factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3 - norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 + L_temp = L_negate( L_temp ); } - norm_x_e = W_norm( norm_64 ); - norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm - f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) - // magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; - magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1 ) ); - magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) ); - magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) ); - magic_shift = sub( magic_shift, shl( invVal_e, 1 ) ); - FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) + ( *g ) = L_temp; + move32(); + *g_e = L_temp_e; + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + L_temp_e, -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */ + move32(); + invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); + + FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32, MAGIC_HEADROOM_4 ) ) ); - singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) 
); + Word16 max2_e = MIN_16; + max_e = MIN_16; + move16(); + move16(); + temp = 0; + move64(); + + FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + max_e = s_max( max_e, singularVectors_e[jCh][currChannel] ); /* exp(norm_x_e) */ + max2_e = s_max( max2_e, singularVectors_e[jCh][iCh] ); /* exp(norm_x_e) */ + } + max_e = add( max_e, max2_e ); + + FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors_e[jCh][currChannel], singularVectors_e[jCh][iCh] ) ) ) ); + } + Word16 nrm = W_norm( temp ); + nrm = sub( nrm, 32 ); + norm_x = W_shl_sat_l( temp, nrm ); + norm_x_e = sub( max_e, nrm ); + + f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ + f_e = add( invVal_e, sub( norm_x_e, r_e ) ); + + FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors_e[jCh][currChannel] ), &singularVectors_e[jCh][iCh] ); + move32(); + } } } } + return; } -/*------------------------------------------------------------------------- - * biDiagonalReductionRight() - * - * - *-------------------------------------------------------------------------*/ - -static void biDiagonalReductionRight_64( - Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], +static void biDiagonalReductionRight_fx( + Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ + Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Word32 *g, /* Q31 */ - Word16 *g_e ) + Word16 *g_e +) { - Word16 iCh, jCh; - Word32 norm_x; - Word16 norm_x_e; - Word64 norm_64; - Word16 idx; - Word16 magic_headroom; - + 
Word16 iCh, jCh, idx; + Word32 norm_x, r; + Word16 norm_x_e, r_e; + Word32 L_temp; + Word16 L_temp_e; + /* Setting values to 0 */ ( *g ) = 0; ( *g_e ) = 0; move32(); move16(); IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */ { - norm_64 = 0; - move64(); - idx = add( currChannel, 1 ); - FOR( jCh = idx; jCh < nChannelsC; jCh++ ) + idx = add( currChannel, 1 ); /* Q0 */ + + norm_x = 0; + move32(); + norm_x_e = 0; + move16(); + FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { - Word32 tmp; - tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 - norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ } - norm_x_e = W_norm( norm_64 ); - magic_headroom = sub( 16, shr( norm_x_e, 2 ) ); - norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) exp(norm_x)=exp(norm)-32 - move16(); - IF( norm_x ) + IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { - Word32 factor1; - Word32 factor2; - Word16 tmp_e; - Word64 tmpmul; - Word16 magic_shift; - Word64 r_64; - Word32 r; - Word16 r_e; - Word32 f; + Word16 invVal_e, temp_e; Word32 invVal; - Word16 invVal_e; - ( *g_e ) = add( sub( ( MAGIC_HEADROOM_1 + MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) + L_temp_e = norm_x_e; move16(); - ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) - IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) ) + L_temp = Sqrt32( norm_x, &L_temp_e ); + //L_temp = L_shl_r( L_temp, L_temp_e ); // Q31 + IF( singularVectors[currChannel][idx] >= 0 ) { - ( *g ) = L_negate( *g ); + ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) 
*/ + move32(); } + ELSE + { + ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */ + move32(); + } + *g_e = L_temp_e; + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + (*g_e), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors_e[currChannel][idx], -( *g ), *g_e, &singularVectors_e[currChannel][idx] ); /* exp(sing_exp) */ move32(); - move16(); - factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 - tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, *g_e ); - tmpmul = W_mult0_32_32( *g, factor2 ); // q(tmpmul)=q(g)+q(factor2) - tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) - r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) - r_e = W_norm( r_64 ); - r = W_extract_h( W_shl( r_64, r_e ) ); - - invVal_e = 0; - move16(); - invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r) - tmp_e = sub( 32, *g_e ); - singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r))) - FOR( iCh = idx; iCh < nChannelsL; iCh++ ) - { + invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); - norm_64 = 0; - move64(); - FOR( jCh = idx; jCh < nChannelsC; jCh++ ) + FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /* nChannelsL */ + { + norm_x = 0; + move32(); + norm_x_e = 0; + move16(); + FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { - factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3 - factor2 = W_extract_h( W_shl( 
singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3 - norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors_e[iCh][jCh], singularVectors_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */ } - norm_x_e = W_norm( norm_64 ); - norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm - f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) - // magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; - magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1 ) ); - magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) ); - magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) ); - magic_shift = sub( magic_shift, shl( invVal_e, 1 ) ); + norm_x = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ + norm_x_e = add( invVal_e, sub( norm_x_e, r_e ) ); - FOR( jCh = idx; jCh < nChannelsC; jCh++ ) + FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { - factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_4 ) ) ); - singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) ); + singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors_e[currChannel][jCh] ), &singularVectors_e[iCh][jCh] ); /* exp(sing_exp2) */ + move32(); } } } } + + return; } #else /*------------------------------------------------------------------------- -- GitLab From 21121559e76f8edc9ad3f579d6b09908165fb76b Mon Sep 17 00:00:00 2001 From: naghibza Date: Wed, 6 Aug 2025 18:52:33 +0200 
Subject: [PATCH 25/33] Applied clang formatting patch. --- lib_dec/ivas_svd_dec_fx.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 7f341d76a..2ceedc4e3 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -875,7 +875,7 @@ static void HouseholderReduction_fx( move16(); } } - + FOR( nCh = 0; nCh < nChannelsC; nCh++ ) { biDiagonalReductionLeft_fx( @@ -960,11 +960,11 @@ static void HouseholderReduction_fx( * *-------------------------------------------------------------------------*/ static void biDiagonalReductionLeft_fx( - Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ + Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], /* Q0 */ const Word16 nChannelsL, - const Word16 nChannelsC, /* Q0 */ - const Word16 currChannel, /* Q0 */ + const Word16 nChannelsC, /* Q0 */ + const Word16 currChannel, /* Q0 */ Word32 *g, Word16 *g_e ) { @@ -1010,10 +1010,7 @@ static void biDiagonalReductionLeft_fx( L_temp_e = norm_x_e; move16(); - if (0) - L_temp = ISqrt32( norm_x, &L_temp_e ); - else - L_temp = Sqrt32( norm_x, &L_temp_e ); + L_temp = Sqrt32( norm_x, &L_temp_e ); //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? 
L_temp : L_negate( L_temp ) ); if ( singularVectors[currChannel][currChannel] >= 0 ) { @@ -1022,7 +1019,7 @@ static void biDiagonalReductionLeft_fx( ( *g ) = L_temp; move32(); *g_e = L_temp_e; - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + L_temp_e, -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + L_temp_e, -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */ move32(); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); @@ -1073,8 +1070,7 @@ static void biDiagonalReductionRight_fx( const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Word32 *g, /* Q31 */ - Word16 *g_e -) + Word16 *g_e ) { Word16 iCh, jCh, idx; Word32 norm_x, r; @@ -1108,7 +1104,7 @@ static void biDiagonalReductionRight_fx( L_temp_e = norm_x_e; move16(); L_temp = Sqrt32( norm_x, &L_temp_e ); - //L_temp = L_shl_r( L_temp, L_temp_e ); // Q31 + // L_temp = L_shl_r( L_temp, L_temp_e ); // Q31 IF( singularVectors[currChannel][idx] >= 0 ) { ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */ @@ -1120,7 +1116,7 @@ static void biDiagonalReductionRight_fx( move32(); } *g_e = L_temp_e; - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + (*g_e), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + ( *g_e ), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( 
singularVectors[currChannel][idx], singularVectors_e[currChannel][idx], -( *g ), *g_e, &singularVectors_e[currChannel][idx] ); /* exp(sing_exp) */ move32(); -- GitLab From 24193cc17d4127c66b2e0b209cb6b84f303a1827 Mon Sep 17 00:00:00 2001 From: naghibza Date: Wed, 6 Aug 2025 19:26:29 +0200 Subject: [PATCH 26/33] Removed unused variable. --- lib_dec/ivas_svd_dec_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 2ceedc4e3..135c76061 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1098,7 +1098,7 @@ static void biDiagonalReductionRight_fx( IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { - Word16 invVal_e, temp_e; + Word16 invVal_e; Word32 invVal; L_temp_e = norm_x_e; -- GitLab From 050eb9b5a18d2e12dce278b8d511ea839eb4d3b4 Mon Sep 17 00:00:00 2001 From: naghibza Date: Thu, 7 Aug 2025 12:34:14 +0200 Subject: [PATCH 27/33] Scale singularVectors_Left_fx to preserve precision in SVD calculation. 
--- lib_dec/ivas_svd_dec_fx.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 135c76061..31dbd7dfa 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -866,12 +866,18 @@ static void HouseholderReduction_fx( Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE - + Word16 sc = 0; + sc = getScaleFactor32( singularVectors_Left_fx[0], nChannelsC ); + FOR( jCh = 1; jCh < nChannelsL; jCh++ ) + { + sc = s_min( sc, getScaleFactor32( singularVectors_Left_fx[jCh], nChannelsC ) ); + } FOR( jCh = 0; jCh < nChannelsL; jCh++ ) { + Scale_sig32( singularVectors_Left_fx[jCh], nChannelsC, sc ); FOR( iCh = 0; iCh < nChannelsC; iCh++ ) { - singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e; + singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e - sc; move16(); } } @@ -1003,7 +1009,7 @@ static void biDiagonalReductionLeft_fx( norm_x = W_shl_sat_l( temp, nrm ); norm_x_e = sub( add( max_e, max_e ), nrm ); - IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ + IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { Word16 invVal_e; Word32 invVal; @@ -1096,7 +1102,7 @@ static void biDiagonalReductionRight_fx( norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ } - IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ + IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { Word16 invVal_e; Word32 invVal; -- GitLab From 131ba565fcb0a2687e98f49ecec996dabcb24e25 Mon Sep 17 00:00:00 2001 From: naghibza Date: Fri, 8 Aug 2025 16:08:19 +0200 Subject: [PATCH 28/33] Add/remove some move32/16 functions. 
--- lib_dec/ivas_svd_dec_fx.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 31dbd7dfa..98ff48342 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -867,6 +867,7 @@ static void HouseholderReduction_fx( #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE Word16 sc = 0; + move16(); sc = getScaleFactor32( singularVectors_Left_fx[0], nChannelsC ); FOR( jCh = 1; jCh < nChannelsL; jCh++ ) { @@ -981,8 +982,8 @@ static void biDiagonalReductionLeft_fx( Word16 L_temp_e; /* Setting values to 0 */ - ( *g ) = 0; - ( *g_e ) = 0; + *g = 0; + *g_e = 0; move32(); move16(); @@ -991,7 +992,9 @@ static void biDiagonalReductionLeft_fx( Word64 temp = 0; move64(); norm_x = 0; + move32(); norm_x_e = 0; + move16(); Word16 max_e = MIN_16; move16(); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ @@ -1021,13 +1024,15 @@ static void biDiagonalReductionLeft_fx( if ( singularVectors[currChannel][currChannel] >= 0 ) { L_temp = L_negate( L_temp ); + move32(); } - ( *g ) = L_temp; + *g = L_temp; move32(); *g_e = L_temp_e; - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + L_temp_e, -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + move16(); + + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + ( *g_e ), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */ - move32(); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ @@ -1061,7 +1066,6 @@ 
static void biDiagonalReductionLeft_fx( FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors_e[jCh][currChannel] ), &singularVectors_e[jCh][iCh] ); - move32(); } } } @@ -1085,8 +1089,8 @@ static void biDiagonalReductionRight_fx( Word16 L_temp_e; /* Setting values to 0 */ - ( *g ) = 0; - ( *g_e ) = 0; + *g = 0; + *g_e = 0; move32(); move16(); IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */ @@ -1118,13 +1122,14 @@ static void biDiagonalReductionRight_fx( } ELSE { - ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */ + ( *g ) = L_temp; /* exp(L_temp_e) */ move32(); } *g_e = L_temp_e; + move16(); + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + ( *g_e ), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors_e[currChannel][idx], -( *g ), *g_e, &singularVectors_e[currChannel][idx] ); /* exp(sing_exp) */ - move32(); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); @@ -1145,7 +1150,6 @@ static void biDiagonalReductionRight_fx( FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors_e[currChannel][jCh] ), &singularVectors_e[iCh][jCh] ); /* exp(sing_exp2) */ - move32(); } } } -- GitLab From c38548bc1345eebad7066bc55324e6b16b847781 Mon Sep 17 00:00:00 2001 From: naghibza Date: Fri, 8 Aug 2025 16:12:41 +0200 Subject: [PATCH 29/33] Applied clang formatting patch. 
--- lib_dec/ivas_svd_dec_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 98ff48342..281e8c3ea 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -1031,7 +1031,7 @@ static void biDiagonalReductionLeft_fx( *g_e = L_temp_e; move16(); - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + ( *g_e ), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + ( *g_e ), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */ invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); -- GitLab From 83a9031f5d7dbbec474a7095380d5bc758c27f8a Mon Sep 17 00:00:00 2001 From: naghibza Date: Fri, 8 Aug 2025 17:41:36 +0200 Subject: [PATCH 30/33] Added one bit headroom to cx_fx calculation. 
--- lib_dec/ivas_mc_param_dec_fx.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/lib_dec/ivas_mc_param_dec_fx.c b/lib_dec/ivas_mc_param_dec_fx.c index 230c210b7..6d832ba45 100644 --- a/lib_dec/ivas_mc_param_dec_fx.c +++ b/lib_dec/ivas_mc_param_dec_fx.c @@ -1799,8 +1799,22 @@ void ivas_param_mc_dec_digest_tc_fx( test(); IF( hParamMC->hMetadataPMC->bAttackPresent && ( EQ_32( hParamMC->synthesis_conf, PARAM_MC_SYNTH_LS_CONV_COV ) || EQ_32( hParamMC->synthesis_conf, PARAM_MC_SYNTH_MONO_STEREO ) ) ) { +#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE + Word16 len = imult1616( nchan_transport, nchan_transport ); + Word16 sc = s_min( getScaleFactor32( cx_fx, len ), getScaleFactor32( cx_next_band_fx, len ) ); + IF( EQ_16( sc, 0 ) ) + { + Scale_sig32( cx_fx, len, -Q1 ); // add one bit head room + Scale_sig32( cx_next_band_fx, len, -Q1 ); // add one bit head room + cx_e = add( cx_e, Q1 ); + cx_next_band_e = add( cx_next_band_e, Q1 ); /* own exponent +1: cx_next_band_fx was shifted by one bit, cx_e is already incremented above */ + } + v_add_fx( cx_fx, cx_next_band_fx, cx_fx, len ); + Copy32( cx_fx, cx_next_band_fx, len ); +#else v_add_fx( cx_fx, cx_next_band_fx, cx_fx, imult1616( nchan_transport, nchan_transport ) ); Copy32( cx_fx, cx_next_band_fx, imult1616( nchan_transport, nchan_transport ) ); +#endif } FOR( is_next_band = 0; is_next_band < 2; is_next_band++ ) -- GitLab From 95ba2f481a14440634ca6f6e28136e104f9c1742 Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Fri, 15 Aug 2025 11:10:18 +0200 Subject: [PATCH 31/33] name the svd-optimizations define consistently --- lib_com/options.h | 4 ++-- lib_dec/ivas_mc_param_dec_fx.c | 2 +- lib_dec/ivas_svd_dec_fx.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 1f2893b1b..0b037291c 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -97,7 +97,7 @@ #define FIX_ISSUE_1817_REPLACE_CARRY_OVERFLOW /* FhG: bit-exact, replace carry and overflow operations by 64-bit operations, MR 1931 */ #define FIX_1844_MISSING_FREE /* FhG: add 
missing free in ivas_binRenderer_convModuleClose_fx() */ -#define MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE /* FhG: reduce WMOPS of HouseHolderReduction() in ivas_svd_dec.c() by removing redundant mathematics and using 64 bit additions.*/ +#define NONBE_SVD_OPTIMIZATION /* FhG: reduce WMOPS of HouseHolderReduction() in ivas_svd_dec.c() by removing redundant mathematics and using 64 bit additions.*/ /* #################### Start BASOP porting switches ############################ */ #define FIX_1372_ISAR_POST_REND @@ -142,7 +142,7 @@ #define NONBE_1360_LFE_DELAY /* Dlb: LFE delay alignment when rendering in CLDFB domain*/ #define NONBE_1229_FIX_ISM1_DPID /* Eri: issue 1229: fix bug causing ISM 1 to use default -dpid instead of the specified one */ -#define MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE +#define NONBE_SVD_OPTIMIZATION /* #################### End BASOP porting switches ############################ */ diff --git a/lib_dec/ivas_mc_param_dec_fx.c b/lib_dec/ivas_mc_param_dec_fx.c index 6d832ba45..73ba585d1 100644 --- a/lib_dec/ivas_mc_param_dec_fx.c +++ b/lib_dec/ivas_mc_param_dec_fx.c @@ -1799,7 +1799,7 @@ void ivas_param_mc_dec_digest_tc_fx( test(); IF( hParamMC->hMetadataPMC->bAttackPresent && ( EQ_32( hParamMC->synthesis_conf, PARAM_MC_SYNTH_LS_CONV_COV ) || EQ_32( hParamMC->synthesis_conf, PARAM_MC_SYNTH_MONO_STEREO ) ) ) { -#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE +#ifdef NONBE_SVD_OPTIMIZATION Word16 len = imult1616( nchan_transport, nchan_transport ); Word16 sc = s_min( getScaleFactor32( cx_fx, len ), getScaleFactor32( cx_next_band_fx, len ) ); IF( EQ_16( sc, 0 ) ) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 281e8c3ea..4bd0ae06e 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -64,7 +64,7 @@ static void HouseholderReduction_fx( const Word16 nChannelsC, /* Q0 */ Word32 *eps_x_fx, /* exp(eps_x_fx_e) */ Word16 *eps_x_fx_e ); -#ifdef 
MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE +#ifdef NONBE_SVD_OPTIMIZATION static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ @@ -841,7 +841,7 @@ static void HouseholderReduction_fx( Word16 *eps_x_fx_e ) { Word16 nCh; -#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE +#ifdef NONBE_SVD_OPTIMIZATION Word32 g_left_fx = 0; Word16 g_left_e = 0; @@ -865,7 +865,7 @@ static void HouseholderReduction_fx( Word16 iCh, jCh; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; -#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE +#ifdef NONBE_SVD_OPTIMIZATION Word16 sc = 0; move16(); sc = getScaleFactor32( singularVectors_Left_fx[0], nChannelsC ); @@ -960,7 +960,7 @@ static void HouseholderReduction_fx( return; } -#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE +#ifdef NONBE_SVD_OPTIMIZATION /*------------------------------------------------------------------------- * biDiagonalReductionLeft() * -- GitLab From e3a7fe8d4e9234690caed3bd382b4ccdf308f05e Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Fri, 15 Aug 2025 12:25:40 +0200 Subject: [PATCH 32/33] reorder code in HouseholderReduction_fx to match the float code --- lib_dec/ivas_svd_dec_fx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 4bd0ae06e..ded17e005 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -885,6 +885,10 @@ static void HouseholderReduction_fx( FOR( nCh = 0; nCh < nChannelsC; nCh++ ) { + secDiag_fx[nCh] = g_right_fx; /* from the previous channel */ + move32(); + secDiag_fx_e[nCh] = g_right_e; + biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularVectors_Left_fx_e, @@ -898,10 +902,6 @@ static void HouseholderReduction_fx( move32(); singularValues_fx_e[nCh] = g_left_e; - secDiag_fx[nCh] = g_right_fx; /* from the previous channel */ - move32(); - secDiag_fx_e[nCh] = g_right_e; 
- biDiagonalReductionRight_fx( singularVectors_Left_fx, singularVectors_Left_fx_e, -- GitLab From 6e01c1fc420d0f71f958adf2518c28a54beeedc4 Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Fri, 15 Aug 2025 13:13:54 +0200 Subject: [PATCH 33/33] fix formatting --- lib_dec/ivas_svd_dec_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index ded17e005..6471becde 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -888,7 +888,7 @@ static void HouseholderReduction_fx( secDiag_fx[nCh] = g_right_fx; /* from the previous channel */ move32(); secDiag_fx_e[nCh] = g_right_e; - + biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularVectors_Left_fx_e, -- GitLab