From 5aae8ccce48ee22068f7497d33a7af3ae47cdd1e Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Mon, 18 Nov 2024 12:45:41 +0100 Subject: [PATCH 01/41] Reduce WMOPS by approx. 300 for test case in issue #1010 --- lib_com/basop_util.c | 45 +++++- lib_dec/ivas_svd_dec.c | 326 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 368 insertions(+), 3 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index c465428fc..a0624a6b4 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1038,9 +1038,52 @@ Word32 div_w( Word32 L_num, Word32 L_den ) } } +#define OPT_BASOP_Util_Divide3232_Scale_cadence + +#ifdef OPT_BASOP_Util_Divide3232_Scale_cadence +static +Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e) +{ + Word16 sign, shift; + + sign = 0; + move16(); + if (x < 0) { + sign = 1; + } + if (sign) { + x = L_negate(x); + } + + shift = norm_l(x); + x = L_shl(x, shift); + *px_e = 0; + move16(); + x = ISqrt32norm(x, px_e); + x = Mpy_32_32(x, x); + *px_e = add(shl(*px_e, 1), shift); + move16(); + + if (sign) { + x = L_negate(x); + } + return x; +} +#endif + Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { Word32 z; + +#ifdef OPT_BASOP_Util_Divide3232_Scale_cadence + Word16 shift, s2; + z = BASOP_Util_Inv32(y, &s2); + shift = norm_l(x); + z = Mpy_32_32_r(L_shl(x, shift), z); + *s = sub(s2, shift); + move16(); +#else + Word16 sx; Word16 sy; Word32 sign; @@ -1086,7 +1129,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { z = L_negate( z ); } - +#endif return z; } diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 801bb9e2b..a89e2ee3d 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -62,6 +62,17 @@ #define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 ) #define CONVERGENCE_FACTOR_FX 214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */ #endif + +#if 1 +#define OPT_DIV +#define OPT_DIV_NORM /* 5 dB SNR precision improvement */ +#define OPT_SUM /* 
Very little WMOPS savings */ +#define OPT_SUM2 /* Very little WMOPS savings */ + +#define OPT_GIVENS +#define OPT_GIVENS_INV +#endif + /*-----------------------------------------------------------------------* * Local function prototypes *-----------------------------------------------------------------------*/ @@ -197,6 +208,18 @@ static void ApplyRotation_fx( const Word16 nChannels /* Q0 */ ); +#ifdef OPT_GIVENS_INV +static void GivensRotation2_fx( + const Word32 x, /* exp(x_e) */ + const Word16 x_e, + const Word32 z, /* exp(z_e) */ + const Word16 z_e, + Word32 *result, + Word32 *resultInv, + Word16 *out_e, + Word16 *outInv_e ); +#endif + static Word32 GivensRotation_fx( const Word32 x, /* exp(x_e) */ const Word16 x_e, @@ -617,6 +640,9 @@ static Word16 BidagonalDiagonalisation_fx( Word16 convergence, iteration, found_split; Word16 error = 0; move16(); +#ifdef OPT_GIVENS_INV + Word32 temp; +#endif Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS], secDiag_new_e[MAX_OUTPUT_CHANNELS]; Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS ); set16_fx( secDiag_new_e, *secDiag_fx_e, MAX_OUTPUT_CHANNELS ); @@ -687,17 +713,28 @@ static Word16 BidagonalDiagonalisation_fx( c = singularValues_fx[kCh]; /* exp(singularValues_new_e) */ c_e = singularValues_new_e[kCh]; +#ifdef OPT_GIVENS_INV + GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */ + c = Mpy_32_32( c, temp ); + c_e = add(c_e, temp_exp); +#else singularValues_fx[kCh] = GivensRotation_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_new_e[kCh] ); /* exp(singularValues_new_e) */ c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp ); /* exp(temp_exp + (c_e - singularValues_new_e)) */ c_e = add( temp_exp, sub( c_e, singularValues_new_e[kCh] ) ); +#endif IF( c_e > 0 ) { c = L_shl_sat( c, c_e ); // Q31 c_e 
= 0; move16(); } +#ifdef OPT_GIVENS_INV + s = Mpy_32_32( -g, temp ); + s_e = add( g_e, temp_exp ); +#else s = BASOP_Util_Divide3232_Scale_cadence( -g, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp ); /* exp(temp_exp + (g_e - singularValues_new_e))*/ s_e = add( temp_exp, sub( g_e, singularValues_new_e[kCh] ) ); +#endif IF( s_e > 0 ) { s = L_shl_sat( s, s_e ); // Q31 @@ -905,6 +942,10 @@ static void ApplyQRTransform_fx( const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/ ) { +#ifdef OPT_GIVENS_INV + Word32 temp; + Word16 temp_e; +#endif Word16 ch, split; Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0; move32(); @@ -1007,18 +1048,29 @@ static void ApplyQRTransform_fx( g = Mpy_32_32( c, secDiag[ch + 1] ); /* exp(c_e + secDiag_e) */ g_e = add( c_e, secDiag_e[ch + 1] ); +#ifdef OPT_GIVENS_INV + GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */ + c = Mpy_32_32( d, temp); + c_e = add(temp_e, d_e); +#else secDiag[ch] = GivensRotation_fx( d, d_e, r, r_e, &secDiag_e[ch] ); /* exp(secDiag_e) */ move32(); c = BASOP_Util_Divide3232_Scale_cadence( d, maxWithSign_fx( secDiag[ch] ), &c_e ); /* exp(c_e + (d_e + secDiag_e)) */ c_e = add( c_e, sub( d_e, secDiag_e[ch] ) ); +#endif IF( c_e > 0 ) { c = L_shl_sat( c, c_e ); // Q31 c_e = 0; move16(); } +#ifdef OPT_GIVENS_INV + s = Mpy_32_32( r, temp ); + s_e = add(r_e, temp_e); +#else s = BASOP_Util_Divide3232_Scale_cadence( r, maxWithSign_fx( secDiag[ch] ), &s_e ); /* exp(s_e + (r_e - sec_Diag_e))*/ s_e = add( s_e, sub( r_e, secDiag_e[ch] ) ); +#endif IF( s_e > 0 ) { s = L_shl_sat( s, s_e ); // Q31 @@ -1039,12 +1091,18 @@ static void ApplyQRTransform_fx( // ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC); ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC ); +#ifdef OPT_GIVENS_INV + GivensRotation2_fx( d, 
d_e, r, r_e, &singularValues[ch], &aux, &singularValues_e[ch], &aux_e ); /* exp(singularValues_e) */ +#else singularValues[ch] = GivensRotation_fx( d, d_e, r, r_e, &singularValues_e[ch] ); /* exp(singularValues_e) */ move32(); +#endif IF( GT_32( L_abs( singularValues[ch] ), Mpy_32_32( CONVERGENCE_FACTOR_FX, L_abs( singularValues[ch] ) ) ) ) { +#ifndef OPT_GIVENS_INV aux = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, singularValues[ch], &aux_e ); /* exp(aux_e + (1 - singularValues_e)) */ aux_e = add( aux_e, sub( 1, singularValues_e[ch] ) ); +#endif c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */ c_e = add( d_e, aux_e ); @@ -1317,6 +1375,98 @@ static void HouseholderReduction( * *-------------------------------------------------------------------------*/ +#ifdef OPT_DIV +static +Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e) +{ + Word16 sign, shift; + + sign = 0; + move16(); + if (x < 0) { + sign = 1; + } + if (sign) { + x = L_negate(x); + } + + shift = norm_l(x); + x = L_shl(x, shift); + *px_e = 0; + move16(); + x = ISqrt32norm(x, px_e); + x = Mpy_32_32(x, x); + *px_e = add(shl(*px_e, 1), shift); + move16(); + + if (sign) { + x = L_negate(x); + } + return x; +} +#endif + +#ifdef OPT_SUM +static +Word32 BASOP_Util_Accu_Mant32Exp /* o : normalized result mantissa */ + ( Word32 a_m, /* i : Mantissa of 1st operand a */ + Word16 *p_a_e, /* i/o : Exponent of 1st operand a */ + Word32 b_m, /* i : Mantissa of 2nd operand b */ + Word16 b_e /* i : Exponent of 2nd operand b */ + ) +{ + Word32 L_tmp; + Word16 shift, a_e = *p_a_e; + + /* Compare exponents: the difference is limited to +/- 30 + The Word32 mantissa of the operand with lower exponent is shifted right by the exponent difference. + Then, the unshifted mantissa of the operand with the higher exponent is added. The addition result + is normalized and the result represents the mantissa to return. The returned exponent takes into + account all shift operations. 
+ */ + +#if 0 + if ( !a_m ) + a_e = add( b_e, 0 ); +#endif + if ( !b_m ) + b_e = add( a_e, 0 ); + + shift = sub( a_e, b_e ); +#if 0 + shift = s_max( -31, shift ); + shift = s_min( 31, shift ); +#endif + if ( shift < 0 ) + { + /* exponent of b is greater than exponent of a, shr a_m */ + a_m = L_shl( a_m, shift ); + } + if ( shift > 0 ) + { + /* exponent of a is greater than exponent of b */ + b_m = L_shr( b_m, shift ); + } + a_e = add( s_max( a_e, b_e ), 1 ); + L_tmp = L_add( L_shr( a_m, 1 ), L_shr( b_m, 1 ) ); +#if 1 + shift = norm_l( L_tmp ); + if ( shift ) + L_tmp = L_shl( L_tmp, shift ); +#if 0 + if ( L_tmp == 0 ) + a_e = add( 0, 0 ); +#endif + if ( L_tmp != 0 ) + a_e = sub( a_e, shift ); +#endif + *p_a_e = a_e; + + return ( L_tmp ); +} + +#endif + #ifdef IVAS_FLOAT_FIXED static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ @@ -1373,27 +1523,66 @@ move32(); IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ { +#ifdef OPT_SUM2 + *sig_x_e = -31; + move16(); +#endif + idx = currChannel; move16(); FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { +#ifdef OPT_SUM2 + ( *sig_x ) = BASOP_Util_Accu_Mant32Exp( *sig_x, sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e ); /* exp(sig_x_e) */ +#else ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */ +#endif } IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { +#ifdef OPT_DIV + Word16 invVal_e, temp_e; + Word32 invVal = BASOP_Util_Inv32(maxWithSign_fx( *sig_x ), &invVal_e); +#ifdef OPT_DIV_NORM + temp_e = norm_l(invVal); + invVal = L_shl(invVal, temp_e); + invVal_e = sub(invVal_e, temp_e); +#endif +#endif norm_x = 0; move32(); +#ifdef OPT_SUM2 + norm_x_e = -31; +#else norm_x_e = 0; +#endif move16(); FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { +#ifndef OPT_DIV singularVectors[jCh][currChannel] = 
BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ +#else + temp_e = norm_l(singularVectors[jCh][currChannel]); + singularVectors[jCh][currChannel] = L_shl(singularVectors[jCh][currChannel], temp_e); + singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + sing_exp[jCh] = sub(invVal_e, temp_e); +#ifdef OPT_DIV_NORM + temp_e = norm_l(singularVectors[jCh][currChannel]); + singularVectors[jCh][currChannel] = L_shl(singularVectors[jCh][currChannel], temp_e); + sing_exp[jCh] = sub(sing_exp[jCh], temp_e); +#endif + move16(); +#endif move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); move16(); +#ifdef OPT_SUM2 + norm_x = BASOP_Util_Accu_Mant32Exp( norm_x, &norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ) ); /* exp(norm_x_e) */ +#else norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#endif } IF( GT_16( norm_x_e, 0 ) ) { @@ -1426,19 +1615,46 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #endif move32(); +#ifdef OPT_DIV + invVal = BASOP_Util_Inv32(maxWithSign_fx( r ), &invVal_e); +#ifdef OPT_DIV_NORM + temp_e = norm_l(invVal); + invVal = L_shl(invVal, temp_e); + invVal_e = sub(invVal_e, temp_e); +#endif +#endif + FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ { norm_x = 0; move32(); +#ifdef OPT_SUM2 + norm_x_e = -31; +#else norm_x_e = 0; +#endif move16(); FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { +#ifdef OPT_SUM2 + norm_x = BASOP_Util_Accu_Mant32Exp( norm_x, &norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], 
*singularVectors_e )); /* exp(norm_x_e) */ +#else norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e ), &norm_x_e ); /* exp(norm_x_e) */ +#endif } +#ifndef OPT_DIV f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */ f_e = add( f_e, sub( norm_x_e, r_e ) ); +#else + f = Mpy_32_32(norm_x, invVal); /* invVal_e + (norm_x_e - r_e) */ + f_e = add(invVal_e, sub( norm_x_e, r_e ) ); +#ifdef OPT_DIV_NORM + temp_e = norm_l(f); + f = L_shl(f, temp_e); + f_e = sub(f_e, temp_e); +#endif +#endif FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { @@ -1605,9 +1821,18 @@ static void biDiagonalReductionRight_fx( { idx = add( currChannel, 1 ); /* Q0 */ +#ifdef OPT_SUM + *sig_x_e = -31; + move16(); +#endif + FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { +#ifdef OPT_SUM + ( *sig_x ) = BASOP_Util_Accu_Mant32Exp( *sig_x, sig_x_e, L_abs( singularVectors[currChannel][jCh] ), *singularVectors_e ); /* exp(sig_x_e) */ +#else ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */ +#endif } IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ @@ -1617,9 +1842,31 @@ static void biDiagonalReductionRight_fx( norm_x_e = 0; move16(); +#ifdef OPT_DIV + Word16 invVal_e, temp_e; + Word32 invVal = BASOP_Util_Inv32(maxWithSign_fx( *sig_x ), &invVal_e); +#ifdef OPT_DIV_NORM + temp_e = norm_l(invVal); + invVal = L_shl(invVal, temp_e); + invVal_e = sub(invVal_e, temp_e); +#endif +#endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { +#ifndef OPT_DIV singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ +#else + temp_e = 
norm_l(singularVectors[currChannel][jCh]); + singularVectors[currChannel][jCh] = L_shl(singularVectors[currChannel][jCh], temp_e); + singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + sing_exp[jCh] = sub(invVal_e, temp_e); + move16(); +#ifdef OPT_DIV_NORM + temp_e = norm_l(singularVectors[currChannel][jCh]); + singularVectors[currChannel][jCh] = L_shl(singularVectors[currChannel][jCh], temp_e); + sing_exp[jCh] = sub(sing_exp[jCh], temp_e); +#endif +#endif move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); move16(); @@ -1651,9 +1898,31 @@ static void biDiagonalReductionRight_fx( singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[idx], -( *g ), 0, &sing_exp[idx] ); /* exp(sing_exp) */ move32(); +#ifdef OPT_DIV + invVal = BASOP_Util_Inv32(maxWithSign_fx( r ), &invVal_e); +#ifdef OPT_DIV_NORM + temp_e = norm_l(invVal); + invVal = L_shl(invVal, temp_e); + invVal_e = sub(invVal_e, temp_e); +#endif +#endif + FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { +#ifndef OPT_DIV secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ +#else + temp_e = norm_l(singularVectors[currChannel][jCh]); + secDiag[jCh] = L_shl(singularVectors[currChannel][jCh], temp_e); + secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + secDiag_exp[jCh] = sub(invVal_e, temp_e); +#ifdef OPT_DIV_NORM + temp_e = norm_l(secDiag[jCh]); + secDiag[jCh] = L_shl(secDiag[jCh], temp_e); + secDiag_exp[jCh] = sub(secDiag_exp[jCh], temp_e); +#endif + move16(); +#endif move32(); secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) ); move32(); @@ -1663,11 +1932,19 @@ static void biDiagonalReductionRight_fx( { norm_x = 0; move32(); +#ifdef OPT_SUM2 + 
norm_x_e = -31; +#else norm_x_e = 0; +#endif move16(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { +#ifdef OPT_SUM2 + norm_x = BASOP_Util_Accu_Mant32Exp( norm_x, &norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( *singularVectors_e, sing_exp[jCh] ) ); /* exp(sig_x_e) */ +#else norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( *singularVectors_e, sing_exp[jCh] ), &norm_x_e ); /* exp(norm_x_e) */ +#endif } FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ @@ -1837,8 +2114,13 @@ static void singularVectorsAccumulationLeft_fx( IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/ { +#ifdef OPT_DIV + t_ii = BASOP_Util_Inv32(maxWithSign_fx(t_ii), &temp_exp); + t_ii_e = sub(temp_exp, t_ii_e); +#else t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */ t_ii_e = add( 1, sub( temp_exp, t_ii_e ) ); +#endif // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) ); FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ { @@ -1850,9 +2132,14 @@ static void singularVectorsAccumulationLeft_fx( { norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ } +#ifdef OPT_DIV + t_jj = BASOP_Util_Inv32(maxWithSign_fx(singularVectors_Left[nCh][nCh]), &temp_exp); + t_jj = Mpy_32_32(Mpy_32_32( t_ii, norm_y ), t_jj); + t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); +#else t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); - +#endif FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */ { 
singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], sing_exp2[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, sing_exp2[k][nCh] ), &sing_exp2[k][iCh] ); /* exp(sing_exp2) */ @@ -2106,6 +2393,32 @@ static void singularVectorsAccumulationRight( *-------------------------------------------------------------------------*/ #ifdef IVAS_FLOAT_FIXED + +#ifdef OPT_GIVENS_INV +static void GivensRotation2_fx( + const Word32 x, /* exp(x_e) */ + const Word16 x_e, + const Word32 z, /* exp(z_e) */ + const Word16 z_e, + Word32 *result, + Word32 *resultInv, + Word16 *out_e, + Word16 *outInv_e ) +{ + Word32 r; + + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32(z, z), shl(z_e, 1), Mpy_32_32(x, x), shl(x_e, 1), out_e ); + r = L_max(r, 1); + *outInv_e = *out_e; + move16(); + *result = Sqrt32(r, out_e); + move32(); + + *resultInv = ISqrt32(r, outInv_e); + move32(); +} +#endif + static Word32 GivensRotation_fx( const Word32 x, /* exp(x_e) */ const Word16 x_e, @@ -2113,10 +2426,19 @@ static Word32 GivensRotation_fx( const Word16 z_e, Word16 *out_e ) { +#ifdef OPT_GIVENS + Word32 r; +#else Word32 x_abs, z_abs; Word32 cotan, tan, r; Word16 temp_exp; Word32 L_temp; +#endif + +#ifdef OPT_GIVENS + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32(z, z), shl(z_e, 1), Mpy_32_32(x, x), shl(x_e, 1), out_e ); + r = Sqrt32(r, out_e); +#else x_abs = L_abs( x ); z_abs = L_abs( z ); test(); @@ -2159,7 +2481,7 @@ static Word32 GivensRotation_fx( *out_e = add( z_e, temp_exp ); } } - +#endif return ( r ); } #else -- GitLab From ca43c4f56e9d4757259284776c994502b8bce90e Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Mon, 18 Nov 2024 16:50:54 +0100 Subject: [PATCH 02/41] Issue #1010 : Add output normalization and x==0 handling to BASOP_Util_Inv32 to prevent instabilities for some operating points. 
--- lib_com/basop_util.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index a0624a6b4..2aba88d15 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1044,7 +1044,14 @@ Word32 div_w( Word32 L_num, Word32 L_den ) static Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e) { - Word16 sign, shift; + Word16 sign, shift, shift2; + + /* Avoid result 0 with inconvenient exponent returned. */ + IF( x == (Word32) 0 ) + { + *px_e = 0; + return ( (Word32) 0 ); + } sign = 0; move16(); @@ -1061,7 +1068,9 @@ Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e) move16(); x = ISqrt32norm(x, px_e); x = Mpy_32_32(x, x); - *px_e = add(shl(*px_e, 1), shift); + shift2 = norm_l(x); + x = L_shl(x, shift2); + *px_e = add(shl(*px_e, 1), sub(shift, shift2)); move16(); if (sign) { -- GitLab From c41a883c2eaf587965e42fb4a0b7e3386f01c219 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Mon, 18 Nov 2024 16:50:54 +0100 Subject: [PATCH 03/41] Issue #1010 : Add output normalization and x==0 handling to BASOP_Util_Inv32 to prevent instabilities for some operating points. --- lib_com/basop_util.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 2aba88d15..852b4d225 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1049,8 +1049,8 @@ Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e) /* Avoid result 0 with inconvenient exponent returned. 
*/ IF( x == (Word32) 0 ) { - *px_e = 0; - return ( (Word32) 0 ); + *px_e = 31; + return ( (Word32) MAX_32 ); } sign = 0; @@ -1086,6 +1086,13 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) #ifdef OPT_BASOP_Util_Divide3232_Scale_cadence Word16 shift, s2; + + IF( x == (Word32) 0 ) + { + *s = 0; + return ( (Word32) 0 ); + } + z = BASOP_Util_Inv32(y, &s2); shift = norm_l(x); z = Mpy_32_32_r(L_shl(x, shift), z); -- GitLab From dc90a03f1490460ddc8bf23124805701eada21c2 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Tue, 26 Nov 2024 14:59:49 +0100 Subject: [PATCH 04/41] Remove attempts to optimize normalized additions. --- lib_dec/ivas_svd_dec.c | 110 ++--------------------------------------- 1 file changed, 5 insertions(+), 105 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index a89e2ee3d..9c8ae6ad4 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -66,8 +66,6 @@ #if 1 #define OPT_DIV #define OPT_DIV_NORM /* 5 dB SNR precision improvement */ -#define OPT_SUM /* Very little WMOPS savings */ -#define OPT_SUM2 /* Very little WMOPS savings */ #define OPT_GIVENS #define OPT_GIVENS_INV @@ -1406,67 +1404,6 @@ Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e) } #endif -#ifdef OPT_SUM -static -Word32 BASOP_Util_Accu_Mant32Exp /* o : normalized result mantissa */ - ( Word32 a_m, /* i : Mantissa of 1st operand a */ - Word16 *p_a_e, /* i/o : Exponent of 1st operand a */ - Word32 b_m, /* i : Mantissa of 2nd operand b */ - Word16 b_e /* i : Exponent of 2nd operand b */ - ) -{ - Word32 L_tmp; - Word16 shift, a_e = *p_a_e; - - /* Compare exponents: the difference is limited to +/- 30 - The Word32 mantissa of the operand with lower exponent is shifted right by the exponent difference. - Then, the unshifted mantissa of the operand with the higher exponent is added. The addition result - is normalized and the result represents the mantissa to return. 
The returned exponent takes into - account all shift operations. - */ - -#if 0 - if ( !a_m ) - a_e = add( b_e, 0 ); -#endif - if ( !b_m ) - b_e = add( a_e, 0 ); - - shift = sub( a_e, b_e ); -#if 0 - shift = s_max( -31, shift ); - shift = s_min( 31, shift ); -#endif - if ( shift < 0 ) - { - /* exponent of b is greater than exponent of a, shr a_m */ - a_m = L_shl( a_m, shift ); - } - if ( shift > 0 ) - { - /* exponent of a is greater than exponent of b */ - b_m = L_shr( b_m, shift ); - } - a_e = add( s_max( a_e, b_e ), 1 ); - L_tmp = L_add( L_shr( a_m, 1 ), L_shr( b_m, 1 ) ); -#if 1 - shift = norm_l( L_tmp ); - if ( shift ) - L_tmp = L_shl( L_tmp, shift ); -#if 0 - if ( L_tmp == 0 ) - a_e = add( 0, 0 ); -#endif - if ( L_tmp != 0 ) - a_e = sub( a_e, shift ); -#endif - *p_a_e = a_e; - - return ( L_tmp ); -} - -#endif - #ifdef IVAS_FLOAT_FIXED static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ @@ -1523,21 +1460,12 @@ move32(); IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ { -#ifdef OPT_SUM2 - *sig_x_e = -31; - move16(); -#endif - idx = currChannel; move16(); FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { -#ifdef OPT_SUM2 - ( *sig_x ) = BASOP_Util_Accu_Mant32Exp( *sig_x, sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e ); /* exp(sig_x_e) */ -#else ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */ -#endif } IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ @@ -1553,11 +1481,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #endif norm_x = 0; move32(); -#ifdef OPT_SUM2 - norm_x_e = -31; -#else norm_x_e = 0; -#endif move16(); FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { @@ -1578,11 +1502,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); 
move16(); -#ifdef OPT_SUM2 - norm_x = BASOP_Util_Accu_Mant32Exp( norm_x, &norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ) ); /* exp(norm_x_e) */ -#else norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ -#endif } IF( GT_16( norm_x_e, 0 ) ) { @@ -1628,19 +1548,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ { norm_x = 0; move32(); -#ifdef OPT_SUM2 - norm_x_e = -31; -#else norm_x_e = 0; -#endif move16(); FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { -#ifdef OPT_SUM2 - norm_x = BASOP_Util_Accu_Mant32Exp( norm_x, &norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e )); /* exp(norm_x_e) */ -#else norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e ), &norm_x_e ); /* exp(norm_x_e) */ -#endif } #ifndef OPT_DIV @@ -1821,18 +1733,9 @@ static void biDiagonalReductionRight_fx( { idx = add( currChannel, 1 ); /* Q0 */ -#ifdef OPT_SUM - *sig_x_e = -31; - move16(); -#endif - FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { -#ifdef OPT_SUM - ( *sig_x ) = BASOP_Util_Accu_Mant32Exp( *sig_x, sig_x_e, L_abs( singularVectors[currChannel][jCh] ), *singularVectors_e ); /* exp(sig_x_e) */ -#else ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */ -#endif } IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ @@ -1932,19 +1835,11 @@ static void biDiagonalReductionRight_fx( { norm_x = 0; move32(); -#ifdef OPT_SUM2 - norm_x_e = -31; -#else norm_x_e = 0; -#endif move16(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { -#ifdef OPT_SUM2 - 
norm_x = BASOP_Util_Accu_Mant32Exp( norm_x, &norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( *singularVectors_e, sing_exp[jCh] ) ); /* exp(sig_x_e) */ -#else norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( *singularVectors_e, sing_exp[jCh] ), &norm_x_e ); /* exp(norm_x_e) */ -#endif } FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ @@ -2280,8 +2175,13 @@ static void singularVectorsAccumulationRight_fx( FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/ { +#ifdef OPT_DIVno + ratio_float = BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */ + singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ +#else ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */ singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ +#endif move32(); sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e ) ); move16(); -- GitLab From ea2c4fda65a13494fbb3ba346ee639eeb47d1646 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Tue, 26 Nov 2024 17:07:47 +0100 Subject: [PATCH 05/41] Revert OPT_BASOP_Util_Divide3232_Scale_cadence optimization attempt. 
--- lib_com/basop_util.c | 61 +------------------------------------------- 1 file changed, 1 insertion(+), 60 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 852b4d225..c465428fc 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1038,68 +1038,9 @@ Word32 div_w( Word32 L_num, Word32 L_den ) } } -#define OPT_BASOP_Util_Divide3232_Scale_cadence - -#ifdef OPT_BASOP_Util_Divide3232_Scale_cadence -static -Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e) -{ - Word16 sign, shift, shift2; - - /* Avoid result 0 with inconvenient exponent returned. */ - IF( x == (Word32) 0 ) - { - *px_e = 31; - return ( (Word32) MAX_32 ); - } - - sign = 0; - move16(); - if (x < 0) { - sign = 1; - } - if (sign) { - x = L_negate(x); - } - - shift = norm_l(x); - x = L_shl(x, shift); - *px_e = 0; - move16(); - x = ISqrt32norm(x, px_e); - x = Mpy_32_32(x, x); - shift2 = norm_l(x); - x = L_shl(x, shift2); - *px_e = add(shl(*px_e, 1), sub(shift, shift2)); - move16(); - - if (sign) { - x = L_negate(x); - } - return x; -} -#endif - Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { Word32 z; - -#ifdef OPT_BASOP_Util_Divide3232_Scale_cadence - Word16 shift, s2; - - IF( x == (Word32) 0 ) - { - *s = 0; - return ( (Word32) 0 ); - } - - z = BASOP_Util_Inv32(y, &s2); - shift = norm_l(x); - z = Mpy_32_32_r(L_shl(x, shift), z); - *s = sub(s2, shift); - move16(); -#else - Word16 sx; Word16 sy; Word32 sign; @@ -1145,7 +1086,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { z = L_negate( z ); } -#endif + return z; } -- GitLab From e643e42ec48317f55068318bc431a3549d822ed0 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Wed, 27 Nov 2024 09:40:25 +0100 Subject: [PATCH 06/41] Correct and activate optimization using BASOP_Util_Divide3232_Scale. 
--- lib_dec/ivas_svd_dec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 9c8ae6ad4..8e526d283 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -2175,9 +2175,9 @@ static void singularVectorsAccumulationRight_fx( FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/ { -#ifdef OPT_DIVno - ratio_float = BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */ - singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ +#ifdef OPT_DIV + ratio_float = L_deposit_h(BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 )); /* exp(temp_exp1) */ + singularVectors_Right[iCh][nCh] = L_deposit_h(BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] )); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ #else ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */ singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ -- GitLab From 5b3c28cc10907ad673f6267882bcfa9f00764fcd Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Wed, 27 Nov 2024 10:59:47 +0100 Subject: [PATCH 07/41] Use correct macro naming scheme. 
--- lib_dec/ivas_svd_dec.c | 72 +++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 8e526d283..339bf0d5d 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -64,11 +64,11 @@ #endif #if 1 -#define OPT_DIV -#define OPT_DIV_NORM /* 5 dB SNR precision improvement */ +#define FIX_1010_OPT_DIV +#define FIX_1010_OPT_DIV_NORM /* precision improvement */ -#define OPT_GIVENS -#define OPT_GIVENS_INV +#define FIX_1010_OPT_GIVENS +#define FIX_1010_OPT_GIVENS_INV #endif /*-----------------------------------------------------------------------* @@ -206,7 +206,7 @@ static void ApplyRotation_fx( const Word16 nChannels /* Q0 */ ); -#ifdef OPT_GIVENS_INV +#ifdef FIX_1010_OPT_GIVENS_INV static void GivensRotation2_fx( const Word32 x, /* exp(x_e) */ const Word16 x_e, @@ -638,7 +638,7 @@ static Word16 BidagonalDiagonalisation_fx( Word16 convergence, iteration, found_split; Word16 error = 0; move16(); -#ifdef OPT_GIVENS_INV +#ifdef FIX_1010_OPT_GIVENS_INV Word32 temp; #endif Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS], secDiag_new_e[MAX_OUTPUT_CHANNELS]; @@ -711,7 +711,7 @@ static Word16 BidagonalDiagonalisation_fx( c = singularValues_fx[kCh]; /* exp(singularValues_new_e) */ c_e = singularValues_new_e[kCh]; -#ifdef OPT_GIVENS_INV +#ifdef FIX_1010_OPT_GIVENS_INV GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */ c = Mpy_32_32( c, temp ); c_e = add(c_e, temp_exp); @@ -726,7 +726,7 @@ static Word16 BidagonalDiagonalisation_fx( c_e = 0; move16(); } -#ifdef OPT_GIVENS_INV +#ifdef FIX_1010_OPT_GIVENS_INV s = Mpy_32_32( -g, temp ); s_e = add( g_e, temp_exp ); #else @@ -940,7 +940,7 @@ static void ApplyQRTransform_fx( const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/ ) { -#ifdef OPT_GIVENS_INV +#ifdef 
FIX_1010_OPT_GIVENS_INV Word32 temp; Word16 temp_e; #endif @@ -1046,7 +1046,7 @@ static void ApplyQRTransform_fx( g = Mpy_32_32( c, secDiag[ch + 1] ); /* exp(c_e + secDiag_e) */ g_e = add( c_e, secDiag_e[ch + 1] ); -#ifdef OPT_GIVENS_INV +#ifdef FIX_1010_OPT_GIVENS_INV GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */ c = Mpy_32_32( d, temp); c_e = add(temp_e, d_e); @@ -1062,7 +1062,7 @@ static void ApplyQRTransform_fx( c_e = 0; move16(); } -#ifdef OPT_GIVENS_INV +#ifdef FIX_1010_OPT_GIVENS_INV s = Mpy_32_32( r, temp ); s_e = add(r_e, temp_e); #else @@ -1089,7 +1089,7 @@ static void ApplyQRTransform_fx( // ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC); ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC ); -#ifdef OPT_GIVENS_INV +#ifdef FIX_1010_OPT_GIVENS_INV GivensRotation2_fx( d, d_e, r, r_e, &singularValues[ch], &aux, &singularValues_e[ch], &aux_e ); /* exp(singularValues_e) */ #else singularValues[ch] = GivensRotation_fx( d, d_e, r, r_e, &singularValues_e[ch] ); /* exp(singularValues_e) */ @@ -1097,7 +1097,7 @@ static void ApplyQRTransform_fx( #endif IF( GT_32( L_abs( singularValues[ch] ), Mpy_32_32( CONVERGENCE_FACTOR_FX, L_abs( singularValues[ch] ) ) ) ) { -#ifndef OPT_GIVENS_INV +#ifndef FIX_1010_OPT_GIVENS_INV aux = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, singularValues[ch], &aux_e ); /* exp(aux_e + (1 - singularValues_e)) */ aux_e = add( aux_e, sub( 1, singularValues_e[ch] ) ); #endif @@ -1373,7 +1373,7 @@ static void HouseholderReduction( * *-------------------------------------------------------------------------*/ -#ifdef OPT_DIV +#ifdef FIX_1010_OPT_DIV static Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e) { @@ -1470,10 +1470,10 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { -#ifdef OPT_DIV +#ifdef 
FIX_1010_OPT_DIV Word16 invVal_e, temp_e; Word32 invVal = BASOP_Util_Inv32(maxWithSign_fx( *sig_x ), &invVal_e); -#ifdef OPT_DIV_NORM +#ifdef FIX_1010_OPT_DIV_NORM temp_e = norm_l(invVal); invVal = L_shl(invVal, temp_e); invVal_e = sub(invVal_e, temp_e); @@ -1485,14 +1485,14 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move16(); FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { -#ifndef OPT_DIV +#ifndef FIX_1010_OPT_DIV singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ #else temp_e = norm_l(singularVectors[jCh][currChannel]); singularVectors[jCh][currChannel] = L_shl(singularVectors[jCh][currChannel], temp_e); singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ sing_exp[jCh] = sub(invVal_e, temp_e); -#ifdef OPT_DIV_NORM +#ifdef FIX_1010_OPT_DIV_NORM temp_e = norm_l(singularVectors[jCh][currChannel]); singularVectors[jCh][currChannel] = L_shl(singularVectors[jCh][currChannel], temp_e); sing_exp[jCh] = sub(sing_exp[jCh], temp_e); @@ -1535,9 +1535,9 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #endif move32(); -#ifdef OPT_DIV +#ifdef FIX_1010_OPT_DIV invVal = BASOP_Util_Inv32(maxWithSign_fx( r ), &invVal_e); -#ifdef OPT_DIV_NORM +#ifdef FIX_1010_OPT_DIV_NORM temp_e = norm_l(invVal); invVal = L_shl(invVal, temp_e); invVal_e = sub(invVal_e, temp_e); @@ -1555,13 +1555,13 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e ), &norm_x_e ); /* exp(norm_x_e) */ } -#ifndef OPT_DIV +#ifndef FIX_1010_OPT_DIV f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */ f_e = add( f_e, sub( 
norm_x_e, r_e ) ); #else f = Mpy_32_32(norm_x, invVal); /* invVal_e + (norm_x_e - r_e) */ f_e = add(invVal_e, sub( norm_x_e, r_e ) ); -#ifdef OPT_DIV_NORM +#ifdef FIX_1010_OPT_DIV_NORM temp_e = norm_l(f); f = L_shl(f, temp_e); f_e = sub(f_e, temp_e); @@ -1745,10 +1745,10 @@ static void biDiagonalReductionRight_fx( norm_x_e = 0; move16(); -#ifdef OPT_DIV +#ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; Word32 invVal = BASOP_Util_Inv32(maxWithSign_fx( *sig_x ), &invVal_e); -#ifdef OPT_DIV_NORM +#ifdef FIX_1010_OPT_DIV_NORM temp_e = norm_l(invVal); invVal = L_shl(invVal, temp_e); invVal_e = sub(invVal_e, temp_e); @@ -1756,7 +1756,7 @@ static void biDiagonalReductionRight_fx( #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { -#ifndef OPT_DIV +#ifndef FIX_1010_OPT_DIV singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ #else temp_e = norm_l(singularVectors[currChannel][jCh]); @@ -1764,7 +1764,7 @@ static void biDiagonalReductionRight_fx( singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ sing_exp[jCh] = sub(invVal_e, temp_e); move16(); -#ifdef OPT_DIV_NORM +#ifdef FIX_1010_OPT_DIV_NORM temp_e = norm_l(singularVectors[currChannel][jCh]); singularVectors[currChannel][jCh] = L_shl(singularVectors[currChannel][jCh], temp_e); sing_exp[jCh] = sub(sing_exp[jCh], temp_e); @@ -1801,9 +1801,9 @@ static void biDiagonalReductionRight_fx( singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[idx], -( *g ), 0, &sing_exp[idx] ); /* exp(sing_exp) */ move32(); -#ifdef OPT_DIV +#ifdef FIX_1010_OPT_DIV invVal = BASOP_Util_Inv32(maxWithSign_fx( r ), &invVal_e); -#ifdef OPT_DIV_NORM +#ifdef FIX_1010_OPT_DIV_NORM temp_e = norm_l(invVal); invVal = L_shl(invVal, temp_e); invVal_e = 
sub(invVal_e, temp_e); @@ -1812,14 +1812,14 @@ static void biDiagonalReductionRight_fx( FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { -#ifndef OPT_DIV +#ifndef FIX_1010_OPT_DIV secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ #else temp_e = norm_l(singularVectors[currChannel][jCh]); secDiag[jCh] = L_shl(singularVectors[currChannel][jCh], temp_e); secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ secDiag_exp[jCh] = sub(invVal_e, temp_e); -#ifdef OPT_DIV_NORM +#ifdef FIX_1010_OPT_DIV_NORM temp_e = norm_l(secDiag[jCh]); secDiag[jCh] = L_shl(secDiag[jCh], temp_e); secDiag_exp[jCh] = sub(secDiag_exp[jCh], temp_e); @@ -2009,7 +2009,7 @@ static void singularVectorsAccumulationLeft_fx( IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/ { -#ifdef OPT_DIV +#ifdef FIX_1010_OPT_DIV t_ii = BASOP_Util_Inv32(maxWithSign_fx(t_ii), &temp_exp); t_ii_e = sub(temp_exp, t_ii_e); #else @@ -2027,7 +2027,7 @@ static void singularVectorsAccumulationLeft_fx( { norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ } -#ifdef OPT_DIV +#ifdef FIX_1010_OPT_DIV t_jj = BASOP_Util_Inv32(maxWithSign_fx(singularVectors_Left[nCh][nCh]), &temp_exp); t_jj = Mpy_32_32(Mpy_32_32( t_ii, norm_y ), t_jj); t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); @@ -2175,7 +2175,7 @@ static void singularVectorsAccumulationRight_fx( FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/ { -#ifdef OPT_DIV +#ifdef FIX_1010_OPT_DIV ratio_float = L_deposit_h(BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 )); /* exp(temp_exp1) */ singularVectors_Right[iCh][nCh] = 
L_deposit_h(BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] )); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ #else @@ -2294,7 +2294,7 @@ static void singularVectorsAccumulationRight( #ifdef IVAS_FLOAT_FIXED -#ifdef OPT_GIVENS_INV +#ifdef FIX_1010_OPT_GIVENS_INV static void GivensRotation2_fx( const Word32 x, /* exp(x_e) */ const Word16 x_e, @@ -2326,7 +2326,7 @@ static Word32 GivensRotation_fx( const Word16 z_e, Word16 *out_e ) { -#ifdef OPT_GIVENS +#ifdef FIX_1010_OPT_GIVENS Word32 r; #else Word32 x_abs, z_abs; @@ -2335,7 +2335,7 @@ static Word32 GivensRotation_fx( Word32 L_temp; #endif -#ifdef OPT_GIVENS +#ifdef FIX_1010_OPT_GIVENS r = BASOP_Util_Add_Mant32Exp( Mpy_32_32(z, z), shl(z_e, 1), Mpy_32_32(x, x), shl(x_e, 1), out_e ); r = Sqrt32(r, out_e); #else -- GitLab From 8f9f193c013cdc5e7472c9ef7ec61ba30cb233f0 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Wed, 27 Nov 2024 11:47:22 +0100 Subject: [PATCH 08/41] Merge format check patch from merge request pipeline. 
--- lib_dec/ivas_svd_dec.c | 142 +++++++++++++++++++++-------------------- 1 file changed, 72 insertions(+), 70 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 339bf0d5d..67a38f9e3 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -714,7 +714,7 @@ static Word16 BidagonalDiagonalisation_fx( #ifdef FIX_1010_OPT_GIVENS_INV GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */ c = Mpy_32_32( c, temp ); - c_e = add(c_e, temp_exp); + c_e = add( c_e, temp_exp ); #else singularValues_fx[kCh] = GivensRotation_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_new_e[kCh] ); /* exp(singularValues_new_e) */ c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp ); /* exp(temp_exp + (c_e - singularValues_new_e)) */ @@ -1048,8 +1048,8 @@ static void ApplyQRTransform_fx( #ifdef FIX_1010_OPT_GIVENS_INV GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */ - c = Mpy_32_32( d, temp); - c_e = add(temp_e, d_e); + c = Mpy_32_32( d, temp ); + c_e = add( temp_e, d_e ); #else secDiag[ch] = GivensRotation_fx( d, d_e, r, r_e, &secDiag_e[ch] ); /* exp(secDiag_e) */ move32(); @@ -1064,7 +1064,7 @@ static void ApplyQRTransform_fx( } #ifdef FIX_1010_OPT_GIVENS_INV s = Mpy_32_32( r, temp ); - s_e = add(r_e, temp_e); + s_e = add( r_e, temp_e ); #else s = BASOP_Util_Divide3232_Scale_cadence( r, maxWithSign_fx( secDiag[ch] ), &s_e ); /* exp(s_e + (r_e - sec_Diag_e))*/ s_e = add( s_e, sub( r_e, secDiag_e[ch] ) ); @@ -1374,31 +1374,33 @@ static void HouseholderReduction( *-------------------------------------------------------------------------*/ #ifdef FIX_1010_OPT_DIV -static -Word32 BASOP_Util_Inv32(Word32 x, Word16 *px_e) +static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) { Word16 sign, shift; 
sign = 0; move16(); - if (x < 0) { + if ( x < 0 ) + { sign = 1; } - if (sign) { - x = L_negate(x); + if ( sign ) + { + x = L_negate( x ); } - shift = norm_l(x); - x = L_shl(x, shift); + shift = norm_l( x ); + x = L_shl( x, shift ); *px_e = 0; move16(); - x = ISqrt32norm(x, px_e); - x = Mpy_32_32(x, x); - *px_e = add(shl(*px_e, 1), shift); + x = ISqrt32norm( x, px_e ); + x = Mpy_32_32( x, x ); + *px_e = add( shl( *px_e, 1 ), shift ); move16(); - if (sign) { - x = L_negate(x); + if ( sign ) + { + x = L_negate( x ); } return x; } @@ -1472,11 +1474,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ { #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; - Word32 invVal = BASOP_Util_Inv32(maxWithSign_fx( *sig_x ), &invVal_e); + Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); #ifdef FIX_1010_OPT_DIV_NORM - temp_e = norm_l(invVal); - invVal = L_shl(invVal, temp_e); - invVal_e = sub(invVal_e, temp_e); + temp_e = norm_l( invVal ); + invVal = L_shl( invVal, temp_e ); + invVal_e = sub( invVal_e, temp_e ); #endif #endif norm_x = 0; @@ -1488,14 +1490,14 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifndef FIX_1010_OPT_DIV singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ #else - temp_e = norm_l(singularVectors[jCh][currChannel]); - singularVectors[jCh][currChannel] = L_shl(singularVectors[jCh][currChannel], temp_e); - singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ - sing_exp[jCh] = sub(invVal_e, temp_e); + temp_e = norm_l( singularVectors[jCh][currChannel] ); + singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e ); + singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + 
sing_exp[jCh] = sub( invVal_e, temp_e ); #ifdef FIX_1010_OPT_DIV_NORM - temp_e = norm_l(singularVectors[jCh][currChannel]); - singularVectors[jCh][currChannel] = L_shl(singularVectors[jCh][currChannel], temp_e); - sing_exp[jCh] = sub(sing_exp[jCh], temp_e); + temp_e = norm_l( singularVectors[jCh][currChannel] ); + singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e ); + sing_exp[jCh] = sub( sing_exp[jCh], temp_e ); #endif move16(); #endif @@ -1536,11 +1538,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move32(); #ifdef FIX_1010_OPT_DIV - invVal = BASOP_Util_Inv32(maxWithSign_fx( r ), &invVal_e); + invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); #ifdef FIX_1010_OPT_DIV_NORM - temp_e = norm_l(invVal); - invVal = L_shl(invVal, temp_e); - invVal_e = sub(invVal_e, temp_e); + temp_e = norm_l( invVal ); + invVal = L_shl( invVal, temp_e ); + invVal_e = sub( invVal_e, temp_e ); #endif #endif @@ -1559,12 +1561,12 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */ f_e = add( f_e, sub( norm_x_e, r_e ) ); #else - f = Mpy_32_32(norm_x, invVal); /* invVal_e + (norm_x_e - r_e) */ - f_e = add(invVal_e, sub( norm_x_e, r_e ) ); + f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ + f_e = add( invVal_e, sub( norm_x_e, r_e ) ); #ifdef FIX_1010_OPT_DIV_NORM - temp_e = norm_l(f); - f = L_shl(f, temp_e); - f_e = sub(f_e, temp_e); + temp_e = norm_l( f ); + f = L_shl( f, temp_e ); + f_e = sub( f_e, temp_e ); #endif #endif @@ -1747,11 +1749,11 @@ static void biDiagonalReductionRight_fx( #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; - Word32 invVal = BASOP_Util_Inv32(maxWithSign_fx( *sig_x ), &invVal_e); + Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); #ifdef FIX_1010_OPT_DIV_NORM - temp_e = norm_l(invVal); - invVal = L_shl(invVal, temp_e); - invVal_e = sub(invVal_e, temp_e); + 
temp_e = norm_l( invVal ); + invVal = L_shl( invVal, temp_e ); + invVal_e = sub( invVal_e, temp_e ); #endif #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ @@ -1759,15 +1761,15 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_DIV singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ #else - temp_e = norm_l(singularVectors[currChannel][jCh]); - singularVectors[currChannel][jCh] = L_shl(singularVectors[currChannel][jCh], temp_e); - singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ - sing_exp[jCh] = sub(invVal_e, temp_e); + temp_e = norm_l( singularVectors[currChannel][jCh] ); + singularVectors[currChannel][jCh] = L_shl( singularVectors[currChannel][jCh], temp_e ); + singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + sing_exp[jCh] = sub( invVal_e, temp_e ); move16(); #ifdef FIX_1010_OPT_DIV_NORM - temp_e = norm_l(singularVectors[currChannel][jCh]); - singularVectors[currChannel][jCh] = L_shl(singularVectors[currChannel][jCh], temp_e); - sing_exp[jCh] = sub(sing_exp[jCh], temp_e); + temp_e = norm_l( singularVectors[currChannel][jCh] ); + singularVectors[currChannel][jCh] = L_shl( singularVectors[currChannel][jCh], temp_e ); + sing_exp[jCh] = sub( sing_exp[jCh], temp_e ); #endif #endif move32(); @@ -1802,11 +1804,11 @@ static void biDiagonalReductionRight_fx( move32(); #ifdef FIX_1010_OPT_DIV - invVal = BASOP_Util_Inv32(maxWithSign_fx( r ), &invVal_e); + invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); #ifdef FIX_1010_OPT_DIV_NORM - temp_e = norm_l(invVal); - invVal = L_shl(invVal, temp_e); - invVal_e = sub(invVal_e, temp_e); + temp_e = norm_l( invVal ); + invVal = L_shl( invVal, temp_e ); + 
invVal_e = sub( invVal_e, temp_e ); #endif #endif @@ -1815,14 +1817,14 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_DIV secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ #else - temp_e = norm_l(singularVectors[currChannel][jCh]); - secDiag[jCh] = L_shl(singularVectors[currChannel][jCh], temp_e); - secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ - secDiag_exp[jCh] = sub(invVal_e, temp_e); + temp_e = norm_l( singularVectors[currChannel][jCh] ); + secDiag[jCh] = L_shl( singularVectors[currChannel][jCh], temp_e ); + secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + secDiag_exp[jCh] = sub( invVal_e, temp_e ); #ifdef FIX_1010_OPT_DIV_NORM - temp_e = norm_l(secDiag[jCh]); - secDiag[jCh] = L_shl(secDiag[jCh], temp_e); - secDiag_exp[jCh] = sub(secDiag_exp[jCh], temp_e); + temp_e = norm_l( secDiag[jCh] ); + secDiag[jCh] = L_shl( secDiag[jCh], temp_e ); + secDiag_exp[jCh] = sub( secDiag_exp[jCh], temp_e ); #endif move16(); #endif @@ -2010,8 +2012,8 @@ static void singularVectorsAccumulationLeft_fx( IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/ { #ifdef FIX_1010_OPT_DIV - t_ii = BASOP_Util_Inv32(maxWithSign_fx(t_ii), &temp_exp); - t_ii_e = sub(temp_exp, t_ii_e); + t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp ); + t_ii_e = sub( temp_exp, t_ii_e ); #else t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */ t_ii_e = add( 1, sub( temp_exp, t_ii_e ) ); @@ -2028,8 +2030,8 @@ static void singularVectorsAccumulationLeft_fx( norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ } #ifdef 
FIX_1010_OPT_DIV - t_jj = BASOP_Util_Inv32(maxWithSign_fx(singularVectors_Left[nCh][nCh]), &temp_exp); - t_jj = Mpy_32_32(Mpy_32_32( t_ii, norm_y ), t_jj); + t_jj = BASOP_Util_Inv32( maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); + t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj ); t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, @@ -2176,8 +2178,8 @@ static void singularVectorsAccumulationRight_fx( FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/ { #ifdef FIX_1010_OPT_DIV - ratio_float = L_deposit_h(BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 )); /* exp(temp_exp1) */ - singularVectors_Right[iCh][nCh] = L_deposit_h(BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] )); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ + ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */ + singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ #else ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */ singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ @@ -2307,14 +2309,14 @@ static void GivensRotation2_fx( { Word32 r; - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32(z, z), shl(z_e, 1), Mpy_32_32(x, x), shl(x_e, 1), 
out_e ); - r = L_max(r, 1); + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e ); + r = L_max( r, 1 ); *outInv_e = *out_e; move16(); - *result = Sqrt32(r, out_e); + *result = Sqrt32( r, out_e ); move32(); - *resultInv = ISqrt32(r, outInv_e); + *resultInv = ISqrt32( r, outInv_e ); move32(); } #endif @@ -2336,8 +2338,8 @@ static Word32 GivensRotation_fx( #endif #ifdef FIX_1010_OPT_GIVENS - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32(z, z), shl(z_e, 1), Mpy_32_32(x, x), shl(x_e, 1), out_e ); - r = Sqrt32(r, out_e); + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e ); + r = Sqrt32( r, out_e ); #else x_abs = L_abs( x ); z_abs = L_abs( z ); -- GitLab From 578055a56aff640c37008251a37a772bd4bc371e Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Thu, 5 Dec 2024 18:40:13 +0100 Subject: [PATCH 09/41] Use alpha max plus beta min approximation for Givens Rotation. This algorithm requires neither squaring nor a square root and is hopefully numerically more stable, but it requires a data table whose size determines the precision. The pipeline result will tell if this has any future.
--- lib_dec/ivas_svd_dec.c | 296 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 279 insertions(+), 17 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 67a38f9e3..6386bf82d 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -65,10 +65,10 @@ #if 1 #define FIX_1010_OPT_DIV -#define FIX_1010_OPT_DIV_NORM /* precision improvement */ #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV +#define FIX_1010_OPT_GIVENS_AMAX_BMIN #endif /*-----------------------------------------------------------------------* @@ -409,6 +409,204 @@ void svdMat2mat( } #endif +//#define MORE_DEBUG + +#ifdef MORE_DEBUG + +#if (MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS) +#define MAX_MATRIX MAX_INPUT_CHANNELS +#else +#define MAX_MATRIX MAX_OUTPUT_CHANNELS +#endif + +static void matrixFx2Fl( + float r[][MAX_MATRIX], + const Word32 a[][MAX_MATRIX], + const Word16 a_e[MAX_MATRIX], + const int adim1, + const int adim2) +{ + for (int i1=0; i1= 0) && (r < NUM_REGIONS)); + *alpha = alphaBeta[r][0]; + *beta = alphaBeta[r][1]; +} +#endif + #ifdef FIX_1010_OPT_GIVENS_INV static void GivensRotation2_fx( const Word32 x, /* exp(x_e) */ @@ -2308,7 +2542,32 @@ static void GivensRotation2_fx( Word16 *outInv_e ) { Word32 r; +#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN + Word32 az, ax, a, b; + + ax = L_abs(x); + az = L_abs(z); + IF (BASOP_Util_Cmp_Mant32Exp(ax, x_e, az, z_e) > 0) { + get_alpha_beta(ax, x_e, az, z_e, &a, &b); + r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(ax, a), x_e, Mpy_32_32(az, b), z_e, out_e); + } ELSE { + get_alpha_beta(az, z_e, ax, x_e, &a, &b); + r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(az, a), z_e, Mpy_32_32(ax, b), x_e, out_e); + } + *result = r; + move32(); +#if 1 + *outInv_e = shl(*out_e, 1); + *resultInv = ISqrt32( Mpy_32_32(r, r), outInv_e ); + move32(); +#else + *resultInv = L_deposit_h(BASOP_Util_Divide3232_Scale(MAX_32, r, outInv_e)); + move32(); + *outInv_e = sub(*outInv_e, *out_e); + move16(); +#endif +#else r = 
BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e ); r = L_max( r, 1 ); *outInv_e = *out_e; @@ -2318,6 +2577,9 @@ static void GivensRotation2_fx( *resultInv = ISqrt32( r, outInv_e ); move32(); +#endif + + pop_wmops(); } #endif -- GitLab From b324bfbc47d77ad6236c24d25cc888f34310b283 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Thu, 5 Dec 2024 18:47:37 +0100 Subject: [PATCH 10/41] Fix missing include and second Givens Rotation case. --- lib_dec/ivas_svd_dec.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 6386bf82d..d64901834 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -2491,6 +2491,7 @@ static void singularVectorsAccumulationRight( #ifdef IVAS_FLOAT_FIXED #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN +#include /* for M_PI */ #define NUM_REGIONS 1024 static Word32 alphaBeta[NUM_REGIONS][2]; static void get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta) @@ -2600,8 +2601,22 @@ static Word32 GivensRotation_fx( #endif #ifdef FIX_1010_OPT_GIVENS +#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN + Word32 az, ax, a, b; + + ax = L_abs(x); + az = L_abs(z); + IF (BASOP_Util_Cmp_Mant32Exp(ax, x_e, az, z_e) > 0) { + get_alpha_beta(ax, x_e, az, z_e, &a, &b); + r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(ax, a), x_e, Mpy_32_32(az, b), z_e, out_e); + } ELSE { + get_alpha_beta(az, z_e, ax, x_e, &a, &b); + r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(az, a), z_e, Mpy_32_32(ax, b), x_e, out_e); + } +#else r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e ); r = Sqrt32( r, out_e ); +#endif #else x_abs = L_abs( x ); z_abs = L_abs( z ); -- GitLab From d46a16dbffd5301cd8ab0f0d06bb4b59b486f774 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Thu, 5 Dec 2024 18:56:19 +0100 Subject: [PATCH 11/41] define M_PI for the time being. 
--- lib_dec/ivas_svd_dec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index d64901834..b9ab85205 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -2491,7 +2491,9 @@ static void singularVectorsAccumulationRight( #ifdef IVAS_FLOAT_FIXED #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN -#include /* for M_PI */ +#ifndef M_PI +#define M_PI 3.141592653589793 +#endif #define NUM_REGIONS 1024 static Word32 alphaBeta[NUM_REGIONS][2]; static void get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta) -- GitLab From 95b2b53084d111db3d9957e2c36bda3bfdd3ecb9 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Mon, 9 Dec 2024 12:51:28 +0100 Subject: [PATCH 12/41] Fix: remove stray pop_wmops --- lib_dec/ivas_svd_dec.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 1d9e1732e..2712042c0 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -1860,8 +1860,6 @@ static void GivensRotation2_fx( *resultInv = ISqrt32( r, outInv_e ); move32(); #endif - - pop_wmops(); } #endif -- GitLab From ee9a52242e0a6427501673b99df94b0087510a71 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Tue, 10 Dec 2024 16:36:50 +0100 Subject: [PATCH 13/41] Disable FIX_1010_OPT_DIV for testing. 
--- lib_dec/ivas_svd_dec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 2712042c0..b1b8dbe70 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -52,7 +52,7 @@ #define CONVERGENCE_FACTOR_FX 214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */ #if 1 -#define FIX_1010_OPT_DIV +//#define FIX_1010_OPT_DIV #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV -- GitLab From 707aa4289029202111b30c3204c1d6c8efbed650 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Wed, 11 Dec 2024 17:44:57 +0100 Subject: [PATCH 14/41] Activate division optimizations except one, which for some reason causes more error in the test set. Optimize get_alpha_beta() index calculation: more precision and fewer WMOPS. --- lib_dec/ivas_svd_dec.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index b1b8dbe70..07c2b3100 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -52,7 +52,7 @@ #define CONVERGENCE_FACTOR_FX 214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */ #if 1 -//#define FIX_1010_OPT_DIV +#define FIX_1010_OPT_DIV #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV @@ -1430,7 +1430,7 @@ static void biDiagonalReductionRight_fx( #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { -#ifndef FIX_1010_OPT_DIV +#ifndef FIX_1010_OPT_DIV_no singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ #else temp_e = norm_l( singularVectors[currChannel][jCh] ); @@ -1773,7 +1773,7 @@ static void singularVectorsAccumulationRight_fx( #ifndef M_PI #define M_PI 3.141592653589793 #endif -#define NUM_REGIONS 1024 +#define NUM_REGIONS 32 static Word32 alphaBeta[NUM_REGIONS][2]; static void
get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta) { @@ -1799,13 +1799,22 @@ static void get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *a pf = (float)p * powf(2.f, p_e-31); qf = (float)q * powf(2.f, q_e-31); r = floor((double)NUM_REGIONS * 4. * atan2f(qf, pf)/M_PI); -#else - shift = sub(p_e, q_e); - r = mult_r( atan2_fx(L_shr(q, s_max(0, shift)), L_shr(p, s_max(0, negate(shift)))), FL2WORD16_SCALE((float)NUM_REGIONS*4./M_PI, 14)); -#endif - if (r == NUM_REGIONS) { + if (r >= NUM_REGIONS) { r = NUM_REGIONS-1; } +#elif 1 + shift = sub(norm_l(q),1); + q = L_shl(q, shift); + q_e = sub(q_e, shift); + shift = norm_l(p); + p = L_shl(p, shift); + p_e = sub(p_e, shift); + shift = sub(q_e, p_e); + r = shl(div_s(extract_h(q), extract_h(p)), shift); + /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */ + r = add(add(mult(mult(r,r), FL2WORD16_SCALE(-3.672563685340096e-01, 3)), mult(r, FL2WORD16_SCALE(1.375369641423651e+00, 3))), FL2WORD16_SCALE(-6.529424378422714e-03, 3)); + r = s_min(s_max(0, shr(r, 4+3)), NUM_REGIONS-1); +#endif assert((r >= 0) && (r < NUM_REGIONS)); *alpha = alphaBeta[r][0]; *beta = alphaBeta[r][1]; @@ -1840,7 +1849,7 @@ static void GivensRotation2_fx( move32(); #if 1 *outInv_e = shl(*out_e, 1); - *resultInv = ISqrt32( Mpy_32_32(r, r), outInv_e ); + *resultInv = ISqrt32( L_max(1, Mpy_32_32(r, r)), outInv_e ); move32(); #else *resultInv = L_deposit_h(BASOP_Util_Divide3232_Scale(MAX_32, r, outInv_e)); -- GitLab From adacb91ce9de16704331eecda0f3e09ea3c1600a Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Thu, 12 Dec 2024 13:46:11 +0100 Subject: [PATCH 15/41] Apply clang-format. Disable FIX_1010_OPT_GIVENS_AMAX_BMIN and reactivate all FIX_1010_OPT_DIV for testing. 
--- lib_dec/ivas_svd_dec.c | 258 +++++++++++++++++++++++------------------ 1 file changed, 142 insertions(+), 116 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 07c2b3100..da9140857 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -56,7 +56,7 @@ #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV -#define FIX_1010_OPT_GIVENS_AMAX_BMIN +//#define FIX_1010_OPT_GIVENS_AMAX_BMIN #endif /*-----------------------------------------------------------------------* @@ -274,11 +274,11 @@ void svdMat2mat_fx( return; } -//#define MORE_DEBUG +// #define MORE_DEBUG #ifdef MORE_DEBUG -#if (MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS) +#if ( MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS ) #define MAX_MATRIX MAX_INPUT_CHANNELS #else #define MAX_MATRIX MAX_OUTPUT_CHANNELS @@ -289,11 +289,13 @@ static void matrixFx2Fl( const Word32 a[][MAX_MATRIX], const Word16 a_e[MAX_MATRIX], const int adim1, - const int adim2) + const int adim2 ) { - for (int i1=0; i1= 0) && (r < NUM_REGIONS)); + assert( ( r >= 0 ) && ( r < NUM_REGIONS ) ); *alpha = alphaBeta[r][0]; *beta = alphaBeta[r][1]; } @@ -1836,25 +1856,28 @@ static void GivensRotation2_fx( #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN Word32 az, ax, a, b; - ax = L_abs(x); - az = L_abs(z); - IF (BASOP_Util_Cmp_Mant32Exp(ax, x_e, az, z_e) > 0) { - get_alpha_beta(ax, x_e, az, z_e, &a, &b); - r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(ax, a), x_e, Mpy_32_32(az, b), z_e, out_e); - } ELSE { - get_alpha_beta(az, z_e, ax, x_e, &a, &b); - r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(az, a), z_e, Mpy_32_32(ax, b), x_e, out_e); + ax = L_abs( x ); + az = L_abs( z ); + IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 ) + { + get_alpha_beta( ax, x_e, az, z_e, &a, &b ); + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e ); + } + ELSE + { + get_alpha_beta( az, z_e, ax, x_e, &a, &b ); + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e ); } *result 
= r; move32(); #if 1 - *outInv_e = shl(*out_e, 1); - *resultInv = ISqrt32( L_max(1, Mpy_32_32(r, r)), outInv_e ); + *outInv_e = shl( *out_e, 1 ); + *resultInv = ISqrt32( L_max( 1, Mpy_32_32( r, r ) ), outInv_e ); move32(); #else - *resultInv = L_deposit_h(BASOP_Util_Divide3232_Scale(MAX_32, r, outInv_e)); + *resultInv = L_deposit_h( BASOP_Util_Divide3232_Scale( MAX_32, r, outInv_e ) ); move32(); - *outInv_e = sub(*outInv_e, *out_e); + *outInv_e = sub( *outInv_e, *out_e ); move16(); #endif @@ -1892,14 +1915,17 @@ static Word32 GivensRotation_fx( #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN Word32 az, ax, a, b; - ax = L_abs(x); - az = L_abs(z); - IF (BASOP_Util_Cmp_Mant32Exp(ax, x_e, az, z_e) > 0) { - get_alpha_beta(ax, x_e, az, z_e, &a, &b); - r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(ax, a), x_e, Mpy_32_32(az, b), z_e, out_e); - } ELSE { - get_alpha_beta(az, z_e, ax, x_e, &a, &b); - r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(az, a), z_e, Mpy_32_32(ax, b), x_e, out_e); + ax = L_abs( x ); + az = L_abs( z ); + IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 ) + { + get_alpha_beta( ax, x_e, az, z_e, &a, &b ); + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e ); + } + ELSE + { + get_alpha_beta( az, z_e, ax, x_e, &a, &b ); + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e ); } #else r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e ); -- GitLab From 75d25b051f515ffe1d54da4c54f16e0d52f01691 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Mon, 16 Dec 2024 17:47:26 +0100 Subject: [PATCH 16/41] Tune normalizations under the scope of FIX_1010_OPT_DIV. Increase AMAXBMIN interval count to better match reference. 
--- lib_dec/ivas_svd_dec.c | 42 ++++++++++-------------------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index da9140857..1b8b36c0c 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -56,7 +56,7 @@ #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV -//#define FIX_1010_OPT_GIVENS_AMAX_BMIN +#define FIX_1010_OPT_GIVENS_AMAX_BMIN #endif /*-----------------------------------------------------------------------* @@ -997,7 +997,7 @@ static void ApplyQRTransform_fx( singularValues[ch] = GivensRotation_fx( d, d_e, r, r_e, &singularValues_e[ch] ); /* exp(singularValues_e) */ move32(); #endif - IF( GT_32( L_abs( singularValues[ch] ), Mpy_32_32( CONVERGENCE_FACTOR_FX, L_abs( singularValues[ch] ) ) ) ) + IF (singularValues[ch] != 0) { #ifndef FIX_1010_OPT_GIVENS_INV aux = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, singularValues[ch], &aux_e ); /* exp(aux_e + (1 - singularValues_e)) */ @@ -1154,7 +1154,7 @@ static void HouseholderReduction_fx( #ifdef FIX_1010_OPT_DIV static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) { - Word16 sign, shift; + Word16 sign, shift, shift2; sign = 0; move16(); @@ -1173,7 +1173,9 @@ static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) move16(); x = ISqrt32norm( x, px_e ); x = Mpy_32_32( x, x ); - *px_e = add( shl( *px_e, 1 ), shift ); + shift2 = norm_l( x ); + x = L_shl( x, shift2 ); + *px_e = add( shl( *px_e, 1 ), sub(shift, shift2) ); move16(); if ( sign ) @@ -1252,9 +1254,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); - temp_e = norm_l( invVal ); - invVal = L_shl( invVal, temp_e ); - invVal_e = sub( invVal_e, temp_e ); #endif norm_x = 0; move32(); @@ -1269,9 +1268,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], 
temp_e ); singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ sing_exp[jCh] = sub( invVal_e, temp_e ); - temp_e = norm_l( singularVectors[jCh][currChannel] ); - singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e ); - sing_exp[jCh] = sub( sing_exp[jCh], temp_e ); move16(); #endif move32(); @@ -1308,9 +1304,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifdef FIX_1010_OPT_DIV invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); - temp_e = norm_l( invVal ); - invVal = L_shl( invVal, temp_e ); - invVal_e = sub( invVal_e, temp_e ); #endif FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ @@ -1330,9 +1323,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #else f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ f_e = add( invVal_e, sub( norm_x_e, r_e ) ); - temp_e = norm_l( f ); - f = L_shl( f, temp_e ); - f_e = sub( f_e, temp_e ); #endif FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ @@ -1442,13 +1432,10 @@ static void biDiagonalReductionRight_fx( #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); - temp_e = norm_l( invVal ); - invVal = L_shl( invVal, temp_e ); - invVal_e = sub( invVal_e, temp_e ); #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { -#ifndef FIX_1010_OPT_DIV_no +#ifndef FIX_1010_OPT_DIV singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ #else temp_e = norm_l( singularVectors[currChannel][jCh] ); @@ -1456,9 +1443,6 @@ static void biDiagonalReductionRight_fx( singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ sing_exp[jCh] = sub( 
invVal_e, temp_e ); move16(); - temp_e = norm_l( singularVectors[currChannel][jCh] ); - singularVectors[currChannel][jCh] = L_shl( singularVectors[currChannel][jCh], temp_e ); - sing_exp[jCh] = sub( sing_exp[jCh], temp_e ); #endif move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); @@ -1493,9 +1477,6 @@ static void biDiagonalReductionRight_fx( #ifdef FIX_1010_OPT_DIV invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); - temp_e = norm_l( invVal ); - invVal = L_shl( invVal, temp_e ); - invVal_e = sub( invVal_e, temp_e ); #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ @@ -1507,9 +1488,6 @@ static void biDiagonalReductionRight_fx( secDiag[jCh] = L_shl( singularVectors[currChannel][jCh], temp_e ); secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ secDiag_exp[jCh] = sub( invVal_e, temp_e ); - temp_e = norm_l( secDiag[jCh] ); - secDiag[jCh] = L_shl( secDiag[jCh], temp_e ); - secDiag_exp[jCh] = sub( secDiag_exp[jCh], temp_e ); move16(); #endif move32(); @@ -1791,7 +1769,7 @@ static void singularVectorsAccumulationRight_fx( #ifndef M_PI #define M_PI 3.141592653589793 #endif -#define NUM_REGIONS 32 +#define NUM_REGIONS 128 static Word32 alphaBeta[NUM_REGIONS][2]; static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta ) { @@ -1830,10 +1808,10 @@ static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 * p = L_shl( p, shift ); p_e = sub( p_e, shift ); shift = sub( q_e, p_e ); - r = shl( div_s( extract_h( q ), extract_h( p ) ), shift ); + r = shl_sat( div_s( extract_h( q ), extract_h( p ) ), shift ); /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */ r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) ); - r = s_min( s_max( 0, shr( r, 4 + 3 ) ), NUM_REGIONS - 1 ); + 
r = s_min( s_max( 0, shr( r, WORD16_BITS-1-7-3 ) ), NUM_REGIONS - 1 ); #endif assert( ( r >= 0 ) && ( r < NUM_REGIONS ) ); *alpha = alphaBeta[r][0]; -- GitLab From 326588ce8cd0536c0b29e42e7ae86b77061e6b80 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Mon, 16 Dec 2024 17:50:27 +0100 Subject: [PATCH 17/41] Fix clang format. --- lib_dec/ivas_svd_dec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 1b8b36c0c..1b397e7fb 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -997,7 +997,7 @@ static void ApplyQRTransform_fx( singularValues[ch] = GivensRotation_fx( d, d_e, r, r_e, &singularValues_e[ch] ); /* exp(singularValues_e) */ move32(); #endif - IF (singularValues[ch] != 0) + IF( singularValues[ch] != 0 ) { #ifndef FIX_1010_OPT_GIVENS_INV aux = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, singularValues[ch], &aux_e ); /* exp(aux_e + (1 - singularValues_e)) */ @@ -1175,7 +1175,7 @@ static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) x = Mpy_32_32( x, x ); shift2 = norm_l( x ); x = L_shl( x, shift2 ); - *px_e = add( shl( *px_e, 1 ), sub(shift, shift2) ); + *px_e = add( shl( *px_e, 1 ), sub( shift, shift2 ) ); move16(); if ( sign ) @@ -1811,7 +1811,7 @@ static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 * r = shl_sat( div_s( extract_h( q ), extract_h( p ) ), shift ); /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */ r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) ); - r = s_min( s_max( 0, shr( r, WORD16_BITS-1-7-3 ) ), NUM_REGIONS - 1 ); + r = s_min( s_max( 0, shr( r, WORD16_BITS - 1 - 7 - 3 ) ), NUM_REGIONS - 1 ); #endif assert( ( r >= 0 ) && ( r < NUM_REGIONS ) ); *alpha = alphaBeta[r][0]; -- GitLab From 964c80d54138b141a14143639c4c1767246847ba Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: 
Tue, 17 Dec 2024 17:06:51 +0100 Subject: [PATCH 18/41] Remove norm, improves test case stv4ISM48n.wav_4_ISM_with_and_without_extended_metadata_bitrate_switching_from_24_4_kbps_to_256_kbps_48_kHz --- lib_dec/ivas_svd_dec.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 1b397e7fb..035272caa 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -1252,7 +1252,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { #ifdef FIX_1010_OPT_DIV - Word16 invVal_e, temp_e; + Word16 invVal_e; Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); #endif norm_x = 0; @@ -1263,16 +1263,15 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ { #ifndef FIX_1010_OPT_DIV singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + move32(); + sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); + move16(); #else - temp_e = norm_l( singularVectors[jCh][currChannel] ); - singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e ); singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ - sing_exp[jCh] = sub( invVal_e, temp_e ); - move16(); -#endif move32(); - sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); + sing_exp[jCh] = add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ); move16(); +#endif norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ } IF( GT_16( norm_x_e, 0 ) ) -- GitLab From 5c30dabdd630ade7687cfc8b2de7829a48dd3f99 Mon Sep 17 00:00:00 2001 From: 
Manuel Jander Date: Wed, 18 Dec 2024 11:45:21 +0100 Subject: [PATCH 19/41] Add normalization under FIX_1010_OPT_DIV again but without overwriting source data which format should not be changed. --- lib_dec/ivas_svd_dec.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 035272caa..68b2aaeea 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -1252,7 +1252,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { #ifdef FIX_1010_OPT_DIV - Word16 invVal_e; + Word16 invVal_e, temp_e; Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); #endif norm_x = 0; @@ -1267,9 +1267,10 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); move16(); #else - singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + temp_e = norm_l( singularVectors[jCh][currChannel] ); + singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); - sing_exp[jCh] = add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ); + sing_exp[jCh] = add( sub(invVal_e, temp_e), sub( *singularVectors_e, *sig_x_e ) ); move16(); #endif norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ @@ -1438,8 +1439,7 @@ static void biDiagonalReductionRight_fx( singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ #else temp_e = norm_l( singularVectors[currChannel][jCh] ); - 
singularVectors[currChannel][jCh] = L_shl( singularVectors[currChannel][jCh], temp_e ); - singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ sing_exp[jCh] = sub( invVal_e, temp_e ); move16(); #endif @@ -1484,8 +1484,7 @@ static void biDiagonalReductionRight_fx( secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ #else temp_e = norm_l( singularVectors[currChannel][jCh] ); - secDiag[jCh] = L_shl( singularVectors[currChannel][jCh], temp_e ); - secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ secDiag_exp[jCh] = sub( invVal_e, temp_e ); move16(); #endif @@ -1603,7 +1602,7 @@ static void singularVectorsAccumulationLeft_fx( IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/ { -#ifdef FIX_1010_OPT_DIV +#ifdef FIX_1010_OPT_DIV t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp ); t_ii_e = sub( temp_exp, t_ii_e ); #else @@ -1622,9 +1621,10 @@ static void singularVectorsAccumulationLeft_fx( norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ } #ifdef FIX_1010_OPT_DIV - t_jj = BASOP_Util_Inv32( maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); + Word16 temp_e = norm_l(singularVectors_Left[nCh][nCh]); + t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl(singularVectors_Left[nCh][nCh], temp_e) ), &temp_exp ); t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), 
t_jj ); - t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); + t_jj_e = add( add(temp_exp, temp_e), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); -- GitLab From a728d3882b0672565d6b5460add7bb02681f1055 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Wed, 18 Dec 2024 11:50:33 +0100 Subject: [PATCH 20/41] clang-format --- lib_dec/ivas_svd_dec.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 68b2aaeea..a8a8cd265 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -1270,7 +1270,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ temp_e = norm_l( singularVectors[jCh][currChannel] ); singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); - sing_exp[jCh] = add( sub(invVal_e, temp_e), sub( *singularVectors_e, *sig_x_e ) ); + sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) ); move16(); #endif norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ @@ -1602,7 +1602,7 @@ static void singularVectorsAccumulationLeft_fx( IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/ { -#ifdef FIX_1010_OPT_DIV +#ifdef FIX_1010_OPT_DIV t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp ); t_ii_e = sub( temp_exp, t_ii_e ); #else @@ -1621,10 +1621,10 @@ static void singularVectorsAccumulationLeft_fx( norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], 
singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ } #ifdef FIX_1010_OPT_DIV - Word16 temp_e = norm_l(singularVectors_Left[nCh][nCh]); - t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl(singularVectors_Left[nCh][nCh], temp_e) ), &temp_exp ); + Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] ); + t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp ); t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj ); - t_jj_e = add( add(temp_exp, temp_e), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); + t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); -- GitLab From e3899b161bebc2598c4110f21a178184655b39c2 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Wed, 18 Dec 2024 14:27:01 +0100 Subject: [PATCH 21/41] Disable one FIX_1010_OPT_DIV case. 
--- lib_dec/ivas_svd_dec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index a8a8cd265..31d9f9660 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -1261,7 +1261,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move16(); FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { -#ifndef FIX_1010_OPT_DIV +#ifndef FIX_1010_OPT_DIVno singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); -- GitLab From 8ac486d2188051b401203d1ceac613a8c6461016 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Wed, 18 Dec 2024 14:32:36 +0100 Subject: [PATCH 22/41] Fix warning. --- lib_dec/ivas_svd_dec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 31d9f9660..a5ccdce3a 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -1252,7 +1252,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { #ifdef FIX_1010_OPT_DIV - Word16 invVal_e, temp_e; + Word16 invVal_e; Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); #endif norm_x = 0; -- GitLab From c87442fc3a60b1bb830eb9658ef67762891ab78d Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Wed, 18 Dec 2024 16:44:54 +0100 Subject: [PATCH 23/41] Enable FIX_1010_OPT_DIV case again because of crash, disable FIX_1010_OPT_GIVENS_AMAX_BMIN. 
--- lib_dec/ivas_svd_dec.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index a5ccdce3a..eb3757b5b 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -53,10 +53,9 @@ #if 1 #define FIX_1010_OPT_DIV - #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV -#define FIX_1010_OPT_GIVENS_AMAX_BMIN +//#define FIX_1010_OPT_GIVENS_AMAX_BMIN #endif /*-----------------------------------------------------------------------* @@ -1261,13 +1260,13 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move16(); FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { -#ifndef FIX_1010_OPT_DIVno +#ifndef FIX_1010_OPT_DIV singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); move16(); #else - temp_e = norm_l( singularVectors[jCh][currChannel] ); + Word16 temp_e = norm_l( singularVectors[jCh][currChannel] ); singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) ); @@ -1807,7 +1806,7 @@ static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 * p = L_shl( p, shift ); p_e = sub( p_e, shift ); shift = sub( q_e, p_e ); - r = shl_sat( div_s( extract_h( q ), extract_h( p ) ), shift ); + r = shl_sat( div_s( extract_h( q ), s_max(1, extract_h( p ) ) ), shift ); /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */ r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) ); r = s_min( s_max( 0, shr( r, WORD16_BITS - 
1 - 7 - 3 ) ), NUM_REGIONS - 1 ); -- GitLab From a18b9b9631a998f8a788358702d07406e72148a4 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Thu, 19 Dec 2024 15:46:46 +0100 Subject: [PATCH 24/41] Apply bug fix from issue 1139 and add wmops/precision improvement macro FIX_1010_OPT_SINGLE_RESCALE. --- lib_dec/ivas_svd_dec.c | 170 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 152 insertions(+), 18 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index eb3757b5b..730bb3d42 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -53,9 +53,10 @@ #if 1 #define FIX_1010_OPT_DIV +#define FIX_1010_OPT_SINGLE_RESCALE #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV -//#define FIX_1010_OPT_GIVENS_AMAX_BMIN +// #define FIX_1010_OPT_GIVENS_AMAX_BMIN #endif /*-----------------------------------------------------------------------* @@ -79,7 +80,11 @@ static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 *singularVectors_e, +#else + Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS], +#endif Word16 singularValues_e[MAX_OUTPUT_CHANNELS], Word16 *secDiag_e, const Word16 nChannelsL, /* Q0 */ @@ -93,7 +98,11 @@ static void biDiagonalReductionLeft_fx( static void biDiagonalReductionRight_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 *singularVectors_e, +#else + Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS], +#endif Word16 *secDiag_e, const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ @@ -1119,11 +1128,29 @@ static void HouseholderReduction_fx( Word16 sig_x_fx_e = 0; move16(); +#ifdef FIX_1010_OPT_SINGLE_RESCALE + Word16 iCh, jCh; + Word16 
singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + FOR( jCh = 0; jCh < nChannelsL; jCh++ ) + { + FOR( iCh = 0; iCh < nChannelsC; iCh++ ) + { + singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e; + move32(); + } + } +#endif + /* Bidiagonal Reduction for every channel */ FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, &singularVectors_Left_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx ); biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, &singularVectors_Left_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx ); +#else + biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, singularVectors_Left_fx_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx ); + biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx ); +#endif Word16 L_temp_e; Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), *secDiag_fx_e, &L_temp_e ); /* exp(L_temp_e) */ @@ -1136,6 +1163,30 @@ static void HouseholderReduction_fx( } } +#ifdef FIX_1010_OPT_SINGLE_RESCALE + // rescaling block + Word16 exp_max = 0; + move16(); + FOR( jCh = 0; jCh < nChannelsL; jCh++ ) + { + FOR( iCh = 0; iCh < nChannelsC; iCh++ ) + { + exp_max = s_max( exp_max, singularVectors_Left_fx_e[jCh][iCh] ); + } + } + + FOR( jCh = 0; jCh < nChannelsL; jCh++ ) + { + FOR( iCh = 0; iCh < nChannelsC; iCh++ ) + { + singularVectors_Left_fx[jCh][iCh] = L_shr_r( singularVectors_Left_fx[jCh][iCh], sub( exp_max, singularVectors_Left_fx_e[jCh][iCh] ) ); /* exp(exp_max) */ + move32(); + } + } + singularVectors_Left_e = exp_max; + move16(); 
+#endif + /* SingularVecotr Accumulation */ singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC ); @@ -1189,7 +1240,11 @@ static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 *singularVectors_e, +#else + Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS], +#endif Word16 singularValues_e[MAX_OUTPUT_CHANNELS], Word16 *secDiag_e, const Word16 nChannelsL, /* Q0 */ @@ -1203,14 +1258,16 @@ static void biDiagonalReductionLeft_fx( Word16 iCh, jCh, idx; Word32 norm_x, f, r; Word16 norm_x_e, f_e, r_e; - Word16 sing_exp[MAX_OUTPUT_CHANNELS]; - Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 }; Word32 L_temp; Word16 L_temp_e; +#ifndef FIX_1010_OPT_SINGLE_RESCALE + Word16 sing_exp[MAX_OUTPUT_CHANNELS]; + Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 }; FOR( jCh = 0; jCh < MAX_OUTPUT_CHANNELS; jCh++ ) { set16_fx( sing_exp2[jCh], *singularVectors_e, MAX_OUTPUT_CHANNELS ); } +#endif secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); /* exp(sig_x_e) */ move32(); @@ -1245,14 +1302,20 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */ +#else + ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */ +#endif } IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { #ifdef FIX_1010_OPT_DIV Word16 invVal_e; - Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( 
*sig_x ), &invVal_e ); + Word32 invVal; + /* BASOP_Util_Inv32 is not accurate enogh in this case. */ + invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); #endif norm_x = 0; move32(); @@ -1265,14 +1328,21 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else Word16 temp_e = norm_l( singularVectors[jCh][currChannel] ); singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); - sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) ); +#ifndef FIX_1010_OPT_SINGLE_RESCALE + sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e ); move16(); -#endif norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#else + singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e ); + move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#endif +#endif } IF( GT_16( norm_x_e, 0 ) ) { @@ -1297,8 +1367,13 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move32(); } +#ifndef FIX_1010_OPT_SINGLE_RESCALE r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = 
BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */ +#else + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ +#endif move32(); #ifdef FIX_1010_OPT_DIV @@ -1313,7 +1388,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move16(); FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e ), &norm_x_e ); /* exp(norm_x_e) */ +#else + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */ +#endif } #ifndef FIX_1010_OPT_DIV @@ -1326,7 +1405,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], *singularVectors_e, Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, sing_exp[jCh] ), &sing_exp2[jCh][iCh] ); /* exp( sing_exp2) */ +#else + singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] ); +#endif move32(); } } @@ -1336,10 +1419,15 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ { 
singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], ( *sig_x ) ); /* sing_exp + sig_x_e */ move32(); +#ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp2[jCh][currChannel] = add( sing_exp[jCh], *sig_x_e ); +#else + singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e ); +#endif move16(); } +#ifndef FIX_1010_OPT_SINGLE_RESCALE // rescaling block Word16 exp_max = *singularVectors_e; move16(); @@ -1361,6 +1449,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ } *singularVectors_e = exp_max; move16(); +#endif } // rescaling block @@ -1382,7 +1471,11 @@ return; static void biDiagonalReductionRight_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 *singularVectors_e, +#else + Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS], +#endif Word16 *secDiag_e, const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ @@ -1395,15 +1488,17 @@ static void biDiagonalReductionRight_fx( Word16 iCh, jCh, idx; Word32 norm_x, r; Word16 norm_x_e, r_e; - Word16 sing_exp[MAX_OUTPUT_CHANNELS]; Word16 secDiag_exp[MAX_OUTPUT_CHANNELS]; - Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 }; Word32 L_temp; Word16 L_temp_e; +#ifndef FIX_1010_OPT_SINGLE_RESCALE + Word16 sing_exp[MAX_OUTPUT_CHANNELS]; + Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 }; FOR( jCh = 0; jCh < MAX_OUTPUT_CHANNELS; jCh++ ) { set16_fx( sing_exp2[jCh], *singularVectors_e, MAX_OUTPUT_CHANNELS ); } +#endif set16_fx( secDiag_exp, *secDiag_e, MAX_OUTPUT_CHANNELS ); /* Setting values to 0 */ @@ -1418,7 +1513,11 @@ static void biDiagonalReductionRight_fx( FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), *singularVectors_e, sig_x_e ); /* 
exp(sig_x_e) */ +#else + ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), singularVectors2_e[currChannel][jCh], sig_x_e ); /* exp(sig_x_e) */ +#endif } IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ @@ -1436,16 +1535,25 @@ static void biDiagonalReductionRight_fx( { #ifndef FIX_1010_OPT_DIV singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ + move32(); + sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); + move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else temp_e = norm_l( singularVectors[currChannel][jCh] ); singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ - sing_exp[jCh] = sub( invVal_e, temp_e ); - move16(); -#endif move32(); - sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); + +#ifndef FIX_1010_OPT_SINGLE_RESCALE + sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) ); move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#else + singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) ); + move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#endif +#endif } IF( GT_16( norm_x_e, 0 ) ) { @@ -1469,8 +1577,13 @@ 
static void biDiagonalReductionRight_fx( move32(); } +#ifndef FIX_1010_OPT_SINGLE_RESCALE r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[idx], -( *g ), 0, &sing_exp[idx] ); /* exp(sing_exp) */ +#else + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* exp(sing_exp) */ +#endif move32(); #ifdef FIX_1010_OPT_DIV @@ -1481,15 +1594,20 @@ static void biDiagonalReductionRight_fx( { #ifndef FIX_1010_OPT_DIV secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ + move32(); + secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) ); + move32(); #else temp_e = norm_l( singularVectors[currChannel][jCh] ); secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ - secDiag_exp[jCh] = sub( invVal_e, temp_e ); + move32(); +#ifndef FIX_1010_OPT_SINGLE_RESCALE + secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( sing_exp[jCh], r_e ) ); +#else + secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], r_e ) ); +#endif move16(); #endif - move32(); - secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) ); - move32(); } FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /* nChannelsL */ @@ -1500,12 +1618,20 @@ static void biDiagonalReductionRight_fx( move16(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { 
+#ifndef FIX_1010_OPT_SINGLE_RESCALE norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( *singularVectors_e, sing_exp[jCh] ), &norm_x_e ); /* exp(norm_x_e) */ +#else + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */ +#endif } FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */ +#else + singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ +#endif move32(); } } @@ -1514,10 +1640,15 @@ static void biDiagonalReductionRight_fx( { singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], ( *sig_x ) ); /* exp(sing_exp + sig_x_e) */ move32(); +#ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp2[currChannel][jCh] = add( sing_exp[jCh], *sig_x_e ); +#else + singularVectors2_e[currChannel][jCh] = add( singularVectors2_e[currChannel][jCh], *sig_x_e ); +#endif move16(); } + /*rescaling block*/ Word16 exp_max = *secDiag_e; move16(); @@ -1530,8 +1661,10 @@ static void biDiagonalReductionRight_fx( secDiag[jCh] = L_shr_r( secDiag[jCh], sub( exp_max, secDiag_exp[jCh] ) ); /* exp(exp_max) */ move32(); } + *secDiag_e = exp_max; + move16(); - +#ifndef FIX_1010_OPT_SINGLE_RESCALE exp_max = *singularVectors_e; move16(); FOR( iCh = 0; iCh < nChannelsL; iCh++ ) @@ -1552,6 +1685,7 @@ static void biDiagonalReductionRight_fx( } *singularVectors_e = exp_max; move16(); +#endif } 
} @@ -1806,7 +1940,7 @@ static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 * p = L_shl( p, shift ); p_e = sub( p_e, shift ); shift = sub( q_e, p_e ); - r = shl_sat( div_s( extract_h( q ), s_max(1, extract_h( p ) ) ), shift ); + r = shl_sat( div_s( extract_h( q ), s_max( 1, extract_h( p ) ) ), shift ); /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */ r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) ); r = s_min( s_max( 0, shr( r, WORD16_BITS - 1 - 7 - 3 ) ), NUM_REGIONS - 1 ); -- GitLab From 2815f37b9726594427001807f3bca6fff814f5c2 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Thu, 19 Dec 2024 15:49:32 +0100 Subject: [PATCH 25/41] clang format --- lib_dec/ivas_svd_dec.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 730bb3d42..d7245a8d4 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -1336,7 +1336,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e ); move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e ); move16(); @@ -1371,8 +1371,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], 
-norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */ #else - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ - singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ #endif move32(); @@ -1399,7 +1399,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */ f_e = add( f_e, sub( norm_x_e, r_e ) ); #else - f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ + f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ f_e = add( invVal_e, sub( norm_x_e, r_e ) ); #endif @@ -1547,7 +1547,7 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) ); move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else 
singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) ); move16(); @@ -1630,7 +1630,7 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */ #else - singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ + singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ #endif move32(); } -- GitLab From aff979224d95cf9ad587b5ce7a1c10ee3efec6c8 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Thu, 19 Dec 2024 17:54:44 +0100 Subject: [PATCH 26/41] Couple use of BASOP_Util_Inv32 to macro FIX_1010_OPT_INV_USING_INVSQRT and disable it to improve accuracy. 
--- lib_dec/ivas_svd_dec.c | 65 +++++++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 11 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index d7245a8d4..c70de847e 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -53,6 +53,7 @@ #if 1 #define FIX_1010_OPT_DIV +// #define FIX_1010_OPT_INV_USING_INVSQRT #define FIX_1010_OPT_SINGLE_RESCALE #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV @@ -1201,7 +1202,7 @@ static void HouseholderReduction_fx( * *-------------------------------------------------------------------------*/ -#ifdef FIX_1010_OPT_DIV +#ifdef FIX_1010_OPT_INV_USING_INVSQRT static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) { Word16 sign, shift, shift2; @@ -1314,8 +1315,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifdef FIX_1010_OPT_DIV Word16 invVal_e; Word32 invVal; - /* BASOP_Util_Inv32 is not accurate enogh in this case. */ +#ifdef FIX_1010_OPT_INV_USING_INVSQRT + invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); +#else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); +#endif #endif norm_x = 0; move32(); @@ -1324,11 +1328,19 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { #ifndef FIX_1010_OPT_DIV +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#else + singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( 
singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + move32(); + singularVectors2_e[jCh][currChannel] = add( L_temp_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ); + move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#endif #else Word16 temp_e = norm_l( singularVectors[jCh][currChannel] ); singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ @@ -1336,7 +1348,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e ); move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e ); move16(); @@ -1371,13 +1383,17 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */ #else - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, 
norm_x_e, &r_e ); /* exp(r_e) */ - singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ #endif move32(); #ifdef FIX_1010_OPT_DIV +#ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); +#else + invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); +#endif #endif FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ @@ -1399,7 +1415,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */ f_e = add( f_e, sub( norm_x_e, r_e ) ); #else - f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ + f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ f_e = add( invVal_e, sub( norm_x_e, r_e ) ); #endif @@ -1529,25 +1545,37 @@ static void biDiagonalReductionRight_fx( #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; - Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); + Word32 invVal; +#ifdef FIX_1010_OPT_INV_USING_INVSQRT + invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); +#else + invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); +#endif #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { #ifndef FIX_1010_OPT_DIV +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( 
singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#else + singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ + move32(); + singularVectors2_e[currChannel][jCh] = add( L_temp_e, sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) ); + move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#endif #else temp_e = norm_l( singularVectors[currChannel][jCh] ); singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); - #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) ); move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) ); move16(); @@ -1587,16 +1615,27 @@ static void biDiagonalReductionRight_fx( move32(); #ifdef FIX_1010_OPT_DIV +#ifdef 
FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); +#else + invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); +#endif #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { #ifndef FIX_1010_OPT_DIV +#ifndef FIX_1010_OPT_SINGLE_RESCALE secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ move32(); secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) ); move32(); +#else + secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ + move32(); + secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( singularVectors2_e[currChannel][jCh], r_e ) ); + move32(); +#endif #else temp_e = norm_l( singularVectors[currChannel][jCh] ); secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ @@ -1630,7 +1669,7 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */ #else - singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ + singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ #endif move32(); } @@ -1736,7 +1775,11 @@ static void singularVectorsAccumulationLeft_fx( IF( t_ii ) 
/*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/ { #ifdef FIX_1010_OPT_DIV +#ifdef FIX_1010_OPT_INV_USING_INVSQRT t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp ); +#else + t_ii = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp ); +#endif t_ii_e = sub( temp_exp, t_ii_e ); #else t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */ @@ -1753,7 +1796,7 @@ static void singularVectorsAccumulationLeft_fx( { norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ } -#ifdef FIX_1010_OPT_DIV +#ifdef FIX_1010_OPT_INV_USING_INVSQRT Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] ); t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp ); t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj ); -- GitLab From 1b32a0f0daa4fa5151860e972d6813db59051631 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Tue, 7 Jan 2025 09:45:08 +0100 Subject: [PATCH 27/41] clang format fix --- lib_dec/ivas_svd_dec.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index c70de847e..33de89493 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -1348,7 +1348,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e ); move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), 
&norm_x_e ); /* exp(norm_x_e) */ #else singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e ); move16(); @@ -1383,8 +1383,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */ #else - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ - singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ #endif move32(); @@ -1415,7 +1415,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */ f_e = add( f_e, sub( norm_x_e, r_e ) ); #else - f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ + f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ f_e = add( invVal_e, sub( norm_x_e, r_e ) ); #endif @@ -1575,7 +1575,7 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) ); move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, 
norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) ); move16(); @@ -1669,7 +1669,7 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */ #else - singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ + singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ #endif move32(); } -- GitLab From 1c6d7b7f34beefe99cb38d622ca34e139b14d8db Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Thu, 9 Jan 2025 08:59:13 +0100 Subject: [PATCH 28/41] Extend dynamic scale to singularVectorsAccumulationRight_fx and singularVectorsAccumulationLeft_fx which use dynamic scale internally anyway, to fix MLD failure. This reduces intermediate denormalizations, giving more precision and less complexity. 
--- lib_dec/ivas_svd_dec.c | 134 +++++++++++++++++++++++++++++------------ 1 file changed, 95 insertions(+), 39 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 33de89493..e280abb42 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -116,7 +116,11 @@ static void biDiagonalReductionRight_fx( static void singularVectorsAccumulationLeft_fx( Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 singularVectors_e, +#else + Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS], +#endif Word16 singularValues_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC /* Q0 */ @@ -126,7 +130,11 @@ static void singularVectorsAccumulationRight_fx( Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */ Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 singularVectors_e, +#else + Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS], +#endif Word16 secDiag_e, const Word16 nChannelsC /* Q0 */ ); @@ -283,9 +291,12 @@ void svdMat2mat_fx( return; } +#ifndef DEBUG_SVD_TEST +#define DEBUG_SVD_PRECISION +#endif // #define MORE_DEBUG -#ifdef MORE_DEBUG +#if defined( DEBUG_SVD_PRECISION ) || defined( MORE_DEBUG ) #if ( MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS ) #define MAX_MATRIX MAX_INPUT_CHANNELS @@ -375,7 +386,14 @@ static float matrixDifference( { for ( int i2 = 0; i2 < dim2; i2++ ) { - r += fabsf( ( b[i1][i2] - a[i1][i2] ) / a[i1][i2] ); + if ( a[i1][i2] != 0.f ) + { + r += fabsf( ( b[i1][i2] - a[i1][i2] ) / a[i1][i2] ); + } + else + { + r += fabsf( b[i1][i2] - a[i1][i2] ); + } } } @@ -447,6 +465,7 @@ static void svd_accuracy_test_fx( float singularVectors_Right[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; 
float result; int dimSingular; + int problematic = 0; /* Convert to float and Create singular values matrix from signular values vector */ for ( int x = 0; x < MAX_MATRIX; x++ ) @@ -474,6 +493,10 @@ static void svd_accuracy_test_fx( matrixTranspose( tmp1, singularVectors_Left, nChannelsL, nChannelsC ); /* CxL */ matrixProduct( tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC ); /* CxC */ result = matrixTestIdentity( tmp2, nChannelsC ); + if ( result >= 1.0 ) + { + problematic = 1; + } #ifdef MORE_DEBUG matrixPrint( tmp2, nChannelsC, nChannelsC, "U\'*U" ); #endif @@ -483,6 +506,10 @@ static void svd_accuracy_test_fx( matrixTranspose( tmp1, singularVectors_Right, nChannelsC, nChannelsC ); /* CxC */ matrixProduct( tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC ); /* CxC */ result = matrixTestIdentity( tmp2, nChannelsC ); + if ( result >= 1.0 ) + { + problematic = 1; + } #ifdef MORE_DEBUG matrixPrint( tmp2, nChannelsC, nChannelsC, "V*V\'" ); #endif @@ -493,10 +520,19 @@ static void svd_accuracy_test_fx( matrixTranspose( tmp3, singularVectors_Right, nChannelsC, nChannelsC ); /* CxC */ matrixProduct( tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC ); /* LxC */ result = matrixDifference( tmp2, InputMatrix, nChannelsL, nChannelsC ); + if ( result >= 1.0 ) + { + problematic = 1; + } #ifdef MORE_DEBUG matrixPrint( tmp2, nChannelsL, nChannelsC, "U*S*V\'" ); #endif printf( "U * S * V' difference to M is %f\n", result ); + + if ( problematic ) + { + matrixPrint( InputMatrix, nChannelsL, nChannelsC, "Problematic Input" ); + } } #endif @@ -608,7 +644,7 @@ Word16 svd_fx( WHILE( EQ_16( condition, 1 ) ); pop_wmops(); -#ifdef MORE_DEBUG +#ifdef DEBUG_SVD_PRECISION svd_accuracy_test_fx( InputMatrix, InputMatrix_e, @@ -1137,7 +1173,7 @@ static void HouseholderReduction_fx( FOR( iCh = 0; iCh < nChannelsC; iCh++ ) { singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e; - move32(); + move16(); } } 
#endif @@ -1164,34 +1200,14 @@ static void HouseholderReduction_fx( } } -#ifdef FIX_1010_OPT_SINGLE_RESCALE - // rescaling block - Word16 exp_max = 0; - move16(); - FOR( jCh = 0; jCh < nChannelsL; jCh++ ) - { - FOR( iCh = 0; iCh < nChannelsC; iCh++ ) - { - exp_max = s_max( exp_max, singularVectors_Left_fx_e[jCh][iCh] ); - } - } - - FOR( jCh = 0; jCh < nChannelsL; jCh++ ) - { - FOR( iCh = 0; iCh < nChannelsC; iCh++ ) - { - singularVectors_Left_fx[jCh][iCh] = L_shr_r( singularVectors_Left_fx[jCh][iCh], sub( exp_max, singularVectors_Left_fx_e[jCh][iCh] ) ); /* exp(exp_max) */ - move32(); - } - } - singularVectors_Left_e = exp_max; - move16(); -#endif - /* SingularVecotr Accumulation */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC ); - singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_e, singularValues_fx_e, nChannelsL, nChannelsC ); +#else + singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, *secDiag_fx_e, nChannelsC ); + singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC ); +#endif return; } @@ -1348,7 +1364,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e ); move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else 
singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e ); move16(); @@ -1383,8 +1399,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */ #else - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ - singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ #endif move32(); @@ -1415,7 +1431,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */ f_e = add( f_e, sub( norm_x_e, r_e ) ); #else - f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ + f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ f_e = add( invVal_e, sub( norm_x_e, r_e ) ); #endif @@ -1575,7 +1591,7 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) ); move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( 
singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) ); move16(); @@ -1669,7 +1685,7 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */ #else - singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ + singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ #endif move32(); } @@ -1738,9 +1754,13 @@ static void biDiagonalReductionRight_fx( *-------------------------------------------------------------------------*/ static void singularVectorsAccumulationLeft_fx( - Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */ + Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 singularVectors_e, +#else + Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS], +#endif Word16 singularValues_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC /* Q0 
*/ @@ -1750,11 +1770,13 @@ static void singularVectorsAccumulationLeft_fx( Word16 nChannels; Word32 norm_y, t_jj, t_ii; Word16 norm_y_e, t_jj_e, t_ii_e, temp_exp; +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 }; FOR( nCh = 0; nCh < MAX_OUTPUT_CHANNELS; nCh++ ) { set16_fx( sing_exp2[nCh], singularVectors_e, MAX_OUTPUT_CHANNELS ); } +#endif /* Processing */ nChannels = s_min( nChannelsL, nChannelsC ); /* min(nChannelsL,ChannelsC) Q0*/ @@ -1794,7 +1816,11 @@ static void singularVectorsAccumulationLeft_fx( move16(); FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ +#else + norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ +#endif } #ifdef FIX_1010_OPT_INV_USING_INVSQRT Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] ); @@ -1803,11 +1829,19 @@ static void singularVectorsAccumulationLeft_fx( t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, +#ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); +#else + t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) ); +#endif #endif FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], sing_exp2[k][iCh], 
Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, sing_exp2[k][nCh] ), &sing_exp2[k][iCh] ); /* exp(sing_exp2) */ +#else + singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], singularVectors_Left_e[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, singularVectors_Left_e[k][nCh] ), &singularVectors_Left_e[k][iCh] ); /* exp(sing_exp2) */ +#endif move32(); } } @@ -1816,7 +1850,11 @@ static void singularVectorsAccumulationLeft_fx( { singularVectors_Left[iCh][nCh] = Mpy_32_32( singularVectors_Left[iCh][nCh], t_ii ); /* exp(sing_exp2 + t_ii_e) */ move32(); +#ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp2[iCh][nCh] = add( sing_exp2[iCh][nCh], t_ii_e ); +#else + singularVectors_Left_e[iCh][nCh] = add( singularVectors_Left_e[iCh][nCh], t_ii_e ); +#endif move16(); } } @@ -1828,8 +1866,11 @@ static void singularVectorsAccumulationLeft_fx( move32(); } } - +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], sing_exp2[nCh][nCh], ONE_IN_Q30, 1, &sing_exp2[nCh][nCh] ); /* exp(sing_exp2) */ +#else + singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], singularVectors_Left_e[nCh][nCh], ONE_IN_Q30, 1, &singularVectors_Left_e[nCh][nCh] ); /* exp(sing_exp2) */ +#endif move32(); } // fclose(fp); @@ -1837,7 +1878,11 @@ static void singularVectorsAccumulationLeft_fx( { FOR( iCh = 0; iCh < nChannelsC; iCh++ ) { +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], sing_exp2[nCh][iCh] ); /* Q31 */ +#else + singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */ +#endif move32(); } } @@ -1852,10 +1897,14 @@ static void singularVectorsAccumulationLeft_fx( *-------------------------------------------------------------------------*/ static void singularVectorsAccumulationRight_fx( - Word32 
singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ - Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ + Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */ + Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 singularVectors_e, +#else + Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS], +#endif Word16 secDiag_e, const Word16 nChannelsC /* Q0 */ ) @@ -1888,6 +1937,9 @@ static void singularVectorsAccumulationRight_fx( #else ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */ singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ +#endif +#ifdef FIX_1010_OPT_SINGLE_RESCALE + temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) ); #endif move32(); sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e ) ); @@ -1904,7 +1956,11 @@ static void singularVectorsAccumulationRight_fx( FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_e, sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ +#else + norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_Left_e[nCh][k], sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ +#endif } FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */ -- GitLab From 
34dd3d7664b2fc34cbf5799f672d63f850931fed Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Thu, 9 Jan 2025 09:44:05 +0100 Subject: [PATCH 29/41] Clang fix format (trailing comment alignment behaves different in my local clang-format version). --- lib_dec/ivas_svd_dec.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index e280abb42..bcd3670f3 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -1364,7 +1364,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e ); move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e ); move16(); @@ -1399,8 +1399,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */ #else - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ - singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] 
); /* sing_exp */ + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ #endif move32(); @@ -1431,7 +1431,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */ f_e = add( f_e, sub( norm_x_e, r_e ) ); #else - f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ + f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ f_e = add( invVal_e, sub( norm_x_e, r_e ) ); #endif @@ -1591,7 +1591,7 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) ); move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) ); move16(); @@ -1685,7 +1685,7 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */ #else - singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( 
norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ + singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ #endif move32(); } @@ -1828,7 +1828,7 @@ static void singularVectorsAccumulationLeft_fx( t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj ); t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else - t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, + t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, #ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else @@ -1881,7 +1881,7 @@ static void singularVectorsAccumulationLeft_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], sing_exp2[nCh][iCh] ); /* Q31 */ #else - singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */ + singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */ #endif move32(); } @@ -1935,8 +1935,8 @@ static void singularVectorsAccumulationRight_fx( ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */ singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ 
#else - ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */ - singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ + ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */ + singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ #endif #ifdef FIX_1010_OPT_SINGLE_RESCALE temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) ); -- GitLab From f4471291a58f8694105cf3dd111376d3f7bffc5a Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Thu, 9 Jan 2025 09:58:45 +0100 Subject: [PATCH 30/41] Deactivate debug code. --- lib_dec/ivas_svd_dec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index bcd3670f3..72caa9b1b 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -292,7 +292,7 @@ void svdMat2mat_fx( } #ifndef DEBUG_SVD_TEST -#define DEBUG_SVD_PRECISION +// #define DEBUG_SVD_PRECISION #endif // #define MORE_DEBUG -- GitLab From 80f7175e36abdf2564fe6f919360c8350a698d7a Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Thu, 9 Jan 2025 14:58:23 +0100 Subject: [PATCH 31/41] Fix compile error for FIX_1010_OPT_INV_USING_INVSQRT, but keep it disabled because of regressions. Reduce threshold for SVD problem debug code (disabled by default). 
--- lib_dec/ivas_svd_dec.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 72caa9b1b..8da6e2f4c 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -441,6 +441,7 @@ static float matrixTestIdentity( return r; } +#define PROBLEMATIC_THRESHOLD 0.5f static void svd_accuracy_test_fx( Word32 InputMatrixFx[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) InputMatrix_e*/ Word16 InputMatrixFx_e, @@ -493,7 +494,7 @@ static void svd_accuracy_test_fx( matrixTranspose( tmp1, singularVectors_Left, nChannelsL, nChannelsC ); /* CxL */ matrixProduct( tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC ); /* CxC */ result = matrixTestIdentity( tmp2, nChannelsC ); - if ( result >= 1.0 ) + if ( result >= PROBLEMATIC_THRESHOLD ) { problematic = 1; } @@ -506,7 +507,7 @@ static void svd_accuracy_test_fx( matrixTranspose( tmp1, singularVectors_Right, nChannelsC, nChannelsC ); /* CxC */ matrixProduct( tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC ); /* CxC */ result = matrixTestIdentity( tmp2, nChannelsC ); - if ( result >= 1.0 ) + if ( result >= PROBLEMATIC_THRESHOLD ) { problematic = 1; } @@ -520,7 +521,7 @@ static void svd_accuracy_test_fx( matrixTranspose( tmp3, singularVectors_Right, nChannelsC, nChannelsC ); /* CxC */ matrixProduct( tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC ); /* LxC */ result = matrixDifference( tmp2, InputMatrix, nChannelsL, nChannelsC ); - if ( result >= 1.0 ) + if ( result >= PROBLEMATIC_THRESHOLD ) { problematic = 1; } @@ -1826,7 +1827,11 @@ static void singularVectorsAccumulationLeft_fx( Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] ); t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp ); t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj ); +#ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( add( temp_exp, 
temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); +#else + t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) ); +#endif #else t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, #ifndef FIX_1010_OPT_SINGLE_RESCALE -- GitLab From 5b946e69d566e713b62b1448083812b85ac9bacc Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Thu, 9 Jan 2025 15:14:05 +0530 Subject: [PATCH 32/41] Bug fix in StableHighPitchDetect_ivas_fx, LTV crash fixes for MASA in original and -10dB scaled inputs --- lib_enc/ivas_stereo_icbwe_enc.c | 98 +++++++++++++++------------------ lib_enc/pitch_ol2.c | 2 +- 2 files changed, 44 insertions(+), 56 deletions(-) diff --git a/lib_enc/ivas_stereo_icbwe_enc.c b/lib_enc/ivas_stereo_icbwe_enc.c index 90d6da69e..aed080b12 100644 --- a/lib_enc/ivas_stereo_icbwe_enc.c +++ b/lib_enc/ivas_stereo_icbwe_enc.c @@ -115,11 +115,12 @@ static Word16 ic_bwe_enc_specMapping_ivas_fx( Word16 Txx1_fx = 0, Txx2_fx = 0, Txx3_fx = 0, T_desired_fx = 0; Word16 Txx1_e = 0, Txx2_e = 0, Txx3_e = 0, T_desired_e = 0; Word16 T_nonref_target_fx, temp_fx; + Word32 temp00_fx, temp11_fx; + Word16 temp00_exp, temp11_exp; Word32 temp0_fx, temp1_fx, temp2_fx, temp3_fx; Word16 a_fx, b_fx, c_fx, a_e, b_e, c_e; Word16 u_fx, u1_fx, u2_fx, u_e = 0, u1_e, u2_e; Word16 temp0_exp, temp1_exp, temp2_exp, temp3_exp, exp, T_nonref_target_e; - Word16 exp_buf[6]; move16(); move16(); @@ -133,35 +134,8 @@ static Word16 ic_bwe_enc_specMapping_ivas_fx( /* Calculate rxx(1)/rxx(0) of the non ref target */ - temp0_fx = dotp_fixed_ivas_fx( shb_frame_target_fx, shb_frame_target_e, shb_frame_target_fx, shb_frame_target_e, L_FRAME16k - 1, &temp0_exp ); /* Q31-temp0_exp */ - temp1_fx = dotp_fixed_ivas_fx( shb_frame_target_fx, shb_frame_target_e, shb_frame_target_fx + 1, shb_frame_target_e, L_FRAME16k - 1, &temp1_exp ); /* 
Q31-temp1_exp */ - - /* Smoothing */ - temp0_fx = L_shr( temp0_fx, 1 ); - temp1_fx = L_shr( temp1_fx, 1 ); - - - memShbSpecXcorr_fx[0] = temp0_fx; // tem0_exp - memShbSpecXcorr_fx[1] = L_shr( temp1_fx, temp0_exp - temp1_exp ); // temp0_exp - exp = sub( temp0_exp, temp1_exp ); - exp_buf[0] = exp; - exp_buf[1] = exp; - move32(); - move32(); - move16(); - move16(); - - IF( temp0_fx != 0 ) - { - T_nonref_target_fx = BASOP_Util_Divide3232_Scale( temp1_fx, temp0_fx, &T_nonref_target_e ); // exp - } - ELSE - { - T_nonref_target_fx = 0; - T_nonref_target_e = 31; - move32(); - move16(); - } + temp00_fx = dotp_fixed_ivas_fx( shb_frame_target_fx, shb_frame_target_e, shb_frame_target_fx, shb_frame_target_e, L_FRAME16k - 1, &temp00_exp ); /* Q31-temp0_exp */ + temp11_fx = dotp_fixed_ivas_fx( shb_frame_target_fx, shb_frame_target_e, shb_frame_target_fx + 1, shb_frame_target_e, L_FRAME16k - 1, &temp11_exp ); /* Q31-temp1_exp */ /* Calculate rxx(1)/rxx(0) of the non ref synth */ temp0_fx = dotp_fixed_ivas_fx( shb_synth_nonref_fx, shb_synth_nonref_e, shb_synth_nonref_fx, shb_synth_nonref_e, L_FRAME16k - 3, &temp0_exp ); /* Q31-temp0_exp */ @@ -169,41 +143,55 @@ static Word16 ic_bwe_enc_specMapping_ivas_fx( temp2_fx = dotp_fixed_ivas_fx( shb_synth_nonref_fx, shb_synth_nonref_e, shb_synth_nonref_fx + 2, shb_synth_nonref_e, L_FRAME16k - 3, &temp2_exp ); /* Q31-temp2_exp */ temp3_fx = dotp_fixed_ivas_fx( shb_synth_nonref_fx, shb_synth_nonref_e, shb_synth_nonref_fx + 3, shb_synth_nonref_e, L_FRAME16k - 3, &temp3_exp ); /* Q31-temp3_exp */ + exp = s_max( *memShbSpecXcorr_e, s_max( s_max( s_max( temp00_exp, temp11_exp ), s_max( temp0_exp, temp1_exp ) ), s_max( temp2_exp, temp3_exp ) ) ); + + temp00_fx = L_shr( temp00_fx, sub( exp, temp00_exp ) ); + temp11_fx = L_shr( temp11_fx, sub( exp, temp11_exp ) ); + temp0_fx = L_shr( temp0_fx, sub( exp, temp0_exp ) ); + temp1_fx = L_shr( temp1_fx, sub( exp, temp1_exp ) ); + temp2_fx = L_shr( temp2_fx, sub( exp, temp2_exp ) ); + temp3_fx = L_shr( 
temp3_fx, sub( exp, temp3_exp ) ); + /* Smoothing */ - temp0_fx = L_shr( temp0_fx, 1 ); - temp1_fx = L_shr( temp1_fx, 1 ); - temp2_fx = L_shr( temp2_fx, 1 ); - temp3_fx = L_shr( temp3_fx, 1 ); - - exp_buf[2] = temp0_exp; - exp_buf[3] = temp1_exp; - exp_buf[4] = temp2_exp; - exp_buf[5] = temp3_exp; - exp = exp_buf[0]; - move16(); - move16(); - move16(); - move16(); - move16(); + FOR( Word16 i = 0; i < 6; i++ ) { - IF( LT_16( exp, exp_buf[i] ) ) - { - exp = exp_buf[i]; - move16(); - } + memShbSpecXcorr_fx[i] = L_shl( memShbSpecXcorr_fx[i], sub( *memShbSpecXcorr_e, exp ) ); + move32(); } - memShbSpecXcorr_fx[2] = L_shr( temp0_fx, sub( exp, temp0_exp ) ); /* Q31-exp */ - memShbSpecXcorr_fx[3] = L_shr( temp1_fx, sub( exp, temp1_exp ) ); /* Q31-exp */ - memShbSpecXcorr_fx[4] = L_shr( temp2_fx, sub( exp, temp2_exp ) ); /* Q31-exp */ - memShbSpecXcorr_fx[5] = L_shr( temp3_fx, sub( exp, temp3_exp ) ); /* Q31-exp */ *memShbSpecXcorr_e = exp; + move16(); + + temp00_fx = L_add( L_shr( temp00_fx, 1 ), L_shr( memShbSpecXcorr_fx[0], 1 ) ); + temp11_fx = L_add( L_shr( temp11_fx, 1 ), L_shr( memShbSpecXcorr_fx[1], 1 ) ); + temp0_fx = L_add( L_shr( temp0_fx, 1 ), L_shr( memShbSpecXcorr_fx[2], 1 ) ); + temp1_fx = L_add( L_shr( temp1_fx, 1 ), L_shr( memShbSpecXcorr_fx[3], 1 ) ); + temp2_fx = L_add( L_shr( temp2_fx, 1 ), L_shr( memShbSpecXcorr_fx[4], 1 ) ); + temp3_fx = L_add( L_shr( temp3_fx, 1 ), L_shr( memShbSpecXcorr_fx[5], 1 ) ); + T_nonref_target_fx = 0; move32(); + T_nonref_target_e = 31; + move16(); + + IF( temp00_fx != 0 ) + { + T_nonref_target_fx = BASOP_Util_Divide3232_Scale( temp11_fx, temp00_fx, &T_nonref_target_e ); + } + + + memShbSpecXcorr_fx[0] = temp00_fx; move32(); + memShbSpecXcorr_fx[1] = temp11_fx; move32(); + memShbSpecXcorr_fx[2] = temp0_fx; /* Q31-exp */ + move32(); + memShbSpecXcorr_fx[3] = temp1_fx; /* Q31-exp */ + move32(); + memShbSpecXcorr_fx[4] = temp2_fx; /* Q31-exp */ + move32(); + memShbSpecXcorr_fx[5] = temp3_fx; /* Q31-exp */ move32(); - move16(); 
IF( temp0_fx != 0 ) diff --git a/lib_enc/pitch_ol2.c b/lib_enc/pitch_ol2.c index 4e4277eef..f3087f1b9 100644 --- a/lib_enc/pitch_ol2.c +++ b/lib_enc/pitch_ol2.c @@ -285,7 +285,7 @@ void StableHighPitchDetect_ivas_fx( *flag_spitch = 0; move16(); IF( ( EQ_16( localVAD, 1 ) ) && ( EQ_16( *predecision_flag, 1 ) ) && - ( GT_16( *voicing0_sm, 16384 ) ) && ( GT_16( *voicing0_sm, mult_r( *voicing_sm, 21299 ) ) ) ) + ( GT_16( *voicing0_sm, 21299 ) ) && ( GT_16( *voicing0_sm, mult_r( *voicing_sm, 22938 ) ) ) ) { *flag_spitch = 1; move16(); -- GitLab From 0d35d20f17da5b332bf1cf8ed2f8484b2548492c Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Thu, 9 Jan 2025 15:20:03 +0530 Subject: [PATCH 33/41] Fix for 3GPP issue 1148: crash at 32kbps [x] link #1148 [x] When an unvoiced frame is coded at 32 kbps, bwe_exc_fx is not defined and hence should not be updated. Check added for update to happen. --- lib_com/options.h | 1 + lib_enc/enc_uv_fx.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/lib_com/options.h b/lib_com/options.h index 5aa205a23..343515281 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -107,4 +107,5 @@ #define FIX_ISSUE_1122 /* Ittiam: Fix issue 1122: corrected incorrect scaling of a buffer leading to incorrect metadata bits */ #define FIX_1132_STACK_CORRUPTION /* Stack corruption issue due of extending index access*/ #define FIX_ISSUE_1092 /* Ittiam: Fix for Issue 1092: BASOP asserts in stereo fx encoder for selection test inputs*/ +#define FIX_ISSUE_1148 #endif diff --git a/lib_enc/enc_uv_fx.c b/lib_enc/enc_uv_fx.c index d2d8c9570..cd147c6ce 100644 --- a/lib_enc/enc_uv_fx.c +++ b/lib_enc/enc_uv_fx.c @@ -480,7 +480,14 @@ void encod_unvoiced_ivas_fx( voice_factors_fx[i_subfr / L_SUBFR] = 0; move16(); +#ifdef FIX_ISSUE_1148 + if ( st_fx->hBWE_TD != NULL ) + { + interp_code_5over2_fx( &exc_fx[i_subfr], &bwe_exc_fx[i_subfr * HIBND_ACB_L_FAC], L_SUBFR ); + } +#else interp_code_5over2_fx( &exc_fx[i_subfr], &bwe_exc_fx[i_subfr * HIBND_ACB_L_FAC], 
L_SUBFR ); +#endif /*-----------------------------------------------------------------* * Synthesize speech to update mem_syn[]. -- GitLab From cf45fa0fac7bb2516690511c940af9064af9c88f Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Thu, 9 Jan 2025 08:54:36 +0100 Subject: [PATCH 34/41] get ref complexity numbers from ivas-float-update branch --- .gitlab-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c2ca28465..e1fde534c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1215,8 +1215,8 @@ voip-be-on-merge-request: - rm artifacts.zip - rm -rf $public_dir - ### 1.5.part: get the corresponding measurement from ivas-float-update-cmplx - - job_id=$(python3 ci/get_id_of_last_job_occurence.py ivas-float-update-cmplx $CI_JOB_NAME $CI_PROJECT_ID) + ### 1.5.part: get the corresponding measurement from ivas-float-update + - job_id=$(python3 ci/get_id_of_last_job_occurence.py ivas-float-update $CI_JOB_NAME $CI_PROJECT_ID) - echo $job_id - curl --request GET "https://forge.3gpp.org/rep/api/v4/projects/$CI_PROJECT_ID/jobs/$job_id/artifacts" --output artifacts_ref.zip - unzip -j artifacts_ref.zip "*latest_WMOPS.csv" -- GitLab From 0f364fcbb5045165f79400e4eb8dda8e17904b42 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Wed, 15 Jan 2025 12:31:29 +0100 Subject: [PATCH 35/41] Add FIX_1010_OPT_NORM_NOSAT (do not saturate intermediate results) and FIX_1010_OPT_SEC_SINGLE_RESCALE (do not rescale secDiag data repeatedly). Improves accuracy and reduces workload but makes dependency on dynamic scaling bigger. 
--- lib_dec/ivas_svd_dec.c | 174 ++++++++++++++++++++++++++++++++--------- 1 file changed, 135 insertions(+), 39 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 8da6e2f4c..647b203af 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -58,6 +58,8 @@ #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV // #define FIX_1010_OPT_GIVENS_AMAX_BMIN +#define FIX_1010_OPT_NORM_NOSAT +#define FIX_1010_OPT_SEC_SINGLE_RESCALE #endif /*-----------------------------------------------------------------------* @@ -135,7 +137,11 @@ static void singularVectorsAccumulationRight_fx( #else Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS], #endif +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word16 secDiag_e, +#else + Word16 *secDiag_e, +#endif const Word16 nChannelsC /* Q0 */ ); @@ -560,7 +566,11 @@ Word16 svd_fx( Word16 errorMessage, condition; // int16_t max_length = ((nChannelsL > nChannelsC) ? nChannelsL : nChannelsC); Word32 secDiag_fx[MAX_OUTPUT_CHANNELS]; +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word16 secDiag_fx_e = 0; +#else + Word16 secDiag_fx_e[MAX_OUTPUT_CHANNELS]; +#endif move16(); Word32 eps_x_fx = 0, temp_fx; move16(); @@ -569,7 +579,10 @@ Word16 svd_fx( Word16 temp_fx_e; push_wmops( "svd_fx" ); +#if 1 set32_fx( secDiag_fx, 0, MAX_OUTPUT_CHANNELS ); + set16_fx( secDiag_fx_e, 0, MAX_OUTPUT_CHANNELS ); +#endif /* Collecting Values */ FOR( iCh = 0; iCh < nChannelsL; iCh++ ) @@ -584,16 +597,22 @@ Word16 svd_fx( set16_fx( singularValues_fx_e, 0, MAX_OUTPUT_CHANNELS ); /* Householder reduction */ +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, &secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e ); - +#else + HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, 
nChannelsC, &eps_x_fx, &eps_x_fx_e ); +#endif /* Set extremely small values to zero if needed */ // flushToZeroArray(singularValues, max_length); // flushToZeroMat(singularVectors_Left, nChannelsL, nChannelsL); // flushToZeroMat(singularVectors_Right, nChannelsC, nChannelsC); /* BidagonalDiagonalisation */ +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, &secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */ - +#else + errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */ +#endif /* Sort the singular values descending order */ lengthSingularValues = s_min( nChannelsL, nChannelsC ); /* Q0 */ @@ -676,11 +695,15 @@ static Word16 BidagonalDiagonalisation_fx( Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_fx_e*/ Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_fx_e*/ Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */ - Word16 *secDiag_fx_e, /* i/o: */ - const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/ - const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed Q0*/ - const Word32 eps_x, /* i : eps_x_e*/ - const Word16 eps_x_e /* i : */ +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE + Word16 *secDiag_fx_e, /* i/o: */ +#else + Word16 *secDiag_new_e, /* i/o: */ +#endif + const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/ + const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed Q0*/ + const Word32 eps_x, /* i : eps_x_e*/ + const Word16 eps_x_e /* i : */ ) { Word16 kCh, nCh, iCh, jCh, split; @@ -690,6 +713,9 @@ static Word16 BidagonalDiagonalisation_fx( 
move16(); move16(); Word16 temp_exp; +#ifdef FIX_1010_OPT_NORM_NOSAT + Word16 temp_exp2; +#endif Word32 g = 0; move16(); Word16 g_e = 0; @@ -700,9 +726,12 @@ static Word16 BidagonalDiagonalisation_fx( #ifdef FIX_1010_OPT_GIVENS_INV Word32 temp; #endif - Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS], secDiag_new_e[MAX_OUTPUT_CHANNELS]; - Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS ); + Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS]; +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE + Word16 secDiag_new_e[MAX_OUTPUT_CHANNELS]; set16_fx( secDiag_new_e, *secDiag_fx_e, MAX_OUTPUT_CHANNELS ); +#endif + Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS ); FOR( iCh = nChannelsC - 1; iCh >= 0; iCh-- ) /* nChannelsC */ { @@ -779,12 +808,18 @@ static Word16 BidagonalDiagonalisation_fx( c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp ); /* exp(temp_exp + (c_e - singularValues_new_e)) */ c_e = add( temp_exp, sub( c_e, singularValues_new_e[kCh] ) ); #endif +#ifndef FIX_1010_OPT_NORM_NOSAT IF( c_e > 0 ) { c = L_shl_sat( c, c_e ); // Q31 c_e = 0; move16(); } +#else + temp_exp2 = norm_l( c ); + c = L_shl( c, temp_exp2 ); + c_e = sub( c_e, temp_exp2 ); +#endif #ifdef FIX_1010_OPT_GIVENS_INV s = Mpy_32_32( -g, temp ); s_e = add( g_e, temp_exp ); @@ -792,13 +827,18 @@ static Word16 BidagonalDiagonalisation_fx( s = BASOP_Util_Divide3232_Scale_cadence( -g, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp ); /* exp(temp_exp + (g_e - singularValues_new_e))*/ s_e = add( temp_exp, sub( g_e, singularValues_new_e[kCh] ) ); #endif +#ifndef FIX_1010_OPT_NORM_NOSAT IF( s_e > 0 ) { s = L_shl_sat( s, s_e ); // Q31 s_e = 0; move16(); } - +#else + temp_exp2 = norm_l( s ); + s = L_shl( s, temp_exp2 ); + s_e = sub( s_e, temp_exp2 ); +#endif ApplyRotation_fx( singularVectors_Left_fx, c, c_e, s, s_e, 0, x11_e, 0, x12_e, &f1, &f1_e, &f2, &f2_e, kCh, split, nChannelsL ); /* nChannelsL */ } } @@ -849,6 +889,7 @@ 
static Word16 BidagonalDiagonalisation_fx( // rescaling block Copy( singularValues_new_e, singularValues_fx_e, MAX_OUTPUT_CHANNELS ); +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word16 max_exp = -31; move16(); FOR( iCh = 0; iCh < nChannelsC; iCh++ ) @@ -865,6 +906,7 @@ static Word16 BidagonalDiagonalisation_fx( secDiag_fx[iCh] = L_shr_r( secDiag_fx[iCh], sub( *secDiag_fx_e, secDiag_new_e[iCh] ) ); /* exp(secDiag_fx_e) */ move32(); } +#endif return ( error ); } @@ -891,6 +933,9 @@ static void ApplyQRTransform_fx( #ifdef FIX_1010_OPT_GIVENS_INV Word32 temp; Word16 temp_e; +#endif +#ifdef FIX_1010_OPT_NORM_NOSAT + Word16 temp_norm_e; #endif Word16 ch, split; Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0; @@ -1004,12 +1049,18 @@ static void ApplyQRTransform_fx( c = BASOP_Util_Divide3232_Scale_cadence( d, maxWithSign_fx( secDiag[ch] ), &c_e ); /* exp(c_e + (d_e + secDiag_e)) */ c_e = add( c_e, sub( d_e, secDiag_e[ch] ) ); #endif +#ifndef FIX_1010_OPT_NORM_NOSAT IF( c_e > 0 ) { c = L_shl_sat( c, c_e ); // Q31 c_e = 0; move16(); } +#else + temp_norm_e = norm_l( c ); + c = L_shl( c, temp_norm_e ); + c_e = sub( c_e, temp_norm_e ); +#endif #ifdef FIX_1010_OPT_GIVENS_INV s = Mpy_32_32( r, temp ); s_e = add( r_e, temp_e ); @@ -1017,13 +1068,18 @@ static void ApplyQRTransform_fx( s = BASOP_Util_Divide3232_Scale_cadence( r, maxWithSign_fx( secDiag[ch] ), &s_e ); /* exp(s_e + (r_e - sec_Diag_e))*/ s_e = add( s_e, sub( r_e, secDiag_e[ch] ) ); #endif +#ifndef FIX_1010_OPT_NORM_NOSAT IF( s_e > 0 ) { s = L_shl_sat( s, s_e ); // Q31 s_e = 0; move16(); } - +#else + temp_norm_e = norm_l( s ); + s = L_shl( s, temp_norm_e ); + s_e = sub( s_e, temp_norm_e ); +#endif r = Mpy_32_32( s, singularValues[ch + 1] ); /* exp(r_e + secDiag_e) */ r_e = add( s_e, singularValues_e[ch + 1] ); x_split = Mpy_32_32( c, singularValues[ch + 1] ); /* exp(c_e + secDiag_e) */ @@ -1052,21 +1108,33 @@ static void ApplyQRTransform_fx( c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */ c_e 
= add( d_e, aux_e ); +#ifndef FIX_1010_OPT_NORM_NOSAT IF( c_e > 0 ) { c = L_shl_sat( c, c_e ); // Q31 c_e = 0; move16(); } +#else + temp_norm_e = norm_l( c ); + c = L_shl( c, temp_norm_e ); + c_e = sub( c_e, temp_norm_e ); +#endif s = Mpy_32_32( r, aux ); /* exp(r_e + aux_e) */ s_e = add( r_e, aux_e ); +#ifndef FIX_1010_OPT_NORM_NOSAT IF( s_e > 0 ) { s = L_shl_sat( s, s_e ); // Q31 s_e = 0; move16(); } +#else + temp_norm_e = norm_l( s ); + s = L_shl( s, temp_norm_e ); + s_e = sub( s_e, temp_norm_e ); +#endif } // ApplyRotation(singularVectors_Left, c, s, g, x_split, &d, &x_ii, ch + 1, ch, nChannelsL); @@ -1191,7 +1259,11 @@ static void HouseholderReduction_fx( #endif Word16 L_temp_e; +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), *secDiag_fx_e, &L_temp_e ); /* exp(L_temp_e) */ +#else + Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */ +#endif IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) ) { *eps_x_fx = L_temp; /* exp(L_temp_e) */ @@ -1206,7 +1278,11 @@ static void HouseholderReduction_fx( singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC ); singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_e, singularValues_fx_e, nChannelsL, nChannelsC ); #else +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, *secDiag_fx_e, nChannelsC ); +#else + singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC ); +#endif 
singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC ); #endif @@ -1289,6 +1365,7 @@ static void biDiagonalReductionLeft_fx( secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); /* exp(sig_x_e) */ move32(); +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE // rescaling block IF( GT_16( *sig_x_e, *secDiag_e ) ) { @@ -1306,7 +1383,10 @@ ELSE IF( LT_16( *sig_x_e, *secDiag_e ) ) secDiag[currChannel] = L_shr_r( secDiag[currChannel], sub( *secDiag_e, *sig_x_e ) ); /* exp(secDiag_e) */ move32(); } - +#else + secDiag_e[currChannel] = *sig_x_e; + move16(); +#endif /* Setting values to 0 */ ( *sig_x ) = 0; move32(); @@ -1323,7 +1403,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifndef FIX_1010_OPT_SINGLE_RESCALE ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */ #else - ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */ + ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */ #endif } @@ -1359,17 +1439,17 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #endif #else - Word16 temp_e = norm_l( singularVectors[jCh][currChannel] ); - singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ - move32(); + Word16 temp_e = norm_l( singularVectors[jCh][currChannel] ); + singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( 
singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + move32(); #ifndef FIX_1010_OPT_SINGLE_RESCALE - sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e ); - move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e ); + move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else - singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e ); - move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e ); + move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #endif #endif } @@ -1400,8 +1480,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */ #else - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), 
singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ - singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ #endif move32(); @@ -1424,7 +1504,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifndef FIX_1010_OPT_SINGLE_RESCALE norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e ), &norm_x_e ); /* exp(norm_x_e) */ #else - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */ #endif } @@ -1432,8 +1512,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */ f_e = add( f_e, sub( norm_x_e, r_e ) ); #else - f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ - f_e = add( invVal_e, sub( norm_x_e, r_e ) ); + f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ + f_e = add( invVal_e, sub( norm_x_e, r_e ) ); #endif FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* 
nChannelsL */ @@ -1441,7 +1521,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], *singularVectors_e, Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, sing_exp[jCh] ), &sing_exp2[jCh][iCh] ); /* exp( sing_exp2) */ #else - singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] ); + singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] ); #endif move32(); } @@ -1455,7 +1535,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp2[jCh][currChannel] = add( sing_exp[jCh], *sig_x_e ); #else - singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e ); + singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e ); #endif move16(); } @@ -1509,7 +1589,11 @@ static void biDiagonalReductionRight_fx( #else Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS], #endif +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word16 *secDiag_e, +#else + Word16 *secDiag_exp, +#endif const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ @@ -1521,7 +1605,9 @@ static void biDiagonalReductionRight_fx( Word16 iCh, jCh, idx; Word32 norm_x, r; Word16 norm_x_e, r_e; +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word16 secDiag_exp[MAX_OUTPUT_CHANNELS]; +#endif Word32 L_temp; Word16 L_temp_e; #ifndef FIX_1010_OPT_SINGLE_RESCALE @@ -1532,7 +1618,9 @@ static void biDiagonalReductionRight_fx( set16_fx( sing_exp2[jCh], *singularVectors_e, MAX_OUTPUT_CHANNELS ); 
} #endif +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE set16_fx( secDiag_exp, *secDiag_e, MAX_OUTPUT_CHANNELS ); +#endif /* Setting values to 0 */ ( *sig_x ) = 0; @@ -1592,7 +1680,7 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) ); move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) ); move16(); @@ -1686,7 +1774,7 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */ #else - singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ + singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ #endif move32(); } @@ -1704,7 +1792,7 @@ static void biDiagonalReductionRight_fx( move16(); } - +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE /*rescaling block*/ Word16 exp_max = *secDiag_e; move16(); @@ -1719,7 +1807,7 @@ static void biDiagonalReductionRight_fx( } *secDiag_e = exp_max; move16(); - +#endif #ifndef 
FIX_1010_OPT_SINGLE_RESCALE exp_max = *singularVectors_e; move16(); @@ -1833,7 +1921,7 @@ static void singularVectorsAccumulationLeft_fx( t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) ); #endif #else - t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, + t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, #ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else @@ -1886,7 +1974,7 @@ static void singularVectorsAccumulationLeft_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], sing_exp2[nCh][iCh] ); /* Q31 */ #else - singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */ + singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */ #endif move32(); } @@ -1910,7 +1998,11 @@ static void singularVectorsAccumulationRight_fx( #else Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS], #endif +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word16 secDiag_e, +#else + Word16 *secDiag_e, +#endif const Word16 nChannelsC /* Q0 */ ) { @@ -1923,7 +2015,7 @@ static void singularVectorsAccumulationRight_fx( nChannels = nChannelsC; /* nChannelsC Q0*/ /* avoid compiler warning */ - t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e) */ + t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e[nChannels - 1]) */ move32(); FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* nChannelsC, min(nChannelsLmnChannelsC) otherwise */ @@ -1940,14 +2032,18 @@ static void singularVectorsAccumulationRight_fx( ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( 
singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */ singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ #else - ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */ - singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ + ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */ + singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ #endif #ifdef FIX_1010_OPT_SINGLE_RESCALE temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) ); #endif move32(); +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e ) ); +#else + sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e[nCh + 1] ) ); +#endif move16(); // singularVectors_Right[iCh][nCh] = L_shl_sat( singularVectors_Right[iCh][nCh], temp_exp2 ); } @@ -1989,7 +2085,7 @@ static void singularVectorsAccumulationRight_fx( } singularVectors_Right[nCh][nCh] = MAX_32; move32(); - t_ii = secDiag[nCh]; /* exp(secDiag_e) */ + t_ii = secDiag[nCh]; /* exp(secDiag_e[nCh]) */ move32(); } return; -- GitLab From daead0fd5cc02e32a2bec9f6e8430e3226cac625 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Wed, 15 Jan 2025 12:33:42 +0100 Subject: [PATCH 36/41] clang-format --- 
lib_dec/ivas_svd_dec.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 647b203af..1b113e036 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -698,7 +698,7 @@ static Word16 BidagonalDiagonalisation_fx( #ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word16 *secDiag_fx_e, /* i/o: */ #else - Word16 *secDiag_new_e, /* i/o: */ + Word16 *secDiag_new_e, /* i/o: */ #endif const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/ const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed Q0*/ @@ -804,8 +804,8 @@ static Word16 BidagonalDiagonalisation_fx( c = Mpy_32_32( c, temp ); c_e = add( c_e, temp_exp ); #else - singularValues_fx[kCh] = GivensRotation_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_new_e[kCh] ); /* exp(singularValues_new_e) */ - c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp ); /* exp(temp_exp + (c_e - singularValues_new_e)) */ + singularValues_fx[kCh] = GivensRotation_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_new_e[kCh] ); /* exp(singularValues_new_e) */ + c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp ); /* exp(temp_exp + (c_e - singularValues_new_e)) */ c_e = add( temp_exp, sub( c_e, singularValues_new_e[kCh] ) ); #endif #ifndef FIX_1010_OPT_NORM_NOSAT @@ -1445,7 +1445,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e ); move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( 
singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e ); move16(); @@ -1480,8 +1480,8 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */ #else - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ - singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ #endif move32(); @@ -1512,7 +1512,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */ f_e = add( f_e, sub( norm_x_e, r_e ) ); #else - f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ + f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ f_e = add( invVal_e, sub( norm_x_e, r_e ) ); #endif @@ -1680,7 +1680,7 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = add( sub( invVal_e, temp_e 
), sub( *singularVectors_e, *sig_x_e ) ); move16(); - norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) ); move16(); @@ -1774,7 +1774,7 @@ static void biDiagonalReductionRight_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */ #else - singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ + singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ #endif move32(); } @@ -1921,7 +1921,7 @@ static void singularVectorsAccumulationLeft_fx( t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) ); #endif #else - t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, + t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, #ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( temp_exp, 
sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else @@ -1974,7 +1974,7 @@ static void singularVectorsAccumulationLeft_fx( #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], sing_exp2[nCh][iCh] ); /* Q31 */ #else - singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */ + singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */ #endif move32(); } -- GitLab From 21134e489a374a1b042dc970350ec1fa33fe81f5 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Mon, 27 Jan 2025 08:16:14 +0100 Subject: [PATCH 37/41] Remove unnecessary set16_fx() and add logging code for later reference for objective precision assessment. --- lib_dec/ivas_svd_dec.c | 100 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 95 insertions(+), 5 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 1b113e036..a084a9024 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -297,6 +297,42 @@ void svdMat2mat_fx( return; } +#ifdef MORE_DEBUG2 +static void matrixFx2Fl2( + float r[][MAX_OUTPUT_CHANNELS], + const Word32 a[][MAX_OUTPUT_CHANNELS], + const Word16 a_e[][MAX_OUTPUT_CHANNELS], + const int adim1, + const int adim2 ) +{ + for ( int i1 = 0; i1 < adim1; i1++ ) + { + for ( int i2 = 0; i2 < adim2; i2++ ) + { + r[i1][i2] = (float) a[i1][i2] * powf( 2.f, a_e[i1][i2] - 31 ); + } + } +} + +static void matrixPrint2( + const float a[][MAX_OUTPUT_CHANNELS], + const int dim1, + const int dim2, + const char *name ) +{ + printf( "Matrix %s[%d][%d] = \n", name, dim1, dim2 ); + for ( int i1 = 0; i1 < dim1; i1++ ) + { + printf( " { " ); + for ( int i2 = 0; i2 < dim2; i2++ ) + { + printf( "%.10e, ", a[i1][i2] ); + } + printf( " },\n" ); + } +} +#endif + #ifndef DEBUG_SVD_TEST // #define DEBUG_SVD_PRECISION #endif @@ -417,7 +453,7 @@ static void matrixPrint( { for ( int i2
= 0; i2 < dim2; i2++ ) { - printf( "%f, ", a[i1][i2] ); + printf( "%.10e, ", a[i1][i2] ); } printf( "\n" ); } @@ -479,7 +515,7 @@ static void svd_accuracy_test_fx( singularValuesFx2_e[x] = InputMatrixFx_e; matrixFx2Fl( InputMatrix, InputMatrixFx, singularValuesFx2_e, nChannelsL, nChannelsC ); dimSingular = min( nChannelsL, nChannelsC ); - matrixFx2Fl( singularValues, singularValuesFx, singularValuesFx_e, 1, nChannelsC ); + matrixFx2Fl( &singularValues, (Word32(*)[MAX_MATRIX])singularValuesFx, singularValuesFx_e, 1, nChannelsC ); for ( int x = 0; x < MAX_MATRIX; x++ ) singularValuesFx2_e[x] = 0; matrixFx2Fl( singularVectors_Left, singularVectors_LeftFx, singularValuesFx2_e, nChannelsL, nChannelsC ); @@ -579,9 +615,25 @@ Word16 svd_fx( Word16 temp_fx_e; push_wmops( "svd_fx" ); -#if 1 +#ifdef MORE_DEBUG2 +{ + float input[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + Word16 exp_matrix[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + + for (int ii=0; ii Date: Mon, 27 Jan 2025 08:38:03 +0100 Subject: [PATCH 38/41] format fix --- lib_dec/ivas_svd_dec.c | 72 +++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index a084a9024..aa9238639 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -515,7 +515,7 @@ static void svd_accuracy_test_fx( singularValuesFx2_e[x] = InputMatrixFx_e; matrixFx2Fl( InputMatrix, InputMatrixFx, singularValuesFx2_e, nChannelsL, nChannelsC ); dimSingular = min( nChannelsL, nChannelsC ); - matrixFx2Fl( &singularValues, (Word32(*)[MAX_MATRIX])singularValuesFx, singularValuesFx_e, 1, nChannelsC ); + matrixFx2Fl( &singularValues, (Word32( * )[MAX_MATRIX]) singularValuesFx, singularValuesFx_e, 1, nChannelsC ); for ( int x = 0; x < MAX_MATRIX; x++ ) singularValuesFx2_e[x] = 0; matrixFx2Fl( singularVectors_Left, singularVectors_LeftFx, singularValuesFx2_e, nChannelsL, nChannelsC ); @@ -616,17 +616,17 @@ Word16 svd_fx( push_wmops( "svd_fx" ); 
#ifdef MORE_DEBUG2 -{ - float input[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - Word16 exp_matrix[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + { + float input[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + Word16 exp_matrix[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - for (int ii=0; ii Date: Mon, 27 Jan 2025 11:53:59 +0100 Subject: [PATCH 39/41] Remove FIX_1010_OPT_INV_USING_INVSQRT and FIX_1010_OPT_GIVENS_AMAX_BMIN. Remove all debug/measurement code. Preparation for merge to main. --- lib_dec/ivas_svd_dec.c | 519 +---------------------------------------- 1 file changed, 4 insertions(+), 515 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index aa9238639..d3377ebba 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -53,11 +53,9 @@ #if 1 #define FIX_1010_OPT_DIV -// #define FIX_1010_OPT_INV_USING_INVSQRT #define FIX_1010_OPT_SINGLE_RESCALE #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV -// #define FIX_1010_OPT_GIVENS_AMAX_BMIN #define FIX_1010_OPT_NORM_NOSAT #define FIX_1010_OPT_SEC_SINGLE_RESCALE #endif @@ -297,288 +295,6 @@ void svdMat2mat_fx( return; } -#ifdef MORE_DEBUG2 -static void matrixFx2Fl2( - float r[][MAX_OUTPUT_CHANNELS], - const Word32 a[][MAX_OUTPUT_CHANNELS], - const Word16 a_e[][MAX_OUTPUT_CHANNELS], - const int adim1, - const int adim2 ) -{ - for ( int i1 = 0; i1 < adim1; i1++ ) - { - for ( int i2 = 0; i2 < adim2; i2++ ) - { - r[i1][i2] = (float) a[i1][i2] * powf( 2.f, a_e[i1][i2] - 31 ); - } - } -} - -static void matrixPrint2( - const float a[][MAX_OUTPUT_CHANNELS], - const int dim1, - const int dim2, - const char *name ) -{ - printf( "Matrix %s[%d][%d] = \n", name, dim1, dim2 ); - for ( int i1 = 0; i1 < dim1; i1++ ) - { - printf( " { " ); - for ( int i2 = 0; i2 < dim2; i2++ ) - { - printf( "%.10e, ", a[i1][i2] ); - } - printf( " },\n" ); - } -} -#endif - -#ifndef DEBUG_SVD_TEST -// #define DEBUG_SVD_PRECISION -#endif -// #define MORE_DEBUG - -#if defined( DEBUG_SVD_PRECISION ) || defined( 
MORE_DEBUG ) - -#if ( MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS ) -#define MAX_MATRIX MAX_INPUT_CHANNELS -#else -#define MAX_MATRIX MAX_OUTPUT_CHANNELS -#endif - -static void matrixFx2Fl( - float r[][MAX_MATRIX], - const Word32 a[][MAX_MATRIX], - const Word16 a_e[MAX_MATRIX], - const int adim1, - const int adim2 ) -{ - for ( int i1 = 0; i1 < adim1; i1++ ) - { - for ( int i2 = 0; i2 < adim2; i2++ ) - { - r[i1][i2] = (float) a[i1][i2] * powf( 2.f, a_e[i2] - 31 ); - } - } -} - -static void matrixProduct( - float r[][MAX_MATRIX], - const float a[][MAX_MATRIX], - const float b[][MAX_MATRIX], - const int adim1, - const int adim2, - const int bdim1, - const int bdim2 ) -{ - assert( adim2 == bdim1 ); - - for ( int i1 = 0; i1 < adim1; i1++ ) - { - for ( int i2 = 0; i2 < bdim2; i2++ ) - { - r[i1][i2] = 0.f; - for ( int i3 = 0; i3 < bdim1; i3++ ) - { - r[i1][i2] += a[i1][i3] * b[i3][i2]; - } - } - } -} - -static void matrixTranspose( - float r[][MAX_MATRIX], - const float a[][MAX_MATRIX], - const int adim1, - const int adim2 ) -{ - for ( int i1 = 0; i1 < adim1; i1++ ) - { - for ( int i2 = 0; i2 < adim2; i2++ ) - { - r[i2][i1] = a[i1][i2]; - } - } -} - -static void matrixDiagonal( - float r[][MAX_MATRIX], - const float a[MAX_MATRIX], - const int dim ) -{ - for ( int i1 = 0; i1 < dim; i1++ ) - { - for ( int i2 = 0; i2 < dim; i2++ ) - { - r[i1][i2] = 0; - } - r[i1][i1] = a[i1]; - } -} - -static float matrixDifference( - const float a[][MAX_MATRIX], - const float b[][MAX_MATRIX], - const int dim1, - const int dim2 ) -{ - float r = 0.f; - - for ( int i1 = 0; i1 < dim1; i1++ ) - { - for ( int i2 = 0; i2 < dim2; i2++ ) - { - if ( a[i1][i2] != 0.f ) - { - r += fabsf( ( b[i1][i2] - a[i1][i2] ) / a[i1][i2] ); - } - else - { - r += fabsf( b[i1][i2] - a[i1][i2] ); - } - } - } - - return r / (float) ( dim1 * dim2 ); -} - -static void matrixPrint( - const float a[][MAX_MATRIX], - const int dim1, - const int dim2, - const char *name ) -{ - printf( "Matrix %s[%d][%d] = \n", name, dim1, dim2 
); - for ( int i1 = 0; i1 < dim1; i1++ ) - { - for ( int i2 = 0; i2 < dim2; i2++ ) - { - printf( "%.10e, ", a[i1][i2] ); - } - printf( "\n" ); - } -} - -static float matrixTestIdentity( - const float a[][MAX_MATRIX], - const int dim ) -{ - float r = 0.f; - - for ( int i1 = 0; i1 < dim; i1++ ) - { - for ( int i2 = 0; i2 < dim; i2++ ) - { - if ( i1 == i2 ) - { - r += fabsf( 1.f - a[i1][i2] ); - } - else - { - r += fabsf( 0.f - a[i1][i2] ); - } - } - } - - return r; -} - -#define PROBLEMATIC_THRESHOLD 0.5f -static void svd_accuracy_test_fx( - Word32 InputMatrixFx[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) InputMatrix_e*/ - Word16 InputMatrixFx_e, - Word32 singularVectors_LeftFx[][MAX_OUTPUT_CHANNELS], /* o : left singular vectors (U) Q31 */ - Word32 singularValuesFx[MAX_OUTPUT_CHANNELS], /* o : singular values vector (S) singularValues_fx_e*/ - Word32 singularVectors_RightFx[][MAX_OUTPUT_CHANNELS], /* o : right singular vectors (V) Q31 */ - Word16 singularValuesFx_e[MAX_OUTPUT_CHANNELS], - const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/ - const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/ -) -{ - float tmp1[MAX_MATRIX][MAX_MATRIX]; - float tmp2[MAX_MATRIX][MAX_MATRIX]; - float tmp3[MAX_MATRIX][MAX_MATRIX]; - float InputMatrix[MAX_MATRIX][MAX_MATRIX]; - - Word16 singularValuesFx2_e[MAX_OUTPUT_CHANNELS]; - - float singularVectors_Left[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - float singularValues[MAX_MATRIX]; - float singularValuesMatrix[MAX_MATRIX][MAX_MATRIX]; - float singularVectors_Right[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - float result; - int dimSingular; - int problematic = 0; - - /* Convert to float and Create singular values matrix from signular values vector */ - for ( int x = 0; x < MAX_MATRIX; x++ ) - singularValuesFx2_e[x] = InputMatrixFx_e; - matrixFx2Fl( InputMatrix, InputMatrixFx, singularValuesFx2_e, nChannelsL, nChannelsC ); - dimSingular = min( nChannelsL, 
nChannelsC ); - matrixFx2Fl( &singularValues, (Word32( * )[MAX_MATRIX]) singularValuesFx, singularValuesFx_e, 1, nChannelsC ); - for ( int x = 0; x < MAX_MATRIX; x++ ) - singularValuesFx2_e[x] = 0; - matrixFx2Fl( singularVectors_Left, singularVectors_LeftFx, singularValuesFx2_e, nChannelsL, nChannelsC ); - matrixFx2Fl( singularVectors_Right, singularVectors_RightFx, singularValuesFx2_e, nChannelsC, nChannelsC ); - matrixDiagonal( singularValuesMatrix, singularValues, dimSingular ); /* CxC */ - -#ifdef MORE_DEBUG - matrixPrint( InputMatrix, nChannelsL, nChannelsC, "A" ); - printf( "Result of svd() \n" ); - matrixPrint( singularVectors_Left, nChannelsL, nChannelsC, "U" ); - matrixPrint( singularValuesMatrix, nChannelsC, nChannelsC, "S" ); - matrixPrint( singularVectors_Right, nChannelsC, nChannelsC, "V" ); -#endif - - printf( "\nResult quality tests\n\n" ); - - /* Test U' * U == I */ - matrixTranspose( tmp1, singularVectors_Left, nChannelsL, nChannelsC ); /* CxL */ - matrixProduct( tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC ); /* CxC */ - result = matrixTestIdentity( tmp2, nChannelsC ); - if ( result >= PROBLEMATIC_THRESHOLD ) - { - problematic = 1; - } -#ifdef MORE_DEBUG - matrixPrint( tmp2, nChannelsC, nChannelsC, "U\'*U" ); -#endif - printf( "U' * U difference to I is %f\n", result ); - - /* Test V * V' == I */ - matrixTranspose( tmp1, singularVectors_Right, nChannelsC, nChannelsC ); /* CxC */ - matrixProduct( tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC ); /* CxC */ - result = matrixTestIdentity( tmp2, nChannelsC ); - if ( result >= PROBLEMATIC_THRESHOLD ) - { - problematic = 1; - } -#ifdef MORE_DEBUG - matrixPrint( tmp2, nChannelsC, nChannelsC, "V*V\'" ); -#endif - printf( "V * V' difference to I is %f\n", result ); - - /* Test InputMatrix == U * S * V' */ - matrixProduct( tmp1, singularVectors_Left, singularValuesMatrix, nChannelsL, nChannelsC, dimSingular, dimSingular ); /* LxC */ - 
matrixTranspose( tmp3, singularVectors_Right, nChannelsC, nChannelsC ); /* CxC */ - matrixProduct( tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC ); /* LxC */ - result = matrixDifference( tmp2, InputMatrix, nChannelsL, nChannelsC ); - if ( result >= PROBLEMATIC_THRESHOLD ) - { - problematic = 1; - } -#ifdef MORE_DEBUG - matrixPrint( tmp2, nChannelsL, nChannelsC, "U*S*V\'" ); -#endif - printf( "U * S * V' difference to M is %f\n", result ); - - if ( problematic ) - { - matrixPrint( InputMatrix, nChannelsL, nChannelsC, "Problematic Input" ); - } -} -#endif - /*------------------------------------------------------------------------- * svd() * @@ -615,24 +331,9 @@ Word16 svd_fx( Word16 temp_fx_e; push_wmops( "svd_fx" ); -#ifdef MORE_DEBUG2 - { - float input[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - Word16 exp_matrix[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - - for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ ) - for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ ) - exp_matrix[ii][iii] = InputMatrix_e; - - matrixFx2Fl2( input, InputMatrix, exp_matrix, nChannelsL, nChannelsC ); - matrixPrint2( input, nChannelsL, nChannelsC, " input " ); - } -#endif #ifndef FIX_1010_OPT_SINGLE_RESCALE set32_fx( secDiag_fx, 0, MAX_OUTPUT_CHANNELS ); - set16_fx( secDiag_fx_e, 0, MAX_OUTPUT_CHANNELS ); - set16_fx( singularValues_fx_e, 0, MAX_OUTPUT_CHANNELS ); #endif @@ -714,17 +415,6 @@ Word16 svd_fx( WHILE( EQ_16( condition, 1 ) ); pop_wmops(); -#ifdef DEBUG_SVD_PRECISION - svd_accuracy_test_fx( - InputMatrix, - InputMatrix_e, - singularVectors_Left_fx, - singularValues_fx, - singularVectors_Right_fx, - singularValues_fx_e, - nChannelsL, - nChannelsC ); -#endif return ( errorMessage ); } @@ -1323,18 +1013,6 @@ static void HouseholderReduction_fx( } } -#ifdef MORE_DEBUG2 - { - float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - float secDiag[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - - matrixFx2Fl2( singularVectors_Left, 
singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC ); - matrixFx2Fl2( secDiag, (Word32( * )[MAX_OUTPUT_CHANNELS]) secDiag_fx, (Word16( * )[MAX_OUTPUT_CHANNELS]) secDiag_fx_e, 1, nChannelsC ); - matrixPrint2( singularVectors_Left, nChannelsL, nChannelsC, "left" ); - matrixPrint2( secDiag, 1, nChannelsC, "secDiag" ); - } -#endif - /* SingularVecotr Accumulation */ #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC ); @@ -1346,36 +1024,10 @@ static void HouseholderReduction_fx( singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC ); #endif -#ifdef MORE_DEBUG2 - { - float singularVectors_Right[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - - for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ ) - for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ ) - singularVectors_Left_fx_e[ii][iii] = 0; - - matrixFx2Fl2( singularVectors_Right, singularVectors_Right_fx, singularVectors_Left_fx_e, nChannelsC, nChannelsC ); - matrixPrint2( singularVectors_Right, nChannelsC, nChannelsC, "right2" ); - } -#endif singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC ); #endif -#ifdef MORE_DEBUG2 - { - float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - - for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ ) - for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ ) - singularVectors_Left_fx_e[ii][iii] = 0; - - matrixFx2Fl2( singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC ); - matrixPrint2( singularVectors_Left, nChannelsL, 
nChannelsC, "left2" ); - } -#endif return; } @@ -1385,41 +1037,6 @@ static void HouseholderReduction_fx( * *-------------------------------------------------------------------------*/ -#ifdef FIX_1010_OPT_INV_USING_INVSQRT -static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) -{ - Word16 sign, shift, shift2; - - sign = 0; - move16(); - if ( x < 0 ) - { - sign = 1; - } - if ( sign ) - { - x = L_negate( x ); - } - - shift = norm_l( x ); - x = L_shl( x, shift ); - *px_e = 0; - move16(); - x = ISqrt32norm( x, px_e ); - x = Mpy_32_32( x, x ); - shift2 = norm_l( x ); - x = L_shl( x, shift2 ); - *px_e = add( shl( *px_e, 1 ), sub( shift, shift2 ) ); - move16(); - - if ( sign ) - { - x = L_negate( x ); - } - return x; -} -#endif - static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ @@ -1477,6 +1094,7 @@ ELSE IF( LT_16( *sig_x_e, *secDiag_e ) ) secDiag_e[currChannel] = *sig_x_e; move16(); #endif + /* Setting values to 0 */ ( *sig_x ) = 0; move32(); @@ -1502,11 +1120,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifdef FIX_1010_OPT_DIV Word16 invVal_e; Word32 invVal; -#ifdef FIX_1010_OPT_INV_USING_INVSQRT - invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); -#else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); -#endif #endif norm_x = 0; move32(); @@ -1576,11 +1190,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move32(); #ifdef FIX_1010_OPT_DIV -#ifdef FIX_1010_OPT_INV_USING_INVSQRT - invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); -#else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); -#endif #endif FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ @@ -1673,7 +1283,7 @@ return; static void biDiagonalReductionRight_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* 
exp(singularVectors_e) */ - Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ + Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_exp[]) */ #ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 *singularVectors_e, #else @@ -1741,11 +1351,7 @@ static void biDiagonalReductionRight_fx( #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; Word32 invVal; -#ifdef FIX_1010_OPT_INV_USING_INVSQRT - invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); -#else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); -#endif #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { @@ -1810,11 +1416,7 @@ static void biDiagonalReductionRight_fx( move32(); #ifdef FIX_1010_OPT_DIV -#ifdef FIX_1010_OPT_INV_USING_INVSQRT - invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); -#else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); -#endif #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ @@ -1898,6 +1500,8 @@ static void biDiagonalReductionRight_fx( *secDiag_e = exp_max; move16(); #endif + + #ifndef FIX_1010_OPT_SINGLE_RESCALE exp_max = *singularVectors_e; move16(); @@ -1976,11 +1580,7 @@ static void singularVectorsAccumulationLeft_fx( IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/ { #ifdef FIX_1010_OPT_DIV -#ifdef FIX_1010_OPT_INV_USING_INVSQRT - t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp ); -#else t_ii = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp ); -#endif t_ii_e = sub( temp_exp, t_ii_e ); #else t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */ @@ -2001,22 +1601,11 @@ static void singularVectorsAccumulationLeft_fx( norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), 
&norm_y_e ); /* exp(norm_y_e) */ #endif } -#ifdef FIX_1010_OPT_INV_USING_INVSQRT - Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] ); - t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp ); - t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj ); -#ifndef FIX_1010_OPT_SINGLE_RESCALE - t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); -#else - t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) ); -#endif -#else t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, #ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) ); -#endif #endif FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */ { @@ -2187,59 +1776,6 @@ static void singularVectorsAccumulationRight_fx( * *-------------------------------------------------------------------------*/ -#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN -#ifndef M_PI -#define M_PI 3.141592653589793 -#endif -#define NUM_REGIONS 128 -static Word32 alphaBeta[NUM_REGIONS][2]; -static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta ) -{ - static int init = 0; - - if ( init == 0 ) - { - for ( int i = 0; i < NUM_REGIONS; i++ ) - { - double thetaS, thetaE, thetaM; - - thetaS = M_PI / 4. * (double) i / (double) NUM_REGIONS; - thetaE = M_PI / 4. * (double) ( i + 1 ) / (double) NUM_REGIONS; - thetaM = M_PI / 4. * ( (double) i + 0.5 ) / (double) NUM_REGIONS; - // alphaBeta[i][0] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM))); - // alphaBeta[i][1] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM)) * tan((thetaS+thetaE)/2.)); - alphaBeta[i][0] = FL2WORD32( 2. 
/ ( ( ( sin( thetaM ) + sin( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ) + cos( thetaM ) + cos( thetaS ) ) ); - alphaBeta[i][1] = FL2WORD32( 2. / ( ( ( sin( thetaM ) + sin( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ) + cos( thetaM ) + cos( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ); - } - init = 1; - } - Word16 r, shift; -#if 0 - float pf, qf; - pf = (float)p * powf(2.f, p_e-31); - qf = (float)q * powf(2.f, q_e-31); - r = floor((double)NUM_REGIONS * 4. * atan2f(qf, pf)/M_PI); - if (r >= NUM_REGIONS) { - r = NUM_REGIONS-1; - } -#elif 1 - shift = sub( norm_l( q ), 1 ); - q = L_shl( q, shift ); - q_e = sub( q_e, shift ); - shift = norm_l( p ); - p = L_shl( p, shift ); - p_e = sub( p_e, shift ); - shift = sub( q_e, p_e ); - r = shl_sat( div_s( extract_h( q ), s_max( 1, extract_h( p ) ) ), shift ); - /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */ - r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) ); - r = s_min( s_max( 0, shr( r, WORD16_BITS - 1 - 7 - 3 ) ), NUM_REGIONS - 1 ); -#endif - assert( ( r >= 0 ) && ( r < NUM_REGIONS ) ); - *alpha = alphaBeta[r][0]; - *beta = alphaBeta[r][1]; -} -#endif #ifdef FIX_1010_OPT_GIVENS_INV static void GivensRotation2_fx( @@ -2253,35 +1789,6 @@ static void GivensRotation2_fx( Word16 *outInv_e ) { Word32 r; -#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN - Word32 az, ax, a, b; - - ax = L_abs( x ); - az = L_abs( z ); - IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 ) - { - get_alpha_beta( ax, x_e, az, z_e, &a, &b ); - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e ); - } - ELSE - { - get_alpha_beta( az, z_e, ax, x_e, &a, &b ); - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e ); - } - *result = r; - move32(); -#if 1 - *outInv_e = shl( *out_e, 1 ); - *resultInv = ISqrt32( L_max( 1, Mpy_32_32( r, r ) ), outInv_e ); 
- move32(); -#else - *resultInv = L_deposit_h( BASOP_Util_Divide3232_Scale( MAX_32, r, outInv_e ) ); - move32(); - *outInv_e = sub( *outInv_e, *out_e ); - move16(); -#endif - -#else r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e ); r = L_max( r, 1 ); *outInv_e = *out_e; @@ -2291,7 +1798,6 @@ static void GivensRotation2_fx( *resultInv = ISqrt32( r, outInv_e ); move32(); -#endif } #endif @@ -2312,25 +1818,8 @@ static Word32 GivensRotation_fx( #endif #ifdef FIX_1010_OPT_GIVENS -#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN - Word32 az, ax, a, b; - - ax = L_abs( x ); - az = L_abs( z ); - IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 ) - { - get_alpha_beta( ax, x_e, az, z_e, &a, &b ); - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e ); - } - ELSE - { - get_alpha_beta( az, z_e, ax, x_e, &a, &b ); - r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e ); - } -#else r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e ); r = Sqrt32( r, out_e ); -#endif #else x_abs = L_abs( x ); z_abs = L_abs( z ); -- GitLab From 6894dd5e81fc265d1f959b7fd4271ca755e92905 Mon Sep 17 00:00:00 2001 From: Manuel Jander Date: Mon, 27 Jan 2025 11:58:50 +0100 Subject: [PATCH 40/41] clang format --- lib_dec/ivas_svd_dec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index d3377ebba..328a67a46 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -1363,7 +1363,7 @@ static void biDiagonalReductionRight_fx( move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else - singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( 
*sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ + singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ move32(); singularVectors2_e[currChannel][jCh] = add( L_temp_e, sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) ); move16(); @@ -1428,7 +1428,7 @@ static void biDiagonalReductionRight_fx( secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) ); move32(); #else - secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ + secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ move32(); secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( singularVectors2_e[currChannel][jCh], r_e ) ); move32(); @@ -1601,7 +1601,7 @@ static void singularVectorsAccumulationLeft_fx( norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ #endif } - t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, + t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, #ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else -- GitLab From eebf398b476c5e9d96dd6aa2506354de0a344aec Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Tue, 28 Jan 2025 13:28:32 +0100 Subject: [PATCH 41/41] move switches to options.h --- 
lib_com/options.h | 6 ++++++ lib_dec/ivas_svd_dec.c | 9 --------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 208175b46..7e8d63c30 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -128,4 +128,10 @@ #define FIX_ISSUE_1209 /* Ittiam: Fix for issue 1209: Assertion exit in BASOP encoder (stereo_dmx_evs)*/ #define IVAS_ISSUE_1188_EVS_CRASH /* Ittiam: Fix for issue 1188: Issue due to ASAN */ #define FIX_ISSUE_1155 /* Ittiam: Fix for issue 1155: Encoder crash for Stereo at 32kbps in PostShortTerm_ivas_enc_fx()*/ +#define FIX_1010_OPT_DIV /* FhG: SVD complexity optimizations (non-be) */ +#define FIX_1010_OPT_SINGLE_RESCALE /* FhG: SVD complexity optimizations (non-be) */ +#define FIX_1010_OPT_GIVENS /* FhG: SVD complexity optimizations (non-be) */ +#define FIX_1010_OPT_GIVENS_INV /* FhG: SVD complexity optimizations (non-be) */ +#define FIX_1010_OPT_NORM_NOSAT /* FhG: SVD complexity optimizations (non-be) */ +#define FIX_1010_OPT_SEC_SINGLE_RESCALE /* FhG: SVD complexity optimizations (non-be) */ #endif diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 328a67a46..dc1965a5b 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -51,15 +51,6 @@ #define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 ) #define CONVERGENCE_FACTOR_FX 214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */ -#if 1 -#define FIX_1010_OPT_DIV -#define FIX_1010_OPT_SINGLE_RESCALE -#define FIX_1010_OPT_GIVENS -#define FIX_1010_OPT_GIVENS_INV -#define FIX_1010_OPT_NORM_NOSAT -#define FIX_1010_OPT_SEC_SINGLE_RESCALE -#endif - /*-----------------------------------------------------------------------* * Local function prototypes *-----------------------------------------------------------------------*/ -- GitLab