diff --git a/lib_com/options.h b/lib_com/options.h index 208175b4665b71d3e330db6a329c54e1c42d963c..7e8d63c30df74c4041bf4e283990a334c244f1f8 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -128,4 +128,10 @@ #define FIX_ISSUE_1209 /* Ittiam: Fix for issue 1209: Assertion exit in BASOP encoder (stereo_dmx_evs)*/ #define IVAS_ISSUE_1188_EVS_CRASH /* Ittiam: Fix for issue 1188: Issue due to ASAN */ #define FIX_ISSUE_1155 /* Ittiam: Fix for issue 1155: Encoder crash for Stereo at 32kbps in PostShortTerm_ivas_enc_fx()*/ +#define FIX_1010_OPT_DIV /* FhG: SVD complexity optimizations (non-be): compute one reciprocal per divisor and multiply, instead of dividing per element */ +#define FIX_1010_OPT_SINGLE_RESCALE /* FhG: SVD complexity optimizations (non-be): track per-element exponents of the singular vectors instead of rescaling to a common exponent */ +#define FIX_1010_OPT_GIVENS /* FhG: SVD complexity optimizations (non-be) */ +#define FIX_1010_OPT_GIVENS_INV /* FhG: SVD complexity optimizations (non-be): GivensRotation2_fx() also outputs resultInv, which is multiplied in place of divisions by the rotation result */ +#define FIX_1010_OPT_NORM_NOSAT /* FhG: SVD complexity optimizations (non-be): normalise c/s with norm_l()/L_shl() instead of the saturating shift L_shl_sat() */ +#define FIX_1010_OPT_SEC_SINGLE_RESCALE /* FhG: SVD complexity optimizations (non-be): track per-element exponents of secDiag instead of rescaling to a common exponent */ #endif diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 3b8a90a4c87bd969a3359b24a3ee577266a1b55e..dc1965a5bdea4de61cb93bfb53464cf6b3c7ccf9 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -50,6 +50,7 @@ #define SVD_MINIMUM_VALUE_FX ( 2 ) /* minimum value */ #define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 ) #define CONVERGENCE_FACTOR_FX 214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */ + /*-----------------------------------------------------------------------* * Local function prototypes *-----------------------------------------------------------------------*/ @@ -71,7 +72,11 @@ static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 *singularVectors_e, +#else + Word16 
singularVectors2_e[][MAX_OUTPUT_CHANNELS], +#endif Word16 singularValues_e[MAX_OUTPUT_CHANNELS], Word16 *secDiag_e, const Word16 nChannelsL, /* Q0 */ @@ -85,7 +90,11 @@ static void biDiagonalReductionLeft_fx( static void biDiagonalReductionRight_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 *singularVectors_e, +#else + Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS], +#endif Word16 *secDiag_e, const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ @@ -98,7 +107,11 @@ static void biDiagonalReductionRight_fx( static void singularVectorsAccumulationLeft_fx( Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 singularVectors_e, +#else + Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS], +#endif Word16 singularValues_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC /* Q0 */ @@ -108,8 +121,16 @@ static void singularVectorsAccumulationRight_fx( Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */ Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 singularVectors_e, +#else + Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS], +#endif +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word16 secDiag_e, +#else + Word16 *secDiag_e, +#endif const Word16 nChannelsC /* Q0 */ ); @@ -149,6 +170,18 @@ static void ApplyRotation_fx( const Word16 nChannels /* Q0 */ ); +#ifdef FIX_1010_OPT_GIVENS_INV +static void GivensRotation2_fx( + const Word32 x, /* exp(x_e) */ + const Word16 x_e, + const Word32 z, /* exp(z_e) */ + const Word16 z_e, + Word32 *result, + Word32 *resultInv, + Word16 *out_e, + 
Word16 *outInv_e ); +#endif + static Word32 GivensRotation_fx( const Word32 x, /* exp(x_e) */ const Word16 x_e, @@ -276,7 +309,11 @@ Word16 svd_fx( Word16 errorMessage, condition; // int16_t max_length = ((nChannelsL > nChannelsC) ? nChannelsL : nChannelsC); Word32 secDiag_fx[MAX_OUTPUT_CHANNELS]; +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word16 secDiag_fx_e = 0; +#else + Word16 secDiag_fx_e[MAX_OUTPUT_CHANNELS]; +#endif move16(); Word32 eps_x_fx = 0, temp_fx; move16(); @@ -285,7 +322,11 @@ Word16 svd_fx( Word16 temp_fx_e; push_wmops( "svd_fx" ); + +#ifndef FIX_1010_OPT_SINGLE_RESCALE set32_fx( secDiag_fx, 0, MAX_OUTPUT_CHANNELS ); + set16_fx( singularValues_fx_e, 0, MAX_OUTPUT_CHANNELS ); +#endif /* Collecting Values */ FOR( iCh = 0; iCh < nChannelsL; iCh++ ) @@ -297,19 +338,23 @@ Word16 svd_fx( } } - set16_fx( singularValues_fx_e, 0, MAX_OUTPUT_CHANNELS ); - /* Householder reduction */ +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, &secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e ); - +#else + HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e ); +#endif /* Set extremely small values to zero if needed */ // flushToZeroArray(singularValues, max_length); // flushToZeroMat(singularVectors_Left, nChannelsL, nChannelsL); // flushToZeroMat(singularVectors_Right, nChannelsC, nChannelsC); /* BidagonalDiagonalisation */ +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, &secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */ - +#else + errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, 
singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */ +#endif /* Sort the singular values descending order */ lengthSingularValues = s_min( nChannelsL, nChannelsC ); /* Q0 */ @@ -381,11 +426,15 @@ static Word16 BidagonalDiagonalisation_fx( Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_fx_e*/ Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_fx_e*/ Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */ - Word16 *secDiag_fx_e, /* i/o: */ - const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/ - const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed Q0*/ - const Word32 eps_x, /* i : eps_x_e*/ - const Word16 eps_x_e /* i : */ +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE + Word16 *secDiag_fx_e, /* i/o: */ +#else + Word16 *secDiag_new_e, /* i/o: */ +#endif + const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/ + const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed Q0*/ + const Word32 eps_x, /* i : eps_x_e*/ + const Word16 eps_x_e /* i : */ ) { Word16 kCh, nCh, iCh, jCh, split; @@ -395,6 +444,9 @@ static Word16 BidagonalDiagonalisation_fx( move16(); move16(); Word16 temp_exp; +#ifdef FIX_1010_OPT_NORM_NOSAT + Word16 temp_exp2; +#endif Word32 g = 0; move16(); Word16 g_e = 0; @@ -402,9 +454,15 @@ static Word16 BidagonalDiagonalisation_fx( Word16 convergence, iteration, found_split; Word16 error = 0; move16(); - Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS], secDiag_new_e[MAX_OUTPUT_CHANNELS]; - Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS ); +#ifdef FIX_1010_OPT_GIVENS_INV + Word32 temp; +#endif + Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS]; +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE + Word16 secDiag_new_e[MAX_OUTPUT_CHANNELS]; set16_fx( secDiag_new_e, *secDiag_fx_e, 
MAX_OUTPUT_CHANNELS ); +#endif + Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS ); FOR( iCh = nChannelsC - 1; iCh >= 0; iCh-- ) /* nChannelsC */ { @@ -472,24 +530,46 @@ static Word16 BidagonalDiagonalisation_fx( c = singularValues_fx[kCh]; /* exp(singularValues_new_e) */ c_e = singularValues_new_e[kCh]; - singularValues_fx[kCh] = GivensRotation_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_new_e[kCh] ); /* exp(singularValues_new_e) */ - c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp ); /* exp(temp_exp + (c_e - singularValues_new_e)) */ +#ifdef FIX_1010_OPT_GIVENS_INV + GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */ + c = Mpy_32_32( c, temp ); + c_e = add( c_e, temp_exp ); +#else + singularValues_fx[kCh] = GivensRotation_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_new_e[kCh] ); /* exp(singularValues_new_e) */ + c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp ); /* exp(temp_exp + (c_e - singularValues_new_e)) */ c_e = add( temp_exp, sub( c_e, singularValues_new_e[kCh] ) ); +#endif +#ifndef FIX_1010_OPT_NORM_NOSAT IF( c_e > 0 ) { c = L_shl_sat( c, c_e ); // Q31 c_e = 0; move16(); } +#else + temp_exp2 = norm_l( c ); + c = L_shl( c, temp_exp2 ); + c_e = sub( c_e, temp_exp2 ); +#endif +#ifdef FIX_1010_OPT_GIVENS_INV + s = Mpy_32_32( -g, temp ); + s_e = add( g_e, temp_exp ); +#else s = BASOP_Util_Divide3232_Scale_cadence( -g, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp ); /* exp(temp_exp + (g_e - singularValues_new_e))*/ s_e = add( temp_exp, sub( g_e, singularValues_new_e[kCh] ) ); +#endif +#ifndef FIX_1010_OPT_NORM_NOSAT IF( s_e > 0 ) { s = L_shl_sat( s, s_e ); // Q31 s_e = 0; move16(); } - +#else + temp_exp2 = norm_l( s ); + s = L_shl( s, 
temp_exp2 ); + s_e = sub( s_e, temp_exp2 ); +#endif ApplyRotation_fx( singularVectors_Left_fx, c, c_e, s, s_e, 0, x11_e, 0, x12_e, &f1, &f1_e, &f2, &f2_e, kCh, split, nChannelsL ); /* nChannelsL */ } } @@ -540,6 +620,7 @@ static Word16 BidagonalDiagonalisation_fx( // rescaling block Copy( singularValues_new_e, singularValues_fx_e, MAX_OUTPUT_CHANNELS ); +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word16 max_exp = -31; move16(); FOR( iCh = 0; iCh < nChannelsC; iCh++ ) @@ -556,6 +637,7 @@ static Word16 BidagonalDiagonalisation_fx( secDiag_fx[iCh] = L_shr_r( secDiag_fx[iCh], sub( *secDiag_fx_e, secDiag_new_e[iCh] ) ); /* exp(secDiag_fx_e) */ move32(); } +#endif return ( error ); } @@ -579,6 +661,13 @@ static void ApplyQRTransform_fx( const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/ ) { +#ifdef FIX_1010_OPT_GIVENS_INV + Word32 temp; + Word16 temp_e; +#endif +#ifdef FIX_1010_OPT_NORM_NOSAT + Word16 temp_norm_e; +#endif Word16 ch, split; Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0; move32(); @@ -681,25 +770,47 @@ static void ApplyQRTransform_fx( g = Mpy_32_32( c, secDiag[ch + 1] ); /* exp(c_e + secDiag_e) */ g_e = add( c_e, secDiag_e[ch + 1] ); +#ifdef FIX_1010_OPT_GIVENS_INV + GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */ + c = Mpy_32_32( d, temp ); + c_e = add( temp_e, d_e ); +#else secDiag[ch] = GivensRotation_fx( d, d_e, r, r_e, &secDiag_e[ch] ); /* exp(secDiag_e) */ move32(); c = BASOP_Util_Divide3232_Scale_cadence( d, maxWithSign_fx( secDiag[ch] ), &c_e ); /* exp(c_e + (d_e + secDiag_e)) */ c_e = add( c_e, sub( d_e, secDiag_e[ch] ) ); +#endif +#ifndef FIX_1010_OPT_NORM_NOSAT IF( c_e > 0 ) { c = L_shl_sat( c, c_e ); // Q31 c_e = 0; move16(); } +#else + temp_norm_e = norm_l( c ); + c = L_shl( c, temp_norm_e ); + c_e = sub( c_e, temp_norm_e ); +#endif +#ifdef FIX_1010_OPT_GIVENS_INV + s = Mpy_32_32( r, temp ); + s_e = add( r_e, temp_e ); +#else 
s = BASOP_Util_Divide3232_Scale_cadence( r, maxWithSign_fx( secDiag[ch] ), &s_e ); /* exp(s_e + (r_e - sec_Diag_e))*/ s_e = add( s_e, sub( r_e, secDiag_e[ch] ) ); +#endif +#ifndef FIX_1010_OPT_NORM_NOSAT IF( s_e > 0 ) { s = L_shl_sat( s, s_e ); // Q31 s_e = 0; move16(); } - +#else + temp_norm_e = norm_l( s ); + s = L_shl( s, temp_norm_e ); + s_e = sub( s_e, temp_norm_e ); +#endif r = Mpy_32_32( s, singularValues[ch + 1] ); /* exp(r_e + secDiag_e) */ r_e = add( s_e, singularValues_e[ch + 1] ); x_split = Mpy_32_32( c, singularValues[ch + 1] ); /* exp(c_e + secDiag_e) */ @@ -713,30 +824,48 @@ static void ApplyQRTransform_fx( // ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC); ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC ); +#ifdef FIX_1010_OPT_GIVENS_INV + GivensRotation2_fx( d, d_e, r, r_e, &singularValues[ch], &aux, &singularValues_e[ch], &aux_e ); /* exp(singularValues_e) */ +#else singularValues[ch] = GivensRotation_fx( d, d_e, r, r_e, &singularValues_e[ch] ); /* exp(singularValues_e) */ move32(); - IF( GT_32( L_abs( singularValues[ch] ), Mpy_32_32( CONVERGENCE_FACTOR_FX, L_abs( singularValues[ch] ) ) ) ) +#endif + IF( singularValues[ch] != 0 ) { +#ifndef FIX_1010_OPT_GIVENS_INV aux = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, singularValues[ch], &aux_e ); /* exp(aux_e + (1 - singularValues_e)) */ aux_e = add( aux_e, sub( 1, singularValues_e[ch] ) ); +#endif c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */ c_e = add( d_e, aux_e ); +#ifndef FIX_1010_OPT_NORM_NOSAT IF( c_e > 0 ) { c = L_shl_sat( c, c_e ); // Q31 c_e = 0; move16(); } +#else + temp_norm_e = norm_l( c ); + c = L_shl( c, temp_norm_e ); + c_e = sub( c_e, temp_norm_e ); +#endif s = Mpy_32_32( r, aux ); /* exp(r_e + aux_e) */ s_e = add( r_e, aux_e ); +#ifndef FIX_1010_OPT_NORM_NOSAT IF( s_e > 0 ) { s = L_shl_sat( s, s_e ); // Q31 s_e = 0; move16(); } +#else + temp_norm_e = norm_l( s 
); + s = L_shl( s, temp_norm_e ); + s_e = sub( s_e, temp_norm_e ); +#endif } // ApplyRotation(singularVectors_Left, c, s, g, x_split, &d, &x_ii, ch + 1, ch, nChannelsL); @@ -836,14 +965,36 @@ static void HouseholderReduction_fx( Word16 sig_x_fx_e = 0; move16(); +#ifdef FIX_1010_OPT_SINGLE_RESCALE + Word16 iCh, jCh; + Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + FOR( jCh = 0; jCh < nChannelsL; jCh++ ) + { + FOR( iCh = 0; iCh < nChannelsC; iCh++ ) + { + singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e; + move16(); + } + } +#endif + /* Bidiagonal Reduction for every channel */ FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, &singularVectors_Left_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx ); biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, &singularVectors_Left_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx ); +#else + biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, singularVectors_Left_fx_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx ); + biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx ); +#endif Word16 L_temp_e; +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), *secDiag_fx_e, &L_temp_e ); /* exp(L_temp_e) */ +#else + Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */ +#endif IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e 
), 1 ) ) { *eps_x_fx = L_temp; /* exp(L_temp_e) */ @@ -854,9 +1005,19 @@ static void HouseholderReduction_fx( } /* SingularVecotr Accumulation */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC ); - singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_e, singularValues_fx_e, nChannelsL, nChannelsC ); +#else +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE + singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, *secDiag_fx_e, nChannelsC ); +#else + singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC ); +#endif + + + singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC ); +#endif return; } @@ -871,7 +1032,11 @@ static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 *singularVectors_e, +#else + Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS], +#endif Word16 singularValues_e[MAX_OUTPUT_CHANNELS], Word16 *secDiag_e, const Word16 nChannelsL, /* Q0 */ @@ -885,17 +1050,20 @@ static void biDiagonalReductionLeft_fx( Word16 iCh, jCh, idx; Word32 norm_x, f, r; Word16 norm_x_e, f_e, r_e; - Word16 sing_exp[MAX_OUTPUT_CHANNELS]; - Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 }; Word32 L_temp; Word16 L_temp_e; +#ifndef FIX_1010_OPT_SINGLE_RESCALE + Word16 sing_exp[MAX_OUTPUT_CHANNELS]; + Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 }; FOR( jCh = 0; jCh < 
MAX_OUTPUT_CHANNELS; jCh++ ) { set16_fx( sing_exp2[jCh], *singularVectors_e, MAX_OUTPUT_CHANNELS ); } +#endif secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); /* exp(sig_x_e) */ move32(); +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE // rescaling block IF( GT_16( *sig_x_e, *secDiag_e ) ) { @@ -913,6 +1081,10 @@ ELSE IF( LT_16( *sig_x_e, *secDiag_e ) ) secDiag[currChannel] = L_shr_r( secDiag[currChannel], sub( *secDiag_e, *sig_x_e ) ); /* exp(secDiag_e) */ move32(); } +#else + secDiag_e[currChannel] = *sig_x_e; + move16(); +#endif /* Setting values to 0 */ ( *sig_x ) = 0; @@ -927,22 +1099,54 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */ +#else + ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */ +#endif } IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { +#ifdef FIX_1010_OPT_DIV + Word16 invVal_e; + Word32 invVal; + invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); +#endif norm_x = 0; move32(); norm_x_e = 0; move16(); FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { +#ifndef FIX_1010_OPT_DIV +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#else + 
singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + move32(); + singularVectors2_e[jCh][currChannel] = add( L_temp_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ); + move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#endif +#else + Word16 temp_e = norm_l( singularVectors[jCh][currChannel] ); + singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + move32(); +#ifndef FIX_1010_OPT_SINGLE_RESCALE + sing_exp[jCh] = sub( add( invVal_e, sub( *singularVectors_e, *sig_x_e ) ), temp_e ); + move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#else + singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e ); + move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#endif +#endif } IF( GT_16( norm_x_e, 0 ) ) { @@ -967,10 +1171,19 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move32(); } +#ifndef FIX_1010_OPT_SINGLE_RESCALE r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[currChannel], -( *g ), 0, &sing_exp[currChannel] ); /* sing_exp */ 
+#else + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */ +#endif move32(); +#ifdef FIX_1010_OPT_DIV + invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); +#endif + FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ { norm_x = 0; @@ -979,15 +1192,28 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move16(); FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e ), &norm_x_e ); /* exp(norm_x_e) */ +#else + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */ +#endif } +#ifndef FIX_1010_OPT_DIV f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); /* f_e + (norm_x_e - r_e) */ f_e = add( f_e, sub( norm_x_e, r_e ) ); +#else + f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ + f_e = add( invVal_e, sub( norm_x_e, r_e ) ); +#endif FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], *singularVectors_e, Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, sing_exp[jCh] ), &sing_exp2[jCh][iCh] ); /* exp( sing_exp2) */ +#else + singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], 
singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] ); +#endif move32(); } } @@ -997,10 +1223,15 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ { singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], ( *sig_x ) ); /* sing_exp + sig_x_e */ move32(); +#ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp2[jCh][currChannel] = add( sing_exp[jCh], *sig_x_e ); +#else + singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e ); +#endif move16(); } +#ifndef FIX_1010_OPT_SINGLE_RESCALE // rescaling block Word16 exp_max = *singularVectors_e; move16(); @@ -1022,6 +1253,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ } *singularVectors_e = exp_max; move16(); +#endif } // rescaling block @@ -1042,9 +1274,17 @@ return; static void biDiagonalReductionRight_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ - Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ + Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_exp[]) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 *singularVectors_e, +#else + Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS], +#endif +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word16 *secDiag_e, +#else + Word16 *secDiag_exp, +#endif const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ @@ -1056,16 +1296,22 @@ static void biDiagonalReductionRight_fx( Word16 iCh, jCh, idx; Word32 norm_x, r; Word16 norm_x_e, r_e; - Word16 sing_exp[MAX_OUTPUT_CHANNELS]; +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word16 secDiag_exp[MAX_OUTPUT_CHANNELS]; - Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 }; +#endif Word32 L_temp; Word16 L_temp_e; +#ifndef FIX_1010_OPT_SINGLE_RESCALE + Word16 sing_exp[MAX_OUTPUT_CHANNELS]; + Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 }; FOR( jCh = 0; jCh < 
MAX_OUTPUT_CHANNELS; jCh++ ) { set16_fx( sing_exp2[jCh], *singularVectors_e, MAX_OUTPUT_CHANNELS ); } +#endif +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE set16_fx( secDiag_exp, *secDiag_e, MAX_OUTPUT_CHANNELS ); +#endif /* Setting values to 0 */ ( *sig_x ) = 0; @@ -1079,7 +1325,11 @@ static void biDiagonalReductionRight_fx( FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), *singularVectors_e, sig_x_e ); /* exp(sig_x_e) */ +#else + ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), singularVectors2_e[currChannel][jCh], sig_x_e ); /* exp(sig_x_e) */ +#endif } IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ @@ -1089,13 +1339,41 @@ static void biDiagonalReductionRight_fx( norm_x_e = 0; move16(); +#ifdef FIX_1010_OPT_DIV + Word16 invVal_e, temp_e; + Word32 invVal; + invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); +#endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { +#ifndef FIX_1010_OPT_DIV +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#else + singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ + move32(); + singularVectors2_e[currChannel][jCh] = add( L_temp_e, sub( 
singularVectors2_e[currChannel][jCh], *sig_x_e ) ); + move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#endif +#else + temp_e = norm_l( singularVectors[currChannel][jCh] ); + singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ + move32(); +#ifndef FIX_1010_OPT_SINGLE_RESCALE + sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) ); + move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#else + singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) ); + move16(); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#endif +#endif } IF( GT_16( norm_x_e, 0 ) ) { @@ -1119,16 +1397,44 @@ static void biDiagonalReductionRight_fx( move32(); } +#ifndef FIX_1010_OPT_SINGLE_RESCALE r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[idx], -( *g ), 0, &sing_exp[idx] ); /* exp(sing_exp) */ +#else + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], 
singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* exp(sing_exp) */ +#endif move32(); +#ifdef FIX_1010_OPT_DIV + invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); +#endif + FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { +#ifndef FIX_1010_OPT_DIV +#ifndef FIX_1010_OPT_SINGLE_RESCALE secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ move32(); secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) ); move32(); +#else + secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (singularVectors2_e - r_e)) */ + move32(); + secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( singularVectors2_e[currChannel][jCh], r_e ) ); + move16(); /* secDiag_exp[] is a Word16 store, same counter as the FIX_1010_OPT_DIV branch */ +#endif +#else + temp_e = norm_l( singularVectors[currChannel][jCh] ); + secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp((invVal_e - temp_e) + (element exponent - r_e)) */ + move32(); +#ifndef FIX_1010_OPT_SINGLE_RESCALE + secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( sing_exp[jCh], r_e ) ); +#else + secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], r_e ) ); +#endif + move16(); +#endif } FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /* nChannelsL */ @@ -1139,12 +1445,20 @@ static void biDiagonalReductionRight_fx( move16(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( *singularVectors_e, sing_exp[jCh] ), &norm_x_e ); /* exp(norm_x_e) */ +#else + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], 
singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */ +#endif } FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &sing_exp2[iCh][jCh] ); /* exp(sing_exp2) */ +#else + singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */ +#endif move32(); } } @@ -1153,10 +1467,15 @@ static void biDiagonalReductionRight_fx( { singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], ( *sig_x ) ); /* exp(sing_exp + sig_x_e) */ move32(); +#ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp2[currChannel][jCh] = add( sing_exp[jCh], *sig_x_e ); +#else + singularVectors2_e[currChannel][jCh] = add( singularVectors2_e[currChannel][jCh], *sig_x_e ); +#endif move16(); } +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE /*rescaling block*/ Word16 exp_max = *secDiag_e; move16(); @@ -1169,8 +1488,12 @@ static void biDiagonalReductionRight_fx( secDiag[jCh] = L_shr_r( secDiag[jCh], sub( exp_max, secDiag_exp[jCh] ) ); /* exp(exp_max) */ move32(); } + *secDiag_e = exp_max; + move16(); +#endif +#ifndef FIX_1010_OPT_SINGLE_RESCALE exp_max = *singularVectors_e; move16(); FOR( iCh = 0; iCh < nChannelsL; iCh++ ) @@ -1191,6 +1514,7 @@ static void biDiagonalReductionRight_fx( } *singularVectors_e = exp_max; move16(); +#endif } } @@ -1204,9 +1528,13 @@ static void biDiagonalReductionRight_fx( *-------------------------------------------------------------------------*/ static void singularVectorsAccumulationLeft_fx( - Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output 
*/ + Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 singularVectors_e, +#else + Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS], +#endif Word16 singularValues_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC /* Q0 */ @@ -1216,11 +1544,13 @@ static void singularVectorsAccumulationLeft_fx( Word16 nChannels; Word32 norm_y, t_jj, t_ii; Word16 norm_y_e, t_jj_e, t_ii_e, temp_exp; +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 }; FOR( nCh = 0; nCh < MAX_OUTPUT_CHANNELS; nCh++ ) { set16_fx( sing_exp2[nCh], singularVectors_e, MAX_OUTPUT_CHANNELS ); } +#endif /* Processing */ nChannels = s_min( nChannelsL, nChannelsC ); /* min(nChannelsL,ChannelsC) Q0*/ @@ -1240,8 +1570,13 @@ static void singularVectorsAccumulationLeft_fx( IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/ { +#ifdef FIX_1010_OPT_DIV + t_ii = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp ); + t_ii_e = sub( temp_exp, t_ii_e ); +#else t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */ t_ii_e = add( 1, sub( temp_exp, t_ii_e ) ); +#endif // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) ); FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ { @@ -1251,14 +1586,25 @@ static void singularVectorsAccumulationLeft_fx( move16(); FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ +#else + norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], 
singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ +#endif } t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, +#ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); - +#else + t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) ); +#endif FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], sing_exp2[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, sing_exp2[k][nCh] ), &sing_exp2[k][iCh] ); /* exp(sing_exp2) */ +#else + singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], singularVectors_Left_e[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, singularVectors_Left_e[k][nCh] ), &singularVectors_Left_e[k][iCh] ); /* exp(sing_exp2) */ +#endif move32(); } } @@ -1267,7 +1613,11 @@ static void singularVectorsAccumulationLeft_fx( { singularVectors_Left[iCh][nCh] = Mpy_32_32( singularVectors_Left[iCh][nCh], t_ii ); /* exp(sing_exp2 + t_ii_e) */ move32(); +#ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp2[iCh][nCh] = add( sing_exp2[iCh][nCh], t_ii_e ); +#else + singularVectors_Left_e[iCh][nCh] = add( singularVectors_Left_e[iCh][nCh], t_ii_e ); +#endif move16(); } } @@ -1279,8 +1629,11 @@ static void singularVectorsAccumulationLeft_fx( move32(); } } - +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], sing_exp2[nCh][nCh], ONE_IN_Q30, 1, &sing_exp2[nCh][nCh] ); /* exp(sing_exp2) */ +#else + singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], 
singularVectors_Left_e[nCh][nCh], ONE_IN_Q30, 1, &singularVectors_Left_e[nCh][nCh] ); /* exp(sing_exp2) */ +#endif move32(); } // fclose(fp); @@ -1288,7 +1641,11 @@ static void singularVectorsAccumulationLeft_fx( { FOR( iCh = 0; iCh < nChannelsC; iCh++ ) { +#ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], sing_exp2[nCh][iCh] ); /* Q31 */ +#else + singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */ +#endif move32(); } } @@ -1303,11 +1660,19 @@ static void singularVectorsAccumulationLeft_fx( *-------------------------------------------------------------------------*/ static void singularVectorsAccumulationRight_fx( - Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ - Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ + Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */ + Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ +#ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 singularVectors_e, +#else + Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS], +#endif +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE Word16 secDiag_e, +#else + Word16 *secDiag_e, +#endif const Word16 nChannelsC /* Q0 */ ) { @@ -1320,7 +1685,7 @@ static void singularVectorsAccumulationRight_fx( nChannels = nChannelsC; /* nChannelsC Q0*/ /* avoid compiler warning */ - t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e) */ + t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e[nChannels - 1]) */ move32(); FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* nChannelsC, min(nChannelsLmnChannelsC) otherwise */ @@ -1333,10 +1698,22 @@ static void singularVectorsAccumulationRight_fx( FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/ { +#ifdef FIX_1010_OPT_DIV + ratio_float = 
L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */ + singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ +#else ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */ singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */ +#endif +#ifdef FIX_1010_OPT_SINGLE_RESCALE + temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) ); +#endif move32(); +#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e ) ); +#else + sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e[nCh + 1] ) ); +#endif move16(); // singularVectors_Right[iCh][nCh] = L_shl_sat( singularVectors_Right[iCh][nCh], temp_exp2 ); } @@ -1350,7 +1727,11 @@ static void singularVectorsAccumulationRight_fx( FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */ { +#ifndef FIX_1010_OPT_SINGLE_RESCALE norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_e, sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ +#else + norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_Left_e[nCh][k], sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ +#endif } FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */ @@ -1374,7 +1755,7 @@ static void 
singularVectorsAccumulationRight_fx( } singularVectors_Right[nCh][nCh] = MAX_32; move32(); - t_ii = secDiag[nCh]; /* exp(secDiag_e) */ + t_ii = secDiag[nCh]; /* exp(secDiag_e[nCh]) */ move32(); } return; @@ -1386,6 +1767,31 @@ static void singularVectorsAccumulationRight_fx( * *-------------------------------------------------------------------------*/ + +#ifdef FIX_1010_OPT_GIVENS_INV /* GivensRotation2_fx: forms r = z*z + x*x in mantissa/exponent form, clamps r to at least 1, then writes Sqrt32( r ) to *result and ISqrt32( r ) to *resultInv, each with its own exponent. Only compiled when FIX_1010_OPT_GIVENS_INV is defined. */ +static void GivensRotation2_fx( + const Word32 x, /* exp(x_e) */ + const Word16 x_e, /* exponent of x */ + const Word32 z, /* exp(z_e) */ + const Word16 z_e, /* exponent of z */ + Word32 *result, /* out: Sqrt32 of the clamped sum, exp(*out_e) */ + Word32 *resultInv, /* out: ISqrt32 of the clamped sum, exp(*outInv_e) */ + Word16 *out_e, /* out: exponent belonging to *result */ + Word16 *outInv_e ) /* out: exponent belonging to *resultInv */ +{ + Word32 r; + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e ); /* squaring doubles each exponent, hence shl( .., 1 ); exponent of the sum is returned in *out_e */ + r = L_max( r, 1 ); /* lower-bound the mantissa at 1; presumably keeps ISqrt32 away from a zero argument - TODO confirm */ + *outInv_e = *out_e; /* Sqrt32 below receives out_e by pointer, so the inverse path gets its own copy of the sum's exponent first */ + move16(); + *result = Sqrt32( r, out_e ); + move32(); + + *resultInv = ISqrt32( r, outInv_e ); + move32(); +} +#endif + static Word32 GivensRotation_fx( const Word32 x, /* exp(x_e) */ const Word16 x_e, @@ -1393,10 +1799,19 @@ static Word32 GivensRotation_fx( const Word16 z_e, Word16 *out_e ) { +#ifdef FIX_1010_OPT_GIVENS + Word32 r; +#else Word32 x_abs, z_abs; Word32 cotan, tan, r; Word16 temp_exp; Word32 L_temp; +#endif + +#ifdef FIX_1010_OPT_GIVENS + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e ); + r = Sqrt32( r, out_e ); +#else x_abs = L_abs( x ); z_abs = L_abs( z ); test(); @@ -1439,7 +1854,7 @@ static Word32 GivensRotation_fx( *out_e = add( z_e, temp_exp ); } } - +#endif return ( r ); }