Loading lib_dec/ivas_svd_dec_fx.c +317 −2 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ the United Nations Convention on Contracts on the International Sales of Goods. *******************************************************************************************************/ #define MYCHANGES #include <stdint.h> #include "options.h" #include "prot_fx.h" Loading Loading @@ -65,7 +65,30 @@ static void HouseholderReduction_fx( const Word16 nChannelsC, /* Q0 */ Word32 *eps_x_fx, /* exp(eps_x_fx_e) */ Word16 *eps_x_fx_e ); #ifdef MYCHANGES static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 singularValues_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel /* Q0 */ ); static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 secDiag_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Word32 *g, /* Q31 */ Word16 *g_e ); #endif static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Loading Loading @@ -832,15 +855,63 @@ static void HouseholderReduction_fx( Word16 *eps_x_fx_e ) { Word16 nCh; push_wmops("HouseholderReduction_fx"); #ifdef MYCHANGES Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word32 g_fx = 0; Word16 g_e = 0; move32(); move16(); Word32 sig_x_fx = 0; Word16 sig_x_fx_e = 0; move32(); move16(); #else // float g = 0.0f, sig_x = 0.0f;// to be removed Word32 g_fx = 0, sig_x_fx = 0; move32(); move32(); Word16 sig_x_fx_e = 0; move16(); #endif Word16 iCh, jCh; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; #ifdef MYCHANGES push_wmops("HouseholderReduction_fx 64"); FOR( jCh = 0; jCh < nChannelsL; jCh++ ) { FOR( iCh = 0; iCh < nChannelsC; iCh++ ) { singularVectors_Left_64[jCh][iCh] = W_shr(W_deposit32_h(singularVectors_Left_fx[jCh][iCh]),32); } } for (nCh=0;nCh<nChannelsC;nCh++) { biDiagonalReductionLeft_64( singularVectors_Left_64,0, singularValues_fx,singularValues_fx_e, nChannelsL, nChannelsC, nCh ); biDiagonalReductionRight_64( singularVectors_Left_64,nCh, secDiag_fx,secDiag_fx_e, nChannelsL, nChannelsC, nCh, &g_fx, &g_e ); } pop_wmops(); #endif push_wmops("HouseholderReduction_fx 32"); FOR( jCh = 0; jCh < nChannelsL; jCh++ ) { FOR( iCh = 0; iCh < nChannelsC; iCh++ ) Loading @@ -866,13 +937,14 @@ static void HouseholderReduction_fx( move32(); } } pop_wmops(); /* SingularVecotr Accumulation */ singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC ); singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC ); pop_wmops(); return; } Loading @@ -881,7 +953,250 @@ static void HouseholderReduction_fx( * * *-------------------------------------------------------------------------*/ #ifdef MYCHANGES static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 singularValues_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel /* Q0 */ ) { Word16 iCh, jCh; Word32 norm_x, g; Word16 norm_x_e, g_e; Word64 norm_64; Word16 bitwindow; norm_x=0; move32(); IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ { Word16 n; Word32 tmp; norm_64=0; move64(); bitwindow=add(singularVectors_e,1); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],bitwindow)); norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp)); } n=W_norm(norm_64); norm_x=W_extract_h(W_shl(norm_64, n )); norm_x_e = add(sub(shl(singularVectors_e, 1), n), 3 ); } IF ( norm_x ) { Word32 factor2; Word16 tmp_e; Word64 tmpmul; Word64 r_64; // = sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm OR -sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm Word32 r, invVal; Word16 r_e, invVal_e; g_e = norm_x_e; move16(); g = Sqrt32( norm_x, &g_e); IF ( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) { g = L_negate( g ); } factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], singularVectors_e) ); tmp_e=sub( sub( g_e, singularVectors_e), 2); tmpmul=W_mult0_32_32( g, factor2); tmpmul=W_shl(tmpmul, tmp_e); r_64=W_sub(tmpmul, norm_64); r_e=W_norm(r_64 ); r=W_extract_h( W_shl( r_64, r_e ) ); // r_e = sub( sub( add( add(1, singularVectors_e),g_e), tmp_e), r_e); // 1+singularVectors_e + g_e - ((g_e-singularVectors_e)-2)-r_e r_e = sub( add( 3, add(singularVectors_e, singularVectors_e )), r_e ); invVal_e = r_e; invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e); tmp_e = add(32, sub(singularVectors_e, g_e ) ); singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( g), tmp_e) ); // here, the exponent goes up. bitwindow=add(singularVectors_e, 2); // so does the bit window FOR ( iCh = currChannel+1; iCh < nChannelsC; iCh++) { Word32 factor1; Word32 factor2; Word32 f; // = norm / r // Word16 f_e; // not really needed norm_64 = 0; for ( jCh = currChannel; jCh<nChannelsL; jCh++ ) { factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], bitwindow)); factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], bitwindow)); norm_64 = W_add( norm_64, W_mult0_32_32(factor1, factor2)); } norm_x_e = W_norm( norm_64); norm_x = W_extract_h( W_shl( norm_64, norm_x_e) ); f = Mpy_32_32( norm_x, invVal ); // f_e = add( invVal_e, sub(norm_x, r_e ) ); FOR ( jCh = currChannel; jCh < nChannelsL; jCh++ ) { Word16 magic_shift; magic_shift = add( add( norm_x_e, 23), r_e); factor1 = W_extract_l( W_shr(singularVectors_Left_64[jCh][currChannel], bitwindow ) ); singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr(W_mult0_32_32( f, factor1 ), magic_shift ) ); } } move16(); } } static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 secDiag_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Word32 *g, /* Q31 */ Word16 *g_e ) { Word16 iCh, jCh; Word32 norm_x; Word16 norm_x_e; Word64 norm_64; Word32 abs_x; Word16 abs_x_e; Word64 abs_64; Word16 bitwindow; Word16 idx; secDiag[currChannel] = ( *g ); secDiag_e[currChannel] = ( *g_e ); move32(); move16(); ( *g ) =0; ( *g_e ) = 0; move32(); move16(); IF ( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */ { Word16 tmp_e; norm_64=0; move64(); bitwindow=add( singularVectors_e, 1); // FIXME: WHY NOT 2???? idx = add( currChannel, 1); FOR ( jCh = idx; jCh < nChannelsC; jCh++ ) { Word32 tmp; tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) ); norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp) ); abs_64 = W_add( abs_64, W_abs( singularVectors_Left_64[currChannel][jCh]) ); } norm_x_e = W_norm( norm_64); norm_x = W_extract_h( W_shl( norm_64, norm_x_e) ); norm_x_e = add( sub( add( singularVectors_e, singularVectors_e), norm_x_e), 3); move16(); abs_x_e = W_norm( abs_64); abs_x = W_extract_h( W_shl( abs_64, abs_x_e) ); abs_x_e = add( sub( add( singularVectors_e, singularVectors_e), abs_x_e), 3); IF ( norm_x ) { Word32 factor1; Word32 factor2; Word16 tmp_e; Word64 tmpmul; Word32 tmp_g; Word16 tmp_g_e; Word16 magic_shift; Word64 r_64; Word32 r; Word16 r_e; Word32 f; Word16 f_e; Word32 invVal; Word16 invVal_e; tmp_g_e = norm_x_e; move16(); tmp_g = Sqrt32( norm_x, &tmp_g_e); IF ( GE_64( singularVectors_Left_64[currChannel][idx],0 ) ) { tmp_g = L_negate( tmp_g); } *g = tmp_g; *g_e = tmp_g_e; move32(); move16(); factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], bitwindow) ); tmp_e = sub( sub( tmp_g_e, singularVectors_e), 1); tmpmul = W_mult0_32_32( tmp_g, factor2); tmpmul = W_shl(tmpmul, tmp_e); r_64 = W_sub( W_shr( tmpmul, 1), W_shr( norm_64, 1) );// FIXME: why those two shi(f)ts? r_e = W_norm( r_64); r = W_extract_h( W_shl( r_64, r_e) ); r_e = sub( sub( add( bitwindow, tmp_g_e), tmp_e), r_e); // FIXME: reduce this!! invVal_e = r_e; move16(); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e); magic_shift=32-tmp_g_e; singularVectors_Left_64[currChannel][idx]=W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g), magic_shift) ); bitwindow=add(singularVectors_e,2); FOR( iCh = idx; iCh < nChannelsC; iCh++ ) { Word16 magicshift; norm_64 = 0; move64(); FOR ( jCh = idx; jCh<nChannelsL; jCh++ ) { factor1 = W_extract_l(W_shr( singularVectors_Left_64[iCh][jCh], bitwindow) ); factor2 = W_extract_l(W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) ); norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2) ); } norm_x_e = W_norm( norm_64); norm_x = W_extract_h( W_shl( norm_64, norm_x_e) ); f = Mpy_32_32( norm_x, invVal); f_e = add( invVal_e, sub( norm_x_e, r_e) ); magic_shift = -3*currChannel+22-2*norm_x_e+4*r_e+3*f_e; // FIXME: HOW IS THIS WORKING?????!?!?!?!?!?!?!?!?!? FOR( iCh = idx; iCh < nChannelsC; iCh++ ) { factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) ); singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2), magic_shift) ); } } // FIXME BEGIN: The following code has not yet been tested invVal_e = 0; move16(); invVal = BASOP_Util_Divide3232_Scale_newton( abs_x, maxWithSign_fx( r ), &invVal_e); invVal_e = add( invVal_e, sub( abs_x_e, r_e) ); bitwindow = add(singularVectors_e, 1); FOR ( jCh = idx; jCh < nChannelsL ; jCh++ ) { secDiag[jCh] = Mpy_32_32( W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) ), invVal ); move32(); secDiag_e[jCh] = add(invVal_e, bitwindow); move16(); } // FIXME END } } } #endif static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Loading Loading
lib_dec/ivas_svd_dec_fx.c +317 −2 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ the United Nations Convention on Contracts on the International Sales of Goods. *******************************************************************************************************/ #define MYCHANGES #include <stdint.h> #include "options.h" #include "prot_fx.h" Loading Loading @@ -65,7 +65,30 @@ static void HouseholderReduction_fx( const Word16 nChannelsC, /* Q0 */ Word32 *eps_x_fx, /* exp(eps_x_fx_e) */ Word16 *eps_x_fx_e ); #ifdef MYCHANGES static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 singularValues_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel /* Q0 */ ); static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 secDiag_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Word32 *g, /* Q31 */ Word16 *g_e ); #endif static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Loading Loading @@ -832,15 +855,63 @@ static void HouseholderReduction_fx( Word16 *eps_x_fx_e ) { Word16 nCh; push_wmops("HouseholderReduction_fx"); #ifdef MYCHANGES Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word32 g_fx = 0; Word16 g_e = 0; move32(); move16(); Word32 sig_x_fx = 0; Word16 sig_x_fx_e = 0; move32(); move16(); #else // float g = 0.0f, sig_x = 0.0f;// to be removed Word32 g_fx = 0, sig_x_fx = 0; move32(); move32(); Word16 sig_x_fx_e = 0; move16(); #endif Word16 iCh, jCh; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; #ifdef MYCHANGES push_wmops("HouseholderReduction_fx 64"); FOR( jCh = 0; jCh < nChannelsL; jCh++ ) { FOR( iCh = 0; iCh < nChannelsC; iCh++ ) { singularVectors_Left_64[jCh][iCh] = W_shr(W_deposit32_h(singularVectors_Left_fx[jCh][iCh]),32); } } for (nCh=0;nCh<nChannelsC;nCh++) { biDiagonalReductionLeft_64( singularVectors_Left_64,0, singularValues_fx,singularValues_fx_e, nChannelsL, nChannelsC, nCh ); biDiagonalReductionRight_64( singularVectors_Left_64,nCh, secDiag_fx,secDiag_fx_e, nChannelsL, nChannelsC, nCh, &g_fx, &g_e ); } pop_wmops(); #endif push_wmops("HouseholderReduction_fx 32"); FOR( jCh = 0; jCh < nChannelsL; jCh++ ) { FOR( iCh = 0; iCh < nChannelsC; iCh++ ) Loading @@ -866,13 +937,14 @@ static void HouseholderReduction_fx( move32(); } } pop_wmops(); /* SingularVecotr Accumulation */ singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC ); singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC ); pop_wmops(); return; } Loading @@ -881,7 +953,250 @@ static void HouseholderReduction_fx( * * *-------------------------------------------------------------------------*/ #ifdef MYCHANGES static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 singularValues_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel /* Q0 */ ) { Word16 iCh, jCh; Word32 norm_x, g; Word16 norm_x_e, g_e; Word64 norm_64; Word16 bitwindow; norm_x=0; move32(); IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ { Word16 n; Word32 tmp; norm_64=0; move64(); bitwindow=add(singularVectors_e,1); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],bitwindow)); norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp)); } n=W_norm(norm_64); norm_x=W_extract_h(W_shl(norm_64, n )); norm_x_e = add(sub(shl(singularVectors_e, 1), n), 3 ); } IF ( norm_x ) { Word32 factor2; Word16 tmp_e; Word64 tmpmul; Word64 r_64; // = sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm OR -sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm Word32 r, invVal; Word16 r_e, invVal_e; g_e = norm_x_e; move16(); g = Sqrt32( norm_x, &g_e); IF ( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) { g = L_negate( g ); } factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], singularVectors_e) ); tmp_e=sub( sub( g_e, singularVectors_e), 2); tmpmul=W_mult0_32_32( g, factor2); tmpmul=W_shl(tmpmul, tmp_e); r_64=W_sub(tmpmul, norm_64); r_e=W_norm(r_64 ); r=W_extract_h( W_shl( r_64, r_e ) ); // r_e = sub( sub( add( add(1, singularVectors_e),g_e), tmp_e), r_e); // 1+singularVectors_e + g_e - ((g_e-singularVectors_e)-2)-r_e r_e = sub( add( 3, add(singularVectors_e, singularVectors_e )), r_e ); invVal_e = r_e; invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e); tmp_e = add(32, sub(singularVectors_e, g_e ) ); singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( g), tmp_e) ); // here, the exponent goes up. bitwindow=add(singularVectors_e, 2); // so does the bit window FOR ( iCh = currChannel+1; iCh < nChannelsC; iCh++) { Word32 factor1; Word32 factor2; Word32 f; // = norm / r // Word16 f_e; // not really needed norm_64 = 0; for ( jCh = currChannel; jCh<nChannelsL; jCh++ ) { factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], bitwindow)); factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], bitwindow)); norm_64 = W_add( norm_64, W_mult0_32_32(factor1, factor2)); } norm_x_e = W_norm( norm_64); norm_x = W_extract_h( W_shl( norm_64, norm_x_e) ); f = Mpy_32_32( norm_x, invVal ); // f_e = add( invVal_e, sub(norm_x, r_e ) ); FOR ( jCh = currChannel; jCh < nChannelsL; jCh++ ) { Word16 magic_shift; magic_shift = add( add( norm_x_e, 23), r_e); factor1 = W_extract_l( W_shr(singularVectors_Left_64[jCh][currChannel], bitwindow ) ); singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr(W_mult0_32_32( f, factor1 ), magic_shift ) ); } } move16(); } } static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], Word16 singularVectors_e, Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Word16 secDiag_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Word32 *g, /* Q31 */ Word16 *g_e ) { Word16 iCh, jCh; Word32 norm_x; Word16 norm_x_e; Word64 norm_64; Word32 abs_x; Word16 abs_x_e; Word64 abs_64; Word16 bitwindow; Word16 idx; secDiag[currChannel] = ( *g ); secDiag_e[currChannel] = ( *g_e ); move32(); move16(); ( *g ) =0; ( *g_e ) = 0; move32(); move16(); IF ( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */ { Word16 tmp_e; norm_64=0; move64(); bitwindow=add( singularVectors_e, 1); // FIXME: WHY NOT 2???? idx = add( currChannel, 1); FOR ( jCh = idx; jCh < nChannelsC; jCh++ ) { Word32 tmp; tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) ); norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp) ); abs_64 = W_add( abs_64, W_abs( singularVectors_Left_64[currChannel][jCh]) ); } norm_x_e = W_norm( norm_64); norm_x = W_extract_h( W_shl( norm_64, norm_x_e) ); norm_x_e = add( sub( add( singularVectors_e, singularVectors_e), norm_x_e), 3); move16(); abs_x_e = W_norm( abs_64); abs_x = W_extract_h( W_shl( abs_64, abs_x_e) ); abs_x_e = add( sub( add( singularVectors_e, singularVectors_e), abs_x_e), 3); IF ( norm_x ) { Word32 factor1; Word32 factor2; Word16 tmp_e; Word64 tmpmul; Word32 tmp_g; Word16 tmp_g_e; Word16 magic_shift; Word64 r_64; Word32 r; Word16 r_e; Word32 f; Word16 f_e; Word32 invVal; Word16 invVal_e; tmp_g_e = norm_x_e; move16(); tmp_g = Sqrt32( norm_x, &tmp_g_e); IF ( GE_64( singularVectors_Left_64[currChannel][idx],0 ) ) { tmp_g = L_negate( tmp_g); } *g = tmp_g; *g_e = tmp_g_e; move32(); move16(); factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], bitwindow) ); tmp_e = sub( sub( tmp_g_e, singularVectors_e), 1); tmpmul = W_mult0_32_32( tmp_g, factor2); tmpmul = W_shl(tmpmul, tmp_e); r_64 = W_sub( W_shr( tmpmul, 1), W_shr( norm_64, 1) );// FIXME: why those two shi(f)ts? r_e = W_norm( r_64); r = W_extract_h( W_shl( r_64, r_e) ); r_e = sub( sub( add( bitwindow, tmp_g_e), tmp_e), r_e); // FIXME: reduce this!! invVal_e = r_e; move16(); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e); magic_shift=32-tmp_g_e; singularVectors_Left_64[currChannel][idx]=W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g), magic_shift) ); bitwindow=add(singularVectors_e,2); FOR( iCh = idx; iCh < nChannelsC; iCh++ ) { Word16 magicshift; norm_64 = 0; move64(); FOR ( jCh = idx; jCh<nChannelsL; jCh++ ) { factor1 = W_extract_l(W_shr( singularVectors_Left_64[iCh][jCh], bitwindow) ); factor2 = W_extract_l(W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) ); norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2) ); } norm_x_e = W_norm( norm_64); norm_x = W_extract_h( W_shl( norm_64, norm_x_e) ); f = Mpy_32_32( norm_x, invVal); f_e = add( invVal_e, sub( norm_x_e, r_e) ); magic_shift = -3*currChannel+22-2*norm_x_e+4*r_e+3*f_e; // FIXME: HOW IS THIS WORKING?????!?!?!?!?!?!?!?!?!? FOR( iCh = idx; iCh < nChannelsC; iCh++ ) { factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) ); singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2), magic_shift) ); } } // FIXME BEGIN: The following code has not yet been tested invVal_e = 0; move16(); invVal = BASOP_Util_Divide3232_Scale_newton( abs_x, maxWithSign_fx( r ), &invVal_e); invVal_e = add( invVal_e, sub( abs_x_e, r_e) ); bitwindow = add(singularVectors_e, 1); FOR ( jCh = idx; jCh < nChannelsL ; jCh++ ) { secDiag[jCh] = Mpy_32_32( W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) ), invVal ); move32(); secDiag_e[jCh] = add(invVal_e, bitwindow); move16(); } // FIXME END } } } #endif static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Loading