Loading lib_dec/ivas_svd_dec_fx.c +161 −189 Original line number Diff line number Diff line Loading @@ -65,16 +65,19 @@ static void HouseholderReduction_fx( Word32 *eps_x_fx, /* exp(eps_x_fx_e) */ Word16 *eps_x_fx_e ); #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word16 singularValues_e[][MAX_OUTPUT_CHANNELS], /* Q0 */ const Word16 nChannelsL, const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Word32 *g, /* Q31 */ Word32 *g, Word16 *g_e ); static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], static void biDiagonalReductionRight_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Loading Loading @@ -840,7 +843,6 @@ static void HouseholderReduction_fx( Word16 nCh; #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word32 g_left_fx = 0; Word16 g_left_e = 0; move32(); Loading @@ -862,40 +864,48 @@ static void HouseholderReduction_fx( Word16 iCh, jCh; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE FOR( jCh = 0; jCh < nChannelsL; jCh++ ) { FOR( iCh = 0; iCh < nChannelsC; iCh++ ) { singularVectors_Left_64[jCh][iCh] = W_shr( W_deposit32_h( singularVectors_Left_fx[jCh][iCh] ), 32 ); singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e; move16(); } } FOR( nCh = 0; nCh < nChannelsC; nCh++ ) { biDiagonalReductionLeft_64( singularVectors_Left_64, biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC, nCh, &g_left_fx, &g_left_e ); singularValues_fx[nCh] = g_left_fx; move32(); singularValues_fx_e[nCh] = add( singularVectors_Left_e, g_left_e ); singularValues_fx_e[nCh] = g_left_e; secDiag_fx[nCh] = g_right_fx; /* from the previous channel */ move32(); secDiag_fx_e[nCh] = add( singularVectors_Left_e, g_right_e ); biDiagonalReductionRight_64( singularVectors_Left_64, secDiag_fx_e[nCh] = g_right_e; biDiagonalReductionRight_fx( singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC, nCh, &g_right_fx, &g_right_e ); { Word16 L_temp_e; Word32 L_temp; L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */ Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */ IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) ) { *eps_x_fx = L_temp; /* exp(L_temp_e) */ Loading @@ -904,20 +914,7 @@ static void HouseholderReduction_fx( move32(); } } } { int i, j; for ( j = 0; j < nChannelsL; j++ ) { for ( i = 0; i < nChannelsC; i++ ) { Word16 n; n = W_norm( singularVectors_Left_64[j][i] ); singularVectors_Left_fx[j][i] = W_extract_h( W_shl( singularVectors_Left_64[j][i], n ) ); singularVectors_Left_fx_e[j][i] = sub( add( 32, singularVectors_Left_e ), n ); } } } #else FOR( jCh = 0; jCh < nChannelsL; jCh++ ) Loading Loading @@ -962,222 +959,197 @@ static void HouseholderReduction_fx( * * *-------------------------------------------------------------------------*/ static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], // q(sing) exp(sing) const Word16 nChannelsL, /* Q0 */ static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], /* Q0 */ const Word16 nChannelsL, const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Word32 *g, Word16 *g_e ) { /* TODO: For some reason, this is optimal. But why? why not ( 32 - 2 * MAGIC_HEADROOM_1 - norm_x_e0 + 1 ) , for example? */ #define MAGIC_HEADROOM_1 2 //#define MAGIC_HEADROOM_2 ( sub( 16, shr( norm_x_e0, 2 ) ) ) //#define MAGIC_HEADROOM_3 ( sub( 16, shr( norm_x_e0, 2 ) ) ) //#define MAGIC_HEADROOM_4 ( sub( 16, shr( norm_x_e0, 2 ) ) ) #define MAGIC_HEADROOM_2 magic_headroom #define MAGIC_HEADROOM_3 magic_headroom #define MAGIC_HEADROOM_4 magic_headroom Word16 iCh, jCh; Word32 norm_x; Word16 norm_x_e; Word64 norm_64; Word16 magic_headroom; Word32 norm_x, f, r; Word16 norm_x_e, f_e, r_e; Word32 L_temp; Word16 L_temp_e; /* Setting values to 0 */ ( *g ) = 0; ( *g_e ) = 0; move32(); move16(); norm_x = 0; move32(); IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ { Word32 tmp; norm_64 = 0; Word64 temp = 0; move64(); norm_x = 0; norm_x_e = 0; Word16 max_e = MIN_16; move16(); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } norm_x_e = W_norm( norm_64 ); magic_headroom = sub( 16, shr( norm_x_e, 2 ) ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) exp(norm_x)=exp(norm)-32 max_e = s_max( max_e, singularVectors_e[jCh][currChannel] ); } IF( norm_x ) FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { Word32 factor2; Word16 tmp_e; Word64 tmpmul; temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors_e[jCh][currChannel] ), 1 ) ) ); } Word64 r_64; Word32 r, invVal; Word16 r_e, invVal_e; Word16 nrm = W_norm( temp ); nrm = sub( nrm, 32 ); norm_x = W_shl_sat_l( temp, nrm ); norm_x_e = sub( add( max_e, max_e ), nrm ); ( *g_e ) = add( sub( add( MAGIC_HEADROOM_1, MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { Word16 invVal_e; Word32 invVal; L_temp_e = norm_x_e; move16(); ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) if (0) L_temp = ISqrt32( norm_x, &L_temp_e ); else L_temp = Sqrt32( norm_x, &L_temp_e ); //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) ); if ( singularVectors[currChannel][currChannel] >= 0 ) { ( *g ) = L_negate( *g ); L_temp = L_negate( L_temp ); } factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, ( *g_e ) ); tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) --> q(tmpmul) ~= q(norm) tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) r_e = W_norm( r_64 ); r = W_extract_h( W_shl( r_64, r_e ) ); ( *g ) = L_temp; move32(); *g_e = L_temp_e; r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + L_temp_e, -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */ move32(); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); invVal_e = 0; FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ { Word16 max2_e = MIN_16; max_e = MIN_16; move16(); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r) tmp_e = sub( 32, *g_e ); singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r)) move16(); temp = 0; move64(); FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ ) FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { Word32 factor1; Word32 factor2; Word32 f; // = norm / r Word16 magic_shift; max_e = s_max( max_e, singularVectors_e[jCh][currChannel] ); /* exp(norm_x_e) */ max2_e = s_max( max2_e, singularVectors_e[jCh][iCh] ); /* exp(norm_x_e) */ } max_e = add( max_e, max2_e ); norm_64 = 0; for ( jCh = currChannel; jCh < nChannelsL; jCh++ ) FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors_e[jCh][currChannel], singularVectors_e[jCh][iCh] ) ) ) ); } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) // magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1 ) ); magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) ); magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) ); magic_shift = sub( magic_shift, shl( invVal_e, 1 ) ); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) Word16 nrm = W_norm( temp ); nrm = sub( nrm, 32 ); norm_x = W_shl_sat_l( temp, nrm ); norm_x_e = sub( max_e, nrm ); f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ f_e = add( invVal_e, sub( norm_x_e, r_e ) ); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32, MAGIC_HEADROOM_4 ) ) ); singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) ); singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors_e[jCh][currChannel] ), &singularVectors_e[jCh][iCh] ); move32(); } } } } return; } /*------------------------------------------------------------------------- * biDiagonalReductionRight() * * *-------------------------------------------------------------------------*/ static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], static void biDiagonalReductionRight_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Word32 *g, /* Q31 */ Word16 *g_e ) Word16 *g_e ) { Word16 iCh, jCh; Word32 norm_x; Word16 norm_x_e; Word64 norm_64; Word16 idx; Word16 magic_headroom; Word16 iCh, jCh, idx; Word32 norm_x, r; Word16 norm_x_e, r_e; Word32 L_temp; Word16 L_temp_e; /* Setting values to 0 */ ( *g ) = 0; ( *g_e ) = 0; move32(); move16(); IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */ { norm_64 = 0; move64(); idx = add( currChannel, 1 ); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) idx = add( currChannel, 1 ); /* Q0 */ norm_x = 0; move32(); norm_x_e = 0; move16(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { Word32 tmp; tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ } norm_x_e = W_norm( norm_64 ); magic_headroom = sub( 16, shr( norm_x_e, 2 ) ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) exp(norm_x)=exp(norm)-32 move16(); IF( norm_x ) IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { Word32 factor1; Word32 factor2; Word16 tmp_e; Word64 tmpmul; Word16 magic_shift; Word64 r_64; Word32 r; Word16 r_e; Word32 f; Word16 invVal_e, temp_e; Word32 invVal; Word16 invVal_e; ( *g_e ) = add( sub( ( MAGIC_HEADROOM_1 + MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) L_temp_e = norm_x_e; move16(); ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) ) L_temp = Sqrt32( norm_x, &L_temp_e ); //L_temp = L_shl_r( L_temp, L_temp_e ); // Q31 IF( singularVectors[currChannel][idx] >= 0 ) { ( *g ) = L_negate( *g ); ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */ move32(); } ELSE { ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */ move32(); } *g_e = L_temp_e; r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + (*g_e), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors_e[currChannel][idx], -( *g ), *g_e, &singularVectors_e[currChannel][idx] ); /* exp(sing_exp) */ move32(); move16(); factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, *g_e ); tmpmul = W_mult0_32_32( *g, factor2 ); // q(tmpmul)=q(g)+q(factor2) tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) r_e = W_norm( r_64 ); r = W_extract_h( W_shl( r_64, r_e ) ); invVal_e = 0; move16(); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r) tmp_e = sub( 32, *g_e ); singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r))) invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); FOR( iCh = idx; iCh < nChannelsL; iCh++ ) FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /* nChannelsL */ { norm_64 = 0; move64(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) norm_x = 0; move32(); norm_x_e = 0; move16(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors_e[iCh][jCh], singularVectors_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */ } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) // magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1 ) ); magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) ); magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) ); magic_shift = sub( magic_shift, shl( invVal_e, 1 ) ); norm_x = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ norm_x_e = add( invVal_e, sub( norm_x_e, r_e ) ); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_4 ) ) ); singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) ); singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors_e[currChannel][jCh] ), &singularVectors_e[iCh][jCh] ); /* exp(sing_exp2) */ move32(); } } } } return; } #else /*------------------------------------------------------------------------- Loading Loading
lib_dec/ivas_svd_dec_fx.c +161 −189 Original line number Diff line number Diff line Loading @@ -65,16 +65,19 @@ static void HouseholderReduction_fx( Word32 *eps_x_fx, /* exp(eps_x_fx_e) */ Word16 *eps_x_fx_e ); #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word16 singularValues_e[][MAX_OUTPUT_CHANNELS], /* Q0 */ const Word16 nChannelsL, const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Word32 *g, /* Q31 */ Word32 *g, Word16 *g_e ); static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], static void biDiagonalReductionRight_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Loading Loading @@ -840,7 +843,6 @@ static void HouseholderReduction_fx( Word16 nCh; #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word32 g_left_fx = 0; Word16 g_left_e = 0; move32(); Loading @@ -862,40 +864,48 @@ static void HouseholderReduction_fx( Word16 iCh, jCh; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE FOR( jCh = 0; jCh < nChannelsL; jCh++ ) { FOR( iCh = 0; iCh < nChannelsC; iCh++ ) { singularVectors_Left_64[jCh][iCh] = W_shr( W_deposit32_h( singularVectors_Left_fx[jCh][iCh] ), 32 ); singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e; move16(); } } FOR( nCh = 0; nCh < nChannelsC; nCh++ ) { biDiagonalReductionLeft_64( singularVectors_Left_64, biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC, nCh, &g_left_fx, &g_left_e ); singularValues_fx[nCh] = g_left_fx; move32(); singularValues_fx_e[nCh] = add( singularVectors_Left_e, g_left_e ); singularValues_fx_e[nCh] = g_left_e; secDiag_fx[nCh] = g_right_fx; /* from the previous channel */ move32(); secDiag_fx_e[nCh] = add( singularVectors_Left_e, g_right_e ); biDiagonalReductionRight_64( singularVectors_Left_64, secDiag_fx_e[nCh] = g_right_e; biDiagonalReductionRight_fx( singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC, nCh, &g_right_fx, &g_right_e ); { Word16 L_temp_e; Word32 L_temp; L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */ Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */ IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) ) { *eps_x_fx = L_temp; /* exp(L_temp_e) */ Loading @@ -904,20 +914,7 @@ static void HouseholderReduction_fx( move32(); } } } { int i, j; for ( j = 0; j < nChannelsL; j++ ) { for ( i = 0; i < nChannelsC; i++ ) { Word16 n; n = W_norm( singularVectors_Left_64[j][i] ); singularVectors_Left_fx[j][i] = W_extract_h( W_shl( singularVectors_Left_64[j][i], n ) ); singularVectors_Left_fx_e[j][i] = sub( add( 32, singularVectors_Left_e ), n ); } } } #else FOR( jCh = 0; jCh < nChannelsL; jCh++ ) Loading Loading @@ -962,222 +959,197 @@ static void HouseholderReduction_fx( * * *-------------------------------------------------------------------------*/ static void biDiagonalReductionLeft_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], // q(sing) exp(sing) const Word16 nChannelsL, /* Q0 */ static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], /* Q0 */ const Word16 nChannelsL, const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Word32 *g, Word16 *g_e ) { /* TODO: For some reason, this is optimal. But why? why not ( 32 - 2 * MAGIC_HEADROOM_1 - norm_x_e0 + 1 ) , for example? */ #define MAGIC_HEADROOM_1 2 //#define MAGIC_HEADROOM_2 ( sub( 16, shr( norm_x_e0, 2 ) ) ) //#define MAGIC_HEADROOM_3 ( sub( 16, shr( norm_x_e0, 2 ) ) ) //#define MAGIC_HEADROOM_4 ( sub( 16, shr( norm_x_e0, 2 ) ) ) #define MAGIC_HEADROOM_2 magic_headroom #define MAGIC_HEADROOM_3 magic_headroom #define MAGIC_HEADROOM_4 magic_headroom Word16 iCh, jCh; Word32 norm_x; Word16 norm_x_e; Word64 norm_64; Word16 magic_headroom; Word32 norm_x, f, r; Word16 norm_x_e, f_e, r_e; Word32 L_temp; Word16 L_temp_e; /* Setting values to 0 */ ( *g ) = 0; ( *g_e ) = 0; move32(); move16(); norm_x = 0; move32(); IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ { Word32 tmp; norm_64 = 0; Word64 temp = 0; move64(); norm_x = 0; norm_x_e = 0; Word16 max_e = MIN_16; move16(); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 } norm_x_e = W_norm( norm_64 ); magic_headroom = sub( 16, shr( norm_x_e, 2 ) ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) exp(norm_x)=exp(norm)-32 max_e = s_max( max_e, singularVectors_e[jCh][currChannel] ); } IF( norm_x ) FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { Word32 factor2; Word16 tmp_e; Word64 tmpmul; temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors_e[jCh][currChannel] ), 1 ) ) ); } Word64 r_64; Word32 r, invVal; Word16 r_e, invVal_e; Word16 nrm = W_norm( temp ); nrm = sub( nrm, 32 ); norm_x = W_shl_sat_l( temp, nrm ); norm_x_e = sub( add( max_e, max_e ), nrm ); ( *g_e ) = add( sub( add( MAGIC_HEADROOM_1, MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { Word16 invVal_e; Word32 invVal; L_temp_e = norm_x_e; move16(); ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) ) if (0) L_temp = ISqrt32( norm_x, &L_temp_e ); else L_temp = Sqrt32( norm_x, &L_temp_e ); //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) ); if ( singularVectors[currChannel][currChannel] >= 0 ) { ( *g ) = L_negate( *g ); L_temp = L_negate( L_temp ); } factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, ( *g_e ) ); tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) --> q(tmpmul) ~= q(norm) tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) r_e = W_norm( r_64 ); r = W_extract_h( W_shl( r_64, r_e ) ); ( *g ) = L_temp; move32(); *g_e = L_temp_e; r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + L_temp_e, -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */ move32(); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); invVal_e = 0; FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ { Word16 max2_e = MIN_16; max_e = MIN_16; move16(); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r) tmp_e = sub( 32, *g_e ); singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r)) move16(); temp = 0; move64(); FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ ) FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { Word32 factor1; Word32 factor2; Word32 f; // = norm / r Word16 magic_shift; max_e = s_max( max_e, singularVectors_e[jCh][currChannel] ); /* exp(norm_x_e) */ max2_e = s_max( max2_e, singularVectors_e[jCh][iCh] ); /* exp(norm_x_e) */ } max_e = add( max_e, max2_e ); norm_64 = 0; for ( jCh = currChannel; jCh < nChannelsL; jCh++ ) FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors_e[jCh][currChannel], singularVectors_e[jCh][iCh] ) ) ) ); } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) // magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1 ) ); magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) ); magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) ); magic_shift = sub( magic_shift, shl( invVal_e, 1 ) ); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) Word16 nrm = W_norm( temp ); nrm = sub( nrm, 32 ); norm_x = W_shl_sat_l( temp, nrm ); norm_x_e = sub( max_e, nrm ); f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ f_e = add( invVal_e, sub( norm_x_e, r_e ) ); FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32, MAGIC_HEADROOM_4 ) ) ); singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) ); singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors_e[jCh][currChannel] ), &singularVectors_e[jCh][iCh] ); move32(); } } } } return; } /*------------------------------------------------------------------------- * biDiagonalReductionRight() * * *-------------------------------------------------------------------------*/ static void biDiagonalReductionRight_64( Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], static void biDiagonalReductionRight_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* Q0 */ const Word16 nChannelsC, /* Q0 */ const Word16 currChannel, /* Q0 */ Word32 *g, /* Q31 */ Word16 *g_e ) Word16 *g_e ) { Word16 iCh, jCh; Word32 norm_x; Word16 norm_x_e; Word64 norm_64; Word16 idx; Word16 magic_headroom; Word16 iCh, jCh, idx; Word32 norm_x, r; Word16 norm_x_e, r_e; Word32 L_temp; Word16 L_temp_e; /* Setting values to 0 */ ( *g ) = 0; ( *g_e ) = 0; move32(); move16(); IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */ { norm_64 = 0; move64(); idx = add( currChannel, 1 ); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) idx = add( currChannel, 1 ); /* Q0 */ norm_x = 0; move32(); norm_x_e = 0; move16(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { Word32 tmp; tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], MAGIC_HEADROOM_1 ) ); // q(sing)-H1 // exp(sing)+H1 norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) ); // q(norm)=2*q(sing)-2*H1 // exp(norm)=2*exp(sing)+2*H1 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ } norm_x_e = W_norm( norm_64 ); magic_headroom = sub( 16, shr( norm_x_e, 2 ) ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x) exp(norm_x)=exp(norm)-32 move16(); IF( norm_x ) IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ { Word32 factor1; Word32 factor2; Word16 tmp_e; Word64 tmpmul; Word16 magic_shift; Word64 r_64; Word32 r; Word16 r_e; Word32 f; Word16 invVal_e, temp_e; Word32 invVal; Word16 invVal_e; ( *g_e ) = add( sub( ( MAGIC_HEADROOM_1 + MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1) L_temp_e = norm_x_e; move16(); ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2) IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) ) L_temp = Sqrt32( norm_x, &L_temp_e ); //L_temp = L_shl_r( L_temp, L_temp_e ); // Q31 IF( singularVectors[currChannel][idx] >= 0 ) { ( *g ) = L_negate( *g ); ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */ move32(); } ELSE { ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */ move32(); } *g_e = L_temp_e; r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + (*g_e), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors_e[currChannel][idx], -( *g ), *g_e, &singularVectors_e[currChannel][idx] ); /* exp(sing_exp) */ move32(); move16(); factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2 exp(factor2)=exp(qsing)+H2 tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, *g_e ); tmpmul = W_mult0_32_32( *g, factor2 ); // q(tmpmul)=q(g)+q(factor2) tmpmul = W_shr( tmpmul, tmp_e ); // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g)) r_64 = W_sub( tmpmul, norm_64 ); // q(r_64)=max(q(tmpmul),q(norm)) r_e = W_norm( r_64 ); r = W_extract_h( W_shl( r_64, r_e ) ); invVal_e = 0; move16(); invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r) tmp_e = sub( 32, *g_e ); singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r))) invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); FOR( iCh = idx; iCh < nChannelsL; iCh++ ) FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /* nChannelsL */ { norm_64 = 0; move64(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) norm_x = 0; move32(); norm_x_e = 0; move16(); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3 factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) ); // q(norm)=2*q(sing)-2*H3 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors_e[iCh][jCh], singularVectors_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */ } norm_x_e = W_norm( norm_64 ); norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm f = Mpy_32_32( norm_x, invVal ); // q(f)=q(norm_x)-q(invVal) // magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e; magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1 ) ); magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) ); magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) ); magic_shift = sub( magic_shift, shl( invVal_e, 1 ) ); norm_x = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ norm_x_e = add( invVal_e, sub( norm_x_e, r_e ) ); FOR( jCh = idx; jCh < nChannelsC; jCh++ ) FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_4 ) ) ); singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) ); singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors_e[currChannel][jCh] ), &singularVectors_e[iCh][jCh] ); /* exp(sing_exp2) */ move32(); } } } } return; } #else /*------------------------------------------------------------------------- Loading