compiles and runs. (184ae3a4) · Commits · SA4 / Audio / IVAS BASOP

lib_dec/ivas_svd_dec_fx.c

+317 −2

Original line number	Diff line number	Diff line
		@@ -29,7 +29,7 @@
		the United Nations Convention on Contracts on the International Sales of Goods.

		*******************************************************************************************************/

		#define MYCHANGES
		#include <stdint.h>
		#include "options.h"
		#include "prot_fx.h"
		@@ -65,7 +65,30 @@ static void HouseholderReduction_fx(
		const Word16 nChannelsC, /* Q0 */
		Word32 eps_x_fx, / exp(eps_x_fx_e) */
		Word16 *eps_x_fx_e );
		#ifdef MYCHANGES
		static void biDiagonalReductionLeft_64(
		Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
		Word16 singularVectors_e,
		Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
		Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
		const Word16 nChannelsL, /* Q0 */
		const Word16 nChannelsC, /* Q0 */
		const Word16 currChannel /* Q0 */
		);

		static void biDiagonalReductionRight_64(
		Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
		Word16 singularVectors_e,
		Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
		Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
		const Word16 nChannelsL, /* Q0 */
		const Word16 nChannelsC, /* Q0 */
		const Word16 currChannel, /* Q0 */
		Word32 g, / Q31 */
		Word16 *g_e
		);

		#endif
		static void biDiagonalReductionLeft_fx(
		Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
		Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
		@@ -832,15 +855,63 @@ static void HouseholderReduction_fx(
		Word16 *eps_x_fx_e )
		{
		Word16 nCh;
		push_wmops("HouseholderReduction_fx");
		#ifdef MYCHANGES
		Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		Word32 g_fx = 0;
		Word16 g_e = 0;
		move32();
		move16();

		Word32 sig_x_fx = 0;
		Word16 sig_x_fx_e = 0;
		move32();
		move16();
		#else

		// float g = 0.0f, sig_x = 0.0f;// to be removed
		Word32 g_fx = 0, sig_x_fx = 0;
		move32();
		move32();
		Word16 sig_x_fx_e = 0;
		move16();
		#endif

		Word16 iCh, jCh;
		Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		#ifdef MYCHANGES
		push_wmops("HouseholderReduction_fx 64");
		FOR( jCh = 0; jCh < nChannelsL; jCh++ )
		{
		FOR( iCh = 0; iCh < nChannelsC; iCh++ )
		{
		singularVectors_Left_64[jCh][iCh] = W_shr(W_deposit32_h(singularVectors_Left_fx[jCh][iCh]),32);
		}
		}
		for (nCh=0;nCh<nChannelsC;nCh++)
		{
		biDiagonalReductionLeft_64(
		singularVectors_Left_64,0,
		singularValues_fx,singularValues_fx_e,
		nChannelsL,
		nChannelsC,
		nCh
		);


		biDiagonalReductionRight_64(
		singularVectors_Left_64,nCh,
		secDiag_fx,secDiag_fx_e,
		nChannelsL,
		nChannelsC,
		nCh,
		&g_fx,
		&g_e
		);
		}
		pop_wmops();
		#endif
		push_wmops("HouseholderReduction_fx 32");
		FOR( jCh = 0; jCh < nChannelsL; jCh++ )
		{
		FOR( iCh = 0; iCh < nChannelsC; iCh++ )
		@@ -866,13 +937,14 @@ static void HouseholderReduction_fx(
		move32();
		}
		}
		pop_wmops();

		/* SingularVecotr Accumulation */
		singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );


		singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );

		pop_wmops();
		return;
		}

		@@ -881,7 +953,250 @@ static void HouseholderReduction_fx(
		*
		*
		-------------------------------------------------------------------------/
		#ifdef MYCHANGES
		static void biDiagonalReductionLeft_64(
		Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
		Word16 singularVectors_e,
		Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
		Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
		const Word16 nChannelsL, /* Q0 */
		const Word16 nChannelsC, /* Q0 */
		const Word16 currChannel /* Q0 */
		)
		{
		Word16 iCh, jCh;
		Word32 norm_x, g;
		Word16 norm_x_e, g_e;
		Word64 norm_64;
		Word16 bitwindow;
		norm_x=0;
		move32();
		IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
		{
		Word16 n;
		Word32 tmp;
		norm_64=0;
		move64();
		bitwindow=add(singularVectors_e,1);
		FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		{
		tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],bitwindow));
		norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp));
		}
		n=W_norm(norm_64);
		norm_x=W_extract_h(W_shl(norm_64, n ));
		norm_x_e = add(sub(shl(singularVectors_e, 1), n), 3 );
		}
		IF ( norm_x )
		{
		Word32 factor2;
		Word16 tmp_e;
		Word64 tmpmul;

		Word64 r_64; // = sqrt(norm)singularVectors_Left_64[currChannel][currChannel]-norm OR -sqrt(norm)singularVectors_Left_64[currChannel][currChannel]-norm
		Word32 r, invVal;
		Word16 r_e, invVal_e;

		g_e = norm_x_e;
		move16();
		g = Sqrt32( norm_x, &g_e);
		IF ( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
		{
		g = L_negate( g );
		}
		factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], singularVectors_e) );
		tmp_e=sub( sub( g_e, singularVectors_e), 2);
		tmpmul=W_mult0_32_32( g, factor2);
		tmpmul=W_shl(tmpmul, tmp_e);
		r_64=W_sub(tmpmul, norm_64);
		r_e=W_norm(r_64 );
		r=W_extract_h( W_shl( r_64, r_e ) );
		// r_e = sub( sub( add( add(1, singularVectors_e),g_e), tmp_e), r_e); // 1+singularVectors_e + g_e - ((g_e-singularVectors_e)-2)-r_e
		r_e = sub( add( 3, add(singularVectors_e, singularVectors_e )), r_e );


		invVal_e = r_e;
		invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e);


		tmp_e = add(32, sub(singularVectors_e, g_e ) );
		singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( g), tmp_e) ); // here, the exponent goes up.

		bitwindow=add(singularVectors_e, 2); // so does the bit window
		FOR ( iCh = currChannel+1; iCh < nChannelsC; iCh++)
		{
		Word32 factor1;
		Word32 factor2;
		Word32 f; // = norm / r
		// Word16 f_e; // not really needed

		norm_64 = 0;
		for ( jCh = currChannel; jCh<nChannelsL; jCh++ )
		{
		factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], bitwindow));
		factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], bitwindow));
		norm_64 = W_add( norm_64, W_mult0_32_32(factor1, factor2));
		}
		norm_x_e = W_norm( norm_64);
		norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
		f = Mpy_32_32( norm_x, invVal );
		// f_e = add( invVal_e, sub(norm_x, r_e ) );
		FOR ( jCh = currChannel; jCh < nChannelsL; jCh++ )
		{
		Word16 magic_shift;
		magic_shift = add( add( norm_x_e, 23), r_e);
		factor1 = W_extract_l( W_shr(singularVectors_Left_64[jCh][currChannel], bitwindow ) );
		singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr(W_mult0_32_32( f, factor1 ), magic_shift ) );
		}
		}
		move16();
		}
		}

		static void biDiagonalReductionRight_64(
		Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
		Word16 singularVectors_e,
		Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
		Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
		const Word16 nChannelsL, /* Q0 */
		const Word16 nChannelsC, /* Q0 */
		const Word16 currChannel, /* Q0 */
		Word32 g, / Q31 */
		Word16 *g_e
		)
		{
		Word16 iCh, jCh;
		Word32 norm_x;
		Word16 norm_x_e;
		Word64 norm_64;
		Word32 abs_x;
		Word16 abs_x_e;
		Word64 abs_64;
		Word16 bitwindow;
		Word16 idx;



		secDiag[currChannel] = ( *g );
		secDiag_e[currChannel] = ( *g_e );
		move32();
		move16();

		( *g ) =0;
		( *g_e ) = 0;
		move32();
		move16();

		IF ( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
		{
		Word16 tmp_e;
		norm_64=0;
		move64();
		bitwindow=add( singularVectors_e, 1); // FIXME: WHY NOT 2????
		idx = add( currChannel, 1);
		FOR ( jCh = idx; jCh < nChannelsC; jCh++ )
		{
		Word32 tmp;
		tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
		norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp) );
		abs_64 = W_add( abs_64, W_abs( singularVectors_Left_64[currChannel][jCh]) );
		}
		norm_x_e = W_norm( norm_64);
		norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
		norm_x_e = add( sub( add( singularVectors_e, singularVectors_e), norm_x_e), 3);
		move16();
		abs_x_e = W_norm( abs_64);
		abs_x = W_extract_h( W_shl( abs_64, abs_x_e) );
		abs_x_e = add( sub( add( singularVectors_e, singularVectors_e), abs_x_e), 3);

		IF ( norm_x )
		{
		Word32 factor1;
		Word32 factor2;
		Word16 tmp_e;
		Word64 tmpmul;
		Word32 tmp_g;
		Word16 tmp_g_e;
		Word16 magic_shift;
		Word64 r_64;
		Word32 r;
		Word16 r_e;
		Word32 f;
		Word16 f_e;
		Word32 invVal;
		Word16 invVal_e;

		tmp_g_e = norm_x_e;
		move16();
		tmp_g = Sqrt32( norm_x, &tmp_g_e);
		IF ( GE_64( singularVectors_Left_64[currChannel][idx],0 ) )
		{
		tmp_g = L_negate( tmp_g);
		}
		*g = tmp_g;
		*g_e = tmp_g_e;
		move32();
		move16();
		factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], bitwindow) );
		tmp_e = sub( sub( tmp_g_e, singularVectors_e), 1);
		tmpmul = W_mult0_32_32( tmp_g, factor2);
		tmpmul = W_shl(tmpmul, tmp_e);

		r_64 = W_sub( W_shr( tmpmul, 1), W_shr( norm_64, 1) );// FIXME: why those two shi(f)ts?
		r_e = W_norm( r_64);
		r = W_extract_h( W_shl( r_64, r_e) );
		r_e = sub( sub( add( bitwindow, tmp_g_e), tmp_e), r_e); // FIXME: reduce this!!

		invVal_e = r_e;
		move16();
		invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e);

		magic_shift=32-tmp_g_e;
		singularVectors_Left_64[currChannel][idx]=W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g), magic_shift) );

		bitwindow=add(singularVectors_e,2);
		FOR( iCh = idx; iCh < nChannelsC; iCh++ )
		{
		Word16 magicshift;

		norm_64 = 0;
		move64();
		FOR ( jCh = idx; jCh<nChannelsL; jCh++ )
		{
		factor1 = W_extract_l(W_shr( singularVectors_Left_64[iCh][jCh], bitwindow) );
		factor2 = W_extract_l(W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
		norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2) );
		}

		norm_x_e = W_norm( norm_64);
		norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
		f = Mpy_32_32( norm_x, invVal);
		f_e = add( invVal_e, sub( norm_x_e, r_e) );
		magic_shift = -3currChannel+22-2norm_x_e+4r_e+3f_e; // FIXME: HOW IS THIS WORKING?????!?!?!?!?!?!?!?!?!?
		FOR( iCh = idx; iCh < nChannelsC; iCh++ )
		{
		factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
		singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2), magic_shift) );
		}
		}
		// FIXME BEGIN: The following code has not yet been tested
		invVal_e = 0;
		move16();
		invVal = BASOP_Util_Divide3232_Scale_newton( abs_x, maxWithSign_fx( r ), &invVal_e);
		invVal_e = add( invVal_e, sub( abs_x_e, r_e) );
		bitwindow = add(singularVectors_e, 1);
		FOR ( jCh = idx; jCh < nChannelsL ; jCh++ )
		{
		secDiag[jCh] = Mpy_32_32( W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) ), invVal );
		move32();
		secDiag_e[jCh] = add(invVal_e, bitwindow);
		move16();
		}
		// FIXME END
		}
		}
		}
		#endif
		static void biDiagonalReductionLeft_fx(
		Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
		Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */