brought this branch up to speed with the merge request. (3e4b8b66) · Commits · SA4 / Audio / IVAS BASOP

lib_com/options.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -148,4 +148,5 @@
		#define FIX_1824
		#define FIX_1822

		#define MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE /* FhG: reduce WMOPS of HouseHolderReduction() in ivas_svd_dec.c() by removing redundant mathematics and using 64 bit additions.*/
		#endif

lib_dec/ivas_svd_dec_fx.c

+165 −169

Original line number	Diff line number	Diff line
		@@ -29,7 +29,6 @@
		the United Nations Convention on Contracts on the International Sales of Goods.

		*******************************************************************************************************/
		#define MYCHANGES
		#include <stdint.h>
		#include "options.h"
		#include "prot_fx.h"
		@@ -65,26 +64,22 @@ static void HouseholderReduction_fx(
		const Word16 nChannelsC, /* Q0 */
		Word32 *eps_x_fx, /* exp(eps_x_fx_e) */
		Word16 *eps_x_fx_e );
		#ifdef MYCHANGES
		#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
		static void biDiagonalReductionLeft_64(
		Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
		Word16 bitwindow,
		Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
		Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
		const Word16 nChannelsL, /* Q0 */
		const Word16 nChannelsC, /* Q0 */
		const Word16 currChannel /* Q0 */
		);
		const Word16 currChannel, /* Q0 */
		Word32 *g, /* Q31 */
		Word16 *g_e );

		static void biDiagonalReductionRight_64(
		Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
		Word16 bitwindow,
		const Word16 nChannelsL, /* Q0 */
		const Word16 nChannelsC, /* Q0 */
		const Word16 currChannel, /* Q0 */
		Word32 *g, /* Q31 */
		Word16 *g_e
		);
		Word16 *g_e );
		#else
		static void biDiagonalReductionLeft_fx(
		Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
		@@ -874,12 +869,15 @@ static void HouseholderReduction_fx(
		Word16 *eps_x_fx_e )
		{
		Word16 nCh;
		push_wmops("HouseholderReduction_fx");
		#ifdef MYCHANGES
		#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE

		Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		Word32 g_fx = 0;
		Word16 g_e = 0;
		Word32 g_left_fx = 0;
		Word16 g_left_e = 0;
		move32();
		move16();
		Word32 g_right_fx = 0;
		Word16 g_right_e = 0;
		move32();
		move16();

		@@ -895,8 +893,7 @@ static void HouseholderReduction_fx(

		Word16 iCh, jCh;
		Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		#ifdef MYCHANGES
		push_wmops("HouseholderReduction_fx 64");
		#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
		FOR( jCh = 0; jCh < nChannelsL; jCh++ )
		{
		FOR( iCh = 0; iCh < nChannelsC; iCh++ )
		@@ -906,28 +903,26 @@ static void HouseholderReduction_fx(
		}
		for ( nCh = 0; nCh < nChannelsC; nCh++ )
		{
		Word16 bitwindow;
		bitwindow=1;
		biDiagonalReductionLeft_64(
		singularVectors_Left_64,bitwindow,
		singularValues_fx,singularValues_fx_e,
		singularVectors_Left_64,
		nChannelsL,
		nChannelsC,
		nCh
		);
		singularValues_fx_e[nCh]=add(singularVectors_Left_e,singularValues_fx_e[nCh]);
		secDiag_fx[nCh]=g_fx;
		nCh,
		&g_left_fx,
		&g_left_e );
		singularValues_fx[nCh] = g_left_fx;
		move32();
		singularValues_fx_e[nCh] = add( singularVectors_Left_e, g_left_e );
		secDiag_fx[nCh] = g_right_fx; /* from the previous channel */
		move32();
		secDiag_fx_e[nCh]=add(singularVectors_Left_e,g_e);
		bitwindow=2;
		secDiag_fx_e[nCh] = add( singularVectors_Left_e, g_right_e );
		biDiagonalReductionRight_64(
		singularVectors_Left_64,bitwindow,
		singularVectors_Left_64,
		nChannelsL,
		nChannelsC,
		nCh,
		&g_fx,
		&g_e
		);
		&g_right_fx,
		&g_right_e );
		{
		Word16 L_temp_e;
		Word32 L_temp;
		@@ -954,9 +949,8 @@ static void HouseholderReduction_fx(
		}
		}
		}
		pop_wmops();
		#else
		push_wmops("HouseholderReduction_fx 32");

		FOR( jCh = 0; jCh < nChannelsL; jCh++ )
		{
		FOR( iCh = 0; iCh < nChannelsC; iCh++ )
		@@ -982,45 +976,42 @@ static void HouseholderReduction_fx(
		move32();
		}
		}
		pop_wmops();
		#endif



		/* Singular vector accumulation */
		singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );


		singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
		pop_wmops();

		return;
		}

		#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
		/*-------------------------------------------------------------------------
		* biDiagonalReductionLeft()
		*
		*
		*-------------------------------------------------------------------------*/
		#ifdef MYCHANGES

		static void biDiagonalReductionLeft_64(
		Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
		Word16 bitwindow,
		Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
		Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
		const Word16 nChannelsL, /* Q0 */
		const Word16 nChannelsC, /* Q0 */
		const Word16 currChannel /* Q0 */
		)
		const Word16 currChannel, /* Q0 */
		Word32 *g,
		Word16 *g_e )
		{


		#define HEADROOM_LEFT_1 1
		#define HEADROOM_LEFT_2 ( HEADROOM_LEFT_1 + 1 )

		Word16 iCh, jCh;
		Word32 norm_x, g;
		Word16 norm_x_e, g_e;
		Word32 norm_x;
		Word16 norm_x_e;
		Word64 norm_64;
		g=0;
		g_e=0;
		( *g ) = 0;
		( *g_e ) = 0;
		move32();
		move16();
		norm_x = 0;
		@@ -1033,12 +1024,12 @@ static void biDiagonalReductionLeft_64(
		move64();
		FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		{
		tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],bitwindow));
		tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_1 ) );
		norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );
		}
		norm_x_e = W_norm( norm_64 );
		norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
		norm_x_e = add(sub(shl(bitwindow, 1), norm_x_e), 1 );
		norm_x_e = add( sub( ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ), norm_x_e ), 1 );
		}
		IF( norm_x )
		{
		@@ -1050,31 +1041,30 @@ static void biDiagonalReductionLeft_64(
		Word32 r, invVal;
		Word16 r_e, invVal_e;

		g_e = norm_x_e;
		( *g_e ) = norm_x_e;
		move16();
		g = Sqrt32( norm_x, &g_e);
		( *g ) = Sqrt32( norm_x, g_e );
		IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
		{
		g = L_negate( g );
		( *g ) = L_negate( *g );
		}
		factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], bitwindow) );
		tmp_e=sub( g_e, bitwindow) ;
		tmpmul=W_mult0_32_32( g, factor2);
		factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_1 ) );
		tmp_e = sub( ( *g_e ), HEADROOM_LEFT_1 );
		tmpmul = W_mult0_32_32( ( *g ), factor2 );
		tmpmul = W_shl( tmpmul, tmp_e );
		r_64 = W_sub( tmpmul, norm_64 );
		r_e = W_norm( r_64 );
		r = W_extract_h( W_shl( r_64, r_e ) );
		r_e = sub( add( 1, add(bitwindow, bitwindow )), r_e );
		r_e = sub( add( 1, ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ) ), r_e );


		invVal_e = r_e;
		invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e );


		tmp_e = add(31, sub(bitwindow, g_e ) );
		singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( g), tmp_e) ); // here, the exponent goes up.
		tmp_e = add( 31, sub( HEADROOM_LEFT_1, *g_e ) );
		singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up.

		bitwindow=add(bitwindow, 1); // so does the bit window
		FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ )
		{
		Word32 factor1;
		@@ -1085,8 +1075,8 @@ static void biDiagonalReductionLeft_64(
		norm_64 = 0;
		for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
		{
		factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], bitwindow));
		factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], bitwindow));
		factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2 ) );
		factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], HEADROOM_LEFT_2 ) );
		norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
		}
		norm_x_e = W_norm( norm_64 );
		@@ -1096,26 +1086,26 @@ static void biDiagonalReductionLeft_64(
		{
		Word16 magic_shift;
		magic_shift = add( add( norm_x_e, 23 ), r_e );
		factor1 = W_extract_l( W_shr(singularVectors_Left_64[jCh][currChannel], bitwindow ) );
		factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2 ) );
		singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) );
		}
		}
		}
		singularValues[currChannel] = g;
		singularValues_e[currChannel] = g_e;
		move32();
		move16();
		}

		/*-------------------------------------------------------------------------
		* biDiagonalReductionRight()
		*
		*
		*-------------------------------------------------------------------------*/

		static void biDiagonalReductionRight_64(
		Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
		Word16 bitwindow,
		const Word16 nChannelsL, /* Q0 */
		const Word16 nChannelsC, /* Q0 */
		const Word16 currChannel, /* Q0 */
		Word32 *g, /* Q31 */
		Word16 *g_e
		)
		Word16 *g_e )
		{
		Word16 iCh, jCh;
		Word32 norm_x;
		@@ -1123,7 +1113,8 @@ static void biDiagonalReductionRight_64(
		Word64 norm_64;
		Word16 idx;


		#define HEADROOM_RIGHT_1 2
		#define HEADROOM_RIGHT_2 ( HEADROOM_RIGHT_1 + 1 )


		( *g ) = 0;
		@@ -1138,12 +1129,12 @@ static void biDiagonalReductionRight_64(
		FOR( jCh = idx; jCh < nChannelsC; jCh++ )
		{
		Word32 tmp;
		tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
		tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1 ) );
		norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );
		}
		norm_x_e = W_norm( norm_64 );
		norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
		norm_x_e = add( sub( shl( bitwindow, 1), norm_x_e), 1);
		norm_x_e = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 );
		move16();

		IF( norm_x )
		@@ -1173,8 +1164,8 @@ static void biDiagonalReductionRight_64(
		*g_e = tmp_g_e;
		move32();
		move16();
		factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], bitwindow) );
		tmp_e = sub( tmp_g_e, bitwindow);
		factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_1 ) );
		tmp_e = sub( tmp_g_e, HEADROOM_RIGHT_1 );
		tmpmul = W_mult0_32_32( tmp_g, factor2 );
		tmpmul = W_shl( tmpmul, tmp_e );
		r_64 = W_sub( tmpmul, norm_64 );
		@@ -1186,8 +1177,7 @@ static void biDiagonalReductionRight_64(
		invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );

		magic_shift = 32 - tmp_g_e;
		singularVectors_Left_64[currChannel][idx]=W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g), magic_shift) );
		bitwindow=add(bitwindow, 1);
		singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g ), magic_shift ) ); // here, the exponent goes up

		FOR( iCh = idx; iCh < nChannelsL; iCh++ )
		{
		@@ -1196,8 +1186,8 @@ static void biDiagonalReductionRight_64(
		move64();
		FOR( jCh = idx; jCh < nChannelsC; jCh++ )
		{
		factor1 = W_extract_l(W_shr( singularVectors_Left_64[iCh][jCh], bitwindow) );
		factor2 = W_extract_l(W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
		factor1 = W_extract_l( W_shr( singularVectors_Left_64[iCh][jCh], HEADROOM_RIGHT_2 ) );
		factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2 ) );
		norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
		}

		@@ -1208,7 +1198,7 @@ static void biDiagonalReductionRight_64(

		FOR( jCh = idx; jCh < nChannelsC; jCh++ )
		{
		factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
		factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2 ) );
		singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) );
		}
		}
		@@ -1216,6 +1206,12 @@ static void biDiagonalReductionRight_64(
		}
		}
		#else
		/*-------------------------------------------------------------------------
		* biDiagonalReductionLeft()
		*
		*
		*-------------------------------------------------------------------------*/

		static void biDiagonalReductionLeft_fx(
		Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
		Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
		@@ -1248,7 +1244,6 @@ static void biDiagonalReductionLeft_fx(
		( *g ) = 0;
		move32();


		IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
		{
		idx = currChannel;
		@@ -1258,6 +1253,7 @@ static void biDiagonalReductionLeft_fx(
		{
		( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */
		}

		IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
		{
		Word16 invVal_e;
		@@ -1273,7 +1269,6 @@ static void biDiagonalReductionLeft_fx(
		norm_x_e = 0;
		#endif /* OPT_MCH_DEC_V1_NBE */
		move16();

		FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		{
		Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
		@@ -1497,16 +1492,17 @@ static void biDiagonalReductionRight_fx(
		{
		norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
		}

		FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
		{
		singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
		move32();
		}
		}

		FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
		{
		singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], ( *sig_x ) ); /* exp(sing_exp + sig_x_e) */

		move32();
		singularVectors2_e[currChannel][jCh] = add( singularVectors2_e[currChannel][jCh], *sig_x_e );
		move16();