Remove FIX_1010_OPT_INV_USING_INVSQRT and FIX_1010_OPT_GIVENS_AMAX_BMIN.... (9c300c2b) · Commits · SA4 / Audio / IVAS BASOP

lib_dec/ivas_svd_dec.c

+4 −515

Original line number	Diff line number	Diff line
		@@ -53,11 +53,9 @@

		#if 1
		#define FIX_1010_OPT_DIV
		// #define FIX_1010_OPT_INV_USING_INVSQRT
		#define FIX_1010_OPT_SINGLE_RESCALE
		#define FIX_1010_OPT_GIVENS
		#define FIX_1010_OPT_GIVENS_INV
		// #define FIX_1010_OPT_GIVENS_AMAX_BMIN
		#define FIX_1010_OPT_NORM_NOSAT
		#define FIX_1010_OPT_SEC_SINGLE_RESCALE
		#endif
		@@ -297,288 +295,6 @@ void svdMat2mat_fx(
		return;
		}

		#ifdef MORE_DEBUG2
		static void matrixFx2Fl2(
		float r[][MAX_OUTPUT_CHANNELS],
		const Word32 a[][MAX_OUTPUT_CHANNELS],
		const Word16 a_e[][MAX_OUTPUT_CHANNELS],
		const int adim1,
		const int adim2 )
		{
		for ( int i1 = 0; i1 < adim1; i1++ )
		{
		for ( int i2 = 0; i2 < adim2; i2++ )
		{
		r[i1][i2] = (float) a[i1][i2] * powf( 2.f, a_e[i1][i2] - 31 );
		}
		}
		}

		static void matrixPrint2(
		const float a[][MAX_OUTPUT_CHANNELS],
		const int dim1,
		const int dim2,
		const char *name )
		{
		printf( "Matrix %s[%d][%d] = \n", name, dim1, dim2 );
		for ( int i1 = 0; i1 < dim1; i1++ )
		{
		printf( " { " );
		for ( int i2 = 0; i2 < dim2; i2++ )
		{
		printf( "%.10e, ", a[i1][i2] );
		}
		printf( " },\n" );
		}
		}
		#endif

		#ifndef DEBUG_SVD_TEST
		// #define DEBUG_SVD_PRECISION
		#endif
		// #define MORE_DEBUG

		#if defined( DEBUG_SVD_PRECISION ) \|\| defined( MORE_DEBUG )

		#if ( MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS )
		#define MAX_MATRIX MAX_INPUT_CHANNELS
		#else
		#define MAX_MATRIX MAX_OUTPUT_CHANNELS
		#endif

		static void matrixFx2Fl(
		float r[][MAX_MATRIX],
		const Word32 a[][MAX_MATRIX],
		const Word16 a_e[MAX_MATRIX],
		const int adim1,
		const int adim2 )
		{
		for ( int i1 = 0; i1 < adim1; i1++ )
		{
		for ( int i2 = 0; i2 < adim2; i2++ )
		{
		r[i1][i2] = (float) a[i1][i2] * powf( 2.f, a_e[i2] - 31 );
		}
		}
		}

		static void matrixProduct(
		float r[][MAX_MATRIX],
		const float a[][MAX_MATRIX],
		const float b[][MAX_MATRIX],
		const int adim1,
		const int adim2,
		const int bdim1,
		const int bdim2 )
		{
		assert( adim2 == bdim1 );

		for ( int i1 = 0; i1 < adim1; i1++ )
		{
		for ( int i2 = 0; i2 < bdim2; i2++ )
		{
		r[i1][i2] = 0.f;
		for ( int i3 = 0; i3 < bdim1; i3++ )
		{
		r[i1][i2] += a[i1][i3] * b[i3][i2];
		}
		}
		}
		}

		static void matrixTranspose(
		float r[][MAX_MATRIX],
		const float a[][MAX_MATRIX],
		const int adim1,
		const int adim2 )
		{
		for ( int i1 = 0; i1 < adim1; i1++ )
		{
		for ( int i2 = 0; i2 < adim2; i2++ )
		{
		r[i2][i1] = a[i1][i2];
		}
		}
		}

		static void matrixDiagonal(
		float r[][MAX_MATRIX],
		const float a[MAX_MATRIX],
		const int dim )
		{
		for ( int i1 = 0; i1 < dim; i1++ )
		{
		for ( int i2 = 0; i2 < dim; i2++ )
		{
		r[i1][i2] = 0;
		}
		r[i1][i1] = a[i1];
		}
		}

		static float matrixDifference(
		const float a[][MAX_MATRIX],
		const float b[][MAX_MATRIX],
		const int dim1,
		const int dim2 )
		{
		float r = 0.f;

		for ( int i1 = 0; i1 < dim1; i1++ )
		{
		for ( int i2 = 0; i2 < dim2; i2++ )
		{
		if ( a[i1][i2] != 0.f )
		{
		r += fabsf( ( b[i1][i2] - a[i1][i2] ) / a[i1][i2] );
		}
		else
		{
		r += fabsf( b[i1][i2] - a[i1][i2] );
		}
		}
		}

		return r / (float) ( dim1 * dim2 );
		}

		static void matrixPrint(
		const float a[][MAX_MATRIX],
		const int dim1,
		const int dim2,
		const char *name )
		{
		printf( "Matrix %s[%d][%d] = \n", name, dim1, dim2 );
		for ( int i1 = 0; i1 < dim1; i1++ )
		{
		for ( int i2 = 0; i2 < dim2; i2++ )
		{
		printf( "%.10e, ", a[i1][i2] );
		}
		printf( "\n" );
		}
		}

		static float matrixTestIdentity(
		const float a[][MAX_MATRIX],
		const int dim )
		{
		float r = 0.f;

		for ( int i1 = 0; i1 < dim; i1++ )
		{
		for ( int i2 = 0; i2 < dim; i2++ )
		{
		if ( i1 == i2 )
		{
		r += fabsf( 1.f - a[i1][i2] );
		}
		else
		{
		r += fabsf( 0.f - a[i1][i2] );
		}
		}
		}

		return r;
		}

		#define PROBLEMATIC_THRESHOLD 0.5f
		static void svd_accuracy_test_fx(
		Word32 InputMatrixFx[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) InputMatrix_e*/
		Word16 InputMatrixFx_e,
		Word32 singularVectors_LeftFx[][MAX_OUTPUT_CHANNELS], /* o : left singular vectors (U) Q31 */
		Word32 singularValuesFx[MAX_OUTPUT_CHANNELS], /* o : singular values vector (S) singularValues_fx_e*/
		Word32 singularVectors_RightFx[][MAX_OUTPUT_CHANNELS], /* o : right singular vectors (V) Q31 */
		Word16 singularValuesFx_e[MAX_OUTPUT_CHANNELS],
		const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
		const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
		)
		{
		float tmp1[MAX_MATRIX][MAX_MATRIX];
		float tmp2[MAX_MATRIX][MAX_MATRIX];
		float tmp3[MAX_MATRIX][MAX_MATRIX];
		float InputMatrix[MAX_MATRIX][MAX_MATRIX];

		Word16 singularValuesFx2_e[MAX_OUTPUT_CHANNELS];

		float singularVectors_Left[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		float singularValues[MAX_MATRIX];
		float singularValuesMatrix[MAX_MATRIX][MAX_MATRIX];
		float singularVectors_Right[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		float result;
		int dimSingular;
		int problematic = 0;

		/* Convert to float and Create singular values matrix from signular values vector */
		for ( int x = 0; x < MAX_MATRIX; x++ )
		singularValuesFx2_e[x] = InputMatrixFx_e;
		matrixFx2Fl( InputMatrix, InputMatrixFx, singularValuesFx2_e, nChannelsL, nChannelsC );
		dimSingular = min( nChannelsL, nChannelsC );
		matrixFx2Fl( &singularValues, (Word32( * )[MAX_MATRIX]) singularValuesFx, singularValuesFx_e, 1, nChannelsC );
		for ( int x = 0; x < MAX_MATRIX; x++ )
		singularValuesFx2_e[x] = 0;
		matrixFx2Fl( singularVectors_Left, singularVectors_LeftFx, singularValuesFx2_e, nChannelsL, nChannelsC );
		matrixFx2Fl( singularVectors_Right, singularVectors_RightFx, singularValuesFx2_e, nChannelsC, nChannelsC );
		matrixDiagonal( singularValuesMatrix, singularValues, dimSingular ); /* CxC */

		#ifdef MORE_DEBUG
		matrixPrint( InputMatrix, nChannelsL, nChannelsC, "A" );
		printf( "Result of svd() \n" );
		matrixPrint( singularVectors_Left, nChannelsL, nChannelsC, "U" );
		matrixPrint( singularValuesMatrix, nChannelsC, nChannelsC, "S" );
		matrixPrint( singularVectors_Right, nChannelsC, nChannelsC, "V" );
		#endif

		printf( "\nResult quality tests\n\n" );

		/* Test U' * U == I */
		matrixTranspose( tmp1, singularVectors_Left, nChannelsL, nChannelsC ); /* CxL */
		matrixProduct( tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC ); /* CxC */
		result = matrixTestIdentity( tmp2, nChannelsC );
		if ( result >= PROBLEMATIC_THRESHOLD )
		{
		problematic = 1;
		}
		#ifdef MORE_DEBUG
		matrixPrint( tmp2, nChannelsC, nChannelsC, "U\'*U" );
		#endif
		printf( "U' * U difference to I is %f\n", result );

		/* Test V * V' == I */
		matrixTranspose( tmp1, singularVectors_Right, nChannelsC, nChannelsC ); /* CxC */
		matrixProduct( tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC ); /* CxC */
		result = matrixTestIdentity( tmp2, nChannelsC );
		if ( result >= PROBLEMATIC_THRESHOLD )
		{
		problematic = 1;
		}
		#ifdef MORE_DEBUG
		matrixPrint( tmp2, nChannelsC, nChannelsC, "V*V\'" );
		#endif
		printf( "V * V' difference to I is %f\n", result );

		/* Test InputMatrix == U * S * V' */
		matrixProduct( tmp1, singularVectors_Left, singularValuesMatrix, nChannelsL, nChannelsC, dimSingular, dimSingular ); /* LxC */
		matrixTranspose( tmp3, singularVectors_Right, nChannelsC, nChannelsC ); /* CxC */
		matrixProduct( tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC ); /* LxC */
		result = matrixDifference( tmp2, InputMatrix, nChannelsL, nChannelsC );
		if ( result >= PROBLEMATIC_THRESHOLD )
		{
		problematic = 1;
		}
		#ifdef MORE_DEBUG
		matrixPrint( tmp2, nChannelsL, nChannelsC, "USV\'" );
		#endif
		printf( "U * S * V' difference to M is %f\n", result );

		if ( problematic )
		{
		matrixPrint( InputMatrix, nChannelsL, nChannelsC, "Problematic Input" );
		}
		}
		#endif

		/*-------------------------------------------------------------------------
		* svd()
		*
		@@ -615,24 +331,9 @@ Word16 svd_fx(
		Word16 temp_fx_e;
		push_wmops( "svd_fx" );

		#ifdef MORE_DEBUG2
		{
		float input[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		Word16 exp_matrix[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];

		for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ )
		for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ )
		exp_matrix[ii][iii] = InputMatrix_e;

		matrixFx2Fl2( input, InputMatrix, exp_matrix, nChannelsL, nChannelsC );
		matrixPrint2( input, nChannelsL, nChannelsC, " input " );
		}
		#endif

		#ifndef FIX_1010_OPT_SINGLE_RESCALE
		set32_fx( secDiag_fx, 0, MAX_OUTPUT_CHANNELS );
		set16_fx( secDiag_fx_e, 0, MAX_OUTPUT_CHANNELS );

		set16_fx( singularValues_fx_e, 0, MAX_OUTPUT_CHANNELS );
		#endif

		@@ -714,17 +415,6 @@ Word16 svd_fx(
		WHILE( EQ_16( condition, 1 ) );

		pop_wmops();
		#ifdef DEBUG_SVD_PRECISION
		svd_accuracy_test_fx(
		InputMatrix,
		InputMatrix_e,
		singularVectors_Left_fx,
		singularValues_fx,
		singularVectors_Right_fx,
		singularValues_fx_e,
		nChannelsL,
		nChannelsC );
		#endif
		return ( errorMessage );
		}

		@@ -1323,18 +1013,6 @@ static void HouseholderReduction_fx(
		}
		}

		#ifdef MORE_DEBUG2
		{
		float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		float secDiag[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];

		matrixFx2Fl2( singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC );
		matrixFx2Fl2( secDiag, (Word32( * )[MAX_OUTPUT_CHANNELS]) secDiag_fx, (Word16( * )[MAX_OUTPUT_CHANNELS]) secDiag_fx_e, 1, nChannelsC );
		matrixPrint2( singularVectors_Left, nChannelsL, nChannelsC, "left" );
		matrixPrint2( secDiag, 1, nChannelsC, "secDiag" );
		}
		#endif

		/* SingularVecotr Accumulation */
		#ifndef FIX_1010_OPT_SINGLE_RESCALE
		singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC );
		@@ -1346,36 +1024,10 @@ static void HouseholderReduction_fx(
		singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
		#endif

		#ifdef MORE_DEBUG2
		{
		float singularVectors_Right[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];

		for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ )
		for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ )
		singularVectors_Left_fx_e[ii][iii] = 0;

		matrixFx2Fl2( singularVectors_Right, singularVectors_Right_fx, singularVectors_Left_fx_e, nChannelsC, nChannelsC );
		matrixPrint2( singularVectors_Right, nChannelsC, nChannelsC, "right2" );
		}
		#endif

		singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
		#endif

		#ifdef MORE_DEBUG2
		{
		float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];

		for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ )
		for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ )
		singularVectors_Left_fx_e[ii][iii] = 0;

		matrixFx2Fl2( singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC );
		matrixPrint2( singularVectors_Left, nChannelsL, nChannelsC, "left2" );
		}
		#endif
		return;
		}

		@@ -1385,41 +1037,6 @@ static void HouseholderReduction_fx(
		*
		-------------------------------------------------------------------------/

		#ifdef FIX_1010_OPT_INV_USING_INVSQRT
		static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e )
		{
		Word16 sign, shift, shift2;

		sign = 0;
		move16();
		if ( x < 0 )
		{
		sign = 1;
		}
		if ( sign )
		{
		x = L_negate( x );
		}

		shift = norm_l( x );
		x = L_shl( x, shift );
		*px_e = 0;
		move16();
		x = ISqrt32norm( x, px_e );
		x = Mpy_32_32( x, x );
		shift2 = norm_l( x );
		x = L_shl( x, shift2 );
		px_e = add( shl( px_e, 1 ), sub( shift, shift2 ) );
		move16();

		if ( sign )
		{
		x = L_negate( x );
		}
		return x;
		}
		#endif

		static void biDiagonalReductionLeft_fx(
		Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
		Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
		@@ -1477,6 +1094,7 @@ ELSE IF( LT_16( sig_x_e, secDiag_e ) )
		secDiag_e[currChannel] = *sig_x_e;
		move16();
		#endif

		/* Setting values to 0 */
		( *sig_x ) = 0;
		move32();
		@@ -1502,11 +1120,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
		#ifdef FIX_1010_OPT_DIV
		Word16 invVal_e;
		Word32 invVal;
		#ifdef FIX_1010_OPT_INV_USING_INVSQRT
		invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
		#else
		invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
		#endif
		#endif
		norm_x = 0;
		move32();
		@@ -1576,11 +1190,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
		move32();

		#ifdef FIX_1010_OPT_DIV
		#ifdef FIX_1010_OPT_INV_USING_INVSQRT
		invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
		#else
		invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
		#endif
		#endif

		FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
		@@ -1673,7 +1283,7 @@ return;

		static void biDiagonalReductionRight_fx(
		Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
		Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
		Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_exp[]) */
		#ifndef FIX_1010_OPT_SINGLE_RESCALE
		Word16 *singularVectors_e,
		#else
		@@ -1741,11 +1351,7 @@ static void biDiagonalReductionRight_fx(
		#ifdef FIX_1010_OPT_DIV
		Word16 invVal_e, temp_e;
		Word32 invVal;
		#ifdef FIX_1010_OPT_INV_USING_INVSQRT
		invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
		#else
		invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
		#endif
		#endif
		FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /nChannelsC /
		{
		@@ -1810,11 +1416,7 @@ static void biDiagonalReductionRight_fx(
		move32();

		#ifdef FIX_1010_OPT_DIV
		#ifdef FIX_1010_OPT_INV_USING_INVSQRT
		invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
		#else
		invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
		#endif
		#endif

		FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
		@@ -1898,6 +1500,8 @@ static void biDiagonalReductionRight_fx(
		*secDiag_e = exp_max;
		move16();
		#endif


		#ifndef FIX_1010_OPT_SINGLE_RESCALE
		exp_max = *singularVectors_e;
		move16();
		@@ -1976,11 +1580,7 @@ static void singularVectorsAccumulationLeft_fx(
		IF( t_ii ) /if (fabsf(t_ii) > EPSILON fabsf(t_ii)) {*/
		{
		#ifdef FIX_1010_OPT_DIV
		#ifdef FIX_1010_OPT_INV_USING_INVSQRT
		t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp );
		#else
		t_ii = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp );
		#endif
		t_ii_e = sub( temp_exp, t_ii_e );
		#else
		t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */
		@@ -2001,22 +1601,11 @@ static void singularVectorsAccumulationLeft_fx(
		norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
		#endif
		}
		#ifdef FIX_1010_OPT_INV_USING_INVSQRT
		Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] );
		t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp );
		t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj );
		#ifndef FIX_1010_OPT_SINGLE_RESCALE
		t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
		#else
		t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) );
		#endif
		#else
		t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,
		#ifndef FIX_1010_OPT_SINGLE_RESCALE
		t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
		#else
		t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) );
		#endif
		#endif
		FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
		{
		@@ -2187,59 +1776,6 @@ static void singularVectorsAccumulationRight_fx(
		*
		-------------------------------------------------------------------------/

		#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
		#ifndef M_PI
		#define M_PI 3.141592653589793
		#endif
		#define NUM_REGIONS 128
		static Word32 alphaBeta[NUM_REGIONS][2];
		static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 alpha, Word32 beta )
		{
		static int init = 0;

		if ( init == 0 )
		{
		for ( int i = 0; i < NUM_REGIONS; i++ )
		{
		double thetaS, thetaE, thetaM;

		thetaS = M_PI / 4. * (double) i / (double) NUM_REGIONS;
		thetaE = M_PI / 4. * (double) ( i + 1 ) / (double) NUM_REGIONS;
		thetaM = M_PI / 4. * ( (double) i + 0.5 ) / (double) NUM_REGIONS;
		// alphaBeta[i][0] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM)));
		// alphaBeta[i][1] = FL2WORD32(1./(sin(thetaM)tan((thetaS+thetaE)/2.)+cos(thetaM)) tan((thetaS+thetaE)/2.));
		alphaBeta[i][0] = FL2WORD32( 2. / ( ( ( sin( thetaM ) + sin( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ) + cos( thetaM ) + cos( thetaS ) ) );
		alphaBeta[i][1] = FL2WORD32( 2. / ( ( ( sin( thetaM ) + sin( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ) + cos( thetaM ) + cos( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) );
		}
		init = 1;
		}
		Word16 r, shift;
		#if 0
		float pf, qf;
		pf = (float)p * powf(2.f, p_e-31);
		qf = (float)q * powf(2.f, q_e-31);
		r = floor((double)NUM_REGIONS * 4. * atan2f(qf, pf)/M_PI);
		if (r >= NUM_REGIONS) {
		r = NUM_REGIONS-1;
		}
		#elif 1
		shift = sub( norm_l( q ), 1 );
		q = L_shl( q, shift );
		q_e = sub( q_e, shift );
		shift = norm_l( p );
		p = L_shl( p, shift );
		p_e = sub( p_e, shift );
		shift = sub( q_e, p_e );
		r = shl_sat( div_s( extract_h( q ), s_max( 1, extract_h( p ) ) ), shift );
		/* Second order polyfit of atan(r)/(pi/4) for r=0..1 */
		r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) );
		r = s_min( s_max( 0, shr( r, WORD16_BITS - 1 - 7 - 3 ) ), NUM_REGIONS - 1 );
		#endif
		assert( ( r >= 0 ) && ( r < NUM_REGIONS ) );
		*alpha = alphaBeta[r][0];
		*beta = alphaBeta[r][1];
		}
		#endif

		#ifdef FIX_1010_OPT_GIVENS_INV
		static void GivensRotation2_fx(
		@@ -2253,35 +1789,6 @@ static void GivensRotation2_fx(
		Word16 *outInv_e )
		{
		Word32 r;
		#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
		Word32 az, ax, a, b;

		ax = L_abs( x );
		az = L_abs( z );
		IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 )
		{
		get_alpha_beta( ax, x_e, az, z_e, &a, &b );
		r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e );
		}
		ELSE
		{
		get_alpha_beta( az, z_e, ax, x_e, &a, &b );
		r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e );
		}
		*result = r;
		move32();
		#if 1
		outInv_e = shl( out_e, 1 );
		*resultInv = ISqrt32( L_max( 1, Mpy_32_32( r, r ) ), outInv_e );
		move32();
		#else
		*resultInv = L_deposit_h( BASOP_Util_Divide3232_Scale( MAX_32, r, outInv_e ) );
		move32();
		outInv_e = sub( outInv_e, *out_e );
		move16();
		#endif

		#else
		r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
		r = L_max( r, 1 );
		outInv_e = out_e;
		@@ -2291,7 +1798,6 @@ static void GivensRotation2_fx(

		*resultInv = ISqrt32( r, outInv_e );
		move32();
		#endif
		}
		#endif

		@@ -2312,25 +1818,8 @@ static Word32 GivensRotation_fx(
		#endif

		#ifdef FIX_1010_OPT_GIVENS
		#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
		Word32 az, ax, a, b;

		ax = L_abs( x );
		az = L_abs( z );
		IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 )
		{
		get_alpha_beta( ax, x_e, az, z_e, &a, &b );
		r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e );
		}
		ELSE
		{
		get_alpha_beta( az, z_e, ax, x_e, &a, &b );
		r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e );
		}
		#else
		r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
		r = Sqrt32( r, out_e );
		#endif
		#else
		x_abs = L_abs( x );
		z_abs = L_abs( z );