Use alpha max plus beta min approximation for Givens Rotation. This algorithm... (578055a5) · Commits · SA4 / Audio / IVAS BASOP

lib_dec/ivas_svd_dec.c

+279 −17

Original line number	Diff line number	Diff line
		@@ -65,10 +65,10 @@

		#if 1
		#define FIX_1010_OPT_DIV
		#define FIX_1010_OPT_DIV_NORM /* precision improvement */

		#define FIX_1010_OPT_GIVENS
		#define FIX_1010_OPT_GIVENS_INV
		#define FIX_1010_OPT_GIVENS_AMAX_BMIN
		#endif

		/-----------------------------------------------------------------------
		@@ -409,6 +409,204 @@ void svdMat2mat(
		}
		#endif

		//#define MORE_DEBUG

		#ifdef MORE_DEBUG

		#if (MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS)
		#define MAX_MATRIX MAX_INPUT_CHANNELS
		#else
		#define MAX_MATRIX MAX_OUTPUT_CHANNELS
		#endif

		static void matrixFx2Fl(
		float r[][MAX_MATRIX],
		const Word32 a[][MAX_MATRIX],
		const Word16 a_e[MAX_MATRIX],
		const int adim1,
		const int adim2)
		{
		for (int i1=0; i1<adim1; i1++) {
		for (int i2=0; i2<adim2; i2++) {
		r[i1][i2] = (float)a[i1][i2] * powf(2.f, a_e[i2]-31);
		}
		}
		}

		static void matrixProduct(
		float r[][MAX_MATRIX],
		const float a[][MAX_MATRIX],
		const float b[][MAX_MATRIX],
		const int adim1,
		const int adim2,
		const int bdim1,
		const int bdim2)
		{
		assert(adim2 == bdim1);

		for (int i1=0; i1<adim1; i1++) {
		for (int i2=0; i2<bdim2; i2++) {
		r[i1][i2] = 0.f;
		for (int i3=0; i3<bdim1; i3++) {
		r[i1][i2] += a[i1][i3] * b[i3][i2];
		}
		}
		}
		}

		static void matrixTranspose(
		float r[][MAX_MATRIX],
		const float a[][MAX_MATRIX],
		const int adim1,
		const int adim2)
		{
		for (int i1=0; i1<adim1; i1++) {
		for (int i2=0; i2<adim2; i2++) {
		r[i2][i1] = a[i1][i2];
		}
		}
		}

		static void matrixDiagonal(
		float r[][MAX_MATRIX],
		const float a[MAX_MATRIX],
		const int dim)
		{
		for (int i1=0; i1<dim; i1++) {
		for (int i2=0; i2<dim; i2++) {
		r[i1][i2] = 0;
		}
		r[i1][i1] = a[i1];
		}
		}

		static float matrixDifference(
		const float a[][MAX_MATRIX],
		const float b[][MAX_MATRIX],
		const int dim1,
		const int dim2)
		{
		float r = 0.f;

		for (int i1=0; i1<dim1; i1++) {
		for (int i2=0; i2<dim2; i2++) {
		r += fabsf((b[i1][i2] - a[i1][i2])/a[i1][i2]);
		}
		}

		return r/(float)(dim1*dim2);
		}

		static void matrixPrint(
		const float a[][MAX_MATRIX],
		const int dim1,
		const int dim2,
		const char *name)
		{
		printf("Matrix %s[%d][%d] = \n", name, dim1, dim2);
		for (int i1=0; i1<dim1; i1++) {
		for (int i2=0; i2<dim2; i2++) {
		printf("%f, ", a[i1][i2]);
		}
		printf("\n");
		}
		}

		static float matrixTestIdentity(
		const float a[][MAX_MATRIX],
		const int dim)
		{
		float r = 0.f;

		for (int i1=0; i1<dim; i1++) {
		for (int i2=0; i2<dim; i2++) {
		if (i1 == i2) {
		r += fabsf(1.f - a[i1][i2]);
		} else {
		r += fabsf(0.f - a[i1][i2]);
		}
		}
		}

		return r;
		}

		static void svd_accuracy_test_fx(
		Word32 InputMatrixFx[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) InputMatrix_e*/
		Word16 InputMatrixFx_e,
		Word32 singularVectors_LeftFx[][MAX_OUTPUT_CHANNELS], /* o : left singular vectors (U) Q31 */
		Word32 singularValuesFx[MAX_OUTPUT_CHANNELS], /* o : singular values vector (S) singularValues_fx_e*/
		Word32 singularVectors_RightFx[][MAX_OUTPUT_CHANNELS], /* o : right singular vectors (V) Q31 */
		Word16 singularValuesFx_e[MAX_OUTPUT_CHANNELS],
		const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
		const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
		)
		{
		float tmp1[MAX_MATRIX][MAX_MATRIX];
		float tmp2[MAX_MATRIX][MAX_MATRIX];
		float tmp3[MAX_MATRIX][MAX_MATRIX];
		float InputMatrix[MAX_MATRIX][MAX_MATRIX];

		Word16 singularValuesFx2_e[MAX_OUTPUT_CHANNELS];

		float singularVectors_Left[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		float singularValues[MAX_MATRIX];
		float singularValuesMatrix[MAX_MATRIX][MAX_MATRIX];
		float singularVectors_Right[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
		float result;
		int dimSingular;

		/* Convert to float and Create singular values matrix from signular values vector */
		for (int x=0; x<MAX_MATRIX; x++) singularValuesFx2_e[x] = InputMatrixFx_e;
		matrixFx2Fl(InputMatrix, InputMatrixFx, singularValuesFx2_e, nChannelsL, nChannelsC);
		dimSingular = min(nChannelsL, nChannelsC);
		matrixFx2Fl(singularValues, singularValuesFx, singularValuesFx_e, 1, nChannelsC);
		for (int x=0; x<MAX_MATRIX; x++) singularValuesFx2_e[x] = 0 ;
		matrixFx2Fl(singularVectors_Left, singularVectors_LeftFx, singularValuesFx2_e, nChannelsL, nChannelsC);
		matrixFx2Fl(singularVectors_Right, singularVectors_RightFx, singularValuesFx2_e, nChannelsC, nChannelsC);
		matrixDiagonal(singularValuesMatrix, singularValues, dimSingular); /* CxC */

		#ifdef MORE_DEBUG
		matrixPrint(InputMatrix, nChannelsL, nChannelsC, "A");
		printf("Result of svd() \n");
		matrixPrint(singularVectors_Left, nChannelsL, nChannelsC, "U");
		matrixPrint(singularValuesMatrix, nChannelsC, nChannelsC, "S");
		matrixPrint(singularVectors_Right, nChannelsC, nChannelsC, "V");
		#endif

		printf("\nResult quality tests\n\n");

		/* Test U' * U == I */
		matrixTranspose(tmp1, singularVectors_Left, nChannelsL, nChannelsC); /* CxL */
		matrixProduct(tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC); /* CxC */
		result = matrixTestIdentity(tmp2, nChannelsC);
		#ifdef MORE_DEBUG
		matrixPrint(tmp2, nChannelsC, nChannelsC, "U\'*U");
		#endif
		printf("U' * U difference to I is %f\n", result);

		/* Test V * V' == I */
		matrixTranspose(tmp1, singularVectors_Right, nChannelsC, nChannelsC); /* CxC */
		matrixProduct(tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC); /* CxC */
		result = matrixTestIdentity(tmp2, nChannelsC);
		#ifdef MORE_DEBUG
		matrixPrint(tmp2, nChannelsC, nChannelsC, "V*V\'");
		#endif
		printf("V * V' difference to I is %f\n", result);

		/* Test InputMatrix == U * S * V' */
		matrixProduct(tmp1, singularVectors_Left, singularValuesMatrix, nChannelsL, nChannelsC, dimSingular, dimSingular); /* LxC */
		matrixTranspose(tmp3, singularVectors_Right, nChannelsC, nChannelsC); /* CxC */
		matrixProduct(tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC); /* LxC */
		result = matrixDifference(tmp2, InputMatrix, nChannelsL, nChannelsC);
		#ifdef MORE_DEBUG
		matrixPrint(tmp2, nChannelsL, nChannelsC, "USV\'");
		#endif
		printf("U * S * V' difference to M is %f\n", result);

		}
		#endif

		/*-------------------------------------------------------------------------
		* svd()
		*
		@@ -518,6 +716,18 @@ Word16 svd_fx(
		WHILE( EQ_16( condition, 1 ) );

		pop_wmops();
		#ifdef MORE_DEBUG
		svd_accuracy_test_fx(
		InputMatrix,
		InputMatrix_e,
		singularVectors_Left_fx,
		singularValues_fx,
		singularVectors_Right_fx,
		singularValues_fx_e,
		nChannelsL,
		nChannelsC
		);
		#endif
		return ( errorMessage );
		}

		@@ -1475,11 +1685,9 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
		#ifdef FIX_1010_OPT_DIV
		Word16 invVal_e, temp_e;
		Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
		#ifdef FIX_1010_OPT_DIV_NORM
		temp_e = norm_l( invVal );
		invVal = L_shl( invVal, temp_e );
		invVal_e = sub( invVal_e, temp_e );
		#endif
		#endif
		norm_x = 0;
		move32();
		@@ -1494,11 +1702,9 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
		singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e );
		singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
		sing_exp[jCh] = sub( invVal_e, temp_e );
		#ifdef FIX_1010_OPT_DIV_NORM
		temp_e = norm_l( singularVectors[jCh][currChannel] );
		singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e );
		sing_exp[jCh] = sub( sing_exp[jCh], temp_e );
		#endif
		move16();
		#endif
		move32();
		@@ -1539,11 +1745,9 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */

		#ifdef FIX_1010_OPT_DIV
		invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
		#ifdef FIX_1010_OPT_DIV_NORM
		temp_e = norm_l( invVal );
		invVal = L_shl( invVal, temp_e );
		invVal_e = sub( invVal_e, temp_e );
		#endif
		#endif

		FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
		@@ -1563,11 +1767,9 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
		#else
		f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
		f_e = add( invVal_e, sub( norm_x_e, r_e ) );
		#ifdef FIX_1010_OPT_DIV_NORM
		temp_e = norm_l( f );
		f = L_shl( f, temp_e );
		f_e = sub( f_e, temp_e );
		#endif
		#endif

		FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		@@ -1750,11 +1952,9 @@ static void biDiagonalReductionRight_fx(
		#ifdef FIX_1010_OPT_DIV
		Word16 invVal_e, temp_e;
		Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
		#ifdef FIX_1010_OPT_DIV_NORM
		temp_e = norm_l( invVal );
		invVal = L_shl( invVal, temp_e );
		invVal_e = sub( invVal_e, temp_e );
		#endif
		#endif
		FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /nChannelsC /
		{
		@@ -1766,11 +1966,9 @@ static void biDiagonalReductionRight_fx(
		singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
		sing_exp[jCh] = sub( invVal_e, temp_e );
		move16();
		#ifdef FIX_1010_OPT_DIV_NORM
		temp_e = norm_l( singularVectors[currChannel][jCh] );
		singularVectors[currChannel][jCh] = L_shl( singularVectors[currChannel][jCh], temp_e );
		sing_exp[jCh] = sub( sing_exp[jCh], temp_e );
		#endif
		#endif
		move32();
		sing_exp[jCh] = add( sing_exp[jCh], sub( singularVectors_e, sig_x_e ) );
		@@ -1805,11 +2003,9 @@ static void biDiagonalReductionRight_fx(

		#ifdef FIX_1010_OPT_DIV
		invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
		#ifdef FIX_1010_OPT_DIV_NORM
		temp_e = norm_l( invVal );
		invVal = L_shl( invVal, temp_e );
		invVal_e = sub( invVal_e, temp_e );
		#endif
		#endif

		FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
		@@ -1821,11 +2017,9 @@ static void biDiagonalReductionRight_fx(
		secDiag[jCh] = L_shl( singularVectors[currChannel][jCh], temp_e );
		secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
		secDiag_exp[jCh] = sub( invVal_e, temp_e );
		#ifdef FIX_1010_OPT_DIV_NORM
		temp_e = norm_l( secDiag[jCh] );
		secDiag[jCh] = L_shl( secDiag[jCh], temp_e );
		secDiag_exp[jCh] = sub( secDiag_exp[jCh], temp_e );
		#endif
		move16();
		#endif
		move32();
		@@ -2296,6 +2490,46 @@ static void singularVectorsAccumulationRight(

		#ifdef IVAS_FLOAT_FIXED

		#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
		#define NUM_REGIONS 1024
		static Word32 alphaBeta[NUM_REGIONS][2];
		static void get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 alpha, Word32 beta)
		{
		static int init = 0;

		if (init == 0) {
		for (int i=0; i<NUM_REGIONS; i++) {
		double thetaS, thetaE, thetaM;

		thetaS = M_PI/4. * (double)i/(double)NUM_REGIONS;
		thetaE = M_PI/4. * (double)(i+1)/(double)NUM_REGIONS;
		thetaM = M_PI/4. * ((double)i+0.5)/(double)NUM_REGIONS;
		//alphaBeta[i][0] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM)));
		//alphaBeta[i][1] = FL2WORD32(1./(sin(thetaM)tan((thetaS+thetaE)/2.)+cos(thetaM)) tan((thetaS+thetaE)/2.));
		alphaBeta[i][0] = FL2WORD32(2./( ((sin(thetaM) + sin(thetaS))*tan((thetaS+thetaE)/2.)) + cos(thetaM) + cos(thetaS)));
		alphaBeta[i][1] = FL2WORD32(2./( ((sin(thetaM) + sin(thetaS))tan((thetaS+thetaE)/2.)) + cos(thetaM) + cos(thetaS)) tan((thetaS+thetaE)/2.) );
		}
		init = 1;
		}
		Word16 r, shift;
		#if 0
		float pf, qf;
		pf = (float)p * powf(2.f, p_e-31);
		qf = (float)q * powf(2.f, q_e-31);
		r = floor((double)NUM_REGIONS * 4. * atan2f(qf, pf)/M_PI);
		#else
		shift = sub(p_e, q_e);
		r = mult_r( atan2_fx(L_shr(q, s_max(0, shift)), L_shr(p, s_max(0, negate(shift)))), FL2WORD16_SCALE((float)NUM_REGIONS*4./M_PI, 14));
		#endif
		if (r == NUM_REGIONS) {
		r = NUM_REGIONS-1;
		}
		assert((r >= 0) && (r < NUM_REGIONS));
		*alpha = alphaBeta[r][0];
		*beta = alphaBeta[r][1];
		}
		#endif

		#ifdef FIX_1010_OPT_GIVENS_INV
		static void GivensRotation2_fx(
		const Word32 x, /* exp(x_e) */
		@@ -2308,7 +2542,32 @@ static void GivensRotation2_fx(
		Word16 *outInv_e )
		{
		Word32 r;
		#ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN
		Word32 az, ax, a, b;

		ax = L_abs(x);
		az = L_abs(z);
		IF (BASOP_Util_Cmp_Mant32Exp(ax, x_e, az, z_e) > 0) {
		get_alpha_beta(ax, x_e, az, z_e, &a, &b);
		r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(ax, a), x_e, Mpy_32_32(az, b), z_e, out_e);
		} ELSE {
		get_alpha_beta(az, z_e, ax, x_e, &a, &b);
		r = BASOP_Util_Add_Mant32Exp(Mpy_32_32(az, a), z_e, Mpy_32_32(ax, b), x_e, out_e);
		}
		*result = r;
		move32();
		#if 1
		outInv_e = shl(out_e, 1);
		*resultInv = ISqrt32( Mpy_32_32(r, r), outInv_e );
		move32();
		#else
		*resultInv = L_deposit_h(BASOP_Util_Divide3232_Scale(MAX_32, r, outInv_e));
		move32();
		outInv_e = sub(outInv_e, *out_e);
		move16();
		#endif

		#else
		r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
		r = L_max( r, 1 );
		outInv_e = out_e;
		@@ -2318,6 +2577,9 @@ static void GivensRotation2_fx(

		*resultInv = ISqrt32( r, outInv_e );
		move32();
		#endif

		pop_wmops();
		}
		#endif

Admin message