closer to the svd-optimizations-float. (14ec31bf) · Commits · SA4 / Audio / IVAS BASOP

lib_dec/ivas_svd_dec_fx.c

+24 −27

Original line number	Diff line number	Diff line
		@@ -917,7 +917,7 @@ static void biDiagonalReductionLeft_fx(
		const Word16 currChannel /* Q0 */
		)
		{
		Word16 iCh, jCh, idx;
		Word16 iCh, jCh;
		Word32 norm_x, f, r, g;
		Word16 norm_x_e, f_e, r_e, g_e;
		Word32 L_temp;
		@@ -931,14 +931,13 @@ static void biDiagonalReductionLeft_fx(

		IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
		{
		idx = currChannel;
		move16();

		norm_x = 0;
		move32();
		norm_x_e = 0;
		move16();
		FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		{
		norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
		}
		@@ -951,8 +950,8 @@ static void biDiagonalReductionLeft_fx(
		move16();
		L_temp = Sqrt32( norm_x, &L_temp_e );
		// L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
		//( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
		if ( singularVectors[currChannel][idx] >= 0 )
		//( *g ) = L_negate( GE_32( singularVectors[currChannel][currChannel], 0 ) ? L_temp : L_negate( L_temp ) );
		if ( singularVectors[currChannel][currChannel] >= 0 )
		{
		L_temp = L_negate( L_temp );
		}
		@@ -961,8 +960,8 @@ static void biDiagonalReductionLeft_fx(
		g_e = L_temp_e;
		move16();

		r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( g ), singularVectors[currChannel][idx] ), add( singularVectors2_e[currChannel][idx], g_e), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */
		singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( g ), g_e, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
		r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( g ), singularVectors[currChannel][currChannel] ), add( singularVectors2_e[currChannel][currChannel], g_e), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */
		singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors2_e[currChannel][currChannel], -g, g_e, &singularVectors2_e[currChannel][currChannel] ); /* sing_exp */
		move32();

		invVal_e = r_e;
		@@ -975,7 +974,7 @@ static void biDiagonalReductionLeft_fx(
		move32();
		norm_x_e = 0;
		move16();
		FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		{
		norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */
		}
		@@ -983,13 +982,12 @@ static void biDiagonalReductionLeft_fx(
		f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
		f_e = add( invVal_e, sub( norm_x_e, r_e ) );

		FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		{
		singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] );
		move32();
		}
		}

		}

		// rescaling block
		@@ -1199,7 +1197,7 @@ static void biDiagonalReductionRight_fx(
		}
		ELSE
		{
		( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */
		( *g ) = L_temp; /* exp(L_temp_e) */
		move32();
		}
		*g_e = L_temp_e;
		@@ -1211,18 +1209,6 @@ static void biDiagonalReductionRight_fx(
		move32();


		invVal_e = 0;
		move16();
		invVal = BASOP_Util_Divide3232_Scale_newton( abs_x, maxWithSign_fx( r ), &invVal_e );
		invVal_e = add(invVal_e, sub( abs_x_e,r_e ) );
		FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
		{
		secDiag[jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
		move32();
		secDiag_exp[jCh] = add( invVal_e, singularVectors2_e[currChannel][jCh] );
		move16();

		}
		FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /* nChannelsL */
		{
		norm_x = 0;
		@@ -1233,17 +1219,28 @@ static void biDiagonalReductionRight_fx(
		{
		norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
		}
		norm_x = BASOP_Util_Divide3232_Scale_newton( norm_x, abs_x, &invVal_e );
		norm_x_e = add( invVal_e, sub(norm_x_e, abs_x_e ) );
		norm_x = BASOP_Util_Divide3232_Scale_newton( norm_x, r, &invVal_e );
		norm_x_e = add( invVal_e, sub(norm_x_e, r_e ) );
		FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
		{
		singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
		singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors2_e[currChannel][jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
		move32();
		}
		}
		invVal_e = 0;
		move16();
		invVal = BASOP_Util_Divide3232_Scale_newton( abs_x, maxWithSign_fx( r ), &invVal_e );
		invVal_e = add(invVal_e, sub( abs_x_e,r_e ) );
		FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
		{
		secDiag[jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
		move32();
		secDiag_exp[jCh] = add( invVal_e, singularVectors2_e[currChannel][jCh] );
		move16();

		}
		}
		}
		return;
		}
		#else