activated speedup 15, 16 to test (29b8a3f7) · Commits · SA4 / Audio / IVAS BASOP

lib_rend/ivas_dirac_dec_binaural_functions_fx.c

+6 −112

Original line number	Diff line number	Diff line
		@@ -53,18 +53,14 @@
		//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE
		//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE
		//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE
		//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs --> DONT USE
		//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs --> DONT USE
		//#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 0 WMOPS --> DONT USE
		//#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE
		//#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET
		//#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET
		//#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET
		//#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS --> DONTUSE
		#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE? (pipe tbd)
		//#define FIX_1326_SPEEDUP_14 // test whether any of these paths is really necessary, then assert --> DONTUSE (pipes red, asserts!)
		//#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE? (pipe tbd)
		//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .18 WMOPS --> USE? (pipe tbd)
		//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE

		#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE? (pipe tbd)
		#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .18 WMOPS --> USE? (pipe tbd)


		/* Lookup table of four 16-bit fixed-point constants.
		 * NOTE(review): the values look like 1/(i+1) in Q15 (1.0 saturated to 32767, 1/2, 1/3, 1/4),
		 * presumably reciprocals of a slot count - confirm against the users of this table. */
		Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
		@@ -3224,15 +3220,8 @@ static void eig2x2_fx(
		a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx
		pm_fx = 0.5f * sqrtf(max(0.0f, a_fx))
		add_fx = 0.5f * (e1 + e2)*/

		#ifdef FIX_1326_SPEEDUP_14
		static int tstcnt = 0;
		#endif
		IF( L_and( c_re == 0, c_im == 0 ) )
		{
		#ifdef FIX_1326_SPEEDUP_14
		tstcnt++;
		#endif
		/* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0
		a_fx = (E1 - E2)^2
		pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */
		@@ -3250,9 +3239,6 @@ static void eig2x2_fx(
		q_crossSquare = sub( add( q_c, q_c ), 31 );
		IF( EQ_32( e1, e2 ) )
		{
		#ifdef FIX_1326_SPEEDUP_14
		tstcnt++;
		#endif
		/* if e1 - e2 = 0, then a_fx = 4 * crossSquare_fx
		pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) = sqrt(max(0, crossSquare_fx)) */
		test();
		@@ -3286,9 +3272,6 @@ static void eig2x2_fx(

		IF( GT_16( sub( q_c, q_e ), Q15 ) )
		{
		#ifdef FIX_1326_SPEEDUP_14
		tstcnt++;
		#endif
		pm_fx = L_shr( L_max( 0, L_abs( L_sub( e1, e2 ) ) ), 1 );
		q_tmp2 = q_e;
		move16();
		@@ -3312,10 +3295,6 @@ static void eig2x2_fx(
		}
		}
		}
		#ifdef FIX_1326_SPEEDUP_14
		if ( tstcnt > 10000 )
		assert( 0 );
		#endif
		// add_fx = 0.5 * (e1 + e2)
		add_fx = L_shr( L_add( e1, e2 ), 1 );
		q_tmp1 = q_e;
		@@ -4287,33 +4266,6 @@ static void formulate2x2MixingMatrix_fx(
		}
		}
		ELSE
		#ifdef FIX_1326_SPEEDUP_05
		{
		Word16 shift = norm_l( temp );
		#if 1 // oldcode
		temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
		#else

		temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
		exp_temp = sub( 30, q_ein );
		if ( temp == 0 )
		{
		exp_temp = EPSILON_EXP;
		move32();
		}
		if ( temp == 0 )
		{
		temp = EPSILON_MANT;
		move32();
		}
		#endif
		temp = ISqrt32( temp, &exp_temp );
		shift = sub( 31, q_eout );
		Ghat_fx[0] = Mpy_32_32( Sqrt32( E_out1, &shift ), temp );
		move32();
		exp = add( shift, exp_temp );
		}
		#else
		{
		temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
		push_wmops( "formulate2x2MixingMatrix Division" );
		@@ -4322,7 +4274,7 @@ static void formulate2x2MixingMatrix_fx(
		exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
		Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
		}
		#endif

		#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
		Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
		#endif
		@@ -4346,39 +4298,6 @@ static void formulate2x2MixingMatrix_fx(
		}
		}
		ELSE
		#ifdef FIX_1326_SPEEDUP_06
		{
		Word16 shift = norm_l( temp );
		#if 0 // oldcode
		temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
		#else
		temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
		exp_temp = sub( 31 - 1, q_ein );
		if ( temp == 0 )
		{
		exp_temp = add( 0, EPSILON_EXP );
		}
		if ( temp == 0 )
		{
		temp = L_add( 0, EPSILON_MANT );
		}
		#endif
		#if 1 // oldcode - new code introduces too much noise
		push_wmops( "formulate2x2MixingMatrix Division" );
		temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
		pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
		exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
		#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
		Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
		#endif
		#else
		temp = ISqrt32( temp, &exp_temp );
		shift = sub( 31, q_eout );
		Ghat_fx[1] = Mpy_32_32( temp, Sqrt32( E_out2, &shift ) );
		exp1 = add( shift, exp_temp );
		#endif
		}
		#else
		{
		temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
		push_wmops( "formulate2x2MixingMatrix Division" );
		@@ -4387,7 +4306,7 @@ static void formulate2x2MixingMatrix_fx(
		exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
		Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
		}
		#endif

		#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
		Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
		#endif
		@@ -4512,30 +4431,6 @@ static void formulate2x2MixingMatrix_fx(
		#endif
		pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/

		#ifdef FIX_1326_SPEEDUP_07
		IF( D_fx[0] == 0 )
		{
		// temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
		// exp = ONE_DIV_EPSILON_EXP;
		div_fx[0] = L_add( 0, 2047986068 ); // Sqrt32( temp, &exp ); // Q = 31 - exp
		exp = add( 0, 20 );
		}
		ELSE
		{
		#if 1 // old code
		push_wmops( "formulate2x2MixingMatrix Division" );
		temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
		exp = sub( exp, sub( Q30, q_D ) );
		pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
		div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
		move32();
		#else
		exp = sub( 31, q_D );
		div_fx[0] = ISqrt32_2( D_fx[0], &exp );
		move32();
		#endif
		}
		#else
		IF( D_fx[0] == 0 )
		{
		temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
		@@ -4552,7 +4447,6 @@ static void formulate2x2MixingMatrix_fx(
		}
		div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
		move32();
		#endif

		#ifdef FIX_1326_SPEEDUP_08
		// This is just a shortcut to already existing optimizations (FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) - but makes everything even faster