Loading lib_rend/ivas_dirac_dec_binaural_functions_fx.c +6 −112 Original line number Diff line number Diff line Loading @@ -53,18 +53,14 @@ //#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE //#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE //#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE //#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs --> DONT USE //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs --> DONT USE //#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 0 WMOPS --> DONT USE //#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE //#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET //#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET //#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET //#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS --> DONTUSE #define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE? (pipe tbd) //#define FIX_1326_SPEEDUP_14 // test wether any of these paths is realy necessary, then assert --> DONTUSE (pipes red, asserts!) //#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE? (pipe tbd) //#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .18 WMOPS --> USE? (pipe tbd) //#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE #define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE? (pipe tbd) #define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .18 WMOPS --> USE? (pipe tbd) Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; Loading Loading @@ -3246,15 +3242,8 @@ static void eig2x2_fx( a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) add_fx = 0.5f * (e1 + e2)*/ #ifdef FIX_1326_SPEEDUP_14 static int tstcnt = 0; #endif IF( L_and( c_re == 0, c_im == 0 ) ) { #ifdef FIX_1326_SPEEDUP_14 tstcnt++; #endif /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0 a_fx = (E1 - E2)^2 pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */ Loading @@ -3272,9 +3261,6 @@ static void eig2x2_fx( q_crossSquare = sub( add( q_c, q_c ), 31 ); IF( EQ_32( e1, e2 ) ) { #ifdef FIX_1326_SPEEDUP_14 tstcnt++; #endif /* if e1 - e2 = 0, then a_fx = 4 * crossSquare_fx pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) = sqrt(0, crossSquare_fx)*/ test(); Loading Loading @@ -3308,9 +3294,6 @@ static void eig2x2_fx( IF( GT_16( sub( q_c, q_e ), Q15 ) ) { #ifdef FIX_1326_SPEEDUP_14 tstcnt++; #endif pm_fx = L_shr( L_max( 0, L_abs( L_sub( e1, e2 ) ) ), 1 ); q_tmp2 = q_e; move16(); Loading @@ -3334,10 +3317,6 @@ static void eig2x2_fx( } } } #ifdef FIX_1326_SPEEDUP_14 if ( tstcnt > 10000 ) assert( 0 ); #endif // add_fx = 0.5 * (e1 + e2) add_fx = L_shr( L_add( e1, e2 ), 1 ); q_tmp1 = q_e; Loading Loading @@ -4611,33 +4590,6 @@ static void formulate2x2MixingMatrix_fx( #endif } ELSE #ifdef FIX_1326_SPEEDUP_05 { Word16 shift = norm_l( temp ); #if 1 // oldcode temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); #else temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) ); exp_temp = sub( 30, q_ein ); if ( temp == 0 ) { exp_temp = EPSILON_EXP; move32(); } if ( temp == 0 ) { temp = EPSILON_MANT; move32(); } #endif temp = ISqrt32( temp, &exp_temp ); shift = sub( 31, q_eout ); Ghat_fx[0] = Mpy_32_32( Sqrt32( E_out1, &shift ), temp ); move32(); exp = add( shift, exp_temp ); } #else { temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); push_wmops( "formulate2x2MixingMatrix Division" ); Loading @@ -4648,7 +4600,7 @@ static void formulate2x2MixingMatrix_fx( Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp #endif } #endif #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp #endif Loading Loading @@ -4677,39 +4629,6 @@ static void formulate2x2MixingMatrix_fx( #endif } ELSE #ifdef FIX_1326_SPEEDUP_06 { Word16 shift = norm_l( temp ); #if 0 // oldcode temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); #else temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) ); exp_temp = sub( 31 - 1, q_ein ); if ( temp == 0 ) { exp_temp = add( 0, EPSILON_EXP ); } if ( temp == 0 ) { temp = L_add( 0, EPSILON_MANT ); } #endif #if 1 // oldcode - new code introduces too much noise push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif #else temp = ISqrt32( temp, &exp_temp ); shift = sub( 31, q_eout ); Ghat_fx[1] = Mpy_32_32( temp, Sqrt32( E_out2, &shift ) ); exp1 = add( shift, exp_temp ); #endif } #else { temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); push_wmops( "formulate2x2MixingMatrix Division" ); Loading @@ -4720,7 +4639,7 @@ static void formulate2x2MixingMatrix_fx( Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif } #endif #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif Loading Loading @@ -4845,30 +4764,6 @@ static void formulate2x2MixingMatrix_fx( #endif pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/ #ifdef FIX_1326_SPEEDUP_07 IF( D_fx[0] == 0 ) { // temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ // exp = ONE_DIV_EPSILON_EXP; div_fx[0] = L_add( 0, 2047986068 ); // Sqrt32( temp, &exp ); // Q = 31 - exp exp = add( 0, 20 ); } ELSE { #if 1 // old code push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); exp = sub( exp, sub( Q30, q_D ) ); pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); #else exp = sub( 31, q_D ); div_fx[0] = ISqrt32_2( D_fx[0], &exp ); move32(); #endif } #else IF( D_fx[0] == 0 ) { #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Loading @@ -4888,7 +4783,6 @@ static void formulate2x2MixingMatrix_fx( } div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); #endif #ifdef FIX_1326_SPEEDUP_08 // This is just a shortcut to already existing optimizations (FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) - but makes everything even faster Loading Loading
lib_rend/ivas_dirac_dec_binaural_functions_fx.c +6 −112 Original line number Diff line number Diff line Loading @@ -53,18 +53,14 @@ //#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE //#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE //#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE //#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs --> DONT USE //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs --> DONT USE //#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 0 WMOPS --> DONT USE //#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE //#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET //#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET //#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET //#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS --> DONTUSE #define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE? (pipe tbd) //#define FIX_1326_SPEEDUP_14 // test wether any of these paths is realy necessary, then assert --> DONTUSE (pipes red, asserts!) //#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE? (pipe tbd) //#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .18 WMOPS --> USE? (pipe tbd) //#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE #define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE? (pipe tbd) #define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .18 WMOPS --> USE? (pipe tbd) Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; Loading Loading @@ -3246,15 +3242,8 @@ static void eig2x2_fx( a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) add_fx = 0.5f * (e1 + e2)*/ #ifdef FIX_1326_SPEEDUP_14 static int tstcnt = 0; #endif IF( L_and( c_re == 0, c_im == 0 ) ) { #ifdef FIX_1326_SPEEDUP_14 tstcnt++; #endif /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0 a_fx = (E1 - E2)^2 pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */ Loading @@ -3272,9 +3261,6 @@ static void eig2x2_fx( q_crossSquare = sub( add( q_c, q_c ), 31 ); IF( EQ_32( e1, e2 ) ) { #ifdef FIX_1326_SPEEDUP_14 tstcnt++; #endif /* if e1 - e2 = 0, then a_fx = 4 * crossSquare_fx pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) = sqrt(0, crossSquare_fx)*/ test(); Loading Loading @@ -3308,9 +3294,6 @@ static void eig2x2_fx( IF( GT_16( sub( q_c, q_e ), Q15 ) ) { #ifdef FIX_1326_SPEEDUP_14 tstcnt++; #endif pm_fx = L_shr( L_max( 0, L_abs( L_sub( e1, e2 ) ) ), 1 ); q_tmp2 = q_e; move16(); Loading @@ -3334,10 +3317,6 @@ static void eig2x2_fx( } } } #ifdef FIX_1326_SPEEDUP_14 if ( tstcnt > 10000 ) assert( 0 ); #endif // add_fx = 0.5 * (e1 + e2) add_fx = L_shr( L_add( e1, e2 ), 1 ); q_tmp1 = q_e; Loading Loading @@ -4611,33 +4590,6 @@ static void formulate2x2MixingMatrix_fx( #endif } ELSE #ifdef FIX_1326_SPEEDUP_05 { Word16 shift = norm_l( temp ); #if 1 // oldcode temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); #else temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) ); exp_temp = sub( 30, q_ein ); if ( temp == 0 ) { exp_temp = EPSILON_EXP; move32(); } if ( temp == 0 ) { temp = EPSILON_MANT; move32(); } #endif temp = ISqrt32( temp, &exp_temp ); shift = sub( 31, q_eout ); Ghat_fx[0] = Mpy_32_32( Sqrt32( E_out1, &shift ), temp ); move32(); exp = add( shift, exp_temp ); } #else { temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); push_wmops( "formulate2x2MixingMatrix Division" ); Loading @@ -4648,7 +4600,7 @@ static void formulate2x2MixingMatrix_fx( Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp #endif } #endif #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp #endif Loading Loading @@ -4677,39 +4629,6 @@ static void formulate2x2MixingMatrix_fx( #endif } ELSE #ifdef FIX_1326_SPEEDUP_06 { Word16 shift = norm_l( temp ); #if 0 // oldcode temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); #else temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) ); exp_temp = sub( 31 - 1, q_ein ); if ( temp == 0 ) { exp_temp = add( 0, EPSILON_EXP ); } if ( temp == 0 ) { temp = L_add( 0, EPSILON_MANT ); } #endif #if 1 // oldcode - new code introduces too much noise push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif #else temp = ISqrt32( temp, &exp_temp ); shift = sub( 31, q_eout ); Ghat_fx[1] = Mpy_32_32( temp, Sqrt32( E_out2, &shift ) ); exp1 = add( shift, exp_temp ); #endif } #else { temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); push_wmops( "formulate2x2MixingMatrix Division" ); Loading @@ -4720,7 +4639,7 @@ static void formulate2x2MixingMatrix_fx( Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif } #endif #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif Loading Loading @@ -4845,30 +4764,6 @@ static void formulate2x2MixingMatrix_fx( #endif pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/ #ifdef FIX_1326_SPEEDUP_07 IF( D_fx[0] == 0 ) { // temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ // exp = ONE_DIV_EPSILON_EXP; div_fx[0] = L_add( 0, 2047986068 ); // Sqrt32( temp, &exp ); // Q = 31 - exp exp = add( 0, 20 ); } ELSE { #if 1 // old code push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); exp = sub( exp, sub( Q30, q_D ) ); pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); #else exp = sub( 31, q_D ); div_fx[0] = ISqrt32_2( D_fx[0], &exp ); move32(); #endif } #else IF( D_fx[0] == 0 ) { #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Loading @@ -4888,7 +4783,6 @@ static void formulate2x2MixingMatrix_fx( } div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); #endif #ifdef FIX_1326_SPEEDUP_08 // This is just a shortcut to already existing optimizations (FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) - but makes everything even faster Loading