Loading lib_rend/ivas_dirac_dec_binaural_functions_fx.c +62 −21 Original line number Diff line number Diff line Loading @@ -45,16 +45,25 @@ #include "wmc_auto.h" //#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream #define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : .4 WMOPS #define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS #define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS #define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS // MHZ NUMBERS: // NULL: 179.292 //#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence --> DONT USE //#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE //#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE //#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE //#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE //#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs --> DONT USE //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs --> DONT USE #define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // ? WMOPS //Big DIffs , no replacement of divSqrt , PIPELINE GREEN, --> USE #define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs, PIPELINE GREEN! -- > USE #define FIX_1326_SPEEDUP_09 // Relocate matrixMul //#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 0 WMOPS --> DONT USE //#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE #define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS #define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS #define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS //#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS -->DONTUSE //#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS //#define FIX_1326_SPEEDUP_14 // Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- Loading Loading @@ -2124,6 +2133,14 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 ); tmp2 = L_add( L_shl( resultMtxRe_fx[0][0], exp ), L_shl( resultMtxRe_fx[1][1], exp ) ); q_tmp2 = add( q_res, exp ); #ifdef FIX_1326_SPEEDUP_11 { Word16 shift1 = s_max( 0, sub( q_tmp2, q_CrEne ) ); Word16 shift2 = s_max( 0, sub( q_CrEne, q_tmp2 ) ); realizedOutputEne_fx = L_add( L_shr( tmp1, shift2 ), L_shr( tmp2, shift1 ) ); q_realizedOutputEne = s_min( q_CrEne, q_tmp2 ); } #else IF( LT_16( q_CrEne, q_tmp2 ) ) { realizedOutputEne_fx = L_add( tmp1, L_shr( tmp2, sub( q_tmp2, q_CrEne ) ) ); Loading @@ -2136,7 +2153,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_realizedOutputEne = q_tmp2; move16(); } #endif exp = sub( get_min_scalefactor( hDiracDecBin->ChEneOut_fx[0][bin], hDiracDecBin->ChEneOut_fx[1][bin] ), 1 ); targetOutputEne_fx = L_add( L_shl( hDiracDecBin->ChEneOut_fx[0][bin], exp ), L_shl( hDiracDecBin->ChEneOut_fx[1][bin], exp ) ); q_targetOutputEne = add( hDiracDecBin->q_ChEneOut, exp ); Loading @@ -2159,15 +2176,26 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_missingOutputEne = q_targetOutputEne; move16(); } tmp1 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), missingOutputEne_fx, sub( 31, q_missingOutputEne ), &exp1 ); #ifdef FIX_1326_SPEEDUP_13 { Word16 exp_temp; tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); tmp2 = ISqrt32( tmp2, &exp_temp ); gain_fx = Mpy_32_32(tmp2, Sqrt32(tmp1, &exp1)); q_gain = sub( 31, add( exp_temp, exp1 ) ); } #else { tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp2 ); tmp2 = ISqrt32( tmp2, &exp2 ); gain_fx = Mpy_32_32( tmp2, Sqrt32( tmp1, &exp1 ) ); q_gain = sub( 31, add( exp2, exp1 ) ); } gain_fx = Sqrt32( tmp2, &exp2 ); q_gain = sub( 31, exp2 ); #endif // 1073741824 = 4 in Q28 Loading Loading @@ -4415,17 +4443,11 @@ static void formulate2x2MixingMatrix_fx( eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); #ifdef FIX_1326_SPEEDUP_09 matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); #endif #else matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); #ifdef FIX_1326_SPEEDUP_09 matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); #endif #endif pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/ Loading Loading @@ -4512,7 +4534,19 @@ static void formulate2x2MixingMatrix_fx( div_fx[1] = L_shr( div_fx[1], sub( sub( 31, exp1 ), q_div ) ); // q_div move32(); // 1310720000 = 10,000.0f in Q17 #ifdef FIX_1326_SPEEDUP_09 { Word16 shift1 = s_max( sub( Q17, q_div ), 0 ); Word16 shift2 = s_max( sub( q_div, Q17 ), 0 ); div_fx[0] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[0], shift2 ) ); // q_div move32(); div_fx[1] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[1], shift2 ) ); // q_div move32(); } #else IF( LT_16( q_div, Q17 ) ) { div_fx[0] = L_min( L_shr( 1310720000, sub( Q17, q_div ) ), div_fx[0] ); // q_div Loading @@ -4529,10 +4563,9 @@ static void formulate2x2MixingMatrix_fx( q_div = Q17; move16(); } #endif #ifndef FIX_1326_SPEEDUP_09 matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); #endif exp = L_norm_arr( div_fx, BINAURAL_CHANNELS ); scale_sig32( div_fx, BINAURAL_CHANNELS, exp ); Loading @@ -4547,7 +4580,11 @@ static void formulate2x2MixingMatrix_fx( W_tmp = W_mult0_32_32( tmpRe_fx[chA][chB], div_fx[chB] ); IF( W_tmp != 0 ) { #ifdef FIX_1326_SPEEDUP_10 hdrm_re[chA][chB] = W_norm( W_tmp ); #else hdrm_re[chA][chB] = sub( W_norm( W_tmp ), 0 ); #endif move16(); W_tmp = W_shl( W_tmp, hdrm_re[chA][chB] ); tmpRe_fx[chA][chB] = W_extract_h( W_tmp ); Loading @@ -4564,7 +4601,11 @@ static void formulate2x2MixingMatrix_fx( W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] ); IF( W_tmp != 0 ) { #ifdef FIX_1326_SPEEDUP_10 hdrm_im[chA][chB] = W_norm( W_tmp ); #else hdrm_im[chA][chB] = sub( W_norm( W_tmp ), 0 ); #endif move16(); W_tmp = W_shl( W_tmp, hdrm_im[chA][chB] ); tmpIm_fx[chA][chB] = W_extract_h( W_tmp ); Loading Loading
lib_rend/ivas_dirac_dec_binaural_functions_fx.c +62 −21 Original line number Diff line number Diff line Loading @@ -45,16 +45,25 @@ #include "wmc_auto.h" //#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream #define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : .4 WMOPS #define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS #define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS #define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS // MHZ NUMBERS: // NULL: 179.292 //#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence --> DONT USE //#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE //#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE //#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE //#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE //#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs --> DONT USE //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs --> DONT USE #define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // ? WMOPS //Big DIffs , no replacement of divSqrt , PIPELINE GREEN, --> USE #define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs, PIPELINE GREEN! -- > USE #define FIX_1326_SPEEDUP_09 // Relocate matrixMul //#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 0 WMOPS --> DONT USE //#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE #define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS #define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS #define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS //#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS -->DONTUSE //#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS //#define FIX_1326_SPEEDUP_14 // Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- Loading Loading @@ -2124,6 +2133,14 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 ); tmp2 = L_add( L_shl( resultMtxRe_fx[0][0], exp ), L_shl( resultMtxRe_fx[1][1], exp ) ); q_tmp2 = add( q_res, exp ); #ifdef FIX_1326_SPEEDUP_11 { Word16 shift1 = s_max( 0, sub( q_tmp2, q_CrEne ) ); Word16 shift2 = s_max( 0, sub( q_CrEne, q_tmp2 ) ); realizedOutputEne_fx = L_add( L_shr( tmp1, shift2 ), L_shr( tmp2, shift1 ) ); q_realizedOutputEne = s_min( q_CrEne, q_tmp2 ); } #else IF( LT_16( q_CrEne, q_tmp2 ) ) { realizedOutputEne_fx = L_add( tmp1, L_shr( tmp2, sub( q_tmp2, q_CrEne ) ) ); Loading @@ -2136,7 +2153,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_realizedOutputEne = q_tmp2; move16(); } #endif exp = sub( get_min_scalefactor( hDiracDecBin->ChEneOut_fx[0][bin], hDiracDecBin->ChEneOut_fx[1][bin] ), 1 ); targetOutputEne_fx = L_add( L_shl( hDiracDecBin->ChEneOut_fx[0][bin], exp ), L_shl( hDiracDecBin->ChEneOut_fx[1][bin], exp ) ); q_targetOutputEne = add( hDiracDecBin->q_ChEneOut, exp ); Loading @@ -2159,15 +2176,26 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_missingOutputEne = q_targetOutputEne; move16(); } tmp1 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), missingOutputEne_fx, sub( 31, q_missingOutputEne ), &exp1 ); #ifdef FIX_1326_SPEEDUP_13 { Word16 exp_temp; tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); tmp2 = ISqrt32( tmp2, &exp_temp ); gain_fx = Mpy_32_32(tmp2, Sqrt32(tmp1, &exp1)); q_gain = sub( 31, add( exp_temp, exp1 ) ); } #else { tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp2 ); tmp2 = ISqrt32( tmp2, &exp2 ); gain_fx = Mpy_32_32( tmp2, Sqrt32( tmp1, &exp1 ) ); q_gain = sub( 31, add( exp2, exp1 ) ); } gain_fx = Sqrt32( tmp2, &exp2 ); q_gain = sub( 31, exp2 ); #endif // 1073741824 = 4 in Q28 Loading Loading @@ -4415,17 +4443,11 @@ static void formulate2x2MixingMatrix_fx( eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); #ifdef FIX_1326_SPEEDUP_09 matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); #endif #else matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); #ifdef FIX_1326_SPEEDUP_09 matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); #endif #endif pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/ Loading Loading @@ -4512,7 +4534,19 @@ static void formulate2x2MixingMatrix_fx( div_fx[1] = L_shr( div_fx[1], sub( sub( 31, exp1 ), q_div ) ); // q_div move32(); // 1310720000 = 10,000.0f in Q17 #ifdef FIX_1326_SPEEDUP_09 { Word16 shift1 = s_max( sub( Q17, q_div ), 0 ); Word16 shift2 = s_max( sub( q_div, Q17 ), 0 ); div_fx[0] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[0], shift2 ) ); // q_div move32(); div_fx[1] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[1], shift2 ) ); // q_div move32(); } #else IF( LT_16( q_div, Q17 ) ) { div_fx[0] = L_min( L_shr( 1310720000, sub( Q17, q_div ) ), div_fx[0] ); // q_div Loading @@ -4529,10 +4563,9 @@ static void formulate2x2MixingMatrix_fx( q_div = Q17; move16(); } #endif #ifndef FIX_1326_SPEEDUP_09 matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); #endif exp = L_norm_arr( div_fx, BINAURAL_CHANNELS ); scale_sig32( div_fx, BINAURAL_CHANNELS, exp ); Loading @@ -4547,7 +4580,11 @@ static void formulate2x2MixingMatrix_fx( W_tmp = W_mult0_32_32( tmpRe_fx[chA][chB], div_fx[chB] ); IF( W_tmp != 0 ) { #ifdef FIX_1326_SPEEDUP_10 hdrm_re[chA][chB] = W_norm( W_tmp ); #else hdrm_re[chA][chB] = sub( W_norm( W_tmp ), 0 ); #endif move16(); W_tmp = W_shl( W_tmp, hdrm_re[chA][chB] ); tmpRe_fx[chA][chB] = W_extract_h( W_tmp ); Loading @@ -4564,7 +4601,11 @@ static void formulate2x2MixingMatrix_fx( W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] ); IF( W_tmp != 0 ) { #ifdef FIX_1326_SPEEDUP_10 hdrm_im[chA][chB] = W_norm( W_tmp ); #else hdrm_im[chA][chB] = sub( W_norm( W_tmp ), 0 ); #endif move16(); W_tmp = W_shl( W_tmp, hdrm_im[chA][chB] ); tmpIm_fx[chA][chB] = W_extract_h( W_tmp ); Loading