From 469ab4619d538f0df6037890fd6abfa8e8cadb48 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Thu, 27 Feb 2025 10:39:39 +0100 Subject: [PATCH 01/41] - added some wmops push/pop, - added FIX_xxxx_SPEEDUP_00: not implemented, no bitstream - added FIX_xxxx_SPEEDUP_01: not implemented yet --- lib_com/basop_util.c | 6 +- lib_dec/ivas_jbm_dec_fx.c | 2 +- .../ivas_dirac_dec_binaural_functions_fx.c | 72 +++++++++++++++++-- 3 files changed, 74 insertions(+), 6 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index b7ee35ab3..fa8d097df 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1010,6 +1010,7 @@ Word32 div_w( Word32 L_num, Word32 L_den ) } } + Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { Word32 z; @@ -1017,6 +1018,8 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) Word16 sy; Word32 sign; + //push_wmops( "BASOP_Util_Divide3232_Scale_cadence" ); + /* assert (x >= (Word32)0); */ assert( y != (Word32) 0 ); @@ -1038,6 +1041,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) IF( x == (Word32) 0 ) { *s = 0; + //pop_wmops(); return ( (Word32) 0 ); } @@ -1058,7 +1062,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { z = L_negate( z ); } - + //pop_wmops(); return z; } diff --git a/lib_dec/ivas_jbm_dec_fx.c b/lib_dec/ivas_jbm_dec_fx.c index 1918ce3f3..ce60c0d65 100644 --- a/lib_dec/ivas_jbm_dec_fx.c +++ b/lib_dec/ivas_jbm_dec_fx.c @@ -1875,7 +1875,7 @@ ivas_error ivas_jbm_dec_render_fx( move16(); SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom; - push_wmops( "ivas_dec_render" ); + push_wmops( "ivas_dec_render (IDR)" ); /*----------------------------------------------------------------* * Initialization of local vars after struct has been set *----------------------------------------------------------------*/ diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index b5dd1f8b9..8153775b7 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -45,6 +45,9 @@ #include "wmc_auto.h" +//#define FIX_xxxx_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream +//#define FIX_xxxx_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : rollout loop in mul, only 3 out of 4 results are needed - maybe a=b can also benefitcui + Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- @@ -504,8 +507,9 @@ void ivas_dirac_dec_binaural_render_fx( FOR( subframe_idx = first_sf; subframe_idx < last_sf; subframe_idx++ ) { Word16 n_samples_sf = imult1616( slot_size, hSpatParamRendCom->subframe_nbslots[subframe_idx] ); + push_wmops( "IDR binaural internal (IDRBI)" ); ivas_dirac_dec_binaural_internal_fx( st_ivas, st_ivas->hCombinedOrientationData, output_fx_local, nchan_transport, subframe_idx ); - + pop_wmops();/*push_wmops( "IDR binaural internal (IDRBI)" );*/ FOR( ch = 0; ch < nchan_out; ch++ ) { output_fx_local[ch] += n_samples_sf; @@ -708,6 +712,7 @@ static void ivas_dirac_dec_binaural_internal_fx( } } /* CLDFB Analysis of input */ + push_wmops( "IDRBI CLDFB ANALYSYS" ); FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ ) { FOR( ch = 0; ch < numInChannels; ch++ ) @@ -857,6 +862,7 @@ static void ivas_dirac_dec_binaural_internal_fx( } } } + pop_wmops(); /*push_wmops( "IDRBI CLDFB ANALYSYS" );*/ test(); IF( EQ_32( config_data.ivas_format, SBA_FORMAT ) || EQ_32( config_data.ivas_format, SBA_ISM_FORMAT ) ) @@ -921,7 +927,9 @@ static void ivas_dirac_dec_binaural_internal_fx( } test(); + push_wmops( "IDRBI cov matrices" ); ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, st_ivas->hMasaIsmData, q_inp ); + pop_wmops();/*push_wmops( "IDRBI cov matrices" );*/ IF( EQ_32( config_data.ivas_format, ISM_FORMAT ) ) { @@ -959,7 +967,9 @@ static void ivas_dirac_dec_binaural_internal_fx( move16(); } + push_wmops( "IDRBI proc matrices (IRDBI pm)" ); ivas_dirac_dec_binaural_determine_processing_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, max_band_decorr, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, nchanSeparateChannels, st_ivas->hMasaIsmData ); + pop_wmops(); /*push_wmops( "IDRBI proc matrices (IRDBI pm)" );*/ q_inp = Q6; move16(); @@ -1005,8 +1015,10 @@ static void ivas_dirac_dec_binaural_internal_fx( hDiracDecBin->q_processMtxDecPrev = q_mat; move16(); + push_wmops( "IDRBI processOutput" ); ivas_dirac_dec_binaural_process_output_fx( hDiracDecBin, hSpatParamRendCom, st_ivas->cldfbSynDec, output_fx, &q_out, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, q_inp, max_band_decorr, numInChannels, config_data.processReverb, subframe, q_mat ); - + pop_wmops(); /*push_wmops( "IDRBI processOutput" ); + */ hDiracDecBin->hDiffuseDist = NULL; hSpatParamRendCom->slots_rendered = add( hSpatParamRendCom->slots_rendered, hSpatParamRendCom->subframe_nbslots[subframe] ); @@ -1843,6 +1855,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( move16(); } + push_wmops( "IRDBI pm LOOP1 (IDRBI pm LOOP1)" ); FOR( bin = 0; bin < nBins; bin++ ) { Word32 tmpMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], gain_fx; @@ -1866,6 +1879,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_CrEne = Q31; move16(); + push_wmops( "IDRBI pm LOOP1 sec A (formulate2x2MixingMatrix)" ); IF( GT_16( hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_e[1][bin] ) ) { hDiracDecBin->ChEne_fx[1][bin] = L_shr( hDiracDecBin->ChEne_fx[1][bin], sub( hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_e[1][bin] ) ); @@ -1935,7 +1949,9 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossImOut_fx[bin], hDiracDecBin->q_ChCrossOut, prototypeMtx_fx, Mre_fx, Mim_fx, &q_M, hDiracDecBin->reqularizationFactor_fx ); + pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec A (formulate2x2MixingMatrix)" );*/ + push_wmops( "IDRBI pm LOOP1 sec B" ); IF( LT_16( hDiracDecBin->q_ChEne, hDiracDecBin->q_ChCross ) ) { CxRe_fx[0][0] = hDiracDecBin->ChEne_fx[0][bin]; @@ -1989,9 +2005,13 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( #endif resultMtxRe_fx, resultMtxIm_fx, &q_res ); + pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec B" );*/ + /* When below the frequency limit where decorrelation is applied, we inject the decorrelated * residual (or missing) signal component. The procedure is active when there are not enough independent * signal energy to synthesize a signal with the target covariance matrix from the non-decorrelated signals */ + + push_wmops( "IDRBI pm LOOP1 sec C" ); IF( LT_16( bin, max_band_decorr ) ) { Word32 decorrelationReductionFactor_fx; @@ -2107,7 +2127,9 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_Mdec = Q31; move16(); } + pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec C" );*/ + push_wmops( "IDRBI pm LOOP1 sec D" ); /* The regularizations at determining mixing matrices cause signal energy to be lost to some degree, which is compensated for here */ tmp1 = L_add( CrEneL_fx, CrEneR_fx ); exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 ); @@ -2198,6 +2220,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_processMtxDec_bin = q_processMtxDec[bin]; move16(); move16(); + /* Store processing matrices */ FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { @@ -2232,7 +2255,10 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( move16(); q_processMtxDec[bin] = sub( q_Mdec, 16 ); move16(); + pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec D" );*/ + + push_wmops( "IDRBI pm LOOP1 sec E" ); IF( separateCenterChannelRendering ) { /* The rendering of the separate center channel in masa + mono mode. @@ -2322,7 +2348,10 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( } } } + pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec E" );*/ } + pop_wmops(); /*push_wmops( "IRDBI pm LOOP1 (IDRBI pm LOOP1)" );*/ + /* Aligning Q-factors of all bins in the processing matrices to a common Q-factor */ minimum_s( q_processMtx, nBins, &hDiracDecBin->q_processMtx ); minimum_s( q_processMtxPrev, nBins, &hDiracDecBin->q_processMtxPrev ); @@ -2342,6 +2371,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( minimum_s( q_processMtxDec, nBins, &hDiracDecBin->q_processMtxDec ); minimum_s( q_processMtxDecPrev, nBins, &hDiracDecBin->q_processMtxDecPrev ); + push_wmops( "IRDBI pm LOOP2" ); FOR( bin = 0; bin < nBins; bin++ ) { FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) @@ -2381,6 +2411,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( } } } + pop_wmops(); /*push_wmops( "IRDBI pm LOOP2" );*/ return; } @@ -4354,7 +4385,9 @@ static void formulate2x2MixingMatrix_fx( } ELSE { + push_wmops( "formulate2x2MixingMatrix Division" ); maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxEne_fx, &exp ); + pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/ q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) ); } exp = norm_l( maxEneDiv_fx ); @@ -4377,9 +4410,12 @@ static void formulate2x2MixingMatrix_fx( Cout_im = Mpy_32_32( Cout_im, maxEneDiv_fx ); q_cout = sub( add( q_cout, q_maxEneDiv ), 31 ); + push_wmops( "formulate2x2MixingMatrix cholesky" ); /* Cholesky decomposition of target / output covariance matrix */ chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky ); + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix cholesky" );*/ + push_wmops( "formulate2x2MixingMatrix Eigendecomp" ); /* Eigendecomposition of input covariance matrix */ eig2x2_fx( E_in1, E_in2, q_ein, Cin_re, Cin_im, q_cin, Uxre_fx, Uxim_fx, &q_Ux, Sx_fx, &q_Sx ); @@ -4397,7 +4433,9 @@ static void formulate2x2MixingMatrix_fx( move32(); matrixDiagMul_fx( Uxre_fx, Uxim_fx, q_Ux, Sx_fx, q_Sx, Kxre_fx, Kxim_fx, &q_Kx ); + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Eigendecomp" );*/ + push_wmops( "formulate2x2MixingMatrix RegSMInv" ); /* Regularize the diagonal Sx for matrix inversion */ Sx_fx[0] = L_max( L_shr( Sx_fx[0], 1 ), Mpy_32_16_1( Sx_fx[1], regularizationFactor_fx ) ); Sx_fx[1] = L_max( L_shr( Sx_fx[1], 1 ), L_shl( Mpy_32_16_1( Sx_fx[0], regularizationFactor_fx ), 1 ) ); @@ -4432,8 +4470,9 @@ static void formulate2x2MixingMatrix_fx( ELSE { temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); - + push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp ); + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) ); #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp @@ -4469,8 +4508,9 @@ static void formulate2x2MixingMatrix_fx( ELSE { temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); - + push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); + pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/ exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 @@ -4487,7 +4527,9 @@ static void formulate2x2MixingMatrix_fx( move32(); Ghat_fx[1] = L_shr( Ghat_fx[1], sub( sub( 31, exp1 ), q_Ghat ) ); // q_Ghat move32(); + pop_wmops(); + push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" ); /* Matrix multiplication, tmp = Ky' * G_hat * Q */ FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { @@ -4513,17 +4555,29 @@ static void formulate2x2MixingMatrix_fx( move32(); } } + pop_wmops();/*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" );*/ q_temp = sub( add( q_ky, q_GhatQ ), 31 ); + push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" ); /* A = Ky' * G_hat * Q * Kx (see publication) */ matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A ); + pop_wmops();/*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" );*/ + push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" ); /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx For matrix A that is P = A(A'A)^0.5 */ + push_wmops( "oPtoA MT1M" ); +#ifdef FIX_xxxx_SPEEDUP_01 + matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); + + eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); +#else matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); +#endif + pop_wmops();/*push_wmops( "oPtoA MT1M" );*/ IF( D_fx[0] == 0 ) { @@ -4537,8 +4591,10 @@ static void formulate2x2MixingMatrix_fx( } ELSE { + push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); exp = sub( exp, sub( Q30, q_D ) ); + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ } div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); @@ -4555,7 +4611,9 @@ static void formulate2x2MixingMatrix_fx( } ELSE { + push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[1], &exp1 ); + pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/ exp1 = sub( exp1, sub( Q30, q_D ) ); } div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 @@ -4657,7 +4715,9 @@ static void formulate2x2MixingMatrix_fx( 0 /*int Bscale*/, #endif Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA" );*/ + push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" ); /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ #if ( BINAURAL_CHANNELS != 2 ) FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) @@ -4740,7 +4800,9 @@ static void formulate2x2MixingMatrix_fx( { Word16 Pre_shift, Pim_shift; temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); + push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp ); + pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/ q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp ); Pre_shift = norm_l( Pre_fx[0][chB] ); @@ -4811,6 +4873,8 @@ static void formulate2x2MixingMatrix_fx( 0 /*int Bscale*/, #endif Mre_fx, Mim_fx, q_M ); + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" );*/ + return; } -- GitLab From 679ca05f9e94d39c8df4ff0a4b107b9fd89782bc Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Thu, 27 Feb 2025 10:41:26 +0100 Subject: [PATCH 02/41] change names of macros from xxxx to 1326 --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 8153775b7..716b16262 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -45,8 +45,8 @@ #include "wmc_auto.h" -//#define FIX_xxxx_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream -//#define FIX_xxxx_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : rollout loop in mul, only 3 out of 4 results are needed - maybe a=b can also benefitcui +//#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream +//#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : rollout loop in mul, only 3 out of 4 results are needed - maybe a=b can also benefitcui Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; @@ -4568,7 +4568,7 @@ static void formulate2x2MixingMatrix_fx( /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx For matrix A that is P = A(A'A)^0.5 */ push_wmops( "oPtoA MT1M" ); -#ifdef FIX_xxxx_SPEEDUP_01 +#ifdef FIX_1326_SPEEDUP_01 matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); -- GitLab From 50fbdde0ea69c9e131293ad7a4abecf64498c847 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Thu, 27 Feb 2025 16:56:36 +0100 Subject: [PATCH 03/41] added and activated FIX_1326_SPEEDUP_00 - 07 --- .../ivas_dirac_dec_binaural_functions_fx.c | 190 +++++++++++++++++- 1 file changed, 182 insertions(+), 8 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 716b16262..d1fb6c23c 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -46,8 +46,13 @@ #include "wmc_auto.h" //#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream -//#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : rollout loop in mul, only 3 out of 4 results are needed - maybe a=b can also benefitcui - +#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : .4 WMOPS +#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS +#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS +#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS +#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS +#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS +#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- @@ -3280,6 +3285,19 @@ static void eig2x2_fx( /* Numeric case, when input is practically zeros */ // IF( D_fx[0] < EPSILON_FX ) +#ifdef FIX_1326_SPEEDUP_02 + IF ( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) ) + { + Ure_fx[0][0] = ONE_IN_Q31; + move32(); + Ure_fx[1][1] = ONE_IN_Q31; + move32(); + *q_U = Q31; + move16(); + + return; + } +#else IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( D_fx[0], *q_D, EPSILON_MANT, EPSILON_EXP ), -1 ) ) { Ure_fx[0][0] = ONE_IN_Q31; @@ -3291,8 +3309,24 @@ static void eig2x2_fx( return; } +#endif /* Numeric case, when input is near an identity matrix with a gain */ +#ifdef FIX_1326_SPEEDUP_03 //178.932 + tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31 + + IF( LT_32( pm_fx, L_shl_sat(tmp1, sub(q_tmp1,q_tmp2) ) ) ) + { + Ure_fx[0][0] = ONE_IN_Q30; + move32(); + Ure_fx[1][1] = ONE_IN_Q30; + move32(); + *q_U = Q30; + move16(); + + return; + } +#else tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31 IF( LT_16( q_tmp1, q_tmp2 ) ) @@ -3323,6 +3357,7 @@ static void eig2x2_fx( return; } } +#endif q_U_1 = 0; q_U_2 = 0; @@ -3431,10 +3466,22 @@ static void eig2x2_fx( tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); + +#ifdef FIX_1326_SPEEDUP_04 + Word16 exp_tmp2; + Word32 eps_tmp; + + tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &exp_tmp2 ); + eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, exp_tmp2 ) ); + + tmp3 = L_add( L_shr ( tmp2,1), L_shr(eps_tmp,1) ); // Add Epsilon if relevant + + exp_tmp3 = add(exp_tmp2 , 1); +#else tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); - tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); +#endif #if 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); @@ -4387,7 +4434,7 @@ static void formulate2x2MixingMatrix_fx( { push_wmops( "formulate2x2MixingMatrix Division" ); maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxEne_fx, &exp ); - pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/ + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) ); } exp = norm_l( maxEneDiv_fx ); @@ -4468,6 +4515,28 @@ static void formulate2x2MixingMatrix_fx( #endif } ELSE +#ifdef FIX_1326_SPEEDUP_05 + { + Word16 shift = norm_l( temp ); + temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) ); + exp_temp = sub( 31, q_ein ); + if ( temp == 0 ) + { + exp_temp = EPSILON_EXP; + move32(); + } + if (temp == 0) + { + temp = EPSILON_MANT; + move32(); + } + temp = ISqrt32( temp , &exp_temp); + shift = sub( 31, q_eout ); + Ghat_fx[0] = Mpy_32_32( Sqrt32( E_out1, &shift ), temp ); + move32(); + exp = add( shift, exp_temp ); + } +#else { temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); push_wmops( "formulate2x2MixingMatrix Division" ); @@ -4478,6 +4547,7 @@ static void formulate2x2MixingMatrix_fx( Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp #endif } +#endif #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp #endif @@ -4506,16 +4576,36 @@ static void formulate2x2MixingMatrix_fx( #endif } ELSE +#ifdef FIX_1326_SPEEDUP_06 + { + Word16 shift = norm_l( temp ); + temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) ); + exp_temp = sub(31, q_ein); + if ( temp == 0 ) + { + exp_temp = add( 0, EPSILON_EXP ); + } + if (temp == 0) + { + temp = L_add( 0, EPSILON_MANT ); + } + temp = ISqrt32( temp, &exp_temp ); + shift = sub( 31, q_eout ); + Ghat_fx[1] = Mpy_32_32( temp, ISqrt32( E_out2, &shift ) ); + exp_temp = add( shift, exp_temp ); + } +#else { temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); - pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/ + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif } +#endif #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif @@ -4555,21 +4645,86 @@ static void formulate2x2MixingMatrix_fx( move32(); } } - pop_wmops();/*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" );*/ + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" );*/ q_temp = sub( add( q_ky, q_GhatQ ), 31 ); push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" ); /* A = Ky' * G_hat * Q * Kx (see publication) */ matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A ); - pop_wmops();/*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" );*/ + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" );*/ push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" ); /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx For matrix A that is P = A(A'A)^0.5 */ push_wmops( "oPtoA MT1M" ); #ifdef FIX_1326_SPEEDUP_01 - matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); + // matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); + + { + Word16 chA, chB; + { + chA = 0, chB = 0; + tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ), + Are_fx[1][0], Are_fx[1][0] ), + Aim_fx[0][0], Aim_fx[0][0] ), + Aim_fx[1][0], Aim_fx[1][0] ); + move32(); + } + { + chA = 0, chB = 1; + tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ), + Are_fx[1][1], Are_fx[1][0] ), + Aim_fx[0][1], Aim_fx[0][0] ), + Aim_fx[1][1], Aim_fx[1][0] ); + move32(); + tmpIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ), + Are_fx[1][1], Aim_fx[1][0] ), + Aim_fx[0][1], Are_fx[0][0] ), + Aim_fx[1][1], Are_fx[1][0] ); + move32(); + } + { + chA = 1, chB = 0; + tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ), + Are_fx[1][1], Are_fx[1][1] ), + Aim_fx[0][1], Aim_fx[0][1] ), + Aim_fx[1][1], Aim_fx[1][1] ); + move32(); + } + { + chA = 1, chB = 1; + } + + q_temp = sub( add( q_A, q_A ), 31 ); + + move16(); + Word16 ZeroState = add( 1, 0 ); + if (tmpRe_fx[0][0] != 0) + { + ZeroState = add(0, 0); + } + if ( tmpRe_fx[1][1] != 0 ) + { + ZeroState = add( 0, 0 ); + } + if ( tmpRe_fx[1][0] != 0 ) + { + ZeroState = add( 0, 0 ); + } + if ( tmpIm_fx[1][0] != 0 ) + { + ZeroState = add( 0, 0 ); + } + + if ( sub(ZeroState,1) == 0 ) + { + q_temp = Q31; + move16(); + } + + } + eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); #else @@ -4579,6 +4734,24 @@ static void formulate2x2MixingMatrix_fx( #endif pop_wmops();/*push_wmops( "oPtoA MT1M" );*/ +#ifdef FIX_1326_SPEEDUP_07 + IF( D_fx[0] == 0 ) + { + //temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + //exp = ONE_DIV_EPSILON_EXP; + div_fx[0] = L_add(0,2047986068); //Sqrt32( temp, &exp ); // Q = 31 - exp + exp = add(0,20); + } + ELSE + { + exp = sub( 31, q_D ); + div_fx[0] = ISqrt32( D_fx[0], &exp ); + //temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); + //exp = sub( exp, sub( Q30, q_D ) ); + //div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp + move32(); + } +#else IF( D_fx[0] == 0 ) { #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC @@ -4598,6 +4771,7 @@ static void formulate2x2MixingMatrix_fx( } div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); +#endif IF( D_fx[1] == 0 ) { -- GitLab From d12ec4121777398516e9a8bab2665f62e16c43c0 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Tue, 4 Mar 2025 16:32:41 +0100 Subject: [PATCH 04/41] apply clang patch --- lib_com/basop_util.c | 6 +- .../ivas_dirac_dec_binaural_functions_fx.c | 139 +++++++++--------- 2 files changed, 72 insertions(+), 73 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index fa8d097df..609ca234d 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1018,7 +1018,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) Word16 sy; Word32 sign; - //push_wmops( "BASOP_Util_Divide3232_Scale_cadence" ); + // push_wmops( "BASOP_Util_Divide3232_Scale_cadence" ); /* assert (x >= (Word32)0); */ assert( y != (Word32) 0 ); @@ -1041,7 +1041,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) IF( x == (Word32) 0 ) { *s = 0; - //pop_wmops(); + // pop_wmops(); return ( (Word32) 0 ); } @@ -1062,7 +1062,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { z = L_negate( z ); } - //pop_wmops(); + // pop_wmops(); return z; } diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index d1fb6c23c..577ee62f7 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -514,7 +514,7 @@ void ivas_dirac_dec_binaural_render_fx( Word16 n_samples_sf = imult1616( slot_size, hSpatParamRendCom->subframe_nbslots[subframe_idx] ); push_wmops( "IDR binaural internal (IDRBI)" ); ivas_dirac_dec_binaural_internal_fx( st_ivas, st_ivas->hCombinedOrientationData, output_fx_local, nchan_transport, subframe_idx ); - pop_wmops();/*push_wmops( "IDR binaural internal (IDRBI)" );*/ + pop_wmops(); /*push_wmops( "IDR binaural internal (IDRBI)" );*/ FOR( ch = 0; ch < nchan_out; ch++ ) { output_fx_local[ch] += n_samples_sf; @@ -934,7 +934,7 @@ static void ivas_dirac_dec_binaural_internal_fx( test(); push_wmops( "IDRBI cov matrices" ); ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, st_ivas->hMasaIsmData, q_inp ); - pop_wmops();/*push_wmops( "IDRBI cov matrices" );*/ + pop_wmops(); /*push_wmops( "IDRBI cov matrices" );*/ IF( EQ_32( config_data.ivas_format, ISM_FORMAT ) ) { @@ -1023,7 +1023,7 @@ static void ivas_dirac_dec_binaural_internal_fx( push_wmops( "IDRBI processOutput" ); ivas_dirac_dec_binaural_process_output_fx( hDiracDecBin, hSpatParamRendCom, st_ivas->cldfbSynDec, output_fx, &q_out, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, q_inp, max_band_decorr, numInChannels, config_data.processReverb, subframe, q_mat ); pop_wmops(); /*push_wmops( "IDRBI processOutput" ); - */ + */ hDiracDecBin->hDiffuseDist = NULL; hSpatParamRendCom->slots_rendered = add( hSpatParamRendCom->slots_rendered, hSpatParamRendCom->subframe_nbslots[subframe] ); @@ -3286,7 +3286,7 @@ static void eig2x2_fx( /* Numeric case, when input is practically zeros */ // IF( D_fx[0] < EPSILON_FX ) #ifdef FIX_1326_SPEEDUP_02 - IF ( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) ) + IF( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) ) { Ure_fx[0][0] = ONE_IN_Q31; move32(); @@ -3312,20 +3312,20 @@ static void eig2x2_fx( #endif /* Numeric case, when input is near an identity matrix with a gain */ -#ifdef FIX_1326_SPEEDUP_03 //178.932 +#ifdef FIX_1326_SPEEDUP_03 // 178.932 tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31 - IF( LT_32( pm_fx, L_shl_sat(tmp1, sub(q_tmp1,q_tmp2) ) ) ) - { - Ure_fx[0][0] = ONE_IN_Q30; - move32(); - Ure_fx[1][1] = ONE_IN_Q30; - move32(); - *q_U = Q30; - move16(); + IF( LT_32( pm_fx, L_shl_sat( tmp1, sub( q_tmp1, q_tmp2 ) ) ) ) + { + Ure_fx[0][0] = ONE_IN_Q30; + move32(); + Ure_fx[1][1] = ONE_IN_Q30; + move32(); + *q_U = Q30; + move16(); - return; - } + return; + } #else tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31 @@ -3470,13 +3470,13 @@ static void eig2x2_fx( #ifdef FIX_1326_SPEEDUP_04 Word16 exp_tmp2; Word32 eps_tmp; - + tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &exp_tmp2 ); eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, exp_tmp2 ) ); - tmp3 = L_add( L_shr ( tmp2,1), L_shr(eps_tmp,1) ); // Add Epsilon if relevant + tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) ); // Add Epsilon if relevant - exp_tmp3 = add(exp_tmp2 , 1); + exp_tmp3 = add( exp_tmp2, 1 ); #else tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); @@ -4525,17 +4525,17 @@ static void formulate2x2MixingMatrix_fx( exp_temp = EPSILON_EXP; move32(); } - if (temp == 0) + if ( temp == 0 ) { temp = EPSILON_MANT; move32(); } - temp = ISqrt32( temp , &exp_temp); + temp = ISqrt32( temp, &exp_temp ); shift = sub( 31, q_eout ); Ghat_fx[0] = Mpy_32_32( Sqrt32( E_out1, &shift ), temp ); move32(); exp = add( shift, exp_temp ); - } + } #else { temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); @@ -4580,12 +4580,12 @@ static void formulate2x2MixingMatrix_fx( { Word16 shift = norm_l( temp ); temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) ); - exp_temp = sub(31, q_ein); + exp_temp = sub( 31, q_ein ); if ( temp == 0 ) { exp_temp = add( 0, EPSILON_EXP ); } - if (temp == 0) + if ( temp == 0 ) { temp = L_add( 0, EPSILON_MANT ); } @@ -4663,46 +4663,46 @@ static void formulate2x2MixingMatrix_fx( { Word16 chA, chB; - { + { chA = 0, chB = 0; - tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ), - Are_fx[1][0], Are_fx[1][0] ), - Aim_fx[0][0], Aim_fx[0][0] ), - Aim_fx[1][0], Aim_fx[1][0] ); - move32(); - } - { - chA = 0, chB = 1; - tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ), - Are_fx[1][1], Are_fx[1][0] ), - Aim_fx[0][1], Aim_fx[0][0] ), - Aim_fx[1][1], Aim_fx[1][0] ); - move32(); - tmpIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ), - Are_fx[1][1], Aim_fx[1][0] ), - Aim_fx[0][1], Are_fx[0][0] ), - Aim_fx[1][1], Are_fx[1][0] ); - move32(); - } - { - chA = 1, chB = 0; - tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ), - Are_fx[1][1], Are_fx[1][1] ), - Aim_fx[0][1], Aim_fx[0][1] ), - Aim_fx[1][1], Aim_fx[1][1] ); - move32(); - } - { - chA = 1, chB = 1; - } + tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ), + Are_fx[1][0], Are_fx[1][0] ), + Aim_fx[0][0], Aim_fx[0][0] ), + Aim_fx[1][0], Aim_fx[1][0] ); + move32(); + } + { + chA = 0, chB = 1; + tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ), + Are_fx[1][1], Are_fx[1][0] ), + Aim_fx[0][1], Aim_fx[0][0] ), + Aim_fx[1][1], Aim_fx[1][0] ); + move32(); + tmpIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ), + Are_fx[1][1], Aim_fx[1][0] ), + Aim_fx[0][1], Are_fx[0][0] ), + Aim_fx[1][1], Are_fx[1][0] ); + move32(); + } + { + chA = 1, chB = 0; + tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ), + Are_fx[1][1], Are_fx[1][1] ), + Aim_fx[0][1], Aim_fx[0][1] ), + Aim_fx[1][1], Aim_fx[1][1] ); + move32(); + } + { + chA = 1, chB = 1; + } q_temp = sub( add( q_A, q_A ), 31 ); move16(); Word16 ZeroState = add( 1, 0 ); - if (tmpRe_fx[0][0] != 0) + if ( tmpRe_fx[0][0] != 0 ) { - ZeroState = add(0, 0); + ZeroState = add( 0, 0 ); } if ( tmpRe_fx[1][1] != 0 ) { @@ -4717,12 +4717,11 @@ static void formulate2x2MixingMatrix_fx( ZeroState = add( 0, 0 ); } - if ( sub(ZeroState,1) == 0 ) + if ( sub( ZeroState, 1 ) == 0 ) { - q_temp = Q31; + q_temp = Q31; move16(); } - } @@ -4732,23 +4731,23 @@ static void formulate2x2MixingMatrix_fx( eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); #endif - pop_wmops();/*push_wmops( "oPtoA MT1M" );*/ + pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/ #ifdef FIX_1326_SPEEDUP_07 IF( D_fx[0] == 0 ) { - //temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ - //exp = ONE_DIV_EPSILON_EXP; - div_fx[0] = L_add(0,2047986068); //Sqrt32( temp, &exp ); // Q = 31 - exp - exp = add(0,20); + // temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + // exp = ONE_DIV_EPSILON_EXP; + div_fx[0] = L_add( 0, 2047986068 ); // Sqrt32( temp, &exp ); // Q = 31 - exp + exp = add( 0, 20 ); } ELSE { exp = sub( 31, q_D ); div_fx[0] = ISqrt32( D_fx[0], &exp ); - //temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); - //exp = sub( exp, sub( Q30, q_D ) ); - //div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp + // temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); + // exp = sub( exp, sub( Q30, q_D ) ); + // div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); } #else @@ -4787,7 +4786,7 @@ static void formulate2x2MixingMatrix_fx( { push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[1], &exp1 ); - pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/ + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ exp1 = sub( exp1, sub( Q30, q_D ) ); } div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 @@ -4889,7 +4888,7 @@ static void formulate2x2MixingMatrix_fx( 0 /*int Bscale*/, #endif Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA" );*/ + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA" );*/ push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" ); /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ @@ -4976,7 +4975,7 @@ static void formulate2x2MixingMatrix_fx( temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp ); - pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/ + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp ); Pre_shift = norm_l( Pre_fx[0][chB] ); -- GitLab From f12a124db1300a8f344276a4b674747fa464a264 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Tue, 4 Mar 2025 17:04:07 +0100 Subject: [PATCH 05/41] added FIX_1326_SPEEDUP_08 --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 577ee62f7..2dac2b867 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -53,6 +53,7 @@ #define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS #define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS #define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS +#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- @@ -4772,6 +4773,20 @@ static void formulate2x2MixingMatrix_fx( move32(); #endif +#ifdef FIX_1326_SPEEDUP_08 + // This is just a shortcut to already existing optimizations (FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) - but makes everything even faster + { + div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1 + exp1 = add( 0, 20 ); // move32(); + } + + IF( D_fx[1] != 0 ) // This is the new code: replace div sqrt by isqrt + { + exp1 = sub( 31, q_D ); + div_fx[1] = ISqrt32( D_fx[1], &exp1 ); + move32(); + } +#else IF( D_fx[1] == 0 ) { #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC @@ -4791,7 +4806,7 @@ static void formulate2x2MixingMatrix_fx( } div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 move32(); - +#endif q_div = sub( 31, s_max( exp, exp1 ) ); div_fx[0] = L_shr( div_fx[0], sub( sub( 31, exp ), q_div ) ); // q_div -- GitLab From 56305ddde36fee4fe6ecfb9e50494117be5a6c74 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Tue, 4 Mar 2025 17:07:26 +0100 Subject: [PATCH 06/41] fixed warning --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 2dac2b867..7c243d7d8 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3770,6 +3770,7 @@ static void matrixMul_fx( return; } +#ifndef FIX_1326_SPEEDUP_01 static void matrixTransp1Mul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ @@ -3883,6 +3884,7 @@ static void matrixTransp1Mul_fx( return; } +#endif /*FIX_1326_SPEEDUP_01*/ static void matrixTransp2Mul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ @@ -4663,7 +4665,7 @@ static void formulate2x2MixingMatrix_fx( // matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); { - Word16 chA, chB; + //Word16 chA, chB; { chA = 0, chB = 0; tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ), @@ -4673,7 +4675,7 @@ static void formulate2x2MixingMatrix_fx( move32(); } { - chA = 0, chB = 1; + //chA = 0, chB = 1; tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ), Are_fx[1][1], Are_fx[1][0] ), Aim_fx[0][1], Aim_fx[0][0] ), @@ -4686,7 +4688,7 @@ static void formulate2x2MixingMatrix_fx( move32(); } { - chA = 1, chB = 0; + //chA = 1, chB = 0; tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ), Are_fx[1][1], Are_fx[1][1] ), Aim_fx[0][1], Aim_fx[0][1] ), @@ -4694,7 +4696,7 @@ static void formulate2x2MixingMatrix_fx( move32(); } { - chA = 1, chB = 1; + //chA = 1, chB = 1; } q_temp = sub( add( q_A, q_A ), 31 ); -- GitLab From 4b66449d1b749bc14b31ea0e1ef9a1715ef29613 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Tue, 4 Mar 2025 17:11:02 +0100 Subject: [PATCH 07/41] apply clang patch --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 7c243d7d8..1544b4f1a 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -4665,7 +4665,7 @@ static void formulate2x2MixingMatrix_fx( // matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); { - //Word16 chA, chB; + // Word16 chA, chB; { chA = 0, chB = 0; tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ), @@ -4675,7 +4675,7 @@ static void formulate2x2MixingMatrix_fx( move32(); } { - //chA = 0, chB = 1; + // chA = 0, chB = 1; tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ), Are_fx[1][1], Are_fx[1][0] ), Aim_fx[0][1], Aim_fx[0][0] ), @@ -4688,7 +4688,7 @@ static void formulate2x2MixingMatrix_fx( move32(); } { - //chA = 1, chB = 0; + // chA = 1, chB = 0; tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ), Are_fx[1][1], Are_fx[1][1] ), Aim_fx[0][1], Aim_fx[0][1] ), @@ -4696,7 +4696,7 @@ static void formulate2x2MixingMatrix_fx( move32(); } { - //chA = 1, chB = 1; + // chA = 1, chB = 1; } q_temp = sub( add( q_A, q_A ), 31 ); -- GitLab From 03b77f5ce70483c1024a794f419882ab4a15a1bb Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 5 Mar 2025 09:26:29 +0100 Subject: [PATCH 08/41] activated SPEEDUP8 --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 1544b4f1a..f024f53ba 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -46,14 +46,14 @@ #include "wmc_auto.h" //#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream -#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : .4 WMOPS -#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS -#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS -#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS -#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS -#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS -#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS -#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS +//#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : .4 WMOPS +//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS +//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS +//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS +//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs +//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs +//#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS //Big DIffs +#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs! Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- -- GitLab From 86520445e6af996f9ead2625174d24694c6d8ae8 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 5 Mar 2025 11:13:06 +0100 Subject: [PATCH 09/41] activate SPEEDUP 07 small version --- .../ivas_dirac_dec_binaural_functions_fx.c | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index f024f53ba..c452437f1 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -52,8 +52,8 @@ //#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS //#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs -//#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS //Big DIffs -#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs! +#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS //Big DIffs , no replacement of divSqrt +#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs, PIPELINE GREEN! Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- @@ -4746,12 +4746,18 @@ static void formulate2x2MixingMatrix_fx( } ELSE { - exp = sub( 31, q_D ); - div_fx[0] = ISqrt32( D_fx[0], &exp ); - // temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); - // exp = sub( exp, sub( Q30, q_D ) ); - // div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp +#if 1 //old code + push_wmops( "formulate2x2MixingMatrix Division" ); + temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); + exp = sub( exp, sub( Q30, q_D ) ); + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ + div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp + move32(); +#else + exp = sub(31, q_D); + div_fx[0] = ISqrt32_2( D_fx[0], &exp ); move32(); +#endif } #else IF( D_fx[0] == 0 ) -- GitLab From 12809f5b6359704415a114ddde609e193c899283 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 5 Mar 2025 11:18:14 +0100 Subject: [PATCH 10/41] apply clang format patch --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index c452437f1..1e37860c9 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -4746,18 +4746,18 @@ static void formulate2x2MixingMatrix_fx( } ELSE { -#if 1 //old code +#if 1 // old code push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); exp = sub( exp, sub( Q30, q_D ) ); - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); #else - exp = sub(31, q_D); + exp = sub( 31, q_D ); div_fx[0] = ISqrt32_2( D_fx[0], &exp ); move32(); -#endif +#endif } #else IF( D_fx[0] == 0 ) -- GitLab From 9028c88021ef93cc2cfadb421f1db6be435a788c Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 5 Mar 2025 12:22:09 +0100 Subject: [PATCH 11/41] activated speedup 01 02 03 04 --- .../ivas_dirac_dec_binaural_functions_fx.c | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 1e37860c9..06b5be3ee 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -46,14 +46,14 @@ #include "wmc_auto.h" //#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream -//#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : .4 WMOPS -//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS -//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS -//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS -//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs -//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs -#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS //Big DIffs , no replacement of divSqrt -#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs, PIPELINE GREEN! +#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : .4 WMOPS +#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS +#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS +#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS +//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs --> DONT USE +//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs -- > DONT USE +#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // ? WMOPS //Big DIffs , no replacement of divSqrt , PIPELINE GREEN, --> USE +#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs, PIPELINE GREEN! -- > USE Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- @@ -4521,8 +4521,12 @@ static void formulate2x2MixingMatrix_fx( #ifdef FIX_1326_SPEEDUP_05 { Word16 shift = norm_l( temp ); +#if 1 // oldcode + temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); +#else + temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) ); - exp_temp = sub( 31, q_ein ); + exp_temp = sub( 30, q_ein ); if ( temp == 0 ) { exp_temp = EPSILON_EXP; @@ -4533,6 +4537,7 @@ static void formulate2x2MixingMatrix_fx( temp = EPSILON_MANT; move32(); } +#endif temp = ISqrt32( temp, &exp_temp ); shift = sub( 31, q_eout ); Ghat_fx[0] = Mpy_32_32( Sqrt32( E_out1, &shift ), temp ); @@ -4582,8 +4587,11 @@ static void formulate2x2MixingMatrix_fx( #ifdef FIX_1326_SPEEDUP_06 { Word16 shift = norm_l( temp ); +#if 0 //oldcode + temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); +#else temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) ); - exp_temp = sub( 31, q_ein ); + exp_temp = sub( 31 - 1, q_ein ); if ( temp == 0 ) { exp_temp = add( 0, EPSILON_EXP ); @@ -4592,10 +4600,22 @@ static void formulate2x2MixingMatrix_fx( { temp = L_add( 0, EPSILON_MANT ); } +#endif +#if 1 //oldcode - new code introduces too much noise + push_wmops( "formulate2x2MixingMatrix Division" ); + temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ + exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); +#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC + Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 +#endif +#else temp = ISqrt32( temp, &exp_temp ); shift = sub( 31, q_eout ); - Ghat_fx[1] = Mpy_32_32( temp, ISqrt32( E_out2, &shift ) ); - exp_temp = add( shift, exp_temp ); + Ghat_fx[1] = Mpy_32_32( temp, Sqrt32( E_out2, &shift ) ); + exp1 = add( shift, exp_temp ); +#endif + } #else { -- GitLab From 90d2563bf00e217b994ad64a27421f37adb0b628 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 5 Mar 2025 12:35:34 +0100 Subject: [PATCH 12/41] apply clang format patch --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 06b5be3ee..5fcc8a2d1 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -54,6 +54,7 @@ //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs -- > DONT USE #define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // ? WMOPS //Big DIffs , no replacement of divSqrt , PIPELINE GREEN, --> USE #define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs, PIPELINE GREEN! -- > USE +#define FIX_1326_SPEEDUP_09 // Relocate matrixMul Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- @@ -4587,7 +4588,7 @@ static void formulate2x2MixingMatrix_fx( #ifdef FIX_1326_SPEEDUP_06 { Word16 shift = norm_l( temp ); -#if 0 //oldcode +#if 0 // oldcode temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); #else temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) ); @@ -4601,7 +4602,7 @@ static void formulate2x2MixingMatrix_fx( temp = L_add( 0, EPSILON_MANT ); } #endif -#if 1 //oldcode - new code introduces too much noise +#if 1 // oldcode - new code introduces too much noise push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ @@ -4614,8 +4615,7 @@ static void formulate2x2MixingMatrix_fx( shift = sub( 31, q_eout ); Ghat_fx[1] = Mpy_32_32( temp, Sqrt32( E_out2, &shift ) ); exp1 = add( shift, exp_temp ); -#endif - +#endif } #else { @@ -4749,10 +4749,17 @@ static void formulate2x2MixingMatrix_fx( eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); +#ifdef FIX_1326_SPEEDUP_09 + matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); +#endif #else matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); + +#ifdef FIX_1326_SPEEDUP_09 + matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); +#endif #endif pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/ @@ -4860,7 +4867,9 @@ static void formulate2x2MixingMatrix_fx( move16(); } +#ifndef FIX_1326_SPEEDUP_09 matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); +#endif exp = L_norm_arr( div_fx, BINAURAL_CHANNELS ); scale_sig32( div_fx, BINAURAL_CHANNELS, exp ); -- GitLab From 33a98171ada436d22c6f538c5ad3dbefd93bebb0 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 5 Mar 2025 14:30:57 +0100 Subject: [PATCH 13/41] activate speedup 09 10 11 for testing --- .../ivas_dirac_dec_binaural_functions_fx.c | 80 ++++++++++++++----- 1 file changed, 60 insertions(+), 20 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 5fcc8a2d1..e99cb6b34 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -45,16 +45,25 @@ #include "wmc_auto.h" -//#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream -#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : .4 WMOPS -#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS -#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS -#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS -//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs --> DONT USE -//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs -- > DONT USE -#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // ? WMOPS //Big DIffs , no replacement of divSqrt , PIPELINE GREEN, --> USE -#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs, PIPELINE GREEN! -- > USE -#define FIX_1326_SPEEDUP_09 // Relocate matrixMul +// MHZ NUMBERS: +// NULL: 179.292 + + +//#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence --> DONT USE +//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE +//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE +//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE +//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE +//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs --> DONT USE +//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs --> DONT USE +//#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 0 WMOPS --> DONT USE +//#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE +#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS +#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS +#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS +//#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS -->DONTUSE +//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS +//#define FIX_1326_SPEEDUP_14 // Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- @@ -2142,6 +2151,14 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 ); tmp2 = L_add( L_shl( resultMtxRe_fx[0][0], exp ), L_shl( resultMtxRe_fx[1][1], exp ) ); q_tmp2 = add( q_res, exp ); +#ifdef FIX_1326_SPEEDUP_11 + { + Word16 shift1 = s_max( 0, sub( q_tmp2, q_CrEne ) ); + Word16 shift2 = s_max( 0, sub( q_CrEne, q_tmp2 ) ); + realizedOutputEne_fx = L_add( L_shr( tmp1, shift2 ), L_shr( tmp2, shift1 ) ); + q_realizedOutputEne = s_min( q_CrEne, q_tmp2 ); + } +#else IF( LT_16( q_CrEne, q_tmp2 ) ) { realizedOutputEne_fx = L_add( tmp1, L_shr( tmp2, sub( q_tmp2, q_CrEne ) ) ); @@ -2154,7 +2171,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_realizedOutputEne = q_tmp2; move16(); } - +#endif exp = sub( get_min_scalefactor( hDiracDecBin->ChEneOut_fx[0][bin], hDiracDecBin->ChEneOut_fx[1][bin] ), 1 ); targetOutputEne_fx = L_add( L_shl( hDiracDecBin->ChEneOut_fx[0][bin], exp ), L_shl( hDiracDecBin->ChEneOut_fx[1][bin], exp ) ); q_targetOutputEne = add( hDiracDecBin->q_ChEneOut, exp ); @@ -2177,9 +2194,17 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_missingOutputEne = q_targetOutputEne; move16(); } - tmp1 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), missingOutputEne_fx, sub( 31, q_missingOutputEne ), &exp1 ); +#ifdef FIX_1326_SPEEDUP_13 + { + Word16 exp_temp; + tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); + tmp2 = ISqrt32( tmp2, &exp_temp ); + gain_fx = Mpy_32_32(tmp2, Sqrt32(tmp1, &exp1)); + q_gain = sub( 31, add( exp_temp, exp1 ) ); + } +#else { Word16 exp_temp; tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); @@ -2188,6 +2213,8 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( } gain_fx = Sqrt32( tmp2, &exp2 ); q_gain = sub( 31, exp2 ); +#endif + // 1073741824 = 4 in Q28 IF( LT_16( q_gain, Q28 ) ) @@ -4749,17 +4776,11 @@ static void formulate2x2MixingMatrix_fx( eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); -#ifdef FIX_1326_SPEEDUP_09 - matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); -#endif #else matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); -#ifdef FIX_1326_SPEEDUP_09 - matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); -#endif #endif pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/ @@ -4849,7 +4870,19 @@ static void formulate2x2MixingMatrix_fx( div_fx[1] = L_shr( div_fx[1], sub( sub( 31, exp1 ), q_div ) ); // q_div move32(); + // 1310720000 = 10,000.0f in Q17 +#ifdef FIX_1326_SPEEDUP_09 + { + Word16 shift1 = s_max( sub( Q17, q_div ), 0 ); + Word16 shift2 = s_max( sub( q_div, Q17 ), 0 ); + + div_fx[0] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[0], shift2 ) ); // q_div + move32(); + div_fx[1] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[1], shift2 ) ); // q_div + move32(); + } +#else IF( LT_16( q_div, Q17 ) ) { div_fx[0] = L_min( L_shr( 1310720000, sub( Q17, q_div ) ), div_fx[0] ); // q_div @@ -4866,10 +4899,9 @@ static void formulate2x2MixingMatrix_fx( q_div = Q17; move16(); } +#endif -#ifndef FIX_1326_SPEEDUP_09 matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); -#endif exp = L_norm_arr( div_fx, BINAURAL_CHANNELS ); scale_sig32( div_fx, BINAURAL_CHANNELS, exp ); @@ -4884,7 +4916,11 @@ static void formulate2x2MixingMatrix_fx( W_tmp = W_mult0_32_32( tmpRe_fx[chA][chB], div_fx[chB] ); IF( W_tmp != 0 ) { +#ifdef FIX_1326_SPEEDUP_10 + hdrm_re[chA][chB] = W_norm( W_tmp ); +#else hdrm_re[chA][chB] = sub( W_norm( W_tmp ), 0 ); +#endif move16(); W_tmp = W_shl( W_tmp, hdrm_re[chA][chB] ); tmpRe_fx[chA][chB] = W_extract_h( W_tmp ); @@ -4901,7 +4937,11 @@ static void formulate2x2MixingMatrix_fx( W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] ); IF( W_tmp != 0 ) { +#ifdef FIX_1326_SPEEDUP_10 + hdrm_im[chA][chB] = W_norm( W_tmp ); +#else hdrm_im[chA][chB] = sub( W_norm( W_tmp ), 0 ); +#endif move16(); W_tmp = W_shl( W_tmp, hdrm_im[chA][chB] ); tmpIm_fx[chA][chB] = W_extract_h( W_tmp ); -- GitLab From 1a23f8d8deb4ce90aeac714fd274f3fa6c380f16 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 5 Mar 2025 14:56:15 +0100 Subject: [PATCH 14/41] apply clang format patch --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index e99cb6b34..2336151e0 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -58,12 +58,12 @@ //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs --> DONT USE //#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 0 WMOPS --> DONT USE //#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE -#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS -#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS -#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS +#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS +#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS +#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS //#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS -->DONTUSE //#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS -//#define FIX_1326_SPEEDUP_14 // +//#define FIX_1326_SPEEDUP_14 // Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- @@ -2201,7 +2201,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( Word16 exp_temp; tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); tmp2 = ISqrt32( tmp2, &exp_temp ); - gain_fx = Mpy_32_32(tmp2, Sqrt32(tmp1, &exp1)); + gain_fx = Mpy_32_32( tmp2, Sqrt32( tmp1, &exp1 ) ); q_gain = sub( 31, add( exp_temp, exp1 ) ); } #else @@ -2214,7 +2214,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( gain_fx = Sqrt32( tmp2, &exp2 ); q_gain = sub( 31, exp2 ); #endif - + // 1073741824 = 4 in Q28 IF( LT_16( q_gain, Q28 ) ) -- GitLab From c8151a8382939a33af975c6c5bf7e192030fcc10 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 5 Mar 2025 15:33:23 +0100 Subject: [PATCH 15/41] added assert testing --- .../ivas_dirac_dec_binaural_functions_fx.c | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 2336151e0..cc6d977f0 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -58,12 +58,12 @@ //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs --> DONT USE //#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 0 WMOPS --> DONT USE //#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE -#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS -#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS -#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS -//#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS -->DONTUSE -//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS -//#define FIX_1326_SPEEDUP_14 // +//#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS --> USE? (pipe 48851 fails --> DONTUSEYET) +//#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS --> USE? (pipe 48851 fails --> DONTUSEYET) +//#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS --> USE? (pipe 48851 fails --> DONTUSEYET) +//#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS -->DONTUSE +//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS -->USE? (pipe coming) +#define FIX_1326_SPEEDUP_14 // test Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- @@ -3213,8 +3213,14 @@ static void eig2x2_fx( pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) add_fx = 0.5f * (e1 + e2)*/ + #ifdef FIX_1326_SPEEDUP_14 + static int tstcnt = 0; + #endif IF( L_and( c_re == 0, c_im == 0 ) ) { +#ifdef FIX_1326_SPEEDUP_14 + tstcnt ++; +#endif /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0 a_fx = (E1 - E2)^2 pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */ @@ -3232,6 +3238,9 @@ static void eig2x2_fx( q_crossSquare = sub( add( q_c, q_c ), 31 ); IF( EQ_32( e1, e2 ) ) { +#ifdef FIX_1326_SPEEDUP_14 + tstcnt++; +#endif /* if e1 - e2 = 0, then a_fx = 4 * crossSquare_fx pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) = sqrt(0, crossSquare_fx)*/ test(); @@ -3265,6 +3274,9 @@ static void eig2x2_fx( IF( GT_16( sub( q_c, q_e ), Q15 ) ) { +#ifdef FIX_1326_SPEEDUP_14 + tstcnt++; +#endif pm_fx = L_shr( L_max( 0, L_abs( L_sub( e1, e2 ) ) ), 1 ); q_tmp2 = q_e; move16(); @@ -3288,6 +3300,10 @@ static void eig2x2_fx( } } } +#ifdef FIX_1326_SPEEDUP_14 + if (tstcnt>10000) + assert(0); +#endif // add_fx = 0.5 * (e1 + e2) add_fx = L_shr( L_add( e1, e2 ), 1 ); q_tmp1 = q_e; @@ -4669,7 +4685,6 @@ static void formulate2x2MixingMatrix_fx( move32(); pop_wmops(); - push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" ); /* Matrix multiplication, tmp = Ky' * G_hat * Q */ FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { @@ -4695,14 +4710,11 @@ static void formulate2x2MixingMatrix_fx( move32(); } } - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" );*/ q_temp = sub( add( q_ky, q_GhatQ ), 31 ); - push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" ); /* A = Ky' * G_hat * Q * Kx (see publication) */ matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A ); - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" );*/ push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" ); /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx @@ -4980,7 +4992,7 @@ static void formulate2x2MixingMatrix_fx( 0 /*int Bscale*/, #endif Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA" );*/ + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" );*/ push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" ); /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ -- GitLab From c6d0d7dab2acc799534d4a284cf603826d37460c Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 5 Mar 2025 15:35:49 +0100 Subject: [PATCH 16/41] apply clang format patch --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index cc6d977f0..c5b3500b2 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3213,13 +3213,13 @@ static void eig2x2_fx( pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) add_fx = 0.5f * (e1 + e2)*/ - #ifdef FIX_1326_SPEEDUP_14 +#ifdef FIX_1326_SPEEDUP_14 static int tstcnt = 0; - #endif +#endif IF( L_and( c_re == 0, c_im == 0 ) ) { #ifdef FIX_1326_SPEEDUP_14 - tstcnt ++; + tstcnt++; #endif /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0 a_fx = (E1 - E2)^2 @@ -3301,8 +3301,8 @@ static void eig2x2_fx( } } #ifdef FIX_1326_SPEEDUP_14 - if (tstcnt>10000) - assert(0); + if ( tstcnt > 10000 ) + assert( 0 ); #endif // add_fx = 0.5 * (e1 + e2) add_fx = L_shr( L_add( e1, e2 ), 1 ); -- GitLab From 2986c80dd253c39ee60868f08bd0e89d2b64a683 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Fri, 7 Mar 2025 09:52:48 +0100 Subject: [PATCH 17/41] deactivate SPeedup 14, activate Speedup 13 for testing --- .../ivas_dirac_dec_binaural_functions_fx.c | 75 +++++++++++++++---- 1 file changed, 61 insertions(+), 14 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index c5b3500b2..ad9769583 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -49,7 +49,6 @@ // NULL: 179.292 -//#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence --> DONT USE //#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE //#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE //#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE @@ -58,12 +57,18 @@ //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs --> DONT USE //#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 0 WMOPS --> DONT USE //#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE -//#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS --> USE? (pipe 48851 fails --> DONTUSEYET) -//#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS --> USE? (pipe 48851 fails --> DONTUSEYET) -//#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS --> USE? (pipe 48851 fails --> DONTUSEYET) -//#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS -->DONTUSE -//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS -->USE? (pipe coming) -#define FIX_1326_SPEEDUP_14 // test +//#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET +//#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET +//#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET +//#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS --> DONTUSE +//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE? (pipe tbd) +//#define FIX_1326_SPEEDUP_14 // test wether any of these paths is realy necessary, then assert --> DONTUSE (pipes red, asserts!) +//#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE? (pipe tbd) +//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .18 WMOPS --> USE? (pipe tbd) + + + + Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- @@ -943,9 +948,9 @@ static void ivas_dirac_dec_binaural_internal_fx( } test(); - push_wmops( "IDRBI cov matrices" ); + push_wmops( "IDRBI cov matrices (IDRBCM)" ); ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, st_ivas->hMasaIsmData, q_inp ); - pop_wmops(); /*push_wmops( "IDRBI cov matrices" );*/ + pop_wmops(); /*push_wmops( "IDRBI cov matrices (IDRBCM)" );*/ IF( EQ_32( config_data.ivas_format, ISM_FORMAT ) ) { @@ -1168,7 +1173,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric nBins = hSpatParamRendCom->num_freq_bands; /* Actually bins */ move16(); - + push_wmops( "IDRBCM inits" ); q_earlyPartEneCorrection = s_min( Q31, add( getScaleFactor32( hDiracDecBin->earlyPartEneCorrection_fx, nBins ), hDiracDecBin->q_earlyPartEneCorrection ) ); scale_sig32( hDiracDecBin->earlyPartEneCorrection_fx, nBins, sub( q_earlyPartEneCorrection, hDiracDecBin->q_earlyPartEneCorrection ) ); hDiracDecBin->q_earlyPartEneCorrection = q_earlyPartEneCorrection; @@ -1202,6 +1207,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric gainCache[idx].azi = -1000; /* Use -1000 as value for uninitialized cache. */ move16(); } + pop_wmops(); /*push_wmops( "IDRBCM inits" );*/ /* Determine EQ for low bit rates (13.2 and 16.4 kbps) */ applyLowBitRateEQ = 0; @@ -1214,11 +1220,13 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move16(); IF( EQ_32( ivas_total_brate, IVAS_16k4 ) ) { + push_wmops( "IDRBCM Determine EQ_low_rates" ); FOR( bin = 0; bin < LOW_BIT_RATE_BINAURAL_EQ_BINS; bin++ ) { lowBitRateEQ_fx[bin + LOW_BIT_RATE_BINAURAL_EQ_OFFSET] = L_add( L_shr( lowBitRateBinauralEQ_fx[bin], 1 ), ONE_IN_Q30 ); // Q31 move32(); } + pop_wmops(); /*push_wmops( "IDRBCM Determine EQ_low_rates" );*/ } ELSE { @@ -1237,6 +1245,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below + push_wmops( "IDRBCM input Matrix" ); /* Calculate input covariance matrix */ FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ ) { @@ -1271,7 +1280,9 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move32(); } } + pop_wmops(); /*push_wmops( "IDRBCM input Matrix" );*/ + push_wmops( "IDRBCM apply EQ_low" ); /* Apply EQ at low bit rates */ IF( applyLowBitRateEQ != 0 ) { @@ -1324,7 +1335,9 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric } } } + pop_wmops(); /*push_wmops( "IDRBCM apply EQ_low" );*/ + push_wmops( "IDRBCM target matrix" ); /* Determine target covariance matrix containing target binaural properties */ FOR( bin = 0; bin < nBins; bin++ ) { @@ -1484,12 +1497,14 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric Word32 hrtfEneCenter_fx, hrtfEneSides_fx, hrtfEneRealized_fx; Word16 eneCorrectionFactor_fx, eneCorrectionFactor_e; Word16 w1_fx, w2_fx, w3_fx, eq_fx; - +#ifdef FIX_1326_SPEEDUP_15 + hrtfEneCenter_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ), lImagp_fx, lImagp_fx ), lRealp_fx, lRealp_fx ); //Q25 +#else hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), // Q25 L_add( Mpy_32_32( lImagp_fx, lImagp_fx ), // Q25 L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), // Q25 Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) ); // Q25 - +#endif /* Spread coherence is synthesized as coherent sources at 30 degree horizontal spacing. * The following formulas determine the gains for these sources. * spreadCoh = 0: Only panning @@ -1518,11 +1533,14 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric /* Apply the gain for the left source of the three coherent sources */ getDirectPartGains_fx( bin, add( aziDeg, 30 ), eleDeg, &lRealpTmp_fx, &lImagpTmp_fx, &rRealpTmp_fx, &rImagpTmp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex + 1], isHeadtracked ); - +#ifdef FIX_1326_SPEEDUP_15 + hrtfEneSides_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), rImagpTmp_fx, rImagpTmp_fx ), lImagpTmp_fx, lImagpTmp_fx ), lRealpTmp_fx, lRealpTmp_fx ); // Q25 +#else hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ), // Q25 L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ), // Q25 L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), // Q25 Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) ); // Q25 +#endif lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) ); // Q25 lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) ); // Q25 rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) ); // Q25 @@ -1610,12 +1628,21 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move16(); } +#ifdef FIX_1326_SPEEDUP_15 + hrtfEne_fx[0] = Madd_32_32( Mpy_32_32( lRealp_fx, lRealp_fx ), lImagp_fx, lImagp_fx ); // Q( 2*q_lr - 31 ) + hrtfEne_fx[1] = Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ); // Q( 2*q_lr - 31 ) + move32(); + move32(); + hrtfCrossRe_fx = Madd_32_32( Mpy_32_32( lRealp_fx, rRealp_fx ), lImagp_fx, rImagp_fx ); // Q( 2*q_lr - 31 ) + hrtfCrossIm_fx = Madd_32_32( Mpy_32_32( -lImagp_fx, rRealp_fx ), lRealp_fx, rImagp_fx ); // Q( 2*q_lr - 31 ) +#else hrtfEne_fx[0] = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), Mpy_32_32( lImagp_fx, lImagp_fx ) ); // Q( 2*q_lr - 31 ) hrtfEne_fx[1] = L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), Mpy_32_32( rImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 ) move32(); move32(); hrtfCrossRe_fx = L_add( Mpy_32_32( lRealp_fx, rRealp_fx ), Mpy_32_32( lImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 ) hrtfCrossIm_fx = L_add( Mpy_32_32( -lImagp_fx, rRealp_fx ), Mpy_32_32( lRealp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 ) +#endif /* Add direct part (1 or 2) covariance matrix */ dirEne_fx = Mpy_32_32( ratio_fx, meanEnePerCh_fx ); // Q(q_meanEnePerCh - 1) @@ -1690,7 +1717,11 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric } ELSE { +#ifdef FIX_1326_SPEEDUP_15 + hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( Madd_32_16( L_shl( surCoh_fx, 16 ), hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] ); +#else hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( L_add( Mpy_32_16_1( hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), L_shl( surCoh_fx, 16 ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] ); +#endif } move32(); } @@ -1706,6 +1737,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29 move32(); } + pop_wmops();/*push_wmops( "IDRBCM target matrix" );*/ test(); /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */ @@ -3435,10 +3467,25 @@ static void eig2x2_fx( tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); + +#ifdef FIX_1326_SPEEDUP_16 + + { + Word16 tmp2_exp; + Word32 eps_tmp; + tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &tmp2_exp ); + + //Add epsilon if relevant + eps_tmp = L_shl_sat( epsilon_mant, sub(epsilon_exp, tmp2_exp )); + tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) ); + + exp_tmp3 = add( tmp2_exp, 1 ); + } +#else tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); - tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); +#endif #if 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); -- GitLab From fb3e05c804ec9194ab97c64ce3769c51424ae600 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Fri, 7 Mar 2025 09:54:39 +0100 Subject: [PATCH 18/41] deactivate SPeedup 14, activate Speedup 13 for testing --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index ad9769583..620b4e323 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -61,7 +61,7 @@ //#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET //#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET //#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS --> DONTUSE -//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE? (pipe tbd) +#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE? (pipe tbd) //#define FIX_1326_SPEEDUP_14 // test wether any of these paths is realy necessary, then assert --> DONTUSE (pipes red, asserts!) //#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE? (pipe tbd) //#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .18 WMOPS --> USE? (pipe tbd) -- GitLab From 52740b20377af49c2a78be634f9adf9112b934fa Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Fri, 7 Mar 2025 10:09:06 +0100 Subject: [PATCH 19/41] apply clang format patch --- .../ivas_dirac_dec_binaural_functions_fx.c | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 620b4e323..7721c3eaa 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -67,8 +67,6 @@ //#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .18 WMOPS --> USE? (pipe tbd) - - Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- @@ -1498,12 +1496,12 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric Word16 eneCorrectionFactor_fx, eneCorrectionFactor_e; Word16 w1_fx, w2_fx, w3_fx, eq_fx; #ifdef FIX_1326_SPEEDUP_15 - hrtfEneCenter_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ), lImagp_fx, lImagp_fx ), lRealp_fx, lRealp_fx ); //Q25 + hrtfEneCenter_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ), lImagp_fx, lImagp_fx ), lRealp_fx, lRealp_fx ); // Q25 #else - hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), // Q25 - L_add( Mpy_32_32( lImagp_fx, lImagp_fx ), // Q25 - L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), // Q25 - Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) ); // Q25 + hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), // Q25 + L_add( Mpy_32_32( lImagp_fx, lImagp_fx ), // Q25 + L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), // Q25 + Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) ); // Q25 #endif /* Spread coherence is synthesized as coherent sources at 30 degree horizontal spacing. * The following formulas determine the gains for these sources. @@ -1536,15 +1534,15 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric #ifdef FIX_1326_SPEEDUP_15 hrtfEneSides_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), rImagpTmp_fx, rImagpTmp_fx ), lImagpTmp_fx, lImagpTmp_fx ), lRealpTmp_fx, lRealpTmp_fx ); // Q25 #else - hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ), // Q25 - L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ), // Q25 - L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), // Q25 - Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) ); // Q25 + hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ), // Q25 + L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ), // Q25 + L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), // Q25 + Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) ); // Q25 #endif - lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) ); // Q25 - lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) ); // Q25 - rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) ); // Q25 - rImagp_fx = L_add( rImagp_fx, Mpy_32_32( sidesMul_fx, rImagpTmp_fx ) ); // Q25 + lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) ); // Q25 + lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) ); // Q25 + rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) ); // Q25 + rImagp_fx = L_add( rImagp_fx, Mpy_32_32( sidesMul_fx, rImagpTmp_fx ) ); // Q25 /* Apply the gain for the right source of the three coherent sources. * -30 degrees to 330 wrapping due to internal functions. */ @@ -1737,7 +1735,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29 move32(); } - pop_wmops();/*push_wmops( "IDRBCM target matrix" );*/ + pop_wmops(); /*push_wmops( "IDRBCM target matrix" );*/ test(); /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */ @@ -3475,12 +3473,12 @@ static void eig2x2_fx( Word32 eps_tmp; tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &tmp2_exp ); - //Add epsilon if relevant - eps_tmp = L_shl_sat( epsilon_mant, sub(epsilon_exp, tmp2_exp )); + // Add epsilon if relevant + eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, tmp2_exp ) ); tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) ); exp_tmp3 = add( tmp2_exp, 1 ); - } + } #else tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); -- GitLab From deae6b08447d0260424ce2f4b4d8455a94172ed5 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Fri, 7 Mar 2025 10:15:25 +0100 Subject: [PATCH 20/41] fix build warning --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 7721c3eaa..4db8980e1 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1916,7 +1916,11 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( move16(); Word32 tmp1, tmp2, res1, res2; Word16 q_tmp1, q_tmp2, q_realizedOutputEne, q_targetOutputEne, q_missingOutputEne, q_gain; +#ifdef FIX_1326_SPEEDUP_13 + Word16 exp1, q_processMtx_bin, q_processMtxDec_bin; +#else Word16 exp1, exp2, q_processMtx_bin, q_processMtxDec_bin; +#endif CrEneL_fx = 0; move32(); -- GitLab From 2248f4d1a256b06ffc8e850d8f3c368a1be827e7 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Fri, 7 Mar 2025 11:08:02 +0100 Subject: [PATCH 21/41] ctivated speedup 15, 16 to test --- .../ivas_dirac_dec_binaural_functions_fx.c | 118 +----------------- 1 file changed, 6 insertions(+), 112 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 4db8980e1..45e1d2bed 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -53,18 +53,14 @@ //#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE //#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE //#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE -//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs --> DONT USE -//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs --> DONT USE -//#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 0 WMOPS --> DONT USE //#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE //#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET //#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET //#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET -//#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS --> DONTUSE -#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE? (pipe tbd) -//#define FIX_1326_SPEEDUP_14 // test wether any of these paths is realy necessary, then assert --> DONTUSE (pipes red, asserts!) -//#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE? (pipe tbd) -//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .18 WMOPS --> USE? (pipe tbd) +//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE + +#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE? (pipe tbd) +#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .18 WMOPS --> USE? (pipe tbd) Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; @@ -3246,15 +3242,8 @@ static void eig2x2_fx( a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) add_fx = 0.5f * (e1 + e2)*/ - -#ifdef FIX_1326_SPEEDUP_14 - static int tstcnt = 0; -#endif IF( L_and( c_re == 0, c_im == 0 ) ) { -#ifdef FIX_1326_SPEEDUP_14 - tstcnt++; -#endif /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0 a_fx = (E1 - E2)^2 pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */ @@ -3272,9 +3261,6 @@ static void eig2x2_fx( q_crossSquare = sub( add( q_c, q_c ), 31 ); IF( EQ_32( e1, e2 ) ) { -#ifdef FIX_1326_SPEEDUP_14 - tstcnt++; -#endif /* if e1 - e2 = 0, then a_fx = 4 * crossSquare_fx pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) = sqrt(0, crossSquare_fx)*/ test(); @@ -3308,9 +3294,6 @@ static void eig2x2_fx( IF( GT_16( sub( q_c, q_e ), Q15 ) ) { -#ifdef FIX_1326_SPEEDUP_14 - tstcnt++; -#endif pm_fx = L_shr( L_max( 0, L_abs( L_sub( e1, e2 ) ) ), 1 ); q_tmp2 = q_e; move16(); @@ -3334,10 +3317,6 @@ static void eig2x2_fx( } } } -#ifdef FIX_1326_SPEEDUP_14 - if ( tstcnt > 10000 ) - assert( 0 ); -#endif // add_fx = 0.5 * (e1 + e2) add_fx = L_shr( L_add( e1, e2 ), 1 ); q_tmp1 = q_e; @@ -4611,33 +4590,6 @@ static void formulate2x2MixingMatrix_fx( #endif } ELSE -#ifdef FIX_1326_SPEEDUP_05 - { - Word16 shift = norm_l( temp ); -#if 1 // oldcode - temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); -#else - - temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) ); - exp_temp = sub( 30, q_ein ); - if ( temp == 0 ) - { - exp_temp = EPSILON_EXP; - move32(); - } - if ( temp == 0 ) - { - temp = EPSILON_MANT; - move32(); - } -#endif - temp = ISqrt32( temp, &exp_temp ); - shift = sub( 31, q_eout ); - Ghat_fx[0] = Mpy_32_32( Sqrt32( E_out1, &shift ), temp ); - move32(); - exp = add( shift, exp_temp ); - } -#else { temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); push_wmops( "formulate2x2MixingMatrix Division" ); @@ -4648,7 +4600,7 @@ static void formulate2x2MixingMatrix_fx( Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp #endif } -#endif + #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp #endif @@ -4677,39 +4629,6 @@ static void formulate2x2MixingMatrix_fx( #endif } ELSE -#ifdef FIX_1326_SPEEDUP_06 - { - Word16 shift = norm_l( temp ); -#if 0 // oldcode - temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); -#else - temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) ); - exp_temp = sub( 31 - 1, q_ein ); - if ( temp == 0 ) - { - exp_temp = add( 0, EPSILON_EXP ); - } - if ( temp == 0 ) - { - temp = L_add( 0, EPSILON_MANT ); - } -#endif -#if 1 // oldcode - new code introduces too much noise - push_wmops( "formulate2x2MixingMatrix Division" ); - temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ - exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); -#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC - Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 -#endif -#else - temp = ISqrt32( temp, &exp_temp ); - shift = sub( 31, q_eout ); - Ghat_fx[1] = Mpy_32_32( temp, Sqrt32( E_out2, &shift ) ); - exp1 = add( shift, exp_temp ); -#endif - } -#else { temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); push_wmops( "formulate2x2MixingMatrix Division" ); @@ -4720,7 +4639,7 @@ static void formulate2x2MixingMatrix_fx( Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif } -#endif + #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif @@ -4845,30 +4764,6 @@ static void formulate2x2MixingMatrix_fx( #endif pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/ -#ifdef FIX_1326_SPEEDUP_07 - IF( D_fx[0] == 0 ) - { - // temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ - // exp = ONE_DIV_EPSILON_EXP; - div_fx[0] = L_add( 0, 2047986068 ); // Sqrt32( temp, &exp ); // Q = 31 - exp - exp = add( 0, 20 ); - } - ELSE - { -#if 1 // old code - push_wmops( "formulate2x2MixingMatrix Division" ); - temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); - exp = sub( exp, sub( Q30, q_D ) ); - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ - div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp - move32(); -#else - exp = sub( 31, q_D ); - div_fx[0] = ISqrt32_2( D_fx[0], &exp ); - move32(); -#endif - } -#else IF( D_fx[0] == 0 ) { #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC @@ -4888,7 +4783,6 @@ static void formulate2x2MixingMatrix_fx( } div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); -#endif #ifdef FIX_1326_SPEEDUP_08 // This is just a shortcut to already existing optimizations (FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) - but makes everything even faster -- GitLab From 7263a7eebb093028a1980e52b0b2db57ea6182fb Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 08:26:44 +0100 Subject: [PATCH 22/41] add SPEEDUP 17, 18, inactive --- .../ivas_dirac_dec_binaural_functions_fx.c | 73 +++++++++++++++---- 1 file changed, 59 insertions(+), 14 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 45e1d2bed..3a10590f6 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -49,19 +49,20 @@ // NULL: 179.292 -//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE -//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE -//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE -//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE -//#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE -//#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET -//#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET -//#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET -//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE - -#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE? (pipe tbd) -#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .18 WMOPS --> USE? (pipe tbd) - +//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE +//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE +//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE +//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE +//#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE +//#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET +//#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET +//#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET +//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE + +//#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE +//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE +//#define FIX_1326_SPEEDUP_17 // use 1/x // 1 WMOPS --> USE? +//#define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE? Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; @@ -3343,6 +3344,7 @@ static void eig2x2_fx( /* Numeric case, when input is practically zeros */ // IF( D_fx[0] < EPSILON_FX ) + #ifdef FIX_1326_SPEEDUP_02 IF( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) ) { @@ -3370,7 +3372,7 @@ static void eig2x2_fx( #endif /* Numeric case, when input is near an identity matrix with a gain */ -#ifdef FIX_1326_SPEEDUP_03 // 178.932 +#ifdef FIX_1326_SPEEDUP_03 tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31 IF( LT_32( pm_fx, L_shl_sat( tmp1, sub( q_tmp1, q_tmp2 ) ) ) ) @@ -3469,7 +3471,11 @@ static void eig2x2_fx( #endif #if 1 +#ifdef FIX_1326_SPEEDUP_17 + tmp2 = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, tmp3, &exp ); +#else tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); +#endif exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); @@ -3557,7 +3563,11 @@ static void eig2x2_fx( #endif #if 1 +#ifdef FIX_1326_SPEEDUP_17 + tmp2 = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, tmp3, &exp ); +#else tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); +#endif exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); @@ -3619,7 +3629,19 @@ static void eig2x2_fx( move16(); } } +#ifdef FIX_1326_SPEEDUP_18 + if( q_U_1 != 0 ) + { + *q_U = q_U_1; + move16(); + } + if (q_U_1 == 0) + { + *q_U = q_U_2; + move16(); + } +#else IF( q_U_1 != 0 ) { *q_U = q_U_1; @@ -3629,6 +3651,7 @@ static void eig2x2_fx( *q_U = q_U_2; } move16(); +#endif return; } @@ -4508,7 +4531,11 @@ static void formulate2x2MixingMatrix_fx( ELSE { push_wmops( "formulate2x2MixingMatrix Division" ); +#ifdef FIX_1326_SPEEDUP_17 + maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, maxEne_fx, &exp ); +#else maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxEne_fx, &exp ); +#endif pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) ); } @@ -4630,6 +4657,11 @@ static void formulate2x2MixingMatrix_fx( } ELSE { + if ( E_out2 == 0 ) + { + static int a = 0; + a++; + } temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); @@ -4777,7 +4809,12 @@ static void formulate2x2MixingMatrix_fx( ELSE { push_wmops( "formulate2x2MixingMatrix Division" ); +#ifdef FIX_1326_SPEEDUP_17 + temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, D_fx[0], &exp ); +#else + temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); +#endif exp = sub( exp, sub( Q30, q_D ) ); pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ } @@ -4811,7 +4848,11 @@ static void formulate2x2MixingMatrix_fx( ELSE { push_wmops( "formulate2x2MixingMatrix Division" ); +#ifdef FIX_1326_SPEEDUP_17 + temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, D_fx[1], &exp1 ); +#else temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[1], &exp1 ); +#endif pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ exp1 = sub( exp1, sub( Q30, q_D ) ); } @@ -5021,7 +5062,11 @@ static void formulate2x2MixingMatrix_fx( Word16 Pre_shift, Pim_shift; temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); push_wmops( "formulate2x2MixingMatrix Division" ); +#ifdef FIX_1326_SPEEDUP_17 + temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, temp, &exp ); +#else temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp ); +#endif pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp ); -- GitLab From f2a018b19a1fe121adfedb8c38abe8f8bf8c3604 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 08:41:59 +0100 Subject: [PATCH 23/41] add modified version of division '1/x' --- lib_com/basop_util.c | 35 +++++++++++++++++++++++++++++++++++ lib_com/basop_util.h | 4 ++++ 2 files changed, 39 insertions(+) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 609ca234d..0449ff125 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1066,6 +1066,41 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) return z; } +/*1bit HR in x > 0*/ +Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, Word32 y, Word16 *s ) +{ + Word32 z; + //Word16 sx; + Word16 sy; + Word32 sign; + + /* assert (x >= (Word32)0); */ + assert( y != (Word32) 0 ); + + sign = 0; + move16(); + + IF( y < 0 ) + { + y = L_negate( y ); + sign = L_xor( sign, 1 ); + } + + sy = norm_l( y ); + y = L_shl( y, sy ); + move16(); + *s = add( 0, sy ); + move16(); + + z = div_w( x, y ); + + if ( sign != 0 ) + { + z = L_negate( z ); + } + return z; + +} Word16 BASOP_Util_Divide3232_Scale( Word32 x, Word32 y, Word16 *s ) { Word16 z; diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index a6db7dc8d..697f0b9c6 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -332,6 +332,10 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, /*!< i : Numerator*/ Word32 y, /*!< i : Denominator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ +Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, /*!< i : Numerator*/ + Word32 y, /*!< i : Denominator*/ + + Word16 *s ); /*!< o : Additional scalefactor difference*/ /************************************************************************/ /*! -- GitLab From bdab4c96801c4c19d454e6d280ccc0cbf73752b1 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 08:46:15 +0100 Subject: [PATCH 24/41] applied clang format patch --- lib_com/basop_util.c | 3 +-- lib_com/basop_util.h | 6 +++--- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 8 ++++---- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 0449ff125..5eee369f8 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1070,7 +1070,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, Word32 y, Word16 *s ) { Word32 z; - //Word16 sx; + // Word16 sx; Word16 sy; Word32 sign; @@ -1099,7 +1099,6 @@ Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, Word32 y, Word16 *s ) z = L_negate( z ); } return z; - } Word16 BASOP_Util_Divide3232_Scale( Word32 x, Word32 y, Word16 *s ) { diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index 697f0b9c6..1ef2cd8e7 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -332,10 +332,10 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, /*!< i : Numerator*/ Word32 y, /*!< i : Denominator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ -Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, /*!< i : Numerator*/ - Word32 y, /*!< i : Denominator*/ +Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, /*!< i : Numerator*/ + Word32 y, /*!< i : Denominator*/ - Word16 *s ); /*!< o : Additional scalefactor difference*/ + Word16 *s ); /*!< o : Additional scalefactor difference*/ /************************************************************************/ /*! diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 3a10590f6..399ac50ca 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3372,7 +3372,7 @@ static void eig2x2_fx( #endif /* Numeric case, when input is near an identity matrix with a gain */ -#ifdef FIX_1326_SPEEDUP_03 +#ifdef FIX_1326_SPEEDUP_03 tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31 IF( LT_32( pm_fx, L_shl_sat( tmp1, sub( q_tmp1, q_tmp2 ) ) ) ) @@ -3630,13 +3630,13 @@ static void eig2x2_fx( } } #ifdef FIX_1326_SPEEDUP_18 - if( q_U_1 != 0 ) + if ( q_U_1 != 0 ) { *q_U = q_U_1; move16(); } - if (q_U_1 == 0) + if ( q_U_1 == 0 ) { *q_U = q_U_2; move16(); @@ -4812,7 +4812,7 @@ static void formulate2x2MixingMatrix_fx( #ifdef FIX_1326_SPEEDUP_17 temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, D_fx[0], &exp ); #else - + temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); #endif exp = sub( exp, sub( Q30, q_D ) ); -- GitLab From 0fb2d30b8285abdbc76468fc43848939f863a03c Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 08:47:08 +0100 Subject: [PATCH 25/41] activated SPEEDUP 17, 18, for test --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 399ac50ca..682a881cd 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -61,8 +61,8 @@ //#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE //#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE -//#define FIX_1326_SPEEDUP_17 // use 1/x // 1 WMOPS --> USE? -//#define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE? +#define FIX_1326_SPEEDUP_17 // use 1/x // 1 WMOPS --> USE? +#define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE? Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; -- GitLab From 73f2a5078c6fd02f318924c41ebb28d74ab3abf7 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 09:35:17 +0100 Subject: [PATCH 26/41] Activate all SPEEDUP macros available and change division 1/x a bit --- lib_com/basop_util.c | 8 +++-- .../ivas_dirac_dec_binaural_functions_fx.c | 35 ++++++++++--------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 5eee369f8..60174c3c9 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1080,12 +1080,16 @@ Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, Word32 y, Word16 *s ) sign = 0; move16(); - IF( y < 0 ) + if( y < 0 ) { - y = L_negate( y ); sign = L_xor( sign, 1 ); } + if ( y < 0 ) + { + y = L_negate( y ); + } + sy = norm_l( y ); y = L_shl( y, sy ); move16(); diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 682a881cd..f3e6b54d6 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -46,23 +46,24 @@ #include "wmc_auto.h" // MHZ NUMBERS: -// NULL: 179.292 - - -//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE -//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE -//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE -//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE -//#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE -//#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET -//#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET -//#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET -//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE - -//#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE -//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE -#define FIX_1326_SPEEDUP_17 // use 1/x // 1 WMOPS --> USE? -#define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE? +// NULL: 178.407 +// ALL: 169.499 + + +#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE +#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE +#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE +#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE +#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE +#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET +#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET +#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET +#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE + +#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE +#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE +#define FIX_1326_SPEEDUP_17 // use 1/x // 1.25WMOPS --> USE +#define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; -- GitLab From f2b3f155d5d77c922553381a7659f1452230a77f Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 10:33:50 +0100 Subject: [PATCH 27/41] deactivate 1/x macro, activate all others --- lib_com/basop_util.c | 2 +- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 60174c3c9..273667255 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1080,7 +1080,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, Word32 y, Word16 *s ) sign = 0; move16(); - if( y < 0 ) + if ( y < 0 ) { sign = L_xor( sign, 1 ); } diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index f3e6b54d6..333e7603a 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -62,7 +62,7 @@ #define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE #define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE -#define FIX_1326_SPEEDUP_17 // use 1/x // 1.25WMOPS --> USE +//#define FIX_1326_SPEEDUP_17 // use 1/x // 1.25WMOPS --> USE #define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; -- GitLab From d4f5a9acea1e0583a3f4f4ffd2b6e4d2af7a7dec Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 09:46:33 +0000 Subject: [PATCH 28/41] revert divison variation --- lib_com/basop_util.c | 44 +------------------------------------------- 1 file changed, 1 insertion(+), 43 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 273667255..b7ee35ab3 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1010,7 +1010,6 @@ Word32 div_w( Word32 L_num, Word32 L_den ) } } - Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { Word32 z; @@ -1018,8 +1017,6 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) Word16 sy; Word32 sign; - // push_wmops( "BASOP_Util_Divide3232_Scale_cadence" ); - /* assert (x >= (Word32)0); */ assert( y != (Word32) 0 ); @@ -1041,7 +1038,6 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) IF( x == (Word32) 0 ) { *s = 0; - // pop_wmops(); return ( (Word32) 0 ); } @@ -1062,48 +1058,10 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { z = L_negate( z ); } - // pop_wmops(); - return z; -} - -/*1bit HR in x > 0*/ -Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, Word32 y, Word16 *s ) -{ - Word32 z; - // Word16 sx; - Word16 sy; - Word32 sign; - /* assert (x >= (Word32)0); */ - assert( y != (Word32) 0 ); - - sign = 0; - move16(); - - if ( y < 0 ) - { - sign = L_xor( sign, 1 ); - } - - if ( y < 0 ) - { - y = L_negate( y ); - } - - sy = norm_l( y ); - y = L_shl( y, sy ); - move16(); - *s = add( 0, sy ); - move16(); - - z = div_w( x, y ); - - if ( sign != 0 ) - { - z = L_negate( z ); - } return z; } + Word16 BASOP_Util_Divide3232_Scale( Word32 x, Word32 y, Word16 *s ) { Word16 z; -- GitLab From cb8c3aa9793f09e28933e9ca9de5da91447c425b Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 09:47:44 +0000 Subject: [PATCH 29/41] more revert division variation --- lib_com/basop_util.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index 1ef2cd8e7..a6db7dc8d 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -332,10 +332,6 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, /*!< i : Numerator*/ Word32 y, /*!< i : Denominator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ -Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, /*!< i : Numerator*/ - Word32 y, /*!< i : Denominator*/ - - Word16 *s ); /*!< o : Additional scalefactor difference*/ /************************************************************************/ /*! -- GitLab From a0f0eac658a432e0bba4ec28c6db3350bdde6f7b Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 11:51:31 +0100 Subject: [PATCH 30/41] cleanup useless speedup macros --- .../ivas_dirac_dec_binaural_functions_fx.c | 123 +++--------------- 1 file changed, 15 insertions(+), 108 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 333e7603a..8da4f82e7 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -47,22 +47,16 @@ // MHZ NUMBERS: // NULL: 178.407 -// ALL: 169.499 +// ALL: 169.499 77 (170.650 wo 17) #define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE #define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE -#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE #define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE #define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE -#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET -#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET -#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET #define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE -#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE #define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE -//#define FIX_1326_SPEEDUP_17 // use 1/x // 1.25WMOPS --> USE #define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; @@ -1333,7 +1327,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric } pop_wmops(); /*push_wmops( "IDRBCM apply EQ_low" );*/ - push_wmops( "IDRBCM target matrix" ); + push_wmops( "IDRBCM target matrix (IDRBCMtm)" ); /* Determine target covariance matrix containing target binaural properties */ FOR( bin = 0; bin < nBins; bin++ ) { @@ -1359,6 +1353,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric meanEnePerCh_fx = Mpy_32_32( hDiracDecBin->earlyPartEneCorrection_fx[bin], subFrameTotalEne_fx[bin] ); // Q( q_meanEnePerCh ) q_meanEnePerCh = add( sub( q_earlyPartEneCorrection, subFrameTotalEne_e[bin] ), 1 ); // q_earlyPartEneCorrection + 31 - subFrameTotalEne_e[bin] - 31 + Q1(0.5f) /* Determine direct part target covariance matrix (for 1 or 2 directions) */ + push_wmops( "IDRBCMtm LOOP1" ); FOR( dirIndex = 0; dirIndex < hSpatParamRendCom->numSimultaneousDirections; dirIndex++ ) { Word16 aziDeg, eleDeg; @@ -1437,6 +1432,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric diffuseness_fx = 0; move32(); } + IF( isIsmDirection ) { /* Objects cause lesser decorrelation reduction, to avoid removing all decorrelation when only objects are present */ @@ -1446,7 +1442,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric { diffusenessValForDecorrelationReduction_fx = L_sub( diffusenessValForDecorrelationReduction_fx, ratio_fx ); /*Q30*/ } - IF( separateCenterChannelRendering ) { /* In masa + mono rendering mode, the center directions originate from phantom sources, so the @@ -1493,14 +1488,12 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric Word32 hrtfEneCenter_fx, hrtfEneSides_fx, hrtfEneRealized_fx; Word16 eneCorrectionFactor_fx, eneCorrectionFactor_e; Word16 w1_fx, w2_fx, w3_fx, eq_fx; -#ifdef FIX_1326_SPEEDUP_15 - hrtfEneCenter_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ), lImagp_fx, lImagp_fx ), lRealp_fx, lRealp_fx ); // Q25 -#else + hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), // Q25 L_add( Mpy_32_32( lImagp_fx, lImagp_fx ), // Q25 L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), // Q25 Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) ); // Q25 -#endif + /* Spread coherence is synthesized as coherent sources at 30 degree horizontal spacing. * The following formulas determine the gains for these sources. * spreadCoh = 0: Only panning @@ -1529,14 +1522,12 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric /* Apply the gain for the left source of the three coherent sources */ getDirectPartGains_fx( bin, add( aziDeg, 30 ), eleDeg, &lRealpTmp_fx, &lImagpTmp_fx, &rRealpTmp_fx, &rImagpTmp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex + 1], isHeadtracked ); -#ifdef FIX_1326_SPEEDUP_15 - hrtfEneSides_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), rImagpTmp_fx, rImagpTmp_fx ), lImagpTmp_fx, lImagpTmp_fx ), lRealpTmp_fx, lRealpTmp_fx ); // Q25 -#else + hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ), // Q25 L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ), // Q25 L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), // Q25 Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) ); // Q25 -#endif + lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) ); // Q25 lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) ); // Q25 rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) ); // Q25 @@ -1624,21 +1615,12 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move16(); } -#ifdef FIX_1326_SPEEDUP_15 - hrtfEne_fx[0] = Madd_32_32( Mpy_32_32( lRealp_fx, lRealp_fx ), lImagp_fx, lImagp_fx ); // Q( 2*q_lr - 31 ) - hrtfEne_fx[1] = Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ); // Q( 2*q_lr - 31 ) - move32(); - move32(); - hrtfCrossRe_fx = Madd_32_32( Mpy_32_32( lRealp_fx, rRealp_fx ), lImagp_fx, rImagp_fx ); // Q( 2*q_lr - 31 ) - hrtfCrossIm_fx = Madd_32_32( Mpy_32_32( -lImagp_fx, rRealp_fx ), lRealp_fx, rImagp_fx ); // Q( 2*q_lr - 31 ) -#else hrtfEne_fx[0] = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), Mpy_32_32( lImagp_fx, lImagp_fx ) ); // Q( 2*q_lr - 31 ) hrtfEne_fx[1] = L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), Mpy_32_32( rImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 ) move32(); move32(); hrtfCrossRe_fx = L_add( Mpy_32_32( lRealp_fx, rRealp_fx ), Mpy_32_32( lImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 ) hrtfCrossIm_fx = L_add( Mpy_32_32( -lImagp_fx, rRealp_fx ), Mpy_32_32( lRealp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 ) -#endif /* Add direct part (1 or 2) covariance matrix */ dirEne_fx = Mpy_32_32( ratio_fx, meanEnePerCh_fx ); // Q(q_meanEnePerCh - 1) @@ -1655,6 +1637,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move32(); move32(); } + pop_wmops(); //push_wmops( "IDRBCMtm LOOP1" ); /* Add diffuse / ambient part covariance matrix */ diffuseness_fx = L_max( 0, diffuseness_fx ); // Q30 @@ -1713,11 +1696,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric } ELSE { -#ifdef FIX_1326_SPEEDUP_15 - hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( Madd_32_16( L_shl( surCoh_fx, 16 ), hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] ); -#else hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( L_add( Mpy_32_16_1( hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), L_shl( surCoh_fx, 16 ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] ); -#endif } move32(); } @@ -1733,7 +1712,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29 move32(); } - pop_wmops(); /*push_wmops( "IDRBCM target matrix" );*/ + pop_wmops(); /*push_wmops( "IDRBCM target matrix (IDRBCMtm)" );;*/ test(); /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */ @@ -2183,14 +2162,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 ); tmp2 = L_add( L_shl( resultMtxRe_fx[0][0], exp ), L_shl( resultMtxRe_fx[1][1], exp ) ); q_tmp2 = add( q_res, exp ); -#ifdef FIX_1326_SPEEDUP_11 - { - Word16 shift1 = s_max( 0, sub( q_tmp2, q_CrEne ) ); - Word16 shift2 = s_max( 0, sub( q_CrEne, q_tmp2 ) ); - realizedOutputEne_fx = L_add( L_shr( tmp1, shift2 ), L_shr( tmp2, shift1 ) ); - q_realizedOutputEne = s_min( q_CrEne, q_tmp2 ); - } -#else + IF( LT_16( q_CrEne, q_tmp2 ) ) { realizedOutputEne_fx = L_add( tmp1, L_shr( tmp2, sub( q_tmp2, q_CrEne ) ) ); @@ -2203,7 +2175,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_realizedOutputEne = q_tmp2; move16(); } -#endif + exp = sub( get_min_scalefactor( hDiracDecBin->ChEneOut_fx[0][bin], hDiracDecBin->ChEneOut_fx[1][bin] ), 1 ); targetOutputEne_fx = L_add( L_shl( hDiracDecBin->ChEneOut_fx[0][bin], exp ), L_shl( hDiracDecBin->ChEneOut_fx[1][bin], exp ) ); q_targetOutputEne = add( hDiracDecBin->q_ChEneOut, exp ); @@ -3373,21 +3345,7 @@ static void eig2x2_fx( #endif /* Numeric case, when input is near an identity matrix with a gain */ -#ifdef FIX_1326_SPEEDUP_03 - tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31 - IF( LT_32( pm_fx, L_shl_sat( tmp1, sub( q_tmp1, q_tmp2 ) ) ) ) - { - Ure_fx[0][0] = ONE_IN_Q30; - move32(); - Ure_fx[1][1] = ONE_IN_Q30; - move32(); - *q_U = Q30; - move16(); - - return; - } -#else tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31 IF( LT_16( q_tmp1, q_tmp2 ) ) @@ -3418,7 +3376,6 @@ static void eig2x2_fx( return; } } -#endif q_U_1 = 0; q_U_2 = 0; @@ -3472,11 +3429,7 @@ static void eig2x2_fx( #endif #if 1 -#ifdef FIX_1326_SPEEDUP_17 - tmp2 = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, tmp3, &exp ); -#else tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); -#endif exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); @@ -3564,11 +3517,7 @@ static void eig2x2_fx( #endif #if 1 -#ifdef FIX_1326_SPEEDUP_17 - tmp2 = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, tmp3, &exp ); -#else tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); -#endif exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); @@ -4531,13 +4480,7 @@ static void formulate2x2MixingMatrix_fx( } ELSE { - push_wmops( "formulate2x2MixingMatrix Division" ); -#ifdef FIX_1326_SPEEDUP_17 - maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, maxEne_fx, &exp ); -#else maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxEne_fx, &exp ); -#endif - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) ); } exp = norm_l( maxEneDiv_fx ); @@ -4620,9 +4563,7 @@ static void formulate2x2MixingMatrix_fx( ELSE { temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); - push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp ); - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) ); #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp @@ -4664,9 +4605,7 @@ static void formulate2x2MixingMatrix_fx( a++; } temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); - push_wmops( "formulate2x2MixingMatrix Division" ); temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 @@ -4797,6 +4736,7 @@ static void formulate2x2MixingMatrix_fx( #endif pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/ + IF( D_fx[0] == 0 ) { #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC @@ -4804,20 +4744,15 @@ static void formulate2x2MixingMatrix_fx( exp = sub( exp, sub( Q30, 62 ) ); #else temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + move32(); exp = ONE_DIV_EPSILON_EXP; + move16(); #endif } ELSE { - push_wmops( "formulate2x2MixingMatrix Division" ); -#ifdef FIX_1326_SPEEDUP_17 - temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, D_fx[0], &exp ); -#else - temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); -#endif exp = sub( exp, sub( Q30, q_D ) ); - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ } div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); @@ -4848,13 +4783,7 @@ static void formulate2x2MixingMatrix_fx( } ELSE { - push_wmops( "formulate2x2MixingMatrix Division" ); -#ifdef FIX_1326_SPEEDUP_17 - temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, D_fx[1], &exp1 ); -#else temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[1], &exp1 ); -#endif - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ exp1 = sub( exp1, sub( Q30, q_D ) ); } div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 @@ -4869,17 +4798,6 @@ static void formulate2x2MixingMatrix_fx( // 1310720000 = 10,000.0f in Q17 -#ifdef FIX_1326_SPEEDUP_09 - { - Word16 shift1 = s_max( sub( Q17, q_div ), 0 ); - Word16 shift2 = s_max( sub( q_div, Q17 ), 0 ); - - div_fx[0] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[0], shift2 ) ); // q_div - move32(); - div_fx[1] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[1], shift2 ) ); // q_div - move32(); - } -#else IF( LT_16( q_div, Q17 ) ) { div_fx[0] = L_min( L_shr( 1310720000, sub( Q17, q_div ) ), div_fx[0] ); // q_div @@ -4896,7 +4814,6 @@ static void formulate2x2MixingMatrix_fx( q_div = Q17; move16(); } -#endif matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); @@ -4913,11 +4830,7 @@ static void formulate2x2MixingMatrix_fx( W_tmp = W_mult0_32_32( tmpRe_fx[chA][chB], div_fx[chB] ); IF( W_tmp != 0 ) { -#ifdef FIX_1326_SPEEDUP_10 - hdrm_re[chA][chB] = W_norm( W_tmp ); -#else hdrm_re[chA][chB] = sub( W_norm( W_tmp ), 0 ); -#endif move16(); W_tmp = W_shl( W_tmp, hdrm_re[chA][chB] ); tmpRe_fx[chA][chB] = W_extract_h( W_tmp ); @@ -5062,13 +4975,7 @@ static void formulate2x2MixingMatrix_fx( { Word16 Pre_shift, Pim_shift; temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); - push_wmops( "formulate2x2MixingMatrix Division" ); -#ifdef FIX_1326_SPEEDUP_17 - temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, temp, &exp ); -#else temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp ); -#endif - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/ q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp ); Pre_shift = norm_l( Pre_fx[0][chB] ); -- GitLab From f97ec39f828d4e605dcca2ed09a3bf3d3cce0f44 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 11:54:30 +0100 Subject: [PATCH 31/41] apply clang format patch --- .../ivas_dirac_dec_binaural_functions_fx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 8da4f82e7..101f76a37 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1489,10 +1489,10 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric Word16 eneCorrectionFactor_fx, eneCorrectionFactor_e; Word16 w1_fx, w2_fx, w3_fx, eq_fx; - hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), // Q25 - L_add( Mpy_32_32( lImagp_fx, lImagp_fx ), // Q25 - L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), // Q25 - Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) ); // Q25 + hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), // Q25 + L_add( Mpy_32_32( lImagp_fx, lImagp_fx ), // Q25 + L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), // Q25 + Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) ); // Q25 /* Spread coherence is synthesized as coherent sources at 30 degree horizontal spacing. * The following formulas determine the gains for these sources. @@ -1523,10 +1523,10 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric /* Apply the gain for the left source of the three coherent sources */ getDirectPartGains_fx( bin, add( aziDeg, 30 ), eleDeg, &lRealpTmp_fx, &lImagpTmp_fx, &rRealpTmp_fx, &rImagpTmp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex + 1], isHeadtracked ); - hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ), // Q25 - L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ), // Q25 - L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), // Q25 - Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) ); // Q25 + hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ), // Q25 + L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ), // Q25 + L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), // Q25 + Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) ); // Q25 lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) ); // Q25 lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) ); // Q25 @@ -1637,7 +1637,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move32(); move32(); } - pop_wmops(); //push_wmops( "IDRBCMtm LOOP1" ); + pop_wmops(); // push_wmops( "IDRBCMtm LOOP1" ); /* Add diffuse / ambient part covariance matrix */ diffuseness_fx = L_max( 0, diffuseness_fx ); // Q30 -- GitLab From 1d475785f8351fd7f3fa6f8021878273a945563e Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 12:00:04 +0100 Subject: [PATCH 32/41] deactivate all speedups --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 101f76a37..4a55a37cc 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -50,14 +50,14 @@ // ALL: 169.499 77 (170.650 wo 17) -#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE -#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE -#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE -#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE -#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE - -#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE -#define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE +//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE +//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE +//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE +//#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE +//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE + +//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE +//#define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; -- GitLab From cc08c7185e22852fd126643e5ba085e19571e393 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 12:16:25 +0000 Subject: [PATCH 33/41] cleaup a bit --- .../ivas_dirac_dec_binaural_functions_fx.c | 28 ++++++++----------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 4a55a37cc..ce36e0ae3 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1432,7 +1432,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric diffuseness_fx = 0; move32(); } - IF( isIsmDirection ) { /* Objects cause lesser decorrelation reduction, to avoid removing all decorrelation when only objects are present */ @@ -1442,6 +1441,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric { diffusenessValForDecorrelationReduction_fx = L_sub( diffusenessValForDecorrelationReduction_fx, ratio_fx ); /*Q30*/ } + IF( separateCenterChannelRendering ) { /* In masa + mono rendering mode, the center directions originate from phantom sources, so the @@ -1527,11 +1527,10 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ), // Q25 L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), // Q25 Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) ); // Q25 - - lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) ); // Q25 - lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) ); // Q25 - rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) ); // Q25 - rImagp_fx = L_add( rImagp_fx, Mpy_32_32( sidesMul_fx, rImagpTmp_fx ) ); // Q25 + lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) ); // Q25 + lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) ); // Q25 + rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) ); // Q25 + rImagp_fx = L_add( rImagp_fx, Mpy_32_32( sidesMul_fx, rImagpTmp_fx ) ); // Q25 /* Apply the gain for the right source of the three coherent sources. * -30 degrees to 330 wrapping due to internal functions. */ @@ -2162,7 +2161,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 ); tmp2 = L_add( L_shl( resultMtxRe_fx[0][0], exp ), L_shl( resultMtxRe_fx[1][1], exp ) ); q_tmp2 = add( q_res, exp ); - IF( LT_16( q_CrEne, q_tmp2 ) ) { realizedOutputEne_fx = L_add( tmp1, L_shr( tmp2, sub( q_tmp2, q_CrEne ) ) ); @@ -2198,6 +2196,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_missingOutputEne = q_targetOutputEne; move16(); } + tmp1 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), missingOutputEne_fx, sub( 31, q_missingOutputEne ), &exp1 ); #ifdef FIX_1326_SPEEDUP_13 @@ -2258,7 +2257,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_processMtxDec_bin = q_processMtxDec[bin]; move16(); move16(); - /* Store processing matrices */ FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { @@ -3216,6 +3214,7 @@ static void eig2x2_fx( a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) add_fx = 0.5f * (e1 + e2)*/ + IF( L_and( c_re == 0, c_im == 0 ) ) { /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0 @@ -3345,7 +3344,6 @@ static void eig2x2_fx( #endif /* Numeric case, when input is near an identity matrix with a gain */ - tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31 IF( LT_16( q_tmp1, q_tmp2 ) ) @@ -3425,6 +3423,7 @@ static void eig2x2_fx( #else tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); + tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); #endif @@ -3513,6 +3512,7 @@ static void eig2x2_fx( #else tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); + tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); #endif @@ -4563,13 +4563,13 @@ static void formulate2x2MixingMatrix_fx( ELSE { temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); + temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp ); exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) ); #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp #endif } - #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp #endif @@ -4599,19 +4599,14 @@ static void formulate2x2MixingMatrix_fx( } ELSE { - if ( E_out2 == 0 ) - { - static int a = 0; - a++; - } temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); + temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif } - #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 #endif @@ -4796,7 +4791,6 @@ static void formulate2x2MixingMatrix_fx( div_fx[1] = L_shr( div_fx[1], sub( sub( 31, exp1 ), q_div ) ); // q_div move32(); - // 1310720000 = 10,000.0f in Q17 IF( LT_16( q_div, Q17 ) ) { -- GitLab From 4f33c171f1892b375ef96274ab4ada09801f2509 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 13:54:05 +0100 Subject: [PATCH 34/41] activate the big chunks - SPEEDUP 8, 13 --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index ce36e0ae3..f63f27eb0 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -53,8 +53,8 @@ //#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE //#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE //#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE -//#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE -//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE +#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE +#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE //#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE //#define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE -- GitLab From 4c1d9288ffd0180ca25a2b0f083d3adf4770accb Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 14:32:17 +0100 Subject: [PATCH 35/41] activated spedups 1 , 2, 4, 16, 18 --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index f63f27eb0..7dee99218 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -50,14 +50,14 @@ // ALL: 169.499 77 (170.650 wo 17) -//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE -//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE -//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE +#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE +#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE +#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE #define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE #define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE -//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE -//#define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE +#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE +#define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; @@ -4618,7 +4618,7 @@ static void formulate2x2MixingMatrix_fx( move32(); Ghat_fx[1] = L_shr( Ghat_fx[1], sub( sub( 31, exp1 ), q_Ghat ) ); // q_Ghat move32(); - pop_wmops(); + pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix RegSMInv" );*/ /* Matrix multiplication, tmp = Ky' * G_hat * Q */ FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) -- GitLab From 9c07d08c9a8733f22592fb9146b9563e089f45c9 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 15:56:56 +0100 Subject: [PATCH 36/41] rename optimiztion macros, move macros to options.h --- lib_com/options.h | 3 + .../ivas_dirac_dec_binaural_functions_fx.c | 140 +----------------- 2 files changed, 11 insertions(+), 132 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index ccaeca46a..ab7efb8fd 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -176,4 +176,7 @@ #define FIX_ISSUE_1376 /* VA: Fix for issue 1376 (issue with GSC excitation) */ #define OPT_SBA_AVOID_SPAR_RESCALE /* Optimization made to spar decoder and IGF */ #define NONBE_FIX_1386_STEREO_DMX_EVS_PHA /* Orange: Fix for stereo DMX / PHA mode : Change the filter taps resolution (Q31->Q30), improve precision for the IR window, for the ILD & IPD smoothing in sub-bands, for the ISD counters and for ICCr. */ +#define FIX_1326_SUBSTITUTE_CMPMANT32EXP /* FhG: Minor WMOPS tuning*/ +#define FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT /* FhG: WMOPS tuning */ +#define FIX_1326_SPEEDUP_eig2x2_fx /* FhG: Minor WMOPS tuning*/ #endif diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 7dee99218..7f2c6b4d4 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -46,18 +46,6 @@ #include "wmc_auto.h" // MHZ NUMBERS: -// NULL: 178.407 -// ALL: 169.499 77 (170.650 wo 17) - - -#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE -#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE -#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE -#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE -#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE - -#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE -#define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; @@ -1892,11 +1880,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( move16(); Word32 tmp1, tmp2, res1, res2; Word16 q_tmp1, q_tmp2, q_realizedOutputEne, q_targetOutputEne, q_missingOutputEne, q_gain; -#ifdef FIX_1326_SPEEDUP_13 - Word16 exp1, q_processMtx_bin, q_processMtxDec_bin; -#else Word16 exp1, exp2, q_processMtx_bin, q_processMtxDec_bin; -#endif CrEneL_fx = 0; move32(); @@ -2199,13 +2183,12 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( tmp1 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), missingOutputEne_fx, sub( 31, q_missingOutputEne ), &exp1 ); -#ifdef FIX_1326_SPEEDUP_13 +#ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT { - Word16 exp_temp; - tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); - tmp2 = ISqrt32( tmp2, &exp_temp ); + tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp2 ); + tmp2 = ISqrt32( tmp2, &exp2 ); gain_fx = Mpy_32_32( tmp2, Sqrt32( tmp1, &exp1 ) ); - q_gain = sub( 31, add( exp_temp, exp1 ) ); + q_gain = sub( 31, add( exp2, exp1 ) ); } #else { @@ -3317,7 +3300,7 @@ static void eig2x2_fx( /* Numeric case, when input is practically zeros */ // IF( D_fx[0] < EPSILON_FX ) -#ifdef FIX_1326_SPEEDUP_02 +#ifdef FIX_1326_SUBSTITUTE_CMPMANT32EXP IF( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) ) { Ure_fx[0][0] = ONE_IN_Q31; @@ -3406,27 +3389,10 @@ static void eig2x2_fx( tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); - -#ifdef FIX_1326_SPEEDUP_16 - - { - Word16 tmp2_exp; - Word32 eps_tmp; - tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &tmp2_exp ); - - // Add epsilon if relevant - eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, tmp2_exp ) ); - tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) ); - - exp_tmp3 = add( tmp2_exp, 1 ); - } -#else tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); -#endif - #if 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); @@ -3499,22 +3465,10 @@ static void eig2x2_fx( q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); -#ifdef FIX_1326_SPEEDUP_04 - Word16 exp_tmp2; - Word32 eps_tmp; - - tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &exp_tmp2 ); - eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, exp_tmp2 ) ); - - tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) ); // Add Epsilon if relevant - - exp_tmp3 = add( exp_tmp2, 1 ); -#else tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); -#endif #if 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); @@ -3815,7 +3769,6 @@ static void matrixMul_fx( return; } -#ifndef FIX_1326_SPEEDUP_01 static void matrixTransp1Mul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ @@ -3929,7 +3882,6 @@ static void matrixTransp1Mul_fx( return; } -#endif /*FIX_1326_SPEEDUP_01*/ static void matrixTransp2Mul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ @@ -4655,80 +4607,10 @@ static void formulate2x2MixingMatrix_fx( /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx For matrix A that is P = A(A'A)^0.5 */ push_wmops( "oPtoA MT1M" ); -#ifdef FIX_1326_SPEEDUP_01 - // matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); - - { - // Word16 chA, chB; - { - chA = 0, chB = 0; - tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ), - Are_fx[1][0], Are_fx[1][0] ), - Aim_fx[0][0], Aim_fx[0][0] ), - Aim_fx[1][0], Aim_fx[1][0] ); - move32(); - } - { - // chA = 0, chB = 1; - tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ), - Are_fx[1][1], Are_fx[1][0] ), - Aim_fx[0][1], Aim_fx[0][0] ), - Aim_fx[1][1], Aim_fx[1][0] ); - move32(); - tmpIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ), - Are_fx[1][1], Aim_fx[1][0] ), - Aim_fx[0][1], Are_fx[0][0] ), - Aim_fx[1][1], Are_fx[1][0] ); - move32(); - } - { - // chA = 1, chB = 0; - tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ), - Are_fx[1][1], Are_fx[1][1] ), - Aim_fx[0][1], Aim_fx[0][1] ), - Aim_fx[1][1], Aim_fx[1][1] ); - move32(); - } - { - // chA = 1, chB = 1; - } - - q_temp = sub( add( q_A, q_A ), 31 ); - - move16(); - Word16 ZeroState = add( 1, 0 ); - if ( tmpRe_fx[0][0] != 0 ) - { - ZeroState = add( 0, 0 ); - } - if ( tmpRe_fx[1][1] != 0 ) - { - ZeroState = add( 0, 0 ); - } - if ( tmpRe_fx[1][0] != 0 ) - { - ZeroState = add( 0, 0 ); - } - if ( tmpIm_fx[1][0] != 0 ) - { - ZeroState = add( 0, 0 ); - } - - if ( sub( ZeroState, 1 ) == 0 ) - { - q_temp = Q31; - move16(); - } - } - - - eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); -#else matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); -#endif pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/ @@ -4753,11 +4635,9 @@ static void formulate2x2MixingMatrix_fx( move32(); #ifdef FIX_1326_SPEEDUP_08 - // This is just a shortcut to already existing optimizations (FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) - but makes everything even faster - { - div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1 - exp1 = add( 0, 20 ); // move32(); - } + //Sqrt(1) + div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1 + exp1 = add( 0, 20 ); IF( D_fx[1] != 0 ) // This is the new code: replace div sqrt by isqrt { @@ -4841,11 +4721,7 @@ static void formulate2x2MixingMatrix_fx( W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] ); IF( W_tmp != 0 ) { -#ifdef FIX_1326_SPEEDUP_10 - hdrm_im[chA][chB] = W_norm( W_tmp ); -#else hdrm_im[chA][chB] = sub( W_norm( W_tmp ), 0 ); -#endif move16(); W_tmp = W_shl( W_tmp, hdrm_im[chA][chB] ); tmpIm_fx[chA][chB] = W_extract_h( W_tmp ); -- GitLab From 596a724fac86c406dc66f1a2353bddeaff67a42f Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 15:06:07 +0000 Subject: [PATCH 37/41] fix: rename some mocros Cleanup: push/pop wmops --- .../ivas_dirac_dec_binaural_functions_fx.c | 67 ++----------------- 1 file changed, 6 insertions(+), 61 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 7f2c6b4d4..e8052b08c 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -45,8 +45,6 @@ #include "wmc_auto.h" -// MHZ NUMBERS: - Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; /*------------------------------------------------------------------------- @@ -506,9 +504,8 @@ void ivas_dirac_dec_binaural_render_fx( FOR( subframe_idx = first_sf; subframe_idx < last_sf; subframe_idx++ ) { Word16 n_samples_sf = imult1616( slot_size, hSpatParamRendCom->subframe_nbslots[subframe_idx] ); - push_wmops( "IDR binaural internal (IDRBI)" ); ivas_dirac_dec_binaural_internal_fx( st_ivas, st_ivas->hCombinedOrientationData, output_fx_local, nchan_transport, subframe_idx ); - pop_wmops(); /*push_wmops( "IDR binaural internal (IDRBI)" );*/ + FOR( ch = 0; ch < nchan_out; ch++ ) { output_fx_local[ch] += n_samples_sf; @@ -711,7 +708,6 @@ static void ivas_dirac_dec_binaural_internal_fx( } } /* CLDFB Analysis of input */ - push_wmops( "IDRBI CLDFB ANALYSYS" ); FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ ) { FOR( ch = 0; ch < numInChannels; ch++ ) @@ -861,7 +857,6 @@ static void ivas_dirac_dec_binaural_internal_fx( } } } - pop_wmops(); /*push_wmops( "IDRBI CLDFB ANALYSYS" );*/ test(); IF( EQ_32( config_data.ivas_format, SBA_FORMAT ) || EQ_32( config_data.ivas_format, SBA_ISM_FORMAT ) ) @@ -926,9 +921,7 @@ static void ivas_dirac_dec_binaural_internal_fx( } test(); - push_wmops( "IDRBI cov matrices (IDRBCM)" ); ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, st_ivas->hMasaIsmData, q_inp ); - pop_wmops(); /*push_wmops( "IDRBI cov matrices (IDRBCM)" );*/ IF( EQ_32( config_data.ivas_format, ISM_FORMAT ) ) { @@ -966,9 +959,7 @@ static void ivas_dirac_dec_binaural_internal_fx( move16(); } - push_wmops( "IDRBI proc matrices (IRDBI pm)" ); ivas_dirac_dec_binaural_determine_processing_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, max_band_decorr, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, nchanSeparateChannels, st_ivas->hMasaIsmData ); - pop_wmops(); /*push_wmops( "IDRBI proc matrices (IRDBI pm)" );*/ q_inp = Q6; move16(); @@ -1014,10 +1005,8 @@ static void ivas_dirac_dec_binaural_internal_fx( hDiracDecBin->q_processMtxDecPrev = q_mat; move16(); - push_wmops( "IDRBI processOutput" ); ivas_dirac_dec_binaural_process_output_fx( hDiracDecBin, hSpatParamRendCom, st_ivas->cldfbSynDec, output_fx, &q_out, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, q_inp, max_band_decorr, numInChannels, config_data.processReverb, subframe, q_mat ); - pop_wmops(); /*push_wmops( "IDRBI processOutput" ); - */ + hDiracDecBin->hDiffuseDist = NULL; hSpatParamRendCom->slots_rendered = add( hSpatParamRendCom->slots_rendered, hSpatParamRendCom->subframe_nbslots[subframe] ); @@ -1151,7 +1140,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric nBins = hSpatParamRendCom->num_freq_bands; /* Actually bins */ move16(); - push_wmops( "IDRBCM inits" ); + q_earlyPartEneCorrection = s_min( Q31, add( getScaleFactor32( hDiracDecBin->earlyPartEneCorrection_fx, nBins ), hDiracDecBin->q_earlyPartEneCorrection ) ); scale_sig32( hDiracDecBin->earlyPartEneCorrection_fx, nBins, sub( q_earlyPartEneCorrection, hDiracDecBin->q_earlyPartEneCorrection ) ); hDiracDecBin->q_earlyPartEneCorrection = q_earlyPartEneCorrection; @@ -1185,7 +1174,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric gainCache[idx].azi = -1000; /* Use -1000 as value for uninitialized cache. */ move16(); } - pop_wmops(); /*push_wmops( "IDRBCM inits" );*/ /* Determine EQ for low bit rates (13.2 and 16.4 kbps) */ applyLowBitRateEQ = 0; @@ -1198,13 +1186,11 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move16(); IF( EQ_32( ivas_total_brate, IVAS_16k4 ) ) { - push_wmops( "IDRBCM Determine EQ_low_rates" ); FOR( bin = 0; bin < LOW_BIT_RATE_BINAURAL_EQ_BINS; bin++ ) { lowBitRateEQ_fx[bin + LOW_BIT_RATE_BINAURAL_EQ_OFFSET] = L_add( L_shr( lowBitRateBinauralEQ_fx[bin], 1 ), ONE_IN_Q30 ); // Q31 move32(); } - pop_wmops(); /*push_wmops( "IDRBCM Determine EQ_low_rates" );*/ } ELSE { @@ -1223,7 +1209,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below - push_wmops( "IDRBCM input Matrix" ); /* Calculate input covariance matrix */ FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ ) { @@ -1258,9 +1243,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move32(); } } - pop_wmops(); /*push_wmops( "IDRBCM input Matrix" );*/ - push_wmops( "IDRBCM apply EQ_low" ); /* Apply EQ at low bit rates */ IF( applyLowBitRateEQ != 0 ) { @@ -1313,9 +1296,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric } } } - pop_wmops(); /*push_wmops( "IDRBCM apply EQ_low" );*/ - push_wmops( "IDRBCM target matrix (IDRBCMtm)" ); /* Determine target covariance matrix containing target binaural properties */ FOR( bin = 0; bin < nBins; bin++ ) { @@ -1341,7 +1322,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric meanEnePerCh_fx = Mpy_32_32( hDiracDecBin->earlyPartEneCorrection_fx[bin], subFrameTotalEne_fx[bin] ); // Q( q_meanEnePerCh ) q_meanEnePerCh = add( sub( q_earlyPartEneCorrection, subFrameTotalEne_e[bin] ), 1 ); // q_earlyPartEneCorrection + 31 - subFrameTotalEne_e[bin] - 31 + Q1(0.5f) /* Determine direct part target covariance matrix (for 1 or 2 directions) */ - push_wmops( "IDRBCMtm LOOP1" ); FOR( dirIndex = 0; dirIndex < hSpatParamRendCom->numSimultaneousDirections; dirIndex++ ) { Word16 aziDeg, eleDeg; @@ -1624,7 +1604,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move32(); move32(); } - pop_wmops(); // push_wmops( "IDRBCMtm LOOP1" ); /* Add diffuse / ambient part covariance matrix */ diffuseness_fx = L_max( 0, diffuseness_fx ); // Q30 @@ -1699,7 +1678,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29 move32(); } - pop_wmops(); /*push_wmops( "IDRBCM target matrix (IDRBCMtm)" );;*/ test(); /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */ @@ -1865,7 +1843,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( move16(); } - push_wmops( "IRDBI pm LOOP1 (IDRBI pm LOOP1)" ); FOR( bin = 0; bin < nBins; bin++ ) { Word32 tmpMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], gain_fx; @@ -1889,7 +1866,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_CrEne = Q31; move16(); - push_wmops( "IDRBI pm LOOP1 sec A (formulate2x2MixingMatrix)" ); IF( GT_16( hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_e[1][bin] ) ) { hDiracDecBin->ChEne_fx[1][bin] = L_shr( hDiracDecBin->ChEne_fx[1][bin], sub( hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_e[1][bin] ) ); @@ -1959,9 +1935,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossImOut_fx[bin], hDiracDecBin->q_ChCrossOut, prototypeMtx_fx, Mre_fx, Mim_fx, &q_M, hDiracDecBin->reqularizationFactor_fx ); - pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec A (formulate2x2MixingMatrix)" );*/ - push_wmops( "IDRBI pm LOOP1 sec B" ); IF( LT_16( hDiracDecBin->q_ChEne, hDiracDecBin->q_ChCross ) ) { CxRe_fx[0][0] = hDiracDecBin->ChEne_fx[0][bin]; @@ -2015,13 +1989,9 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( #endif resultMtxRe_fx, resultMtxIm_fx, &q_res ); - pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec B" );*/ - /* When below the frequency limit where decorrelation is applied, we inject the decorrelated * residual (or missing) signal component. The procedure is active when there are not enough independent * signal energy to synthesize a signal with the target covariance matrix from the non-decorrelated signals */ - - push_wmops( "IDRBI pm LOOP1 sec C" ); IF( LT_16( bin, max_band_decorr ) ) { Word32 decorrelationReductionFactor_fx; @@ -2137,9 +2107,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( q_Mdec = Q31; move16(); } - pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec C" );*/ - push_wmops( "IDRBI pm LOOP1 sec D" ); /* The regularizations at determining mixing matrices cause signal energy to be lost to some degree, which is compensated for here */ tmp1 = L_add( CrEneL_fx, CrEneR_fx ); exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 ); @@ -2274,10 +2242,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( move16(); q_processMtxDec[bin] = sub( q_Mdec, 16 ); move16(); - pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec D" );*/ - - push_wmops( "IDRBI pm LOOP1 sec E" ); IF( separateCenterChannelRendering ) { /* The rendering of the separate center channel in masa + mono mode. @@ -2367,10 +2332,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( } } } - pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec E" );*/ } - pop_wmops(); /*push_wmops( "IRDBI pm LOOP1 (IDRBI pm LOOP1)" );*/ - /* Aligning Q-factors of all bins in the processing matrices to a common Q-factor */ minimum_s( q_processMtx, nBins, &hDiracDecBin->q_processMtx ); minimum_s( q_processMtxPrev, nBins, &hDiracDecBin->q_processMtxPrev ); @@ -2390,7 +2352,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( minimum_s( q_processMtxDec, nBins, &hDiracDecBin->q_processMtxDec ); minimum_s( q_processMtxDecPrev, nBins, &hDiracDecBin->q_processMtxDecPrev ); - push_wmops( "IRDBI pm LOOP2" ); FOR( bin = 0; bin < nBins; bin++ ) { FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) @@ -2430,7 +2391,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( } } } - pop_wmops(); /*push_wmops( "IRDBI pm LOOP2" );*/ return; } @@ -3393,6 +3353,7 @@ static void eig2x2_fx( q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); + #if 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); @@ -3464,7 +3425,6 @@ static void eig2x2_fx( tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); - tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); @@ -3533,7 +3493,7 @@ static void eig2x2_fx( move16(); } } -#ifdef FIX_1326_SPEEDUP_18 +#ifdef FIX_1326_SPEEDUP_eig2x2_fx if ( q_U_1 != 0 ) { *q_U = q_U_1; @@ -4455,12 +4415,9 @@ static void formulate2x2MixingMatrix_fx( Cout_im = Mpy_32_32( Cout_im, maxEneDiv_fx ); q_cout = sub( add( q_cout, q_maxEneDiv ), 31 ); - push_wmops( "formulate2x2MixingMatrix cholesky" ); /* Cholesky decomposition of target / output covariance matrix */ chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky ); - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix cholesky" );*/ - push_wmops( "formulate2x2MixingMatrix Eigendecomp" ); /* Eigendecomposition of input covariance matrix */ eig2x2_fx( E_in1, E_in2, q_ein, Cin_re, Cin_im, q_cin, Uxre_fx, Uxim_fx, &q_Ux, Sx_fx, &q_Sx ); @@ -4478,9 +4435,7 @@ static void formulate2x2MixingMatrix_fx( move32(); matrixDiagMul_fx( Uxre_fx, Uxim_fx, q_Ux, Sx_fx, q_Sx, Kxre_fx, Kxim_fx, &q_Kx ); - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Eigendecomp" );*/ - push_wmops( "formulate2x2MixingMatrix RegSMInv" ); /* Regularize the diagonal Sx for matrix inversion */ Sx_fx[0] = L_max( L_shr( Sx_fx[0], 1 ), Mpy_32_16_1( Sx_fx[1], regularizationFactor_fx ) ); Sx_fx[1] = L_max( L_shr( Sx_fx[1], 1 ), L_shl( Mpy_32_16_1( Sx_fx[0], regularizationFactor_fx ), 1 ) ); @@ -4570,7 +4525,6 @@ static void formulate2x2MixingMatrix_fx( move32(); Ghat_fx[1] = L_shr( Ghat_fx[1], sub( sub( 31, exp1 ), q_Ghat ) ); // q_Ghat move32(); - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix RegSMInv" );*/ /* Matrix multiplication, tmp = Ky' * G_hat * Q */ FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) @@ -4603,17 +4557,12 @@ static void formulate2x2MixingMatrix_fx( /* A = Ky' * G_hat * Q * Kx (see publication) */ matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A ); - push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" ); /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx For matrix A that is P = A(A'A)^0.5 */ - push_wmops( "oPtoA MT1M" ); matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); - pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/ - - IF( D_fx[0] == 0 ) { #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC @@ -4634,7 +4583,7 @@ static void formulate2x2MixingMatrix_fx( div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); -#ifdef FIX_1326_SPEEDUP_08 +#ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT //Sqrt(1) div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1 exp1 = add( 0, 20 ); @@ -4760,9 +4709,7 @@ static void formulate2x2MixingMatrix_fx( 0 /*int Bscale*/, #endif Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" );*/ - push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" ); /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ #if ( BINAURAL_CHANNELS != 2 ) FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) @@ -4916,8 +4863,6 @@ static void formulate2x2MixingMatrix_fx( 0 /*int Bscale*/, #endif Mre_fx, Mim_fx, q_M ); - pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" );*/ - return; } -- GitLab From cfab49b3830826388292f9fe62787a1c773e620c Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 19 Mar 2025 16:44:16 +0100 Subject: [PATCH 38/41] apply clang format patch --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index e8052b08c..be86281ef 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -4584,9 +4584,9 @@ static void formulate2x2MixingMatrix_fx( move32(); #ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT - //Sqrt(1) + // Sqrt(1) div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1 - exp1 = add( 0, 20 ); + exp1 = add( 0, 20 ); IF( D_fx[1] != 0 ) // This is the new code: replace div sqrt by isqrt { -- GitLab From 5f45790784cd617c8a35309386167a8b4ff6a65b Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Thu, 20 Mar 2025 07:55:08 +0000 Subject: [PATCH 39/41] revert:push_wmops: renamed label --- lib_dec/ivas_jbm_dec_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_dec/ivas_jbm_dec_fx.c b/lib_dec/ivas_jbm_dec_fx.c index ce60c0d65..1918ce3f3 100644 --- a/lib_dec/ivas_jbm_dec_fx.c +++ b/lib_dec/ivas_jbm_dec_fx.c @@ -1875,7 +1875,7 @@ ivas_error ivas_jbm_dec_render_fx( move16(); SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom; - push_wmops( "ivas_dec_render (IDR)" ); + push_wmops( "ivas_dec_render" ); /*----------------------------------------------------------------* * Initialization of local vars after struct has been set *----------------------------------------------------------------*/ -- GitLab From 7ab17f0711234491d62a812cbb9755d5152bfc71 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Thu, 20 Mar 2025 09:52:45 +0100 Subject: [PATCH 40/41] introduce FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2 --- .../ivas_dirac_dec_binaural_functions_fx.c | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index be86281ef..141fe8c4b 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -29,7 +29,7 @@ the United Nations Convention on Contracts on the International Sales of Goods. *******************************************************************************************************/ - +#define FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2 #include #include "options.h" #include @@ -4563,6 +4563,28 @@ static void formulate2x2MixingMatrix_fx( eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); + +#ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2 + IF( D_fx[0] == 0 ) + { +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC + temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12 in Q62 + exp = sub( exp, sub( Q30, 62 ) ); +#else + temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + move32(); + exp = ONE_DIV_EPSILON_EXP; + move16(); +#endif + div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp + move32(); + } + ELSE + { + exp = sub( 31, q_D ); + div_fx[0] = ISqrt32( D_fx[0], &exp ); + } +#else IF( D_fx[0] == 0 ) { #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC @@ -4582,6 +4604,7 @@ static void formulate2x2MixingMatrix_fx( } div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); +#endif /*FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2*/ #ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT // Sqrt(1) -- GitLab From 49363e47e693b6ea13b06be69fae0947b1c0d33c Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Thu, 20 Mar 2025 10:40:35 +0100 Subject: [PATCH 41/41] Revert "introduce FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2" This reverts commit 18d10e60341ba2cd76c8161c396c02a8e7293290. --- .../ivas_dirac_dec_binaural_functions_fx.c | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 141fe8c4b..be86281ef 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -29,7 +29,7 @@ the United Nations Convention on Contracts on the International Sales of Goods. *******************************************************************************************************/ -#define FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2 + #include #include "options.h" #include @@ -4563,28 +4563,6 @@ static void formulate2x2MixingMatrix_fx( eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); - -#ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2 - IF( D_fx[0] == 0 ) - { -#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC - temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12 in Q62 - exp = sub( exp, sub( Q30, 62 ) ); -#else - temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ - move32(); - exp = ONE_DIV_EPSILON_EXP; - move16(); -#endif - div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp - move32(); - } - ELSE - { - exp = sub( 31, q_D ); - div_fx[0] = ISqrt32( D_fx[0], &exp ); - } -#else IF( D_fx[0] == 0 ) { #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC @@ -4604,7 +4582,6 @@ static void formulate2x2MixingMatrix_fx( } div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); -#endif /*FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2*/ #ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT // Sqrt(1) -- GitLab