Replace some divisions, everything is encapsulated in FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT macro (07282bbb) · Commits · SA4 / Audio / IVAS BASOP

lib_rend/ivas_dirac_dec_binaural_functions.c

+183 −2

Original line number	Diff line number	Diff line
		@@ -47,6 +47,14 @@

		#include "wmc_auto.h"

		#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT

		#ifndef FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT
		#define FIX_1072_SET_DIV3232_RETURN_VAL /* only meaningful if FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT is not defined */
		#else
		#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT_ADDEPSIILONOBSOLETE
		#endif

		/* Four-entry fixed-point lookup table.
		 * NOTE(review): the values match 1/1, 1/2, 1/3 and 1/4 expressed in Q15
		 * (1.0 saturated to 32767) - presumably a reciprocal-of-count table;
		 * confirm the intended Q-format and index meaning against its users. */
		Word16 slot_fx[4] = {
			32767, /* ~1.0   (saturated) */
			16384, /* 1/2 */
			10922, /* ~1/3 */
			8192   /* 1/4 */
		};

		/*-------------------------------------------------------------------------
		@@ -486,6 +494,7 @@ void ivas_dirac_dec_binaural_render_fx(

		output_length = 0;
		move16();
		push_wmops( "IDR: ivas_dirac_dec_binaural_internal_fx (IDDBI)" );
		FOR( subframe_idx = first_sf; subframe_idx < last_sf; subframe_idx++ )
		{
		Word16 n_samples_sf = imult1616( slot_size, hSpatParamRendCom->subframe_nbslots[subframe_idx] );
		@@ -501,6 +510,7 @@ void ivas_dirac_dec_binaural_render_fx(
		/* update combined orientation access index */
		ivas_combined_orientation_update_index( st_ivas->hCombinedOrientationData, n_samples_sf );
		}
		pop_wmops();

		FOR( ch = 0; ch < nchan_out; ch++ )
		{
		@@ -693,6 +703,7 @@ static void ivas_dirac_dec_binaural_internal_fx(
		}
		}
		/* CLDFB Analysis of input */
		push_wmops( "IDDBI: CLDFB Analysis of input" );
		FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ )
		{
		FOR( ch = 0; ch < numInChannels; ch++ )
		@@ -818,10 +829,12 @@ static void ivas_dirac_dec_binaural_internal_fx(
		}
		}
		}
		pop_wmops();

		test();
		IF( EQ_32( config_data.ivas_format, SBA_FORMAT ) \|\| EQ_32( config_data.ivas_format, SBA_ISM_FORMAT ) )
		{
		push_wmops( "IDDBI: ivas_sba_prototype_renderer_fx" );
		hDiracDecBin->hDiffuseDist = &diffuseDistData;
		ivas_spar_param_to_masa_param_mapping_fx( st_ivas, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, q_cldfb, subframe );

		@@ -837,6 +850,7 @@ static void ivas_dirac_dec_binaural_internal_fx(
		{
		Scale_sig32( st_ivas->hSpar->hFbMixer->pFb->fb_bin_to_band.pp_cldfb_weights_per_spar_band_fx[idx], IVAS_MAX_NUM_FB_BANDS, Q22 - Q31 ); /* Q31 to Q22 */
		}
		pop_wmops();
		}

		Word16 q_inp = Q6;
		@@ -878,9 +892,10 @@ static void ivas_dirac_dec_binaural_internal_fx(
		}

		test();
		push_wmops( "IDDBI: ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx" );
		ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, Rmat_fx, subframe,
		hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, st_ivas->hMasaIsmData, q_inp );

		pop_wmops();
		IF( EQ_32( config_data.ivas_format, ISM_FORMAT ) )
		{
		max_band_decorr = 0;
		@@ -917,8 +932,10 @@ static void ivas_dirac_dec_binaural_internal_fx(
		move16();
		}

		push_wmops( "IDDBI: IDDB_detProcMat_fx" );
		ivas_dirac_dec_binaural_determine_processing_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, max_band_decorr, Rmat_fx, subframe,
		hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, nchanSeparateChannels, st_ivas->hMasaIsmData );
		pop_wmops();

		q_inp = Q6;
		move16();
		@@ -964,7 +981,9 @@ static void ivas_dirac_dec_binaural_internal_fx(
		hDiracDecBin->q_processMtxDecPrev = q_mat;
		move16();

		push_wmops( "IDDBI: ivas_dirac_dec_binaural_process_output_fx" );
		ivas_dirac_dec_binaural_process_output_fx( hDiracDecBin, hSpatParamRendCom, st_ivas->cldfbSynDec, output_fx, &q_out, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, q_inp, max_band_decorr, numInChannels, config_data.processReverb, subframe, q_mat );
		pop_wmops();

		hDiracDecBin->hDiffuseDist = NULL;

		@@ -1634,6 +1653,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
		/* Formulate average diffuseness over frame */
		frameMeanDiffuseness = BASOP_Util_Divide3232_Scale_cadence( frameMeanDiffuseness, L_max( EPSILLON_FX, frameMeanDiffusenessEneWeight_fx[bin] ), &exp ); // exp = exp + 31 - q_meanEnePerCh - exp1
		exp = sub( exp, sub( sub( 31, q_meanEnePerCh ), exp1 ) );

		hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29
		move32();
		}
		@@ -1802,6 +1822,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		move16();
		}

		push_wmops( "IDDB_detProcMat_bigLoop1" );
		FOR( bin = 0; bin < nBins; bin++ )
		{
		Word32 tmpMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], gain_fx;
		@@ -1947,6 +1968,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		* signal energy to synthesize a signal with the target covariance matrix from the non-decorrelated signals */
		IF( LT_16( bin, max_band_decorr ) )
		{
		push_wmops( "IDDB_detProcMat_bigLoop1_bigBranch1" );
		Word32 decorrelationReductionFactor_fx;
		Word16 q_decorrelationReductionFactor;

		@@ -2049,6 +2071,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		Q31, CrEneL_fx, CrEneR_fx, q_CrEne,
		CrCrossRe_fx, CrCrossIm_fx, q_CrCross,
		prototypeMtx_fx, MdecRe_fx, MdecIm_fx, &q_Mdec, 3277 ); // 3277 = 0.2 in Q14
		pop_wmops();
		}
		ELSE
		{
		@@ -2199,6 +2222,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		move16();
		UWord8 instantChange = 0;
		move16();
		push_wmops( "IDDB_detProcMat_bigLoop1_bigBranch2" );

		exp = sub( 31, hDiracDecBin->q_earlyPartEneCorrection );
		tmp1 = Sqrt32( hDiracDecBin->earlyPartEneCorrection_fx[bin], &exp );
		@@ -2274,8 +2298,11 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		move16();
		}
		}
		pop_wmops();
		}
		}
		pop_wmops();

		/* Aligning Q-factors of all bins in the processing matrices to a common Q-factor */
		minimum_s( q_processMtx, nBins, &hDiracDecBin->q_processMtx );
		minimum_s( q_processMtxPrev, nBins, &hDiracDecBin->q_processMtxPrev );
		@@ -2295,6 +2322,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		minimum_s( q_processMtxDec, nBins, &hDiracDecBin->q_processMtxDec );
		minimum_s( q_processMtxDecPrev, nBins, &hDiracDecBin->q_processMtxDecPrev );

		push_wmops( "IDDB_detProcMat_bigLoop2" );
		FOR( bin = 0; bin < nBins; bin++ )
		{
		FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
		@@ -2334,6 +2362,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		}
		}
		}
		pop_wmops();

		return;
		}
		@@ -3965,6 +3994,8 @@ static void chol2x2_fx(
		// 4611686 = Q62
		IF( outRe[1][1] == 0 )
		{
		push_wmops( "DIV3232_0" );
		#if !defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT ) \|\| 1
		outRe[0][1] = BASOP_Util_Divide3232_Scale_cadence( c_re, 4611686, &exp );
		move32();
		q_re2 = add( sub( 31, exp ), sub( q_c, 62 ) );
		@@ -3972,9 +4003,27 @@ static void chol2x2_fx(
		outIm[0][1] = BASOP_Util_Divide3232_Scale_cadence( -c_im, 4611686, &exp );
		move32();
		q_im = add( sub( 31, exp ), sub( q_c, 62 ) );

		#else /* Alternative code has to be verified first */
		CDK_ASSERT( 0 );
		// outRe[0][1] = BASOP_Util_Divide3232_Scale_cadence( c_re, 4611686, &exp );
		Word32 tmp1 = 1953125005;
		exp = 9;
		outRe[0][1] = Mpy_32_32( tmp1, c_re );

		move32();
		q_re2 = add( sub( 31, exp ), sub( q_c, 62 ) );

		// outIm[0][1] = BASOP_Util_Divide3232_Scale_cadence( -c_im, 4611686, &exp );
		outIm[0][1] = Mpy_32_32( tmp1, -c_im );
		move32();
		q_im = add( sub( 31, exp ), sub( q_c, 62 ) );
		#endif
		pop_wmops();
		}
		ELSE
		{
		#if !defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT ) \|\| 1
		outRe[0][1] = BASOP_Util_Divide3232_Scale_cadence( c_re, outRe[1][1], &exp );
		move32();
		q_re2 = add( sub( 31, exp ), sub( q_c, q_re1 ) );
		@@ -3982,6 +4031,21 @@ static void chol2x2_fx(
		outIm[0][1] = BASOP_Util_Divide3232_Scale_cadence( -c_im, outRe[1][1], &exp );
		move32();
		q_im = add( sub( 31, exp ), sub( q_c, q_re1 ) );
		#else /* Alternative code has to be verified first */
		{
		CDK_ASSERT( 0 );
		// outRe[0][1] = BASOP_Util_Divide3232_Scale_cadence( c_re, outRe[1][1], &exp );
		Word32 tmp1 = BASOP_Util_Divide3232_Scale_cadence( 0x7FFFFFFF, outRe[1][1], &exp );
		outRe[0][1] = Mpy_32_32( tmp1, c_re );
		move32();
		q_re2 = add( sub( 31, exp ), sub( q_c, q_re1 ) );

		// outIm[0][1] = BASOP_Util_Divide3232_Scale_cadence( -c_im, outRe[1][1], &exp );
		outIm[0][1] = Mpy_32_32( tmp1, -c_im );
		move32();
		q_im = add( sub( 31, exp ), sub( q_c, q_re1 ) );
		}
		#endif
		}
		if ( outRe[0][1] == 0 )
		{
		@@ -4092,6 +4156,9 @@ static void formulate2x2MixingMatrix_fx(
		Word32 temp;
		Word16 q_Pre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], q_Pim[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
		Word16 hdrm_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], hdrm_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS];

		push_wmops( "IDDB_detProcMat_bigLoop1_2x2Matrix" );

		set16_fx( hdrm_re[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
		set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
		set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
		@@ -4205,9 +4272,48 @@ static void formulate2x2MixingMatrix_fx(
		temp = Mpy_32_32( E_in2, 2147484 ); // 2147484 = 0.001f in Q31
		temp = L_max( temp, E_in1 );

		#if defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT )
		/* IF (E_out1 == 0) */
		{
		Ghat_fx[0] = 0;
		move32();
		exp = -19;
		}

		IF( E_out1 != 0 )
		{
		IF( temp == 0 )
		{
		// ASSERT for testing
		temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62
		exp = sub( exp, sub( q_eout, 62 ) );
		Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
		move32();
		}
		ELSE
		{
		#ifndef FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT_ADDEPSIILONOBSOLETE
		temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); // obsolete?
		exp_temp = add( exp_temp, sub( 31, q_eout ) );
		#else
		exp_temp = add( sub( 31, q_ein ), sub( 31, q_eout ) );
		#endif
		temp = Mpy_32_32( temp, E_out1 );
		temp = ISqrt32( temp, &exp_temp );
		Ghat_fx[0] = Mpy_32_32( temp, E_out1 );
		move32();
		exp = add( exp_temp, sub( 31, q_eout ) );
		}
		}

		#else
		IF( temp == 0 )
		{
		#ifdef FIX_1072_SET_DIV3232_RETURN_VAL
		temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62
		#else
		BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62
		#endif
		exp = sub( exp, sub( q_eout, 62 ) );
		}
		ELSE
		@@ -4219,12 +4325,53 @@ static void formulate2x2MixingMatrix_fx(
		}
		Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
		move32();
		#endif /* FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT */

		temp = Mpy_32_32( E_in1, 2147484 ); // 2147484 = 0.001f in Q31
		temp = L_max( temp, E_in2 ); // q_ein


		#if defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT )
		// IF( E_out2 == 0 )
		{
		Ghat_fx[1] = 0;
		exp1 = -19;
		move32();
		}

		IF( E_out2 != 0 )
		{
		IF( temp == 0 )
		{
		/* ASSERT for testing */
		temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62
		exp1 = sub( exp1, sub( q_eout, 62 ) );
		Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
		move32();
		}
		ELSE
		{
		#ifndef FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT_ADDEPSIILONOBSOLETE
		temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); // obsolete?
		exp1 = add( sub( 31, q_eout ), exp_temp );
		#else
		exp1 = add( sub( 31, q_eout ), sub( 31, q_ein ) );
		#endif
		temp = Mpy_32_32( E_out2, temp );
		temp = ISqrt32( temp, &exp1 );
		Ghat_fx[1] = Mpy_32_32( temp, E_out2 );
		move32();
		exp1 = add( sub( 31, q_eout ), exp1 );
		}
		}
		#else
		IF( temp == 0 )
		{
		#ifdef FIX_1072_SET_DIV3232_RETURN_VAL
		temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62
		#else
		BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62
		#endif
		exp1 = sub( exp1, sub( q_eout, 62 ) );
		}
		ELSE
		@@ -4236,6 +4383,7 @@ static void formulate2x2MixingMatrix_fx(
		}
		Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
		move32();
		#endif /* FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT */

		q_Ghat = sub( 31, s_max( exp, exp1 ) );

		@@ -4281,6 +4429,21 @@ static void formulate2x2MixingMatrix_fx(

		eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );

		#if defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT )
		IF( D_fx[0] == 0 )
		{
		temp = 2047986068;
		exp = 20;
		div_fx[0] = temp;
		move32();
		}
		ELSE
		{
		exp = sub( 31, q_D );
		div_fx[0] = ISqrt32( D_fx[0], &exp );
		move32();
		}
		#else
		IF( D_fx[0] == 0 )
		{
		temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12 in Q62
		@@ -4293,7 +4456,23 @@ static void formulate2x2MixingMatrix_fx(
		}
		div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
		move32();
		#endif /* FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT */

		#if defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT )
		IF( D_fx[1] == 0 )
		{
		temp = 2047986068;
		exp1 = 20;
		div_fx[1] = temp;
		move32();
		}
		ELSE
		{
		exp1 = sub( 31, q_D );
		div_fx[1] = ISqrt32( D_fx[1], &exp1 );
		move32();
		}
		#else
		IF( D_fx[1] == 0 )
		{
		temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp1 ); // 4611686 = 1e-12 in Q62
		@@ -4306,6 +4485,7 @@ static void formulate2x2MixingMatrix_fx(
		}
		div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
		move32();
		#endif /* FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT */

		q_div = sub( 31, s_max( exp, exp1 ) );

		@@ -4446,6 +4626,7 @@ static void formulate2x2MixingMatrix_fx(

		matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, Mre_fx, Mim_fx, q_M );

		pop_wmops();
		return;
		}