deactivate Speedup 14, activate Speedup 13 for testing (3b77dc2d) · Commits · SA4 / Audio / IVAS BASOP

lib_rend/ivas_dirac_dec_binaural_functions_fx.c

+61 −14

Original line number	Diff line number	Diff line
		@@ -49,7 +49,6 @@
		// NULL: 179.292


		//#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurrence --> DONT USE
		//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE
		//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE
		//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx // .1 WMOPS --> USE
		@@ -58,12 +57,18 @@
		//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS //Quite bad diffs --> DONT USE
		//#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 0 WMOPS --> DONT USE
		//#define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE
		//#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS --> USE? (pipe 48851 fails --> DONTUSEYET)
		//#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS --> USE? (pipe 48851 fails --> DONTUSEYET)
		//#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS --> USE? (pipe 48851 fails --> DONTUSEYET)
		//#define FIX_1326_SPEEDUP_09 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET
		//#define FIX_1326_SPEEDUP_10 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET
		//#define FIX_1326_SPEEDUP_11 // tiny speedup // .1 WMOPS pipe 48851 fails --> DONTUSEYET
		//#define FIX_1326_SPEEDUP_12 // tiny speedup // <.1 WMOPS --> DONTUSE
		//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS -->USE? (pipe coming)
		#define FIX_1326_SPEEDUP_14 // test
		//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE? (pipe tbd)
		//#define FIX_1326_SPEEDUP_14 // test whether any of these paths is really necessary, then assert --> DONTUSE (pipes red, asserts!)
		//#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd // .1 WMOPS --> USE? (pipe tbd)
		//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .18 WMOPS --> USE? (pipe tbd)




		/* Table of four Word16 constants. The entries are approximately 32768/n for n = 1..4,
		 * with the first entry held at 32767 (the largest positive 16-bit value).
		 * NOTE(review): presumably 1/n in Q15, indexed by (slot count - 1) - confirm against its users. */
		Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };

		/*-------------------------------------------------------------------------
		@@ -927,9 +932,9 @@ static void ivas_dirac_dec_binaural_internal_fx(
		}

		test();
		push_wmops( "IDRBI cov matrices" );
		push_wmops( "IDRBI cov matrices (IDRBCM)" );
		ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, st_ivas->hMasaIsmData, q_inp );
		pop_wmops(); /*push_wmops( "IDRBI cov matrices" );*/
		pop_wmops(); /*push_wmops( "IDRBI cov matrices (IDRBCM)" );*/

		IF( EQ_32( config_data.ivas_format, ISM_FORMAT ) )
		{
		@@ -1152,7 +1157,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric

		nBins = hSpatParamRendCom->num_freq_bands; /* Actually bins */
		move16();

		push_wmops( "IDRBCM inits" );
		q_earlyPartEneCorrection = s_min( Q31, add( getScaleFactor32( hDiracDecBin->earlyPartEneCorrection_fx, nBins ), hDiracDecBin->q_earlyPartEneCorrection ) );
		scale_sig32( hDiracDecBin->earlyPartEneCorrection_fx, nBins, sub( q_earlyPartEneCorrection, hDiracDecBin->q_earlyPartEneCorrection ) );
		hDiracDecBin->q_earlyPartEneCorrection = q_earlyPartEneCorrection;
		@@ -1186,6 +1191,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
		gainCache[idx].azi = -1000; /* Use -1000 as value for uninitialized cache. */
		move16();
		}
		pop_wmops(); /*push_wmops( "IDRBCM inits" );*/

		/* Determine EQ for low bit rates (13.2 and 16.4 kbps) */
		applyLowBitRateEQ = 0;
		@@ -1198,11 +1204,13 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
		move16();
		IF( EQ_32( ivas_total_brate, IVAS_16k4 ) )
		{
		push_wmops( "IDRBCM Determine EQ_low_rates" );
		FOR( bin = 0; bin < LOW_BIT_RATE_BINAURAL_EQ_BINS; bin++ )
		{
		lowBitRateEQ_fx[bin + LOW_BIT_RATE_BINAURAL_EQ_OFFSET] = L_add( L_shr( lowBitRateBinauralEQ_fx[bin], 1 ), ONE_IN_Q30 ); // Q31
		move32();
		}
		pop_wmops(); /*push_wmops( "IDRBCM Determine EQ_low_rates" );*/
		}
		ELSE
		{
		@@ -1221,6 +1229,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric

		exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below

		push_wmops( "IDRBCM input Matrix" );
		/* Calculate input covariance matrix */
		FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ )
		{
		@@ -1255,7 +1264,9 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
		move32();
		}
		}
		pop_wmops(); /*push_wmops( "IDRBCM input Matrix" );*/

		push_wmops( "IDRBCM apply EQ_low" );
		/* Apply EQ at low bit rates */
		IF( applyLowBitRateEQ != 0 )
		{
		@@ -1308,7 +1319,9 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
		}
		}
		}
		pop_wmops(); /*push_wmops( "IDRBCM apply EQ_low" );*/

		push_wmops( "IDRBCM target matrix" );
		/* Determine target covariance matrix containing target binaural properties */
		FOR( bin = 0; bin < nBins; bin++ )
		{
		@@ -1468,12 +1481,14 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
		Word32 hrtfEneCenter_fx, hrtfEneSides_fx, hrtfEneRealized_fx;
		Word16 eneCorrectionFactor_fx, eneCorrectionFactor_e;
		Word16 w1_fx, w2_fx, w3_fx, eq_fx;

		#ifdef FIX_1326_SPEEDUP_15
		hrtfEneCenter_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ), lImagp_fx, lImagp_fx ), lRealp_fx, lRealp_fx ); //Q25
		#else
		hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), // Q25
		L_add( Mpy_32_32( lImagp_fx, lImagp_fx ), // Q25
		L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), // Q25
		Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) ); // Q25

		#endif
		/* Spread coherence is synthesized as coherent sources at 30 degree horizontal spacing.
		* The following formulas determine the gains for these sources.
		* spreadCoh = 0: Only panning
		@@ -1502,11 +1517,14 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric

		/* Apply the gain for the left source of the three coherent sources */
		getDirectPartGains_fx( bin, add( aziDeg, 30 ), eleDeg, &lRealpTmp_fx, &lImagpTmp_fx, &rRealpTmp_fx, &rImagpTmp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex + 1], isHeadtracked );

		#ifdef FIX_1326_SPEEDUP_15
		hrtfEneSides_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), rImagpTmp_fx, rImagpTmp_fx ), lImagpTmp_fx, lImagpTmp_fx ), lRealpTmp_fx, lRealpTmp_fx ); // Q25
		#else
		hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ), // Q25
		L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ), // Q25
		L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), // Q25
		Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) ); // Q25
		#endif
		lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) ); // Q25
		lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) ); // Q25
		rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) ); // Q25
		@@ -1594,12 +1612,21 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
		move16();
		}

		#ifdef FIX_1326_SPEEDUP_15
		hrtfEne_fx[0] = Madd_32_32( Mpy_32_32( lRealp_fx, lRealp_fx ), lImagp_fx, lImagp_fx ); // Q( 2*q_lr - 31 )
		hrtfEne_fx[1] = Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ); // Q( 2*q_lr - 31 )
		move32();
		move32();
		hrtfCrossRe_fx = Madd_32_32( Mpy_32_32( lRealp_fx, rRealp_fx ), lImagp_fx, rImagp_fx ); // Q( 2*q_lr - 31 )
		hrtfCrossIm_fx = Madd_32_32( Mpy_32_32( -lImagp_fx, rRealp_fx ), lRealp_fx, rImagp_fx ); // Q( 2*q_lr - 31 )
		#else
		hrtfEne_fx[0] = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), Mpy_32_32( lImagp_fx, lImagp_fx ) ); // Q( 2*q_lr - 31 )
		hrtfEne_fx[1] = L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), Mpy_32_32( rImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 )
		move32();
		move32();
		hrtfCrossRe_fx = L_add( Mpy_32_32( lRealp_fx, rRealp_fx ), Mpy_32_32( lImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 )
		hrtfCrossIm_fx = L_add( Mpy_32_32( -lImagp_fx, rRealp_fx ), Mpy_32_32( lRealp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 )
		#endif

		/* Add direct part (1 or 2) covariance matrix */
		dirEne_fx = Mpy_32_32( ratio_fx, meanEnePerCh_fx ); // Q(q_meanEnePerCh - 1)
		@@ -1674,7 +1701,11 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
		}
		ELSE
		{
		#ifdef FIX_1326_SPEEDUP_15
		hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( Madd_32_16( L_shl( surCoh_fx, 16 ), hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] );
		#else
		hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( L_add( Mpy_32_16_1( hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), L_shl( surCoh_fx, 16 ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] );
		#endif
		}
		move32();
		}
		@@ -1690,6 +1721,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
		hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29
		move32();
		}
		pop_wmops();/*push_wmops( "IDRBCM target matrix" );*/

		test();
		/* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */
		@@ -3413,10 +3445,25 @@ static void eig2x2_fx(
		tmp2 = Mpy_32_32( s_fx, s_fx );
		q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 );


		#ifdef FIX_1326_SPEEDUP_16

		{
		Word16 tmp2_exp;
		Word32 eps_tmp;
		tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &tmp2_exp );

		//Add epsilon if relevant
		eps_tmp = L_shl_sat( epsilon_mant, sub(epsilon_exp, tmp2_exp ));
		tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) );

		exp_tmp3 = add( tmp2_exp, 1 );
		}
		#else
		tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
		q_tmp2 = sub( 31, q_tmp2 );

		tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
		#endif

		#if 1
		tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );