improve high complexity of param_mc_prm_est: MC/7-1-4/128kBit reduced by 166 WMOPS (7acfb9db) · Commits · SA4 / Audio / IVAS BASOP

lib_com/options.h

+13 −4

Original line number	Diff line number	Diff line
		@@ -68,9 +68,18 @@
		#endif

		/* Note: each compile switch (FIX_1101_...) is independent from the other ones */
		//#define OPT_STEREO_32KBPS_V1 /* Optimization made in stereo decoding path for 32kbps decoding */
		#define OPT_AVOID_STATE_BUF_RESCALE /* Optimization made to avoid rescale of synth state buffer */
		#define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx /* FhG: WMOPS tuning, nonbe */
		#define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /* FhG: WMOPS tuning, nonbe */
		#define FIX_1379_MASA_ANGLE_ROUND
		#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_USE_MADD_ADD_WEIGHTS /* FhG: Defines 1.0f-weight variables, uses Madd operation instead of L_add_sat */
		#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_SPLIT_LOOPS /* FhG: Splits single loop with IF-statements into two low-complex loops */
		#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_HQ_CONSTANTS /* FhG: IMPROVE PRECISION: Uses 1/6 and 1/20 in full-precise Q31 constants instead of Q15 */
		#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_USE_UNIQUE_SHL /* FhG: Uses unique shift amount in each loop iteration */
		#define FIX_11_1_IVAS_SPAR_DEC_UPMIXER_SF_RND_COEFFS /* FhG ivas_spar_com.c: Zeroes very small negative coeffs via L_shr_r (was L_shr) */
		#define FIX_ISSUE_1237 /* VA: replacement of Copy_Scale_sig_16_32_DEPREC() that are doing 16 bits left shift by Copy_Scale_sig_16_32_no_sat() */
		#define FIX_ISSUE_1237_KEEP_EVS_BE /* VA: Fix to keep EVS bitexactness to 26.444 */
		#define FIX_ISSUE_1214 /* Ittiam: Fix for issue 1214: Energy leakage in IGF tiles for MDCT-stereo @64kbps SWB*/
		#define FIX_881_HILBERT_FILTER /* VA: improve the precision of the Hilbert filter to remove 2kHz unwanted tone */
		#define FIX_ISSUE_1245 /* Ittiam: Fix for issue 1245: Basop Encoder: Audible noise for silent Stereo input DTX on @24.4 kbps, @32 kbps*/
		#define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */
		#define SVD_WMOPS_OPT /* Ittiam : SVD related optimizations */
		#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */
		#endif

lib_enc/ivas_mc_param_enc_fx.c

+83 −0

Original line number	Diff line number	Diff line
		@@ -720,9 +720,16 @@ static void ivas_param_mc_param_est_enc_fx(
		}
		}

		#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
		Word16 gb = find_guarded_bits_fx( l_ts );
		Word16 add20gb = add( 20, gb );
		#endif

		FOR( ts = start_ts; ts < num_time_slots; ts++ )
		{
		#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
		Word16 gb = find_guarded_bits_fx( l_ts );
		#endif
		ivas_fb_mixer_get_windowed_fr_fx( hParamMC->hFbMixer, pcm_in_fx, p_slot_frame_f_real_fx, p_slot_frame_f_imag_fx, l_ts, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans, gb );
		ivas_fb_mixer_update_prior_input_fx( hParamMC->hFbMixer, pcm_in_fx, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans );

		@@ -807,10 +814,25 @@ static void ivas_param_mc_param_est_enc_fx(
		{
		FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
		{
		#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
		a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e );
		b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e );
		c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e );
		d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e );
		#else
		a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]);
		a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e);
		a_e = sub(add20gb, a_e);
		b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] );
		b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e );
		b_e = sub( add20gb, b_e );
		c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] );
		c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e );
		c_e = sub( add20gb, c_e );
		d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] );
		d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e );
		d_e = sub( add20gb, d_e );
		#endif

		/* (a-ib)(c+id) = ac + bd + i(ad-bc) */
		L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e );
		@@ -841,6 +863,7 @@ static void ivas_param_mc_param_est_enc_fx(

		FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 )
		{
		#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
		dmx_real_fx[ch_idx1] = 0;
		move32();
		dmx_real_e[ch_idx1] = 0;
		@@ -860,13 +883,49 @@ static void ivas_param_mc_param_est_enc_fx(
		move32();
		p_dmx_fac_fx++;
		}
		#else
		Word32 real_fx = L_add(0, 0);
		Word16 real_e = 0;
		move16();
		Word32 imag_fx = L_add( 0, 0 );
		Word16 imag_e = 0;
		move16();

		FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ )
		{
		L_tmp = Mpy_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) );
		real_fx = BASOP_Util_Add_Mant32Exp( real_fx, real_e, L_tmp, add20gb, &real_e );
		L_tmp = Mpy_32_32( slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) );
		imag_fx = BASOP_Util_Add_Mant32Exp( imag_fx, imag_e, L_tmp, add( 20, gb ), &imag_e );
		p_dmx_fac_fx++;
		}
		dmx_real_fx[ch_idx1] = real_fx;
		move32();
		dmx_real_e[ch_idx1] = real_e;
		move16();
		dmx_imag_fx[ch_idx1] = imag_fx;
		move32();
		dmx_imag_e[ch_idx1] = imag_e;
		move16();
		#endif
		}

		/* Cx for transport channels */
		FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 )
		{
		#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
		a_fx = dmx_real_fx[ch_idx1];
		move32();
		a_e = dmx_real_e[ch_idx1];
		move16();
		b_fx = dmx_imag_fx[ch_idx1];
		move32();
		b_e = dmx_imag_e[ch_idx1];
		move16();
		#endif
		FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 )
		{
		#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
		a_fx = dmx_real_fx[ch_idx1];
		move32();
		a_e = dmx_real_e[ch_idx1];
		@@ -888,6 +947,12 @@ static void ivas_param_mc_param_est_enc_fx(
		L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e );
		Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e,
		&Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] );
		#else
		/* (a-ib)(c+id) = ac + bd + i(ad-bc) */
		L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, dmx_real_fx[ch_idx2] ), add( a_e, dmx_real_e[ch_idx2] ), Mpy_32_32( b_fx, dmx_imag_fx[ch_idx2] ), add( b_e, dmx_imag_e[ch_idx2] ), &tmp_e );
		Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e,
		&Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] );
		#endif
		move32();
		}
		}
		@@ -895,12 +960,30 @@ static void ivas_param_mc_param_est_enc_fx(
		/* Cy for input channels */
		FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 )
		{
		#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
		a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] );
		a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e );
		a_e = sub( add20gb, a_e );
		b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] );
		b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e );
		b_e = sub( add20gb, b_e );
		#endif
		FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
		{
		#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
		a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e );
		b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e );
		c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e );
		d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e );
		#else

		c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] );
		c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e );
		c_e = sub( add20gb, c_e );
		d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] );
		d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e );
		d_e = sub( add20gb, d_e );
		#endif

		/* (a-ib)(c+id) = ac + bd + i(ad-bc) */
		L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e );