diff --git a/lib_com/options.h b/lib_com/options.h index 8f404dd74afa69f16dd3ce8bbcc3518fbfb0f77c..2133b86ac48a78abced5458bde234e1bf1123cea 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -72,4 +72,7 @@ #define OPT_AVOID_STATE_BUF_RESCALE /* Optimization made to avoid rescale of synth state buffer */ #define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, nonbe*/ #define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, nonbe*/ +/* The following two macros (IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST*) are independent of each other; they refer to different code blocks */ +#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */ +#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version */ #endif diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index 039bbdf981e6cff0c4212c8e3cc607cc52b39d8f..92cd33e2fc931684a72aa706351119bd78995fdb 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ b/lib_enc/ivas_mc_param_enc_fx.c @@ -720,9 +720,16 @@ static void ivas_param_mc_param_est_enc_fx( } } +#if defined( IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE ) || defined( IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE ) + Word16 gb = find_guarded_bits_fx( l_ts ); + Word16 add20gb = add( 20, gb ); +#endif + FOR( ts = start_ts; ts < num_time_slots; ts++ ) { +#if !defined( IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE ) && !defined( IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE ) Word16 gb = find_guarded_bits_fx( l_ts ); +#endif ivas_fb_mixer_get_windowed_fr_fx( hParamMC->hFbMixer, pcm_in_fx, p_slot_frame_f_real_fx, p_slot_frame_f_imag_fx, l_ts, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans, gb ); ivas_fb_mixer_update_prior_input_fx( hParamMC->hFbMixer, pcm_in_fx, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans ); @@ -731,6 +738,7 @@ static void 
ivas_param_mc_param_est_enc_fx( FOR( i = 0; i < nchan_input; i++ ) { pcm_in_fx[i] += l_ts; + move32(); } /* Computing the downmix */ FOR( cur_param_band = 0; cur_param_band < hParamMC->max_param_band_abs_cov; cur_param_band++ ) @@ -748,6 +756,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE dmx_real_fx[ch_idx1] = 0; move32(); dmx_real_e[ch_idx1] = 0; @@ -766,6 +775,28 @@ static void ivas_param_mc_param_est_enc_fx( move32(); p_dmx_fac_fx++; } +#else + Word32 real_fx = L_add( 0, 0 ); + Word16 real_e = add( 0, 0 ); + Word32 imag_fx = L_add( 0, 0 ); + Word16 imag_e = add( 0, 0 ); + FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ ) + { + L_tmp = Mpy_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); + real_fx = BASOP_Util_Add_Mant32Exp( real_fx, real_e, L_tmp, add20gb, &real_e ); + L_tmp = Mpy_32_32( slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); + imag_fx = BASOP_Util_Add_Mant32Exp( imag_fx, imag_e, L_tmp, add20gb, &imag_e ); + p_dmx_fac_fx++; + } + dmx_real_fx[ch_idx1] = real_fx; + dmx_real_e[ch_idx1] = real_e; + dmx_imag_fx[ch_idx1] = imag_fx; + dmx_imag_e[ch_idx1] = imag_e; + move32(); + move16(); + move32(); + move16(); +#endif } /* Cx for transport channels */ @@ -805,13 +836,49 @@ static void ivas_param_mc_param_est_enc_fx( /* Cy for input channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { +#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE + a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] ); + a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e ); + a_e = sub( add20gb, a_e ); + if ( a_fx == 0 ) + { + a_e = 0; + move16(); + } + b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] ); + b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e ); + b_e = sub( add20gb, b_e ); + if ( b_fx == 0 ) + { + b_e = 0; + move16(); + } +#endif FOR( ch_idx2 = ch_idx1; 
ch_idx2 < nchan_input; ++ch_idx2 ) { +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e ); b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e ); c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e ); d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e ); - +#else + c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] ); + c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e ); + c_e = sub( add20gb, c_e ); + if ( c_fx == 0 ) + { + c_e = 0; + move16(); + } + d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] ); + d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e ); + d_e = sub( add20gb, d_e ); + if ( d_fx == 0 ) + { + d_e = 0; + move16(); + } +#endif /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2], @@ -841,6 +908,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE dmx_real_fx[ch_idx1] = 0; move32(); dmx_real_e[ch_idx1] = 0; @@ -860,13 +928,49 @@ static void ivas_param_mc_param_est_enc_fx( move32(); p_dmx_fac_fx++; } +#else + Word32 real_fx = L_add( 0, 0 ); + Word16 real_e = 0; + move16(); + Word32 imag_fx = L_add( 0, 0 ); + Word16 imag_e = 0; + move16(); + + FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ ) + { + L_tmp = Mpy_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); + real_fx = BASOP_Util_Add_Mant32Exp( real_fx, real_e, L_tmp, add20gb, &real_e 
); + L_tmp = Mpy_32_32( slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); + imag_fx = BASOP_Util_Add_Mant32Exp( imag_fx, imag_e, L_tmp, add20gb, &imag_e ); + p_dmx_fac_fx++; + } + dmx_real_fx[ch_idx1] = real_fx; + move32(); + dmx_real_e[ch_idx1] = real_e; + move16(); + dmx_imag_fx[ch_idx1] = imag_fx; + move32(); + dmx_imag_e[ch_idx1] = imag_e; + move16(); +#endif } /* Cx for transport channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { +#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE + a_fx = dmx_real_fx[ch_idx1]; + move32(); + a_e = dmx_real_e[ch_idx1]; + move16(); + b_fx = dmx_imag_fx[ch_idx1]; + move32(); + b_e = dmx_imag_e[ch_idx1]; + move16(); +#endif FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 ) { +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE a_fx = dmx_real_fx[ch_idx1]; move32(); a_e = dmx_real_e[ch_idx1]; @@ -888,6 +992,12 @@ static void ivas_param_mc_param_est_enc_fx( L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e, &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] ); +#else + /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ + L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, dmx_real_fx[ch_idx2] ), add( a_e, dmx_real_e[ch_idx2] ), Mpy_32_32( b_fx, dmx_imag_fx[ch_idx2] ), add( b_e, dmx_imag_e[ch_idx2] ), &tmp_e ); + Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e, + &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] ); +#endif move32(); } } @@ -895,13 +1005,49 @@ static void ivas_param_mc_param_est_enc_fx( /* Cy for input channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { +#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE + a_e 
= norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] ); + a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e ); + a_e = sub( add20gb, a_e ); + if ( a_fx == 0 ) + { + a_e = 0; + move16(); + } + b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] ); + b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e ); + b_e = sub( add20gb, b_e ); + if ( b_fx == 0 ) + { + b_e = 0; + move16(); + } +#endif FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e ); b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e ); c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e ); d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e ); - +#else + c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] ); + c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e ); + c_e = sub( add20gb, c_e ); + if ( c_fx == 0 ) + { + c_e = 0; + move16(); + } + d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] ); + d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e ); + d_e = sub( add20gb, d_e ); + if ( d_fx == 0 ) + { + d_e = 0; + move16(); + } +#endif /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e,