Loading lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -154,4 +154,5 @@ #define FIX_ISSUE_1245 /* Ittiam: Fix for issue 1245: Basop Encoder: Audible noise for silent Stereo input DTX on @24.4 kbps, @32 kbps*/ #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ #define SVD_WMOPS_OPT /* Ittiam : SVD related optimizations */ #define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */ #endif lib_enc/ivas_mc_param_enc.c +83 −0 Original line number Diff line number Diff line Loading @@ -723,9 +723,16 @@ static void ivas_param_mc_param_est_enc_fx( } } #ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST Word16 gb = find_guarded_bits_fx( l_ts ); Word16 add20gb = add( 20, gb ); #endif FOR( ts = start_ts; ts < num_time_slots; ts++ ) { #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST Word16 gb = find_guarded_bits_fx( l_ts ); #endif ivas_fb_mixer_get_windowed_fr_fx( hParamMC->hFbMixer, pcm_in_fx, p_slot_frame_f_real_fx, p_slot_frame_f_imag_fx, l_ts, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans, gb ); ivas_fb_mixer_update_prior_input_fx( hParamMC->hFbMixer, pcm_in_fx, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans ); Loading Loading @@ -810,10 +817,25 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e ); b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e ); c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e ); d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e ); #else a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]); a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e); a_e = sub(add20gb, a_e); b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] ); b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e ); b_e = sub( add20gb, b_e ); c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] ); c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e ); c_e = sub( add20gb, c_e ); d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] ); d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e ); d_e = sub( add20gb, d_e ); #endif /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Loading Loading @@ -844,6 +866,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST dmx_real_fx[ch_idx1] = 0; move32(); dmx_real_e[ch_idx1] = 0; Loading @@ -863,13 +886,49 @@ static void ivas_param_mc_param_est_enc_fx( move32(); p_dmx_fac_fx++; } #else Word32 real_fx = L_add(0, 0); Word16 real_e = 0; move16(); Word32 imag_fx = L_add( 0, 0 ); Word16 imag_e = 0; move16(); FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ ) { L_tmp = Mpy_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); real_fx = BASOP_Util_Add_Mant32Exp( real_fx, real_e, L_tmp, add20gb, &real_e ); L_tmp = Mpy_32_32( slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); imag_fx = BASOP_Util_Add_Mant32Exp( imag_fx, imag_e, L_tmp, add( 20, gb ), &imag_e ); p_dmx_fac_fx++; } dmx_real_fx[ch_idx1] = real_fx; move32(); dmx_real_e[ch_idx1] = real_e; move16(); dmx_imag_fx[ch_idx1] = imag_fx; move32(); dmx_imag_e[ch_idx1] = imag_e; move16(); #endif } /* Cx for transport channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { #ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST a_fx = dmx_real_fx[ch_idx1]; move32(); a_e = dmx_real_e[ch_idx1]; move16(); b_fx = dmx_imag_fx[ch_idx1]; move32(); b_e = dmx_imag_e[ch_idx1]; move16(); #endif FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 ) { #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST a_fx = dmx_real_fx[ch_idx1]; move32(); a_e = dmx_real_e[ch_idx1]; Loading @@ -891,6 +950,12 @@ static void ivas_param_mc_param_est_enc_fx( L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e, &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] ); #else /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, dmx_real_fx[ch_idx2] ), add( a_e, dmx_real_e[ch_idx2] ), Mpy_32_32( b_fx, dmx_imag_fx[ch_idx2] ), add( b_e, dmx_imag_e[ch_idx2] ), &tmp_e ); Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e, &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] ); #endif move32(); } } Loading @@ -898,12 +963,30 @@ static void ivas_param_mc_param_est_enc_fx( /* Cy for input channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { #ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] ); a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e ); a_e = sub( add20gb, a_e ); b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] ); b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e ); b_e = sub( add20gb, b_e ); #endif FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e ); b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e ); c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e ); d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e ); #else c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] ); c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e ); c_e = sub( add20gb, c_e ); d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] ); d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e ); d_e = sub( add20gb, d_e ); #endif /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Loading Loading
lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -154,4 +154,5 @@ #define FIX_ISSUE_1245 /* Ittiam: Fix for issue 1245: Basop Encoder: Audible noise for silent Stereo input DTX on @24.4 kbps, @32 kbps*/ #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ #define SVD_WMOPS_OPT /* Ittiam : SVD related optimizations */ #define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */ #endif
lib_enc/ivas_mc_param_enc.c +83 −0 Original line number Diff line number Diff line Loading @@ -723,9 +723,16 @@ static void ivas_param_mc_param_est_enc_fx( } } #ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST Word16 gb = find_guarded_bits_fx( l_ts ); Word16 add20gb = add( 20, gb ); #endif FOR( ts = start_ts; ts < num_time_slots; ts++ ) { #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST Word16 gb = find_guarded_bits_fx( l_ts ); #endif ivas_fb_mixer_get_windowed_fr_fx( hParamMC->hFbMixer, pcm_in_fx, p_slot_frame_f_real_fx, p_slot_frame_f_imag_fx, l_ts, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans, gb ); ivas_fb_mixer_update_prior_input_fx( hParamMC->hFbMixer, pcm_in_fx, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans ); Loading Loading @@ -810,10 +817,25 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e ); b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e ); c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e ); d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e ); #else a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]); a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e); a_e = sub(add20gb, a_e); b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] ); b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e ); b_e = sub( add20gb, b_e ); c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] ); c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e ); c_e = sub( add20gb, c_e ); d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] ); d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e ); d_e = sub( add20gb, d_e ); #endif /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Loading Loading @@ -844,6 +866,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST dmx_real_fx[ch_idx1] = 0; move32(); dmx_real_e[ch_idx1] = 0; Loading @@ -863,13 +886,49 @@ static void ivas_param_mc_param_est_enc_fx( move32(); p_dmx_fac_fx++; } #else Word32 real_fx = L_add(0, 0); Word16 real_e = 0; move16(); Word32 imag_fx = L_add( 0, 0 ); Word16 imag_e = 0; move16(); FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ ) { L_tmp = Mpy_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); real_fx = BASOP_Util_Add_Mant32Exp( real_fx, real_e, L_tmp, add20gb, &real_e ); L_tmp = Mpy_32_32( slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); imag_fx = BASOP_Util_Add_Mant32Exp( imag_fx, imag_e, L_tmp, add( 20, gb ), &imag_e ); p_dmx_fac_fx++; } dmx_real_fx[ch_idx1] = real_fx; move32(); dmx_real_e[ch_idx1] = real_e; move16(); dmx_imag_fx[ch_idx1] = imag_fx; move32(); dmx_imag_e[ch_idx1] = imag_e; move16(); #endif } /* Cx for transport channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { #ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST a_fx = dmx_real_fx[ch_idx1]; move32(); a_e = dmx_real_e[ch_idx1]; move16(); b_fx = dmx_imag_fx[ch_idx1]; move32(); b_e = dmx_imag_e[ch_idx1]; move16(); #endif FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 ) { #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST a_fx = dmx_real_fx[ch_idx1]; move32(); a_e = dmx_real_e[ch_idx1]; Loading @@ -891,6 +950,12 @@ static void ivas_param_mc_param_est_enc_fx( L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e, &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] ); #else /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, dmx_real_fx[ch_idx2] ), add( a_e, dmx_real_e[ch_idx2] ), Mpy_32_32( b_fx, dmx_imag_fx[ch_idx2] ), add( b_e, dmx_imag_e[ch_idx2] ), &tmp_e ); Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e, &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] ); #endif move32(); } } Loading @@ -898,12 +963,30 @@ static void ivas_param_mc_param_est_enc_fx( /* Cy for input channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { #ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] ); a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e ); a_e = sub( add20gb, a_e ); b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] ); b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e ); b_e = sub( add20gb, b_e ); #endif FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e ); b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e ); c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e ); d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e ); #else c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] ); c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e ); c_e = sub( add20gb, c_e ); d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] ); d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e ); d_e = sub( add20gb, d_e ); #endif /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Loading