From fd003f8c14898f23130026e71cdcd51fa57cf44c Mon Sep 17 00:00:00 2001 From: Arthur Date: Tue, 11 Feb 2025 14:53:41 +0100 Subject: [PATCH 1/9] improve high complexity of param_mc_prm_est: MC/7-1-4/128kBit reduced by 166 WMOPS --- lib_com/options.h | 1 + lib_enc/ivas_mc_param_enc.c | 83 +++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/lib_com/options.h b/lib_com/options.h index 1b2c15f87..11a10b5b0 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -154,4 +154,5 @@ #define FIX_ISSUE_1245 /* Ittiam: Fix for issue 1245: Basop Encoder: Audible noise for silent Stereo input DTX on @24.4 kbps, @32 kbps*/ #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ #define SVD_WMOPS_OPT /* Ittiam : SVD related optimizations */ +#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */ #endif diff --git a/lib_enc/ivas_mc_param_enc.c b/lib_enc/ivas_mc_param_enc.c index 0577df340..e3eb59445 100644 --- a/lib_enc/ivas_mc_param_enc.c +++ b/lib_enc/ivas_mc_param_enc.c @@ -723,9 +723,16 @@ static void ivas_param_mc_param_est_enc_fx( } } +#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST + Word16 gb = find_guarded_bits_fx( l_ts ); + Word16 add20gb = add( 20, gb ); +#endif + FOR( ts = start_ts; ts < num_time_slots; ts++ ) { +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST Word16 gb = find_guarded_bits_fx( l_ts ); +#endif ivas_fb_mixer_get_windowed_fr_fx( hParamMC->hFbMixer, pcm_in_fx, p_slot_frame_f_real_fx, p_slot_frame_f_imag_fx, l_ts, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans, gb ); ivas_fb_mixer_update_prior_input_fx( hParamMC->hFbMixer, pcm_in_fx, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans ); @@ -810,10 +817,25 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST a_fx = 
BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e ); b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e ); c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e ); d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e ); +#else + a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]); + a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e); + a_e = sub(add20gb, a_e); + b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] ); + b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e ); + b_e = sub( add20gb, b_e ); + c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] ); + c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e ); + c_e = sub( add20gb, c_e ); + d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] ); + d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e ); + d_e = sub( add20gb, d_e ); +#endif /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); @@ -844,6 +866,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST dmx_real_fx[ch_idx1] = 0; move32(); dmx_real_e[ch_idx1] = 0; @@ -863,13 +886,49 @@ static void ivas_param_mc_param_est_enc_fx( move32(); p_dmx_fac_fx++; } +#else + Word32 real_fx = L_add(0, 0); + Word16 real_e = 0; + move16(); + Word32 imag_fx = L_add( 0, 0 ); + Word16 imag_e = 0; + move16(); + + FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ ) + { + L_tmp = Mpy_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); + real_fx = BASOP_Util_Add_Mant32Exp( real_fx, real_e, L_tmp, add20gb, &real_e ); + L_tmp = Mpy_32_32( 
slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); + imag_fx = BASOP_Util_Add_Mant32Exp( imag_fx, imag_e, L_tmp, add( 20, gb ), &imag_e ); + p_dmx_fac_fx++; + } + dmx_real_fx[ch_idx1] = real_fx; + move32(); + dmx_real_e[ch_idx1] = real_e; + move16(); + dmx_imag_fx[ch_idx1] = imag_fx; + move32(); + dmx_imag_e[ch_idx1] = imag_e; + move16(); +#endif } /* Cx for transport channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { +#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST + a_fx = dmx_real_fx[ch_idx1]; + move32(); + a_e = dmx_real_e[ch_idx1]; + move16(); + b_fx = dmx_imag_fx[ch_idx1]; + move32(); + b_e = dmx_imag_e[ch_idx1]; + move16(); +#endif FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 ) { +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST a_fx = dmx_real_fx[ch_idx1]; move32(); a_e = dmx_real_e[ch_idx1]; @@ -891,6 +950,12 @@ static void ivas_param_mc_param_est_enc_fx( L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e, &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] ); +#else + /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ + L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, dmx_real_fx[ch_idx2] ), add( a_e, dmx_real_e[ch_idx2] ), Mpy_32_32( b_fx, dmx_imag_fx[ch_idx2] ), add( b_e, dmx_imag_e[ch_idx2] ), &tmp_e ); + Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e, + &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] ); +#endif move32(); } } @@ -898,12 +963,30 @@ static void ivas_param_mc_param_est_enc_fx( /* Cy for input channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { +#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST + a_e = norm_l( 
slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] ); + a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e ); + a_e = sub( add20gb, a_e ); + b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] ); + b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e ); + b_e = sub( add20gb, b_e ); +#endif FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e ); b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e ); c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e ); d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e ); +#else + + c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] ); + c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e ); + c_e = sub( add20gb, c_e ); + d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] ); + d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e ); + d_e = sub( add20gb, d_e ); +#endif /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); -- GitLab From 31ef1423be708164aeae8527d161e4098115f331 Mon Sep 17 00:00:00 2001 From: Arthur Date: Tue, 11 Feb 2025 15:26:46 +0100 Subject: [PATCH 2/9] fix clang-format-issues --- lib_enc/ivas_mc_param_enc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib_enc/ivas_mc_param_enc.c b/lib_enc/ivas_mc_param_enc.c index e3eb59445..438451ceb 100644 --- a/lib_enc/ivas_mc_param_enc.c +++ b/lib_enc/ivas_mc_param_enc.c @@ -823,9 +823,9 @@ static void ivas_param_mc_param_est_enc_fx( c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e ); d_fx
= BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e ); #else - a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]); - a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e); - a_e = sub(add20gb, a_e); + a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] ); + a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e ); + a_e = sub( add20gb, a_e ); b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] ); b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e ); b_e = sub( add20gb, b_e ); @@ -887,7 +887,7 @@ static void ivas_param_mc_param_est_enc_fx( p_dmx_fac_fx++; } #else - Word32 real_fx = L_add(0, 0); + Word32 real_fx = L_add( 0, 0 ); Word16 real_e = 0; move16(); Word32 imag_fx = L_add( 0, 0 ); -- GitLab From 91f046f47e652e6addc52b1fca20391f0087325f Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 13 Feb 2025 22:07:15 +0100 Subject: [PATCH 3/9] completed tuning of MC param: no more BE now --- lib_com/options.h | 2 +- lib_enc/ivas_mc_param_enc.c | 59 ++++++++++++++++++++++++++++++------- 2 files changed, 50 insertions(+), 11 deletions(-) mode change 100644 => 100755 lib_enc/ivas_mc_param_enc.c diff --git a/lib_com/options.h b/lib_com/options.h index b7a3eaddb..7d6004fcc 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -158,5 +158,5 @@ #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ #define SVD_WMOPS_OPT /* Ittiam : SVD related optimizations */ #define NONBE_FIX_1087_OOB_SBA_DTX_RS /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID bitrate switching decoding */ -#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */ +#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version */ #endif 
diff --git a/lib_enc/ivas_mc_param_enc.c b/lib_enc/ivas_mc_param_enc.c old mode 100644 new mode 100755 index 438451ceb..b99a8ef74 --- a/lib_enc/ivas_mc_param_enc.c +++ b/lib_enc/ivas_mc_param_enc.c @@ -815,6 +815,24 @@ static void ivas_param_mc_param_est_enc_fx( /* Cy for input channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { +#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST + a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] ); + a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e ); + a_e = sub( add20gb, a_e ); + if ( a_fx == 0 ) + { + a_e = 0; + move16(); + } + b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] ); + b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e ); + b_e = sub( add20gb, b_e ); + if ( b_fx == 0 ) + { + b_e = 0; + move16(); + } +#endif FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST @@ -823,20 +841,23 @@ static void ivas_param_mc_param_est_enc_fx( c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e ); d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e ); #else - a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] ); - a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e ); - a_e = sub( add20gb, a_e ); - b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] ); - b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e ); - b_e = sub( add20gb, b_e ); c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] ); c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e ); c_e = sub( add20gb, c_e ); + if ( c_fx == 0 ) + { + c_e = 0; + move16(); + } d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] ); d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e ); d_e = sub( add20gb, d_e ); + if ( d_fx == 0 ) + { + d_e = 0; + move16(); + } #endif - /* (a-ib)(c+id) 
= ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2], @@ -899,7 +920,7 @@ static void ivas_param_mc_param_est_enc_fx( L_tmp = Mpy_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); real_fx = BASOP_Util_Add_Mant32Exp( real_fx, real_e, L_tmp, add20gb, &real_e ); L_tmp = Mpy_32_32( slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); - imag_fx = BASOP_Util_Add_Mant32Exp( imag_fx, imag_e, L_tmp, add( 20, gb ), &imag_e ); + imag_fx = BASOP_Util_Add_Mant32Exp( imag_fx, imag_e, L_tmp, add20gb, &imag_e ); p_dmx_fac_fx++; } dmx_real_fx[ch_idx1] = real_fx; @@ -967,9 +988,19 @@ static void ivas_param_mc_param_est_enc_fx( a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] ); a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e ); a_e = sub( add20gb, a_e ); + if ( a_fx == 0 ) + { + a_e = 0; + move16(); + } b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] ); b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e ); b_e = sub( add20gb, b_e ); + if ( b_fx == 0 ) + { + b_e = 0; + move16(); + } #endif FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { @@ -979,15 +1010,23 @@ static void ivas_param_mc_param_est_enc_fx( c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e ); d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e ); #else - c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] ); c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e ); c_e = sub( add20gb, c_e ); + if ( c_fx == 0 ) + { + c_e = 0; + move16(); + } d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] ); d_fx = L_shl( 
slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e ); d_e = sub( add20gb, d_e ); + if ( d_fx == 0 ) + { + d_e = 0; + move16(); + } #endif - /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e, -- GitLab From d9eb2c888d202ebc3e1f51595d9903fbaf8e5813 Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Mon, 17 Feb 2025 10:44:28 +0100 Subject: [PATCH 4/9] deactivate switches for testing --- lib_com/options.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 2fc2f64b4..c87cd23da 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -159,8 +159,8 @@ #define FIX_920_IGF_INIT_ERROR /* FhG: issue 920: fix bitrate mismatch in initial IGF config to avoid error message in same cases */ #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ #define SVD_WMOPS_OPT /* Ittiam : SVD related optimizations */ -#define NONBE_FIX_1087_OOB_SBA_DTX_RS /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID bitrate switching decoding */ -#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version */ +// #define NONBE_FIX_1087_OOB_SBA_DTX_RS /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID bitrate switching decoding */ +// #define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version */ #define NONBE_FIX_1087_OOB_SBA_DTX_RS /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID 
bitrate switching decoding */ #define FIX_ISSUE_1279 /* VA: correction of wrong scaling update */ #define FIX_ISSUE_1247 -- GitLab From b949948e2b31ca8013cc26ca8e6a3b8cf8cca02d Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 20 Feb 2025 10:23:33 +0100 Subject: [PATCH 5/9] version now with 2 macros for NONBE and BE code parts --- lib_com/options.h | 4 +++- lib_enc/ivas_mc_param_enc.c | 44 ++++++++++++++++++++++++++++--------- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index c87cd23da..9ee0d46fc 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -160,7 +160,9 @@ #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ #define SVD_WMOPS_OPT /* Ittiam : SVD related optimizations */ // #define NONBE_FIX_1087_OOB_SBA_DTX_RS /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID bitrate switching decoding */ -// #define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version */ +/* Both following 2 macros (IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST*) are independent from each other, they refer to different code blocks */ +#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */ +#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version */ #define NONBE_FIX_1087_OOB_SBA_DTX_RS /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID bitrate switching decoding */ #define FIX_ISSUE_1279 /* VA: correction of wrong scaling update */ #define FIX_ISSUE_1247 diff --git a/lib_enc/ivas_mc_param_enc.c b/lib_enc/ivas_mc_param_enc.c index e205a015e..b5bc92016 100755 --- a/lib_enc/ivas_mc_param_enc.c +++ b/lib_enc/ivas_mc_param_enc.c @@ 
-723,14 +723,14 @@ static void ivas_param_mc_param_est_enc_fx( } } -#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST +#if defined( IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE ) || defined( IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE ) Word16 gb = find_guarded_bits_fx( l_ts ); Word16 add20gb = add( 20, gb ); #endif FOR( ts = start_ts; ts < num_time_slots; ts++ ) { -#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST +#if !defined( IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE ) && !defined( IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE ) Word16 gb = find_guarded_bits_fx( l_ts ); #endif ivas_fb_mixer_get_windowed_fr_fx( hParamMC->hFbMixer, pcm_in_fx, p_slot_frame_f_real_fx, p_slot_frame_f_imag_fx, l_ts, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans, gb ); @@ -741,6 +741,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( i = 0; i < nchan_input; i++ ) { pcm_in_fx[i] += l_ts; + move32(); } /* Computing the downmix */ FOR( cur_param_band = 0; cur_param_band < hParamMC->max_param_band_abs_cov; cur_param_band++ ) @@ -758,6 +759,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE dmx_real_fx[ch_idx1] = 0; move32(); dmx_real_e[ch_idx1] = 0; @@ -776,6 +778,28 @@ static void ivas_param_mc_param_est_enc_fx( move32(); p_dmx_fac_fx++; } +#else + Word32 real_fx = L_add(0,0); + Word16 real_e = add(0, 0); + Word32 imag_fx = L_add( 0, 0 ); + Word16 imag_e = add( 0, 0 ); + FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ ) + { + L_tmp = Mpy_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); + real_fx = BASOP_Util_Add_Mant32Exp( real_fx, real_e, L_tmp, add20gb, &real_e ); + L_tmp = Mpy_32_32( slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ); + imag_fx = BASOP_Util_Add_Mant32Exp( imag_fx, imag_e, L_tmp, add20gb, &imag_e ); + p_dmx_fac_fx++; + } + dmx_real_fx[ch_idx1] = real_fx; + dmx_real_e[ch_idx1] = real_e; + dmx_imag_fx[ch_idx1] = 
imag_fx; + dmx_imag_e[ch_idx1] = imag_e; + move32(); + move16(); + move32(); + move16(); +#endif } /* Cx for transport channels */ @@ -815,7 +839,7 @@ static void ivas_param_mc_param_est_enc_fx( /* Cy for input channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { -#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST +#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] ); a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e ); a_e = sub( add20gb, a_e ); @@ -835,7 +859,7 @@ static void ivas_param_mc_param_est_enc_fx( #endif FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { -#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e ); b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e ); c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e ); @@ -887,7 +911,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { -#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE dmx_real_fx[ch_idx1] = 0; move32(); dmx_real_e[ch_idx1] = 0; @@ -937,7 +961,7 @@ static void ivas_param_mc_param_est_enc_fx( /* Cx for transport channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { -#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST +#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE a_fx = dmx_real_fx[ch_idx1]; move32(); a_e = dmx_real_e[ch_idx1]; @@ -949,7 +973,7 @@ static void ivas_param_mc_param_est_enc_fx( #endif FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 ) { -#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE a_fx = dmx_real_fx[ch_idx1]; move32(); a_e = 
dmx_real_e[ch_idx1]; @@ -984,7 +1008,7 @@ static void ivas_param_mc_param_est_enc_fx( /* Cy for input channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { -#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST +#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] ); a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e ); a_e = sub( add20gb, a_e ); @@ -1004,7 +1028,7 @@ static void ivas_param_mc_param_est_enc_fx( #endif FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { -#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST +#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e ); b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e ); c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e ); @@ -1031,7 +1055,7 @@ static void ivas_param_mc_param_est_enc_fx( L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e, &Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] ); - move32(); +pri move32(); } } } -- GitLab From d07b495424684c5238177f60d920ca6a2584cb30 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 20 Feb 2025 10:25:46 +0100 Subject: [PATCH 6/9] fix stupid typo --- lib_enc/ivas_mc_param_enc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_enc/ivas_mc_param_enc.c b/lib_enc/ivas_mc_param_enc.c index b5bc92016..248f80d81 100755 --- a/lib_enc/ivas_mc_param_enc.c +++ b/lib_enc/ivas_mc_param_enc.c @@ -1055,7 +1055,7 @@ static void ivas_param_mc_param_est_enc_fx( L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), 
add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e, &Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] ); -pri move32(); + move32(); } } } -- GitLab From cfa5fa996e7797541a2b2bd9fe04305f77ea7379 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 20 Feb 2025 10:28:27 +0100 Subject: [PATCH 7/9] fix clang-format-issues --- lib_enc/ivas_mc_param_enc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) mode change 100755 => 100644 lib_enc/ivas_mc_param_enc.c diff --git a/lib_enc/ivas_mc_param_enc.c b/lib_enc/ivas_mc_param_enc.c old mode 100755 new mode 100644 index 248f80d81..c50f58ff3 --- a/lib_enc/ivas_mc_param_enc.c +++ b/lib_enc/ivas_mc_param_enc.c @@ -779,8 +779,8 @@ static void ivas_param_mc_param_est_enc_fx( p_dmx_fac_fx++; } #else - Word32 real_fx = L_add(0,0); - Word16 real_e = add(0, 0); + Word32 real_fx = L_add( 0, 0 ); + Word16 real_e = add( 0, 0 ); Word32 imag_fx = L_add( 0, 0 ); Word16 imag_e = add( 0, 0 ); FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ ) -- GitLab From 62d1d624acb3f90d7b7b3405ef026972dd794a33 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 20 Feb 2025 11:32:40 +0100 Subject: [PATCH 8/9] deactivated temporarily the NONBE part of this MR to check the pipeline results --- lib_com/options.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/options.h b/lib_com/options.h index 9ee0d46fc..bee60af30 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -162,7 +162,7 @@ // #define NONBE_FIX_1087_OOB_SBA_DTX_RS /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID bitrate switching decoding */ /* Both following 2 macros (IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST*) are independent from each other, they refer to different code blocks */ #define 
IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */ -#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version */ +//#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version */ #define NONBE_FIX_1087_OOB_SBA_DTX_RS /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID bitrate switching decoding */ #define FIX_ISSUE_1279 /* VA: correction of wrong scaling update */ #define FIX_ISSUE_1247 -- GitLab From 552ffd749e2a208554bcf06ed5afb13cd9acbc85 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 20 Feb 2025 14:57:16 +0100 Subject: [PATCH 9/9] reactivated NONBE modifications to see pipeline effects --- lib_com/options.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/options.h b/lib_com/options.h index 006a3811d..1a2e77c95 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -162,7 +162,7 @@ // #define NONBE_FIX_1087_OOB_SBA_DTX_RS /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID bitrate switching decoding */ /* Both following 2 macros (IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST*) are independent from each other, they refer to different code blocks */ #define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */ -//#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version */ +#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version */ #define NONBE_FIX_1087_OOB_SBA_DTX_RS /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID bitrate switching decoding 
*/ #define FIX_ISSUE_1279 /* VA: correction of wrong scaling update */ #define FIX_ISSUE_1247 -- GitLab