From dd2dc9c9d2b82b9a51bae57f11211c15c46cbedb Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Fri, 25 Apr 2025 09:56:03 +0200 Subject: [PATCH 1/8] replaced the down mix channel floating point code with 64 bit fix point. WMOPS went from total 150.00 263.631 365.398 357.230 to total 150.00 244.736 315.064 308.406 for ./IVAS_cod -mc 7_1_4 128000 48 scripts/testv/stv714MC48c.wav out.128 --- lib_enc/ivas_mc_param_enc_fx.c | 123 +++++++++++++++++++++++++++------ 1 file changed, 103 insertions(+), 20 deletions(-) diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index 0c996b25a..e8e3fd61a 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ b/lib_enc/ivas_mc_param_enc_fx.c @@ -29,7 +29,8 @@ the United Nations Convention on Contracts on the International Sales of Goods. *******************************************************************************************************/ - +#define MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE +#define CONVERT64( x_64, y_fx, y_e ) { Word16 norm; norm=W_norm(x_64); y_fx = W_extract_h( W_shl( x_64, norm ) ); y_e = sub( sub( 35, gb ), norm ); } #include #include #include "options.h" @@ -649,10 +650,15 @@ static void ivas_param_mc_param_est_enc_fx( Word32 *p_slot_frame_f_real_fx[MAX_CICP_CHANNELS]; /* Output of the MDFT FB - real part */ Word32 *p_slot_frame_f_imag_fx[MAX_CICP_CHANNELS]; /* Output of the MDFT FB - imag part */ +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE + Word64 dmx_real_64[PARAM_MC_MAX_TRANSPORT_CHANS]; + Word64 dmx_imag_64[PARAM_MC_MAX_TRANSPORT_CHANS]; +#else Word32 dmx_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS]; /* Downmix channel - Real Part */ Word16 dmx_real_e[PARAM_MC_MAX_TRANSPORT_CHANS]; /* Downmix channel - Real Part */ Word32 dmx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS]; /* Downmix channel - Imag Part */ Word16 dmx_imag_e[PARAM_MC_MAX_TRANSPORT_CHANS]; /* Downmix channel - Imag Part */ +#endif Word32 a_fx, b_fx, c_fx, d_fx; /* Tmp complex values */ Word16 a_e, b_e, c_e, d_e; /* Tmp complex values */ #ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE @@ -774,6 +780,26 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE + Word64 real_64; + Word64 imag_64; + + real_64 = 0; + imag_64 = 0; + move64(); + move64(); + FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ ) + { + real_64 = W_add( real_64, W_mult0_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ) ); + imag_64 = W_add( imag_64, W_mult0_32_32( slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ) ); + p_dmx_fac_fx++; + } + dmx_real_64[ch_idx1] = real_64; + dmx_imag_64[ch_idx1] = imag_64; + move64(); + move64(); + +#else #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE dmx_real_fx[ch_idx1] = 0; move32(); @@ -814,14 +840,33 @@ static void ivas_param_mc_param_est_enc_fx( move16(); move32(); move16(); +#endif + #endif } /* Cx for transport channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE + CONVERT64(dmx_real_64[ch_idx1], a_fx, a_e ); + CONVERT64(dmx_imag_64[ch_idx1], b_fx, b_e ); + move32(); + move32(); + move16(); + move16(); +#endif FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 ) { +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE + CONVERT64(dmx_real_64[ch_idx2], c_fx, c_e ); + CONVERT64(dmx_imag_64[ch_idx2], d_fx, d_e ); + move32(); + move32(); + move16(); + move16(); + +#else a_fx = dmx_real_fx[ch_idx1]; move32(); a_e = dmx_real_e[ch_idx1]; @@ -838,6 +883,7 @@ static void ivas_param_mc_param_est_enc_fx( move32(); d_e = dmx_imag_e[ch_idx2]; move16(); +#endif /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); @@ -858,7 +904,6 @@ static void ivas_param_mc_param_est_enc_fx( move32(); FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { - Word16 norm; c_fx = slot_frame_f_real_fx[ch_idx2][cur_cldfb_band]; d_fx = slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band]; move32(); @@ -870,10 +915,6 @@ static void ivas_param_mc_param_est_enc_fx( W_sub( W_mult0_32_32( a_fx, d_fx ), W_mult0_32_32( b_fx, c_fx ) ) ); move64(); - // convert the 64 bit fixpoint back into the 48 bit float format - norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); - Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); - Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm ); } } #else @@ -953,6 +994,26 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE + Word64 real_64; + Word64 imag_64; + + real_64 = 0; + imag_64 = 0; + move64(); + move64(); + + FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ ) + { + real_64 = W_add( real_64, W_mult0_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ) ); + imag_64 = W_add( imag_64, W_mult0_32_32( slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ) ); + p_dmx_fac_fx++; + } + dmx_real_64[ch_idx1] = real_64; + dmx_imag_64[ch_idx1] = imag_64; + move64(); + move64(); +#else #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE dmx_real_fx[ch_idx1] = 0; move32(); @@ -997,12 +1058,22 @@ static void ivas_param_mc_param_est_enc_fx( move32(); dmx_imag_e[ch_idx1] = imag_e; move16(); +#endif + #endif } /* Cx for transport channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE + CONVERT64( dmx_real_64[ch_idx1], a_fx, a_e ); + CONVERT64( dmx_imag_64[ch_idx1], b_fx, b_e ); + move32(); + move32(); + move16(); + move16(); +#else #ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE a_fx = dmx_real_fx[ch_idx1]; move32(); @@ -1012,9 +1083,24 @@ static void ivas_param_mc_param_est_enc_fx( move32(); b_e = dmx_imag_e[ch_idx1]; move16(); +#endif + #endif FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 ) { +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE + CONVERT64( dmx_real_64[ch_idx2], c_fx, c_e ); + CONVERT64( dmx_imag_64[ch_idx2], d_fx, d_e ); + move32(); + move32(); + move16(); + move16(); + + /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ + L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); + Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e, + &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] ); +#else #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE a_fx = dmx_real_fx[ch_idx1]; move32(); @@ -1042,6 +1128,7 @@ static void ivas_param_mc_param_est_enc_fx( L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, dmx_real_fx[ch_idx2] ), add( a_e, dmx_real_e[ch_idx2] ), Mpy_32_32( b_fx, dmx_imag_fx[ch_idx2] ), add( b_e, dmx_imag_e[ch_idx2] ), &tmp_e ); Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e, &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] ); +#endif #endif move32(); } @@ -1075,7 +1162,6 @@ static void ivas_param_mc_param_est_enc_fx( move32(); FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { - Word16 norm; c_fx = slot_frame_f_real_fx[ch_idx2][cur_cldfb_band]; d_fx = slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band]; move32(); @@ -1084,10 +1170,6 @@ static void ivas_param_mc_param_est_enc_fx( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], W_add( W_mult0_32_32( a_fx, c_fx ), W_mult0_32_32( b_fx, d_fx ) ) ); move64(); - // convert the 64 bit fixpoint back into the 48 bit float format - norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); - Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); - Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm ); } #else FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) @@ -1135,14 +1217,6 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { - Cy_sum_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; - move32(); - Cy_sum_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; - move16(); - Cy_sum_fx[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; - move32(); - Cy_sum_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; - move16(); #ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE Cy_sum_real_64[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; move64(); @@ -1153,6 +1227,14 @@ static void ivas_param_mc_param_est_enc_fx( Cy_sum_imag_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; move64(); #else + Cy_sum_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; + move32(); + Cy_sum_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; + move16(); + Cy_sum_fx[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; + move32(); + Cy_sum_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; + move16(); Cy_sum_imag_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; move32(); Cy_sum_imag_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; @@ -1174,7 +1256,7 @@ static void ivas_param_mc_param_est_enc_fx( move64(); Cy_sum_real_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; move64(); -#endif +#else Cy_sum_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; move32(); Cy_sum_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; @@ -1183,6 +1265,7 @@ static void ivas_param_mc_param_est_enc_fx( move32(); Cy_sum_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; move16(); +#endif } } } -- GitLab From dcb916fba996abeea2e53e2a2097283282b6dc6a Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Fri, 25 Apr 2025 10:03:19 +0200 Subject: [PATCH 2/8] applied the clang patch --- lib_enc/ivas_mc_param_enc_fx.c | 37 +++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index e8e3fd61a..270fbeebd 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ b/lib_enc/ivas_mc_param_enc_fx.c @@ -29,8 +29,14 @@ the United Nations Convention on Contracts on the International Sales of Goods. *******************************************************************************************************/ -#define MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE -#define CONVERT64( x_64, y_fx, y_e ) { Word16 norm; norm=W_norm(x_64); y_fx = W_extract_h( W_shl( x_64, norm ) ); y_e = sub( sub( 35, gb ), norm ); } +#define MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE +#define CONVERT64( x_64, y_fx, y_e ) \ + { \ + Word16 norm; \ + norm = W_norm( x_64 ); \ + y_fx = W_extract_h( W_shl( x_64, norm ) ); \ + y_e = sub( sub( 35, gb ), norm ); \ + } #include #include #include "options.h" @@ -650,7 +656,7 @@ static void ivas_param_mc_param_est_enc_fx( Word32 *p_slot_frame_f_real_fx[MAX_CICP_CHANNELS]; /* Output of the MDFT FB - real part */ Word32 *p_slot_frame_f_imag_fx[MAX_CICP_CHANNELS]; /* Output of the MDFT FB - imag part */ -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE Word64 dmx_real_64[PARAM_MC_MAX_TRANSPORT_CHANS]; Word64 dmx_imag_64[PARAM_MC_MAX_TRANSPORT_CHANS]; #else @@ -659,8 +665,8 @@ static void ivas_param_mc_param_est_enc_fx( Word32 dmx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS]; /* Downmix channel - Imag Part */ Word16 dmx_imag_e[PARAM_MC_MAX_TRANSPORT_CHANS]; /* Downmix channel - Imag Part */ #endif - Word32 a_fx, b_fx, c_fx, d_fx; /* Tmp complex values */ - Word16 a_e, b_e, c_e, d_e; /* Tmp complex values */ + Word32 a_fx, b_fx, c_fx, d_fx; /* Tmp complex values */ + Word16 a_e, b_e, c_e, d_e; /* Tmp complex values */ #ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE Word64 Cy_sum_real_64[PARAM_MC_MAX_PARAMETER_BANDS][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; Word64 Cy_sum_imag_64[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; @@ -780,7 +786,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE Word64 real_64; Word64 imag_64; @@ -848,9 +854,9 @@ static void ivas_param_mc_param_est_enc_fx( /* Cx for transport channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE - CONVERT64(dmx_real_64[ch_idx1], a_fx, a_e ); - CONVERT64(dmx_imag_64[ch_idx1], b_fx, b_e ); +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE + CONVERT64( dmx_real_64[ch_idx1], a_fx, a_e ); + CONVERT64( dmx_imag_64[ch_idx1], b_fx, b_e ); move32(); move32(); move16(); @@ -858,9 +864,9 @@ static void ivas_param_mc_param_est_enc_fx( #endif FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 ) { -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE - CONVERT64(dmx_real_64[ch_idx2], c_fx, c_e ); - CONVERT64(dmx_imag_64[ch_idx2], d_fx, d_e ); +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE + CONVERT64( dmx_real_64[ch_idx2], c_fx, c_e ); + CONVERT64( dmx_imag_64[ch_idx2], d_fx, d_e ); move32(); move32(); move16(); @@ -914,7 +920,6 @@ static void ivas_param_mc_param_est_enc_fx( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], W_sub( W_mult0_32_32( a_fx, d_fx ), W_mult0_32_32( b_fx, c_fx ) ) ); move64(); - } } #else @@ -994,7 +999,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE Word64 real_64; Word64 imag_64; @@ -1066,7 +1071,7 @@ static void ivas_param_mc_param_est_enc_fx( /* Cx for transport channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE CONVERT64( dmx_real_64[ch_idx1], a_fx, a_e ); CONVERT64( dmx_imag_64[ch_idx1], b_fx, b_e ); move32(); @@ -1088,7 +1093,7 @@ static void ivas_param_mc_param_est_enc_fx( #endif FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 ) { -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE +#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE CONVERT64( dmx_real_64[ch_idx2], c_fx, c_e ); CONVERT64( dmx_imag_64[ch_idx2], d_fx, d_e ); move32(); -- GitLab From 17ea3a460bdc04b7a9bd55d8cc947e792dfcaefe Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Fri, 25 Apr 2025 10:13:55 +0200 Subject: [PATCH 3/8] removed a warning. --- lib_enc/ivas_mc_param_enc_fx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index 270fbeebd..f7f274b48 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ b/lib_enc/ivas_mc_param_enc_fx.c @@ -752,7 +752,10 @@ static void ivas_param_mc_param_est_enc_fx( #if defined( IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE ) || defined( IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE ) Word16 gb = find_guarded_bits_fx( l_ts ); +#ifndef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE Word16 add20gb = add( 20, gb ); +#endif + #endif FOR( ts = start_ts; ts < num_time_slots; ts++ ) -- GitLab From 0f7493762b47726945321f8656eb9462934a8a6d Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Fri, 25 Apr 2025 12:49:17 +0200 Subject: [PATCH 4/8] moved the define for the merge request into options.h. --- lib_com/options.h | 1 + lib_enc/ivas_mc_param_enc_fx.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/options.h b/lib_com/options.h index cbc77b6e7..8cb4f0368 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -86,6 +86,7 @@ //#define HARM_SCE_INIT #define DIV32_OPT_NEWTON /* FhG: faster 32 by 32 bit division */ #define MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of Cy calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Obsoletes IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE. */ +#define MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of dmx calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Requires MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE. */ #define FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat /*FhG: reduces WMOPS - bit-exact*/ #define FIX_1439_SPEEDUP_stereo_icBWE_dec_fx /*FhG: reduces WMOPS - bit-exact*/ #define FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx /*FhG: reduces WMOPS - bit-exact*/ diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index f7f274b48..184a982d5 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ b/lib_enc/ivas_mc_param_enc_fx.c @@ -29,7 +29,6 @@ the United Nations Convention on Contracts on the International Sales of Goods. *******************************************************************************************************/ -#define MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE #define CONVERT64( x_64, y_fx, y_e ) \ { \ Word16 norm; \ -- GitLab From 5babfdc8bba989670937eef2dd6d8f3ae5e4a0f3 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Mon, 28 Apr 2025 10:32:56 +0200 Subject: [PATCH 5/8] macro names. --- lib_com/options.h | 2 +- lib_enc/ivas_mc_param_enc_fx.c | 60 ++++++++++++++++------------------ 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 8cb4f0368..1b5760f40 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -86,7 +86,7 @@ //#define HARM_SCE_INIT #define DIV32_OPT_NEWTON /* FhG: faster 32 by 32 bit division */ #define MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of Cy calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Obsoletes IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE. */ -#define MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of dmx calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Requires MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE. */ +#define MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of dmx calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Requires MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE. */ #define FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat /*FhG: reduces WMOPS - bit-exact*/ #define FIX_1439_SPEEDUP_stereo_icBWE_dec_fx /*FhG: reduces WMOPS - bit-exact*/ #define FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx /*FhG: reduces WMOPS - bit-exact*/ diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index 184a982d5..41d15c241 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ b/lib_enc/ivas_mc_param_enc_fx.c @@ -29,7 +29,16 @@ the United Nations Convention on Contracts on the International Sales of Goods. *******************************************************************************************************/ -#define CONVERT64( x_64, y_fx, y_e ) \ + +// helper macros to convert the 64 bitt accumulators into the 48 bit float format +#define CONVERT_CY( x_64, y_fx, y_e ) \ + { \ + Word16 norm; \ + norm = W_norm( x_64 ); \ + y_fx = W_extract_h( W_shl( x_64, norm ) ); \ + y_e = sub( sub( 62, gb ), norm ); \ + } +#define CONVERT_DMX( x_64, y_fx, y_e ) \ { \ Word16 norm; \ norm = W_norm( x_64 ); \ @@ -655,7 +664,7 @@ static void ivas_param_mc_param_est_enc_fx( Word32 *p_slot_frame_f_real_fx[MAX_CICP_CHANNELS]; /* Output of the MDFT FB - real part */ Word32 *p_slot_frame_f_imag_fx[MAX_CICP_CHANNELS]; /* Output of the MDFT FB - imag part */ -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE +#ifdef MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE Word64 dmx_real_64[PARAM_MC_MAX_TRANSPORT_CHANS]; Word64 dmx_imag_64[PARAM_MC_MAX_TRANSPORT_CHANS]; #else @@ -751,7 +760,7 @@ static void ivas_param_mc_param_est_enc_fx( #if defined( IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE ) || defined( IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE ) Word16 gb = find_guarded_bits_fx( l_ts ); -#ifndef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE +#ifndef MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE Word16 add20gb = add( 20, gb ); #endif @@ -788,7 +797,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE +#ifdef MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE Word64 real_64; Word64 imag_64; @@ -856,9 +865,9 @@ static void ivas_param_mc_param_est_enc_fx( /* Cx for transport channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE - CONVERT64( dmx_real_64[ch_idx1], a_fx, a_e ); - CONVERT64( dmx_imag_64[ch_idx1], b_fx, b_e ); +#ifdef MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE + CONVERT_DMX( dmx_real_64[ch_idx1], a_fx, a_e ); + CONVERT_DMX( dmx_imag_64[ch_idx1], b_fx, b_e ); move32(); move32(); move16(); @@ -866,9 +875,9 @@ static void ivas_param_mc_param_est_enc_fx( #endif FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 ) { -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE - CONVERT64( dmx_real_64[ch_idx2], c_fx, c_e ); - CONVERT64( dmx_imag_64[ch_idx2], d_fx, d_e ); +#ifdef MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE + CONVERT_DMX( dmx_real_64[ch_idx2], c_fx, c_e ); + CONVERT_DMX( dmx_imag_64[ch_idx2], d_fx, d_e ); move32(); move32(); move16(); @@ -1001,7 +1010,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE +#ifdef MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE Word64 real_64; Word64 imag_64; @@ -1073,9 +1082,9 @@ static void ivas_param_mc_param_est_enc_fx( /* Cx for transport channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 ) { -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE - CONVERT64( dmx_real_64[ch_idx1], a_fx, a_e ); - CONVERT64( dmx_imag_64[ch_idx1], b_fx, b_e ); +#ifdef MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE + CONVERT_DMX( dmx_real_64[ch_idx1], a_fx, a_e ); + CONVERT_DMX( dmx_imag_64[ch_idx1], b_fx, b_e ); move32(); move32(); move16(); @@ -1095,9 +1104,9 @@ static void ivas_param_mc_param_est_enc_fx( #endif FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 ) { -#ifdef MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE - CONVERT64( dmx_real_64[ch_idx2], c_fx, c_e ); - CONVERT64( dmx_imag_64[ch_idx2], d_fx, d_e ); +#ifdef MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE + CONVERT_DMX( dmx_real_64[ch_idx2], c_fx, c_e ); + CONVERT_DMX( dmx_imag_64[ch_idx2], d_fx, d_e ); move32(); move32(); move16(); @@ -1310,11 +1319,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( k = 0; k < nchan_input; ++k ) { #ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE - Word16 norm; - // convert the 64 bit fixpoint back into the 48 bit float format - norm = W_norm( Cy_sum_real_64[cur_param_band][k][k] ); - Nrg_fx[k] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][k][k], norm ) ); - Nrg_e[k] = sub( sub( 62, gb ), norm ); + CONVERT_CY( Cy_sum_real_64[cur_param_band][k][k], Nrg_fx[k], Nrg_e[k] ); #else Nrg_fx[k] = Cy_sum_fx[cur_param_band][k][k]; move32(); @@ -1451,10 +1456,7 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx2 = 0; ch_idx2 < MAX_CICP_CHANNELS; ch_idx2++ ) { - Word16 norm; - norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); - Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); - Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm ); + CONVERT_CY( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] ); } } } @@ -1497,11 +1499,7 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ch_idx2++ ) { #ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE - Word16 norm; - // convert the 64 bit fixpoint back into the 48 bit float format - norm = W_norm( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] ); - imag_part_fx = W_extract_h( W_shl( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); - imag_part_e = sub( sub( 62, gb ), norm ); + CONVERT_CY( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], imag_part_fx, imag_part_e ); #else imag_part_fx = Cy_sum_imag_fx[cur_param_band][ch_idx1][ch_idx2]; move32(); -- GitLab From 5251c4791f48fa8ded63157ade33db356f7a9c8b Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Mon, 28 Apr 2025 10:42:27 +0200 Subject: [PATCH 6/8] clang patch applied. --- lib_enc/ivas_mc_param_enc_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index 41d15c241..41d35f8e3 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ b/lib_enc/ivas_mc_param_enc_fx.c @@ -31,14 +31,14 @@ *******************************************************************************************************/ // helper macros to convert the 64 bitt accumulators into the 48 bit float format -#define CONVERT_CY( x_64, y_fx, y_e ) \ +#define CONVERT_CY( x_64, y_fx, y_e ) \ { \ Word16 norm; \ norm = W_norm( x_64 ); \ y_fx = W_extract_h( W_shl( x_64, norm ) ); \ y_e = sub( sub( 62, gb ), norm ); \ } -#define CONVERT_DMX( x_64, y_fx, y_e ) \ +#define CONVERT_DMX( x_64, y_fx, y_e ) \ { \ Word16 norm; \ norm = W_norm( x_64 ); \ -- GitLab From 43fb033f99043f6723c40178cb5916e183dd7ed5 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Tue, 29 Apr 2025 10:56:24 +0200 Subject: [PATCH 7/8] applied some patches. --- lib_enc/ivas_mc_param_enc_fx.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index 41d35f8e3..bf7e49cbb 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ b/lib_enc/ivas_mc_param_enc_fx.c @@ -36,14 +36,14 @@ Word16 norm; \ norm = W_norm( x_64 ); \ y_fx = W_extract_h( W_shl( x_64, norm ) ); \ - y_e = sub( sub( 62, gb ), norm ); \ + y_e = sub( sub62gb, norm ); \ } #define CONVERT_DMX( x_64, y_fx, y_e ) \ { \ Word16 norm; \ norm = W_norm( x_64 ); \ y_fx = W_extract_h( W_shl( x_64, norm ) ); \ - y_e = sub( sub( 35, gb ), norm ); \ + y_e = sub( sub35gb, norm ); \ } #include #include @@ -678,6 +678,8 @@ static void ivas_param_mc_param_est_enc_fx( #ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE Word64 Cy_sum_real_64[PARAM_MC_MAX_PARAMETER_BANDS][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; Word64 Cy_sum_imag_64[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; + Word16 sub62gb; + Word16 sub35gb; #else Word32 Cy_sum_imag_fx[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; Word16 Cy_sum_imag_e[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; @@ -764,6 +766,10 @@ static void ivas_param_mc_param_est_enc_fx( Word16 add20gb = add( 20, gb ); #endif +#endif +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + sub35gb = sub( 35, find_guarded_bits_fx( l_ts ) ); + sub62gb = sub( 62, find_guarded_bits_fx( l_ts ) ); #endif FOR( ts = start_ts; ts < num_time_slots; ts++ ) @@ -868,21 +874,12 @@ static void ivas_param_mc_param_est_enc_fx( #ifdef MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE CONVERT_DMX( dmx_real_64[ch_idx1], a_fx, a_e ); CONVERT_DMX( dmx_imag_64[ch_idx1], b_fx, b_e ); - move32(); - move32(); - move16(); - move16(); #endif FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 ) { #ifdef MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE CONVERT_DMX( dmx_real_64[ch_idx2], c_fx, c_e ); CONVERT_DMX( dmx_imag_64[ch_idx2], d_fx, d_e ); - move32(); - move32(); - move16(); - move16(); - #else a_fx = dmx_real_fx[ch_idx1]; move32(); @@ -1085,10 +1082,6 @@ static void ivas_param_mc_param_est_enc_fx( #ifdef MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE CONVERT_DMX( dmx_real_64[ch_idx1], a_fx, a_e ); CONVERT_DMX( dmx_imag_64[ch_idx1], b_fx, b_e ); - move32(); - move32(); - move16(); - move16(); #else #ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE a_fx = dmx_real_fx[ch_idx1]; @@ -1107,10 +1100,6 @@ static void ivas_param_mc_param_est_enc_fx( #ifdef MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE CONVERT_DMX( dmx_real_64[ch_idx2], c_fx, c_e ); CONVERT_DMX( dmx_imag_64[ch_idx2], d_fx, d_e ); - move32(); - move32(); - move16(); - move16(); /* (a-ib)(c+id) = ac + bd + i(ad-bc) */ L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e ); @@ -1320,6 +1309,8 @@ static void ivas_param_mc_param_est_enc_fx( { #ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE CONVERT_CY( Cy_sum_real_64[cur_param_band][k][k], Nrg_fx[k], Nrg_e[k] ); + move32(); + move16(); #else Nrg_fx[k] = Cy_sum_fx[cur_param_band][k][k]; move32(); @@ -1457,6 +1448,8 @@ static void ivas_param_mc_param_est_enc_fx( FOR( ch_idx2 = 0; ch_idx2 < MAX_CICP_CHANNELS; ch_idx2++ ) { CONVERT_CY( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] ); + move32(); + move16(); } } } @@ -1500,6 +1493,8 @@ static void ivas_param_mc_param_est_enc_fx( { #ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE CONVERT_CY( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], imag_part_fx, imag_part_e ); + move32(); + move16(); #else imag_part_fx = Cy_sum_imag_fx[cur_param_band][ch_idx1][ch_idx2]; move32(); -- GitLab From 38547d2ea286f5d0cadcb8a30ab33dbcae3f93b0 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Tue, 29 Apr 2025 12:09:06 +0200 Subject: [PATCH 8/8] applied the clang patch. --- lib_enc/ivas_mc_param_enc_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index bf7e49cbb..3058f87f3 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ b/lib_enc/ivas_mc_param_enc_fx.c @@ -36,14 +36,14 @@ Word16 norm; \ norm = W_norm( x_64 ); \ y_fx = W_extract_h( W_shl( x_64, norm ) ); \ - y_e = sub( sub62gb, norm ); \ + y_e = sub( sub62gb, norm ); \ } #define CONVERT_DMX( x_64, y_fx, y_e ) \ { \ Word16 norm; \ norm = W_norm( x_64 ); \ y_fx = W_extract_h( W_shl( x_64, norm ) ); \ - y_e = sub( sub35gb, norm ); \ + y_e = sub( sub35gb, norm ); \ } #include #include -- GitLab