From c25105a8f3c6d710462a10a80ab5aa563b5a00a4 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Mon, 7 Apr 2025 11:46:08 +0200 Subject: [PATCH 1/5] replacing the BASOP_Util_Add_Mant32Exp() for Cy_sum and Cy_sum_imag in ivas_param_mc_param_est_enc_fx(). --- lib_enc/ivas_mc_param_enc_fx.c | 99 ++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 5 deletions(-) diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index 54fe249cd..67037e0ad 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ b/lib_enc/ivas_mc_param_enc_fx.c @@ -30,6 +30,7 @@ *******************************************************************************************************/ +#define MERGE_REQUEST_1462 #include #include #include "options.h" @@ -59,7 +60,7 @@ static void ivas_param_mc_range_encoder_fx( const Word16 *seq_in, const Word16 n #define ATTACKTHRESHOLD_E 4 -static void ivas_param_mc_quantize_ilds_fx( PARAM_MC_ENC_HANDLE hParamMC, Word32 Cy_fx[MAX_CICP_CHANNELS][MAX_CICP_CHANNELS], Word16 Cy_e[MAX_CICP_CHANNELS][MAX_CICP_CHANNELS], Word32 Cx[PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS], Word16 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS], const Word16 freq_idx, const Word16 nchan_input, const Word16 nchan_transport, Word16 *ILD_idx_out, Word16 ILD_q[PARAM_MC_SZ_ILD_MAP] ); +static void ivas_param_mc_quantize_ilds_fx( PARAM_MC_ENC_HANDLE hParamMC, Word32 Cy_fx[MAX_CICP_CHANNELS][MAX_CICP_CHANNELS], Word16 Cy_e[MAX_CICP_CHANNELS][MAX_CICP_CHANNELS], Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS], Word16 Cx_e[PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS], const Word16 freq_idx, const Word16 nchan_input, const Word16 nchan_transport, Word16 *ILD_idx_out, Word16 ILD_q[PARAM_MC_SZ_ILD_MAP] ); static void ivas_param_mc_parameter_quantizer_fx( const Word32 *x, const Word16 *x_e, const Word16 L, const Word16 sz_quantizer, const Word16 *quantizer_fx, const Word16 Q_quant, Word16 *quant_idx, 
Word16 *y ); @@ -655,8 +656,13 @@ static void ivas_param_mc_param_est_enc_fx( Word16 dmx_imag_e[PARAM_MC_MAX_TRANSPORT_CHANS]; /* Downmix channel - Imag Part */ Word32 a_fx, b_fx, c_fx, d_fx; /* Tmp complex values */ Word16 a_e, b_e, c_e, d_e; /* Tmp complex values */ +#ifdef MERGE_REQUEST_1462 + Word64 Cy_sum_real_64[PARAM_MC_MAX_PARAMETER_BANDS][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; + Word64 Cy_sum_imag_64[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; +#else Word32 Cy_sum_imag_fx[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; Word16 Cy_sum_imag_e[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; +#endif Word32 Cx_sum_imag_fx[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS]; Word16 Cx_sum_imag_e[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS]; Word32 real_part_fx, imag_part_fx; @@ -685,13 +691,26 @@ static void ivas_param_mc_param_est_enc_fx( move16(); band_step = 1; move16(); +#ifdef MERGE_REQUEST_1462 + FOR( cur_param_band = 0; cur_param_band < PARAM_MC_MAX_PARAMETER_BANDS; cur_param_band++ ) + { + FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ ) + { + set64_fx( Cy_sum_real_64[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); + } + } +#endif FOR( cur_param_band = 0; cur_param_band < PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC; cur_param_band++ ) { FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ ) { - set32_fx( Cy_sum_imag_fx[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); - set16_fx( Cy_sum_imag_e[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); +#ifdef MERGE_REQUEST_1462 + set64_fx( Cy_sum_imag_64[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); +#else + set32_fx( Cy_sum_fx[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); + set16_fx( Cy_sum_e[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); +#endif } FOR( ch_idx1 = 0; ch_idx1 < PARAM_MC_MAX_TRANSPORT_CHANS; 
ch_idx1++ ) @@ -832,7 +851,33 @@ static void ivas_param_mc_param_est_enc_fx( move32(); } } - +#ifdef MERGE_REQUEST_1462 + FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) + { + a_fx = slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]; + b_fx = slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band]; + move32(); + FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) + { + Word16 norm; + c_fx = slot_frame_f_real_fx[ch_idx2][cur_cldfb_band]; + d_fx = slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band]; + move32(); +// Conjugated complex multiplication (a-ib)(c+id) = ac+bd + i(ad-bc) + Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], + W_add( W_mult0_32_32( a_fx, c_fx ), W_mult0_32_32( b_fx, d_fx ) ) ); + move64(); + Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], + W_sub( W_mult0_32_32( a_fx, d_fx ), W_mult0_32_32( b_fx, c_fx ) ) ); + move64(); + + // convert the 64 bit fixpoint back into the 48 bit float format + norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); + Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); + Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm ); + } + } +#else /* Cy for input channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { @@ -890,6 +935,7 @@ static void ivas_param_mc_param_est_enc_fx( move32(); } } +#endif } } @@ -1023,6 +1069,28 @@ static void ivas_param_mc_param_est_enc_fx( move16(); } #endif +#ifdef MERGE_REQUEST_1462 + a_fx = slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]; + b_fx = slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band]; + move32(); + move32(); + FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) + { + Word16 norm; + c_fx = slot_frame_f_real_fx[ch_idx2][cur_cldfb_band]; + d_fx = slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band]; + move32(); + move32(); +// Conjugated complex multiplication 
(a-ib)(c+id) = ac+bd + i(ad-bc) + Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], + W_add( W_mult0_32_32( a_fx, c_fx ), W_mult0_32_32( b_fx, d_fx ) ) ); + move64(); + // convert the 64 bit fixpoint back into the 48 bit float format + norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); + Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); + Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm ); + } +#else FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE @@ -1054,6 +1122,7 @@ static void ivas_param_mc_param_est_enc_fx( &Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] ); move32(); } +#endif } } } @@ -1075,6 +1144,12 @@ static void ivas_param_mc_param_est_enc_fx( move32(); Cy_sum_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; move16(); +#ifdef MERGE_REQUEST_1462 + Cy_sum_imag_64[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; + move64(); + Cy_sum_imag_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; + move64(); +#else Cy_sum_imag_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; move32(); Cy_sum_imag_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; @@ -1083,6 +1158,7 @@ static void ivas_param_mc_param_est_enc_fx( move32(); Cy_sum_imag_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; move16(); +#endif } } @@ -1208,10 +1284,16 @@ static void ivas_param_mc_param_est_enc_fx( Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2], &Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2] ); move32(); +#ifdef MERGE_REQUEST_1462 + Cy_sum_imag_64[cur_param_band - 1][ch_idx1][ch_idx2] = W_add( Cy_sum_imag_64[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] ); + move64(); + +#else Cy_sum_imag_fx[cur_param_band - 1][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( 
Cy_sum_imag_fx[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_imag_e[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_imag_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_imag_e[cur_param_band][ch_idx1][ch_idx2], &Cy_sum_imag_e[cur_param_band - 1][ch_idx1][ch_idx2] ); move32(); +#endif } } } @@ -1284,15 +1366,22 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ch_idx2++ ) { + Word16 norm; real_part_fx = Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2]; move32(); real_part_e = Cy_sum_e[cur_param_band][ch_idx1][ch_idx2]; move16(); +#ifdef MERGE_REQUEST_1462 + // convert the 64 bit fixpoint back into the 48 bit float format + norm = W_norm( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] ); + imag_part_fx = W_extract_h( W_shl( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); + imag_part_e = sub( (62-gb ), norm ); +#else imag_part_fx = Cy_sum_imag_fx[cur_param_band][ch_idx1][ch_idx2]; move32(); imag_part_e = Cy_sum_imag_e[cur_param_band][ch_idx1][ch_idx2]; move16(); - +#endif real_part_fx = Mpy_32_32( real_part_fx, real_part_fx ); imag_part_fx = Mpy_32_32( imag_part_fx, imag_part_fx ); -- GitLab From 3f8794eb82a9c036fd20dd7451c00fd2166ccc5d Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Mon, 7 Apr 2025 11:52:36 +0200 Subject: [PATCH 2/5] applied the clang patch. 
--- lib_enc/ivas_mc_param_enc_fx.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index 67037e0ad..ef1e93405 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ b/lib_enc/ivas_mc_param_enc_fx.c @@ -30,7 +30,7 @@ *******************************************************************************************************/ -#define MERGE_REQUEST_1462 +#define MERGE_REQUEST_1462 #include #include #include "options.h" @@ -691,7 +691,7 @@ static void ivas_param_mc_param_est_enc_fx( move16(); band_step = 1; move16(); -#ifdef MERGE_REQUEST_1462 +#ifdef MERGE_REQUEST_1462 FOR( cur_param_band = 0; cur_param_band < PARAM_MC_MAX_PARAMETER_BANDS; cur_param_band++ ) { FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ ) @@ -705,7 +705,7 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ ) { -#ifdef MERGE_REQUEST_1462 +#ifdef MERGE_REQUEST_1462 set64_fx( Cy_sum_imag_64[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); #else set32_fx( Cy_sum_fx[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); @@ -863,12 +863,12 @@ static void ivas_param_mc_param_est_enc_fx( c_fx = slot_frame_f_real_fx[ch_idx2][cur_cldfb_band]; d_fx = slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band]; move32(); -// Conjugated complex multiplication (a-ib)(c+id) = ac+bd + i(ad-bc) - Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], - W_add( W_mult0_32_32( a_fx, c_fx ), W_mult0_32_32( b_fx, d_fx ) ) ); + // Conjugated complex multiplication (a-ib)(c+id) = ac+bd + i(ad-bc) + Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], + W_add( W_mult0_32_32( a_fx, c_fx ), W_mult0_32_32( b_fx, d_fx ) ) ); move64(); - Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], - W_sub( W_mult0_32_32( a_fx, d_fx ), 
W_mult0_32_32( b_fx, c_fx ) ) ); + Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], + W_sub( W_mult0_32_32( a_fx, d_fx ), W_mult0_32_32( b_fx, c_fx ) ) ); move64(); // convert the 64 bit fixpoint back into the 48 bit float format @@ -1081,9 +1081,9 @@ static void ivas_param_mc_param_est_enc_fx( d_fx = slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band]; move32(); move32(); -// Conjugated complex multiplication (a-ib)(c+id) = ac+bd + i(ad-bc) - Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], - W_add( W_mult0_32_32( a_fx, c_fx ), W_mult0_32_32( b_fx, d_fx ) ) ); + // Conjugated complex multiplication (a-ib)(c+id) = ac+bd + i(ad-bc) + Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], + W_add( W_mult0_32_32( a_fx, c_fx ), W_mult0_32_32( b_fx, d_fx ) ) ); move64(); // convert the 64 bit fixpoint back into the 48 bit float format norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); @@ -1375,7 +1375,7 @@ static void ivas_param_mc_param_est_enc_fx( // convert the 64 bit fixpoint back into the 48 bit float format norm = W_norm( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] ); imag_part_fx = W_extract_h( W_shl( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); - imag_part_e = sub( (62-gb ), norm ); + imag_part_e = sub( sub( 62, gb ), norm ); #else imag_part_fx = Cy_sum_imag_fx[cur_param_band][ch_idx1][ch_idx2]; move32(); -- GitLab From f3f44d9fad0a858398c13400e72915b21f9ba559 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Wed, 9 Apr 2025 09:51:06 +0200 Subject: [PATCH 3/5] made the define name a bit clearer --- lib_enc/ivas_mc_param_enc_fx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index ef1e93405..6ac7c3428 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ 
b/lib_enc/ivas_mc_param_enc_fx.c @@ -30,7 +30,7 @@ *******************************************************************************************************/ -#define MERGE_REQUEST_1462 +#define MERGE_REQUEST_1378_TO_ISSUE_1462 #include #include #include "options.h" @@ -656,7 +656,7 @@ static void ivas_param_mc_param_est_enc_fx( Word16 dmx_imag_e[PARAM_MC_MAX_TRANSPORT_CHANS]; /* Downmix channel - Imag Part */ Word32 a_fx, b_fx, c_fx, d_fx; /* Tmp complex values */ Word16 a_e, b_e, c_e, d_e; /* Tmp complex values */ -#ifdef MERGE_REQUEST_1462 +#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 Word64 Cy_sum_real_64[PARAM_MC_MAX_PARAMETER_BANDS][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; Word64 Cy_sum_imag_64[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; #else @@ -691,7 +691,7 @@ static void ivas_param_mc_param_est_enc_fx( move16(); band_step = 1; move16(); -#ifdef MERGE_REQUEST_1462 +#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 FOR( cur_param_band = 0; cur_param_band < PARAM_MC_MAX_PARAMETER_BANDS; cur_param_band++ ) { FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ ) @@ -705,7 +705,7 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ ) { -#ifdef MERGE_REQUEST_1462 +#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 set64_fx( Cy_sum_imag_64[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); #else set32_fx( Cy_sum_fx[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); @@ -851,7 +851,7 @@ static void ivas_param_mc_param_est_enc_fx( move32(); } } -#ifdef MERGE_REQUEST_1462 +#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { a_fx = slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]; @@ -1069,7 +1069,7 @@ static void ivas_param_mc_param_est_enc_fx( move16(); } #endif -#ifdef MERGE_REQUEST_1462 +#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 a_fx = slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]; b_fx = slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band]; move32(); @@ -1144,7 +1144,7 @@ 
static void ivas_param_mc_param_est_enc_fx( move32(); Cy_sum_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; move16(); -#ifdef MERGE_REQUEST_1462 +#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 Cy_sum_imag_64[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; move64(); Cy_sum_imag_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; @@ -1284,7 +1284,7 @@ static void ivas_param_mc_param_est_enc_fx( Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2], &Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2] ); move32(); -#ifdef MERGE_REQUEST_1462 +#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 Cy_sum_imag_64[cur_param_band - 1][ch_idx1][ch_idx2] = W_add( Cy_sum_imag_64[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] ); move64(); @@ -1371,7 +1371,7 @@ static void ivas_param_mc_param_est_enc_fx( move32(); real_part_e = Cy_sum_e[cur_param_band][ch_idx1][ch_idx2]; move16(); -#ifdef MERGE_REQUEST_1462 +#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 // convert the 64 bit fixpoint back into the 48 bit float format norm = W_norm( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] ); imag_part_fx = W_extract_h( W_shl( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); -- GitLab From 740f4864ad452d7ddab3eed1e00868aaef140e27 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Wed, 9 Apr 2025 12:20:36 +0200 Subject: [PATCH 4/5] Properly named the define in the options.h file. 
--- lib_com/options.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index eb0054c53..e8a44d873 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -78,10 +78,10 @@ #define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, nonbe*/ /* Both following 2 macros (IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST*) are independent from each other, they refer to different code blocks */ #define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */ -#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version */ +//#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version. Obsoleted by MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE. */ #define HARM_PUSH_BIT #define HARM_ENC_INIT //#define HARM_SCE_INIT #define DIV32_OPT_NEWTON /* FhG: faster 32 by 32 bit division */ - +#define MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of Cy calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Obsoletes IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE. */ #endif -- GitLab From e586b634bcd9608e2a73c7fb5dadb4938f4c4c59 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Thu, 10 Apr 2025 11:26:04 +0200 Subject: [PATCH 5/5] Updated the macro names in the ivas_mc_param_enc_fx.c file. 
--- lib_enc/ivas_mc_param_enc_fx.c | 76 ++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 18 deletions(-) diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index 6ac7c3428..d0316be0f 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ b/lib_enc/ivas_mc_param_enc_fx.c @@ -30,7 +30,6 @@ *******************************************************************************************************/ -#define MERGE_REQUEST_1378_TO_ISSUE_1462 #include #include #include "options.h" @@ -656,7 +655,7 @@ static void ivas_param_mc_param_est_enc_fx( Word16 dmx_imag_e[PARAM_MC_MAX_TRANSPORT_CHANS]; /* Downmix channel - Imag Part */ Word32 a_fx, b_fx, c_fx, d_fx; /* Tmp complex values */ Word16 a_e, b_e, c_e, d_e; /* Tmp complex values */ -#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE Word64 Cy_sum_real_64[PARAM_MC_MAX_PARAMETER_BANDS][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; Word64 Cy_sum_imag_64[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; #else @@ -691,7 +690,7 @@ static void ivas_param_mc_param_est_enc_fx( move16(); band_step = 1; move16(); -#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE FOR( cur_param_band = 0; cur_param_band < PARAM_MC_MAX_PARAMETER_BANDS; cur_param_band++ ) { FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ ) @@ -705,7 +704,7 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ ) { -#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE set64_fx( Cy_sum_imag_64[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); #else set32_fx( Cy_sum_fx[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); @@ -851,7 +850,7 @@ static void ivas_param_mc_param_est_enc_fx( move32(); } } -#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE FOR( ch_idx1 = 
0; ch_idx1 < nchan_input; ++ch_idx1 ) { a_fx = slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]; @@ -1069,7 +1068,7 @@ static void ivas_param_mc_param_est_enc_fx( move16(); } #endif -#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE a_fx = slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]; b_fx = slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band]; move32(); @@ -1144,7 +1143,11 @@ static void ivas_param_mc_param_est_enc_fx( move32(); Cy_sum_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; move16(); -#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + Cy_sum_real_64[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; + move64(); + Cy_sum_real_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; + move64(); Cy_sum_imag_64[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; move64(); Cy_sum_imag_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; @@ -1166,6 +1169,12 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + Cy_sum_real_64[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; + move64(); + Cy_sum_real_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; + move64(); +#endif Cy_sum_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; move32(); Cy_sum_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; @@ -1210,10 +1219,18 @@ static void ivas_param_mc_param_est_enc_fx( /* get ICLDs */ FOR( k = 0; k < nchan_input; ++k ) { +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + Word16 norm; + // convert the 64 bit fixpoint back into the 48 bit float format + norm = W_norm( Cy_sum_real_64[cur_param_band][k][k] ); + Nrg_fx[k] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][k][k], norm ) ); + Nrg_e[k] = sub( sub( 62, gb ), norm ); +#else Nrg_fx[k] = Cy_sum_fx[cur_param_band][k][k]; move32(); Nrg_e[k] = Cy_sum_e[cur_param_band][k][k]; move16(); 
+#endif } FOR( k = 0; k < num_ilds_to_code; ++k ) { @@ -1280,15 +1297,17 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { - Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2], - Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2], - &Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2] ); - move32(); -#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + Cy_sum_real_64[cur_param_band - 1][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); + move64(); Cy_sum_imag_64[cur_param_band - 1][ch_idx1][ch_idx2] = W_add( Cy_sum_imag_64[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] ); move64(); #else + Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2], + Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2], + &Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2] ); + move32(); Cy_sum_imag_fx[cur_param_band - 1][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_imag_fx[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_imag_e[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_imag_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_imag_e[cur_param_band][ch_idx1][ch_idx2], &Cy_sum_imag_e[cur_param_band - 1][ch_idx1][ch_idx2] ); @@ -1317,10 +1336,14 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + Cy_sum_real_64[cur_param_band - 1][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band - 1][ch_idx1][ch_idx2], 
Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); +#else Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2], &Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2] ); move32(); +#endif } } } @@ -1329,7 +1352,24 @@ static void ivas_param_mc_param_est_enc_fx( band_step = 2; move16(); } - +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + { + // convert the 64 bit fixpoint back into the 48 bit float format + FOR( cur_param_band = 0; cur_param_band < PARAM_MC_MAX_PARAMETER_BANDS; cur_param_band++ ) + { + FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ ) + { + FOR( ch_idx2 = 0; ch_idx2 < MAX_CICP_CHANNELS; ch_idx2++ ) + { + Word16 norm; + norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); + Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); + Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm ); + } + } + } + } +#endif /* map complex covariances to real values */ FOR( cur_param_band = 0; cur_param_band < hParamMC->max_param_band_abs_cov; cur_param_band += band_step ) @@ -1366,12 +1406,8 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ch_idx2++ ) { +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE Word16 norm; - real_part_fx = Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2]; - move32(); - real_part_e = Cy_sum_e[cur_param_band][ch_idx1][ch_idx2]; - move16(); -#ifdef MERGE_REQUEST_1378_TO_ISSUE_1462 // convert the 64 bit fixpoint back into the 48 bit float format norm = W_norm( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] ); imag_part_fx = W_extract_h( W_shl( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); @@ -1382,6 +1418,10 @@ static void ivas_param_mc_param_est_enc_fx( 
imag_part_e = Cy_sum_imag_e[cur_param_band][ch_idx1][ch_idx2]; move16(); #endif + real_part_fx = Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2]; + move32(); + real_part_e = Cy_sum_e[cur_param_band][ch_idx1][ch_idx2]; + move16(); real_part_fx = Mpy_32_32( real_part_fx, real_part_fx ); imag_part_fx = Mpy_32_32( imag_part_fx, imag_part_fx ); -- GitLab