diff --git a/lib_com/options.h b/lib_com/options.h index eb0054c53c39261340524b6f6f17494817a11d69..e8a44d873d9aaa3d8cc9b9db02792a309469fbec 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -78,10 +78,10 @@ #define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, nonbe*/ /* Both following 2 macros (IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST*) are independent from each other, they refer to different code blocks */ #define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */ -#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version */ +//#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version. Obsoleted by MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE. */ #define HARM_PUSH_BIT #define HARM_ENC_INIT //#define HARM_SCE_INIT #define DIV32_OPT_NEWTON /* FhG: faster 32 by 32 bit division */ - +#define MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of Cy calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Obsoletes IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE. */ #endif diff --git a/lib_enc/ivas_mc_param_enc_fx.c b/lib_enc/ivas_mc_param_enc_fx.c index 54fe249cd015aa323eb56b86b1f8b3f43b053f72..d0316be0ff0de173e84415dc2bfe26286c531749 100644 --- a/lib_enc/ivas_mc_param_enc_fx.c +++ b/lib_enc/ivas_mc_param_enc_fx.c @@ -59,7 +59,7 @@ static void ivas_param_mc_range_encoder_fx( const Word16 *seq_in, const Word16 n #define ATTACKTHRESHOLD_E 4 -static void ivas_param_mc_quantize_ilds_fx( PARAM_MC_ENC_HANDLE hParamMC, Word32 Cy_fx[MAX_CICP_CHANNELS][MAX_CICP_CHANNELS], Word16 Cy_e[MAX_CICP_CHANNELS][MAX_CICP_CHANNELS], Word32 Cx[PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS], Word16 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS], const Word16 freq_idx, const Word16 nchan_input, const Word16 nchan_transport, Word16 *ILD_idx_out, Word16 ILD_q[PARAM_MC_SZ_ILD_MAP] ); +static void ivas_param_mc_quantize_ilds_fx( PARAM_MC_ENC_HANDLE hParamMC, Word32 Cy_fx[MAX_CICP_CHANNELS][MAX_CICP_CHANNELS], Word16 Cy_e[MAX_CICP_CHANNELS][MAX_CICP_CHANNELS], Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS], Word16 Cx_e[PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS], const Word16 freq_idx, const Word16 nchan_input, const Word16 nchan_transport, Word16 *ILD_idx_out, Word16 ILD_q[PARAM_MC_SZ_ILD_MAP] ); static void ivas_param_mc_parameter_quantizer_fx( const Word32 *x, const Word16 *x_e, const Word16 L, const Word16 sz_quantizer, const Word16 *quantizer_fx, const Word16 Q_quant, Word16 *quant_idx, Word16 *y ); @@ -655,8 +655,13 @@ static void ivas_param_mc_param_est_enc_fx( Word16 dmx_imag_e[PARAM_MC_MAX_TRANSPORT_CHANS]; /* Downmix channel - Imag Part */ Word32 a_fx, b_fx, c_fx, d_fx; /* Tmp complex values */ Word16 a_e, b_e, c_e, d_e; /* Tmp complex values */ +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + Word64 Cy_sum_real_64[PARAM_MC_MAX_PARAMETER_BANDS][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; + Word64 Cy_sum_imag_64[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; +#else Word32 Cy_sum_imag_fx[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; Word16 Cy_sum_imag_e[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS]; +#endif Word32 Cx_sum_imag_fx[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS]; Word16 Cx_sum_imag_e[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS]; Word32 real_part_fx, imag_part_fx; @@ -685,13 +690,26 @@ static void ivas_param_mc_param_est_enc_fx( move16(); band_step = 1; move16(); +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + FOR( cur_param_band = 0; cur_param_band < PARAM_MC_MAX_PARAMETER_BANDS; cur_param_band++ ) + { + FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ ) + { + set64_fx( Cy_sum_real_64[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); + } + } +#endif FOR( cur_param_band = 0; cur_param_band < PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC; cur_param_band++ ) { FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ ) { - set32_fx( Cy_sum_imag_fx[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); - set16_fx( Cy_sum_imag_e[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + set64_fx( Cy_sum_imag_64[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); +#else + set32_fx( Cy_sum_fx[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); + set16_fx( Cy_sum_e[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS ); +#endif } FOR( ch_idx1 = 0; ch_idx1 < PARAM_MC_MAX_TRANSPORT_CHANS; ch_idx1++ ) @@ -832,7 +850,33 @@ static void ivas_param_mc_param_est_enc_fx( move32(); } } - +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) + { + a_fx = slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]; + b_fx = slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band]; + move32(); + FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) + { + Word16 norm; + c_fx = slot_frame_f_real_fx[ch_idx2][cur_cldfb_band]; + d_fx = slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band]; + move32(); + // Conjugated complex multiplication (a-ib)(c+id) = ac+bd + i(ad-bc) + Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], + W_add( W_mult0_32_32( a_fx, c_fx ), W_mult0_32_32( b_fx, d_fx ) ) ); + move64(); + Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], + W_sub( W_mult0_32_32( a_fx, d_fx ), W_mult0_32_32( b_fx, c_fx ) ) ); + move64(); + + // convert the 64 bit fixpoint back into the 48 bit float format + norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); + Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); + Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm ); + } + } +#else /* Cy for input channels */ FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { @@ -890,6 +934,7 @@ static void ivas_param_mc_param_est_enc_fx( move32(); } } +#endif } } @@ -1023,6 +1068,28 @@ static void ivas_param_mc_param_est_enc_fx( move16(); } #endif +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + a_fx = slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]; + b_fx = slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band]; + move32(); + move32(); + FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) + { + Word16 norm; + c_fx = slot_frame_f_real_fx[ch_idx2][cur_cldfb_band]; + d_fx = slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band]; + move32(); + move32(); + // Conjugated complex multiplication (a-ib)(c+id) = ac+bd + i(ad-bc) + Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], + W_add( W_mult0_32_32( a_fx, c_fx ), W_mult0_32_32( b_fx, d_fx ) ) ); + move64(); + // convert the 64 bit fixpoint back into the 48 bit float format + norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); + Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); + Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm ); + } +#else FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { #ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE @@ -1054,6 +1121,7 @@ static void ivas_param_mc_param_est_enc_fx( &Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] ); move32(); } +#endif } } } @@ -1075,6 +1143,16 @@ static void ivas_param_mc_param_est_enc_fx( move32(); Cy_sum_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; move16(); +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + Cy_sum_real_64[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; + move64(); + Cy_sum_real_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; + move64(); + Cy_sum_imag_64[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; + move64(); + Cy_sum_imag_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; + move64(); +#else Cy_sum_imag_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; move32(); Cy_sum_imag_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; @@ -1083,6 +1161,7 @@ static void ivas_param_mc_param_est_enc_fx( move32(); Cy_sum_imag_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; move16(); +#endif } } @@ -1090,6 +1169,12 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 ) { +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + Cy_sum_real_64[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; + move64(); + Cy_sum_real_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0; + move64(); +#endif Cy_sum_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; move32(); Cy_sum_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0; @@ -1134,10 +1219,18 @@ static void ivas_param_mc_param_est_enc_fx( /* get ICLDs */ FOR( k = 0; k < nchan_input; ++k ) { +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + Word16 norm; + // convert the 64 bit fixpoint back into the 48 bit float format + norm = W_norm( Cy_sum_real_64[cur_param_band][k][k] ); + Nrg_fx[k] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][k][k], norm ) ); + Nrg_e[k] = sub( sub( 62, gb ), norm ); +#else Nrg_fx[k] = Cy_sum_fx[cur_param_band][k][k]; move32(); Nrg_e[k] = Cy_sum_e[cur_param_band][k][k]; move16(); +#endif } FOR( k = 0; k < num_ilds_to_code; ++k ) { @@ -1204,6 +1297,13 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + Cy_sum_real_64[cur_param_band - 1][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); + move64(); + Cy_sum_imag_64[cur_param_band - 1][ch_idx1][ch_idx2] = W_add( Cy_sum_imag_64[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] ); + move64(); + +#else Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2], &Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2] ); @@ -1212,6 +1312,7 @@ static void ivas_param_mc_param_est_enc_fx( Cy_sum_imag_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_imag_e[cur_param_band][ch_idx1][ch_idx2], &Cy_sum_imag_e[cur_param_band - 1][ch_idx1][ch_idx2] ); move32(); +#endif } } } @@ -1235,10 +1336,14 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 ) { +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + Cy_sum_real_64[cur_param_band - 1][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); +#else Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2], &Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2] ); move32(); +#endif } } } @@ -1247,7 +1352,24 @@ static void ivas_param_mc_param_est_enc_fx( band_step = 2; move16(); } - +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + { + // convert the 64 bit fixpoint back into the 48 bit float format + FOR( cur_param_band = 0; cur_param_band < PARAM_MC_MAX_PARAMETER_BANDS; cur_param_band++ ) + { + FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ ) + { + FOR( ch_idx2 = 0; ch_idx2 < MAX_CICP_CHANNELS; ch_idx2++ ) + { + Word16 norm; + norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] ); + Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); + Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm ); + } + } + } + } +#endif /* map complex covariances to real values */ FOR( cur_param_band = 0; cur_param_band < hParamMC->max_param_band_abs_cov; cur_param_band += band_step ) @@ -1284,15 +1406,22 @@ static void ivas_param_mc_param_est_enc_fx( { FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ch_idx2++ ) { - real_part_fx = Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2]; - move32(); - real_part_e = Cy_sum_e[cur_param_band][ch_idx1][ch_idx2]; - move16(); +#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE + Word16 norm; + // convert the 64 bit fixpoint back into the 48 bit float format + norm = W_norm( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] ); + imag_part_fx = W_extract_h( W_shl( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], norm ) ); + imag_part_e = sub( sub( 62, gb ), norm ); +#else imag_part_fx = Cy_sum_imag_fx[cur_param_band][ch_idx1][ch_idx2]; move32(); imag_part_e = Cy_sum_imag_e[cur_param_band][ch_idx1][ch_idx2]; move16(); - +#endif + real_part_fx = Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2]; + move32(); + real_part_e = Cy_sum_e[cur_param_band][ch_idx1][ch_idx2]; + move16(); real_part_fx = Mpy_32_32( real_part_fx, real_part_fx ); imag_part_fx = Mpy_32_32( imag_part_fx, imag_part_fx );