diff --git a/lib_com/options.h b/lib_com/options.h index cbc77b6e7057a8f879213759cebf40a567f7a2c6..af3a65d479ac3c15b3e0145c45db77d68e816a2d 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -86,6 +86,7 @@ //#define HARM_SCE_INIT #define DIV32_OPT_NEWTON /* FhG: faster 32 by 32 bit division */ #define MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of Cy calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Obsoletes IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE. */ +#define MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE /* FhG: reduce WMOPS by inlining the matrix multiplications for the smoothing operation. */ #define FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat /*FhG: reduces WMOPS - bit-exact*/ #define FIX_1439_SPEEDUP_stereo_icBWE_dec_fx /*FhG: reduces WMOPS - bit-exact*/ #define FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx /*FhG: reduces WMOPS - bit-exact*/ diff --git a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c index 6d367538d26d575e27d55756a9222a5c96d5c322..caddb5c5cc9fe94936879def3f490ad0ae42ed6b 100644 --- a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c +++ b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c @@ -30,6 +30,8 @@ *******************************************************************************************************/ +#define MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE + #include #include #include @@ -516,10 +518,14 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( Word16 mixing_matrix_buffer_e; Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS]; Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS]; +#ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE + +#else Word32 output_f_real_fx[MAX_CICP_CHANNELS]; Word32 output_f_imag_fx[MAX_CICP_CHANNELS]; Word16 output_f_real_e; Word16 output_f_imag_e; +#endif Word32 diff_f_real_fx[MAX_CICP_CHANNELS]; Word32 diff_f_imag_fx[MAX_CICP_CHANNELS]; @@ -527,8 +533,12 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS ); set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS ); +#ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE + +#else set_zero_fx( output_f_real_fx, MAX_CICP_CHANNELS ); set_zero_fx( output_f_imag_fx, MAX_CICP_CHANNELS ); +#endif set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS ); set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS ); @@ -592,6 +602,36 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( } /* apply residual mixing */ +#ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE + { + Word16 shifter; + + shifter = 31 - mixing_matrix_res_smooth_e; + FOR( ch_idx = 0; ch_idx < nY; ch_idx++ ) + { + int i; + Word16 idx; + Word64 temp_real, temp_imag; + + + idx = ch_idx; + temp_real = 0; + temp_imag = 0; + move64(); + move64(); + for ( i = 0; i < nY; i++ ) + { + temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] ) ); + temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] ) ); + idx += nY; + } + Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_real, shifter ) ); + Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_imag, shifter ) ); + } + } + + +#else matrix_product_mant_exp_fx( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, nY, nY, 0, diff_f_real_fx, 25, nY, 1, 0, output_f_real_fx, &output_f_real_e ); scale_sig32( output_f_real_fx, nY, sub( Q6, sub( Q31, output_f_real_e ) ) ); // Q6 @@ -607,6 +647,7 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = output_f_imag_fx[ch_idx]; // Q6 move32(); } +#endif } ELSE { @@ -630,6 +671,36 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( } /* apply mixing matrix */ +#ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE + { + Word16 shifter; + shifter = 31 - mixing_matrix_smooth_e; + + FOR( ch_idx = 0; ch_idx < nY; ch_idx++ ) + { + int i; + Word16 idx; + Word64 temp_real, temp_imag; + + + idx = ch_idx; + temp_real = 0; + temp_imag = 0; + move64(); + move64(); + for ( i = 0; i < nX; i++ ) + { + temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) ); + temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) ); + idx += nY; + } + Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) ); + move32(); + Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) ); + move32(); + } + } +#else matrix_product_mant_exp_fx( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, nY, nX, 0, input_f_real_fx, 25, nX, 1, 0, output_f_real_fx, &output_f_real_e ); scale_sig32( output_f_real_fx, MAX_CICP_CHANNELS, sub( 6, sub( 31, output_f_real_e ) ) ); // Q6 @@ -646,6 +717,7 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], output_f_imag_fx[ch_idx] ); move32(); } +#endif } }