From 40b6da870308be6baf24239922ed5d668769b8ca Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Mon, 12 May 2025 10:40:07 +0200 Subject: [PATCH 1/5] inlining the matrix multiplication in ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(). --- lib_com/options.h | 1 + lib_dec/ivas_dirac_output_synthesis_cov_fx.c | 70 ++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/lib_com/options.h b/lib_com/options.h index cbc77b6e7..af3a65d47 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -86,6 +86,7 @@ //#define HARM_SCE_INIT #define DIV32_OPT_NEWTON /* FhG: faster 32 by 32 bit division */ #define MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of Cy calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Obsoletes IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE. */ +#define MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE /* FhG: reduce WMOPS by inlining the matrix multiplications for the smoothing operation. 
*/ #define FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat /*FhG: reduces WMOPS - bit-exact*/ #define FIX_1439_SPEEDUP_stereo_icBWE_dec_fx /*FhG: reduces WMOPS - bit-exact*/ #define FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx /*FhG: reduces WMOPS - bit-exact*/ diff --git a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c index 6d367538d..4d4f37a02 100644 --- a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c +++ b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c @@ -30,6 +30,8 @@ *******************************************************************************************************/ +#define MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE + #include #include #include @@ -516,10 +518,14 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( Word16 mixing_matrix_buffer_e; Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS]; Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS]; +#ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE + +#else Word32 output_f_real_fx[MAX_CICP_CHANNELS]; Word32 output_f_imag_fx[MAX_CICP_CHANNELS]; Word16 output_f_real_e; Word16 output_f_imag_e; +#endif Word32 diff_f_real_fx[MAX_CICP_CHANNELS]; Word32 diff_f_imag_fx[MAX_CICP_CHANNELS]; @@ -527,8 +533,12 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS ); set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS ); +#ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE + +#else set_zero_fx( output_f_real_fx, MAX_CICP_CHANNELS ); set_zero_fx( output_f_imag_fx, MAX_CICP_CHANNELS ); +#endif set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS ); set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS ); @@ -592,6 +602,36 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( } /* apply residual mixing */ +#ifdef 
MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE + { + Word16 shifter; + + shifter=31-mixing_matrix_res_smooth_e; + FOR( ch_idx = 0; ch_idx < nY; ch_idx++ ) + { + int i; + Word16 idx; + Word64 temp_real, temp_imag; + + + idx = ch_idx; + temp_real = 0; + temp_imag = 0; + move64(); + move64(); + for (i=0;i Date: Mon, 12 May 2025 10:45:14 +0200 Subject: [PATCH 2/5] applied the clang patch. --- lib_dec/ivas_dirac_output_synthesis_cov_fx.c | 106 +++++++++---------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c index 4d4f37a02..e17f6fd1e 100644 --- a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c +++ b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c @@ -30,7 +30,7 @@ *******************************************************************************************************/ -#define MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE +#define MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE #include #include @@ -518,7 +518,7 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( Word16 mixing_matrix_buffer_e; Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS]; Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS]; -#ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE +#ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE #else Word32 output_f_real_fx[MAX_CICP_CHANNELS]; @@ -533,7 +533,7 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS ); set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS ); -#ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE +#ifdef 
MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE #else set_zero_fx( output_f_real_fx, MAX_CICP_CHANNELS ); @@ -603,32 +603,32 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( /* apply residual mixing */ #ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE - { - Word16 shifter; - - shifter=31-mixing_matrix_res_smooth_e; - FOR( ch_idx = 0; ch_idx < nY; ch_idx++ ) - { - int i; - Word16 idx; - Word64 temp_real, temp_imag; - - - idx = ch_idx; - temp_real = 0; - temp_imag = 0; - move64(); - move64(); - for (i=0;i Date: Mon, 12 May 2025 12:49:35 +0200 Subject: [PATCH 3/5] the index calculation of the matrix multiplication does not require BASOP. --- lib_dec/ivas_dirac_output_synthesis_cov_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c index e17f6fd1e..bd95f6f54 100644 --- a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c +++ b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c @@ -623,7 +623,7 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( { temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] ) ); temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] ) ); - idx = add( idx, nY ); + idx += nY; } Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_real, shifter ) ); Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_imag, shifter ) ); @@ -692,7 +692,7 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( { temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) ); temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) ); - idx = add( idx, nY ); + idx += nY; } 
Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) ); Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) ); -- GitLab From 053425f3c77a6bc6c2e8e81187d51d061c726ca4 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Wed, 14 May 2025 12:03:56 +0200 Subject: [PATCH 4/5] add missing move32() instructions --- lib_dec/ivas_dirac_output_synthesis_cov_fx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c index bd95f6f54..5b31e31a4 100644 --- a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c +++ b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c @@ -695,7 +695,9 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( idx += nY; } Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) ); + move32(); Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) ); + move32(); } } #else -- GitLab From 99d73e309b999f9e14503ad9d50227b2ecd336c8 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Wed, 14 May 2025 12:11:42 +0200 Subject: [PATCH 5/5] formatting --- lib_dec/ivas_dirac_output_synthesis_cov_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c index 5b31e31a4..caddb5c5c 100644 --- a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c +++ b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c @@ -695,9 +695,9 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( idx += nY; } Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], 
W_extract_l( W_shr( temp_real, shifter ) ) ); - move32(); + move32(); Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) ); - move32(); + move32(); } } #else -- GitLab