From a404a8de1d3b4cf1fb84d0d403907e3ee4387ede Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Tue, 18 Mar 2025 13:54:07 +0530 Subject: [PATCH 1/2] Optimizations for SBA path in decoder Test case: // SBA at 80 kbps, 32kHz in, 32kHz out, HOA3 out. WMOPS reduced for Scale_sig32 (14), Copy32 (1.1), cldfbSynthesis_ivas_fx (3) and IGF_getWhiteSpectralData_ivas (1.25). --- lib_com/cldfb.c | 74 ++++++++++++++----- lib_com/ivas_prot_fx.h | 26 +++++-- lib_com/options.h | 3 +- lib_com/prot_fx.h | 11 ++- lib_dec/acelp_core_dec_ivas_fx.c | 18 ++++- lib_dec/acelp_core_switch_dec_fx.c | 4 + lib_dec/core_switching_dec_fx.c | 8 ++ lib_dec/dec_tcx_fx.c | 35 +++++++++ lib_dec/igf_dec_fx.c | 47 ++++++++++++ lib_dec/ivas_dirac_dec_fx.c | 16 ++++ lib_dec/ivas_ism_param_dec_fx.c | 4 + lib_dec/ivas_jbm_dec_fx.c | 24 ++++++ lib_dec/ivas_mc_param_dec_fx.c | 5 ++ lib_dec/ivas_mc_paramupmix_dec_fx.c | 11 ++- lib_dec/ivas_osba_dec_fx.c | 15 +++- lib_dec/ivas_sba_dec_fx.c | 18 ++++- lib_dec/ivas_spar_decoder_fx.c | 53 +++++++++++-- lib_enc/swb_pre_proc_fx.c | 4 + .../ivas_dirac_dec_binaural_functions_fx.c | 4 + lib_rend/ivas_dirac_output_synthesis_dec_fx.c | 36 +++++---- lib_rend/ivas_dirac_rend_fx.c | 4 + 21 files changed, 356 insertions(+), 64 deletions(-) diff --git a/lib_com/cldfb.c b/lib_com/cldfb.c index 246a8a8e8..48fe4d9bc 100644 --- a/lib_com/cldfb.c +++ b/lib_com/cldfb.c @@ -1097,10 +1097,13 @@ void cldfbAnalysis_ts_fx_fixed_q( * Conduct inverse multple overlap cmplex low delay MDCT *--------------------------------------------------------------------*/ void cldfbSynthesis_ivas_fx( - Word32 **realBuffer_fx, /* i : real values Qx*/ - Word32 **imagBuffer_fx, /* i : imag values Qx*/ - Word32 *timeOut_fx, /* o : output time domain samples Qx - 1*/ - const Word16 samplesToProcess, /* i : number of processed samples */ + Word32 **realBuffer_fx, /* i : real values Qx*/ + Word32 **imagBuffer_fx, /* i : imag values Qx*/ + Word32 *timeOut_fx, /* o : output time domain samples Qx - 1*/ + const 
Word16 samplesToProcess, /* i : number of processed samples */ +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + const Word16 shift, /* i : scale for state buffer */ +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ HANDLE_CLDFB_FILTER_BANK h_cldfb /* i : filter bank state */ ) { @@ -1266,25 +1269,56 @@ void cldfbSynthesis_ivas_fx( } /* synthesis prototype filter */ - FOR( i = 0; i < L2; i++ ) +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + IF( 0 == shift ) + { +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ + FOR( i = 0; i < L2; i++ ) + { + accu0 = Madd_32_16( synthesisBuffer_fx[i], Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter[i] ), p_filter_sf ); // Qx - 1 + accu1 = Madd_32_16( synthesisBuffer_fx[1 * L2 + i], Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter[( 1 * L2 + i )] ), p_filter_sf ); // Qx - 1 + accu2 = Madd_32_16( synthesisBuffer_fx[2 * L2 + i], Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter[( 2 * L2 + i )] ), p_filter_sf ); // Qx - 1 + accu3 = Madd_32_16( synthesisBuffer_fx[3 * L2 + i], Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter[( 3 * L2 + i )] ), p_filter_sf ); // Qx - 1 + accu4 = Madd_32_16( synthesisBuffer_fx[4 * L2 + i], Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter[( 4 * L2 + i )] ), p_filter_sf ); // Qx - 1 + + synthesisBuffer_fx[i] = accu0; + move32(); + synthesisBuffer_fx[1 * L2 + i] = accu1; + move32(); + synthesisBuffer_fx[2 * L2 + i] = accu2; + move32(); + synthesisBuffer_fx[3 * L2 + i] = accu3; + move32(); + synthesisBuffer_fx[4 * L2 + i] = accu4; + move32(); + } +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + } + ELSE { - accu0 = Madd_32_16( synthesisBuffer_fx[i], Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter[i] ), p_filter_sf ); // Qx - 1 - accu1 = Madd_32_16( synthesisBuffer_fx[1 * L2 + i], Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter[( 1 * L2 + i )] ), p_filter_sf ); // Qx - 1 - accu2 = Madd_32_16( synthesisBuffer_fx[2 * L2 + i], Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter[( 2 * L2 + i )] ), p_filter_sf ); // Qx - 1 - accu3 = Madd_32_16( 
synthesisBuffer_fx[3 * L2 + i], Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter[( 3 * L2 + i )] ), p_filter_sf ); // Qx - 1 - accu4 = Madd_32_16( synthesisBuffer_fx[4 * L2 + i], Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter[( 4 * L2 + i )] ), p_filter_sf ); // Qx - 1 - synthesisBuffer_fx[i] = accu0; - move32(); - synthesisBuffer_fx[1 * L2 + i] = accu1; - move32(); - synthesisBuffer_fx[2 * L2 + i] = accu2; - move32(); - synthesisBuffer_fx[3 * L2 + i] = accu3; - move32(); - synthesisBuffer_fx[4 * L2 + i] = accu4; - move32(); + FOR( i = 0; i < L2; i++ ) + { + Word32 prod = L_shl_sat( Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter_sf ), shift ); + accu0 = Madd_32_16( synthesisBuffer_fx[i], prod, p_filter[i] ); // Qx - 1 + accu1 = Madd_32_16( synthesisBuffer_fx[1 * L2 + i], prod, p_filter[( 1 * L2 + i )] ); // Qx - 1 + accu2 = Madd_32_16( synthesisBuffer_fx[2 * L2 + i], prod, p_filter[( 2 * L2 + i )] ); // Qx - 1 + accu3 = Madd_32_16( synthesisBuffer_fx[3 * L2 + i], prod, p_filter[( 3 * L2 + i )] ); // Qx - 1 + accu4 = Madd_32_16( synthesisBuffer_fx[4 * L2 + i], prod, p_filter[( 4 * L2 + i )] ); // Qx - 1 + + synthesisBuffer_fx[i] = accu0; + move32(); + synthesisBuffer_fx[1 * L2 + i] = accu1; + move32(); + synthesisBuffer_fx[2 * L2 + i] = accu2; + move32(); + synthesisBuffer_fx[3 * L2 + i] = accu3; + move32(); + synthesisBuffer_fx[4 * L2 + i] = accu4; + move32(); + } } +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ FOR( i = 0; i < M1; i++ ) { diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 4255ca842..ab043fc0f 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -3786,8 +3786,11 @@ ivas_error ivas_osba_dirac_td_binaural_jbm_fx( const UWord16 nSamplesAsked, /* i : number of CLDFB slots requested */ UWord16 *nSamplesRendered, /* o : number of CLDFB slots rendered */ UWord16 *nSamplesAvailable, /* o : number of CLDFB slots still to render */ - Word32 *output_fx[], /* o : rendered time signal */ - Word16 out_len /*Store the length of 
values in each channel*/ + Word32 *output_fx[] /* o : rendered time signal */ +#ifndef OPT_SBA_AVOID_SPAR_RESCALE + , + Word16 out_len /*Store the length of values in each channel*/ +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ ); ivas_error ivas_osba_ism_metadata_dec_fx( @@ -5343,15 +5346,22 @@ ivas_error ivas_sba_dec_render_fx( const UWord16 nSamplesAsked, /* i : number of CLDFB slots requested */ UWord16 *nSamplesRendered, /* o : number of CLDFB slots rendered */ UWord16 *nSamplesAvailableNext, /* o : number of CLDFB slots still to render */ - Word32 *output_fx[], /* o : rendered time signal Q11*/ - Word16 out_len /*Store the length of values in each channel*/ + Word32 *output_fx[] /* o : rendered time signal Q11*/ +#ifndef OPT_SBA_AVOID_SPAR_RESCALE + , + Word16 out_len /*Store the length of values in each channel*/ +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ ); void ivas_spar_dec_upmixer_sf_fx( - Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ - Word32 *output_fx[], /* o : output audio channels */ - const Word16 nchan_internal, /* i : number of internal channels */ - Word16 out_len ); + Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ + Word32 *output_fx[], /* o : output audio channels */ + const Word16 nchan_internal /* i : number of internal channels */ +#ifndef OPT_SBA_AVOID_SPAR_RESCALE + , + Word16 out_len +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ +); ivas_error ivas_spar_md_enc_open_fx( ivas_spar_md_enc_state_t **hMdEnc_in, /* i/o: SPAR MD encoder handle */ diff --git a/lib_com/options.h b/lib_com/options.h index c51d10333..02f7d18f3 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -168,10 +168,11 @@ #define FIX_1301_CORRECT_TD_CNST /* VA: Fix 1301, correct wrong constant in TD stereo */ #define NONBE_FIX_1277_EVS_DTX_HIGH_RATE_THRESHOLD /* VA/Eri: FLP issue 1277: Fix Mismatch in DTX high-rate threshold between EVS float and BASOP */ #define NONBE_FIX_708_OSBA_BR_SWITCHING_CRASH /* FhG: issue 708: fix crash in OSBA BR switching with long 
test vectors */ -//#define OPT_STEREO_32KBPS_V1 /* Optimization made in stereo decoding path for 32kbps decoding */ +#define OPT_STEREO_32KBPS_V1 /* Optimization made in stereo decoding path for 32kbps decoding */ #define DOT_PROD_CHOLESKY_64BIT /* FhG: Issue 1323, optimized 64 bit implementation of dot_product_cholesky() */ #define OPT_BASOP_ADD_v1 /* optimizations to avoid usage of BASOP_Util_Add_MantExp */ #define FIX_ISSUE_1327 /* Ittiam: Fix for issue 1327: Glitch when stereo is switching from TD to FD*/ #define NONBE_FIX_1402_WAVEADJUST /* VA: BASOP iisue 1402: fix waveform adjustment decoder PLC */ #define FIX_ISSUE_1376 /* VA: Fix for issue 1376 (issue with GSC excitation) */ +#define OPT_SBA_AVOID_SPAR_RESCALE /* Optimization made to spar decoder and IGF */ #endif diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index c4bc98162..65f0d2e69 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -9751,10 +9751,13 @@ void cldfbAnalysis_ivas_fx( ); void cldfbSynthesis_ivas_fx( - Word32 **realBuffer_fx, /* i : real values Qx*/ - Word32 **imagBuffer_fx, /* i : imag values Qx*/ - Word32 *timeOut_fx, /* o : output time domain samples Qx - 1*/ - const Word16 samplesToProcess, /* i : number of processed samples */ + Word32 **realBuffer_fx, /* i : real values Qx*/ + Word32 **imagBuffer_fx, /* i : imag values Qx*/ + Word32 *timeOut_fx, /* o : output time domain samples Qx - 1*/ + const Word16 samplesToProcess, /* i : number of processed samples */ +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + const Word16 shift, /* i : scale for state buffer */ +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ HANDLE_CLDFB_FILTER_BANK h_cldfb /* i : filter bank state */ ); diff --git a/lib_dec/acelp_core_dec_ivas_fx.c b/lib_dec/acelp_core_dec_ivas_fx.c index 8499d1335..2ca524bb0 100644 --- a/lib_dec/acelp_core_dec_ivas_fx.c +++ b/lib_dec/acelp_core_dec_ivas_fx.c @@ -1932,7 +1932,11 @@ ivas_error acelp_core_dec_ivas_fx( } } +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( realBuffer_fx, 
imagBuffer_fx, save_hb_synth_fx, -1, 0, st->cldfbSynHB ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( realBuffer_fx, imagBuffer_fx, save_hb_synth_fx, -1, st->cldfbSynHB ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ Scale_sig32( save_hb_synth_fx, L_FRAME48k, negate( ( sub( Q_real, 1 ) ) ) ); // Q0 Scale_sig32( st->cldfbSynHB->cldfb_state_fx, st->cldfbSynHB->p_filter_length, sub( Q10, sub( Q_real, 1 ) ) ); // Q10 @@ -1952,7 +1956,11 @@ ivas_error acelp_core_dec_ivas_fx( Scale_sig32( st->cldfbSyn->cldfb_state_fx, st->cldfbSyn->p_filter_length, sub( sub( Q_real, 1 ), Q10 ) ); // Q_real-1 st->cldfbSynHB->Q_cldfb_state = sub( Q_real, 1 ); move16(); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( pRealSave_fx, pImagSave_fx, synth_fx, -1, 0, st->cldfbSyn ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( pRealSave_fx, pImagSave_fx, synth_fx, -1, st->cldfbSyn ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ Scale_sig32( synth_fx, L_FRAME48k, negate( sub( Q_real, 1 ) ) ); // Q0 Scale_sig32( st->cldfbSyn->cldfb_state_fx, st->cldfbSyn->p_filter_length, sub( Q10, sub( Q_real, 1 ) ) ); // Q10 st->cldfbSynHB->Q_cldfb_state = Q10; @@ -1991,7 +1999,7 @@ ivas_error acelp_core_dec_ivas_fx( #ifdef OPT_STEREO_32KBPS_V1 scale_sig32( st->cldfbSyn->cldfb_state_fx, st->cldfbSyn->p_filter_length, sub( Q_real, Q11 ) ); // Q10 - > (Q_real-1) #else /* OPT_STEREO_32KBPS_V1 */ - scale_sig32_r( st->cldfbSyn->cldfb_state_fx, st->cldfbSyn->p_filter_length, sub( sub( Q_real, 1 ), Q10 ) ); //(Q_real - 1) + scale_sig32_r( st->cldfbSyn->cldfb_state_fx, st->cldfbSyn->p_filter_length, sub( sub( Q_real, 1 ), Q10 ) ); //(Q_real - 1) #endif /* OPT_STEREO_32KBPS_V1 */ st->cldfbSyn->Q_cldfb_state = sub( Q_real, 1 ); move16(); @@ -1999,7 +2007,11 @@ ivas_error acelp_core_dec_ivas_fx( Scale_sig32( synth_fx, L_FRAME48k, Q_real - 1 ); #endif +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( realBuffer_fx, imagBuffer_fx, synth_fx, -1, 0, st->cldfbSyn ); +#else 
/* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( realBuffer_fx, imagBuffer_fx, synth_fx, -1, st->cldfbSyn ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ #ifdef MSAN_FIX scale_sig32( synth_fx, output_frame, negate( sub( Q_real, 1 ) ) ); // Q0 #else @@ -2108,7 +2120,11 @@ ivas_error acelp_core_dec_ivas_fx( Scale_sig32( synth_fx, L_FRAME48k, Q_real - 1 ); #endif +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( realBuffer_fx, imagBuffer_fx, synth_fx /*dummy*/, NS2SA_FX2( st->output_Fs, FRAME_SIZE_NS /*DELAY_CLDFB_NS*/ ), 0, st->cldfbSyn ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( realBuffer_fx, imagBuffer_fx, synth_fx /*dummy*/, NS2SA_FX2( st->output_Fs, FRAME_SIZE_NS /*DELAY_CLDFB_NS*/ ), st->cldfbSyn ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ #ifdef MSAN_FIX Scale_sig32( synth_fx, output_frame, negate( sub( Q_real, 1 ) ) ); // Q0 diff --git a/lib_dec/acelp_core_switch_dec_fx.c b/lib_dec/acelp_core_switch_dec_fx.c index 3217e2f2a..6a1e60b06 100644 --- a/lib_dec/acelp_core_switch_dec_fx.c +++ b/lib_dec/acelp_core_switch_dec_fx.c @@ -845,7 +845,11 @@ ivas_error acelp_core_switch_dec_bfi_ivas_fx( move16(); Copy_Scale_sig_16_32_DEPREC( synth_out, synth32, L_FRAME48k, 5 ); /*11-5-1*/ +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( realBuffer, imagBuffer, synth32, extract_l( Mpy_32_16_1( st_fx->output_Fs, 328 ) ), 0, st_fx->cldfbSyn ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( realBuffer, imagBuffer, synth32, extract_l( Mpy_32_16_1( st_fx->output_Fs, 328 ) ), st_fx->cldfbSyn ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ Scale_sig32( st_fx->cldfbSyn->cldfb_state_fx, st_fx->cldfbSyn->cldfb_state_length, -1 ); // Q_cldfb_state-1 st_fx->cldfbSyn->Q_cldfb_state = sub( st_fx->cldfbSyn->Q_cldfb_state, 1 ); move16(); diff --git a/lib_dec/core_switching_dec_fx.c b/lib_dec/core_switching_dec_fx.c index b913dedb1..1f6b0dd51 100644 --- a/lib_dec/core_switching_dec_fx.c +++ b/lib_dec/core_switching_dec_fx.c @@ 
-2058,7 +2058,11 @@ static void core_switch_lb_upsamp_fx( } /* synthesis of the combined signal */ +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( realBuffer_fx, imagBuffer_fx, output, i_mult( CLDFB_OVRLP_MIN_SLOTS, st->cldfbSyn->no_channels ), 0, st->cldfbSyn ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( realBuffer_fx, imagBuffer_fx, output, i_mult( CLDFB_OVRLP_MIN_SLOTS, st->cldfbSyn->no_channels ), st->cldfbSyn ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ /*rescaling whole buffer to a common Q*/ no_col = st->cldfbSyn->no_col; @@ -2375,7 +2379,11 @@ ivas_error core_switching_pre_dec_ivas_fx( return error; } +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( realBuffer_fx, imagBuffer_fx, fer_samples_fx, delay_comp, 0, st->cldfbSyn ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( realBuffer_fx, imagBuffer_fx, fer_samples_fx, delay_comp, st->cldfbSyn ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfb_restore_memory_ivas_fx( st->cldfbSyn ); Copy_Scale_sig_32_16( syn_Overl_fx, st->hTcxDec->syn_Overl, 320, 15 ); Copy_Scale_sig_32_16( fer_samples_fx, st->hHQ_core->fer_samples_fx, 960, 9 ); diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c index 510370aee..8ff50f8a5 100644 --- a/lib_dec/dec_tcx_fx.c +++ b/lib_dec/dec_tcx_fx.c @@ -2682,7 +2682,11 @@ void IMDCT_ivas_fx( } move16(); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + set16_fx( win_fx, 0, ( L_FRAME_PLUS + L_MDCT_OVLP_MAX ) >> 1 ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ set16_fx( win_fx, 0, shr( add( L_FRAME_PLUS, L_MDCT_OVLP_MAX ), 1 ) ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ Word16 tcx_offset_tmp = add( tcx_offset, shr( L_ola, 1 ) ); set16_fx( xn_buf_fx, 0, tcx_offset_tmp ); /* zero left end of buffer */ @@ -2861,9 +2865,16 @@ void IMDCT_ivas_fx( q_tmp_fx_32 = q_xn_buf_fx_32; move16(); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + Word16 diff = sub( q_tmp_fx_32, q_win ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ FOR( Word16 ind = 0; ind < L_frame; ind++ ) { 
+#ifdef OPT_SBA_AVOID_SPAR_RESCALE + old_out_fx_32[ind] = L_shl( old_out_fx[ind], diff ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ old_out_fx_32[ind] = L_shl( old_out_fx[ind], sub( q_tmp_fx_32, q_win ) ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ move32(); } @@ -2871,8 +2882,13 @@ void IMDCT_ivas_fx( FOR( Word16 ind = 0; ind < L_frame; ind++ ) { +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + old_out_fx[ind] = extract_l( L_shr( old_out_fx_32[ind], diff ) ); + xn_buf_fx[ind] = extract_l( L_shr( xn_buf_fx_32[ind], diff ) ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ old_out_fx[ind] = (Word16) L_shr( old_out_fx_32[ind], sub( q_tmp_fx_32, q_win ) ); xn_buf_fx[ind] = (Word16) L_shr( xn_buf_fx_32[ind], sub( q_tmp_fx_32, q_win ) ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ move16(); move16(); } @@ -2896,20 +2912,39 @@ void IMDCT_ivas_fx( q_tmp_fx_32 = sub( q_xn_buf_fx_32, res_e ); // v_multc_fixed( xn_buf_fx_32 + overlap / 2 + nz, (float) sqrt( (float) L_frame / NORM_MDCT_FACTOR ), tmp_fx_32, L_frame ); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + Word16 q_diff = sub( q_xn_buf_fx_32, q_win ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ FOR( Word16 ind = 0; ind < L_frame; ind++ ) { +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + xn_buf_fx[( ind + ( overlap / 2 ) ) + nz] = extract_l( L_shr( xn_buf_fx_32[( ind + ( overlap / 2 ) ) + nz], q_diff ) ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ xn_buf_fx[( ind + ( overlap / 2 ) ) + nz] = (Word16) L_shr( xn_buf_fx_32[( ind + ( overlap / 2 ) ) + nz], sub( q_xn_buf_fx_32, q_win ) ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ move16(); } window_ola_fx( tmp_fx_32, xn_buf_fx, &q_tmp_fx_32, old_out_fx, &q_old_out, L_frame, hTcxCfg->tcx_last_overlap_mode, hTcxCfg->tcx_curr_overlap_mode, 0, 0, NULL ); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + q_diff = sub( q_old_out, q_win ); + + Word16 diff = sub( q_tmp_fx_32, q_win ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ FOR( Word16 ind = 0; ind < L_frame; ind++ ) { +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + old_out_fx[ind] = shr_sat( old_out_fx[ind], 
q_diff ); + move16(); + xn_buf_fx[ind] = shr_sat( xn_buf_fx[ind], diff ); + move16(); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ old_out_fx[ind] = shr_sat( old_out_fx[ind], sub( q_old_out, q_win ) ); move16(); xn_buf_fx[ind] = shr_sat( xn_buf_fx[ind], sub( q_tmp_fx_32, q_win ) ); move16(); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ } } aldo = 1; diff --git a/lib_dec/igf_dec_fx.c b/lib_dec/igf_dec_fx.c index c1b0ce93a..5bfa7476a 100644 --- a/lib_dec/igf_dec_fx.c +++ b/lib_dec/igf_dec_fx.c @@ -2883,7 +2883,9 @@ static void IGF_getWhiteSpectralData_ivas( Word16 j; Word32 ak; Word16 ak_e; +#ifndef OPT_SBA_AVOID_SPAR_RESCALE Word16 tmp_16; +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ Word16 tmp_e; Word16 out_e_arr[IGF_START_MX + MAX_IGF_SFB_LEN]; Word16 max_out_e; @@ -2902,12 +2904,38 @@ static void IGF_getWhiteSpectralData_ivas( Word16 guard_bits = add( find_guarded_bits_fx( add( i_mult( 2, level ), 1 ) ), 1 ) / 2; s_l = sub( s_l, guard_bits ); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + Word16 shift = sub( shl( s_l, 1 ), 32 ); + Word16 eff_e = sub( shl( sub( in_e, s_l ), 1 ), 15 ); + Word16 diff = add( 21, in_e ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ + Word16 quo = BASOP_Util_Divide3216_Scale( ONE_IN_Q30, add( shl( level, 1 ), 1 ), &tmp_e ); tmp_e = add( tmp_e, 1 ); ak_e = add( tmp_e, sub( shl( sub( in_e, s_l ), 1 ), 15 ) ); // tmp_e + 2 * (in_e - s_l) - 15 +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + ak_e = sub( ak_e, 1 ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ + FOR( i = start; i < stop - level; i++ ) { +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + Word64 temp = 0; + move64(); + FOR( j = i - level; j < i + level + 1; j++ ) + { + temp = W_mac_32_32( temp, in[j], in[j] ); + } + ak = Mult_32_16( W_shl_sat_l( temp, shift ), quo ); // add( shl( level, 1 ), 1 ), &tmp_e ) ); + + + n = sub( ak_e, norm_l( ak ) ); + n = shr( n, 1 ); + + out_e_arr[i] = sub( diff, n ); + move16(); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ ak = 0; move32(); move32(); @@ -2924,11 +2952,29 @@ static void 
IGF_getWhiteSpectralData_ivas( out_e_arr[i] = add( sub( 21, n ), in_e ); move16(); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ max_out_e = s_max( max_out_e, out_e_arr[i] ); } FOR( ; i < stop; i++ ) { +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + Word64 temp = 0; + move64(); + + FOR( j = i - level; j < stop; j++ ) + { + temp = W_mac_32_32( temp, in[j], in[j] ); + } + + ak = L_deposit_h( BASOP_Util_Divide3216_Scale( W_shl_sat_l( temp, shift ), sub( stop, sub( i, level ) ), &tmp_e ) ); + ak_e = add( tmp_e, eff_e ); // tmp_e + 2 * (in_e - s_l) - 15 + n = sub( ak_e, add( norm_l( ak ), 1 ) ); + n = shr( n, 1 ); + + out_e_arr[i] = sub( diff, n ); + move16(); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ ak = 0; move32(); @@ -2945,6 +2991,7 @@ static void IGF_getWhiteSpectralData_ivas( out_e_arr[i] = add( sub( 21, n ), in_e ); move16(); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ max_out_e = s_max( max_out_e, out_e_arr[i] ); } diff --git a/lib_dec/ivas_dirac_dec_fx.c b/lib_dec/ivas_dirac_dec_fx.c index 7c7d49381..ae4098f19 100644 --- a/lib_dec/ivas_dirac_dec_fx.c +++ b/lib_dec/ivas_dirac_dec_fx.c @@ -3747,7 +3747,11 @@ void ivas_dirac_dec_render_sf_fx( st_ivas->cldfbSynDec[ch]->Q_cldfb_state = ( Q6 - 1 ); move16(); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, synth_fx, i_mult( hSpatParamRendCom->num_freq_bands, hSpatParamRendCom->subframe_nbslots[subframe_idx] ), 0, st_ivas->cldfbSynDec[ch] ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, synth_fx, i_mult( hSpatParamRendCom->num_freq_bands, hSpatParamRendCom->subframe_nbslots[subframe_idx] ), st_ivas->cldfbSynDec[ch] ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ Word16 no_col = st_ivas->cldfbSynDec[ch]->no_col; move16(); @@ -3850,7 +3854,11 @@ void ivas_dirac_dec_render_sf_fx( ImagBuffer_fx[i] = Cldfb_ImagBuffer_fx[idx_in][i]; move32(); } +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, &( 
output_buf_fx[ch][subframe_start_sample] ), num_samples_subframe, 0, st_ivas->cldfbSynDec[idx_in] ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, &( output_buf_fx[ch][subframe_start_sample] ), num_samples_subframe, st_ivas->cldfbSynDec[idx_in] ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ IF( !st_ivas->hLsSetupCustom->separate_ch_found ) { @@ -3890,7 +3898,11 @@ void ivas_dirac_dec_render_sf_fx( scale_sig32( st_ivas->cldfbSynDec[cldfbSynIdx]->cldfb_state_fx, st_ivas->cldfbSynDec[cldfbSynIdx]->p_filter_length, sub( ( Q6 - 1 ), st_ivas->cldfbSynDec[cldfbSynIdx]->Q_cldfb_state ) ); // Q6-1 st_ivas->cldfbSynDec[cldfbSynIdx]->Q_cldfb_state = ( Q6 - 1 ); move16(); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, p_out, samplesToProcess, 0, st_ivas->cldfbSynDec[cldfbSynIdx] ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, p_out, samplesToProcess, st_ivas->cldfbSynDec[cldfbSynIdx] ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ // Calculating length of output Word16 no_col = st_ivas->cldfbSynDec[cldfbSynIdx]->no_col; @@ -3959,7 +3971,11 @@ void ivas_dirac_dec_render_sf_fx( st_ivas->cldfbSynDec[idx_in]->Q_cldfb_state = ( Q6 - 1 ); move16(); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, p_out, samplesToProcess, 0, st_ivas->cldfbSynDec[idx_in] ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, p_out, samplesToProcess, st_ivas->cldfbSynDec[idx_in] ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ // Scaling output from Q6-1 to Q11 Scale_sig32( p_out, out_len, ( Q11 - ( Q6 - 1 ) ) ); diff --git a/lib_dec/ivas_ism_param_dec_fx.c b/lib_dec/ivas_ism_param_dec_fx.c index 24e61c5f3..c818af237 100644 --- a/lib_dec/ivas_ism_param_dec_fx.c +++ b/lib_dec/ivas_ism_param_dec_fx.c @@ -1535,7 +1535,11 @@ static void ivas_ism_param_dec_render_sf_fx( Scale_sig32( 
st_ivas->cldfbSynDec[ch]->cldfb_state_fx, st_ivas->cldfbSynDec[ch]->p_filter_length, sub( sub( Q_real, 1 ), Q11 ) ); // Q_real-1 st_ivas->cldfbSynDec[ch]->Q_cldfb_state = sub( Q_real, 1 ); move16(); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, output_f_fx[ch], i_mult( hSpatParamRendCom->num_freq_bands, hSpatParamRendCom->subframe_nbslots[subframe_idx] ), 0, st_ivas->cldfbSynDec[ch] ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, output_f_fx[ch], i_mult( hSpatParamRendCom->num_freq_bands, hSpatParamRendCom->subframe_nbslots[subframe_idx] ), st_ivas->cldfbSynDec[ch] ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ Scale_sig32( st_ivas->cldfbSynDec[ch]->cldfb_state_fx, st_ivas->cldfbSynDec[ch]->p_filter_length, sub( Q11, sub( Q_real, 1 ) ) ); // Q11 st_ivas->cldfbSynDec[ch]->Q_cldfb_state = Q11; move16(); diff --git a/lib_dec/ivas_jbm_dec_fx.c b/lib_dec/ivas_jbm_dec_fx.c index c5e3d451a..e7ee06648 100644 --- a/lib_dec/ivas_jbm_dec_fx.c +++ b/lib_dec/ivas_jbm_dec_fx.c @@ -2147,7 +2147,11 @@ ivas_error ivas_jbm_dec_render_fx( } ELSE { +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + IF( NE_32( ( error = ivas_sba_dec_render_fx( st_ivas, nSamplesAskedLocal, nSamplesRendered, nSamplesAvailableNext, p_output_fx ) ), IVAS_ERR_OK ) ) +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ IF( NE_32( ( error = ivas_sba_dec_render_fx( st_ivas, nSamplesAskedLocal, nSamplesRendered, nSamplesAvailableNext, p_output_fx, 960 ) ), IVAS_ERR_OK ) ) +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ { return error; } @@ -2195,7 +2199,11 @@ ivas_error ivas_jbm_dec_render_fx( hSpar->hMdDec->Q_mixer_mat = 30; move16(); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + IF( NE_32( ( error = ivas_osba_dirac_td_binaural_jbm_fx( st_ivas, nSamplesAskedLocal, nSamplesRendered, nSamplesAvailableNext, p_output_fx ) ), IVAS_ERR_OK ) ) +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ IF( NE_32( ( error = ivas_osba_dirac_td_binaural_jbm_fx( st_ivas, nSamplesAskedLocal, 
nSamplesRendered, nSamplesAvailableNext, p_output_fx, 960 ) ), IVAS_ERR_OK ) ) +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ { return error; } @@ -2233,7 +2241,11 @@ ivas_error ivas_jbm_dec_render_fx( } ELSE IF( EQ_32( st_ivas->hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_EXTERNAL ) ) /*EXT output = individual objects + HOA3*/ { +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + IF( NE_32( ( error = ivas_sba_dec_render_fx( st_ivas, nSamplesAskedLocal, nSamplesRendered, nSamplesAvailableNext, &p_output_fx[st_ivas->nchan_ism] ) ), IVAS_ERR_OK ) ) +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ IF( NE_32( ( error = ivas_sba_dec_render_fx( st_ivas, nSamplesAskedLocal, nSamplesRendered, nSamplesAvailableNext, &p_output_fx[st_ivas->nchan_ism], 960 ) ), IVAS_ERR_OK ) ) +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ { return error; } @@ -2245,7 +2257,11 @@ ivas_error ivas_jbm_dec_render_fx( } ELSE { +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + IF( NE_32( ( error = ivas_sba_dec_render_fx( st_ivas, nSamplesAskedLocal, nSamplesRendered, nSamplesAvailableNext, p_output_fx ) ), IVAS_ERR_OK ) ) +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ IF( NE_32( ( error = ivas_sba_dec_render_fx( st_ivas, nSamplesAskedLocal, nSamplesRendered, nSamplesAvailableNext, p_output_fx, 960 ) ), IVAS_ERR_OK ) ) +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ { return error; } @@ -2257,7 +2273,11 @@ ivas_error ivas_jbm_dec_render_fx( } ELSE { +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + IF( NE_32( ( error = ivas_sba_dec_render_fx( st_ivas, nSamplesAskedLocal, nSamplesRendered, nSamplesAvailableNext, p_output_fx ) ), IVAS_ERR_OK ) ) +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ IF( NE_32( ( error = ivas_sba_dec_render_fx( st_ivas, nSamplesAskedLocal, nSamplesRendered, nSamplesAvailableNext, p_output_fx, 960 ) ), IVAS_ERR_OK ) ) +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ { return error; } @@ -2859,7 +2879,11 @@ ivas_error ivas_jbm_dec_flush_renderer_fx( set16_fx( st_ivas->hSpatParamRendCom->render_to_md_map, last_dirac_md_idx, n_slots_still_available ); /* render the 
last subframe */ +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + IF( NE_32( ( error = ivas_osba_dirac_td_binaural_jbm_fx( st_ivas, (UWord16) hTcBuffer->n_samples_granularity, nSamplesRendered, &nSamplesAvailableNext, p_output_fx ) ), IVAS_ERR_OK ) ) +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ IF( NE_32( ( error = ivas_osba_dirac_td_binaural_jbm_fx( st_ivas, (UWord16) hTcBuffer->n_samples_granularity, nSamplesRendered, &nSamplesAvailableNext, p_output_fx, L_FRAME48k / MAX_PARAM_SPATIAL_SUBFRAMES ) ), IVAS_ERR_OK ) ) +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ { return error; } diff --git a/lib_dec/ivas_mc_param_dec_fx.c b/lib_dec/ivas_mc_param_dec_fx.c index 404854f91..db56ad03e 100644 --- a/lib_dec/ivas_mc_param_dec_fx.c +++ b/lib_dec/ivas_mc_param_dec_fx.c @@ -2240,8 +2240,13 @@ void ivas_param_mc_dec_render_fx( Word16 len = add( imult1616( slot_idx_start_cldfb_synth, hParamMC->num_freq_bands ), imult1616( hParamMC->num_freq_bands, hParamMC->subframe_nbslots[subframe_idx] ) ); scale_sig32( output_f_fx[ch], len, 5 - 11 ); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, &( output_f_fx[ch][slot_idx_start_cldfb_synth * hParamMC->num_freq_bands] ), + imult1616( hParamMC->num_freq_bands, hParamMC->subframe_nbslots[subframe_idx] ), 0, st_ivas->cldfbSynDec[ch] ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, &( output_f_fx[ch][slot_idx_start_cldfb_synth * hParamMC->num_freq_bands] ), imult1616( hParamMC->num_freq_bands, hParamMC->subframe_nbslots[subframe_idx] ), st_ivas->cldfbSynDec[ch] ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ scale_sig32( output_f_fx[ch], len, 11 - 5 ); // Q11 } diff --git a/lib_dec/ivas_mc_paramupmix_dec_fx.c b/lib_dec/ivas_mc_paramupmix_dec_fx.c index e70730926..f4d6cb535 100644 --- a/lib_dec/ivas_mc_paramupmix_dec_fx.c +++ b/lib_dec/ivas_mc_paramupmix_dec_fx.c @@ -874,8 +874,12 @@ static void ivas_mc_paramupmix_dec_sf( scale_sig32( st_ivas->cldfbSynDec[ch]->cldfb_state_fx, 
st_ivas->cldfbSynDec[ch]->cldfb_size, Q5 - Q11 ); // Q11 -> Q5 st_ivas->cldfbSynDec[ch]->Q_cldfb_state = Q5; move16(); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, &( output_fx[ch][0] ), imult1616( maxBand, st_ivas->hTcBuffer->subframe_nbslots[subframeIdx] ), 0, st_ivas->cldfbSynDec[ch] ); // output_fx returned in Q5 +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, &( output_fx[ch][0] ), imult1616( maxBand, st_ivas->hTcBuffer->subframe_nbslots[subframeIdx] ), st_ivas->cldfbSynDec[ch] ); // output_fx returned in Q5 - scale_sig32( st_ivas->cldfbSynDec[ch]->cldfb_state_fx, st_ivas->cldfbSynDec[ch]->cldfb_size, Q11 - Q5 ); // Q5 -> Q11 +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ + scale_sig32( st_ivas->cldfbSynDec[ch]->cldfb_state_fx, st_ivas->cldfbSynDec[ch]->cldfb_size, Q11 - Q5 ); // Q5 -> Q11 st_ivas->cldfbSynDec[ch]->Q_cldfb_state = Q11; move16(); } @@ -909,8 +913,13 @@ static void ivas_mc_paramupmix_dec_sf( ptr_re_fx[0] = Cldfb_RealBuffer_fx[ch][slot_idx]; // Q6 ptr_im_fx[0] = Cldfb_ImagBuffer_fx[ch][slot_idx]; // Q6 +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( ptr_re_fx, ptr_im_fx, &( pPcm_temp_fx[ch][L_mult0( hMCParamUpmix->num_freq_bands, slot_idx )] ), + hMCParamUpmix->num_freq_bands, 0, st_ivas->cldfbSynDec[ch] ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( ptr_re_fx, ptr_im_fx, &( pPcm_temp_fx[ch][L_mult0( hMCParamUpmix->num_freq_bands, slot_idx )] ), hMCParamUpmix->num_freq_bands, st_ivas->cldfbSynDec[ch] ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ } scale_sig32( st_ivas->cldfbSynDec[ch]->cldfb_state_fx, st_ivas->cldfbSynDec[ch]->cldfb_size, sub( Q11, st_ivas->cldfbSynDec[ch]->Q_cldfb_state ) ); // Q6 -> Q11 st_ivas->cldfbSynDec[ch]->Q_cldfb_state = Q11; diff --git a/lib_dec/ivas_osba_dec_fx.c b/lib_dec/ivas_osba_dec_fx.c index b048470d4..493301743 100644 --- a/lib_dec/ivas_osba_dec_fx.c +++ b/lib_dec/ivas_osba_dec_fx.c @@ -129,8 
+129,11 @@ ivas_error ivas_osba_dirac_td_binaural_jbm_fx( const UWord16 nSamplesAsked, /* i : number of CLDFB slots requested */ UWord16 *nSamplesRendered, /* o : number of CLDFB slots rendered */ UWord16 *nSamplesAvailable, /* o : number of CLDFB slots still to render */ - Word32 *output_fx[], /* o : rendered time signal Q11*/ - Word16 out_len /*Store the length of values in each channel*/ + Word32 *output_fx[] /* o : rendered time signal Q11*/ +#ifndef OPT_SBA_AVOID_SPAR_RESCALE + , + Word16 out_len /*Store the length of values in each channel*/ +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ ) { Word16 n; @@ -147,7 +150,11 @@ ivas_error ivas_osba_dirac_td_binaural_jbm_fx( channel_offset = st_ivas->nchan_ism; move16(); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + IF( NE_32( ( error = ivas_sba_dec_render_fx( st_ivas, nSamplesAsked, nSamplesRendered, nSamplesAvailable, &output_fx[channel_offset] ) ), IVAS_ERR_OK ) ) +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ IF( NE_32( ( error = ivas_sba_dec_render_fx( st_ivas, nSamplesAsked, nSamplesRendered, nSamplesAvailable, &output_fx[channel_offset], out_len ) ), IVAS_ERR_OK ) ) +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ { return error; } @@ -233,7 +240,11 @@ ivas_error ivas_osba_render_sf_fx( v_shr( p_output[n], Q11 - Q11, output_ism[n], nSamplesAsked ); // Q11 } +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + IF( NE_32( ( error = ivas_sba_dec_render_fx( st_ivas, nSamplesAsked, nSamplesRendered, nSamplesAvailableNext, p_output ) ), IVAS_ERR_OK ) ) +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ IF( NE_32( ( error = ivas_sba_dec_render_fx( st_ivas, nSamplesAsked, nSamplesRendered, nSamplesAvailableNext, p_output, 960 ) ), IVAS_ERR_OK ) ) +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ { return error; } diff --git a/lib_dec/ivas_sba_dec_fx.c b/lib_dec/ivas_sba_dec_fx.c index eb2a40ecb..f89e9601f 100644 --- a/lib_dec/ivas_sba_dec_fx.c +++ b/lib_dec/ivas_sba_dec_fx.c @@ -873,8 +873,11 @@ ivas_error ivas_sba_dec_render_fx( const UWord16 nSamplesAsked, /* i : number of CLDFB 
slots requested Q0*/ UWord16 *nSamplesRendered, /* o : number of CLDFB slots rendered Q0*/ UWord16 *nSamplesAvailableNext, /* o : number of CLDFB slots still to render Q0*/ - Word32 *output_fx[], /* o : rendered time signal Q11*/ - Word16 out_len /*Store the length of values in each channel*/ + Word32 *output_fx[] /* o : rendered time signal Q11*/ +#ifndef OPT_SBA_AVOID_SPAR_RESCALE + , + Word16 out_len /*Store the length of values in each channel*/ +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ ) { Word16 slots_to_render, first_sf, last_sf, subframe_idx; @@ -883,11 +886,15 @@ ivas_error ivas_sba_dec_render_fx( SPAR_DEC_HANDLE hSpar; SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom; Word32 *output_f_local_fx[MAX_OUTPUT_CHANNELS]; +#ifndef OPT_SBA_AVOID_SPAR_RESCALE Word16 output_f_local_len; +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ ivas_error error; +#ifndef OPT_SBA_AVOID_SPAR_RESCALE output_f_local_len = out_len; move16(); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ hSpar = st_ivas->hSpar; hSpatParamRendCom = st_ivas->hSpatParamRendCom; nchan_internal = ivas_sba_get_nchan_metadata_fx( st_ivas->sba_analysis_order, st_ivas->hDecoderConfig->ivas_total_brate ); @@ -919,13 +926,18 @@ ivas_error ivas_sba_dec_render_fx( { Word16 n_samples_sf = imult1616( slot_size, hSpar->subframe_nbslots[subframe_idx] ); /*Q0*/ +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + ivas_spar_dec_upmixer_sf_fx( st_ivas, output_f_local_fx, nchan_internal ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ ivas_spar_dec_upmixer_sf_fx( st_ivas, output_f_local_fx, nchan_internal, output_f_local_len ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ FOR( ch = 0; ch < nchan_out; ch++ ) { output_f_local_fx[ch] = output_f_local_fx[ch] + n_samples_sf; /*Q11*/ } - +#ifndef OPT_SBA_AVOID_SPAR_RESCALE output_f_local_len = sub( output_f_local_len, n_samples_sf ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ /* update combined orientation access index */ ivas_combined_orientation_update_index( st_ivas->hCombinedOrientationData, n_samples_sf ); 
} diff --git a/lib_dec/ivas_spar_decoder_fx.c b/lib_dec/ivas_spar_decoder_fx.c index c6c7565a9..461b18fbc 100644 --- a/lib_dec/ivas_spar_decoder_fx.c +++ b/lib_dec/ivas_spar_decoder_fx.c @@ -787,8 +787,11 @@ void ivas_spar_get_cldfb_gains_fx( cldfbAnalysis_ts_fx_fixed_q( ts_inout_fx, ts_re_fx, ts_im_fx, num_cldfb_bands, cldfbAnaDec0, &q_cldfb ); cldfb_reset_memory_fx( cldfbSynDec0 ); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( pp_ts_re_fx, pp_ts_im_fx, ts_inout_fx, num_cldfb_bands, 0, cldfbSynDec0 ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( pp_ts_re_fx, pp_ts_im_fx, ts_inout_fx, num_cldfb_bands, cldfbSynDec0 ); - +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ FOR( sample = 0; sample < stride; sample++ ) { T_fx[( ( slot * stride ) + sample )][slot] = ts_inout_fx[sample]; /*Q21*/ @@ -1683,10 +1686,14 @@ void ivas_spar_dec_digest_tc_fx( *-------------------------------------------------------------------*/ void ivas_spar_dec_upmixer_sf_fx( - Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ - Word32 *output_fx[], /* o : output audio channels Q11*/ - const Word16 nchan_internal, /* i : number of internal channels Q0*/ - Word16 out_len ) + Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ + Word32 *output_fx[], /* o : output audio channels Q11*/ + const Word16 nchan_internal /* i : number of internal channels Q0*/ +#ifndef OPT_SBA_AVOID_SPAR_RESCALE + , + Word16 out_len +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ +) { Word16 cldfb_band, num_cldfb_bands, numch_in, numch_out; Word32 *cldfb_in_ts_re_fx[MAX_OUTPUT_CHANNELS + MAX_NUM_OBJECTS][CLDFB_NO_COL_MAX]; @@ -1882,11 +1889,18 @@ void ivas_spar_dec_upmixer_sf_fx( { FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) { +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + Word16 diff = sub( 32767, hSpar->hMdDec->smooth_fac_fx[spar_band] ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ FOR( out_ch = 0; out_ch < numch_out; out_ch++ ) { FOR( in_ch = 0; in_ch < numch_in; in_ch++ ) { +#ifdef 
OPT_SBA_AVOID_SPAR_RESCALE + mixer_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( mixer_mat_fx[out_ch][in_ch][spar_band], diff ), hSpar->hMdDec->mixer_mat_prev2_fx[out_ch][in_ch][spar_band], hSpar->hMdDec->smooth_fac_fx[spar_band] ); /*q1*/ +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ mixer_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( mixer_mat_fx[out_ch][in_ch][spar_band], sub( 32767, hSpar->hMdDec->smooth_fac_fx[spar_band] ) ), hSpar->hMdDec->mixer_mat_prev2_fx[out_ch][in_ch][spar_band], hSpar->hMdDec->smooth_fac_fx[spar_band] ); /*q1*/ +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ move32(); hSpar->hMdDec->mixer_mat_prev2_fx[out_ch][in_ch][spar_band] = mixer_mat_fx[out_ch][in_ch][spar_band]; /*q1*/ move32(); @@ -2066,10 +2080,12 @@ void ivas_spar_dec_upmixer_sf_fx( } IF( LT_16( split_band, IVAS_MAX_NUM_BANDS ) ) { +#ifndef OPT_SBA_AVOID_SPAR_RESCALE Copy32( hSpar->hMdDec->mixer_mat_prev_fx[1][0][0], hSpar->hMdDec->mixer_mat_prev_fx[0][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); /*hSpar->hMdDec->Q_mixer_mat*/ Copy32( hSpar->hMdDec->mixer_mat_prev_fx[2][0][0], hSpar->hMdDec->mixer_mat_prev_fx[1][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); /*hSpar->hMdDec->Q_mixer_mat*/ Copy32( hSpar->hMdDec->mixer_mat_prev_fx[3][0][0], hSpar->hMdDec->mixer_mat_prev_fx[2][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); /*hSpar->hMdDec->Q_mixer_mat*/ Copy32( hSpar->hMdDec->mixer_mat_prev_fx[4][0][0], hSpar->hMdDec->mixer_mat_prev_fx[3][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); /*hSpar->hMdDec->Q_mixer_mat*/ +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ FOR( out_ch = 0; out_ch < numch_out; out_ch++ ) { @@ -2077,6 +2093,17 @@ void ivas_spar_dec_upmixer_sf_fx( { FOR( b = 0; b < num_spar_bands; b++ ) { +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + hSpar->hMdDec->mixer_mat_prev_fx[0][out_ch][in_ch][b] = 
hSpar->hMdDec->mixer_mat_prev_fx[1][out_ch][in_ch][b]; + hSpar->hMdDec->mixer_mat_prev_fx[1][out_ch][in_ch][b] = hSpar->hMdDec->mixer_mat_prev_fx[2][out_ch][in_ch][b]; + hSpar->hMdDec->mixer_mat_prev_fx[2][out_ch][in_ch][b] = hSpar->hMdDec->mixer_mat_prev_fx[3][out_ch][in_ch][b]; + hSpar->hMdDec->mixer_mat_prev_fx[3][out_ch][in_ch][b] = hSpar->hMdDec->mixer_mat_prev_fx[4][out_ch][in_ch][b]; + move32(); + move32(); + move32(); + move32(); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ + hSpar->hMdDec->mixer_mat_prev_fx[4][out_ch][in_ch][b] = hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][( b + ( md_sf * IVAS_MAX_NUM_BANDS ) )]; /*hSpar->hMdDec->Q_mixer_mat*/ move32(); } @@ -2129,18 +2156,26 @@ void ivas_spar_dec_upmixer_sf_fx( IF( ( EQ_32( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_FOA ) || !( EQ_32( st_ivas->hOutSetup.output_config, IVAS_AUDIO_CONFIG_BINAURAL ) || EQ_32( st_ivas->hOutSetup.output_config, IVAS_AUDIO_CONFIG_BINAURAL_ROOM_IR ) || EQ_32( st_ivas->hOutSetup.output_config, IVAS_AUDIO_CONFIG_BINAURAL_ROOM_REVERB ) ) ) && !( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_32( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) && EQ_32( st_ivas->renderer_type, RENDERER_BINAURAL_FASTCONV_ROOM ) ) ) { +#ifndef OPT_SBA_AVOID_SPAR_RESCALE Scale_sig32( st_ivas->cldfbSynDec[idx_in]->cldfb_state_fx, st_ivas->cldfbSynDec[idx_in]->p_filter_length, -6 ); /*st_ivas->cldfbSynDec[idx_in]->Q_cldfb_state-6*/ st_ivas->cldfbSynDec[idx_in]->Q_cldfb_state = sub( st_ivas->cldfbSynDec[idx_in]->Q_cldfb_state, 6 ); move16(); Scale_sig32( output_fx[ch], out_len, -6 ); /*Q5*/ +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ FOR( ts = 0; ts < hSpar->subframe_nbslots[hSpar->subframes_rendered]; ts++ ) { +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( &cldfb_in_ts_re_fx[idx_in][ts], &cldfb_in_ts_im_fx[idx_in][ts], &output_fx[ch][i_mult( ts, num_cldfb_bands )], num_cldfb_bands, 6, st_ivas->cldfbSynDec[idx_in] ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( 
&cldfb_in_ts_re_fx[idx_in][ts], &cldfb_in_ts_im_fx[idx_in][ts], &output_fx[ch][i_mult( ts, num_cldfb_bands )], num_cldfb_bands, st_ivas->cldfbSynDec[idx_in] ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ } +#ifndef OPT_SBA_AVOID_SPAR_RESCALE Scale_sig32( output_fx[ch], out_len, 6 ); /*Q11*/ Scale_sig32( st_ivas->cldfbSynDec[idx_in]->cldfb_state_fx, st_ivas->cldfbSynDec[idx_in]->p_filter_length, 6 ); /*st_ivas->cldfbSynDec[idx_in]->Q_cldfb_state+6*/ st_ivas->cldfbSynDec[idx_in]->Q_cldfb_state = add( st_ivas->cldfbSynDec[idx_in]->Q_cldfb_state, 6 ); move16(); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ } idx_in = add( idx_in, 1 ); @@ -2152,18 +2187,26 @@ void ivas_spar_dec_upmixer_sf_fx( /* CLDFB to time synthesis (overwrite mixer output) */ FOR( out_ch = 0; out_ch < numch_out_dirac; out_ch++ ) { +#ifndef OPT_SBA_AVOID_SPAR_RESCALE Scale_sig32( st_ivas->cldfbSynDec[out_ch]->cldfb_state_fx, st_ivas->cldfbSynDec[out_ch]->p_filter_length, -6 ); /*st_ivas->cldfbSynDec[out_ch]->Q_cldfb_state-6*/ st_ivas->cldfbSynDec[out_ch]->Q_cldfb_state = sub( st_ivas->cldfbSynDec[out_ch]->Q_cldfb_state, 6 ); move16(); Scale_sig32( output_fx[out_ch], out_len, -6 ); /*Q5*/ +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ FOR( ts = 0; ts < hSpar->subframe_nbslots[hSpar->subframes_rendered]; ts++ ) { +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( &cldfb_in_ts_re_fx[out_ch][ts], &cldfb_in_ts_im_fx[out_ch][ts], &output_fx[out_ch][i_mult( ts, num_cldfb_bands )], num_cldfb_bands, 6, st_ivas->cldfbSynDec[out_ch] ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( &cldfb_in_ts_re_fx[out_ch][ts], &cldfb_in_ts_im_fx[out_ch][ts], &output_fx[out_ch][i_mult( ts, num_cldfb_bands )], num_cldfb_bands, st_ivas->cldfbSynDec[out_ch] ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ } +#ifndef OPT_SBA_AVOID_SPAR_RESCALE Scale_sig32( output_fx[out_ch], out_len, 6 ); /*Q11*/ Scale_sig32( st_ivas->cldfbSynDec[out_ch]->cldfb_state_fx, st_ivas->cldfbSynDec[out_ch]->p_filter_length, 6 ); 
/*st_ivas->cldfbSynDec[out_ch]->Q_cldfb_state+6*/ st_ivas->cldfbSynDec[out_ch]->Q_cldfb_state = add( st_ivas->cldfbSynDec[out_ch]->Q_cldfb_state, 6 ); move16(); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ } } diff --git a/lib_enc/swb_pre_proc_fx.c b/lib_enc/swb_pre_proc_fx.c index acdd971fd..b73b0f992 100644 --- a/lib_enc/swb_pre_proc_fx.c +++ b/lib_enc/swb_pre_proc_fx.c @@ -1263,7 +1263,11 @@ void swb_pre_proc_ivas_fx( thr = icbwe_thr_TDM_fx; regV = icbwe_regressionValuesTDM_fx; +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( realBufferFlipped, imagBufferFlipped, shb_speech_fx_32, -1, 0, st->cldfbSynTd ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( realBufferFlipped, imagBufferFlipped, shb_speech_fx_32, -1, st->cldfbSynTd ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ Copy_Scale_sig_32_16( shb_speech_fx_32, shb_speech, L_FRAME16k, negate( sub( q_reImBuffer, 1 ) ) ); *Q_shb_spch = 0; move16(); diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 65a68889a..b5dd1f8b9 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -2600,7 +2600,11 @@ static void ivas_dirac_dec_binaural_process_output_fx( outSlotRePr_fx = &( outSlotRe_fx[0] ); outSlotImPr_fx = &( outSlotIm_fx[0] ); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( &outSlotRePr_fx, &outSlotImPr_fx, &( output_fx[chA][nBins * slot + offsetSamples] ), nBins, 0, cldfbSynDec[chA] ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( &outSlotRePr_fx, &outSlotImPr_fx, &( output_fx[chA][nBins * slot + offsetSamples] ), nBins, cldfbSynDec[chA] ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynDec[chA]->Q_cldfb_state = sub( q_result, 1 ); move16(); } diff --git a/lib_rend/ivas_dirac_output_synthesis_dec_fx.c b/lib_rend/ivas_dirac_output_synthesis_dec_fx.c index aa19285cf..1b4ee91b2 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec_fx.c +++ 
b/lib_rend/ivas_dirac_output_synthesis_dec_fx.c @@ -1333,22 +1333,22 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( sub( q_com, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ) ); /*h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev->q_com*/ } - /*Directional gain*/ - FOR( ch_idx = nchan_transport_foa; ch_idx < num_channels_dir; ch_idx++ ) + FOR( l = 0; l < num_freq_bands; l++ ) { - FOR( l = 0; l < num_freq_bands; l++ ) - { - aux_buf[l] = L_sub( ONE_IN_Q30, diffuseness[l] ); // Q30 - move32(); - ratio_float[l] = L_sub( ONE_IN_Q31, h_dirac_output_synthesis_state.direct_power_factor_fx[num_freq_bands + l] ); // Q31 - move32(); - ratio_float[l + num_freq_bands] = L_sub( ONE_IN_Q31, ratio_float[l] ); // Q31 - move32(); - } + aux_buf[l] = L_sub( ONE_IN_Q30, diffuseness[l] ); // Q30 + move32(); + ratio_float[l] = L_sub( ONE_IN_Q31, h_dirac_output_synthesis_state.direct_power_factor_fx[num_freq_bands + l] ); // Q31 + move32(); + ratio_float[l + num_freq_bands] = L_sub( ONE_IN_Q31, ratio_float[l] ); // Q31 + move32(); + } - v_mult_fixed( aux_buf, ratio_float, ratio_float, num_freq_bands ); //(Q30, Q31) -> Q30 - v_mult_fixed( aux_buf, &ratio_float[num_freq_bands], &ratio_float[num_freq_bands], num_freq_bands ); //(Q30, Q31) -> Q30 + v_mult_fixed( aux_buf, ratio_float, ratio_float, num_freq_bands ); //(Q30, Q31) -> Q30 + v_mult_fixed( aux_buf, &ratio_float[num_freq_bands], &ratio_float[num_freq_bands], num_freq_bands ); //(Q30, Q31) -> Q30 + /*Directional gain*/ + FOR( ch_idx = nchan_transport_foa; ch_idx < num_channels_dir; ch_idx++ ) + { v_mult_fixed( ratio_float, // Q30 &h_dirac_output_synthesis_state.direct_responses_fx[ch_idx * num_freq_bands], // Q31 &h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx[ch_idx * num_freq_bands], //(Q30, Q31) -> Q30 @@ -1642,6 +1642,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( } /*Directional stream*/ + Word16 offset = shl( i_mult( buf_idx, i_mult( num_freq_bands, 
num_protos_dir ) ), Q1 ); FOR( ch_idx = nchan_transport_foa; ch_idx < num_channels_dir; ch_idx++ ) { IF( hodirac_flag ) @@ -1709,7 +1710,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( ELSE { p_proto = h_dirac_output_synthesis_state.proto_direct_buffer_f_fx + - shl( i_mult( buf_idx, i_mult( num_freq_bands, num_protos_dir ) ), Q1 ) + + offset + shl( i_mult( proto_direct_index[ch_idx], num_freq_bands ), Q1 ); IF( EQ_16( proto_direct_index[ch_idx], 0 ) ) { @@ -1781,11 +1782,8 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( } ELSE { - FOR( l = 0; l < num_freq_bands_diff; l++ ) - { - p_gains_diff++; - p_gains_diff_prev++; - } + p_gains_diff += num_freq_bands_diff; + p_gains_diff_prev += num_freq_bands_diff; } } diff --git a/lib_rend/ivas_dirac_rend_fx.c b/lib_rend/ivas_dirac_rend_fx.c index 031a39855..e2e369389 100644 --- a/lib_rend/ivas_dirac_rend_fx.c +++ b/lib_rend/ivas_dirac_rend_fx.c @@ -4212,7 +4212,11 @@ static void ivas_masa_ext_dirac_render_sf_fx( ImagBuffer_fx[i] = Cldfb_ImagBuffer_fx[idx_in][i]; // q_cldfb } Word16 out_size = imult1616( hSpatParamRendCom->num_freq_bands, hSpatParamRendCom->subframe_nbslots[subframe_idx] ); +#ifdef OPT_SBA_AVOID_SPAR_RESCALE + cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, &( output_f_fx[ch][index_slot * hSpatParamRendCom->num_freq_bands] ), out_size, 0, hMasaExtRend->cldfbSynRend[idx_in] ); +#else /* OPT_SBA_AVOID_SPAR_RESCALE */ cldfbSynthesis_ivas_fx( RealBuffer_fx, ImagBuffer_fx, &( output_f_fx[ch][index_slot * hSpatParamRendCom->num_freq_bands] ), out_size, hMasaExtRend->cldfbSynRend[idx_in] ); +#endif /* OPT_SBA_AVOID_SPAR_RESCALE */ scale_sig32( &( output_f_fx[ch][index_slot * hSpatParamRendCom->num_freq_bands] ), out_size, sub( 11, q_out ) ); // q11 idx_in++; } -- GitLab From 79149552d20c26f57d5dbfd4d0b64e32f9f0ec96 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Wed, 19 Mar 2025 09:41:47 +0530 Subject: [PATCH 2/2] Disabling macro for another optimization --- 
lib_com/options.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/options.h b/lib_com/options.h index 02f7d18f3..c9207fb3c 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -168,7 +168,7 @@ #define FIX_1301_CORRECT_TD_CNST /* VA: Fix 1301, correct wrong constant in TD stereo */ #define NONBE_FIX_1277_EVS_DTX_HIGH_RATE_THRESHOLD /* VA/Eri: FLP issue 1277: Fix Mismatch in DTX high-rate threshold between EVS float and BASOP */ #define NONBE_FIX_708_OSBA_BR_SWITCHING_CRASH /* FhG: issue 708: fix crash in OSBA BR switching with long test vectors */ -#define OPT_STEREO_32KBPS_V1 /* Optimization made in stereo decoding path for 32kbps decoding */ +//#define OPT_STEREO_32KBPS_V1 /* Optimization made in stereo decoding path for 32kbps decoding */ #define DOT_PROD_CHOLESKY_64BIT /* FhG: Issue 1323, optimized 64 bit implementation of dot_product_cholesky() */ #define OPT_BASOP_ADD_v1 /* optimizations to avoid usage of BASOP_Util_Add_MantExp */ #define FIX_ISSUE_1327 /* Ittiam: Fix for issue 1327: Glitch when stereo is switching from TD to FD*/ -- GitLab