From 96b69bd9e04b8369e54303ca1dc5994f2ae62cb6 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Fri, 10 Apr 2026 16:36:09 +0200 Subject: [PATCH 01/10] BASOP issue 2521: Fix wrong porting for determination of first subframe length in ivas_sba_dirac_stereo_compute_td_stefi_nrgs() --- lib_com/options.h | 1 + lib_dec/ivas_sba_dirac_stereo_dec_fx.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/lib_com/options.h b/lib_com/options.h index 8d06feb49..86fdd924e 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -129,6 +129,7 @@ #define FIX_BASOP_2513_EXTRA_RETURN_REND_OPEN /* Nokia: BASOP issue 2513: Removes extra return block */ #define FIX_BASOP_2514_EFAP_PORTING_ERROR /* Nokia: BASOP issue 2514: Fix wrongly ported line */ #define FIX_BASOP_2516_REND_CUSTOM_LAYOUT_PORT_BUG /* Nokia: BASOP issue 2516: Fix porting bug in setting planar state for custom layout in renderer */ +#define FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS /* FhG: BASOP issue 2521: Fix wrong porting for determination of first subframe length in ivas_sba_dirac_stereo_compute_td_stefi_nrgs() */ /* ##################### End NON-BE switches ########################### */ diff --git a/lib_dec/ivas_sba_dirac_stereo_dec_fx.c b/lib_dec/ivas_sba_dirac_stereo_dec_fx.c index 2f7db14e8..1ae8fa501 100644 --- a/lib_dec/ivas_sba_dirac_stereo_dec_fx.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec_fx.c @@ -693,7 +693,11 @@ static void ivas_sba_dirac_stereo_compute_td_stefi_nrgs( test(); IF( ( EQ_16( core, ACELP_CORE ) && !fd_cng_flag ) || EQ_16( hStereoDft->core_hist[1], ACELP_CORE ) ) { +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + FOR( i = 0; i < shr( output_frame, 1 ); i++ ) +#else FOR( i = 0; i < shr( output_frame, 2 ); i++ ) +#endif { hb_nrg2 = Madd_32_32( hb_nrg2, hb_synth[i], hb_synth[i] ); /*2*q_hb_synth-31*/ } -- GitLab From a4d343e1ebed02e370fe7d483015c1bca75f354b Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Fri, 10 Apr 2026 21:47:35 +0200 Subject: [PATCH 02/10] fix calculation of 
hStereoDft->hb_nrg_subr_fx[] --- lib_dec/ivas_sba_dirac_stereo_dec_fx.c | 60 +++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/lib_dec/ivas_sba_dirac_stereo_dec_fx.c b/lib_dec/ivas_sba_dirac_stereo_dec_fx.c index 1ae8fa501..0a066f160 100644 --- a/lib_dec/ivas_sba_dirac_stereo_dec_fx.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec_fx.c @@ -684,20 +684,67 @@ static void ivas_sba_dirac_stereo_compute_td_stefi_nrgs( const Word16 q_hb_synth ) { Word16 i; +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + Word32 hb_nrg; + Word32 max_val; + Word16 shift; +#else Word32 hb_nrg = EPSILON_FIX; move32(); Word32 hb_nrg2 = EPSILON_FIX; move32(); +#endif + +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + maximum_abs_32_fx( hb_synth, output_frame, &max_val ); + shift = norm_l( max_val ); + if ( max_val == 0 ) + { + shift = 31; + move16(); + } + shift = sub( shift, shr( add( find_guarded_bits_fx( shr( output_frame, 1 ) ), 1 ), 1 ) ); +#endif test(); test(); IF( ( EQ_16( core, ACELP_CORE ) && !fd_cng_flag ) || EQ_16( hStereoDft->core_hist[1], ACELP_CORE ) ) { #ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + Word32 L_tmp; + Word16 tmp; + Word64 W_tmp; + + tmp = shl_sat( 1, shift ); + + W_tmp = EPSILON_FIX; + move64(); FOR( i = 0; i < shr( output_frame, 1 ); i++ ) + { + // needed to be adjusted for q + L_tmp = Mpy_32_16_1( hb_synth[i], tmp ); /* q_hb_synth + shift - 15 */ + W_tmp = W_mac_32_32( W_tmp, L_tmp, L_tmp ); /* 2 * (q_hb_synth + shift) - 29 */ + } + + hStereoDft->hb_nrg_subr_fx[0] = W_round48_L( W_tmp ); /* 2 * (q_hb_synth + shift) - 45 */ + move32(); + + W_tmp = EPSILON_FIX; + move64(); + FOR( ; i < output_frame; i++ ) + { + L_tmp = Mpy_32_16_1( hb_synth[i], tmp ); /* q_hb_synth + shift - 15 */ + W_tmp = W_mac_32_32( W_tmp, L_tmp, L_tmp ); /* 2 * (q_hb_synth + shift) - 45 */ + } + + hStereoDft->hb_nrg_subr_fx[1] = W_round48_L( W_tmp ); // 2 * (q_hb_synth + shift) - 45 + move32(); + hStereoDft->q_hb_nrg_subr = sub( shl( add( q_hb_synth, shift 
), 1 ), 45 ); + move16(); + + hb_nrg = L_add( hStereoDft->hb_nrg_subr_fx[0], hStereoDft->hb_nrg_subr_fx[1] ); // 2 * (q_hb_synth + shift) - 45 #else FOR( i = 0; i < shr( output_frame, 2 ); i++ ) -#endif { hb_nrg2 = Madd_32_32( hb_nrg2, hb_synth[i], hb_synth[i] ); /*2*q_hb_synth-31*/ } @@ -719,6 +766,7 @@ static void ivas_sba_dirac_stereo_compute_td_stefi_nrgs( hStereoDft->hb_nrg_subr_fx[1] = hb_nrg2; /*2*q_hb_synth-31*/ move32(); hb_nrg = L_add( hb_nrg, hb_nrg2 ); +#endif IF( EQ_16( hStereoDft->q_hb_stefi_sig_fx, q_hb_synth ) ) { @@ -747,12 +795,22 @@ static void ivas_sba_dirac_stereo_compute_td_stefi_nrgs( hStereoDft->hb_nrg_subr_fx[1] = 0; move32(); } +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + hStereoDft->hb_nrg_subr_fx[0] = ( Mpy_32_16_1( hStereoDft->hb_nrg_subr_fx[0], shl( shr( hStereoDft->NFFT, 1 ), 6 ) ) ); // 2 * (q_hb_synth + shift) - 40 + move32(); + hStereoDft->hb_nrg_subr_fx[1] = ( Mpy_32_16_1( hStereoDft->hb_nrg_subr_fx[1], shl( shr( hStereoDft->NFFT, 1 ), 6 ) ) ); // 2 * (q_hb_synth + shift) - 40 + move32(); + hStereoDft->q_hb_nrg_subr = sub( shl( ( q_hb_synth + shift ), 1 ), 45 + 9 ); + hStereoDft->hb_nrg_fx[0] = hb_nrg; /* todo: which Q-value is this supposed to be? 
*/ + move32(); +#else hStereoDft->hb_nrg_subr_fx[0] = hStereoDft->hb_nrg_subr_fx[0]; // imult3216(hStereoDft->hb_nrg_subr_fx[0] , shr(hStereoDft->NFFT, 1)); /*hStereoDft->q_hb_nrg_subr*/ move32(); hStereoDft->hb_nrg_subr_fx[1] = hStereoDft->hb_nrg_subr_fx[1]; // imult3216(hStereoDft->hb_nrg_subr_fx[1] , shr(hStereoDft->NFFT, 1)); /*hStereoDft->q_hb_nrg_subr*/ move32(); hStereoDft->hb_nrg_fx[0] = hb_nrg; move32(); +#endif hStereoDft->td_gain_fx[0] = 0; move32(); hStereoDft->core_hist[0] = core; /* full signal available for DTX with FD-CNG, thus apply stereo filling on full spectrum like in TCX */ -- GitLab From 5a673474fc44da5941abaed1313dc65bdc5f0edf Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Fri, 10 Apr 2026 22:12:47 +0200 Subject: [PATCH 03/10] fix initialization --- lib_dec/ivas_sba_dirac_stereo_dec_fx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib_dec/ivas_sba_dirac_stereo_dec_fx.c b/lib_dec/ivas_sba_dirac_stereo_dec_fx.c index 0a066f160..259500d1e 100644 --- a/lib_dec/ivas_sba_dirac_stereo_dec_fx.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec_fx.c @@ -685,9 +685,10 @@ static void ivas_sba_dirac_stereo_compute_td_stefi_nrgs( { Word16 i; #ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS - Word32 hb_nrg; + Word32 hb_nrg = EPSILON_FIX; Word32 max_val; Word16 shift; + move32(); #else Word32 hb_nrg = EPSILON_FIX; move32(); -- GitLab From eb97f38064744c4815973ec10f4a065fc97fbd9a Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Sat, 11 Apr 2026 23:27:43 +0200 Subject: [PATCH 04/10] various fixes wrt scaling of hStereoDft->hb_nrg_fx[] --- lib_dec/ivas_sba_dirac_stereo_dec_fx.c | 7 +++- lib_dec/ivas_stat_dec.h | 5 +++ lib_dec/ivas_stereo_dft_dec_fx.c | 54 +++++++++++++++++++++++++- lib_dec/ivas_stereo_icbwe_dec_fx.c | 15 ++++++- 4 files changed, 77 insertions(+), 4 deletions(-) diff --git a/lib_dec/ivas_sba_dirac_stereo_dec_fx.c b/lib_dec/ivas_sba_dirac_stereo_dec_fx.c index 259500d1e..333a92a97 100644 --- 
a/lib_dec/ivas_sba_dirac_stereo_dec_fx.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec_fx.c @@ -686,6 +686,7 @@ static void ivas_sba_dirac_stereo_compute_td_stefi_nrgs( Word16 i; #ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS Word32 hb_nrg = EPSILON_FIX; + Word16 q_hb_nrg = Q31; Word32 max_val; Word16 shift; move32(); @@ -744,6 +745,8 @@ static void ivas_sba_dirac_stereo_compute_td_stefi_nrgs( move16(); hb_nrg = L_add( hStereoDft->hb_nrg_subr_fx[0], hStereoDft->hb_nrg_subr_fx[1] ); // 2 * (q_hb_synth + shift) - 45 + q_hb_nrg = hStereoDft->q_hb_nrg_subr; + move16(); #else FOR( i = 0; i < shr( output_frame, 2 ); i++ ) { @@ -802,8 +805,10 @@ static void ivas_sba_dirac_stereo_compute_td_stefi_nrgs( hStereoDft->hb_nrg_subr_fx[1] = ( Mpy_32_16_1( hStereoDft->hb_nrg_subr_fx[1], shl( shr( hStereoDft->NFFT, 1 ), 6 ) ) ); // 2 * (q_hb_synth + shift) - 40 move32(); hStereoDft->q_hb_nrg_subr = sub( shl( ( q_hb_synth + shift ), 1 ), 45 + 9 ); - hStereoDft->hb_nrg_fx[0] = hb_nrg; /* todo: which Q-value is this supposed to be? 
*/ + hStereoDft->hb_nrg_fx[0] = hb_nrg; move32(); + hStereoDft->q_hb_nrg[0] = q_hb_nrg; + move16(); #else hStereoDft->hb_nrg_subr_fx[0] = hStereoDft->hb_nrg_subr_fx[0]; // imult3216(hStereoDft->hb_nrg_subr_fx[0] , shr(hStereoDft->NFFT, 1)); /*hStereoDft->q_hb_nrg_subr*/ move32(); diff --git a/lib_dec/ivas_stat_dec.h b/lib_dec/ivas_stat_dec.h index a3e039022..2994d56aa 100644 --- a/lib_dec/ivas_stat_dec.h +++ b/lib_dec/ivas_stat_dec.h @@ -199,6 +199,9 @@ typedef struct stereo_dft_dec_data_struct Word32 hb_nrg_fx[STEREO_DFT_CORE_HIST_MAX]; /* Q(q_hb_nrg) */ Word32 hb_nrg_subr_fx[STEREO_DFT_NBDIV]; /* Q(q_hb_nrg_subr) */ +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + Word16 q_hb_nrg[STEREO_DFT_CORE_HIST_MAX]; +#endif Word16 Q_nrg_subr; Word16 prev_Q_stefi_sig; Word16 q_td_gain[STEREO_DFT_CORE_HIST_MAX]; @@ -224,7 +227,9 @@ typedef struct stereo_dft_dec_data_struct Word32 smooth_buf_fx[SBA_DIRAC_STEREO_NUM_BANDS][SBA_DIRAC_NRG_SMOOTH_LONG + 1]; /* Q(q_smooth_buf_fx) */ Word16 smooth_fac_fx[NB_DIV][SBA_DIRAC_STEREO_NUM_BANDS]; /* Q15 */ Word16 q_smooth_buf_fx; +#ifndef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS Word16 q_hb_nrg; +#endif Word16 q_hb_nrg_subr; Word16 q_res_mem; diff --git a/lib_dec/ivas_stereo_dft_dec_fx.c b/lib_dec/ivas_stereo_dft_dec_fx.c index b578f52c3..11f7113ee 100644 --- a/lib_dec/ivas_stereo_dft_dec_fx.c +++ b/lib_dec/ivas_stereo_dft_dec_fx.c @@ -194,6 +194,9 @@ void stereo_dft_dec_reset_fx( set32_fx( hStereoDft->hb_stefi_sig_fx, 0, L_FRAME48k + NS2SA( 48000, STEREO_DFT_TD_STEFI_DELAY_NS ) ); set32_fx( hStereoDft->hb_nrg_fx, 0, STEREO_DFT_CORE_HIST_MAX ); +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + set16_fx( hStereoDft->q_hb_nrg, Q31, STEREO_DFT_CORE_HIST_MAX ); +#endif set32_fx( hStereoDft->td_gain_fx, 0, STEREO_DFT_CORE_HIST_MAX ); set16_fx( hStereoDft->q_td_gain, 0, STEREO_DFT_CORE_HIST_MAX ); hStereoDft->q_dft = 0; @@ -659,8 +662,13 @@ void stereo_dft_dec_update_fx( } Copy32( hStereoDft->hb_stefi_sig_fx + output_frame, 
hStereoDft->hb_stefi_sig_fx, hStereoDft->hb_stefi_delay ); /* Qx */ - Copy32( hStereoDft->hb_nrg_fx, hStereoDft->hb_nrg_fx + 1, STEREO_DFT_CORE_HIST_MAX - 1 ); /* Qx */ - Copy32( hStereoDft->td_gain_fx, hStereoDft->td_gain_fx + 1, STEREO_DFT_CORE_HIST_MAX - 1 ); /* q_td_gain */ +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + Copy32( hStereoDft->hb_nrg_fx, hStereoDft->hb_nrg_fx + 1, STEREO_DFT_CORE_HIST_MAX - 1 ); /* q_hb_nrg */ + Copy( hStereoDft->q_hb_nrg, hStereoDft->q_hb_nrg + 1, STEREO_DFT_CORE_HIST_MAX - 1 ); +#else + Copy32( hStereoDft->hb_nrg_fx, hStereoDft->hb_nrg_fx + 1, STEREO_DFT_CORE_HIST_MAX - 1 ); /* Qx */ +#endif + Copy32( hStereoDft->td_gain_fx, hStereoDft->td_gain_fx + 1, STEREO_DFT_CORE_HIST_MAX - 1 ); /* q_td_gain */ Copy( hStereoDft->q_td_gain, hStereoDft->q_td_gain + 1, STEREO_DFT_CORE_HIST_MAX - 1 ); IF( sba_dirac_stereo_flag ) @@ -2778,6 +2786,16 @@ static void stereo_dft_compute_td_stefi_params_fx( move32(); nrg_pred_DMX = hStereoDft->hb_nrg_fx[1]; move32(); +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + IF( GT_16( hStereoDft->q_hb_nrg[0], hStereoDft->q_hb_nrg[1] ) ) + { + nrg_DMX = L_shr( nrg_DMX, sub( hStereoDft->q_hb_nrg[0], hStereoDft->q_hb_nrg[1] ) ); + } + ELSE + { + nrg_pred_DMX = L_shr( nrg_pred_DMX, sub( hStereoDft->q_hb_nrg[1], hStereoDft->q_hb_nrg[0] ) ); + } +#endif op1 = BASOP_Util_Divide3232_Scale( L_add( EPSILON_FIX, nrg_DMX ), L_add( EPSILON_FIX, nrg_pred_DMX ), &q_div ); /* q_div */ q_sqrt = add( Q16, q_div ); @@ -3223,6 +3241,12 @@ void stereo_dft_generate_res_pred_fx( } ELSE IF( hStereoDft->core_hist[STEREO_DFT_STEFFI_DELAY_SHORT / 2] == ACELP_CORE ) { +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + Word16 q_dmx_nrg, q_diff; + Word16 norm_dmx_nrg; + Word16 temp_e; + Word32 L_temp; +#endif /* ACELP -> TCX/HQ core transition */ /* calculate high band energy only */ dmx_nrg = EPSILON_FIX; @@ -3232,8 +3256,34 @@ void stereo_dft_generate_res_pred_fx( dmx_nrg = L_add( dmx_nrg, Madd_32_32( Mpy_32_32( pDFT_DMX[2 * i], 
pDFT_DMX[2 * i] ), pDFT_DMX[2 * i + 1], pDFT_DMX[2 * i + 1] ) ); /* 2 * q_dft - 31 */ } +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + norm_dmx_nrg = norm_l( dmx_nrg ); + dmx_nrg = L_shl( dmx_nrg, norm_dmx_nrg ); + q_dmx_nrg = add( sub( shl( hStereoDft->q_dft, 1 ), 31 ), norm_dmx_nrg ); + + q_diff = sub( hStereoDft->q_hb_nrg[0], q_dmx_nrg ); + + /* dmx_nrg * 2 / hStereoDft->NFFT */ + L_temp = BASOP_Util_Divide3232_Scale_newton( dmx_nrg, shr( hStereoDft->NFFT, 1 ), &temp_e ); + L_temp = L_shr( L_temp, sub( 31, temp_e ) ); + + IF( GT_16( q_diff, 0 ) ) + { + hStereoDft->hb_nrg_fx[0] = L_add( L_shr( hStereoDft->hb_nrg_fx[0], q_diff ), L_temp ); + move32(); + hStereoDft->q_hb_nrg[0] = sub( hStereoDft->q_hb_nrg[0], q_diff ); + move16(); + } + ELSE + { + hStereoDft->hb_nrg_fx[0] = L_add( hStereoDft->hb_nrg_fx[0], L_shl( L_temp, q_diff ) ); + move32(); + /* hStereoDft->q_hb_nrg[0] stays as is */ + } +#else hStereoDft->hb_nrg_fx[0] = L_add( hStereoDft->hb_nrg_fx[0], div_l( dmx_nrg, shr( hStereoDft->NFFT, 2 ) ) ); /* Q15 */ move32(); +#endif *stop = bin0; move16(); } diff --git a/lib_dec/ivas_stereo_icbwe_dec_fx.c b/lib_dec/ivas_stereo_icbwe_dec_fx.c index a6fdf3aad..0a0956936 100644 --- a/lib_dec/ivas_stereo_icbwe_dec_fx.c +++ b/lib_dec/ivas_stereo_icbwe_dec_fx.c @@ -146,6 +146,9 @@ void stereo_icBWE_dec_fx( Word16 icbweM2Ref_fx, ratio_L_fx; Word16 gsMapping_fx; Word32 hb_nrg_fx; +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + Word16 q_hb_nrg; +#endif Word16 Q_syn_shb; Word16 shift_prev_pow, synthRef_shift; Word32 L_tmp; @@ -199,6 +202,9 @@ void stereo_icBWE_dec_fx( { hb_nrg_fx = 0; move32(); +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + q_hb_nrg = Q31; +#endif move32(); maximum_abs_32_fx( synthRef_fx, output_frame, &maxVal ); synthRef_shift = norm_l( maxVal ); @@ -235,7 +241,10 @@ void stereo_icBWE_dec_fx( hCPE->hStereoDft->hb_nrg_subr_fx[1] = W_round48_L( W_tmp ); // 2 * (Qsyn + SynthRef_shift) - 45 hb_nrg_fx = L_add( hCPE->hStereoDft->hb_nrg_subr_fx[0], 
hCPE->hStereoDft->hb_nrg_subr_fx[1] ); // 2 * (Qsyn + SynthRef_shift) - 45 - +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + q_hb_nrg = hStereoDft->q_hb_nrg_subr; + move16(); +#endif Copy32( synthRef_fx, hCPE->hStereoDft->hb_stefi_sig_fx + hCPE->hStereoDft->hb_stefi_delay, output_frame ); /* Qsynth */ } @@ -254,6 +263,10 @@ void stereo_icBWE_dec_fx( hCPE->hStereoDft->q_hb_nrg_subr = sub( shl( ( *Q_syn + synthRef_shift ), 1 ), 45 + 9 ); hCPE->hStereoDft->hb_nrg_fx[0] = hb_nrg_fx; // 2 * (Qx + SynthRef_shift) - 31 move32(); +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + hCPE->hStereoDft->q_hb_nrg[0] = q_hb_nrg; + move16(); +#endif hCPE->hStereoDft->td_gain_fx[0] = 0; move32(); hCPE->hStereoDft->core_hist[0] = st->core; -- GitLab From 8bc69a4a799baaa7dc39a45168da89aff6d9514c Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Sun, 12 Apr 2026 17:38:23 +0200 Subject: [PATCH 05/10] add safety mechanism to not overflow when adding the two energy values --- lib_dec/ivas_stereo_dft_dec_fx.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/lib_dec/ivas_stereo_dft_dec_fx.c b/lib_dec/ivas_stereo_dft_dec_fx.c index 11f7113ee..7fed6f2f8 100644 --- a/lib_dec/ivas_stereo_dft_dec_fx.c +++ b/lib_dec/ivas_stereo_dft_dec_fx.c @@ -3261,11 +3261,19 @@ void stereo_dft_generate_res_pred_fx( dmx_nrg = L_shl( dmx_nrg, norm_dmx_nrg ); q_dmx_nrg = add( sub( shl( hStereoDft->q_dft, 1 ), 31 ), norm_dmx_nrg ); - q_diff = sub( hStereoDft->q_hb_nrg[0], q_dmx_nrg ); - /* dmx_nrg * 2 / hStereoDft->NFFT */ L_temp = BASOP_Util_Divide3232_Scale_newton( dmx_nrg, shr( hStereoDft->NFFT, 1 ), &temp_e ); - L_temp = L_shr( L_temp, sub( 31, temp_e ) ); + L_temp = L_shr( L_temp, sub( 31, temp_e ) ); /* q_dmx_nrg */ + + /* for L_temp we have at least one bit of headroom due to the division above; check also for hStereoDft->hb_nrg_fx[0] and possibly reserve one bit*/ + test(); + IF( norm_l( hStereoDft->hb_nrg_fx[0] ) == 0 && hStereoDft->hb_nrg_fx[0] != 0 ) + { + 
hStereoDft->hb_nrg_fx[0] = L_shr( hStereoDft->hb_nrg_fx[0], 1 ); + hStereoDft->q_hb_nrg[0] = sub( hStereoDft->q_hb_nrg[0], 1 ); + } + + q_diff = sub( hStereoDft->q_hb_nrg[0], q_dmx_nrg ); IF( GT_16( q_diff, 0 ) ) { -- GitLab From 36c1cd946b3494442b515942d9db57f4193d0cbe Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Sun, 19 Apr 2026 20:08:39 +0200 Subject: [PATCH 06/10] replace maximum_abs_32_fx()-construct by L_norm_arr() --- lib_dec/ivas_sba_dirac_stereo_dec_fx.c | 8 +------- lib_dec/ivas_stereo_icbwe_dec_fx.c | 5 ++++- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/lib_dec/ivas_sba_dirac_stereo_dec_fx.c b/lib_dec/ivas_sba_dirac_stereo_dec_fx.c index 8eeb7aabf..a6cb6a919 100644 --- a/lib_dec/ivas_sba_dirac_stereo_dec_fx.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec_fx.c @@ -702,13 +702,7 @@ static void ivas_sba_dirac_stereo_compute_td_stefi_nrgs( #endif #ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS - maximum_abs_32_fx( hb_synth, output_frame, &max_val ); - shift = norm_l( max_val ); - if ( max_val == 0 ) - { - shift = 31; - move16(); - } + shift = L_norm_arr( hb_synth, output_frame ); shift = sub( shift, shr( add( find_guarded_bits_fx( shr( output_frame, 1 ) ), 1 ), 1 ) ); #endif diff --git a/lib_dec/ivas_stereo_icbwe_dec_fx.c b/lib_dec/ivas_stereo_icbwe_dec_fx.c index 0a0956936..0cde3301b 100644 --- a/lib_dec/ivas_stereo_icbwe_dec_fx.c +++ b/lib_dec/ivas_stereo_icbwe_dec_fx.c @@ -204,7 +204,9 @@ void stereo_icBWE_dec_fx( move32(); #ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS q_hb_nrg = Q31; -#endif + + synthRef_shift = L_norm_arr( synthRef_fx, output_frame ); +#else move32(); maximum_abs_32_fx( synthRef_fx, output_frame, &maxVal ); synthRef_shift = norm_l( maxVal ); @@ -213,6 +215,7 @@ void stereo_icBWE_dec_fx( synthRef_shift = 31; move16(); } +#endif synthRef_shift = sub( synthRef_shift, shr( add( find_guarded_bits_fx( shr( output_frame, 1 ) ), 1 ), 1 ) ); test(); IF( EQ_16( st->core, ACELP_CORE ) || EQ_16( st->last_core, ACELP_CORE ) ) -- 
GitLab From f0ed4cb947a8b7d715dd39e3f9637c90bf36f8e3 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Sun, 19 Apr 2026 22:36:38 +0200 Subject: [PATCH 07/10] take several review comments into account --- lib_dec/ivas_sba_dirac_stereo_dec_fx.c | 36 ++++++++-------- lib_dec/ivas_stereo_icbwe_dec_fx.c | 58 ++++++++++++++++++++++---- 2 files changed, 68 insertions(+), 26 deletions(-) diff --git a/lib_dec/ivas_sba_dirac_stereo_dec_fx.c b/lib_dec/ivas_sba_dirac_stereo_dec_fx.c index a6cb6a919..905d62d3c 100644 --- a/lib_dec/ivas_sba_dirac_stereo_dec_fx.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec_fx.c @@ -690,9 +690,9 @@ static void ivas_sba_dirac_stereo_compute_td_stefi_nrgs( Word16 i; #ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS Word32 hb_nrg = EPSILON_FIX; + Word16 q_hb_nrg_subr = Q31 + Q15 - Q6; /* in case IF( ( EQ_16( core, ACELP_CORE ) && !fd_cng_flag ) || EQ_16( hStereoDft->core_hist[1], ACELP_CORE ) ) is false, we want to end up with Q31, and we subtract Q15 - Q6 below; otherwise, we set it within this condition */ Word16 q_hb_nrg = Q31; - Word32 max_val; - Word16 shift; + Word16 shift, shift_hb_nrg_subr; move32(); #else Word32 hb_nrg = EPSILON_FIX; @@ -712,38 +712,34 @@ static void ivas_sba_dirac_stereo_compute_td_stefi_nrgs( { #ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS Word32 L_tmp; - Word16 tmp; Word64 W_tmp; - tmp = shl_sat( 1, shift ); - W_tmp = EPSILON_FIX; move64(); + FOR( i = 0; i < shr( output_frame, 1 ); i++ ) { - // needed to be adjusted for q - L_tmp = Mpy_32_16_1( hb_synth[i], tmp ); /* q_hb_synth + shift - 15 */ - W_tmp = W_mac_32_32( W_tmp, L_tmp, L_tmp ); /* 2 * (q_hb_synth + shift) - 29 */ + L_tmp = L_shl( hb_synth[i], shift ); /* q_hb_synth + shift */ + W_tmp = W_mac_32_32( W_tmp, L_tmp, L_tmp ); /* 2 * (q_hb_synth + shift) + 1 */ } - hStereoDft->hb_nrg_subr_fx[0] = W_round48_L( W_tmp ); /* 2 * (q_hb_synth + shift) - 45 */ + hStereoDft->hb_nrg_subr_fx[0] = W_round48_L( W_shr( W_tmp, 16 ) ); /* 2 * (q_hb_synth + shift) - 31 */ move32(); 
 W_tmp = EPSILON_FIX; move64(); FOR( ; i < output_frame; i++ ) { - L_tmp = Mpy_32_16_1( hb_synth[i], tmp ); /* q_hb_synth + shift - 15 */ - W_tmp = W_mac_32_32( W_tmp, L_tmp, L_tmp ); /* 2 * (q_hb_synth + shift) - 45 */ + L_tmp = L_shl( hb_synth[i], shift ); /* q_hb_synth + shift */ + W_tmp = W_mac_32_32( W_tmp, L_tmp, L_tmp ); /* 2 * (q_hb_synth + shift) + 1 */ } - hStereoDft->hb_nrg_subr_fx[1] = W_round48_L( W_tmp ); // 2 * (q_hb_synth + shift) - 45 + hStereoDft->hb_nrg_subr_fx[1] = W_round48_L( W_shr( W_tmp, 16 ) ); /* 2 * (q_hb_synth + shift) - 31 */ move32(); - hStereoDft->q_hb_nrg_subr = sub( shl( add( q_hb_synth, shift ), 1 ), 45 ); - move16(); + q_hb_nrg_subr = sub( shl( add( q_hb_synth, shift ), 1 ), 31 ); - hb_nrg = L_add( hStereoDft->hb_nrg_subr_fx[0], hStereoDft->hb_nrg_subr_fx[1] ); // 2 * (q_hb_synth + shift) - 45 - q_hb_nrg = hStereoDft->q_hb_nrg_subr; + hb_nrg = L_add( L_shr( hStereoDft->hb_nrg_subr_fx[0], 1 ), L_shr( hStereoDft->hb_nrg_subr_fx[1], 1 ) ); /* 2 * (q_hb_synth + shift) - 31 - 1 */ + q_hb_nrg = sub( q_hb_nrg_subr, 1 ); move16(); #else FOR( i = 0; i < shr( output_frame, 2 ); i++ ) @@ -798,11 +794,13 @@ static void ivas_sba_dirac_stereo_compute_td_stefi_nrgs( move32(); } #ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS - hStereoDft->hb_nrg_subr_fx[0] = ( Mpy_32_16_1( hStereoDft->hb_nrg_subr_fx[0], shl( shr( hStereoDft->NFFT, 1 ), 6 ) ) ); // 2 * (q_hb_synth + shift) - 40 + shift_hb_nrg_subr = s_min( norm_l( hStereoDft->hb_nrg_subr_fx[0] ), norm_l( hStereoDft->hb_nrg_subr_fx[1] ) ); + + hStereoDft->hb_nrg_subr_fx[0] = ( Mpy_32_16_1( L_shl( hStereoDft->hb_nrg_subr_fx[0], shift_hb_nrg_subr ), shl( hStereoDft->NFFT, 5 ) ) ); /* shl( ., 5) is shortcut for shl( shr( hStereoDft->NFFT, 1 ), 6 ) */ /* q_hb_nrg_subr + shift_hb_nrg_subr - (15 - 6) */ move32(); - hStereoDft->hb_nrg_subr_fx[1] = ( Mpy_32_16_1( hStereoDft->hb_nrg_subr_fx[1], shl( shr( hStereoDft->NFFT, 1 ), 6 ) ) ); // 2 * (q_hb_synth + shift) - 40 + hStereoDft->hb_nrg_subr_fx[1] = ( Mpy_32_16_1( L_shl(
hStereoDft->hb_nrg_subr_fx[1], shift_hb_nrg_subr ), shl( hStereoDft->NFFT, 5 ) ) ); /* shl( ., 5) is shortcut for shl( shr( hStereoDft->NFFT, 1 ), 6 ) */ /* q_hb_nrg_subr + shift_hb_nrg_subr - (15 - 6) */ move32(); - hStereoDft->q_hb_nrg_subr = sub( shl( ( q_hb_synth + shift ), 1 ), 45 + 9 ); + hStereoDft->q_hb_nrg_subr = sub( add( q_hb_nrg_subr, shift_hb_nrg_subr ), Q15 - Q6 ); hStereoDft->hb_nrg_fx[0] = hb_nrg; move32(); hStereoDft->q_hb_nrg[0] = q_hb_nrg; diff --git a/lib_dec/ivas_stereo_icbwe_dec_fx.c b/lib_dec/ivas_stereo_icbwe_dec_fx.c index 0cde3301b..fc1526ca9 100644 --- a/lib_dec/ivas_stereo_icbwe_dec_fx.c +++ b/lib_dec/ivas_stereo_icbwe_dec_fx.c @@ -138,7 +138,11 @@ void stereo_icBWE_dec_fx( Word16 nlMixFac_fx[NB_SUBFR16k]; Word16 specMapping_fx; Word16 fb_synth_nonref_fx[L_FRAME48k]; +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + Word32 prev_pow_fx, curr_pow_fx, maxVal1; +#else Word32 prev_pow_fx, curr_pow_fx, maxVal1, maxVal; +#endif Word16 scale_fx, e_scale_fx; Word16 alpha_fx, winSlope_fx, winLen_fx; Word16 prevgsMapping_fx; @@ -147,7 +151,8 @@ void stereo_icBWE_dec_fx( Word16 gsMapping_fx; Word32 hb_nrg_fx; #ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS - Word16 q_hb_nrg; + Word16 q_hb_nrg, q_hb_nrg_subr; + Word16 shift_hb_nrg_subr; #endif Word16 Q_syn_shb; Word16 shift_prev_pow, synthRef_shift; @@ -200,6 +205,10 @@ void stereo_icBWE_dec_fx( /* update buffers for TD stereo filling */ IF( EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) ) { +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + q_hb_nrg_subr = Q31 + Q15 - Q6; /* in case IF( EQ_16( st->core, ACELP_CORE ) || EQ_16( st->last_core, ACELP_CORE ) ) is false, we want to end up with Q31, and we subtract Q15 - Q6 below; otherwise, we set it within this condition */ + move16(); +#endif hb_nrg_fx = 0; move32(); #ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS @@ -220,6 +229,34 @@ void stereo_icBWE_dec_fx( test(); IF( EQ_16( st->core, ACELP_CORE ) || EQ_16( st->last_core, ACELP_CORE ) ) { +#ifdef
FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + Word64 W_tmp = EPSILON_FIX; + move64(); + + FOR( i = 0; i < shr( output_frame, 1 ); i++ ) + { + L_tmp = L_shl( synthRef_fx[i], synthRef_shift ); /* Qsyn + SynthRef_shift */ + W_tmp = W_mac_32_32( W_tmp, L_tmp, L_tmp ); /* 2 * (Qsyn + SynthRef_shift) + 1 */ + } + hCPE->hStereoDft->hb_nrg_subr_fx[0] = W_round48_L( W_shr( W_tmp, 16 ) ); /* 2 * (Qsyn + SynthRef_shift) - 31 */ + move32(); + q_hb_nrg_subr = sub( shl( add( *Q_syn, synthRef_shift ), 1 ), 31 ); + move16(); + + W_tmp = EPSILON_FIX; + move64(); + + FOR( ; i < output_frame; i++ ) + { + L_tmp = L_shl( synthRef_fx[i], synthRef_shift ); /* Qsyn + SynthRef_shift */ + W_tmp = W_mac_32_32( W_tmp, L_tmp, L_tmp ); /* 2 * (Qsyn + SynthRef_shift) + 1 */ + } + hCPE->hStereoDft->hb_nrg_subr_fx[1] = W_round48_L( W_shr( W_tmp, 16 ) ); /* 2 * (Qsyn + SynthRef_shift) - 31 */ + + hb_nrg_fx = L_add( L_shr( hCPE->hStereoDft->hb_nrg_subr_fx[0], 1 ), L_shr( hCPE->hStereoDft->hb_nrg_subr_fx[1], 1 ) ); // 2 * (Qsyn + SynthRef_shift) - 31 - 1 + q_hb_nrg = sub( q_hb_nrg_subr, 1 ); + move16(); +#else Word64 W_tmp = 0; move64(); tmp = shl_sat( 1, synthRef_shift ); @@ -244,9 +281,6 @@ void stereo_icBWE_dec_fx( hCPE->hStereoDft->hb_nrg_subr_fx[1] = W_round48_L( W_tmp ); // 2 * (Qsyn + SynthRef_shift) - 45 hb_nrg_fx = L_add( hCPE->hStereoDft->hb_nrg_subr_fx[0], hCPE->hStereoDft->hb_nrg_subr_fx[1] ); // 2 * (Qsyn + SynthRef_shift) - 45 -#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS - q_hb_nrg = hStereoDft->q_hb_nrg_subr; - move16(); #endif Copy32( synthRef_fx, hCPE->hStereoDft->hb_stefi_sig_fx + hCPE->hStereoDft->hb_stefi_delay, output_frame ); /* Qsynth */ @@ -259,6 +293,19 @@ void stereo_icBWE_dec_fx( hCPE->hStereoDft->hb_nrg_subr_fx[1] = 0; move32(); } +#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS + shift_hb_nrg_subr = s_min( norm_l( hCPE->hStereoDft->hb_nrg_subr_fx[0] ), norm_l( hCPE->hStereoDft->hb_nrg_subr_fx[1] ) ); + + hCPE->hStereoDft->hb_nrg_subr_fx[0] = ( Mpy_32_16_1( L_shl( 
hCPE->hStereoDft->hb_nrg_subr_fx[0], shift_hb_nrg_subr ), shl( hCPE->hStereoDft->NFFT, 5 ) ) ); /* shl( ., 5) is shortcut for shl( shr( hStereoDft->NFFT, 1 ), 6 ) */ /* q_hb_nrg_subr + shift_hb_nrg_subr - (15 - 6) */ + move32(); + hCPE->hStereoDft->hb_nrg_subr_fx[1] = ( Mpy_32_16_1( L_shl( hCPE->hStereoDft->hb_nrg_subr_fx[1], shift_hb_nrg_subr ), shl( hCPE->hStereoDft->NFFT, 5 ) ) ); /* shl( ., 5) is shortcut for shl( shr( hStereoDft->NFFT, 1 ), 6 ) */ /* q_hb_nrg_subr + shift_hb_nrg_subr - (15 - 6) */ + move32(); + hCPE->hStereoDft->q_hb_nrg_subr = sub( add( q_hb_nrg_subr, shift_hb_nrg_subr ), Q15 - Q6 ); + hCPE->hStereoDft->hb_nrg_fx[0] = hb_nrg_fx; /* q_hb_nrg */ + move32(); + hCPE->hStereoDft->q_hb_nrg[0] = q_hb_nrg; + move16(); +#else hCPE->hStereoDft->hb_nrg_subr_fx[0] = ( Mpy_32_16_1( hCPE->hStereoDft->hb_nrg_subr_fx[0], shl( shr( hCPE->hStereoDft->NFFT, 1 ), 6 ) ) ); // 2 * (Qsynth + SynthRef_shift) - 40 // 2 * (Qx + SynthRef_shift) - 31 - 15 move32(); hCPE->hStereoDft->hb_nrg_subr_fx[1] = ( Mpy_32_16_1( hCPE->hStereoDft->hb_nrg_subr_fx[1], shl( shr( hCPE->hStereoDft->NFFT, 1 ), 6 ) ) ); // 2 * (Qsynth + SynthRef_shift) - 40 @@ -266,9 +313,6 @@ void stereo_icBWE_dec_fx( hCPE->hStereoDft->q_hb_nrg_subr = sub( shl( ( *Q_syn + synthRef_shift ), 1 ), 45 + 9 ); hCPE->hStereoDft->hb_nrg_fx[0] = hb_nrg_fx; // 2 * (Qx + SynthRef_shift) - 31 move32(); -#ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS - hCPE->hStereoDft->q_hb_nrg[0] = q_hb_nrg; - move16(); #endif hCPE->hStereoDft->td_gain_fx[0] = 0; move32(); -- GitLab From 7f02a2514ccdc42407a77618b5440df09f734abd Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Sun, 19 Apr 2026 23:27:29 +0200 Subject: [PATCH 08/10] replace division by multiplication by inverse --- lib_dec/ivas_stereo_dft_dec_fx.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/lib_dec/ivas_stereo_dft_dec_fx.c b/lib_dec/ivas_stereo_dft_dec_fx.c index 7fed6f2f8..c9ae34889 100644 --- 
a/lib_dec/ivas_stereo_dft_dec_fx.c +++ b/lib_dec/ivas_stereo_dft_dec_fx.c @@ -3262,8 +3262,27 @@ void stereo_dft_generate_res_pred_fx( q_dmx_nrg = add( sub( shl( hStereoDft->q_dft, 1 ), 31 ), norm_dmx_nrg ); /* dmx_nrg * 2 / hStereoDft->NFFT */ - L_temp = BASOP_Util_Divide3232_Scale_newton( dmx_nrg, shr( hStereoDft->NFFT, 1 ), &temp_e ); - L_temp = L_shr( L_temp, sub( 31, temp_e ) ); /* q_dmx_nrg */ + SWITCH( hStereoDft->NFFT ) + { + case 960: + L_temp = Mpy_32_16_1( dmx_nrg, 17476 /* 1 / 480 in Q8 */ ); + L_temp = L_shr( L_temp, Q1 ); /* reserve 1 bit headroom */ + q_dmx_nrg = add( q_dmx_nrg, Q7 ); + BREAK; + case 640: + L_temp = Mpy_32_16_1( dmx_nrg, 26214 /* 1 / 320 in Q8 */ ); + L_temp = L_shr( L_temp, Q1 ); /* reserve 1 bit headroom */ + q_dmx_nrg = add( q_dmx_nrg, Q7 ); + BREAK; + case 320: + L_temp = Mpy_32_16_1( dmx_nrg, 26214 /* 1 / 160 in Q7 */ ); + L_temp = L_shr( L_temp, Q1 ); /* reserve 1 bit headroom */ + q_dmx_nrg = add( q_dmx_nrg, Q6 ); + BREAK; + default: + L_temp = BASOP_Util_Divide3232_Scale_newton( dmx_nrg, shr( hStereoDft->NFFT, 1 ), &temp_e ); + L_temp = L_shr( L_temp, sub( 31, temp_e ) ); /* q_dmx_nrg */ + } /* for L_temp we have at least one bit of headroom due to the division above; check also for hStereoDft->hb_nrg_fx[0] and possibly reserve one bit*/ test(); -- GitLab From 3d3c6e9f304a1ada3a4b8c8ce0d5f0f73e8a2fda Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Mon, 20 Apr 2026 11:24:35 +0200 Subject: [PATCH 09/10] make default case more beautiful --- lib_dec/ivas_stereo_dft_dec_fx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib_dec/ivas_stereo_dft_dec_fx.c b/lib_dec/ivas_stereo_dft_dec_fx.c index c9ae34889..ad601b185 100644 --- a/lib_dec/ivas_stereo_dft_dec_fx.c +++ b/lib_dec/ivas_stereo_dft_dec_fx.c @@ -3280,8 +3280,11 @@ void stereo_dft_generate_res_pred_fx( q_dmx_nrg = add( q_dmx_nrg, Q6 ); BREAK; default: - L_temp = BASOP_Util_Divide3232_Scale_newton( dmx_nrg, shr( hStereoDft->NFFT, 1 ), &temp_e ); -
L_temp = L_shr( L_temp, sub( 31, temp_e ) ); /* q_dmx_nrg */ + assert( 0 && "Invalid hStereoDft->NFFT" ); + /* we support only hStereoDft->NFFT == 320, 640 and 960; a more generic expression would be: */ + /* L_temp = BASOP_Util_Divide3232_Scale_newton( dmx_nrg, shr( hStereoDft->NFFT, 1 ), &temp_e ); */ + /* q_dmx_nrg = add( q_dmx_nrg, sub( 31, temp_e ) ); */ + BREAK; } /* for L_temp we have at least one bit of headroom due to the division above; check also for hStereoDft->hb_nrg_fx[0] and possibly reserve one bit*/ -- GitLab From 3f3ab338f71ab51f0e1970a208e6ff5d240b4f47 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Mon, 20 Apr 2026 11:36:08 +0200 Subject: [PATCH 10/10] remove unused variable --- lib_dec/ivas_stereo_dft_dec_fx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib_dec/ivas_stereo_dft_dec_fx.c b/lib_dec/ivas_stereo_dft_dec_fx.c index ad601b185..34f47a91c 100644 --- a/lib_dec/ivas_stereo_dft_dec_fx.c +++ b/lib_dec/ivas_stereo_dft_dec_fx.c @@ -3244,7 +3244,6 @@ void stereo_dft_generate_res_pred_fx( #ifdef FIX_BASOP_2521_DIRAC_STEREO_STEFFI_NRGS Word16 q_dmx_nrg, q_diff; Word16 norm_dmx_nrg; - Word16 temp_e; Word32 L_temp; #endif /* ACELP -> TCX/HQ core transition */ -- GitLab