From 9daf8e10191a7e445d3fe6e60b9240a665846aa1 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Fri, 11 Jul 2025 10:52:47 +0200 Subject: [PATCH 1/2] replace i_mult_o() --- lib_com/options.h | 8 ++++++++ lib_dec/swb_tbe_dec_fx.c | 10 +++++++++- lib_enc/swb_tbe_enc_fx.c | 12 ++++++++++-- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 0cf958d03..c555d9cff 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -120,6 +120,14 @@ #define FIX_1735_W_SHL_SAT_L /* FhG: Usage of W_shl_sat_l() */ #define FIX_ISSUE_1792 /* FhG: fix noise bursts in binaural rendering */ +/* Info for issue 1816: + * Some compilers do not automatically use 32 bit for 16x16bit products. The code "Word32 c = (Word16) a * (Word16) b;" creates then a 16-bit result, sign-extending the + * lower 16-bit of the product, any upper bits are omitted. Example: Product 0x0100 * 0x0100 results in 0x0001.0000, gets truncated to its lower bits and return 0x0000. + * The issue is fixed by simply casting one of the product operands to Word32 in lib_com/basop32.c + */ +#define FIX_ISSUE_1816_USE_W32_FOR_MPY_W16xW16 /* FhG: (QA-FIX) Use doubled data width for 16x16 product, some compilers keep 16-bit format also for products */ +#define FIX_ISSUE_1816_IMPROVE_MPY_ZERO_DOT_1_PRECISION /* FhG: (NON-BE) improve precision of multiplications with factor 0.1f, avoids overflow with up-rounded value */ + #define FIX_ISSUE_1795_Q3_OVERFLOW /* FhG: Q3 overflow in function WB_BWE_gain_pred_fx (EVS legacy code) BE, MR1855 */ #define NONBE_FIX_1748_SPAR_DIV_OPT /*Dlb: issue 1748: SPAR common div optimizations*/ diff --git a/lib_dec/swb_tbe_dec_fx.c b/lib_dec/swb_tbe_dec_fx.c index 61dc3d0c5..35655a74b 100644 --- a/lib_dec/swb_tbe_dec_fx.c +++ b/lib_dec/swb_tbe_dec_fx.c @@ -2725,7 +2725,11 @@ void swb_tbe_dec_fx( } FOR( ; i < L_SHB_LAHEAD + 10; i++ ) { +#ifdef FIX_ISSUE_1816_IMPROVE_MPY_ZERO_DOT_1_PRECISION + temp = round_fx_sat( L_shl( L_mult( 0x6666 /* 0.1 in Q12 */, shl( sub( i, 19 ), 11 ) ), 1 ) ); +#else temp = i_mult_sat( sub( i, 19 ), 3277 /*0.1f Q15*/ ); /* Q15 */ +#endif L_tmp1 = Mult_32_16( L_shl_sat( 1, sub( 31, exp ) ), temp ); /* Q31-exp */ temp = sub( 32767 /*1.0f Q15*/, temp ); Lscale = L_add( Mult_32_16( Lscale, temp ), L_tmp1 ); @@ -6287,7 +6291,11 @@ void ivas_swb_tbe_dec_fx( } FOR( ; i < L_SHB_LAHEAD + 10; i++ ) { - temp_fx = i_mult_sat( sub( i, 19 ), 3277 /*0.1f Q15*/ ); /* Q15 */ +#ifdef FIX_ISSUE_1816_IMPROVE_MPY_ZERO_DOT_1_PRECISION + temp_fx = round_fx_sat( L_shl( L_mult( 0x6666 /* 0.1 in Q12 */, shl( sub( i, 19 ), 11 ) ), 1 ) ); +#else + temp_fx = i_mult_sat( sub( i, 19 ), 3277 /*0.1f Q15*/ ); /* Q15 */ +#endif L_tmp1 = Mult_32_16( L_shl_sat( 1, sub( 31, exp ) ), temp_fx ); /* Q31-exp */ temp_fx = sub( 32767 /*1.0f Q15*/, temp_fx ); Lscale = L_add( Mult_32_16( Lscale, temp_fx ), L_tmp1 ); diff --git a/lib_enc/swb_tbe_enc_fx.c b/lib_enc/swb_tbe_enc_fx.c index d016cd721..611a3625e 100644 --- a/lib_enc/swb_tbe_enc_fx.c +++ b/lib_enc/swb_tbe_enc_fx.c @@ -2537,7 +2537,11 @@ void swb_tbe_enc_fx( } FOR( ; i < L_SHB_LAHEAD + 10; i++ ) { - tmp = i_mult_o( sub( i, 19 ), 3277 /*0.1f Q15*/, &Overflow ); /* Q15 */ +#ifdef FIX_ISSUE_1816_IMPROVE_MPY_ZERO_DOT_1_PRECISION + tmp = round_fx_sat( L_shl( L_mult( 0x6666 /* 0.1 in Q12 */, shl( sub( i, 19 ), 11 ) ), 1 ) ); +#else + tmp = i_mult_o( sub( i, 19 ), 3277 /*0.1f Q15*/, &Overflow ); /* Q15 */ +#endif L_tmp1 = Mult_32_16( L_shl_o( 1, sub( 31, exp ), &Overflow ), tmp ); /* Q31-exp */ tmp = sub( 32767 /*1.0f Q15*/, tmp ); Lscale = L_add( Mult_32_16( Lscale, tmp ), L_tmp1 ); @@ -3833,7 +3837,11 @@ void swb_tbe_enc_ivas_fx( } FOR( ; i < L_SHB_LAHEAD + 10; i++ ) { - tmp = i_mult_o( sub( i, 19 ), 3277 /*0.1f Q15*/, &Overflow ); /* Q15 */ +#ifdef FIX_ISSUE_1816_IMPROVE_MPY_ZERO_DOT_1_PRECISION + tmp = round_fx_sat( L_shl( L_mult( 0x6666 /* 0.1 in Q12 */, shl( sub( i, 19 ), 11 ) ), 1 ) ); +#else + tmp = i_mult_o( sub( i, 19 ), 3277 /*0.1f Q15*/, &Overflow ); /* Q15 */ +#endif L_tmp1 = Mult_32_16( L_shl_o( 1, sub( 31, exp ), &Overflow ), tmp ); /* Q31-exp */ tmp = sub( 32767 /*1.0f Q15*/, tmp ); L_tmp = L_add( Mult_32_16( Lscale, tmp ), L_tmp1 ); -- GitLab From f3d2893fb20c3647145cb288dfabd3e0945d6f69 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Fri, 11 Jul 2025 12:09:51 +0200 Subject: [PATCH 2/2] keep EVS BE --- lib_dec/swb_tbe_dec_fx.c | 76 ++++++++++++++++++++++++++++++++++----- lib_enc/swb_tbe_enc_fx.c | 78 ++++++++++++++++++++++++++++++++++------ 2 files changed, 135 insertions(+), 19 deletions(-) diff --git a/lib_dec/swb_tbe_dec_fx.c b/lib_dec/swb_tbe_dec_fx.c index 35655a74b..3ed08633c 100644 --- a/lib_dec/swb_tbe_dec_fx.c +++ b/lib_dec/swb_tbe_dec_fx.c @@ -2723,13 +2723,41 @@ void swb_tbe_dec_fx( exp = 0; move16(); } - FOR( ; i < L_SHB_LAHEAD + 10; i++ ) - { #ifdef FIX_ISSUE_1816_IMPROVE_MPY_ZERO_DOT_1_PRECISION - temp = round_fx_sat( L_shl( L_mult( 0x6666 /* 0.1 in Q12 */, shl( sub( i, 19 ), 11 ) ), 1 ) ); + /* + code for EVS and IVAS are basically identical with the exception of i_mult_sat() which has precision issues + thus is was replaced for IVAS and kept for EVS, in order to keep EVS BE to test sequences and legacy implementations + */ + IF( EQ_16( st_fx->element_mode, EVS_MONO ) ) + { + FOR( ; i < L_SHB_LAHEAD + 10; i++ ) + { + temp = i_mult_sat( sub( i, 19 ), 3277 /*0.1f Q15*/ ); /* Q15 */ + L_tmp1 = Mult_32_16( L_shl_sat( 1, sub( 31, exp ) ), temp ); /* Q31-exp */ + temp = sub( 32767 /*1.0f Q15*/, temp ); + Lscale = L_add( Mult_32_16( Lscale, temp ), L_tmp1 ); + L_tmp = Mult_32_16( Lscale, shaped_shb_excitation[i] ); /* Q_bwe_exc + (31-exp) - 15 */ + shaped_shb_excitation[i] = round_fx( L_shl( L_tmp, exp ) ); /* Q_bwe_exc */ + move16(); + } + } + ELSE + { + FOR( ; i < L_SHB_LAHEAD + 10; i++ ) + { + temp = round_fx_sat( L_shl( L_mult( 0x6666 /* 0.1 in Q12 */, shl( sub( i, 19 ), 11 ) ), 1 ) ); + L_tmp1 = Mult_32_16( L_shl_sat( 1, sub( 31, exp ) ), temp ); /* Q31-exp */ + temp = sub( 32767 /*1.0f Q15*/, temp ); + Lscale = L_add( Mult_32_16( Lscale, temp ), L_tmp1 ); + L_tmp = Mult_32_16( Lscale, shaped_shb_excitation[i] ); /* Q_bwe_exc + (31-exp) - 15 */ + shaped_shb_excitation[i] = round_fx( L_shl( L_tmp, exp ) ); /* Q_bwe_exc */ + move16(); + } + } #else + FOR( ; i < L_SHB_LAHEAD + 10; i++ ) + { temp = i_mult_sat( sub( i, 19 ), 3277 /*0.1f Q15*/ ); /* Q15 */ -#endif L_tmp1 = Mult_32_16( L_shl_sat( 1, sub( 31, exp ) ), temp ); /* Q31-exp */ temp = sub( 32767 /*1.0f Q15*/, temp ); Lscale = L_add( Mult_32_16( Lscale, temp ), L_tmp1 ); @@ -2737,6 +2765,7 @@ void swb_tbe_dec_fx( shaped_shb_excitation[i] = round_fx( L_shl( L_tmp, exp ) ); /* Q_bwe_exc */ move16(); } +#endif /* Update SHB excitation */ Copy( shaped_shb_excitation + L_FRAME16k, hBWE_TD->state_syn_shbexc_fx, L_SHB_LAHEAD ); /* Q_bwe_exc */ @@ -6289,13 +6318,41 @@ void ivas_swb_tbe_dec_fx( exp = 0; move16(); } - FOR( ; i < L_SHB_LAHEAD + 10; i++ ) - { #ifdef FIX_ISSUE_1816_IMPROVE_MPY_ZERO_DOT_1_PRECISION - temp_fx = round_fx_sat( L_shl( L_mult( 0x6666 /* 0.1 in Q12 */, shl( sub( i, 19 ), 11 ) ), 1 ) ); + /* + code for EVS and IVAS are basically identical with the exception of i_mult_sat() which has precision issues + thus is was replaced for IVAS and kept for EVS, in order to keep EVS BE to test sequences and legacy implementations + */ + IF( EQ_16( st->element_mode, EVS_MONO ) ) + { + FOR( ; i < L_SHB_LAHEAD + 10; i++ ) + { + temp_fx = i_mult_sat( sub( i, 19 ), 3277 /*0.1f Q15*/ ); /* Q15 */ + L_tmp1 = Mult_32_16( L_shl_sat( 1, sub( 31, exp ) ), temp_fx ); /* Q31-exp */ + temp_fx = sub( 32767 /*1.0f Q15*/, temp_fx ); + Lscale = L_add( Mult_32_16( Lscale, temp_fx ), L_tmp1 ); + L_tmp = Mult_32_16( Lscale, shaped_shb_excitation_fx[i] ); /* Q_bwe_exc + (31-exp) - 15 */ + shaped_shb_excitation_fx[i] = round_fx_sat( L_shl_sat( L_tmp, exp ) ); /* Q_bwe_exc */ + move16(); + } + } + ELSE + { + FOR( ; i < L_SHB_LAHEAD + 10; i++ ) + { + temp_fx = round_fx_sat( L_shl( L_mult( 0x6666 /* 0.1 in Q12 */, shl( sub( i, 19 ), 11 ) ), 1 ) ); + L_tmp1 = Mult_32_16( L_shl_sat( 1, sub( 31, exp ) ), temp_fx ); /* Q31-exp */ + temp_fx = sub( 32767 /*1.0f Q15*/, temp_fx ); + Lscale = L_add( Mult_32_16( Lscale, temp_fx ), L_tmp1 ); + L_tmp = Mult_32_16( Lscale, shaped_shb_excitation_fx[i] ); /* Q_bwe_exc + (31-exp) - 15 */ + shaped_shb_excitation_fx[i] = round_fx_sat( L_shl_sat( L_tmp, exp ) ); /* Q_bwe_exc */ + move16(); + } + } #else - temp_fx = i_mult_sat( sub( i, 19 ), 3277 /*0.1f Q15*/ ); /* Q15 */ -#endif + FOR( ; i < L_SHB_LAHEAD + 10; i++ ) + { + temp_fx = i_mult_sat( sub( i, 19 ), 3277 /*0.1f Q15*/ ); /* Q15 */ L_tmp1 = Mult_32_16( L_shl_sat( 1, sub( 31, exp ) ), temp_fx ); /* Q31-exp */ temp_fx = sub( 32767 /*1.0f Q15*/, temp_fx ); Lscale = L_add( Mult_32_16( Lscale, temp_fx ), L_tmp1 ); @@ -6303,6 +6360,7 @@ void ivas_swb_tbe_dec_fx( shaped_shb_excitation_fx[i] = round_fx_sat( L_shl_sat( L_tmp, exp ) ); /* Q_bwe_exc */ move16(); } +#endif } ELSE { diff --git a/lib_enc/swb_tbe_enc_fx.c b/lib_enc/swb_tbe_enc_fx.c index 611a3625e..e1bbd3e19 100644 --- a/lib_enc/swb_tbe_enc_fx.c +++ b/lib_enc/swb_tbe_enc_fx.c @@ -2535,13 +2535,41 @@ void swb_tbe_enc_fx( exp = 0; move16(); } - FOR( ; i < L_SHB_LAHEAD + 10; i++ ) - { #ifdef FIX_ISSUE_1816_IMPROVE_MPY_ZERO_DOT_1_PRECISION - tmp = round_fx_sat( L_shl( L_mult( 0x6666 /* 0.1 in Q12 */, shl( sub( i, 19 ), 11 ) ), 1 ) ); + /* + code for EVS and IVAS are basically identical with the exception of i_mult_sat() which has precision issues + thus is was replaced for IVAS and kept for EVS, in order to keep EVS BE to test sequences and legacy implementations + */ + IF( EQ_16( st_fx->element_mode, EVS_MONO ) ) + { + FOR( ; i < L_SHB_LAHEAD + 10; i++ ) + { + tmp = i_mult_sat( sub( i, 19 ), 3277 /*0.1f Q15*/ ); /* Q15 */ + L_tmp1 = Mult_32_16( L_shl_o( 1, sub( 31, exp ), &Overflow ), tmp ); /* Q31-exp */ + tmp = sub( 32767 /*1.0f Q15*/, tmp ); + Lscale = L_add( Mult_32_16( Lscale, tmp ), L_tmp1 ); + L_tmp = Mult_32_16( Lscale, shaped_shb_excitation_fx[i] ); /* Q_bwe_exc + (31-exp) - 15 */ + shaped_shb_excitation_fx[i] = round_fx_o( L_shl_o( L_tmp, exp, &Overflow ), &Overflow ); /* Q_bwe_exc */ + move16(); + } + } + ELSE + { + FOR( ; i < L_SHB_LAHEAD + 10; i++ ) + { + tmp = round_fx_sat( L_shl( L_mult( 0x6666 /* 0.1 in Q12 */, shl( sub( i, 19 ), 11 ) ), 1 ) ); + L_tmp1 = Mult_32_16( L_shl_o( 1, sub( 31, exp ), &Overflow ), tmp ); /* Q31-exp */ + tmp = sub( 32767 /*1.0f Q15*/, tmp ); + Lscale = L_add( Mult_32_16( Lscale, tmp ), L_tmp1 ); + L_tmp = Mult_32_16( Lscale, shaped_shb_excitation_fx[i] ); /* Q_bwe_exc + (31-exp) - 15 */ + shaped_shb_excitation_fx[i] = round_fx_o( L_shl_o( L_tmp, exp, &Overflow ), &Overflow ); /* Q_bwe_exc */ + move16(); + } + } #else - tmp = i_mult_o( sub( i, 19 ), 3277 /*0.1f Q15*/, &Overflow ); /* Q15 */ -#endif + FOR( ; i < L_SHB_LAHEAD + 10; i++ ) + { + tmp = i_mult_o( sub( i, 19 ), 3277 /*0.1f Q15*/, &Overflow ); /* Q15 */ L_tmp1 = Mult_32_16( L_shl_o( 1, sub( 31, exp ), &Overflow ), tmp ); /* Q31-exp */ tmp = sub( 32767 /*1.0f Q15*/, tmp ); Lscale = L_add( Mult_32_16( Lscale, tmp ), L_tmp1 ); @@ -2549,6 +2577,7 @@ void swb_tbe_enc_fx( shaped_shb_excitation_fx[i] = round_fx_o( L_shl_o( L_tmp, exp, &Overflow ), &Overflow ); /* Q_bwe_exc */ move16(); } +#endif /* Update SHB excitation */ Copy( shaped_shb_excitation_fx + L_FRAME16k, hBWE_TD->state_syn_shbexc_fx, L_SHB_LAHEAD ); /* Q_bwe_exc */ @@ -3835,13 +3864,41 @@ void swb_tbe_enc_ivas_fx( exp = 0; move16(); } - FOR( ; i < L_SHB_LAHEAD + 10; i++ ) - { #ifdef FIX_ISSUE_1816_IMPROVE_MPY_ZERO_DOT_1_PRECISION - tmp = round_fx_sat( L_shl( L_mult( 0x6666 /* 0.1 in Q12 */, shl( sub( i, 19 ), 11 ) ), 1 ) ); + /* + code for EVS and IVAS are basically identical with the exception of i_mult_sat() which has precision issues + thus is was replaced for IVAS and kept for EVS, in order to keep EVS BE to test sequences and legacy implementations + */ + IF( EQ_16( st_fx->element_mode, EVS_MONO ) ) + { + FOR( ; i < L_SHB_LAHEAD + 10; i++ ) + { + tmp = i_mult_sat( sub( i, 19 ), 3277 /*0.1f Q15*/ ); /* Q15 */ + L_tmp1 = Mult_32_16( L_shl_o( 1, sub( 31, exp ), &Overflow ), tmp ); /* Q31-exp */ + tmp = sub( 32767 /*1.0f Q15*/, tmp ); + L_tmp = L_add( Mult_32_16( Lscale, tmp ), L_tmp1 ); + L_tmp = Mult_32_16( L_tmp, shaped_shb_excitation_fx[i] ); /* Q_bwe_exc + (31-exp) - 15 */ + shaped_shb_excitation_fx[i] = round_fx_o( L_shl_o( L_tmp, exp, &Overflow ), &Overflow ); /* Q_bwe_exc */ + move16(); + } + } + ELSE + { + FOR( ; i < L_SHB_LAHEAD + 10; i++ ) + { + tmp = round_fx_sat( L_shl( L_mult( 0x6666 /* 0.1 in Q12 */, shl( sub( i, 19 ), 11 ) ), 1 ) ); + L_tmp1 = Mult_32_16( L_shl_o( 1, sub( 31, exp ), &Overflow ), tmp ); /* Q31-exp */ + tmp = sub( 32767 /*1.0f Q15*/, tmp ); + L_tmp = L_add( Mult_32_16( Lscale, tmp ), L_tmp1 ); + L_tmp = Mult_32_16( L_tmp, shaped_shb_excitation_fx[i] ); /* Q_bwe_exc + (31-exp) - 15 */ + shaped_shb_excitation_fx[i] = round_fx_o( L_shl_o( L_tmp, exp, &Overflow ), &Overflow ); /* Q_bwe_exc */ + move16(); + } + } #else - tmp = i_mult_o( sub( i, 19 ), 3277 /*0.1f Q15*/, &Overflow ); /* Q15 */ -#endif + FOR( ; i < L_SHB_LAHEAD + 10; i++ ) + { + tmp = i_mult_o( sub( i, 19 ), 3277 /*0.1f Q15*/, &Overflow ); /* Q15 */ L_tmp1 = Mult_32_16( L_shl_o( 1, sub( 31, exp ), &Overflow ), tmp ); /* Q31-exp */ tmp = sub( 32767 /*1.0f Q15*/, tmp ); L_tmp = L_add( Mult_32_16( Lscale, tmp ), L_tmp1 ); @@ -3849,6 +3906,7 @@ void swb_tbe_enc_ivas_fx( shaped_shb_excitation_fx[i] = round_fx_o( L_shl_o( L_tmp, exp, &Overflow ), &Overflow ); /* Q_bwe_exc */ move16(); } +#endif } ELSE { -- GitLab