From 5e62bea20a756142b3b19457b8460a1aa0a8d24e Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Mon, 27 Oct 2025 11:23:55 +0100 Subject: [PATCH 1/2] replace sinf and cosf with BASOP implementation in DFT stereo encoder this achieves bit-exactness with Apple Clang between -O0 and -O2 --- lib_com/basop32.c | 6 ++++ lib_com/basop32.h | 53 ++++++++++++++++++----------------- lib_com/basop_util.c | 47 +++++++++++++++++++++++++++++++ lib_com/basop_util.h | 3 ++ lib_com/options.h | 1 + lib_enc/ivas_stereo_dft_enc.c | 19 ++++++++++++- 6 files changed, 102 insertions(+), 27 deletions(-) diff --git a/lib_com/basop32.c b/lib_com/basop32.c index 5a5a434846..704b2e21b3 100644 --- a/lib_com/basop32.c +++ b/lib_com/basop32.c @@ -616,6 +616,12 @@ Word16 shl( Word16 var1, Word16 var2 ) return ( var_out ); } + +Word16 shl_sat( Word16 var1, Word16 var2 ) +{ + Flag Overflow; + return shl_o( var1, var2, &Overflow ); +} #endif /* BASOP_NOGLOB */ /*___________________________________________________________________________ diff --git a/lib_com/basop32.h b/lib_com/basop32.h index a8a72aed04..59fdcf2ff4 100644 --- a/lib_com/basop32.h +++ b/lib_com/basop32.h @@ -169,46 +169,46 @@ Word32 L_shr( Word32 L_var1, Word16 var2 ); /* Long shift right, 1 */ Word16 shr_r( Word16 var1, Word16 var2 ); /* Shift right with round, 2 */ #else /* BASOP_NOGLOB */ -Word32 L_add( Word32 L_var1, Word32 L_var2 ); /* Long add, 1 */ -Word32 L_sub( Word32 L_var1, Word32 L_var2 ); /* Long sub, 1 */ -Word32 DEPR_L_add_c( Word32 L_var1, Word32 L_var2, Flag *Carry ); /* Long add with c, 2 */ -Word32 DEPR_L_sub_c( Word32 L_var1, Word32 L_var2, Flag *Carry ); /* Long sub with c, 2 */ -Word32 L_negate( Word32 L_var1 ); /* Long negate, 1 */ -Word16 mult_r( Word16 var1, Word16 var2 ); /* Mult with round, 1 */ -Word32 L_shl( Word32 L_var1, Word16 var2 ); /* Long shift left, 1 */ -Word32 L_shr( Word32 L_var1, Word16 var2 ); /* Long shift right, 1 */ -Word16 shr_r( Word16 var1, Word16 var2 ); /* Shift right with - round, 2 */ +Word32 L_add( Word32 L_var1, Word32 L_var2 ); /* Long add, 1 */ +Word32 L_sub( Word32 L_var1, Word32 L_var2 ); /* Long sub, 1 */ +Word32 DEPR_L_add_c( Word32 L_var1, Word32 L_var2, Flag *Carry ); /* Long add with c, 2 */ +Word32 DEPR_L_sub_c( Word32 L_var1, Word32 L_var2, Flag *Carry ); /* Long sub with c, 2 */ +Word32 L_negate( Word32 L_var1 ); /* Long negate, 1 */ +Word16 mult_r( Word16 var1, Word16 var2 ); /* Mult with round, 1 */ +Word32 L_shl( Word32 L_var1, Word16 var2 ); /* Long shift left, 1 */ +Word32 L_shr( Word32 L_var1, Word16 var2 ); /* Long shift right, 1 */ +Word16 shr_r( Word16 var1, Word16 var2 ); /* Shift right with + round, 2 */ #endif /* BASOP_NOGLOB */ #ifndef BASOP_NOGLOB Word16 mac_r( Word32 L_var3, Word16 var1, Word16 var2 ); /* Mac with rounding, 1 */ #else /* BASOP_NOGLOB */ -Word16 mac_r( Word32 L_var3, Word16 var1, Word16 var2 ); /* Mac with - rounding, 1 */ +Word16 mac_r( Word32 L_var3, Word16 var1, Word16 var2 ); /* Mac with + rounding, 1 */ #endif /* BASOP_NOGLOB */ #ifndef BASOP_NOGLOB Word16 msu_r( Word32 L_var3, Word16 var1, Word16 var2 ); /* Msu with rounding, 1 */ #else /* BASOP_NOGLOB */ -Word16 msu_r( Word32 L_var3, Word16 var1, Word16 var2 ); /* Msu with - rounding, 1 */ +Word16 msu_r( Word32 L_var3, Word16 var1, Word16 var2 ); /* Msu with + rounding, 1 */ #endif /* BASOP_NOGLOB */ #ifndef BASOP_NOGLOB Word32 L_deposit_h( Word16 var1 ); /* 16 bit var1 -> MSB, 1 */ Word32 L_deposit_l( Word16 var1 ); /* 16 bit var1 -> LSB, 1 */ #else /* BASOP_NOGLOB */ -Word32 L_deposit_h( Word16 var1 ); /* 16 bit var1 -> MSB, 1 */ -Word32 L_deposit_l( Word16 var1 ); /* 16 bit var1 -> LSB, 1 */ +Word32 L_deposit_h( Word16 var1 ); /* 16 bit var1 -> MSB, 1 */ +Word32 L_deposit_l( Word16 var1 ); /* 16 bit var1 -> LSB, 1 */ #endif /* BASOP_NOGLOB */ #ifndef BASOP_NOGLOB Word32 L_shr_r( Word32 L_var1, Word16 var2 ); /* Long shift right with round, 3 */ #else /* BASOP_NOGLOB */ -Word32 L_shr_r( Word32 L_var1, Word16 var2 ); /* Long shift right with - round, 3 */ +Word32 L_shr_r( Word32 L_var1, Word16 var2 ); /* Long shift right with + round, 3 */ #endif /* BASOP_NOGLOB */ #ifndef BASOP_NOGLOB Word32 L_abs( Word32 L_var1 ); /* Long abs, 1 */ @@ -217,11 +217,11 @@ Word16 norm_s( Word16 var1 ); /* Short norm, 1 */ Word16 div_s( Word16 var1, Word16 var2 ); /* Short division, 18 */ Word16 norm_l( Word32 L_var1 ); /* Long norm, 1 */ #else /* BASOP_NOGLOB */ -Word32 L_abs( Word32 L_var1 ); /* Long abs, 1 */ -Word32 DEPR_L_sat_co( Word32 L_var1, Flag Overflow, Flag Carry ); /* Long saturation, 4 */ -Word16 norm_s( Word16 var1 ); /* Short norm, 1 */ -Word16 div_s( Word16 var1, Word16 var2 ); /* Short division, 18 */ -Word16 norm_l( Word32 L_var1 ); /* Long norm, 1 */ +Word32 L_abs( Word32 L_var1 ); /* Long abs, 1 */ +Word32 DEPR_L_sat_co( Word32 L_var1, Flag Overflow, Flag Carry ); /* Long saturation, 4 */ +Word16 norm_s( Word16 var1 ); /* Short norm, 1 */ +Word16 div_s( Word16 var1, Word16 var2 ); /* Short division, 18 */ +Word16 norm_l( Word32 L_var1 ); /* Long norm, 1 */ #endif /* BASOP_NOGLOB */ @@ -233,9 +233,9 @@ Word32 L_mls( Word32, Word16 ); /* Weight FFS; currently assigned 5 */ Word16 div_l( Word32, Word16 ); /* Weight FFS; currently assigned 32 */ Word16 i_mult( Word16 a, Word16 b ); /* Weight FFS; currently assigned 3 */ #else /* BASOP_NOGLOB */ -Word32 L_mls( Word32, Word16 ); /* Weight FFS; currently assigned 5 */ -Word16 div_l( Word32, Word16 ); /* Weight FFS; currently assigned 32 */ -Word16 DEPR_i_mult( Word16 a, Word16 b ); /* Weight FFS; currently assigned 3 */ +Word32 L_mls( Word32, Word16 ); /* Weight FFS; currently assigned 5 */ +Word16 div_l( Word32, Word16 ); /* Weight FFS; currently assigned 32 */ +Word16 DEPR_i_mult( Word16 a, Word16 b ); /* Weight FFS; currently assigned 3 */ #endif /* BASOP_NOGLOB */ /* @@ -252,6 +252,7 @@ Word32 L_msu0( Word32 L_v3, Word16 v1, Word16 v2 ); /* 32-bit Msu w/o shift 1 * Word16 add_o( Word16 var1, Word16 var2, Flag *Overflow ); Word16 sub_o( Word16 var1, Word16 var2, Flag *Overflow ); Word16 shl_o( Word16 var1, Word16 var2, Flag *Overflow ); +Word16 shl_sat( Word16 var1, Word16 var2 ); Word16 mult_o( Word16 var1, Word16 var2, Flag *Overflow ); Word32 L_mult_o( Word16 var1, Word16 var2, Flag *Overflow ); Word16 round_fx_o( Word32 L_var1, Flag *Overflow ); diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 197fe88408..0b4784ef37 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -636,6 +636,53 @@ static Word16 fixp_sin_cos_residual_16( return residual; } +Word16 getCosWord16( Word16 theta ) +{ + Word16 result, residual, sine, cosine; + + residual = fixp_sin_cos_residual_16( theta, 2, &sine, &cosine, 0 ); + /* This negation prevents the subsequent addition from overflow */ + /* The negation cannot overflow, sine is in range [0x0..0x7FFF] */ + sine = negate( sine ); + result = mac_r( L_mult0( sine, residual ), cosine, 16384 ); + + + return result; +} + +#define EVS_PI_BY_2_FX ( Word16 )( 0x3244 ) // Q13 +#define EVS_PI_FX 25736 /* pi in Q13 */ +#define ONE_IN_Q14 16384 + +Word16 getSinWord16( Word16 theta ) +{ + Word16 sine; + Word32 theta_new = L_sub( EVS_PI_BY_2_FX, theta ); + Word16 l_theta; + IF( GT_32( theta_new, EVS_PI_FX ) ) + { + l_theta = extract_l( L_sub( L_sub( theta_new, EVS_PI_FX ), EVS_PI_FX ) ); + } + ELSE IF( LT_32( theta_new, -EVS_PI_FX ) ) + { + l_theta = extract_l( L_add( L_add( theta_new, EVS_PI_FX ), EVS_PI_FX ) ); + } + ELSE + { + l_theta = extract_l( theta_new ); + } + sine = getCosWord16( l_theta ); + IF( EQ_16( sine, ONE_IN_Q14 ) ) + { + sine = MAX_16; + } + ELSE + { + sine = shl( sine, 1 ); + } + return sine; +} + Word16 getCosWord16R2( Word16 theta ) diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index b76d5982f2..5c23b55861 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -273,6 +273,9 @@ Word16 mult0( Word16 x, /* i : Multiplier */ */ Word16 getCosWord16R2( Word16 theta ); +Word16 getSinWord16( Word16 theta ); +Word16 getCosWord16( Word16 theta ); + /****************************************************************************/ /*! \brief 16/16->16 unsigned integer division diff --git a/lib_com/options.h b/lib_com/options.h index 4c31c1480c..6b90358a0a 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -185,6 +185,7 @@ #define NONBE_1412_AVOID_ROUNDING_AZ_ELEV /* FhG: Avoid rounding when passing azimuth and elevation to efap_determine_gains() */ #define NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG /* FhG: Fix MDCT-Stereo comfort noise for certain noise types */ +#define NONBE_TRIG_FUNC_2_BASOP_IN_DFT_STEREO /* FhG: Fix for non-BE between different optimization levels in dft stereo code by using BASOPs for trigonometric functions */ /* ##################### End NON-BE switches ########################### */ diff --git a/lib_enc/ivas_stereo_dft_enc.c b/lib_enc/ivas_stereo_dft_enc.c index 0ae36724d8..4c29009f0d 100755 --- a/lib_enc/ivas_stereo_dft_enc.c +++ b/lib_enc/ivas_stereo_dft_enc.c @@ -42,6 +42,9 @@ #include "ivas_cnst.h" #include "ivas_rom_com.h" #include "ivas_rom_enc.h" +#ifdef NONBE_TRIG_FUNC_2_BASOP_IN_DFT_STEREO +#include "basop_util.h" +#endif #ifdef DEBUGGING #include "debug.h" #endif @@ -60,7 +63,7 @@ static FILE *pF = NULL; #define STEREO_DFT_NRG_PAST_MAX_BAND 9 #define STEREO_DFT_NRG_PAST_MAX_BAND_LB 4 -#define STEREO_DFT_DMX_CROSSOVER ( int16_t )( 132 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) + 0.5f ) /* crossover bin between binwise and bandwise DMX */ +#define STEREO_DFT_DMX_CROSSOVER (int16_t) ( 132 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) + 0.5f ) /* crossover bin between binwise and bandwise DMX */ #define ITD_VAD_E_BAND_N_INIT 200000 #define ITD_SID_PREV_FRAMES 5 @@ -1316,12 +1319,26 @@ void stereo_dft_enc_process( if ( hStereoDft->hItd->deltaItd[k_offset] != 0 && hStereoDft->hConfig->dmx_active ) { +#ifdef NONBE_TRIG_FUNC_2_BASOP_IN_DFT_STEREO + Word16 alpha_fx, c1_fx, s1_fx; +#endif /*time shift channels*/ alpha = -2.0f * EVS_PI * hStereoDft->hItd->deltaItd[k_offset] / hStereoDft->NFFT; c = 1.f; /*cos(0)*/ s = 0.f; /*sin(0)*/ +#ifdef NONBE_TRIG_FUNC_2_BASOP_IN_DFT_STEREO + /* Use BASOPs for calculating trigonometric functions to be independent of compiler optimization levels */ + /* convert angle to Q13 */ + alpha_fx = (Word16) ( alpha * 8192 ); + c1_fx = shl_sat( getCosWord16( alpha_fx ), 1 ); // Q15 + s1_fx = getSinWord16( alpha_fx ); // Q15 + + c1 = c1_fx / 32768.f; + s1 = s1_fx / 32768.f; +#else c1 = cosf( alpha ); s1 = sinf( alpha ); +#endif if ( alpha >= 0 ) { -- GitLab From f25607f0c43604d7d85884fa33c92d0199b775fd Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Mon, 27 Oct 2025 11:36:06 +0100 Subject: [PATCH 2/2] apply clang-format --- lib_com/basop32.h | 52 +++++++++++++++++------------------ lib_enc/ivas_stereo_dft_enc.c | 2 +- 2 files changed, 27 insertions(+), 27 deletions(-) mode change 100755 => 100644 lib_enc/ivas_stereo_dft_enc.c diff --git a/lib_com/basop32.h b/lib_com/basop32.h index 59fdcf2ff4..c72e18e824 100644 --- a/lib_com/basop32.h +++ b/lib_com/basop32.h @@ -169,46 +169,46 @@ Word32 L_shr( Word32 L_var1, Word16 var2 ); /* Long shift right, 1 */ Word16 shr_r( Word16 var1, Word16 var2 ); /* Shift right with round, 2 */ #else /* BASOP_NOGLOB */ -Word32 L_add( Word32 L_var1, Word32 L_var2 ); /* Long add, 1 */ -Word32 L_sub( Word32 L_var1, Word32 L_var2 ); /* Long sub, 1 */ -Word32 DEPR_L_add_c( Word32 L_var1, Word32 L_var2, Flag *Carry ); /* Long add with c, 2 */ -Word32 DEPR_L_sub_c( Word32 L_var1, Word32 L_var2, Flag *Carry ); /* Long sub with c, 2 */ -Word32 L_negate( Word32 L_var1 ); /* Long negate, 1 */ -Word16 mult_r( Word16 var1, Word16 var2 ); /* Mult with round, 1 */ -Word32 L_shl( Word32 L_var1, Word16 var2 ); /* Long shift left, 1 */ -Word32 L_shr( Word32 L_var1, Word16 var2 ); /* Long shift right, 1 */ -Word16 shr_r( Word16 var1, Word16 var2 ); /* Shift right with - round, 2 */ +Word32 L_add( Word32 L_var1, Word32 L_var2 ); /* Long add, 1 */ +Word32 L_sub( Word32 L_var1, Word32 L_var2 ); /* Long sub, 1 */ +Word32 DEPR_L_add_c( Word32 L_var1, Word32 L_var2, Flag *Carry ); /* Long add with c, 2 */ +Word32 DEPR_L_sub_c( Word32 L_var1, Word32 L_var2, Flag *Carry ); /* Long sub with c, 2 */ +Word32 L_negate( Word32 L_var1 ); /* Long negate, 1 */ +Word16 mult_r( Word16 var1, Word16 var2 ); /* Mult with round, 1 */ +Word32 L_shl( Word32 L_var1, Word16 var2 ); /* Long shift left, 1 */ +Word32 L_shr( Word32 L_var1, Word16 var2 ); /* Long shift right, 1 */ +Word16 shr_r( Word16 var1, Word16 var2 ); /* Shift right with + round, 2 */ #endif /* BASOP_NOGLOB */ #ifndef BASOP_NOGLOB Word16 mac_r( Word32 L_var3, Word16 var1, Word16 var2 ); /* Mac with rounding, 1 */ #else /* BASOP_NOGLOB */ -Word16 mac_r( Word32 L_var3, Word16 var1, Word16 var2 ); /* Mac with - rounding, 1 */ +Word16 mac_r( Word32 L_var3, Word16 var1, Word16 var2 ); /* Mac with + rounding, 1 */ #endif /* BASOP_NOGLOB */ #ifndef BASOP_NOGLOB Word16 msu_r( Word32 L_var3, Word16 var1, Word16 var2 ); /* Msu with rounding, 1 */ #else /* BASOP_NOGLOB */ -Word16 msu_r( Word32 L_var3, Word16 var1, Word16 var2 ); /* Msu with - rounding, 1 */ +Word16 msu_r( Word32 L_var3, Word16 var1, Word16 var2 ); /* Msu with + rounding, 1 */ #endif /* BASOP_NOGLOB */ #ifndef BASOP_NOGLOB Word32 L_deposit_h( Word16 var1 ); /* 16 bit var1 -> MSB, 1 */ Word32 L_deposit_l( Word16 var1 ); /* 16 bit var1 -> LSB, 1 */ #else /* BASOP_NOGLOB */ -Word32 L_deposit_h( Word16 var1 ); /* 16 bit var1 -> MSB, 1 */ -Word32 L_deposit_l( Word16 var1 ); /* 16 bit var1 -> LSB, 1 */ +Word32 L_deposit_h( Word16 var1 ); /* 16 bit var1 -> MSB, 1 */ +Word32 L_deposit_l( Word16 var1 ); /* 16 bit var1 -> LSB, 1 */ #endif /* BASOP_NOGLOB */ #ifndef BASOP_NOGLOB Word32 L_shr_r( Word32 L_var1, Word16 var2 ); /* Long shift right with round, 3 */ #else /* BASOP_NOGLOB */ -Word32 L_shr_r( Word32 L_var1, Word16 var2 ); /* Long shift right with - round, 3 */ +Word32 L_shr_r( Word32 L_var1, Word16 var2 ); /* Long shift right with + round, 3 */ #endif /* BASOP_NOGLOB */ #ifndef BASOP_NOGLOB Word32 L_abs( Word32 L_var1 ); /* Long abs, 1 */ @@ -217,11 +217,11 @@ Word16 norm_s( Word16 var1 ); /* Short norm, 1 */ Word16 div_s( Word16 var1, Word16 var2 ); /* Short division, 18 */ Word16 norm_l( Word32 L_var1 ); /* Long norm, 1 */ #else /* BASOP_NOGLOB */ -Word32 L_abs( Word32 L_var1 ); /* Long abs, 1 */ -Word32 DEPR_L_sat_co( Word32 L_var1, Flag Overflow, Flag Carry ); /* Long saturation, 4 */ -Word16 norm_s( Word16 var1 ); /* Short norm, 1 */ -Word16 div_s( Word16 var1, Word16 var2 ); /* Short division, 18 */ -Word16 norm_l( Word32 L_var1 ); /* Long norm, 1 */ +Word32 L_abs( Word32 L_var1 ); /* Long abs, 1 */ +Word32 DEPR_L_sat_co( Word32 L_var1, Flag Overflow, Flag Carry ); /* Long saturation, 4 */ +Word16 norm_s( Word16 var1 ); /* Short norm, 1 */ +Word16 div_s( Word16 var1, Word16 var2 ); /* Short division, 18 */ +Word16 norm_l( Word32 L_var1 ); /* Long norm, 1 */ #endif /* BASOP_NOGLOB */ @@ -233,9 +233,9 @@ Word32 L_mls( Word32, Word16 ); /* Weight FFS; currently assigned 5 */ Word16 div_l( Word32, Word16 ); /* Weight FFS; currently assigned 32 */ Word16 i_mult( Word16 a, Word16 b ); /* Weight FFS; currently assigned 3 */ #else /* BASOP_NOGLOB */ -Word32 L_mls( Word32, Word16 ); /* Weight FFS; currently assigned 5 */ -Word16 div_l( Word32, Word16 ); /* Weight FFS; currently assigned 32 */ -Word16 DEPR_i_mult( Word16 a, Word16 b ); /* Weight FFS; currently assigned 3 */ +Word32 L_mls( Word32, Word16 ); /* Weight FFS; currently assigned 5 */ +Word16 div_l( Word32, Word16 ); /* Weight FFS; currently assigned 32 */ +Word16 DEPR_i_mult( Word16 a, Word16 b ); /* Weight FFS; currently assigned 3 */ #endif /* BASOP_NOGLOB */ /* diff --git a/lib_enc/ivas_stereo_dft_enc.c b/lib_enc/ivas_stereo_dft_enc.c old mode 100755 new mode 100644 index 4c29009f0d..1cbd8b209a --- a/lib_enc/ivas_stereo_dft_enc.c +++ b/lib_enc/ivas_stereo_dft_enc.c @@ -63,7 +63,7 @@ static FILE *pF = NULL; #define STEREO_DFT_NRG_PAST_MAX_BAND 9 #define STEREO_DFT_NRG_PAST_MAX_BAND_LB 4 -#define STEREO_DFT_DMX_CROSSOVER (int16_t) ( 132 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) + 0.5f ) /* crossover bin between binwise and bandwise DMX */ +#define STEREO_DFT_DMX_CROSSOVER ( int16_t )( 132 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) + 0.5f ) /* crossover bin between binwise and bandwise DMX */ #define ITD_VAD_E_BAND_N_INIT 200000 #define ITD_SID_PREV_FRAMES 5 -- GitLab