diff --git a/lib_com/options.h b/lib_com/options.h index c228d4cc6d6fe121615ccd7bedac1e62d7fef302..daa609ed4784d0aec8826dadec02ddef1bf6fcd9 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -69,8 +69,8 @@ #define FIX_867_CLDFB_NRG_SCALE -#define FIX_1378_ACELP_OUT_OF_BOUNDS - +#define FIX_1378_ACELP_OUT_OF_BOUNDS + #define FIX_1379_MASA_ANGLE_ROUND /* Note: each compile switch (FIX_1101_...) is independent from the other ones */ @@ -86,10 +86,15 @@ //#define HARM_SCE_INIT #define DIV32_OPT_NEWTON /* FhG: faster 32 by 32 bit division */ #define MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of Cy calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Obsoletes IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE. */ +#define FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat /*FhG: reduces WMOPS - bit-exact*/ +#define FIX_1439_SPEEDUP_stereo_icBWE_dec_fx /*FhG: reduces WMOPS - bit-exact*/ +#define FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx /*FhG: reduces WMOPS - bit-exact*/ +#define FIX_1439_SPEEDUP_synthesise_fb_high_band_fx /*FhG: reduces WMOPS - bit-exact*/ #define FIX_1481_HARDCODE_DIV /* FhG: hardcode division results in stereo_dmx_evs_init_encoder_fx() */ #define TEST_HR #define REMOVE_EVS_DUPLICATES /* remove core-coder duplicated functions, ACELP low-band decoder */ + #endif diff --git a/lib_com/swb_tbe_com_fx.c b/lib_com/swb_tbe_com_fx.c index 869d73733629d32357fc04346799cfaf2a981112..2c0da33f5071c614d2b739b7d87e6d6d6ef0b774 100644 --- a/lib_com/swb_tbe_com_fx.c +++ b/lib_com/swb_tbe_com_fx.c @@ -6714,7 +6714,6 @@ void elliptic_bpf_48k_generic_fx( memory_fx[0][i] = shl_sat( memory_fx0[0][i], sub( *Q_input_fx, memory_fx_Q[0] ) ); memory2_fx[1][i] = L_shl_sat( memory_fx2[1][i], sub( add( *Q_input_fx, 11 ), memory_fx_Q[1] ) ); memory2_fx[2][i] = L_shl_sat( memory_fx2[2][i], sub( add( *Q_input_fx, 6 ), memory_fx_Q[2] ) ); - memory2_fx[3][i] = L_shl_sat( memory_fx2[3][i], sub( add( *Q_input_fx, 1 ), memory_fx_Q[3] ) ); move32(); move32(); move32(); @@ -7074,6 +7073,50 @@ void synthesise_fb_high_band_fx( tmp3 = add( sub( Qout, add( sub( 1, exp ), exp_tmp ) ), 16 ); /*Qout - (1 -exp +exp_tmp) + 16 */ FOR( i = 0; i < L_FRAME48k; i++ ) { +#ifdef FIX_1439_SPEEDUP_synthesise_fb_high_band_fx + L_tmp = Mult_32_16( ratio2, tmp[i] ); /* Q(16-exp+exp_tmp-15 = 1-exp+exp_tmp) */ + Word32 L_tmp32; + Word16 tmp16; + + // if (L_tmp < 0) + if ( L_tmp < 0 ) + { + L_tmp32 = L_negate( L_tmp ); + } + if ( L_tmp < 0 ) + { + L_tmp32 = L_shl_sat( L_tmp32, tmp3 ); + } + if ( L_tmp < 0 ) + { + tmp16 = extract_h( L_tmp32 ); + } + if ( L_tmp < 0 ) + { + tmp16 = negate( tmp16 ); + } + + // if (L_tmp == 0) + if ( L_tmp == 0 ) + { + tmp16 = 0; + move16(); + } + + // if (L_tmp > 0) + if ( L_tmp > 0 ) + { + L_tmp32 = L_shl_sat( L_tmp, tmp3 ); + } + if ( L_tmp > 0 ) + { + tmp16 = extract_h( L_tmp32 ); + } + + output[i] = tmp16; + move16(); + +#else L_tmp = Mult_32_16( ratio2, tmp[i] ); /* Q(16-exp+exp_tmp-15 = 1-exp+exp_tmp) */ IF( L_tmp < 0 ) { @@ -7085,6 +7128,7 @@ void synthesise_fb_high_band_fx( output[i] = extract_h( L_shl_sat( L_tmp, tmp3 ) ); /*Qout*/ move16(); } +#endif } return; } diff --git a/lib_com/tools_fx.c b/lib_com/tools_fx.c index 5f0e172bf5059905e04bbb9a205c91e4d22fdf5f..6dc1c45f29bea357c3529bd90ec7b2cace0924be 100644 --- a/lib_com/tools_fx.c +++ b/lib_com/tools_fx.c @@ -794,13 +794,37 @@ void Copy_Scale_sig_16_32_no_sat( } return; } +#ifdef FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat + L_tmp = L_shl_o( 1, exp0 - 1, &Overflow ); + + IF( L_tmp >= 0x7FFF ) + { + FOR( i = 0; i < lg; i++ ) + { + // y[i] = L_mult0(x[i], L_tmp); + y[i] = W_extract_l( W_mult_32_16( L_tmp, x[i] ) ); + move32(); /* Overflow can occur here */ + } + return; + } + // ELSE + { + Word16 tmp = extract_l( L_tmp ); + FOR( i = 0; i < lg; i++ ) + { + y[i] = L_mult( x[i], tmp ); + move32(); + } + } +#else L_tmp = L_shl_o( 1, exp0 - 1, &Overflow ); FOR( i = 0; i < lg; i++ ) { // y[i] = L_mult0(x[i], L_tmp); y[i] = W_extract_l( W_mult_32_16( L_tmp, x[i] ) ); - move32(); /* saturation can occur here */ + move32(); /* Overflow can occur here */ } +#endif } void Copy_Scale_sig_32_16( diff --git a/lib_dec/ivas_stereo_icbwe_dec_fx.c b/lib_dec/ivas_stereo_icbwe_dec_fx.c index c7bc98566326d456bc85283242ad1137e28fb4c6..772eb2d7d218c7061e3fd9208ac65f818c0ad218 100644 --- a/lib_dec/ivas_stereo_icbwe_dec_fx.c +++ b/lib_dec/ivas_stereo_icbwe_dec_fx.c @@ -904,6 +904,9 @@ void stereo_icBWE_dec_fx( winSlope_fx = div_s( 1, winLen_fx ); /* Q15 */ alpha_fx = winSlope_fx; /* Q15 */ move16(); +#ifdef FIX_1439_SPEEDUP_stereo_icBWE_dec_fx + Word16 winSlope_fx_ = sub( 32767 /* 1.0 in Q15*/, winSlope_fx ); +#endif FOR( i = 0; i < winLen_fx; i++ ) { L_tmp = L_mult0( alpha_fx, icbweM2Ref_fx ); /* Q29 */ @@ -911,10 +914,17 @@ void stereo_icBWE_dec_fx( tmp = shl( round_fx( L_tmp ), 1 ); /* Q14 */ synthRef_fx[i] = Mpy_32_16_1( synthRef_fx[i], tmp ); /* Qsyn - 1 */ move32(); +#ifdef FIX_1439_SPEEDUP_stereo_icBWE_dec_fx + if ( LE_16( alpha_fx, winSlope_fx_ ) ) + { + alpha_fx = add( alpha_fx, winSlope_fx ); /* Q15 */ + } +#else IF( LE_16( alpha_fx, sub( 32767 /* 1.0 in Q15*/, winSlope_fx ) ) ) { alpha_fx = add( alpha_fx, winSlope_fx ); /* Q15 */ } +#endif } FOR( ; i < NS2SA_FX2( st->output_Fs, FRAME_SIZE_NS ); i++ ) diff --git a/lib_dec/swb_tbe_dec_fx.c b/lib_dec/swb_tbe_dec_fx.c index 1b51a29fb642cfffd08c7a39517f1f2d7224b226..00be6455dc513fb50178f811b26fd32e5ba3e57b 100644 --- a/lib_dec/swb_tbe_dec_fx.c +++ b/lib_dec/swb_tbe_dec_fx.c @@ -7022,14 +7022,24 @@ void ivas_swb_tbe_dec_fx( tmp1 = 0; move16(); + +#ifdef FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx + Word32 idx32 = L_shr_r( 0x00333333, 10 ); /*NUM_SHB_SUBFR/L_FRAME16k*/ // Q16 +#endif + FOR( i = 0; i < L_FRAME16k; i++ ) { +#ifndef FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx Word16 idx = 0; move16(); IF( i != 0 ) { idx = idiv1616( i_mult( NUM_SHB_SUBFR, i ), L_FRAME16k ); } +#else + Word16 idx; + idx = extract_h( imult3216( idx32, i ) ); /*Q0*/ +#endif L_tmp1 = Mult_32_16( L_tmp, GainShape_fx[idx] ); /* Q : 18 + tmp +15 -15*/ White_exc16k_fx[i] = round_fx( Mult_32_16( L_tmp1, White_exc16k_fx[i] ) ); /* 18 + tmp +*Q_white_exc -15 -16 */ move16();