diff --git a/lib_com/options.h b/lib_com/options.h index 3f158a8e89e5866193ff8bd205c886265559cfed..99206fa9d7879eebb3d5199f8de46307827f36de 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -90,12 +90,12 @@ #define FIX_1439_SPEEDUP_stereo_icBWE_dec_fx /*FhG: reduces WMOPS - bit-exact*/ #define FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx /*FhG: reduces WMOPS - bit-exact*/ #define FIX_1439_SPEEDUP_synthesise_fb_high_band_fx /*FhG: reduces WMOPS - bit-exact*/ +#define FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic /*FhG: reduces maintenance complexity & reduces WMOPS & prepares STAGE2 patch*/ +#define FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 /*FhG: reduces WMOPS*/ #define FIX_1481_HARDCODE_DIV /* FhG: hardcode division results in stereo_dmx_evs_init_encoder_fx() */ #define FIX_1486_IND_SHB_RES /* VA: Fix for issue 1486: align the usage of IND_SHB_RES_GS indices with float code */ #define TEST_HR - - #define REMOVE_EVS_DUPLICATES /* remove core-coder duplicated functions, ACELP low-band decoder */ #endif diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index e4d9b3dab41aee18e74813c80bdc38d23d801c0d..41f68807ed3d6c443660fbfa16a6b5c5e817d62d 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -3264,6 +3264,20 @@ void interp_code_4over2_fx( void wb_tbe_extras_reset_synth_fx( Word16 state_lsyn_filt_shb[], Word16 state_lsyn_filt_dwn_shb[], Word16 state_32and48k_WB_upsample[], Word16 state_resamp_HB[] ); +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic +void elliptic_bpf_48k_generic_fx( +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + const int16_t element_mode, +#endif + Word16 IsUpsampled3, + Word16 input_fx[], /* i : input signal Q_input_fx*/ + Word16 *Q_input_fx, + Word16 output_fx[], /* o : output signal */ + Word32 memory_fx[][4], /* i/o: 4 arrays of 4 for memory memory_fx_Q */ + Word16 memory_fx_Q[], + const Word16 full_band_bpf[][5] /* i : filter coefficients b0,b1,b2,a0,a1,a2 Q13 */ +); +#else void elliptic_bpf_48k_generic_fx( const Word16 input_fx[], /* i : i signal Q_input_fx */ Word16 *Q_input_fx, @@ -3272,6 +3286,7 @@ void elliptic_bpf_48k_generic_fx( Word16 memory_fx_Q[], const Word16 full_band_bpf[][5] /* i : filter coefficients b0,b1,b2,a0,a1,a2 Q13 */ ); +#endif void synthesise_fb_high_band_fx( const Word16 excitation_in[], /* i : full band excitation */ @@ -3284,7 +3299,12 @@ void synthesise_fb_high_band_fx( Word16 *prev_fbbwe_ratio, /* o : previous frame energy for FEC */ Word32 bpf_memory[][4], /* i/o: memory for elliptic bpf 48k */ Word16 bpf_memory_Q[], - Word16 Qout ); + Word16 Qout +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + , + int16_t element_mode +#endif +); #ifndef REMOVE_EVS_DUPLICATES void prep_tbe_exc_fx( diff --git a/lib_com/swb_tbe_com_fx.c b/lib_com/swb_tbe_com_fx.c index 2c0da33f5071c614d2b739b7d87e6d6d6ef0b774..5f391b271fbb9f8669429a69fbf5aa11e0180141 100644 --- a/lib_com/swb_tbe_com_fx.c +++ b/lib_com/swb_tbe_com_fx.c @@ -6695,7 +6695,15 @@ void wb_tbe_extras_reset_synth_fx( *-------------------------------------------------------------------*/ void elliptic_bpf_48k_generic_fx( +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + int16_t element_mode, +#endif +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + Word16 IsUpsampled3, + Word16 input_fx[], /* i : input signal Q_input_fx*/ +#else const Word16 input_fx[], /* i : input signal Q_input_fx*/ +#endif Word16 *Q_input_fx, Word16 output_fx[], /* o : output signal memory_fx_Q */ Word32 memory_fx2[][4], /* i/o: 4 arrays of 4 for memory */ @@ -6704,12 +6712,38 @@ void elliptic_bpf_48k_generic_fx( ) { Word16 i, j; +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + Word16 memory_fx0, Q_temp, Q_temp2; + Word32 L_tmp_buffer[L_FRAME48k + 4], L_tmp2_buffer[L_FRAME48k + 4], L_output_buffer[L_FRAME48k + 4], L_tmpX; + Word32 L_tmpMax; +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + Word64 W_tmpX; + Word64 W_tmpY; +#endif + + Word32 *L_tmp = &L_tmp_buffer[4]; + Word32 *L_tmp2 = &L_tmp2_buffer[4]; + Word32 *L_output = &L_output_buffer[4]; +#else Word16 memory_fx0[4][4], memory_fx[4][4], Q_temp, Q_temp2; Word32 L_tmp[L_FRAME48k], L_tmp2[L_FRAME48k], L_output[L_FRAME48k], L_tmpX, memory2_fx[4][4], L_tmpMax; Word32 memory2_fx_2[4], memory2_fx_3[4]; +#endif FOR( i = 0; i < 4; i++ ) { +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + memory_fx0 = extract_l( memory_fx2[0][i] ); + input_fx[i - 4] = shl_sat( memory_fx0, sub( *Q_input_fx, memory_fx_Q[0] ) ); + L_tmp[i - 4] = L_shl_sat( memory_fx2[1][i], sub( add( *Q_input_fx, 11 ), memory_fx_Q[1] ) ); + L_tmp2[i - 4] = L_shl_sat( memory_fx2[2][i], sub( add( *Q_input_fx, 6 ), memory_fx_Q[2] ) ); + // memory2_fx[3][i] = L_shl_sat( memory_fx2[3][i], sub( add( *Q_input_fx, 1 ), memory_fx_Q[3] ) ); + move32(); + move32(); + move32(); + move32(); + move32(); +#else memory_fx0[0][i] = extract_l( memory_fx2[0][i] ); memory_fx[0][i] = shl_sat( memory_fx0[0][i], sub( *Q_input_fx, memory_fx_Q[0] ) ); memory2_fx[1][i] = L_shl_sat( memory_fx2[1][i], sub( add( *Q_input_fx, 11 ), memory_fx_Q[1] ) ); @@ -6719,8 +6753,123 @@ void elliptic_bpf_48k_generic_fx( move32(); move32(); move32(); +#endif } +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + L_tmpMax = L_add( 0, 0 ); + IF( !IsUpsampled3 ) + { +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + IF( element_mode ) + { + FOR( i = 0; i < L_FRAME48k; i++ ) + { + /*duplicate this into unrolled loopsections in IsUpsampled3-path and dont forget to delete 0-set-input[1,2,4,5,7,8...]*/ + W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] ); + W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] ); + W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); + W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 2], full_band_bpf_fx[0][2] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); + W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); + W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); + L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ + move32(); + } + } + ELSE +#endif /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/ + { + FOR( i = 0; i < L_FRAME48k; i++ ) + { + L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 ); /*Q_input_fx + 13 + 1 - 3*/ + L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX ); /*Q_input_fx + 13 + 1 - 3*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX ); /*Q_input_fx + 13 + 1 - 3*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX ); /*Q_input_fx + 13 + 1 - 3*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX ); /*Q_input_fx + 13 + 1 - 3*/ + L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + move32(); + } + } + } /*IsUpsampled3*/ + ELSE + { +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + IF( element_mode ) + { + FOR( i = 0; i < L_FRAME48k; ) + { + W_tmpX = W_mac_16_16( 0, input_fx[i - 3], full_band_bpf_fx[0][3] ); + W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); + W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); + L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ + move32(); + i++; + + W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] ); + W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); + W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); + L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ + move32(); + i++; + + W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); + W_tmpX = W_mac_16_16( 0, input_fx[i - 2], full_band_bpf_fx[0][2] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); + L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ + move32(); + i++; + } + } + ELSE +#endif /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/ + { + FOR( i = 0; i < L_FRAME48k; ) + { + L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), 0 ); /*Q_input_fx + 13 + 1 - 3*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX ); /*Q_input_fx + 13 + 1 - 3*/ + L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + move32(); + i++; + + L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 ); /*Q_input_fx + 13 + 1 - 3*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX ); /*Q_input_fx + 13 + 1 - 3*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + move32(); + i++; + + L_tmpX = L_sub_sat( 0, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX ); /*Q_input_fx + 13 + 1 - 3*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ + move32(); + i++; + } + } + + } /*IsUpsampled3*/ + +#else /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic*/ L_tmpX = L_shr( L_mult( memory_fx[0][0], full_band_bpf_fx[0][4] ), 3 ); /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_add( L_shr( L_mult( memory_fx[0][1], full_band_bpf_fx[0][3] ), 3 ), L_tmpX ); /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_add( L_shr( L_mult( memory_fx[0][2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX ); /*Q_input_fx + 13 + 1 - 3*/ @@ -6776,6 +6925,7 @@ void elliptic_bpf_48k_generic_fx( L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ move32(); } +#endif /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic*/ memory_fx2[0][0] = input_fx[L_FRAME48k - 4]; memory_fx2[0][1] = input_fx[L_FRAME48k - 3]; @@ -6786,6 +6936,47 @@ void elliptic_bpf_48k_generic_fx( move32(); move32(); move32(); + +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + L_tmpMax = L_add( 0, 0 ); +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + IF( element_mode ) + { + FOR( i = 0; i < L_FRAME48k; i++ ) + { + W_tmpX = W_mac_32_16( 0, L_tmp[i - 4], full_band_bpf_fx[1][4] ); + W_tmpX = W_mac_32_16( W_tmpX, L_tmp[i - 3], full_band_bpf_fx[1][3] ); + W_tmpY = W_msu_32_16( 0, L_tmp2[i - 1], full_band_bpf_fx[4][1] ); + W_tmpX = W_mac_32_16( W_tmpX, L_tmp[i - 2], full_band_bpf_fx[1][2] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp2[i - 2], full_band_bpf_fx[4][2] ); + W_tmpX = W_mac_32_16( W_tmpX, L_tmp[i - 1], full_band_bpf_fx[1][1] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp2[i - 3], full_band_bpf_fx[4][3] ); + W_tmpX = W_mac_32_16( W_tmpX, L_tmp[i], full_band_bpf_fx[1][0] ); + W_tmpY = W_msu_32_16( W_tmpY, L_tmp2[i - 4], full_band_bpf_fx[4][4] ); + L_tmp2[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 + 16 ) ), 3 + 16 ) ); + move32(); + L_tmpMax = L_max( L_tmpMax, L_abs( L_tmp2[i] ) ); + } + } + ELSE +#endif /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/ + { + FOR( i = 0; i < L_FRAME48k; i++ ) + { + L_tmpX = L_shr( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[1][4] ), 3 ); /*Q_input_fx + 11 + 13 - 15 -3*/ + L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[1][3] ), 3 ), L_tmpX ); /*Q_input_fx + 11 + 13 - 15 -3*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 1], full_band_bpf_fx[4][1] ), 2 ) ); /*Q_input_fx + 6 +13 -15 +2 */ + L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[1][2] ), 3 ), L_tmpX ); /*Q_input_fx + 11 + 13 - 15 -3*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 2], full_band_bpf_fx[4][2] ), 2 ) ); /*Q_input_fx + 6 +13 -15 +2 */ + L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[1][1] ), 3 ), L_tmpX ); /*Q_input_fx + 11 + 13 - 15 -3*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 3], full_band_bpf_fx[4][3] ), 2 ) ); /*Q_input_fx + 6 +13 -15 +2 */ + L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i], full_band_bpf_fx[1][0] ), 3 ), L_tmpX ); /*Q_input_fx + 11 + 13 - 15 -3*/ + L_tmp2[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 4], full_band_bpf_fx[4][4] ), 2 ) ); /*Q_input_fx + 6 +13 -15 +2 */ + move32(); + L_tmpMax = L_max( L_tmpMax, L_abs( L_tmp2[i] ) ); + } + } +#else /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic*/ L_tmpX = L_shr( Mult_32_16( memory2_fx[1][0], full_band_bpf_fx[1][4] ), 3 ); /*Q_input_fx + 11 + 13 - 15 -3*/ L_tmpX = L_add( L_shr( Mult_32_16( memory2_fx[1][1], full_band_bpf_fx[1][3] ), 3 ), L_tmpX ); /*Q_input_fx + 11 + 13 - 15 -3*/ L_tmpX = L_add( L_shr( Mult_32_16( memory2_fx[1][2], full_band_bpf_fx[1][2] ), 3 ), L_tmpX ); /*Q_input_fx + 11 + 13 - 15 -3*/ @@ -6844,6 +7035,8 @@ void elliptic_bpf_48k_generic_fx( move32(); L_tmpMax = L_max( L_tmpMax, L_abs( L_tmp2[i] ) ); } +#endif + Q_temp = norm_l( L_tmpMax ); Q_temp = sub( Q_temp, 4 ); @@ -6858,6 +7051,16 @@ void elliptic_bpf_48k_generic_fx( move32(); move32(); move32(); +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + FOR( j = 0; j < 4; j++ ) + { + L_tmp2[j - 4] = L_shl_sat( memory_fx2[2][j], sub( add( add( *Q_input_fx, 6 ), Q_temp ), memory_fx_Q[2] ) ); + L_output[j - 4] = L_shl_sat( memory_fx2[3][j], sub( add( add( *Q_input_fx, 1 ), Q_temp ), memory_fx_Q[3] ) ); + move32(); + move32(); + move32(); + } +#else FOR( j = 0; j < 4; j++ ) { memory2_fx_2[j] = L_shl_sat( memory_fx2[2][j], sub( add( add( *Q_input_fx, 6 ), Q_temp ), memory_fx_Q[2] ) ); @@ -6866,6 +7069,51 @@ void elliptic_bpf_48k_generic_fx( move32(); move32(); } +#endif +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + L_tmpMax = L_add( 0, 0 ); +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + IF( element_mode ) + { + FOR( i = 0; i < L_FRAME48k; i++ ) + { + W_tmpX = W_mac_32_16( 0, L_tmp2[i - 4], full_band_bpf_fx[2][4] ); + W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i - 3], full_band_bpf_fx[2][3] ); + W_tmpY = W_msu_32_16( 0, L_output[i - 1], full_band_bpf_fx[5][1] ); + W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i - 2], full_band_bpf_fx[2][2] ); + W_tmpY = W_msu_32_16( W_tmpY, L_output[i - 2], full_band_bpf_fx[5][2] ); + W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i - 1], full_band_bpf_fx[2][1] ); + W_tmpY = W_msu_32_16( W_tmpY, L_output[i - 3], full_band_bpf_fx[5][3] ); + W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i], full_band_bpf_fx[2][0] ); + W_tmpY = W_msu_32_16( W_tmpY, L_output[i - 4], full_band_bpf_fx[5][4] ); + L_output[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 + 16 ) ), 3 + 16 ) ); + move32(); + L_tmpMax = L_max( L_tmpMax, L_abs( L_output[i] ) ); + } + } + ELSE +#endif /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/ + { + FOR( i = 0; i < L_FRAME48k; i++ ) + { + L_tmpX = L_shr( Mult_32_16( L_tmp2[i - 4], full_band_bpf_fx[2][4] ), 3 ); /*Q_input_fx + 6 +Q_temp+13 -15 -3 */ + L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp2[i - 3], full_band_bpf_fx[2][3] ), 3 ), L_tmpX ); /*Q_input_fx + 6 +Q_temp +13 -15 -3*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 1], full_band_bpf_fx[5][1] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/ + + L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp2[i - 2], full_band_bpf_fx[2][2] ), 3 ), L_tmpX ); /*Q_input_fx + 6 +Q_temp+13 -15 -3*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 2], full_band_bpf_fx[5][2] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/ + + L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp2[i - 1], full_band_bpf_fx[2][1] ), 3 ), L_tmpX ); /*Q_input_fx + 6 +Q_temp +13 -15 -3*/ + L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 3], full_band_bpf_fx[5][3] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/ + + L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp2[i], full_band_bpf_fx[2][0] ), 3 ), L_tmpX ); /*Q_input_fx + 6 +Q_temp +13 -15 -3*/ + L_output[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 4], full_band_bpf_fx[5][4] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/ + move32(); + L_tmpMax = L_max( L_tmpMax, L_abs( L_output[i] ) ); + } + } + +#else /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic*/ L_tmpX = L_shr( Mult_32_16( memory2_fx_2[0], full_band_bpf_fx[2][4] ), 3 ); /* *Q_input_fx+6 +Q_temp +13 -15 -3 */ L_tmpX = L_add_sat( L_shr( Mult_32_16( memory2_fx_2[1], full_band_bpf_fx[2][3] ), 3 ), L_tmpX ); /*Q_input_fx + 6 +Q_temp+13 -15 -3*/ L_tmpX = L_add_sat( L_shr( Mult_32_16( memory2_fx_2[2], full_band_bpf_fx[2][2] ), 3 ), L_tmpX ); /*Q_input_fx + 6 +Q_temp+13 -15 -3*/ @@ -6931,6 +7179,8 @@ void elliptic_bpf_48k_generic_fx( move32(); L_tmpMax = L_max( L_tmpMax, L_abs( L_output[i] ) ); } +#endif + memory_fx2[2][0] = L_tmp2[L_FRAME48k - 4]; memory_fx2[2][1] = L_tmp2[L_FRAME48k - 3]; memory_fx2[2][2] = L_tmp2[L_FRAME48k - 2]; @@ -6994,10 +7244,20 @@ void synthesise_fb_high_band_fx( Word16 *prev_fbbwe_ratio, /* o : previous frame energy for FEC */ Word32 bpf_memory[][4], /* i/o: memory for elliptic bpf 48k */ Word16 bpf_memory_Q[], - Word16 Qout ) + Word16 Qout +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + , + int16_t element_mode +#endif +) { Word16 i, j; +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + Word16 excitation_in_interp3_buffer[L_FRAME48k + 4]; + Word16 *excitation_in_interp3 = &excitation_in_interp3_buffer[0] + 4; +#else Word16 excitation_in_interp3[L_FRAME48k]; +#endif Word16 tmp[L_FRAME48k]; Word32 temp1; Word32 ratio2; @@ -7022,12 +7282,32 @@ void synthesise_fb_high_band_fx( IF( EQ_16( L_frame, L_FRAME16k ) ) { /* for 16kHz ACELP core */ +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + elliptic_bpf_48k_generic_fx( +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + element_mode, +#endif + 1, // IsUpsampled3 + excitation_in_interp3, &exp_tmp, tmp, bpf_memory, bpf_memory_Q, full_band_bpf_3_fx + + ); +#else elliptic_bpf_48k_generic_fx( excitation_in_interp3, &exp_tmp, tmp, bpf_memory, bpf_memory_Q, full_band_bpf_3_fx ); +#endif } ELSE { /* for 12.8kHz ACELP core */ +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + elliptic_bpf_48k_generic_fx( +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + element_mode, +#endif + 1, // IsUpsampled3 + excitation_in_interp3, &exp_tmp, tmp, bpf_memory, bpf_memory_Q, full_band_bpf_1_fx ); +#else elliptic_bpf_48k_generic_fx( excitation_in_interp3, &exp_tmp, tmp, bpf_memory, bpf_memory_Q, full_band_bpf_1_fx ); +#endif } temp1 = sum2_fx_mod( tmp, L_FRAME48k ); diff --git a/lib_dec/swb_tbe_dec_fx.c b/lib_dec/swb_tbe_dec_fx.c index 00be6455dc513fb50178f811b26fd32e5ba3e57b..890e0e5c894ce3e9647abed4db0c47793360f597 100644 --- a/lib_dec/swb_tbe_dec_fx.c +++ b/lib_dec/swb_tbe_dec_fx.c @@ -4644,7 +4644,12 @@ void fb_tbe_dec_fx( fb_exc_energy = sum2_fx_mod( fb_exc, L_FRAME16k ); /* FB TBE synthesis */ - synthesise_fb_high_band_fx( fb_exc, Q_fb_exc, fb_synth, fb_exc_energy, ratio, st->L_frame, st->bfi, &( hBWE_TD->prev_fbbwe_ratio_fx ), hBWE_TD->fbbwe_hpf_mem_fx, hBWE_TD->fbbwe_hpf_mem_fx_Q, hb_synth_exp ); + synthesise_fb_high_band_fx( fb_exc, Q_fb_exc, fb_synth, fb_exc_energy, ratio, st->L_frame, st->bfi, &( hBWE_TD->prev_fbbwe_ratio_fx ), hBWE_TD->fbbwe_hpf_mem_fx, hBWE_TD->fbbwe_hpf_mem_fx_Q, hb_synth_exp +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + , + st->element_mode +#endif /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/ + ); /* add the fb_synth component to the hb_synth component */ /* v_add_fx( hb_synth, fb_synth, hb_synth, L_FRAME48k );*/ @@ -4713,7 +4718,12 @@ void fb_tbe_dec_ivas_fx( fb_exc_energy = sum2_fx_mod( fb_exc, L_FRAME16k ); /* FB TBE synthesis */ - synthesise_fb_high_band_fx( fb_exc, Q_fb_exc, fb_synth, fb_exc_energy, ratio, st->L_frame, st->bfi, &( hBWE_TD->prev_fbbwe_ratio_fx ), hBWE_TD->fbbwe_hpf_mem_fx, hBWE_TD->fbbwe_hpf_mem_fx_Q, hb_synth_exp ); + synthesise_fb_high_band_fx( fb_exc, Q_fb_exc, fb_synth, fb_exc_energy, ratio, st->L_frame, st->bfi, &( hBWE_TD->prev_fbbwe_ratio_fx ), hBWE_TD->fbbwe_hpf_mem_fx, hBWE_TD->fbbwe_hpf_mem_fx_Q, hb_synth_exp +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + , + st->element_mode +#endif /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/ + ); test(); IF( GE_16( st->element_mode, IVAS_CPE_DFT ) && ( st->idchan == 0 ) ) diff --git a/lib_enc/swb_tbe_enc_fx.c b/lib_enc/swb_tbe_enc_fx.c index acfc5043cec79f665bf1d4c83dfb290258480398..903f3a46c7f62292b1922a6ff7c7a51398098549 100644 --- a/lib_enc/swb_tbe_enc_fx.c +++ b/lib_enc/swb_tbe_enc_fx.c @@ -7307,7 +7307,12 @@ void fb_tbe_enc_fx( Word16 ratio; Word16 tmp_vec[L_FRAME48k]; Word16 idxGain; +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + Word16 input_fhb_buffer[L_FRAME48k + 4]; + Word16 *input_fhb = &input_fhb_buffer[0] + 4; +#else Word16 input_fhb[L_FRAME48k]; +#endif Word16 Sample_Delay_HP; Word32 fb_exc_energy, temp2; Word32 L_tmp; @@ -7333,7 +7338,16 @@ void fb_tbe_enc_fx( Copy_Scale_sig( new_input, input_fhb, L_FRAME48k, exp_temp ); +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + elliptic_bpf_48k_generic_fx( +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + st->element_mode, +#endif /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/ + 0, // IsUpsampled3 + input_fhb, &exp_temp, tmp_vec, hBWE_TD->elliptic_bpf_2_48k_mem_fx, hBWE_TD->elliptic_bpf_2_48k_mem_fx_Q, full_band_bpf_2_fx ); +#else elliptic_bpf_48k_generic_fx( input_fhb, &exp_temp, tmp_vec, hBWE_TD->elliptic_bpf_2_48k_mem_fx, hBWE_TD->elliptic_bpf_2_48k_mem_fx_Q, full_band_bpf_2_fx ); +#endif Sample_Delay_HP = NS2SA( 48000, ACELP_LOOK_NS + DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS ) - L_FRAME48k / 2; IF( NE_16( st->last_extl, FB_TBE ) ) @@ -7431,7 +7445,12 @@ void fb_tbe_enc_ivas_fx( Word16 ratio; Word16 tmp_vec[L_FRAME48k]; Word16 idxGain; +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + Word16 input_fhb_new_buffer[L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) + 4]; + Word16 *input_fhb_new = &input_fhb_new_buffer[0] + 4; +#else Word16 input_fhb_new[L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS )]; +#endif Word16 input_fhb[L_FRAME48k]; Word16 Sample_Delay_HP; Word64 fb_exc_energy; @@ -7458,11 +7477,29 @@ void fb_tbe_enc_ivas_fx( exp_temp = add( exp_temp, Q_new_input ); IF( EQ_16( st->element_mode, IVAS_CPE_DFT ) ) { +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + elliptic_bpf_48k_generic_fx( +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + st->element_mode, +#endif /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/ + 0, // IsUpsampled3 + input_fhb_new, &exp_temp, tmp_vec, hBWE_TD->elliptic_bpf_2_48k_mem_fx, hBWE_TD->elliptic_bpf_2_48k_mem_fx_Q, full_band_bpf_2_fx ); +#else elliptic_bpf_48k_generic_fx( input_fhb_new, &exp_temp, tmp_vec, hBWE_TD->elliptic_bpf_2_48k_mem_fx, hBWE_TD->elliptic_bpf_2_48k_mem_fx_Q, full_band_bpf_2_fx ); +#endif } ELSE { +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic + elliptic_bpf_48k_generic_fx( +#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2 + st->element_mode, +#endif /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/ + 0, // IsUpsampled3 + input_fhb_new + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ), &exp_temp, tmp_vec, hBWE_TD->elliptic_bpf_2_48k_mem_fx, hBWE_TD->elliptic_bpf_2_48k_mem_fx_Q, full_band_bpf_2_fx ); +#else elliptic_bpf_48k_generic_fx( input_fhb_new + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ), &exp_temp, tmp_vec, hBWE_TD->elliptic_bpf_2_48k_mem_fx, hBWE_TD->elliptic_bpf_2_48k_mem_fx_Q, full_band_bpf_2_fx ); +#endif } test();