Loading lib_com/options.h +1 −6 Original line number Diff line number Diff line Loading @@ -81,12 +81,7 @@ #define HARM_ENC_INIT //#define HARM_SCE_INIT // new speedups - BE ones already in another branch //#define FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat /*FhG: reduces WMOPS - bit-exact*/ // | //#define FIX_1439_SPEEDUP_stereo_icBWE_dec_fx /*FhG: reduces WMOPS - bit-exact*/ // | 2.4 WMOPS //#define FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx /*FhG: reduces WMOPS - bit-exact*/ // | //#define FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_upsampledsig /*FhG: reduces WMOPS - bit-exact*/ // | //#define FIX_1439_SPEEDUP_synthesise_fb_high_band_fx // | 0.4 WMOPS - BE? Need for a manual test as long as be tests dont work #define FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_STAGE1 // nonbe // | #define FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_STAGE2 // nonbe // | 3.1 WMOPS, pipe testing https://forge.3gpp.org/rep/sa4/audio/ivas-basop/-/pipelines/50562 Loading lib_com/swb_tbe_com_fx.c +0 −195 Original line number Diff line number Diff line Loading @@ -6767,89 +6767,6 @@ void elliptic_bpf_48k_generic_fx( L_tmp[3] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][3], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ move32(); #ifdef FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_STAGE1 #ifdef FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_upsampledsig test(); IF( full_band_bpf_fx == full_band_bpf_3_fx || full_band_bpf_fx == full_band_bpf_1_fx ) { Word64 W_tmpX; Word64 W_tmpY; i = 4; W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] ); W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 2], full_band_bpf_fx[0][2] ); W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] ); W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] ); W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); //L_tmp[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 ), W_shl( W_tmpY, 2 - 16 ) ) ); /*Q_input_fx + 11*/ L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ move32(); i++; // W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] ); W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); W_tmpX = W_mac_16_16( 0, input_fx[i - 2], full_band_bpf_fx[0][2] ); W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] ); W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] ); W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); //L_tmp[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 ), W_shl( W_tmpY, 2 - 16 ) ) ); /*Q_input_fx + 11*/ L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ move32(); i++; FOR( ; i < L_FRAME48k; ) { // W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] ); //2 W_tmpX = W_mac_16_16( 0, input_fx[i - 3], full_band_bpf_fx[0][3] ); // 3 W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 2], full_band_bpf_fx[0][2] ); //4 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] ); //5 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] ); // 6 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); //L_tmp[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 ), W_shl( W_tmpY, 2 - 16 ) ) ); /*Q_input_fx + 11*/ L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ move32(); i++; W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] ); // 3 // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] ); //4 W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 2], full_band_bpf_fx[0][2] ); //5 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] ); // 6 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] ); //7 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); //L_tmp[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 ), W_shl( W_tmpY, 2 - 16 ) ) ); /*Q_input_fx + 11*/ L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ move32(); i++; // W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] ); //4 // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] ); //5 W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); W_tmpX = W_mac_16_16( 0, input_fx[i - 2], full_band_bpf_fx[0][2] ); // 6 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] ); //7 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] ); //8 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); //L_tmp[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 ), W_shl( W_tmpY, 2 - 16 ) ) ); /*Q_input_fx + 11*/ L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ move32(); i++; } } ELSE #endif /*FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_upsampledsig*/ { FOR( i = 4; i < L_FRAME48k; i++ ) { Loading @@ -6871,76 +6788,6 @@ void elliptic_bpf_48k_generic_fx( } #else /*FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_STAGE1*/ #ifdef FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_upsampledsig test(); IF( full_band_bpf_fx == full_band_bpf_3_fx || full_band_bpf_fx == full_band_bpf_1_fx ) { i = 4; L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 ); // 0 /*Q_input_fx + 13 + 1 - 3*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX ); // 1 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX ); // 2 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX ); // 3 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX ); // 4 /*Q_input_fx + 13 + 1 - 3*/ L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ move32(); i++; // L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 ); // 1 /*Q_input_fx + 13 + 1 - 3*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX ); // 2 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( 0, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX ); // 3 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX ); // 4 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX ); // 5 /*Q_input_fx + 13 + 1 - 3*/ L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ move32(); i++; FOR( ; i < L_FRAME48k; ) { // L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 ); // 2 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), 0 ); // 3 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX ); // 4 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX ); // 5 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX ); // 6 /*Q_input_fx + 13 + 1 - 3*/ L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ move32(); i++; L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 ); // 3 /*Q_input_fx + 13 + 1 - 3*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX ); // 4 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX ); // 5 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX ); // 6 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX ); // 7 /*Q_input_fx + 13 + 1 - 3*/ L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ move32(); i++; // L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 ); // 4 /*Q_input_fx + 13 + 1 - 3*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX ); // 5 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( 0, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX ); // 6 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX ); // 7 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX ); // 8 /*Q_input_fx + 13 + 1 - 3*/ L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ move32(); i++; } } ELSE #endif /*FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_upsampledsig*/ { FOR( i = 4; i < L_FRAME48k; i++ ) { Loading Loading @@ -7307,47 +7154,6 @@ void synthesise_fb_high_band_fx( tmp3 = add( sub( Qout, add( sub( 1, exp ), exp_tmp ) ), 16 ); /*Qout - (1 -exp +exp_tmp) + 16 */ FOR( i = 0; i < L_FRAME48k; i++ ) { #ifdef FIX_1439_SPEEDUP_synthesise_fb_high_band_fx L_tmp = Mult_32_16( ratio2, tmp[i] ); /* Q(16-exp+exp_tmp-15 = 1-exp+exp_tmp) */ Word32 L_tmp32; Word16 tmp16; if ( L_tmp < 0 ) { L_tmp32 = L_negate( L_tmp ); } if ( L_tmp < 0 ) { L_tmp32 = L_shl_sat( L_tmp32, tmp3 ); } if ( L_tmp < 0 ) { tmp16 = extract_h( L_tmp32 ); } if ( L_tmp < 0 ) { tmp16 = negate( tmp16 ); } if ( L_tmp == 0 ) { tmp16 = 0; move16(); } if ( L_tmp > 0 ) { L_tmp32 = L_shl_sat( L_tmp, tmp3 ); } if ( L_tmp > 0 ) { tmp16 = extract_h( L_tmp32 ); } output[i] = tmp16; move16(); #else L_tmp = Mult_32_16( ratio2, tmp[i] ); /* Q(16-exp+exp_tmp-15 = 1-exp+exp_tmp) */ IF( L_tmp < 0 ) { Loading @@ -7359,7 +7165,6 @@ void synthesise_fb_high_band_fx( output[i] = extract_h( L_shl_sat( L_tmp, tmp3 ) ); /*Qout*/ move16(); } #endif } pop_wmops(); /*push_wmops( "SYNTHESISE_FB_HIGH_BAND PART B" );*/ return; Loading lib_com/tools_fx.c +0 −24 Original line number Diff line number Diff line Loading @@ -794,37 +794,13 @@ void Copy_Scale_sig_16_32_no_sat( } return; } #ifdef FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat L_tmp = L_shl_o( 1, exp0 - 1, &Overflow ); IF( L_tmp >= 0x7FFF ) { FOR( i = 0; i < lg; i++ ) { // y[i] = L_mult0(x[i], L_tmp); y[i] = W_extract_l( W_mult_32_16( L_tmp, x[i] ) ); move32(); /* Overflow can occur here */ } return; } // ELSE { Word16 tmp = extract_l( L_tmp ); FOR( i = 0; i < lg; i++ ) { y[i] = L_mult( x[i], tmp ); move32(); } } #else L_tmp = L_shl_o( 1, exp0 - 1, &Overflow ); FOR( i = 0; i < lg; i++ ) { // y[i] = L_mult0(x[i], L_tmp); y[i] = W_extract_l( W_mult_32_16( L_tmp, x[i] ) ); move32(); /* Overflow can occur here */ } #endif } void Copy_Scale_sig_32_16( Loading lib_dec/ivas_stereo_icbwe_dec_fx.c +0 −10 Original line number Diff line number Diff line Loading @@ -904,9 +904,6 @@ void stereo_icBWE_dec_fx( winSlope_fx = div_s( 1, winLen_fx ); /* Q15 */ alpha_fx = winSlope_fx; /* Q15 */ move16(); #ifdef FIX_1439_SPEEDUP_stereo_icBWE_dec_fx Word16 winSlope_fx_ = sub( 32767 /* 1.0 in Q15*/, winSlope_fx ); #endif FOR( i = 0; i < winLen_fx; i++ ) { L_tmp = L_mult0( alpha_fx, icbweM2Ref_fx ); /* Q29 */ Loading @@ -914,17 +911,10 @@ void stereo_icBWE_dec_fx( tmp = shl( round_fx( L_tmp ), 1 ); /* Q14 */ synthRef_fx[i] = Mpy_32_16_1( synthRef_fx[i], tmp ); /* Qsyn - 1 */ move32(); #ifdef FIX_1439_SPEEDUP_stereo_icBWE_dec_fx if ( LE_16( alpha_fx, winSlope_fx_ ) ) { alpha_fx = add( alpha_fx, winSlope_fx ); /* Q15 */ } #else IF( LE_16( alpha_fx, sub( 32767 /* 1.0 in Q15*/, winSlope_fx ) ) ) { alpha_fx = add( alpha_fx, winSlope_fx ); /* Q15 */ } #endif } FOR( ; i < NS2SA_FX2( st->output_Fs, FRAME_SIZE_NS ); i++ ) Loading lib_dec/swb_tbe_dec_fx.c +0 −5 Original line number Diff line number Diff line Loading @@ -7037,17 +7037,12 @@ void ivas_swb_tbe_dec_fx( FOR( i = 0; i < L_FRAME16k; i++ ) { #ifndef FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx Word16 idx = 0; move16(); IF( i != 0 ) { idx = idiv1616( i_mult( NUM_SHB_SUBFR, i ), L_FRAME16k ); } #else Word16 idx; idx = extract_h( imult3216( idx32, i ) ); /*Q0*/ #endif L_tmp1 = Mult_32_16( L_tmp, GainShape_fx[idx] ); /* Q : 18 + tmp +15 -15*/ White_exc16k_fx[i] = round_fx( Mult_32_16( L_tmp1, White_exc16k_fx[i] ) ); /* 18 + tmp +*Q_white_exc -15 -16 */ move16(); Loading Loading
lib_com/options.h +1 −6 Original line number Diff line number Diff line Loading @@ -81,12 +81,7 @@ #define HARM_ENC_INIT //#define HARM_SCE_INIT // new speedups - BE ones already in another branch //#define FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat /*FhG: reduces WMOPS - bit-exact*/ // | //#define FIX_1439_SPEEDUP_stereo_icBWE_dec_fx /*FhG: reduces WMOPS - bit-exact*/ // | 2.4 WMOPS //#define FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx /*FhG: reduces WMOPS - bit-exact*/ // | //#define FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_upsampledsig /*FhG: reduces WMOPS - bit-exact*/ // | //#define FIX_1439_SPEEDUP_synthesise_fb_high_band_fx // | 0.4 WMOPS - BE? Need for a manual test as long as be tests dont work #define FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_STAGE1 // nonbe // | #define FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_STAGE2 // nonbe // | 3.1 WMOPS, pipe testing https://forge.3gpp.org/rep/sa4/audio/ivas-basop/-/pipelines/50562 Loading
lib_com/swb_tbe_com_fx.c +0 −195 Original line number Diff line number Diff line Loading @@ -6767,89 +6767,6 @@ void elliptic_bpf_48k_generic_fx( L_tmp[3] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][3], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ move32(); #ifdef FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_STAGE1 #ifdef FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_upsampledsig test(); IF( full_band_bpf_fx == full_band_bpf_3_fx || full_band_bpf_fx == full_band_bpf_1_fx ) { Word64 W_tmpX; Word64 W_tmpY; i = 4; W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] ); W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 2], full_band_bpf_fx[0][2] ); W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] ); W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] ); W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); //L_tmp[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 ), W_shl( W_tmpY, 2 - 16 ) ) ); /*Q_input_fx + 11*/ L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ move32(); i++; // W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] ); W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); W_tmpX = W_mac_16_16( 0, input_fx[i - 2], full_band_bpf_fx[0][2] ); W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] ); W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] ); W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); //L_tmp[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 ), W_shl( W_tmpY, 2 - 16 ) ) ); /*Q_input_fx + 11*/ L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ move32(); i++; FOR( ; i < L_FRAME48k; ) { // W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] ); //2 W_tmpX = W_mac_16_16( 0, input_fx[i - 3], full_band_bpf_fx[0][3] ); // 3 W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 2], full_band_bpf_fx[0][2] ); //4 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] ); //5 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] ); // 6 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); //L_tmp[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 ), W_shl( W_tmpY, 2 - 16 ) ) ); /*Q_input_fx + 11*/ L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ move32(); i++; W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] ); // 3 // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] ); //4 W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 2], full_band_bpf_fx[0][2] ); //5 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] ); // 6 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] ); //7 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); //L_tmp[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 ), W_shl( W_tmpY, 2 - 16 ) ) ); /*Q_input_fx + 11*/ L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ move32(); i++; // W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] ); //4 // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] ); //5 W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] ); W_tmpX = W_mac_16_16( 0, input_fx[i - 2], full_band_bpf_fx[0][2] ); // 6 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] ); //7 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] ); // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] ); //8 W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] ); //L_tmp[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 ), W_shl( W_tmpY, 2 - 16 ) ) ); /*Q_input_fx + 11*/ L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/ move32(); i++; } } ELSE #endif /*FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_upsampledsig*/ { FOR( i = 4; i < L_FRAME48k; i++ ) { Loading @@ -6871,76 +6788,6 @@ void elliptic_bpf_48k_generic_fx( } #else /*FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_STAGE1*/ #ifdef FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_upsampledsig test(); IF( full_band_bpf_fx == full_band_bpf_3_fx || full_band_bpf_fx == full_band_bpf_1_fx ) { i = 4; L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 ); // 0 /*Q_input_fx + 13 + 1 - 3*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX ); // 1 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX ); // 2 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX ); // 3 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX ); // 4 /*Q_input_fx + 13 + 1 - 3*/ L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ move32(); i++; // L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 ); // 1 /*Q_input_fx + 13 + 1 - 3*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX ); // 2 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( 0, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX ); // 3 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX ); // 4 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX ); // 5 /*Q_input_fx + 13 + 1 - 3*/ L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ move32(); i++; FOR( ; i < L_FRAME48k; ) { // L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 ); // 2 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), 0 ); // 3 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX ); // 4 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX ); // 5 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX ); // 6 /*Q_input_fx + 13 + 1 - 3*/ L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ move32(); i++; L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 ); // 3 /*Q_input_fx + 13 + 1 - 3*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX ); // 4 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX ); // 5 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX ); // 6 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX ); // 7 /*Q_input_fx + 13 + 1 - 3*/ L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ move32(); i++; // L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 ); // 4 /*Q_input_fx + 13 + 1 - 3*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX ); // 5 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( 0, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX ); // 6 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX ); // 7 /*Q_input_fx + 13 + 1 - 3*/ L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ // L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX ); // 8 /*Q_input_fx + 13 + 1 - 3*/ L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13 -15 +2*/ move32(); i++; } } ELSE #endif /*FIX_1439_SPEEDUP_elliptic_bpf_48k_generic_upsampledsig*/ { FOR( i = 4; i < L_FRAME48k; i++ ) { Loading Loading @@ -7307,47 +7154,6 @@ void synthesise_fb_high_band_fx( tmp3 = add( sub( Qout, add( sub( 1, exp ), exp_tmp ) ), 16 ); /*Qout - (1 -exp +exp_tmp) + 16 */ FOR( i = 0; i < L_FRAME48k; i++ ) { #ifdef FIX_1439_SPEEDUP_synthesise_fb_high_band_fx L_tmp = Mult_32_16( ratio2, tmp[i] ); /* Q(16-exp+exp_tmp-15 = 1-exp+exp_tmp) */ Word32 L_tmp32; Word16 tmp16; if ( L_tmp < 0 ) { L_tmp32 = L_negate( L_tmp ); } if ( L_tmp < 0 ) { L_tmp32 = L_shl_sat( L_tmp32, tmp3 ); } if ( L_tmp < 0 ) { tmp16 = extract_h( L_tmp32 ); } if ( L_tmp < 0 ) { tmp16 = negate( tmp16 ); } if ( L_tmp == 0 ) { tmp16 = 0; move16(); } if ( L_tmp > 0 ) { L_tmp32 = L_shl_sat( L_tmp, tmp3 ); } if ( L_tmp > 0 ) { tmp16 = extract_h( L_tmp32 ); } output[i] = tmp16; move16(); #else L_tmp = Mult_32_16( ratio2, tmp[i] ); /* Q(16-exp+exp_tmp-15 = 1-exp+exp_tmp) */ IF( L_tmp < 0 ) { Loading @@ -7359,7 +7165,6 @@ void synthesise_fb_high_band_fx( output[i] = extract_h( L_shl_sat( L_tmp, tmp3 ) ); /*Qout*/ move16(); } #endif } pop_wmops(); /*push_wmops( "SYNTHESISE_FB_HIGH_BAND PART B" );*/ return; Loading
lib_com/tools_fx.c +0 −24 Original line number Diff line number Diff line Loading @@ -794,37 +794,13 @@ void Copy_Scale_sig_16_32_no_sat( } return; } #ifdef FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat L_tmp = L_shl_o( 1, exp0 - 1, &Overflow ); IF( L_tmp >= 0x7FFF ) { FOR( i = 0; i < lg; i++ ) { // y[i] = L_mult0(x[i], L_tmp); y[i] = W_extract_l( W_mult_32_16( L_tmp, x[i] ) ); move32(); /* Overflow can occur here */ } return; } // ELSE { Word16 tmp = extract_l( L_tmp ); FOR( i = 0; i < lg; i++ ) { y[i] = L_mult( x[i], tmp ); move32(); } } #else L_tmp = L_shl_o( 1, exp0 - 1, &Overflow ); FOR( i = 0; i < lg; i++ ) { // y[i] = L_mult0(x[i], L_tmp); y[i] = W_extract_l( W_mult_32_16( L_tmp, x[i] ) ); move32(); /* Overflow can occur here */ } #endif } void Copy_Scale_sig_32_16( Loading
lib_dec/ivas_stereo_icbwe_dec_fx.c +0 −10 Original line number Diff line number Diff line Loading @@ -904,9 +904,6 @@ void stereo_icBWE_dec_fx( winSlope_fx = div_s( 1, winLen_fx ); /* Q15 */ alpha_fx = winSlope_fx; /* Q15 */ move16(); #ifdef FIX_1439_SPEEDUP_stereo_icBWE_dec_fx Word16 winSlope_fx_ = sub( 32767 /* 1.0 in Q15*/, winSlope_fx ); #endif FOR( i = 0; i < winLen_fx; i++ ) { L_tmp = L_mult0( alpha_fx, icbweM2Ref_fx ); /* Q29 */ Loading @@ -914,17 +911,10 @@ void stereo_icBWE_dec_fx( tmp = shl( round_fx( L_tmp ), 1 ); /* Q14 */ synthRef_fx[i] = Mpy_32_16_1( synthRef_fx[i], tmp ); /* Qsyn - 1 */ move32(); #ifdef FIX_1439_SPEEDUP_stereo_icBWE_dec_fx if ( LE_16( alpha_fx, winSlope_fx_ ) ) { alpha_fx = add( alpha_fx, winSlope_fx ); /* Q15 */ } #else IF( LE_16( alpha_fx, sub( 32767 /* 1.0 in Q15*/, winSlope_fx ) ) ) { alpha_fx = add( alpha_fx, winSlope_fx ); /* Q15 */ } #endif } FOR( ; i < NS2SA_FX2( st->output_Fs, FRAME_SIZE_NS ); i++ ) Loading
lib_dec/swb_tbe_dec_fx.c +0 −5 Original line number Diff line number Diff line Loading @@ -7037,17 +7037,12 @@ void ivas_swb_tbe_dec_fx( FOR( i = 0; i < L_FRAME16k; i++ ) { #ifndef FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx Word16 idx = 0; move16(); IF( i != 0 ) { idx = idiv1616( i_mult( NUM_SHB_SUBFR, i ), L_FRAME16k ); } #else Word16 idx; idx = extract_h( imult3216( idx32, i ) ); /*Q0*/ #endif L_tmp1 = Mult_32_16( L_tmp, GainShape_fx[idx] ); /* Q : 18 + tmp +15 -15*/ White_exc16k_fx[i] = round_fx( Mult_32_16( L_tmp1, White_exc16k_fx[i] ) ); /* 18 + tmp +*Q_white_exc -15 -16 */ move16(); Loading