/*
 * Patch summary (unified git diff, five files).
 *
 * Adds the compile-time switch HARM_2336_DOTP to lib_com/options.h, inside the
 * section that ends with the End BE switches banner, and uses it as follows:
 *
 *  - lib_com/prot_fx.h: the prototypes of v_add_w64(), dotp_fx32_fac() and
 *    dotp_me_fx() are wrapped in #ifndef HARM_2336_DOTP, so they are not
 *    declared when the switch is defined.
 *  - lib_com/tools_fx.c: the definitions of the same three functions are
 *    compiled out under the switch. One #ifndef opens before dotp_fx32_fac()
 *    and its #endif closes after v_add_w64(); dotp_me_fx() has its own pair.
 *  - v_sub_fx(): with the switch, each output element is computed as
 *    Msub_32_32( Mpy_32_32( x1[i], wtmp ), x2[i], wtmp ) with
 *    wtmp = L_shl_sat( 1, sub( 31, hdrm ) ), instead of
 *    L_sub( L_shr( x1[i], hdrm ), L_shr( x2[i], hdrm ) ).
 *    An assert( hdrm < 31 && hdrm > 0 ) is added in front of the loop.
 *  - dotp_fx_ivas_fx(): with the switch, the accumulator is seeded with
 *    Mpy_32_32( x[0], y[0] ) and *out_e with add( x_e, y_e ); the loop then
 *    starts at i = 1 and accumulates through BASOP_Util_Add_Mant32Exp().
 *  - getCorrelationMatrix_fx() (lib_enc/ivas_mct_enc_mct_fx.c): the call to
 *    dotp_fx32_fac() is replaced by dotp_fx32_o(); the value of res_q from
 *    before the call is saved in tmp_Q and added back to res_q afterwards.
 *  - ivas_smc_gmm_fx() (lib_enc/speech_music_classif_fx.c): the call to
 *    dotp_me_fx( x, y, n, exp_x, exp_y, out ) is replaced by
 *    dotp_fx_ivas_fx( x, x_e, y, y_e, n, out ) with the same six values.
 *  - Blank separator lines are replaced by the new preprocessor lines, and one
 *    blank line before the clang-format on marker in options.h is removed.
 *
 * NOTE(review): the new assert in v_sub_fx() rejects hdrm == 0 and hdrm >= 31,
 *   which the old shift-based path did not check. Confirm that no caller
 *   passes such a value, and that tools_fx.c already includes assert.h.
 * NOTE(review): the new dotp_fx_ivas_fx() body reads x[0] and y[0] before any
 *   test on n. Confirm that every call site guarantees n >= 1.
 * NOTE(review): the trailing comment exp_x+exp_A in the new dotp_fx_ivas_fx()
 *   loop matches no variable of that body (x_e, y_e, mul_exp); presumably
 *   stale, TODO confirm and reword.
 * NOTE(review): the getCorrelationMatrix_fx() change presumably relies on
 *   dotp_fx32_o() writing only a Q offset to *res_q, whereas dotp_fx32_fac()
 *   folded it into the incoming value. Neither body is visible in this diff;
 *   verify against both definitions.
 * NOTE(review): in getCorrelationMatrix_fx() res_q is derived from
 *   sts[0]->hTcxEnc->spectrum_e[0] while the operands are indexed by ch1, ch2
 *   and n. This is unchanged by the patch; confirm it is intended.
 */
diff --git a/lib_com/options.h b/lib_com/options.h index 18dc45e90c29ff8b211d94777102f047f11bbc76..74080fa32d692e7bb3d595d311de4777bfe987f8 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -99,6 +99,7 @@ #define FIX_2196_UNREACHABLE_CODE_IN_OSBA_STEREO_OUTPUT /* FhG: remove unreachable code in ivas_osba_stereo_add_channels */ #define FIX_OUTPUT_FRAME /* VA: harmonize "output_frame" parameter usage to correspond to FLP */ #define HARMONIZE_ISSUE_2435_WRITETNSDATA /* FhG basop 2435: Harmonize WriteTnsData*_fx(), EncodeTnsData*_fx() */ +#define HARM_2336_DOTP /* VA: basop 2336; Harmonisation of some dot_product function + some BE optimisation */ /* #################### End BE switches ################################## */ @@ -111,7 +112,6 @@ /* ##################### End NON-BE switches ########################### */ /* ################## End MAINTENANCE switches ######################### */ - /* clang-format on */ #endif diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 8d22557a8417aea3416683345e3d17d923711b68..017b51dc71b354141a484b49dd6ee20c67c208b5 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -5687,7 +5687,7 @@ void v_add_fx_me( const Word16 N, /* i : Vector length */ const Word16 hdrm /* i : headroom for when subtraction result > 1 or < -1 */ ); - +#ifndef HARM_2336_DOTP void v_add_w64( const Word64 x1[], /* i : Input vector 1 */ const Word64 x2[], /* i : Input vector 2 */ @@ -5695,7 +5695,7 @@ void v_add_w64( const Word16 N, /* i : Vector length */ const Word16 hdrm /* i : headroom for when subtraction result > 1 or < -1 */ ); - +#endif void v_sub_fx( const Word32 x1[], /* i : Input vector 1 */ const Word32 x2[], /* i : Input vector 2 */ @@ -5725,7 +5725,7 @@ Word32 dotp_fx32_o( const Word16 n, /* i : vector length */ const Word16 log_len, /* i : max factor added to result q after dot product (equal to log2(n)) */ Word16 *res_q ); - +#ifndef HARM_2336_DOTP Word32 dotp_fx32_fac( const Word32 x[], /* i : vector x[] */ const Word32 y[], 
/* i : vector y[] */ @@ -5733,7 +5733,7 @@ Word32 dotp_fx32_fac( const Word16 log_len, /* i : max factor added to result q after dot product (equal to log2(n)) */ Word16 *res_q /*stores resultant Q*/ ); - +#endif Word32 dotp_fx_ivas_fx( const Word32 x[], /* i : vector x[] */ Word16 x_e, @@ -5747,7 +5747,7 @@ Word32 dotp_fx_guarded( const Word32 y[], /* i : vector y[] */ const Word16 n /* i : vector length */ ); - +#ifndef HARM_2336_DOTP Word32 dotp_me_fx( const Word32 x[], /* i : vector x[] */ const Word32 y[], /* i : vector y[] */ @@ -5755,7 +5755,7 @@ Word32 dotp_me_fx( Word16 exp_x, Word16 exp_y, Word16 *exp_suma ); - +#endif void lsf_end_dec_fx( Decoder_State *st, /* i/o: decoder state structure */ Word16 mode2_flag, /* Q0 */ diff --git a/lib_com/tools_fx.c b/lib_com/tools_fx.c index 4e48897453d0bea78a7e599fa1973d53b47a1899..1f491ced83fbf403917adf35f4bad4309812994f 100644 --- a/lib_com/tools_fx.c +++ b/lib_com/tools_fx.c @@ -630,7 +630,7 @@ Word32 dotp_fx32_o( return W_extract_l( suma ); } - +#ifndef HARM_2336_DOTP Word32 dotp_fx32_fac( const Word32 x[], /* i : vector x[] */ const Word32 y[], /* i : vector y[] */ @@ -686,6 +686,7 @@ void v_add_w64( return; } +#endif /*-------------------------------------------------------------------* @@ -703,10 +704,18 @@ void v_sub_fx( ) { Word16 i; - +#ifdef HARM_2336_DOTP + Word32 wtmp; + assert( hdrm < 31 && hdrm > 0 ); + wtmp = L_shl_sat( 1, sub( 31, hdrm ) ); +#endif FOR( i = 0; i < N; i++ ) { +#ifndef HARM_2336_DOTP y[i] = L_sub( L_shr( x1[i], hdrm ), L_shr( x2[i], hdrm ) ); +#else + y[i] = Msub_32_32( Mpy_32_32( x1[i], wtmp ), x2[i], wtmp ); +#endif move32(); } @@ -5143,7 +5152,7 @@ UWord32 mvl2s_r( return noClipping; } - +#ifndef HARM_2336_DOTP Word32 dotp_me_fx( const Word32 x[], /* i : vector x[] */ const Word32 y[], /* i : vector y[] */ @@ -5166,7 +5175,7 @@ Word32 dotp_me_fx( return suma; } - +#endif Word32 dotp_fx_guarded( const Word32 x[], /* i : vector x[] */ const Word32 y[], /* i : vector y[] */ @@ -5195,6 
+5204,7 @@ Word32 dotp_fx_ivas_fx( const Word16 n, /* i : vector length */ Word16 *out_e ) { +#ifndef HARM_2336_DOTP Word16 i, exp; Word32 suma = 0; @@ -5208,7 +5218,20 @@ Word32 dotp_fx_ivas_fx( *out_e = exp; move16(); - +#else + Word16 i; + Word32 suma; + Word32 mul; + Word16 mul_exp = add( x_e, y_e ); + suma = Mpy_32_32( x[0], y[0] ); + *out_e = mul_exp; + move16(); + FOR( i = 1; i < n; i++ ) + { + mul = Mpy_32_32( x[i], y[i] ); + suma = BASOP_Util_Add_Mant32Exp( suma, *out_e, mul, mul_exp, out_e ); // exp_x+exp_A + } +#endif return suma; } diff --git a/lib_enc/ivas_mct_enc_mct_fx.c b/lib_enc/ivas_mct_enc_mct_fx.c index c725950b164569cbdddd20e0153cb120f6d760d5..522e493e2326f126c0f3f552ec817cf1bfbecc88 100644 --- a/lib_enc/ivas_mct_enc_mct_fx.c +++ b/lib_enc/ivas_mct_enc_mct_fx.c @@ -235,8 +235,17 @@ static void getCorrelationMatrix_fx( FOR( n = 0; n < nSubframes; n++ ) { +#ifndef HARM_2336_DOTP res_q = sub( 31, sts[0]->hTcxEnc->spectrum_e[0] ); L_tmp = dotp_fx32_fac( sts[ch1]->hTcxEnc->spectrum_fx[n], sts[ch2]->hTcxEnc->spectrum_fx[n], L_subframe, 1, &res_q ); +#else + Word16 tmp_Q; + res_q = sub( 31, sts[0]->hTcxEnc->spectrum_e[0] ); + tmp_Q = res_q; + move16(); + L_tmp = dotp_fx32_o( sts[ch1]->hTcxEnc->spectrum_fx[n], sts[ch2]->hTcxEnc->spectrum_fx[n], L_subframe, 1, &res_q ); + res_q = add( res_q, tmp_Q ); +#endif xCorrMatrix[ch1][ch2] = BASOP_Util_Add_Mant32Exp( xCorrMatrix[ch1][ch2], xCorrMatrix_exp[ch1][ch2], L_tmp, sub( 31, res_q ), &xCorrMatrix_exp[ch1][ch2] ); move32(); } diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c index 2f184957a0c911d1d692044d49cb3d63d2af4a2d..bde429c81a262728026daa4febcb884147c59470 100644 --- a/lib_enc/speech_music_classif_fx.c +++ b/lib_enc/speech_music_classif_fx.c @@ -1740,7 +1740,11 @@ Word16 ivas_smc_gmm_fx( move16(); len = mel_fb_len[i]; move16(); +#ifndef HARM_2336_DOTP temp32 = dotp_me_fx( &PS_fx[j], pt_mel_fb_fx, len, 31 - Qfact_PS, Q1, &dotp_exp ); +#else + temp32 = dotp_fx_ivas_fx( 
&PS_fx[j], 31 - Qfact_PS, pt_mel_fb_fx, Q1, len, &dotp_exp ); +#endif IF( LT_16( dotp_exp, -17 ) ) /*-18 is exponent of 10737:to avoid overflow when left shifting 10737*/ { temp32 = L_shr( temp32, sub( -17, dotp_exp ) );