diff --git a/lib_com/cnst.h b/lib_com/cnst.h
index 2f886ed1e50a9cd46ef6824e58a2597ee5499e37..ef9e41faf93f5ddb205f66e52bba7c0d2195214a 100644
--- a/lib_com/cnst.h
+++ b/lib_com/cnst.h
@@ -698,6 +698,7 @@ enum
 #define AUTO_REGRESSIVE 2
 #define INT_FS_12k8 12800 /* internal sampling frequency */
+#define ONE_BY_INT_FS_12k8_Q42 343597384 /* 1/INT_FS_12k8 in Q42 */
 #define M 16 /* order of the LP filter @ 12.8kHz */
 #define L_FRAME 256 /* frame size at 12.8kHz */
 #define NB_SUBFR 4 /* number of subframes per frame */
diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h
index f6a1a1f4cc564acddeaf8eeca0731fe23c8b9aa4..1ccd0b33294f5d00960ceb305e66d3d765265603 100644
--- a/lib_com/ivas_prot.h
+++ b/lib_com/ivas_prot.h
@@ -3178,9 +3178,9 @@ int16_t read_GR0(
 void ivas_mdct_core_whitening_enc(
     CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
-    float new_samples[CPE_CHANNELS][L_INP], /* i : new samples */
-    float old_wsp[CPE_CHANNELS][L_WSP], /* i : 12.8kHz weighted speech (for LTP */
-    float pitch_buf[CPE_CHANNELS][NB_SUBFR16k], /* o : floating pitch for each subframe */
+    Word16 new_samples_fx[CPE_CHANNELS][L_INP], /* i : new samples */
+    Word16 old_wsp_fx[CPE_CHANNELS][L_WSP], /* i : 12.8kHz weighted speech (for LTP) */
+    Word16 pitch_buf[CPE_CHANNELS][NB_SUBFR16k], /* o : pitch for each subframe (Q6) */
     float *mdst_spectrum_long[CPE_CHANNELS], /* o : buffer for MDST spectrum */
     int16_t tnsBits[CPE_CHANNELS][NB_DIV], /* o : buffer TNS bits */
     float *orig_spectrum_long[CPE_CHANNELS], /* o : origingal spectrum w/o whitening */
diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h
index e383aeea06a2558c21e7ed78aa1be0251c47f9ff..0189a086250adf246dc0c18e33d2213923e91094 100644
--- a/lib_com/ivas_prot_fx.h
+++ b/lib_com/ivas_prot_fx.h
@@ -2875,7 +2875,7 @@ void stereo_dft_cng_side_gain_fx(
 Word16 quantize_sns_fx(
     Word32 sns_in_fx[CPE_CHANNELS][NB_DIV][M], /* sns_e */
     Word32 snsQ_out_fx[CPE_CHANNELS][NB_DIV][M], /* sns_e */
-    Word16 *sns_e,
+    Word16 sns_e,
     Encoder_State **sts,
     Word16 *indices, /* Q0 */
     Word16 *zero_side_flag, /* Q0 */
diff --git a/lib_com/prot.h b/lib_com/prot.h
index cc4f3abe149a9a7ef737b3be42eac0d90b653b55..895ab0336f0a5e2fc71937bc93e2e45b23432431 100644
--- a/lib_com/prot.h
+++ b/lib_com/prot.h
@@ -2400,7 +2400,11 @@ ivas_error acelp_core_enc(
     const int16_t vad_hover_flag, /* i : VAD hangover flag */
     const int16_t attack_flag, /* i : attack flag (GSC or TC) */
     float bwe_exc_extended[], /* i/o: bandwidth extended excitation */
+#ifndef IVAS_FLOAT_FIXED
     float *voice_factors, /* o : voicing factors */
+#else
+    Word16 *voice_factors_fx, /* o : voicing factors Q15 */
+#endif
     float old_syn_12k8_16k[], /* o : ACELP core synthesis at 12.8kHz or 16kHz to be used by SWB BWE */
     float pitch_buf[NB_SUBFR16k], /* o : floating pitch for each subframe */
     int16_t *unbits, /* o : number of unused bits */
diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h
index bfc3ea6c134db86695e18a18a1d4258d3abd81c1..7c75943f80dc802f63a0ec3208a5d5684113336e 100644
--- a/lib_com/prot_fx.h
+++ b/lib_com/prot_fx.h
@@ -2368,6 +2368,19 @@ void Syn_filt_s(
     const Word16 update /* i : 0=no update, 1=update of memory. Q0 */
 );
+#ifndef IVAS_FLOAT_FIXED
+void syn_filt_fx(
+    const Word16 shift, /* i : scaling to apply Q0 */
+    const Word16 a[], /* i : LP filter coefficients Q12 */
+    const Word16 m, /* i : order of LP filter Q0 */
+    const Word16 x[], /* i : input signal Qx */
+    Word16 y[], /* o : output signal Qx-s */
+    const Word16 l, /* i : size of filtering Q0 */
+    Word16 mem[], /* i/o: initial filter states Qx-s */
+    const Word16 update_m /* i : update memory flag Q0 : 0 --> no memory update */
+); /* 1 --> update of memory */
+#endif
+
 void E_UTIL_synthesis( const Word16 shift, const Word16 a[], const Word16 x[], Word16 y[], const Word16 lg, Word16 mem[], const Word16 update, const Word16 m );
 void E_UTIL_synthesis_fx( const Word16 shift, const Word32 a[], const Word32 x[], Word32 y[], const Word16 lg, Word32 mem[], const Word16 update, const Word16 m );
@@ -2847,6 +2860,17 @@ void PostShortTerm_fx(
     const Word16 formant_fac_fx /* i : Strength of post-filter*/
 );
+void PostShortTerm_ivas_fx(
+    Word16 *sig_in, /* i : input signal (pointer to current subframe) */
+    Word16 *lpccoeff, /* i : LPC coefficients for current subframe */
+    Word16 *sig_out, /* o : postfiltered output */
+    Word16 *mem_stp, /* i/o: postfilter memory*/
+    Word16 *ptr_mem_stp, /* i/o: pointer to postfilter memory*/
+    Word16 *ptr_gain_prec, /* i/o: for gain adjustment*/
+    Word16 *mem_zero, /* i/o: null memory to compute h_st*/
+    const Word16 formant_fac_fx /* i : Strength of post-filter*/
+);
+
 void flip_spectrum_and_decimby4_fx(
     const Word16 i[], /* i : i spectrum */
     Word16 output[], /* o : output spectrum */
diff --git a/lib_com/swb_tbe_com_fx.c b/lib_com/swb_tbe_com_fx.c
index 95421581db4f86f14c31f70a962546bffdda39f1..75bcaf1d696f59004402bb8344973130e9682eb5 100644
--- a/lib_com/swb_tbe_com_fx.c
+++ b/lib_com/swb_tbe_com_fx.c
@@ -854,6 +854,46 @@ static void Calc_st_filt_tbe(
     }
 }
+static void Calc_st_filt_tbe_ivas_fx(
+    Word16 *apond2, /* i : coefficients of numerator */
+    Word16 *apond1, /* i : coefficients of denominator */
+    Word16 *parcor0, /* o : 1st parcor calcul. on composed filter */
+    Word16 *sig_ltp_ptr, /* i/o: input of 1/A(gamma1) : scaled by 1/g0 */
+    Word16 *mem_zero /* i : All zero memory */
+)
+{
+    Word32 L_g0;
+
+    Word16 h[LONG_H_ST];
+
+    Word16 g0, temp;
+    Word16 i;
+    temp = sub( 2, norm_s( apond2[0] ) );
+    /* compute i.r. of composed filter apond2 / apond1 */
+    Syn_filt_s( temp, apond1, LPC_SHB_ORDER, apond2, h, LONG_H_ST, mem_zero, 0 );
+    /* compute 1st parcor */
+    Calc_rc0_h( h, parcor0 );
+
+    /* compute g0 */
+    L_g0 = L_mult0( 1, abs_s( h[0] ) );
+    FOR( i = 1; i < LONG_H_ST; i++ )
+    {
+        L_g0 = L_mac0( L_g0, 1, abs_s( h[i] ) );
+    }
+    g0 = extract_h( L_shl( L_g0, 14 ) );
+
+    /* Scale signal input of 1/A(gamma1) */
+    IF( GT_16( g0, 1024 ) )
+    {
+        temp = div_s( 1024, g0 ); /* temp = 2**15 / gain0 */
+        FOR( i = 0; i < L_SUBFR16k; i++ )
+        {
+            sig_ltp_ptr[i] = mult_r( sig_ltp_ptr[i], temp );
+            move16();
+        }
+    }
+}
+
 static void filt_mu_fx(
     const Word16 *sig_in, /* i : signal (beginning at sample -1) */
     Word16 *sig_out, /* o : output signal */
@@ -1087,6 +1127,66 @@ void PostShortTerm_fx(
     return;
 }
+void PostShortTerm_ivas_fx(
+    Word16 *sig_in, /* i : input signal (pointer to current subframe) */
+    Word16 *lpccoeff, /* i : LPC coefficients for current subframe */
+    Word16 *sig_out, /* o : postfiltered output */
+    Word16 *mem_stp, /* i/o: postfilter memory*/
+    Word16 *ptr_mem_stp, /* i/o: pointer to postfilter memory*/
+    Word16 *ptr_gain_prec, /* i/o: for gain adjustment*/
+    Word16 *mem_zero, /* i/o: null memory to compute h_st*/
+    const Word16 formant_fac_fx /* i : Strength of post-filter*/
+)
+{
+    Word16 apond1_fx[LPC_SHB_ORDER + 1]; /* denominator coeff.*/
+    Word16 apond2_fx[LONG_H_ST]; /* numerator coeff. */
+    Word16 sig_ltp_fx[L_SUBFR16k + 1]; /* residual signal */
+    /*Word16 lpccoeff_fx[LPC_SHB_ORDER+1];//Q12 */
+    Word16 g1_fx, g2_fx, parcor0_fx; /*Q15 */
+    Word16 tmp;
+
+    parcor0_fx = 0;
+    move16();
+    set16_fx( apond1_fx, 0, LPC_SHB_ORDER + 1 );
+    set16_fx( apond2_fx, 0, LONG_H_ST );
+    set16_fx( sig_ltp_fx, 0, L_SUBFR16k + 1 );
+
+    /* Obtain post-filter weights */
+    tmp = extract_h( L_mult( GAMMA_SHARP_FX, formant_fac_fx ) ); /*Q15 */
+    g1_fx = add( GAMMA0_FX, tmp ); /*Q15 */
+    g2_fx = sub( GAMMA0_FX, tmp ); /*Q15 */
+
+    /* Compute weighted LPC coefficients */
+    weight_a_fx( lpccoeff, apond1_fx, g1_fx, LPC_SHB_ORDER );
+    weight_a_fx( lpccoeff, apond2_fx, g2_fx, LPC_SHB_ORDER );
+    /* o: apond1_fx, apond2_fx in Q12 */
+
+    /* Compute A(gamma2) residual */
+    Residu3_10_fx( apond2_fx, sig_in, sig_ltp_fx + 1, L_SUBFR16k, 0 );
+    /* o: sig_ltp_fx in Q_bwe_exc */
+
+    /* Save last output of 1/A(gamma1) */
+    sig_ltp_fx[0] = *ptr_mem_stp;
+    move16();
+
+    /* Control short term pst filter gain and compute parcor0 */
+    Calc_st_filt_tbe_ivas_fx( apond2_fx, apond1_fx, &parcor0_fx, sig_ltp_fx + 1, mem_zero );
+    /* o: parcor0 in Q15 */
+    /* i/o: sig_ltp_fx in Q_bwe_exc */
+
+    /* 1/A(gamma1) filtering, mem_stp is updated */
+    Syn_filt_s( 0, apond1_fx, LPC_SHB_ORDER, sig_ltp_fx + 1, sig_ltp_fx + 1, L_SUBFR16k, mem_stp, 1 );
+
+    /* (1 + mu z-1) tilt filtering */
+    filt_mu_fx( sig_ltp_fx, sig_out, parcor0_fx, L_SUBFR16k );
+    /* o: sig_out in Q_bwe_exc */
+
+    /* gain control */
+    scale_st_swb( sig_in, sig_out, ptr_gain_prec, L_SUBFR16k );
+
+    return;
+}
+
 void flip_spectrum_and_decimby4_fx(
     const Word16 input[], /* i : input spectrum Q_inp */
     Word16 output[], /* o : output spectrum Q_inp */
diff --git a/lib_com/syn_filt_fx.c b/lib_com/syn_filt_fx.c
index 6b000db50e389e53aad286b4acd8512d83043f52..6b0ba4d03e7bd53c86c0a2fa244e54c744ad7fbc 100644
--- a/lib_com/syn_filt_fx.c
+++ b/lib_com/syn_filt_fx.c
@@ -186,6 +186,96 @@ void Syn_filt_s(
 }
+#ifndef IVAS_FLOAT_FIXED
+/*------------------------------------------------------------------*
+ * syn_filt_fx:
+ *
+ * perform the synthesis filtering 1/A(z).
+ *------------------------------------------------------------------*/ +void syn_filt_fx( + const Word16 shift, /* i : scaling to apply Q0 */ + const Word16 a[], /* i : LP filter coefficients Q12 */ + const Word16 m, /* i : order of LP filter Q0 */ + const Word16 x[], /* i : input signal Qx */ + Word16 y[], /* o : output signal Qx-s */ + const Word16 l, /* i : size of filtering Q0 */ + Word16 mem[], /* i/o: initial filter states Qx-s */ + const Word16 update_m /* i : update memory flag Q0 : 0 --> no memory update */ + ) /* 1 --> update of memory */ +{ + Word16 i, j; + Word16 buf[L_FRAME48k + L_FRAME48k / 2 + TCXLTP_LTP_ORDER]; /* temporary synthesis buffer */ + Word16 s, *yy; + Word16 q; +#ifdef BASOP_NOGLOB + Flag Overflow = 0; + move16(); +#endif + Word16 a0; + + yy = &buf[0]; + q = add( norm_s( a[0] ), 1 ); + +#ifdef BASOP_NOGLOB + a0 = shr_o( a[0], shift, &Overflow ); /* input / 2^shift */ +#else + a0 = shr( a[0], shift ); /* input / 2^shift */ +#endif + + /*------------------------------------------------------------------* + * copy initial filter states into synthesis buffer and do synthesis + *------------------------------------------------------------------*/ + + FOR( i = 0; i < m; i++ ) + { + *yy++ = mem[i]; + move16(); + } + + /*-----------------------------------------------------------------------* + * Do the filtering + *-----------------------------------------------------------------------*/ + + FOR( i = 0; i < l; i++ ) + { +#ifdef BASOP_NOGLOB + s = mult_r( shl_o( a0, q, &Overflow ), x[i] ); +#else + s = mult_r( shl( a0, q ), x[i] ); +#endif + FOR( j = 1; j <= m; j++ ) + { +#ifdef BASOP_NOGLOB + s = msu_ro( L_deposit_h( s ), shl_o( a[j], q, &Overflow ), yy[i - j], &Overflow ); +#else + s = msu_r( L_deposit_h( s ), shl( a[j], q ), yy[i - j] ); +#endif + } + + yy[i] = s; + move16(); + y[i] = s; + move16(); + } + + /*------------------------------------------------------------------* + * Update memory if required + *------------------------------------------------------------------*/ + + IF( update_m ) + { + FOR( i = 0; i < m; i++ ) + { + mem[i] = yy[l - m + i]; + move16(); + } + } + + return; +} +#endif + + /* * E_UTIL_synthesis * diff --git a/lib_dec/swb_tbe_dec.c b/lib_dec/swb_tbe_dec.c index 3c788e784ae4870a75f83246e112d787d06f5a57..e802584621bf24d157ef6e787f4394df251829b4 100644 --- a/lib_dec/swb_tbe_dec.c +++ b/lib_dec/swb_tbe_dec.c @@ -1824,8 +1824,8 @@ void ivas_swb_tbe_dec_fx( FOR( i = 0; i < L_FRAME16k; i += L_SUBFR16k ) { /* TD BWE post-processing */ - PostShortTerm_fx( &shaped_shb_excitation_fx[L_SHB_LAHEAD + i], lpc_shb_fx, &shaped_shb_excitationTemp_fx[i], hBWE_TD->mem_stp_swb_fx, - hBWE_TD->ptr_mem_stp_swb_fx, &( hBWE_TD->gain_prec_swb_fx ), hBWE_TD->mem_zero_swb_fx, formant_fac_fx ); + PostShortTerm_ivas_fx( &shaped_shb_excitation_fx[L_SHB_LAHEAD + i], lpc_shb_fx, &shaped_shb_excitationTemp_fx[i], hBWE_TD->mem_stp_swb_fx, + hBWE_TD->ptr_mem_stp_swb_fx, &( hBWE_TD->gain_prec_swb_fx ), hBWE_TD->mem_zero_swb_fx, formant_fac_fx ); } Copy( shaped_shb_excitationTemp_fx, &shaped_shb_excitation_fx[L_SHB_LAHEAD], L_FRAME16k ); /* Q_bwe_exc */ diff --git a/lib_enc/acelp_core_enc.c b/lib_enc/acelp_core_enc.c index 19cf8c52a0e7fc8f8cb27136a711481ddae20fae..08aa6619b5282bd894fe718c33eacfc1bf77cc8e 100644 --- a/lib_enc/acelp_core_enc.c +++ b/lib_enc/acelp_core_enc.c @@ -65,15 +65,19 @@ ivas_error acelp_core_enc( Encoder_State *st, /* i/o: encoder state structure */ /*const*/ float inp[], /* i : input signal of the current frame */ // const float ener, /* i : residual 
energy from Levinson-Durbin*/ - float A[NB_SUBFR16k * ( M + 1 )], /* i : A(z) unquantized for the 4 subframes*/ - float Aw[NB_SUBFR16k * ( M + 1 )], /* i : weighted A(z) unquant. for subframes*/ - const float epsP[M + 1], /* i : LP prediction errors */ - float lsp_new[M], /* i : LSPs at the end of the frame */ - float lsp_mid[M], /* i : LSPs in the middle of the frame */ - const int16_t vad_hover_flag, /* i : VAD hangover flag */ - const int16_t attack_flag, /* i : attack flag (GSC or TC) */ - float bwe_exc_extended[], /* i/o: bandwidth extended excitation */ - float *voice_factors, /* o : voicing factors */ + float A[NB_SUBFR16k * ( M + 1 )], /* i : A(z) unquantized for the 4 subframes*/ + float Aw[NB_SUBFR16k * ( M + 1 )], /* i : weighted A(z) unquant. for subframes*/ + const float epsP[M + 1], /* i : LP prediction errors */ + float lsp_new[M], /* i : LSPs at the end of the frame */ + float lsp_mid[M], /* i : LSPs in the middle of the frame */ + const int16_t vad_hover_flag, /* i : VAD hangover flag */ + const int16_t attack_flag, /* i : attack flag (GSC or TC) */ + float bwe_exc_extended[], /* i/o: bandwidth extended excitation */ +#ifndef IVAS_FLOAT_FIXED + float *voice_factors, /* o : voicing factors */ +#else + Word16 *voice_factors_fx, /* o : voicing factors Q15 */ +#endif float old_syn_12k8_16k[], /* o : intermediate ACELP synthesis at 12.8kHz or 16kHz to be used by SWB BWE */ float pitch_buf[NB_SUBFR16k], /* o : floating pitch for each subframe */ int16_t *unbits, /* o : number of unused bits */ @@ -183,8 +187,7 @@ ivas_error acelp_core_enc( Word16 tmpF_fx; #endif #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word16 pitch_buf_fx[NB_SUBFR16k] = { 0 }; /* To be removed once this is taken as input arg of function */ - Word16 voice_factors_fx[NB_SUBFR16k] = { 0 }; /* To be removed once this is taken as input arg of function */ + Word16 pitch_buf_fx[NB_SUBFR16k] = { 0 }; /* To be removed once this is taken as input arg of function */ Word16 tmp; set_zero( old_bwe_exc, 1380 ); for ( i = 0; i < NB_SUBFR16k; i++ ) @@ -640,10 +643,23 @@ ivas_error acelp_core_enc( #endif /* reset the encoder */ -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS +#ifndef IVAS_FLOAT_FIXED CNG_reset_enc( st, pitch_buf, voice_factors, 0 ); /* To be removed once the function is completely fixed */ -#endif +#else CNG_reset_enc_fx( st, hLPDmem, pitch_buf_fx, voice_factors_fx, 0 ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + st->hLPDmem->mem_w0_flt = 0.0f; + // Reset for st->hLPDmem->mem_syn_flt not needed as flag is 0 + if ( st->L_frame == L_FRAME ) + { + set_f( pitch_buf, (float) L_SUBFR, NB_SUBFR ); + } + else /* st->L_frame == L_FRAME16k */ + { + set_f( pitch_buf, (float) L_SUBFR16k, NB_SUBFR16k ); + } +#endif +#endif /* update st->mem_syn1_flt for ACELP core switching */ mvr2r( hLPDmem->mem_syn3_flt, hLPDmem->mem_syn1_flt, M ); @@ -723,9 +739,7 @@ ivas_error acelp_core_enc( floatToFixed_arr( old_exc_flt, old_exc_fx, Q_new, st->L_frame ); st->preemph_fac = float_to_fix16( st->preemph_fac_flt, Q15 ); - floatToFixed_arr( st->voicing, st->voicing_fx, Q15, 3 ); - floatToFixed_arr( voice_factors, voice_factors_fx, Q15, NB_SUBFR16k ); f2me_buf_16( st->hGSCEnc->last_exc_dct_in, st->hGSCEnc->last_exc_dct_in_fx, &st->hGSCEnc->Q_last_exc_dct_in, L_FRAME16k ); st->hGSCEnc->Q_last_exc_dct_in = Q15 - st->hGSCEnc->Q_last_exc_dct_in; #endif @@ -1008,7 +1022,6 @@ ivas_error acelp_core_enc( fixedToFloat_arr( bwe_exc_fx, bwe_exc, Q_new, L_FRAME32k ); me2f_buf_16( st->hGSCEnc->last_exc_dct_in_fx, Q15 - st->hGSCEnc->Q_last_exc_dct_in, 
st->hGSCEnc->last_exc_dct_in, L_FRAME16k ); fixedToFloat_arr( syn_fx, syn, Q_new - 1, L_FRAME16k ); - fixedToFloat_arr( voice_factors_fx, voice_factors, Q15, NB_SUBFR16k ); fixedToFloat_arr( pitch_buf_fx, pitch_buf, Q6, NB_SUBFR16k ); #endif @@ -1187,10 +1200,6 @@ ivas_error acelp_core_enc( /* Apply a non linearity to the SHB excitation */ #ifdef IVAS_FLOAT_FIXED #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - - // Word16 voice_factors_fx[NB_SUBFR16k]; - floatToFixed_arr( voice_factors, voice_factors_fx, Q15, 5 ); // Saturation Conversion used as last values have garbage values even in float - Word32 bwe_exc_extended_fx[L_FRAME32k + NL_BUFF_OFFSET]; Word16 q_bwe_exc = sub( st->prev_Q_bwe_exc, 16 ) / 2; q_bwe_exc = min( q_bwe_exc, Q_factor_arr( bwe_exc, ( ( PIT16k_MAX + ( L_FRAME16k + 1 ) + L_SUBFR16k ) * 2 ) - PIT16k_MAX * 2 ) ); diff --git a/lib_enc/core_switching_enc.c b/lib_enc/core_switching_enc.c index 5bbd809683eb4404ec73cafa450e03deb736fca6..234ed82b2e8f1749a959716143abad901d2dcdc7 100644 --- a/lib_enc/core_switching_enc.c +++ b/lib_enc/core_switching_enc.c @@ -894,7 +894,6 @@ void core_switching_post_enc( floatToFixed_arr( old_inp_12k8, old_inp_12k8_fx, Q_new, L_INP_12k8 ); floatToFixed_arr( old_inp_16k, old_inp_16k_fx, Q_new, L_INP ); floatToFixed_arr( A, A_fx, 12, NB_SUBFR16k * ( M + 1 ) ); - floatToFixed_arr( st->voicing, st->voicing_fx, 15, 3 ); floatToFixed_arr( st->old_Aq_12_8, st->old_Aq_12_8_fx, 12, M + 1 ); st->hLPDmem->mem_w0 = float_to_fix16( st->hLPDmem->mem_w0_flt, Q_new - 1 ); floatToFixed_arr( st->hLPDmem->mem_syn_flt, st->hLPDmem->mem_syn, Q_new - 1, M ); diff --git a/lib_enc/enc_uv_fx.c b/lib_enc/enc_uv_fx.c index afa32cd3a212259657e689fdd64d2a3dc3a92af7..54b5fabe844f5488620d4f5a1738488d2468e4e2 100644 --- a/lib_enc/enc_uv_fx.c +++ b/lib_enc/enc_uv_fx.c @@ -346,8 +346,8 @@ void encod_unvoiced_ivas_fx( i_subfr_idx = shr( i_subfr, 6 ); Copy( &res_fx[i_subfr], &exc_fx[i_subfr], L_SUBFR ); - find_targets_fx( speech_fx, hLPDmem->mem_syn, i_subfr, &hLPDmem->mem_w0, p_Aq_fx, - res_fx, L_SUBFR, p_Aw_fx, st_fx->preemph_fac, xn_fx, cn_fx, h1_fx ); + find_targets_ivas_fx( speech_fx, hLPDmem->mem_syn, i_subfr, &hLPDmem->mem_w0, p_Aq_fx, + res_fx, L_SUBFR, p_Aw_fx, st_fx->preemph_fac, xn_fx, cn_fx, h1_fx ); Copy_Scale_sig( h1_fx, h2_fx, L_SUBFR, -2 ); Scale_sig( h1_fx, L_SUBFR, add( 1, shift ) ); /* set h1[] in Q14 with scaling for convolution */ diff --git a/lib_enc/ext_sig_ana_fx.c b/lib_enc/ext_sig_ana_fx.c index 4092c58f2a3eabd47319ebfe4ee924ad911e0543..fedb26afe4b303f4bb9cb427d6aee450ed88d410 100644 --- a/lib_enc/ext_sig_ana_fx.c +++ b/lib_enc/ext_sig_ana_fx.c @@ -650,10 +650,10 @@ void core_signal_analysis_high_bitrate_ivas_fx( const Word16 vad_hover_flag, /* i : VAD hangover flag */ Word32 **spectrum, Word16 *spectrum_e, - Word16 *Q_new ) + Word16 *Q_new, + Word16 *q_win ) { TCX_ENC_HANDLE hTcxEnc = st->hTcxEnc; - const Word16 last_overlap = st->hTcxCfg->tcx_last_overlap_mode; const Word16 curr_overlap = st->hTcxCfg->tcx_curr_overlap_mode; const Word16 minWindowLen = sub( st->hTcxCfg->tcx_mdct_window_min_lengthFB, 1 ); @@ -703,6 +703,8 @@ void core_signal_analysis_high_bitrate_ivas_fx( Word16 *speech_fx = NULL; Word16 q_out_wtda = 0; move16(); + Word16 win_len = 0; + move16(); if ( NE_16( last_element_mode, st->element_mode ) ) { @@ -939,9 +941,12 @@ void core_signal_analysis_high_bitrate_ivas_fx( IF( windowed_samples != NULL ) /* save windowed speech_TCX samples */ { assert( L_subframe + ( left_overlap + right_overlap ) / 2 < 2 * L_FRAME_MAX / nSubframes - L_FRAME_MAX / 8 ); + 
win_len = add( L_subframe, shr( add( left_overlap, right_overlap ), 1 ) ); windowed_samples[frameno * L_FRAME_MAX + 0] = L_deposit_l( overlap_mode[frameno] ); windowed_samples[frameno * L_FRAME_MAX + 1] = L_deposit_l( overlap_mode[frameno + 1] ); Copy_Scale_sig_16_32( tcx20Win, windowed_samples + frameno * L_FRAME_MAX + 2, L_subframe + ( left_overlap + right_overlap ) / 2, 0 ); + *q_win = s_min( *q_win, sub( L_norm_arr( windowed_samples + frameno * L_FRAME_MAX + 2, L_subframe + ( left_overlap + right_overlap ) / 2 ), 1 ) ); + move16(); } } @@ -1395,6 +1400,17 @@ void core_signal_analysis_high_bitrate_ivas_fx( } } + IF( windowed_samples != NULL ) + { + FOR( frameno = 0; frameno < nSubframes; frameno++ ) + { + IF( !( ( EQ_16( transform_type[frameno], TCX_20 ) ) && ( NE_16( st->hTcxCfg->tcx_last_overlap_mode, TRANSITION_OVERLAP ) ) ) ) + { + Scale_sig32( windowed_samples + frameno * L_FRAME_MAX + 2, win_len, *q_win ); + } + } + } + IF( NE_16( st->element_mode, IVAS_CPE_MDCT ) ) { /* Copy memory */ diff --git a/lib_enc/init_enc.c b/lib_enc/init_enc.c index bda6175bc75efeab2622f2e6961eeab4cfd14863..538c7f834fc795c08bd3bb7dbf7a262d771a34ce 100644 --- a/lib_enc/init_enc.c +++ b/lib_enc/init_enc.c @@ -385,7 +385,11 @@ ivas_error init_encoder( st->energy_sm_fx = 0; #endif set_s( st->pitch, L_SUBFR, 3 ); +#ifndef IVAS_FLOAT_FIXED set_f( st->voicing, 0.0f, 3 ); +#else + set16_fx( st->voicing_fx, 0, 3 ); +#endif /*-----------------------------------------------------------------* * General signal buffers @@ -614,10 +618,11 @@ ivas_error init_encoder( * DTX *-----------------------------------------------------------------*/ - st->lp_speech = 45.0f; /* Initialize the long-term active speech level in dB */ #ifndef IVAS_FLOAT_FIXED + st->lp_speech = 45.0f; /* Initialize the long-term active speech level in dB */ st->lp_noise = 0.0f; #else + st->lp_speech_fx = 11520; /* Initialize the long-term active speech level in dB : 45.0f in Q8 */ st->lp_noise_fx = 0; #endif st->flag_noisy_speech_snr = 0; @@ -1343,8 +1348,6 @@ ivas_error init_encoder_ivas_fx( set_f( st->inp_16k_mem_stereo_sw, 0, STEREO_DFT_OVL_16k - L_MEM_RECALC_16K - L_FILT16k ); set_f( st->old_Aq_12_8 + 1, 0, M ); st->old_Aq_12_8[0] = 4096; - set_f( st->voicing, 0.0f, 3 ); - st->lp_speech = 45.0f; /* Initialize the long-term active speech level in dB */ st->active_cnt = 0; #ifndef IVAS_FLOAT_FIXED st->old_hpfilt_in = 0.0f; diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c index 8a8e172b319d06c1ad3fcf84d3d53c6618da2011..747e255f5786bb562e5dc931fec52ada7b792c16 100644 --- a/lib_enc/ivas_core_enc.c +++ b/lib_enc/ivas_core_enc.c @@ -98,7 +98,11 @@ ivas_error ivas_core_enc( float new_swb_speech_buffer[L_FRAME48k + STEREO_DFT_OVL_MAX]; float bwe_exc_extended[CPE_CHANNELS][L_FRAME32k + NL_BUFF_OFFSET]; Word16 Q_new[CPE_CHANNELS]; - float voice_factors[CPE_CHANNELS][NB_SUBFR16k] = { 0 }; +#ifndef IVAS_FLOAT_FIXED + float voice_factors[CPE_CHANNELS][NB_SUBFR16k]; +#else + Word16 voice_factors_fx[CPE_CHANNELS][NB_SUBFR16k]; /* Q15 */ +#endif #ifdef IVAS_FLOAT_FIXED Word32 *new_swb_speech_fx; Word16 shb_speech_fx[L_FRAME16k]; // Q_shb_spch @@ -107,7 +111,6 @@ ivas_error ivas_core_enc( Word16 new_inp_resamp16k_fx[CPE_CHANNELS][L_FRAME16k]; Word16 hb_speech_fx[L_FRAME16k / 4]; Word32 bwe_exc_extended_fx[CPE_CHANNELS][L_FRAME32k + NL_BUFF_OFFSET]; /* 2 * Q_new */ - Word16 voice_factors_fx[CPE_CHANNELS][NB_SUBFR16k]; /* Q15 */ Word16 old_syn_12k8_16k_fx[CPE_CHANNELS][L_FRAME16k]; /* ACELP core synthesis at 12.8kHz or 16kHz to be used by the SWB BWE 
*/ Word16 *new_swb_speech_fx_16; Word16 new_swb_speech_buffer_fx_16[L_FRAME48k + STEREO_DFT_OVL_MAX]; @@ -231,8 +234,6 @@ ivas_error ivas_core_enc( st->preemph_fac = (Word16) floatToFixed( st->preemph_fac_flt, Q15 ); - floatToFixed_arr16( st->voicing, st->voicing_fx, Q15, 3 ); - f2me( cor_map_sum[n], &cor_map_sum_fx, &exp_cor_map_sum ); f2me_buf_16( fft_buff[n], fft_buff_fx, &e_fft_buff, ( 2 * L_FFT ) ); @@ -276,8 +277,6 @@ ivas_error ivas_core_enc( #ifdef IVAS_FLOAT_FIXED_CONVERSIONS st->preemph_fac_flt = fixedToFloat_16( st->preemph_fac, Q15 ); - - fixedToFloat_arr( st->voicing_fx, st->voicing, Q15, 3 ); #endif if ( st->element_mode == IVAS_CPE_MDCT || st->element_mode == IVAS_SCE ) @@ -290,13 +289,7 @@ ivas_error ivas_core_enc( * Sanity check in combined format coding *-----------------------------------------------------------------*/ -#ifndef IVAS_FLOAT_FIXED - diff_nBits = 0; - if ( hCPE != NULL && hCPE->element_mode == IVAS_CPE_DFT && hCPE->brate_surplus > 0 ) - { - ivas_combined_format_brate_sanity( hCPE->element_brate, sts[0]->core, sts[0]->total_brate, &( sts[0]->core_brate ), &( sts[0]->inactive_coder_type_flag ), &diff_nBits ); - } -#else +#ifdef IVAS_FLOAT_FIXED diff_nBits = 0; move16(); test(); @@ -385,7 +378,11 @@ ivas_error ivas_core_enc( if ( st->core == ACELP_CORE ) { /* ACELP core encoder */ +#ifndef IVAS_FLOAT_FIXED if ( ( error = acelp_core_enc( st, inp[n], /*ener[n],*/ A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], vad_hover_flag[0], attack_flag[n], bwe_exc_extended[n], voice_factors[n], old_syn_12k8_16k[n], pitch_buf[n], &unbits[n], hStereoTD, tdm_lsfQ_PCh ) ) != IVAS_ERR_OK ) +#else + if ( ( error = acelp_core_enc( st, inp[n], /*ener[n],*/ A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], vad_hover_flag[0], attack_flag[n], bwe_exc_extended[n], voice_factors_fx[n], old_syn_12k8_16k[n], pitch_buf[n], &unbits[n], hStereoTD, tdm_lsfQ_PCh ) ) != IVAS_ERR_OK ) +#endif { return error; } @@ -445,24 +442,6 @@ ivas_error ivas_core_enc( fixedToFloat_arr( lsp_new_fx[n], lsp_new[n], Q15, M ); fixedToFloat_arr( lsp_mid_fx[n], lsp_mid[n], Q15, M ); #endif -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS_ - if ( st->element_mode == IVAS_CPE_DFT ) - { - fixedToFloat_arr( st->buf_wspeech_enc, st->buf_wspeech_enc_flt, q_fac, L_FRAME16k + L_SUBFR + L_FRAME16k + L_NEXT_MAX_16k + 320 ); - } - else if ( st->element_mode != IVAS_CPE_MDCT ) - { - hTcxEnc->tcxltp_gain_past_flt = fix16_to_float( hTcxEnc->tcxltp_gain_past, Q15 ); - hTcxEnc->tcxltp_gain_flt = fix16_to_float( hTcxEnc->tcxltp_gain, Q15 ); - fixedToFloat_arr( st->hTcxEnc->buf_speech_ltp, st->hTcxEnc->buf_speech_ltp_flt, q_fac, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); - } - - fixedToFloat_arrL32( hTcxEnc->spectrum_fx[0], hTcxEnc->spectrum[0], sub( Q31, hTcxEnc->spectrum_e[0] ), st->hTcxEnc->L_frameTCX / nSubframes ); - IF( hTcxEnc->tcxMode != TCX_20 ) - { - fixedToFloat_arrL32( hTcxEnc->spectrum_fx[1], hTcxEnc->spectrum[1], sub( Q31, hTcxEnc->spectrum_e[1] ), st->hTcxEnc->L_frameTCX / nSubframes ); - } -#endif #ifdef IVAS_FLOAT_FIXED_CONVERSIONS fixedToFloat_arr( pitch_buf_fx[n], pitch_buf[n], Q6, NB_SUBFR16k ); @@ -474,9 +453,6 @@ ivas_error ivas_core_enc( } fixedToFloat_arr( st->hTcxEnc->Txnq, st->hTcxEnc->Txnq_flt, -1, L_FRAME32k / 2 + 64 ); #endif // IVAS_FLOAT_FIXED_CONVERSIONS -#else - /* TCX core encoder */ - stereo_tcx_core_enc( st, old_inp_12k8[n] + L_INP_MEM, old_inp_16k[n] + L_INP_MEM, Aw[n], lsp_new[n], lsp_mid[n], pitch_buf[n], last_element_mode, vad_hover_flag[0] ); #endif } @@ -615,16 +591,6 @@ ivas_error ivas_core_enc( } } - 
IF( NE_16( st->element_mode, EVS_MONO ) ) - { - // Word16 temp_e; - - // f2me( st->hHQ_core->crest_lp, &st->hHQ_core->crest_lp_fx, &temp_e ); - // st->hHQ_core->crest_lp_q = sub( Q31, temp_e ); - // f2me( st->hHQ_core->crest_mod_lp, &st->hHQ_core->crest_mod_lp_fx, &temp_e ); - // st->hHQ_core->crest_mod_lp_q = sub( Q31, temp_e ); - } - floatToFixed_arr( st->input, st->input_fx, 0, 960 ); #endif @@ -644,8 +610,6 @@ ivas_error ivas_core_enc( } } #endif -#else - hq_core_enc( st, st->input, input_frame, NORMAL_HQ_CORE, Voicing_flag[n], vad_hover_flag[0] ); #endif } @@ -655,9 +619,7 @@ ivas_error ivas_core_enc( if ( st->element_mode == IVAS_CPE_TD && n == 0 ) { -#ifndef IVAS_FLOAT_FIXED - td_stereo_param_updt( st->lsp_old, st->lsf_old, pitch_buf[0], tdm_lspQ_PCh, tdm_lsfQ_PCh, hStereoTD->tdm_Pri_pitch_buf, st->flag_ACELP16k, hStereoTD->tdm_use_IAWB_Ave_lpc ); -#else +#ifdef IVAS_FLOAT_FIXED #ifdef IVAS_FLOAT_FIXED_CONVERSIONS // lsp_old - Q15, lsf_old - Qlog2(2.56), pitch_buf - Q6 floatToFixed_arr16( pitch_buf[0], pitch_buf_fx[0], Q6, NB_SUBFR ); @@ -686,8 +648,26 @@ ivas_error ivas_core_enc( { if ( MCT_flag ) { - ivas_mdct_core_whitening_enc( hCPE, old_inp_16k, old_wsp, pitch_buf, hMCT->p_mdst_spectrum_long[cpe_id], hMCT->tnsBits[cpe_id], hMCT->p_orig_spectrum_long[cpe_id], +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + Word16 old_inp_16k_fx[CPE_CHANNELS][L_INP]; + Word16 old_wsp_fx[CPE_CHANNELS][L_WSP], q_fac[CPE_CHANNELS]; + Word16 pitch_buf_fx_new[CPE_CHANNELS][NB_SUBFR16k]; /* Q6 */ + for ( i = 0; i < CPE_CHANNELS; i++ ) + { + floatToFixed_arr( old_inp_16k[i], old_inp_16k_fx[i], 0, L_INP ); + q_fac[i] = Q_factor_arr( old_wsp[i], L_WSP ); + floatToFixed_arr( old_wsp[i], old_wsp_fx[i], q_fac[i], L_WSP ); + } +#endif + ivas_mdct_core_whitening_enc( hCPE, old_inp_16k_fx, old_wsp_fx, pitch_buf_fx_new, hMCT->p_mdst_spectrum_long[cpe_id], hMCT->tnsBits[cpe_id], hMCT->p_orig_spectrum_long[cpe_id], hMCT->tnsSize[cpe_id], hMCT->p_param[cpe_id], hMCT->hBstr, 1, hMCT->nchan_out_woLFE ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + for ( i = 0; i < CPE_CHANNELS; i++ ) + { + fixedToFloat_arr( old_wsp_fx[i], old_wsp[i], q_fac[i], L_WSP ); + fixedToFloat_arr( pitch_buf_fx_new[i], pitch_buf[i], Q6, NB_SUBFR16k ); + } +#endif } else { @@ -708,19 +688,11 @@ ivas_error ivas_core_enc( if ( sts[0]->cng_sba_flag ) { -#ifdef IVAS_FLOAT_FIXED FdCngEncodeDiracMDCTStereoSID_fx( hCPE ); -#else - FdCngEncodeDiracMDCTStereoSID( hCPE ); -#endif } else { -#ifdef IVAS_FLOAT_FIXED FdCngEncodeMDCTStereoSID_fx( hCPE ); -#else - FdCngEncodeMDCTStereoSID( hCPE ); -#endif } } } @@ -745,37 +717,17 @@ ivas_error ivas_core_enc( * WB BWE encoding *---------------------------------------------------------------------*/ -#ifndef IVAS_FLOAT_FIXED - if ( input_Fs >= 16000 && st->bwidth < SWB && st->hBWE_TD != NULL ) - { - /* Common pre-processing for WB TBE and WB BWE */ - wb_pre_proc( st, last_element_mode, new_inp_resamp16k[n], hb_speech ); - } - - if ( st->extl == WB_TBE ) - { - /* WB TBE encoder */ - wb_tbe_enc( st, hb_speech, bwe_exc_extended[n], voice_factors[n], pitch_buf[n] ); - } - else if ( st->extl == WB_BWE && n == 0 && st->element_mode != IVAS_CPE_MDCT ) - { - /* WB BWE encoder */ - wb_bwe_enc( st, new_inp_resamp16k[n] ); - } -#else +#ifdef IVAS_FLOAT_FIXED #ifdef IVAS_FLOAT_FIXED_CONVERSIONS /* Temporarily calculating variable Q. 
Will get Q values from core processing */ Q_new[n] = Q_factor_arrL( bwe_exc_extended[n], L_FRAME32k + NL_BUFF_OFFSET ); Q_new[n] = Q_new[n] / 2; floatToFixed_arr16( new_inp_resamp16k[n], new_inp_resamp16k_fx[n], -1, L_FRAME16k ); - floatToFixed_arr( voice_factors[n], voice_factors_fx[n], Q15, NB_SUBFR16k ); // Saturation Conversion used as last values have garbage values even in float - floatToFixed_arr( pitch_buf[n], pitch_buf_fx[n], Q6, NB_SUBFR16k ); // Saturation Conversion used as last values have garbage values even in float + floatToFixed_arr( pitch_buf[n], pitch_buf_fx[n], Q6, NB_SUBFR16k ); // Saturation Conversion used as last values have garbage values even in float floatToFixed_arr32( bwe_exc_extended[n], bwe_exc_extended_fx[n], 2 * Q_new[n], L_FRAME32k + NL_BUFF_OFFSET ); // prev_lsp_wb_temp_fx, prev_lsp_wb_fx and prev_lpc_wb_fx in Q15. No float counterparts - floatToFixed_arr16( st->voicing, st->voicing_fx, Q15, 3 ); - if ( st->hBWE_FD != NULL ) { floatToFixed_arr16( st->hBWE_FD->old_wtda_swb, st->hBWE_FD->L_old_wtda_swb_fx, -1, L_FRAME48k ); @@ -804,7 +756,6 @@ ivas_error ivas_core_enc( } #ifdef IVAS_FLOAT_FIXED_CONVERSIONS fixedToFloat_arr( hb_speech_fx, hb_speech, -1, L_FRAME16k / 4 ); - fixedToFloat_arr( voice_factors_fx[n], voice_factors[n], Q15, NB_SUBFR16k ); fixedToFloat_arr( pitch_buf_fx[n], pitch_buf[n], Q6, NB_SUBFR16k ); fixedToFloat_arrL( bwe_exc_extended_fx[n], bwe_exc_extended[n], 2 * Q_new[n], L_FRAME32k + NL_BUFF_OFFSET ); @@ -839,14 +790,10 @@ ivas_error ivas_core_enc( floatToFixed_arrL32( (float *) &realBuffer[n][0][0], (Word32 *) &realBuffer_fx[0][0], q_realImagBuffer, CLDFB_NO_COL_MAX * CLDFB_NO_CHANNELS_MAX ); floatToFixed_arrL32( (float *) &imagBuffer[n][0][0], (Word32 *) &imagBuffer_fx[0][0], q_realImagBuffer, CLDFB_NO_COL_MAX * CLDFB_NO_CHANNELS_MAX ); -#if 0 - floatToFixed_arr( &st->hSignalBuf->input_buff_flt[0], &st->hSignalBuf->input_buff[0], 0, L_FRAME48k + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ); -#else for ( Word32 idx = 0; idx < ( L_FRAME48k + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ); idx++ ) { st->hSignalBuf->input_buff[idx] = (Word16) st->hSignalBuf->input_buff_flt[idx]; } -#endif if ( st->element_mode == IVAS_CPE_DFT ) { f2me_buf( hCPE->hStereoDft->DFT[0], hCPE->hStereoDft->DFT_fx[0], &hCPE->hStereoDft->DFT_fx_e[0], STEREO_DFT_N_MAX_ENC ); @@ -859,17 +806,11 @@ ivas_error ivas_core_enc( { st->hBWE_FD->old_input_fx[ii] = (Word16) st->hBWE_FD->old_input[ii]; } -#if 0 - floatToFixed_arr( (float *) &st->hBWE_FD->old_wtda_swb[0], (Word16 *) &st->hBWE_FD->L_old_wtda_swb_fx[0], 0, L_FRAME48k ); - floatToFixed_arr( (float *) &st->hBWE_FD->old_fdbwe_speech[0], (Word16 *) &st->hBWE_FD->old_fdbwe_speech_fx[0], 0, L_FRAME48k ); - floatToFixed_arr( (float *) &st->hBWE_TD->old_speech_shb[0], (Word16 *) &st->hBWE_TD->old_speech_shb_fx[0], 0, L_LOOK_16k + L_SUBFR16k ); -#else for ( Word32 idx = 0; idx < ( L_FRAME48k ); idx++ ) { st->hBWE_FD->L_old_wtda_swb_fx[idx] = (Word16) st->hBWE_FD->old_wtda_swb[idx]; st->hBWE_FD->old_fdbwe_speech_fx[idx] = (Word16) st->hBWE_FD->old_fdbwe_speech[idx]; } -#endif #endif /* Scaling cldfb_state_fx */ @@ -901,8 +842,6 @@ ivas_error ivas_core_enc( fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_12k8_fx, hCPE->hStereoDft->output_mem_dmx_12k8, 16, STEREO_DFT_OVL_12k8 ); } #endif -#else - swb_pre_proc( st, new_swb_speech, shb_speech, realBuffer[n], imagBuffer[n], hCPE ); #endif } else if ( input_Fs >= 32000 ) @@ -917,22 +856,23 @@ ivas_error ivas_core_enc( #ifdef IVAS_FLOAT_FIXED_CONVERSIONS 
fixedToFloat_arr( shb_speech_fx, shb_speech, Q_shb_spch, L_FRAME16k ); #endif -#else - InitSWBencBufferStates( st->hBWE_TD, shb_speech ); #endif } } +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + if ( st->hBWE_TD != NULL ) + { + floatToFixed_arr16( st->hBWE_TD->cur_sub_Aq, st->hBWE_TD->cur_sub_Aq_fx, Q12, M + 1 ); + } + floatToFixed_arr( st->input_buff, st->input_buff_fx, 0 /*Q_input*/, L_FRAME48k + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ); + floatToFixed_arr( pitch_buf[n], pitch_buf_fx[n], Q6, NB_SUBFR16k ); // Saturation Conversion used as last values have garbage values even in float +#endif /* SWB TBE encoder */ if ( st->extl == SWB_TBE || st->extl == FB_TBE ) { if ( st->core_brate != FRAME_NO_DATA && st->core_brate != SID_2k40 ) { - float fb_exc[L_FRAME16k]; - -#ifdef IVAS_FLOAT_FIXED - Word16 fb_exc_fx[L_FRAME16k]; - #ifdef IVAS_FLOAT_FIXED_CONVERSIONS Word16 Q_fb_exc; @@ -940,59 +880,24 @@ ivas_error ivas_core_enc( Q_new[n] = Q_factor_arrL( bwe_exc_extended[n], L_FRAME32k + NL_BUFF_OFFSET ); Q_new[n] = Q_new[n] / 2; - // Q_shb_spch = Q_factor_arr( shb_speech, L_FRAME16k ); - - floatToFixed_arr( voice_factors[n], voice_factors_fx[n], Q15, NB_SUBFR16k ); // Saturation Conversion used as last values have garbage values even in float - floatToFixed_arr( pitch_buf[n], pitch_buf_fx[n], Q6, NB_SUBFR16k ); // Saturation Conversion used as last values have garbage values even in float floatToFixed_arr32( bwe_exc_extended[n], bwe_exc_extended_fx[n], 2 * Q_new[n], L_FRAME32k + NL_BUFF_OFFSET ); // prev_lsp_wb_temp_fx, prev_lsp_wb_fx and prev_lpc_wb_fx in Q15. No float counterparts - floatToFixed_arr16( st->voicing, st->voicing_fx, Q15, 3 ); floatToFixed_arr( shb_speech, shb_speech_fx, Q_shb_spch, L_FRAME16k ); - - if ( st->hBWE_TD != NULL ) - { - floatToFixed_arr16( st->hBWE_TD->cur_sub_Aq, st->hBWE_TD->cur_sub_Aq_fx, Q12, M + 1 ); - } #endif + Word16 fb_exc_fx[L_FRAME16k]; + swb_tbe_enc_ivas_fx( st, hStereoICBWE, shb_speech_fx, bwe_exc_extended_fx[n], voice_factors_fx[n], fb_exc_fx, &Q_fb_exc, Q_new[n], Q_shb_spch, st->voicing_fx, pitch_buf_fx[n] ); -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - if ( st->hBWE_TD != NULL ) - { - fixedToFloat_arr( st->hBWE_TD->cur_sub_Aq_fx, st->hBWE_TD->cur_sub_Aq, Q12, M + 1 ); - } -#endif -#else - swb_tbe_enc( st, hStereoICBWE, shb_speech, bwe_exc_extended[n], voice_factors[n], fb_exc, pitch_buf[n] ); -#endif - if ( st->extl == FB_TBE ) + IF( EQ_16( st->extl, FB_TBE ) ) { /* FB TBE encoder */ -#ifndef IVAS_FLOAT_FIXED - fb_tbe_enc( st, st->input, fb_exc ); -#else -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS -#ifndef MSAN_FIX - Q_fb_exc = Q_factor_arr( fb_exc, L_FRAME16k ); - floatToFixed_arr( fb_exc, fb_exc_fx, Q_fb_exc, L_FRAME16k ); -#endif - // Q_input is being calculated inside already - Word16 Q_input = 0; - floatToFixed_arr( st->input_buff, st->input_buff_fx, Q_input, L_FRAME48k + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ); -#endif fb_tbe_enc_ivas_fx( st, st->input_fx, fb_exc_fx, Q_fb_exc ); -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - fixedToFloat_arr( fb_exc_fx, fb_exc, Q_fb_exc, L_FRAME16k ); - fixedToFloat_arr( st->input_fx, st->input, Q_input, L_FRAME48k ); -#endif -#endif } } } else if ( st->extl == SWB_BWE || st->extl == FB_BWE ) { -#ifdef IVAS_FLOAT_FIXED #ifdef IVAS_FLOAT_FIXED_CONVERSIONS Word16 old_inp_12k8_fx[L_INP_12k8], old_inp_16k_fx[L_INP]; Word16 q_val; @@ -1046,164 +951,128 @@ ivas_error ivas_core_enc( { st->hBWE_FD->old_input[ii] = (float) st->hBWE_FD->old_input_fx[ii]; } -#endif -#else - /* SWB(FB) BWE encoder */ - swb_bwe_enc( st, 
last_element_mode, old_inp_12k8[n], old_inp_16k[n], old_syn_12k8_16k[n], new_swb_speech, shb_speech ); #endif } +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + Word16 new_swb_speech_buffer_fx16[L_FRAME48k + STEREO_DFT_OVL_MAX]; + Word16 q_new_swb_speech_buffer = Q_factor_arr( new_swb_speech_buffer, input_frame ); + floatToFixed_arr16( new_swb_speech_buffer, new_swb_speech_buffer_fx16, q_new_swb_speech_buffer, input_frame ); + Word32 shb_speech_fx32[L_FRAME16k]; + Word32 voice_factors_fx32[CPE_CHANNELS][NB_SUBFR16k]; + Word16 q_shb_speech_fx32 = Q_factor_arrL( shb_speech, L_FRAME16k ); + floatToFixed_arrL( shb_speech, shb_speech_fx32, q_shb_speech_fx32, L_FRAME16k ); + + f2me_buf_16( st->buf_speech_enc_pe_flt, st->buf_speech_enc_pe, &st->exp_buf_speech_enc_pe, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); + f2me_buf_16( st->buf_speech_enc_flt, st->buf_speech_enc, &st->exp_buf_speech_enc, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); + f2me_buf_16( st->buf_synth_flt, st->buf_synth, &st->exp_buf_synth, OLD_SYNTH_SIZE_ENC + L_FRAME32k ); + IF( st->hTcxEnc != NULL ) + { + f2me_buf_16( st->buf_wspeech_enc_flt, st->buf_wspeech_enc, &st->exp_buf_wspeech_enc, L_FRAME16k + L_SUBFR + L_FRAME16k + L_NEXT_MAX_16k ); + f2me_buf_16( st->hTcxEnc->buf_speech_ltp_flt, st->hTcxEnc->buf_speech_ltp, &st->hTcxEnc->exp_buf_speech_ltp, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); + } + floatToFixed_arr( old_syn_12k8_16k[n], old_syn_12k8_16k_fx[n], 0, L_FRAME16k ); +#endif /*---------------------------------------------------------------------* * SWB DTX/CNG encoding *---------------------------------------------------------------------*/ - - if ( st->hTdCngEnc != NULL && st->Opt_DTX_ON && ( input_frame >= L_FRAME32k || st->element_mode == IVAS_CPE_DFT ) ) + test(); + test(); + test(); + IF( st->hTdCngEnc != NULL && st->Opt_DTX_ON && ( GE_16( input_frame, L_FRAME32k ) || EQ_16( st->element_mode, IVAS_CPE_DFT ) ) ) { /* SHB DTX/CNG encoder */ -#ifdef IVAS_FLOAT_FIXED -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - FOR( i = 0; i < CPE_CHANNELS; i++ ) - { - floatToFixed_arr( old_syn_12k8_16k[i], old_syn_12k8_16k_fx[i], 0, L_FRAME16k ); - } - - floatToFixed_arr( shb_speech, shb_speech_fx, 0, L_FRAME16k ); -#endif - swb_CNG_enc_ivas_fx( st, shb_speech_fx, old_syn_12k8_16k_fx[n] ); -#else - swb_CNG_enc( st, shb_speech, old_syn_12k8_16k[n] ); -#endif + Copy_Scale_sig_32_16( shb_speech_fx32, shb_speech_fx, L_FRAME16k, negate( q_shb_speech_fx32 ) ); + swb_CNG_enc_ivas_fx( st, shb_speech_fx /* Unmodified */, old_syn_12k8_16k_fx[n] ); } /*-------------------------------------------------------------------* * Inter-channel BWE encoding *-------------------------------------------------------------------*/ - if ( n == 0 && input_Fs >= 32000 && hStereoICBWE != NULL ) + test(); + test(); + IF( n == 0 && GE_32( input_Fs, 32000 ) && hStereoICBWE != NULL ) { -#ifdef IVAS_FLOAT_FIXED - Word16 new_swb_speech_buffer_fx16[L_FRAME48k + STEREO_DFT_OVL_MAX]; - Word16 q_new_swb_speech_buffer = Q_factor_arr( new_swb_speech_buffer, input_frame ); - floatToFixed_arr16( new_swb_speech_buffer, new_swb_speech_buffer_fx16, q_new_swb_speech_buffer, input_frame ); - - // floatToFixed_arr16( hCPE->hStereoICBWE->mem_decim_shb_ch0, hCPE->hStereoICBWE->mem_decim_shb_ch0_fx, 0, 90 ); - - stereo_icBWE_preproc_fx( hCPE, input_frame, new_swb_speech_buffer_fx16 /*tmp buffer*/, q_new_swb_speech_buffer ); + q_new_swb_speech_buffer = add( q_new_swb_speech_buffer, 16 ); + Copy_Scale_sig_16_32( new_swb_speech_buffer_fx16, new_swb_speech_buffer_fx, input_frame, Q16 ); // 
q_new_swb_speech_buffer + Copy_Scale_sig_16_32( voice_factors_fx[0], voice_factors_fx32[0], NB_SUBFR16k, Q16 ); // Q31 - fixedToFloat_arr( new_swb_speech_buffer_fx16, new_swb_speech_buffer, q_new_swb_speech_buffer, input_frame ); - - // fixedToFloat_arr( hCPE->hStereoICBWE->mem_decim_shb_ch0_fx, hCPE->hStereoICBWE->mem_decim_shb_ch0, 0, 90 ); - -#else - stereo_icBWE_preproc( hCPE, input_frame, new_swb_speech_buffer /*tmp buffer*/ ); -#endif // IVAS_FLOAT_FIXED - -#ifdef IVAS_FLOAT_FIXED -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word32 shb_speech_fx32[L_FRAME16k]; - Word32 voice_factors_fx32[CPE_CHANNELS][NB_SUBFR16k]; - Word16 q_shb_speech_fx32 = Q_factor_arrL( shb_speech, L_FRAME16k ); - floatToFixed_arrL( shb_speech, shb_speech_fx32, q_shb_speech_fx32, L_FRAME16k ); - q_new_swb_speech_buffer = Q_factor_arrL( new_swb_speech_buffer, input_frame ); - floatToFixed_arrL( new_swb_speech_buffer, new_swb_speech_buffer_fx, q_new_swb_speech_buffer, input_frame ); - Copy_Scale_sig_16_32( voice_factors_fx[0], voice_factors_fx32[0], NB_SUBFR16k, 16 ); // Q31 -#endif - stereo_icBWE_enc_ivas_fx( hCPE, shb_speech_fx32, 31 - q_shb_speech_fx32, new_swb_speech_buffer_fx, 31 - q_new_swb_speech_buffer, voice_factors_fx32[0] ); + stereo_icBWE_enc_ivas_fx( hCPE, shb_speech_fx32, sub( Q31, q_shb_speech_fx32 ), new_swb_speech_buffer_fx, sub( Q31, q_new_swb_speech_buffer ), voice_factors_fx32[0] ); -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS IF( EQ_16( st->element_mode, IVAS_CPE_DFT ) ) { - IF( ( st->extl == SWB_TBE || st->extl == FB_TBE ) && st->flag_ACELP16k == 1 ) - { - } + test(); + test(); + test(); IF( ( EQ_16( st->extl, SWB_TBE ) || EQ_16( st->extl, WB_TBE ) || EQ_16( st->extl, FB_TBE ) ) && EQ_16( st->flag_ACELP16k, 1 ) ) { - // hStereoICBWE->prevSpecMapping = fixedToFloat( hStereoICBWE->prevSpecMapping_fx, Q31 ); } ELSE { -#ifndef IVAS_FLOAT_FIXED - hStereoICBWE->prevSpecMapping = 0; -#else hStereoICBWE->prevSpecMapping_fx = 0; -#endif + move32(); } } -#endif -#else - stereo_icBWE_enc( hCPE, shb_speech, new_swb_speech_buffer, voice_factors[0] ); -#endif } /*---------------------------------------------------------------------* * Channel-aware mode - write signaling information into the bitstream *---------------------------------------------------------------------*/ -#ifdef IVAS_FLOAT_FIXED + signaling_enc_rf_fx( st ); -#else - signaling_enc_rf( st ); -#endif + /*---------------------------------------------------------------------* * Common updates *---------------------------------------------------------------------*/ - if ( !MCT_flag ) /* for MCT do this later, otherwise there can be a problem because TCX quant happens later and might get the wrong last_core on a bit rate switch */ + IF( !MCT_flag ) /* for MCT do this later, otherwise there can be a problem because TCX quant happens later and might get the wrong last_core on a bit rate switch */ { -#ifdef IVAS_FLOAT_FIXED -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - f2me_buf_16( st->buf_speech_enc_pe_flt, st->buf_speech_enc_pe, &st->exp_buf_speech_enc_pe, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); - f2me_buf_16( st->buf_speech_enc_flt, st->buf_speech_enc, &st->exp_buf_speech_enc, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); - f2me_buf_16( st->buf_synth_flt, st->buf_synth, &st->exp_buf_synth, OLD_SYNTH_SIZE_ENC + L_FRAME32k ); - IF( st->hTcxEnc != NULL ) - { - f2me_buf_16( st->buf_wspeech_enc_flt, st->buf_wspeech_enc, &st->exp_buf_wspeech_enc, L_FRAME16k + L_SUBFR + L_FRAME16k + L_NEXT_MAX_16k ); - f2me_buf_16( st->hTcxEnc->buf_speech_ltp_flt, 
st->hTcxEnc->buf_speech_ltp, &st->hTcxEnc->exp_buf_speech_ltp, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); - } -#endif - updt_enc_common_ivas_fx( st ); - + } #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - me2f_buf_16( st->buf_speech_enc_pe, st->exp_buf_speech_enc_pe, st->buf_speech_enc_pe_flt, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); - me2f_buf_16( st->buf_speech_enc, st->exp_buf_speech_enc, st->buf_speech_enc_flt, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); - me2f_buf_16( st->buf_synth, st->exp_buf_synth, st->buf_synth_flt, OLD_SYNTH_SIZE_ENC + L_FRAME32k ); - IF( st->hTcxEnc != NULL ) + if ( st->hBWE_TD != NULL ) + { + fixedToFloat_arr( st->hBWE_TD->cur_sub_Aq_fx, st->hBWE_TD->cur_sub_Aq, Q12, M + 1 ); + } + fixedToFloat_arr( st->input_fx, st->input, 0 /* Q_input*/, L_FRAME48k ); + me2f_buf_16( st->buf_speech_enc_pe, st->exp_buf_speech_enc_pe, st->buf_speech_enc_pe_flt, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); + me2f_buf_16( st->buf_speech_enc, st->exp_buf_speech_enc, st->buf_speech_enc_flt, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); + me2f_buf_16( st->buf_synth, st->exp_buf_synth, st->buf_synth_flt, OLD_SYNTH_SIZE_ENC + L_FRAME32k ); + IF( st->hTcxEnc != NULL ) + { + me2f_buf_16( st->buf_wspeech_enc, st->exp_buf_wspeech_enc, st->buf_wspeech_enc_flt, L_FRAME16k + L_SUBFR + L_FRAME16k + L_NEXT_MAX_16k ); + me2f_buf_16( st->hTcxEnc->buf_speech_ltp, st->hTcxEnc->exp_buf_speech_ltp, st->hTcxEnc->buf_speech_ltp_flt, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); + } + + IF( EQ_16( st->element_mode, EVS_MONO ) && EQ_16( st->mdct_sw, MODE2 ) ) + { + IF( EQ_32( st->sr_core, INT_FS_12k8 ) ) { - me2f_buf_16( st->buf_wspeech_enc, st->exp_buf_wspeech_enc, st->buf_wspeech_enc_flt, L_FRAME16k + L_SUBFR + L_FRAME16k + L_NEXT_MAX_16k ); - me2f_buf_16( st->hTcxEnc->buf_speech_ltp, st->hTcxEnc->exp_buf_speech_ltp, st->hTcxEnc->buf_speech_ltp_flt, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); + st->preemph_fac_flt = PREEMPH_FAC_FLT; } - - IF( EQ_16( st->element_mode, EVS_MONO ) && EQ_16( st->mdct_sw, MODE2 ) ) + ELSE { - IF( EQ_32( st->sr_core, INT_FS_12k8 ) ) - { - st->preemph_fac_flt = PREEMPH_FAC_FLT; - } - ELSE - { - st->preemph_fac_flt = PREEMPH_FAC_16k_FLT; - } + st->preemph_fac_flt = PREEMPH_FAC_16k_FLT; } -#endif -#else - updt_enc_common( st ); -#endif } +#endif } /*------------------------------------------------------------------* * Write potentially unused bits in combined format coding *-----------------------------------------------------------------*/ - - if ( hCPE != NULL && hCPE->element_mode == IVAS_CPE_DFT && hCPE->brate_surplus > 0 ) + test(); + test(); + IF( hCPE != NULL && EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) && hCPE->brate_surplus > 0 ) { - while ( diff_nBits > 0 ) + WHILE( diff_nBits > 0 ) { - n = min( diff_nBits, 16 ); + n = s_min( diff_nBits, 16 ); push_indice( sts[0]->hBstr, IND_UNUSED, 0, n ); - diff_nBits -= n; + diff_nBits = sub( diff_nBits, n ); } } diff --git a/lib_enc/ivas_core_pre_proc_front.c b/lib_enc/ivas_core_pre_proc_front.c index 648cc744d37b9ee768759054fb8bc31f027d23fc..0d5528c2071ee87135f1ac64171f9ce8029ca016 100644 --- a/lib_enc/ivas_core_pre_proc_front.c +++ b/lib_enc/ivas_core_pre_proc_front.c @@ -1567,7 +1567,6 @@ ivas_error pre_proc_front_ivas_fx( /*float to fix conversions for wb_vad_ivas_fx*/ Word16 Q_new = Q_factor_arr( fr_bands, 40 ) + 3; floatToFixed_arrL( fr_bands, fr_bands_fx, Q_new + QSCALE, 40 ); - st->lp_speech_fx = float_to_fix16( st->lp_speech, 8 ); floatToFixed_arrL( st->hNoiseEst->bckr, st->hNoiseEst->bckr_fx, Q_new + 
QSCALE, 20 ); floatToFixed_arrL( st->hNoiseEst->enrO, st->hNoiseEst->enrO_fx, Q_new + QSCALE, 20 ); st->flag_noisy_speech_snr_fx = (Word8) st->flag_noisy_speech_snr; @@ -2012,10 +2011,6 @@ ivas_error pre_proc_front_ivas_fx( { floatToFixed_arr( old_inp_12k8, old_inp_12k8_loc_fx, Q_new_loc, L_INP_12k8 ); } - for ( int idx = 0; idx < 3; idx++ ) - { - st->voicing_fx[idx] = (Word16) ( st->voicing[idx] * 32767 ); - } #endif alw_pitch_lag_12k8[0] = st->old_pitch_la; @@ -2127,7 +2122,6 @@ ivas_error pre_proc_front_ivas_fx( floatToFixed_arr( st->old_wsp, st->old_wsp_fx, Q_factor_arr( st->old_wsp, L_WSP_MEM ), L_WSP_MEM ); Copy( st->old_wsp_fx, old_wsp_fx, L_WSP_MEM ); wsp_fx = old_wsp_fx + L_WSP_MEM; - floatToFixed_arr( st->voicing, st->voicing_fx, Q15, 3 ); f2me_buf_16( wsp, wsp_fx, &exp_wsp, L_WSP - L_WSP_MEM ); Q_wsp = sub( 15, exp_wsp ); @@ -2140,7 +2134,6 @@ ivas_error pre_proc_front_ivas_fx( &st->delta_pit, st->old_wsp2_fx, wsp_fx, st->mem_decim2_fx, *relE_fx, st->clas, st->input_bwidth, st->Opt_SC_VBR ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - fixedToFloat_arr( st->voicing_fx, st->voicing, Q15, 3 ); fixedToFloat_arr( st->old_wsp2_fx, st->old_wsp2, Q_wsp, 115 ); fixedToFloat_arr( st->mem_decim2_fx, st->mem_decim2, Q_wsp, 3 ); #endif @@ -2156,7 +2149,6 @@ ivas_error pre_proc_front_ivas_fx( Word16 q_wsp = Q_factor_arr( old_wsp, L_WSP ) - 3; floatToFixed_arr16( old_wsp, old_wsp_fx, q_wsp, L_WSP ); - floatToFixed_arr16( st->voicing, st->voicing_fx, Q15, 3 ); floatToFixed_arr16( st->Bin_E, st->lgBin_E_fx, Q7, L_FFT / 2 ); // Function StableHighPitchDetect_fx excepts st->lgBin_E_fx to be in Q7 @@ -2343,14 +2335,9 @@ ivas_error pre_proc_front_ivas_fx( floatToFixed_arr( old_inp_12k8, oi12k8_fx, 0, L_INP_12k8 ); // Q_fac doesn't matter as it is only being used for sign Word16 *ni12k8_fx = oi12k8_fx + L_INP_MEM; inp_12k8_fx = ni12k8_fx - L_look; - floatToFixed_arr( st->voicing, st->voicing_fx, Q15, 3 ); #endif st->clas = signal_clas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - fixedToFloat_arr( st->voicing_fx, st->voicing, Q15, 3 ); -#endif - select_TC_fx( MODE1, st->tc_cnt, &st->coder_type, st->localVAD ); if ( st->Opt_SC_VBR ) @@ -2366,7 +2353,6 @@ ivas_error pre_proc_front_ivas_fx( { #ifdef IVAS_FLOAT_FIXED_CONVERSIONS Word16 epsP_e, max_e_Etot; - floatToFixed_arr( st->voicing, st->voicing_fx, Q15, 3 ); f2me_buf( epsP, epsP_fx, &epsP_e, 17 ); #endif // IVAS_FLOAT_FIXED_CONVERSIONS @@ -2388,7 +2374,6 @@ ivas_error pre_proc_front_ivas_fx( SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas; Word32 PS_fx[128]; Word16 Q_esp; - floatToFixed_arr16( st->voicing, st->voicing_fx, 15, 3 ); Word16 Qfact_PS = Q_factor_arrL( PS, 128 ); floatToFixed_arr32( PS, PS_fx, Qfact_PS, 128 ); Word16 e_esp; @@ -2402,16 +2387,8 @@ ivas_error pre_proc_front_ivas_fx( * Update of old per-band energy spectrum *----------------------------------------------------------------*/ -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - st->lp_speech_fx = float_to_fix16( st->lp_speech, 8 ); -#endif - ivas_long_enr_fx( st, Etot_fx, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - st->lp_speech = fix16_to_float( st->lp_speech_fx, 8 ); -#endif - mvr2r( fr_bands + NB_BANDS, st->hNoiseEst->enrO, NB_BANDS ); test(); diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index e4205115c104d0adfcf0da4ce118470c931dec65..c8537b8258d7699c58b46899ba97f242f3552be2 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -271,7 +271,6 @@ ivas_error 
ivas_cpe_enc_fx( #ifdef IVAS_FLOAT_FIXED_CONVERSIONS for ( n = 0; n < CPE_CHANNELS; n++ ) { - sts[n]->lp_speech_fx = (Word16) floatToFixed( sts[n]->lp_speech, Q8 ); sts[n]->flag_noisy_speech_snr_fx = (Word8) sts[n]->flag_noisy_speech_snr; Q_buffer[n] = 15; move16(); @@ -285,8 +284,7 @@ ivas_error ivas_cpe_enc_fx( for ( n = 0; n < CPE_CHANNELS; n++ ) { - Q_buffer[n] = Q_factor_arr( hCPE->hFrontVad[n]->buffer_12k8 + L_FFT, L_FFT / 2 ); - floatToFixed_arr( hCPE->hFrontVad[n]->buffer_12k8, hCPE->hFrontVad[n]->buffer_12k8_fx, Q_buffer[n], 384 ); + Q_buffer[n] = hCPE->hFrontVad[n]->q_buffer_12k8; floatToFixed_arrL( hCPE->hFrontVad[n]->hNoiseEst->bckr, hCPE->hFrontVad[n]->hNoiseEst->bckr_fx, Q_new_old + QSCALE, 20 ); floatToFixed_arrL( hCPE->hFrontVad[n]->hNoiseEst->enrO, hCPE->hFrontVad[n]->hNoiseEst->enrO_fx, Q_new_old + QSCALE, 20 ); } @@ -307,7 +305,8 @@ ivas_error ivas_cpe_enc_fx( Word16 Q_new_old = add( sub( Q_inp, Qband ), Q_add ); for ( n = 0; n < CPE_CHANNELS; n++ ) { - fixedToFloat_arr( hCPE->hFrontVad[n]->buffer_12k8_fx, hCPE->hFrontVad[n]->buffer_12k8, Q_buffer[n], 384 ); + Scale_sig( hCPE->hFrontVad[n]->buffer_12k8_fx + 384, 3 * L_FRAME / 2 - 384, sub( Q_buffer[n], hCPE->hFrontVad[n]->q_buffer_12k8 ) ); + hCPE->hFrontVad[n]->q_buffer_12k8 = Q_buffer[n]; fixedToFloat_arrL( fr_bands_fx[n], fr_bands[n], Q_buffer[n] + QSCALE, 40 ); fixedToFloat_arrL( lf_E_fx[n], lf_E[n], Q_buffer[n] + QSCALE - 2, 148 ); @@ -611,11 +610,6 @@ ivas_error ivas_cpe_enc_fx( #ifndef MSAN_FIX hCPE->hStereoClassif->xtalk_score_fx = floatToFixed( hCPE->hStereoClassif->xtalk_score, 31 ); #endif // !MSAN_FIX - - /* flt2fix: to be removed */ - floatToFixed_arr( hCPE->hCoreCoder[0]->voicing, hCPE->hCoreCoder[0]->voicing_fx, 15, 3 ); - /* flt2fix end */ - /*flt2fix: dft_synthesize*/ if ( hCPE->element_mode == IVAS_CPE_DFT && hCPE->hStereoDft->res_cod_mode[STEREO_DFT_OFFSET] ) { @@ -738,7 +732,6 @@ ivas_error ivas_cpe_enc_fx( sts[i]->q_inp = Q_factor_arr( sts[i]->old_input_signal, 1965 ); // check length floatToFixed_arr( sts[i]->old_input_signal, sts[i]->old_input_signal_fx, sts[i]->q_inp, 1965 ); } - floatToFixed_arr( hCPE->hCoreCoder[0]->voicing, hCPE->hCoreCoder[0]->voicing_fx, 15, 3 ); if ( hCPE->element_mode == IVAS_CPE_DFT && hCPE->hStereoDft->res_cod_mode[STEREO_DFT_OFFSET] ) { floatToFixed_arr( sts[1]->old_inp_12k8, sts[1]->old_inp_12k8_fx, 0, L_INP_MEM ); @@ -1055,8 +1048,6 @@ ivas_error ivas_cpe_enc_fx( floatToFixed_arr( pitch_fr[1], pitch_fr_fx[1], Q6, NB_SUBFR ); floatToFixed_arr( voicing_fr[0], voicing_fr_fx[0], Q15, NB_SUBFR ); floatToFixed_arr( voicing_fr[1], voicing_fr_fx[1], Q15, NB_SUBFR ); - floatToFixed_arr( hCPE->hCoreCoder[0]->voicing, hCPE->hCoreCoder[0]->voicing_fx, Q15, 3 ); - floatToFixed_arr( hCPE->hCoreCoder[1]->voicing, hCPE->hCoreCoder[1]->voicing_fx, Q15, 3 ); #endif tdm_ol_pitch_comparison_fx( hCPE, pitch_fr_fx, voicing_fr_fx ); @@ -1066,8 +1057,6 @@ ivas_error ivas_cpe_enc_fx( fixedToFloat_arr( pitch_fr_fx[1], pitch_fr[1], Q6, NB_SUBFR ); fixedToFloat_arr( voicing_fr_fx[0], voicing_fr[0], Q15, NB_SUBFR ); fixedToFloat_arr( voicing_fr_fx[1], voicing_fr[1], Q15, NB_SUBFR ); - fixedToFloat_arr( hCPE->hCoreCoder[0]->voicing_fx, hCPE->hCoreCoder[0]->voicing, Q15, 3 ); - fixedToFloat_arr( hCPE->hCoreCoder[1]->voicing_fx, hCPE->hCoreCoder[1]->voicing, Q15, 3 ); #endif #else tdm_ol_pitch_comparison( hCPE, pitch_fr, voicing_fr ); diff --git a/lib_enc/ivas_front_vad.c b/lib_enc/ivas_front_vad.c index 4f5cddbbd39e82f4402327af4c0238c5685e8a76..085d10777b32243ed73157c8a3b970df10da9087 100644 --- 
a/lib_enc/ivas_front_vad.c +++ b/lib_enc/ivas_front_vad.c @@ -531,16 +531,21 @@ ivas_error front_vad_create( return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for VAD\n" ) ); } - set_f( hFrontVad->buffer_12k8, 0, 3 * L_FRAME / 2 ); #ifdef IVAS_FLOAT_FIXED set16_fx( hFrontVad->mem_decim_fx, 0, 2 * L_FILT_MAX ); + hFrontVad->q_mem_decim = Q31; + wb_vad_init_ivas_fx( hFrontVad->hVAD ); + hFrontVad->lp_speech_fx = 11520; /* Initialize the long-term active speech level in dB */ hFrontVad->lp_noise_fx = 0; /* Initialize the long-term noise level in dB */ set16_fx( hFrontVad->mem_decim_fx, 0, shl( L_FILT_MAX, 1 ) ); set16_fx( hFrontVad->buffer_12k8_fx, 0, i_mult( 3, shr( L_FRAME, 1 ) ) ); hFrontVad->mem_preemph_fx = 0; + hFrontVad->q_buffer_12k8 = Q31; + hFrontVad->q_mem_decim = Q31; #else + set_f( hFrontVad->buffer_12k8, 0, 3 * L_FRAME / 2 ); set_f( hFrontVad->mem_decim, 0, 2 * L_FILT_MAX ); wb_vad_init( hFrontVad->hVAD ); hFrontVad->mem_preemph = 0; @@ -699,7 +704,7 @@ ivas_error front_vad_spar_fx( Word16 input_fx[L_FRAME48k]; Word16 vad_flag_dtx[1]; Word32 fr_bands_fx[1][2 * NB_BANDS] = { { 0 } }; - Word16 Etot_fx[1]; + Word16 Etot_fx[1]; /* Q8 */ Word16 localVAD_HE_SAD[1]; Word16 vad_hover_flag[1]; @@ -715,7 +720,7 @@ ivas_error front_vad_spar_fx( Word16 Q_esp; Word32 epsP_fx[M + 1]; - Word16 alw_voicing_fx[2]; + Word16 alw_voicing_fx[2]; /* Q15 */ Word16 lsp_new_fx[M]; Word16 lsp_mid_fx[M]; @@ -733,7 +738,7 @@ ivas_error front_vad_spar_fx( Word16 cor_map_sum_fx; Word16 dummy_fx; Word16 S_map_fx[L_FFT / 2]; - Word16 relE_fx; + Word16 relE_fx; /* Q8 */ Word16 *wsp_fx; Word16 *inp_12k8_fx; @@ -757,11 +762,10 @@ ivas_error front_vad_spar_fx( float epsP[M + 1]; float lsp_new[M]; float lsp_mid[M]; - float alw_voicing[2]; float cor_map_sum; float non_staX; float S_map[L_FFT / 2]; - float *inp_12k8; + // float *inp_12k8; float old_wsp[L_WSP]; float *wsp; float relE; @@ -780,9 +784,9 @@ ivas_error front_vad_spar_fx( /*------------------------------------------------------------------* * Initialization *-----------------------------------------------------------------*/ - inp_12k8 = hFrontVad->buffer_12k8; + // inp_12k8 = hFrontVad->buffer_12k8; Word16 Q_bands = Q31; - Word16 Q_inp_12k8 = Q9; + Word16 Q_inp_12k8 = hFrontVad->q_buffer_12k8; move16(); move16(); @@ -854,9 +858,6 @@ ivas_error front_vad_spar_fx( move16(); Word16 Q_new_old = add( sub( Q_inp, Qband ), Q_add ); Word16 band_ener_guardbits = find_guarded_bits_fx( 2 * NB_BANDS ); - Word16 Q_buffer = Q_factor_arr( hFrontVad->buffer_12k8 + L_FFT, L_FFT / 2 ); - floatToFixed_arr( hFrontVad->buffer_12k8, hFrontVad->buffer_12k8_fx, Q_buffer, 384 ); - st->lp_speech_fx = (Word16) floatToFixed( st->lp_speech, Q8 ); floatToFixed_arrL( hFrontVad->hNoiseEst->bckr, hFrontVad->hNoiseEst->bckr_fx, Q_new_old + QSCALE + 2, 20 ); floatToFixed_arrL( hFrontVad->hNoiseEst->enrO, hFrontVad->hNoiseEst->enrO_fx, Q_new_old + QSCALE + 2, 20 ); st->flag_noisy_speech_snr_fx = (Word8) st->flag_noisy_speech_snr; @@ -864,12 +865,17 @@ ivas_error front_vad_spar_fx( floatToFixed_arrL( &band_energies[0], &band_energies_fx[0], Q_new_old + QSCALE + 2, 40 ); #endif #endif + Word16 Q_buffer = hFrontVad->q_buffer_12k8; + Scale_sig( hFrontVad->mem_decim_fx, 2 * L_FILT_MAX, sub( Q_inp, hFrontVad->q_mem_decim ) ); + hFrontVad->q_mem_decim = Q_inp; IF( ( error = front_vad_fx( NULL, st, hEncoderConfig, &hFrontVad, 0 /* MCT_flag */, input_frame, vad_flag_dtx, fr_bands_fx, Etot_fx, lf_E_fx, localVAD_HE_SAD, vad_hover_flag, band_energies_fx, &PS_fx[0], 
&st->lgBin_E_fx[0], Q_inp, &Q_buffer, Q_add, &front_create_flag ) ) != IVAS_ERR_OK ) { return error; } + Scale_sig( hFrontVad->buffer_12k8_fx + 384, 3 * L_FRAME / 2 - 384, sub( Q_buffer, hFrontVad->q_buffer_12k8 ) ); + hFrontVad->q_buffer_12k8 = Q_buffer; + #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - fixedToFloat_arr( hFrontVad->buffer_12k8_fx, hFrontVad->buffer_12k8, Q_buffer, 384 ); fixedToFloat_arrL( fr_bands_fx[0], fr_bands[0], Q_buffer + QSCALE + 2, 40 ); fixedToFloat_arrL( lf_E_fx[0], lf_E[0], Q_buffer + QSCALE, 148 ); if ( st->lgBin_E_fx != NULL ) @@ -903,22 +909,21 @@ ivas_error front_vad_spar_fx( corr_shift_fx = correlation_shift_fx( hFrontVad->hNoiseEst->totalNoise_fx ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS corr_shift = fixedToFloat( corr_shift_fx, Q15 ); - Q_inp_12k8 = Q_factor_arr( inp_12k8, 3 * L_FRAME / 2 ); - floatToFixed_arr( inp_12k8, inp_12k8_fx, Q_inp_12k8, 3 * L_FRAME / 2 ); #endif dtx_ivas_fx( st, hEncoderConfig->ivas_total_brate, vad_flag_dtx[0], inp_12k8_fx, Q_inp_12k8 ); /* linear prediction analysis */ alw_pitch_lag_12k8[0] = st->old_pitch_la; alw_pitch_lag_12k8[1] = st->old_pitch_la; - alw_voicing[0] = st->voicing[2]; - alw_voicing[1] = st->voicing[2]; + alw_voicing_fx[0] = st->voicing_fx[2]; + alw_voicing_fx[1] = st->voicing_fx[2]; #ifdef IVAS_FLOAT_FIXED_CONVERSIONS Word16 Q_r[2] = { 0 }; - floatToFixed_arr( alw_voicing, alw_voicing_fx, Q15, 2 ); - Q_inp_12k8 = Q9; // Q_factor_arr( inp_12k8 - 90, 3 * L_FRAME / 2 + 90 ); - floatToFixed_arr( inp_12k8 - 90, inp_12k8_fx - 90, Q_inp_12k8, 3 * L_FRAME / 2 + 90 ); #endif + Scale_sig( inp_12k8_fx - 2 * L_FILT_MAX, 2 * L_FILT_MAX, sub( s_min( Q_inp_12k8, hFrontVad->q_mem_decim ), hFrontVad->q_mem_decim ) ); + Scale_sig( inp_12k8_fx, 3 * L_FRAME / 2, sub( s_min( Q_inp_12k8, hFrontVad->q_mem_decim ), Q_inp_12k8 ) ); + Q_inp_12k8 = s_min( hFrontVad->q_mem_decim, hFrontVad->q_buffer_12k8 ); + hFrontVad->q_mem_decim = s_min( hFrontVad->q_mem_decim, hFrontVad->q_buffer_12k8 ); analy_lp_ivas_fx( inp_12k8_fx, L_FRAME, L_LOOK_12k8, &res_energy_fx, A_fx, epsP_h, epsP_l, lsp_new_fx, lsp_mid_fx, st->lsp_old1_fx, alw_pitch_lag_12k8, alw_voicing_fx, INT_FS_12k8, 0 /* <-- sec_chan_low_rate */, Q_inp_12k8, Q_r ); @@ -934,11 +939,15 @@ ivas_error front_vad_spar_fx( fixedToFloat_arr( st->mem_decim2_fx, st->mem_decim2, Q9, 3 ); #endif - relE = Etot[0] - st->lp_speech; +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + Etot_fx[0] = float_to_fix16( Etot[0], Q8 ); +#endif + relE_fx = sub( Etot_fx[0], st->lp_speech_fx ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + relE = fixedToFloat( relE_fx, Q8 ); +#endif #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Q_inp_12k8 = Q9; // Q_factor_arr( inp_12k8-M, (3 * L_FRAME / 2)+M ); - floatToFixed_arr( inp_12k8 - M, inp_12k8_fx - M, Q_inp_12k8, ( 3 * L_FRAME / 2 ) + M ); #ifdef MSAN_FIX floatToFixed_arr( A, A_fx, Q12, ( L_FRAME / L_SUBFR ) * ( M + 1 ) ); #else @@ -962,7 +971,6 @@ ivas_error front_vad_spar_fx( old_pitch = st->pitch[1]; #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - floatToFixed_arr( st->voicing, st->voicing_fx, Q15, 3 ); floatToFixed_arr( wsp, wsp_fx, Q8, 368 ); floatToFixed_arr( st->old_wsp2, st->old_wsp2_fx, Q8, 115 ); floatToFixed_arr( st->mem_decim2, st->mem_decim2_fx, Q8, 3 ); @@ -974,7 +982,6 @@ ivas_error front_vad_spar_fx( // pitch_ol( st->pitch, st->voicing, &st->old_pitch, &st->old_corr, corr_shift, &st->old_thres, &st->delta_pit, st->old_wsp2, wsp, st->mem_decim2, relE, L_LOOK_12k8, st->clas, st->input_bwidth, st->Opt_SC_VBR ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - fixedToFloat_arr( st->voicing_fx, st->voicing, Q15, 3 
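/*
 * A minimal sketch of the Q-tracking pattern introduced above for buffer_12k8_fx / mem_decim_fx:
 * instead of re-deriving a Q factor from the float shadow buffer every frame, the state keeps its
 * own Q (q_buffer_12k8, q_mem_decim) and is realigned in place with Scale_sig(). The helper name
 * realign_q16 is illustrative only, assuming the usual BASOP semantics of Scale_sig() (in-place
 * shl by a signed exponent):
 *
 *   static void realign_q16( Word16 *buf, const Word16 len, Word16 *q_state, const Word16 q_target )
 *   {
 *       Scale_sig( buf, len, sub( q_target, *q_state ) ); // shift by the Q difference (may be negative)
 *       *q_state = q_target;                              // buffer now lives in q_target
 *   }
 *
 *   // e.g. realign_q16( hFrontVad->buffer_12k8_fx + 384, 3 * L_FRAME / 2 - 384,
 *   //                   &hFrontVad->q_buffer_12k8, Q_buffer );
 */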
); fixedToFloat_arr( st->old_wsp2_fx, st->old_wsp2, Q8, 115 ); fixedToFloat_arr( st->mem_decim2_fx, st->mem_decim2, Q8, 3 ); #endif @@ -986,7 +993,6 @@ ivas_error front_vad_spar_fx( #else // StableHighPitchDetect( &flag_spitch, st->pitch, st->voicing, st->Bin_E, wsp, st->localVAD, &st->voicing_sm, &st->voicing0_sm, &st->LF_EnergyRatio_sm, &st->predecision_flag, &st->diff_sm, &st->energy_sm ); floatToFixed_arr( wsp, wsp_fx, Q9, 368 ); - floatToFixed_arr( st->voicing, st->voicing_fx, Q15, 3 ); floatToFixed_arr( st->Bin_E, st->lgBin_E_fx, Q7, 128 ); StableHighPitchDetect_ivas_fx( &flag_spitch, st->pitch, st->voicing_fx, wsp_fx, st->localVAD, &st->voicing_sm_fx, &st->voicing0_sm_fx, &st->LF_EnergyRatio_sm_fx, &st->predecision_flag, &st->diff_sm_fx, &st->energy_sm_fx, Q12, st->lgBin_E_fx ); fixedToFloat_arr( st->lgBin_E_fx, st->Bin_E, Q7, 128 ); @@ -1044,7 +1050,6 @@ ivas_error front_vad_spar_fx( #else floatToFixed_arr( A, A_fx, Q12, NB_SUBFR16k * ( M + 1 ) ); #endif // MSAN_FIX - floatToFixed_arr( st->voicing, st->voicing_fx, Q15, 3 ); vad_param_updt_fx( st, st->pitch[1], corr_shift_fx, corr_shift_fx, A_fx, &hFrontVad, 1 ); #endif /* 1st stage speech/music classification (GMM model) */ @@ -1059,7 +1064,6 @@ ivas_error front_vad_spar_fx( Word16 Etot_fx_0 = float_to_fix16( Etot[0], Q8 ); floatToFixed_arr( lsp_new, lsp_new_fx, Q15, M ); relE_fx = float_to_fix16( relE, 8 ); - floatToFixed_arr16( st->voicing, st->voicing_fx, 15, 3 ); Word16 Qfact_PS = Q_factor_arrL( PS, 128 ); floatToFixed_arr32( PS, PS_fx, Qfact_PS, 128 ); Word16 e_esp; diff --git a/lib_enc/ivas_masa_enc.c b/lib_enc/ivas_masa_enc.c index 0e544cd4d6b8b5bd63541cafe389a88c1a7b8dde..be113f8410fd8cf86cbd65a9aa9d705c1ac03bd4 100644 --- a/lib_enc/ivas_masa_enc.c +++ b/lib_enc/ivas_masa_enc.c @@ -8576,7 +8576,7 @@ static void ivas_encode_masaism_metadata_fx( } ELSE { - hOmasaData->masa_to_total_energy_ratio_fx[sf][0] = MAX_32; + hOmasaData->masa_to_total_energy_ratio_fx[sf][0] = ONE_IN_Q30; move32(); } } @@ -8652,7 +8652,7 @@ static void ivas_encode_masaism_metadata_fx( } ELSE { - hOmasaData->masa_to_total_energy_ratio_fx[0][band] = MAX_32; + hOmasaData->masa_to_total_energy_ratio_fx[0][band] = ONE_IN_Q30; move32(); } } diff --git a/lib_enc/ivas_mdct_core_enc.c b/lib_enc/ivas_mdct_core_enc.c index 8d98292b4391575651f9f0926c27179c7ec5abcc..07c6c721073f80175bb4b7c4000e96541a5f6d96 100644 --- a/lib_enc/ivas_mdct_core_enc.c +++ b/lib_enc/ivas_mdct_core_enc.c @@ -1600,22 +1600,22 @@ void enc_prm_igf_mdct( #ifdef IVAS_FLOAT_FIXED void ivas_mdct_core_whitening_enc( - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ - float new_samples[CPE_CHANNELS][L_INP], /* i : new samples */ - float old_wsp[CPE_CHANNELS][L_WSP], /* i : 12.8kHz weighted speech (for LTP */ - float pitch_buf[CPE_CHANNELS][NB_SUBFR16k], /* o : floating pitch for each subframe */ - float *mdst_spectrum_long[CPE_CHANNELS], /* o : buffer for MDST spectrum */ - int16_t tnsBits[CPE_CHANNELS][NB_DIV], /* o : buffer TNS bits */ - float *orig_spectrum_long[CPE_CHANNELS], /* o : origingal spectrum w/o whitening */ - int16_t tnsSize[CPE_CHANNELS][NB_DIV], /* o : number of tns parameters put into prm */ - int16_t p_param[CPE_CHANNELS][NB_DIV], /* o : pointer to the parameter table */ - BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ - const int16_t mct_on, /* i : flag mct block (1) or stereo (0) */ - const int16_t nChannels /* i : total number of coded channels */ + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ + Word16 new_samples_fx[CPE_CHANNELS][L_INP], /* i : new 
samples */ + Word16 old_wsp_fx[CPE_CHANNELS][L_WSP], /* i : 12.8kHz weighted speech (for LTP */ + Word16 pitch_buf_fx[CPE_CHANNELS][NB_SUBFR16k], /* o : floating pitch for each subframe */ + float *mdst_spectrum_long[CPE_CHANNELS], /* o : buffer for MDST spectrum */ + int16_t tnsBits[CPE_CHANNELS][NB_DIV], /* o : buffer TNS bits */ + float *orig_spectrum_long[CPE_CHANNELS], /* o : origingal spectrum w/o whitening */ + int16_t tnsSize[CPE_CHANNELS][NB_DIV], /* o : number of tns parameters put into prm */ + int16_t p_param[CPE_CHANNELS][NB_DIV], /* o : pointer to the parameter table */ + BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ + const int16_t mct_on, /* i : flag mct block (1) or stereo (0) */ + const int16_t nChannels /* i : total number of coded channels */ ) { int16_t n, ch, nSubframes, L_subframe, L_subframeTCX, tcx_subframe_coded_lines; - float A_q[CPE_CHANNELS][NB_DIV][M + 1]; + Word16 A_q_fx[CPE_CHANNELS][NB_DIV][M + 1]; int16_t sns_vq_indices[CPE_CHANNELS * NB_DIV * SNS_MSVQ_NSTAGES_TCX10]; int16_t sns_stereo_mode[NB_DIV]; int16_t idx; @@ -1624,8 +1624,8 @@ void ivas_mdct_core_whitening_enc( int16_t ltpBits[CPE_CHANNELS]; int16_t i, T_op[CPE_CHANNELS][3]; float *orig_spectrum[CPE_CHANNELS][NB_DIV]; /* Pointers to MDCT output for a short block (L/R) */ - float temp_buffer[15 * L_FRAME48k / 8]; - float *windowedSignal[CPE_CHANNELS]; + Word32 temp_buffer[15 * L_FRAME48k / 8]; + Word32 *windowedSignal_fx[CPE_CHANNELS]; float *powerSpec = orig_spectrum_long[0]; #ifdef IVAS_FLOAT_FIXED Word32 powerSpec_fx[N_MAX]; @@ -1637,10 +1637,7 @@ void ivas_mdct_core_whitening_enc( Word16 nrg_fx; /* Q15 */ #endif Encoder_State *st, **sts; - float scf[CPE_CHANNELS][NB_DIV][M]; - float scf_q[CPE_CHANNELS][NB_DIV][M]; #ifdef IVAS_FLOAT_FIXED - Word16 old_wsp_fx[CPE_CHANNELS][L_WSP]; Word16 q_fac; Word32 scf_fx[CPE_CHANNELS][NB_DIV][M]; Word32 scf_q_fx[CPE_CHANNELS][NB_DIV][M]; @@ -1669,8 +1666,7 @@ void ivas_mdct_core_whitening_enc( #endif #ifdef IVAS_FLOAT_FIXED_CONVERSIONS Word16 Q_new; - Word16 new_samples_fx[CPE_CHANNELS][L_INP]; - Word32 *windowedSignal_fx[CPE_CHANNELS]; + Word16 q_windowedSignal[CPE_CHANNELS]; Word32 L_tmpbuf[NB_DIV * L_FRAME48k + 4], L_tmpbuf1[NB_DIV * L_FRAME48k + 4]; #endif @@ -1679,9 +1675,10 @@ void ivas_mdct_core_whitening_enc( #ifdef MSAN_FIX FOR( ch = 0; ch < CPE_CHANNELS; ch++ ) { - set_zero( A_q[ch][0], M + 1 ); - set_zero( A_q[ch][1], M + 1 ); + set16_fx( A_q_fx[ch][0], 0, M + 1 ); + set16_fx( A_q_fx[ch][1], 0, M + 1 ); } + set16_fx( q_windowedSignal, 0, CPE_CHANNELS ); #endif /*--------------------------------------------------------------* @@ -1724,21 +1721,14 @@ void ivas_mdct_core_whitening_enc( set32_fx( mdst_spectrum_long_fx[ch], 0, 1920 ); mdst_spectrum_fx[ch][0] = mdst_spectrum_long_fx[ch]; mdst_spectrum_fx[ch][1] = mdst_spectrum_long_fx[ch] + N_TCX10_MAX; - set_f( scf[ch][0], 0.0, M ); - set_f( scf[ch][1], 0.0, M ); #ifdef MSAN_FIX - set_f( temp_buffer, 0.0, 15 * L_FRAME48k / 8 ); + set32_fx( temp_buffer, 0, 15 * L_FRAME48k / 8 ); #endif #endif } - windowedSignal[0] = orig_spectrum_long[0]; /* NOTE temporarily available */ - windowedSignal[1] = temp_buffer; /* orig_spectrum_long isn't long enough */ -#ifdef IVAS_FLOAT_FIXED windowedSignal_fx[0] = L_tmpbuf; windowedSignal_fx[1] = L_tmpbuf1; -#endif - /*--------------------------------------------------------------* * TCX20/TCX10 switching decision *---------------------------------------------------------------*/ @@ -1790,72 +1780,21 @@ void ivas_mdct_core_whitening_enc( /* tcx ltp analysis on 
the 12.8kHz weighted speech, saves preproc resampling to sr_core */ #ifdef IVAS_FLOAT_FIXED -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS TCX_ENC_HANDLE hTcxEnc = st->hTcxEnc; - q_fac = Q_factor_arr( old_wsp[ch], L_WSP ); - floatToFixed_arr( old_wsp[ch], old_wsp_fx[ch], q_fac, L_WSP ); -#endif tcx_ltp_encode_ivas_fx( st, st->hTcxEnc->tcxMode, L_FRAME, old_wsp_fx[ch] + L_WSP_MEM + L_LOOK_12k8, NULL, old_wsp_fx[ch] + L_WSP_MEM + L_LOOK_12k8, T_op[ch], &param_core[ch][1 + NOISE_FILL_RANGES], &ltpBits[ch], NULL, 0, IVAS_CPE_MDCT ); - -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - fixedToFloat_arr( old_wsp_fx[ch], old_wsp[ch], q_fac, L_WSP ); -#endif #else tcx_ltp_encode( st, st->hTcxEnc->tcxMode, L_FRAME, old_wsp[ch] + L_WSP_MEM + L_LOOK_12k8, NULL, old_wsp[ch] + L_WSP_MEM + L_LOOK_12k8, T_op[ch], &param_core[ch][1 + NOISE_FILL_RANGES], &ltpBits[ch], NULL, 0, IVAS_CPE_MDCT ); #endif #ifdef IVAS_FLOAT_FIXED #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - floatToFixed_arr( new_samples[ch], new_samples_fx[ch], 0, L_INP ); - Word16 leftOverlap = 0, rightOverlap = 0; - PWord16 const *left_win; - PWord16 const *right_win; - Word16 len_windowSignal; - Word16 overlap_mode[3]; - Word16 transform_type[2]; - overlap_mode[0] = st->hTcxCfg->tcx_last_overlap_mode; /* Overlap between the last and the current frame */ IF( EQ_16( hTcxEnc->tcxMode, TCX_20 ) ) { nSubframes = 1; - transform_type[0] = TCX_20; - transform_type[1] = TCX_20; - overlap_mode[1] = st->hTcxCfg->tcx_curr_overlap_mode; /* Overlap between the current and the next frame */ } ELSE { nSubframes = 2; - IF( st->hTcxCfg->tcx_curr_overlap_mode == FULL_OVERLAP ) - { - transform_type[0] = TCX_5; - transform_type[1] = TCX_10; - overlap_mode[1] = MIN_OVERLAP; /* Overlap between 2nd and 3rd sub-frame */ - if ( EQ_16( st->hTcxCfg->tcx_last_overlap_mode, HALF_OVERLAP ) ) - { - overlap_mode[1] = HALF_OVERLAP; - } - } - ELSE IF( st->hTcxCfg->tcx_last_overlap_mode == FULL_OVERLAP ) - { - transform_type[0] = TCX_10; - transform_type[1] = TCX_5; - overlap_mode[1] = MIN_OVERLAP; /* Overlap between 1st and 2nd sub-frame */ - if ( EQ_16( st->hTcxCfg->tcx_curr_overlap_mode, HALF_OVERLAP ) ) - { - overlap_mode[1] = HALF_OVERLAP; - } - } - ELSE - { - transform_type[0] = transform_type[1] = TCX_5; - overlap_mode[1] = MIN_OVERLAP; /* Overlap between 2nd and 3rd sub-frame */ - if ( EQ_16( st->hTcxCfg->tcx_last_overlap_mode, HALF_OVERLAP ) && EQ_16( st->hTcxCfg->tcx_curr_overlap_mode, HALF_OVERLAP ) ) - { - overlap_mode[1] = HALF_OVERLAP; - move16(); - } - } - overlap_mode[2] = st->hTcxCfg->tcx_curr_overlap_mode; /* Overlap between the current and the next frame */ - move16(); } #endif #ifdef IVAS_FLOAT_FIXED_CONVERSIONS_ @@ -1923,6 +1862,7 @@ void ivas_mdct_core_whitening_enc( } #endif #ifdef IVAS_FLOAT_FIXED_CONVERSIONS + if ( st->element_mode == IVAS_CPE_DFT ) { q_fac = Q_factor_arr( st->buf_wspeech_enc_flt, L_FRAME16k + L_SUBFR + L_FRAME16k + L_NEXT_MAX_16k + 320 ); @@ -1938,21 +1878,11 @@ void ivas_mdct_core_whitening_enc( st->hTcxEnc->exp_buf_speech_ltp = 15 - q_fac; } - Word16 q_spectrum = L_get_q_buf1( hTcxEnc->spectrum[0], st->hTcxEnc->L_frameTCX / nSubframes ); - hTcxEnc->spectrum_e[0] = 31 - q_spectrum; - floatToFixed_arrL32( hTcxEnc->spectrum[0], hTcxEnc->spectrum_fx[0], sub( Q31, hTcxEnc->spectrum_e[0] ), st->hTcxEnc->L_frameTCX / nSubframes ); - IF( hTcxEnc->tcxMode != TCX_20 ) - { - q_spectrum = L_get_q_buf1( hTcxEnc->spectrum[1], st->hTcxEnc->L_frameTCX / nSubframes ); - hTcxEnc->spectrum_e[1] = 31 - q_spectrum; - floatToFixed_arrL32( hTcxEnc->spectrum[1], hTcxEnc->spectrum_fx[1], sub( Q31,
hTcxEnc->spectrum_e[1] ), st->hTcxEnc->L_frameTCX / nSubframes ); - } - floatToFixed_arr( st->input_buff, st->input_buff_fx, 0, L_FRAME48k + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ); Q_new = 0; #endif - core_signal_analysis_high_bitrate_ivas_fx( new_samples_fx[ch] + L_INP_MEM, T_op[ch], NULL, NULL, st, tnsSize[ch], tnsBits[ch], param_core[ch], &ltpBits[ch], windowedSignal_fx[ch], st->L_frame, st->hTcxEnc->L_frameTCX, hCPE->last_element_mode, 0, mdst_spectrum_fx[ch], mdst_spectrum_e[ch], &Q_new ); + core_signal_analysis_high_bitrate_ivas_fx( new_samples_fx[ch] + L_INP_MEM, T_op[ch], NULL, NULL, st, tnsSize[ch], tnsBits[ch], param_core[ch], &ltpBits[ch], windowedSignal_fx[ch], st->L_frame, st->hTcxEnc->L_frameTCX, hCPE->last_element_mode, 0, mdst_spectrum_fx[ch], mdst_spectrum_e[ch], &Q_new, &q_windowedSignal[ch] ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS fixedToFloat_arr( st->input_buff_fx, st->input_buff, 0, L_FRAME48k + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ); @@ -1987,35 +1917,6 @@ void ivas_mdct_core_whitening_enc( { fixedToFloat_arrL32( mdst_spectrum_fx[ch][1], mdst_spectrum[ch][1], sub( Q31, mdst_spectrum_e[ch][1] ), st->hTcxEnc->L_frameTCX / nSubframes ); } - IF( ( EQ_16( transform_type[0], TCX_20 ) ) && ( NE_16( st->hTcxCfg->tcx_last_overlap_mode, TRANSITION_OVERLAP ) ) ) - { - windowedSignal[ch][0] = (float) windowedSignal_fx[ch][0]; - windowedSignal[ch][1] = (float) windowedSignal_fx[ch][1]; - } - ELSE - { - tcx_get_windows( st->hTcxCfg, overlap_mode[0], overlap_mode[1], &leftOverlap, &left_win, &rightOverlap, &right_win, 1 ); - len_windowSignal = ( st->hTcxEnc->L_frameTCX / nSubframes ) + ( leftOverlap + rightOverlap ) / 2; - windowedSignal[ch][0] = (float) windowedSignal_fx[ch][0]; - windowedSignal[ch][1] = (float) windowedSignal_fx[ch][1]; - fixedToFloat_arrL32( windowedSignal_fx[ch] + 2, windowedSignal[ch] + 2, 0, len_windowSignal ); - } - IF( NE_16( hTcxEnc->tcxMode, TCX_20 ) ) - { - IF( ( EQ_16( transform_type[1], TCX_20 ) ) && ( NE_16( st->hTcxCfg->tcx_last_overlap_mode, TRANSITION_OVERLAP ) ) ) - { - windowedSignal[ch][L_FRAME48k + 0] = (float) windowedSignal_fx[ch][L_FRAME_MAX + 0]; - windowedSignal[ch][L_FRAME48k + 1] = (float) windowedSignal_fx[ch][L_FRAME_MAX + 1]; - } - ELSE - { - tcx_get_windows( st->hTcxCfg, overlap_mode[1], overlap_mode[2], &leftOverlap, &left_win, &rightOverlap, &right_win, 1 ); - len_windowSignal = ( st->hTcxEnc->L_frameTCX / nSubframes ) + ( leftOverlap + rightOverlap ) / 2; - windowedSignal[ch][L_FRAME48k + 0] = (float) windowedSignal_fx[ch][L_FRAME_MAX + 0]; - windowedSignal[ch][L_FRAME48k + 1] = (float) windowedSignal_fx[ch][L_FRAME_MAX + 1]; - fixedToFloat_arrL32( windowedSignal_fx[ch] + L_FRAME48k + 2, windowedSignal[ch] + L_FRAME48k + 2, 0, len_windowSignal ); - } - } #endif #else core_signal_analysis_high_bitrate( new_samples[ch] + L_INP_MEM, T_op[ch], NULL, NULL, st, mdst_spectrum[ch], tnsSize[ch], tnsBits[ch], param_core[ch], &ltpBits[ch], windowedSignal[ch], st->L_frame, st->hTcxEnc->L_frameTCX, hCPE->last_element_mode, 0 ); @@ -2039,8 +1940,8 @@ #else #ifdef IVAS_FLOAT_FIXED_CONVERSIONS #ifdef MSAN_FIX - q_spectrum = Q_factor_arr( st->hTcxEnc->spectrum[n], st->hTcxEnc->L_frameTCX / ( n + 1 ) ); - floatToFixed_arr( st->hTcxEnc->spectrum[n], spect_fx[n], q_spectrum, st->hTcxEnc->L_frameTCX / ( n + 1 ) ); + Word16 q_spectrum = L_norm_arr( st->hTcxEnc->spectrum_fx[n], st->hTcxEnc->L_frameTCX / ( n + 1 ) ); + Copy_Scale_sig32_16( st->hTcxEnc->spectrum_fx[n], spect_fx[n],
st->hTcxEnc->L_frameTCX / ( n + 1 ), q_spectrum ); #else Word16 l_frame = (int16_t) ( st->input_Fs / FRAMES_PER_SEC ); q_spectrum = Q_factor_arr( st->hTcxEnc->spectrum[n], l_frame ); @@ -2061,11 +1962,7 @@ void ivas_mdct_core_whitening_enc( #endif if ( st->last_core == ACELP_CORE ) /* reset past kernel info */ { -#ifndef IVAS_FLOAT_FIXED - st->hTcxEnc->kernel_switch_corr_past_flt = 0.f; -#else st->hTcxEnc->kernel_switch_corr_past = 0; -#endif st->hTcxEnc->kernel_symmetry_past = 0; } } @@ -2087,23 +1984,15 @@ void ivas_mdct_core_whitening_enc( hTcxEnc0 = sts[0]->hTcxEnc; hTcxEnc1 = sts[1]->hTcxEnc; -#ifdef IVAS_FLOAT_FIXED init_tcx_enc_info_fx( sts[0], &L_subframe, &L_subframeTCX, &tcx_subframe_coded_lines ); -#else - init_tcx_enc_info( sts[0], &L_subframe, &L_subframeTCX, &tcx_subframe_coded_lines ); -#endif if ( nSampCore == 0 ) { nSampCore = tcx_subframe_coded_lines; } -#ifndef IVAS_FLOAT_FIXED - nrg = 0.25f * ( hTcxEnc0->tcxltp_norm_corr_past_flt + hTcxEnc0->tcxltp_norm_corr_mem_flt + /* tcxltp_norm_corr_past already contains the */ - hTcxEnc1->tcxltp_norm_corr_past_flt + hTcxEnc1->tcxltp_norm_corr_mem_flt ); /* normalized correlation of the current frame */ -#else nrg_fx = extract_l( L_shr( L_add( L_add( L_add( hTcxEnc0->tcxltp_norm_corr_past, hTcxEnc0->tcxltp_norm_corr_mem ), hTcxEnc1->tcxltp_norm_corr_past ), hTcxEnc1->tcxltp_norm_corr_mem ), 2 ) ); /* normalized correlation of the current frame */ -#endif + L_subframe = max( 512, L_subframe ); nSubframes = ( hTcxEnc0->tcxMode == TCX_20 ) ? 1 : NB_DIV; @@ -2128,7 +2017,7 @@ void ivas_mdct_core_whitening_enc( kernel_switch_detect_fx( hTcxEnc0->spectrum_fx[n], hTcxEnc1->spectrum_fx[n], mdst_spectrum_fx[0][n], mdst_spectrum_fx[1][n], q_com, nSampCore / nSubframes, L_subframeTCX / nSubframes, hTcxEnc0->transform_type[n], &hTcxEnc0->kernel_switch_corr_past, ( totalRate * L_subframe ) / nSubframes ); #else - const int16_t switchKernel = /* these 4 transform types can be applied: 0 = MDCT-IV, 1 = MDST-II, 2 = MDCT-II, 3 = MDST-IV */ + const int16_t switchKernel = /* these 4 transform types can be applied: 0 = MDCT-IV, 1 = MDST-II, 2 = MDCT-II, 3 = MDST-IV */ kernel_switch_detect( hTcxEnc0->spectrum[n], hTcxEnc1->spectrum[n], mdst_spectrum[0][n], mdst_spectrum[1][n], nSampCore / nSubframes, L_subframeTCX / nSubframes, hTcxEnc0->transform_type[n], &hTcxEnc0->kernel_switch_corr_past_flt, ( totalRate * L_subframe ) / nSubframes ); #endif @@ -2145,11 +2034,7 @@ void ivas_mdct_core_whitening_enc( } else { -#ifdef IVAS_FLOAT_FIXED hTcxEnc0->kernel_switch_corr_past = 0; /* don't update the kernel switching state, postpone it to when data is available */ -#else - hTcxEnc0->kernel_switch_corr_past_flt = 0.f; /* don't update the kernel switching state, postpone it to when data is available */ -#endif hTcxEnc0->kernel_type[n] = ( hTcxEnc0->kernel_symmetry_past ? 3 : 0 ); hTcxEnc1->kernel_type[n] = ( hTcxEnc1->kernel_symmetry_past ? 
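/*
 * The 32-to-16-bit spectrum copy above replaces a float round-trip with a headroom-driven
 * rescale; a reduced sketch, assuming the usual BASOP semantics of L_norm_arr() (smallest
 * norm_l() over the buffer) and Copy_Scale_sig32_16() (shift, then round to 16 bits):
 *
 *   Word16 shift = L_norm_arr( spec32, len );          // common headroom of the Word32 buffer
 *   Copy_Scale_sig32_16( spec32, spec16, len, shift ); // spec16 ends up in Q( q_spec32 + shift - 16 )
 *
 * Using one shift for the whole buffer keeps a single Q for all bins while the largest
 * coefficient still cannot saturate.
 */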
3 : 0 ); } @@ -2176,24 +2061,9 @@ void ivas_mdct_core_whitening_enc( 0, sts[0]->hTcxCfg->tcx_mdct_window_min_lengthFB ); } - - Word16 leftOverlap = 0, rightOverlap = 0; - PWord16 const *left_win; - PWord16 const *right_win; - Word16 q_windowedSignal = 0, len_windowSignal; - - windowedSignal_fx[0][0] = ( Word16 ) * ( windowedSignal[0] + n * L_FRAME48k ); - windowedSignal_fx[0][1] = ( Word16 ) * ( windowedSignal[0] + n * L_FRAME48k + 1 ); - if ( ( hTcxEnc0->transform_type[n] == TCX_5 ) || ( n == 0 /* speech_TCX != NULL*/ && NE_16( hTcxEnc0->transform_type[n], TCX_20 ) && windowedSignal_fx[0][0] == FULL_OVERLAP && GT_16( sub( L_subframeTCX / nSubframes, leftOverlap ), sts[0]->hTcxCfg->tcx_mdct_window_min_lengthFB - 1 ) ) ) - { - tcx_get_windows( sts[0]->hTcxCfg, extract_l( windowedSignal_fx[0][0] ), extract_l( windowedSignal_fx[0][1] ), &leftOverlap, &left_win, &rightOverlap, &right_win, 1 ); - len_windowSignal = ( L_subframeTCX / nSubframes ) + ( leftOverlap + rightOverlap ) / 2; - q_windowedSignal = Q_factor_arrL( windowedSignal[0] + n * L_FRAME48k + 2, len_windowSignal ) - 1; - floatToFixed_arrL32( windowedSignal[0] + n * L_FRAME48k + 2, windowedSignal_fx[0] + n * L_FRAME48k + 2, q_windowedSignal, len_windowSignal ); - } #endif kernel_switch_update_transforms_fx( hTcxEnc0->spectrum_fx[n], mdst_spectrum_fx[0][n], &q_com, hTcxEnc0->transform_type[n], sts[0]->hTcxCfg, sts[0]->bwidth_sw_cnt, hTcxEnc0->kernel_type[n], - hTcxEnc0->new_speech_TCX, ( n == 1 ? NULL : hTcxEnc0->speech_TCX ), windowedSignal_fx[0] + n * L_FRAME48k, &q_windowedSignal, L_subframeTCX / nSubframes ); + hTcxEnc0->new_speech_TCX, ( n == 1 ? NULL : hTcxEnc0->speech_TCX ), windowedSignal_fx[0] + n * L_FRAME48k, &q_windowedSignal[0], L_subframeTCX / nSubframes ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS fixedToFloat_arrL32( hTcxEnc0->spectrum_fx[n], hTcxEnc0->spectrum[n], q_com, hTcxEnc0->L_frameTCX / nSubframes ); fixedToFloat_arrL32( mdst_spectrum_fx[0][n], mdst_spectrum[0][n], q_com, hTcxEnc0->L_frameTCX / nSubframes ); @@ -2222,19 +2092,9 @@ void ivas_mdct_core_whitening_enc( 0, sts[1]->hTcxCfg->tcx_mdct_window_min_lengthFB ); } - - windowedSignal_fx[1][0] = ( Word16 ) * ( windowedSignal[1] + n * L_FRAME48k ); - windowedSignal_fx[1][1] = ( Word16 ) * ( windowedSignal[1] + n * L_FRAME48k + 1 ); - if ( ( hTcxEnc1->transform_type[n] == TCX_5 ) || ( n == 0 /* speech_TCX != NULL*/ && NE_16( hTcxEnc1->transform_type[n], TCX_20 ) && windowedSignal_fx[1][0] == FULL_OVERLAP && GT_16( sub( L_subframeTCX / nSubframes, leftOverlap ), sts[1]->hTcxCfg->tcx_mdct_window_min_lengthFB - 1 ) ) ) - { - tcx_get_windows( sts[1]->hTcxCfg, extract_l( windowedSignal_fx[1][0] ), extract_l( windowedSignal_fx[1][1] ), &leftOverlap, &left_win, &rightOverlap, &right_win, 1 ); - len_windowSignal = ( L_subframeTCX / nSubframes ) + ( leftOverlap + rightOverlap ) / 2; - q_windowedSignal = Q_factor_arrL( windowedSignal[1] + n * L_FRAME48k + 2, len_windowSignal ) - 1; - floatToFixed_arrL32( windowedSignal[1] + n * L_FRAME48k + 2, windowedSignal_fx[1] + n * L_FRAME48k + 2, q_windowedSignal, len_windowSignal ); - } #endif kernel_switch_update_transforms_fx( hTcxEnc1->spectrum_fx[n], mdst_spectrum_fx[1][n], &q_com, hTcxEnc1->transform_type[n], sts[1]->hTcxCfg, sts[1]->bwidth_sw_cnt, hTcxEnc1->kernel_type[n], - hTcxEnc1->new_speech_TCX, ( n == 1 ? NULL : hTcxEnc1->speech_TCX ), windowedSignal_fx[1] + n * L_FRAME48k, &q_windowedSignal, L_subframeTCX / nSubframes ); + hTcxEnc1->new_speech_TCX, ( n == 1 ? 
NULL : hTcxEnc1->speech_TCX ), windowedSignal_fx[1] + n * L_FRAME48k, &q_windowedSignal[1], L_subframeTCX / nSubframes ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS fixedToFloat_arrL32( hTcxEnc1->spectrum_fx[n], hTcxEnc1->spectrum[n], q_com, hTcxEnc1->L_frameTCX / nSubframes ); fixedToFloat_arrL32( mdst_spectrum_fx[1][n], mdst_spectrum[1][n], q_com, hTcxEnc1->L_frameTCX / nSubframes ); @@ -2363,11 +2223,7 @@ void ivas_mdct_core_whitening_enc( else { sts[0]->hTcxEnc->enc_ste_pre_corr_past = 0; -#ifdef IVAS_FLOAT_FIXED sts[0]->hTcxEnc->kernel_switch_corr_past = 0; -#else - sts[0]->hTcxEnc->kernel_switch_corr_past_flt = 0.f; -#endif for ( ch = 0; ch < CPE_CHANNELS; ch++ ) { @@ -2380,11 +2236,8 @@ void ivas_mdct_core_whitening_enc( continue; } -#ifdef IVAS_FLOAT_FIXED init_tcx_enc_info_fx( sts[ch], &L_subframe, &L_subframeTCX, &tcx_subframe_coded_lines ); -#else - init_tcx_enc_info( sts[ch], &L_subframe, &L_subframeTCX, &tcx_subframe_coded_lines ); -#endif + nSubframes = ( hTcxEncCh->tcxMode == TCX_20 ) ? 1 : NB_DIV; for ( n = 0; n < nSubframes; n++ ) @@ -2412,24 +2265,9 @@ void ivas_mdct_core_whitening_enc( 0, sts[ch]->hTcxCfg->tcx_mdct_window_min_lengthFB ); } - - Word16 leftOverlap = 0, rightOverlap = 0; - PWord16 const *left_win; - PWord16 const *right_win; - Word16 q_windowedSignal, len_windowSignal; - - windowedSignal_fx[ch][0] = ( Word16 ) * ( windowedSignal[ch] + n * L_FRAME48k ); - windowedSignal_fx[ch][1] = ( Word16 ) * ( windowedSignal[ch] + n * L_FRAME48k + 1 ); - if ( ( hTcxEncCh->transform_type[n] == TCX_5 ) || ( n == 0 /* speech_TCX != NULL*/ && NE_16( hTcxEncCh->transform_type[n], TCX_20 ) && windowedSignal_fx[ch][0] == FULL_OVERLAP && GT_16( sub( L_subframeTCX / nSubframes, leftOverlap ), sts[ch]->hTcxCfg->tcx_mdct_window_min_lengthFB - 1 ) ) ) - { - tcx_get_windows( sts[ch]->hTcxCfg, extract_l( windowedSignal_fx[ch][0] ), extract_l( windowedSignal_fx[ch][1] ), &leftOverlap, &left_win, &rightOverlap, &right_win, 1 ); - len_windowSignal = ( L_subframeTCX / nSubframes ) + ( leftOverlap + rightOverlap ) / 2; - q_windowedSignal = Q_factor_arrL( windowedSignal[ch] + n * L_FRAME48k + 2, len_windowSignal ) - 1; - floatToFixed_arrL32( windowedSignal[ch] + n * L_FRAME48k + 2, windowedSignal_fx[ch] + n * L_FRAME48k + 2, q_windowedSignal, len_windowSignal ); - } #endif kernel_switch_update_transforms_fx( hTcxEncCh->spectrum_fx[n], mdst_spectrum_fx[ch][n], &q_com, hTcxEncCh->transform_type[n], sts[ch]->hTcxCfg, sts[ch]->bwidth_sw_cnt, hTcxEncCh->kernel_type[n], - hTcxEncCh->new_speech_TCX, ( n /*1*/ ? NULL : hTcxEncCh->speech_TCX ), windowedSignal_fx[ch] + n * L_FRAME48k, &q_windowedSignal, L_subframeTCX / nSubframes ); + hTcxEncCh->new_speech_TCX, ( n /*1*/ ? NULL : hTcxEncCh->speech_TCX ), windowedSignal_fx[ch] + n * L_FRAME48k, &q_windowedSignal[ch], L_subframeTCX / nSubframes ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS fixedToFloat_arrL32( hTcxEncCh->spectrum_fx[n], hTcxEncCh->spectrum[n], q_com, hTcxEncCh->L_frameTCX / nSubframes ); fixedToFloat_arrL32( mdst_spectrum_fx[ch][n], mdst_spectrum[ch][n], q_com, hTcxEncCh->L_frameTCX / nSubframes ); @@ -2501,11 +2339,7 @@ void ivas_mdct_core_whitening_enc( set_zero( chE, NB_DIV ); } -#ifdef IVAS_FLOAT_FIXED init_tcx_enc_info_fx( st, &L_subframe, &L_subframeTCX, &tcx_subframe_coded_lines ); -#else - init_tcx_enc_info( st, &L_subframe, &L_subframeTCX, &tcx_subframe_coded_lines ); -#endif L_subframe = L_subframe / nSubframes; L_subframeTCX = ( mct_on ? 
L_subframeTCX / nSubframes : L_subframe ); @@ -2536,13 +2370,10 @@ void ivas_mdct_core_whitening_enc( sns_compute_scf( powerSpec, st->hTcxCfg->psychParamsCurrent, st->L_frame, scf[ch][n] ); #else #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word16 q = L_get_q_buf1( powerSpec, st->L_frame ); - floatToFixed_arrL32( powerSpec, powerSpec_fx, q, st->L_frame ); + Word16 q = L_get_q_buf1( powerSpec, L_subframeTCX ); + floatToFixed_arrL32( powerSpec, powerSpec_fx, q, L_subframeTCX ); #endif sns_compute_scf_fx( powerSpec_fx, st->hTcxCfg->psychParamsCurrent, st->L_frame, scf_fx[ch][n], q ); -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - fixedToFloat_arrL32( scf_fx[ch][n], scf[ch][n], Q16, SNS_NPTS ); -#endif #endif } @@ -2575,53 +2406,14 @@ void ivas_mdct_core_whitening_enc( if ( !mct_on && sts[0]->sr_core == 25600 && ( ( hCPE->element_brate == IVAS_48k || hCPE->element_brate == IVAS_64k ) ) ) { -#ifdef IVAS_FLOAT_FIXED -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word16 scf_e = L_get_q_buf1( scf[0][0], M ); - scf_e = min( scf_e, L_get_q_buf1( scf[0][1], M ) ); - scf_e = min( scf_e, L_get_q_buf1( scf[1][0], M ) ); - scf_e = min( scf_e, L_get_q_buf1( scf[1][1], M ) ); - scf_e = 31 - ( scf_e - 4 ); - - floatToFixed_arrL32( scf[0][0], scf_fx[0][0], ( Q31 - scf_e ), M ); - floatToFixed_arrL32( scf[0][1], scf_fx[0][1], ( Q31 - scf_e ), M ); - floatToFixed_arrL32( scf[1][0], scf_fx[1][0], ( Q31 - scf_e ), M ); - floatToFixed_arrL32( scf[1][1], scf_fx[1][1], ( Q31 - scf_e ), M ); - -#endif - quantize_sns_fx( scf_fx, scf_q_fx, &scf_e, sts, sns_vq_indices, zero_side_flag, sns_stereo_mode ); - -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - fixedToFloat_arrL32( scf_q_fx[0][0], scf_q[0][0], ( Q31 - scf_e ), M ); - fixedToFloat_arrL32( scf_q_fx[0][1], scf_q[0][1], ( Q31 - scf_e ), M ); - fixedToFloat_arrL32( scf_q_fx[1][0], scf_q[1][0], ( Q31 - scf_e ), M ); - fixedToFloat_arrL32( scf_q_fx[1][1], scf_q[1][1], ( Q31 - scf_e ), M ); - -#endif - -#else - quantize_sns( scf, scf_q, sts, sns_vq_indices, zero_side_flag, sns_stereo_mode ); -#endif + quantize_sns_fx( scf_fx, scf_q_fx, Q15, sts, sns_vq_indices, zero_side_flag, sns_stereo_mode ); } else { if ( sts[0]->hTcxEnc->tcxMode == TCX_20 && sts[1]->hTcxEnc->tcxMode == TCX_20 && sts[0]->mct_chan_mode == MCT_CHAN_MODE_REGULAR && sts[1]->mct_chan_mode == MCT_CHAN_MODE_REGULAR ) { -#ifdef IVAS_FLOAT_FIXED - /*=================flt-2-fix==============*/ - Word16 exp_snl = 0, exp_snr = 0; - f2me_buf( scf[0][0], scf_fx[0][0], &exp_snl, M ); - f2me_buf( scf[1][0], scf_fx[1][0], &exp_snr, M ); - /*=================flt-2-fix==============*/ - sns_avq_cod_stereo_fx( scf_fx[0][0], exp_snl, scf_fx[1][0], exp_snr, sts[0]->L_frame, scf_q_fx[0][0], scf_q_fx[1][0], param_lpc[0], param_lpc[1] ); - /*===============fix-2-flt==========================*/ - fixedToFloat_arrL( scf_q_fx[0][0], scf_q[0][0], Q16, M ); - fixedToFloat_arrL( scf_q_fx[1][0], scf_q[1][0], Q16, M ); - /*===============fix-2-flt==========================*/ -#else - sns_avq_cod_stereo( scf[0][0], scf[1][0], sts[0]->L_frame, scf_q[0][0], scf_q[1][0], param_lpc[0], param_lpc[1] ); -#endif + sns_avq_cod_stereo_fx( scf_fx[0][0], Q15, scf_fx[1][0], Q15, sts[0]->L_frame, scf_q_fx[0][0], scf_q_fx[1][0], param_lpc[0], param_lpc[1] ); } else { @@ -2633,40 +2425,14 @@ void ivas_mdct_core_whitening_enc( continue; } st = sts[ch]; -#ifdef IVAS_FLOAT_FIXED - Word16 exp_scf_1 = 0, exp_scf_0 = 0; IF( st->hTcxEnc->tcxMode == TCX_20 ) { - /*===============flt-2-fix==========================*/ - f2me_buf( scf[ch][0], scf_fx[ch][0], &exp_scf_0, M ); - 
/*===============flt-2-fix==========================*/ - sns_avq_cod_fx( scf_fx[ch][0], exp_scf_0, NULL, 0, scf_q_fx[ch][0], NULL, &param_lpc[ch][1], st->hTcxEnc->tcxMode, st->L_frame, sns_low_br_mode ); - /*===============fix-2-flt==========================*/ - fixedToFloat_arrL( scf_q_fx[ch][0], scf_q[ch][0], Q16, M ); - /*===============fix-2-flt==========================*/ + sns_avq_cod_fx( scf_fx[ch][0], Q15, NULL, 0, scf_q_fx[ch][0], NULL, &param_lpc[ch][1], st->hTcxEnc->tcxMode, st->L_frame, sns_low_br_mode ); } ELSE { - /*===============flt-2-fix==========================*/ - f2me_buf( scf[ch][1], scf_fx[ch][1], &exp_scf_1, M ); - f2me_buf( scf[ch][0], scf_fx[ch][0], &exp_scf_0, M ); - /*===============flt-2-fix==========================*/ - sns_avq_cod_fx( scf_fx[ch][1], exp_scf_1, scf_fx[ch][0], exp_scf_0, scf_q_fx[ch][1], scf_q_fx[ch][0], &param_lpc[ch][1], st->hTcxEnc->tcxMode, st->L_frame, sns_low_br_mode ); - /*===============fix-2-flt==========================*/ - fixedToFloat_arrL( scf_q_fx[ch][0], scf_q[ch][0], Q16, M ); - fixedToFloat_arrL( scf_q_fx[ch][1], scf_q[ch][1], Q16, M ); - /*===============fix-2-flt==========================*/ - } -#else - if ( st->hTcxEnc->tcxMode == TCX_20 ) - { - sns_avq_cod( scf[ch][0], NULL, scf_q[ch][0], NULL, &param_lpc[ch][1], st->hTcxEnc->tcxMode, st->L_frame, sns_low_br_mode ); - } - else - { - sns_avq_cod( scf[ch][1], scf[ch][0], scf_q[ch][1], scf_q[ch][0], &param_lpc[ch][1], st->hTcxEnc->tcxMode, st->L_frame, sns_low_br_mode ); + sns_avq_cod_fx( scf_fx[ch][1], Q15, scf_fx[ch][0], Q15, scf_q_fx[ch][1], scf_q_fx[ch][0], &param_lpc[ch][1], st->hTcxEnc->tcxMode, st->L_frame, sns_low_br_mode ); } -#endif } } } @@ -2679,11 +2445,7 @@ void ivas_mdct_core_whitening_enc( } st = sts[ch]; nSubframes = ( st->hTcxEnc->tcxMode == TCX_20 ) ?
1 : NB_DIV; -#ifdef IVAS_FLOAT_FIXED init_tcx_enc_info_fx( st, &L_subframe, &L_subframeTCX, &tcx_subframe_coded_lines ); -#else - init_tcx_enc_info( st, &L_subframe, &L_subframeTCX, &tcx_subframe_coded_lines ); -#endif L_subframe = L_subframe / nSubframes; L_subframeTCX = L_subframeTCX / nSubframes; @@ -2695,10 +2457,8 @@ void ivas_mdct_core_whitening_enc( #ifdef IVAS_FLOAT_FIXED #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word16 A_q_fx[CPE_CHANNELS][NB_DIV][M + 1]; st->hTcxEnc->spectrum_e[n] = 31 - ( Q_factor_arrL( st->hTcxEnc->spectrum[n], tcx_subframe_coded_lines ) - 4 ); floatToFixed_arrL( st->hTcxEnc->spectrum[n], st->hTcxEnc->spectrum_fx[n], 31 - st->hTcxEnc->spectrum_e[n], tcx_subframe_coded_lines ); - floatToFixed_arrL( scf_q[ch][n], scf_q_fx[ch][n], Q16, M ); #endif /* Shape spectrum */ @@ -2816,11 +2576,8 @@ void ivas_mdct_core_whitening_enc( { #ifdef IVAS_FLOAT_FIXED #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word16 A_q_fx[CPE_CHANNELS][NB_DIV][M + 1]; - floatToFixed_arr( A_q[ch][n], A_q_fx[ch][n], Q12, M + 1 ); mdst_spectrum_e[ch][n] = 31 - ( Q_factor_arrL( mdst_spectrum[ch][n], tcx_subframe_coded_lines ) - 2 ); floatToFixed_arrL( mdst_spectrum[ch][n], mdst_spectrum_fx[ch][n], 31 - mdst_spectrum_e[ch][n], tcx_subframe_coded_lines ); - floatToFixed_arrL( scf_q[ch][n], scf_q_fx[ch][n], Q16, M ); #endif /* Shape spectrum */ @@ -3020,11 +2777,11 @@ void ivas_mdct_core_whitening_enc( } if ( param_core[ch][1 + NOISE_FILL_RANGES] != 0 ) { - set_f( pitch_buf[ch], ( st->hTcxEnc->tcxltp_pitch_int + (float) st->hTcxEnc->tcxltp_pitch_fr / (float) st->pit_res_max ) * (float) st->sr_core / (float) INT_FS_12k8, NB_SUBFR16k ); + set16_fx( pitch_buf_fx[ch], extract_l( Mult_32_32( L_add( L_deposit_h( st->hTcxEnc->tcxltp_pitch_int ), L_mult( st->hTcxEnc->tcxltp_pitch_fr, div_s( 1, st->pit_res_max ) ) ), Mult_32_32( L_shl( st->sr_core, Q10 ), ONE_BY_INT_FS_12k8_Q42 ) ) ), NB_SUBFR16k ); } else { - set_f( pitch_buf[ch], L_SUBFR, NB_SUBFR16k ); + set16_fx( pitch_buf_fx[ch], L_SUBFR << Q6, NB_SUBFR16k ); } } diff --git a/lib_enc/ivas_sns_enc.c b/lib_enc/ivas_sns_enc.c index b69b151a7f3efc33ab176994c15def81d8491b89..87ac472a645f0cc8558a9f74ba0eb9e9f0ff8f10 100644 --- a/lib_enc/ivas_sns_enc.c +++ b/lib_enc/ivas_sns_enc.c @@ -811,7 +811,7 @@ void sns_avq_cod_stereo( Word16 quantize_sns_fx( Word32 sns_in_fx[CPE_CHANNELS][NB_DIV][M], /* sns_e */ Word32 snsQ_out_fx[CPE_CHANNELS][NB_DIV][M], /* sns_e */ - Word16 *sns_e, + Word16 sns_e, Encoder_State **sts, Word16 *indices, /* Q0 */ Word16 *zero_side_flag, /* Q0 */ @@ -857,9 +857,9 @@ Word16 quantize_sns_fx( Copy32( sns_in_fx[ch][k], snsQ_out_fx[ch][k], M ); // sns_e } - sns_e_tmp[ch][0] = *sns_e; + sns_e_tmp[ch][0] = sns_e; move16(); - sns_e_tmp[ch][1] = *sns_e; + sns_e_tmp[ch][1] = sns_e; move16(); } @@ -894,7 +894,7 @@ Word16 quantize_sns_fx( i = W_norm( L64_sum ); L64_sum = W_shl( L64_sum, i ); ener_side_fx = W_extract_h( L64_sum ); // ener_side_q - ener_side_q = sub( add( shl( sub( 31, *sns_e ), 1 ), add( 1, i ) ), 32 ); + ener_side_q = sub( add( shl( sub( 31, sns_e ), 1 ), add( 1, i ) ), 32 ); sns_stereo_mode[k] = 0; move16(); @@ -996,12 +996,12 @@ Word16 quantize_sns_fx( nStages = SNS_MSVQ_NSTAGES_SIDE; move16(); - msvq_enc_ivas_fx( side_cdbks_fx, Q15, NULL, NULL, snsQ_fx, *sns_e, side_levels, 3, nStages, weights_fx, M, M, 0, NULL, &indices[idxIndices] ); + msvq_enc_ivas_fx( side_cdbks_fx, Q15, NULL, NULL, snsQ_fx, sns_e, side_levels, 3, nStages, weights_fx, M, M, 0, NULL, &indices[idxIndices] ); msvq_dec_fx( side_cdbks_fx, NULL, NULL, nStages, M, M, 
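/*
 * Q bookkeeping behind the pitch_buf_fx expression above (local names pit_q16 / rate_q21 are
 * illustrative only). With L_mult giving Qa+Qb+1 and Mult_32_32 giving Qa+Qb-31:
 *
 *   Word32 pit_q16  = L_add( L_deposit_h( tcxltp_pitch_int ),                      // Q0  -> Q16
 *                            L_mult( tcxltp_pitch_fr, div_s( 1, pit_res_max ) ) ); // Q0 x Q15 -> Q16
 *   Word32 rate_q21 = Mult_32_32( L_shl( sr_core, Q10 ), ONE_BY_INT_FS_12k8_Q42 ); // Q10 + Q42 - 31 = Q21
 *   Word16 pit_q6   = extract_l( Mult_32_32( pit_q16, rate_q21 ) );                // Q16 + Q21 - 31 = Q6
 *
 * which matches the Q6 convention of pitch_buf_fx; the fallback constant is written accordingly
 * as L_SUBFR << Q6.
 */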
&indices[idxIndices], 0, NULL, snsQ_fx, NULL, Q15 ); } ELSE { - msvq_enc_ivas_fx( cdbks_fx, Q12, NULL, NULL, sns_ptr_fx, *sns_e, levels, 3, nStages, weights_fx, M, M, 0, NULL, &indices[idxIndices] ); + msvq_enc_ivas_fx( cdbks_fx, Q12, NULL, NULL, sns_ptr_fx, sns_e, levels, 3, nStages, weights_fx, M, M, 0, NULL, &indices[idxIndices] ); msvq_dec_fx( cdbks_fx, NULL, NULL, nStages, M, M, &indices[idxIndices], 0, NULL, snsQ_fx, NULL, Q12 ); } Word16 shift = find_guarded_bits_fx( M ); @@ -1016,13 +1016,13 @@ Word16 quantize_sns_fx( } } /* Re-Scaling Buffers*/ - *sns_e = sns_e_tmp[0][0]; + sns_e = sns_e_tmp[0][0]; move16(); FOR( ch = 0; ch < CPE_CHANNELS; ch++ ) { - *sns_e = s_max( *sns_e, sns_e_tmp[ch][0] ); - *sns_e = s_max( *sns_e, sns_e_tmp[ch][1] ); + sns_e = s_max( sns_e, sns_e_tmp[ch][0] ); + sns_e = s_max( sns_e, sns_e_tmp[ch][1] ); } FOR( ch = 0; ch < CPE_CHANNELS; ch++ ) @@ -1030,7 +1030,7 @@ Word16 quantize_sns_fx( #ifdef MSAN_FIX FOR( k = 0; k < nSubframes; k++ ) { - scale_sig32( snsQ_out_fx[ch][k], M, sub( sns_e_tmp[ch][k], *sns_e ) ); + scale_sig32( snsQ_out_fx[ch][k], M, sub( sns_e_tmp[ch][k], sns_e ) ); } #else scale_sig32( snsQ_out_fx[ch][0], M, sub( sns_e_tmp[ch][0], *sns_e ) ); diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index 74f32fd7d068854fa07f9c67b415432da3166990..61678b6af24c4307406aeea04a0cb4762868cff6 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -899,10 +899,10 @@ typedef struct front_vad_enc /* Q9 long term speech average */ float lp_noise; /* long term noise average */ float *delay_buf; - float mem_preemph; /* preemph filter memory */ - float mem_decim[2 * L_FILT_MAX]; /* decimation filter memory */ -#endif + float mem_preemph; /* preemph filter memory */ + float mem_decim[2 * L_FILT_MAX]; /* decimation filter memory */ float buffer_12k8[3 * L_FRAME / 2]; /* 12k8 signal buffer */ +#endif Word16 mem_preemph_fx; /* preemph filter memory */ NOISE_EST_HANDLE hNoiseEst; /* Noise estimation handle */ @@ -916,6 +916,8 @@ typedef struct front_vad_enc Word16 lp_noise_fx; Word16 mem_decim_fx[2 * L_FILT_MAX]; /* decimation filter memory */ Word16 buffer_12k8_fx[3 * L_FRAME / 2]; + Word16 q_mem_decim; + Word16 q_buffer_12k8; #endif // Word32 buffer_12k8_fx[3 * L_FRAME / 2]; /* 12k8 signal buffer */ } FRONT_VAD_ENC, *FRONT_VAD_ENC_HANDLE; diff --git a/lib_enc/ivas_stereo_dft_td_itd.c b/lib_enc/ivas_stereo_dft_td_itd.c index 3462f614b69b8eabdccb4ed9c7568438ac08d436..f95a72f8f8beed18133c5732585241595dcf3b4d 100644 --- a/lib_enc/ivas_stereo_dft_td_itd.c +++ b/lib_enc/ivas_stereo_dft_td_itd.c @@ -318,7 +318,7 @@ static void stereo_td_channel_extrapolate_fx( Word16 flag; Word16 pitch_lag; Word16 res_shift; - Word16 pitch0; + Word16 pitch0, tmp, tmp_e; Word32 L_tmp; // shift_mem and shift_input are of same Q q_shift// set16_fx( shift_combined, 0, add( L_FRAME48k, L_FRAME48k ) ); @@ -339,7 +339,10 @@ static void stereo_td_channel_extrapolate_fx( pred_ovlp = idiv1616( input_frame, 10 ); /*get pitch lag from previous frame */ - pitch_lag = (int16_t) ( pitch0 * ( (float) input_frame / L_FRAME ) ); + // pitch_lag = (int16_t) ( pitch0 * ( (float) input_frame / L_FRAME ) ); + tmp = BASOP_Util_Divide3232_Scale( input_frame, L_FRAME, &tmp_e ); + L_tmp = L_mult0( pitch0, tmp ); + pitch_lag = extract_l( L_shr( L_tmp, sub( 15, tmp_e ) ) ); /* Q0 */ /* compute the parameters g, nsr and g_lpc */ dot_lead_lag = EPSILON_FX; diff --git a/lib_enc/ivas_stereo_mdct_core_enc.c b/lib_enc/ivas_stereo_mdct_core_enc.c index 
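/*
 * The pitch_lag conversion above (pitch0 * input_frame / L_FRAME in the float code) relies on
 * BASOP_Util_Divide3232_Scale() returning a Q15 mantissa with a separate exponent, i.e.
 * input_frame / L_FRAME ~= tmp * 2^( tmp_e - 15 ); the scaling is then removed after the multiply.
 * A worked restatement (illustrative only):
 *
 *   tmp       = BASOP_Util_Divide3232_Scale( input_frame, L_FRAME, &tmp_e ); // mantissa + exponent
 *   L_tmp     = L_mult0( pitch0, tmp );                        // pitch0 (Q0) scaled by 2^( 15 - tmp_e )
 *   pitch_lag = extract_l( L_shr( L_tmp, sub( 15, tmp_e ) ) ); // remove the scaling -> Q0
 *
 * The mantissa/exponent form is needed because the ratio can exceed 1.0 (e.g. a 48 kHz frame of
 * 960 samples against the 256-sample 12.8 kHz core frame gives 3.75), which plain Q15 cannot hold.
 */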
90e961dec09bc7538aa5bcd2e222bd3dcd3b8c0c..ada638d040de9e73f2ad112eeb6504a88547c6ba 100644 --- a/lib_enc/ivas_stereo_mdct_core_enc.c +++ b/lib_enc/ivas_stereo_mdct_core_enc.c @@ -726,9 +726,26 @@ void stereo_mdct_core_enc( * - Envelope Quantization and FDNS * - TNS *---------------------------------------------------------------*/ - - ivas_mdct_core_whitening_enc( hCPE, new_samples, old_wsp, pitch_buf, p_mdst_spectrum_long, +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + Word16 new_samples_fx[CPE_CHANNELS][L_INP]; + Word16 old_wsp_fx[CPE_CHANNELS][L_WSP], q_fac[CPE_CHANNELS]; + Word16 pitch_buf_fx_new[CPE_CHANNELS][NB_SUBFR16k]; /* Q6 */ + for ( i = 0; i < CPE_CHANNELS; i++ ) + { + floatToFixed_arr( new_samples[i], new_samples_fx[i], 0, L_INP ); + q_fac[i] = Q_factor_arr( old_wsp[i], L_WSP ); + floatToFixed_arr( old_wsp[i], old_wsp_fx[i], q_fac[i], L_WSP ); + } +#endif + ivas_mdct_core_whitening_enc( hCPE, new_samples_fx, old_wsp_fx, pitch_buf_fx_new, p_mdst_spectrum_long, tnsBits, p_orig_spectrum_long, tnsSize, p_param, hBstr, 0, CPE_CHANNELS ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + for ( i = 0; i < CPE_CHANNELS; i++ ) + { + fixedToFloat_arr( old_wsp_fx[i], old_wsp[i], q_fac[i], L_WSP ); + fixedToFloat_arr( pitch_buf_fx_new[i], pitch_buf[i], Q6, NB_SUBFR16k ); + } +#endif for ( ch = 0; ch < CPE_CHANNELS; ch++ ) { diff --git a/lib_enc/ivas_stereo_td_enc.c b/lib_enc/ivas_stereo_td_enc.c index 79181844dc4187f3c7b188ad6a086fce4a578eb1..3c14a53c71fc9cdd2559a3e7ba05e081344ea5fa 100644 --- a/lib_enc/ivas_stereo_td_enc.c +++ b/lib_enc/ivas_stereo_td_enc.c @@ -1492,12 +1492,19 @@ static void tdm_downmix_fade_ivas_fx( move16(); move16(); + Word16 tmp1, tmp2, tmp3, tmp4; + tmp1 = extract_h( One_m_OldRatio_fx ); + tmp2 = extract_h( OldRatio_L_fx ); + tmp3 = extract_h( One_m_Ratio_fx ); + tmp4 = extract_h( ratio_L_fx ); + FOR( i = start_index; i < end_index; i++ ) { - FR_Y_fx[i] = add( mult( add( mult( Right_in_fx[i], extract_h( One_m_OldRatio_fx ) ), mult( Left_in_fx[i], extract_h( OldRatio_L_fx ) ) ), fade_out_fx ), mult( add( mult( Right_in_fx[i], extract_h( One_m_Ratio_fx ) ), mult( Left_in_fx[i], extract_h( ratio_L_fx ) ) ), fade_in_fx ) ); // Qx - LR_X_fx[i] = add( mult( sub( mult( Left_in_fx[i], extract_h( One_m_OldRatio_fx ) ), mult( Right_in_fx[i], extract_h( OldRatio_L_fx ) ) ), fade_out_fx ), - mult( sub( mult( Left_in_fx[i], extract_h( One_m_Ratio_fx ) ), mult( Right_in_fx[i], extract_h( ratio_L_fx ) ) ), - fade_in_fx ) ); // Qx + FR_Y_fx[i] = extract_h( L_add( Mpy_32_16_1( L_mac( L_mult( Right_in_fx[i], tmp1 ), Left_in_fx[i], tmp2 ), fade_out_fx ), + Mpy_32_16_1( L_mac( L_mult( Right_in_fx[i], tmp3 ), Left_in_fx[i], tmp4 ), fade_in_fx ) ) ); // Qx + LR_X_fx[i] = extract_h( L_add( Mpy_32_16_1( L_msu( L_mult( Left_in_fx[i], tmp1 ), Right_in_fx[i], tmp2 ), fade_out_fx ), + Mpy_32_16_1( L_msu( L_mult( Left_in_fx[i], tmp3 ), Right_in_fx[i], tmp4 ), + fade_in_fx ) ) ); // Qx move16(); move16(); diff --git a/lib_enc/ivas_tcx_core_enc.c b/lib_enc/ivas_tcx_core_enc.c index f4841867b9a0b4cfe1e418545467d730e60415aa..8b809acac417d8ae1731d7657173529b45ab5358 100644 --- a/lib_enc/ivas_tcx_core_enc.c +++ b/lib_enc/ivas_tcx_core_enc.c @@ -457,7 +457,7 @@ void stereo_tcx_core_enc( #endif Q_new = 0; move16(); - core_signal_analysis_high_bitrate_ivas_fx( p_new_samples, T_op, lsp_new_fx, lsp_mid_fx, st, tnsSize, tnsBits, param_core, &ltpBits, NULL, st->L_frame, hTcxEnc->L_frameTCX, last_element_mode, vad_hover_flag, NULL, NULL, &Q_new ); + core_signal_analysis_high_bitrate_ivas_fx( p_new_samples, T_op, lsp_new_fx,
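/*
 * The rewritten downmix fade above keeps each weighted sum in 32 bits (L_mult/L_mac/L_msu),
 * applies the Q15 fade gain with Mpy_32_16_1() and rounds only once via extract_h(), instead of
 * rounding every 16-bit mult() before add(). A reduced sketch with a hypothetical helper
 * (wsum_fade_fx is not part of the patch):
 *
 *   static Word16 wsum_fade_fx( Word16 x, Word16 y, Word16 wx, Word16 wy, Word16 fade )
 *   {
 *       Word32 acc = L_mac( L_mult( x, wx ), y, wy );  // Qx * Q15 -> Q(x+16), no intermediate rounding
 *       return extract_h( Mpy_32_16_1( acc, fade ) );  // apply Q15 fade, single rounding back to Qx
 *   }
 *
 * In the patch the fade-out and fade-in terms are additionally summed in 32 bits with L_add()
 * before the final extract_h(), so the whole crossfade rounds exactly once per output sample.
 */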
lsp_mid_fx, st, tnsSize, tnsBits, param_core, &ltpBits, NULL, st->L_frame, hTcxEnc->L_frameTCX, last_element_mode, vad_hover_flag, NULL, NULL, &Q_new, NULL ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS fixedToFloat_arr( st->input_buff_fx, st->input_buff, 0, L_FRAME48k + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ); #endif diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index 05eb312685033ab1e118bcb51629e4c2ed6a1d0e..b4c01abe8c78f5689c5359f73c27b640b064ab57 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -2362,7 +2362,8 @@ void core_signal_analysis_high_bitrate_ivas_fx( const Word16 vad_hover_flag, /* i : VAD hangover flag */ Word32 **spectrum, Word16 *spectrum_e, - Word16 *Q_new ); + Word16 *Q_new, + Word16 *q_win ); #endif void ShapeSpectrum_fx( diff --git a/lib_enc/rst_enc.c b/lib_enc/rst_enc.c index 2281cefa7d4dff0971d642441839ff5fb49fd133..d0732f539fc0ac986c8e1c3163afab5b865cd226 100644 --- a/lib_enc/rst_enc.c +++ b/lib_enc/rst_enc.c @@ -40,10 +40,8 @@ #include "rom_com.h" #include "prot.h" #include "wmc_auto.h" -#ifdef IVAS_FLOAT_FIXED -#include "prot_fx.h" -#endif +#ifndef IVAS_FLOAT_FIXED /*-------------------------------------------------------------------* * CNG_reset_enc() * @@ -56,42 +54,18 @@ void CNG_reset_enc( float *voice_factors, /* o : voicing factors */ int16_t VBR_cng_reset_flag ) { -#ifndef IVAS_FLOAT_FIXED init_gp_clip( st->clip_var ); mvr2r( UVWB_Ave, st->mem_AR, M ); set_f( st->mem_MA, 0, M ); -#else - init_gp_clip_fx( st->clip_var_fx ); - Copy( UVWB_Ave_fx, st->mem_AR_fx, M ); - set16_fx( st->mem_MA_fx, 0, M ); -#endif st->hLPDmem->mem_w0_flt = 0.0f; -#ifndef IVAS_FLOAT_FIXED st->hLPDmem->tilt_code_flt = 0.0f; st->hLPDmem->gc_threshold_flt = 0.0f; -#else - st->hLPDmem->tilt_code = 0; - st->hLPDmem->gc_threshold = 0; -#endif if ( VBR_cng_reset_flag ) { set_f( st->hLPDmem->mem_syn_flt, 0, M ); } -#ifndef IVAS_FLOAT_FIXED set_f( st->hLPDmem->dispMem_flt, 0, 8 ); -#else - st->hLPDmem->dm_fx.prev_state = 0; - move16(); /* This corresponds to st_fx->dispMem in FLP */ - st->hLPDmem->dm_fx.prev_gain_code = 0; - move32(); - - FOR( Word16 i = 2; i < 8; i++ ) - { - st->hLPDmem->dm_fx.prev_gain_pit[i - 2] = 0; - move16(); - } -#endif /* last good received frame for FEC in ACELP */ st->clas = UNVOICED_CLAS; @@ -119,3 +93,4 @@ void CNG_reset_enc( return; } +#endif diff --git a/lib_enc/stat_enc.h b/lib_enc/stat_enc.h index 02df0a4df6561083cc2a11fe30c53b021d12856e..1fd85ab2721a817dbee2cfcca927e08550118928 100644 --- a/lib_enc/stat_enc.h +++ b/lib_enc/stat_enc.h @@ -2011,9 +2011,12 @@ typedef struct enc_core_structure int16_t Nb_ACELP_frames; int16_t pitch[3]; /* open-loop pitch values @12.8 kHz for three half-frames */ - float voicing[3]; /* open-loop normalized correlation values for three half-frames */ // Word16 pitch_fx[3]; - Word16 voicing_fx[3]; /* Q15 */ +#ifndef IVAS_FLOAT_FIXED + float voicing[3]; /* open-loop normalized correlation values for three half-frames */ +#else + Word16 voicing_fx[3]; /* open-loop normalized correlation values for three half-frames Q15 */ +#endif LPD_state_HANDLE hLPDmem; /* ACELP LPDmem memories */ @@ -2287,14 +2290,11 @@ typedef struct enc_core_structure #ifndef IVAS_FLOAT_FIXED float bckr_tilt_lt_flt; -#else - Word32 bckr_tilt_lt; /* Q16 */ -#endif float lp_speech; - Word16 lp_speech_fx; // Q8 -#ifndef IVAS_FLOAT_FIXED float lp_noise; /* CNG and DTX - LP filtered total noise estimation */ #else + Word32 bckr_tilt_lt; /* Q16 */ + Word16 lp_speech_fx; /* Q8 */ Word16 lp_noise_fx; /* CNG and DTX - LP filtered total
noise estimation Q8 */ #endif Word16 Opt_HE_SAD_ON_fx; diff --git a/lib_enc/swb_tbe_enc_fx.c b/lib_enc/swb_tbe_enc_fx.c index fa8acaab9a014c3da6442c9bd36a8b7fe45f852a..07958a06348978090c16c41518a2b145494521a8 100644 --- a/lib_enc/swb_tbe_enc_fx.c +++ b/lib_enc/swb_tbe_enc_fx.c @@ -3707,8 +3707,8 @@ void swb_tbe_enc_ivas_fx( FOR( i = 0; i < L_FRAME16k; i += L_SUBFR16k ) { - PostShortTerm_fx( &shaped_shb_excitation_fx[L_SHB_LAHEAD + i], lpc_shb_fx, &shaped_shb_excitationTemp_fx[i], hBWE_TD->mem_stp_swb_fx, - hBWE_TD->ptr_mem_stp_swb_fx, &( hBWE_TD->gain_prec_swb_fx ), hBWE_TD->mem_zero_swb_fx, formant_fac_fx ); + PostShortTerm_ivas_fx( &shaped_shb_excitation_fx[L_SHB_LAHEAD + i], lpc_shb_fx, &shaped_shb_excitationTemp_fx[i], hBWE_TD->mem_stp_swb_fx, + hBWE_TD->ptr_mem_stp_swb_fx, &( hBWE_TD->gain_prec_swb_fx ), hBWE_TD->mem_zero_swb_fx, formant_fac_fx ); /* i: shaped_shb_excitation_fx in Q_bwe_exc */ /* i: lpc_shb_fx in Q12 */ } @@ -3759,8 +3759,12 @@ void swb_tbe_enc_ivas_fx( FOR( i = 0; i < L_SHB_LAHEAD; i++ ) { - L_tmp = Mult_32_16( Lscale, shaped_shb_excitation_fx[i] ); /* Q(16-exp+Q_bwe_exc) */ - shaped_shb_excitation_fx[i] = round_fx( L_shl( L_tmp, exp ) ); /* Q_bwe_exc */ + L_tmp = Mult_32_16( Lscale, shaped_shb_excitation_fx[i] ); /* Q(16-exp+Q_bwe_exc) */ +#ifdef BASOP_NOGLOB + shaped_shb_excitation_fx[i] = round_fx_o( L_shl_o( L_tmp, exp, &Overflow ), &Overflow ); /* Q_bwe_exc */ +#else + shaped_shb_excitation_fx[i] = round_fx( L_shl( L_tmp, exp ) ); /* Q_bwe_exc */ +#endif move16(); } IF( exp < 0 )