diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index cc1a384e77deb764c91a0380349874fcca164937..598014a742616edd21e326a4fdcc386bd3df49b1 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -168,7 +168,9 @@ ivas_error pre_proc_front_ivas( const int16_t n, /* i : channel number */ float old_inp_12k8[], /* o : buffer of old input signal */ float old_inp_16k[], /* o : buffer of old input signal @16kHz */ +#ifndef REMOVE_ETOT_PROPAGATION float *Etot, /* o : total energy */ +#endif float *ener, /* o : residual energy from Levinson-Durbin */ float *relE, /* o : frame relative energy */ float A[NB_SUBFR16k * ( M + 1 )], /* o : A(z) unquantized for the 4 subframes */ @@ -414,7 +416,9 @@ ivas_error ivas_core_enc( const int16_t n_CoreChannels, /* i : number of core channels to be coded */ float old_inp_12k8[CPE_CHANNELS][L_INP_12k8], /* i : buffer of old input signal */ float old_inp_16k[CPE_CHANNELS][L_INP], /* i : buffer of old input signal */ +#ifndef REMOVE_ETOT_PROPAGATION const float Etot[CPE_CHANNELS], /* i : total energy */ +#endif float ener[CPE_CHANNELS], /* i : residual energy from Levinson-Durbin */ float A[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )], /* i : A(z) unquantized for the 4 subframes */ float Aw[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )], /* i : weighted A(z) unquantized for subframes */ @@ -1754,6 +1758,9 @@ void tdm_ol_pitch_comparison( void tdm_configure_enc( CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ +#ifdef REMOVE_ETOT_PROPAGATION + const float Etot_last[CPE_CHANNELS], /* i : Energy of last frame */ +#endif const int16_t tdm_SM_or_LRTD_Pri, /* i : channel combination scheme flag in TD stereo OR LRTD primary channel */ const int16_t tdm_ratio_idx, /* i : ratio index */ const int16_t tdm_ratio_idx_SM, /* i : ratio index in SM mode */ diff --git a/lib_com/options.h b/lib_com/options.h index 89b70e7b1a7882762f49b60276f60dc4578d43c1..1949f75acdfb91a01309a9cc424b0c268f6a3c51 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ 
-158,6 +158,7 @@ #define LOW_RATE_TRANS /* Eri: Contribution 20: low rate encoding of transients */ #define MC_BITRATE_SWITCHING /* Issue 116: support bitrate switching in MC format */ #define SIMPLIFY_TD_BWE_RESET /* Issue 250: Resolve "TB-BWE state memories reset simplification" */ +#define REMOVE_ETOT_PROPAGATION /* Issue 251: Do not propagate Etot parameter */ diff --git a/lib_com/prot.h b/lib_com/prot.h old mode 100755 new mode 100644 index d8508e5152882ee3e2178c9180bc58911e199130..70af73bbe96d7c9b47f3dfbeddd55cc0025497ec --- a/lib_com/prot.h +++ b/lib_com/prot.h @@ -714,12 +714,12 @@ int16_t lev_dur( /*! r: delay value in ns */ int32_t get_delay( const int16_t enc_dec, /* i : encoder/decoder flag */ - const int32_t io_fs, /* i : input/output sampling frequency */ - const IVAS_FORMAT ivas_format, /* i : IVAS format */ - HANDLE_CLDFB_FILTER_BANK hCldfb, /* i : Handle of Cldfb analysis */ + const int32_t io_fs, /* i : input/output sampling frequency */ + const IVAS_FORMAT ivas_format, /* i : IVAS format */ + HANDLE_CLDFB_FILTER_BANK hCldfb, /* i : Handle of Cldfb analysis */ #ifndef FIX_I59_LFE_TD_DELAY - RENDERER_TYPE renderer_type, /* i : IVAS rendering type */ - const int32_t binaural_latency_ns /* i : binaural renderer HRTF delay in ns */ + RENDERER_TYPE renderer_type, /* i : IVAS rendering type */ + const int32_t binaural_latency_ns /* i : binaural renderer HRTF delay in ns */ #else const int32_t binaural_latency_ns /* i : binauralization delay in ns */ #endif @@ -2304,8 +2304,10 @@ void pre_proc( float old_inp_16k[], /* i/o: buffer of old input signal @ 16kHz */ float **inp, /* o : ptr. to inp. 
signal in the current frame*/ float fr_bands[2 * NB_BANDS], /* i : energy in frequency bands */ - float *Etot, /* i : total energy */ - float *ener, /* o : residual energy from Levinson-Durbin */ +#ifndef REMOVE_ETOT_PROPAGATION + float *Etot, /* i : total energy */ +#endif + float *ener, /* o : residual energy from Levinson-Durbin */ #ifndef FIX_I4_OL_PITCH int16_t pitch_orig[3], /* o : open-loop pitch values for quantization */ #endif @@ -3902,7 +3904,7 @@ void td_cng_enc_init( void dtx( Encoder_State *st, /* i/o: encoder state structure */ - const int32_t ivas_total_brate, /* i : IVAS total bitrate */ + const int32_t ivas_total_brate, /* i : IVAS total bitrate */ const int16_t vad, /* i : VAD flag for DTX */ const float speech[] /* i : Pointer to the speech frame */ ); diff --git a/lib_enc/amr_wb_enc.c b/lib_enc/amr_wb_enc.c index 5bbc890b521012f34b5e58f42e85a16f47277a7b..e9fca2c737d24dd107e4687d7318b72fe0a708e6 100644 --- a/lib_enc/amr_wb_enc.c +++ b/lib_enc/amr_wb_enc.c @@ -532,7 +532,9 @@ void amr_wb_enc( /* update main codec paramaters */ #ifdef MC_BITRATE_SWITCHING +#ifndef REMOVE_ETOT_PROPAGATION st->hNoiseEst->Etot_last = Etot; +#endif updt_enc_common( st ); #else updt_enc_common( st, Etot ); diff --git a/lib_enc/evs_enc.c b/lib_enc/evs_enc.c index c9b74b63f52c04b3ee062ebef7cb7d8d470021f5..8a75a8a8108fcff65f994e295e84b1ea16f786fa 100644 --- a/lib_enc/evs_enc.c +++ b/lib_enc/evs_enc.c @@ -72,18 +72,20 @@ ivas_error evs_enc( float old_inp_12k8[L_INP_12k8], *inp; /* buffer of input signal @ 12k8 */ float old_inp_16k[L_INP]; /* buffer of input signal @ 16kHz */ float fr_bands[2 * NB_BANDS]; /* energy in frequency bands */ - float Etot; /* total energy; correlation shift */ - float ener; /* residual energy from Levinson-Durbin */ - float A[NB_SUBFR16k * ( M + 1 )]; /* A(z) unquantized for subframes */ - float Aw[NB_SUBFR16k * ( M + 1 )]; /* weighted A(z) unquantized for subframes */ - float epsP[M + 1]; /* LP prediction errors */ - float lsp_new[M]; /* LSPs at 
the end of the frame */ - float lsp_mid[M]; /* ISPs in the middle of the frame */ - int16_t vad_hover_flag; /* VAD hangover flag */ - int16_t hq_core_type; /* HQ core type (HQ, or LR-MDCT) */ - int16_t attack_flag; /* attack flag (GSC or TC) */ - float new_inp_resamp16k[L_FRAME16k]; /* new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */ - float old_syn_12k8_16k[L_FRAME16k]; /* ACELP core synthesis at 12.8kHz or 16kHz to be used by the SWB BWE */ +#ifndef REMOVE_ETOT_PROPAGATION + float Etot; /* total energy; correlation shift */ +#endif + float ener; /* residual energy from Levinson-Durbin */ + float A[NB_SUBFR16k * ( M + 1 )]; /* A(z) unquantized for subframes */ + float Aw[NB_SUBFR16k * ( M + 1 )]; /* weighted A(z) unquantized for subframes */ + float epsP[M + 1]; /* LP prediction errors */ + float lsp_new[M]; /* LSPs at the end of the frame */ + float lsp_mid[M]; /* ISPs in the middle of the frame */ + int16_t vad_hover_flag; /* VAD hangover flag */ + int16_t hq_core_type; /* HQ core type (HQ, or LR-MDCT) */ + int16_t attack_flag; /* attack flag (GSC or TC) */ + float new_inp_resamp16k[L_FRAME16k]; /* new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */ + float old_syn_12k8_16k[L_FRAME16k]; /* ACELP core synthesis at 12.8kHz or 16kHz to be used by the SWB BWE */ float shb_speech[L_FRAME16k]; float hb_speech[L_FRAME16k / 4]; float new_swb_speech[L_FRAME48k]; @@ -183,10 +185,18 @@ ivas_error evs_enc( *---------------------------------------------------------------------*/ #ifdef FIX_I4_OL_PITCH +#ifdef REMOVE_ETOT_PROPAGATION + pre_proc( st, input_frame, old_inp_12k8, old_inp_16k, &inp, fr_bands, &ener, A, Aw, epsP, lsp_new, lsp_mid, &vad_hover_flag, &attack_flag, new_inp_resamp16k, &Voicing_flag, realBuffer, imagBuffer, &hq_core_type ); +#else pre_proc( st, input_frame, old_inp_12k8, old_inp_16k, &inp, fr_bands, &Etot, &ener, A, Aw, epsP, lsp_new, lsp_mid, &vad_hover_flag, &attack_flag, new_inp_resamp16k, &Voicing_flag, 
realBuffer, imagBuffer, &hq_core_type ); +#endif +#else +#ifdef REMOVE_ETOT_PROPAGATION + pre_proc( st, input_frame, old_inp_12k8, old_inp_16k, &inp, fr_bands, &ener, pitch_orig, A, Aw, epsP, lsp_new, lsp_mid, &vad_hover_flag, &attack_flag, new_inp_resamp16k, &Voicing_flag, realBuffer, imagBuffer, &hq_core_type ); #else pre_proc( st, input_frame, old_inp_12k8, old_inp_16k, &inp, fr_bands, &Etot, &ener, pitch_orig, A, Aw, epsP, lsp_new, lsp_mid, &vad_hover_flag, &attack_flag, new_inp_resamp16k, &Voicing_flag, realBuffer, imagBuffer, &hq_core_type ); #endif +#endif if ( st->mdct_sw == MODE2 ) @@ -516,7 +526,9 @@ ivas_error evs_enc( *---------------------------------------------------------------------*/ #ifdef MC_BITRATE_SWITCHING +#ifndef REMOVE_ETOT_PROPAGATION st->hNoiseEst->Etot_last = Etot; +#endif updt_enc_common( st ); #else updt_enc_common( st, Etot ); diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c index 3f600dc6e7d09edfcdbd3aebfa0a67cfc70912b8..7a0ff91da5ea5c9a5e9abb0e6283af8b85f11e29 100644 --- a/lib_enc/ivas_core_enc.c +++ b/lib_enc/ivas_core_enc.c @@ -55,13 +55,15 @@ extern float snr_[2][320]; *-------------------------------------------------------------------*/ ivas_error ivas_core_enc( - SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */ - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ - MCT_ENC_HANDLE hMCT, /* i/o: MCT encoder structure */ - const int16_t n_CoreChannels, /* i : number of core channels to be coded */ - float old_inp_12k8[CPE_CHANNELS][L_INP_12k8], /* i : buffer of old input signal */ - float old_inp_16k[CPE_CHANNELS][L_INP], /* i : buffer of old input signal */ - const float Etot[CPE_CHANNELS], /* i : total energy */ + SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */ + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ + MCT_ENC_HANDLE hMCT, /* i/o: MCT encoder structure */ + const int16_t n_CoreChannels, /* i : number of core channels to be coded */ + float old_inp_12k8[CPE_CHANNELS][L_INP_12k8], /* i 
: buffer of old input signal */ + float old_inp_16k[CPE_CHANNELS][L_INP], /* i : buffer of old input signal */ +#ifndef REMOVE_ETOT_PROPAGATION + const float Etot[CPE_CHANNELS], /* i : total energy */ +#endif float ener[CPE_CHANNELS], /* i : residual energy from Levinson-Durbin */ float A[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )], /* i : A(z) unquantized for the 4 subframes */ float Aw[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )], /* i : weighted A(z) unquantized for subframes */ @@ -409,13 +411,15 @@ ivas_error ivas_core_enc( signaling_enc_rf( st ); -/*---------------------------------------------------------------------* - * Common updates - *---------------------------------------------------------------------*/ + /*---------------------------------------------------------------------* + * Common updates + *---------------------------------------------------------------------*/ + #ifdef MC_BITRATE_SWITCHING - /* for MCT do this later, otherwise there can be a problem because TCX quant happens later and might get the wrong last_core on a bit rate switch */ +#ifndef REMOVE_ETOT_PROPAGATION st->hNoiseEst->Etot_last = Etot[n]; - if ( !MCT_flag ) +#endif + if ( !MCT_flag ) /* for MCT do this later, otherwise there can be a problem because TCX quant happens later and might get the wrong last_core on a bit rate switch */ { updt_enc_common( st ); } diff --git a/lib_enc/ivas_core_pre_proc_front.c b/lib_enc/ivas_core_pre_proc_front.c index 195ae482d3fbe8c808dbb55b2e9dbe85e9dc0f70..f899eb7b67f5359d9592ecab72095b43f7c1b7a2 100644 --- a/lib_enc/ivas_core_pre_proc_front.c +++ b/lib_enc/ivas_core_pre_proc_front.c @@ -68,15 +68,17 @@ static void calculate_energy_buffer( CPE_ENC_HANDLE hCPE, float enerBuffer_dft[] *--------------------------------------------------------------------*/ ivas_error pre_proc_front_ivas( - SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */ - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ - const int32_t element_brate, /* i : SCE/CPE element bitrate 
*/ - const int16_t nb_bits_metadata, /* i : number of metadata bits */ - const int16_t input_frame, /* i : frame length */ - const int16_t n, /* i : channel number */ - float old_inp_12k8[], /* o : buffer of old input signal */ - float old_inp_16k[], /* o : buffer of old input signal @16kHz */ - float *Etot, /* o : total energy */ + SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */ + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ + const int32_t element_brate, /* i : SCE/CPE element bitrate */ + const int16_t nb_bits_metadata, /* i : number of metadata bits */ + const int16_t input_frame, /* i : frame length */ + const int16_t n, /* i : channel number */ + float old_inp_12k8[], /* o : buffer of old input signal */ + float old_inp_16k[], /* o : buffer of old input signal @16kHz */ +#ifndef REMOVE_ETOT_PROPAGATION + float *Etot, /* o : total energy */ +#endif float *ener, /* o : residual energy from Levinson-Durbin */ float *relE, /* o : frame relative energy */ float A[NB_SUBFR16k * ( M + 1 )], /* o : A(z) unquantized for the 4 subframes */ @@ -118,8 +120,11 @@ ivas_error pre_proc_front_ivas( #endif ) { - float *inp_12k8, *new_inp_12k8; /* pointers to current frame and new data */ - float *wsp; /* weighted input signal buffer */ + float *inp_12k8, *new_inp_12k8; /* pointers to current frame and new data */ + float *wsp; /* weighted input signal buffer */ +#ifdef REMOVE_ETOT_PROPAGATION + float Etot; /* total energy */ +#endif float fr_bands[2 * NB_BANDS]; /* energy in frequency bands */ float lf_E[2 * VOIC_BINS]; /* per bin spectrum energy in lf */ float tmpN[NB_BANDS]; /* Temporary noise update */ @@ -421,11 +426,19 @@ ivas_error pre_proc_front_ivas( * Spectral analysis *--------------------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + analy_sp( element_mode, hCPE, input_Fs, inp_12k8, st->Bin_E, st->Bin_E_old, fr_bands, lf_E, &Etot, st->min_band, st->max_band, band_energies, PS, fft_buff ); +#else analy_sp( 
element_mode, hCPE, input_Fs, inp_12k8, st->Bin_E, st->Bin_E_old, fr_bands, lf_E, Etot, st->min_band, st->max_band, band_energies, PS, fft_buff ); +#endif if ( hStereoClassif != NULL ) { +#ifdef REMOVE_ETOT_PROPAGATION + if ( st->lp_speech - Etot > 25 ) +#else if ( st->lp_speech - *Etot > 25 ) +#endif { hStereoClassif->silence_flag = 2; } @@ -440,7 +453,11 @@ ivas_error pre_proc_front_ivas( * SAD (1-signal, 0-noise) *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + noise_est_pre( Etot, st->ini_frame, st->hNoiseEst, st->idchan, element_mode, hCPE != NULL ? hCPE->last_element_mode : element_mode ); +#else noise_est_pre( *Etot, st->ini_frame, st->hNoiseEst, st->idchan, element_mode, hCPE != NULL ? hCPE->last_element_mode : element_mode ); +#endif if ( element_mode == IVAS_CPE_TD && ( ( abs( hCPE->hStereoTD->tdm_last_ratio_idx - tdm_ratio_idx ) > 5 && st->idchan == 1 ) || abs( hCPE->hStereoTD->tdm_last_inst_ratio_idx - hCPE->hStereoTD->tdm_inst_ratio_idx ) > 10 ) ) { @@ -513,7 +530,11 @@ ivas_error pre_proc_front_ivas( * Correlation correction as a function of total noise level *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + noise_est_down( fr_bands, st->hNoiseEst->bckr, tmpN, tmpE, st->min_band, st->max_band, &st->hNoiseEst->totalNoise, Etot, &st->hNoiseEst->Etot_last, &st->hNoiseEst->Etot_v_h2 ); +#else noise_est_down( fr_bands, st->hNoiseEst->bckr, tmpN, tmpE, st->min_band, st->max_band, &st->hNoiseEst->totalNoise, *Etot, &st->hNoiseEst->Etot_last, &st->hNoiseEst->Etot_v_h2 ); +#endif if ( lr_vad_enabled && st->idchan == 0 ) { @@ -523,7 +544,11 @@ ivas_error pre_proc_front_ivas( corr_shiftR = correlation_shift( hCPE->hFrontVad[1]->hNoiseEst->totalNoise ); } +#ifdef REMOVE_ETOT_PROPAGATION + *relE = Etot - st->lp_speech; +#else *relE = *Etot - st->lp_speech; +#endif corr_shift = correlation_shift( st->hNoiseEst->totalNoise ); @@ -669,8 +694,13 @@ ivas_error 
pre_proc_front_ivas( * Update estimated noise energy and voicing cut-off frequency *-----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + noise_est( st, old_pitch1, tmpN, epsP, Etot, *relE, corr_shift, tmpE, fr_bands, cor_map_sum, &ncharX, &sp_div, + &non_staX, loc_harm, lf_E, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp, &dummy /*sp_floor*/, S_map, hStereoClassif, NULL, st->ini_frame ); +#else noise_est( st, old_pitch1, tmpN, epsP, *Etot, *relE, corr_shift, tmpE, fr_bands, cor_map_sum, &ncharX, &sp_div, &non_staX, loc_harm, lf_E, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp, &dummy /*sp_floor*/, S_map, hStereoClassif, NULL, st->ini_frame ); +#endif if ( lr_vad_enabled && st->idchan == 0 ) { @@ -702,7 +732,11 @@ ivas_error pre_proc_front_ivas( find_tilt( fr_bands, st->hNoiseEst->bckr, ee, st->pitch, st->voicing, lf_E, corr_shift, st->input_bwidth, st->max_band, hp_E, MODE1, &( st->bckr_tilt_lt ), st->Opt_SC_VBR ); +#ifdef REMOVE_ETOT_PROPAGATION + st->coder_type = find_uv( st, pitch_fr, voicing_fr, inp_12k8, ee, &dE1X, corr_shift, *relE, Etot, hp_E, &flag_spitch, last_core_orig, hStereoClassif ); +#else st->coder_type = find_uv( st, pitch_fr, voicing_fr, inp_12k8, ee, &dE1X, corr_shift, *relE, *Etot, hp_E, &flag_spitch, last_core_orig, hStereoClassif ); +#endif /*-----------------------------------------------------------------* * channel aware mode configuration * @@ -738,7 +772,11 @@ ivas_error pre_proc_front_ivas( * 1st stage speech/music classification (GMM model) *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + smc_dec = ivas_smc_gmm( st, hStereoClassif, localVAD_HE_SAD, Etot, lsp_new, *cor_map_sum, epsP, PS, non_staX, *relE, &high_lpn_flag, flag_spitch ); +#else smc_dec = ivas_smc_gmm( st, hStereoClassif, localVAD_HE_SAD, *Etot, lsp_new, *cor_map_sum, epsP, PS, non_staX, *relE, &high_lpn_flag, flag_spitch ); +#endif #ifdef DEBUGGING if ( 
st->idchan == 0 ) @@ -761,19 +799,28 @@ ivas_error pre_proc_front_ivas( * Update of old per-band energy spectrum *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + long_enr( st, Etot, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); +#else long_enr( st, *Etot, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); +#endif mvr2r( fr_bands + NB_BANDS, st->hNoiseEst->enrO, NB_BANDS ); if ( lr_vad_enabled && st->idchan == 0 ) { +#ifdef REMOVE_ETOT_PROPAGATION + long_enr( st, -1, localVAD_HE_SAD, high_lpn_flag, hCPE->hFrontVad, CPE_CHANNELS, localVAD_HE_SAD_LR, Etot_LR ); +#else long_enr( st, *Etot, localVAD_HE_SAD, high_lpn_flag, hCPE->hFrontVad, CPE_CHANNELS, localVAD_HE_SAD_LR, Etot_LR ); +#endif mvr2r( fr_bands_LR[0] + NB_BANDS, hCPE->hFrontVad[0]->hNoiseEst->enrO, NB_BANDS ); mvr2r( fr_bands_LR[1] + NB_BANDS, hCPE->hFrontVad[1]->hNoiseEst->enrO, NB_BANDS ); - +#ifndef REMOVE_ETOT_PROPAGATION hCPE->hFrontVad[0]->hNoiseEst->Etot_last = Etot_LR[0]; hCPE->hFrontVad[1]->hNoiseEst->Etot_last = Etot_LR[1]; +#endif } /*----------------------------------------------------------------* @@ -843,7 +890,11 @@ ivas_error pre_proc_front_ivas( } #endif /* 2nd stage speech/music classification (ACELP/GSC/TCX core selection) */ +#ifdef REMOVE_ETOT_PROPAGATION + ivas_smc_mode_selection( st, element_brate, smc_dec, *relE, Etot, attack_flag, inp_12k8, S_map, flag_spitch ); +#else ivas_smc_mode_selection( st, element_brate, smc_dec, *relE, *Etot, attack_flag, inp_12k8, S_map, flag_spitch ); +#endif #ifdef ITD_WINNER_GAIN_MODIFY if ( element_mode == IVAS_CPE_DFT ) diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index 008599e36ba705bd262fa892c59897ddbba49f22..fefc4ed678e2e1cc48b899323de1bef133b4a450 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -63,9 +63,11 @@ ivas_error ivas_cpe_enc( CPE_ENC_HANDLE hCPE; Encoder_State **sts; int16_t n, n_CoreChannels; - float 
old_inp_12k8[CPE_CHANNELS][L_INP_12k8]; /* buffer of input signal @ 12k8 */ - float old_inp_16k[CPE_CHANNELS][L_INP]; /* buffer of input signal @ 16kHz */ - float Etot[CPE_CHANNELS]; /* total energy; correlation shift */ + float old_inp_12k8[CPE_CHANNELS][L_INP_12k8]; /* buffer of input signal @ 12k8 */ + float old_inp_16k[CPE_CHANNELS][L_INP]; /* buffer of input signal @ 16kHz */ +#ifndef REMOVE_ETOT_PROPAGATION + float Etot[CPE_CHANNELS]; /* total energy; correlation shift */ +#endif float ener[CPE_CHANNELS]; /* residual energy from Levinson-Durbin */ float relE[CPE_CHANNELS]; /* frame relative energy */ float A[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )]; /* A(z) unquantized for subframes */ @@ -96,6 +98,9 @@ ivas_error ivas_cpe_enc( int16_t localVAD_HE_SAD[CPE_CHANNELS]; /* HE-SAD flag without hangover, LR channels */ float band_energies_LR[2 * NB_BANDS]; /* energy in critical bands without minimum noise floor E_MIN */ float orig_input[CPE_CHANNELS][L_FRAME48k]; +#ifdef REMOVE_ETOT_PROPAGATION + float Etot_last[CPE_CHANNELS]; +#endif int32_t tmp, input_Fs; int16_t max_bwidth, ivas_format; ENCODER_CONFIG_HANDLE hEncoderConfig; @@ -366,6 +371,10 @@ ivas_error ivas_cpe_enc( { sts[1]->bits_frame_channel -= st_ivas->hQMetaData->metadata_max_bits; } +#ifdef REMOVE_ETOT_PROPAGATION + Etot_last[0] = sts[0]->hNoiseEst->Etot_last; + Etot_last[1] = sts[1]->hNoiseEst->Etot_last; +#endif } else if ( hCPE->element_mode == IVAS_CPE_MDCT ) { @@ -431,7 +440,11 @@ ivas_error ivas_cpe_enc( for ( n = 0; n < n_CoreChannels; n++ ) { #ifdef LOW_RATE_TRANS - error = pre_proc_front_ivas( NULL, hCPE, hCPE->element_brate, nb_bits_metadata, input_frame, n, old_inp_12k8[n], old_inp_16k[n], &Etot[n], &ener[n], &relE[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], + error = pre_proc_front_ivas( NULL, hCPE, hCPE->element_brate, nb_bits_metadata, input_frame, n, old_inp_12k8[n], old_inp_16k[n], +#ifndef REMOVE_ETOT_PROPAGATION + &Etot[n], +#endif + &ener[n], &relE[n], A[n], Aw[n], epsP[n], 
lsp_new[n], lsp_mid[n], &vad_hover_flag[n], &attack_flag[n], realBuffer[n], imagBuffer[n], old_wsp[n], pitch_fr[n], voicing_fr[n], &loc_harm[n], &cor_map_sum[n], &vad_flag_dtx[n], enerBuffer[n], fft_buff[n], A[0], lsp_new[0], currFlatness[n], tdm_ratio_idx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, band_energies_LR, 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, 0, 0, ivas_total_brate, st_ivas->hEncoderConfig->ivas_format ); @@ -523,7 +536,11 @@ ivas_error ivas_cpe_enc( { tdm_ol_pitch_comparison( hCPE, pitch_fr, voicing_fr ); +#ifdef REMOVE_ETOT_PROPAGATION + tdm_configure_enc( hCPE, Etot_last, tdm_SM_or_LRTD_Pri, tdm_ratio_idx, tdm_ratio_idx_SM, attack_flag[0], nb_bits_metadata ); +#else tdm_configure_enc( hCPE, tdm_SM_or_LRTD_Pri, tdm_ratio_idx, tdm_ratio_idx_SM, attack_flag[0], nb_bits_metadata ); +#endif if ( hEncoderConfig->Opt_DTX_ON ) { @@ -624,7 +641,11 @@ ivas_error ivas_cpe_enc( * Core Encoder *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + if ( ( error = ivas_core_enc( NULL, hCPE, st_ivas->hMCT, n_CoreChannels, old_inp_12k8, old_inp_16k, ener, A, Aw, epsP, lsp_new, lsp_mid, vad_hover_flag, attack_flag, realBuffer, imagBuffer, old_wsp, loc_harm, cor_map_sum, vad_flag_dtx, enerBuffer, fft_buff, tdm_SM_or_LRTD_Pri, ivas_format, 0 ) ) != IVAS_ERR_OK ) +#else if ( ( error = ivas_core_enc( NULL, hCPE, st_ivas->hMCT, n_CoreChannels, old_inp_12k8, old_inp_16k, Etot, ener, A, Aw, epsP, lsp_new, lsp_mid, vad_hover_flag, attack_flag, realBuffer, imagBuffer, old_wsp, loc_harm, cor_map_sum, vad_flag_dtx, enerBuffer, fft_buff, tdm_SM_or_LRTD_Pri, ivas_format, 0 ) ) != IVAS_ERR_OK ) +#endif { return error; } diff --git a/lib_enc/ivas_front_vad.c b/lib_enc/ivas_front_vad.c index be54c3fc7e83c1509e665b89db88c3919f61f9d3..30515751501cea479090e5ef7b59a4bd8808d7f7 100644 --- a/lib_enc/ivas_front_vad.c +++ b/lib_enc/ivas_front_vad.c @@ -472,11 +472,15 @@ ivas_error front_vad_spar( /* 1st stage 
speech/music classification (GMM model) */ /* run only to get 'high_lpn_flag' parameter */ - ivas_smc_gmm( st, NULL /* <-- hStereoClassif */, localVAD_HE_SAD[0], *Etot, lsp_new, cor_map_sum, epsP, PS, non_staX, relE, &high_lpn_flag, flag_spitch ); + ivas_smc_gmm( st, NULL, localVAD_HE_SAD[0], Etot[0], lsp_new, cor_map_sum, epsP, PS, non_staX, relE, &high_lpn_flag, flag_spitch ); /* long-term energy update */ +#ifdef REMOVE_ETOT_PROPAGATION + long_enr( st, -1, localVAD_HE_SAD[0], high_lpn_flag, &hFrontVad, 1, localVAD_HE_SAD, Etot ); +#else long_enr( st, *Etot, localVAD_HE_SAD[0], high_lpn_flag, &hFrontVad, 1, localVAD_HE_SAD, Etot ); hFrontVad->hNoiseEst->Etot_last = Etot[0]; +#endif /* increase ini_frame counter */ hFrontVad->ini_frame = min( hFrontVad->ini_frame + 1, MAX_FRAME_COUNTER ); diff --git a/lib_enc/ivas_ism_enc.c b/lib_enc/ivas_ism_enc.c index b45fc8f7f125a817a79326adb3be1fd19dd4916f..4cbfa6acc8eddfcb0af3ef2d45b5d3a7d156ede0 100644 --- a/lib_enc/ivas_ism_enc.c +++ b/lib_enc/ivas_ism_enc.c @@ -58,10 +58,12 @@ ivas_error ivas_ism_enc( SCE_ENC_HANDLE hSCE; Encoder_State *st; int16_t sce_id; - float old_inp_12k8[MAX_NUM_OBJECTS][1][L_INP_12k8]; /* buffer of input signal @ 12k8 */ - float old_inp_16k[MAX_NUM_OBJECTS][1][L_INP]; /* buffer of input signal @ 16kHz */ - int16_t vad_flag[MAX_NUM_OBJECTS]; /* VAD flag */ - float Etot[MAX_NUM_OBJECTS][1]; /* total energy; correlation shift */ + float old_inp_12k8[MAX_NUM_OBJECTS][1][L_INP_12k8]; /* buffer of input signal @ 12k8 */ + float old_inp_16k[MAX_NUM_OBJECTS][1][L_INP]; /* buffer of input signal @ 16kHz */ + int16_t vad_flag[MAX_NUM_OBJECTS]; /* VAD flag */ +#ifndef REMOVE_ETOT_PROPAGATION + float Etot[MAX_NUM_OBJECTS][1]; /* total energy; correlation shift */ +#endif float ener[MAX_NUM_OBJECTS][1]; /* residual energy from Levinson-Durbin */ float relE[MAX_NUM_OBJECTS][1]; /* frame relative energy */ float A[MAX_NUM_OBJECTS][1][NB_SUBFR16k * ( M + 1 )]; /* A(z) unquantized for subframes */ @@ -149,13 
+151,19 @@ ivas_error ivas_ism_enc( #ifdef LOW_RATE_TRANS error = pre_proc_front_ivas( hSCE, NULL, hSCE->element_brate, nb_bits_metadata[sce_id], input_frame, 0, old_inp_12k8[sce_id][0], old_inp_16k[sce_id][0], - &Etot[sce_id][0], &ener[sce_id][0], &relE[sce_id][0], A[sce_id][0], Aw[sce_id][0], epsP[sce_id][0], lsp_new[sce_id][0], lsp_mid[sce_id][0], +#ifndef REMOVE_ETOT_PROPAGATION + &Etot[sce_id][0], +#endif + &ener[sce_id][0], &relE[sce_id][0], A[sce_id][0], Aw[sce_id][0], epsP[sce_id][0], lsp_new[sce_id][0], lsp_mid[sce_id][0], &vad_hover_flag[sce_id][0], &attack_flag[sce_id][0], realBuffer[sce_id][0], imagBuffer[sce_id][0], old_wsp[sce_id][0], pitch_fr[sce_id][0], voicing_fr[sce_id][0], &loc_harm[sce_id][0], &cor_map_sum[sce_id][0], &vad_flag_dtx[sce_id][0], enerBuffer[sce_id][0], fft_buff[sce_id][0], A[sce_id][0], lsp_new[sce_id][0], currFlatness[0], 0, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, NULL, 0, 0, 0, 0, st_ivas->hEncoderConfig->ivas_total_brate, st_ivas->hEncoderConfig->ivas_format ); #else error = pre_proc_front_ivas( hSCE, NULL, hSCE->element_brate, nb_bits_metadata[sce_id], input_frame, 0, old_inp_12k8[sce_id][0], old_inp_16k[sce_id][0], - &Etot[sce_id][0], &ener[sce_id][0], &relE[sce_id][0], A[sce_id][0], Aw[sce_id][0], epsP[sce_id][0], lsp_new[sce_id][0], lsp_mid[sce_id][0], +#ifndef REMOVE_ETOT_PROPAGATION + &Etot[sce_id][0], +#endif + &ener[sce_id][0], &relE[sce_id][0], A[sce_id][0], Aw[sce_id][0], epsP[sce_id][0], lsp_new[sce_id][0], lsp_mid[sce_id][0], &vad_hover_flag[sce_id][0], &attack_flag[sce_id][0], realBuffer[sce_id][0], imagBuffer[sce_id][0], old_wsp[sce_id][0], pitch_fr[sce_id][0], voicing_fr[sce_id][0], &loc_harm[sce_id][0], &cor_map_sum[sce_id][0], &vad_flag_dtx[sce_id][0], enerBuffer[sce_id][0], fft_buff[sce_id][0], A[sce_id][0], lsp_new[sce_id][0], currFlatness[0], 0, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, NULL, 0, 0, 0, 0, st_ivas->hEncoderConfig->ivas_total_brate ); @@ -271,7 +279,11 @@ ivas_error ivas_ism_enc( * Encoder 
*----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + if ( ( error = ivas_core_enc( hSCE, NULL, NULL, 1, old_inp_12k8[sce_id], old_inp_16k[sce_id], ener[sce_id], A[sce_id], Aw[sce_id], epsP[sce_id], lsp_new[sce_id], lsp_mid[sce_id], vad_hover_flag[sce_id], attack_flag[sce_id], realBuffer[sce_id], imagBuffer[sce_id], old_wsp[sce_id], loc_harm[sce_id], cor_map_sum[sce_id], vad_flag_dtx[sce_id], enerBuffer[sce_id], fft_buff[sce_id], 0, ISM_FORMAT, 0 ) ) != IVAS_ERR_OK ) +#else if ( ( error = ivas_core_enc( hSCE, NULL, NULL, 1, old_inp_12k8[sce_id], old_inp_16k[sce_id], Etot[sce_id], ener[sce_id], A[sce_id], Aw[sce_id], epsP[sce_id], lsp_new[sce_id], lsp_mid[sce_id], vad_hover_flag[sce_id], attack_flag[sce_id], realBuffer[sce_id], imagBuffer[sce_id], old_wsp[sce_id], loc_harm[sce_id], cor_map_sum[sce_id], vad_flag_dtx[sce_id], enerBuffer[sce_id], fft_buff[sce_id], 0, ISM_FORMAT, 0 ) ) != IVAS_ERR_OK ) +#endif { return error; } diff --git a/lib_enc/ivas_mct_enc.c b/lib_enc/ivas_mct_enc.c index 3fa3ad04bdd84817d1f756275cac4ac8611d321a..4d76e9e5b21b046a21bb8637ea4c53204e33f66e 100755 --- a/lib_enc/ivas_mct_enc.c +++ b/lib_enc/ivas_mct_enc.c @@ -195,6 +195,7 @@ ivas_error ivas_mct_enc( for ( n = 0; n < CPE_CHANNELS; n++ ) { mvr2r( hCPE->hCoreCoder[n]->input, hCPE->hCoreCoder[n]->old_input_signal, input_frame ); + #ifdef MC_BITRATE_SWITCHING /* common encoder updates */ updt_enc_common( hCPE->hCoreCoder[n] ); diff --git a/lib_enc/ivas_sce_enc.c b/lib_enc/ivas_sce_enc.c index cdd37d8ece0415ab607036f9582d57f11defd3b4..a7d41952492e3142c32ec3b996511e0ec2716344 100644 --- a/lib_enc/ivas_sce_enc.c +++ b/lib_enc/ivas_sce_enc.c @@ -58,9 +58,11 @@ ivas_error ivas_sce_enc( const int16_t nb_bits_metadata /* i : number of metadata bits */ ) { - float old_inp_12k8[1][L_INP_12k8]; /* buffer of input signal @ 12k8 */ - float old_inp_16k[1][L_INP]; /* buffer of input signal @ 16kHz */ - float Etot[1]; /* total energy; correlation shift */ + 
float old_inp_12k8[1][L_INP_12k8]; /* buffer of input signal @ 12k8 */ + float old_inp_16k[1][L_INP]; /* buffer of input signal @ 16kHz */ +#ifndef REMOVE_ETOT_PROPAGATION + float Etot[1]; /* total energy; correlation shift */ +#endif float ener[1]; /* residual energy from Levinson-Durbin */ float relE[1]; /* frame relative energy */ float A[1][NB_SUBFR16k * ( M + 1 )]; /* A(z) unquantized for subframes */ @@ -187,14 +189,20 @@ ivas_error ivas_sce_enc( #ifdef LOW_RATE_TRANS error = pre_proc_front_ivas( hSCE, NULL, hSCE->element_brate, nb_bits_metadata, input_frame, 0, old_inp_12k8[0], old_inp_16k[0], - &Etot[0], &ener[0], &relE[0], A[0], Aw[0], epsP[0], lsp_new[0], lsp_mid[0], +#ifndef REMOVE_ETOT_PROPAGATION + &Etot[0], +#endif + &ener[0], &relE[0], A[0], Aw[0], epsP[0], lsp_new[0], lsp_mid[0], &vad_hover_flag[0], &attack_flag[0], realBuffer[0], imagBuffer[0], old_wsp[0], pitch_fr[0], voicing_fr[0], &loc_harm[0], &cor_map_sum[0], &vad_flag_dtx[0], enerBuffer[0], fft_buff[0], A[0], lsp_new[0], currFlatness[0], 0, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, NULL, flag_16k_smc, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->force_front_vad : 0, st_ivas->hSpar != NULL ? 
st_ivas->hSpar->front_vad_dtx_flag : 0, st_ivas->hEncoderConfig->ivas_total_brate, st_ivas->hEncoderConfig->ivas_format ); #else error = pre_proc_front_ivas( hSCE, NULL, hSCE->element_brate, nb_bits_metadata, input_frame, 0, old_inp_12k8[0], old_inp_16k[0], - &Etot[0], &ener[0], &relE[0], A[0], Aw[0], epsP[0], lsp_new[0], lsp_mid[0], +#ifndef REMOVE_ETOT_PROPAGATION + &Etot[0], +#endif + &ener[0], &relE[0], A[0], Aw[0], epsP[0], lsp_new[0], lsp_mid[0], &vad_hover_flag[0], &attack_flag[0], realBuffer[0], imagBuffer[0], old_wsp[0], pitch_fr[0], voicing_fr[0], &loc_harm[0], &cor_map_sum[0], &vad_flag_dtx[0], enerBuffer[0], fft_buff[0], A[0], lsp_new[0], currFlatness[0], 0, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, NULL, flag_16k_smc, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->force_front_vad : 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_dtx_flag : 0, @@ -251,7 +259,11 @@ ivas_error ivas_sce_enc( * Encoder *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + if ( ( error = ivas_core_enc( hSCE, NULL, NULL, 1, old_inp_12k8, old_inp_16k, ener, A, Aw, epsP, lsp_new, lsp_mid, vad_hover_flag, attack_flag, realBuffer, imagBuffer, old_wsp, loc_harm, cor_map_sum, vad_flag_dtx, enerBuffer, fft_buff, 0, ivas_format, flag_16k_smc ) ) != IVAS_ERR_OK ) +#else if ( ( error = ivas_core_enc( hSCE, NULL, NULL, 1, old_inp_12k8, old_inp_16k, Etot, ener, A, Aw, epsP, lsp_new, lsp_mid, vad_hover_flag, attack_flag, realBuffer, imagBuffer, old_wsp, loc_harm, cor_map_sum, vad_flag_dtx, enerBuffer, fft_buff, 0, ivas_format, flag_16k_smc ) ) != IVAS_ERR_OK ) +#endif { return error; } diff --git a/lib_enc/ivas_stereo_td_enc.c b/lib_enc/ivas_stereo_td_enc.c index 1e7fe30bd1d7ba6800a3ac0cf631b37aa725fa26..bc454f0220c6ce62b43f2a36e2da4b70eccf64f2 100644 --- a/lib_enc/ivas_stereo_td_enc.c +++ b/lib_enc/ivas_stereo_td_enc.c @@ -302,7 +302,10 @@ ivas_error stereo_set_tdm( 
*-------------------------------------------------------------------*/ void tdm_configure_enc( - CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ + CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ +#ifdef REMOVE_ETOT_PROPAGATION + const float Etot_last[CPE_CHANNELS], /* i/o: Energy of last frame */ +#endif const int16_t tdm_SM_or_LRTD_Pri, /* i : channel combination scheme flag in TD stereo OR LRTD primary channel */ const int16_t tdm_ratio_idx, /* i : ratio index */ const int16_t tdm_ratio_idx_SM, /* i : ratio index in SM mode */ @@ -326,11 +329,21 @@ void tdm_configure_enc( *----------------------------------------------------------------*/ hStereoTD->tdm_use_IAWB_Ave_lpc = 0; /* Flag initialisation */ +#ifdef REMOVE_ETOT_PROPAGATION + sts[0]->hSpMusClas->tdm_lt_Etot = 0.1f * Etot_last[0] + 0.9f * sts[0]->hSpMusClas->tdm_lt_Etot; + sts[1]->hSpMusClas->tdm_lt_Etot = 0.1f * Etot_last[1] + 0.9f * sts[1]->hSpMusClas->tdm_lt_Etot; +#else sts[0]->hSpMusClas->tdm_lt_Etot = 0.1f * sts[0]->hNoiseEst->Etot_last + 0.9f * sts[0]->hSpMusClas->tdm_lt_Etot; sts[1]->hSpMusClas->tdm_lt_Etot = 0.1f * sts[1]->hNoiseEst->Etot_last + 0.9f * sts[1]->hSpMusClas->tdm_lt_Etot; +#endif +#ifdef REMOVE_ETOT_PROPAGATION + if ( hCPE->hStereoClassif->lrtd_mode == 0 && ( ( sts[1]->hSpMusClas->tdm_lt_Etot < 0 && hCPE->hCoreCoder[1]->vad_flag == 0 ) /* very clean signal */ + || ( hCPE->hCoreCoder[1]->vad_flag == 0 || ( Etot_last[1] < 30.0f && ( sts[0]->hSpMusClas->tdm_lt_Etot - sts[1]->hSpMusClas->tdm_lt_Etot ) > 26.0f ) ) ) ) +#else if ( hCPE->hStereoClassif->lrtd_mode == 0 && ( ( sts[1]->hSpMusClas->tdm_lt_Etot < 0 && hCPE->hCoreCoder[1]->vad_flag == 0 ) /* very clean signal */ || ( hCPE->hCoreCoder[1]->vad_flag == 0 || ( sts[1]->hNoiseEst->Etot_last < 30.0f && ( sts[0]->hSpMusClas->tdm_lt_Etot - sts[1]->hSpMusClas->tdm_lt_Etot ) > 26.0f ) ) ) ) +#endif { sts[1]->coder_type = INACTIVE; @@ -340,7 +353,11 @@ void tdm_configure_enc( } hStereoTD->tdm_lp_reuse_flag = 1; } +#ifdef 
REMOVE_ETOT_PROPAGATION + else if ( ( ( hCPE->hCoreCoder[1]->vad_flag == 0 ) || ( hCPE->hCoreCoder[0]->vad_flag == 0 && Etot_last[1] < 30.0f && ( sts[0]->hSpMusClas->tdm_lt_Etot - sts[1]->hSpMusClas->tdm_lt_Etot ) > 26.0f ) ) && ( hCPE->hStereoClassif->lrtd_mode == 1 ) /* && NO_DTX */ ) /* boths channels are inactive but not DTX used*/ +#else else if ( ( ( hCPE->hCoreCoder[1]->vad_flag == 0 ) || ( hCPE->hCoreCoder[0]->vad_flag == 0 && sts[1]->hNoiseEst->Etot_last < 30.0f && ( sts[0]->hSpMusClas->tdm_lt_Etot - sts[1]->hSpMusClas->tdm_lt_Etot ) > 26.0f ) ) && ( hCPE->hStereoClassif->lrtd_mode == 1 ) /* && NO_DTX */ ) /* boths channels are inactive but not DTX used*/ +#endif { sts[1]->coder_type = INACTIVE; if ( tdm_ratio_idx > 1 && tdm_ratio_idx < 29 ) @@ -352,7 +369,11 @@ void tdm_configure_enc( hStereoTD->tdm_lp_reuse_flag = 1; } } +#ifdef REMOVE_ETOT_PROPAGATION + else if ( !( sts[1]->sp_aud_decision0 ) && sts[1]->tc_cnt <= 0 && ( sts[1]->coder_type_raw == UNVOICED || ( hStereoTD->tdm_LRTD_flag == 1 && hStereoTD->tdm_lp_reuse_flag == 0 && ( hCPE->hCoreCoder[1]->vad_flag == 0 || ( Etot_last[1] < 30.0f && ( sts[0]->hSpMusClas->tdm_lt_Etot - sts[1]->hSpMusClas->tdm_lt_Etot ) > 26.0f ) ) ) ) ) +#else else if ( !( sts[1]->sp_aud_decision0 ) && sts[1]->tc_cnt <= 0 && ( sts[1]->coder_type_raw == UNVOICED || ( hStereoTD->tdm_LRTD_flag == 1 && hStereoTD->tdm_lp_reuse_flag == 0 && ( hCPE->hCoreCoder[1]->vad_flag == 0 || ( sts[1]->hNoiseEst->Etot_last < 30.0f && ( sts[0]->hSpMusClas->tdm_lt_Etot - sts[1]->hSpMusClas->tdm_lt_Etot ) > 26.0f ) ) ) ) ) +#endif { sts[1]->coder_type = UNVOICED; if ( hStereoTD->tdm_LRTD_flag == 1 ) diff --git a/lib_enc/long_enr.c b/lib_enc/long_enr.c index 3dc098af6193abf26bed2f8374d0417432204bc5..68128b5d8ae10b3c89f002a9d0f90fc6428c2df8 100644 --- a/lib_enc/long_enr.c +++ b/lib_enc/long_enr.c @@ -67,6 +67,7 @@ void long_enr( * Compute long term estimate of total noise energy * and total active speech energy 
*-----------------------------------------------------------------*/ + if ( hFrontVad != NULL ) { if ( hFrontVad[0]->ini_frame < 4 ) @@ -114,6 +115,14 @@ void long_enr( } } } + +#ifdef REMOVE_ETOT_PROPAGATION + /* Update */ + for ( n = 0; n < n_chan; n++ ) + { + hFrontVad[n]->hNoiseEst->Etot_last = Etot_LR[n]; + } +#endif } else { @@ -150,6 +159,11 @@ void long_enr( } } } + +#ifdef REMOVE_ETOT_PROPAGATION + /* Update */ + st->hNoiseEst->Etot_last = Etot; +#endif } return; diff --git a/lib_enc/nois_est.c b/lib_enc/nois_est.c index ad07d46945ce5c8a77b462a7f57113beb089cb8a..62330f71536bc822d79314c92ab9ed31b3fbc4d3 100644 --- a/lib_enc/nois_est.c +++ b/lib_enc/nois_est.c @@ -298,6 +298,7 @@ void noise_est_down( *------------------------------------------------------------------*/ Etot_v = (float) fabs( *Etot_last - Etot ); + *Etot_v_h2 = ( 1.0f - 0.02f ) * *Etot_v_h2 + 0.02f * min( 3.0f, Etot_v ); if ( *Etot_v_h2 < 0.1f ) { diff --git a/lib_enc/pre_proc.c b/lib_enc/pre_proc.c index 33728a6f40a3cfe6ca36343d379b31c2ffc2e9d3..6eb11ca68ed1383e3170674bec744974db0fe5c8 100644 --- a/lib_enc/pre_proc.c +++ b/lib_enc/pre_proc.c @@ -59,8 +59,10 @@ void pre_proc( float old_inp_16k[], /* i/o: buffer of old input signal @ 16kHz */ float **inp, /* o : ptr. to inp. 
signal in the current frame*/ float fr_bands[2 * NB_BANDS], /* i : energy in frequency bands */ - float *Etot, /* i : total energy */ - float *ener, /* o : residual energy from Levinson-Durbin */ +#ifndef REMOVE_ETOT_PROPAGATION + float *Etot, /* i : total energy */ +#endif + float *ener, /* o : residual energy from Levinson-Durbin */ #ifndef FIX_I4_OL_PITCH int16_t pitch_orig[3], /* o : open-loop pitch values for quantization */ #endif @@ -84,18 +86,21 @@ void pre_proc( float old_wsp[L_WSP], *wsp; /* weighted input signal buffer */ float pitch_fr[NB_SUBFR]; /* fractional pitch values */ float voicing_fr[NB_SUBFR]; /* fractional pitch gains */ - float lf_E[2 * VOIC_BINS]; /* per bin spectrum energy in lf */ - float tmpN[NB_BANDS]; /* Temporary noise update */ - float tmpE[NB_BANDS]; /* Temporary averaged energy of 2 sf. */ - float ee[2]; /* Spectral tilt */ - float corr_shift; /* correlation shift */ - float relE; /* frame relative energy */ - int16_t loc_harm; /* harmonicity flag */ - float cor_map_sum, sp_div, PS[128]; /* speech/music clasif. parameters */ - int16_t L_look; /* length of look-ahead */ - float snr_sum_he; /* HE SAD parameters */ - int16_t localVAD_HE_SAD; /* HE SAD parameters */ - int16_t vad_flag_dtx; /* HE-SAD flag with additional DTX HO */ +#ifdef REMOVE_ETOT_PROPAGATION + float Etot; /* total energy */ +#endif + float lf_E[2 * VOIC_BINS]; /* per bin spectrum energy in lf */ + float tmpN[NB_BANDS]; /* Temporary noise update */ + float tmpE[NB_BANDS]; /* Temporary averaged energy of 2 sf. */ + float ee[2]; /* Spectral tilt */ + float corr_shift; /* correlation shift */ + float relE; /* frame relative energy */ + int16_t loc_harm; /* harmonicity flag */ + float cor_map_sum, sp_div, PS[128]; /* speech/music clasif. 
parameters */ + int16_t L_look; /* length of look-ahead */ + float snr_sum_he; /* HE SAD parameters */ + int16_t localVAD_HE_SAD; /* HE SAD parameters */ + int16_t vad_flag_dtx; /* HE-SAD flag with additional DTX HO */ int16_t vad_flag_cldfb; float old_cor; float hp_E[2]; /* Energy in HF */ @@ -190,14 +195,24 @@ void pre_proc( * Spectral analysis *--------------------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + analy_sp( -1, NULL, st->input_Fs, inp_12k8, st->Bin_E, st->Bin_E_old, fr_bands, lf_E, &Etot, st->min_band, st->max_band, band_energies, PS, fft_buff ); +#else analy_sp( -1, NULL, st->input_Fs, inp_12k8, st->Bin_E, st->Bin_E_old, fr_bands, lf_E, Etot, st->min_band, st->max_band, band_energies, PS, fft_buff ); +#endif /*----------------------------------------------------------------* * SAD (1-signal, 0-noise) *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + noise_est_pre( Etot, st->ini_frame, st->hNoiseEst, 0, EVS_MONO, EVS_MONO ); +#else noise_est_pre( *Etot, st->ini_frame, st->hNoiseEst, 0, EVS_MONO, EVS_MONO ); +#endif + st->vad_flag = wb_vad( st, fr_bands, &noisy_speech_HO, &clean_speech_HO, &NB_speech_HO, &snr_sum_he, &localVAD_HE_SAD, &( st->flag_noisy_speech_snr ), NULL, NULL, -1000.0f, -1000.0f ); + vad_flag_cldfb = vad_proc( realBuffer, imagBuffer, enerBuffer, st->cldfbAnaEnc->no_channels, st->hVAD_CLDFB, &cldfb_addition, st->vad_flag ); if ( st->Pos_relE_cnt < 20 ) /* Ensure the level is high enough and cldfb decision is reliable */ @@ -226,9 +241,15 @@ void pre_proc( * Correlation correction as a function of total noise level *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + noise_est_down( fr_bands, st->hNoiseEst->bckr, tmpN, tmpE, st->min_band, st->max_band, &st->hNoiseEst->totalNoise, Etot, &st->hNoiseEst->Etot_last, &st->hNoiseEst->Etot_v_h2 ); + + relE = Etot - st->lp_speech; +#else 
noise_est_down( fr_bands, st->hNoiseEst->bckr, tmpN, tmpE, st->min_band, st->max_band, &st->hNoiseEst->totalNoise, *Etot, &st->hNoiseEst->Etot_last, &st->hNoiseEst->Etot_v_h2 ); relE = *Etot - st->lp_speech; +#endif if ( relE > 1.5f ) { @@ -354,7 +375,11 @@ void pre_proc( * Update estimated noise energy and voicing cut-off frequency *-----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + noise_est( st, old_pitch1, tmpN, epsP, Etot, relE, corr_shift, tmpE, fr_bands, &cor_map_sum, NULL, &sp_div, &non_staX, &loc_harm, lf_E, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp, &sp_floor, 0, NULL, NULL, st->ini_frame ); +#else noise_est( st, old_pitch1, tmpN, epsP, *Etot, relE, corr_shift, tmpE, fr_bands, &cor_map_sum, NULL, &sp_div, &non_staX, &loc_harm, lf_E, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp, &sp_floor, 0, NULL, NULL, st->ini_frame ); +#endif /*------------------------------------------------------------------* * Update parameters used in the VAD and DTX @@ -369,7 +394,11 @@ void pre_proc( find_tilt( fr_bands, st->hNoiseEst->bckr, ee, st->pitch, st->voicing, lf_E, corr_shift, st->input_bwidth, st->max_band, hp_E, st->codec_mode, &( st->bckr_tilt_lt ), st->Opt_SC_VBR ); +#ifdef REMOVE_ETOT_PROPAGATION + st->coder_type = find_uv( st, pitch_fr, voicing_fr, inp_12k8, ee, NULL, corr_shift, relE, Etot, hp_E, &flag_spitch, last_core_orig, NULL ); +#else st->coder_type = find_uv( st, pitch_fr, voicing_fr, inp_12k8, ee, NULL, corr_shift, relE, *Etot, hp_E, &flag_spitch, last_core_orig, NULL ); +#endif /*-----------------------------------------------------------------* * channel aware mode configuration * @@ -423,8 +452,15 @@ void pre_proc( st->GSC_IVAS_mode = 0; +#ifdef REMOVE_ETOT_PROPAGATION + speech_music_classif( st, new_inp_12k8, inp_12k8, localVAD_HE_SAD, lsp_new, cor_map_sum, epsP, PS, Etot, old_cor, attack_flag, non_staX, relE, &high_lpn_flag, flag_spitch ); + + long_enr( st, Etot, 
localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); +#else speech_music_classif( st, new_inp_12k8, inp_12k8, localVAD_HE_SAD, lsp_new, cor_map_sum, epsP, PS, *Etot, old_cor, attack_flag, non_staX, relE, &high_lpn_flag, flag_spitch ); + long_enr( st, *Etot, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); +#endif /*----------------------------------------------------------------* * Final VAD correction ( when HE-SAD is used instead of the normal VAD, @@ -576,7 +612,11 @@ void pre_proc( } if ( st->total_brate == ACELP_13k20 && st->bwidth != FB ) { +#ifdef REMOVE_ETOT_PROPAGATION + MDCT_selector( st, sp_floor, Etot, cor_map_sum, enerBuffer ); +#else MDCT_selector( st, sp_floor, *Etot, cor_map_sum, enerBuffer ); +#endif } } else @@ -872,7 +912,11 @@ void pre_proc( if ( st->total_brate == ACELP_16k40 && st->bwidth != FB ) { +#ifdef REMOVE_ETOT_PROPAGATION + MDCT_selector( st, sp_floor, Etot, cor_map_sum, enerBuffer ); +#else MDCT_selector( st, sp_floor, *Etot, cor_map_sum, enerBuffer ); +#endif } } else