From 444b72d4a627370f2dc510a3d7ab45f36b35d936 Mon Sep 17 00:00:00 2001 From: vaclav Date: Thu, 15 Dec 2022 18:30:10 +0100 Subject: [PATCH 1/3] Do not propagate Etot parameter */; under REMOVE_ETOT_PROPAGATION --- lib_com/ivas_prot.h | 7 +++ lib_com/options.h | 2 + lib_com/prot.h | 13 ++++-- lib_enc/amr_wb_enc.c | 4 ++ lib_enc/evs_enc.c | 38 ++++++++++----- lib_enc/ivas_core_enc.c | 20 +++++--- lib_enc/ivas_core_pre_proc_front.c | 75 +++++++++++++++++++++++++----- lib_enc/ivas_cpe_enc.c | 30 ++++++++++-- lib_enc/ivas_ism_enc.c | 19 ++++++-- lib_enc/ivas_sce_enc.c | 17 +++++-- lib_enc/ivas_stereo_td_enc.c | 23 ++++++++- lib_enc/nois_est.c | 4 ++ lib_enc/pre_proc.c | 72 ++++++++++++++++++++++------ lib_enc/updt_enc.c | 9 +++- 14 files changed, 269 insertions(+), 64 deletions(-) diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index 120042c891..08cc2effde 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -183,7 +183,9 @@ ivas_error pre_proc_front_ivas( const int16_t n, /* i : channel number */ float old_inp_12k8[], /* o : buffer of old input signal */ float old_inp_16k[], /* o : buffer of old input signal @16kHz */ +#ifndef REMOVE_ETOT_PROPAGATION float *Etot, /* o : total energy */ +#endif float *ener, /* o : residual energy from Levinson-Durbin */ float *relE, /* o : frame relative energy */ float A[NB_SUBFR16k * ( M + 1 )], /* o : A(z) unquantized for the 4 subframes */ @@ -413,7 +415,9 @@ ivas_error ivas_core_enc( const int16_t n_CoreChannels, /* i : number of core channels to be coded */ float old_inp_12k8[CPE_CHANNELS][L_INP_12k8], /* i : buffer of old input signal */ float old_inp_16k[CPE_CHANNELS][L_INP], /* i : buffer of old input signal */ +#ifndef REMOVE_ETOT_PROPAGATION const float Etot[CPE_CHANNELS], /* i : total energy */ +#endif float ener[CPE_CHANNELS], /* i : residual energy from Levinson-Durbin */ float A[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )], /* i : A(z) unquantized for the 4 subframes */ float Aw[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )], /* i : weighted A(z) unquantized for subframes */ @@ -1713,6 +1717,9 @@ void tdm_ol_pitch_comparison( void tdm_configure_enc( CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ +#ifdef REMOVE_ETOT_PROPAGATION + const float Etot_last[CPE_CHANNELS], /* i/o: Energy of last frame */ +#endif const int16_t tdm_SM_or_LRTD_Pri, /* i : channel combination scheme flag in TD stereo OR LRTD primary channel */ const int16_t tdm_ratio_idx, /* i : ratio index */ const int16_t tdm_ratio_idx_SM, /* i : ratio index in SM mode */ diff --git a/lib_com/options.h b/lib_com/options.h index 69cdf582f8..e4216188f8 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -149,6 +149,8 @@ #define BRATE_SWITCHING_RENDERING /* Bitrate switching changes related to the renderers */ #define FIX_150 /* Issue 150: Crash in EVS mono, HQ_HARMONIC mode, related to BASOP_NOGLOB */ #define FIX_VBR_COMPLEXITY /* Issue 234: fix extremely high complexity numbers for IVAS EVS mode */ +#define REMOVE_ETOT_PROPAGATION /* Issue 251: Do not propagate Etot parameter */ + /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ diff --git a/lib_com/prot.h b/lib_com/prot.h index 760571cb46..fb2097b0dd 100644 --- a/lib_com/prot.h +++ b/lib_com/prot.h @@ -2300,8 +2300,10 @@ void pre_proc( float old_inp_16k[], /* i/o: buffer of old input signal @ 16kHz */ float **inp, /* o : ptr. to inp. signal in the current frame*/ float fr_bands[2 * NB_BANDS], /* i : energy in frequency bands */ - float *Etot, /* i : total energy */ - float *ener, /* o : residual energy from Levinson-Durbin */ +#ifndef REMOVE_ETOT_PROPAGATION + float *Etot, /* i : total energy */ +#endif + float *ener, /* o : residual energy from Levinson-Durbin */ #ifndef FIX_I4_OL_PITCH int16_t pitch_orig[3], /* o : open-loop pitch values for quantization */ #endif @@ -3914,8 +3916,11 @@ void updt_enc( ); void updt_enc_common( - Encoder_State *st, /* i/o: encoder state structure */ - const float Etot /* i : total energy */ + Encoder_State *st /* i/o: encoder state structure */ +#ifndef REMOVE_ETOT_PROPAGATION + , + const float Etot /* i : total energy */ +#endif ); void updt_IO_switch_enc( diff --git a/lib_enc/amr_wb_enc.c b/lib_enc/amr_wb_enc.c index 3b342220e3..8d2ecc5231 100644 --- a/lib_enc/amr_wb_enc.c +++ b/lib_enc/amr_wb_enc.c @@ -531,7 +531,11 @@ void amr_wb_enc( updt_enc( st, old_exc, pitch_buf, 0, Aq, isf_new, isp_new, dummy_buf ); /* update main codec paramaters */ +#ifdef REMOVE_ETOT_PROPAGATION + updt_enc_common( st ); +#else updt_enc_common( st, Etot ); +#endif #ifdef DEBUG_MODE_INFO dbgwrite( &st->codec_mode, sizeof( int16_t ), 1, input_frame, "res/codec" ); diff --git a/lib_enc/evs_enc.c b/lib_enc/evs_enc.c index 29b610db79..5f704b75aa 100644 --- a/lib_enc/evs_enc.c +++ b/lib_enc/evs_enc.c @@ -72,18 +72,20 @@ ivas_error evs_enc( float old_inp_12k8[L_INP_12k8], *inp; /* buffer of input signal @ 12k8 */ float old_inp_16k[L_INP]; /* buffer of input signal @ 16kHz */ float fr_bands[2 * NB_BANDS]; /* energy in frequency bands */ - float Etot; /* total energy; correlation shift */ - float ener; /* residual energy from Levinson-Durbin */ - float A[NB_SUBFR16k * ( M + 1 )]; /* A(z) unquantized for subframes */ - float Aw[NB_SUBFR16k * ( M + 1 )]; /* weighted A(z) unquantized for subframes */ - float epsP[M + 1]; /* LP prediction errors */ - float lsp_new[M]; /* LSPs at the end of the frame */ - float lsp_mid[M]; /* ISPs in the middle of the frame */ - int16_t vad_hover_flag; /* VAD hangover flag */ - int16_t hq_core_type; /* HQ core type (HQ, or LR-MDCT) */ - int16_t attack_flag; /* attack flag (GSC or TC) */ - float new_inp_resamp16k[L_FRAME16k]; /* new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */ - float old_syn_12k8_16k[L_FRAME16k]; /* ACELP core synthesis at 12.8kHz or 16kHz to be used by the SWB BWE */ +#ifndef REMOVE_ETOT_PROPAGATION + float Etot; /* total energy; correlation shift */ +#endif + float ener; /* residual energy from Levinson-Durbin */ + float A[NB_SUBFR16k * ( M + 1 )]; /* A(z) unquantized for subframes */ + float Aw[NB_SUBFR16k * ( M + 1 )]; /* weighted A(z) unquantized for subframes */ + float epsP[M + 1]; /* LP prediction errors */ + float lsp_new[M]; /* LSPs at the end of the frame */ + float lsp_mid[M]; /* ISPs in the middle of the frame */ + int16_t vad_hover_flag; /* VAD hangover flag */ + int16_t hq_core_type; /* HQ core type (HQ, or LR-MDCT) */ + int16_t attack_flag; /* attack flag (GSC or TC) */ + float new_inp_resamp16k[L_FRAME16k]; /* new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */ + float old_syn_12k8_16k[L_FRAME16k]; /* ACELP core synthesis at 12.8kHz or 16kHz to be used by the SWB BWE */ float shb_speech[L_FRAME16k]; float hb_speech[L_FRAME16k / 4]; float new_swb_speech[L_FRAME48k]; @@ -181,10 +183,18 @@ ivas_error evs_enc( *---------------------------------------------------------------------*/ #ifdef FIX_I4_OL_PITCH +#ifdef REMOVE_ETOT_PROPAGATION + pre_proc( st, input_frame, old_inp_12k8, old_inp_16k, &inp, fr_bands, Etot, &ener, A, Aw, epsP, lsp_new, lsp_mid, &vad_hover_flag, &attack_flag, new_inp_resamp16k, &Voicing_flag, realBuffer, imagBuffer, &hq_core_type ); +#else pre_proc( st, input_frame, old_inp_12k8, old_inp_16k, &inp, fr_bands, &Etot, &ener, A, Aw, epsP, lsp_new, lsp_mid, &vad_hover_flag, &attack_flag, new_inp_resamp16k, &Voicing_flag, realBuffer, imagBuffer, &hq_core_type ); +#endif +#else +#ifdef REMOVE_ETOT_PROPAGATION + pre_proc( st, input_frame, old_inp_12k8, old_inp_16k, &inp, fr_bands, &ener, pitch_orig, A, Aw, epsP, lsp_new, lsp_mid, &vad_hover_flag, &attack_flag, new_inp_resamp16k, &Voicing_flag, realBuffer, imagBuffer, &hq_core_type ); #else pre_proc( st, input_frame, old_inp_12k8, old_inp_16k, &inp, fr_bands, &Etot, &ener, pitch_orig, A, Aw, epsP, lsp_new, lsp_mid, &vad_hover_flag, &attack_flag, new_inp_resamp16k, &Voicing_flag, realBuffer, imagBuffer, &hq_core_type ); #endif +#endif if ( st->mdct_sw == MODE2 ) @@ -509,7 +519,11 @@ ivas_error evs_enc( * Updates *---------------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + updt_enc_common( st ); +#else updt_enc_common( st, Etot ); +#endif if ( st->mdct_sw == MODE1 ) { diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c index dc3247cbfb..c3052a8577 100644 --- a/lib_enc/ivas_core_enc.c +++ b/lib_enc/ivas_core_enc.c @@ -55,13 +55,15 @@ extern float snr_[2][320]; *-------------------------------------------------------------------*/ ivas_error ivas_core_enc( - SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */ - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ - MCT_ENC_HANDLE hMCT, /* i/o: MCT encoder structure */ - const int16_t n_CoreChannels, /* i : number of core channels to be coded */ - float old_inp_12k8[CPE_CHANNELS][L_INP_12k8], /* i : buffer of old input signal */ - float old_inp_16k[CPE_CHANNELS][L_INP], /* i : buffer of old input signal */ - const float Etot[CPE_CHANNELS], /* i : total energy */ + SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */ + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ + MCT_ENC_HANDLE hMCT, /* i/o: MCT encoder structure */ + const int16_t n_CoreChannels, /* i : number of core channels to be coded */ + float old_inp_12k8[CPE_CHANNELS][L_INP_12k8], /* i : buffer of old input signal */ + float old_inp_16k[CPE_CHANNELS][L_INP], /* i : buffer of old input signal */ +#ifndef REMOVE_ETOT_PROPAGATION + const float Etot[CPE_CHANNELS], /* i : total energy */ +#endif float ener[CPE_CHANNELS], /* i : residual energy from Levinson-Durbin */ float A[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )], /* i : A(z) unquantized for the 4 subframes */ float Aw[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )], /* i : weighted A(z) unquantized for subframes */ @@ -409,7 +411,11 @@ ivas_error ivas_core_enc( * Common updates *---------------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + updt_enc_common( st ); +#else updt_enc_common( st, Etot[n] ); +#endif } diff --git a/lib_enc/ivas_core_pre_proc_front.c b/lib_enc/ivas_core_pre_proc_front.c index 1fdb26805c..a3c0dccc76 100644 --- a/lib_enc/ivas_core_pre_proc_front.c +++ b/lib_enc/ivas_core_pre_proc_front.c @@ -60,15 +60,17 @@ static void calculate_energy_buffer( CPE_ENC_HANDLE hCPE, float enerBuffer_dft[] *--------------------------------------------------------------------*/ ivas_error pre_proc_front_ivas( - SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */ - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ - const int32_t element_brate, /* i : SCE/CPE element bitrate */ - const int16_t nb_bits_metadata, /* i : number of metadata bits */ - const int16_t input_frame, /* i : frame length */ - const int16_t n, /* i : channel number */ - float old_inp_12k8[], /* o : buffer of old input signal */ - float old_inp_16k[], /* o : buffer of old input signal @16kHz */ - float *Etot, /* o : total energy */ + SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */ + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ + const int32_t element_brate, /* i : SCE/CPE element bitrate */ + const int16_t nb_bits_metadata, /* i : number of metadata bits */ + const int16_t input_frame, /* i : frame length */ + const int16_t n, /* i : channel number */ + float old_inp_12k8[], /* o : buffer of old input signal */ + float old_inp_16k[], /* o : buffer of old input signal @16kHz */ +#ifndef REMOVE_ETOT_PROPAGATION + float *Etot, /* o : total energy */ +#endif float *ener, /* o : residual energy from Levinson-Durbin */ float *relE, /* o : frame relative energy */ float A[NB_SUBFR16k * ( M + 1 )], /* o : A(z) unquantized for the 4 subframes */ @@ -105,8 +107,11 @@ ivas_error pre_proc_front_ivas( const int32_t ivas_total_brate /* i : IVAS total bitrate - for setting the DTX */ ) { - float *inp_12k8, *new_inp_12k8; /* pointers to current frame and new data */ - float *wsp; /* weighted input signal buffer */ + float *inp_12k8, *new_inp_12k8; /* pointers to current frame and new data */ + float *wsp; /* weighted input signal buffer */ +#ifdef REMOVE_ETOT_PROPAGATION + float Etot; /* total energy */ +#endif float fr_bands[2 * NB_BANDS]; /* energy in frequency bands */ float lf_E[2 * VOIC_BINS]; /* per bin spectrum energy in lf */ float tmpN[NB_BANDS]; /* Temporary noise update */ @@ -408,11 +413,19 @@ ivas_error pre_proc_front_ivas( * Spectral analysis *--------------------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + analy_sp( element_mode, hCPE, input_Fs, inp_12k8, st->Bin_E, st->Bin_E_old, fr_bands, lf_E, &Etot, st->min_band, st->max_band, band_energies, PS, fft_buff ); +#else analy_sp( element_mode, hCPE, input_Fs, inp_12k8, st->Bin_E, st->Bin_E_old, fr_bands, lf_E, Etot, st->min_band, st->max_band, band_energies, PS, fft_buff ); +#endif if ( hStereoClassif != NULL ) { +#ifdef REMOVE_ETOT_PROPAGATION + if ( st->lp_speech - Etot > 25 ) +#else if ( st->lp_speech - *Etot > 25 ) +#endif { hStereoClassif->silence_flag = 2; } @@ -427,7 +440,11 @@ ivas_error pre_proc_front_ivas( * SAD (1-signal, 0-noise) *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + noise_est_pre( Etot, st->ini_frame, st->hNoiseEst, st->idchan, element_mode, hCPE != NULL ? hCPE->last_element_mode : element_mode ); +#else noise_est_pre( *Etot, st->ini_frame, st->hNoiseEst, st->idchan, element_mode, hCPE != NULL ? hCPE->last_element_mode : element_mode ); +#endif if ( element_mode == IVAS_CPE_TD && ( ( abs( hCPE->hStereoTD->tdm_last_ratio_idx - tdm_ratio_idx ) > 5 && st->idchan == 1 ) || abs( hCPE->hStereoTD->tdm_last_inst_ratio_idx - hCPE->hStereoTD->tdm_inst_ratio_idx ) > 10 ) ) { @@ -500,7 +517,11 @@ ivas_error pre_proc_front_ivas( * Correlation correction as a function of total noise level *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + noise_est_down( fr_bands, st->hNoiseEst->bckr, tmpN, tmpE, st->min_band, st->max_band, &st->hNoiseEst->totalNoise, Etot, &st->hNoiseEst->Etot_last, &st->hNoiseEst->Etot_v_h2 ); +#else noise_est_down( fr_bands, st->hNoiseEst->bckr, tmpN, tmpE, st->min_band, st->max_band, &st->hNoiseEst->totalNoise, *Etot, &st->hNoiseEst->Etot_last, &st->hNoiseEst->Etot_v_h2 ); +#endif if ( lr_vad_enabled && st->idchan == 0 ) { @@ -510,7 +531,11 @@ ivas_error pre_proc_front_ivas( corr_shiftR = correlation_shift( hCPE->hFrontVad[1]->hNoiseEst->totalNoise ); } +#ifdef REMOVE_ETOT_PROPAGATION + *relE = Etot - st->lp_speech; +#else *relE = *Etot - st->lp_speech; +#endif corr_shift = correlation_shift( st->hNoiseEst->totalNoise ); @@ -656,8 +681,13 @@ ivas_error pre_proc_front_ivas( * Update estimated noise energy and voicing cut-off frequency *-----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + noise_est( st, old_pitch1, tmpN, epsP, Etot, *relE, corr_shift, tmpE, fr_bands, cor_map_sum, &ncharX, &sp_div, + &non_staX, loc_harm, lf_E, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp, &dummy /*sp_floor*/, S_map, hStereoClassif, NULL, st->ini_frame ); +#else noise_est( st, old_pitch1, tmpN, epsP, *Etot, *relE, corr_shift, tmpE, fr_bands, cor_map_sum, &ncharX, &sp_div, &non_staX, loc_harm, lf_E, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp, &dummy /*sp_floor*/, S_map, hStereoClassif, NULL, st->ini_frame ); +#endif if ( lr_vad_enabled && st->idchan == 0 ) { @@ -689,7 +719,11 @@ ivas_error pre_proc_front_ivas( find_tilt( fr_bands, st->hNoiseEst->bckr, ee, st->pitch, st->voicing, lf_E, corr_shift, st->input_bwidth, st->max_band, hp_E, MODE1, &( st->bckr_tilt_lt ), st->Opt_SC_VBR ); +#ifdef REMOVE_ETOT_PROPAGATION + st->coder_type = find_uv( st, pitch_fr, voicing_fr, inp_12k8, ee, &dE1X, corr_shift, *relE, Etot, hp_E, &flag_spitch, last_core_orig, hStereoClassif ); +#else st->coder_type = find_uv( st, pitch_fr, voicing_fr, inp_12k8, ee, &dE1X, corr_shift, *relE, *Etot, hp_E, &flag_spitch, last_core_orig, hStereoClassif ); +#endif /*-----------------------------------------------------------------* * channel aware mode configuration * @@ -725,7 +759,11 @@ ivas_error pre_proc_front_ivas( * 1st stage speech/music classification (GMM model) *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + smc_dec = ivas_smc_gmm( st, hStereoClassif, localVAD_HE_SAD, Etot, lsp_new, *cor_map_sum, epsP, PS, non_staX, *relE, &high_lpn_flag, flag_spitch ); +#else smc_dec = ivas_smc_gmm( st, hStereoClassif, localVAD_HE_SAD, *Etot, lsp_new, *cor_map_sum, epsP, PS, non_staX, *relE, &high_lpn_flag, flag_spitch ); +#endif #ifdef DEBUGGING if ( st->idchan == 0 ) @@ -748,19 +786,28 @@ ivas_error pre_proc_front_ivas( * Update of old per-band energy spectrum *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + long_enr( st, Etot, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); +#else long_enr( st, *Etot, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); +#endif mvr2r( fr_bands + NB_BANDS, st->hNoiseEst->enrO, NB_BANDS ); if ( lr_vad_enabled && st->idchan == 0 ) { +#ifdef REMOVE_ETOT_PROPAGATION + long_enr( st, Etot, localVAD_HE_SAD, high_lpn_flag, hCPE->hFrontVad, CPE_CHANNELS, localVAD_HE_SAD_LR, Etot_LR ); +#else long_enr( st, *Etot, localVAD_HE_SAD, high_lpn_flag, hCPE->hFrontVad, CPE_CHANNELS, localVAD_HE_SAD_LR, Etot_LR ); +#endif mvr2r( fr_bands_LR[0] + NB_BANDS, hCPE->hFrontVad[0]->hNoiseEst->enrO, NB_BANDS ); mvr2r( fr_bands_LR[1] + NB_BANDS, hCPE->hFrontVad[1]->hNoiseEst->enrO, NB_BANDS ); - +#ifndef REMOVE_ETOT_PROPAGATION hCPE->hFrontVad[0]->hNoiseEst->Etot_last = Etot_LR[0]; hCPE->hFrontVad[1]->hNoiseEst->Etot_last = Etot_LR[1]; +#endif } /*----------------------------------------------------------------* @@ -804,7 +851,11 @@ ivas_error pre_proc_front_ivas( } /* 2nd stage speech/music classification (ACELP/GSC/TCX core selection) */ +#ifdef REMOVE_ETOT_PROPAGATION + ivas_smc_mode_selection( st, element_brate, smc_dec, *relE, Etot, attack_flag, inp_12k8, S_map, flag_spitch ); +#else ivas_smc_mode_selection( st, element_brate, smc_dec, *relE, *Etot, attack_flag, inp_12k8, S_map, flag_spitch ); +#endif #ifdef ITD_WINNER_GAIN_MODIFY if ( element_mode == IVAS_CPE_DFT ) diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index b422bf8c0c..3943f291af 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -63,9 +63,11 @@ ivas_error ivas_cpe_enc( CPE_ENC_HANDLE hCPE; Encoder_State **sts; int16_t n, n_CoreChannels; - float old_inp_12k8[CPE_CHANNELS][L_INP_12k8]; /* buffer of input signal @ 12k8 */ - float old_inp_16k[CPE_CHANNELS][L_INP]; /* buffer of input signal @ 16kHz */ - float Etot[CPE_CHANNELS]; /* total energy; correlation shift */ + float old_inp_12k8[CPE_CHANNELS][L_INP_12k8]; /* buffer of input signal @ 12k8 */ + float old_inp_16k[CPE_CHANNELS][L_INP]; /* buffer of input signal @ 16kHz */ +#ifndef REMOVE_ETOT_PROPAGATION + float Etot[CPE_CHANNELS]; /* total energy; correlation shift */ +#endif float ener[CPE_CHANNELS]; /* residual energy from Levinson-Durbin */ float relE[CPE_CHANNELS]; /* frame relative energy */ float A[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )]; /* A(z) unquantized for subframes */ @@ -96,6 +98,9 @@ ivas_error ivas_cpe_enc( int16_t localVAD_HE_SAD[CPE_CHANNELS]; /* HE-SAD flag without hangover, LR channels */ float band_energies_LR[2 * NB_BANDS]; /* energy in critical bands without minimum noise floor E_MIN */ float orig_input[CPE_CHANNELS][L_FRAME48k]; +#ifdef REMOVE_ETOT_PROPAGATION + float Etot_last[CPE_CHANNELS]; +#endif int32_t tmp, input_Fs; int16_t max_bwidth, ivas_format; ENCODER_CONFIG_HANDLE hEncoderConfig; @@ -366,6 +371,10 @@ ivas_error ivas_cpe_enc( { sts[1]->bits_frame_channel -= st_ivas->hQMetaData->metadata_max_bits; } +#ifdef REMOVE_ETOT_PROPAGATION + Etot_last[0] = sts[0]->hNoiseEst->Etot_last; + Etot_last[1] = sts[1]->hNoiseEst->Etot_last; +#endif } else if ( hCPE->element_mode == IVAS_CPE_MDCT ) { @@ -426,10 +435,17 @@ ivas_error ivas_cpe_enc( for ( n = 0; n < n_CoreChannels; n++ ) { +#ifdef REMOVE_ETOT_PROPAGATION + error = pre_proc_front_ivas( NULL, hCPE, hCPE->element_brate, nb_bits_metadata, input_frame, n, old_inp_12k8[n], old_inp_16k[n], &ener[n], &relE[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], + &vad_hover_flag[n], &attack_flag[n], realBuffer[n], imagBuffer[n], old_wsp[n], pitch_fr[n], voicing_fr[n], &loc_harm[n], &cor_map_sum[n], &vad_flag_dtx[n], enerBuffer[n], + fft_buff[n], A[0], lsp_new[0], currFlatness[n], tdm_ratio_idx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, band_energies_LR, 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, 0, 0, + ivas_total_brate ); +#else error = pre_proc_front_ivas( NULL, hCPE, hCPE->element_brate, nb_bits_metadata, input_frame, n, old_inp_12k8[n], old_inp_16k[n], &Etot[n], &ener[n], &relE[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], &vad_hover_flag[n], &attack_flag[n], realBuffer[n], imagBuffer[n], old_wsp[n], pitch_fr[n], voicing_fr[n], &loc_harm[n], &cor_map_sum[n], &vad_flag_dtx[n], enerBuffer[n], fft_buff[n], A[0], lsp_new[0], currFlatness[n], tdm_ratio_idx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, band_energies_LR, 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, 0, 0, ivas_total_brate ); +#endif if ( error != IVAS_ERR_OK ) { return error; @@ -512,7 +528,11 @@ ivas_error ivas_cpe_enc( { tdm_ol_pitch_comparison( hCPE, pitch_fr, voicing_fr ); +#ifdef REMOVE_ETOT_PROPAGATION + tdm_configure_enc( hCPE, Etot_last, tdm_SM_or_LRTD_Pri, tdm_ratio_idx, tdm_ratio_idx_SM, attack_flag[0], nb_bits_metadata ); +#else tdm_configure_enc( hCPE, tdm_SM_or_LRTD_Pri, tdm_ratio_idx, tdm_ratio_idx_SM, attack_flag[0], nb_bits_metadata ); +#endif if ( hEncoderConfig->Opt_DTX_ON ) { @@ -613,7 +633,11 @@ ivas_error ivas_cpe_enc( * Core Encoder *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + if ( ( error = ivas_core_enc( NULL, hCPE, st_ivas->hMCT, n_CoreChannels, old_inp_12k8, old_inp_16k, ener, A, Aw, epsP, lsp_new, lsp_mid, vad_hover_flag, attack_flag, realBuffer, imagBuffer, old_wsp, loc_harm, cor_map_sum, vad_flag_dtx, enerBuffer, fft_buff, tdm_SM_or_LRTD_Pri, ivas_format, 0 ) ) != IVAS_ERR_OK ) +#else if ( ( error = ivas_core_enc( NULL, hCPE, st_ivas->hMCT, n_CoreChannels, old_inp_12k8, old_inp_16k, Etot, ener, A, Aw, epsP, lsp_new, lsp_mid, vad_hover_flag, attack_flag, realBuffer, imagBuffer, old_wsp, loc_harm, cor_map_sum, vad_flag_dtx, enerBuffer, fft_buff, tdm_SM_or_LRTD_Pri, ivas_format, 0 ) ) != IVAS_ERR_OK ) +#endif { return error; } diff --git a/lib_enc/ivas_ism_enc.c b/lib_enc/ivas_ism_enc.c index 39d5b42a07..2e70225565 100644 --- a/lib_enc/ivas_ism_enc.c +++ b/lib_enc/ivas_ism_enc.c @@ -58,10 +58,12 @@ ivas_error ivas_ism_enc( SCE_ENC_HANDLE hSCE; Encoder_State *st; int16_t sce_id; - float old_inp_12k8[MAX_NUM_OBJECTS][1][L_INP_12k8]; /* buffer of input signal @ 12k8 */ - float old_inp_16k[MAX_NUM_OBJECTS][1][L_INP]; /* buffer of input signal @ 16kHz */ - int16_t vad_flag[MAX_NUM_OBJECTS]; /* VAD flag */ - float Etot[MAX_NUM_OBJECTS][1]; /* total energy; correlation shift */ + float old_inp_12k8[MAX_NUM_OBJECTS][1][L_INP_12k8]; /* buffer of input signal @ 12k8 */ + float old_inp_16k[MAX_NUM_OBJECTS][1][L_INP]; /* buffer of input signal @ 16kHz */ + int16_t vad_flag[MAX_NUM_OBJECTS]; /* VAD flag */ +#ifndef REMOVE_ETOT_PROPAGATION + float Etot[MAX_NUM_OBJECTS][1]; /* total energy; correlation shift */ +#endif float ener[MAX_NUM_OBJECTS][1]; /* residual energy from Levinson-Durbin */ float relE[MAX_NUM_OBJECTS][1]; /* frame relative energy */ float A[MAX_NUM_OBJECTS][1][NB_SUBFR16k * ( M + 1 )]; /* A(z) unquantized for subframes */ @@ -148,7 +150,10 @@ ivas_error ivas_ism_enc( *----------------------------------------------------------------*/ error = pre_proc_front_ivas( hSCE, NULL, hSCE->element_brate, nb_bits_metadata[sce_id], input_frame, 0, old_inp_12k8[sce_id][0], old_inp_16k[sce_id][0], - &Etot[sce_id][0], &ener[sce_id][0], &relE[sce_id][0], A[sce_id][0], Aw[sce_id][0], epsP[sce_id][0], lsp_new[sce_id][0], lsp_mid[sce_id][0], +#ifndef REMOVE_ETOT_PROPAGATION + &Etot[sce_id][0], +#endif + &ener[sce_id][0], &relE[sce_id][0], A[sce_id][0], Aw[sce_id][0], epsP[sce_id][0], lsp_new[sce_id][0], lsp_mid[sce_id][0], &vad_hover_flag[sce_id][0], &attack_flag[sce_id][0], realBuffer[sce_id][0], imagBuffer[sce_id][0], old_wsp[sce_id][0], pitch_fr[sce_id][0], voicing_fr[sce_id][0], &loc_harm[sce_id][0], &cor_map_sum[sce_id][0], &vad_flag_dtx[sce_id][0], enerBuffer[sce_id][0], fft_buff[sce_id][0], A[sce_id][0], lsp_new[sce_id][0], currFlatness[0], 0, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, NULL, 0, 0, 0, 0, st_ivas->hEncoderConfig->ivas_total_brate ); @@ -263,7 +268,11 @@ ivas_error ivas_ism_enc( * Encoder *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + if ( ( error = ivas_core_enc( hSCE, NULL, NULL, 1, old_inp_12k8[sce_id], old_inp_16k[sce_id], ener[sce_id], A[sce_id], Aw[sce_id], epsP[sce_id], lsp_new[sce_id], lsp_mid[sce_id], vad_hover_flag[sce_id], attack_flag[sce_id], realBuffer[sce_id], imagBuffer[sce_id], old_wsp[sce_id], loc_harm[sce_id], cor_map_sum[sce_id], vad_flag_dtx[sce_id], enerBuffer[sce_id], fft_buff[sce_id], 0, ISM_FORMAT, 0 ) ) != IVAS_ERR_OK ) +#else if ( ( error = ivas_core_enc( hSCE, NULL, NULL, 1, old_inp_12k8[sce_id], old_inp_16k[sce_id], Etot[sce_id], ener[sce_id], A[sce_id], Aw[sce_id], epsP[sce_id], lsp_new[sce_id], lsp_mid[sce_id], vad_hover_flag[sce_id], attack_flag[sce_id], realBuffer[sce_id], imagBuffer[sce_id], old_wsp[sce_id], loc_harm[sce_id], cor_map_sum[sce_id], vad_flag_dtx[sce_id], enerBuffer[sce_id], fft_buff[sce_id], 0, ISM_FORMAT, 0 ) ) != IVAS_ERR_OK ) +#endif { return error; } diff --git a/lib_enc/ivas_sce_enc.c b/lib_enc/ivas_sce_enc.c index 2a74fcf68d..0472d0d833 100644 --- a/lib_enc/ivas_sce_enc.c +++ b/lib_enc/ivas_sce_enc.c @@ -58,9 +58,11 @@ ivas_error ivas_sce_enc( const int16_t nb_bits_metadata /* i : number of metadata bits */ ) { - float old_inp_12k8[1][L_INP_12k8]; /* buffer of input signal @ 12k8 */ - float old_inp_16k[1][L_INP]; /* buffer of input signal @ 16kHz */ - float Etot[1]; /* total energy; correlation shift */ + float old_inp_12k8[1][L_INP_12k8]; /* buffer of input signal @ 12k8 */ + float old_inp_16k[1][L_INP]; /* buffer of input signal @ 16kHz */ +#ifndef REMOVE_ETOT_PROPAGATION + float Etot[1]; /* total energy; correlation shift */ +#endif float ener[1]; /* residual energy from Levinson-Durbin */ float relE[1]; /* frame relative energy */ float A[1][NB_SUBFR16k * ( M + 1 )]; /* A(z) unquantized for subframes */ @@ -182,7 +184,10 @@ ivas_error ivas_sce_enc( *----------------------------------------------------------------*/ error = pre_proc_front_ivas( hSCE, NULL, hSCE->element_brate, nb_bits_metadata, input_frame, 0, old_inp_12k8[0], old_inp_16k[0], - &Etot[0], &ener[0], &relE[0], A[0], Aw[0], epsP[0], lsp_new[0], lsp_mid[0], +#ifndef REMOVE_ETOT_PROPAGATION + &Etot[0], +#endif + &ener[0], &relE[0], A[0], Aw[0], epsP[0], lsp_new[0], lsp_mid[0], &vad_hover_flag[0], &attack_flag[0], realBuffer[0], imagBuffer[0], old_wsp[0], pitch_fr[0], voicing_fr[0], &loc_harm[0], &cor_map_sum[0], &vad_flag_dtx[0], enerBuffer[0], fft_buff[0], A[0], lsp_new[0], currFlatness[0], 0, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, NULL, flag_16k_smc, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->force_front_vad : 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_dtx_flag : 0, @@ -238,7 +243,11 @@ ivas_error ivas_sce_enc( * Encoder *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + if ( ( error = ivas_core_enc( hSCE, NULL, NULL, 1, old_inp_12k8, old_inp_16k, ener, A, Aw, epsP, lsp_new, lsp_mid, vad_hover_flag, attack_flag, realBuffer, imagBuffer, old_wsp, loc_harm, cor_map_sum, vad_flag_dtx, enerBuffer, fft_buff, 0, ivas_format, flag_16k_smc ) ) != IVAS_ERR_OK ) +#else if ( ( error = ivas_core_enc( hSCE, NULL, NULL, 1, old_inp_12k8, old_inp_16k, Etot, ener, A, Aw, epsP, lsp_new, lsp_mid, vad_hover_flag, attack_flag, realBuffer, imagBuffer, old_wsp, loc_harm, cor_map_sum, vad_flag_dtx, enerBuffer, fft_buff, 0, ivas_format, flag_16k_smc ) ) != IVAS_ERR_OK ) +#endif { return error; } diff --git a/lib_enc/ivas_stereo_td_enc.c b/lib_enc/ivas_stereo_td_enc.c index 1e7fe30bd1..bc454f0220 100644 --- a/lib_enc/ivas_stereo_td_enc.c +++ b/lib_enc/ivas_stereo_td_enc.c @@ -302,7 +302,10 @@ ivas_error stereo_set_tdm( *-------------------------------------------------------------------*/ void tdm_configure_enc( - CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ + CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ +#ifdef REMOVE_ETOT_PROPAGATION + const float Etot_last[CPE_CHANNELS], /* i/o: Energy of last frame */ +#endif const int16_t tdm_SM_or_LRTD_Pri, /* i : channel combination scheme flag in TD stereo OR LRTD primary channel */ const int16_t tdm_ratio_idx, /* i : ratio index */ const int16_t tdm_ratio_idx_SM, /* i : ratio index in SM mode */ @@ -326,11 +329,21 @@ void tdm_configure_enc( *----------------------------------------------------------------*/ hStereoTD->tdm_use_IAWB_Ave_lpc = 0; /* Flag initialisation */ +#ifdef REMOVE_ETOT_PROPAGATION + sts[0]->hSpMusClas->tdm_lt_Etot = 0.1f * Etot_last[0] + 0.9f * sts[0]->hSpMusClas->tdm_lt_Etot; + sts[1]->hSpMusClas->tdm_lt_Etot = 0.1f * Etot_last[1] + 0.9f * sts[1]->hSpMusClas->tdm_lt_Etot; +#else sts[0]->hSpMusClas->tdm_lt_Etot = 0.1f * sts[0]->hNoiseEst->Etot_last + 0.9f * sts[0]->hSpMusClas->tdm_lt_Etot; sts[1]->hSpMusClas->tdm_lt_Etot = 0.1f * sts[1]->hNoiseEst->Etot_last + 0.9f * sts[1]->hSpMusClas->tdm_lt_Etot; +#endif +#ifdef REMOVE_ETOT_PROPAGATION + if ( hCPE->hStereoClassif->lrtd_mode == 0 && ( ( sts[1]->hSpMusClas->tdm_lt_Etot < 0 && hCPE->hCoreCoder[1]->vad_flag == 0 ) /* very clean signal */ + || ( hCPE->hCoreCoder[1]->vad_flag == 0 || ( Etot_last[1] < 30.0f && ( sts[0]->hSpMusClas->tdm_lt_Etot - sts[1]->hSpMusClas->tdm_lt_Etot ) > 26.0f ) ) ) ) +#else if ( hCPE->hStereoClassif->lrtd_mode == 0 && ( ( sts[1]->hSpMusClas->tdm_lt_Etot < 0 && hCPE->hCoreCoder[1]->vad_flag == 0 ) /* very clean signal */ || ( hCPE->hCoreCoder[1]->vad_flag == 0 || ( sts[1]->hNoiseEst->Etot_last < 30.0f && ( sts[0]->hSpMusClas->tdm_lt_Etot - sts[1]->hSpMusClas->tdm_lt_Etot ) > 26.0f ) ) ) ) +#endif { sts[1]->coder_type = INACTIVE; @@ -340,7 +353,11 @@ void tdm_configure_enc( } hStereoTD->tdm_lp_reuse_flag = 1; } +#ifdef REMOVE_ETOT_PROPAGATION + else if ( ( ( hCPE->hCoreCoder[1]->vad_flag == 0 ) || ( hCPE->hCoreCoder[0]->vad_flag == 0 && Etot_last[1] < 30.0f && ( sts[0]->hSpMusClas->tdm_lt_Etot - sts[1]->hSpMusClas->tdm_lt_Etot ) > 26.0f ) ) && ( hCPE->hStereoClassif->lrtd_mode == 1 ) /* && NO_DTX */ ) /* boths channels are inactive but not DTX used*/ +#else else if ( ( ( hCPE->hCoreCoder[1]->vad_flag == 0 ) || ( hCPE->hCoreCoder[0]->vad_flag == 0 && sts[1]->hNoiseEst->Etot_last < 30.0f && ( sts[0]->hSpMusClas->tdm_lt_Etot - sts[1]->hSpMusClas->tdm_lt_Etot ) > 26.0f ) ) && ( hCPE->hStereoClassif->lrtd_mode == 1 ) /* && NO_DTX */ ) /* boths channels are inactive but not DTX used*/ +#endif { sts[1]->coder_type = INACTIVE; if ( tdm_ratio_idx > 1 && tdm_ratio_idx < 29 ) @@ -352,7 +369,11 @@ void tdm_configure_enc( hStereoTD->tdm_lp_reuse_flag = 1; } } +#ifdef REMOVE_ETOT_PROPAGATION + else if ( !( sts[1]->sp_aud_decision0 ) && sts[1]->tc_cnt <= 0 && ( sts[1]->coder_type_raw == UNVOICED || ( hStereoTD->tdm_LRTD_flag == 1 && hStereoTD->tdm_lp_reuse_flag == 0 && ( hCPE->hCoreCoder[1]->vad_flag == 0 || ( Etot_last[1] < 30.0f && ( sts[0]->hSpMusClas->tdm_lt_Etot - sts[1]->hSpMusClas->tdm_lt_Etot ) > 26.0f ) ) ) ) ) +#else else if ( !( sts[1]->sp_aud_decision0 ) && sts[1]->tc_cnt <= 0 && ( sts[1]->coder_type_raw == UNVOICED || ( hStereoTD->tdm_LRTD_flag == 1 && hStereoTD->tdm_lp_reuse_flag == 0 && ( hCPE->hCoreCoder[1]->vad_flag == 0 || ( sts[1]->hNoiseEst->Etot_last < 30.0f && ( sts[0]->hSpMusClas->tdm_lt_Etot - sts[1]->hSpMusClas->tdm_lt_Etot ) > 26.0f ) ) ) ) ) +#endif { sts[1]->coder_type = UNVOICED; if ( hStereoTD->tdm_LRTD_flag == 1 ) diff --git a/lib_enc/nois_est.c b/lib_enc/nois_est.c index b8d551a738..2b9acdf0ca 100644 --- a/lib_enc/nois_est.c +++ b/lib_enc/nois_est.c @@ -298,6 +298,10 @@ void noise_est_down( *------------------------------------------------------------------*/ Etot_v = (float) fabs( *Etot_last - Etot ); +#ifdef REMOVE_ETOT_PROPAGATION + *Etot_last = Etot; +#endif + *Etot_v_h2 = ( 1.0f - 0.02f ) * *Etot_v_h2 + 0.02f * min( 3.0f, Etot_v ); if ( *Etot_v_h2 < 0.1f ) { diff --git a/lib_enc/pre_proc.c b/lib_enc/pre_proc.c index 33728a6f40..6eb11ca68e 100644 --- a/lib_enc/pre_proc.c +++ b/lib_enc/pre_proc.c @@ -59,8 +59,10 @@ void pre_proc( float old_inp_16k[], /* i/o: buffer of old input signal @ 16kHz */ float **inp, /* o : ptr. to inp. signal in the current frame*/ float fr_bands[2 * NB_BANDS], /* i : energy in frequency bands */ - float *Etot, /* i : total energy */ - float *ener, /* o : residual energy from Levinson-Durbin */ +#ifndef REMOVE_ETOT_PROPAGATION + float *Etot, /* i : total energy */ +#endif + float *ener, /* o : residual energy from Levinson-Durbin */ #ifndef FIX_I4_OL_PITCH int16_t pitch_orig[3], /* o : open-loop pitch values for quantization */ #endif @@ -84,18 +86,21 @@ void pre_proc( float old_wsp[L_WSP], *wsp; /* weighted input signal buffer */ float pitch_fr[NB_SUBFR]; /* fractional pitch values */ float voicing_fr[NB_SUBFR]; /* fractional pitch gains */ - float lf_E[2 * VOIC_BINS]; /* per bin spectrum energy in lf */ - float tmpN[NB_BANDS]; /* Temporary noise update */ - float tmpE[NB_BANDS]; /* Temporary averaged energy of 2 sf. */ - float ee[2]; /* Spectral tilt */ - float corr_shift; /* correlation shift */ - float relE; /* frame relative energy */ - int16_t loc_harm; /* harmonicity flag */ - float cor_map_sum, sp_div, PS[128]; /* speech/music clasif. parameters */ - int16_t L_look; /* length of look-ahead */ - float snr_sum_he; /* HE SAD parameters */ - int16_t localVAD_HE_SAD; /* HE SAD parameters */ - int16_t vad_flag_dtx; /* HE-SAD flag with additional DTX HO */ +#ifdef REMOVE_ETOT_PROPAGATION + float Etot; /* total energy */ +#endif + float lf_E[2 * VOIC_BINS]; /* per bin spectrum energy in lf */ + float tmpN[NB_BANDS]; /* Temporary noise update */ + float tmpE[NB_BANDS]; /* Temporary averaged energy of 2 sf. */ + float ee[2]; /* Spectral tilt */ + float corr_shift; /* correlation shift */ + float relE; /* frame relative energy */ + int16_t loc_harm; /* harmonicity flag */ + float cor_map_sum, sp_div, PS[128]; /* speech/music clasif. parameters */ + int16_t L_look; /* length of look-ahead */ + float snr_sum_he; /* HE SAD parameters */ + int16_t localVAD_HE_SAD; /* HE SAD parameters */ + int16_t vad_flag_dtx; /* HE-SAD flag with additional DTX HO */ int16_t vad_flag_cldfb; float old_cor; float hp_E[2]; /* Energy in HF */ @@ -190,14 +195,24 @@ void pre_proc( * Spectral analysis *--------------------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + analy_sp( -1, NULL, st->input_Fs, inp_12k8, st->Bin_E, st->Bin_E_old, fr_bands, lf_E, &Etot, st->min_band, st->max_band, band_energies, PS, fft_buff ); +#else analy_sp( -1, NULL, st->input_Fs, inp_12k8, st->Bin_E, st->Bin_E_old, fr_bands, lf_E, Etot, st->min_band, st->max_band, band_energies, PS, fft_buff ); +#endif /*----------------------------------------------------------------* * SAD (1-signal, 0-noise) *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + noise_est_pre( Etot, st->ini_frame, st->hNoiseEst, 0, EVS_MONO, EVS_MONO ); +#else noise_est_pre( *Etot, st->ini_frame, st->hNoiseEst, 0, EVS_MONO, EVS_MONO ); +#endif + st->vad_flag = wb_vad( st, fr_bands, &noisy_speech_HO, &clean_speech_HO, &NB_speech_HO, &snr_sum_he, &localVAD_HE_SAD, &( st->flag_noisy_speech_snr ), NULL, NULL, -1000.0f, -1000.0f ); + vad_flag_cldfb = vad_proc( realBuffer, imagBuffer, enerBuffer, st->cldfbAnaEnc->no_channels, st->hVAD_CLDFB, &cldfb_addition, st->vad_flag ); if ( st->Pos_relE_cnt < 20 ) /* Ensure the level is high enough and cldfb decision is reliable */ @@ -226,9 +241,15 @@ void pre_proc( * Correlation correction as a function of total noise level *----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + noise_est_down( fr_bands, st->hNoiseEst->bckr, tmpN, tmpE, st->min_band, st->max_band, &st->hNoiseEst->totalNoise, Etot, &st->hNoiseEst->Etot_last, &st->hNoiseEst->Etot_v_h2 ); + + relE = Etot - st->lp_speech; +#else noise_est_down( fr_bands, st->hNoiseEst->bckr, tmpN, tmpE, st->min_band, st->max_band, &st->hNoiseEst->totalNoise, *Etot, &st->hNoiseEst->Etot_last, &st->hNoiseEst->Etot_v_h2 ); relE = *Etot - st->lp_speech; +#endif if ( relE > 1.5f ) { @@ -354,7 +375,11 @@ void pre_proc( * Update estimated noise energy and voicing cut-off frequency *-----------------------------------------------------------------*/ +#ifdef REMOVE_ETOT_PROPAGATION + noise_est( st, old_pitch1, tmpN, epsP, Etot, relE, corr_shift, tmpE, fr_bands, &cor_map_sum, NULL, &sp_div, &non_staX, &loc_harm, lf_E, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp, &sp_floor, 0, NULL, NULL, st->ini_frame ); +#else noise_est( st, old_pitch1, tmpN, epsP, *Etot, relE, corr_shift, tmpE, fr_bands, &cor_map_sum, NULL, &sp_div, &non_staX, &loc_harm, lf_E, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp, &sp_floor, 0, NULL, NULL, st->ini_frame ); +#endif /*------------------------------------------------------------------* * Update parameters used in the VAD and DTX @@ -369,7 +394,11 @@ void pre_proc( find_tilt( fr_bands, st->hNoiseEst->bckr, ee, st->pitch, st->voicing, lf_E, corr_shift, st->input_bwidth, st->max_band, hp_E, st->codec_mode, &( st->bckr_tilt_lt ), st->Opt_SC_VBR ); +#ifdef REMOVE_ETOT_PROPAGATION + st->coder_type = find_uv( st, pitch_fr, voicing_fr, inp_12k8, ee, NULL, corr_shift, relE, Etot, hp_E, &flag_spitch, last_core_orig, NULL ); +#else st->coder_type = find_uv( st, pitch_fr, voicing_fr, inp_12k8, ee, NULL, corr_shift, relE, *Etot, hp_E, &flag_spitch, last_core_orig, NULL ); +#endif /*-----------------------------------------------------------------* * channel aware mode configuration * @@ -423,8 +452,15 @@ void pre_proc( st->GSC_IVAS_mode = 0; +#ifdef REMOVE_ETOT_PROPAGATION + speech_music_classif( st, new_inp_12k8, inp_12k8, localVAD_HE_SAD, lsp_new, cor_map_sum, epsP, PS, Etot, old_cor, attack_flag, non_staX, relE, &high_lpn_flag, flag_spitch ); + + long_enr( st, Etot, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); +#else speech_music_classif( st, new_inp_12k8, inp_12k8, localVAD_HE_SAD, lsp_new, cor_map_sum, epsP, PS, *Etot, old_cor, attack_flag, non_staX, relE, &high_lpn_flag, flag_spitch ); + long_enr( st, *Etot, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); +#endif /*----------------------------------------------------------------* * Final VAD correction ( when HE-SAD is used instead of the normal VAD, @@ -576,7 +612,11 @@ void pre_proc( } if ( st->total_brate == ACELP_13k20 && st->bwidth != FB ) { +#ifdef REMOVE_ETOT_PROPAGATION + MDCT_selector( st, sp_floor, Etot, cor_map_sum, enerBuffer ); +#else MDCT_selector( st, sp_floor, *Etot, cor_map_sum, enerBuffer ); +#endif } } else @@ -872,7 +912,11 @@ void pre_proc( if ( st->total_brate == ACELP_16k40 && st->bwidth != FB ) { +#ifdef REMOVE_ETOT_PROPAGATION + MDCT_selector( st, sp_floor, Etot, cor_map_sum, enerBuffer ); +#else MDCT_selector( st, sp_floor, *Etot, cor_map_sum, enerBuffer ); +#endif } } else diff --git a/lib_enc/updt_enc.c b/lib_enc/updt_enc.c index c6663895e0..205a228964 100644 --- a/lib_enc/updt_enc.c +++ b/lib_enc/updt_enc.c @@ -297,8 +297,11 @@ void updt_IO_switch_enc( *-------------------------------------------------------------------*/ void updt_enc_common( - Encoder_State *st, /* i/o: encoder state structure */ - const float Etot /* i : total energy */ + Encoder_State *st /* i/o: encoder state structure */ +#ifndef REMOVE_ETOT_PROPAGATION + , + const float Etot /* i : total energy */ +#endif ) { /*---------------------------------------------------------------------* @@ -316,7 +319,9 @@ void updt_enc_common( st->last_extl = st->extl; st->last_input_bwidth = st->input_bwidth; st->last_bwidth = st->bwidth; +#ifndef REMOVE_ETOT_PROPAGATION st->hNoiseEst->Etot_last = Etot; +#endif st->last_coder_type_raw = st->coder_type_raw; if ( st->core_brate > SID_2k40 && st->hDtxEnc != NULL ) -- GitLab From 97f1f14911ad33d5586be181449b1bc15cd4af05 Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 19 Dec 2022 15:26:52 +0100 Subject: [PATCH 2/3] - cleaning - disable DEBUG_MODE_INFO --- lib_com/options.h | 2 +- lib_enc/ivas_core_enc.c | 2 -- lib_enc/ivas_front_vad.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index a012408558..6453c7c048 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -61,7 +61,7 @@ /*#define MEM_COUNT_DETAILS*/ /* RAM counting tool: print per sub-structure details */ -#define DEBUG_MODE_INFO /* output most important parameters to the subdirectory "res/" */ +/*#define DEBUG_MODE_INFO*/ /* output most important parameters to the subdirectory "res/" */ #ifdef DEBUG_MODE_INFO /*#define DEBUG_MODE_ACELP*/ /* output most important ACELP core parameters to the subdirectory "res/" */ /*#define DEBUG_MODE_TCX*/ /* output most important TCX core parameters to the subdirectory "res/" */ diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c index a7841af3ba..6ce774ddc5 100644 --- a/lib_enc/ivas_core_enc.c +++ b/lib_enc/ivas_core_enc.c @@ -460,8 +460,6 @@ ivas_error ivas_core_enc( dbgwrite( &st->count_WB, sizeof( int16_t ), 1, input_frame, fname( debug_dir, "count_WB", n, id, ENC ) ); dbgwrite( &st->count_SWB, sizeof( int16_t ), 1, input_frame, fname( debug_dir, "count_SWB", n, id, ENC ) ); - dbgwrite( &st->hNoiseEst->Etot_last, sizeof( float ), 1, input_frame, fname( debug_dir, "Etot_last", n, id, ENC ) ); - #ifdef DEBUG_MODE_ACELP dbgwrite( snr_[n], sizeof( float ), 320, 1, fname( debug_dir, "snr", n, id, ENC ) ); #endif diff --git a/lib_enc/ivas_front_vad.c b/lib_enc/ivas_front_vad.c index 26f3fd5b3f..3051575150 100644 --- a/lib_enc/ivas_front_vad.c +++ b/lib_enc/ivas_front_vad.c @@ -472,7 +472,7 @@ ivas_error front_vad_spar( /* 1st stage speech/music classification (GMM model) */ /* run only to get 'high_lpn_flag' parameter */ - ivas_smc_gmm( st, NULL /* <-- hStereoClassif */, localVAD_HE_SAD[0], Etot[0], lsp_new, cor_map_sum, epsP, PS, non_staX, relE, &high_lpn_flag, flag_spitch ); + ivas_smc_gmm( st, NULL, localVAD_HE_SAD[0], Etot[0], lsp_new, cor_map_sum, epsP, PS, non_staX, relE, &high_lpn_flag, flag_spitch ); /* long-term energy update */ #ifdef REMOVE_ETOT_PROPAGATION -- GitLab From cde200bf34accc94a97de28e3b235c8510d34e90 Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 19 Dec 2022 21:01:39 +0100 Subject: [PATCH 3/3] clang-format --- lib_com/prot.h | 20 ++++++++++---------- lib_enc/ivas_core_enc.c | 6 +++--- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/lib_com/prot.h b/lib_com/prot.h index 59b8dd8d0f..70af73bbe9 100644 --- a/lib_com/prot.h +++ b/lib_com/prot.h @@ -713,10 +713,10 @@ int16_t lev_dur( /*! r: delay value in ns */ int32_t get_delay( - const int16_t enc_dec, /* i : encoder/decoder flag */ - const int32_t io_fs, /* i : input/output sampling frequency */ - const IVAS_FORMAT ivas_format, /* i : IVAS format */ - HANDLE_CLDFB_FILTER_BANK hCldfb, /* i : Handle of Cldfb analysis */ + const int16_t enc_dec, /* i : encoder/decoder flag */ + const int32_t io_fs, /* i : input/output sampling frequency */ + const IVAS_FORMAT ivas_format, /* i : IVAS format */ + HANDLE_CLDFB_FILTER_BANK hCldfb, /* i : Handle of Cldfb analysis */ #ifndef FIX_I59_LFE_TD_DELAY RENDERER_TYPE renderer_type, /* i : IVAS rendering type */ const int32_t binaural_latency_ns /* i : binaural renderer HRTF delay in ns */ @@ -3846,7 +3846,7 @@ int16_t dtx_hangover_addition( int16_t *vad_hover_flag, /* o : VAD hangover flag */ VAD_HANDLE hVAD, /* i/o: VAD handle for L or R channel */ NOISE_EST_HANDLE hNoiseEst, /* i : Noise estimation handle */ - int16_t *rem_dtx_ho /* o : Expected remaining hangover frames */ + int16_t *rem_dtx_ho /* o : Expected remaining hangover frames */ ); int16_t wb_vad( @@ -7888,11 +7888,11 @@ void coder_tcx_post( ); void decoder_tcx( - Decoder_State *st, /* i/o: coder memory state */ - int16_t prm[], /* i : parameters */ - float A[], /* i : coefficients NxAz[M+1] */ - Word16 Aind[], /* i : frame-independent coefficients Az[M+1]*/ - float synth[], /* i/o: synth[-M..lg] */ + Decoder_State *st, /* i/o: coder memory state */ + int16_t prm[], /* i : parameters */ + float A[], /* i : coefficients NxAz[M+1] */ + Word16 Aind[], /* i : frame-independent coefficients Az[M+1]*/ + float synth[], /* i/o: synth[-M..lg] */ float synthFB[], /* i/o: encoder memory state */ const int16_t bfi, /* i : Bad frame indicator */ const int16_t frame_cnt, /* i : frame counter in the super_frame */ diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c index f493ecdb26..7a0ff91da5 100644 --- a/lib_enc/ivas_core_enc.c +++ b/lib_enc/ivas_core_enc.c @@ -411,15 +411,15 @@ ivas_error ivas_core_enc( signaling_enc_rf( st ); -/*---------------------------------------------------------------------* + /*---------------------------------------------------------------------* * Common updates *---------------------------------------------------------------------*/ + #ifdef MC_BITRATE_SWITCHING - /* for MCT do this later, otherwise there can be a problem because TCX quant happens later and might get the wrong last_core on a bit rate switch */ #ifndef REMOVE_ETOT_PROPAGATION st->hNoiseEst->Etot_last = Etot[n]; #endif - if ( !MCT_flag ) + if ( !MCT_flag ) /* for MCT do this later, otherwise there can be a problem because TCX quant happens later and might get the wrong last_core on a bit rate switch */ { updt_enc_common( st ); } -- GitLab