From 94652f353c4d9380e4028212d13a73ef220f6566 Mon Sep 17 00:00:00 2001 From: Fredrik Jansson Date: Thu, 6 Oct 2022 10:46:00 +0200 Subject: [PATCH 1/3] Added ITDCNG solution under separate define --- lib_com/ivas_cnst.h | 9 +++ lib_com/ivas_prot.h | 25 ++++++++ lib_com/options.h | 2 + lib_com/prot.h | 4 ++ lib_dec/ivas_stat_dec.h | 8 +++ lib_dec/ivas_stereo_dft_dec.c | 72 ++++++++++++++++++++++ lib_dec/ivas_stereo_dft_dec_dmx.c | 4 ++ lib_enc/amr_wb_enc.c | 7 ++- lib_enc/ivas_core_pre_proc_front.c | 15 ++++- lib_enc/ivas_cpe_enc.c | 16 +++++ lib_enc/ivas_front_vad.c | 10 ++- lib_enc/ivas_stat_enc.h | 14 +++++ lib_enc/ivas_stereo_cng_enc.c | 97 ++++++++++++++++++++++++++++- lib_enc/ivas_stereo_dft_enc.c | 19 +++++- lib_enc/ivas_stereo_dft_enc_itd.c | 98 +++++++++++++++++++++++++++++- lib_enc/ivas_stereo_dft_td_itd.c | 8 +++ lib_enc/pre_proc.c | 7 ++- lib_enc/vad.c | 10 +++ 18 files changed, 415 insertions(+), 10 deletions(-) diff --git a/lib_com/ivas_cnst.h b/lib_com/ivas_cnst.h index 2df702fade..516fd7bef3 100644 --- a/lib_com/ivas_cnst.h +++ b/lib_com/ivas_cnst.h @@ -473,6 +473,11 @@ enum #define STEREO_DFT_OFFSET 1 #define STEREO_DFT_NBDIV 2 +#ifdef FIX_ITD_CNG +#define STEREO_DFT_ITD_CNG_XFADE 100 +#define STEREO_DFT_ITD_CNG_XFADE_RESET 2 +#endif + #define STEREO_DFT_DELAY_DEC_BWE_NS ( STEREO_DFT_OFFSET * STEREO_DFT_HOP_NS - ACELP_LOOK_NS ) /* 1.25ms/2.5ms: max delay for core decoder*/ #define STEREO_DFT_ENC_DFT_NB ( STEREO_DFT_OFFSET + 1 ) /*frame + lookahead*/ @@ -538,6 +543,10 @@ typedef enum #define STEREO_DFT_SID_GIPD_NBITS 2 #define STEREO_DFT_FD_FILT 0.9f +#ifdef FIX_ITD_CNG +#define STEREO_DFT_CNG_ITD_CNT 8 +#endif + /*Residual prediction*/ #define STEREO_DFT_PAST_MAX 4 #define STEREO_DFT_RES_PRED_BAND_MAX 12 diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index 6996e4a298..946a2ec858 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -893,6 +893,10 @@ float stereo_dft_enc_synthesize( void stereo_dft_enc_process( CPE_ENC_HANDLE hCPE, /* 
i : CPE encoder structure */ +#ifdef FIX_ITD_CNG + const int16_t vad_flag_dtx[], /* i: VAD dtx flags */ + const int16_t vad_hover_flag[], /* i: VAD hangover flags */ +#endif const int16_t input_frame /* i : input frame length */ ); @@ -941,7 +945,11 @@ void stereo_dft_dequantize_itd( void stereo_dft_enc_sid_calc_coh( STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: DFT stereo handle */ +#ifdef FIX_ITD_CNG + float prev_cohBand[2*(STEREO_DFT_BAND_MAX/2)], /* i/o: Previous coherence */ +#else float coh_crossfade[STEREO_DFT_BAND_MAX / 2], /* i/o: Coherence crossfade memory */ +#endif int16_t *td_active, /* i/o: TD stereo mode indicator */ int16_t *first_SID, /* i/o: First SID indicator */ float *cohBand /* i/o: Coherence per band */ @@ -1094,6 +1102,11 @@ void stereo_dft_dec_read_BS( void stereo_dft_dec_smooth_parameters( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ const int16_t prev_sid_nodata /* i : Previous SID/No data indicator */ +#ifdef FIX_ITD_CNG + , + const int16_t active_frame_counter, /* i : Active frame counter */ + const int32_t element_brate /* i : Element bitrate */ +#endif ); void stereo_dft_generate_res_pred( @@ -1249,6 +1262,10 @@ void stereo_dft_enc_compute_itd( float *DFT_R, const int16_t k_offset, const int16_t input_frame, +#ifdef FIX_ITD_CNG + const int16_t vad_flag_dtx[], + const int16_t vad_hover_flag[], +#endif float *bin_nrgL, float *bin_nrgR ); @@ -1804,6 +1821,10 @@ void deindex_lvq_SHB( void stereo_td_itd_mdct_stereo( CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder handle */ +#ifdef FIX_ITD_CNG + const int16_t vad_flag_dtx[], /* i: VAD dtx flags */ + const int16_t vad_hover_flag[], /* i: VAD hangover flags */ +#endif const int16_t input_frame /* i : frame length */ ); @@ -2367,6 +2388,10 @@ void stereo_cng_upd_counters( const int16_t nbands, /* i : Number of bands in active */ const float sidSideGain[], /* i : SID side gains */ const int16_t burst_ho_count /* i : Hang-over count */ +#ifdef FIX_ITD_CNG + , + int16_t 
*coh_fade_counter /* o : Coherence fade counter */ +#endif ); void stereo_cng_init_dec( diff --git a/lib_com/options.h b/lib_com/options.h index 222333bc33..94515f750c 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -158,6 +158,8 @@ #define ALIGN_SID_SIZE /* Issue 111: make all DTX modes use one SID frame bitrate (5.2 kbps) */ +#define FIX_ITD_CNG /* Eri: Fix for CNG ITD */ + /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ #endif diff --git a/lib_com/prot.h b/lib_com/prot.h index 4c91c8cd52..dc1465a209 100644 --- a/lib_com/prot.h +++ b/lib_com/prot.h @@ -3833,6 +3833,10 @@ int16_t dtx_hangover_addition( int16_t *vad_hover_flag, /* o : VAD hangover flag */ VAD_HANDLE hVAD, /* i/o: VAD handle for L or R channel */ NOISE_EST_HANDLE hNoiseEst /* i : Noise estimation handle */ +#ifdef FIX_ITD_CNG + , + int16_t *rem_dtx_ho /* o : Expected remaining hangover frames */ +#endif ); int16_t wb_vad( diff --git a/lib_dec/ivas_stat_dec.h b/lib_dec/ivas_stat_dec.h index c773491d41..95dbddb599 100644 --- a/lib_dec/ivas_stat_dec.h +++ b/lib_dec/ivas_stat_dec.h @@ -144,6 +144,14 @@ typedef struct stereo_dft_dec_data_struct float itd[STEREO_DFT_DEC_DFT_NB]; +#ifdef FIX_ITD_CNG + float itd_xfade_step; + float itd_xfade_target; + int16_t itd_xfade_counter; + float itd_xfade_prev; + int32_t last_active_element_brate; +#endif + /*residual prediction*/ int16_t res_pred_mode[STEREO_DFT_DEC_DFT_NB]; /* residual prediction mode: 0(off), 1(stereo filling only), 2(enhanced stereo filling) */ float res_pred_gain[STEREO_DFT_DEC_DFT_NB * STEREO_DFT_BAND_MAX]; /* prediction gain for the residual HFs */ diff --git a/lib_dec/ivas_stereo_dft_dec.c b/lib_dec/ivas_stereo_dft_dec.c index b0db814511..a8eb063cd9 100644 --- a/lib_dec/ivas_stereo_dft_dec.c +++ b/lib_dec/ivas_stereo_dft_dec.c @@ -480,6 +480,14 @@ void stereo_dft_dec_reset( set_zero( hStereoDft->smooth_fac[0], SBA_DIRAC_STEREO_NUM_BANDS ); set_zero( hStereoDft->smooth_fac[1], 
SBA_DIRAC_STEREO_NUM_BANDS ); +#ifdef FIX_ITD_CNG + hStereoDft->itd_xfade_target = 0.0f; + hStereoDft->itd_xfade_step = 0.0f; + hStereoDft->itd_xfade_counter = 0; + hStereoDft->itd_xfade_prev = 0.0f; + hStereoDft->last_active_element_brate = 0; +#endif + return; } @@ -1162,7 +1170,11 @@ void stereo_dft_dec( } else { +#ifdef FIX_ITD_CNG + stereo_dft_dec_smooth_parameters( hStereoDft, hStereoCng->prev_sid_nodata, hStereoCng->active_frame_counter, st0->element_brate ); +#else stereo_dft_dec_smooth_parameters( hStereoDft, hStereoCng->prev_sid_nodata ); +#endif } } @@ -1746,6 +1758,10 @@ void stereo_dft_dec_read_BS( * Initialization *-----------------------------------------------------------------*/ +#ifdef FIX_ITD_CNG + k_offset = STEREO_DFT_OFFSET; +#endif + #ifdef ALIGN_SID_SIZE if ( ivas_total_brate == IVAS_SID_5k2 ) #else @@ -1778,6 +1794,9 @@ void stereo_dft_dec_read_BS( hStereoDft->frame_sid = 0; *nb_bits = 0; *total_brate = 0; +#ifdef FIX_ITD_CNG + hStereoDft->itd[k = hStereoDft->prm_res[k_offset] - 1 + k_offset] = hStereoDft->itd_xfade_target; +#endif return; } @@ -1808,7 +1827,9 @@ void stereo_dft_dec_read_BS( /*init*/ max_bits = *nb_bits; *nb_bits = 0; +#ifndef FIX_ITD_CNG k_offset = STEREO_DFT_OFFSET; +#endif N_div = STEREO_DFT_NBDIV; #ifdef ALIGN_SID_SIZE @@ -2788,6 +2809,11 @@ void stereo_dft_generate_res_pred( void stereo_dft_dec_smooth_parameters( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ const int16_t prev_sid_nodata /* i : Previous SID/No data indicator */ +#ifdef FIX_ITD_CNG + , + const int16_t active_frame_counter, /* i : Active frame counter */ + const int32_t element_brate /* i : Element bitrate */ +#endif ) { int16_t k_offset, k, k2, b, N_div; @@ -2814,6 +2840,38 @@ void stereo_dft_dec_smooth_parameters( hStereoDft->gipd[( k + k_offset ) - k2] = hStereoDft->gipd[k + k_offset]; } +#ifdef FIX_ITD_CNG + if ( hStereoDft->frame_sid_nodata ) + { + /* set new xfade target if new itd received */ + if ( hStereoDft->itd[k 
+ k_offset] != hStereoDft->itd_xfade_target ) + { + hStereoDft->itd_xfade_target = hStereoDft->itd[k + k_offset]; + hStereoDft->itd_xfade_step = ( hStereoDft->itd_xfade_target - hStereoDft->itd_xfade_prev ) / ( STEREO_DFT_ITD_CNG_XFADE - hStereoDft->itd_xfade_counter ); + } + + /* xfade */ + if ( hStereoDft->itd_xfade_prev != hStereoDft->itd_xfade_target && hStereoDft->itd_xfade_counter < STEREO_DFT_ITD_CNG_XFADE && hStereoDft->last_active_element_brate <= 24400 ) + { + hStereoDft->itd[k + k_offset] = hStereoDft->itd_xfade_prev + hStereoDft->itd_xfade_step; + hStereoDft->itd_xfade_prev = hStereoDft->itd[k + k_offset]; + hStereoDft->itd_xfade_counter++; + } + } + else + { + /* First active frame, "reset" everything if long enough active encoding, only triggered if STEREO_DFT_ITD_CNG_XFADE_RESET = -1 */ + if ( active_frame_counter > STEREO_DFT_ITD_CNG_XFADE_RESET ) + { + hStereoDft->itd_xfade_target = hStereoDft->itd[k + k_offset]; + hStereoDft->itd_xfade_prev = hStereoDft->itd[k + k_offset]; + hStereoDft->itd_xfade_counter = 0; + } + + hStereoDft->last_active_element_brate = element_brate; + + } +#endif for ( k2 = 1; k2 < hStereoDft->prm_res[k + k_offset]; k2++ ) { hStereoDft->itd[( k + k_offset ) - k2] = hStereoDft->itd[k + k_offset]; @@ -2822,6 +2880,20 @@ void stereo_dft_dec_smooth_parameters( return; } +#ifdef FIX_ITD_CNG + /* Active frame, "reset" everything if long enough active encoding */ + if ( active_frame_counter > STEREO_DFT_ITD_CNG_XFADE_RESET ) + { + hStereoDft->itd_xfade_counter = 0; + hStereoDft->itd_xfade_target = hStereoDft->itd[STEREO_DFT_NBDIV - 1]; + hStereoDft->itd_xfade_prev = hStereoDft->itd[STEREO_DFT_NBDIV - 1]; + } +#endif + +#ifdef FIX_ITD_CNG + hStereoDft->last_active_element_brate = element_brate; +#endif + for ( k = hStereoDft->prm_res[k_offset] - 1; k < N_div; k += hStereoDft->prm_res[k + k_offset] ) { max_res_pred_ind = 0; diff --git a/lib_dec/ivas_stereo_dft_dec_dmx.c b/lib_dec/ivas_stereo_dft_dec_dmx.c index 
48ee7cf4e6..f58b6ddca4 100644 --- a/lib_dec/ivas_stereo_dft_dec_dmx.c +++ b/lib_dec/ivas_stereo_dft_dec_dmx.c @@ -130,7 +130,11 @@ void stereo_dft_unify_dmx( ( st0->core == TCX_20_CORE && ( ( st0->hTcxCfg->tcx_last_overlap_mode == MIN_OVERLAP ) || ( st0->hTcxCfg->tcx_last_overlap_mode == HALF_OVERLAP ) ) ) || ( st0->core == TCX_10_CORE ); /* Smoothing for the current frame */ +#ifdef FIX_ITD_CNG + stereo_dft_dec_smooth_parameters( hStereoDft, prev_sid_nodata, st0->hFdCngDec->hFdCngCom->active_frame_counter, st0->element_brate ); +#else stereo_dft_dec_smooth_parameters( hStereoDft, prev_sid_nodata ); +#endif for ( k = 0; k < N_div; k++ ) { diff --git a/lib_enc/amr_wb_enc.c b/lib_enc/amr_wb_enc.c index 76ddf9e705..89c1579cd2 100644 --- a/lib_enc/amr_wb_enc.c +++ b/lib_enc/amr_wb_enc.c @@ -292,7 +292,12 @@ void amr_wb_enc( } /* apply DTX hangover for CNG analysis */ - vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, 0, &vad_hover_flag, NULL, NULL ); + vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, 0, &vad_hover_flag, NULL, NULL +#ifdef FIX_ITD_CNG + , + NULL +#endif + ); /*-----------------------------------------------------------------* * Select SID or FRAME_NO_DATA frame if DTX enabled diff --git a/lib_enc/ivas_core_pre_proc_front.c b/lib_enc/ivas_core_pre_proc_front.c index 51a549505f..d6f7fa6977 100644 --- a/lib_enc/ivas_core_pre_proc_front.c +++ b/lib_enc/ivas_core_pre_proc_front.c @@ -451,7 +451,12 @@ ivas_error pre_proc_front_ivas( if ( ( hCPE != NULL && !( lr_vad_enabled && st->idchan == 0 ) ) || hSCE != NULL ) { - *vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, 0, vad_hover_flag, NULL, NULL ); + *vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, 0, vad_hover_flag, NULL, NULL +#ifdef FIX_ITD_CNG + , + NULL +#endif + ); } else { @@ -549,6 +554,14 @@ ivas_error pre_proc_front_ivas( dtx( st, *vad_flag_dtx, 
inp_12k8 ); +#ifdef FIX_ITD_CNG + if ( hCPE != NULL && hCPE->hStereoDft != NULL && st->core_brate == SID_2k40 ) + { + /* Add another period of expected xcorr updates */ + hCPE->hStereoDft->expectedNumUpdates += st->hDtxEnc->max_SID; + } +#endif + /*----------------------------------------------------------------* * Adjust FD-CNG Noise Estimator *----------------------------------------------------------------*/ diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index df94fe44ab..01b4d6ddef 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -361,7 +361,11 @@ ivas_error ivas_cpe_enc( ); /* DFT stereo processing */ +#ifdef FIX_ITD_CNG + stereo_dft_enc_process( hCPE, vad_flag_dtx, vad_hover_flag, input_frame ); +#else stereo_dft_enc_process( hCPE, input_frame ); +#endif } else if ( hCPE->element_mode == IVAS_CPE_TD ) { @@ -381,7 +385,11 @@ ivas_error ivas_cpe_enc( } else if ( hCPE->element_mode == IVAS_CPE_MDCT ) { +#ifdef FIX_ITD_CNG + stereo_td_itd_mdct_stereo( hCPE, vad_flag_dtx, vad_hover_flag, input_frame ); +#else stereo_td_itd_mdct_stereo( hCPE, input_frame ); +#endif } /*----------------------------------------------------------------* @@ -533,7 +541,11 @@ ivas_error ivas_cpe_enc( if ( hEncoderConfig->Opt_DTX_ON ) { +#ifdef FIX_ITD_CNG + stereo_cng_upd_counters( hCPE->hStereoCng, hCPE->element_mode, -1, NULL, sts[0]->hTdCngEnc->burst_ho_cnt, NULL ); +#else stereo_cng_upd_counters( hCPE->hStereoCng, hCPE->element_mode, -1, NULL, sts[0]->hTdCngEnc->burst_ho_cnt ); +#endif } } @@ -596,7 +608,11 @@ ivas_error ivas_cpe_enc( } else { +#ifdef FIX_ITD_CNG + stereo_cng_upd_counters( hCPE->hStereoCng, hCPE->element_mode, hCPE->hStereoDft->nbands, hCPE->hStereoDft->sidSideGain, sts[0]->hTdCngEnc->burst_ho_cnt, &hCPE->hStereoDft->coh_fade_counter ); +#else stereo_cng_upd_counters( hCPE->hStereoCng, hCPE->element_mode, hCPE->hStereoDft->nbands, hCPE->hStereoDft->sidSideGain, sts[0]->hTdCngEnc->burst_ho_cnt ); +#endif } } diff --git 
a/lib_enc/ivas_front_vad.c b/lib_enc/ivas_front_vad.c index 09fd624ca5..2828073b5d 100644 --- a/lib_enc/ivas_front_vad.c +++ b/lib_enc/ivas_front_vad.c @@ -110,6 +110,9 @@ ivas_error front_vad( { localVAD_HE_SAD[n] = 0; vad_hover_flag[n] = 0; +#ifdef FIX_ITD_CNG + vad_flag_dtx[n] = 1; +#endif } /*------------------------------------------------------------------* @@ -192,7 +195,12 @@ ivas_error front_vad( } /* DTX hangover addition */ - vad_flag_dtx[n] = dtx_hangover_addition( sts[n], hFrontVad->hVAD->vad_flag, hFrontVad->lp_speech - hFrontVad->lp_noise, 0 /* <- no cldfb addition */, &vad_hover_flag[n], hFrontVad->hVAD, hFrontVad->hNoiseEst ); + vad_flag_dtx[n] = dtx_hangover_addition( sts[n], hFrontVad->hVAD->vad_flag, hFrontVad->lp_speech - hFrontVad->lp_noise, 0 /* <- no cldfb addition */, &vad_hover_flag[n], hFrontVad->hVAD, hFrontVad->hNoiseEst +#ifdef FIX_ITD_CNG + , + &hFrontVads[n]->rem_dtx_ho +#endif + ); if ( n_chan == 1 ) { diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index 2ed6a0ff91..a3ad1248d7 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -232,6 +232,12 @@ typedef struct stereo_dft_enc_data_struct #endif +#ifdef FIX_ITD_CNG + int16_t currentNumUpdates; + int16_t expectedNumUpdates; /* Expected number of frames before use of ITD estimate */ + int16_t resetFrames; +#endif + /* energy buffers for ICBWE */ float nrg_L[2]; float nrg_R[2]; @@ -557,6 +563,9 @@ typedef struct front_vad_enc VAD_HANDLE hVAD; /* VAD handle */ float *delay_buf; int16_t delay_samples; +#ifdef FIX_ITD_CNG + int16_t rem_dtx_ho; /* Remaining hangover frames */ +#endif } FRONT_VAD_ENC, *FRONT_VAD_ENC_HANDLE; @@ -825,7 +834,12 @@ typedef struct stereo_cng_enc float sg_average[STEREO_DFT_ERB4_BANDS]; /* Sidegain average */ float prev_sg_average[STEREO_DFT_ERB4_BANDS]; /* Previous sidegain average */ float mem_cohBand[STEREO_DFT_BAND_MAX / 2]; /* Coherence memory */ +#ifdef FIX_ITD_CNG + float prev_cohBand[2*(STEREO_DFT_BAND_MAX/2)];/* Previous 
coherence */ + int16_t cng_counter; /* Counter for cng period length */ +#else float coh_crossfade[STEREO_DFT_BAND_MAX / 2]; /* Coherence memory */ +#endif int16_t td_active; /* TD-stereo indication */ int16_t first_SID_after_TD; /* Set if first SID frame after TD stereo */ int16_t first_SID; /* Set if first SID frame since codec start */ diff --git a/lib_enc/ivas_stereo_cng_enc.c b/lib_enc/ivas_stereo_cng_enc.c index 5db60b5238..1e55c42ade 100644 --- a/lib_enc/ivas_stereo_cng_enc.c +++ b/lib_enc/ivas_stereo_cng_enc.c @@ -32,6 +32,7 @@ #include #include "options.h" +#include #include "cnst.h" #include "rom_enc.h" #include "rom_com.h" @@ -50,6 +51,9 @@ *-------------------------------------------------------------------*/ #define COH_FADE_MAX 4 +#ifdef FIX_ITD_CNG +#define COH_FADE_UPDATES 2 +#endif /*--------------------------------------------------------------- @@ -60,7 +64,11 @@ void stereo_dft_enc_sid_calc_coh( STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: DFT stereo handle */ - float coh_crossfade[STEREO_DFT_BAND_MAX / 2], /* i/o: Coherence crossfade memory */ +#ifdef FIX_ITD_CNG + float prev_cohBand[2 * ( STEREO_DFT_BAND_MAX / 2 )], /* i/o: Previous coherence */ +#else + float coh_crossfade[STEREO_DFT_BAND_MAX / 2], /* i/o: Coherence crossfade memory */ +#endif int16_t *td_active, /* i/o: TD stereo mode indicator */ int16_t *first_SID, /* i/o: First SID indicator */ float *cohBand /* i/o: Coherence per band */ @@ -69,7 +77,9 @@ void stereo_dft_enc_sid_calc_coh( int16_t b, k; float coh_weight; float coh_weight_sum; - +#ifdef FIX_ITD_CNG + float xspec_scale; +#endif /* Cluster the coherence into bands using a weighted average. The coherence is weighted with the energy spectrum of the mixdown signal. 
*/ for ( b = 0; b < hStereoDft->nbands; b++ ) @@ -77,6 +87,32 @@ void stereo_dft_enc_sid_calc_coh( cohBand[b] = 0; coh_weight_sum = 0; +#ifdef FIX_ITD_CNG + if ( hStereoDft->coh_fade_counter == 0 && !*first_SID ) + { + for ( k = hStereoDft->band_limits[b]; k < hStereoDft->band_limits[b + 1]; k++ ) + { + xspec_scale = sqrtf( ( prev_cohBand[b] * ( hStereoDft->Spd_L_smooth[k] * hStereoDft->Spd_R_smooth[k] ) ) / ( hStereoDft->xspec_smooth[2 * k] * hStereoDft->xspec_smooth[2 * k] + hStereoDft->xspec_smooth[2 * k + 1] * hStereoDft->xspec_smooth[2 * k + 1] + EPSILON ) ); + hStereoDft->xspec_smooth[2 * k] *= xspec_scale; + hStereoDft->xspec_smooth[2 * k + 1] *= xspec_scale; + } + + cohBand[b] = prev_cohBand[b]; + } + else + { + for ( k = hStereoDft->band_limits[b]; k < hStereoDft->band_limits[b + 1]; k++ ) + { + coh_weight = hStereoDft->DFT[0][2 * k] * hStereoDft->DFT[0][2 * k] + hStereoDft->DFT[0][2 * k + 1] * hStereoDft->DFT[0][2 * k + 1]; + cohBand[b] += coh_weight * ( hStereoDft->xspec_smooth[2 * k] * hStereoDft->xspec_smooth[2 * k] + hStereoDft->xspec_smooth[2 * k + 1] * hStereoDft->xspec_smooth[2 * k + 1] ) / ( hStereoDft->Spd_L_smooth[k] * hStereoDft->Spd_R_smooth[k] + EPSILON ); + coh_weight_sum += coh_weight; + } + if ( coh_weight_sum > 0 ) + { + cohBand[b] = cohBand[b] / coh_weight_sum; + } + } +#else for ( k = hStereoDft->band_limits[b]; k < hStereoDft->band_limits[b + 1]; k++ ) { coh_weight = hStereoDft->DFT[0][2 * k] * hStereoDft->DFT[0][2 * k] + hStereoDft->DFT[0][2 * k + 1] * hStereoDft->DFT[0][2 * k + 1]; @@ -87,26 +123,56 @@ void stereo_dft_enc_sid_calc_coh( { cohBand[b] = cohBand[b] / coh_weight_sum; } +#endif } if ( *first_SID ) { +#ifdef FIX_ITD_CNG + mvr2r( cohBand, prev_cohBand, hStereoDft->nbands ); + mvr2r( prev_cohBand, &( prev_cohBand[ STEREO_DFT_BAND_MAX / 2 ] ), hStereoDft->nbands ); +#else mvr2r( cohBand, coh_crossfade, hStereoDft->nbands ); +#endif *first_SID = 0; } +#ifdef FIX_ITD_CNG + if ( hStereoDft->coh_fade_counter < COH_FADE_MAX && ( 
*td_active || hStereoDft->currentNumUpdates < COH_FADE_UPDATES ) ) +#else if ( hStereoDft->coh_fade_counter < COH_FADE_MAX && *td_active ) +#endif { for ( b = 0; b < hStereoDft->nbands; b++ ) { +#ifdef FIX_ITD_CNG + cohBand[b] = ( cohBand[b] * hStereoDft->coh_fade_counter + prev_cohBand[b] * ( COH_FADE_MAX - hStereoDft->coh_fade_counter ) ) / COH_FADE_MAX; +#else cohBand[b] = ( cohBand[b] * hStereoDft->coh_fade_counter + coh_crossfade[b] * ( COH_FADE_MAX - hStereoDft->coh_fade_counter ) ) / COH_FADE_MAX; +#endif } hStereoDft->coh_fade_counter++; +#ifdef FIX_ITD_CNG + if ( hStereoDft->coh_fade_counter > 0 ) + { + mvr2r( &prev_cohBand[STEREO_DFT_BAND_MAX / 2], prev_cohBand, hStereoDft->nbands ); + } + mvr2r( cohBand, &prev_cohBand[STEREO_DFT_BAND_MAX / 2], hStereoDft->nbands ); +#else mvr2r( cohBand, coh_crossfade, hStereoDft->nbands ); +#endif } else { +#ifdef FIX_ITD_CNG + if ( hStereoDft->coh_fade_counter > 0 ) + { + mvr2r( &prev_cohBand[STEREO_DFT_BAND_MAX / 2], prev_cohBand, hStereoDft->nbands ); + } + mvr2r( cohBand, &prev_cohBand[STEREO_DFT_BAND_MAX / 2], hStereoDft->nbands ); +#else mvr2r( cohBand, coh_crossfade, hStereoDft->nbands ); +#endif hStereoDft->coh_fade_counter = COH_FADE_MAX; *td_active = 0; } @@ -352,6 +418,11 @@ void stereo_dft_cng_side_gain( } hStereoCng->sg_average_counter++; +#ifdef FIX_ITD_CNG + hStereoCng->cng_counter++; + hStereoCng->cng_counter = min( hStereoCng->cng_counter, STEREO_DFT_SG_ACT_CNT_MAX ); +#endif + if ( core_brate == SID_2k40 ) { /* SID frame */ @@ -449,9 +520,16 @@ void stereo_enc_cng_init( hStereoCng->sg_active_cnt = 0; hStereoCng->first_SID = 1; set_f( hStereoCng->mem_cohBand, 0.5f, STEREO_DFT_BAND_MAX / 2 ); +#ifdef FIX_ITD_CNG + set_zero( hStereoCng->prev_cohBand, 2 * ( STEREO_DFT_BAND_MAX / 2 ) ); +#else set_zero( hStereoCng->coh_crossfade, STEREO_DFT_BAND_MAX / 2 ); +#endif hStereoCng->td_active = 0; hStereoCng->first_SID_after_TD = 1; +#ifdef FIX_ITD_CNG + hStereoCng->cng_counter = 0; +#endif return; } @@ -469,6 
+547,10 @@ void stereo_cng_upd_counters( const int16_t nbands, /* i : Number of bands in active */ const float sidSideGain[], /* i : SID side gains */ const int16_t burst_ho_count /* i : Hang-over count */ +#ifdef FIX_ITD_CNG + , + int16_t *coh_fade_counter /* o : Coherence fade counter */ +#endif ) { int16_t b; @@ -492,5 +574,16 @@ void stereo_cng_upd_counters( hStereoCng->sg_active_cnt++; hStereoCng->sg_active_cnt = min( hStereoCng->sg_active_cnt, STEREO_DFT_SG_ACT_CNT_MAX ); +#ifdef FIX_ITD_CNG + if ( hStereoCng->sg_active_cnt > STEREO_DFT_CNG_ITD_CNT ) + { + hStereoCng->cng_counter = 0; + } + + if ( element_mode == IVAS_CPE_DFT ) + { + *coh_fade_counter = 0; + } +#endif return; } diff --git a/lib_enc/ivas_stereo_dft_enc.c b/lib_enc/ivas_stereo_dft_enc.c index 402003be6c..6759cbf2f9 100644 --- a/lib_enc/ivas_stereo_dft_enc.c +++ b/lib_enc/ivas_stereo_dft_enc.c @@ -491,6 +491,12 @@ void stereo_dft_enc_reset( set_f( hStereoDft->Spd_L_smooth, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); set_f( hStereoDft->Spd_R_smooth, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); +#ifdef FIX_ITD_CNG + hStereoDft->currentNumUpdates = 0; + hStereoDft->expectedNumUpdates = FIXED_SID_RATE; + hStereoDft->resetFrames = 0; +#endif + hStereoDft->coh_fade_counter = 0; /* Xtalk classifier */ @@ -1227,6 +1233,10 @@ float stereo_dft_enc_synthesize( void stereo_dft_enc_process( CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ +#ifdef FIX_ITD_CNG + const int16_t vad_flag_dtx[], /* i: VAD dtx flags */ + const int16_t vad_hover_flag[], /* i: VAD hangover flags */ +#endif const int16_t input_frame /* i : input frame length */ ) { @@ -1297,8 +1307,11 @@ void stereo_dft_enc_process( if ( hStereoDft->hConfig->itd_mode ) #endif { +#ifdef FIX_ITD_CNG + stereo_dft_enc_compute_itd( hCPE, pDFT_L, pDFT_R, k_offset, input_frame, vad_flag_dtx, vad_hover_flag, bin_nrgL, bin_nrgR ); +#else stereo_dft_enc_compute_itd( hCPE, pDFT_L, pDFT_R, k_offset, input_frame, bin_nrgL, bin_nrgR ); - +#endif if ( hCPE->element_mode == 
IVAS_CPE_MDCT ) { return; @@ -2315,7 +2328,11 @@ void stereo_dft_enc_write_BS( if ( core_brate == SID_2k40 ) { +#ifdef FIX_ITD_CNG + stereo_dft_enc_sid_calc_coh( hStereoDft, hCPE->hStereoCng->prev_cohBand, &hCPE->hStereoCng->td_active, &hCPE->hStereoCng->first_SID, cohBand ); +#else stereo_dft_enc_sid_calc_coh( hStereoDft, hCPE->hStereoCng->coh_crossfade, &hCPE->hStereoCng->td_active, &hCPE->hStereoCng->first_SID, cohBand ); +#endif #ifdef ALIGN_SID_SIZE if ( *nb_bits <= ( ( IVAS_SID_5k2 - SID_2k40 ) / FRAMES_PER_SEC - SID_FORMAT_NBITS - STEREO_DFT_ITD_MODE_NBITS - STEREO_DFT_SID_ITD_NBITS - 1 ) ) diff --git a/lib_enc/ivas_stereo_dft_enc_itd.c b/lib_enc/ivas_stereo_dft_enc_itd.c index 594137993e..3961865e56 100644 --- a/lib_enc/ivas_stereo_dft_enc_itd.c +++ b/lib_enc/ivas_stereo_dft_enc_itd.c @@ -67,6 +67,10 @@ #define DENOM 0.05f #define XSPEC_ALPHA ( 1.f / 32 ) +#ifdef FIX_ITD_CNG +#define CORR_FILT 0.8f +#define CORR_RESET_FRAMES_MAX 20 +#endif #define ITD_VAD_NOISE_INIT_FRAMES 30 #define ITD_VAD_THRSHOLD 0.001f @@ -722,6 +726,10 @@ void stereo_dft_enc_compute_itd( float *DFT_R, const int16_t k_offset, const int16_t input_frame, +#ifdef FIX_ITD_CNG + const int16_t vad_flag_dtx[], + const int16_t vad_hover_flag[], +#endif float *bin_nrgL, float *bin_nrgR ) { @@ -768,6 +776,10 @@ void stereo_dft_enc_compute_itd( const float *dft_trigo32k; float trigo_enc[STEREO_DFT_N_32k_ENC / 2 + 1]; +#ifdef FIX_ITD_CNG + float cng_xcorr_filt; +#endif + if ( hCPE->element_mode == IVAS_CPE_DFT ) { hStereoDft = hCPE->hStereoDft; @@ -926,6 +938,10 @@ void stereo_dft_enc_compute_itd( vad_flag_itd = stereo_dft_enc_itd_vad( hItd->E_band_n, &( hItd->vad_frm_cnt ), Spd_L, Spd_R, &mssnr ); +#ifdef FIX_ITD_CNG + vad_flag_itd = vad_flag_itd && vad_flag_dtx[0]; +#endif + if ( sum_nrg_L < EPSILON ) { sfm_L = 0; @@ -1053,17 +1069,93 @@ void stereo_dft_enc_compute_itd( if ( hCPE->hCoreCoder[0]->Opt_DTX_ON && hCPE->element_mode == IVAS_CPE_DFT ) { +#ifdef FIX_ITD_CNG + if ( hCPE->hFrontVad[0] != 
NULL ) + { + /* Determine if we are in hangover */ + if ( vad_hover_flag[0] && vad_hover_flag[1] ) + { + /* Determine if we are in the first DTX hangover frame (also triggers for VAD hangover frame) */ + if ( hStereoDft->resetFrames > CORR_RESET_FRAMES_MAX ) + { + /* Reset cross spectrum when there is hangover */ + set_f( hStereoDft->xspec_smooth, 0.0f, STEREO_DFT_N_32k_ENC ); + hStereoDft->resetFrames = 0; + hStereoDft->currentNumUpdates = 0; + /* Expected minimum number of updates including first SID */ + hStereoDft->expectedNumUpdates = 1 + min( hCPE->hFrontVad[0]->rem_dtx_ho, hCPE->hFrontVad[1]->rem_dtx_ho ); + } + else if ( hStereoDft->currentNumUpdates >= hStereoDft->expectedNumUpdates ) + { + hStereoDft->expectedNumUpdates += 1 + min( hCPE->hFrontVad[0]->rem_dtx_ho, hCPE->hFrontVad[1]->rem_dtx_ho ); + } + cng_xcorr_filt = max( min( CORR_FILT, 10.0f * CORR_FILT / ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates ) ), sfm_L ); + hStereoDft->currentNumUpdates++; + for ( i = 1; i < NFFT / 2; i++ ) + { + /* Low pass filter cross L/R power spectrum */ + hStereoDft->xspec_smooth[2 * i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->xspec_smooth[2 * i] + cng_xcorr_filt * xcorr[2 * i]; + hStereoDft->xspec_smooth[2 * i + 1] = ( 1.f - cng_xcorr_filt ) * hStereoDft->xspec_smooth[2 * i + 1] + cng_xcorr_filt * xcorr[2 * i + 1]; + + /* Low pass filter L/R power spectrum */ + /* Calculate coherence as cross spectral density divided by L*R power spectrum */ + hStereoDft->Spd_L_smooth[i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->Spd_L_smooth[i] + cng_xcorr_filt * Spd_L[i]; + hStereoDft->Spd_R_smooth[i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->Spd_R_smooth[i] + cng_xcorr_filt * Spd_R[i]; + } + } + else if ( vad_flag_dtx[0] == 0 ) + { + hStereoDft->resetFrames = 0; + } + else + { + if ( hStereoDft->resetFrames < CORR_RESET_FRAMES_MAX + 1 ) + { + hStereoDft->resetFrames++; + } + if ( !vad_hover_flag[0] && !vad_hover_flag[1] ) + { + hStereoDft->expectedNumUpdates = 
hStereoDft->currentNumUpdates; + } + } + } +#endif +#ifdef FIX_ITD_CNG + if ( ( vad_flag_dtx[0] == 0 ) || ( hCPE->hFrontVad[0] == NULL && ( hCPE->hCoreCoder[0]->last_core_brate == SID_2k40 || hCPE->hCoreCoder[0]->last_core_brate == FRAME_NO_DATA ) ) || hCPE->hStereoCng->first_SID_after_TD ) +#else if ( hCPE->hCoreCoder[0]->last_core_brate == SID_2k40 || hCPE->hCoreCoder[0]->last_core_brate == FRAME_NO_DATA || hCPE->hStereoCng->first_SID_after_TD ) +#endif { - for ( i = 1; i < NFFT / 2; i++ ) +#ifdef FIX_ITD_CNG + if ( vad_flag_dtx[0] == 0 ) { + /* expectedNumUpdates updated after call to dtx() in SID frames */ + cng_xcorr_filt = max( min( CORR_FILT, 10.0f * CORR_FILT / ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates ) ), sfm_L ); + hStereoDft->currentNumUpdates++; + } + else /* use sfm for active frames */ + { + cng_xcorr_filt = sfm_L; + } + + /* Copy state of xspec_smooth to xcorr_smooth in first CNG frame */ + if ( hCPE->hStereoCng->cng_counter == 0 && vad_flag_dtx[0] == 0 ) + { + mvr2r( hStereoDft->xspec_smooth, hItd->xcorr_smooth, NFFT ); + } +#endif + for ( i = 1; i < NFFT / 2; i++ ) + { /* Low pass filter cross L/R power spectrum */ hStereoDft->xspec_smooth[2 * i] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->xspec_smooth[2 * i] + XSPEC_ALPHA * xcorr[2 * i]; hStereoDft->xspec_smooth[2 * i + 1] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->xspec_smooth[2 * i + 1] + XSPEC_ALPHA * xcorr[2 * i + 1]; - +#ifdef FIX_ITD_CNG + hItd->xcorr_smooth[2 * i] = ( 1.f - cng_xcorr_filt ) * hItd->xcorr_smooth[2 * i] + cng_xcorr_filt * xcorr[2 * i]; + hItd->xcorr_smooth[2 * i + 1] = ( 1.f - cng_xcorr_filt ) * hItd->xcorr_smooth[2 * i + 1] + cng_xcorr_filt * xcorr[2 * i + 1]; +#else hItd->xcorr_smooth[2 * i] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i] + sfm_L * xcorr[2 * i]; hItd->xcorr_smooth[2 * i + 1] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i + 1] + sfm_L * xcorr[2 * i + 1]; - +#endif tmpf1 = sqrtf( hItd->xcorr_smooth[i * 2] * hItd->xcorr_smooth[i * 2] + 
hItd->xcorr_smooth[i * 2 + 1] * hItd->xcorr_smooth[i * 2 + 1] ); tmpf1 += EPSILON; tmpf2 = tmpf1; diff --git a/lib_enc/ivas_stereo_dft_td_itd.c b/lib_enc/ivas_stereo_dft_td_itd.c index 84dfee6980..c8d97b6563 100644 --- a/lib_enc/ivas_stereo_dft_td_itd.c +++ b/lib_enc/ivas_stereo_dft_td_itd.c @@ -384,6 +384,10 @@ void stereo_td_itd( void stereo_td_itd_mdct_stereo( CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder handle */ +#ifdef FIX_ITD_CNG + const int16_t vad_flag_dtx[], /* i: VAD dtx flags */ + const int16_t vad_hover_flag[], /* i: VAD hangover flags */ +#endif const int16_t input_frame /* i : frame length */ ) { @@ -411,7 +415,11 @@ void stereo_td_itd_mdct_stereo( stereo_dft_enc_analyze( hCPE->hCoreCoder, CPE_CHANNELS, input_frame, NULL, hStereoMdct, DFT, hCPE->input_mem ); /*call ITD function*/ +#ifdef FIX_ITD_CNG + stereo_dft_enc_compute_itd( hCPE, DFT[0], DFT[1], STEREO_DFT_OFFSET, input_frame, vad_flag_dtx, vad_hover_flag, bin_nrgL, bin_nrgR ); +#else stereo_dft_enc_compute_itd( hCPE, DFT[0], DFT[1], STEREO_DFT_OFFSET, input_frame, bin_nrgL, bin_nrgR ); +#endif /* Time Domain ITD compensation using extrapolation */ #ifdef DEBUG_MODE_DFT diff --git a/lib_enc/pre_proc.c b/lib_enc/pre_proc.c index a17612290f..b3e04cecaa 100644 --- a/lib_enc/pre_proc.c +++ b/lib_enc/pre_proc.c @@ -211,7 +211,12 @@ void pre_proc( st->vad_flag = vad_flag_cldfb; } - vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, cldfb_addition, vad_hover_flag, NULL, NULL ); + vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, cldfb_addition, vad_hover_flag, NULL, NULL +#ifdef FIX_ITD_CNG + , + NULL +#endif + ); /*----------------------------------------------------------------* * NB/WB/SWB/FB bandwidth detector diff --git a/lib_enc/vad.c b/lib_enc/vad.c index 8d3c0beb4f..b095ffa6e5 100644 --- a/lib_enc/vad.c +++ b/lib_enc/vad.c @@ -162,6 +162,10 @@ int16_t dtx_hangover_addition( int16_t *vad_hover_flag, /* o : VAD hangover flag */ 
VAD_HANDLE hVAD, /* i/o: VAD handle for L or R channel */ NOISE_EST_HANDLE hNoiseEst /* i : Noise estimation handle */ +#ifdef FIX_ITD_CNG + , + int16_t *rem_dtx_ho /* o : Expected remaining hangover frames */ +#endif ) { int16_t hangover_short_dtx, flag_dtx; @@ -303,6 +307,12 @@ int16_t dtx_hangover_addition( if ( flag_dtx != 0 && st->localVAD == 0 ) { *vad_hover_flag = 1; +#ifdef FIX_ITD_CNG + if ( rem_dtx_ho != NULL ) + { + *rem_dtx_ho = max( hangover_short_dtx - hVAD->hangover_cnt_dtx, 0 ); + } +#endif } return flag_dtx; -- GitLab From c1091a4b803cef93de6266b83ca3fb6982be0ee8 Mon Sep 17 00:00:00 2001 From: Fredrik Jansson Date: Thu, 20 Oct 2022 08:58:54 +0200 Subject: [PATCH 2/3] Updated FIX_ITD_CNG --- lib_dec/ivas_stat_dec.h | 4 +++ lib_dec/ivas_stereo_dft_dec.c | 51 +++++++++++++++++++++++++++++++ lib_enc/ivas_stat_enc.h | 4 +++ lib_enc/ivas_stereo_dft_enc.c | 30 ++++++++++++++++-- lib_enc/ivas_stereo_dft_enc_itd.c | 1 + 5 files changed, 88 insertions(+), 2 deletions(-) diff --git a/lib_dec/ivas_stat_dec.h b/lib_dec/ivas_stat_dec.h index 95dbddb599..d8a1a279ac 100644 --- a/lib_dec/ivas_stat_dec.h +++ b/lib_dec/ivas_stat_dec.h @@ -150,6 +150,10 @@ typedef struct stereo_dft_dec_data_struct int16_t itd_xfade_counter; float itd_xfade_prev; int32_t last_active_element_brate; + float ipd_xfade_target; + float ipd_xfade_step; + int16_t ipd_xfade_counter; + float ipd_xfade_prev; #endif /*residual prediction*/ diff --git a/lib_dec/ivas_stereo_dft_dec.c b/lib_dec/ivas_stereo_dft_dec.c index a8eb063cd9..96732a0b3a 100644 --- a/lib_dec/ivas_stereo_dft_dec.c +++ b/lib_dec/ivas_stereo_dft_dec.c @@ -486,6 +486,10 @@ void stereo_dft_dec_reset( hStereoDft->itd_xfade_counter = 0; hStereoDft->itd_xfade_prev = 0.0f; hStereoDft->last_active_element_brate = 0; + hStereoDft->ipd_xfade_target = 0.0f; + hStereoDft->ipd_xfade_step = 0.0f; + hStereoDft->ipd_xfade_counter = 0; + hStereoDft->ipd_xfade_prev = 0.0f; #endif return; @@ -1796,6 +1800,7 @@ void stereo_dft_dec_read_BS( 
*total_brate = 0; #ifdef FIX_ITD_CNG hStereoDft->itd[k = hStereoDft->prm_res[k_offset] - 1 + k_offset] = hStereoDft->itd_xfade_target; + hStereoDft->gipd[hStereoDft->prm_res[k_offset] - 1 + k_offset] = hStereoDft->ipd_xfade_target; #endif return; @@ -2835,6 +2840,49 @@ void stereo_dft_dec_smooth_parameters( *( hStereoDft->side_gain + ( ( k + k_offset ) - 1 ) * STEREO_DFT_BAND_MAX + b ) = *( hStereoDft->side_gain + ( k + k_offset ) * STEREO_DFT_BAND_MAX + b ); } +#ifdef FIX_ITD_CNG + if ( hStereoDft->frame_sid_nodata ) + { + /* set new xfade target if new itd received */ + if ( hStereoDft->gipd[k + k_offset] != hStereoDft->ipd_xfade_target ) + { + if ( ( hStereoDft->gipd[k + k_offset] - hStereoDft->ipd_xfade_prev ) > EVS_PI ) + { + hStereoDft->ipd_xfade_target = hStereoDft->gipd[k + k_offset] - 2 * EVS_PI; + hStereoDft->ipd_xfade_step = ( hStereoDft->ipd_xfade_target - hStereoDft->ipd_xfade_prev ) / ( STEREO_DFT_ITD_CNG_XFADE - hStereoDft->ipd_xfade_counter ); + } + else if ( ( hStereoDft->ipd_xfade_prev - hStereoDft->gipd[k + k_offset] ) > EVS_PI ) + { + hStereoDft->ipd_xfade_target = hStereoDft->gipd[k + k_offset] + 2 * EVS_PI; + hStereoDft->ipd_xfade_step = ( hStereoDft->ipd_xfade_target - hStereoDft->ipd_xfade_prev ) / ( STEREO_DFT_ITD_CNG_XFADE - hStereoDft->ipd_xfade_counter ); + } + else + { + hStereoDft->ipd_xfade_target = hStereoDft->gipd[k + k_offset]; + hStereoDft->ipd_xfade_step = ( hStereoDft->ipd_xfade_target - hStereoDft->ipd_xfade_prev ) / ( STEREO_DFT_ITD_CNG_XFADE - hStereoDft->ipd_xfade_counter ); + } + } + + /* xfade */ + if ( hStereoDft->ipd_xfade_prev != hStereoDft->ipd_xfade_target && hStereoDft->ipd_xfade_counter < STEREO_DFT_ITD_CNG_XFADE && hStereoDft->last_active_element_brate <= 24400 ) + { + hStereoDft->gipd[k + k_offset] = hStereoDft->ipd_xfade_prev + hStereoDft->ipd_xfade_step; + hStereoDft->ipd_xfade_prev = hStereoDft->gipd[k + k_offset]; + hStereoDft->ipd_xfade_counter++; + } + } + else + { + /* First active frame, "reset" everything 
if long enough active encoding, only triggered if STEREO_DFT_ITD_CNG_XFADE_RESET = -1 */ + if ( active_frame_counter > STEREO_DFT_ITD_CNG_XFADE_RESET ) + { + hStereoDft->ipd_xfade_target = hStereoDft->gipd[k + k_offset]; + hStereoDft->ipd_xfade_prev = hStereoDft->gipd[k + k_offset]; + hStereoDft->ipd_xfade_counter = 0; + } + } +#endif + for ( k2 = 1; k2 < hStereoDft->prm_res[k + k_offset]; k2++ ) { hStereoDft->gipd[( k + k_offset ) - k2] = hStereoDft->gipd[k + k_offset]; @@ -2887,6 +2935,9 @@ void stereo_dft_dec_smooth_parameters( hStereoDft->itd_xfade_counter = 0; hStereoDft->itd_xfade_target = hStereoDft->itd[STEREO_DFT_NBDIV - 1]; hStereoDft->itd_xfade_prev = hStereoDft->itd[STEREO_DFT_NBDIV - 1]; + hStereoDft->ipd_xfade_counter = 0; + hStereoDft->ipd_xfade_target = hStereoDft->gipd[STEREO_DFT_NBDIV - 1]; + hStereoDft->ipd_xfade_prev = hStereoDft->gipd[STEREO_DFT_NBDIV - 1]; } #endif diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index a3ad1248d7..bc2651824e 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -194,6 +194,10 @@ typedef struct stereo_dft_enc_data_struct float Spd_R_smooth[STEREO_DFT_N_32k_ENC / 2]; float sid_gipd; int16_t coh_fade_counter; +#ifdef FIX_ITD_CNG + float prev_sid_gipd; + int16_t prev_sid_no_ipd_flag; +#endif /*IPD*/ float gipd[STEREO_DFT_ENC_DFT_NB]; diff --git a/lib_enc/ivas_stereo_dft_enc.c b/lib_enc/ivas_stereo_dft_enc.c index 6759cbf2f9..9adacddb16 100644 --- a/lib_enc/ivas_stereo_dft_enc.c +++ b/lib_enc/ivas_stereo_dft_enc.c @@ -62,6 +62,9 @@ static FILE *pF = NULL; #define STEREO_DFT_NRG_PAST_MAX_BAND_LB 4 #define STEREO_DFT_DMX_CROSSOVER ( int16_t )( 132 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) + 0.5f ) /* crossover bin between binwise and bandwise DMX */ #define ITD_VAD_E_BAND_N_INIT 200000 +#ifdef FIX_ITD_CNG +#define ITD_SID_PREV_FRAMES 5 +#endif /*------------------------------------------------------------------------- @@ -494,7 +497,10 @@ void stereo_dft_enc_reset( #ifdef 
FIX_ITD_CNG hStereoDft->currentNumUpdates = 0; hStereoDft->expectedNumUpdates = FIXED_SID_RATE; - hStereoDft->resetFrames = 0; + hStereoDft->resetFrames = 0; + hStereoDft->sid_gipd = 0; + hStereoDft->prev_sid_gipd = 0; + hStereoDft->prev_sid_no_ipd_flag = 1; #endif hStereoDft->coh_fade_counter = 0; @@ -1384,6 +1390,23 @@ void stereo_dft_enc_process( /* DFT stereo parameters */ stereo_dft_enc_compute_prm( hStereoDft, pDFT_L, pDFT_R, k_offset, 1, hCPE->hCoreCoder[0]->sp_aud_decision0, hCPE->hCoreCoder[0]->vad_flag, bin_nrgL, bin_nrgR, dot_prod_nrg_ratio ); +#ifdef FIX_ITD_CNG + if ( vad_flag_dtx[0] == 0 ) + { + if ( hCPE->hStereoCng->cng_counter == 0 && !hCPE->hStereoCng->first_SID_after_TD ) + { + hStereoDft->sid_gipd = hStereoDft->prev_sid_gipd; + hStereoDft->no_ipd_flag = hStereoDft->prev_sid_no_ipd_flag; + } + + if ( hCPE->hStereoCng->cng_counter > ITD_SID_PREV_FRAMES ) + { + hStereoDft->prev_sid_gipd = hStereoDft->sid_gipd; + hStereoDft->prev_sid_no_ipd_flag = hStereoDft->no_ipd_flag; + } + } +#endif + /*----------------------------------------------------------------* * UNCLR classifier (detection of uncorrelated L and R channels) *----------------------------------------------------------------*/ @@ -1440,7 +1463,11 @@ void stereo_dft_enc_process( } } +#ifdef FIX_ITD_CNG + if ( b < hStereoDft->res_cod_band_max && vad_flag_dtx[0] ) +#else if ( b < hStereoDft->res_cod_band_max ) +#endif { #ifdef DEBUGGING assert( hStereoDft->nbands == hStereoDft->nbands_dmx && "Don't use coarser stereo parameter resolution for residual coding bitrates!" 
); @@ -2437,7 +2464,6 @@ void stereo_dft_enc_write_BS( #endif ( *nb_bits ) += nb; - /*----------------------------------------------------------------* * Residual prediction *----------------------------------------------------------------*/ diff --git a/lib_enc/ivas_stereo_dft_enc_itd.c b/lib_enc/ivas_stereo_dft_enc_itd.c index 3961865e56..c679613dd6 100644 --- a/lib_enc/ivas_stereo_dft_enc_itd.c +++ b/lib_enc/ivas_stereo_dft_enc_itd.c @@ -1132,6 +1132,7 @@ void stereo_dft_enc_compute_itd( /* expectedNumUpdates updated after call to dtx() in SID frames */ cng_xcorr_filt = max( min( CORR_FILT, 10.0f * CORR_FILT / ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates ) ), sfm_L ); hStereoDft->currentNumUpdates++; + hStereoDft->sfm = cng_xcorr_filt; } else /* use sfm for active frames */ { -- GitLab From 030f5f870bbce6715664f22deadb6eaed0ed893b Mon Sep 17 00:00:00 2001 From: Fredrik Jansson Date: Fri, 2 Dec 2022 10:26:52 +0100 Subject: [PATCH 3/3] Apply clang format --- lib_com/prot.h | 2 +- lib_dec/ivas_stereo_dft_dec.c | 5 ++--- lib_enc/ivas_core_pre_proc_front.c | 2 +- lib_enc/ivas_stat_enc.h | 14 +++++++------- lib_enc/ivas_stereo_cng_enc.c | 12 ++++++------ lib_enc/ivas_stereo_dft_enc.c | 6 +++--- lib_enc/ivas_stereo_dft_enc_itd.c | 16 ++++++++-------- lib_enc/ivas_stereo_dft_td_itd.c | 2 +- lib_enc/vad.c | 2 +- 9 files changed, 30 insertions(+), 31 deletions(-) diff --git a/lib_com/prot.h b/lib_com/prot.h index c6b2c07539..0800ac92f5 100755 --- a/lib_com/prot.h +++ b/lib_com/prot.h @@ -3839,7 +3839,7 @@ int16_t dtx_hangover_addition( NOISE_EST_HANDLE hNoiseEst /* i : Noise estimation handle */ #ifdef FIX_ITD_CNG , - int16_t *rem_dtx_ho /* o : Expected remaining hangover frames */ + int16_t *rem_dtx_ho /* o : Expected remaining hangover frames */ #endif ); diff --git a/lib_dec/ivas_stereo_dft_dec.c b/lib_dec/ivas_stereo_dft_dec.c index bcae1b48bf..32d3510f80 100644 --- a/lib_dec/ivas_stereo_dft_dec.c +++ b/lib_dec/ivas_stereo_dft_dec.c @@ -2776,8 
+2776,8 @@ void stereo_dft_dec_smooth_parameters( const int16_t prev_sid_nodata /* i : Previous SID/No data indicator */ #ifdef FIX_ITD_CNG , - const int16_t active_frame_counter, /* i : Active frame counter */ - const int32_t element_brate /* i : Element bitrate */ + const int16_t active_frame_counter, /* i : Active frame counter */ + const int32_t element_brate /* i : Element bitrate */ #endif ) { @@ -2877,7 +2877,6 @@ void stereo_dft_dec_smooth_parameters( } hStereoDft->last_active_element_brate = element_brate; - } #endif for ( k2 = 1; k2 < hStereoDft->prm_res[k + k_offset]; k2++ ) diff --git a/lib_enc/ivas_core_pre_proc_front.c b/lib_enc/ivas_core_pre_proc_front.c index 26bfa6cc1e..e38f81a79f 100644 --- a/lib_enc/ivas_core_pre_proc_front.c +++ b/lib_enc/ivas_core_pre_proc_front.c @@ -455,7 +455,7 @@ ivas_error pre_proc_front_ivas( if ( ( hCPE != NULL && !( lr_vad_enabled && st->idchan == 0 ) ) || hSCE != NULL ) { - *vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, 0, vad_hover_flag, NULL, NULL + *vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, 0, vad_hover_flag, NULL, NULL #ifdef FIX_ITD_CNG , NULL diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index 1b59a26a6f..dfe32a9a02 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -235,7 +235,7 @@ typedef struct stereo_dft_enc_data_struct #endif -#ifdef FIX_ITD_CNG +#ifdef FIX_ITD_CNG int16_t currentNumUpdates; int16_t expectedNumUpdates; /* Expected number of frames before use of ITD estimate */ int16_t resetFrames; @@ -568,7 +568,7 @@ typedef struct front_vad_enc float *delay_buf; int16_t delay_samples; #ifdef FIX_ITD_CNG - int16_t rem_dtx_ho; /* Remaining hangover frames */ + int16_t rem_dtx_ho; /* Remaining hangover frames */ #endif } FRONT_VAD_ENC, *FRONT_VAD_ENC_HANDLE; @@ -823,14 +823,14 @@ typedef struct stereo_cng_enc float prev_sg_average[STEREO_DFT_ERB4_BANDS]; /* Previous sidegain average */ float 
mem_cohBand[STEREO_DFT_BAND_MAX / 2]; /* Coherence memory */ #ifdef FIX_ITD_CNG - float prev_cohBand[2*(STEREO_DFT_BAND_MAX/2)];/* Previous coherence */ - int16_t cng_counter; /* Counter for cng period length */ + float prev_cohBand[2 * ( STEREO_DFT_BAND_MAX / 2 )]; /* Previous coherence */ + int16_t cng_counter; /* Counter for cng period length */ #else float coh_crossfade[STEREO_DFT_BAND_MAX / 2]; /* Coherence memory */ #endif - int16_t td_active; /* TD-stereo indication */ - int16_t first_SID_after_TD; /* Set if first SID frame after TD stereo */ - int16_t first_SID; /* Set if first SID frame since codec start */ + int16_t td_active; /* TD-stereo indication */ + int16_t first_SID_after_TD; /* Set if first SID frame after TD stereo */ + int16_t first_SID; /* Set if first SID frame since codec start */ } STEREO_CNG_ENC, *STEREO_CNG_ENC_HANDLE; diff --git a/lib_enc/ivas_stereo_cng_enc.c b/lib_enc/ivas_stereo_cng_enc.c index 6c13f1f578..68be66c2a1 100644 --- a/lib_enc/ivas_stereo_cng_enc.c +++ b/lib_enc/ivas_stereo_cng_enc.c @@ -63,15 +63,15 @@ * ---------------------------------------------------------------*/ void stereo_dft_enc_sid_calc_coh( - STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: DFT stereo handle */ + STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: DFT stereo handle */ #ifdef FIX_ITD_CNG float prev_cohBand[2 * ( STEREO_DFT_BAND_MAX / 2 )], /* i/o: Previous coherence */ #else float coh_crossfade[STEREO_DFT_BAND_MAX / 2], /* i/o: Coherence crossfade memory */ #endif - int16_t *td_active, /* i/o: TD stereo mode indicator */ - int16_t *first_SID, /* i/o: First SID indicator */ - float *cohBand /* i/o: Coherence per band */ + int16_t *td_active, /* i/o: TD stereo mode indicator */ + int16_t *first_SID, /* i/o: First SID indicator */ + float *cohBand /* i/o: Coherence per band */ ) { int16_t b, k; @@ -130,7 +130,7 @@ void stereo_dft_enc_sid_calc_coh( { #ifdef FIX_ITD_CNG mvr2r( cohBand, prev_cohBand, hStereoDft->nbands ); - mvr2r( prev_cohBand, &( 
prev_cohBand[ STEREO_DFT_BAND_MAX / 2 ] ), hStereoDft->nbands ); + mvr2r( prev_cohBand, &( prev_cohBand[STEREO_DFT_BAND_MAX / 2] ), hStereoDft->nbands ); #else mvr2r( cohBand, coh_crossfade, hStereoDft->nbands ); #endif @@ -543,7 +543,7 @@ void stereo_cng_upd_counters( const int16_t burst_ho_count /* i : Hang-over count */ #ifdef FIX_ITD_CNG , - int16_t *coh_fade_counter /* i : Coherence fade counter */ + int16_t *coh_fade_counter /* i : Coherence fade counter */ #endif ) { diff --git a/lib_enc/ivas_stereo_dft_enc.c b/lib_enc/ivas_stereo_dft_enc.c index aa18c059a1..e54b98a942 100644 --- a/lib_enc/ivas_stereo_dft_enc.c +++ b/lib_enc/ivas_stereo_dft_enc.c @@ -494,10 +494,10 @@ void stereo_dft_enc_reset( set_f( hStereoDft->Spd_L_smooth, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); set_f( hStereoDft->Spd_R_smooth, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); -#ifdef FIX_ITD_CNG +#ifdef FIX_ITD_CNG hStereoDft->currentNumUpdates = 0; hStereoDft->expectedNumUpdates = FIXED_SID_RATE; - hStereoDft->resetFrames = 0; + hStereoDft->resetFrames = 0; hStereoDft->sid_gipd = 0; hStereoDft->prev_sid_gipd = 0; hStereoDft->prev_sid_no_ipd_flag = 1; @@ -1234,7 +1234,7 @@ float stereo_dft_enc_synthesize( *-------------------------------------------------------------------------*/ void stereo_dft_enc_process( - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ #ifdef FIX_ITD_CNG const int16_t vad_flag_dtx[], /* i: VAD dtx flags */ const int16_t vad_hover_flag[], /* i: VAD hangover flags */ diff --git a/lib_enc/ivas_stereo_dft_enc_itd.c b/lib_enc/ivas_stereo_dft_enc_itd.c index c679613dd6..e917c36e36 100644 --- a/lib_enc/ivas_stereo_dft_enc_itd.c +++ b/lib_enc/ivas_stereo_dft_enc_itd.c @@ -68,7 +68,7 @@ #define XSPEC_ALPHA ( 1.f / 32 ) #ifdef FIX_ITD_CNG -#define CORR_FILT 0.8f +#define CORR_FILT 0.8f #define CORR_RESET_FRAMES_MAX 20 #endif @@ -727,7 +727,7 @@ void stereo_dft_enc_compute_itd( const int16_t k_offset, const int16_t input_frame, #ifdef 
FIX_ITD_CNG - const int16_t vad_flag_dtx[], + const int16_t vad_flag_dtx[], const int16_t vad_hover_flag[], #endif float *bin_nrgL, @@ -940,7 +940,7 @@ void stereo_dft_enc_compute_itd( #ifdef FIX_ITD_CNG vad_flag_itd = vad_flag_itd && vad_flag_dtx[0]; -#endif +#endif if ( sum_nrg_L < EPSILON ) { @@ -1076,7 +1076,7 @@ void stereo_dft_enc_compute_itd( if ( vad_hover_flag[0] && vad_hover_flag[1] ) { /* Determine if we are in the first DTX hangover frame (also triggers for VAD hangover frame) */ - if ( hStereoDft->resetFrames > CORR_RESET_FRAMES_MAX ) + if ( hStereoDft->resetFrames > CORR_RESET_FRAMES_MAX ) { /* Reset cross spectrum when there is hangover */ set_f( hStereoDft->xspec_smooth, 0.0f, STEREO_DFT_N_32k_ENC ); @@ -1108,17 +1108,17 @@ void stereo_dft_enc_compute_itd( hStereoDft->resetFrames = 0; } else - { + { if ( hStereoDft->resetFrames < CORR_RESET_FRAMES_MAX + 1 ) { - hStereoDft->resetFrames++; + hStereoDft->resetFrames++; } if ( !vad_hover_flag[0] && !vad_hover_flag[1] ) { hStereoDft->expectedNumUpdates = hStereoDft->currentNumUpdates; } } - } + } #endif #ifdef FIX_ITD_CNG if ( ( vad_flag_dtx[0] == 0 ) || ( hCPE->hFrontVad[0] == NULL && ( hCPE->hCoreCoder[0]->last_core_brate == SID_2k40 || hCPE->hCoreCoder[0]->last_core_brate == FRAME_NO_DATA ) ) || hCPE->hStereoCng->first_SID_after_TD ) @@ -1146,7 +1146,7 @@ void stereo_dft_enc_compute_itd( } #endif for ( i = 1; i < NFFT / 2; i++ ) - { + { /* Low pass filter cross L/R power spectrum */ hStereoDft->xspec_smooth[2 * i] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->xspec_smooth[2 * i] + XSPEC_ALPHA * xcorr[2 * i]; hStereoDft->xspec_smooth[2 * i + 1] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->xspec_smooth[2 * i + 1] + XSPEC_ALPHA * xcorr[2 * i + 1]; diff --git a/lib_enc/ivas_stereo_dft_td_itd.c b/lib_enc/ivas_stereo_dft_td_itd.c index c8d97b6563..fd4cbdde71 100644 --- a/lib_enc/ivas_stereo_dft_td_itd.c +++ b/lib_enc/ivas_stereo_dft_td_itd.c @@ -383,7 +383,7 @@ void stereo_td_itd( * 
---------------------------------------------------------------*/ void stereo_td_itd_mdct_stereo( - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder handle */ + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder handle */ #ifdef FIX_ITD_CNG const int16_t vad_flag_dtx[], /* i: VAD dtx flags */ const int16_t vad_hover_flag[], /* i: VAD hangover flags */ diff --git a/lib_enc/vad.c b/lib_enc/vad.c index b095ffa6e5..98b6ad240e 100644 --- a/lib_enc/vad.c +++ b/lib_enc/vad.c @@ -164,7 +164,7 @@ int16_t dtx_hangover_addition( NOISE_EST_HANDLE hNoiseEst /* i : Noise estimation handle */ #ifdef FIX_ITD_CNG , - int16_t *rem_dtx_ho /* o : Expected remaining hangover frames */ + int16_t *rem_dtx_ho /* o : Expected remaining hangover frames */ #endif ) { -- GitLab