diff --git a/lib_com/ivas_cnst.h b/lib_com/ivas_cnst.h index 9d5e02c66fbe976b97e5558deffb6b10b196fe62..c575be0ee3c46a51f4349702ab125e9ec381f90c 100644 --- a/lib_com/ivas_cnst.h +++ b/lib_com/ivas_cnst.h @@ -469,6 +469,11 @@ enum #define STEREO_DFT_OFFSET 1 #define STEREO_DFT_NBDIV 2 +#ifdef FIX_ITD_CNG +#define STEREO_DFT_ITD_CNG_XFADE 100 +#define STEREO_DFT_ITD_CNG_XFADE_RESET 2 +#endif + #define STEREO_DFT_DELAY_DEC_BWE_NS ( STEREO_DFT_OFFSET * STEREO_DFT_HOP_NS - ACELP_LOOK_NS ) /* 1.25ms/2.5ms: max delay for core decoder*/ #define STEREO_DFT_ENC_DFT_NB ( STEREO_DFT_OFFSET + 1 ) /*frame + lookahead*/ @@ -534,6 +539,10 @@ typedef enum #define STEREO_DFT_SID_GIPD_NBITS 2 #define STEREO_DFT_FD_FILT 0.9f +#ifdef FIX_ITD_CNG +#define STEREO_DFT_CNG_ITD_CNT 8 +#endif + /*Residual prediction*/ #define STEREO_DFT_PAST_MAX 4 #define STEREO_DFT_RES_PRED_BAND_MAX 12 diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index 4dfc6c0dfd502fb85dcaa7a5ee1d59af738e6517..2d7ef742103c0ff6994b500c2a5711053e40c3ee 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -925,6 +925,10 @@ float stereo_dft_enc_synthesize( void stereo_dft_enc_process( CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ +#ifdef FIX_ITD_CNG + const int16_t vad_flag_dtx[], /* i: VAD dtx flags */ + const int16_t vad_hover_flag[], /* i: VAD hangover flags */ +#endif const int16_t input_frame /* i : input frame length */ ); @@ -973,7 +977,11 @@ void stereo_dft_dequantize_itd( void stereo_dft_enc_sid_calc_coh( STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: DFT stereo handle */ +#ifdef FIX_ITD_CNG + float prev_cohBand[2*(STEREO_DFT_BAND_MAX/2)], /* i/o: Previous coherence */ +#else float coh_crossfade[STEREO_DFT_BAND_MAX / 2], /* i/o: Coherence crossfade memory */ +#endif int16_t *td_active, /* i/o: TD stereo mode indicator */ int16_t *first_SID, /* i/o: First SID indicator */ float *cohBand /* i/o: Coherence per band */ @@ -1126,6 +1134,11 @@ void stereo_dft_dec_read_BS( void 
stereo_dft_dec_smooth_parameters( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ const int16_t prev_sid_nodata /* i : Previous SID/No data indicator */ +#ifdef FIX_ITD_CNG + , + const int16_t active_frame_counter, /* i : Active frame counter */ + const int32_t element_brate /* i : Element bitrate */ +#endif ); void stereo_dft_generate_res_pred( @@ -1281,6 +1294,10 @@ void stereo_dft_enc_compute_itd( float *DFT_R, const int16_t k_offset, const int16_t input_frame, +#ifdef FIX_ITD_CNG + const int16_t vad_flag_dtx[], + const int16_t vad_hover_flag[], +#endif float *bin_nrgL, float *bin_nrgR ); @@ -1836,6 +1853,10 @@ void deindex_lvq_SHB( void stereo_td_itd_mdct_stereo( CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder handle */ +#ifdef FIX_ITD_CNG + const int16_t vad_flag_dtx[], /* i: VAD dtx flags */ + const int16_t vad_hover_flag[], /* i: VAD hangover flags */ +#endif const int16_t input_frame /* i : frame length */ ); @@ -2395,6 +2416,10 @@ void stereo_cng_upd_counters( const int16_t nbands, /* i : Number of bands in active */ const float sidSideGain[], /* i : SID side gains */ const int16_t burst_ho_count /* i : Hang-over count */ +#ifdef FIX_ITD_CNG + , + int16_t *coh_fade_counter /* i : Coherence fade counter */ +#endif ); void stereo_cng_init_dec( diff --git a/lib_com/options.h b/lib_com/options.h index 388f61bd95bb6d32042c706bdf9b3668e9c87278..b5063ecf3954295412f94ce99af490a76d7883cf 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -177,6 +177,8 @@ #define FIX_ISM_DECODER_PRINTOUT /* Issue 229: fix ISM decoder printout */ +#define FIX_ITD_CNG /* Eri: Fix for CNG ITD */ + /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ #endif diff --git a/lib_com/prot.h b/lib_com/prot.h index 57a161f225a9cf830f05d4a4e6b3224c037b70c9..0800ac92f512ca10e19a0c5837f7b4d9d1dbbda7 100755 --- a/lib_com/prot.h +++ b/lib_com/prot.h @@ -3837,6 +3837,10 @@ int16_t dtx_hangover_addition( int16_t *vad_hover_flag, 
/* o : VAD hangover flag */ VAD_HANDLE hVAD, /* i/o: VAD handle for L or R channel */ NOISE_EST_HANDLE hNoiseEst /* i : Noise estimation handle */ +#ifdef FIX_ITD_CNG + , + int16_t *rem_dtx_ho /* o : Expected remaining hangover frames */ +#endif ); int16_t wb_vad( diff --git a/lib_dec/ivas_stat_dec.h b/lib_dec/ivas_stat_dec.h index 7e867a7f19c695810a43a32308ff77e79fe4e94b..552435db66825cee3a7bc2a32a047be70428d7a3 100644 --- a/lib_dec/ivas_stat_dec.h +++ b/lib_dec/ivas_stat_dec.h @@ -147,6 +147,18 @@ typedef struct stereo_dft_dec_data_struct float itd[STEREO_DFT_DEC_DFT_NB]; +#ifdef FIX_ITD_CNG + float itd_xfade_step; + float itd_xfade_target; + int16_t itd_xfade_counter; + float itd_xfade_prev; + int32_t last_active_element_brate; + float ipd_xfade_target; + float ipd_xfade_step; + int16_t ipd_xfade_counter; + float ipd_xfade_prev; +#endif + /*residual prediction*/ int16_t res_pred_mode[STEREO_DFT_DEC_DFT_NB]; /* residual prediction mode: 0(off), 1(stereo filling only), 2(enhanced stereo filling) */ float res_pred_gain[STEREO_DFT_DEC_DFT_NB * STEREO_DFT_BAND_MAX]; /* prediction gain for the residual HFs */ diff --git a/lib_dec/ivas_stereo_dft_dec.c b/lib_dec/ivas_stereo_dft_dec.c index 503c7e88f21f2aaa5239eb74eb4f21b47899aa52..32d3510f8019e24f9f5e36a264fb77fb1c35496a 100644 --- a/lib_dec/ivas_stereo_dft_dec.c +++ b/lib_dec/ivas_stereo_dft_dec.c @@ -480,6 +480,18 @@ void stereo_dft_dec_reset( set_zero( hStereoDft->smooth_fac[0], SBA_DIRAC_STEREO_NUM_BANDS ); set_zero( hStereoDft->smooth_fac[1], SBA_DIRAC_STEREO_NUM_BANDS ); +#ifdef FIX_ITD_CNG + hStereoDft->itd_xfade_target = 0.0f; + hStereoDft->itd_xfade_step = 0.0f; + hStereoDft->itd_xfade_counter = 0; + hStereoDft->itd_xfade_prev = 0.0f; + hStereoDft->last_active_element_brate = 0; + hStereoDft->ipd_xfade_target = 0.0f; + hStereoDft->ipd_xfade_step = 0.0f; + hStereoDft->ipd_xfade_counter = 0; + hStereoDft->ipd_xfade_prev = 0.0f; +#endif + return; } @@ -1162,7 +1174,11 @@ void stereo_dft_dec( } else { +#ifdef 
FIX_ITD_CNG + stereo_dft_dec_smooth_parameters( hStereoDft, hStereoCng->prev_sid_nodata, hStereoCng->active_frame_counter, st0->element_brate ); +#else stereo_dft_dec_smooth_parameters( hStereoDft, hStereoCng->prev_sid_nodata ); +#endif } } @@ -1746,6 +1762,10 @@ void stereo_dft_dec_read_BS( * Initialization *-----------------------------------------------------------------*/ +#ifdef FIX_ITD_CNG + k_offset = STEREO_DFT_OFFSET; +#endif + if ( ivas_total_brate == IVAS_SID_5k2 ) { if ( ivas_format == MASA_FORMAT ) @@ -1770,6 +1790,10 @@ void stereo_dft_dec_read_BS( hStereoDft->frame_sid = 0; *nb_bits = 0; *total_brate = 0; +#ifdef FIX_ITD_CNG + hStereoDft->itd[k = hStereoDft->prm_res[k_offset] - 1 + k_offset] = hStereoDft->itd_xfade_target; + hStereoDft->gipd[hStereoDft->prm_res[k_offset] - 1 + k_offset] = hStereoDft->ipd_xfade_target; +#endif return; } @@ -1800,7 +1824,9 @@ void stereo_dft_dec_read_BS( /*init*/ max_bits = *nb_bits; *nb_bits = 0; +#ifndef FIX_ITD_CNG k_offset = STEREO_DFT_OFFSET; +#endif N_div = STEREO_DFT_NBDIV; if ( ivas_total_brate > IVAS_SID_5k2 ) @@ -2748,6 +2774,11 @@ void stereo_dft_generate_res_pred( void stereo_dft_dec_smooth_parameters( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ const int16_t prev_sid_nodata /* i : Previous SID/No data indicator */ +#ifdef FIX_ITD_CNG + , + const int16_t active_frame_counter, /* i : Active frame counter */ + const int32_t element_brate /* i : Element bitrate */ +#endif ) { int16_t k_offset, k, k2, b, N_div; @@ -2769,11 +2800,85 @@ void stereo_dft_dec_smooth_parameters( *( hStereoDft->side_gain + ( ( k + k_offset ) - 1 ) * STEREO_DFT_BAND_MAX + b ) = *( hStereoDft->side_gain + ( k + k_offset ) * STEREO_DFT_BAND_MAX + b ); } +#ifdef FIX_ITD_CNG + if ( hStereoDft->frame_sid_nodata ) + { + /* set new xfade target if new itd received */ + if ( hStereoDft->gipd[k + k_offset] != hStereoDft->ipd_xfade_target ) + { + if ( ( hStereoDft->gipd[k + k_offset] - hStereoDft->ipd_xfade_prev ) 
> EVS_PI ) + { + hStereoDft->ipd_xfade_target = hStereoDft->gipd[k + k_offset] - 2 * EVS_PI; + hStereoDft->ipd_xfade_step = ( hStereoDft->ipd_xfade_target - hStereoDft->ipd_xfade_prev ) / ( STEREO_DFT_ITD_CNG_XFADE - hStereoDft->ipd_xfade_counter ); + } + else if ( ( hStereoDft->ipd_xfade_prev - hStereoDft->gipd[k + k_offset] ) > EVS_PI ) + { + hStereoDft->ipd_xfade_target = hStereoDft->gipd[k + k_offset] + 2 * EVS_PI; + hStereoDft->ipd_xfade_step = ( hStereoDft->ipd_xfade_target - hStereoDft->ipd_xfade_prev ) / ( STEREO_DFT_ITD_CNG_XFADE - hStereoDft->ipd_xfade_counter ); + } + else + { + hStereoDft->ipd_xfade_target = hStereoDft->gipd[k + k_offset]; + hStereoDft->ipd_xfade_step = ( hStereoDft->ipd_xfade_target - hStereoDft->ipd_xfade_prev ) / ( STEREO_DFT_ITD_CNG_XFADE - hStereoDft->ipd_xfade_counter ); + } + } + + /* xfade */ + if ( hStereoDft->ipd_xfade_prev != hStereoDft->ipd_xfade_target && hStereoDft->ipd_xfade_counter < STEREO_DFT_ITD_CNG_XFADE && hStereoDft->last_active_element_brate <= 24400 ) + { + hStereoDft->gipd[k + k_offset] = hStereoDft->ipd_xfade_prev + hStereoDft->ipd_xfade_step; + hStereoDft->ipd_xfade_prev = hStereoDft->gipd[k + k_offset]; + hStereoDft->ipd_xfade_counter++; + } + } + else + { + /* First active frame, "reset" everything if long enough active encoding, only triggered if STEREO_DFT_ITD_CNG_XFADE_RESET = -1 */ + if ( active_frame_counter > STEREO_DFT_ITD_CNG_XFADE_RESET ) + { + hStereoDft->ipd_xfade_target = hStereoDft->gipd[k + k_offset]; + hStereoDft->ipd_xfade_prev = hStereoDft->gipd[k + k_offset]; + hStereoDft->ipd_xfade_counter = 0; + } + } +#endif + for ( k2 = 1; k2 < hStereoDft->prm_res[k + k_offset]; k2++ ) { hStereoDft->gipd[( k + k_offset ) - k2] = hStereoDft->gipd[k + k_offset]; } +#ifdef FIX_ITD_CNG + if ( hStereoDft->frame_sid_nodata ) + { + /* set new xfade target if new itd received */ + if ( hStereoDft->itd[k + k_offset] != hStereoDft->itd_xfade_target ) + { + hStereoDft->itd_xfade_target = hStereoDft->itd[k + 
k_offset]; + hStereoDft->itd_xfade_step = ( hStereoDft->itd_xfade_target - hStereoDft->itd_xfade_prev ) / ( STEREO_DFT_ITD_CNG_XFADE - hStereoDft->itd_xfade_counter ); + } + + /* xfade */ + if ( hStereoDft->itd_xfade_prev != hStereoDft->itd_xfade_target && hStereoDft->itd_xfade_counter < STEREO_DFT_ITD_CNG_XFADE && hStereoDft->last_active_element_brate <= 24400 ) + { + hStereoDft->itd[k + k_offset] = hStereoDft->itd_xfade_prev + hStereoDft->itd_xfade_step; + hStereoDft->itd_xfade_prev = hStereoDft->itd[k + k_offset]; + hStereoDft->itd_xfade_counter++; + } + } + else + { + /* First active frame, "reset" everything if long enough active encoding, only triggered if STEREO_DFT_ITD_CNG_XFADE_RESET = -1 */ + if ( active_frame_counter > STEREO_DFT_ITD_CNG_XFADE_RESET ) + { + hStereoDft->itd_xfade_target = hStereoDft->itd[k + k_offset]; + hStereoDft->itd_xfade_prev = hStereoDft->itd[k + k_offset]; + hStereoDft->itd_xfade_counter = 0; + } + + hStereoDft->last_active_element_brate = element_brate; + } +#endif for ( k2 = 1; k2 < hStereoDft->prm_res[k + k_offset]; k2++ ) { hStereoDft->itd[( k + k_offset ) - k2] = hStereoDft->itd[k + k_offset]; @@ -2782,6 +2887,23 @@ void stereo_dft_dec_smooth_parameters( return; } +#ifdef FIX_ITD_CNG + /* Active frame, "reset" everything if long enough active encoding */ + if ( active_frame_counter > STEREO_DFT_ITD_CNG_XFADE_RESET ) + { + hStereoDft->itd_xfade_counter = 0; + hStereoDft->itd_xfade_target = hStereoDft->itd[STEREO_DFT_NBDIV - 1]; + hStereoDft->itd_xfade_prev = hStereoDft->itd[STEREO_DFT_NBDIV - 1]; + hStereoDft->ipd_xfade_counter = 0; + hStereoDft->ipd_xfade_target = hStereoDft->gipd[STEREO_DFT_NBDIV - 1]; + hStereoDft->ipd_xfade_prev = hStereoDft->gipd[STEREO_DFT_NBDIV - 1]; + } +#endif + +#ifdef FIX_ITD_CNG + hStereoDft->last_active_element_brate = element_brate; +#endif + for ( k = hStereoDft->prm_res[k_offset] - 1; k < N_div; k += hStereoDft->prm_res[k + k_offset] ) { max_res_pred_ind = 0; diff --git
a/lib_dec/ivas_stereo_dft_dec_dmx.c b/lib_dec/ivas_stereo_dft_dec_dmx.c index 48ee7cf4e67a1c77a9262be212feba8ec5f56fe6..f58b6ddca44bdeaed3cba81f7aeeb43bb907fa84 100644 --- a/lib_dec/ivas_stereo_dft_dec_dmx.c +++ b/lib_dec/ivas_stereo_dft_dec_dmx.c @@ -130,7 +130,11 @@ void stereo_dft_unify_dmx( ( st0->core == TCX_20_CORE && ( ( st0->hTcxCfg->tcx_last_overlap_mode == MIN_OVERLAP ) || ( st0->hTcxCfg->tcx_last_overlap_mode == HALF_OVERLAP ) ) ) || ( st0->core == TCX_10_CORE ); /* Smoothing for the current frame */ +#ifdef FIX_ITD_CNG + stereo_dft_dec_smooth_parameters( hStereoDft, prev_sid_nodata, st0->hFdCngDec->hFdCngCom->active_frame_counter, st0->element_brate ); +#else stereo_dft_dec_smooth_parameters( hStereoDft, prev_sid_nodata ); +#endif for ( k = 0; k < N_div; k++ ) { diff --git a/lib_enc/amr_wb_enc.c b/lib_enc/amr_wb_enc.c index eebfc1c82cefe904bb3543516695cee5c79c4d09..68903a62da459ea0108a8ffb10960efeacec335c 100644 --- a/lib_enc/amr_wb_enc.c +++ b/lib_enc/amr_wb_enc.c @@ -292,7 +292,12 @@ void amr_wb_enc( } /* apply DTX hangover for CNG analysis */ - vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, 0, &vad_hover_flag, NULL, NULL ); + vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, 0, &vad_hover_flag, NULL, NULL +#ifdef FIX_ITD_CNG + , + NULL +#endif + ); /*-----------------------------------------------------------------* * Select SID or FRAME_NO_DATA frame if DTX enabled diff --git a/lib_enc/ivas_core_pre_proc_front.c b/lib_enc/ivas_core_pre_proc_front.c index 081d0511df8a7e2120f77942facd9641f80132fe..e38f81a79f67e021b5e740509f51ccf6776897ac 100644 --- a/lib_enc/ivas_core_pre_proc_front.c +++ b/lib_enc/ivas_core_pre_proc_front.c @@ -455,7 +455,12 @@ ivas_error pre_proc_front_ivas( if ( ( hCPE != NULL && !( lr_vad_enabled && st->idchan == 0 ) ) || hSCE != NULL ) { - *vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, 0, vad_hover_flag, NULL, NULL 
); + *vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, 0, vad_hover_flag, NULL, NULL +#ifdef FIX_ITD_CNG + , + NULL +#endif + ); } else { @@ -557,6 +562,14 @@ ivas_error pre_proc_front_ivas( dtx( st, *vad_flag_dtx, inp_12k8 ); #endif +#ifdef FIX_ITD_CNG + if ( hCPE != NULL && hCPE->hStereoDft != NULL && st->core_brate == SID_2k40 ) + { + /* Add another period of expected xcorr updates */ + hCPE->hStereoDft->expectedNumUpdates += st->hDtxEnc->max_SID; + } +#endif + /*----------------------------------------------------------------* * Adjust FD-CNG Noise Estimator *----------------------------------------------------------------*/ diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index 5c4011fe0ceadee0f6885570c3d7b1d595e2d8c6..8295171f9f39336d3a13ff7cf715733acf9eb6f1 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -349,7 +349,11 @@ ivas_error ivas_cpe_enc( stereo_dft_enc_update( hCPE->hStereoDft, sts[0]->max_bwidth ); /* DFT stereo processing */ +#ifdef FIX_ITD_CNG + stereo_dft_enc_process( hCPE, vad_flag_dtx, vad_hover_flag, input_frame ); +#else stereo_dft_enc_process( hCPE, input_frame ); +#endif } else if ( hCPE->element_mode == IVAS_CPE_TD ) { @@ -369,7 +373,11 @@ ivas_error ivas_cpe_enc( } else if ( hCPE->element_mode == IVAS_CPE_MDCT ) { +#ifdef FIX_ITD_CNG + stereo_td_itd_mdct_stereo( hCPE, vad_flag_dtx, vad_hover_flag, input_frame ); +#else stereo_td_itd_mdct_stereo( hCPE, input_frame ); +#endif } /*----------------------------------------------------------------* @@ -520,7 +528,11 @@ ivas_error ivas_cpe_enc( if ( hEncoderConfig->Opt_DTX_ON ) { +#ifdef FIX_ITD_CNG + stereo_cng_upd_counters( hCPE->hStereoCng, hCPE->element_mode, -1, NULL, sts[0]->hTdCngEnc->burst_ho_cnt, NULL ); +#else stereo_cng_upd_counters( hCPE->hStereoCng, hCPE->element_mode, -1, NULL, sts[0]->hTdCngEnc->burst_ho_cnt ); +#endif } } @@ -573,7 +585,11 @@ ivas_error ivas_cpe_enc( } else { +#ifdef FIX_ITD_CNG + 
stereo_cng_upd_counters( hCPE->hStereoCng, hCPE->element_mode, hCPE->hStereoDft->nbands, hCPE->hStereoDft->sidSideGain, sts[0]->hTdCngEnc->burst_ho_cnt, &hCPE->hStereoDft->coh_fade_counter ); +#else stereo_cng_upd_counters( hCPE->hStereoCng, hCPE->element_mode, hCPE->hStereoDft->nbands, hCPE->hStereoDft->sidSideGain, sts[0]->hTdCngEnc->burst_ho_cnt ); +#endif } } diff --git a/lib_enc/ivas_front_vad.c b/lib_enc/ivas_front_vad.c index 70b900da6db1c5eec1aa2cf91f035cefc70f0b62..89d073aebad74ef6233a6184f36ace91a694524f 100644 --- a/lib_enc/ivas_front_vad.c +++ b/lib_enc/ivas_front_vad.c @@ -110,6 +110,9 @@ ivas_error front_vad( { localVAD_HE_SAD[n] = 0; vad_hover_flag[n] = 0; +#ifdef FIX_ITD_CNG + vad_flag_dtx[n] = 1; +#endif } /*------------------------------------------------------------------* @@ -192,7 +195,12 @@ ivas_error front_vad( } /* DTX hangover addition */ - vad_flag_dtx[n] = dtx_hangover_addition( sts[n], hFrontVad->hVAD->vad_flag, hFrontVad->lp_speech - hFrontVad->lp_noise, 0 /* <- no cldfb addition */, &vad_hover_flag[n], hFrontVad->hVAD, hFrontVad->hNoiseEst ); + vad_flag_dtx[n] = dtx_hangover_addition( sts[n], hFrontVad->hVAD->vad_flag, hFrontVad->lp_speech - hFrontVad->lp_noise, 0 /* <- no cldfb addition */, &vad_hover_flag[n], hFrontVad->hVAD, hFrontVad->hNoiseEst +#ifdef FIX_ITD_CNG + , + &hFrontVads[n]->rem_dtx_ho +#endif + ); if ( n_chan == 1 ) { diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index 0644fcd918e5216b40cd1610449e190ba5cd352b..dfe32a9a020eeca01e09aeb4486ee5e1579e9b28 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -193,6 +193,10 @@ typedef struct stereo_dft_enc_data_struct float Spd_R_smooth[STEREO_DFT_N_32k_ENC / 2]; float sid_gipd; int16_t coh_fade_counter; +#ifdef FIX_ITD_CNG + float prev_sid_gipd; + int16_t prev_sid_no_ipd_flag; +#endif /*IPD*/ float gipd[STEREO_DFT_ENC_DFT_NB]; @@ -231,6 +235,12 @@ typedef struct stereo_dft_enc_data_struct #endif +#ifdef FIX_ITD_CNG + int16_t 
currentNumUpdates; + int16_t expectedNumUpdates; /* Expected number of frames before use of ITD estimate */ + int16_t resetFrames; +#endif + /* energy buffers for ICBWE */ float nrg_L[2]; float nrg_R[2]; @@ -557,6 +567,9 @@ typedef struct front_vad_enc VAD_HANDLE hVAD; /* VAD handle */ float *delay_buf; int16_t delay_samples; +#ifdef FIX_ITD_CNG + int16_t rem_dtx_ho; /* Remaining hangover frames */ +#endif } FRONT_VAD_ENC, *FRONT_VAD_ENC_HANDLE; @@ -809,10 +822,15 @@ typedef struct stereo_cng_enc float sg_average[STEREO_DFT_ERB4_BANDS]; /* Sidegain average */ float prev_sg_average[STEREO_DFT_ERB4_BANDS]; /* Previous sidegain average */ float mem_cohBand[STEREO_DFT_BAND_MAX / 2]; /* Coherence memory */ +#ifdef FIX_ITD_CNG + float prev_cohBand[2 * ( STEREO_DFT_BAND_MAX / 2 )]; /* Previous coherence */ + int16_t cng_counter; /* Counter for cng period length */ +#else float coh_crossfade[STEREO_DFT_BAND_MAX / 2]; /* Coherence memory */ - int16_t td_active; /* TD-stereo indication */ - int16_t first_SID_after_TD; /* Set if first SID frame after TD stereo */ - int16_t first_SID; /* Set if first SID frame since codec start */ +#endif + int16_t td_active; /* TD-stereo indication */ + int16_t first_SID_after_TD; /* Set if first SID frame after TD stereo */ + int16_t first_SID; /* Set if first SID frame since codec start */ } STEREO_CNG_ENC, *STEREO_CNG_ENC_HANDLE; diff --git a/lib_enc/ivas_stereo_cng_enc.c b/lib_enc/ivas_stereo_cng_enc.c index 31f5e7c794ade1dd6c77e34c5eeb54fd4ab3d198..68be66c2a107e18d20827281aa7844ea4b6f0d37 100644 --- a/lib_enc/ivas_stereo_cng_enc.c +++ b/lib_enc/ivas_stereo_cng_enc.c @@ -32,6 +32,7 @@ #include <stdint.h> #include "options.h" +#include <math.h> #include "cnst.h" #include "rom_enc.h" #include "rom_com.h" @@ -50,6 +51,9 @@ *-------------------------------------------------------------------*/ #define COH_FADE_MAX 4 +#ifdef FIX_ITD_CNG +#define COH_FADE_UPDATES 2 +#endif /*--------------------------------------------------------------- @@ -59,17 +63,23 @@ *
---------------------------------------------------------------*/ void stereo_dft_enc_sid_calc_coh( - STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: DFT stereo handle */ - float coh_crossfade[STEREO_DFT_BAND_MAX / 2], /* i/o: Coherence crossfade memory */ - int16_t *td_active, /* i/o: TD stereo mode indicator */ - int16_t *first_SID, /* i/o: First SID indicator */ - float *cohBand /* i/o: Coherence per band */ + STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: DFT stereo handle */ +#ifdef FIX_ITD_CNG + float prev_cohBand[2 * ( STEREO_DFT_BAND_MAX / 2 )], /* i/o: Previous coherence */ +#else + float coh_crossfade[STEREO_DFT_BAND_MAX / 2], /* i/o: Coherence crossfade memory */ +#endif + int16_t *td_active, /* i/o: TD stereo mode indicator */ + int16_t *first_SID, /* i/o: First SID indicator */ + float *cohBand /* i/o: Coherence per band */ ) { int16_t b, k; float coh_weight; float coh_weight_sum; - +#ifdef FIX_ITD_CNG + float xspec_scale; +#endif /* Cluster the coherence into bands using a weighted average. The coherence is weighted with the energy spectrum of the mixdown signal. 
*/ for ( b = 0; b < hStereoDft->nbands; b++ ) @@ -77,6 +87,32 @@ void stereo_dft_enc_sid_calc_coh( cohBand[b] = 0; coh_weight_sum = 0; +#ifdef FIX_ITD_CNG + if ( hStereoDft->coh_fade_counter == 0 && !*first_SID ) + { + for ( k = hStereoDft->band_limits[b]; k < hStereoDft->band_limits[b + 1]; k++ ) + { + xspec_scale = sqrtf( ( prev_cohBand[b] * ( hStereoDft->Spd_L_smooth[k] * hStereoDft->Spd_R_smooth[k] ) ) / ( hStereoDft->xspec_smooth[2 * k] * hStereoDft->xspec_smooth[2 * k] + hStereoDft->xspec_smooth[2 * k + 1] * hStereoDft->xspec_smooth[2 * k + 1] + EPSILON ) ); + hStereoDft->xspec_smooth[2 * k] *= xspec_scale; + hStereoDft->xspec_smooth[2 * k + 1] *= xspec_scale; + } + + cohBand[b] = prev_cohBand[b]; + } + else + { + for ( k = hStereoDft->band_limits[b]; k < hStereoDft->band_limits[b + 1]; k++ ) + { + coh_weight = hStereoDft->DFT[0][2 * k] * hStereoDft->DFT[0][2 * k] + hStereoDft->DFT[0][2 * k + 1] * hStereoDft->DFT[0][2 * k + 1]; + cohBand[b] += coh_weight * ( hStereoDft->xspec_smooth[2 * k] * hStereoDft->xspec_smooth[2 * k] + hStereoDft->xspec_smooth[2 * k + 1] * hStereoDft->xspec_smooth[2 * k + 1] ) / ( hStereoDft->Spd_L_smooth[k] * hStereoDft->Spd_R_smooth[k] + EPSILON ); + coh_weight_sum += coh_weight; + } + if ( coh_weight_sum > 0 ) + { + cohBand[b] = cohBand[b] / coh_weight_sum; + } + } +#else for ( k = hStereoDft->band_limits[b]; k < hStereoDft->band_limits[b + 1]; k++ ) { coh_weight = hStereoDft->DFT[0][2 * k] * hStereoDft->DFT[0][2 * k] + hStereoDft->DFT[0][2 * k + 1] * hStereoDft->DFT[0][2 * k + 1]; @@ -87,26 +123,56 @@ void stereo_dft_enc_sid_calc_coh( { cohBand[b] = cohBand[b] / coh_weight_sum; } +#endif } if ( *first_SID ) { +#ifdef FIX_ITD_CNG + mvr2r( cohBand, prev_cohBand, hStereoDft->nbands ); + mvr2r( prev_cohBand, &( prev_cohBand[STEREO_DFT_BAND_MAX / 2] ), hStereoDft->nbands ); +#else mvr2r( cohBand, coh_crossfade, hStereoDft->nbands ); +#endif *first_SID = 0; } +#ifdef FIX_ITD_CNG + if ( hStereoDft->coh_fade_counter < COH_FADE_MAX && ( 
*td_active || hStereoDft->currentNumUpdates < COH_FADE_UPDATES ) ) +#else if ( hStereoDft->coh_fade_counter < COH_FADE_MAX && *td_active ) +#endif { for ( b = 0; b < hStereoDft->nbands; b++ ) { +#ifdef FIX_ITD_CNG + cohBand[b] = ( cohBand[b] * hStereoDft->coh_fade_counter + prev_cohBand[b] * ( COH_FADE_MAX - hStereoDft->coh_fade_counter ) ) / COH_FADE_MAX; +#else cohBand[b] = ( cohBand[b] * hStereoDft->coh_fade_counter + coh_crossfade[b] * ( COH_FADE_MAX - hStereoDft->coh_fade_counter ) ) / COH_FADE_MAX; +#endif } hStereoDft->coh_fade_counter++; +#ifdef FIX_ITD_CNG + if ( hStereoDft->coh_fade_counter > 0 ) + { + mvr2r( &prev_cohBand[STEREO_DFT_BAND_MAX / 2], prev_cohBand, hStereoDft->nbands ); + } + mvr2r( cohBand, &prev_cohBand[STEREO_DFT_BAND_MAX / 2], hStereoDft->nbands ); +#else mvr2r( cohBand, coh_crossfade, hStereoDft->nbands ); +#endif } else { +#ifdef FIX_ITD_CNG + if ( hStereoDft->coh_fade_counter > 0 ) + { + mvr2r( &prev_cohBand[STEREO_DFT_BAND_MAX / 2], prev_cohBand, hStereoDft->nbands ); + } + mvr2r( cohBand, &prev_cohBand[STEREO_DFT_BAND_MAX / 2], hStereoDft->nbands ); +#else mvr2r( cohBand, coh_crossfade, hStereoDft->nbands ); +#endif hStereoDft->coh_fade_counter = COH_FADE_MAX; *td_active = 0; } @@ -346,6 +412,11 @@ void stereo_dft_cng_side_gain( } hStereoCng->sg_average_counter++; +#ifdef FIX_ITD_CNG + hStereoCng->cng_counter++; + hStereoCng->cng_counter = min( hStereoCng->cng_counter, STEREO_DFT_SG_ACT_CNT_MAX ); +#endif + if ( core_brate == SID_2k40 ) { /* SID frame */ @@ -443,9 +514,16 @@ void stereo_enc_cng_init( hStereoCng->sg_active_cnt = 0; hStereoCng->first_SID = 1; set_f( hStereoCng->mem_cohBand, 0.5f, STEREO_DFT_BAND_MAX / 2 ); +#ifdef FIX_ITD_CNG + set_zero( hStereoCng->prev_cohBand, 2 * ( STEREO_DFT_BAND_MAX / 2 ) ); +#else set_zero( hStereoCng->coh_crossfade, STEREO_DFT_BAND_MAX / 2 ); +#endif hStereoCng->td_active = 0; hStereoCng->first_SID_after_TD = 1; +#ifdef FIX_ITD_CNG + hStereoCng->cng_counter = 0; +#endif return; } @@ -463,6 
+541,10 @@ void stereo_cng_upd_counters( const int16_t nbands, /* i : Number of bands in active */ const float sidSideGain[], /* i : SID side gains */ const int16_t burst_ho_count /* i : Hang-over count */ +#ifdef FIX_ITD_CNG + , + int16_t *coh_fade_counter /* i : Coherence fade counter */ +#endif ) { int16_t b; @@ -486,5 +568,16 @@ void stereo_cng_upd_counters( hStereoCng->sg_active_cnt++; hStereoCng->sg_active_cnt = min( hStereoCng->sg_active_cnt, STEREO_DFT_SG_ACT_CNT_MAX ); +#ifdef FIX_ITD_CNG + if ( hStereoCng->sg_active_cnt > STEREO_DFT_CNG_ITD_CNT ) + { + hStereoCng->cng_counter = 0; + } + + if ( element_mode == IVAS_CPE_DFT ) + { + *coh_fade_counter = 0; + } +#endif return; } diff --git a/lib_enc/ivas_stereo_dft_enc.c b/lib_enc/ivas_stereo_dft_enc.c index 8a672632fc798c66aea020322e051cc1a91aec99..e54b98a942499f062d44dec67e4d5cf9057ad06a 100644 --- a/lib_enc/ivas_stereo_dft_enc.c +++ b/lib_enc/ivas_stereo_dft_enc.c @@ -62,6 +62,9 @@ static FILE *pF = NULL; #define STEREO_DFT_NRG_PAST_MAX_BAND_LB 4 #define STEREO_DFT_DMX_CROSSOVER ( int16_t )( 132 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) + 0.5f ) /* crossover bin between binwise and bandwise DMX */ #define ITD_VAD_E_BAND_N_INIT 200000 +#ifdef FIX_ITD_CNG +#define ITD_SID_PREV_FRAMES 5 +#endif /*------------------------------------------------------------------------- @@ -491,6 +494,15 @@ void stereo_dft_enc_reset( set_f( hStereoDft->Spd_L_smooth, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); set_f( hStereoDft->Spd_R_smooth, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); +#ifdef FIX_ITD_CNG + hStereoDft->currentNumUpdates = 0; + hStereoDft->expectedNumUpdates = FIXED_SID_RATE; + hStereoDft->resetFrames = 0; + hStereoDft->sid_gipd = 0; + hStereoDft->prev_sid_gipd = 0; + hStereoDft->prev_sid_no_ipd_flag = 1; +#endif + hStereoDft->coh_fade_counter = 0; /* Xtalk classifier */ @@ -1222,7 +1234,11 @@ float stereo_dft_enc_synthesize( *-------------------------------------------------------------------------*/ void 
stereo_dft_enc_process( - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ +#ifdef FIX_ITD_CNG + const int16_t vad_flag_dtx[], /* i: VAD dtx flags */ + const int16_t vad_hover_flag[], /* i: VAD hangover flags */ +#endif const int16_t input_frame /* i : input frame length */ ) { @@ -1293,8 +1309,11 @@ void stereo_dft_enc_process( if ( hStereoDft->hConfig->itd_mode ) #endif { +#ifdef FIX_ITD_CNG + stereo_dft_enc_compute_itd( hCPE, pDFT_L, pDFT_R, k_offset, input_frame, vad_flag_dtx, vad_hover_flag, bin_nrgL, bin_nrgR ); +#else stereo_dft_enc_compute_itd( hCPE, pDFT_L, pDFT_R, k_offset, input_frame, bin_nrgL, bin_nrgR ); - +#endif if ( hCPE->element_mode == IVAS_CPE_MDCT ) { return; @@ -1367,6 +1386,23 @@ void stereo_dft_enc_process( /* DFT stereo parameters */ stereo_dft_enc_compute_prm( hStereoDft, pDFT_L, pDFT_R, k_offset, 1, hCPE->hCoreCoder[0]->sp_aud_decision0, hCPE->hCoreCoder[0]->vad_flag, bin_nrgL, bin_nrgR, dot_prod_nrg_ratio ); +#ifdef FIX_ITD_CNG + if ( vad_flag_dtx[0] == 0 ) + { + if ( hCPE->hStereoCng->cng_counter == 0 && !hCPE->hStereoCng->first_SID_after_TD ) + { + hStereoDft->sid_gipd = hStereoDft->prev_sid_gipd; + hStereoDft->no_ipd_flag = hStereoDft->prev_sid_no_ipd_flag; + } + + if ( hCPE->hStereoCng->cng_counter > ITD_SID_PREV_FRAMES ) + { + hStereoDft->prev_sid_gipd = hStereoDft->sid_gipd; + hStereoDft->prev_sid_no_ipd_flag = hStereoDft->no_ipd_flag; + } + } +#endif + /*----------------------------------------------------------------* * UNCLR classifier (detection of uncorrelated L and R channels) *----------------------------------------------------------------*/ @@ -1423,7 +1459,11 @@ void stereo_dft_enc_process( } } +#ifdef FIX_ITD_CNG + if ( b < hStereoDft->res_cod_band_max && vad_flag_dtx[0] ) +#else if ( b < hStereoDft->res_cod_band_max ) +#endif { #ifdef DEBUGGING assert( hStereoDft->nbands == hStereoDft->nbands_dmx && "Don't use coarser stereo parameter resolution for residual 
coding bitrates!" ); @@ -2313,7 +2353,11 @@ void stereo_dft_enc_write_BS( if ( core_brate == SID_2k40 ) { +#ifdef FIX_ITD_CNG + stereo_dft_enc_sid_calc_coh( hStereoDft, hCPE->hStereoCng->prev_cohBand, &hCPE->hStereoCng->td_active, &hCPE->hStereoCng->first_SID, cohBand ); +#else stereo_dft_enc_sid_calc_coh( hStereoDft, hCPE->hStereoCng->coh_crossfade, &hCPE->hStereoCng->td_active, &hCPE->hStereoCng->first_SID, cohBand ); +#endif if ( *nb_bits <= ( ( IVAS_SID_5k2 - SID_2k40 ) / FRAMES_PER_SEC - SID_FORMAT_NBITS - STEREO_DFT_ITD_MODE_NBITS - STEREO_DFT_SID_ITD_NBITS - 1 ) ) { @@ -2410,7 +2454,6 @@ void stereo_dft_enc_write_BS( #endif ( *nb_bits ) += nb; - /*----------------------------------------------------------------* * Residual prediction *----------------------------------------------------------------*/ diff --git a/lib_enc/ivas_stereo_dft_enc_itd.c b/lib_enc/ivas_stereo_dft_enc_itd.c index 594137993e16e10febf1a5665ea86af2ea558b1c..e917c36e361d780cbc78edda143909fe98032851 100644 --- a/lib_enc/ivas_stereo_dft_enc_itd.c +++ b/lib_enc/ivas_stereo_dft_enc_itd.c @@ -67,6 +67,10 @@ #define DENOM 0.05f #define XSPEC_ALPHA ( 1.f / 32 ) +#ifdef FIX_ITD_CNG +#define CORR_FILT 0.8f +#define CORR_RESET_FRAMES_MAX 20 +#endif #define ITD_VAD_NOISE_INIT_FRAMES 30 #define ITD_VAD_THRSHOLD 0.001f @@ -722,6 +726,10 @@ void stereo_dft_enc_compute_itd( float *DFT_R, const int16_t k_offset, const int16_t input_frame, +#ifdef FIX_ITD_CNG + const int16_t vad_flag_dtx[], + const int16_t vad_hover_flag[], +#endif float *bin_nrgL, float *bin_nrgR ) { @@ -768,6 +776,10 @@ void stereo_dft_enc_compute_itd( const float *dft_trigo32k; float trigo_enc[STEREO_DFT_N_32k_ENC / 2 + 1]; +#ifdef FIX_ITD_CNG + float cng_xcorr_filt; +#endif + if ( hCPE->element_mode == IVAS_CPE_DFT ) { hStereoDft = hCPE->hStereoDft; @@ -926,6 +938,10 @@ void stereo_dft_enc_compute_itd( vad_flag_itd = stereo_dft_enc_itd_vad( hItd->E_band_n, &( hItd->vad_frm_cnt ), Spd_L, Spd_R, &mssnr ); +#ifdef FIX_ITD_CNG + 
vad_flag_itd = vad_flag_itd && vad_flag_dtx[0]; +#endif + if ( sum_nrg_L < EPSILON ) { sfm_L = 0; @@ -1053,17 +1069,94 @@ void stereo_dft_enc_compute_itd( if ( hCPE->hCoreCoder[0]->Opt_DTX_ON && hCPE->element_mode == IVAS_CPE_DFT ) { +#ifdef FIX_ITD_CNG + if ( hCPE->hFrontVad[0] != NULL ) + { + /* Determine if we are in hangover */ + if ( vad_hover_flag[0] && vad_hover_flag[1] ) + { + /* Determine if we are in the first DTX hangover frame (also triggers for VAD hangover frame) */ + if ( hStereoDft->resetFrames > CORR_RESET_FRAMES_MAX ) + { + /* Reset cross spectrum when there is hangover */ + set_f( hStereoDft->xspec_smooth, 0.0f, STEREO_DFT_N_32k_ENC ); + hStereoDft->resetFrames = 0; + hStereoDft->currentNumUpdates = 0; + /* Expected minimum number of updates including first SID */ + hStereoDft->expectedNumUpdates = 1 + min( hCPE->hFrontVad[0]->rem_dtx_ho, hCPE->hFrontVad[1]->rem_dtx_ho ); + } + else if ( hStereoDft->currentNumUpdates >= hStereoDft->expectedNumUpdates ) + { + hStereoDft->expectedNumUpdates += 1 + min( hCPE->hFrontVad[0]->rem_dtx_ho, hCPE->hFrontVad[1]->rem_dtx_ho ); + } + cng_xcorr_filt = max( min( CORR_FILT, 10.0f * CORR_FILT / ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates ) ), sfm_L ); + hStereoDft->currentNumUpdates++; + for ( i = 1; i < NFFT / 2; i++ ) + { + /* Low pass filter cross L/R power spectrum */ + hStereoDft->xspec_smooth[2 * i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->xspec_smooth[2 * i] + cng_xcorr_filt * xcorr[2 * i]; + hStereoDft->xspec_smooth[2 * i + 1] = ( 1.f - cng_xcorr_filt ) * hStereoDft->xspec_smooth[2 * i + 1] + cng_xcorr_filt * xcorr[2 * i + 1]; + + /* Low pass filter L/R power spectrum */ + /* Calculate coherence as cross spectral density divided by L*R power spectrum */ + hStereoDft->Spd_L_smooth[i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->Spd_L_smooth[i] + cng_xcorr_filt * Spd_L[i]; + hStereoDft->Spd_R_smooth[i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->Spd_R_smooth[i] + cng_xcorr_filt * Spd_R[i]; + } 
+ } + else if ( vad_flag_dtx[0] == 0 ) + { + hStereoDft->resetFrames = 0; + } + else + { + if ( hStereoDft->resetFrames < CORR_RESET_FRAMES_MAX + 1 ) + { + hStereoDft->resetFrames++; + } + if ( !vad_hover_flag[0] && !vad_hover_flag[1] ) + { + hStereoDft->expectedNumUpdates = hStereoDft->currentNumUpdates; + } + } + } +#endif +#ifdef FIX_ITD_CNG + if ( ( vad_flag_dtx[0] == 0 ) || ( hCPE->hFrontVad[0] == NULL && ( hCPE->hCoreCoder[0]->last_core_brate == SID_2k40 || hCPE->hCoreCoder[0]->last_core_brate == FRAME_NO_DATA ) ) || hCPE->hStereoCng->first_SID_after_TD ) +#else if ( hCPE->hCoreCoder[0]->last_core_brate == SID_2k40 || hCPE->hCoreCoder[0]->last_core_brate == FRAME_NO_DATA || hCPE->hStereoCng->first_SID_after_TD ) +#endif { +#ifdef FIX_ITD_CNG + if ( vad_flag_dtx[0] == 0 ) + { + /* expectedNumUpdates updated after call to dtx() in SID frames */ + cng_xcorr_filt = max( min( CORR_FILT, 10.0f * CORR_FILT / ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates ) ), sfm_L ); + hStereoDft->currentNumUpdates++; + hStereoDft->sfm = cng_xcorr_filt; + } + else /* use sfm for active frames */ + { + cng_xcorr_filt = sfm_L; + } + + /* Copy state of xspec_smooth to xcorr_smooth in first CNG frame */ + if ( hCPE->hStereoCng->cng_counter == 0 && vad_flag_dtx[0] == 0 ) + { + mvr2r( hStereoDft->xspec_smooth, hItd->xcorr_smooth, NFFT ); + } +#endif for ( i = 1; i < NFFT / 2; i++ ) { /* Low pass filter cross L/R power spectrum */ hStereoDft->xspec_smooth[2 * i] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->xspec_smooth[2 * i] + XSPEC_ALPHA * xcorr[2 * i]; hStereoDft->xspec_smooth[2 * i + 1] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->xspec_smooth[2 * i + 1] + XSPEC_ALPHA * xcorr[2 * i + 1]; - +#ifdef FIX_ITD_CNG + hItd->xcorr_smooth[2 * i] = ( 1.f - cng_xcorr_filt ) * hItd->xcorr_smooth[2 * i] + cng_xcorr_filt * xcorr[2 * i]; + hItd->xcorr_smooth[2 * i + 1] = ( 1.f - cng_xcorr_filt ) * hItd->xcorr_smooth[2 * i + 1] + cng_xcorr_filt * xcorr[2 * i + 1]; +#else hItd->xcorr_smooth[2 * i] 
= ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i] + sfm_L * xcorr[2 * i]; hItd->xcorr_smooth[2 * i + 1] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i + 1] + sfm_L * xcorr[2 * i + 1]; - +#endif tmpf1 = sqrtf( hItd->xcorr_smooth[i * 2] * hItd->xcorr_smooth[i * 2] + hItd->xcorr_smooth[i * 2 + 1] * hItd->xcorr_smooth[i * 2 + 1] ); tmpf1 += EPSILON; tmpf2 = tmpf1; diff --git a/lib_enc/ivas_stereo_dft_td_itd.c b/lib_enc/ivas_stereo_dft_td_itd.c index 84dfee698026db9cbf388b1901231692a49f5ff5..fd4cbdde71842712c1aa5a1b5c8f63d4a4e2c7eb 100644 --- a/lib_enc/ivas_stereo_dft_td_itd.c +++ b/lib_enc/ivas_stereo_dft_td_itd.c @@ -383,7 +383,11 @@ void stereo_td_itd( * ---------------------------------------------------------------*/ void stereo_td_itd_mdct_stereo( - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder handle */ + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder handle */ +#ifdef FIX_ITD_CNG + const int16_t vad_flag_dtx[], /* i: VAD dtx flags */ + const int16_t vad_hover_flag[], /* i: VAD hangover flags */ +#endif const int16_t input_frame /* i : frame length */ ) { @@ -411,7 +415,11 @@ void stereo_td_itd_mdct_stereo( stereo_dft_enc_analyze( hCPE->hCoreCoder, CPE_CHANNELS, input_frame, NULL, hStereoMdct, DFT, hCPE->input_mem ); /*call ITD function*/ +#ifdef FIX_ITD_CNG + stereo_dft_enc_compute_itd( hCPE, DFT[0], DFT[1], STEREO_DFT_OFFSET, input_frame, vad_flag_dtx, vad_hover_flag, bin_nrgL, bin_nrgR ); +#else stereo_dft_enc_compute_itd( hCPE, DFT[0], DFT[1], STEREO_DFT_OFFSET, input_frame, bin_nrgL, bin_nrgR ); +#endif /* Time Domain ITD compensation using extrapolation */ #ifdef DEBUG_MODE_DFT diff --git a/lib_enc/pre_proc.c b/lib_enc/pre_proc.c index 83391ab9d41ceb270a86803d8dd935e27c7205ff..9729dd6d5459a761cf8ead91f87430c1b2fee59b 100644 --- a/lib_enc/pre_proc.c +++ b/lib_enc/pre_proc.c @@ -211,7 +211,12 @@ void pre_proc( st->vad_flag = vad_flag_cldfb; } - vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, cldfb_addition, vad_hover_flag, NULL, NULL ); + 
vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, cldfb_addition, vad_hover_flag, NULL, NULL +#ifdef FIX_ITD_CNG + , + NULL +#endif + ); /*----------------------------------------------------------------* * NB/WB/SWB/FB bandwidth detector diff --git a/lib_enc/vad.c b/lib_enc/vad.c index 8d3c0beb4fe795acefc925493d3205fe13bb68de..98b6ad240e3a8e22db89102724cae0c4230c2114 100644 --- a/lib_enc/vad.c +++ b/lib_enc/vad.c @@ -162,6 +162,10 @@ int16_t dtx_hangover_addition( int16_t *vad_hover_flag, /* o : VAD hangover flag */ VAD_HANDLE hVAD, /* i/o: VAD handle for L or R channel */ NOISE_EST_HANDLE hNoiseEst /* i : Noise estimation handle */ +#ifdef FIX_ITD_CNG + , + int16_t *rem_dtx_ho /* o : Expected remaining hangover frames */ +#endif ) { int16_t hangover_short_dtx, flag_dtx; @@ -303,6 +307,12 @@ int16_t dtx_hangover_addition( if ( flag_dtx != 0 && st->localVAD == 0 ) { *vad_hover_flag = 1; +#ifdef FIX_ITD_CNG + if ( rem_dtx_ho != NULL ) + { + *rem_dtx_ho = max( hangover_short_dtx - hVAD->hangover_cnt_dtx, 0 ); + } +#endif } return flag_dtx;