From 823cb4db557e9c440f92add8b66ae6297674226d Mon Sep 17 00:00:00 2001 From: Eleni Fotopoulou Date: Thu, 11 May 2023 12:58:58 +0200 Subject: [PATCH 1/3] Improvement for DFT Stereo for cases with large ITDs under HYBRID_ITD_MAX --- lib_com/ivas_prot.h | 4 +++ lib_com/options.h | 2 ++ lib_enc/ivas_cpe_enc.c | 7 ++++- lib_enc/ivas_stat_enc.h | 4 +++ lib_enc/ivas_stereo_classifier.c | 8 +++++- lib_enc/ivas_stereo_dft_enc.c | 12 ++++++++- lib_enc/ivas_stereo_dft_enc_itd.c | 44 ++++++++++++++++++++++++++++--- lib_enc/ivas_stereo_dft_td_itd.c | 9 ++++++- 8 files changed, 82 insertions(+), 8 deletions(-) diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index 0355c00470..cf81a9ffb6 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -1401,6 +1401,10 @@ int16_t read_BS_adapt_GR_sg( void stereo_dft_hybrid_ITD_flag( STEREO_DFT_CONFIG_DATA_HANDLE hConfig, /* o : DFT stereo configuration */ const int32_t input_Fs /* i : CPE element sampling rate */ +#ifdef HYBRID_ITD_MAX + , + const int16_t hybrid_itd_max /* i : flag for hybrid ITD for very large ITDs */ +#endif ); void stereo_dft_enc_compute_itd( diff --git a/lib_com/options.h b/lib_com/options.h index 396d080b98..01c40764d2 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -211,6 +211,8 @@ //#define HODIRAC_READ_PARAMS #endif +#define HYBRID_ITD_MAX /* FhG: Improvement for DFT-stereo for cases with large ITDs */ + /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index b60f8be45b..522cfb885d 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -333,7 +333,12 @@ ivas_error ivas_cpe_enc( if ( hCPE->element_mode == IVAS_CPE_DFT ) { - stereo_dft_hybrid_ITD_flag( hCPE->hStereoDft->hConfig, input_Fs ); + stereo_dft_hybrid_ITD_flag( hCPE->hStereoDft->hConfig, input_Fs +#ifdef HYBRID_ITD_MAX + , + hCPE->hStereoDft->hItd->hybrid_itd_max +#endif + ); /* Time Domain ITD compensation using 
extrapolation */ #ifdef DEBUG_MODE_DFT diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index 5eced57880..be414ae6b8 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -81,6 +81,10 @@ typedef struct stereo_itd_data_struct int16_t prev_itd1; int16_t prev_itd2; +#ifdef HYBRID_ITD_MAX + /*flag for hybrid ITD for very large ITDs*/ + int16_t hybrid_itd_max; +#endif } ITD_DATA, *ITD_DATA_HANDLE; typedef struct dft_ana_struct diff --git a/lib_enc/ivas_stereo_classifier.c b/lib_enc/ivas_stereo_classifier.c index d4d0b310b6..5e7d3a1623 100644 --- a/lib_enc/ivas_stereo_classifier.c +++ b/lib_enc/ivas_stereo_classifier.c @@ -998,7 +998,13 @@ void xtalk_classifier_dft( printf( "\nSwitch DFT-stereo -> TD-LR on frame %d\n", frame ); #endif } - else if ( hCPE->element_brate >= IVAS_16k4 && hStereoClassif->xtalk_decision == 0 && abs( itd ) > STEREO_DFT_ITD_MAX && ( hCPE->hCoreCoder[0]->lp_speech - hCPE->hCoreCoder[0]->lp_noise ) > 25.0f ) + else if ( +#ifdef HYBRID_ITD_MAX + hCPE->element_brate > IVAS_16k4 +#else + hCPE->element_brate >= IVAS_16k4 +#endif + && hStereoClassif->xtalk_decision == 0 && abs( itd ) > STEREO_DFT_ITD_MAX && ( hCPE->hCoreCoder[0]->lp_speech - hCPE->hCoreCoder[0]->lp_noise ) > 25.0f ) { hStereoClassif->xtalk_decision = 1; } diff --git a/lib_enc/ivas_stereo_dft_enc.c b/lib_enc/ivas_stereo_dft_enc.c index 43546261a4..dbc0662255 100644 --- a/lib_enc/ivas_stereo_dft_enc.c +++ b/lib_enc/ivas_stereo_dft_enc.c @@ -294,9 +294,16 @@ ivas_error stereo_dft_enc_create( hStereoDft_loc->hConfig->force_mono_transmission = 0; stereo_dft_config( hStereoDft_loc->hConfig, IVAS_24k4, &tmpS, &tmpS ); - stereo_dft_hybrid_ITD_flag( hStereoDft_loc->hConfig, input_Fs ); + stereo_dft_enc_open( hStereoDft_loc, input_Fs, max_bwidth ); + stereo_dft_hybrid_ITD_flag( hStereoDft_loc->hConfig, input_Fs +#ifdef HYBRID_ITD_MAX + , + hStereoDft_loc->hItd->hybrid_itd_max +#endif + ); + *hStereoDft = hStereoDft_loc; return IVAS_ERR_OK; @@ -560,6 +567,9 @@ void 
stereo_enc_itd_init( hItd->prev_itd1 = 0; hItd->prev_itd2 = 0; +#ifdef HYBRID_ITD_MAX + hItd->hybrid_itd_max = 0; +#endif return; } diff --git a/lib_enc/ivas_stereo_dft_enc_itd.c b/lib_enc/ivas_stereo_dft_enc_itd.c index d2c0bc7fe1..025729e0cd 100644 --- a/lib_enc/ivas_stereo_dft_enc_itd.c +++ b/lib_enc/ivas_stereo_dft_enc_itd.c @@ -115,11 +115,19 @@ static void set_band_limits( void stereo_dft_hybrid_ITD_flag( STEREO_DFT_CONFIG_DATA_HANDLE hConfig, /* o : DFT stereo configuration */ const int32_t input_Fs /* i : CPE element sampling rate */ +#ifdef HYBRID_ITD_MAX + , + const int16_t hybrid_itd_max /* i : flag for hybrid ITD for very large ITDs */ +#endif ) { if ( hConfig != NULL ) { - if ( hConfig->res_cod_mode || ( hConfig->ada_wb_res_cod_mode && input_Fs == 16000 ) ) + if ( hConfig->res_cod_mode || ( hConfig->ada_wb_res_cod_mode && input_Fs == 16000 ) +#ifdef HYBRID_ITD_MAX + || ( hybrid_itd_max == 1 ) +#endif + ) { hConfig->hybrid_itd_flag = 1; } @@ -642,6 +650,11 @@ void stereo_dft_enc_compute_itd( float cng_xcorr_filt; +#ifdef HYBRID_ITD_MAX + int16_t prev_itd_max; + int16_t itd_max_flip; +#endif + if ( hCPE->element_mode == IVAS_CPE_DFT ) { hStereoDft = hCPE->hStereoDft; @@ -1328,7 +1341,12 @@ void stereo_dft_enc_compute_itd( hItd->prev_sum_nrg_L_lb = sum_nrg_L_lb; mvr2r( xcorr_lb, hItd->prev_xcorr_lb, STEREO_DFT_XCORR_LB_MAX ); } - +#ifdef HYBRID_ITD_MAX + /*save previous flag*/ + prev_itd_max = hItd->hybrid_itd_max; + /* enable hybrid ITD handling for very large ITDs*/ + hItd->hybrid_itd_max = ( abs( itd ) > STEREO_DFT_ITD_MAX && abs( itd ) < STEREO_DFT_ITD_MAX_ANA && !hCPE->hCoreCoder[0]->sp_aud_decision0 && hCPE->element_brate < IVAS_32k ); +#endif /* Update memory */ hItd->prev_itd = itd; @@ -1343,7 +1361,7 @@ void stereo_dft_enc_compute_itd( #ifdef DEBUG_STEREO_CLF dbgwrite( &hItd->itd[k_offset], sizeof( float ), 1, 1, "res/ITD.x" ); #endif - + /* limit ITD range for MDCT stereo even more */ if ( hCPE->element_mode == IVAS_CPE_MDCT && fabsf( 
hItd->itd[k_offset] ) > ITD_MAX_MDCT ) { itd = 0; @@ -1353,7 +1371,25 @@ void stereo_dft_enc_compute_itd( hItd->deltaItd[k_offset] = hItd->itd[k_offset] - hItd->td_itd[k_offset]; - /* limit ITD range for MDCT stereo even more */ +#ifdef HYBRID_ITD_MAX + if ( hItd->hybrid_itd_max ) + { + /*check if there is an ITD flip*/ + itd_max_flip = ( hItd->itd[k_offset] * hItd->itd[k_offset - 1] < 0 ); + + if ( hItd->deltaItd[k_offset - 1] != 0 && itd_max_flip == 0 ) + { + int16_t tmp_itd = (int16_t) floor( ( ( hItd->prev_itd ) * ( (float) input_frame / 640 ) ) + 0.5f ); + hItd->deltaItd[k_offset] = -1.0f * tmp_itd - hItd->td_itd[k_offset]; + } + } + /*signal change for next frame*/ + if ( prev_itd_max == 1 && hItd->hybrid_itd_max == 0 ) + { + hItd->hybrid_itd_max = -1; + } +#endif + #ifdef DEBUG_MODE_DFT { int16_t tmp; diff --git a/lib_enc/ivas_stereo_dft_td_itd.c b/lib_enc/ivas_stereo_dft_td_itd.c index b7fb95422f..27a71dfae4 100644 --- a/lib_enc/ivas_stereo_dft_td_itd.c +++ b/lib_enc/ivas_stereo_dft_td_itd.c @@ -271,11 +271,18 @@ void stereo_td_itd( hITD->td_itd_32k[i] = hITD->td_itd_32k[i + 1]; } } +#ifdef HYBRID_ITD_MAX + /*reset TD ITDs in case of hybrid itd_max change - turn hybrid ITD off*/ + if ( hITD->hybrid_itd_max == -1 && hybrid_itd_flag == 0 ) + { + hITD->td_itd[k_offset] = 0; + hITD->td_itd_32k[k_offset] = 0; + } +#endif if ( hybrid_itd_flag == 0 ) { return; } - stereo_td_get_td_itd( &( hITD->td_itd[k_offset] ), &( hITD->td_itd_32k[k_offset] ), hITD->itd[k_offset], sts[0]->input_Fs ); /* initializations*/ -- GitLab From 19073465f65b79a6eec38969240a1cd3fd1bf4c9 Mon Sep 17 00:00:00 2001 From: Eleni Fotopoulou Date: Tue, 16 May 2023 17:11:36 +0200 Subject: [PATCH 2/3] revert change in stereo classifier --- lib_enc/ivas_stereo_classifier.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/lib_enc/ivas_stereo_classifier.c b/lib_enc/ivas_stereo_classifier.c index 5e7d3a1623..9a54a9ed6d 100644 --- a/lib_enc/ivas_stereo_classifier.c +++ 
b/lib_enc/ivas_stereo_classifier.c @@ -62,8 +62,8 @@ #define XTALK_INTERCEPT_TD -1.770983f #define XTALK_INTERCEPT_DFT -0.758556f -#define EDGE_MAX_LEN 30 /* maximum length of buffer for edge detection */ -#define REDGE_MAX_LEN 30 /* maximum length of buffer for rising edge detection */ +#define EDGE_MAX_LEN 30 /* maximum length of buffer for edge detection */ +#define REDGE_MAX_LEN 30 /* maximum length of buffer for rising edge detection */ #define CLASSIFIER_ITD_THRES 8 /* ITD threshold in samples that enables classifier to switch */ @@ -998,13 +998,7 @@ void xtalk_classifier_dft( printf( "\nSwitch DFT-stereo -> TD-LR on frame %d\n", frame ); #endif } - else if ( -#ifdef HYBRID_ITD_MAX - hCPE->element_brate > IVAS_16k4 -#else - hCPE->element_brate >= IVAS_16k4 -#endif - && hStereoClassif->xtalk_decision == 0 && abs( itd ) > STEREO_DFT_ITD_MAX && ( hCPE->hCoreCoder[0]->lp_speech - hCPE->hCoreCoder[0]->lp_noise ) > 25.0f ) + else if ( hCPE->element_brate >= IVAS_16k4 && hStereoClassif->xtalk_decision == 0 && abs( itd ) > STEREO_DFT_ITD_MAX && ( hCPE->hCoreCoder[0]->lp_speech - hCPE->hCoreCoder[0]->lp_noise ) > 25.0f ) { hStereoClassif->xtalk_decision = 1; } -- GitLab From 1f6f54591b7d2a7fe829b0e170a2e4a6cc99149e Mon Sep 17 00:00:00 2001 From: Eleni Fotopoulou Date: Tue, 16 May 2023 19:59:25 +0200 Subject: [PATCH 3/3] clang format --- lib_enc/ivas_stereo_classifier.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_enc/ivas_stereo_classifier.c b/lib_enc/ivas_stereo_classifier.c index 9a54a9ed6d..d4d0b310b6 100644 --- a/lib_enc/ivas_stereo_classifier.c +++ b/lib_enc/ivas_stereo_classifier.c @@ -62,8 +62,8 @@ #define XTALK_INTERCEPT_TD -1.770983f #define XTALK_INTERCEPT_DFT -0.758556f -#define EDGE_MAX_LEN 30 /* maximum length of buffer for edge detection */ -#define REDGE_MAX_LEN 30 /* maximum length of buffer for rising edge detection */ +#define EDGE_MAX_LEN 30 /* maximum length of buffer for edge detection */ +#define REDGE_MAX_LEN 30 /* 
maximum length of buffer for rising edge detection */ #define CLASSIFIER_ITD_THRES 8 /* ITD threshold in samples that enables classifier to switch */ -- GitLab