diff --git a/lib_com/ivas_cnst.h b/lib_com/ivas_cnst.h index 2107a5b9172875a328f8f5a052cd5f0b98f1ab1b..9557f7aef1ae94191c97dd552d9abc7a3d060164 100755 --- a/lib_com/ivas_cnst.h +++ b/lib_com/ivas_cnst.h @@ -1784,6 +1784,41 @@ typedef enum #define IVAS_LIMITER_THRESHOLD 32729 /* -0.01 dBFS */ #define IVAS_LIMITER_ATTACK_SECONDS 0.005f +#ifdef ENHANCED_STEREO_DMX +/*----------------------------------------------------------------------------------* + * Stereo downmix EVS constants + *----------------------------------------------------------------------------------*/ + +#define STEREO_DMX_EVS_PHA_LEN_16 48 /* 16 kHz input: the encoder init sets the tap count (pha_len) to half of this; likewise _32 / _48 */ +#define STEREO_DMX_EVS_FAD_LEN_16 160 /* 16 kHz input: cross-fade length in samples between previous and current taps; likewise _32 / _48 */ +#define STEREO_DMX_EVS_PHA_LEN_32 96 +#define STEREO_DMX_EVS_FAD_LEN_32 320 +#define STEREO_DMX_EVS_PHA_LEN_48 96 +#define STEREO_DMX_EVS_FAD_LEN_48 480 + +#define STEREO_DMX_EVS_SUBBAND_SIZE 2 /* spectral bins grouped into one sub-band */ +#define STEREO_DMX_EVS_NB_SUBBAND_MAX (L_FRAME48k / (2 * STEREO_DMX_EVS_SUBBAND_SIZE)) + +#define STEREO_DMX_EVS_PHA_LEN_MAX 96 /* Max of PHA_LEN */ +#define STEREO_DMX_EVS_FAD_LEN_MAX 480 /* Max of FAD_LEN */ + +#define STEREO_DMX_EVS_DATA_LEN_MAX (STEREO_DMX_EVS_PHA_LEN_MAX + L_FRAME48k) /* tap history followed by one frame of input */ + +typedef enum +{ + STEREO_DMX_EVS_PHA_IPD, /* selected in calc_poc() when the smoothed ISD rate exceeds STEREO_DMX_EVS_ISD_DIST_HYST_H */ + STEREO_DMX_EVS_PHA_IPD2, /* selected in calc_poc() when the smoothed ISD rate falls below STEREO_DMX_EVS_ISD_DIST_HYST_L */ + STEREO_DMX_EVS_NO_PHA /* NOTE(review): not referenced in the code shown in this patch */ + +} STEREO_DMX_EVS_PHA; + +typedef enum +{ + STEREO_DMX_EVS_PRC_POC, /* output the create_M_signal() downmix (dmx_poc_data) */ + STEREO_DMX_EVS_PRC_PHA, /* output the tap-filtered downmix (dmx_pha_data) */ + +} STEREO_DMX_EVS_PRC; +#endif #endif /* clang-format on */ diff --git a/lib_com/options.h b/lib_com/options.h index 3a4a9e926f48239e7e3e1057b6918aa540d6f70c..d7f39aab3d2f4f042badcea4492d55033ce80628 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -234,6 +234,8 @@ #define FIX_TODO_NON_DIEGETIC_PAN_NOT_IMPLELENTED_IN_RENDERER /* ..\apps\renderer.c(240): .... (todo: implementation)",*/ #define REMOVE_OBS_CODE /* FhG: Remove unnecessary assignement after LFE cleanup (Issue #451)*/ +#define ENHANCED_STEREO_DMX /* Orange : Contribution 48 - Enhanced stereo downmix. 
*/ + /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index 7690ac5b300d0a6167ed670e2e2d2a4b3a49e3a4..7caab6da6afcdc655995e281c2453e0a02cdd8fc 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -1009,9 +1009,50 @@ typedef struct stereo_dmx_evs_phase_only_correlation_structure } STEREO_DMX_EVS_POC_DATA, *STEREO_DMX_EVS_POC_HANDLE; +#ifdef ENHANCED_STEREO_DMX +typedef struct stereo_dmx_evs_correlation_filter_structure +{ + int16_t init_frmCntr; /* start-up frame countdown; decremented and floored at 0 in calc_poc() */ + float isd_rate_s; /* smoothed rate of bins exceeding STEREO_DMX_EVS_ISD_THRES_H */ + float iccr_s; /* smoothed ICCr value */ + float ipd_ff[STEREO_DMX_EVS_NB_SUBBAND_MAX]; /* per-sub-band forgetting factor used to smooth Pr / Pi */ + float Pr[STEREO_DMX_EVS_NB_SUBBAND_MAX]; /* smoothed, normalised per-sub-band cross-spectrum, real part */ + float Pi[STEREO_DMX_EVS_NB_SUBBAND_MAX]; /* smoothed, normalised per-sub-band cross-spectrum, imaginary part */ + float rfft_ipd_coef[L_FRAME48k / 2 + 1]; /* coefficient table passed to rfft() when the taps are computed */ + + int16_t pha_len; /* number of filter taps */ + int16_t fad_len; /* cross-fade length in samples between previous and current taps */ + + float win[STEREO_DMX_EVS_PHA_LEN_MAX]; /* window multiplied onto the taps */ + float fad_g[STEREO_DMX_EVS_FAD_LEN_MAX]; /* cross-fade gains for fad_len */ + float *p_prev_taps[CPE_CHANNELS], prev_taps[CPE_CHANNELS][STEREO_DMX_EVS_PHA_LEN_MAX]; /* pointer is NULL when the channel had no filter in the previous frame */ + float *p_curr_taps[CPE_CHANNELS], curr_taps[CPE_CHANNELS][STEREO_DMX_EVS_PHA_LEN_MAX]; /* pointer is NULL when the channel has no filter in the current frame */ + + float data_mem[CPE_CHANNELS][STEREO_DMX_EVS_PHA_LEN_MAX]; /* last pha_len input samples of each channel, kept for the next frame */ + + STEREO_DMX_EVS_PHA curr_pha; + STEREO_DMX_EVS_PHA prev_pha; + int16_t pha_hys_cnt; + + int16_t prc_thres; /* |itd| above this value votes for the POC downmix */ + STEREO_DMX_EVS_PRC curr_prc; + STEREO_DMX_EVS_PRC prev_prc; + int16_t prc_hys_cnt; + float fad_g_prc[L_FRAME48k]; /* cross-fade gains used when the output switches between POC and PHA */ + int16_t fad_len_prc; + + float trns_aux_energy[CPE_CHANNELS]; /* recursively smoothed sub-frame energy per channel */ + float crst_fctr; /* sub-frame energy to smoothed energy ratio above which the frame is flagged transient */ + +} STEREO_DMX_EVS_PHA_DATA, *STEREO_DMX_EVS_PHA_HANDLE; + +#endif typedef struct stereo_dmx_evs_enc_data_structure { STEREO_DMX_EVS_POC_HANDLE hPOC; +#ifdef ENHANCED_STEREO_DMX + STEREO_DMX_EVS_PHA_HANDLE hPHA; +#endif float itd; diff --git a/lib_enc/ivas_stereo_dmx_evs.c b/lib_enc/ivas_stereo_dmx_evs.c index a5c0fe147f0055724024771e5133507e08ccab36..ddb94525b3b4a2cd4c5f14701ee8c170bbd5badb 100644 --- a/lib_enc/ivas_stereo_dmx_evs.c +++ b/lib_enc/ivas_stereo_dmx_evs.c @@ -65,13 +65,54 @@ #define Q_BAND 0.25f +#ifdef 
ENHANCED_STEREO_DMX + +#define STEREO_DMX_EVS_ISD_FORGETTING 0.95f /* smoothing factor for isd_rate_s */ +#define STEREO_DMX_EVS_ISD_THRES_H 1.69f +#define STEREO_DMX_EVS_ISD_THRES_L 0.9f +#define STEREO_DMX_EVS_ISD_DIST_THRES_IPD 0.5f + +#define STEREO_DMX_EVS_ISD_DIST_HYST_L 0.36f +#define STEREO_DMX_EVS_ISD_DIST_HYST_H 0.43f + +#define STEREO_DMX_EVS_ICCR_FORGETTING 0.7f /* smoothing factor for iccr_s */ +#define STEREO_DMX_EVS_ICCR_HYST_L 0.75f +#define STEREO_DMX_EVS_ICCR_HYST_H 0.85f + +#define STEREO_DMX_EVS_SWTCH_HYS_THRES 1 /* hysteresis count required before curr_pha switches */ +#define STEREO_DMX_EVS_LR_EGY 15.0f +#define STEREO_DMX_EVS_ILDS_EGY 10000.0f +#define STEREO_DMX_EVS_ILD_PRC 0.1f + +#define STEREO_DMX_EVS_SWTCH_PRC_THRES_16 55 +#define STEREO_DMX_EVS_SWTCH_PRC_THRES_32 19 +#define STEREO_DMX_EVS_SWTCH_PRC_THRES_48 29 + +#define STEREO_DMX_EVS_SWTCH_PRC_HYS_THRES 1 +#define STEREO_DMX_EVS_FADE_LEN_PRC 20.0f /* in ms; converted to samples in stereo_dmx_evs_init_encoder() */ + +#define STEREO_DMX_EVS_NB_SBFRM 5 /* sub-frames per frame used by the transient check */ +#define STEREO_DMX_EVS_TRNS_DTC_INST 75.0f /* energy ratio between consecutive sub-frames that flags a transient */ +#define STEREO_DMX_EVS_CRST_FCTR_16 80.0f +#define STEREO_DMX_EVS_CRST_FCTR_32 40.0f +#define STEREO_DMX_EVS_CRST_FCTR_48 35.0f + +#define STEREO_DMX_EVS_TRNS_EGY_FORGETTING 0.75f /* smoothing factor for trns_aux_energy once init_frmCntr has reached 0 */ + +#endif + /*-----------------------------------------------------------------------* * Local function prototypes *-----------------------------------------------------------------------*/ static void estimate_itd_wnd_fft( const float *input, float *specr, float *speci, const float *rfft_coef, const float *wnd, const int16_t input_frame ); +#ifdef ENHANCED_STEREO_DMX +static void calc_poc( STEREO_DMX_EVS_POC_HANDLE hPOC, STEREO_DMX_EVS_PHA_HANDLE hPHA, const float wnd[], const float rfft_coef[], const float specLr[], const float specLi[], const float specRr[], const float specRi[], const int16_t input_frame ); +static ivas_error estimate_itd( float *corr, STEREO_DMX_EVS_POC_HANDLE hPOC, STEREO_DMX_EVS_PHA_HANDLE hPHA, const float srcL[], const float srcR[], float itd[], const int16_t input_frame ); +#else static void calc_poc( STEREO_DMX_EVS_POC_HANDLE hPOC, const float wnd[], const float rfft_coef[], const 
float specLr[], const float specLi[], const float specRr[], const float specRi[], const int16_t input_frame ); static ivas_error estimate_itd( float *corr, STEREO_DMX_EVS_POC_HANDLE hPOC, const float srcL[], const float srcR[], float itd[], const int16_t input_frame ); +#endif static void weighted_ave( const float src1[], const float src2[], float dst[], const float gain, const float old_gain, const int16_t input_frame, const float wnd[] ); static void adapt_gain( const float src[], float dst[], const float gain, const float old_gain, const int16_t input_frame, const float wnd[] ); static void create_M_signal( const float srcL[], const float srcR[], float dmx[], const float w_curr, const int16_t input_frame, const float wnd[], float *w_prev, float *dmx_energy, float *src_energy ); @@ -141,13 +182,16 @@ void estimate_itd_wnd_fft( static void calc_poc( STEREO_DMX_EVS_POC_HANDLE hPOC, /* i/o: phase only correlation structure */ - const float wnd[], /* i : window coef */ - const float rfft_coef[], /* i : RFFT coef */ - const float specLr[], /* i : Lch real-part spectra */ - const float specLi[], /* i : Lch imaginary-part input signal */ - const float specRr[], /* i : Rch real-part spectra */ - const float specRi[], /* i : Rch imaginary-part input signal */ - const int16_t input_frame /* i : input frame length per channel */ +#ifdef ENHANCED_STEREO_DMX + STEREO_DMX_EVS_PHA_HANDLE hPHA, /* i/o : correlation filter structure */ +#endif + const float wnd[], /* i : window coef */ + const float rfft_coef[], /* i : RFFT coef */ + const float specLr[], /* i : Lch real-part spectra */ + const float specLi[], /* i : Lch imaginary-part input signal */ + const float specRr[], /* i : Rch real-part spectra */ + const float specRi[], /* i : Rch imaginary-part input signal */ + const int16_t input_frame /* i : input frame length per channel */ ) { int16_t i, n1, n2; @@ -167,18 +211,31 @@ static void calc_poc( int16_t cos_step, cos_max; float eps_cos, eps_sin, EPS; +#ifdef 
ENHANCED_STEREO_DMX + int16_t isd_cnt_h, isd_cnt_l, ild_cnt, n, freq_8k, freq_ipd_max, nsbd, input_frame_pha; + float Nr, Ni, Dr, Di, tPr, tPi, Pn, energy, isd_rate; + float eneL, eneR, IPDr, IPDi, tIPDr, tIPDi, ICCr; + float *Pr, *Pi, *ipd_ff, *p_curr_taps; + float rfft_pha_buf[L_FRAME48k], tEr[STEREO_DMX_EVS_NB_SUBBAND_MAX], tEl[STEREO_DMX_EVS_NB_SUBBAND_MAX]; +#endif + /* Initialization */ iN = 1.0f / (float) input_frame; - s = hPOC->sin; P = hPOC->P; n0 = input_frame / 2; itdLR = hPOC->itdLR; +#ifdef ENHANCED_STEREO_DMX + Pr = hPHA->Pr; + Pi = hPHA->Pi; + nsbd = n0 / STEREO_DMX_EVS_SUBBAND_SIZE; /* number of sub-bands in the half spectrum */ + input_frame_pha = input_frame / STEREO_DMX_EVS_SUBBAND_SIZE; /* length passed to rfft() when the taps are computed */ +#endif + igamma = STEREO_DMX_EVS_POC_GAMMA * iN; gamma = 1.0f - igamma; - step = 1; bias = 0; cos_step = 2; @@ -337,6 +394,291 @@ static void calc_poc( } specPOr[n0] = sign( specLr[n0] * specRr[n0] ) * wnd[i * step + bias] * gamma; +#ifdef ENHANCED_STEREO_DMX + + hPHA->init_frmCntr--; /* start-up countdown, floored at 0 just below */ + if ( hPHA->init_frmCntr < 0 ) + { + hPHA->init_frmCntr = 0; + } + freq_8k = L_FRAME16k / 2; + freq_ipd_max = (int16_t) ( freq_8k * 5000.0f / ( 8000.0f * STEREO_DMX_EVS_SUBBAND_SIZE ) ); + + // Memorize the filters N-1: copy the current taps into prev_taps, or mark the previous filter as absent (NULL) + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + if ( hPHA->p_curr_taps[n] ) + { + hPHA->p_prev_taps[n] = hPHA->prev_taps[n]; + mvr2r( hPHA->p_curr_taps[n], hPHA->p_prev_taps[n], hPHA->pha_len ); + } + else + { + hPHA->p_prev_taps[n] = NULL; + } + } + + // ISD: over bins 1..freq_8k, count those where |L-R|^2 is above THRES_H times |L+R|^2, resp. below THRES_L times |L+R|^2 + isd_cnt_l = 0; + isd_cnt_h = 0; + for ( i = 1; i <= freq_8k; i++ ) + { + Nr = ( specLr[i] - specRr[i] ); + Ni = ( specLi[i] - specRi[i] ); + Dr = ( specLr[i] + specRr[i] ); + Di = ( specLi[i] + specRi[i] ); + if ( ( Nr * Nr + Ni * Ni ) > STEREO_DMX_EVS_ISD_THRES_H * ( Dr * Dr + Di * Di ) ) + { + isd_cnt_h++; + } + if ( ( Nr * Nr + Ni * Ni ) < STEREO_DMX_EVS_ISD_THRES_L * ( Dr * Dr + Di * Di ) ) + { + isd_cnt_l++; + } + } + + isd_rate = (float) isd_cnt_h / (float) freq_8k; + hPHA->isd_rate_s = STEREO_DMX_EVS_ISD_FORGETTING * hPHA->isd_rate_s + ( 1.0f - 
STEREO_DMX_EVS_ISD_FORGETTING ) * isd_rate; + + if ( hPHA->isd_rate_s > STEREO_DMX_EVS_ISD_DIST_HYST_H ) + { + if ( hPHA->curr_pha != STEREO_DMX_EVS_PHA_IPD ) + { + if ( hPHA->prev_pha == STEREO_DMX_EVS_PHA_IPD ) + { + hPHA->pha_hys_cnt += 1; + } + else + { + hPHA->pha_hys_cnt = 0; + } + + if ( hPHA->pha_hys_cnt >= STEREO_DMX_EVS_SWTCH_HYS_THRES ) + { + hPHA->curr_pha = STEREO_DMX_EVS_PHA_IPD; + } + } + + hPHA->prev_pha = STEREO_DMX_EVS_PHA_IPD; + } + else if ( hPHA->isd_rate_s < STEREO_DMX_EVS_ISD_DIST_HYST_L ) + { + if ( hPHA->curr_pha != STEREO_DMX_EVS_PHA_IPD2 ) + { + if ( hPHA->prev_pha == STEREO_DMX_EVS_PHA_IPD2 ) + { + hPHA->pha_hys_cnt += 1; + } + else + { + hPHA->pha_hys_cnt = 0; + } + + if ( hPHA->pha_hys_cnt >= STEREO_DMX_EVS_SWTCH_HYS_THRES ) + { + hPHA->curr_pha = STEREO_DMX_EVS_PHA_IPD2; + } + } + hPHA->prev_pha = STEREO_DMX_EVS_PHA_IPD2; + } + + ipd_ff = hPHA->ipd_ff; + + Nr = 0; + Ni = 0; + eneL = 0; + eneR = 0; + + for ( n = 1, i = 1; n < nsbd; n++ ) + { + tPr = 0.0f; + tPi = 0.0f; + tEr[n] = 0.0f; + tEl[n] = 0.0f; + + for ( j = 0; j < STEREO_DMX_EVS_SUBBAND_SIZE; j++, i++ ) + { + // Energy: per-sub-band energy of each channel + tEl[n] += specLr[i] * specLr[i] + specLi[i] * specLi[i]; + tEr[n] += specRr[i] * specRr[i] + specRi[i] * specRi[i]; + + // IPD: per-bin L * conj(R), accumulated over the sub-band + IPDr = specLr[i] * specRr[i] + specLi[i] * specRi[i]; + IPDi = specLi[i] * specRr[i] - specLr[i] * specRi[i]; + tPr += IPDr; + tPi += IPDi; + + // ICCr: rotate R by the normalised per-bin IPD and accumulate its correlation with L + Pn = (float) inv_sqrt( ( IPDr * IPDr + IPDi * IPDi ) + EPSILON ); + IPDr *= Pn; + IPDi *= Pn; + + tIPDr = ( specRr[i] * IPDr - specRi[i] * IPDi ); + tIPDi = ( specRr[i] * IPDi + specRi[i] * IPDr ); + + Nr += ( specLr[i] * tIPDr + specLi[i] * tIPDi ); + Ni += ( specLi[i] * tIPDr - specLr[i] * tIPDi ); + + eneL += ( specLr[i] * specLr[i] + specLi[i] * specLi[i] ); + eneR += ( specRr[i] * specRr[i] + specRi[i] * specRi[i] ); + } + + Pn = (float) inv_sqrt( ( tPr * tPr + tPi * tPi ) + EPSILON ); + tPr *= Pn; + tPi *= Pn; + + if ( hPHA->init_frmCntr == 0 ) + { + Pr[n] = ipd_ff[n] * Pr[n] 
+ ( 1.0f - ipd_ff[n] ) * tPr; + Pi[n] = ipd_ff[n] * Pi[n] + ( 1.0f - ipd_ff[n] ) * tPi; + Pn = (float) inv_sqrt( ( Pr[n] * Pr[n] + Pi[n] * Pi[n] ) + EPSILON ); + Pr[n] *= Pn; + Pi[n] *= Pn; + } + else + { + Pr[n] = tPr; + Pi[n] = tPi; + } + + Pr[n] = ( Pr[n] > 1.0f ) ? 1.0f : Pr[n]; + Pr[n] = ( Pr[n] < -1.0f ) ? -1.0f : Pr[n]; + } + ICCr = (float) sqrt( ( Nr * Nr + Ni * Ni ) / ( eneL * eneR + EPSILON ) ); + hPHA->iccr_s = STEREO_DMX_EVS_ICCR_FORGETTING * hPHA->iccr_s + ( 1.0f - STEREO_DMX_EVS_ICCR_FORGETTING ) * ICCr; + + if ( hPHA->curr_pha == STEREO_DMX_EVS_PHA_IPD ) + { + hPHA->p_curr_taps[0] = NULL; + hPHA->p_curr_taps[1] = hPHA->curr_taps[1]; + + rfft_pha_buf[0] = 1.; + rfft_pha_buf[1] = 1.; + + ild_cnt = 0; + for ( i = 1; i < nsbd; i++ ) + { + rfft_pha_buf[i * 2] = Pr[i]; + rfft_pha_buf[i * 2 + 1] = Pi[i]; + if ( ( tEr[i] > STEREO_DMX_EVS_LR_EGY * tEl[i] ) || ( tEl[i] > STEREO_DMX_EVS_LR_EGY * tEr[i] ) ) + { + ild_cnt++; + tEr[i] = 1; /* tEr is reused from here on as a per-sub-band flag: > 0 marks a large level difference */ + } + else + { + tEr[i] = -1; + } + } + if ( ild_cnt > nsbd * STEREO_DMX_EVS_ILD_PRC ) + { + for ( i = 1; i < nsbd; i++ ) + { + if ( tEr[i] > 0 ) + { + rfft_pha_buf[i * 2] = 1.; + rfft_pha_buf[i * 2 + 1] = 0.; + } + } + } + + rfft( rfft_pha_buf, hPHA->rfft_ipd_coef, input_frame_pha, +1 ); + mvr2r( rfft_pha_buf, hPHA->p_curr_taps[1], hPHA->pha_len ); + } + else + { + if ( ( hPHA->iccr_s < STEREO_DMX_EVS_ICCR_HYST_L ) || ( ( hPHA->iccr_s < STEREO_DMX_EVS_ICCR_HYST_H ) && ( hPHA->p_curr_taps[0] != NULL ) ) ) + { + // IPDn: sub-bands from freq_ipd_max upwards are reset to Pr = 1, Pi = 0 + + set_f( &( Pr[freq_ipd_max] ), 1.0f, ( nsbd - freq_ipd_max ) ); + set_f( &( Pi[freq_ipd_max] ), 0.0f, ( nsbd - freq_ipd_max ) ); + + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + hPHA->p_curr_taps[n] = hPHA->curr_taps[n]; + } + + rfft_pha_buf[0] = 1.; + rfft_pha_buf[1] = 1.; + + ild_cnt = 0; + isd_rate = (float) isd_cnt_l / freq_8k; + for ( i = 1; i < nsbd; i++ ) + { + rfft_pha_buf[i * 2] = (float) sqrt( ( 1.0f + Pr[i] ) / 2.0f ); + rfft_pha_buf[i * 2 + 1] = (float) sqrt( ( 1.0f - Pr[i] ) / 2.0f ) * sign( Pi[i] ); 
+ if ( isd_rate > STEREO_DMX_EVS_ISD_DIST_THRES_IPD ) + { + rfft_pha_buf[i * 2 + 1] = (float) sqrt( ( 1.0f - rfft_pha_buf[i * 2] ) / 2.0f ) * sign( rfft_pha_buf[i * 2 + 1] ); + rfft_pha_buf[i * 2] = (float) sqrt( ( 1.0f + rfft_pha_buf[i * 2] ) / 2.0f ); + } + + if ( ( tEr[i] > STEREO_DMX_EVS_LR_EGY * tEl[i] ) || ( tEl[i] > STEREO_DMX_EVS_LR_EGY * tEr[i] ) ) + { + ild_cnt++; + tEr[i] = 1; + } + else + { + tEr[i] = -1; + } + } + if ( ild_cnt > nsbd * STEREO_DMX_EVS_ILD_PRC ) + { + for ( i = 1; i < nsbd; i++ ) + { + if ( tEr[i] > 0 ) + { + rfft_pha_buf[i * 2] = 1.; + rfft_pha_buf[i * 2 + 1] = 0.; + } + } + } + + rfft( rfft_pha_buf, hPHA->rfft_ipd_coef, input_frame_pha, +1 ); + mvr2r( rfft_pha_buf, hPHA->p_curr_taps[1], hPHA->pha_len ); + + // PHA L2R + p_curr_taps = hPHA->p_curr_taps[0]; + p_curr_taps[0] = rfft_pha_buf[0]; + for ( i = 1; i < hPHA->pha_len; i++ ) + { + p_curr_taps[i] = rfft_pha_buf[input_frame_pha - i]; + } + } + else + { + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + hPHA->p_curr_taps[n] = NULL; + } + } + } + + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + if ( hPHA->p_curr_taps[n] ) + { + for ( i = 0; i < hPHA->pha_len; i++ ) + { + hPHA->p_curr_taps[n][i] *= hPHA->win[i]; + } + + energy = 0.; + for ( i = 0; i < hPHA->pha_len; i++ ) + { + energy += hPHA->p_curr_taps[n][i] * hPHA->p_curr_taps[n][i]; + } + energy = (float) inv_sqrt( energy + EPSILON ); + for ( i = 0; i < hPHA->pha_len; i++ ) + { + hPHA->p_curr_taps[n][i] *= energy; + } + } + } + +#endif rfft_buf[0] = specPOr[0]; rfft_buf[1] = specPOr[n0]; @@ -550,10 +892,13 @@ static float find_poc_peak( static ivas_error estimate_itd( float *corr, /* o : correlation */ STEREO_DMX_EVS_POC_HANDLE hPOC, /* i/o: phase only correlation structure */ - const float srcL[], /* i : Lch input signal */ - const float srcR[], /* i : Rch input signal */ - float itd[], /* o : estimated itd */ - const int16_t input_frame /* i : input frame length per channel */ +#ifdef ENHANCED_STEREO_DMX + STEREO_DMX_EVS_PHA_HANDLE hPHA, 
/* i/o : correlation filter structure */ +#endif + const float srcL[], /* i : Lch input signal */ + const float srcR[], /* i : Rch input signal */ + float itd[], /* o : estimated itd */ + const int16_t input_frame /* i : input frame length per channel */ ) { float specLr[L_FRAME48k / 2 + 1], specLi[L_FRAME48k / 2 + 1], specRr[L_FRAME48k / 2 + 1], specRi[L_FRAME48k / 2 + 1]; @@ -598,7 +943,11 @@ static ivas_error estimate_itd( estimate_itd_wnd_fft( srcL, specLr, specLi, rfft_coef, hPOC->wnd, input_frame ); estimate_itd_wnd_fft( srcR, specRr, specRi, rfft_coef, hPOC->wnd, input_frame ); +#ifdef ENHANCED_STEREO_DMX + calc_poc( hPOC, hPHA, hPOC->wnd, rfft_coef, specLr, specLi, specRr, specRi, input_frame ); +#else calc_poc( hPOC, hPOC->wnd, rfft_coef, specLr, specLi, specRr, specRi, input_frame ); +#endif *corr = find_poc_peak( hPOC, itd, input_frame, STEREO_DMX_EVS_POC_W_FORGETTING ); return error; @@ -794,7 +1143,19 @@ void stereo_dmx_evs_enc( int16_t n; float dmx_weight, corr; float data_f[CPE_CHANNELS][L_FRAME48k]; + +#ifdef ENHANCED_STEREO_DMX + int16_t k, m, pha_len, fad_len; + float mem_prev[STEREO_DMX_EVS_FAD_LEN_MAX], data_mem[STEREO_DMX_EVS_DATA_LEN_MAX]; + float *p_data_mem, *p_prev_taps, *p_curr_taps, *fad_g, *p_data; + float dmx_poc_data[L_FRAME48k], dmx_pha_data[L_FRAME48k], *p_dmx_data, ftmp; + STEREO_DMX_EVS_PRC curr_prc; + int16_t input_subframe, is_transient; + float *p_sub_frame, subframe_energy[STEREO_DMX_EVS_NB_SBFRM]; +#else float dmx_data[L_FRAME48k]; +#endif + int16_t input_frame; input_frame = (int16_t) ( input_Fs / FRAMES_PER_SEC ); @@ -810,6 +1171,215 @@ void stereo_dmx_evs_enc( set_f( data_f[1] + n_samples, 0.0f, input_frame - n_samples ); } +#ifdef ENHANCED_STEREO_DMX + + input_subframe = n_samples / STEREO_DMX_EVS_NB_SBFRM; + is_transient = 0; + for ( k = 0; k < CPE_CHANNELS; k++ ) + { + ftmp = 0; + for ( m = 0; m < STEREO_DMX_EVS_NB_SBFRM; m++ ) + { + p_sub_frame = &( data_f[k][m * input_subframe] ); + subframe_energy[m] = 0; + for ( n = 
0; n < input_subframe; n++ ) + { + subframe_energy[m] += p_sub_frame[n] * p_sub_frame[n]; + } + + if ( subframe_energy[m] / ( hStereoDmxEVS->hPHA->trns_aux_energy[k] + EPSILON ) > hStereoDmxEVS->hPHA->crst_fctr ) + { + is_transient = 1; + } + + if ( hStereoDmxEVS->hPHA->init_frmCntr == 0 ) + { + hStereoDmxEVS->hPHA->trns_aux_energy[k] = STEREO_DMX_EVS_TRNS_EGY_FORGETTING * hStereoDmxEVS->hPHA->trns_aux_energy[k] + ( 1.0f - STEREO_DMX_EVS_TRNS_EGY_FORGETTING ) * subframe_energy[m]; + } + else + { + hStereoDmxEVS->hPHA->trns_aux_energy[k] = 0.5f * hStereoDmxEVS->hPHA->trns_aux_energy[k] + 0.5f * subframe_energy[m]; + } + + ftmp += subframe_energy[m]; /* NOTE(review): this sum is not read again in the lines shown here */ + } + + for ( m = 1; m < STEREO_DMX_EVS_NB_SBFRM; m++ ) + { + if ( subframe_energy[m] / ( subframe_energy[m - 1] + EPSILON ) > STEREO_DMX_EVS_TRNS_DTC_INST ) + { + is_transient = 1; + } + } + } + + estimate_itd( &corr, hStereoDmxEVS->hPOC, hStereoDmxEVS->hPHA, data_f[0], data_f[1], &hStereoDmxEVS->itd, input_frame ); + + // poc: downmix weighted by the ITD sign and the correlation + + if ( hStereoDmxEVS->itd ) + { + dmx_weight = ( ( hStereoDmxEVS->itd > 0 ) ? 
( -1 ) : 1 ) * 0.5f * corr + 0.5f; + } + else + { + dmx_weight = 0.5f; + } + + create_M_signal( data_f[0], data_f[1], dmx_poc_data, dmx_weight, input_frame, hStereoDmxEVS->s_wnd, + hStereoDmxEVS->dmx_weight, hStereoDmxEVS->pre_dmx_energy, hStereoDmxEVS->aux_dmx_energy ); + + // pha: filter each channel with the previous and the current taps (or pass it through scaled by INV_SQRT_2 when a filter is absent), then cross-fade + + pha_len = hStereoDmxEVS->hPHA->pha_len; + fad_len = hStereoDmxEVS->hPHA->fad_len; + fad_g = hStereoDmxEVS->hPHA->fad_g; + + set_zero( dmx_pha_data, n_samples ); + set_zero( mem_prev, fad_len ); + + for ( k = 0; k < CPE_CHANNELS; k++ ) + { + p_data = data_f[k]; + mvr2r( hStereoDmxEVS->hPHA->data_mem[k], data_mem, pha_len ); /* history saved from the previous call goes in front of the frame */ + mvr2r( &( p_data[n_samples - pha_len] ), hStereoDmxEVS->hPHA->data_mem[k], pha_len ); /* save the tail of this frame for the next call */ + p_data_mem = &( data_mem[pha_len] ); + mvr2r( p_data, p_data_mem, n_samples ); + + p_prev_taps = hStereoDmxEVS->hPHA->p_prev_taps[k]; + if ( p_prev_taps ) + { + for ( n = 0; n < fad_len; n++ ) + { + for ( ftmp = 0, m = 0; m < pha_len; m++ ) + { + ftmp += p_data_mem[n - m] * p_prev_taps[m]; + } + mem_prev[n] += ftmp * INV_SQRT_2; + } + } + else + { + for ( n = 0; n < fad_len; n++ ) + { + mem_prev[n] += p_data[n] * INV_SQRT_2; + } + } + + p_curr_taps = hStereoDmxEVS->hPHA->p_curr_taps[k]; + if ( p_curr_taps ) + { + for ( n = 0; n < n_samples; n++ ) + { + for ( ftmp = 0, m = 0; m < pha_len; m++ ) + { + ftmp += p_data_mem[n - m] * p_curr_taps[m]; + } + dmx_pha_data[n] += ftmp * INV_SQRT_2; + } + } + else + { + for ( n = 0; n < n_samples; n++ ) + { + dmx_pha_data[n] += p_data[n] * INV_SQRT_2; + } + } + } + + for ( n = 0, m = ( fad_len - 1 ); n < fad_len; n++, m-- ) + { + dmx_pha_data[n] *= fad_g[n]; + dmx_pha_data[n] += ( mem_prev[n] ) * fad_g[m]; + } + + // prc switch: |itd| above prc_thres votes for POC, otherwise for PHA, with a hysteresis counter + + curr_prc = hStereoDmxEVS->hPHA->curr_prc; + if ( abs( (int16_t) hStereoDmxEVS->itd ) > hStereoDmxEVS->hPHA->prc_thres ) + { + if ( hStereoDmxEVS->hPHA->curr_prc != STEREO_DMX_EVS_PRC_POC ) + { + if ( hStereoDmxEVS->hPHA->prev_prc == STEREO_DMX_EVS_PRC_POC ) + { + hStereoDmxEVS->hPHA->prc_hys_cnt += 1; + } + else 
+ { + hStereoDmxEVS->hPHA->prc_hys_cnt = 0; + } + + if ( hStereoDmxEVS->hPHA->prc_hys_cnt >= STEREO_DMX_EVS_SWTCH_PRC_HYS_THRES ) + { + hStereoDmxEVS->hPHA->curr_prc = STEREO_DMX_EVS_PRC_POC; + } + } + hStereoDmxEVS->hPHA->prev_prc = STEREO_DMX_EVS_PRC_POC; + } + else + { + if ( hStereoDmxEVS->hPHA->curr_prc != STEREO_DMX_EVS_PRC_PHA ) + { + if ( hStereoDmxEVS->hPHA->prev_prc == STEREO_DMX_EVS_PRC_PHA ) + { + hStereoDmxEVS->hPHA->prc_hys_cnt += 1; + } + else + { + hStereoDmxEVS->hPHA->prc_hys_cnt = 0; + } + + if ( hStereoDmxEVS->hPHA->prc_hys_cnt >= STEREO_DMX_EVS_SWTCH_PRC_HYS_THRES ) + { + hStereoDmxEVS->hPHA->curr_prc = STEREO_DMX_EVS_PRC_PHA; + } + } + hStereoDmxEVS->hPHA->prev_prc = STEREO_DMX_EVS_PRC_PHA; + } + + if ( ( is_transient == 1 ) || ( hStereoDmxEVS->aux_dmx_energy[0] > STEREO_DMX_EVS_ILDS_EGY * hStereoDmxEVS->aux_dmx_energy[1] ) || ( hStereoDmxEVS->aux_dmx_energy[1] > STEREO_DMX_EVS_ILDS_EGY * hStereoDmxEVS->aux_dmx_energy[0] ) || ( ( hStereoDmxEVS->hPHA->p_curr_taps[0] == NULL ) && ( hStereoDmxEVS->hPHA->p_curr_taps[1] == NULL ) ) ) + { + hStereoDmxEVS->hPHA->curr_prc = STEREO_DMX_EVS_PRC_POC; + hStereoDmxEVS->hPHA->prc_hys_cnt = 0; + } + + if ( hStereoDmxEVS->hPHA->curr_prc == STEREO_DMX_EVS_PRC_POC ) + { + p_dmx_data = dmx_poc_data; + + if ( curr_prc != hStereoDmxEVS->hPHA->curr_prc ) + { + fad_len = hStereoDmxEVS->hPHA->fad_len_prc; + fad_g = hStereoDmxEVS->hPHA->fad_g_prc; + + for ( n = 0, m = ( fad_len - 1 ); n < fad_len; n++, m-- ) + { + p_dmx_data[n] *= fad_g[n]; + p_dmx_data[n] += fad_g[m] * dmx_pha_data[n]; + } + } + } + else + { + p_dmx_data = dmx_pha_data; + + if ( curr_prc != hStereoDmxEVS->hPHA->curr_prc ) + { + fad_len = hStereoDmxEVS->hPHA->fad_len_prc; + fad_g = hStereoDmxEVS->hPHA->fad_g_prc; + + for ( n = 0, m = ( fad_len - 1 ); n < fad_len; n++, m-- ) + { + p_dmx_data[n] *= fad_g[n]; + p_dmx_data[n] += fad_g[m] * dmx_poc_data[n]; + } + } + } + + mvr2s( p_dmx_data, data, n_samples ); + +#else + estimate_itd( &corr, 
hStereoDmxEVS->hPOC, data_f[0], data_f[1], &hStereoDmxEVS->itd, input_frame ); if ( hStereoDmxEVS->itd ) @@ -826,6 +1396,8 @@ void stereo_dmx_evs_enc( mvr2s( dmx_data, data, n_samples ); +#endif + return; } @@ -844,6 +1416,12 @@ ivas_error stereo_dmx_evs_init_encoder( STEREO_DMX_EVS_ENC_HANDLE hStereoDmxEVS; int16_t n, input_frame; +#ifdef ENHANCED_STEREO_DMX + int16_t m, len, pha_len, fad_len, fad_len2, trans_len, itrh, rfft_ipd_coef_step, n0, input_frame_pha; + float *win, *fad_g, fad_r, tmp_r, a_min, a_max, a_step, *ipd_ff; + const float *p_ipd_w; +#endif + input_frame = (int16_t) ( input_Fs / FRAMES_PER_SEC ); hStereoDmxEVS = NULL; @@ -933,6 +1511,156 @@ ivas_error stereo_dmx_evs_init_encoder( hStereoDmxEVS->hPOC->confidence = 0.0f; +#ifdef ENHANCED_STEREO_DMX + + hStereoDmxEVS->hPHA = NULL; + if ( ( hStereoDmxEVS->hPHA = (STEREO_DMX_EVS_PHA_HANDLE) malloc( sizeof( STEREO_DMX_EVS_PHA_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for STEREO_DMX_EVS_CORFILT_DATA\n" ) ); /* NOTE(review): the message names STEREO_DMX_EVS_CORFILT_DATA while the allocated type is STEREO_DMX_EVS_PHA_DATA */ + } + + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + hStereoDmxEVS->hPHA->p_curr_taps[n] = NULL; + hStereoDmxEVS->hPHA->p_prev_taps[n] = NULL; + + set_zero( hStereoDmxEVS->hPHA->data_mem[n], STEREO_DMX_EVS_PHA_LEN_MAX ); + set_zero( hStereoDmxEVS->hPHA->curr_taps[n], STEREO_DMX_EVS_PHA_LEN_MAX ); + } + + if ( input_Fs == 16000 ) + { + len = STEREO_DMX_EVS_PHA_LEN_16; + hStereoDmxEVS->hPHA->fad_len = STEREO_DMX_EVS_FAD_LEN_16; + hStereoDmxEVS->hPHA->prc_thres = STEREO_DMX_EVS_SWTCH_PRC_THRES_16; + hStereoDmxEVS->hPHA->crst_fctr = STEREO_DMX_EVS_CRST_FCTR_16; + } + else if ( input_Fs == 32000 ) + { + len = STEREO_DMX_EVS_PHA_LEN_32; + hStereoDmxEVS->hPHA->fad_len = STEREO_DMX_EVS_FAD_LEN_32; + hStereoDmxEVS->hPHA->prc_thres = STEREO_DMX_EVS_SWTCH_PRC_THRES_32; + hStereoDmxEVS->hPHA->crst_fctr = STEREO_DMX_EVS_CRST_FCTR_32; + } + else if ( input_Fs == 48000 ) + { + len = STEREO_DMX_EVS_PHA_LEN_48; + hStereoDmxEVS->hPHA->fad_len = 
STEREO_DMX_EVS_FAD_LEN_48; + hStereoDmxEVS->hPHA->prc_thres = STEREO_DMX_EVS_SWTCH_PRC_THRES_48; + hStereoDmxEVS->hPHA->crst_fctr = STEREO_DMX_EVS_CRST_FCTR_48; + } + else + { + return IVAS_ERROR( IVAS_ERR_INTERNAL_FATAL, "invalid sampling frequency\n" ); + } + + hStereoDmxEVS->hPHA->pha_len = len / 2; /* the tap count is half of the per-rate STEREO_DMX_EVS_PHA_LEN_xx */ + hStereoDmxEVS->hPHA->init_frmCntr = (int16_t) ( FRAMES_PER_SEC * 0.2f ); /* start-up phase of 0.2 s worth of frames */ + hStereoDmxEVS->hPHA->isd_rate_s = 0.0f; + hStereoDmxEVS->hPHA->iccr_s = 0.0f; + + pha_len = hStereoDmxEVS->hPHA->pha_len; + fad_len = hStereoDmxEVS->hPHA->fad_len; + + trans_len = (int16_t) ( (float) pha_len / 20.0f ); + set_f( hStereoDmxEVS->hPHA->win, 1.8f, pha_len - trans_len ); /* flat part of the tap window; win[0] is overwritten below and the last trans_len entries get a raised-cosine roll-off */ + hStereoDmxEVS->hPHA->win[0] = 1.0f; + tmp_r = 1.0f / ( ( trans_len * 2 ) + 1 ); + win = &( hStereoDmxEVS->hPHA->win[pha_len - trans_len] ); + for ( n = 0; n < trans_len; n++ ) + { + win[n] = ( 0.5f * ( 1.0f + cosf( ( PI2 * ( n + 1 ) ) * tmp_r ) ) ) * 1.8f; + } + + fad_g = hStereoDmxEVS->hPHA->fad_g; + fad_r = 1.0f / (float) ( fad_len + 1 ); + fad_len2 = fad_len / 2; + for ( n = 0, m = ( fad_len - 1 ); n < fad_len2; n++, m-- ) + { + fad_g[n] = (float) ( n + 1 ) * fad_r; + fad_g[m] = 1.0f - fad_g[n]; + } + + hStereoDmxEVS->hPHA->curr_pha = STEREO_DMX_EVS_PHA_IPD; + hStereoDmxEVS->hPHA->prev_pha = STEREO_DMX_EVS_PHA_IPD; + hStereoDmxEVS->hPHA->pha_hys_cnt = 0; + + // Compute the forgetting factor: a_max below the 3 kHz sub-band, a linear ramp towards a_min up to 8 kHz, a_min above + a_min = 0.8576958985908941f; + a_max = 0.9440608762859234f; + itrh = (int16_t) ( ( 3000 * input_frame ) / ( input_Fs * STEREO_DMX_EVS_SUBBAND_SIZE ) ); // 3kHz + n0 = L_FRAME16k / ( 2 * STEREO_DMX_EVS_SUBBAND_SIZE ); + a_step = ( a_min - a_max ) / ( n0 + 1 - itrh ); + ipd_ff = hStereoDmxEVS->hPHA->ipd_ff; + for ( n = 0; n < itrh; n++ ) + { + ipd_ff[n] = a_max; + } + for ( ; n < ( n0 + 1 ); n++ ) // 8kHz + { + ipd_ff[n] = a_max + ( n - itrh ) * a_step; + } + for ( ; n < STEREO_DMX_EVS_NB_SUBBAND_MAX; n++ ) + { + ipd_ff[n] = a_min; + } + set_f( hStereoDmxEVS->hPHA->Pr, 1.0, STEREO_DMX_EVS_NB_SUBBAND_MAX ); + set_zero( 
hStereoDmxEVS->hPHA->Pi, STEREO_DMX_EVS_NB_SUBBAND_MAX ); + + n0 = input_frame / ( 4 * STEREO_DMX_EVS_SUBBAND_SIZE ); + input_frame_pha = input_frame / ( 2 * STEREO_DMX_EVS_SUBBAND_SIZE ); /* half of the rfft() length that calc_poc() uses for the taps */ + + if ( input_frame == L_FRAME16k ) + { + p_ipd_w = dft_trigo_32k; + rfft_ipd_coef_step = 4; + } + else if ( input_frame == L_FRAME32k ) + { + p_ipd_w = dft_trigo_32k; + rfft_ipd_coef_step = 2; + } + else if ( input_frame == L_FRAME48k ) + { + p_ipd_w = dft_trigo_48k; + rfft_ipd_coef_step = 2; + } + else + { + return IVAS_ERROR( IVAS_ERR_INTERNAL_FATAL, "invalid sampling frequency\n" ); + } + + win = hStereoDmxEVS->hPHA->rfft_ipd_coef; /* 'win' is reused here as a write pointer into the rfft coefficient table */ + len = rfft_ipd_coef_step * STEREO_DMX_EVS_SUBBAND_SIZE; + for ( n = 0; n < n0; n++ ) + { + win[n] = p_ipd_w[n * len]; + win[input_frame_pha - n] = p_ipd_w[n * len]; + } + win[n0] = p_ipd_w[n0 * len]; + + hStereoDmxEVS->hPHA->curr_prc = STEREO_DMX_EVS_PRC_POC; + hStereoDmxEVS->hPHA->prev_prc = STEREO_DMX_EVS_PRC_POC; + hStereoDmxEVS->hPHA->prc_hys_cnt = 0; + + hStereoDmxEVS->hPHA->fad_len_prc = (int16_t) ( STEREO_DMX_EVS_FADE_LEN_PRC * (float) input_Fs / 1000.0f ); /* STEREO_DMX_EVS_FADE_LEN_PRC is in ms, the result is in samples */ + fad_len = hStereoDmxEVS->hPHA->fad_len_prc; + fad_g = hStereoDmxEVS->hPHA->fad_g_prc; + fad_r = 1.0f / (float) ( fad_len + 1 ); + fad_len2 = fad_len / 2; + for ( n = 0, m = ( fad_len - 1 ); n < fad_len2; n++, m-- ) + { + fad_g[n] = (float) ( n + 1 ) * fad_r; + fad_g[m] = 1.0f - fad_g[n]; + } + + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + hStereoDmxEVS->hPHA->trns_aux_energy[n] = 0.0f; + } + +#endif + *hStereoDmxEVS_out = hStereoDmxEVS; return IVAS_ERR_OK; @@ -960,6 +1688,14 @@ void stereo_dmx_evs_close_encoder( ( *hStereoDmxEVS )->hPOC = NULL; } +#ifdef ENHANCED_STEREO_DMX + if ( ( *hStereoDmxEVS )->hPHA != NULL ) + { + free( ( *hStereoDmxEVS )->hPHA ); + ( *hStereoDmxEVS )->hPHA = NULL; + } +#endif + free( ( *hStereoDmxEVS ) ); ( *hStereoDmxEVS ) = NULL;