diff --git a/apps/decoder.c b/apps/decoder.c old mode 100644 new mode 100755 index 03c916518a15680cdf7b6d6aef1586bc01c9d230..3a89b15f54f7bb8582ef1b481f778c0f3d698b9d --- a/apps/decoder.c +++ b/apps/decoder.c @@ -1062,6 +1062,11 @@ static void usage_dec( void ) fprintf( stdout, " left or l or 1->left, right or r or -1->right, center or c or 0->middle\n" ); fprintf( stdout, "-q : Quiet mode, no frame counter\n" ); fprintf( stdout, " default is deactivated\n" ); +#ifdef DEBUG_MODE_INFO +#ifdef DEBUG_MODE_INFO_TWEAK + fprintf( stdout, "-info : specify subfolder name for debug output\n" ); +#endif +#endif fprintf( stdout, "\n" ); return; diff --git a/lib_com/ivas_cnst.h b/lib_com/ivas_cnst.h old mode 100644 new mode 100755 index 3e0ece839faf0755e98cea4bb33db07eba7ddb89..ceeb78b2825cfd63c3857ec650b3f88d920b21d8 --- a/lib_com/ivas_cnst.h +++ b/lib_com/ivas_cnst.h @@ -520,6 +520,10 @@ typedef enum #define STEREO_DFT_XCORR_LB_MAX 24 +#ifdef STABILIZE_GIPD +#define STEREO_DFT_IPD_BUF_LEN 5 +#endif + #define STEREO_DFT_N_COH_PRED 4 /* Number of intra-frame predictors for coherence vector */ #define STEREO_DFT_COH_PRED_COEFFS 15 /* Number of coefficients per predictor */ #define STEREO_DFT_PRED_NBITS 2 /* Bits to signal predictor (log_2(4) = 2) */ diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h old mode 100644 new mode 100755 diff --git a/lib_com/options.h b/lib_com/options.h old mode 100644 new mode 100755 index af92ab2516aba6717c2305f1b7570560cc6903ec..9e000aae89c0299975677d830f0b3886d54e636d --- a/lib_com/options.h +++ b/lib_com/options.h @@ -151,11 +151,12 @@ #define MC_BITRATE_SWITCHING /* Issue 116: support bitrate switching in MC format */ #define MC_JBM /* FhG: extend JBM beyond mono for running IVAS in VoIP mode (contribution 19) */ #define FIX_265_MC_BRATE_SWITCHING /* Issue 265: fix use-of-uninitialized-value in MC bitrate switching */ -#define FIX_ANGLE_WRAPPING /* Issue 244: Problems with angle wrapping*/ +#define FIX_ANGLE_WRAPPING /* Issue 244: Problems 
with angle wrapping*/ #define FIX_245_RANGE_CODER_VOIP_MSAN /* Issue 245: fix use-of-uninitialized-value in range coder in VoIP mode */ #define FIX_272_COV /* Issue 272: Cleanup for code coverage related to calls to ivas_binaural_cldfb() */ #define FIX_235 /* Issue 235: Deallocation of HR filter memory separately for lib_rend (ROM) and lib_util (from file) */ #define ENV_STAB_FIX /* Contribution 23: HQ envelope stability memory fix */ +#define STABILIZE_GIPD /* FhG: Contribution 22: gIPD stabilization */ /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h old mode 100644 new mode 100755 index 11dd51104d3a8b6b29847075ee9b7364c113b9c3..9e362a04693ca375701580222e12eb66c7beef23 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -210,6 +210,12 @@ typedef struct stereo_dft_enc_data_struct float sfm; float sum_dot_prod_real; float sum_dot_prod_img; +#ifdef STABILIZE_GIPD + float dot_prod_real_smooth[STEREO_DFT_BAND_MAX]; + float dot_prod_img_smooth[STEREO_DFT_BAND_MAX]; + float ipd_buf[STEREO_DFT_BAND_MAX][STEREO_DFT_IPD_BUF_LEN]; + float prev_gipd; +#endif /*ITD*/ ITD_DATA_HANDLE hItd; diff --git a/lib_enc/ivas_stereo_dft_enc.c b/lib_enc/ivas_stereo_dft_enc.c old mode 100644 new mode 100755 index 4109c072e466a7582112543fecf09349ddcb69e4..9f9e55390b28626b78502409da2ac9398a81ad3e --- a/lib_enc/ivas_stereo_dft_enc.c +++ b/lib_enc/ivas_stereo_dft_enc.c @@ -73,6 +73,14 @@ static void stereo_dft_enc_open( STEREO_DFT_ENC_DATA_HANDLE hStereoDft, const in static void stereo_dft_enc_compute_prm( STEREO_DFT_ENC_DATA_HANDLE hStereoDft, float *DFT_L, float *DFT_R, int16_t k_offset, int16_t flag_quant, const int16_t sp_aud_decision0, const int16_t vad_flag, float *bin_nrgL, float *bin_nrgR, float *dot_prod_nrg_ratio ); +#ifdef STABILIZE_GIPD +static float stereo_dft_calc_mean_bipd( float *pIpd, float ipd_buf[STEREO_DFT_IPD_BUF_LEN] ); + +static float 
stereo_dft_calc_mean_ipd_change( float *pIpd, float *ipd_smooth, int16_t gipd_band_max ); + +static void stereo_dft_gipd_stabilization( float *pgIpd, float prev_gipd, float ipd_mean_change ); +#endif + #ifdef DEBUG_MODE_DFT static void stereo_dft_enc_get_nipd_flag( STEREO_DFT_ENC_DATA_HANDLE hStereoDft, float *pgIpd, const int16_t sp_aud_decision0, const float gainIPD ); #else @@ -433,6 +441,15 @@ void stereo_dft_enc_reset( hStereoDft->side_gain_counter = 0; hStereoDft->side_gain_bitdiff_lp = STEREO_DFT_BITDIFF_INIT; set_zero( hStereoDft->gipd, STEREO_DFT_ENC_DFT_NB ); +#ifdef STABILIZE_GIPD + set_zero( hStereoDft->dot_prod_real_smooth, STEREO_DFT_BAND_MAX ); + set_zero( hStereoDft->dot_prod_img_smooth, STEREO_DFT_BAND_MAX ); + for ( i = 0; i < STEREO_DFT_BAND_MAX; i++ ) + { + set_zero( hStereoDft->ipd_buf[i], STEREO_DFT_IPD_BUF_LEN ); + } + hStereoDft->prev_gipd = 0.f; +#endif hStereoDft->gipd_index = 0; set_zero( hStereoDft->res_pred_gain, STEREO_DFT_ENC_DFT_NB * STEREO_DFT_BAND_MAX ); set_s( hStereoDft->res_pred_index_EC, 0, STEREO_DFT_BAND_MAX ); @@ -626,7 +643,11 @@ void stereo_dft_enc_update( hStereoDft->nbands_dmx = stereo_dft_band_config( hStereoDft->band_limits_dmx, 1, NFFT_inner, ENC ); /*Compute main parameters*/ +#ifdef STABILIZE_GIPD + hStereoDft->gipd_band_max = dft_band_ipd[1][3]; +#else hStereoDft->gipd_band_max = dft_band_ipd[hStereoDft->hConfig->band_res][3]; +#endif hStereoDft->res_cod_band_max = dft_band_res_cod[hStereoDft->hConfig->band_res][hStereoDft->res_cod_mode[k_offset]]; hStereoDft->res_cod_line_max = (int16_t) ( 0.5f + ( hStereoDft->band_limits[hStereoDft->res_cod_band_max] - 1 ) * 2.f * hStereoDft->N / (float) ( hStereoDft->NFFT ) ); hStereoDft->res_cod_line_max = 8 * ( hStereoDft->res_cod_line_max / 8 ); @@ -1275,7 +1296,11 @@ void stereo_dft_enc_process( /* Initialization */ k_offset = STEREO_DFT_OFFSET; /*Add an offset at encoder*/ +#ifdef STABILIZE_GIPD + hStereoDft->gipd_band_max = dft_band_ipd[1][3]; +#else 
hStereoDft->gipd_band_max = dft_band_ipd[hStereoDft->band_res[k_offset]][3]; +#endif hStereoDft->res_cod_band_max = dft_band_res_cod[hStereoDft->band_res[k_offset]][hStereoDft->res_cod_mode[k_offset]]; hStereoDft->res_cod_line_max = (int16_t) ( 0.5f + ( hStereoDft->band_limits[hStereoDft->res_cod_band_max] - 1 ) * 2.f * input_frame / (float) ( hStereoDft->NFFT ) ); hStereoDft->res_cod_line_max = 8 * ( hStereoDft->res_cod_line_max / 8 ); @@ -2586,8 +2611,14 @@ static void stereo_dft_enc_compute_prm( float sum_energy_L, sum_energy_R; float g, c; float abs_L_R; +#ifdef STABILIZE_GIPD + float abs_L_R2; +#endif float gain_IPD; float sub_nrg_DMX[STEREO_DFT_BAND_MAX]; +#ifdef STABILIZE_GIPD + float sub_nrg_DMX2; +#endif float sub_nrg_L[STEREO_DFT_BAND_MAX]; float sub_nrg_R[STEREO_DFT_BAND_MAX]; float diff_ipd; @@ -2601,6 +2632,11 @@ static void stereo_dft_enc_compute_prm( float sum_past_dot_prod_abs, sum_past_dot_prod_abs2; float sum_past_nrg_dmx; int16_t pos; +#ifdef STABILIZE_GIPD + float pIpd[STEREO_DFT_BAND_MAX]; + float ipd_smooth[STEREO_DFT_BAND_MAX]; + float ipd_mean_change; +#endif /*------------------------------------------------------------------* * Initialization @@ -2613,7 +2649,6 @@ static void stereo_dft_enc_compute_prm( set_f( sub_nrg_L, 0, STEREO_DFT_BAND_MAX ); set_f( sub_nrg_R, 0, STEREO_DFT_BAND_MAX ); - pSideGain = hStereoDft->side_gain + k_offset * STEREO_DFT_BAND_MAX; pgIpd = hStereoDft->gipd + k_offset; pPredGain = hStereoDft->res_pred_gain + k_offset * STEREO_DFT_BAND_MAX; @@ -2622,6 +2657,9 @@ static void stereo_dft_enc_compute_prm( sum_energy_R = EPSILON; sum_dot_prod_real = EPSILON; sum_dot_prod_img = EPSILON; +#ifdef STABILIZE_GIPD + sub_nrg_DMX2 = 0.f; +#endif #ifdef DEBUG_MODE_DFT sum_nrg_L = EPSILON; sum_nrg_R = EPSILON; @@ -2651,10 +2689,17 @@ static void stereo_dft_enc_compute_prm( pNrgL = bin_nrgL; pNrgR = bin_nrgR; +#ifdef STABILIZE_GIPD + sum_nrg_L2 = EPSILON; + sum_nrg_R2 = EPSILON; + dot_prod_real2 = EPSILON; + dot_prod_img2 = 
EPSILON; +#else sum_nrg_L2 = 0; sum_nrg_R2 = 0; dot_prod_real2 = 0; dot_prod_img2 = 0; +#endif for ( i = hStereoDft->band_limits_dmx[b2]; i < hStereoDft->band_limits_dmx[b2 + 1]; i++ ) { @@ -2665,6 +2710,21 @@ static void stereo_dft_enc_compute_prm( dot_prod_real2 += pDFT_L[2 * i] * pDFT_R[2 * i] + pDFT_L[2 * i + 1] * pDFT_R[2 * i + 1]; dot_prod_img2 += pDFT_L[2 * i + 1] * pDFT_R[2 * i] - pDFT_L[2 * i] * pDFT_R[2 * i + 1]; } +#ifdef STABILIZE_GIPD + abs_L_R2 = sqrtf( dot_prod_real2 * dot_prod_real2 + dot_prod_img2 * dot_prod_img2 ); + sub_nrg_DMX2 = sum_nrg_L2 + sum_nrg_R2 + 2 * abs_L_R2; + + if ( b2 < hStereoDft->gipd_band_max ) + { + hStereoDft->dot_prod_real_smooth[b2] = 0.5f * hStereoDft->dot_prod_real_smooth[b2] + 0.5f * dot_prod_real2; + hStereoDft->dot_prod_img_smooth[b2] = 0.5f * hStereoDft->dot_prod_img_smooth[b2] + 0.5f * dot_prod_img2; + pIpd[b2] = (float) atan2( hStereoDft->dot_prod_img_smooth[b2], hStereoDft->dot_prod_real_smooth[b2] ); + + ipd_smooth[b2] = stereo_dft_calc_mean_bipd( &pIpd[b2], hStereoDft->ipd_buf[b2] ); + + gain_IPD += ( sum_nrg_L2 + sum_nrg_R2 + 2 * dot_prod_real2 ) / sub_nrg_DMX2 / hStereoDft->gipd_band_max; + } +#endif sum_past_nrgL2 = EPSILON; sum_past_nrgR2 = EPSILON; @@ -2818,18 +2878,31 @@ static void stereo_dft_enc_compute_prm( pPredGain[b] = 0.f; } - +#ifndef STABILIZE_GIPD if ( b < hStereoDft->gipd_band_max ) { gain_IPD += ( sum_nrg_L + sum_nrg_R + 2 * dot_prod_real ) / sub_nrg_DMX[b] / hStereoDft->gipd_band_max; } +#endif +#ifdef STABILIZE_GIPD + if ( b2 == hStereoDft->gipd_band_max ) +#else if ( b == hStereoDft->gipd_band_max - 1 ) +#endif { +#ifdef STABILIZE_GIPD + ipd_mean_change = stereo_dft_calc_mean_ipd_change( pIpd, ipd_smooth, hStereoDft->gipd_band_max ); +#endif hStereoDft->sum_dot_prod_real = ( 1.f - hStereoDft->sfm ) * hStereoDft->sum_dot_prod_real + hStereoDft->sfm * sum_dot_prod_real; hStereoDft->sum_dot_prod_img = ( 1.f - hStereoDft->sfm ) * hStereoDft->sum_dot_prod_img + hStereoDft->sfm * sum_dot_prod_img; 
pgIpd[0] = (float) atan2( hStereoDft->sum_dot_prod_img, hStereoDft->sum_dot_prod_real );
+
+#ifdef STABILIZE_GIPD
+            stereo_dft_gipd_stabilization( &pgIpd[0], hStereoDft->prev_gipd, ipd_mean_change );
+            hStereoDft->prev_gipd = pgIpd[0];
+#endif
         }
     }
 
@@ -3168,6 +3241,173 @@ static void res_pred_gain_mode_decision(
 }
 
 
+#ifdef STABILIZE_GIPD
+/*-------------------------------------------------------------------------
+ * stereo_dft_calc_mean_bipd()
+ *
+ * Return the mean of the bandwise IPD values stored in ipd_buf, then push the current IPD into the buffer
+ *------------------------------------------------------------------------*/
+
+static float stereo_dft_calc_mean_bipd(
+    float *pIpd,                          /* i  : current bandwise IPD, asserted to lie in [-pi, pi]; stored as newest buffer entry */
+    float ipd_buf[STEREO_DFT_IPD_BUF_LEN] /* i/o: previous bandwise IPDs, oldest first; entries may be shifted by 2*pi in place    */
+)
+{
+    int16_t i;
+    float ipd_smooth;
+    float diff_to_last;
+
+    assert( *pIpd <= EVS_PI && *pIpd >= -EVS_PI );
+
+    ipd_smooth = 0.f;
+    for ( i = 0; i < STEREO_DFT_IPD_BUF_LEN; i++ )
+    {
+        if ( i == 0 )
+        {
+            diff_to_last = ipd_buf[0]; /* first entry: no running mean yet, the raw value itself is tested */
+        }
+        else
+        {
+            diff_to_last = fabsf( ipd_buf[i] - ipd_smooth ); /* distance of the entry to the running mean */
+        }
+        if ( diff_to_last > EVS_PI ) /* shift the buffered entry by one full turn (2*pi) */
+        {
+            if ( ipd_buf[i] > 0 )
+            {
+                ipd_buf[i] -= 2 * EVS_PI;
+            }
+            else
+            {
+                ipd_buf[i] += 2 * EVS_PI;
+            }
+        }
+        ipd_smooth = ( i / (float) ( i + 1 ) ) * ipd_smooth + ( 1 / (float) ( i + 1 ) ) * ipd_buf[i]; /* running mean over entries 0..i */
+        if ( ipd_smooth < -EVS_PI ) /* wrap the running mean by 2*pi when it leaves [-pi, pi] */
+        {
+            ipd_smooth += 2 * EVS_PI;
+        }
+        else if ( ipd_smooth > EVS_PI )
+        {
+            ipd_smooth -= 2 * EVS_PI;
+        }
+    }
+
+    for ( i = 0; i < STEREO_DFT_IPD_BUF_LEN - 1; i++ ) /* drop the oldest entry */
+    {
+        ipd_buf[i] = ipd_buf[i + 1];
+    }
+    ipd_buf[STEREO_DFT_IPD_BUF_LEN - 1] = *pIpd; /* current IPD becomes the newest entry; it did not contribute to ipd_smooth */
+
+#ifdef DEBUG_MODE_DFT
+    dbgwrite( pIpd, sizeof( float ), 1, 1, "res/stereo_dft_bipd.pcm" );
+    dbgwrite( &ipd_smooth, sizeof( float ), 1, 1, "res/stereo_dft_bipd_smooth.pcm" );
+#endif
+
+    return ipd_smooth;
+}
+
+
+/*-------------------------------------------------------------------------
+ * stereo_dft_calc_mean_ipd_change()
+ *
+ * Calculate mean IPD change over all bands: average of |pIpd[b] - ipd_smooth[b]|, each taken the shorter way around the circle
+ *------------------------------------------------------------------------*/
+
+static float stereo_dft_calc_mean_ipd_change(
+    float *pIpd,          /* i  : bandwise IPDs                                                                      */
+    float *ipd_smooth,    /* i  : mean of previous bandwise IPDs                                                     */
+    int16_t gipd_band_max /* i  : number of IPD bands; used as divisor and must not exceed STEREO_DFT_BAND_MAX       */
+)
+{
+    int16_t b;
+    float ipd_mean_change;
+    float ipd_change[STEREO_DFT_BAND_MAX];
+
+    ipd_mean_change = 0.f;
+    for ( b = 0; b < gipd_band_max; b++ )
+    {
+        ipd_change[b] = fabsf( pIpd[b] - ipd_smooth[b] );
+        if ( ipd_change[b] > EVS_PI ) /* use the shorter way around the circle */
+        {
+            ipd_change[b] = 2 * EVS_PI - ipd_change[b];
+        }
+        ipd_mean_change += ipd_change[b];
+    }
+    ipd_mean_change /= gipd_band_max;
+
+#ifdef DEBUG_MODE_DFT
+    dbgwrite( ipd_change, sizeof( float ), gipd_band_max, 1, "res/stereo_dft_ipd_change.pcm" );
+    dbgwrite( &ipd_mean_change, sizeof( float ), 1, 1, "res/stereo_dft_ipd_mean_change.pcm" );
+#endif
+
+    return ipd_mean_change;
+}
+
+
+/*-------------------------------------------------------------------------
+ * stereo_dft_gipd_stabilization()
+ *
+ * stabilize global IPD based on stability of bandwise IPDs: hold it while the bands are stable, otherwise limit its change
+ *------------------------------------------------------------------------*/
+
+static void stereo_dft_gipd_stabilization(
+    float *pgIpd,         /* i/o: global IPD to be stabilized      */
+    float prev_gipd,      /* i  : previous global IPD              */
+    float ipd_mean_change /* i  : mean IPD change over all bands   */
+)
+{
+    float diff_gipd;
+
+    if ( ipd_mean_change < 0.3f ) /* bandwise IPDs hardly changed: keep the previous global IPD */
+    {
+        *pgIpd = prev_gipd;
+    }
+    else
+    {
+        diff_gipd = fabsf( *pgIpd - prev_gipd );
+        if ( diff_gipd > EVS_PI ) /* measure the change the shorter way around the circle */
+        {
+            diff_gipd = 2 * EVS_PI - diff_gipd;
+        }
+        if ( diff_gipd > ipd_mean_change ) /* limit the step away from prev_gipd to ipd_mean_change */
+        {
+            if ( *pgIpd > prev_gipd )
+            {
+                if ( *pgIpd - prev_gipd < EVS_PI )
+                {
+                    *pgIpd = prev_gipd + ipd_mean_change;
+                }
+                else /* shorter way leads downwards across -pi */
+                {
+                    *pgIpd = prev_gipd - ipd_mean_change;
+                    if ( *pgIpd < -EVS_PI )
+                    {
+                        *pgIpd += 2 * EVS_PI;
+                    }
+                }
+            }
+            else
+            {
+                if ( prev_gipd - *pgIpd < EVS_PI )
+                {
+                    *pgIpd = prev_gipd - ipd_mean_change;
+                }
+                else /* shorter way leads upwards across +pi */
+                {
+                    *pgIpd = prev_gipd + ipd_mean_change;
+                    if ( *pgIpd > EVS_PI )
+                    {
+                        *pgIpd -= 2 * EVS_PI;
+                    }
+                }
+            }
+        }
+    }
+    return;
+}
+#endif
+
+
 /*-------------------------------------------------------------------------
  * stereo_dft_enc_get_nipd_flag()
  *