[cleanup] remove code within ITD_WINNER_GAIN_MODIFY (4c573b12) · Commits · IVAS Codec Public Collaboration / IVAS Codec

lib_com/options.h

+0 −1

Original line number	Diff line number	Diff line
		@@ -139,7 +139,6 @@
		/*#define LSF_RE_USE_SECONDARY_CHANNEL_REUSEMODE */ /* switch to isolate the reuse mode case */
		#endif
		#define DISABLE_ADAP_RES_COD_TMP /* temporary fix for IVAS-403, disables adaptive residual coding */
		/*#define ITD_WINNER_GAIN_MODIFY */ /* ITD optimization - WORK IN PROGRESS */
		/*#define FIX_I4_OL_PITCH*/ /* fix open-loop pitch used for EVS core switching */
		#define FIX_103_RA_PARAMS_PARAM_BIN_REND /* Issue 103: Digest room acoustics parameters for Parametric Binaural Renderer*/
		/*#define SBA_HPF_TUNING_DEC*/

lib_enc/ivas_core_pre_proc_front.c

+0 −34

Original line number	Diff line number	Diff line
		@@ -443,13 +443,6 @@ ivas_error pre_proc_front_ivas(

		st->vad_flag = wb_vad( st, fr_bands, &i, &i, &i, &snr_sum_he, &localVAD_HE_SAD, &( st->flag_noisy_speech_snr ), NULL, NULL, -1000.0f, -1000.0f );

		#ifdef ITD_WINNER_GAIN_MODIFY
		/* Save the local_vad flag for the noise coherence calculation */
		if ( element_mode == IVAS_CPE_DFT )
		{
		hCPE->hStereoDft->local_vad = (short) ( st->vad_flag );
		}
		#endif

		if ( force_front_vad == 1 \|\| front_vad_flag == 1 )
		{
		@@ -835,33 +828,6 @@ ivas_error pre_proc_front_ivas(
		/* 2nd stage speech/music classification (ACELP/GSC/TCX core selection) */
		ivas_smc_mode_selection( st, element_brate, smc_dec, *relE, Etot, attack_flag, inp_12k8, S_map, flag_spitch );

		#ifdef ITD_WINNER_GAIN_MODIFY
		if ( element_mode == IVAS_CPE_DFT )
		{
		if ( hCPE->hStereoDft->mus_flag != smc_dec \|\| hCPE->element_mode != hCPE->last_element_mode )
		{
		hCPE->hStereoDft->noise_coherence = 0.0f;
		set_zero( hCPE->hStereoDft->spd_L_noise, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hCPE->hStereoDft->spd_R_noise, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hCPE->hStereoDft->spd_L_noise_min, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hCPE->hStereoDft->spd_R_noise_min, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hCPE->hStereoDft->spd_L_noise_max, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hCPE->hStereoDft->spd_R_noise_max, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hCPE->hStereoDft->winner_gain_L, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hCPE->hStereoDft->winner_gain_R, STEREO_DFT_N_32k_ENC / 2 );
		set_f( hCPE->hStereoDft->spd_L_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 );
		set_f( hCPE->hStereoDft->spd_R_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 );
		}
		if ( smc_dec == MUSIC && st->vad_flag == 1 )
		{
		hCPE->hStereoDft->mus_flag = 2;
		}
		else
		{
		hCPE->hStereoDft->mus_flag = 0;
		}
		}
		#endif
		}

		/*----------------------------------------------------------------

lib_enc/ivas_stat_enc.h

+0 −16

Original line number	Diff line number	Diff line
		@@ -219,22 +219,6 @@ typedef struct stereo_dft_enc_data_struct

		float voicing_lt;

		#ifdef ITD_WINNER_GAIN_MODIFY
		float noise_coherence;
		int16_t local_vad;
		int16_t mus_flag;
		float spd_L_noise[STEREO_DFT_N_32k_ENC / 2]; /* The estimation of spectral power density of noise in the left channel */
		float spd_R_noise[STEREO_DFT_N_32k_ENC / 2]; /* The estimation of spectral power density of noise in the right channel */
		float spd_L_noise_min[STEREO_DFT_N_32k_ENC / 2];
		float spd_R_noise_min[STEREO_DFT_N_32k_ENC / 2];
		float spd_L_noise_max[STEREO_DFT_N_32k_ENC / 2];
		float spd_R_noise_max[STEREO_DFT_N_32k_ENC / 2];
		float winner_gain_L[STEREO_DFT_N_32k_ENC / 2]; /* The estimation of the Winner gain of the left channel */
		float winner_gain_R[STEREO_DFT_N_32k_ENC / 2]; /* The estimation of the Winner gain of the right channel */
		float spd_L_smooth_new[STEREO_DFT_N_32k_ENC / 2];
		float spd_R_smooth_new[STEREO_DFT_N_32k_ENC / 2];

		#endif

		int16_t currentNumUpdates;
		int16_t expectedNumUpdates; /* Expected number of frames before use of ITD estimate */

lib_enc/ivas_stereo_dft_enc.c

+0 −15

Original line number	Diff line number	Diff line
		@@ -416,21 +416,6 @@ void stereo_dft_enc_reset(
		)
		{
		int16_t i;
		#ifdef ITD_WINNER_GAIN_MODIFY
		hStereoDft->noise_coherence = 0.0f;
		hStereoDft->local_vad = 0;
		hStereoDft->mus_flag = 2;
		set_zero( hStereoDft->spd_L_noise, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hStereoDft->spd_R_noise, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hStereoDft->spd_L_noise_min, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hStereoDft->spd_R_noise_min, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hStereoDft->spd_L_noise_max, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hStereoDft->spd_R_noise_max, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hStereoDft->winner_gain_L, STEREO_DFT_N_32k_ENC / 2 );
		set_zero( hStereoDft->winner_gain_R, STEREO_DFT_N_32k_ENC / 2 );
		set_f( hStereoDft->spd_L_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 );
		set_f( hStereoDft->spd_R_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 );
		#endif
		/* reset parameters */
		set_zero( hStereoDft->side_gain, STEREO_DFT_ENC_DFT_NB * STEREO_DFT_BAND_MAX );
		set_s( hStereoDft->side_gain_index_EC, 15, STEREO_DFT_BAND_MAX );

lib_enc/ivas_stereo_dft_enc_itd.c

+0 −295

Original line number	Diff line number	Diff line
		@@ -82,134 +82,6 @@
		#define ITD_MAX_MDCT 80


		#ifdef ITD_WINNER_GAIN_MODIFY
		/*-------------------------------------------------------------------------
		* stereo_dft_frame_coherence()
		* Calculate the frame coherence of the stereo signal
		*
		-------------------------------------------------------------------------/

		static void stereo_dft_frame_coherence(
		STEREO_DFT_ENC_DATA_HANDLE hStereoDft,
		float *xcorr,
		float *Spd_L,
		float *Spd_R,
		float *frame_coherence )
		{
		float spd_xcorr_real, spd_xcorr_imag, spd_LR;
		int16_t i, NFFT;

		NFFT = min( STEREO_DFT_N_32k_ENC, hStereoDft->NFFT );
		spd_xcorr_real = 0.0f;
		spd_xcorr_imag = 0.0f;
		spd_LR = 0.0f;
		*frame_coherence = 0.0f;

		for ( i = 1; i < NFFT / 2; i++ )
		{
		spd_xcorr_real += (float) xcorr[i * 2];
		spd_xcorr_imag += (float) xcorr[i * 2 + 1];
		spd_LR += (float) sqrt( Spd_L[i] ) * sqrt( Spd_R[i] );
		}

		frame_coherence = (float) ( sqrt( spd_xcorr_real spd_xcorr_real + spd_xcorr_imag * spd_xcorr_imag ) / spd_LR );

		return;
		}


		/*-------------------------------------------------------------------------
		* stereo_dft_itd_winner_gain()
		* Estimate the spectral power density of noise in the stereo signal and
		* calculate the winner gain for the ITD estimation
		-------------------------------------------------------------------------/

		static void stereo_dft_itd_winner_gain(
		STEREO_DFT_ENC_DATA_HANDLE hStereoDft,
		float *Spd_L,
		float *Spd_R )
		{
		int16_t i;
		float alpha1 = 0.05f;
		float alpha2 = 0.95f;
		float trackingfactor = 0.5f;
		float ratio = 2.75f;

		for ( i = 0; i < STEREO_DFT_N_16k_ENC / 2; i++ )
		{
		if ( hStereoDft->spd_L_noise_min[i] > Spd_L[i] )
		{
		hStereoDft->spd_L_noise_min[i] = ( 1 - 0.15 ) * hStereoDft->spd_L_noise_min[i] + 0.15 * Spd_L[i];
		}
		else
		{
		hStereoDft->spd_L_noise_min[i] = hStereoDft->spd_L_noise_min[i] + alpha1 * ( Spd_L[i] - hStereoDft->spd_L_noise_min[i] );
		}
		if ( hStereoDft->spd_R_noise[i] > Spd_R[i] )
		{
		hStereoDft->spd_R_noise_min[i] = ( 1 - 0.15 ) * hStereoDft->spd_R_noise_min[i] + 0.15 * Spd_R[i];
		}
		else
		{
		hStereoDft->spd_R_noise_min[i] = hStereoDft->spd_R_noise_min[i] + alpha1 * ( Spd_R[i] - hStereoDft->spd_R_noise_min[i] );
		}
		if ( hStereoDft->spd_L_noise_max[i] < Spd_L[i] )
		{
		hStereoDft->spd_L_noise_max[i] = Spd_L[i];
		}
		else
		{
		hStereoDft->spd_L_noise_max[i] = hStereoDft->spd_L_noise_max[i] + alpha2 * ( Spd_L[i] - hStereoDft->spd_L_noise_max[i] );
		}
		if ( hStereoDft->spd_R_noise_max[i] < Spd_R[i] )
		{
		hStereoDft->spd_R_noise_max[i] = Spd_R[i];
		}
		else
		{
		hStereoDft->spd_R_noise_max[i] = hStereoDft->spd_R_noise_max[i] + alpha2 * ( Spd_R[i] - hStereoDft->spd_R_noise_max[i] );
		}
		if ( hStereoDft->spd_L_noise_max[i] > ratio * hStereoDft->spd_L_noise_min[i] )
		{
		hStereoDft->spd_L_noise[i] = hStereoDft->spd_L_noise[i];
		hStereoDft->winner_gain_L[i] = ( Spd_L[i] - hStereoDft->spd_L_noise[i] ) / Spd_L[i];
		}
		else
		{
		hStereoDft->spd_L_noise[i] = hStereoDft->spd_L_noise[i] + trackingfactor * ( Spd_L[i] - hStereoDft->spd_L_noise[i] );
		hStereoDft->winner_gain_L[i] = ( Spd_L[i] - hStereoDft->spd_L_noise[i] ) / Spd_L[i];
		}
		if ( hStereoDft->spd_R_noise_max[i] > ratio * hStereoDft->spd_R_noise_min[i] )
		{
		hStereoDft->spd_R_noise[i] = hStereoDft->spd_R_noise[i];
		hStereoDft->winner_gain_R[i] = ( Spd_R[i] - hStereoDft->spd_R_noise[i] ) / Spd_R[i];
		}
		else
		{
		hStereoDft->spd_R_noise[i] = hStereoDft->spd_R_noise[i] + trackingfactor * ( Spd_R[i] - hStereoDft->spd_R_noise[i] );
		hStereoDft->winner_gain_R[i] = ( Spd_R[i] - hStereoDft->spd_R_noise[i] ) / Spd_R[i];
		}
		}

		for ( i = 0; i < STEREO_DFT_N_16k_ENC / 2; i++ )
		{
		hStereoDft->winner_gain_L[i] = max( hStereoDft->winner_gain_L[i], 0.01f );
		hStereoDft->winner_gain_R[i] = max( hStereoDft->winner_gain_R[i], 0.01f );
		hStereoDft->winner_gain_L[i] = min( hStereoDft->winner_gain_L[i], 1.0f );
		hStereoDft->winner_gain_R[i] = min( hStereoDft->winner_gain_R[i], 1.0f );
		if ( hStereoDft->winner_gain_L[i] >= 0.80f )
		hStereoDft->winner_gain_L[i] = 1.0f;
		else
		hStereoDft->winner_gain_L[i] = 0.01f;
		if ( hStereoDft->winner_gain_R[i] >= 0.80f )
		hStereoDft->winner_gain_R[i] = 1.0f;
		else
		hStereoDft->winner_gain_R[i] = 0.01f;
		}

		return;
		}
		#endif

		/*-------------------------------------------------------------------------
		* set_band_limits()
		@@ -741,9 +613,6 @@ void stereo_dft_enc_compute_itd(
		int16_t itd, itd_td;
		float xcorr_itd[STEREO_DFT_N_32k_ENC];
		float tmpf1, tmpf2, tmpf3;
		#ifdef ITD_WINNER_GAIN_MODIFY
		float frame_coherence, tmpf4;
		#endif
		float thres, alpha;
		int16_t index;
		float xcorr_max, sum_nrg_L_lb, par_L[XCORR_LB_NUM_BANDS], par_L_avrg, sum_nrg_L_tmp;
		@@ -984,32 +853,6 @@ void stereo_dft_enc_compute_itd(
		xcorr[0] = sign( hItd->xcorr_smooth[0] );
		xcorr[1] = sign( hItd->xcorr_smooth[1] );

		#ifdef ITD_WINNER_GAIN_MODIFY
		if ( hStereoDft->mus_flag == 0 )
		{
		stereo_dft_frame_coherence( hStereoDft, xcorr, Spd_L, Spd_R, &frame_coherence );
		if ( hStereoDft->local_vad == 0 )
		{
		hStereoDft->noise_coherence = 0.9f * hStereoDft->noise_coherence + ( 1.0 - 0.9f ) * frame_coherence;
		}
		else
		{
		hStereoDft->noise_coherence = hStereoDft->noise_coherence;
		}
		stereo_dft_itd_winner_gain( hStereoDft, Spd_L, Spd_R );

		if ( hStereoDft->noise_coherence > 0.25f )
		{
		for ( i = 1; i < NFFT / 2; i++ )
		{
		xcorr[2 * i] = ( hStereoDft->winner_gain_L[i] * hStereoDft->winner_gain_R[i] ) * xcorr[2 * i];
		xcorr[2 * i + 1] = ( hStereoDft->winner_gain_L[i] * hStereoDft->winner_gain_R[i] ) * xcorr[2 * i + 1];
		Spd_L[i] = hStereoDft->winner_gain_L[i] * Spd_L[i];
		Spd_R[i] = hStereoDft->winner_gain_R[i] * Spd_R[i];
		}
		}
		}
		#endif

		if ( hCPE->element_mode == IVAS_CPE_DFT && ( hItd->td_itd[k_offset] - hItd->td_itd[k_offset - 1] ) )
		{
		@@ -1032,23 +875,6 @@ void stereo_dft_enc_compute_itd(
		}

		tmpf3 = 2.f;
		#ifdef ITD_WINNER_GAIN_MODIFY
		if ( hStereoDft->mus_flag == 0 )
		{
		alpha = -0.8f;
		}
		else
		{
		if ( flag_noisy_speech_snr )
		{
		alpha = -0.8f;
		}
		else
		{
		alpha = -1.0f;
		}
		}
		#else
		if ( flag_noisy_speech_snr )
		{
		alpha = -0.8f;
		@@ -1057,7 +883,6 @@ void stereo_dft_enc_compute_itd(
		{
		alpha = -1.0f;
		}
		#endif

		if ( hCPE->hCoreCoder[0]->Opt_DTX_ON && hCPE->element_mode == IVAS_CPE_DFT )
		{
		@@ -1140,42 +965,9 @@ void stereo_dft_enc_compute_itd(
		tmpf1 += EPSILON;
		tmpf2 = tmpf1;
		tmpf1 = powf( tmpf1, alpha );
		#ifdef ITD_WINNER_GAIN_MODIFY
		if ( hStereoDft->mus_flag == 0 )
		{
		tmpf4 = 1.0f;
		/* Calculate smoothed spectral power density for the L/R channel */
		hStereoDft->spd_L_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_L_smooth_new[i] + sfm_L * Spd_L[i];
		hStereoDft->spd_R_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_R_smooth_new[i] + sfm_L * Spd_R[i];
		/* Calculate cross spectral power density using the smoothed spectral power density*/
		tmpf4 = (float) sqrt( hStereoDft->spd_L_smooth_new[i] ) * sqrt( hStereoDft->spd_R_smooth_new[i] );
		/* Calculate the value of weighted function for each frequency bin */
		tmpf4 += EPSILON;
		if ( hStereoDft->noise_coherence > 0.25f )
		{
		tmpf4 = tmpf1 * pow( tmpf2, 2.0f ) * pow( tmpf4, -2.0f );
		tmpf3 += tmpf2 * tmpf1;
		tmpf3 -= tmpf2 * tmpf4;
		}
		else
		{
		tmpf4 = tmpf2 * pow( tmpf4, -2.0f );
		}
		/* Calculate the value of weighted generalized cross-correlation function for each frequency bin */
		xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf4;
		xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf4;
		}
		else
		{
		tmpf3 += tmpf2 * tmpf1;
		xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
		xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
		}
		#else
		tmpf3 += tmpf2 * tmpf1;
		xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
		xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
		#endif

		/* Low pass filter L/R power spectrum */
		/* Calculate coherence as cross spectral density divided by LR power spectrum */
		@@ -1193,42 +985,9 @@ void stereo_dft_enc_compute_itd(
		tmpf1 += EPSILON;
		tmpf2 = tmpf1;
		tmpf1 = powf( tmpf1, alpha );
		#ifdef ITD_WINNER_GAIN_MODIFY
		if ( hStereoDft->mus_flag == 0 )
		{
		tmpf4 = 1.0f;
		/* Calculate smoothed spectral power density for the L/R channel */
		hStereoDft->spd_L_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_L_smooth_new[i] + sfm_L * Spd_L[i];
		hStereoDft->spd_R_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_R_smooth_new[i] + sfm_L * Spd_R[i];
		/* Calculate cross spectral power density using the smoothed spectral power density*/
		tmpf4 = (float) sqrt( hStereoDft->spd_L_smooth_new[i] ) * sqrt( hStereoDft->spd_R_smooth_new[i] );
		/* Calculate the value of weighted function for each frequency bin */
		tmpf4 += EPSILON;
		if ( hStereoDft->noise_coherence > 0.25f )
		{
		tmpf4 = tmpf1 * pow( tmpf2, 2.0f ) * pow( tmpf4, -2.0f );
		tmpf3 += tmpf2 * tmpf1;
		tmpf3 -= tmpf2 * tmpf4;
		}
		else
		{
		tmpf4 = tmpf2 * pow( tmpf4, -2.0f );
		}
		/* Calculate the value of weighted generalized cross-correlation function for each frequency bin */
		xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf4;
		xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf4;
		}
		else
		{
		tmpf3 += tmpf2 * tmpf1;
		xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
		xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
		}
		#else
		tmpf3 += tmpf2 * tmpf1;
		xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
		xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
		#endif
		}
		}
		}
		@@ -1242,70 +1001,16 @@ void stereo_dft_enc_compute_itd(
		tmpf1 += EPSILON;
		tmpf2 = tmpf1;
		tmpf1 = powf( tmpf1, alpha );
		#ifdef ITD_WINNER_GAIN_MODIFY
		if ( hStereoDft->mus_flag == 0 )
		{
		tmpf4 = 1.0f;
		/* Calculate smoothed spectral power density for the L/R channel */
		hStereoDft->spd_L_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_L_smooth_new[i] + sfm_L * Spd_L[i];
		hStereoDft->spd_R_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_R_smooth_new[i] + sfm_L * Spd_R[i];
		/* Calculate cross spectral power density using the smoothed spectral power density*/
		tmpf4 = (float) sqrt( hStereoDft->spd_L_smooth_new[i] ) * sqrt( hStereoDft->spd_R_smooth_new[i] );
		/* Calculate the value of weighted function for each frequency bin */
		tmpf4 += EPSILON;
		if ( hStereoDft->noise_coherence > 0.25f )
		{
		tmpf4 = tmpf1 * pow( tmpf2, 2.0f ) * pow( tmpf4, -2.0f );
		tmpf3 += tmpf2 * tmpf1;
		tmpf3 -= tmpf2 * tmpf4;
		}
		else
		{
		tmpf4 = tmpf2 * pow( tmpf4, -2.0f );
		}
		/* Calculate the value of weighted generalized cross-correlation function for each frequency bin */
		xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf4;
		xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf4;
		}
		else
		{
		tmpf3 += tmpf2 * tmpf1;
		xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
		xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
		}
		#else
		tmpf3 += tmpf2 * tmpf1;
		xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
		xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
		#endif
		}
		}
		#ifdef ITD_WINNER_GAIN_MODIFY
		if ( hStereoDft->mus_flag == 0 )
		{
		if ( hStereoDft->noise_coherence > 0.25f )
		{
		tmpf1 = (float) ( NFFT / 2 + 1 ) / tmpf3;
		for ( i = 0; i < NFFT; i++ )
		{
		xcorr[i] *= tmpf1;
		}
		}
		else
		{
		for ( i = NFFT / 2; i < NFFT; i++ )
		{
		xcorr[i] = 0.0f;
		}
		}
		}
		#else
		tmpf1 = (float) ( NFFT / 2 + 1 ) / tmpf3;
		for ( i = 0; i < NFFT; i++ )
		{
		xcorr[i] *= tmpf1;
		}
		#endif
		/* calculate mean E ratio of main to background signal for cohSNR */
		if ( hCPE->element_mode == IVAS_CPE_DFT )
		{