Loading lib_com/options.h +0 −1 Original line number Diff line number Diff line Loading @@ -139,7 +139,6 @@ /*#define LSF_RE_USE_SECONDARY_CHANNEL_REUSEMODE */ /* switch to isolate the reuse mode case */ #endif #define DISABLE_ADAP_RES_COD_TMP /* temporary fix for IVAS-403, disables adaptive residual coding */ /*#define ITD_WINNER_GAIN_MODIFY */ /* ITD optimization - WORK IN PROGRESS */ /*#define FIX_I4_OL_PITCH*/ /* fix open-loop pitch used for EVS core switching */ #define FIX_103_RA_PARAMS_PARAM_BIN_REND /* Issue 103: Digest room acoustics parameters for Parametric Binaural Renderer*/ /*#define SBA_HPF_TUNING_DEC*/ Loading lib_enc/ivas_core_pre_proc_front.c +0 −34 Original line number Diff line number Diff line Loading @@ -443,13 +443,6 @@ ivas_error pre_proc_front_ivas( st->vad_flag = wb_vad( st, fr_bands, &i, &i, &i, &snr_sum_he, &localVAD_HE_SAD, &( st->flag_noisy_speech_snr ), NULL, NULL, -1000.0f, -1000.0f ); #ifdef ITD_WINNER_GAIN_MODIFY /*Save the local_vad flag for the noise coherence calculation*/ if ( element_mode == IVAS_CPE_DFT ) { hCPE->hStereoDft->local_vad = (short) ( st->vad_flag ); } #endif if ( force_front_vad == 1 || front_vad_flag == 1 ) { Loading Loading @@ -835,33 +828,6 @@ ivas_error pre_proc_front_ivas( /* 2nd stage speech/music classification (ACELP/GSC/TCX core selection) */ ivas_smc_mode_selection( st, element_brate, smc_dec, *relE, Etot, attack_flag, inp_12k8, S_map, flag_spitch ); #ifdef ITD_WINNER_GAIN_MODIFY if ( element_mode == IVAS_CPE_DFT ) { if ( hCPE->hStereoDft->mus_flag != smc_dec || hCPE->element_mode != hCPE->last_element_mode ) { hCPE->hStereoDft->noise_coherence = 0.0f; set_zero( hCPE->hStereoDft->spd_L_noise, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hCPE->hStereoDft->spd_R_noise, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hCPE->hStereoDft->spd_L_noise_min, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hCPE->hStereoDft->spd_R_noise_min, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hCPE->hStereoDft->spd_L_noise_max, STEREO_DFT_N_32k_ENC / 2 ); 
set_zero( hCPE->hStereoDft->spd_R_noise_max, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hCPE->hStereoDft->winner_gain_L, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hCPE->hStereoDft->winner_gain_R, STEREO_DFT_N_32k_ENC / 2 ); set_f( hCPE->hStereoDft->spd_L_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); set_f( hCPE->hStereoDft->spd_R_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); } if ( smc_dec == MUSIC && st->vad_flag == 1 ) { hCPE->hStereoDft->mus_flag = 2; } else { hCPE->hStereoDft->mus_flag = 0; } } #endif } /*----------------------------------------------------------------* Loading lib_enc/ivas_stat_enc.h +0 −16 Original line number Diff line number Diff line Loading @@ -219,22 +219,6 @@ typedef struct stereo_dft_enc_data_struct float voicing_lt; #ifdef ITD_WINNER_GAIN_MODIFY float noise_coherence; int16_t local_vad; int16_t mus_flag; float spd_L_noise[STEREO_DFT_N_32k_ENC / 2]; /*The estimation of spectral power density of noise in the left channel*/ float spd_R_noise[STEREO_DFT_N_32k_ENC / 2]; /*The estimation of spectral power density of noise in the right channel*/ float spd_L_noise_min[STEREO_DFT_N_32k_ENC / 2]; float spd_R_noise_min[STEREO_DFT_N_32k_ENC / 2]; float spd_L_noise_max[STEREO_DFT_N_32k_ENC / 2]; float spd_R_noise_max[STEREO_DFT_N_32k_ENC / 2]; float winner_gain_L[STEREO_DFT_N_32k_ENC / 2]; /*The estimation of the Winner gain of the left channel*/ float winner_gain_R[STEREO_DFT_N_32k_ENC / 2]; /*The estimation of the Winner gain of the right channel*/ float spd_L_smooth_new[STEREO_DFT_N_32k_ENC / 2]; float spd_R_smooth_new[STEREO_DFT_N_32k_ENC / 2]; #endif int16_t currentNumUpdates; int16_t expectedNumUpdates; /* Expected number of frames before use of ITD estimate */ Loading lib_enc/ivas_stereo_dft_enc.c +0 −15 Original line number Diff line number Diff line Loading @@ -416,21 +416,6 @@ void stereo_dft_enc_reset( ) { int16_t i; #ifdef ITD_WINNER_GAIN_MODIFY hStereoDft->noise_coherence = 0.0f; hStereoDft->local_vad = 0; hStereoDft->mus_flag = 2; set_zero( 
hStereoDft->spd_L_noise, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->spd_R_noise, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->spd_L_noise_min, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->spd_R_noise_min, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->spd_L_noise_max, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->spd_R_noise_max, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->winner_gain_L, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->winner_gain_R, STEREO_DFT_N_32k_ENC / 2 ); set_f( hStereoDft->spd_L_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); set_f( hStereoDft->spd_R_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); #endif /*reset parameters*/ set_zero( hStereoDft->side_gain, STEREO_DFT_ENC_DFT_NB * STEREO_DFT_BAND_MAX ); set_s( hStereoDft->side_gain_index_EC, 15, STEREO_DFT_BAND_MAX ); Loading lib_enc/ivas_stereo_dft_enc_itd.c +0 −295 Original line number Diff line number Diff line Loading @@ -82,134 +82,6 @@ #define ITD_MAX_MDCT 80
#ifdef ITD_WINNER_GAIN_MODIFY
/*-------------------------------------------------------------------------
 * stereo_dft_frame_coherence()
 *
 * Calculate the frame coherence of the stereo signal: the magnitude of the
 * cross-spectrum summed over bins 1 .. NFFT/2-1, divided by the sum of
 * sqrt( Spd_L[i] ) * sqrt( Spd_R[i] ) over the same bins.
 * The result is 0 when the denominator is not strictly positive.
 *-------------------------------------------------------------------------*/

static void stereo_dft_frame_coherence(
    STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i  : DFT stereo encoder handle (only NFFT is read)        */
    float *xcorr,                          /* i  : cross-spectrum, interleaved as [2*i] real, [2*i+1] imag */
    float *Spd_L,                          /* i  : spectral power density, left channel                 */
    float *Spd_R,                          /* i  : spectral power density, right channel                */
    float *frame_coherence                 /* o  : coherence of the current frame                       */
)
{
    float spd_xcorr_real, spd_xcorr_imag, spd_LR;
    int16_t i, NFFT;

    NFFT = min( STEREO_DFT_N_32k_ENC, hStereoDft->NFFT );

    spd_xcorr_real = 0.0f;
    spd_xcorr_imag = 0.0f;
    spd_LR = 0.0f;
    *frame_coherence = 0.0f;

    /* bin 0 is skipped */
    for ( i = 1; i < NFFT / 2; i++ )
    {
        spd_xcorr_real += (float) xcorr[i * 2];
        spd_xcorr_imag += (float) xcorr[i * 2 + 1];
        spd_LR += (float) sqrt( Spd_L[i] ) * sqrt( Spd_R[i] );
    }

    /* An all-zero (or invalid) PSD would make this 0/0 = NaN. The caller feeds
     * the result into a recursive smoother of noise_coherence, which would
     * then never recover, so keep the 0.0f default in that case. */
    if ( spd_LR > 0.0f )
    {
        *frame_coherence = (float) ( sqrt( spd_xcorr_real * spd_xcorr_real + spd_xcorr_imag * spd_xcorr_imag ) / spd_LR );
    }

    return;
}

/*-------------------------------------------------------------------------
 * stereo_dft_itd_winner_gain()
 *
 * Estimate the spectral power density of noise in the stereo signal and
 * calculate the per-bin gain ( Spd - noise ) / Spd used for the ITD
 * estimation ("winner gain" in the identifiers; the formula has the form of
 * a Wiener gain). The gain is finally reduced to a hard decision:
 * 1.0 when it is >= 0.80, otherwise 0.01.
 *
 * NOTE(review): only the first STEREO_DFT_N_16k_ENC / 2 bins are updated,
 * while the caller in stereo_dft_enc_compute_itd() applies winner_gain_L/R
 * for i < NFFT / 2. Bins above the 16k range keep their previous content
 * (zero after stereo_dft_enc_reset()) - confirm this is intended.
 *-------------------------------------------------------------------------*/

static void stereo_dft_itd_winner_gain(
    STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: DFT stereo encoder handle (noise trackers and gains are updated) */
    float *Spd_L,                          /* i  : spectral power density, left channel                            */
    float *Spd_R                           /* i  : spectral power density, right channel                           */
)
{
    int16_t i;
    float alpha1 = 0.05f;        /* rate at which the minimum tracker rises towards the input   */
    float alpha2 = 0.95f;        /* rate at which the maximum tracker decays towards the input  */
    float trackingfactor = 0.5f; /* update rate of the noise estimate                           */
    float ratio = 2.75f;         /* max/min ratio above which the noise estimate is frozen      */

    for ( i = 0; i < STEREO_DFT_N_16k_ENC / 2; i++ )
    {
        /* minimum trackers: follow a falling input with factor 0.15, a rising one with alpha1 */
        if ( hStereoDft->spd_L_noise_min[i] > Spd_L[i] )
        {
            hStereoDft->spd_L_noise_min[i] = ( 1 - 0.15 ) * hStereoDft->spd_L_noise_min[i] + 0.15 * Spd_L[i];
        }
        else
        {
            hStereoDft->spd_L_noise_min[i] = hStereoDft->spd_L_noise_min[i] + alpha1 * ( Spd_L[i] - hStereoDft->spd_L_noise_min[i] );
        }

        /* fixed: the right channel compared spd_R_noise[] here, unlike the left channel */
        if ( hStereoDft->spd_R_noise_min[i] > Spd_R[i] )
        {
            hStereoDft->spd_R_noise_min[i] = ( 1 - 0.15 ) * hStereoDft->spd_R_noise_min[i] + 0.15 * Spd_R[i];
        }
        else
        {
            hStereoDft->spd_R_noise_min[i] = hStereoDft->spd_R_noise_min[i] + alpha1 * ( Spd_R[i] - hStereoDft->spd_R_noise_min[i] );
        }

        /* maximum trackers: jump up to the input immediately, decay towards it with alpha2 */
        if ( hStereoDft->spd_L_noise_max[i] < Spd_L[i] )
        {
            hStereoDft->spd_L_noise_max[i] = Spd_L[i];
        }
        else
        {
            hStereoDft->spd_L_noise_max[i] = hStereoDft->spd_L_noise_max[i] + alpha2 * ( Spd_L[i] - hStereoDft->spd_L_noise_max[i] );
        }

        if ( hStereoDft->spd_R_noise_max[i] < Spd_R[i] )
        {
            hStereoDft->spd_R_noise_max[i] = Spd_R[i];
        }
        else
        {
            hStereoDft->spd_R_noise_max[i] = hStereoDft->spd_R_noise_max[i] + alpha2 * ( Spd_R[i] - hStereoDft->spd_R_noise_max[i] );
        }

        /* the noise estimate is only updated while max <= ratio * min; otherwise it is held */
        if ( !( hStereoDft->spd_L_noise_max[i] > ratio * hStereoDft->spd_L_noise_min[i] ) )
        {
            hStereoDft->spd_L_noise[i] = hStereoDft->spd_L_noise[i] + trackingfactor * ( Spd_L[i] - hStereoDft->spd_L_noise[i] );
        }

        if ( !( hStereoDft->spd_R_noise_max[i] > ratio * hStereoDft->spd_R_noise_min[i] ) )
        {
            hStereoDft->spd_R_noise[i] = hStereoDft->spd_R_noise[i] + trackingfactor * ( Spd_R[i] - hStereoDft->spd_R_noise[i] );
        }

        /* raw gain; a zero Spd gives -inf or NaN, both of which fail the test below */
        hStereoDft->winner_gain_L[i] = ( Spd_L[i] - hStereoDft->spd_L_noise[i] ) / Spd_L[i];
        hStereoDft->winner_gain_R[i] = ( Spd_R[i] - hStereoDft->spd_R_noise[i] ) / Spd_R[i];

        /* hard decision; it subsumes the former clamp to [0.01, 1.0] */
        if ( hStereoDft->winner_gain_L[i] >= 0.80f )
        {
            hStereoDft->winner_gain_L[i] = 1.0f;
        }
        else
        {
            hStereoDft->winner_gain_L[i] = 0.01f;
        }

        if ( hStereoDft->winner_gain_R[i] >= 0.80f )
        {
            hStereoDft->winner_gain_R[i] = 1.0f;
        }
        else
        {
            hStereoDft->winner_gain_R[i] = 0.01f;
        }
    }

    return;
}
#endif
/*------------------------------------------------------------------------- * set_band_limits() Loading Loading @@ -741,9 +613,6 @@ void stereo_dft_enc_compute_itd( int16_t itd, itd_td; float xcorr_itd[STEREO_DFT_N_32k_ENC]; float tmpf1, tmpf2, tmpf3; #ifdef ITD_WINNER_GAIN_MODIFY float frame_coherence, tmpf4; #endif float thres, alpha; int16_t index; float xcorr_max, sum_nrg_L_lb, par_L[XCORR_LB_NUM_BANDS], par_L_avrg, sum_nrg_L_tmp; Loading Loading @@ -984,32 +853,6 @@ void stereo_dft_enc_compute_itd( xcorr[0] = sign( hItd->xcorr_smooth[0] ); xcorr[1] = sign( hItd->xcorr_smooth[1] ); #ifdef ITD_WINNER_GAIN_MODIFY if ( hStereoDft->mus_flag == 0 ) { stereo_dft_frame_coherence( hStereoDft, xcorr, Spd_L, Spd_R, &frame_coherence ); if ( hStereoDft->local_vad == 0 ) { hStereoDft->noise_coherence = 0.9f * hStereoDft->noise_coherence + ( 1.0 - 0.9f ) * frame_coherence; } else { hStereoDft->noise_coherence = hStereoDft->noise_coherence; } stereo_dft_itd_winner_gain( hStereoDft, 
Spd_L, Spd_R ); if ( hStereoDft->noise_coherence > 0.25f ) { for ( i = 1; i < NFFT / 2; i++ ) { xcorr[2 * i] = ( hStereoDft->winner_gain_L[i] * hStereoDft->winner_gain_R[i] ) * xcorr[2 * i]; xcorr[2 * i + 1] = ( hStereoDft->winner_gain_L[i] * hStereoDft->winner_gain_R[i] ) * xcorr[2 * i + 1]; Spd_L[i] = hStereoDft->winner_gain_L[i] * Spd_L[i]; Spd_R[i] = hStereoDft->winner_gain_R[i] * Spd_R[i]; } } } #endif if ( hCPE->element_mode == IVAS_CPE_DFT && ( hItd->td_itd[k_offset] - hItd->td_itd[k_offset - 1] ) ) { Loading @@ -1032,23 +875,6 @@ void stereo_dft_enc_compute_itd( } tmpf3 = 2.f; #ifdef ITD_WINNER_GAIN_MODIFY if ( hStereoDft->mus_flag == 0 ) { alpha = -0.8f; } else { if ( flag_noisy_speech_snr ) { alpha = -0.8f; } else { alpha = -1.0f; } } #else if ( flag_noisy_speech_snr ) { alpha = -0.8f; Loading @@ -1057,7 +883,6 @@ void stereo_dft_enc_compute_itd( { alpha = -1.0f; } #endif if ( hCPE->hCoreCoder[0]->Opt_DTX_ON && hCPE->element_mode == IVAS_CPE_DFT ) { Loading Loading @@ -1140,42 +965,9 @@ void stereo_dft_enc_compute_itd( tmpf1 += EPSILON; tmpf2 = tmpf1; tmpf1 = powf( tmpf1, alpha ); #ifdef ITD_WINNER_GAIN_MODIFY if ( hStereoDft->mus_flag == 0 ) { tmpf4 = 1.0f; /* Calculate smoothed spectral power density for the L/R channel */ hStereoDft->spd_L_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_L_smooth_new[i] + sfm_L * Spd_L[i]; hStereoDft->spd_R_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_R_smooth_new[i] + sfm_L * Spd_R[i]; /* Calculate cross spectral power density using the smoothed spectral power density*/ tmpf4 = (float) sqrt( hStereoDft->spd_L_smooth_new[i] ) * sqrt( hStereoDft->spd_R_smooth_new[i] ); /* Calculate the value of weighted function for each frequency bin */ tmpf4 += EPSILON; if ( hStereoDft->noise_coherence > 0.25f ) { tmpf4 = tmpf1 * pow( tmpf2, 2.0f ) * pow( tmpf4, -2.0f ); tmpf3 += tmpf2 * tmpf1; tmpf3 -= tmpf2 * tmpf4; } else { tmpf4 = tmpf2 * pow( tmpf4, -2.0f ); } /* Calculate the value of weighted generlized cross-correlation 
function for each frequency bin */ xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf4; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf4; } else { tmpf3 += tmpf2 * tmpf1; xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1; } #else tmpf3 += tmpf2 * tmpf1; xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1; #endif /* Low pass filter L/R power spectrum */ /* Calculate coherence as cross spectral density divided by L*R power spectrum */ Loading @@ -1193,42 +985,9 @@ void stereo_dft_enc_compute_itd( tmpf1 += EPSILON; tmpf2 = tmpf1; tmpf1 = powf( tmpf1, alpha ); #ifdef ITD_WINNER_GAIN_MODIFY if ( hStereoDft->mus_flag == 0 ) { tmpf4 = 1.0f; /* Calculate smoothed spectral power density for the L/R channel */ hStereoDft->spd_L_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_L_smooth_new[i] + sfm_L * Spd_L[i]; hStereoDft->spd_R_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_R_smooth_new[i] + sfm_L * Spd_R[i]; /* Calculate cross spectral power density using the smoothed spectral power density*/ tmpf4 = (float) sqrt( hStereoDft->spd_L_smooth_new[i] ) * sqrt( hStereoDft->spd_R_smooth_new[i] ); /* Calculate the value of weighted function for each frequency bin */ tmpf4 += EPSILON; if ( hStereoDft->noise_coherence > 0.25f ) { tmpf4 = tmpf1 * pow( tmpf2, 2.0f ) * pow( tmpf4, -2.0f ); tmpf3 += tmpf2 * tmpf1; tmpf3 -= tmpf2 * tmpf4; } else { tmpf4 = tmpf2 * pow( tmpf4, -2.0f ); } /* Calculate the value of weighted generlized cross-correlation function for each frequency bin */ xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf4; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf4; } else { tmpf3 += tmpf2 * tmpf1; xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1; } #else tmpf3 += tmpf2 * tmpf1; xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1; 
#endif } } } Loading @@ -1242,70 +1001,16 @@ void stereo_dft_enc_compute_itd( tmpf1 += EPSILON; tmpf2 = tmpf1; tmpf1 = powf( tmpf1, alpha ); #ifdef ITD_WINNER_GAIN_MODIFY if ( hStereoDft->mus_flag == 0 ) { tmpf4 = 1.0f; /* Calculate smoothed spectral power density for the L/R channel */ hStereoDft->spd_L_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_L_smooth_new[i] + sfm_L * Spd_L[i]; hStereoDft->spd_R_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_R_smooth_new[i] + sfm_L * Spd_R[i]; /* Calculate cross spectral power density using the smoothed spectral power density*/ tmpf4 = (float) sqrt( hStereoDft->spd_L_smooth_new[i] ) * sqrt( hStereoDft->spd_R_smooth_new[i] ); /* Calculate the value of weighted function for each frequency bin */ tmpf4 += EPSILON; if ( hStereoDft->noise_coherence > 0.25f ) { tmpf4 = tmpf1 * pow( tmpf2, 2.0f ) * pow( tmpf4, -2.0f ); tmpf3 += tmpf2 * tmpf1; tmpf3 -= tmpf2 * tmpf4; } else { tmpf4 = tmpf2 * pow( tmpf4, -2.0f ); } /* Calculate the value of weighted generlized cross-correlation function for each frequency bin */ xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf4; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf4; } else { tmpf3 += tmpf2 * tmpf1; xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1; } #else tmpf3 += tmpf2 * tmpf1; xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1; #endif } } #ifdef ITD_WINNER_GAIN_MODIFY if ( hStereoDft->mus_flag == 0 ) { if ( hStereoDft->noise_coherence > 0.25f ) { tmpf1 = (float) ( NFFT / 2 + 1 ) / tmpf3; for ( i = 0; i < NFFT; i++ ) { xcorr[i] *= tmpf1; } } else { for ( i = NFFT / 2; i < NFFT; i++ ) { xcorr[i] = 0.0f; } } } #else tmpf1 = (float) ( NFFT / 2 + 1 ) / tmpf3; for ( i = 0; i < NFFT; i++ ) { xcorr[i] *= tmpf1; } #endif /*calculate mean E ratio of main to background signal for cohSNR*/ if ( hCPE->element_mode == IVAS_CPE_DFT ) { Loading Loading
lib_com/options.h +0 −1 Original line number Diff line number Diff line Loading @@ -139,7 +139,6 @@ /*#define LSF_RE_USE_SECONDARY_CHANNEL_REUSEMODE */ /* switch to isolate the reuse mode case */ #endif #define DISABLE_ADAP_RES_COD_TMP /* temporary fix for IVAS-403, disables adaptive residual coding */ /*#define ITD_WINNER_GAIN_MODIFY */ /* ITD optimization - WORK IN PROGRESS */ /*#define FIX_I4_OL_PITCH*/ /* fix open-loop pitch used for EVS core switching */ #define FIX_103_RA_PARAMS_PARAM_BIN_REND /* Issue 103: Digest room acoustics parameters for Parametric Binaural Renderer*/ /*#define SBA_HPF_TUNING_DEC*/ Loading
lib_enc/ivas_core_pre_proc_front.c +0 −34 Original line number Diff line number Diff line Loading @@ -443,13 +443,6 @@ ivas_error pre_proc_front_ivas( st->vad_flag = wb_vad( st, fr_bands, &i, &i, &i, &snr_sum_he, &localVAD_HE_SAD, &( st->flag_noisy_speech_snr ), NULL, NULL, -1000.0f, -1000.0f ); #ifdef ITD_WINNER_GAIN_MODIFY /*Save the local_vad flag for the noise coherence calculation*/ if ( element_mode == IVAS_CPE_DFT ) { hCPE->hStereoDft->local_vad = (short) ( st->vad_flag ); } #endif if ( force_front_vad == 1 || front_vad_flag == 1 ) { Loading Loading @@ -835,33 +828,6 @@ ivas_error pre_proc_front_ivas( /* 2nd stage speech/music classification (ACELP/GSC/TCX core selection) */ ivas_smc_mode_selection( st, element_brate, smc_dec, *relE, Etot, attack_flag, inp_12k8, S_map, flag_spitch ); #ifdef ITD_WINNER_GAIN_MODIFY if ( element_mode == IVAS_CPE_DFT ) { if ( hCPE->hStereoDft->mus_flag != smc_dec || hCPE->element_mode != hCPE->last_element_mode ) { hCPE->hStereoDft->noise_coherence = 0.0f; set_zero( hCPE->hStereoDft->spd_L_noise, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hCPE->hStereoDft->spd_R_noise, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hCPE->hStereoDft->spd_L_noise_min, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hCPE->hStereoDft->spd_R_noise_min, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hCPE->hStereoDft->spd_L_noise_max, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hCPE->hStereoDft->spd_R_noise_max, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hCPE->hStereoDft->winner_gain_L, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hCPE->hStereoDft->winner_gain_R, STEREO_DFT_N_32k_ENC / 2 ); set_f( hCPE->hStereoDft->spd_L_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); set_f( hCPE->hStereoDft->spd_R_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); } if ( smc_dec == MUSIC && st->vad_flag == 1 ) { hCPE->hStereoDft->mus_flag = 2; } else { hCPE->hStereoDft->mus_flag = 0; } } #endif } /*----------------------------------------------------------------* Loading
lib_enc/ivas_stat_enc.h +0 −16 Original line number Diff line number Diff line Loading @@ -219,22 +219,6 @@ typedef struct stereo_dft_enc_data_struct float voicing_lt; #ifdef ITD_WINNER_GAIN_MODIFY float noise_coherence; int16_t local_vad; int16_t mus_flag; float spd_L_noise[STEREO_DFT_N_32k_ENC / 2]; /*The estimation of spectral power density of noise in the left channel*/ float spd_R_noise[STEREO_DFT_N_32k_ENC / 2]; /*The estimation of spectral power density of noise in the right channel*/ float spd_L_noise_min[STEREO_DFT_N_32k_ENC / 2]; float spd_R_noise_min[STEREO_DFT_N_32k_ENC / 2]; float spd_L_noise_max[STEREO_DFT_N_32k_ENC / 2]; float spd_R_noise_max[STEREO_DFT_N_32k_ENC / 2]; float winner_gain_L[STEREO_DFT_N_32k_ENC / 2]; /*The estimation of the Winner gain of the left channel*/ float winner_gain_R[STEREO_DFT_N_32k_ENC / 2]; /*The estimation of the Winner gain of the right channel*/ float spd_L_smooth_new[STEREO_DFT_N_32k_ENC / 2]; float spd_R_smooth_new[STEREO_DFT_N_32k_ENC / 2]; #endif int16_t currentNumUpdates; int16_t expectedNumUpdates; /* Expected number of frames before use of ITD estimate */ Loading
lib_enc/ivas_stereo_dft_enc.c +0 −15 Original line number Diff line number Diff line Loading @@ -416,21 +416,6 @@ void stereo_dft_enc_reset( ) { int16_t i; #ifdef ITD_WINNER_GAIN_MODIFY hStereoDft->noise_coherence = 0.0f; hStereoDft->local_vad = 0; hStereoDft->mus_flag = 2; set_zero( hStereoDft->spd_L_noise, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->spd_R_noise, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->spd_L_noise_min, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->spd_R_noise_min, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->spd_L_noise_max, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->spd_R_noise_max, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->winner_gain_L, STEREO_DFT_N_32k_ENC / 2 ); set_zero( hStereoDft->winner_gain_R, STEREO_DFT_N_32k_ENC / 2 ); set_f( hStereoDft->spd_L_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); set_f( hStereoDft->spd_R_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); #endif /*reset parameters*/ set_zero( hStereoDft->side_gain, STEREO_DFT_ENC_DFT_NB * STEREO_DFT_BAND_MAX ); set_s( hStereoDft->side_gain_index_EC, 15, STEREO_DFT_BAND_MAX ); Loading
lib_enc/ivas_stereo_dft_enc_itd.c +0 −295 Original line number Diff line number Diff line Loading @@ -82,134 +82,6 @@ #define ITD_MAX_MDCT 80 #ifdef ITD_WINNER_GAIN_MODIFY /*------------------------------------------------------------------------- * stereo_dft_frame_coherence() * Calculate the frame coherence of the stereo signal * *-------------------------------------------------------------------------*/ static void stereo_dft_frame_coherence( STEREO_DFT_ENC_DATA_HANDLE hStereoDft, float *xcorr, float *Spd_L, float *Spd_R, float *frame_coherence ) { float spd_xcorr_real, spd_xcorr_imag, spd_LR; int16_t i, NFFT; NFFT = min( STEREO_DFT_N_32k_ENC, hStereoDft->NFFT ); spd_xcorr_real = 0.0f; spd_xcorr_imag = 0.0f; spd_LR = 0.0f; *frame_coherence = 0.0f; for ( i = 1; i < NFFT / 2; i++ ) { spd_xcorr_real += (float) xcorr[i * 2]; spd_xcorr_imag += (float) xcorr[i * 2 + 1]; spd_LR += (float) sqrt( Spd_L[i] ) * sqrt( Spd_R[i] ); } *frame_coherence = (float) ( sqrt( spd_xcorr_real * spd_xcorr_real + spd_xcorr_imag * spd_xcorr_imag ) / spd_LR ); return; } /*------------------------------------------------------------------------- * stereo_dft_itd_winner_gain() * Estimate the spectral power density of noise in the stereo signal and * calculate the winner gain for the ITD estimation *-------------------------------------------------------------------------*/ static void stereo_dft_itd_winner_gain( STEREO_DFT_ENC_DATA_HANDLE hStereoDft, float *Spd_L, float *Spd_R ) { int16_t i; float alpha1 = 0.05f; float alpha2 = 0.95f; float trackingfactor = 0.5f; float ratio = 2.75f; for ( i = 0; i < STEREO_DFT_N_16k_ENC / 2; i++ ) { if ( hStereoDft->spd_L_noise_min[i] > Spd_L[i] ) { hStereoDft->spd_L_noise_min[i] = ( 1 - 0.15 ) * hStereoDft->spd_L_noise_min[i] + 0.15 * Spd_L[i]; } else { hStereoDft->spd_L_noise_min[i] = hStereoDft->spd_L_noise_min[i] + alpha1 * ( Spd_L[i] - hStereoDft->spd_L_noise_min[i] ); } if ( hStereoDft->spd_R_noise[i] > Spd_R[i] ) { hStereoDft->spd_R_noise_min[i] = ( 
1 - 0.15 ) * hStereoDft->spd_R_noise_min[i] + 0.15 * Spd_R[i]; } else { hStereoDft->spd_R_noise_min[i] = hStereoDft->spd_R_noise_min[i] + alpha1 * ( Spd_R[i] - hStereoDft->spd_R_noise_min[i] ); } if ( hStereoDft->spd_L_noise_max[i] < Spd_L[i] ) { hStereoDft->spd_L_noise_max[i] = Spd_L[i]; } else { hStereoDft->spd_L_noise_max[i] = hStereoDft->spd_L_noise_max[i] + alpha2 * ( Spd_L[i] - hStereoDft->spd_L_noise_max[i] ); } if ( hStereoDft->spd_R_noise_max[i] < Spd_R[i] ) { hStereoDft->spd_R_noise_max[i] = Spd_R[i]; } else { hStereoDft->spd_R_noise_max[i] = hStereoDft->spd_R_noise_max[i] + alpha2 * ( Spd_R[i] - hStereoDft->spd_R_noise_max[i] ); } if ( hStereoDft->spd_L_noise_max[i] > ratio * hStereoDft->spd_L_noise_min[i] ) { hStereoDft->spd_L_noise[i] = hStereoDft->spd_L_noise[i]; hStereoDft->winner_gain_L[i] = ( Spd_L[i] - hStereoDft->spd_L_noise[i] ) / Spd_L[i]; } else { hStereoDft->spd_L_noise[i] = hStereoDft->spd_L_noise[i] + trackingfactor * ( Spd_L[i] - hStereoDft->spd_L_noise[i] ); hStereoDft->winner_gain_L[i] = ( Spd_L[i] - hStereoDft->spd_L_noise[i] ) / Spd_L[i]; } if ( hStereoDft->spd_R_noise_max[i] > ratio * hStereoDft->spd_R_noise_min[i] ) { hStereoDft->spd_R_noise[i] = hStereoDft->spd_R_noise[i]; hStereoDft->winner_gain_R[i] = ( Spd_R[i] - hStereoDft->spd_R_noise[i] ) / Spd_R[i]; } else { hStereoDft->spd_R_noise[i] = hStereoDft->spd_R_noise[i] + trackingfactor * ( Spd_R[i] - hStereoDft->spd_R_noise[i] ); hStereoDft->winner_gain_R[i] = ( Spd_R[i] - hStereoDft->spd_R_noise[i] ) / Spd_R[i]; } } for ( i = 0; i < STEREO_DFT_N_16k_ENC / 2; i++ ) { hStereoDft->winner_gain_L[i] = max( hStereoDft->winner_gain_L[i], 0.01f ); hStereoDft->winner_gain_R[i] = max( hStereoDft->winner_gain_R[i], 0.01f ); hStereoDft->winner_gain_L[i] = min( hStereoDft->winner_gain_L[i], 1.0f ); hStereoDft->winner_gain_R[i] = min( hStereoDft->winner_gain_R[i], 1.0f ); if ( hStereoDft->winner_gain_L[i] >= 0.80f ) hStereoDft->winner_gain_L[i] = 1.0f; else hStereoDft->winner_gain_L[i] = 0.01f; 
if ( hStereoDft->winner_gain_R[i] >= 0.80f ) hStereoDft->winner_gain_R[i] = 1.0f; else hStereoDft->winner_gain_R[i] = 0.01f; } return; } #endif /*------------------------------------------------------------------------- * set_band_limits() Loading Loading @@ -741,9 +613,6 @@ void stereo_dft_enc_compute_itd( int16_t itd, itd_td; float xcorr_itd[STEREO_DFT_N_32k_ENC]; float tmpf1, tmpf2, tmpf3; #ifdef ITD_WINNER_GAIN_MODIFY float frame_coherence, tmpf4; #endif float thres, alpha; int16_t index; float xcorr_max, sum_nrg_L_lb, par_L[XCORR_LB_NUM_BANDS], par_L_avrg, sum_nrg_L_tmp; Loading Loading @@ -984,32 +853,6 @@ void stereo_dft_enc_compute_itd( xcorr[0] = sign( hItd->xcorr_smooth[0] ); xcorr[1] = sign( hItd->xcorr_smooth[1] ); #ifdef ITD_WINNER_GAIN_MODIFY if ( hStereoDft->mus_flag == 0 ) { stereo_dft_frame_coherence( hStereoDft, xcorr, Spd_L, Spd_R, &frame_coherence ); if ( hStereoDft->local_vad == 0 ) { hStereoDft->noise_coherence = 0.9f * hStereoDft->noise_coherence + ( 1.0 - 0.9f ) * frame_coherence; } else { hStereoDft->noise_coherence = hStereoDft->noise_coherence; } stereo_dft_itd_winner_gain( hStereoDft, Spd_L, Spd_R ); if ( hStereoDft->noise_coherence > 0.25f ) { for ( i = 1; i < NFFT / 2; i++ ) { xcorr[2 * i] = ( hStereoDft->winner_gain_L[i] * hStereoDft->winner_gain_R[i] ) * xcorr[2 * i]; xcorr[2 * i + 1] = ( hStereoDft->winner_gain_L[i] * hStereoDft->winner_gain_R[i] ) * xcorr[2 * i + 1]; Spd_L[i] = hStereoDft->winner_gain_L[i] * Spd_L[i]; Spd_R[i] = hStereoDft->winner_gain_R[i] * Spd_R[i]; } } } #endif if ( hCPE->element_mode == IVAS_CPE_DFT && ( hItd->td_itd[k_offset] - hItd->td_itd[k_offset - 1] ) ) { Loading @@ -1032,23 +875,6 @@ void stereo_dft_enc_compute_itd( } tmpf3 = 2.f; #ifdef ITD_WINNER_GAIN_MODIFY if ( hStereoDft->mus_flag == 0 ) { alpha = -0.8f; } else { if ( flag_noisy_speech_snr ) { alpha = -0.8f; } else { alpha = -1.0f; } } #else if ( flag_noisy_speech_snr ) { alpha = -0.8f; Loading @@ -1057,7 +883,6 @@ void stereo_dft_enc_compute_itd( 
{ alpha = -1.0f; } #endif if ( hCPE->hCoreCoder[0]->Opt_DTX_ON && hCPE->element_mode == IVAS_CPE_DFT ) { Loading Loading @@ -1140,42 +965,9 @@ void stereo_dft_enc_compute_itd( tmpf1 += EPSILON; tmpf2 = tmpf1; tmpf1 = powf( tmpf1, alpha ); #ifdef ITD_WINNER_GAIN_MODIFY if ( hStereoDft->mus_flag == 0 ) { tmpf4 = 1.0f; /* Calculate smoothed spectral power density for the L/R channel */ hStereoDft->spd_L_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_L_smooth_new[i] + sfm_L * Spd_L[i]; hStereoDft->spd_R_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_R_smooth_new[i] + sfm_L * Spd_R[i]; /* Calculate cross spectral power density using the smoothed spectral power density*/ tmpf4 = (float) sqrt( hStereoDft->spd_L_smooth_new[i] ) * sqrt( hStereoDft->spd_R_smooth_new[i] ); /* Calculate the value of weighted function for each frequency bin */ tmpf4 += EPSILON; if ( hStereoDft->noise_coherence > 0.25f ) { tmpf4 = tmpf1 * pow( tmpf2, 2.0f ) * pow( tmpf4, -2.0f ); tmpf3 += tmpf2 * tmpf1; tmpf3 -= tmpf2 * tmpf4; } else { tmpf4 = tmpf2 * pow( tmpf4, -2.0f ); } /* Calculate the value of weighted generlized cross-correlation function for each frequency bin */ xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf4; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf4; } else { tmpf3 += tmpf2 * tmpf1; xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1; } #else tmpf3 += tmpf2 * tmpf1; xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1; #endif /* Low pass filter L/R power spectrum */ /* Calculate coherence as cross spectral density divided by L*R power spectrum */ Loading @@ -1193,42 +985,9 @@ void stereo_dft_enc_compute_itd( tmpf1 += EPSILON; tmpf2 = tmpf1; tmpf1 = powf( tmpf1, alpha ); #ifdef ITD_WINNER_GAIN_MODIFY if ( hStereoDft->mus_flag == 0 ) { tmpf4 = 1.0f; /* Calculate smoothed spectral power density for the L/R channel */ hStereoDft->spd_L_smooth_new[i] = ( 1.f - 
sfm_L ) * hStereoDft->spd_L_smooth_new[i] + sfm_L * Spd_L[i]; hStereoDft->spd_R_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_R_smooth_new[i] + sfm_L * Spd_R[i]; /* Calculate cross spectral power density using the smoothed spectral power density*/ tmpf4 = (float) sqrt( hStereoDft->spd_L_smooth_new[i] ) * sqrt( hStereoDft->spd_R_smooth_new[i] ); /* Calculate the value of weighted function for each frequency bin */ tmpf4 += EPSILON; if ( hStereoDft->noise_coherence > 0.25f ) { tmpf4 = tmpf1 * pow( tmpf2, 2.0f ) * pow( tmpf4, -2.0f ); tmpf3 += tmpf2 * tmpf1; tmpf3 -= tmpf2 * tmpf4; } else { tmpf4 = tmpf2 * pow( tmpf4, -2.0f ); } /* Calculate the value of weighted generlized cross-correlation function for each frequency bin */ xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf4; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf4; } else { tmpf3 += tmpf2 * tmpf1; xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1; } #else tmpf3 += tmpf2 * tmpf1; xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1; #endif } } } Loading @@ -1242,70 +1001,16 @@ void stereo_dft_enc_compute_itd( tmpf1 += EPSILON; tmpf2 = tmpf1; tmpf1 = powf( tmpf1, alpha ); #ifdef ITD_WINNER_GAIN_MODIFY if ( hStereoDft->mus_flag == 0 ) { tmpf4 = 1.0f; /* Calculate smoothed spectral power density for the L/R channel */ hStereoDft->spd_L_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_L_smooth_new[i] + sfm_L * Spd_L[i]; hStereoDft->spd_R_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_R_smooth_new[i] + sfm_L * Spd_R[i]; /* Calculate cross spectral power density using the smoothed spectral power density*/ tmpf4 = (float) sqrt( hStereoDft->spd_L_smooth_new[i] ) * sqrt( hStereoDft->spd_R_smooth_new[i] ); /* Calculate the value of weighted function for each frequency bin */ tmpf4 += EPSILON; if ( hStereoDft->noise_coherence > 0.25f ) { tmpf4 = tmpf1 * pow( tmpf2, 2.0f ) * pow( tmpf4, -2.0f ); 
tmpf3 += tmpf2 * tmpf1; tmpf3 -= tmpf2 * tmpf4; } else { tmpf4 = tmpf2 * pow( tmpf4, -2.0f ); } /* Calculate the value of weighted generlized cross-correlation function for each frequency bin */ xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf4; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf4; } else { tmpf3 += tmpf2 * tmpf1; xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1; } #else tmpf3 += tmpf2 * tmpf1; xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1; xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1; #endif } } #ifdef ITD_WINNER_GAIN_MODIFY if ( hStereoDft->mus_flag == 0 ) { if ( hStereoDft->noise_coherence > 0.25f ) { tmpf1 = (float) ( NFFT / 2 + 1 ) / tmpf3; for ( i = 0; i < NFFT; i++ ) { xcorr[i] *= tmpf1; } } else { for ( i = NFFT / 2; i < NFFT; i++ ) { xcorr[i] = 0.0f; } } } #else tmpf1 = (float) ( NFFT / 2 + 1 ) / tmpf3; for ( i = 0; i < NFFT; i++ ) { xcorr[i] *= tmpf1; } #endif /*calculate mean E ratio of main to background signal for cohSNR*/ if ( hCPE->element_mode == IVAS_CPE_DFT ) { Loading