Commit 4c573b12 authored by multrus's avatar multrus
Browse files

[cleanup] remove code within ITD_WINNER_GAIN_MODIFY

parent 7797b371
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -139,7 +139,6 @@
/*#define LSF_RE_USE_SECONDARY_CHANNEL_REUSEMODE */     /* switch to isolate the reuse mode case */
#endif
#define DISABLE_ADAP_RES_COD_TMP                        /* temporary fix for IVAS-403, disables adaptive residual coding */
/*#define ITD_WINNER_GAIN_MODIFY */                     /* ITD optimization - WORK IN PROGRESS */
/*#define FIX_I4_OL_PITCH*/                             /* fix open-loop pitch used for EVS core switching */
#define FIX_103_RA_PARAMS_PARAM_BIN_REND                /* Issue 103: Digest room acoustics parameters for Parametric Binaural Renderer*/
/*#define SBA_HPF_TUNING_DEC*/
+0 −34
Original line number Diff line number Diff line
@@ -443,13 +443,6 @@ ivas_error pre_proc_front_ivas(

    st->vad_flag = wb_vad( st, fr_bands, &i, &i, &i, &snr_sum_he, &localVAD_HE_SAD, &( st->flag_noisy_speech_snr ), NULL, NULL, -1000.0f, -1000.0f );

#ifdef ITD_WINNER_GAIN_MODIFY
    /*Save the local_vad flag for the noise coherence calculation*/
    if ( element_mode == IVAS_CPE_DFT )
    {
        hCPE->hStereoDft->local_vad = (short) ( st->vad_flag );
    }
#endif

    if ( force_front_vad == 1 || front_vad_flag == 1 )
    {
@@ -835,33 +828,6 @@ ivas_error pre_proc_front_ivas(
        /* 2nd stage speech/music classification (ACELP/GSC/TCX core selection) */
        ivas_smc_mode_selection( st, element_brate, smc_dec, *relE, Etot, attack_flag, inp_12k8, S_map, flag_spitch );

#ifdef ITD_WINNER_GAIN_MODIFY
        if ( element_mode == IVAS_CPE_DFT )
        {
            if ( hCPE->hStereoDft->mus_flag != smc_dec || hCPE->element_mode != hCPE->last_element_mode )
            {
                hCPE->hStereoDft->noise_coherence = 0.0f;
                set_zero( hCPE->hStereoDft->spd_L_noise, STEREO_DFT_N_32k_ENC / 2 );
                set_zero( hCPE->hStereoDft->spd_R_noise, STEREO_DFT_N_32k_ENC / 2 );
                set_zero( hCPE->hStereoDft->spd_L_noise_min, STEREO_DFT_N_32k_ENC / 2 );
                set_zero( hCPE->hStereoDft->spd_R_noise_min, STEREO_DFT_N_32k_ENC / 2 );
                set_zero( hCPE->hStereoDft->spd_L_noise_max, STEREO_DFT_N_32k_ENC / 2 );
                set_zero( hCPE->hStereoDft->spd_R_noise_max, STEREO_DFT_N_32k_ENC / 2 );
                set_zero( hCPE->hStereoDft->winner_gain_L, STEREO_DFT_N_32k_ENC / 2 );
                set_zero( hCPE->hStereoDft->winner_gain_R, STEREO_DFT_N_32k_ENC / 2 );
                set_f( hCPE->hStereoDft->spd_L_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 );
                set_f( hCPE->hStereoDft->spd_R_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 );
            }
            if ( smc_dec == MUSIC && st->vad_flag == 1 )
            {
                hCPE->hStereoDft->mus_flag = 2;
            }
            else
            {
                hCPE->hStereoDft->mus_flag = 0;
            }
        }
#endif
    }

    /*----------------------------------------------------------------*
+0 −16
Original line number Diff line number Diff line
@@ -219,22 +219,6 @@ typedef struct stereo_dft_enc_data_struct

    float voicing_lt;

#ifdef ITD_WINNER_GAIN_MODIFY
    float noise_coherence;
    int16_t local_vad;
    int16_t mus_flag;
    float spd_L_noise[STEREO_DFT_N_32k_ENC / 2]; /*The estimation of spectral power density of noise in the left channel*/
    float spd_R_noise[STEREO_DFT_N_32k_ENC / 2]; /*The estimation of spectral power density of noise in the right channel*/
    float spd_L_noise_min[STEREO_DFT_N_32k_ENC / 2];
    float spd_R_noise_min[STEREO_DFT_N_32k_ENC / 2];
    float spd_L_noise_max[STEREO_DFT_N_32k_ENC / 2];
    float spd_R_noise_max[STEREO_DFT_N_32k_ENC / 2];
    float winner_gain_L[STEREO_DFT_N_32k_ENC / 2]; /*The estimation of the Winner gain of the left channel*/
    float winner_gain_R[STEREO_DFT_N_32k_ENC / 2]; /*The estimation of the Winner gain of the right channel*/
    float spd_L_smooth_new[STEREO_DFT_N_32k_ENC / 2];
    float spd_R_smooth_new[STEREO_DFT_N_32k_ENC / 2];

#endif

    int16_t currentNumUpdates;
    int16_t expectedNumUpdates; /* Expected number of frames before use of ITD estimate */
+0 −15
Original line number Diff line number Diff line
@@ -416,21 +416,6 @@ void stereo_dft_enc_reset(
)
{
    int16_t i;
#ifdef ITD_WINNER_GAIN_MODIFY
    hStereoDft->noise_coherence = 0.0f;
    hStereoDft->local_vad = 0;
    hStereoDft->mus_flag = 2;
    set_zero( hStereoDft->spd_L_noise, STEREO_DFT_N_32k_ENC / 2 );
    set_zero( hStereoDft->spd_R_noise, STEREO_DFT_N_32k_ENC / 2 );
    set_zero( hStereoDft->spd_L_noise_min, STEREO_DFT_N_32k_ENC / 2 );
    set_zero( hStereoDft->spd_R_noise_min, STEREO_DFT_N_32k_ENC / 2 );
    set_zero( hStereoDft->spd_L_noise_max, STEREO_DFT_N_32k_ENC / 2 );
    set_zero( hStereoDft->spd_R_noise_max, STEREO_DFT_N_32k_ENC / 2 );
    set_zero( hStereoDft->winner_gain_L, STEREO_DFT_N_32k_ENC / 2 );
    set_zero( hStereoDft->winner_gain_R, STEREO_DFT_N_32k_ENC / 2 );
    set_f( hStereoDft->spd_L_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 );
    set_f( hStereoDft->spd_R_smooth_new, 1.0f, STEREO_DFT_N_32k_ENC / 2 );
#endif
    /*reset parameters*/
    set_zero( hStereoDft->side_gain, STEREO_DFT_ENC_DFT_NB * STEREO_DFT_BAND_MAX );
    set_s( hStereoDft->side_gain_index_EC, 15, STEREO_DFT_BAND_MAX );
+0 −295
Original line number Diff line number Diff line
@@ -82,134 +82,6 @@
#define ITD_MAX_MDCT 80


#ifdef ITD_WINNER_GAIN_MODIFY
/*-------------------------------------------------------------------------
 * stereo_dft_frame_coherence()
 *
 * Calculate the frame coherence of the stereo signal:
 *
 *     | sum_i xcorr_i | / sum_i ( sqrt( Spd_L[i] ) * sqrt( Spd_R[i] ) )
 *
 * accumulated over bins 1 .. NFFT/2 - 1 (bin 0 is skipped), where NFFT is
 * hStereoDft->NFFT limited to STEREO_DFT_N_32k_ENC.
 *
 * The result is 0 when the denominator is not strictly positive (e.g. an
 * all-zero frame); this avoids a 0/0 = NaN result that would otherwise
 * propagate into any recursively smoothed value fed by this function.
 *-------------------------------------------------------------------------*/

static void stereo_dft_frame_coherence(
    STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i  : DFT stereo encoder handle (only NFFT is read)        */
    float *xcorr,                          /* i  : cross-spectrum, read as (real, imag) pairs per bin   */
    float *Spd_L,                          /* i  : spectral power density of the left channel           */
    float *Spd_R,                          /* i  : spectral power density of the right channel          */
    float *frame_coherence                 /* o  : coherence of the current frame                       */
)
{
    float spd_xcorr_real, spd_xcorr_imag, spd_LR;
    int16_t i, NFFT;

    NFFT = min( STEREO_DFT_N_32k_ENC, hStereoDft->NFFT );
    spd_xcorr_real = 0.0f;
    spd_xcorr_imag = 0.0f;
    spd_LR = 0.0f;
    *frame_coherence = 0.0f;

    for ( i = 1; i < NFFT / 2; i++ )
    {
        /* complex sum of the cross-spectrum */
        spd_xcorr_real += xcorr[i * 2];
        spd_xcorr_imag += xcorr[i * 2 + 1];

        /* sum of the per-bin magnitude products */
        spd_LR += (float) sqrt( Spd_L[i] ) * sqrt( Spd_R[i] );
    }

    /* Normalize only when the denominator is usable; a NaN denominator also fails this test */
    if ( spd_LR > 0.0f )
    {
        *frame_coherence = (float) ( sqrt( spd_xcorr_real * spd_xcorr_real + spd_xcorr_imag * spd_xcorr_imag ) / spd_LR );
    }

    return;
}


/*-------------------------------------------------------------------------
 * stereo_dft_itd_winner_gain_ch()
 *
 * Per-channel worker of stereo_dft_itd_winner_gain(): updates the minimum
 * and maximum trackers, the noise estimate and the binary gain of one
 * channel for the first n_bins frequency bins.
 *-------------------------------------------------------------------------*/

static void stereo_dft_itd_winner_gain_ch(
    const float *spd,     /* i  : spectral power density of the channel      */
    float *noise,         /* i/o: noise spectral power density estimate      */
    float *noise_min,     /* i/o: minimum tracker                            */
    float *noise_max,     /* i/o: maximum tracker                            */
    float *gain,          /* o  : gain per bin, either 1.0f or 0.01f         */
    const int16_t n_bins  /* i  : number of bins to process                  */
)
{
    const float alpha_min = 0.05f;       /* upward adaptation speed of the minimum tracker   */
    const float alpha_max = 0.95f;       /* downward adaptation speed of the maximum tracker */
    const float tracking_factor = 0.5f;  /* adaptation speed of the noise estimate           */
    const float ratio = 2.75f;           /* max/min ratio above which the noise is frozen    */
    int16_t i;

    for ( i = 0; i < n_bins; i++ )
    {
        /* Minimum tracker: follows a drop with weight 0.15, rises with weight alpha_min */
        if ( noise_min[i] > spd[i] )
        {
            noise_min[i] = ( 1 - 0.15 ) * noise_min[i] + 0.15 * spd[i];
        }
        else
        {
            noise_min[i] = noise_min[i] + alpha_min * ( spd[i] - noise_min[i] );
        }

        /* Maximum tracker: jumps to a new peak, otherwise decays towards the input */
        if ( noise_max[i] < spd[i] )
        {
            noise_max[i] = spd[i];
        }
        else
        {
            noise_max[i] = noise_max[i] + alpha_max * ( spd[i] - noise_max[i] );
        }

        /* The noise estimate is frozen while max exceeds ratio * min, and tracks the input otherwise */
        if ( !( noise_max[i] > ratio * noise_min[i] ) )
        {
            noise[i] = noise[i] + tracking_factor * ( spd[i] - noise[i] );
        }

        /* Gain ( S - N ) / S, hard-limited to two values. A non-finite quotient
         * (spd[i] == 0) fails the comparison and therefore maps to 0.01f, which
         * is also what the former clamp to [0.01, 1.0] followed by the same
         * threshold produced. */
        gain[i] = ( spd[i] - noise[i] ) / spd[i];
        if ( gain[i] >= 0.80f )
        {
            gain[i] = 1.0f;
        }
        else
        {
            gain[i] = 0.01f;
        }
    }

    return;
}


/*-------------------------------------------------------------------------
 * stereo_dft_itd_winner_gain()
 *
 * Estimate the spectral power density of the noise in both channels of the
 * stereo signal and derive the per-bin gains ( S - N ) / S (a Wiener-type
 * gain, called "winner gain" in the identifiers) used for the ITD estimation.
 *
 * Both channels are processed by the same per-channel routine. The former
 * inline code tested spd_R_noise[] instead of spd_R_noise_min[] when updating
 * the right-channel minimum tracker, unlike the left channel.
 *
 * NOTE(review): only the first STEREO_DFT_N_16k_ENC / 2 bins are updated,
 * as in the original loops - confirm that this bound is intended for all
 * supported DFT sizes.
 *-------------------------------------------------------------------------*/

static void stereo_dft_itd_winner_gain(
    STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: DFT stereo encoder handle (noise trackers and gains) */
    float *Spd_L,                          /* i  : spectral power density of the left channel           */
    float *Spd_R                           /* i  : spectral power density of the right channel          */
)
{
    stereo_dft_itd_winner_gain_ch( Spd_L, hStereoDft->spd_L_noise, hStereoDft->spd_L_noise_min, hStereoDft->spd_L_noise_max, hStereoDft->winner_gain_L, STEREO_DFT_N_16k_ENC / 2 );

    stereo_dft_itd_winner_gain_ch( Spd_R, hStereoDft->spd_R_noise, hStereoDft->spd_R_noise_min, hStereoDft->spd_R_noise_max, hStereoDft->winner_gain_R, STEREO_DFT_N_16k_ENC / 2 );

    return;
}
#endif

/*-------------------------------------------------------------------------
 * set_band_limits()
@@ -741,9 +613,6 @@ void stereo_dft_enc_compute_itd(
    int16_t itd, itd_td;
    float xcorr_itd[STEREO_DFT_N_32k_ENC];
    float tmpf1, tmpf2, tmpf3;
#ifdef ITD_WINNER_GAIN_MODIFY
    float frame_coherence, tmpf4;
#endif
    float thres, alpha;
    int16_t index;
    float xcorr_max, sum_nrg_L_lb, par_L[XCORR_LB_NUM_BANDS], par_L_avrg, sum_nrg_L_tmp;
@@ -984,32 +853,6 @@ void stereo_dft_enc_compute_itd(
    xcorr[0] = sign( hItd->xcorr_smooth[0] );
    xcorr[1] = sign( hItd->xcorr_smooth[1] );

#ifdef ITD_WINNER_GAIN_MODIFY
    if ( hStereoDft->mus_flag == 0 )
    {
        stereo_dft_frame_coherence( hStereoDft, xcorr, Spd_L, Spd_R, &frame_coherence );
        if ( hStereoDft->local_vad == 0 )
        {
            hStereoDft->noise_coherence = 0.9f * hStereoDft->noise_coherence + ( 1.0 - 0.9f ) * frame_coherence;
        }
        else
        {
            hStereoDft->noise_coherence = hStereoDft->noise_coherence;
        }
        stereo_dft_itd_winner_gain( hStereoDft, Spd_L, Spd_R );

        if ( hStereoDft->noise_coherence > 0.25f )
        {
            for ( i = 1; i < NFFT / 2; i++ )
            {
                xcorr[2 * i] = ( hStereoDft->winner_gain_L[i] * hStereoDft->winner_gain_R[i] ) * xcorr[2 * i];
                xcorr[2 * i + 1] = ( hStereoDft->winner_gain_L[i] * hStereoDft->winner_gain_R[i] ) * xcorr[2 * i + 1];
                Spd_L[i] = hStereoDft->winner_gain_L[i] * Spd_L[i];
                Spd_R[i] = hStereoDft->winner_gain_R[i] * Spd_R[i];
            }
        }
    }
#endif

    if ( hCPE->element_mode == IVAS_CPE_DFT && ( hItd->td_itd[k_offset] - hItd->td_itd[k_offset - 1] ) )
    {
@@ -1032,23 +875,6 @@ void stereo_dft_enc_compute_itd(
    }

    tmpf3 = 2.f;
#ifdef ITD_WINNER_GAIN_MODIFY
    if ( hStereoDft->mus_flag == 0 )
    {
        alpha = -0.8f;
    }
    else
    {
        if ( flag_noisy_speech_snr )
        {
            alpha = -0.8f;
        }
        else
        {
            alpha = -1.0f;
        }
    }
#else
    if ( flag_noisy_speech_snr )
    {
        alpha = -0.8f;
@@ -1057,7 +883,6 @@ void stereo_dft_enc_compute_itd(
    {
        alpha = -1.0f;
    }
#endif

    if ( hCPE->hCoreCoder[0]->Opt_DTX_ON && hCPE->element_mode == IVAS_CPE_DFT )
    {
@@ -1140,42 +965,9 @@ void stereo_dft_enc_compute_itd(
                tmpf1 += EPSILON;
                tmpf2 = tmpf1;
                tmpf1 = powf( tmpf1, alpha );
#ifdef ITD_WINNER_GAIN_MODIFY
                if ( hStereoDft->mus_flag == 0 )
                {
                    tmpf4 = 1.0f;
                    /* Calculate smoothed spectral power density for the L/R channel */
                    hStereoDft->spd_L_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_L_smooth_new[i] + sfm_L * Spd_L[i];
                    hStereoDft->spd_R_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_R_smooth_new[i] + sfm_L * Spd_R[i];
                    /* Calculate cross spectral power density using the smoothed spectral power density*/
                    tmpf4 = (float) sqrt( hStereoDft->spd_L_smooth_new[i] ) * sqrt( hStereoDft->spd_R_smooth_new[i] );
                    /* Calculate the value of weighted function for each frequency bin */
                    tmpf4 += EPSILON;
                    if ( hStereoDft->noise_coherence > 0.25f )
                    {
                        tmpf4 = tmpf1 * pow( tmpf2, 2.0f ) * pow( tmpf4, -2.0f );
                        tmpf3 += tmpf2 * tmpf1;
                        tmpf3 -= tmpf2 * tmpf4;
                    }
                    else
                    {
                        tmpf4 = tmpf2 * pow( tmpf4, -2.0f );
                    }
                    /* Calculate the value of the weighted generalized cross-correlation function for each frequency bin */
                    xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf4;
                    xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf4;
                }
                else
                {
                tmpf3 += tmpf2 * tmpf1;
                xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
                xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
                }
#else
                tmpf3 += tmpf2 * tmpf1;
                xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
                xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
#endif

                /* Low pass filter L/R power spectrum */
                /* Calculate coherence as cross spectral density divided by L*R power spectrum */
@@ -1193,42 +985,9 @@ void stereo_dft_enc_compute_itd(
                tmpf1 += EPSILON;
                tmpf2 = tmpf1;
                tmpf1 = powf( tmpf1, alpha );
#ifdef ITD_WINNER_GAIN_MODIFY
                if ( hStereoDft->mus_flag == 0 )
                {
                    tmpf4 = 1.0f;
                    /* Calculate smoothed spectral power density for the L/R channel */
                    hStereoDft->spd_L_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_L_smooth_new[i] + sfm_L * Spd_L[i];
                    hStereoDft->spd_R_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_R_smooth_new[i] + sfm_L * Spd_R[i];
                    /* Calculate cross spectral power density using the smoothed spectral power density*/
                    tmpf4 = (float) sqrt( hStereoDft->spd_L_smooth_new[i] ) * sqrt( hStereoDft->spd_R_smooth_new[i] );
                    /* Calculate the value of weighted function for each frequency bin */
                    tmpf4 += EPSILON;
                    if ( hStereoDft->noise_coherence > 0.25f )
                    {
                        tmpf4 = tmpf1 * pow( tmpf2, 2.0f ) * pow( tmpf4, -2.0f );
                        tmpf3 += tmpf2 * tmpf1;
                        tmpf3 -= tmpf2 * tmpf4;
                    }
                    else
                    {
                        tmpf4 = tmpf2 * pow( tmpf4, -2.0f );
                    }
                    /* Calculate the value of the weighted generalized cross-correlation function for each frequency bin */
                    xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf4;
                    xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf4;
                }
                else
                {
                    tmpf3 += tmpf2 * tmpf1;
                    xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
                    xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
                }
#else
                tmpf3 += tmpf2 * tmpf1;
                xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
                xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
#endif
            }
        }
    }
@@ -1242,70 +1001,16 @@ void stereo_dft_enc_compute_itd(
            tmpf1 += EPSILON;
            tmpf2 = tmpf1;
            tmpf1 = powf( tmpf1, alpha );
#ifdef ITD_WINNER_GAIN_MODIFY
            if ( hStereoDft->mus_flag == 0 )
            {
                tmpf4 = 1.0f;
                /* Calculate smoothed spectral power density for the L/R channel */
                hStereoDft->spd_L_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_L_smooth_new[i] + sfm_L * Spd_L[i];
                hStereoDft->spd_R_smooth_new[i] = ( 1.f - sfm_L ) * hStereoDft->spd_R_smooth_new[i] + sfm_L * Spd_R[i];
                /* Calculate cross spectral power density using the smoothed spectral power density*/
                tmpf4 = (float) sqrt( hStereoDft->spd_L_smooth_new[i] ) * sqrt( hStereoDft->spd_R_smooth_new[i] );
                /* Calculate the value of weighted function for each frequency bin */
                tmpf4 += EPSILON;
                if ( hStereoDft->noise_coherence > 0.25f )
                {
                    tmpf4 = tmpf1 * pow( tmpf2, 2.0f ) * pow( tmpf4, -2.0f );
                    tmpf3 += tmpf2 * tmpf1;
                    tmpf3 -= tmpf2 * tmpf4;
                }
                else
                {
                    tmpf4 = tmpf2 * pow( tmpf4, -2.0f );
                }
                /* Calculate the value of the weighted generalized cross-correlation function for each frequency bin */
                xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf4;
                xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf4;
            }
            else
            {
            tmpf3 += tmpf2 * tmpf1;
            xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
            xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
        }
#else
            tmpf3 += tmpf2 * tmpf1;
            xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
            xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
#endif
        }
    }
#ifdef ITD_WINNER_GAIN_MODIFY
    if ( hStereoDft->mus_flag == 0 )
    {
        if ( hStereoDft->noise_coherence > 0.25f )
        {
            tmpf1 = (float) ( NFFT / 2 + 1 ) / tmpf3;
            for ( i = 0; i < NFFT; i++ )
            {
                xcorr[i] *= tmpf1;
            }
    }
        else
        {
            for ( i = NFFT / 2; i < NFFT; i++ )
            {
                xcorr[i] = 0.0f;
            }
        }
    }
#else
    tmpf1 = (float) ( NFFT / 2 + 1 ) / tmpf3;
    for ( i = 0; i < NFFT; i++ )
    {
        xcorr[i] *= tmpf1;
    }
#endif
    /*calculate mean E ratio of main to background signal for cohSNR*/
    if ( hCPE->element_mode == IVAS_CPE_DFT )
    {