Commit 3eff4828 authored by Stephane Ragot
Browse files

algorithmic tuning

parent dc4ed53f
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -995,9 +995,12 @@ typedef struct stereo_dmx_evs_correlation_filter_structure
    float fad_g_prc[L_FRAME48k];
    int16_t fad_len_prc;

    float dmx_ener;
    float dmx_pha_ener;
    float dmx_poc_ener;
    float dmx_old_gain;

    float aux_energy[CPE_CHANNELS];

} STEREO_DMX_EVS_PHA_DATA, *STEREO_DMX_EVS_PHA_HANDLE;

#endif
+135 −109
Original line number Diff line number Diff line
@@ -67,12 +67,12 @@

#ifdef ENHANCED_STEREO_DMX

#define STEREO_DMX_EVS_PHA_LEN_16 15.0f
#define STEREO_DMX_EVS_FAD_LEN_16 2.0f
#define STEREO_DMX_EVS_PHA_LEN_32 5.0f
#define STEREO_DMX_EVS_FAD_LEN_32 1.0f
#define STEREO_DMX_EVS_PHA_LEN_48 5.0f
#define STEREO_DMX_EVS_FAD_LEN_48 0.5f
/* Filter (PHA) and cross-fade (FAD) lengths, one pair per supported input
 * sampling rate. stereo_dmx_evs_init_encoder() assigns these values directly
 * to f_len and hPHA->fad_len with no sampling-rate scaling, and fad_len is
 * used as a per-sample loop bound, so the values are lengths in samples.
 * The trailing comment on each line is the equivalent duration
 * (value / sampling rate). */
#define STEREO_DMX_EVS_PHA_LEN_16 240 /* 15 ms at 16 kHz */
#define STEREO_DMX_EVS_FAD_LEN_16 160 /* 10 ms at 16 kHz */
#define STEREO_DMX_EVS_PHA_LEN_32 160 /*  5 ms at 32 kHz */
#define STEREO_DMX_EVS_FAD_LEN_32 320 /* 10 ms at 32 kHz */
#define STEREO_DMX_EVS_PHA_LEN_48 240 /*  5 ms at 48 kHz */
#define STEREO_DMX_EVS_FAD_LEN_48 240 /*  5 ms at 48 kHz */

#define STEREO_DMX_EVS_ISD_THRES       1.3f
#define STEREO_DMX_EVS_ISD_DIST_THRES  0.42f
@@ -82,6 +82,11 @@
#define STEREO_DMX_EVS_SWTCH_PRC_HYS_THRES 1
#define STEREO_DMX_EVS_FADE_LEN_PRC        20.0f

/* Constants of the sub-frame transient detector in stereo_dmx_evs_enc().
 * When any test below fires for either channel, the encoder forces
 * curr_prc to STEREO_DMX_EVS_PRC_POC and resets prc_hys_cnt. */
#define STEREO_DMX_EVS_NB_SBFRM       5      /* number of sub-frames per input frame (input_subframe = input_frame / NB_SBFRM) */
#define STEREO_DMX_EVS_TRNS_DTC_INST  30.0f  /* transient if energy(sub-frame m) / energy(sub-frame m-1) exceeds this */
#define STEREO_DMX_EVS_CRST_FCTR      25.0f  /* transient if sub-frame energy / smoothed per-channel aux_energy exceeds this */
#define STEREO_DMX_EVS_EGY_FORGETTING 0.75f  /* weight of the previous aux_energy in its recursive update; (1 - this) weights the new sub-frame energy */

#endif

/*-----------------------------------------------------------------------*
@@ -455,9 +460,6 @@ static void calc_poc(
        hPHA->prev_pha = STEREO_DMX_EVS_PHA_IPD2;
    }

    if (hPHA->curr_pha != STEREO_DMX_EVS_NO_PHA)
    {

        if (hPHA->curr_pha == STEREO_DMX_EVS_PHA_IPD)
        {
            ipd_ff = hPHA->ipd_ff;
@@ -543,12 +545,11 @@ static void calc_poc(
            p_curr_taps_l2r = hPHA->p_curr_taps[0];

            p_curr_taps_l2r[0] = p_curr_taps[0];
            for ( i = 1; i < input_frame; i++ )
        for ( i = 1; i < hPHA->pha_len; i++ )
            {
                p_curr_taps_l2r[i] = p_curr_taps[input_frame-i];
            }
        }
    }

    for ( n = 0; n < CPE_CHANNELS; n++ )
    {
@@ -564,10 +565,10 @@ static void calc_poc(
            {
                energy += hPHA->p_curr_taps[n][i] *hPHA->p_curr_taps[n][i];
            }
            energy = sqrtf( energy );
            energy = 1.0/sqrtf( energy );
            for ( i = 0; i < hPHA->pha_len; i++ )
            {
                hPHA->p_curr_taps[n][i] /= energy;
                hPHA->p_curr_taps[n][i] *= energy;
            }
        }
    }
@@ -1040,10 +1041,12 @@ void stereo_dmx_evs_enc(

#ifdef ENHANCED_STEREO_DMX
    int16_t k, m, pha_len, fad_len;
    float mem_out_curr[CPE_CHANNELS][L_FRAME48k], mem_out_last[L_FRAME48k];
    float *p_data_mem, *p_prev_taps, *p_curr_taps, *fad_g, *p_mem_out_curr, *p_data_f;
    float dmx_poc_data[L_FRAME48k], dmx_pha_data[L_FRAME48k], *p_dmx_data, dmx_gain;
    float mem_prev[L_FRAME48k];
    float *p_data_mem, *p_prev_taps, *p_curr_taps, *fad_g, *p_data;
    float dmx_poc_data[L_FRAME48k], dmx_pha_data[L_FRAME48k], *p_dmx_data, dmx_gain, ftmp;
    STEREO_DMX_EVS_PRC curr_prc;
    int16_t is_transient, input_subframe;
    float *p_sub_frame, subframe_energy[STEREO_DMX_EVS_NB_SBFRM];
#else
    float dmx_data[L_FRAME48k];
#endif
@@ -1067,110 +1070,86 @@ void stereo_dmx_evs_enc(
    
    estimate_itd( &corr, hStereoDmxEVS->hPOC, hStereoDmxEVS->hPHA, data_f[0], data_f[1], &hStereoDmxEVS->itd, input_frame );

    // poc

    if ( hStereoDmxEVS->itd )
    {
        dmx_weight = ( ( hStereoDmxEVS->itd > 0 ) ? ( -1 ) : 1 ) * 0.5f * corr + 0.5f;
    }
    else
    {
        dmx_weight = 0.5f;
    }

    create_M_signal( data_f[0], data_f[1], dmx_poc_data, dmx_weight, input_frame, hStereoDmxEVS->s_wnd,
                     hStereoDmxEVS->dmx_weight, hStereoDmxEVS->pre_dmx_energy, hStereoDmxEVS->aux_dmx_energy );

    // pha

    pha_len = hStereoDmxEVS->hPHA->pha_len;
    fad_len = hStereoDmxEVS->hPHA->fad_len;
    fad_g = hStereoDmxEVS->hPHA->fad_g;

    set_zero(dmx_pha_data, input_frame);
    set_zero(mem_prev, fad_len);

    for ( k = 0; k < CPE_CHANNELS; k++ )
    {
        p_prev_taps = hStereoDmxEVS->hPHA->p_prev_taps[k];
        p_curr_taps = hStereoDmxEVS->hPHA->p_curr_taps[k];
        p_mem_out_curr = mem_out_curr[k];
        p_data_f = data_f[k];

        p_data = data_f[k];
        p_data_mem = hStereoDmxEVS->hPHA->data_mem[k];
        mvr2r( &( p_data_mem[input_frame] ), p_data_mem, pha_len );
        p_data_mem = &( p_data_mem[pha_len] );
        mvr2r( p_data_f, p_data_mem, input_frame );
        mvr2r( p_data, p_data_mem, input_frame );

        p_prev_taps = hStereoDmxEVS->hPHA->p_prev_taps[k];
        if (p_prev_taps)
        {
            for (n = 0; n < fad_len; n++)
            {
                mem_out_last[n] = 0;
                for ( m = 0; m < pha_len; m++ )
                {
                    mem_out_last[n] += p_data_mem[n - m] * p_prev_taps[m];
                for (ftmp = 0, m = 0; m < pha_len; m++) {
                    ftmp += p_data_mem[n - m] * p_prev_taps[m];
                }
                mem_out_last[n] *= fad_g[fad_len - (1+n)];
                mem_prev[n] += ftmp;
            }
        }

        if (p_curr_taps)
        {
            if (p_prev_taps == NULL)
        else
            {
                for (n = 0; n < fad_len; n++)
                {
                    mem_out_last[n] = fad_g[fad_len - (1+n)] * p_data_f[n];
                mem_prev[n] += p_data[n];
                }
            }

            for (n = 0; n < fad_len; n++)
            {
                p_mem_out_curr[n] = 0;
                for ( m = 0; m < pha_len; m++ )
                {
                    p_mem_out_curr[n] += p_data_mem[n - m] * p_curr_taps[m];
                }
                p_mem_out_curr[n] = fad_g[n] * p_mem_out_curr[n] + mem_out_last[n];
            }
            for (; n < input_frame; n++)
        p_curr_taps = hStereoDmxEVS->hPHA->p_curr_taps[k];
        if (p_curr_taps) {
            for (n = 0; n < input_frame; n++)
            {
                p_mem_out_curr[n] = 0;
                for (m = 0; m < pha_len; m++)
                for (ftmp = 0, m = 0; m < pha_len; m++)
                {
                    p_mem_out_curr[n] += p_data_mem[n - m] * p_curr_taps[m];
                    ftmp += p_data_mem[n - m] * p_curr_taps[m];
                }
                dmx_pha_data[n] += ftmp;
            }
        }
        else
        {
            if (p_prev_taps)
            {
                for (n = 0; n < fad_len; n++)
                {
                    p_mem_out_curr[n] = fad_g[n] * p_data_f[n] + mem_out_last[n];
                } 
                mvr2r( &(p_data_f[fad_len]), &(p_mem_out_curr[fad_len]), input_frame-fad_len );
            }
            else
            for (n = 0; n < input_frame; n++)
                {
                mvr2r( p_data_f, p_mem_out_curr, input_frame );
                dmx_pha_data[n] += p_data[n];
    }
    }
    }

    // poc

    if ( hStereoDmxEVS->itd )
    for ( n=0, m=(fad_len-1); n < fad_len; n++, m-- )
    {
        dmx_weight = ( ( hStereoDmxEVS->itd > 0 ) ? ( -1 ) : 1 ) * 0.5f * corr + 0.5f;
    }
    else
    {
        dmx_weight = 0.5f;
        dmx_pha_data[n] *= fad_g[n];
        dmx_pha_data[n] += (mem_prev[n]) * fad_g[m];
    }

    create_M_signal( data_f[0], data_f[1], dmx_poc_data, dmx_weight, input_frame, hStereoDmxEVS->s_wnd,
                     hStereoDmxEVS->dmx_weight, hStereoDmxEVS->pre_dmx_energy, hStereoDmxEVS->aux_dmx_energy );

    // pha

    for ( n=0; n < input_frame; n++ )
    {
        dmx_pha_data[n] = (mem_out_curr[0][n] + mem_out_curr[1][n])*0.5f;
    }

    /*calc_energy( dmx_pha_data, dmx_pha_data, &(hStereoDmxEVS->hPHA->dmx_ener), input_frame, STEREO_DMX_EVS_DMX_EGY_FORGETTING );
    dmx_gain = sqrtf((hStereoDmxEVS->aux_dmx_energy[0] + hStereoDmxEVS->aux_dmx_energy[1]) / (hStereoDmxEVS->hPHA->dmx_ener));
    *//*if (hStereoDmxEVS->aux_dmx_energy[0] > hStereoDmxEVS->aux_dmx_energy[1]) {
        dmx_gain = sqrtf( ( hStereoDmxEVS->aux_dmx_energy[0] + EPSILON ) / ( hStereoDmxEVS->hPHA->dmx_ener + EPSILON ) );
    } else {
        dmx_gain = sqrtf( ( hStereoDmxEVS->aux_dmx_energy[1] + EPSILON ) / ( hStereoDmxEVS->hPHA->dmx_ener + EPSILON ) );
    }*//*
    calc_energy( dmx_pha_data, dmx_pha_data, &(hStereoDmxEVS->hPHA->dmx_pha_ener), input_frame, STEREO_DMX_EVS_DMX_EGY_FORGETTING );
    dmx_gain = INV_SQRT_2 * sqrtf((hStereoDmxEVS->aux_dmx_energy[0] + hStereoDmxEVS->aux_dmx_energy[1]) / (hStereoDmxEVS->hPHA->dmx_pha_ener));
    adapt_gain(dmx_pha_data, dmx_pha_data, dmx_gain, hStereoDmxEVS->hPHA->dmx_old_gain, input_frame, hStereoDmxEVS->s_wnd);
    hStereoDmxEVS->hPHA->dmx_old_gain = dmx_gain;*/
    hStereoDmxEVS->hPHA->dmx_old_gain = dmx_gain;

    // prc switch

@@ -1216,8 +1195,43 @@ void stereo_dmx_evs_enc(
        hStereoDmxEVS->hPHA->prev_prc = STEREO_DMX_EVS_PRC_PHA;
    }
 
    if ( hStereoDmxEVS->hPHA->curr_prc == STEREO_DMX_EVS_PRC_POC )
    input_subframe = input_frame / STEREO_DMX_EVS_NB_SBFRM;
    is_transient = 0;
    for ( k = 0; k < CPE_CHANNELS; k++ )
    {     
        for (m = 0; m < STEREO_DMX_EVS_NB_SBFRM; m++)
        {
            p_sub_frame = &(data_f[k][m * input_subframe]);
            subframe_energy[m] = 0;
            for ( n=0 ; n < input_subframe; n++ )
            {
                subframe_energy[m] += p_sub_frame[n] * p_sub_frame[n];
            }

            if (subframe_energy[m] / (hStereoDmxEVS->hPHA->aux_energy[k] + EPSILON) > STEREO_DMX_EVS_CRST_FCTR)
            {
                is_transient = 1;
            }

            hStereoDmxEVS->hPHA->aux_energy[k] = STEREO_DMX_EVS_EGY_FORGETTING * hStereoDmxEVS->hPHA->aux_energy[k] + (1.0f - STEREO_DMX_EVS_EGY_FORGETTING) * subframe_energy[m];
        }

        for (m = 1; m < STEREO_DMX_EVS_NB_SBFRM; m++)
    {
            if (subframe_energy[m]/(subframe_energy[m-1] + EPSILON) > STEREO_DMX_EVS_TRNS_DTC_INST)
            {
                is_transient = 1;
            }
        }
    }

    if (is_transient == 1)
    {
        hStereoDmxEVS->hPHA->curr_prc = STEREO_DMX_EVS_PRC_POC;
        hStereoDmxEVS->hPHA->prc_hys_cnt = 0;
    }

    if (hStereoDmxEVS->hPHA->curr_prc == STEREO_DMX_EVS_PRC_POC) {
        p_dmx_data = dmx_poc_data;

        if (curr_prc != hStereoDmxEVS->hPHA->curr_prc)
@@ -1225,10 +1239,10 @@ void stereo_dmx_evs_enc(
            fad_len = hStereoDmxEVS->hPHA->fad_len_prc;
            fad_g = hStereoDmxEVS->hPHA->fad_g_prc;

            for (n = 0; n < fad_len; n++)
            for (n = 0, m=(fad_len-1); n < fad_len; n++, m--)
            {
                p_dmx_data[n] *= fad_g[n];
                p_dmx_data[n] += (1.0f-fad_g[n]) * dmx_pha_data[n];
                p_dmx_data[n] += fad_g[m] * dmx_pha_data[n];
            }
        }
    }
@@ -1241,10 +1255,10 @@ void stereo_dmx_evs_enc(
            fad_len = hStereoDmxEVS->hPHA->fad_len_prc;
            fad_g = hStereoDmxEVS->hPHA->fad_g_prc;

            for (n = 0; n < fad_len; n++)
            for (n = 0, m=(fad_len-1); n < fad_len; n++, m--)
            {
                p_dmx_data[n] *= fad_g[n];
                p_dmx_data[n] += (1.0f-fad_g[n]) * dmx_poc_data[n];
                p_dmx_data[n] += fad_g[m] * dmx_poc_data[n];
            }
        }
    }
@@ -1290,8 +1304,8 @@ ivas_error stereo_dmx_evs_init_encoder(
    int16_t n, input_frame;

#ifdef ENHANCED_STEREO_DMX
    int16_t f_len, pha_len, fad_len;
    float *win, *fad_g;
    int16_t m, f_len, pha_len, fad_len, fad_len2, trans_len;
    float *win, *fad_g, fad_r, tmp_r;

    float a_min, a_max, a_step, n0, itrh;
    float *ipd_ff;
@@ -1406,18 +1420,18 @@ ivas_error stereo_dmx_evs_init_encoder(

    if ( input_Fs == 16000 )
    {
        f_len = (int16_t)(STEREO_DMX_EVS_PHA_LEN_16 * (float)input_Fs / 1000.0f);
        hStereoDmxEVS->hPHA->fad_len = (int16_t)(STEREO_DMX_EVS_FAD_LEN_16 * (float)input_Fs / 1000.0f);
        f_len = STEREO_DMX_EVS_PHA_LEN_16;
        hStereoDmxEVS->hPHA->fad_len = STEREO_DMX_EVS_FAD_LEN_16;
    }
    else if ( input_Fs == 32000 )
    {
        f_len = (int16_t)(STEREO_DMX_EVS_PHA_LEN_32 * (float)input_Fs / 1000.0f);
        hStereoDmxEVS->hPHA->fad_len = (int16_t)(STEREO_DMX_EVS_FAD_LEN_32 * (float)input_Fs / 1000.0f);
        f_len = STEREO_DMX_EVS_PHA_LEN_32;
        hStereoDmxEVS->hPHA->fad_len = STEREO_DMX_EVS_FAD_LEN_32;
    }
    else if ( input_Fs == 48000 )
    {
        f_len = (int16_t)(STEREO_DMX_EVS_PHA_LEN_48 * (float)input_Fs / 1000.0f);
        hStereoDmxEVS->hPHA->fad_len = (int16_t)(STEREO_DMX_EVS_FAD_LEN_48 * (float)input_Fs / 1000.0f);
        f_len = STEREO_DMX_EVS_PHA_LEN_48;
        hStereoDmxEVS->hPHA->fad_len = STEREO_DMX_EVS_FAD_LEN_48;
    }
    else
    {
@@ -1429,21 +1443,25 @@ ivas_error stereo_dmx_evs_init_encoder(
    pha_len = hStereoDmxEVS->hPHA->pha_len;
    fad_len = hStereoDmxEVS->hPHA->fad_len;

    win = hStereoDmxEVS->hPHA->win;
    set_zero( win, L_FRAME48k );
    for ( n = 0; n < pha_len; n++ )
    {
        win[n] = 0.5f * (1.0f + cosf( ( PI2 * ( n + 1 ) ) / ( ( f_len ) + 1 ) ) );
    trans_len = (int16_t)((float)pha_len / 20.0f);
    set_f(hStereoDmxEVS->hPHA->win, 1.8f, pha_len - trans_len);
    hStereoDmxEVS->hPHA->win[0] = 1.0f;
    tmp_r = 1.0f / ((trans_len * 2) + 1);
    win = &(hStereoDmxEVS->hPHA->win[pha_len - trans_len]);
    for (n = 0; n < trans_len; n++) {
        win[n] = (0.5f * (1.0f + cosf( ( PI2 * ( n + 1 ) ) * tmp_r ) ))*1.8;
    }

    fad_g = hStereoDmxEVS->hPHA->fad_g;
    for ( n = 0; n < fad_len; n++ )
    {
        fad_g[n] = (float) n / (float)fad_len;
    fad_r = 1.0f / (float)(fad_len + 1);
    fad_len2 = fad_len / 2;
    for (n = 0, m=(fad_len-1); n < fad_len2; n++, m--) {
        fad_g[n] = (float)(n+1) * fad_r;
        fad_g[m] = 1.0f - fad_g[n];
    }

    hStereoDmxEVS->hPHA->curr_pha = STEREO_DMX_EVS_NO_PHA;
    hStereoDmxEVS->hPHA->prev_pha = STEREO_DMX_EVS_NO_PHA;
    hStereoDmxEVS->hPHA->curr_pha = STEREO_DMX_EVS_PHA_IPD;
    hStereoDmxEVS->hPHA->prev_pha = STEREO_DMX_EVS_PHA_IPD;
    hStereoDmxEVS->hPHA->pha_hys_cnt = 0;

    // Compute the forgetting factor
@@ -1477,13 +1495,21 @@ ivas_error stereo_dmx_evs_init_encoder(
    hStereoDmxEVS->hPHA->fad_len_prc = (int16_t)(STEREO_DMX_EVS_FADE_LEN_PRC * (float)input_Fs / 1000.0);
    fad_len = hStereoDmxEVS->hPHA->fad_len_prc;
    fad_g = hStereoDmxEVS->hPHA->fad_g_prc;
    for ( n = 0; n < fad_len; n++ )
    {
        fad_g[n] = (float) n / (float)fad_len;
    fad_r = 1.0f / (float)(fad_len + 1);
    fad_len2 = fad_len / 2;
    for (n = 0, m=(fad_len-1); n < fad_len2; n++, m--) {
        fad_g[n] = (float)(n+1) * fad_r;
        fad_g[m] = 1.0f - fad_g[n];
    }

    hStereoDmxEVS->hPHA->dmx_ener = 0;
    hStereoDmxEVS->hPHA->dmx_old_gain = 0;
    hStereoDmxEVS->hPHA->dmx_pha_ener = 0;
    hStereoDmxEVS->hPHA->dmx_poc_ener = 0;
    hStereoDmxEVS->hPHA->dmx_old_gain = 1.;

    for ( n = 0; n < CPE_CHANNELS; n++ )
    {
        hStereoDmxEVS->hPHA->aux_energy[n] = 0.0f;
    }

#endif