Commit bbe38f4f authored by sekine's avatar sekine
Browse files

Merge branch 'NTT/ROM_and_ITD_202208' into 'main'

Contribution: Reduction of ROM size and Update of ITD switch for stereo downmix for EVS [Non-BE]

See merge request !98
parents abea63b8 10372244
Loading
Loading
Loading
Loading
Loading

lib_com/options.h

100755 → 100644
+5 −0
Original line number Diff line number Diff line
@@ -154,6 +154,11 @@

#define FIX_I87                                         /*  fix for issue 86: incorrect Ambisonics order set for head rotation in SBA */
#define SBA_ORDER_BITSTREAM                             /* issue 76: Use input sba order for bitstream coding */

/* NTT switches */
#define NTT_UPDATE_ITD_SW                               /* contribution 4: Update of ITD switch in stereo downmix for EVS */
#define NTT_REMOVE_EPS_ROM                              /* contribution 4: Reduction of ROM size in stereo downmix for EVS */

/* ################## End DEVELOPMENT switches ######################### */
/* clang-format on */
#endif
+2 −0
Original line number Diff line number Diff line
@@ -534,6 +534,7 @@ const float ari_bit_estimate_s17_LC[RANGE_N_CONTEXT][RANGE_N_SYMBOLS] =
 * Stereo downmix to EVS ROM tables
 *----------------------------------------------------------------------------------*/

#ifndef NTT_REMOVE_EPS_ROM
const float Stereo_dmx_s_wnd_coef_eps_16k[L_FRAME16k * 3 / 4] = {
    0.00000000f, 0.000385506690f, 0.000770864717f, 0.00115592557f, 0.00154054083f, 0.00192456215f, 0.00230784155f, 0.00269023119f, 0.00307158381f, 0.00345175178f,
    0.00383058959f, 0.00420795102f, 0.00458368938f, 0.00495766103f, 0.00532972161f, 0.00569972629f, 0.00606753491f, 0.00643300405f, 0.00679599261f, 0.00715636183f,
@@ -686,6 +687,7 @@ const float Stereo_dmx_s_wnd_coef_eps_48k[L_FRAME48k * 3 / 4] = {
    -0.00648899190f, -0.00649444433f, -0.00649961829f, -0.00650451379f, -0.00650913082f, -0.00651346892f, -0.00651752809f, -0.00652130833f, -0.00652480870f, -0.00652803015f,
    -0.00653097173f, -0.00653363345f, -0.00653601484f, -0.00653811684f, -0.00653993897f, -0.00654148031f, -0.00654274225f, -0.00654372340f, -0.00654442422f, -0.00654484471f
};
#endif

const float Stereo_dmx_s_wnd_coef_16k[L_FRAME16k >> 4] = {
    0.00154133327f, 0.0138150426f, 0.0380602330f, 0.0736799166f, 0.119797014f, 0.175276011f, 0.238750681f, 0.308658302f, 0.383277327f, 0.460770488f,
+2 −0
Original line number Diff line number Diff line
@@ -120,9 +120,11 @@ extern const uint16_t ECSQ_tab_vals[ECSQ_PARAM_COUNT - 1][1 + ECSQ_TAB_VALS_SIZE
 * Stereo downmix to EVS ROM tables
 *----------------------------------------------------------------------------------*/

#ifndef NTT_REMOVE_EPS_ROM
extern const float Stereo_dmx_s_wnd_coef_eps_16k[L_FRAME16k * 3 / 4];
extern const float Stereo_dmx_s_wnd_coef_eps_32k[L_FRAME32k * 3 / 4];
extern const float Stereo_dmx_s_wnd_coef_eps_48k[L_FRAME48k * 3 / 4];
#endif
extern const float Stereo_dmx_s_wnd_coef_16k[L_FRAME16k >> 4];
extern const float Stereo_dmx_s_wnd_coef_32k[L_FRAME32k >> 4];
extern const float Stereo_dmx_s_wnd_coef_48k[L_FRAME48k >> 4];
+133 −11
Original line number Diff line number Diff line
@@ -63,6 +63,8 @@
#define STEREO_DMX_EVS_DMX_EGY_FORGETTING 0.25f
#define STEREO_DMX_EVS_CORR_FORGETTING    0.78f

/* Margin used in find_poc_peak() (NTT_UPDATE_ITD_SW path): when no channel has newly become active, */
/* the ITD candidate of a channel is selected only if its Q[] exceeds the other channel's Q[] by more than this value */
#define Q_BAND 0.25f

/*-----------------------------------------------------------------------*
 * Local function prototypes
 *-----------------------------------------------------------------------*/
@@ -150,26 +152,34 @@ static void calc_poc(
{
    int16_t i, n1, n2;
    int16_t n0, *itdLR;
    const float *c, *s;
#ifndef NTT_REMOVE_EPS_ROM
	const float *c;
#endif
	const float *s;
    float *P;
    float tmp1, tmp2, Lr, Li, Rr, Ri, gamma, igamma, iN;
    float specPOr[L_FRAME48k], specPOi[L_FRAME48k];
    float tmpPOC1[L_FRAME48k], tmpPOC2[L_FRAME48k];
    float rfft_buf[L_FRAME48k];
    int16_t step, bias;
#ifdef NTT_REMOVE_EPS_ROM
    int16_t i_for;
    int16_t cos_step, cos_max;
    float eps_cos, eps_sin, EPS;
#endif

    /* Initialization */
    iN = 1.0f / (float) input_frame;

#ifndef NTT_REMOVE_EPS_ROM
    c = hPOC->sin + ( input_frame >> 2 );
#endif
    s = hPOC->sin;
    P = hPOC->P;
    n0 = input_frame / 2;
    itdLR = hPOC->itdLR;
    igamma = STEREO_DMX_EVS_POC_GAMMA * iN;
    gamma = 1.0f - igamma;
    set_zero( tmpPOC1, L_FRAME48k );
    set_zero( tmpPOC2, L_FRAME48k );

    if ( input_frame == L_FRAME16k )
    {
@@ -185,6 +195,65 @@ static void calc_poc(
    specPOr[0] = sign( specLr[0] ) * sign( specRr[0] ) * wnd[bias];
    specPOi[0] = 0.0f;

#ifdef NTT_REMOVE_EPS_ROM
    EPS = hPOC->eps;

    if ( input_frame == L_FRAME16k )
    {
        cos_step = 4;
        cos_max = input_frame;
    }
    else /* for 32 kHz & 48 kHz */
    {
        cos_step = 2;
        cos_max = n0;
    }

    for ( i = 1; i < n0 / 2; i++ )
    {
        Lr = specLr[i];
        Li = specLi[i];
        Rr = specRr[i];
        Ri = specRi[i];
        i_for = i * cos_step;
        eps_cos = s[cos_max - i_for] * EPS;
        eps_sin = s[i_for] * EPS;
        Lr += ( specRr[i] * eps_cos + specRi[i] * eps_sin );
        Li += ( -specRr[i] * eps_sin + specRi[i] * eps_cos );
        Rr += ( specLr[i] * eps_cos + specLi[i] * eps_sin );
        Ri += ( -specLr[i] * eps_sin + specLi[i] * eps_cos );
        tmp1 = wnd[i * step + bias] * gamma / ( sqrtf( ( ( Lr * Lr + Li * Li ) ) * ( ( Rr * Rr + Ri * Ri ) ) ) + EPS );

        specPOr[i] = ( Lr * Rr + Li * Ri ) * tmp1;
        specPOi[i] = ( Lr * Ri - Li * Rr ) * tmp1;

        gamma -= igamma;
    }

    for ( i = n0 >> 1; i < n0; i++ )
    {
        Lr = specLr[i];
        Li = specLi[i];
        Rr = specRr[i];
        Ri = specRi[i];

        i_for = ( n0 - i ) * cos_step;
        eps_cos = s[cos_max - i_for] * EPS;
        eps_sin = s[i_for] * EPS;

        Lr += ( -specRr[i] * eps_cos + specRi[i] * eps_sin );
        Li += ( -specRr[i] * eps_sin - specRi[i] * eps_cos );
        Rr += ( -specLr[i] * eps_cos + specLi[i] * eps_sin );
        Ri += ( -specLr[i] * eps_sin - specLi[i] * eps_cos );

        tmp1 = wnd[i * step + bias] * gamma / ( sqrtf( ( ( Lr * Lr + Li * Li ) ) * ( ( Rr * Rr + Ri * Ri ) ) ) + EPS );

        specPOr[i] = ( Lr * Rr + Li * Ri ) * tmp1;
        specPOi[i] = ( Lr * Ri - Li * Rr ) * tmp1;
        gamma -= igamma;
    }
    //end NTT_REMOVE_EPS_ROM
#else
    for ( i = 1; i < n0; i++ )
    {
        Lr = specLr[i];
@@ -204,6 +273,8 @@ static void calc_poc(

        gamma -= igamma;
    }
#endif //end !NTT_REMOVE_EPS_ROM

    specPOr[n0] = sign( specLr[i] ) * sign( specRr[i] ) * wnd[i * step + bias] * gamma;

    rfft_buf[0] = specPOr[0];
@@ -305,7 +376,7 @@ static float find_poc_peak(
    itd_cand[0] = itd_cand[1] = 0;
    P = hPOC->P;

    for ( i = 1; i < hPOC->shift_limit; i++ )
    for ( i = 1; i < hPOC->shift_limit; i++ ) /*find peaks of POC P[] with positive and negative ITD */
    {
        if ( P[Lh - i] > Q[0] )
        {
@@ -340,7 +411,7 @@ static float find_poc_peak(
        Q[n] = ( 1.0f - ( cQ[n] / ( peak_range * 2 + 1 ) + eps2 ) / ( Q[n] + eps2 ) );
        Q[n] = max( Q[n], 0.0f );

        if ( on[n] )
        if ( on[n] ) /*if channel n was active (likely to be preceding) in the previous frame*/
        {
            tmpf = ( 0.3f - 0.2f * (float) abs( itd_cand[n] ) / (float) hPOC->shift_limit ) * peakQ[n];
            if ( Q[n] < tmpf )
@@ -357,7 +428,7 @@ static float find_poc_peak(

            peakQ[n] = max( peakQ[n], Q[n] );
        }
        else
        else /*if channel n was not active (not likely to be preceding) in the previous frame*/
        {
            tmpf = ( 0.75f - 0.2f * (float) abs( itd_cand[n] ) / (float) hPOC->shift_limit );

@@ -374,6 +445,7 @@ static float find_poc_peak(
        }
    }

#ifndef NTT_UPDATE_ITD_SW
    if ( on[0] && prev_off[0] )
    {
        *itd = (float) itdLR[0];
@@ -386,8 +458,38 @@ static float find_poc_peak(
    {
        *itd = ( *itd > 0 ) ? (float) itdLR[0] : (float) itdLR[1];
    }
#else
    if ( ( on[0] && prev_off[0] ) && ( on[1] && prev_off[1] ) ) /* if both channels have been newly detected as active (possibly preceding), select the channel by the peakness Q[] of the POC */
    {
        *itd = ( Q[0] > Q[1] ) ? (float) itdLR[0] : (float) itdLR[1];
    }
    else if ( ( on[0] && prev_off[0] ) && ( Q[0] > ( Q[1] - 0.1 ) ) ) /* if channel 0 becomes active, select channel 0*/
    {
        *itd = (float) itdLR[0];
    }
    else if ( ( on[1] && prev_off[1] ) && ( Q[1] > ( Q[0] - 0.1 ) ) ) /* if channel 1 becomes active, select channel 1 */
    {
        *itd = (float) itdLR[1];
    }
    else if ( Q[0] > ( Q[1] + Q_BAND ) ) /* if no status change, use Q[]*/
    {
        *itd = (float) itdLR[0];
    }
    else if ( Q[1] > ( Q[0] + Q_BAND ) ) /* if no status change, use Q[]*/
    {
        *itd = (float) itdLR[1];
    }
    else if ( *itd == 0.0 ) /*if no channels are likely to be preceding, follow the status of the previous frame*/
    {
        *itd = 0;
    }
    else /*follow the status of the previous frame*/
    {
        *itd = ( *itd > 0 ) ? (float) itdLR[0] : (float) itdLR[1];
    }
#endif

    cconfidence = sqrtf( fabsf( Q[0] - Q[1] ) );
    cconfidence = sqrtf( fabsf( Q[0] - Q[1] ) ); /*higher value indicates higher confidence for one preceding channel*/

    return hPOC->confidence = hPOC->confidence * STEREO_DMX_EVS_CORR_FORGETTING + cconfidence * ( 1.0f - STEREO_DMX_EVS_CORR_FORGETTING );
}
@@ -766,6 +868,24 @@ ivas_error stereo_dmx_evs_init_encoder(
    }
    hStereoDmxEVS->hPOC->eps = 2.0f * EVS_PI / ( (float) input_frame );

#ifdef NTT_REMOVE_EPS_ROM
    if ( input_frame == L_FRAME16k )
    {
        hStereoDmxEVS->hPOC->sin = dft_trigo_32k;
    }
    else if ( input_frame == L_FRAME32k )
    {
        hStereoDmxEVS->hPOC->sin = dft_trigo_32k;
    }
    else if ( input_frame == L_FRAME48k )
    {
        hStereoDmxEVS->hPOC->sin = dft_trigo_48k;
    }
    else
    {
        return IVAS_ERROR( IVAS_ERR_INTERNAL_FATAL, "invalid frame length\n" );
    }
#else
    if ( input_frame == L_FRAME16k )
    {
        hStereoDmxEVS->hPOC->sin = Stereo_dmx_s_wnd_coef_eps_16k;
@@ -782,6 +902,8 @@ ivas_error stereo_dmx_evs_init_encoder(
    {
        return IVAS_ERROR( IVAS_ERR_INTERNAL_FATAL, "invalid frame length\n" );
    }
#endif

    hStereoDmxEVS->hPOC->confidence = 0.0f;

    *hStereoDmxEVS_out = hStereoDmxEVS;