Commit b696a87b authored by Tapani Pihlajakuja
Browse files

Fix rest of issue 355 by refactoring parametric binauralizer to 5ms subframe resolution.

parent 4a2c526e
Loading
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -864,6 +864,9 @@ typedef enum {
// VE: this should be renamed to e.g. N_SPATIAL_SUBFRAMES
#define MAX_PARAM_SPATIAL_SUBFRAMES             4                           /* Maximum number of subframes for parametric spatial coding */
#define L_SPATIAL_SUBFR_48k                     (L_FRAME48k / MAX_PARAM_SPATIAL_SUBFRAMES) /* L_FRAME48k divided evenly across the spatial subframes */
#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
#define CLDFB_SLOTS_PER_SUBFRAME                ( CLDFB_NO_COL_MAX / MAX_PARAM_SPATIAL_SUBFRAMES ) /* Number of CLDFB slots per subframe (CLDFB_NO_COL_MAX split across the spatial subframes) */
#endif


/*----------------------------------------------------------------------------------*
+9 −1
Original line number Diff line number Diff line
@@ -4691,13 +4691,21 @@ void ivas_masa_prerender(
    const int16_t output_frame                                  /* i  : output frame length per channel                 */
);

#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
/* Subframe-based variant: the CLDFB input buffers hold CLDFB_SLOTS_PER_SUBFRAME slots per channel
 * and exactly one subframe is mapped per call. */
void ivas_spar_param_to_masa_param_mapping(
    Decoder_Struct *st_ivas,                                       /* i/o: IVAS decoder struct                          */
    float inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i  : Input audio in CLDFB domain, real            */
    float inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i  : Input audio in CLDFB domain, imag            */
    const int16_t subframe                                         /* i  : Subframe to map                              */
);
#else
/* Frame-based variant: the CLDFB input buffers hold CLDFB_NO_COL_MAX slots per channel
 * and a range of subframes (firstSubframe, nSubframes) is mapped per call. */
void ivas_spar_param_to_masa_param_mapping(
    Decoder_Struct *st_ivas,                                    /* i/o: IVAS decoder struct                             */
    float inRe[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX],      /* i  : Input audio in CLDFB domain, real               */
    float inIm[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX],      /* i  : Input audio in CLDFB domain, imag               */
    const int16_t firstSubframe,                                /* i  : First subframe to map                           */
    const int16_t nSubframes                                    /* i  : Number of subframes to map                      */
);
#endif


/*---------------------------------------------------------------------------------*
+2 −0
Original line number Diff line number Diff line
@@ -169,6 +169,8 @@

#define EUALER2QUAT_FIX                                 /* Dlb: fix for issue 430 in euler2quat, sign of quat y is inverted */
#define HR_METADATA                                     /* Nok: encode directional MASA metadata with more bits at 384k and 512k */
#define FIX_355_REFACTOR_PARAMBIN_TO_5MS                /* Nokia: Fixes issue 355 by refactoring parametric binauralizer code to 5 ms mode */

/* ################## End DEVELOPMENT switches ######################### */
/* clang-format on */
#endif
+21 −1
Original line number Diff line number Diff line
@@ -426,6 +426,15 @@ static ivas_error ivas_binaural_hrtf_open(
 *
 *-------------------------------------------------------------------------*/

#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
static void ivas_binaural_obtain_DMX(
    const int16_t numTimeSlots,
    BINAURAL_RENDERER_HANDLE hBinRenderer,                               /* i/o: fastconv binaural renderer handle */
    float RealBuffer[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i  : Contains the LS signals           */
    float ImagBuffer[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i  : Contains the LS signals           */
    float realDMX[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX],
    float imagDMX[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX] )
#else
static void ivas_binaural_obtain_DMX(
    const int16_t numTimeSlots,
    BINAURAL_RENDERER_HANDLE hBinRenderer,                                  /* i/o: fastconv binaural renderer handle */
@@ -433,6 +442,7 @@ static void ivas_binaural_obtain_DMX(
    float ImagBuffer[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* i  : Contains the LS signals           */
    float realDMX[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX],
    float imagDMX[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX] )
#endif
{
    int16_t chIdx, bandIdx, k;

@@ -1029,11 +1039,17 @@ void ivas_binRenderer(
    /* Obtain the binaural dmx and compute the reverb */
    if ( hBinRenderer->hReverb != NULL )
    {
#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
        float reverbRe[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX];
        float reverbIm[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX];
        float inRe[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX];
        float inIm[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX];
#else
        float reverbRe[BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
        float reverbIm[BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
        float inRe[BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
        float inIm[BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];

#endif
        ivas_binaural_obtain_DMX( numTimeSlots, hBinRenderer, RealBuffer, ImagBuffer, inRe, inIm );

        for ( chIdx = 0; chIdx < BINAURAL_CHANNELS; chIdx++ )
@@ -1045,7 +1061,11 @@ void ivas_binRenderer(
            }
        }

#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
        ivas_binaural_reverb_processSubframe( hBinRenderer->hReverb, BINAURAL_CHANNELS, inRe, inIm, reverbRe, reverbIm );
#else
        ivas_binaural_reverb_processFrame( hBinRenderer->hReverb, BINAURAL_CHANNELS, inRe, inIm, reverbRe, reverbIm, 0u );
#endif

        /* Add the conv module and reverb module output */
        for ( chIdx = 0; chIdx < BINAURAL_CHANNELS; chIdx++ )
+207 −0
Original line number Diff line number Diff line
@@ -1244,6 +1244,212 @@ ivas_error ivas_masa_dec_reconfigure(
 * Determine MASA metadata from the SPAR metadata
 *-------------------------------------------------------------------*/

#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
/*
 * Single-subframe variant of the SPAR -> MASA parameter mapping.
 *
 * For the given subframe, the SPAR mixing matrices are fetched (with the
 * metadata delay applied), expanded from parameter bands to CLDFB bins, and
 * used together with the transport-signal statistics to estimate an FOA
 * covariance matrix per bin. From that, direction (azimuth/elevation),
 * energy ratio and diffuseness are written into hDirAC at the slot given by
 * hDirAC->dirac_read_idx. If hDirAC->hDiffuseDist is set, the directional
 * distribution of the diffuse energy is written for this subframe as well.
 *
 * Only the bins below band_grouping[SPAR_DIRAC_SPLIT_START_BAND] are processed.
 *
 * NOTE(review): `subframe` is not range-checked here; it indexes
 * hDiffuseDist->diffuseRatio{X,Y,Z} and selects the mixing matrix, so it is
 * presumably expected to be in [0, MAX_PARAM_SPATIAL_SUBFRAMES) - confirm with the caller.
 */
void ivas_spar_param_to_masa_param_mapping(
    Decoder_Struct *st_ivas,                                       /* i/o: IVAS decoder struct               */
    float inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i  : Input audio in CLDFB domain, real */
    float inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i  : Input audio in CLDFB domain, imag */
    const int16_t subframe                                         /* i  : Subframe to map                   */
)
{
    int16_t i, j, band, bin, slot, ch, nBins, nchan_transport;
    int16_t mixer_mat_index;
    int16_t dirac_write_idx;
    DIRAC_DEC_HANDLE hDirAC;
    DIFFUSE_DISTRIBUTION_HANDLE hDiffuseDist;
    /* Scratch mixing matrices for the single subframe handled by this call.
     * No per-subframe dimension is needed, since only one subframe is processed. */
    float mixer_mat_bands_real[SPAR_DIRAC_SPLIT_START_BAND][FOA_CHANNELS][FOA_CHANNELS];
    float mixer_mat_bins_real[CLDFB_NO_CHANNELS_MAX][FOA_CHANNELS][FOA_CHANNELS];
    int16_t *band_grouping;
    int16_t band_start, band_end;
    float transportSignalEnergies[2][CLDFB_NO_CHANNELS_MAX];
    float transportSignalCrossCorrelation[CLDFB_NO_CHANNELS_MAX];
    float instEne;
    float inCovarianceMtx[FOA_CHANNELS][FOA_CHANNELS];
    float foaCovarianceMtx[FOA_CHANNELS][FOA_CHANNELS];
    float Iy, Iz, Ix, E, azi, ele, I, ratio;
    float diffuseGainX, diffuseGainY, diffuseGainZ, diffuseGainSum;

    /* Set values */
    hDirAC = st_ivas->hDirAC;
    hDirAC->numSimultaneousDirections = 1;
    hDiffuseDist = st_ivas->hDirAC->hDiffuseDist;
    nchan_transport = st_ivas->nchan_transport;
    band_grouping = hDirAC->band_grouping;
    dirac_write_idx = hDirAC->dirac_read_idx; /* Mixing matrices, from which MASA meta is determined, already have the delay compensation */

    /* Init arrays; off-diagonal terms not set below must stay zero */
    for ( i = 0; i < FOA_CHANNELS; i++ )
    {
        set_zero( inCovarianceMtx[i], FOA_CHANNELS );
    }

    /* Delay the SPAR mixing matrices to have them synced with the audio */
    if ( subframe < SPAR_META_DELAY_SUBFRAMES )
    {
        /* Early subframes read from the stored previous mixing matrices */
        mixer_mat_index = subframe + MAX_PARAM_SPATIAL_SUBFRAMES - SPAR_META_DELAY_SUBFRAMES + 1;
        for ( band = 0; band < SPAR_DIRAC_SPLIT_START_BAND; band++ )
        {
            for ( i = 0; i < FOA_CHANNELS; i++ )
            {
                for ( j = 0; j < FOA_CHANNELS; j++ )
                {
                    mixer_mat_bands_real[band][i][j] = st_ivas->hSpar->hMdDec->mixer_mat_prev[mixer_mat_index][i][j][band];
                }
            }
        }
    }
    else
    {
        /* Later subframes read from the current mixing matrices, offset by the metadata delay */
        mixer_mat_index = subframe - SPAR_META_DELAY_SUBFRAMES;
        for ( band = 0; band < SPAR_DIRAC_SPLIT_START_BAND; band++ )
        {
            for ( i = 0; i < FOA_CHANNELS; i++ )
            {
                for ( j = 0; j < FOA_CHANNELS; j++ )
                {
                    mixer_mat_bands_real[band][i][j] = st_ivas->hSpar->hMdDec->mixer_mat[i][j][band + mixer_mat_index * IVAS_MAX_NUM_BANDS];
                }
            }
        }
    }

    /* Map the mixing matrices from the frequency bands to frequency bins */
    bin = 0;
    for ( band = 0; band < SPAR_DIRAC_SPLIT_START_BAND; band++ )
    {
        band_start = band_grouping[band];
        band_end = band_grouping[band + 1];
        for ( bin = band_start; bin < band_end; bin++ )
        {
            for ( i = 0; i < FOA_CHANNELS; i++ )
            {
                for ( j = 0; j < FOA_CHANNELS; j++ )
                {
                    mixer_mat_bins_real[bin][i][j] = mixer_mat_bands_real[band][i][j];
                }
            }
        }
    }
    nBins = bin; /* One past the last bin covered by the mapped bands */

    /* Determine MASA metadata */
    /* Determine transport signal energies and cross correlations when more than 1 TC */
    if ( nchan_transport == 2 )
    {
        set_zero( transportSignalEnergies[0], nBins );
        set_zero( transportSignalEnergies[1], nBins );
        set_zero( transportSignalCrossCorrelation, nBins );

        for ( slot = 0; slot < hDirAC->subframe_nbslots; slot++ )
        {
            for ( bin = 0; bin < nBins; bin++ )
            {
                for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
                {
                    instEne = ( inRe[ch][slot][bin] * inRe[ch][slot][bin] );
                    instEne += ( inIm[ch][slot][bin] * inIm[ch][slot][bin] );
                    transportSignalEnergies[ch][bin] += instEne;
                }
                /* Real part of the cross correlation between the two transport channels */
                transportSignalCrossCorrelation[bin] += inRe[0][slot][bin] * inRe[1][slot][bin];
                transportSignalCrossCorrelation[bin] += inIm[0][slot][bin] * inIm[1][slot][bin];
            }
        }
    }

    if ( hDiffuseDist != NULL )
    {
        set_zero( hDiffuseDist->diffuseRatioX[subframe], CLDFB_NO_CHANNELS_MAX );
        set_zero( hDiffuseDist->diffuseRatioY[subframe], CLDFB_NO_CHANNELS_MAX );
        set_zero( hDiffuseDist->diffuseRatioZ[subframe], CLDFB_NO_CHANNELS_MAX );
    }

    for ( bin = 0; bin < nBins; bin++ )
    {
        /* Set the energy of the first transport signal */
        if ( nchan_transport == 1 )
        {
            inCovarianceMtx[0][0] = 1.0f; /* In case of 1TC, fixed value can be used */
        }
        else
        {
            inCovarianceMtx[0][0] = transportSignalEnergies[0][bin]; /* In case of 2TC, use actual energies */
        }
        /* Decorrelated channels assumed to have the same energy as the source channel */
        inCovarianceMtx[1][1] = inCovarianceMtx[0][0];
        inCovarianceMtx[2][2] = inCovarianceMtx[0][0];
        inCovarianceMtx[3][3] = inCovarianceMtx[0][0];

        /* In case residuals were transmitted, use their actual energies and cross correlations */
        if ( nchan_transport == 2 )
        {
            inCovarianceMtx[1][1] = transportSignalEnergies[1][bin];
            inCovarianceMtx[0][1] = transportSignalCrossCorrelation[bin];
            inCovarianceMtx[1][0] = inCovarianceMtx[0][1];
        }

        compute_foa_cov_matrix( foaCovarianceMtx, inCovarianceMtx, mixer_mat_bins_real[bin] );

        /* Estimate MASA metadata */
        Iy = foaCovarianceMtx[0][1];                                                                                      /* Intensity in Y direction */
        Iz = foaCovarianceMtx[0][2];                                                                                      /* Intensity in Z direction */
        Ix = foaCovarianceMtx[0][3];                                                                                      /* Intensity in X direction */
        I = sqrtf( Ix * Ix + Iy * Iy + Iz * Iz );                                                                         /* Intensity vector length */
        E = ( foaCovarianceMtx[0][0] + foaCovarianceMtx[1][1] + foaCovarianceMtx[2][2] + foaCovarianceMtx[3][3] ) / 2.0f; /* Overall energy */
        azi = atan2f( Iy, Ix );                                                                                           /* Azimuth */
        ele = atan2f( Iz, sqrtf( Ix * Ix + Iy * Iy ) );                                                                   /* Elevation */
        ratio = I / fmaxf( 1e-12f, E );                                                                                   /* Energy ratio, guarded against division by zero */
        ratio = fmaxf( 0.0f, fminf( 1.0f, ratio ) );                                                                      /* Clamp to [0, 1] */

        /* Angles are stored as integers after dividing the radian value by PI_OVER_180 */
        hDirAC->azimuth[dirac_write_idx][bin] = (int16_t) roundf( azi / PI_OVER_180 );
        hDirAC->elevation[dirac_write_idx][bin] = (int16_t) roundf( ele / PI_OVER_180 );
        hDirAC->energy_ratio1[dirac_write_idx][bin] = ratio;
        hDirAC->diffuseness_vector[dirac_write_idx][bin] = 1.0f - ratio;

        hDirAC->spreadCoherence[dirac_write_idx][bin] = 0.0f;
        hDirAC->surroundingCoherence[dirac_write_idx][bin] = 0.0f;

        /* Determine directional distribution of the indirect audio based on the SPAR mixing matrices (and the transport audio signals when 2 TC) */
        if ( hDiffuseDist != NULL )
        {
            if ( nchan_transport == 1 )
            {
                diffuseGainY = fabsf( mixer_mat_bins_real[bin][1][1] );
                diffuseGainX = fabsf( mixer_mat_bins_real[bin][3][2] );
                diffuseGainZ = fabsf( mixer_mat_bins_real[bin][2][3] );
            }
            else if ( nchan_transport == 2 )
            {
                diffuseGainY = fabsf( mixer_mat_bins_real[bin][1][1] * transportSignalEnergies[1][bin] );
                diffuseGainX = fabsf( mixer_mat_bins_real[bin][3][2] * transportSignalEnergies[0][bin] ) + fabsf( mixer_mat_bins_real[bin][3][1] * transportSignalEnergies[1][bin] );
                diffuseGainZ = fabsf( mixer_mat_bins_real[bin][2][3] * transportSignalEnergies[0][bin] ) + fabsf( mixer_mat_bins_real[bin][2][1] * transportSignalEnergies[1][bin] );
            }
            else
            {
                /* Other transport-channel counts: distribute evenly */
                diffuseGainY = 1.0f;
                diffuseGainX = 1.0f;
                diffuseGainZ = 1.0f;
            }

            diffuseGainSum = diffuseGainY + diffuseGainX + diffuseGainZ;

            if ( diffuseGainSum == 0.0f )
            {
                /* No information available: fall back to an even split */
                hDiffuseDist->diffuseRatioX[subframe][bin] = 1.0f / 3.0f;
                hDiffuseDist->diffuseRatioY[subframe][bin] = 1.0f / 3.0f;
                hDiffuseDist->diffuseRatioZ[subframe][bin] = 1.0f / 3.0f;
            }
            else
            {
                hDiffuseDist->diffuseRatioX[subframe][bin] = diffuseGainX / ( diffuseGainSum + EPSILON );
                hDiffuseDist->diffuseRatioY[subframe][bin] = diffuseGainY / ( diffuseGainSum + EPSILON );
                hDiffuseDist->diffuseRatioZ[subframe][bin] = diffuseGainZ / ( diffuseGainSum + EPSILON );
            }
        }
    }

    return;
}
#else
void ivas_spar_param_to_masa_param_mapping(
    Decoder_Struct *st_ivas,                               /* i/o: IVAS decoder struct               */
    float inRe[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i  : Input audio in CLDFB domain, real */
@@ -1462,6 +1668,7 @@ void ivas_spar_param_to_masa_param_mapping(

    return;
}
#endif


/* Estimate FOA properties: foaCov = mixMtx * inCov * mixMtx' */
Loading