Commit 4c5ce80a authored by emerit's avatar emerit
Browse files

Merge branch 'main' of forge.3gpp.org:ivas-codec-pc/ivas-codec

parents 3e499853 3a1651ba
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -864,6 +864,9 @@ typedef enum {
// VE: this should be renamed to e.g. N_SPATIAL_SUBFRAMES
#define MAX_PARAM_SPATIAL_SUBFRAMES             4                           /* Maximum number of subframes for parametric spatial coding */
#define L_SPATIAL_SUBFR_48k                     (L_FRAME48k / MAX_PARAM_SPATIAL_SUBFRAMES)
#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
#define CLDFB_SLOTS_PER_SUBFRAME                ( CLDFB_NO_COL_MAX / MAX_PARAM_SPATIAL_SUBFRAMES ) /* Number of CLDFB slots per subframe */
#endif


/*----------------------------------------------------------------------------------*
+9 −0
Original line number Diff line number Diff line
@@ -4691,6 +4691,14 @@ void ivas_masa_prerender(
    const int16_t output_frame                                  /* i  : output frame length per channel                 */
);

#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
/* Determine MASA metadata from the SPAR metadata, one subframe per call (buffers hold CLDFB_SLOTS_PER_SUBFRAME slots) */
void ivas_spar_param_to_masa_param_mapping(
    Decoder_Struct *st_ivas,                                       /* i/o: IVAS decoder struct                          */
    float inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i  : Input audio in CLDFB domain, real            */
    float inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i  : Input audio in CLDFB domain, imag            */
    const int16_t subframe                                         /* i  : Subframe to map                              */
);
#else
void ivas_spar_param_to_masa_param_mapping(
    Decoder_Struct *st_ivas,                                    /* i/o: IVAS decoder struct                             */
    float inRe[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX],      /* i  : Input audio in CLDFB domain, real               */
@@ -4698,6 +4706,7 @@ void ivas_spar_param_to_masa_param_mapping(
    const int16_t firstSubframe,                                /* i  : First subframe to map                           */
    const int16_t nSubframes                                    /* i  : Number of subframes to map                      */
);
#endif


/*---------------------------------------------------------------------------------*

lib_com/options.h

100644 → 100755
+5 −2
Original line number Diff line number Diff line
@@ -174,7 +174,10 @@
#define EUALER2QUAT_FIX                                 /* Dlb: fix for issue 430 in euler2quat, sign of quat y is inverted */
#define HR_METADATA                                     /* Nok: encode directional MASA metadata with more bits at 384k and 512k */

#define FIX_357_DTX_32K                                 /* Eri: issue 357 - Forced LP-CNG at 32k */   
#define FIX_435_ISM_MERGE_BUG                           /* Eri: Merge bug fix for ISM NULL metadata and tcx_only cases */
#define FIX_355_REFACTOR_PARAMBIN_TO_5MS                /* Nokia: Fixes issue 355 by refactoring parametric binauralizer code to 5 ms mode */

/* ################## End DEVELOPMENT switches ######################### */
/* clang-format on */
#endif
+21 −1
Original line number Diff line number Diff line
@@ -426,6 +426,15 @@ static ivas_error ivas_binaural_hrtf_open(
 *
 *-------------------------------------------------------------------------*/

#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
static void ivas_binaural_obtain_DMX(
    const int16_t numTimeSlots,
    BINAURAL_RENDERER_HANDLE hBinRenderer,                               /* i/o: fastconv binaural renderer handle */
    float RealBuffer[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i  : Contains the LS signals           */
    float ImagBuffer[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i  : Contains the LS signals           */
    float realDMX[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX],
    float imagDMX[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX] )
#else
static void ivas_binaural_obtain_DMX(
    const int16_t numTimeSlots,
    BINAURAL_RENDERER_HANDLE hBinRenderer,                                  /* i/o: fastconv binaural renderer handle */
@@ -433,6 +442,7 @@ static void ivas_binaural_obtain_DMX(
    float ImagBuffer[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* i  : Contains the LS signals           */
    float realDMX[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX],
    float imagDMX[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX] )
#endif
{
    int16_t chIdx, bandIdx, k;

@@ -1029,11 +1039,17 @@ void ivas_binRenderer(
    /* Obtain the binaural dmx and compute the reverb */
    if ( hBinRenderer->hReverb != NULL )
    {
#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
        float reverbRe[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX];
        float reverbIm[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX];
        float inRe[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX];
        float inIm[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX];
#else
        float reverbRe[BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
        float reverbIm[BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
        float inRe[BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
        float inIm[BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];

#endif
        ivas_binaural_obtain_DMX( numTimeSlots, hBinRenderer, RealBuffer, ImagBuffer, inRe, inIm );

        for ( chIdx = 0; chIdx < BINAURAL_CHANNELS; chIdx++ )
@@ -1045,7 +1061,11 @@ void ivas_binRenderer(
            }
        }

#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
        ivas_binaural_reverb_processSubframe( hBinRenderer->hReverb, BINAURAL_CHANNELS, inRe, inIm, reverbRe, reverbIm );
#else
        ivas_binaural_reverb_processFrame( hBinRenderer->hReverb, BINAURAL_CHANNELS, inRe, inIm, reverbRe, reverbIm, 0u );
#endif

        /* Add the conv module and reverb module output */
        for ( chIdx = 0; chIdx < BINAURAL_CHANNELS; chIdx++ )
+207 −0
Original line number Diff line number Diff line
@@ -1244,6 +1244,212 @@ ivas_error ivas_masa_dec_reconfigure(
 * Determine MASA metadata from the SPAR metadata
 *-------------------------------------------------------------------*/

#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
/*
 * Estimate MASA-style metadata for one subframe from the SPAR mixing matrices.
 *
 * The mixing matrix of the requested subframe is fetched (with the SPAR metadata
 * delay applied), expanded from frequency bands to CLDFB bins using
 * hDirAC->band_grouping, and used to turn an input covariance matrix into an FOA
 * covariance matrix via compute_foa_cov_matrix(). Azimuth, elevation, energy ratio
 * and diffuseness are derived per bin and written to the hDirAC buffers at index
 * hDirAC->dirac_read_idx; spread and surrounding coherence are set to zero.
 * When hDirAC->hDiffuseDist is not NULL, the X/Y/Z diffuse ratios for [subframe]
 * are written as well.
 *
 * inRe / inIm are only read when st_ivas->nchan_transport == 2 (channels 0 and 1,
 * hDirAC->subframe_nbslots slots).
 *
 * NOTE(review): 'subframe' is not range checked here; it indexes
 * hDiffuseDist->diffuseRatio{X,Y,Z} and selects the mixing matrix, so callers are
 * presumably expected to pass 0 .. MAX_PARAM_SPATIAL_SUBFRAMES - 1 - confirm.
 */
void ivas_spar_param_to_masa_param_mapping(
    Decoder_Struct *st_ivas,                                       /* i/o: IVAS decoder struct               */
    float inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i  : Input audio in CLDFB domain, real */
    float inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i  : Input audio in CLDFB domain, imag */
    const int16_t subframe                                         /* i  : Subframe to map                   */
)
{
    int16_t i, j, band, bin, slot, ch, nBins, nchan_transport;
    int16_t mixer_mat_index;
    int16_t dirac_write_idx;
    DIRAC_DEC_HANDLE hDirAC;
    DIFFUSE_DISTRIBUTION_HANDLE hDiffuseDist;
    /* Exactly one subframe is processed per call, so the scratch matrices carry no subframe dimension */
    float mixer_mat_bands_real[SPAR_DIRAC_SPLIT_START_BAND][FOA_CHANNELS][FOA_CHANNELS];
    float mixer_mat_bins_real[CLDFB_NO_CHANNELS_MAX][FOA_CHANNELS][FOA_CHANNELS];
    int16_t *band_grouping;
    int16_t band_start, band_end;
    float transportSignalEnergies[2][CLDFB_NO_CHANNELS_MAX];
    float transportSignalCrossCorrelation[CLDFB_NO_CHANNELS_MAX];
    float instEne;
    float inCovarianceMtx[FOA_CHANNELS][FOA_CHANNELS];
    float foaCovarianceMtx[FOA_CHANNELS][FOA_CHANNELS];
    float Iy, Iz, Ix, E, azi, ele, I, ratio;
    float diffuseGainX, diffuseGainY, diffuseGainZ, diffuseGainSum;

    /* Set values */
    hDirAC = st_ivas->hDirAC;
    hDirAC->numSimultaneousDirections = 1;
    hDiffuseDist = st_ivas->hDirAC->hDiffuseDist;
    nchan_transport = st_ivas->nchan_transport;
    band_grouping = hDirAC->band_grouping;
    dirac_write_idx = hDirAC->dirac_read_idx; /* Mixing matrices, from which MASA meta is determined, already have the delay compensation */

    /* Init arrays; off-diagonal terms stay zero unless overwritten in the 2 TC case below */
    for ( i = 0; i < FOA_CHANNELS; i++ )
    {
        set_zero( inCovarianceMtx[i], FOA_CHANNELS );
    }

    /* Delay the SPAR mixing matrices to have them synced with the audio */
    if ( subframe < SPAR_META_DELAY_SUBFRAMES )
    {
        /* Early subframes take their matrix from the stored previous-frame matrices */
        mixer_mat_index = subframe + MAX_PARAM_SPATIAL_SUBFRAMES - SPAR_META_DELAY_SUBFRAMES + 1;
        for ( band = 0; band < SPAR_DIRAC_SPLIT_START_BAND; band++ )
        {
            for ( i = 0; i < FOA_CHANNELS; i++ )
            {
                for ( j = 0; j < FOA_CHANNELS; j++ )
                {
                    mixer_mat_bands_real[band][i][j] = st_ivas->hSpar->hMdDec->mixer_mat_prev[mixer_mat_index][i][j][band];
                }
            }
        }
    }
    else
    {
        /* Later subframes read the current-frame matrices, stored with a stride of IVAS_MAX_NUM_BANDS per subframe */
        mixer_mat_index = subframe - SPAR_META_DELAY_SUBFRAMES;
        for ( band = 0; band < SPAR_DIRAC_SPLIT_START_BAND; band++ )
        {
            for ( i = 0; i < FOA_CHANNELS; i++ )
            {
                for ( j = 0; j < FOA_CHANNELS; j++ )
                {
                    mixer_mat_bands_real[band][i][j] = st_ivas->hSpar->hMdDec->mixer_mat[i][j][band + mixer_mat_index * IVAS_MAX_NUM_BANDS];
                }
            }
        }
    }

    /* Map the mixing matrices from the frequency bands to frequency bins */
    bin = 0;
    for ( band = 0; band < SPAR_DIRAC_SPLIT_START_BAND; band++ )
    {
        band_start = band_grouping[band];
        band_end = band_grouping[band + 1];
        for ( bin = band_start; bin < band_end; bin++ )
        {
            for ( i = 0; i < FOA_CHANNELS; i++ )
            {
                for ( j = 0; j < FOA_CHANNELS; j++ )
                {
                    mixer_mat_bins_real[bin][i][j] = mixer_mat_bands_real[band][i][j];
                }
            }
        }
    }
    nBins = bin; /* Bin index at which the band-to-bin expansion stopped */

    /* Determine MASA metadata */
    /* Determine transport signal energies and cross correlations when more than 1 TC */
    if ( nchan_transport == 2 )
    {
        set_zero( transportSignalEnergies[0], nBins );
        set_zero( transportSignalEnergies[1], nBins );
        set_zero( transportSignalCrossCorrelation, nBins );

        for ( slot = 0; slot < hDirAC->subframe_nbslots; slot++ )
        {
            for ( bin = 0; bin < nBins; bin++ )
            {
                for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
                {
                    instEne = ( inRe[ch][slot][bin] * inRe[ch][slot][bin] );
                    instEne += ( inIm[ch][slot][bin] * inIm[ch][slot][bin] );
                    transportSignalEnergies[ch][bin] += instEne;
                }
                /* Real part of the cross correlation between the two transport channels */
                transportSignalCrossCorrelation[bin] += inRe[0][slot][bin] * inRe[1][slot][bin];
                transportSignalCrossCorrelation[bin] += inIm[0][slot][bin] * inIm[1][slot][bin];
            }
        }
    }

    if ( hDiffuseDist != NULL )
    {
        set_zero( hDiffuseDist->diffuseRatioX[subframe], CLDFB_NO_CHANNELS_MAX );
        set_zero( hDiffuseDist->diffuseRatioY[subframe], CLDFB_NO_CHANNELS_MAX );
        set_zero( hDiffuseDist->diffuseRatioZ[subframe], CLDFB_NO_CHANNELS_MAX );
    }

    for ( bin = 0; bin < nBins; bin++ )
    {
        /* Set the energy of the first transport signal */
        if ( nchan_transport == 1 )
        {
            inCovarianceMtx[0][0] = 1.0f; /* In case of 1TC, fixed value can be used */
        }
        else
        {
            inCovarianceMtx[0][0] = transportSignalEnergies[0][bin]; /* In case of 2TC, use actual energies */
        }
        /* Decorrelated channels assumed to have the same energy as the source channel */
        inCovarianceMtx[1][1] = inCovarianceMtx[0][0];
        inCovarianceMtx[2][2] = inCovarianceMtx[0][0];
        inCovarianceMtx[3][3] = inCovarianceMtx[0][0];

        /* In case residuals were transmitted, use their actual energies and cross correlations */
        if ( nchan_transport == 2 )
        {
            inCovarianceMtx[1][1] = transportSignalEnergies[1][bin];
            inCovarianceMtx[0][1] = transportSignalCrossCorrelation[bin];
            inCovarianceMtx[1][0] = inCovarianceMtx[0][1];
        }

        compute_foa_cov_matrix( foaCovarianceMtx, inCovarianceMtx, mixer_mat_bins_real[bin] );

        /* Estimate MASA metadata */
        Iy = foaCovarianceMtx[0][1];                                                                                      /* Intensity in Y direction */
        Iz = foaCovarianceMtx[0][2];                                                                                      /* Intensity in Z direction */
        Ix = foaCovarianceMtx[0][3];                                                                                      /* Intensity in X direction */
        I = sqrtf( Ix * Ix + Iy * Iy + Iz * Iz );                                                                         /* Intensity vector length */
        E = ( foaCovarianceMtx[0][0] + foaCovarianceMtx[1][1] + foaCovarianceMtx[2][2] + foaCovarianceMtx[3][3] ) / 2.0f; /* Overall energy */
        azi = atan2f( Iy, Ix );                                                                                           /* Azimuth */
        ele = atan2f( Iz, sqrtf( Ix * Ix + Iy * Iy ) );                                                                   /* Elevation */
        ratio = I / fmaxf( 1e-12f, E );                                                                                   /* Energy ratio */
        ratio = fmaxf( 0.0f, fminf( 1.0f, ratio ) );                                                                      /* Clamp to [0, 1] */

        /* Angles are stored as integers after dividing the radian values by PI_OVER_180 */
        hDirAC->azimuth[dirac_write_idx][bin] = (int16_t) roundf( azi / PI_OVER_180 );
        hDirAC->elevation[dirac_write_idx][bin] = (int16_t) roundf( ele / PI_OVER_180 );
        hDirAC->energy_ratio1[dirac_write_idx][bin] = ratio;
        hDirAC->diffuseness_vector[dirac_write_idx][bin] = 1.0f - ratio;

        hDirAC->spreadCoherence[dirac_write_idx][bin] = 0.0f;
        hDirAC->surroundingCoherence[dirac_write_idx][bin] = 0.0f;

        /* Determine directional distribution of the indirect audio based on the SPAR mixing matrices (and the transport audio signals when 2 TC) */
        if ( hDiffuseDist != NULL )
        {
            if ( nchan_transport == 1 )
            {
                diffuseGainY = fabsf( mixer_mat_bins_real[bin][1][1] );
                diffuseGainX = fabsf( mixer_mat_bins_real[bin][3][2] );
                diffuseGainZ = fabsf( mixer_mat_bins_real[bin][2][3] );
            }
            else if ( nchan_transport == 2 )
            {
                diffuseGainY = fabsf( mixer_mat_bins_real[bin][1][1] * transportSignalEnergies[1][bin] );
                diffuseGainX = fabsf( mixer_mat_bins_real[bin][3][2] * transportSignalEnergies[0][bin] ) + fabsf( mixer_mat_bins_real[bin][3][1] * transportSignalEnergies[1][bin] );
                diffuseGainZ = fabsf( mixer_mat_bins_real[bin][2][3] * transportSignalEnergies[0][bin] ) + fabsf( mixer_mat_bins_real[bin][2][1] * transportSignalEnergies[1][bin] );
            }
            else
            {
                /* Any other transport channel count: even distribution */
                diffuseGainY = 1.0f;
                diffuseGainX = 1.0f;
                diffuseGainZ = 1.0f;
            }

            diffuseGainSum = diffuseGainY + diffuseGainX + diffuseGainZ;

            if ( diffuseGainSum == 0.0f )
            {
                /* No usable gain information: fall back to an even split */
                hDiffuseDist->diffuseRatioX[subframe][bin] = 1.0f / 3.0f;
                hDiffuseDist->diffuseRatioY[subframe][bin] = 1.0f / 3.0f;
                hDiffuseDist->diffuseRatioZ[subframe][bin] = 1.0f / 3.0f;
            }
            else
            {
                hDiffuseDist->diffuseRatioX[subframe][bin] = diffuseGainX / ( diffuseGainSum + EPSILON );
                hDiffuseDist->diffuseRatioY[subframe][bin] = diffuseGainY / ( diffuseGainSum + EPSILON );
                hDiffuseDist->diffuseRatioZ[subframe][bin] = diffuseGainZ / ( diffuseGainSum + EPSILON );
            }
        }
    }

    return;
}
#else
void ivas_spar_param_to_masa_param_mapping(
    Decoder_Struct *st_ivas,                               /* i/o: IVAS decoder struct               */
    float inRe[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i  : Input audio in CLDFB domain, real */
@@ -1462,6 +1668,7 @@ void ivas_spar_param_to_masa_param_mapping(

    return;
}
#endif


/* Estimate FOA properties: foaCov = mixMtx * inCov * mixMtx' */
Loading