Commit df3e2c41 authored by Tapani Pihlajakuja's avatar Tapani Pihlajakuja
Browse files

Merge branch 'main' into 390-masa-metadata-copying-in-external-renderer-is-out-of-sync

parents 7e8c467e c79e4fd9
Loading
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -645,7 +645,9 @@ int main(
                fprintf( stderr, "Sampling rate must be specified on command line when using raw PCM input\n" );
                exit( -1 );
            }
#ifndef FIX_389_EXT_REND_PCM_SR
            args.sampleRate = inFileSampleRate;
#endif
            break;
        default:
            fprintf( stderr, "Error: %s\n", ivas_error_to_string( error ) );
+5 −0
Original line number Diff line number Diff line
@@ -153,6 +153,11 @@

#define SBA2MONO                                        /* FhG: Issue 365: Adapt processing of SBA mono output to be in line with stereo output (less delay, lower complexity) */

#define NOKIA_PARAMBIN_REQULARIZATION                   /* Nokia: Contribution - Configured regularization factor for parametric binauralizer. */
#define NOKIA_ADAPTIVE_BINAURAL_PROTOS                  /* Nokia: Contribution 28: Adaptive binaural prototypes */
#define NOKIA_ADAPTIVE_BINAURAL_PROTOS_OPT              /* Nokia: enable adaptive binaural prototype complexity optimizations (only evaluated inside NOKIA_ADAPTIVE_BINAURAL_PROTOS code) */

#define FIX_389_EXT_REND_PCM_SR                         /* Nokia: Issue 389: Fix assignment of sample rate with PCM input. */
#define FIX_390_EXT_REND_MASA_META_COPY                 /* Nokia: Issue 390: Fixes MASA metadata copying to renderer. */

/* ################## End DEVELOPMENT switches ######################### */
+312 −0
Original line number Diff line number Diff line
@@ -40,6 +40,10 @@
#include "ivas_cnst.h"
#include "ivas_rom_binauralRenderer.h"
#include "ivas_rom_rend.h"
#ifdef NOKIA_ADAPTIVE_BINAURAL_PROTOS
#include "ivas_rom_com.h"
#endif

#ifdef DEBUGGING
#include "debug.h"
#endif
@@ -54,6 +58,20 @@
#define IVAS_TDET_DUCK_MULT_FAC_PARA_BIN        ( 2.0f )
#define IVAS_TDET_DUCK_MULT_FAC_PARA_BIN_LOW_BR ( 3.0f )

#ifdef NOKIA_ADAPTIVE_BINAURAL_PROTOS
/* Constants of adaptTransportSignalsHeadtracked() */

/* Forgetting factor of the first-order IIR smoothing applied to the channel energies */
#ifdef NOKIA_ADAPTIVE_BINAURAL_PROTOS_OPT
/* powf(0.95f, 4.0f) for sub-frame smoothing instead of CLDFB slot */
#define ADAPT_HTPROTO_IIR_FAC 0.81450625f
#else
#define ADAPT_HTPROTO_IIR_FAC 0.95f
#endif

/* Absolute inter-channel level difference (dB) mapped linearly to the ILD-based mono factor:
   at or below DB0 the factor is 0, at or above DB1 it is 1 */
#define ADAPT_HTPROTO_ILD_LIM_DB0 1.0f
#define ADAPT_HTPROTO_ILD_LIM_DB1 4.0f
/* Value of ( 1 - |Rmat[1][1]| ) mapped linearly to the rotation-based mono factor:
   at or below LIM_0 the factor is 0, at or above LIM_1 it is 1 */
#define ADAPT_HTPROTO_ROT_LIM_0   0.4f
#define ADAPT_HTPROTO_ROT_LIM_1   0.8f
#endif

/*-------------------------------------------------------------------------
 * Local function prototypes
 *------------------------------------------------------------------------*/
@@ -68,6 +86,10 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices( Decoder_Struc

static void ivas_dirac_dec_binaural_process_output( Decoder_Struct *st_ivas, float output_f[][L_FRAME48k], float inRe[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], float inIm[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], const int16_t max_band_decorr, const uint8_t numInputChannels, const uint8_t firstSlot, const uint8_t slotEnd );

#ifdef NOKIA_ADAPTIVE_BINAURAL_PROTOS
/* In-place adaptation of the two transport channels (CLDFB domain) according to the head rotation matrix and the smoothed inter-channel level difference */
static void adaptTransportSignalsHeadtracked( HEAD_TRACK_DATA_HANDLE hHeadTrackData, float inIm[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], float inRe[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], const uint8_t firstSlot, const uint8_t slotEnd, const uint8_t nBins, float Rmat[3][3] );
#endif

static void ivas_dirac_dec_binaural_check_and_switch_transports_headtracked( HEAD_TRACK_DATA_HANDLE hHeadTrackData, float inIm[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], float inRe[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], const uint8_t firstSlot, const uint8_t slotEnd, const uint8_t nBins, float Rmat[3][3] );

static void formulate2x2MixingMatrix( float Ein1, float Ein2, float CinRe, float CinIm, float Eout1, float Eout2, float CoutRe, float CoutIm, float Q[BINAURAL_CHANNELS][BINAURAL_CHANNELS], float Mre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], float Mim[BINAURAL_CHANNELS][BINAURAL_CHANNELS], const float regularizationFactor );
@@ -80,6 +102,9 @@ static void matrixMul( float Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS], float Ai

static void matrixTransp2Mul( float Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS], float Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS], float Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], float Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS], float outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS], float outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] );

#ifdef NOKIA_PARAMBIN_REQULARIZATION
/* Returns the regularization factor for the mixing matrix generation, selected from IVAS format and bitrate */
static float configure_reqularization_factor( const IVAS_FORMAT ivas_format, const int32_t ivas_brate );
#endif

/*-------------------------------------------------------------------------
 * ivas_dirac_dec_init_binaural_data()
@@ -268,6 +293,10 @@ ivas_error ivas_dirac_dec_init_binaural_data(
        ivas_td_decorr_dec_close( &( hBinaural->hTdDecorr ) );
    }

#ifdef NOKIA_PARAMBIN_REQULARIZATION
    hBinaural->reqularizationFactor = configure_reqularization_factor( st_ivas->ivas_format, st_ivas->hDecoderConfig->ivas_total_brate );
#endif

    st_ivas->hDiracDecBin = hBinaural;

    return IVAS_ERR_OK;
@@ -547,6 +576,10 @@ static void ivas_dirac_dec_binaural_internal(

        if ( nchan_transport == 2 )
        {
#ifdef NOKIA_ADAPTIVE_BINAURAL_PROTOS
            adaptTransportSignalsHeadtracked( st_ivas->hHeadTrackData, Cldfb_RealBuffer_in, Cldfb_ImagBuffer_in, firstSlot, slotEnd, nBins, Rmat );
#endif

            ivas_dirac_dec_binaural_check_and_switch_transports_headtracked( st_ivas->hHeadTrackData, Cldfb_ImagBuffer_in, Cldfb_RealBuffer_in, firstSlot, slotEnd, nBins, Rmat );
        }
    }
@@ -1064,11 +1097,19 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices(
        CrEneR = 0.0f;

        /* Formulate main processing matrix M */
#ifdef NOKIA_PARAMBIN_REQULARIZATION
        formulate2x2MixingMatrix( h->ChEne[0][bin], h->ChEne[1][bin],
                                  h->ChCrossRe[bin], h->ChCrossIm[bin],
                                  h->ChEneOut[0][bin], h->ChEneOut[1][bin],
                                  h->ChCrossReOut[bin], h->ChCrossImOut[bin],
                                  prototypeMtx, Mre, Mim, h->reqularizationFactor );
#else
        formulate2x2MixingMatrix( h->ChEne[0][bin], h->ChEne[1][bin],
                                  h->ChCrossRe[bin], h->ChCrossIm[bin],
                                  h->ChEneOut[0][bin], h->ChEneOut[1][bin],
                                  h->ChCrossReOut[bin], h->ChCrossImOut[bin],
                                  prototypeMtx, Mre, Mim, 1.0f );
#endif

        /* Load estimated covariance matrix to the [2][2] matrix form */
        CxRe[0][0] = h->ChEne[0][bin];
@@ -1317,6 +1358,198 @@ static void ivas_dirac_dec_binaural_process_output(
}


#ifdef NOKIA_ADAPTIVE_BINAURAL_PROTOS
/*-------------------------------------------------------------------------
 * adaptTransportSignalsHeadtracked()
 *
 * Adapt the two transport signals in place in the CLDFB domain.
 * A mono factor is formed as the product of a rotation-based factor
 * (derived from Rmat[1][1]) and an ILD-based factor (derived from the
 * IIR-smoothed channel energies). With that weight, the sum of both
 * channels is mixed into the channel that is not the louder one; the
 * louder channel is kept as is. Finally both channels are equalized so
 * that the smoothed energy of the processed signals follows the smoothed
 * energy of the input signals (gain limited to 4.0).
 *
 * The processing treats real and imaginary parts identically (energies
 * are re^2 + im^2, mixing and equalization are applied equally to both),
 * so the result does not depend on which buffer is passed as inIm and
 * which as inRe.
 * NOTE(review): the call in ivas_dirac_dec_binaural_internal() passes the
 * real buffer first, i.e. into inIm, unlike the neighbouring call to
 * ivas_dirac_dec_binaural_check_and_switch_transports_headtracked().
 * Harmless due to the symmetry above, but worth aligning.
 *------------------------------------------------------------------------*/

static void adaptTransportSignalsHeadtracked(
    HEAD_TRACK_DATA_HANDLE hHeadTrackData,                  /* i/o: head track handle; chEneIIR and procChEneIIR states are updated */
    float inIm[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: transport signals, imaginary part, indexed [ch][slot][bin]        */
    float inRe[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: transport signals, real part, indexed [ch][slot][bin]             */
    const uint8_t firstSlot,                                /* i  : first CLDFB slot to process                                       */
    const uint8_t slotEnd,                                  /* i  : end slot (exclusive)                                              */
    const uint8_t nBins,                                    /* i  : number of CLDFB bins to process                                   */
    float Rmat[3][3] )                                      /* i  : rotation matrix; only Rmat[1][1] is read                          */
{
    int16_t slot, ch, bin, louderCh;
#ifdef NOKIA_ADAPTIVE_BINAURAL_PROTOS_OPT
    float ILD, mono_factor_ILD, mono_factor_rotation, mono_factor, y_val, ene_proc, ene_target;
    uint8_t n_slots_per_sf, sf_idx, n_sf;
    int16_t max_band;
#else
    float re[2], im[2], ILD, mono_factor_ILD, mono_factor_rotation, mono_factor, y_val;
    float proc_re[2], proc_im[2], sum_re, sum_im, ene_proc, ene_target, mf;
#endif

    /* Determine head-orientation-based mono factor.
       Rmat[1][1] entry informs how close the ears are aligned according to transport signals. */
    y_val = 1.0f - fabsf( Rmat[1][1] );
    /* linear map of y_val from [ROT_LIM_0, ROT_LIM_1] to [0, 1], clamped */
    mono_factor_rotation = ( y_val - ADAPT_HTPROTO_ROT_LIM_0 ) / ( ADAPT_HTPROTO_ROT_LIM_1 - ADAPT_HTPROTO_ROT_LIM_0 );
    mono_factor_rotation = fmaxf( 0.0f, fminf( 1.0f, mono_factor_rotation ) );

    /* Adapt transport signals in frequency bands */
#ifdef NOKIA_ADAPTIVE_BINAURAL_PROTOS_OPT
    /* optimization grouping CLDFB bins into MASA bands (they are readily available in ROM and suitable for the task) AND group CLDFB slots into sub-frames */
    n_slots_per_sf = CLDFB_NO_COL_MAX / MAX_PARAM_SPATIAL_SUBFRAMES;
    /* number of complete sub-frames in [firstSlot, slotEnd); integer division, so slots beyond the last complete sub-frame are not processed */
    n_sf = ( slotEnd - firstSlot ) / n_slots_per_sf;

    /* count the bands whose lower edge lies below nBins */
    max_band = 0;
    while ( max_band < MASA_FREQUENCY_BANDS && MASA_band_grouping_24[max_band] < nBins )
    {
        max_band++;
    }

    for ( sf_idx = 0; sf_idx < n_sf; sf_idx++ )
    {
        float eqVal;
        uint8_t start_slot, stop_slot;
        int16_t band_idx, bin_lo, bin_hi;

        start_slot = firstSlot + sf_idx * n_slots_per_sf;
        stop_slot = start_slot + n_slots_per_sf;

        for ( band_idx = 0; band_idx < max_band; band_idx++ )
        {
            float ch_nrg[2]; /* storage for input signal channel energies */
            bin_lo = MASA_band_grouping_24[band_idx];
            /* upper band edge (exclusive), limited to the number of bins in use */
            bin_hi = min( MASA_band_grouping_24[band_idx + 1], (int16_t) nBins );
            for ( ch = 0; ch < 2; ch++ )
            {
                /* input energy of this channel over the sub-frame and band */
                ch_nrg[ch] = 0.0f;
                for ( slot = start_slot; slot < stop_slot; slot++ )
                {
                    for ( bin = bin_lo; bin < bin_hi; bin++ )
                    {
                        ch_nrg[ch] += ( inRe[ch][slot][bin] * inRe[ch][slot][bin] ) + ( inIm[ch][slot][bin] * inIm[ch][slot][bin] );
                    }
                }
                /* first-order IIR smoothing of the input energy */
                hHeadTrackData->chEneIIR[ch][band_idx] *= ADAPT_HTPROTO_IIR_FAC;
                hHeadTrackData->chEneIIR[ch][band_idx] += ( 1.0f - ADAPT_HTPROTO_IIR_FAC ) * ch_nrg[ch];
            }

            /* Determine ILD */
            ILD = fabsf( 10.0f * log10f( fmaxf( 1e-12f, hHeadTrackData->chEneIIR[0][band_idx] ) / fmaxf( 1e-12f, hHeadTrackData->chEneIIR[1][band_idx] ) ) );
            /* channel 0 is treated as the louder one when the smoothed energies are equal */
            if ( hHeadTrackData->chEneIIR[1][band_idx] > hHeadTrackData->chEneIIR[0][band_idx] )
            {
                louderCh = 1;
            }
            else
            {
                louderCh = 0;
            }

            /* Determine ILD-based mono factor */
            mono_factor_ILD = ( ILD - ADAPT_HTPROTO_ILD_LIM_DB0 ) / ( ADAPT_HTPROTO_ILD_LIM_DB1 - ADAPT_HTPROTO_ILD_LIM_DB0 );
            mono_factor_ILD = fmaxf( 0.0f, fminf( 1.0f, mono_factor_ILD ) );

            /* Combine mono factors */
            mono_factor = mono_factor_ILD * mono_factor_rotation;

            /* Mix original audio and sum signal according to determined mono factor */
            for ( ch = 0; ch < 2; ch++ )
            {
                if ( ch != louderCh )
                {
                    float band_nrg = 0.0f;

                    /* only this channel is overwritten, so the other (louder) channel read in the sum is still the unmodified input */
                    for ( slot = start_slot; slot < stop_slot; slot++ )
                    {
                        for ( bin = bin_lo; bin < bin_hi; bin++ )
                        {
                            /* mono sum signal with the computed weight + rest from the original channel */
                            inRe[ch][slot][bin] = mono_factor * ( inRe[0][slot][bin] + inRe[1][slot][bin] ) + ( 1.0f - mono_factor ) * inRe[ch][slot][bin];
                            inIm[ch][slot][bin] = mono_factor * ( inIm[0][slot][bin] + inIm[1][slot][bin] ) + ( 1.0f - mono_factor ) * inIm[ch][slot][bin];
                            band_nrg += ( inRe[ch][slot][bin] * inRe[ch][slot][bin] ) + ( inIm[ch][slot][bin] * inIm[ch][slot][bin] );
                        }
                    }
                    /* smoothed energy of the processed (mixed) channel */
                    hHeadTrackData->procChEneIIR[ch][band_idx] *= ADAPT_HTPROTO_IIR_FAC;
                    hHeadTrackData->procChEneIIR[ch][band_idx] += ( 1.0f - ADAPT_HTPROTO_IIR_FAC ) * band_nrg;
                }
                else
                {
                    /* processed signal is input. use the original channel, so no need to compute new signals or signal energy */
                    hHeadTrackData->procChEneIIR[ch][band_idx] *= ADAPT_HTPROTO_IIR_FAC;
                    hHeadTrackData->procChEneIIR[ch][band_idx] += ( 1.0f - ADAPT_HTPROTO_IIR_FAC ) * ch_nrg[ch];
                }
            }

            /* Equalize */
            ene_target = hHeadTrackData->chEneIIR[0][band_idx] + hHeadTrackData->chEneIIR[1][band_idx];
            ene_proc = hHeadTrackData->procChEneIIR[0][band_idx] + hHeadTrackData->procChEneIIR[1][band_idx];
            /* amplitude gain from the smoothed energy ratio, limited to 4.0 */
            eqVal = fminf( 4.0f, sqrtf( ene_target / fmaxf( 1e-12f, ene_proc ) ) );

            for ( slot = start_slot; slot < stop_slot; slot++ )
            {
                for ( ch = 0; ch < 2; ch++ )
                {
                    for ( bin = bin_lo; bin < bin_hi; bin++ )
                    {
                        inRe[ch][slot][bin] *= eqVal;
                        inIm[ch][slot][bin] *= eqVal;
                    }
                }
            }
        }
    }
#else
    /* original contribution */
    /* per-slot, per-bin processing; the IIR states are indexed by CLDFB bin in this variant */
    for ( slot = firstSlot; slot < slotEnd; slot++ )
    {
        float eqVal[60]; /* one gain per bin; NOTE(review): 60 presumably equals CLDFB_NO_CHANNELS_MAX - confirm */

        for ( bin = 0; bin < nBins; bin++ )
        {
            /* Determine channel energies */
            for ( ch = 0; ch < 2; ch++ )
            {
                re[ch] = inRe[ch][slot][bin];
                im[ch] = inIm[ch][slot][bin];

                hHeadTrackData->chEneIIR[ch][bin] *= ADAPT_HTPROTO_IIR_FAC;
                hHeadTrackData->chEneIIR[ch][bin] += ( 1.0f - ADAPT_HTPROTO_IIR_FAC ) * ( ( re[ch] * re[ch] ) + ( im[ch] * im[ch] ) );
            }

            /* Determine ILD */
            ILD = fabsf( 10.0f * log10f( fmaxf( 1e-12f, hHeadTrackData->chEneIIR[0][bin] ) / fmaxf( 1e-12f, hHeadTrackData->chEneIIR[1][bin] ) ) );
            louderCh = ( hHeadTrackData->chEneIIR[1][bin] > hHeadTrackData->chEneIIR[0][bin] );

            /* Determine ILD-based mono factor */
            mono_factor_ILD = ( ILD - ADAPT_HTPROTO_ILD_LIM_DB0 ) / ( ADAPT_HTPROTO_ILD_LIM_DB1 - ADAPT_HTPROTO_ILD_LIM_DB0 );
            mono_factor_ILD = fmaxf( 0.0f, fminf( 1.0f, mono_factor_ILD ) );

            /* Combine mono factors */
            mono_factor = mono_factor_ILD * mono_factor_rotation;

            /* Mix original audio and sum signal according to determined mono factor */
            sum_re = re[0] + re[1];
            sum_im = im[0] + im[1];
            for ( ch = 0; ch < 2; ch++ )
            {
                /* the louder channel is passed through unchanged */
                mf = ( ch == louderCh ) ? 0.0f : mono_factor;

                proc_re[ch] = mf * sum_re + ( 1.0f - mf ) * re[ch];
                proc_im[ch] = mf * sum_im + ( 1.0f - mf ) * im[ch];

                hHeadTrackData->procChEneIIR[ch][bin] *= ADAPT_HTPROTO_IIR_FAC;
                hHeadTrackData->procChEneIIR[ch][bin] += ( 1.0f - ADAPT_HTPROTO_IIR_FAC ) * ( ( proc_re[ch] * proc_re[ch] ) + ( proc_im[ch] * proc_im[ch] ) );
            }

            /* Equalize */
            ene_target = hHeadTrackData->chEneIIR[0][bin] + hHeadTrackData->chEneIIR[1][bin];
            ene_proc = hHeadTrackData->procChEneIIR[0][bin] + hHeadTrackData->procChEneIIR[1][bin];
            eqVal[bin] = fminf( 4.0f, sqrtf( ene_target / fmaxf( 1e-12f, ene_proc ) ) );

            for ( ch = 0; ch < 2; ch++ )
            {
                inRe[ch][slot][bin] = proc_re[ch] * eqVal[bin];
                inIm[ch][slot][bin] = proc_im[ch] * eqVal[bin];
            }
        }
    }
#endif

    return;
}
#endif


static void ivas_dirac_dec_binaural_check_and_switch_transports_headtracked(
    HEAD_TRACK_DATA_HANDLE hHeadTrackData,
    float inIm[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX],
@@ -1854,3 +2087,82 @@ static void hrtfShGetHrtf(

    return;
}


#ifdef NOKIA_PARAMBIN_REQULARIZATION
/*-------------------------------------------------------------------------
 * configure_reqularization_factor()
 *
 * Select the regularization factor applied in the mixing matrix generation
 * of the parametric binauralizer. The value is looked up from the IVAS
 * codec format and the current bitrate; formats without a dedicated table
 * (SBA, parametric ISM) use the default of 1.0f.
 *------------------------------------------------------------------------*/

/*! r: regularization factor selected for the given format and bitrate */
static float configure_reqularization_factor(
    const IVAS_FORMAT ivas_format, /* i: IVAS codec format in use */
    const int32_t ivas_brate )     /* i: Current IVAS bitrate     */
{
    const float defaultFactor = 1.0f;

    if ( ivas_format == MASA_FORMAT )
    {
        /* Everything from 256k upwards shares one value */
        if ( ivas_brate >= IVAS_256k )
        {
            return 0.2f;
        }

        /* Dedicated values are matched exactly at these operating points */
        if ( ivas_brate == IVAS_192k )
        {
            return 0.3f;
        }
        if ( ivas_brate == IVAS_160k )
        {
            return 0.4f;
        }
        if ( ivas_brate == IVAS_128k )
        {
            return 0.5f;
        }
        if ( ivas_brate == IVAS_96k )
        {
            return 0.6f;
        }

        /* Any remaining bitrate of at least 64k; below that the default applies */
        if ( ivas_brate >= IVAS_64k )
        {
            return 0.8f;
        }

        return defaultFactor;
    }

    if ( ivas_format == MC_FORMAT ) /* This is always McMASA for parametric binauralizer. */
    {
        /* Thresholds are checked from the highest bitrate downwards */
        if ( ivas_brate >= IVAS_96k )
        {
            return 0.3f;
        }
        if ( ivas_brate >= IVAS_80k )
        {
            return 0.5f;
        }
        if ( ivas_brate >= IVAS_64k )
        {
            return 0.7f;
        }
        if ( ivas_brate >= IVAS_48k )
        {
            return 0.8f;
        }

        return defaultFactor;
    }

    /* SBA and parametric ISM currently stay at the default value. */
    return defaultFactor;
}
#endif
+14 −0
Original line number Diff line number Diff line
@@ -85,6 +85,20 @@ ivas_error ivas_headTrack_open(
        ( *hHeadTrackData )->Rmat_prev[i][i] = 1.0f;
    }

#ifdef NOKIA_ADAPTIVE_BINAURAL_PROTOS
#ifdef NOKIA_ADAPTIVE_BINAURAL_PROTOS_OPT
    set_zero( ( *hHeadTrackData )->chEneIIR[0], MASA_FREQUENCY_BANDS );
    set_zero( ( *hHeadTrackData )->chEneIIR[1], MASA_FREQUENCY_BANDS );
    set_zero( ( *hHeadTrackData )->procChEneIIR[0], MASA_FREQUENCY_BANDS );
    set_zero( ( *hHeadTrackData )->procChEneIIR[1], MASA_FREQUENCY_BANDS );
#else
    set_zero( ( *hHeadTrackData )->chEneIIR[0], CLDFB_NO_CHANNELS_MAX );
    set_zero( ( *hHeadTrackData )->chEneIIR[1], CLDFB_NO_CHANNELS_MAX );
    set_zero( ( *hHeadTrackData )->procChEneIIR[0], CLDFB_NO_CHANNELS_MAX );
    set_zero( ( *hHeadTrackData )->procChEneIIR[1], CLDFB_NO_CHANNELS_MAX );
#endif
#endif

    return IVAS_ERR_OK;
}

+13 −0
Original line number Diff line number Diff line
@@ -140,6 +140,9 @@ typedef struct ivas_dirac_dec_binaural_data_structure
    uint16_t useSubframeMode; /* 0 = process in 20 ms frames, 1 = process in 5 ms subframes */
    uint16_t useTdDecorr;
    ivas_td_decorr_state_t *hTdDecorr;
#ifdef NOKIA_PARAMBIN_REQULARIZATION
    float reqularizationFactor;
#endif

} DIRAC_DEC_BIN_DATA, *DIRAC_DEC_BIN_HANDLE;

@@ -265,6 +268,16 @@ typedef struct ivas_binaural_head_track_struct
    uint8_t lrSwitchedCurrent;
    float lrSwitchInterpVal;

#ifdef NOKIA_ADAPTIVE_BINAURAL_PROTOS
#ifdef NOKIA_ADAPTIVE_BINAURAL_PROTOS_OPT
    float chEneIIR[2][MASA_FREQUENCY_BANDS]; /* independent of the format. MASA bands are suitable for the task and readily available in ROM. */
    float procChEneIIR[2][MASA_FREQUENCY_BANDS];
#else
    float chEneIIR[2][CLDFB_NO_CHANNELS_MAX];
    float procChEneIIR[2][CLDFB_NO_CHANNELS_MAX];
#endif
#endif

    int16_t shd_rot_max_order;
    ivas_orient_trk_state_t *OrientationTracker;

Loading