Commit 7796d2e8 authored by Jouni Paulus
Browse files

added the implementation of the divided function into the else path of...

Added the implementation of the divided function to the #else path of SPLIT_REND_WITH_HEAD_ROT_PARAMBIN.
parent 91807c4e
Loading
Loading
Loading
Loading
Loading
+412 −0
Original line number Diff line number Diff line
@@ -1018,6 +1018,7 @@ static void ivas_dirac_dec_decorrelate_slot(
    return;
}

#ifdef SPLIT_REND_WITH_HEAD_ROT_PARAMBIN
static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices(
    DIRAC_DEC_BIN_HANDLE hDiracDecBin,
    SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom,
@@ -1472,7 +1473,418 @@ static void ivas_dirac_dec_binaural_formulate_target_covariance_matrices(

    return;
}
#else
/*
 * ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices()
 *
 * For one subframe, formulates per-bin covariance data in hDiracDecBin:
 *  - input side:  ChEne[ch][bin], ChCrossRe[bin], ChCrossIm[bin], accumulated over the
 *                 subframe slots from inRe / inIm;
 *  - target side: ChEneOut[ch][bin], ChCrossReOut[bin], ChCrossImOut[bin], built from the
 *                 spatial metadata in hSpatParamRendCom and the gains returned by
 *                 getDirectPartGains();
 *  - frameMeanDiffuseness[bin]: diffuse energy divided by the mean energy per channel.
 * All six covariance arrays are then recursively smoothed with their *Prev counterparts,
 * and the *Prev arrays are overwritten with the smoothed result for the next call.
 *
 * hDiracDecBin       : i/o: renderer state; the arrays listed above are written, and
 *                           earlyPartEneCorrection, diffuseFieldCoherence(X/Y/Z),
 *                           hDiffuseDist and renderStereoOutputInsteadOfBinaural are read
 * hSpatParamRendCom  : i  : num_freq_bands, render_to_md_map, subframe_nbslots,
 *                           numSimultaneousDirections and the per-bin direction, ratio and
 *                           coherence metadata are read
 * hConfig            : i  : separateCenterChannelRendering, ivas_format, mc_mode,
 *                           ivas_total_brate, nchan_transport and qualityBasedSmFactor are read
 * inRe, inIm         : i  : real / imaginary input samples indexed [ch][slot][bin];
 *                           channels 0..BINAURAL_CHANNELS-1 are read
 * Rmat               : i  : forwarded unchanged to getDirectPartGains()
 *                           (presumably the head rotation matrix - TODO confirm)
 * subframe           : i  : index into render_to_md_map[] and subframe_nbslots[]
 * isHeadtracked      : i  : forwarded unchanged to getDirectPartGains()
 */
static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices(
    DIRAC_DEC_BIN_HANDLE hDiracDecBin,
    SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom,
    PARAMBIN_REND_CONFIG_HANDLE hConfig,
    float inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX],
    float inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX],
    float Rmat[3][3],
    const int16_t subframe,
    const int16_t isHeadtracked )
{
    int16_t ch, slot, bin;
    int16_t separateCenterChannelRendering;
    int16_t nBins, idx;
    float frameMeanDiffusenessEneWeight[CLDFB_NO_CHANNELS_MAX];
    float IIReneLimiterFactor;
    float qualityBasedSmFactor;
    float lowBitRateEQ[CLDFB_NO_CHANNELS_MAX];
    uint8_t applyLowBitRateEQ;
    int16_t dirac_read_idx;
    float subFrameTotalEne[CLDFB_NO_CHANNELS_MAX];
    PARAMBIN_HRTF_GAIN_CACHE gainCache[MAX_GAIN_CACHE_SIZE];
    IVAS_FORMAT ivas_format;
    MC_MODE mc_mode;
    int32_t ivas_total_brate;
    int16_t nchan_transport;

    /* Take local copies of the configuration values used below */
    separateCenterChannelRendering = hConfig->separateCenterChannelRendering;
    ivas_format = hConfig->ivas_format;
    mc_mode = hConfig->mc_mode;
    ivas_total_brate = hConfig->ivas_total_brate;
    nchan_transport = hConfig->nchan_transport;
    qualityBasedSmFactor = hConfig->qualityBasedSmFactor;
    /* From here on the squared factor is used (limiter factor and covariance scaling below) */
    qualityBasedSmFactor *= qualityBasedSmFactor;
    nBins = hSpatParamRendCom->num_freq_bands; /* Actually bins */

    /* Clear the accumulators of the current subframe; the *Prev arrays are left untouched */
    set_zero( hDiracDecBin->ChCrossRe, nBins );
    set_zero( hDiracDecBin->ChCrossIm, nBins );
    set_zero( hDiracDecBin->ChCrossReOut, nBins );
    set_zero( hDiracDecBin->ChCrossImOut, nBins );
    for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
    {
        set_zero( hDiracDecBin->ChEne[ch], nBins );
        set_zero( hDiracDecBin->ChEneOut[ch], nBins );
    }
    set_zero( hDiracDecBin->frameMeanDiffuseness, nBins );

    set_zero( frameMeanDiffusenessEneWeight, CLDFB_NO_CHANNELS_MAX );

    for ( idx = 0; idx < MAX_GAIN_CACHE_SIZE; idx++ )
    {
        gainCache[idx].azi = -1000; /* Use -1000 as value for uninitialized cache. */
    }

    /* Determine EQ for low bit rates (13.2 and 16.4 kbps) */
    /* Only the entries LOW_BIT_RATE_BINAURAL_EQ_OFFSET .. OFFSET + LOW_BIT_RATE_BINAURAL_EQ_BINS - 1
     * of lowBitRateEQ[] are filled here; the remaining entries stay uninitialized and are not read. */
    applyLowBitRateEQ = 0;
    if ( ( ivas_format == MASA_FORMAT || ivas_format == MC_FORMAT ) && ivas_total_brate < MASA_STEREO_MIN_BITRATE )
    {
        applyLowBitRateEQ = 1;
        if ( ivas_total_brate == IVAS_16k4 )
        {
            /* At 16.4 kbps the table value is averaged with 1.0 */
            for ( bin = 0; bin < LOW_BIT_RATE_BINAURAL_EQ_BINS; bin++ )
            {
                lowBitRateEQ[bin + LOW_BIT_RATE_BINAURAL_EQ_OFFSET] = lowBitRateBinauralEQ[bin] * 0.5f + 0.5f;
            }
        }
        else
        {
            for ( bin = 0; bin < LOW_BIT_RATE_BINAURAL_EQ_BINS; bin++ )
            {
                lowBitRateEQ[bin + LOW_BIT_RATE_BINAURAL_EQ_OFFSET] = lowBitRateBinauralEQ[bin];
            }
        }
    }

    /* Formulate input and target covariance matrices for this subframe */
    set_zero( subFrameTotalEne, CLDFB_NO_CHANNELS_MAX );
    dirac_read_idx = hSpatParamRendCom->render_to_md_map[subframe];

    /* Calculate input covariance matrix */
    for ( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ )
    {
        for ( bin = 0; bin < nBins; bin++ )
        {
            for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
            {
                float instEne;

                /* Squared magnitude of the complex sample; summed per channel and over all channels */
                instEne = ( inRe[ch][slot][bin] * inRe[ch][slot][bin] );
                instEne += ( inIm[ch][slot][bin] * inIm[ch][slot][bin] );
                hDiracDecBin->ChEne[ch][bin] += instEne;
                subFrameTotalEne[bin] += instEne;
            }
            /* Cross term between channel 0 and channel 1: Re = Re0*Re1 + Im0*Im1, Im = Re0*Im1 - Im0*Re1 */
            hDiracDecBin->ChCrossRe[bin] += inRe[0][slot][bin] * inRe[1][slot][bin];
            hDiracDecBin->ChCrossRe[bin] += inIm[0][slot][bin] * inIm[1][slot][bin];
            hDiracDecBin->ChCrossIm[bin] += inRe[0][slot][bin] * inIm[1][slot][bin];
            hDiracDecBin->ChCrossIm[bin] -= inIm[0][slot][bin] * inRe[1][slot][bin];
        }
    }

    /* Apply EQ at low bit rates */
    /* The EQ scales only subFrameTotalEne (used for the target side); ChEne / ChCross are not scaled.
     * Bins below LOW_BIT_RATE_BINAURAL_EQ_OFFSET are left as they are, and bins from lastEqBin
     * upwards all reuse the last EQ value. */
    if ( applyLowBitRateEQ )
    {
        int16_t lastEqBin = LOW_BIT_RATE_BINAURAL_EQ_OFFSET + LOW_BIT_RATE_BINAURAL_EQ_BINS - 1;

        for ( bin = LOW_BIT_RATE_BINAURAL_EQ_OFFSET; bin < lastEqBin; bin++ )
        {
            subFrameTotalEne[bin] *= lowBitRateEQ[bin];
        }
        for ( ; bin < nBins; bin++ )
        {
            subFrameTotalEne[bin] *= lowBitRateEQ[lastEqBin];
        }
    }

    /* For SBA with two transport channels the total energy is replaced by the energy of the
     * sum of channels 0 and 1. The low bit rate EQ above cannot be active in this case,
     * because it requires MASA_FORMAT or MC_FORMAT. */
    if ( ivas_format == SBA_FORMAT && nchan_transport == 2 )
    {
        float tempRe, tempIm;

        set_zero( subFrameTotalEne, CLDFB_NO_CHANNELS_MAX );

        for ( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ )
        {
            for ( bin = 0; bin < nBins; bin++ )
            {
                tempRe = inRe[0][slot][bin] + inRe[1][slot][bin];
                tempIm = inIm[0][slot][bin] + inIm[1][slot][bin];
                subFrameTotalEne[bin] += tempRe * tempRe + tempIm * tempIm;
            }
        }
    }

    /* Determine target covariance matrix containing target binaural properties */
    for ( bin = 0; bin < nBins; bin++ )
    {
        float diffuseness = 1.0f;              /* ratio1 and ratio2 are subtracted from diffuseness further below */
        float surCoh = 0.0f, spreadCoh = 0.0f; /* Default values if spreadSurroundCoherenceApplied == false */
        float diffEne, dirEne, meanEnePerCh;
        int16_t dirIndex;

        /* When BINAURAL_ROOM is not indicated, hBinaural->earlyPartEneCorrection[bin] values are all 1.0f.
         * When BINAURAL_ROOM is indicated, the binaural audio output is based on combined use of the
         * HRTF data set and a BRIR-based data set. The HRTF data set is spectrally corrected to match
         * the early spectrum of the BRIR data, using the spectral correction data in
         * hBinaural->earlyPartEneCorrection[bin], based on the BRIR set. */
        /* The division by 2 presumably reflects the two channels summed into subFrameTotalEne
         * (BINAURAL_CHANNELS is not defined in this view - TODO confirm) */
        meanEnePerCh = hDiracDecBin->earlyPartEneCorrection[bin] * subFrameTotalEne[bin] / 2.0f;

        /* Determine direct part target covariance matrix (for 1 or 2 directions) */
        for ( dirIndex = 0; dirIndex < hSpatParamRendCom->numSimultaneousDirections; dirIndex++ )
        {
            int16_t aziDeg, eleDeg;
            float lRealp, lImagp, rRealp, rImagp;
            float lRealpTmp, lImagpTmp, rRealpTmp, rImagpTmp;
            float hrtfEne[BINAURAL_CHANNELS], hrtfCrossRe, hrtfCrossIm, ratio;

            if ( dirIndex == 0 ) /* For first of the two simultaneous directions */
            {
                aziDeg = hSpatParamRendCom->azimuth[dirac_read_idx][bin];
                eleDeg = hSpatParamRendCom->elevation[dirac_read_idx][bin];
                ratio = hSpatParamRendCom->energy_ratio1[dirac_read_idx][bin];
                spreadCoh = hSpatParamRendCom->spreadCoherence[dirac_read_idx][bin];
            }
            else /* For second of the two simultaneous directions */
            {
                if ( ( ratio = hSpatParamRendCom->energy_ratio2[dirac_read_idx][bin] ) < 0.001 )
                {
                    /* This touches only MASA path where second direction always has smaller ratio and
                     * for non-2dir it is zero. As the whole direction contribution is multiplied with
                     * the ratio, a very small ratio does not contribute any energy to output. Thus,
                     * it is better to save complexity. */
                    /* Note: a skipped second direction is also not subtracted from diffuseness */
                    continue;
                }
                aziDeg = hSpatParamRendCom->azimuth2[dirac_read_idx][bin];
                eleDeg = hSpatParamRendCom->elevation2[dirac_read_idx][bin];
                spreadCoh = hSpatParamRendCom->spreadCoherence2[dirac_read_idx][bin];
            }
            diffuseness -= ratio; /* diffuseness = 1 - ratio1 - ratio2 */

            if ( separateCenterChannelRendering )
            {
                /* In masa + mono rendering mode, the center directions originate from phantom sources, so the
                 * spread coherence is increased */
                float aziRad, eleRad, doaVectorX, spatialAngleDeg, altSpreadCoh;

                /* altSpreadCoh is 1 at an angular distance of 0 degrees and reaches 0 at 30 degrees,
                 * where the distance is acos( cos(azi) * cos(ele) ); the larger of the two values is kept */
                aziRad = (float) aziDeg * PI_OVER_180;
                eleRad = (float) eleDeg * PI_OVER_180;
                doaVectorX = cosf( aziRad ) * cosf( eleRad );
                spatialAngleDeg = acosf( doaVectorX ) * _180_OVER_PI;
                altSpreadCoh = 1.0f - ( spatialAngleDeg / 30.0f );
                spreadCoh = max( spreadCoh, altSpreadCoh );
            }

            /* Each direction uses three consecutive gain cache entries:
             * dirIndex * 3 (center), dirIndex * 3 + 1 (azimuth + 30) and dirIndex * 3 + 2 (azimuth + 330) */
            getDirectPartGains( bin, aziDeg, eleDeg, &lRealp, &lImagp, &rRealp, &rImagp, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat, &gainCache[( dirIndex * 3 )], isHeadtracked );

            if ( hDiracDecBin->renderStereoOutputInsteadOfBinaural )
            {
                /* Synthesizing spread coherence is not needed for stereo loudspeaker output,
                 * as directional sound is reproduced with two loudspeakers in any case */
                spreadCoh = 0.0f;
            }

            if ( spreadCoh > 0.0f )
            {
                float centerMul, sidesMul;
                float hrtfEneCenter, hrtfEneSides, hrtfEneRealized, eneCorrectionFactor;
                float w1, w2, w3, eq;

                /* Energy of the center gains before any weighting is applied */
                hrtfEneCenter = ( lRealp * lRealp ) + ( lImagp * lImagp ) + ( rRealp * rRealp ) + ( rImagp * rImagp );

                /* Spread coherence is synthesized as coherent sources at 30 degree horizontal spacing.
                 * The following formulas determine the gains for these sources.
                 * spreadCoh = 0: Only panning
                 * spreadCoh = 0.5: Three sources coherent panning (e.g. 30 0 -30 deg azi)
                 * spreadCoh = 1.0: Two sources coherent panning with gap (as above, but center is silent) */
                if ( spreadCoh < 0.5f )
                {
                    /* 0.0f < spreadCoh < 0.5f */
                    sidesMul = 0.5774f * spreadCoh * 2.0f; /* sqrt(1/3) = 0.5774f */
                    centerMul = 1.0f - ( spreadCoh * 2.0f ) + sidesMul;
                }
                else
                {
                    /* 0.5f <= spreadCoh < 1.0f */
                    centerMul = 2.0f - ( 2.0f * spreadCoh );
                    sidesMul = inv_sqrt( centerMul + 2.0f );
                    centerMul *= sidesMul;
                }

                /* Apply the gain for the center source of the three coherent sources */
                lRealp *= centerMul;
                lImagp *= centerMul;
                rRealp *= centerMul;
                rImagp *= centerMul;

                /* Apply the gain for the left source of the three coherent sources */
                getDirectPartGains( bin, aziDeg + 30, eleDeg, &lRealpTmp, &lImagpTmp, &rRealpTmp, &rImagpTmp, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat, &gainCache[( dirIndex * 3 + 1 )], isHeadtracked );

                hrtfEneSides = ( lRealpTmp * lRealpTmp ) + ( lImagpTmp * lImagpTmp ) + ( rRealpTmp * rRealpTmp ) + ( rImagpTmp * rImagpTmp );
                lRealp += sidesMul * lRealpTmp;
                lImagp += sidesMul * lImagpTmp;
                rRealp += sidesMul * rRealpTmp;
                rImagp += sidesMul * rImagpTmp;

                /* Apply the gain for the right source of the three coherent sources.
                 * -30 degrees to 330 wrapping due to internal functions. */
                getDirectPartGains( bin, aziDeg + 330, eleDeg, &lRealpTmp, &lImagpTmp, &rRealpTmp, &rImagpTmp, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat, &gainCache[( dirIndex * 3 + 2 )], isHeadtracked );

                hrtfEneSides += ( lRealpTmp * lRealpTmp ) + ( lImagpTmp * lImagpTmp ) + ( rRealpTmp * rRealpTmp ) + ( rImagpTmp * rImagpTmp );
                lRealp += sidesMul * lRealpTmp;
                lImagp += sidesMul * lImagpTmp;
                rRealp += sidesMul * rRealpTmp;
                rImagp += sidesMul * rImagpTmp;

                /* Formulate an eneCorrectionFactor that compensates for the coherent summation of the HRTFs */
                /* Ratio of the energy sum of the individually weighted sources to the energy of their
                 * coherent sum; the denominator is floored at 1e-12 to avoid a division by zero */
                hrtfEneRealized = ( lRealp * lRealp ) + ( lImagp * lImagp ) + ( rRealp * rRealp ) + ( rImagp * rImagp );
                eneCorrectionFactor = ( ( hrtfEneSides * sidesMul * sidesMul ) +
                                        ( hrtfEneCenter * centerMul * centerMul ) ) /
                                      max( 1e-12f, hrtfEneRealized );

                /* Weighting factors to determine appropriate target spectrum for spread coherent sound */
                /* w1 + w2 + w3 equals 1 in both branches */
                if ( spreadCoh < 0.5 )
                {
                    w1 = 1.0f - 2.0f * spreadCoh;
                    w2 = 2.0f * spreadCoh;
                    w3 = 0.0f;
                }
                else
                {
                    w1 = 0.0f;
                    w2 = 2.0f - 2.0f * spreadCoh;
                    w3 = 2.0f * spreadCoh - 1.0f;
                }

                if ( ( ivas_format == MC_FORMAT && mc_mode == MC_MODE_MCMASA ) )
                {
                    /* Bins beyond the defined table range reuse the last table entry */
                    idx = min( bin, MASA_NUM_DEFINED_SUR_SPR_COH_ENE_BINS - 1 );

                    /* Apply the target spectrum to the eneCorrectionFactor */
                    if ( separateCenterChannelRendering ) /* spreadCoh mostly originates from phantom sources in separate channel rendering mode */
                    {
                        eneCorrectionFactor *= w1 * 1.0f + ( w2 + w3 ) * spreadCohEne1[idx];
                    }
                    else
                    {
                        eneCorrectionFactor *= w1 * 1.0f + w2 * spreadCohEne05[idx] + w3 * spreadCohEne1[idx];
                    }
                }

                /* Equalize the spread coherent combined HRTFs */
                /* The amplitude gain is capped at 4.0 */
                eq = min( 4.0f, sqrtf( eneCorrectionFactor ) );
                lRealp *= eq;
                lImagp *= eq;
                rRealp *= eq;
                rImagp *= eq;
            }

            /* Energies and cross term of the final left / right gains; the imaginary cross term
             * uses the same sign convention as the input cross term (Re_l*Im_r - Im_l*Re_r) */
            hrtfEne[0] = ( lRealp * lRealp ) + ( lImagp * lImagp );
            hrtfEne[1] = ( rRealp * rRealp ) + ( rImagp * rImagp );
            hrtfCrossRe = ( lRealp * rRealp ) + ( lImagp * rImagp );
            hrtfCrossIm = ( -lImagp * rRealp ) + ( lRealp * rImagp );

            /* Add direct part (1 or 2) covariance matrix */
            dirEne = ratio * meanEnePerCh;
            hDiracDecBin->ChEneOut[0][bin] += dirEne * hrtfEne[0]; /* Dir ene part*/
            hDiracDecBin->ChEneOut[1][bin] += dirEne * hrtfEne[1];
            hDiracDecBin->ChCrossReOut[bin] += dirEne * hrtfCrossRe; /* Dir cross re */
            hDiracDecBin->ChCrossImOut[bin] += dirEne * hrtfCrossIm; /* Dir cross im */
        }

        /* Add diffuse / ambient part covariance matrix */
        /* Negative remainders (ratio sum above 1) are clamped to zero */
        diffuseness = max( 0.0f, diffuseness );
        diffEne = diffuseness * meanEnePerCh;
        surCoh = hSpatParamRendCom->surroundingCoherence[dirac_read_idx][bin];
        if ( ( ivas_format == MC_FORMAT && mc_mode == MC_MODE_MCMASA ) )
        {
            if ( !hDiracDecBin->renderStereoOutputInsteadOfBinaural )
            {
                idx = min( bin, MASA_NUM_DEFINED_SUR_SPR_COH_ENE_BINS - 1 );
                /* Apply target spectrum that emphasizes low frequencies when the sound is surround coherent */
                diffEne *= ( 1.0f - surCoh ) + surCoh * surCohEne[idx];
            }
        }
        hDiracDecBin->ChEneOut[0][bin] += diffEne; /* Diff ene part*/
        hDiracDecBin->ChEneOut[1][bin] += diffEne;

        /* The diffuse part contributes to the real cross term only; ChCrossImOut is not changed here */
        if ( hDiracDecBin->renderStereoOutputInsteadOfBinaural )
        {
            /* When rendering stereo, ambience (except for surround coherent sound) has zero ICC. */
            hDiracDecBin->ChCrossReOut[bin] += surCoh * diffEne;
        }
        else /* When rendering binaural, ambience has frequency dependent ICC. */
        {
            if ( ivas_format == SBA_FORMAT && bin < BINAURAL_COHERENCE_DIFFERENCE_BINS )
            {
                float diffuseFieldCoherence;
                /* Weighted sum of the X / Y / Z coherence values using the ratios in hDiffuseDist */
                diffuseFieldCoherence = hDiracDecBin->hDiffuseDist->diffuseRatioX[bin] * hDiracDecBin->diffuseFieldCoherenceX[bin] + hDiracDecBin->hDiffuseDist->diffuseRatioY[bin] * hDiracDecBin->diffuseFieldCoherenceY[bin] + hDiracDecBin->hDiffuseDist->diffuseRatioZ[bin] * hDiracDecBin->diffuseFieldCoherenceZ[bin];
                hDiracDecBin->ChCrossReOut[bin] += ( ( 1.0f - surCoh ) * diffuseFieldCoherence + surCoh ) * diffEne;
            }
            else
            {
                hDiracDecBin->ChCrossReOut[bin] += ( ( 1.0f - surCoh ) * hDiracDecBin->diffuseFieldCoherence[bin] + surCoh ) * diffEne;
            }
        }

        /* Store parameters for formulating average diffuseness over frame */
        hDiracDecBin->frameMeanDiffuseness[bin] += diffEne;
        frameMeanDiffusenessEneWeight[bin] += meanEnePerCh;
    }

    /* Formulate average diffuseness over frame */
    /* Both arrays were zeroed at the top of this function, so this is the ratio for this subframe
     * only; the denominator is floored at 1e-12 to avoid a division by zero */
    for ( bin = 0; bin < nBins; bin++ )
    {
        hDiracDecBin->frameMeanDiffuseness[bin] /= fmaxf( 1e-12f, frameMeanDiffusenessEneWeight[bin] );
    }

    /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */
    /* The base limiter factor is 16 for MASA below MASA_STEREO_MIN_BITRATE and 8 otherwise */
    if ( ivas_format == MASA_FORMAT && ivas_total_brate < MASA_STEREO_MIN_BITRATE )
    {
        IIReneLimiterFactor = 16.0f + ( 1.0f - qualityBasedSmFactor );
    }
    else
    {
        IIReneLimiterFactor = 8.0f + ( 1.0f - qualityBasedSmFactor );
    }
    for ( bin = 0; bin < nBins; bin++ )
    {
        float eneRatio, IIReneLimiter;

        /* Temporally smooth cov mtx estimates for resulting mixing matrix stability. The design principle is that
         * the energy history (IIR) must not be more than double of the current frame energy. This provides more
         * robust performance at energy offsets when compared to typical IIR averaging. */
        /* eneRatio compares the not yet smoothed input energy of this subframe with the stored
         * (smoothed) previous energy; the resulting weight of the history is at most 1.
         * NOTE(review): the limiter factors used here are 8 / 16 based, while the comment above
         * speaks of "double" - confirm which one is intended. */
        eneRatio = ( hDiracDecBin->ChEne[0][bin] + hDiracDecBin->ChEne[1][bin] ) / fmaxf( 1e-12f, ( hDiracDecBin->ChEnePrev[0][bin] + hDiracDecBin->ChEnePrev[1][bin] ) );
        IIReneLimiter = fminf( 1.0f, eneRatio * IIReneLimiterFactor );

        /* Scale the current estimates with the (squared) quality based factor */
        hDiracDecBin->ChCrossRe[bin] *= qualityBasedSmFactor;
        hDiracDecBin->ChCrossIm[bin] *= qualityBasedSmFactor;
        hDiracDecBin->ChCrossReOut[bin] *= qualityBasedSmFactor;
        hDiracDecBin->ChCrossImOut[bin] *= qualityBasedSmFactor;

        for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
        {
            hDiracDecBin->ChEne[ch][bin] *= qualityBasedSmFactor;
            hDiracDecBin->ChEneOut[ch][bin] *= qualityBasedSmFactor;
        }

        /* Add the limited history on top of the scaled current estimates */
        hDiracDecBin->ChCrossRe[bin] += IIReneLimiter * hDiracDecBin->ChCrossRePrev[bin];
        hDiracDecBin->ChCrossIm[bin] += IIReneLimiter * hDiracDecBin->ChCrossImPrev[bin];
        hDiracDecBin->ChCrossReOut[bin] += IIReneLimiter * hDiracDecBin->ChCrossReOutPrev[bin];
        hDiracDecBin->ChCrossImOut[bin] += IIReneLimiter * hDiracDecBin->ChCrossImOutPrev[bin];

        for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
        {
            hDiracDecBin->ChEne[ch][bin] += IIReneLimiter * hDiracDecBin->ChEnePrev[ch][bin];
            hDiracDecBin->ChEneOut[ch][bin] += IIReneLimiter * hDiracDecBin->ChEneOutPrev[ch][bin];
        }

        /* Store energy values and coefficients for next round */
        hDiracDecBin->ChCrossRePrev[bin] = hDiracDecBin->ChCrossRe[bin];
        hDiracDecBin->ChCrossImPrev[bin] = hDiracDecBin->ChCrossIm[bin];
        hDiracDecBin->ChCrossReOutPrev[bin] = hDiracDecBin->ChCrossReOut[bin];
        hDiracDecBin->ChCrossImOutPrev[bin] = hDiracDecBin->ChCrossImOut[bin];

        for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
        {
            hDiracDecBin->ChEnePrev[ch][bin] = hDiracDecBin->ChEne[ch][bin];
            hDiracDecBin->ChEneOutPrev[ch][bin] = hDiracDecBin->ChEneOut[ch][bin];
        }
    }

    return;
}
#endif

static void ivas_dirac_dec_binaural_determine_processing_matrices(
    DIRAC_DEC_BIN_HANDLE hDiracDecBin,