added the implementation of the divided function into the else path of... (7796d2e8) · Commits · IVAS Codec Public Collaboration / IVAS Codec

lib_rend/ivas_dirac_dec_binaural_functions.c

+412 −0

Original line number	Diff line number	Diff line
		@@ -1018,6 +1018,7 @@ static void ivas_dirac_dec_decorrelate_slot(
		return;
		}

		#ifdef SPLIT_REND_WITH_HEAD_ROT_PARAMBIN
		static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices(
		DIRAC_DEC_BIN_HANDLE hDiracDecBin,
		SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom,
		@@ -1472,7 +1473,418 @@ static void ivas_dirac_dec_binaural_formulate_target_covariance_matrices(

		return;
		}
		#else
		/*
		 * ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices()
		 *
		 * Formulates, for one subframe, the 2x2 input covariance matrix of the two input
		 * channels and the 2x2 target covariance matrix of the output, per frequency bin.
		 *
		 * Processing steps:
		 *  1. Accumulate the per-bin channel energies (ChEne) and the cross term
		 *     conj( ch0 ) * ch1 (ChCrossRe / ChCrossIm) over all slots of the subframe.
		 *  2. Derive the per-bin total energy, optionally equalized at low bit rates or
		 *     replaced by the energy of the channel sum for SBA with 2 transport channels.
		 *  3. Build the target covariance (ChEneOut, ChCrossReOut, ChCrossImOut) from the
		 *     direct part gains returned by getDirectPartGains() for each direction, plus
		 *     a diffuse / ambient part.
		 *  4. Normalize the accumulated diffuse energy into frameMeanDiffuseness.
		 *  5. Temporally smooth all covariance terms using the *Prev buffers, and store
		 *     the smoothed values back into the *Prev buffers for the next call.
		 *
		 * Parameters:
		 *  hDiracDecBin      : i/o: renderer state; covariance and *Prev buffers are overwritten
		 *  hSpatParamRendCom : i  : spatial parameters (directions, ratios, coherences),
		 *                           number of bins and number of slots of the subframe
		 *  hConfig           : i  : format, MC mode, bit rate, transport channel count,
		 *                           quality based smoothing factor and center channel flag
		 *  inRe, inIm        : i  : real / imaginary input samples indexed [channel][slot][bin];
		 *                           only channels 0 .. BINAURAL_CHANNELS-1 are read
		 *  Rmat              : i  : forwarded unchanged to getDirectPartGains()
		 *  subframe          : i  : index of the subframe to process
		 *  isHeadtracked     : i  : forwarded unchanged to getDirectPartGains()
		 *
		 * NOTE(review): gainCache is indexed with dirIndex * 3 + {0, 1, 2}; this assumes
		 * MAX_GAIN_CACHE_SIZE >= 3 * numSimultaneousDirections - confirm against the definition.
		 */
		static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices(
		    DIRAC_DEC_BIN_HANDLE hDiracDecBin,
		    SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom,
		    PARAMBIN_REND_CONFIG_HANDLE hConfig,
		    float inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX],
		    float inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX],
		    float Rmat[3][3],
		    const int16_t subframe,
		    const int16_t isHeadtracked )
		{
		    int16_t ch, slot, bin;
		    int16_t separateCenterChannelRendering;
		    int16_t nBins, idx;
		    float frameMeanDiffusenessEneWeight[CLDFB_NO_CHANNELS_MAX];
		    float IIReneLimiterFactor;
		    float qualityBasedSmFactor;
		    float lowBitRateEQ[CLDFB_NO_CHANNELS_MAX];
		    uint8_t applyLowBitRateEQ;
		    int16_t dirac_read_idx;
		    float subFrameTotalEne[CLDFB_NO_CHANNELS_MAX];
		    PARAMBIN_HRTF_GAIN_CACHE gainCache[MAX_GAIN_CACHE_SIZE];
		    IVAS_FORMAT ivas_format;
		    MC_MODE mc_mode;
		    int32_t ivas_total_brate;
		    int16_t nchan_transport;

		    separateCenterChannelRendering = hConfig->separateCenterChannelRendering;
		    ivas_format = hConfig->ivas_format;
		    mc_mode = hConfig->mc_mode;
		    ivas_total_brate = hConfig->ivas_total_brate;
		    nchan_transport = hConfig->nchan_transport;
		    qualityBasedSmFactor = hConfig->qualityBasedSmFactor;
		    qualityBasedSmFactor *= qualityBasedSmFactor; /* the squared factor is used throughout */
		    nBins = hSpatParamRendCom->num_freq_bands;    /* Actually bins */

		    /* Reset all accumulators of this subframe */
		    set_zero( hDiracDecBin->ChCrossRe, nBins );
		    set_zero( hDiracDecBin->ChCrossIm, nBins );
		    set_zero( hDiracDecBin->ChCrossReOut, nBins );
		    set_zero( hDiracDecBin->ChCrossImOut, nBins );
		    for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
		    {
		        set_zero( hDiracDecBin->ChEne[ch], nBins );
		        set_zero( hDiracDecBin->ChEneOut[ch], nBins );
		    }
		    set_zero( hDiracDecBin->frameMeanDiffuseness, nBins );

		    set_zero( frameMeanDiffusenessEneWeight, CLDFB_NO_CHANNELS_MAX );

		    for ( idx = 0; idx < MAX_GAIN_CACHE_SIZE; idx++ )
		    {
		        gainCache[idx].azi = -1000; /* Use -1000 as value for uninitialized cache. */
		    }

		    /* Determine EQ for low bit rates (13.2 and 16.4 kbps) */
		    applyLowBitRateEQ = 0;
		    if ( ( ivas_format == MASA_FORMAT || ivas_format == MC_FORMAT ) && ivas_total_brate < MASA_STEREO_MIN_BITRATE )
		    {
		        applyLowBitRateEQ = 1;
		        if ( ivas_total_brate == IVAS_16k4 )
		        {
		            /* Half-strength EQ: midway between the table value and unity gain */
		            for ( bin = 0; bin < LOW_BIT_RATE_BINAURAL_EQ_BINS; bin++ )
		            {
		                lowBitRateEQ[bin + LOW_BIT_RATE_BINAURAL_EQ_OFFSET] = lowBitRateBinauralEQ[bin] * 0.5f + 0.5f;
		            }
		        }
		        else
		        {
		            for ( bin = 0; bin < LOW_BIT_RATE_BINAURAL_EQ_BINS; bin++ )
		            {
		                lowBitRateEQ[bin + LOW_BIT_RATE_BINAURAL_EQ_OFFSET] = lowBitRateBinauralEQ[bin];
		            }
		        }
		    }

		    /* Formulate input and target covariance matrices for this subframe */
		    set_zero( subFrameTotalEne, CLDFB_NO_CHANNELS_MAX );
		    dirac_read_idx = hSpatParamRendCom->render_to_md_map[subframe];

		    /* Calculate input covariance matrix */
		    for ( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ )
		    {
		        for ( bin = 0; bin < nBins; bin++ )
		        {
		            for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
		            {
		                float instEne;

		                instEne = ( inRe[ch][slot][bin] * inRe[ch][slot][bin] );
		                instEne += ( inIm[ch][slot][bin] * inIm[ch][slot][bin] );
		                hDiracDecBin->ChEne[ch][bin] += instEne;
		                subFrameTotalEne[bin] += instEne;
		            }
		            /* Cross term conj( ch0 ) * ch1, accumulated as separate real and imaginary parts */
		            hDiracDecBin->ChCrossRe[bin] += inRe[0][slot][bin] * inRe[1][slot][bin];
		            hDiracDecBin->ChCrossRe[bin] += inIm[0][slot][bin] * inIm[1][slot][bin];
		            hDiracDecBin->ChCrossIm[bin] += inRe[0][slot][bin] * inIm[1][slot][bin];
		            hDiracDecBin->ChCrossIm[bin] -= inIm[0][slot][bin] * inRe[1][slot][bin];
		        }
		    }

		    /* Apply EQ at low bit rates */
		    if ( applyLowBitRateEQ )
		    {
		        int16_t lastEqBin = LOW_BIT_RATE_BINAURAL_EQ_OFFSET + LOW_BIT_RATE_BINAURAL_EQ_BINS - 1;

		        for ( bin = LOW_BIT_RATE_BINAURAL_EQ_OFFSET; bin < lastEqBin; bin++ )
		        {
		            subFrameTotalEne[bin] *= lowBitRateEQ[bin];
		        }
		        /* From the last defined EQ bin upwards, the last EQ value is held */
		        for ( ; bin < nBins; bin++ )
		        {
		            subFrameTotalEne[bin] *= lowBitRateEQ[lastEqBin];
		        }
		    }

		    if ( ivas_format == SBA_FORMAT && nchan_transport == 2 )
		    {
		        float tempRe, tempIm;

		        /* Replace the total energy by the energy of the sum of the two input channels */
		        set_zero( subFrameTotalEne, CLDFB_NO_CHANNELS_MAX );

		        for ( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ )
		        {
		            for ( bin = 0; bin < nBins; bin++ )
		            {
		                tempRe = inRe[0][slot][bin] + inRe[1][slot][bin];
		                tempIm = inIm[0][slot][bin] + inIm[1][slot][bin];
		                subFrameTotalEne[bin] += tempRe * tempRe + tempIm * tempIm;
		            }
		        }
		    }

		    /* Determine target covariance matrix containing target binaural properties */
		    for ( bin = 0; bin < nBins; bin++ )
		    {
		        float diffuseness = 1.0f;              /* ratio1 and ratio2 are subtracted from diffuseness further below */
		        float surCoh = 0.0f, spreadCoh = 0.0f; /* Default values if spreadSurroundCoherenceApplied == false */
		        float diffEne, dirEne, meanEnePerCh;
		        int16_t dirIndex;

		        /* When BINAURAL_ROOM is not indicated, hBinaural->earlyPartEneCorrection[bin] values are all 1.0f.
		         * When BINAURAL_ROOM is indicated, the binaural audio output is based on combined use of the
		         * HRTF data set and a BRIR-based data set. The HRTF data set is spectrally corrected to match
		         * the early spectrum of the BRIR data, using the spectral correction data in
		         * hBinaural->earlyPartEneCorrection[bin], based on the BRIR set. */
		        meanEnePerCh = hDiracDecBin->earlyPartEneCorrection[bin] * subFrameTotalEne[bin] / 2.0f;

		        /* Determine direct part target covariance matrix (for 1 or 2 directions) */
		        for ( dirIndex = 0; dirIndex < hSpatParamRendCom->numSimultaneousDirections; dirIndex++ )
		        {
		            int16_t aziDeg, eleDeg;
		            float lRealp, lImagp, rRealp, rImagp;
		            float lRealpTmp, lImagpTmp, rRealpTmp, rImagpTmp;
		            float hrtfEne[BINAURAL_CHANNELS], hrtfCrossRe, hrtfCrossIm, ratio;

		            if ( dirIndex == 0 ) /* For first of the two simultaneous directions */
		            {
		                aziDeg = hSpatParamRendCom->azimuth[dirac_read_idx][bin];
		                eleDeg = hSpatParamRendCom->elevation[dirac_read_idx][bin];
		                ratio = hSpatParamRendCom->energy_ratio1[dirac_read_idx][bin];
		                spreadCoh = hSpatParamRendCom->spreadCoherence[dirac_read_idx][bin];
		            }
		            else /* For second of the two simultaneous directions */
		            {
		                if ( ( ratio = hSpatParamRendCom->energy_ratio2[dirac_read_idx][bin] ) < 0.001 )
		                {
		                    /* This touches only MASA path where second direction always has smaller ratio and
		                     * for non-2dir it is zero. As the whole direction contribution is multiplied with
		                     * the ratio, a very small ratio does not contribute any energy to output. Thus,
		                     * it is better to save complexity. */
		                    continue;
		                }
		                aziDeg = hSpatParamRendCom->azimuth2[dirac_read_idx][bin];
		                eleDeg = hSpatParamRendCom->elevation2[dirac_read_idx][bin];
		                spreadCoh = hSpatParamRendCom->spreadCoherence2[dirac_read_idx][bin];
		            }
		            diffuseness -= ratio; /* diffuseness = 1 - ratio1 - ratio2 */

		            if ( separateCenterChannelRendering )
		            {
		                /* In masa + mono rendering mode, the center directions originate from phantom sources, so the
		                 * spread coherence is increased */
		                float aziRad, eleRad, doaVectorX, spatialAngleDeg, altSpreadCoh;

		                aziRad = (float) aziDeg * PI_OVER_180;
		                eleRad = (float) eleDeg * PI_OVER_180;
		                doaVectorX = cosf( aziRad ) * cosf( eleRad );
		                spatialAngleDeg = acosf( doaVectorX ) * _180_OVER_PI;
		                altSpreadCoh = 1.0f - ( spatialAngleDeg / 30.0f );
		                spreadCoh = max( spreadCoh, altSpreadCoh );
		            }

		            getDirectPartGains( bin, aziDeg, eleDeg, &lRealp, &lImagp, &rRealp, &rImagp, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat, &gainCache[( dirIndex * 3 )], isHeadtracked );

		            if ( hDiracDecBin->renderStereoOutputInsteadOfBinaural )
		            {
		                /* Synthesizing spread coherence is not needed for stereo loudspeaker output,
		                 * as directional sound is reproduced with two loudspeakers in any case */
		                spreadCoh = 0.0f;
		            }

		            if ( spreadCoh > 0.0f )
		            {
		                float centerMul, sidesMul;
		                float hrtfEneCenter, hrtfEneSides, hrtfEneRealized, eneCorrectionFactor;
		                float w1, w2, w3, eq;

		                hrtfEneCenter = ( lRealp * lRealp ) + ( lImagp * lImagp ) + ( rRealp * rRealp ) + ( rImagp * rImagp );

		                /* Spread coherence is synthesized as coherent sources at 30 degree horizontal spacing.
		                 * The following formulas determine the gains for these sources.
		                 * spreadCoh = 0: Only panning
		                 * spreadCoh = 0.5: Three sources coherent panning (e.g. 30 0 -30 deg azi)
		                 * spreadCoh = 1.0: Two sources coherent panning with gap (as above, but center is silent) */
		                if ( spreadCoh < 0.5f )
		                {
		                    /* 0.0f < spreadCoh < 0.5f */
		                    sidesMul = 0.5774f * spreadCoh * 2.0f; /* sqrt(1/3) = 0.5774f */
		                    centerMul = 1.0f - ( spreadCoh * 2.0f ) + sidesMul;
		                }
		                else
		                {
		                    /* 0.5f <= spreadCoh < 1.0f */
		                    centerMul = 2.0f - ( 2.0f * spreadCoh );
		                    sidesMul = inv_sqrt( centerMul + 2.0f );
		                    centerMul *= sidesMul;
		                }

		                /* Apply the gain for the center source of the three coherent sources */
		                lRealp *= centerMul;
		                lImagp *= centerMul;
		                rRealp *= centerMul;
		                rImagp *= centerMul;

		                /* Apply the gain for the left source of the three coherent sources */
		                getDirectPartGains( bin, aziDeg + 30, eleDeg, &lRealpTmp, &lImagpTmp, &rRealpTmp, &rImagpTmp, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat, &gainCache[( dirIndex * 3 + 1 )], isHeadtracked );

		                hrtfEneSides = ( lRealpTmp * lRealpTmp ) + ( lImagpTmp * lImagpTmp ) + ( rRealpTmp * rRealpTmp ) + ( rImagpTmp * rImagpTmp );
		                lRealp += sidesMul * lRealpTmp;
		                lImagp += sidesMul * lImagpTmp;
		                rRealp += sidesMul * rRealpTmp;
		                rImagp += sidesMul * rImagpTmp;

		                /* Apply the gain for the right source of the three coherent sources.
		                 * -30 degrees to 330 wrapping due to internal functions. */
		                getDirectPartGains( bin, aziDeg + 330, eleDeg, &lRealpTmp, &lImagpTmp, &rRealpTmp, &rImagpTmp, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat, &gainCache[( dirIndex * 3 + 2 )], isHeadtracked );

		                hrtfEneSides += ( lRealpTmp * lRealpTmp ) + ( lImagpTmp * lImagpTmp ) + ( rRealpTmp * rRealpTmp ) + ( rImagpTmp * rImagpTmp );
		                lRealp += sidesMul * lRealpTmp;
		                lImagp += sidesMul * lImagpTmp;
		                rRealp += sidesMul * rRealpTmp;
		                rImagp += sidesMul * rImagpTmp;

		                /* Formulate an eneCorrectionFactor that compensates for the coherent summation of the HRTFs */
		                hrtfEneRealized = ( lRealp * lRealp ) + ( lImagp * lImagp ) + ( rRealp * rRealp ) + ( rImagp * rImagp );
		                eneCorrectionFactor = ( ( hrtfEneSides * sidesMul * sidesMul ) +
		                                        ( hrtfEneCenter * centerMul * centerMul ) ) /
		                                      max( 1e-12f, hrtfEneRealized );

		                /* Weighting factors to determine appropriate target spectrum for spread coherent sound */
		                if ( spreadCoh < 0.5 )
		                {
		                    w1 = 1.0f - 2.0f * spreadCoh;
		                    w2 = 2.0f * spreadCoh;
		                    w3 = 0.0f;
		                }
		                else
		                {
		                    w1 = 0.0f;
		                    w2 = 2.0f - 2.0f * spreadCoh;
		                    w3 = 2.0f * spreadCoh - 1.0f;
		                }

		                if ( ( ivas_format == MC_FORMAT && mc_mode == MC_MODE_MCMASA ) )
		                {
		                    idx = min( bin, MASA_NUM_DEFINED_SUR_SPR_COH_ENE_BINS - 1 );

		                    /* Apply the target spectrum to the eneCorrectionFactor. The factor is scaled
		                     * (not replaced), so the coherent-summation compensation above is retained. */
		                    if ( separateCenterChannelRendering ) /* spreadCoh mostly originates from phantom sources in separate channel rendering mode */
		                    {
		                        eneCorrectionFactor *= w1 * 1.0f + ( w2 + w3 ) * spreadCohEne1[idx];
		                    }
		                    else
		                    {
		                        eneCorrectionFactor *= w1 * 1.0f + w2 * spreadCohEne05[idx] + w3 * spreadCohEne1[idx];
		                    }
		                }

		                /* Equalize the spread coherent combined HRTFs */
		                eq = min( 4.0f, sqrtf( eneCorrectionFactor ) );
		                lRealp *= eq;
		                lImagp *= eq;
		                rRealp *= eq;
		                rImagp *= eq;
		            }

		            hrtfEne[0] = ( lRealp * lRealp ) + ( lImagp * lImagp );
		            hrtfEne[1] = ( rRealp * rRealp ) + ( rImagp * rImagp );
		            hrtfCrossRe = ( lRealp * rRealp ) + ( lImagp * rImagp );
		            hrtfCrossIm = ( -lImagp * rRealp ) + ( lRealp * rImagp );

		            /* Add direct part (1 or 2) covariance matrix */
		            dirEne = ratio * meanEnePerCh;
		            hDiracDecBin->ChEneOut[0][bin] += dirEne * hrtfEne[0]; /* Dir ene part*/
		            hDiracDecBin->ChEneOut[1][bin] += dirEne * hrtfEne[1];
		            hDiracDecBin->ChCrossReOut[bin] += dirEne * hrtfCrossRe; /* Dir cross re */
		            hDiracDecBin->ChCrossImOut[bin] += dirEne * hrtfCrossIm; /* Dir cross im */
		        }

		        /* Add diffuse / ambient part covariance matrix */
		        diffuseness = max( 0.0f, diffuseness );
		        diffEne = diffuseness * meanEnePerCh;
		        surCoh = hSpatParamRendCom->surroundingCoherence[dirac_read_idx][bin];
		        if ( ( ivas_format == MC_FORMAT && mc_mode == MC_MODE_MCMASA ) )
		        {
		            if ( !hDiracDecBin->renderStereoOutputInsteadOfBinaural )
		            {
		                idx = min( bin, MASA_NUM_DEFINED_SUR_SPR_COH_ENE_BINS - 1 );
		                /* Apply target spectrum that emphasizes low frequencies when the sound is surround coherent.
		                 * The diffuse energy is scaled here; a plain assignment would discard it. */
		                diffEne *= ( 1.0f - surCoh ) + surCoh * surCohEne[idx];
		            }
		        }
		        hDiracDecBin->ChEneOut[0][bin] += diffEne; /* Diff ene part*/
		        hDiracDecBin->ChEneOut[1][bin] += diffEne;

		        if ( hDiracDecBin->renderStereoOutputInsteadOfBinaural )
		        {
		            /* When rendering stereo, ambience (except for surround coherent sound) has zero ICC. */
		            hDiracDecBin->ChCrossReOut[bin] += surCoh * diffEne;
		        }
		        else /* When rendering binaural, ambience has frequency dependent ICC. */
		        {
		            if ( ivas_format == SBA_FORMAT && bin < BINAURAL_COHERENCE_DIFFERENCE_BINS )
		            {
		                float diffuseFieldCoherence;
		                diffuseFieldCoherence = hDiracDecBin->hDiffuseDist->diffuseRatioX[bin] * hDiracDecBin->diffuseFieldCoherenceX[bin] + hDiracDecBin->hDiffuseDist->diffuseRatioY[bin] * hDiracDecBin->diffuseFieldCoherenceY[bin] + hDiracDecBin->hDiffuseDist->diffuseRatioZ[bin] * hDiracDecBin->diffuseFieldCoherenceZ[bin];
		                hDiracDecBin->ChCrossReOut[bin] += ( ( 1.0f - surCoh ) * diffuseFieldCoherence + surCoh ) * diffEne;
		            }
		            else
		            {
		                hDiracDecBin->ChCrossReOut[bin] += ( ( 1.0f - surCoh ) * hDiracDecBin->diffuseFieldCoherence[bin] + surCoh ) * diffEne;
		            }
		        }

		        /* Store parameters for formulating average diffuseness over frame */
		        hDiracDecBin->frameMeanDiffuseness[bin] += diffEne;
		        frameMeanDiffusenessEneWeight[bin] += meanEnePerCh;
		    }

		    /* Formulate average diffuseness over frame */
		    for ( bin = 0; bin < nBins; bin++ )
		    {
		        /* The 1e-12f floor avoids a division by zero for silent bins */
		        hDiracDecBin->frameMeanDiffuseness[bin] /= fmaxf( 1e-12f, frameMeanDiffusenessEneWeight[bin] );
		    }

		    /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */
		    if ( ivas_format == MASA_FORMAT && ivas_total_brate < MASA_STEREO_MIN_BITRATE )
		    {
		        IIReneLimiterFactor = 16.0f + ( 1.0f - qualityBasedSmFactor );
		    }
		    else
		    {
		        IIReneLimiterFactor = 8.0f + ( 1.0f - qualityBasedSmFactor );
		    }
		    for ( bin = 0; bin < nBins; bin++ )
		    {
		        float eneRatio, IIReneLimiter;

		        /* Temporally smooth cov mtx estimates for resulting mixing matrix stability. The design principle is that
		         * the energy history (IIR) must not be more than double of the current frame energy. This provides more
		         * robust performance at energy offsets when compared to typical IIR averaging. */
		        eneRatio = ( hDiracDecBin->ChEne[0][bin] + hDiracDecBin->ChEne[1][bin] ) / fmaxf( 1e-12f, ( hDiracDecBin->ChEnePrev[0][bin] + hDiracDecBin->ChEnePrev[1][bin] ) );
		        IIReneLimiter = fminf( 1.0f, eneRatio * IIReneLimiterFactor );

		        /* Weight the current estimates with the (squared) quality based smoothing factor */
		        hDiracDecBin->ChCrossRe[bin] *= qualityBasedSmFactor;
		        hDiracDecBin->ChCrossIm[bin] *= qualityBasedSmFactor;
		        hDiracDecBin->ChCrossReOut[bin] *= qualityBasedSmFactor;
		        hDiracDecBin->ChCrossImOut[bin] *= qualityBasedSmFactor;

		        for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
		        {
		            hDiracDecBin->ChEne[ch][bin] *= qualityBasedSmFactor;
		            hDiracDecBin->ChEneOut[ch][bin] *= qualityBasedSmFactor;
		        }

		        /* Add the limited contribution of the previous estimates */
		        hDiracDecBin->ChCrossRe[bin] += IIReneLimiter * hDiracDecBin->ChCrossRePrev[bin];
		        hDiracDecBin->ChCrossIm[bin] += IIReneLimiter * hDiracDecBin->ChCrossImPrev[bin];
		        hDiracDecBin->ChCrossReOut[bin] += IIReneLimiter * hDiracDecBin->ChCrossReOutPrev[bin];
		        hDiracDecBin->ChCrossImOut[bin] += IIReneLimiter * hDiracDecBin->ChCrossImOutPrev[bin];

		        for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
		        {
		            hDiracDecBin->ChEne[ch][bin] += IIReneLimiter * hDiracDecBin->ChEnePrev[ch][bin];
		            hDiracDecBin->ChEneOut[ch][bin] += IIReneLimiter * hDiracDecBin->ChEneOutPrev[ch][bin];
		        }

		        /* Store energy values and coefficients for next round */
		        hDiracDecBin->ChCrossRePrev[bin] = hDiracDecBin->ChCrossRe[bin];
		        hDiracDecBin->ChCrossImPrev[bin] = hDiracDecBin->ChCrossIm[bin];
		        hDiracDecBin->ChCrossReOutPrev[bin] = hDiracDecBin->ChCrossReOut[bin];
		        hDiracDecBin->ChCrossImOutPrev[bin] = hDiracDecBin->ChCrossImOut[bin];

		        for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
		        {
		            hDiracDecBin->ChEnePrev[ch][bin] = hDiracDecBin->ChEne[ch][bin];
		            hDiracDecBin->ChEneOutPrev[ch][bin] = hDiracDecBin->ChEneOut[ch][bin];
		        }
		    }

		    return;
		}
		#endif

		static void ivas_dirac_dec_binaural_determine_processing_matrices(
		DIRAC_DEC_BIN_HANDLE hDiracDecBin,