Fix rest of issue 355 by refactoring parametric binauralizer to 5ms subframe resolution. (b696a87b) · Commits · IVAS Codec Public Collaboration / IVAS Codec

lib_com/ivas_cnst.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -864,6 +864,9 @@ typedef enum {
		// VE: this should be renamed to e.g. N_SPATIAL_SUBFRAMES
		#define MAX_PARAM_SPATIAL_SUBFRAMES 4 /* Maximum number of subframes for parametric spatial coding */
		#define L_SPATIAL_SUBFR_48k (L_FRAME48k / MAX_PARAM_SPATIAL_SUBFRAMES)
		#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
		#define CLDFB_SLOTS_PER_SUBFRAME ( CLDFB_NO_COL_MAX / MAX_PARAM_SPATIAL_SUBFRAMES ) /* Number of CLDFB slots per subframe */
		#endif


		/*----------------------------------------------------------------------------------*

lib_com/ivas_prot.h

+9 −1

Original line number	Diff line number	Diff line
		@@ -4691,13 +4691,21 @@ void ivas_masa_prerender(
		const int16_t output_frame /* i : output frame length per channel */
		);

		#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
		void ivas_spar_param_to_masa_param_mapping(
		Decoder_Struct *st_ivas, /* i/o: IVAS decoder struct */
		float inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i : Input audio in CLDFB domain, real */
		float inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i : Input audio in CLDFB domain, imag */
		const int16_t subframe /* i : Subframe to map */
		);
		#else
		void ivas_spar_param_to_masa_param_mapping(
		Decoder_Struct *st_ivas, /* i/o: IVAS decoder struct */
		float inRe[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : Input audio in CLDFB domain, real */
		float inIm[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : Input audio in CLDFB domain, imag */
		const int16_t firstSubframe, /* i : First subframe to map */
		const int16_t nSubframes /* i : Number of subframes to map */
		);
		#endif


		/*---------------------------------------------------------------------------------*

lib_com/options.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -169,6 +169,8 @@

		#define EUALER2QUAT_FIX /* Dlb : fix for issue 430 in euler2quat, sign of quat y is inverted */
		#define HR_METADATA /* Nok: encode directional MASA metadata with more bits at 384k and 512k */
		#define FIX_355_REFACTOR_PARAMBIN_TO_5MS /* Nokia: Fixes issue 355 by refactoring parametric binauralizer code to 5 ms mode */

		/* ################## End DEVELOPMENT switches ######################### */
		/* clang-format on */
		#endif

lib_dec/ivas_binRenderer_internal.c

+21 −1

Original line number	Diff line number	Diff line
		@@ -426,6 +426,15 @@ static ivas_error ivas_binaural_hrtf_open(
		*
		*-------------------------------------------------------------------------*/

		#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
		static void ivas_binaural_obtain_DMX(
		const int16_t numTimeSlots,
		BINAURAL_RENDERER_HANDLE hBinRenderer, /* i/o: fastconv binaural renderer handle */
		float RealBuffer[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i : Contains the LS signals */
		float ImagBuffer[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i : Contains the LS signals */
		float realDMX[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX],
		float imagDMX[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX] )
		#else
		static void ivas_binaural_obtain_DMX(
		const int16_t numTimeSlots,
		BINAURAL_RENDERER_HANDLE hBinRenderer, /* i/o: fastconv binaural renderer handle */
		@@ -433,6 +442,7 @@ static void ivas_binaural_obtain_DMX(
		float ImagBuffer[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* i : Contains the LS signals */
		float realDMX[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX],
		float imagDMX[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX] )
		#endif
		{
		int16_t chIdx, bandIdx, k;

		@@ -1029,11 +1039,17 @@ void ivas_binRenderer(
		/* Obtain the binaural dmx and compute the reverb */
		if ( hBinRenderer->hReverb != NULL )
		{
		#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
		float reverbRe[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX];
		float reverbIm[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX];
		float inRe[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX];
		float inIm[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX];
		#else
		float reverbRe[BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
		float reverbIm[BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
		float inRe[BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
		float inIm[BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];

		#endif
		ivas_binaural_obtain_DMX( numTimeSlots, hBinRenderer, RealBuffer, ImagBuffer, inRe, inIm );

		for ( chIdx = 0; chIdx < BINAURAL_CHANNELS; chIdx++ )
		@@ -1045,7 +1061,11 @@ void ivas_binRenderer(
		}
		}

		#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
		ivas_binaural_reverb_processSubframe( hBinRenderer->hReverb, BINAURAL_CHANNELS, inRe, inIm, reverbRe, reverbIm );
		#else
		ivas_binaural_reverb_processFrame( hBinRenderer->hReverb, BINAURAL_CHANNELS, inRe, inIm, reverbRe, reverbIm, 0u );
		#endif

		/* Add the conv module and reverb module output */
		for ( chIdx = 0; chIdx < BINAURAL_CHANNELS; chIdx++ )

lib_dec/ivas_masa_dec.c

+207 −0

Original line number	Diff line number	Diff line
		@@ -1244,6 +1244,212 @@ ivas_error ivas_masa_dec_reconfigure(
		* Determine MASA metadata from the SPAR metadata
		*-------------------------------------------------------------------*/

		#ifdef FIX_355_REFACTOR_PARAMBIN_TO_5MS
		/*
		 * Derive MASA-style spatial metadata for ONE subframe from the SPAR mixing
		 * matrices (and, with two transport channels, from the transport audio).
		 *
		 * Per frequency bin below the SPAR/DirAC split band the function:
		 *   1. picks the mixing matrix that is time-aligned with the audio
		 *      (previous-frame matrices for the first SPAR_META_DELAY_SUBFRAMES
		 *      subframes, current-frame matrices otherwise),
		 *   2. builds an input covariance matrix and maps it to an FOA covariance
		 *      matrix with compute_foa_cov_matrix(),
		 *   3. writes azimuth, elevation, energy ratio and diffuseness into the
		 *      DirAC handle at index hDirAC->dirac_read_idx, and zeroes the
		 *      spread/surrounding coherences,
		 *   4. if a diffuse-distribution handle exists, writes the X/Y/Z diffuse
		 *      ratios for the given subframe.
		 *
		 * Side effects: sets hDirAC->numSimultaneousDirections to 1.
		 *
		 * inRe/inIm are only read when st_ivas->nchan_transport == 2; channels 0 and 1
		 * and slots 0 .. hDirAC->subframe_nbslots-1 are accessed.
		 *
		 * NOTE(review): transportSignalEnergies / transportSignalCrossCorrelation are
		 * only filled when nchan_transport == 2, but the covariance set-up below reads
		 * transportSignalEnergies[0] for every nchan_transport != 1. This presumably
		 * relies on callers only using 1 or 2 transport channels - confirm.
		 */
		void ivas_spar_param_to_masa_param_mapping(
		    Decoder_Struct *st_ivas,                                       /* i/o: IVAS decoder struct */
		    float inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i : Input audio in CLDFB domain, real */
		    float inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /* i : Input audio in CLDFB domain, imag */
		    const int16_t subframe                                         /* i : Subframe to map */
		)
		{
		    int16_t i, j, band, bin, slot, ch, nBins, nchan_transport;
		    int16_t mixer_mat_index;
		    int16_t dirac_write_idx;
		    DIRAC_DEC_HANDLE hDirAC;
		    DIFFUSE_DISTRIBUTION_HANDLE hDiffuseDist;
		    float mixer_mat_sf_bands_real[MAX_PARAM_SPATIAL_SUBFRAMES][SPAR_DIRAC_SPLIT_START_BAND][FOA_CHANNELS][FOA_CHANNELS];
		    float mixer_mat_sf_bins_real[MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX][FOA_CHANNELS][FOA_CHANNELS];
		    int16_t *band_grouping;
		    int16_t band_start, band_end;
		    float transportSignalEnergies[2][CLDFB_NO_CHANNELS_MAX];
		    float transportSignalCrossCorrelation[CLDFB_NO_CHANNELS_MAX];
		    float instEne;
		    float inCovarianceMtx[FOA_CHANNELS][FOA_CHANNELS];
		    float foaCovarianceMtx[FOA_CHANNELS][FOA_CHANNELS];
		    float Iy, Iz, Ix, E, azi, ele, I, ratio;
		    float diffuseGainX, diffuseGainY, diffuseGainZ, diffuseGainSum;

		    /* Set values */
		    hDirAC = st_ivas->hDirAC;
		    hDirAC->numSimultaneousDirections = 1;
		    hDiffuseDist = st_ivas->hDirAC->hDiffuseDist;
		    nchan_transport = st_ivas->nchan_transport;
		    band_grouping = hDirAC->band_grouping;
		    dirac_write_idx = hDirAC->dirac_read_idx; /* Mixing matrices, from which MASA meta is determined, already have the delay compensation */

		    /* Init arrays: off-diagonal covariance terms stay zero unless set below */
		    for ( i = 0; i < FOA_CHANNELS; i++ )
		    {
		        set_zero( inCovarianceMtx[i], FOA_CHANNELS );
		    }

		    /* Delay the SPAR mixing matrices to have them synced with the audio */
		    if ( subframe < SPAR_META_DELAY_SUBFRAMES )
		    {
		        /* Early subframes still belong to the matrices stored from the previous frame */
		        mixer_mat_index = subframe + MAX_PARAM_SPATIAL_SUBFRAMES - SPAR_META_DELAY_SUBFRAMES + 1;
		        for ( band = 0; band < SPAR_DIRAC_SPLIT_START_BAND; band++ )
		        {
		            for ( i = 0; i < FOA_CHANNELS; i++ )
		            {
		                for ( j = 0; j < FOA_CHANNELS; j++ )
		                {
		                    mixer_mat_sf_bands_real[subframe][band][i][j] = st_ivas->hSpar->hMdDec->mixer_mat_prev[mixer_mat_index][i][j][band];
		                }
		            }
		        }
		    }
		    else
		    {
		        /* Later subframes use the current-frame matrices, stored subframe-major along the last index */
		        mixer_mat_index = subframe - SPAR_META_DELAY_SUBFRAMES;
		        for ( band = 0; band < SPAR_DIRAC_SPLIT_START_BAND; band++ )
		        {
		            for ( i = 0; i < FOA_CHANNELS; i++ )
		            {
		                for ( j = 0; j < FOA_CHANNELS; j++ )
		                {
		                    mixer_mat_sf_bands_real[subframe][band][i][j] = st_ivas->hSpar->hMdDec->mixer_mat[i][j][band + mixer_mat_index * IVAS_MAX_NUM_BANDS];
		                }
		            }
		        }
		    }

		    /* Map the mixing matrices from the frequency bands to frequency bins */
		    bin = 0;
		    for ( band = 0; band < SPAR_DIRAC_SPLIT_START_BAND; band++ )
		    {
		        band_start = band_grouping[band];
		        band_end = band_grouping[band + 1];
		        for ( bin = band_start; bin < band_end; bin++ )
		        {
		            for ( i = 0; i < FOA_CHANNELS; i++ )
		            {
		                for ( j = 0; j < FOA_CHANNELS; j++ )
		                {
		                    mixer_mat_sf_bins_real[subframe][bin][i][j] = mixer_mat_sf_bands_real[subframe][band][i][j];
		                }
		            }
		        }
		    }
		    nBins = bin; /* one past the last bin covered by the last processed band */

		    /* Determine MASA metadata */
		    /* Determine transport signal energies and cross correlations when more than 1 TC */
		    if ( nchan_transport == 2 )
		    {
		        set_zero( transportSignalEnergies[0], nBins );
		        set_zero( transportSignalEnergies[1], nBins );
		        set_zero( transportSignalCrossCorrelation, nBins );

		        for ( slot = 0; slot < hDirAC->subframe_nbslots; slot++ )
		        {
		            for ( bin = 0; bin < nBins; bin++ )
		            {
		                for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
		                {
		                    instEne = ( inRe[ch][slot][bin] * inRe[ch][slot][bin] );
		                    instEne += ( inIm[ch][slot][bin] * inIm[ch][slot][bin] );
		                    transportSignalEnergies[ch][bin] += instEne;
		                }
		                /* Real part of the cross correlation between the two transport channels */
		                transportSignalCrossCorrelation[bin] += inRe[0][slot][bin] * inRe[1][slot][bin];
		                transportSignalCrossCorrelation[bin] += inIm[0][slot][bin] * inIm[1][slot][bin];
		            }
		        }
		    }

		    if ( hDiffuseDist != NULL )
		    {
		        set_zero( hDiffuseDist->diffuseRatioX[subframe], CLDFB_NO_CHANNELS_MAX );
		        set_zero( hDiffuseDist->diffuseRatioY[subframe], CLDFB_NO_CHANNELS_MAX );
		        set_zero( hDiffuseDist->diffuseRatioZ[subframe], CLDFB_NO_CHANNELS_MAX );
		    }

		    for ( bin = 0; bin < nBins; bin++ )
		    {
		        /* Set the energy of the first transport signal */
		        if ( nchan_transport == 1 )
		        {
		            inCovarianceMtx[0][0] = 1.0f; /* In case of 1TC, fixed value can be used */
		        }
		        else
		        {
		            inCovarianceMtx[0][0] = transportSignalEnergies[0][bin]; /* In case of 2TC, use actual energies */
		        }
		        /* Decorrelated channels assumed to have the same energy as the source channel */
		        inCovarianceMtx[1][1] = inCovarianceMtx[0][0];
		        inCovarianceMtx[2][2] = inCovarianceMtx[0][0];
		        inCovarianceMtx[3][3] = inCovarianceMtx[0][0];

		        /* In case residuals were transmitted, use their actual energies and cross correlations */
		        if ( nchan_transport == 2 )
		        {
		            inCovarianceMtx[1][1] = transportSignalEnergies[1][bin];
		            inCovarianceMtx[0][1] = transportSignalCrossCorrelation[bin];
		            inCovarianceMtx[1][0] = inCovarianceMtx[0][1];
		        }

		        compute_foa_cov_matrix( foaCovarianceMtx, inCovarianceMtx, mixer_mat_sf_bins_real[subframe][bin] );

		        /* Estimate MASA metadata */
		        Iy = foaCovarianceMtx[0][1];                                                                                      /* Intensity in Y direction */
		        Iz = foaCovarianceMtx[0][2];                                                                                      /* Intensity in Z direction */
		        Ix = foaCovarianceMtx[0][3];                                                                                      /* Intensity in X direction */
		        I = sqrtf( Ix * Ix + Iy * Iy + Iz * Iz );                                                                         /* Intensity vector length */
		        E = ( foaCovarianceMtx[0][0] + foaCovarianceMtx[1][1] + foaCovarianceMtx[2][2] + foaCovarianceMtx[3][3] ) / 2.0f; /* Overall energy */
		        azi = atan2f( Iy, Ix );                                                                                           /* Azimuth */
		        ele = atan2f( Iz, sqrtf( Ix * Ix + Iy * Iy ) );                                                                   /* Elevation */
		        ratio = I / fmaxf( 1e-12f, E );                                                                                   /* Energy ratio; floor on E avoids division by zero */
		        ratio = fmaxf( 0.0f, fminf( 1.0f, ratio ) );                                                                      /* Clamp to [0, 1] */

		        /* Angles are stored in whole degrees */
		        hDirAC->azimuth[dirac_write_idx][bin] = (int16_t) roundf( azi / PI_OVER_180 );
		        hDirAC->elevation[dirac_write_idx][bin] = (int16_t) roundf( ele / PI_OVER_180 );
		        hDirAC->energy_ratio1[dirac_write_idx][bin] = ratio;
		        hDirAC->diffuseness_vector[dirac_write_idx][bin] = 1.0f - ratio;

		        hDirAC->spreadCoherence[dirac_write_idx][bin] = 0.0f;
		        hDirAC->surroundingCoherence[dirac_write_idx][bin] = 0.0f;

		        /* Determine directional distribution of the indirect audio based on the SPAR mixing matrices (and the transport audio signals when 2 TC) */
		        if ( hDiffuseDist != NULL )
		        {
		            if ( nchan_transport == 1 )
		            {
		                diffuseGainY = fabsf( mixer_mat_sf_bins_real[subframe][bin][1][1] );
		                diffuseGainX = fabsf( mixer_mat_sf_bins_real[subframe][bin][3][2] );
		                diffuseGainZ = fabsf( mixer_mat_sf_bins_real[subframe][bin][2][3] );
		            }
		            else if ( nchan_transport == 2 )
		            {
		                diffuseGainY = fabsf( mixer_mat_sf_bins_real[subframe][bin][1][1] * transportSignalEnergies[1][bin] );
		                diffuseGainX = fabsf( mixer_mat_sf_bins_real[subframe][bin][3][2] * transportSignalEnergies[0][bin] ) + fabsf( mixer_mat_sf_bins_real[subframe][bin][3][1] * transportSignalEnergies[1][bin] );
		                diffuseGainZ = fabsf( mixer_mat_sf_bins_real[subframe][bin][2][3] * transportSignalEnergies[0][bin] ) + fabsf( mixer_mat_sf_bins_real[subframe][bin][2][1] * transportSignalEnergies[1][bin] );
		            }
		            else
		            {
		                diffuseGainY = 1.0f;
		                diffuseGainX = 1.0f;
		                diffuseGainZ = 1.0f;
		            }

		            diffuseGainSum = diffuseGainY + diffuseGainX + diffuseGainZ;

		            if ( diffuseGainSum == 0.0f )
		            {
		                /* No directional information: distribute the diffuse energy evenly */
		                hDiffuseDist->diffuseRatioX[subframe][bin] = 1.0f / 3.0f;
		                hDiffuseDist->diffuseRatioY[subframe][bin] = 1.0f / 3.0f;
		                hDiffuseDist->diffuseRatioZ[subframe][bin] = 1.0f / 3.0f;
		            }
		            else
		            {
		                hDiffuseDist->diffuseRatioX[subframe][bin] = diffuseGainX / ( diffuseGainSum + EPSILON );
		                hDiffuseDist->diffuseRatioY[subframe][bin] = diffuseGainY / ( diffuseGainSum + EPSILON );
		                hDiffuseDist->diffuseRatioZ[subframe][bin] = diffuseGainZ / ( diffuseGainSum + EPSILON );
		            }
		        }
		    }

		    return;
		}
		#else
		void ivas_spar_param_to_masa_param_mapping(
		Decoder_Struct *st_ivas, /* i/o: IVAS decoder struct */
		float inRe[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : Input audio in CLDFB domain, real */
		@@ -1462,6 +1668,7 @@ void ivas_spar_param_to_masa_param_mapping(

		return;
		}
		#endif


		/* Estimate FOA properties: foaCov = mixMtx * inCov * mixMtx' */