[fix] split rendering for combined formats with ext renderer; perform CLDFB... (3ff21457) · Commits · IVAS Codec Public Collaboration / IVAS Codec

lib_isar/lib_isar_pre_rend.c

+1 −3

Original line number	Diff line number	Diff line
		@@ -94,9 +94,7 @@ ivas_error ISAR_PRE_REND_open(
		isCldfbNeeded = 1;
		}

		hSplitBinRend->hCldfbHandles = NULL;

		if ( isCldfbNeeded )
		if ( isCldfbNeeded && hSplitBinRend->hCldfbHandles == NULL )
		{
		if ( ( hSplitBinRend->hCldfbHandles = (CLDFB_HANDLES_WRAPPER_HANDLE) malloc( sizeof( CLDFB_HANDLES_WRAPPER ) ) ) == NULL )
		{

lib_rend/lib_rend.c

+101 −55

Original line number	Diff line number	Diff line
		@@ -192,7 +192,7 @@ typedef struct hrtf_handles

		struct IVAS_REND
		{
		int32_t sampleRateOut; // TODO rename to sampleRate?
		int32_t sampleRateOut;
		int32_t maxGlobalDelayNs;

		IVAS_LIMITER_HANDLE hLimiter;
		@@ -209,11 +209,12 @@ struct IVAS_REND
		AUDIO_CONFIG outputConfig;
		EFAP_WRAPPER efapOutWrapper;
		IVAS_LSSETUP_CUSTOM_STRUCT customLsOut;

		int16_t splitRendBFI;
		SPLIT_REND_WRAPPER *splitRendWrapper;
		IVAS_REND_AudioBuffer splitRendEncBuffer;

		IVAS_REND_HeadRotData headRotData;
		int16_t splitRendBFI;

		EXTERNAL_ORIENTATION_HANDLE hExternalOrientationData;
		COMBINED_ORIENTATION_HANDLE hCombinedOrientationData;
		@@ -381,7 +382,7 @@ static void copyBufferToCLDFBarray(
		static void accumulateCLDFBArrayToBuffer(
		float re[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX],
		float im[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX],
		IVAS_REND_AudioBuffer *buffer )
		const IVAS_REND_AudioBuffer *buffer )
		{
		uint32_t smplIdx, slotIdx;
		uint32_t numCldfbSamples, num_bands;
		@@ -3619,10 +3620,15 @@ static int16_t getCldfbRendFlag(
		const IVAS_REND_AudioConfigType new_configType )
		{
		int16_t i;
		int16_t numMasaInputs = 0, numSbaInputs = 0, numIsmInputs = 0, numMcInputs = 0;
		int16_t numMasaInputs = 0, numSbaInputs = 0;
		int16_t isCldfbRend;

		isCldfbRend = 0;
		/* This function is called during three different phases of renderer processing:
		* - IVAS_REND_AddInput()
		* - IVAS_REND_FeedRenderConfig()
		* - IVAS_REND_GetSplitBinauralBitstream()
		* Only the last case can assume all inputs are present for the current frame to be rendered */
		if ( hIvasRend->hRendererConfig != NULL )
		{
		for ( i = 0; i < RENDERER_MAX_MASA_INPUTS; ++i )
		@@ -3633,20 +3639,7 @@ static int16_t getCldfbRendFlag(
		{
		numSbaInputs += ( hIvasRend->inputsSba[i].base.inConfig == IVAS_AUDIO_CONFIG_INVALID && new_configType != IVAS_REND_AUDIO_CONFIG_TYPE_AMBISONICS ) ? 0 : 1;
		}
		for ( i = 0; i < RENDERER_MAX_ISM_INPUTS; ++i )
		{
		numIsmInputs += ( hIvasRend->inputsIsm[i].base.inConfig == IVAS_AUDIO_CONFIG_INVALID && new_configType != IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED ) ? 0 : 1;
		}
		for ( i = 0; i < RENDERER_MAX_MC_INPUTS; ++i )
		{
		numMcInputs += ( hIvasRend->inputsMc[i].base.inConfig == IVAS_AUDIO_CONFIG_INVALID && new_configType != IVAS_REND_AUDIO_CONFIG_TYPE_CHANNEL_BASED ) ? 0 : 1;
		}

		if ( numIsmInputs > 0 \|\| numMcInputs > 0 )
		{
		isCldfbRend = 0;
		}
		else if ( ( numMasaInputs > 0 ) \|\| ( numSbaInputs > 0 && hIvasRend->hRendererConfig->split_rend_config.rendererSelection == IVAS_BIN_RENDERER_TYPE_FASTCONV ) )
		if ( ( numMasaInputs > 0 ) \|\| ( numSbaInputs > 0 && hIvasRend->hRendererConfig->split_rend_config.rendererSelection == IVAS_BIN_RENDERER_TYPE_FASTCONV ) )
		{
		isCldfbRend = 1;
		}
		@@ -3661,7 +3654,7 @@ static int16_t getCldfbRendFlag(
		*
		*------------------------------------------------------------------------*/

		static ivas_error ivas_pre_rend_init(
		static ivas_error isar_pre_rend_init(
		SPLIT_REND_WRAPPER *pSplitRendWrapper,
		IVAS_REND_AudioBuffer *pSplitRendEncBuffer,
		ISAR_SPLIT_REND_CONFIG_DATA *pSplit_rend_config,
		@@ -3671,9 +3664,12 @@ static ivas_error ivas_pre_rend_init(
		const int16_t cldfb_in_flag,
		const int16_t num_subframes )
		{
		bool realloc;
		ivas_error error;
		IVAS_REND_AudioBufferConfig bufConfig;

		realloc = false;

		if ( outConfig == IVAS_AUDIO_CONFIG_BINAURAL_SPLIT_CODED \|\| outConfig == IVAS_AUDIO_CONFIG_BINAURAL_SPLIT_PCM )
		{
		if ( pSplit_rend_config->poseCorrectionMode == ISAR_SPLIT_REND_POSE_CORRECTION_MODE_CLDFB )
		@@ -3685,31 +3681,43 @@ static ivas_error ivas_pre_rend_init(
		isar_renderSplitUpdateNoCorrectionPoseData( pSplit_rend_config, &pSplitRendWrapper->multiBinPoseData );
		}

		if ( ( error = ISAR_PRE_REND_open( pSplitRendWrapper, pSplit_rend_config, outputSampleRate, cldfb_in_flag, outConfig == IVAS_AUDIO_CONFIG_BINAURAL_SPLIT_PCM, num_subframes, 0 ) ) != IVAS_ERR_OK )
		if ( ( error = ISAR_PRE_REND_open( pSplitRendWrapper,
		pSplit_rend_config,
		outputSampleRate,
		cldfb_in_flag,
		outConfig == IVAS_AUDIO_CONFIG_BINAURAL_SPLIT_PCM,
		num_subframes,
		0 ) ) != IVAS_ERR_OK )
		{
		return error;
		}

		/*allocate for CLDFB in and change to TD during process if needed*/
		bufConfig.numSamplesPerChannel = MAX_CLDFB_BUFFER_LENGTH_PER_CHANNEL;

		/* If the cldfb_in_flag is different from what was previously allocated for the buffer, change the size */
		if ( pSplitRendEncBuffer->data != NULL && ( cldfb_in_flag != bufConfig.is_cldfb ) )
		{
		realloc = true;
		}

		if ( pSplitRendEncBuffer->data == NULL \|\| realloc )
		{
		/* set buffer config */
		bufConfig.numSamplesPerChannel = cldfb_in_flag ? MAX_CLDFB_BUFFER_LENGTH_PER_CHANNEL : L_FRAME_MAX;
		bufConfig.numChannels = BINAURAL_CHANNELS * pSplitRendWrapper->multiBinPoseData.num_poses;
		bufConfig.is_cldfb = 1;
		bufConfig.is_cldfb = cldfb_in_flag;
		pSplitRendEncBuffer->config = bufConfig;

		/* allocate memory */
		if ( realloc )
		{
		free( pSplitRendEncBuffer->data );
		}

		if ( ( pSplitRendEncBuffer->data = malloc( bufConfig.numChannels * bufConfig.numSamplesPerChannel * sizeof( float ) ) ) == NULL )
		{
		return IVAS_ERR_FAILED_ALLOC;
		}
		}
		else
		{
		IVAS_REND_AudioBufferConfig bufConfig2;

		bufConfig2.numSamplesPerChannel = 0;
		bufConfig2.numChannels = 0;
		bufConfig2.is_cldfb = 0;
		pSplitRendEncBuffer->config = bufConfig2;
		pSplitRendEncBuffer->data = NULL;
		}

		return IVAS_ERR_OK;
		@@ -3744,12 +3752,12 @@ ivas_error IVAS_REND_AddInput(
		return IVAS_ERR_UNEXPECTED_NULL_POINTER;
		}

		if ( hIvasRend->splitRendEncBuffer.data == NULL && hIvasRend->hRendererConfig != NULL )
		if ( hIvasRend->hRendererConfig != NULL )
		{
		int16_t cldfb_in_flag;
		cldfb_in_flag = getCldfbRendFlag( hIvasRend, getAudioConfigType( inConfig ) );

		if ( ( error = ivas_pre_rend_init( hIvasRend->splitRendWrapper,
		if ( ( error = isar_pre_rend_init( hIvasRend->splitRendWrapper,
		&hIvasRend->splitRendEncBuffer,
		&hIvasRend->hRendererConfig->split_rend_config,
		hIvasRend->headRotData,
		@@ -4651,7 +4659,7 @@ ivas_error IVAS_REND_FeedRenderConfig(
		hIvasRend->splitRendWrapper = NULL;
		}

		if ( ( error = ivas_pre_rend_init( hIvasRend->splitRendWrapper,
		if ( ( error = isar_pre_rend_init( hIvasRend->splitRendWrapper,
		&hIvasRend->splitRendEncBuffer,
		&hIvasRend->hRendererConfig->split_rend_config,
		hIvasRend->headRotData,
		@@ -5757,8 +5765,10 @@ static ivas_error renderIsmToSplitBinaural(
		const MULTI_BIN_REND_POSE_DATA *pMultiBinPoseData;
		const SPLIT_REND_WRAPPER *pSplitRendWrapper;
		IVAS_QUATERNION originalHeadRot[MAX_PARAM_SPATIAL_SUBFRAMES];
		int16_t i;
		int16_t i, ch, slot_idx, num_bands;
		float tmpBinaural[MAX_HEAD_ROT_POSES * BINAURAL_CHANNELS][L_FRAME48k];
		float tmpBinaural_CldfbRe[MAX_HEAD_ROT_POSES * BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
		float tmpBinaural_CldfbIm[MAX_HEAD_ROT_POSES * BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
		int16_t output_frame = ismInput->base.inputBuffer.config.numSamplesPerChannel;
		COMBINED_ORIENTATION_HANDLE pCombinedOrientationData;
		int16_t ism_md_subframe_update_ext;
		@@ -5819,16 +5829,43 @@ static ivas_error renderIsmToSplitBinaural(
		}

		/* Render */
		if ( ( error = ivas_td_binaural_renderer_ext( ( pos_idx == 0 ) ? &ismInput->tdRendWrapper : &ismInput->splitTdRendWrappers[pos_idx - 1], ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos,
		NULL, ism_md_subframe_update_ext, *ismInput->base.ctx.pOutSampleRate, output_frame, tmpProcessing ) ) != IVAS_ERR_OK )
		if ( ( error = ivas_td_binaural_renderer_ext( ( pos_idx == 0 ) ? &ismInput->tdRendWrapper : &ismInput->splitTdRendWrappers[pos_idx - 1],
		ismInput->base.inConfig,
		NULL,
		ismInput->base.ctx.pCombinedOrientationData,
		&ismInput->currentPos,
		NULL,
		ism_md_subframe_update_ext,
		*ismInput->base.ctx.pOutSampleRate,
		output_frame,
		tmpProcessing ) ) != IVAS_ERR_OK )
		{
		return error;
		}

		if ( outAudio.config.is_cldfb )
		{
		/* Perform CLDFB analysis on rendered audio, since the output buffer is CLDFB domain */
		num_bands = (int16_t) ( ( BINAURAL_MAXBANDS * *ismInput->base.ctx.pOutSampleRate ) / 48000 );
		for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
		{
		for ( slot_idx = 0; slot_idx < IVAS_CLDFB_NO_COL_MAX; slot_idx++ )
		{
		cldfbAnalysis_ts( &tmpProcessing[ch][num_bands * slot_idx],
		&tmpBinaural_CldfbRe[BINAURAL_CHANNELS * pos_idx + ch][slot_idx][0],
		&tmpBinaural_CldfbIm[BINAURAL_CHANNELS * pos_idx + ch][slot_idx][0],
		num_bands,
		ismInput->base.ctx.pSplitRendWrapper->hCldfbHandles->cldfbAna[pos_idx + ch] );
		}
		}
		}
		else
		{
		/* Copy rendered audio to tmp storage buffer. Copying directly to output would
		* overwrite original audio, which is still needed for rendering next head pose. */
		mvr2r( tmpProcessing[0], tmpBinaural[2 * pos_idx], output_frame );
		mvr2r( tmpProcessing[1], tmpBinaural[2 * pos_idx + 1], output_frame );
		mvr2r( tmpProcessing[0], tmpBinaural[BINAURAL_CHANNELS * pos_idx], output_frame );
		mvr2r( tmpProcessing[1], tmpBinaural[BINAURAL_CHANNELS * pos_idx + 1], output_frame );
		}

		/* Overwrite processing buffer with original input audio again */
		copyBufferTo2dArray( ismInput->base.inputBuffer, tmpProcessing );
		@@ -5840,7 +5877,14 @@ static ivas_error renderIsmToSplitBinaural(
		pCombinedOrientationData->Quaternions[i] = originalHeadRot[i];
		}

		if ( outAudio.config.is_cldfb )
		{
		accumulateCLDFBArrayToBuffer( tmpBinaural_CldfbRe, tmpBinaural_CldfbIm, &outAudio );
		}
		else
		{
		accumulate2dArrayToBuffer( tmpBinaural, &outAudio );
		}
		pop_wmops();

		/* Encoding to split rendering bitstream done at a higher level */
		@@ -5875,11 +5919,13 @@ static ivas_error renderInputIsm(
		{
		ivas_error error;
		IVAS_REND_AudioBuffer inAudio;
		int16_t cldfb2tdSampleFact;

		error = IVAS_ERR_OK;
		inAudio = ismInput->base.inputBuffer;

		if ( ismInput->base.numNewSamplesPerChannel != outAudio.config.numSamplesPerChannel )
		cldfb2tdSampleFact = outAudio.config.is_cldfb ? 2 : 1;
		if ( ismInput->base.numNewSamplesPerChannel * cldfb2tdSampleFact != outAudio.config.numSamplesPerChannel )
		{
		return IVAS_ERROR( IVAS_ERR_INVALID_BUFFER_SIZE, "Mismatch between the number of input samples vs number of requested output samples - currently not allowed" );
		}
		@@ -7653,7 +7699,9 @@ static ivas_error getSamplesInternal(
		return error;
		}

		if ( numOutChannels != outAudio.config.numChannels && hIvasRend->outputConfig != IVAS_AUDIO_CONFIG_BINAURAL_SPLIT_CODED && hIvasRend->outputConfig != IVAS_AUDIO_CONFIG_BINAURAL_SPLIT_PCM )
		if ( numOutChannels != outAudio.config.numChannels &&
		hIvasRend->outputConfig != IVAS_AUDIO_CONFIG_BINAURAL_SPLIT_CODED &&
		hIvasRend->outputConfig != IVAS_AUDIO_CONFIG_BINAURAL_SPLIT_PCM )
		{
		return IVAS_ERR_WRONG_NUM_CHANNELS;
		}
		@@ -7753,12 +7801,13 @@ ivas_error IVAS_REND_GetSplitBinauralBitstream(
		pSplitEncBufConfig = &hIvasRend->splitRendEncBuffer.config;
		pSplitRendConfig = &hIvasRend->hRendererConfig->split_rend_config;

		/* configure output buffer for the split rendering multi-poses */
		/* 0 DoF / No pose correction retains frame size */
		pSplitEncBufConfig->is_cldfb = cldfb_in_flag;
		if ( pSplitRendConfig->dof == 0 \|\| pSplitRendConfig->poseCorrectionMode == ISAR_SPLIT_REND_POSE_CORRECTION_MODE_NONE )
		{
		pSplitEncBufConfig->numSamplesPerChannel = outAudio.config.numSamplesPerChannel;
		}
		/* Pose correction requires 20ms */
		else
		{
		pSplitEncBufConfig->numSamplesPerChannel = (int16_t) ( hIvasRend->sampleRateOut / FRAMES_PER_SEC );
		@@ -7771,11 +7820,8 @@ ivas_error IVAS_REND_GetSplitBinauralBitstream(
		hIvasRend->headRotData.sr_pose_pred_axis );
		assert( num_poses_orig == hIvasRend->splitRendWrapper->multiBinPoseData.num_poses && "number of poses should not change dynamically" );

		/* Clear output buffer for split rendering bitstream */
		set_zero( hIvasRend->splitRendEncBuffer.data, pSplitEncBufConfig->numChannels * pSplitEncBufConfig->numSamplesPerChannel );

		/* hIvasRend->splitRendEncBuffer used for BINAURAL_SPLIT_CODED output
		outAudio used later for BINAURAL_SPLIT_PCM output */
		/* hIvasRend->splitRendEncBuffer contains multi-pose data for BINAURAL_SPLIT_CODED output
		outAudio used later for main pose BINAURAL_SPLIT_PCM output */
		if ( ( error = getSamplesInternal( hIvasRend, hIvasRend->splitRendEncBuffer ) ) != IVAS_ERR_OK )
		{
		return error;