From 058a764376b587dc239a9c3bd10ee645516795c9 Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Fri, 24 Oct 2025 17:08:33 +0200 Subject: [PATCH 1/3] turn md delay renderer argument into integer type --- apps/renderer.c | 5 +++-- lib_rend/lib_rend.c | 33 ++++++++++++--------------------- lib_rend/lib_rend.h | 2 +- 3 files changed, 16 insertions(+), 24 deletions(-) diff --git a/apps/renderer.c b/apps/renderer.c index 3f8c479d9..eccabd2a6 100644 --- a/apps/renderer.c +++ b/apps/renderer.c @@ -33,6 +33,7 @@ #include "lib_rend.h" #include #include +#include #include #include "audio_file_reader.h" #include "audio_file_writer.h" @@ -186,7 +187,7 @@ typedef struct float lfeConfigElevation; bool lfeCustomRoutingEnabled; char inLfePanningMatrixFile[RENDERER_MAX_CLI_ARG_LENGTH]; - float syncMdDelay; + int16_t syncMdDelay; IVAS_RENDER_FRAMESIZE render_framesize; uint16_t directivityPatternId[RENDERER_MAX_ISM_INPUTS]; AcousticEnvironmentSequence aeSequence; @@ -2904,7 +2905,7 @@ static void parseOption( case CmdLnOptionId_syncMdDelay: assert( numOptionValues == 1 ); /* Metadata Delay to sync with audio delay in ms */ - args->syncMdDelay = strtof( optionValues[0], NULL ); + args->syncMdDelay = (int16_t) strtol( optionValues[0], NULL, 10 ); break; default: assert( 0 && "This should be unreachable - all command line options should be explicitly handled." ); diff --git a/lib_rend/lib_rend.c b/lib_rend/lib_rend.c index be092691f..a479af20f 100644 --- a/lib_rend/lib_rend.c +++ b/lib_rend/lib_rend.c @@ -44,6 +44,7 @@ #include #include #include "wmc_auto.h" +#include /*-------------------------------------------------------------------* @@ -125,7 +126,7 @@ typedef struct #ifdef NONBE_1377_REND_DIRATT_CONF int16_t object_id; #endif - float ism_metadata_delay_ms; + int16_t ism_metadata_delay_ms; } input_ism; typedef struct @@ -1510,7 +1511,7 @@ static ivas_error alignInputDelay( if ( getAudioConfigType( inputBase->inConfig ) == IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED ) { inputIsm = (input_ism *) inputBase; - inputIsm->ism_metadata_delay_ms = maxGlobalDelayNs / 1e6f; + inputIsm->ism_metadata_delay_ms = (int16_t) roundf( inputIsm->ism_metadata_delay_ms + maxGlobalDelayNs / 1e6f / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) ); } } } @@ -5473,14 +5474,12 @@ static ivas_error renderIsmToBinaural( { float tmpTDRendBuffer[MAX_OUTPUT_CHANNELS][L_FRAME48k]; ivas_error error; - int16_t ism_md_subframe_update_ext; push_wmops( "renderIsmToBinaural" ); - /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */ - ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) ); + copyBufferTo2dArray( ismInput->base.inputBuffer, tmpTDRendBuffer ); - if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb, ism_md_subframe_update_ext, + if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb, ismInput->ism_metadata_delay_ms, *ismInput->base.ctx.pOutSampleRate, outAudio.config.numSamplesPerChannel, tmpTDRendBuffer ) ) != IVAS_ERR_OK ) { return error; @@ -5675,17 +5674,13 @@ static ivas_error renderIsmToBinauralReverb( { float tmpRendBuffer[MAX_OUTPUT_CHANNELS][L_FRAME48k]; ivas_error error; - int16_t ism_md_subframe_update_ext; push_wmops( "renderIsmToBinauralRoom" ); - /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */ - ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) ); - copyBufferTo2dArray( ismInput->base.inputBuffer, tmpRendBuffer ); if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb, - ism_md_subframe_update_ext, *ismInput->base.ctx.pOutSampleRate, outAudio.config.numSamplesPerChannel, tmpRendBuffer ) ) != IVAS_ERR_OK ) + ismInput->ism_metadata_delay_ms, *ismInput->base.ctx.pOutSampleRate, outAudio.config.numSamplesPerChannel, tmpRendBuffer ) ) != IVAS_ERR_OK ) { return error; } @@ -5852,16 +5847,12 @@ static ivas_error renderIsmToSplitBinaural( float tmpBinaural_CldfbIm[MAX_HEAD_ROT_POSES * BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX]; int16_t output_frame = ismInput->base.inputBuffer.config.numSamplesPerChannel; COMBINED_ORIENTATION_HANDLE pCombinedOrientationData; - int16_t ism_md_subframe_update_ext; push_wmops( "renderIsmToSplitBinaural" ); pSplitRendWrapper = ismInput->base.ctx.pSplitRendWrapper; pMultiBinPoseData = &pSplitRendWrapper->multiBinPoseData; - /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */ - ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) ); - pCombinedOrientationData = *ismInput->base.ctx.pCombinedOrientationData; if ( pMultiBinPoseData->poseCorrectionMode == ISAR_SPLIT_REND_POSE_CORRECTION_MODE_CLDFB ) @@ -5911,7 +5902,7 @@ static ivas_error renderIsmToSplitBinaural( /* Render */ if ( ( error = ivas_td_binaural_renderer_ext( ( pos_idx == 0 ) ? &ismInput->tdRendWrapper : &ismInput->splitTdRendWrappers[pos_idx - 1], ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, - NULL, ism_md_subframe_update_ext, *ismInput->base.ctx.pOutSampleRate, output_frame, tmpProcessing ) ) != IVAS_ERR_OK ) + NULL, ismInput->ism_metadata_delay_ms, *ismInput->base.ctx.pOutSampleRate, output_frame, tmpProcessing ) ) != IVAS_ERR_OK ) { return error; } @@ -7585,8 +7576,8 @@ ivas_error IVAS_REND_MergeMasaMetadata( ) { MASA_DECODER_EXT_OUT_META_HANDLE inMeta2; - float( *inEne1 )[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; - float( *inEne2 )[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + float ( *inEne1 )[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + float ( *inEne2 )[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; if ( hIvasRend == NULL ) { @@ -7683,7 +7674,7 @@ ivas_error IVAS_REND_SetTotalNumberOfObjects( ivas_error IVAS_REND_SetIsmMetadataDelay( IVAS_REND_HANDLE hIvasRend, /* i/o: IVAS renderer handle */ - const float sync_md_delay /* i : ISM Metadata Delay in ms to sync with audio delay */ + const int16_t sync_md_delay /* i : ISM Metadata Delay in ms to sync with audio delay */ ) { int16_t i; @@ -7695,7 +7686,7 @@ ivas_error IVAS_REND_SetIsmMetadataDelay( for ( i = 0; i < RENDERER_MAX_ISM_INPUTS; ++i ) { - hIvasRend->inputsIsm[i].ism_metadata_delay_ms = sync_md_delay; + hIvasRend->inputsIsm[i].ism_metadata_delay_ms = (int16_t) roundf( sync_md_delay / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) ); } return IVAS_ERR_OK; @@ -7942,7 +7933,7 @@ ivas_error IVAS_REND_GetSplitBinauralBitstream( &bits, Cldfb_RealBuffer_Binaural, Cldfb_ImagBuffer_Binaural, - ( const int16_t )( ( BINAURAL_MAXBANDS * hIvasRend->sampleRateOut ) / 48000 ), + (const int16_t) ( ( BINAURAL_MAXBANDS * hIvasRend->sampleRateOut ) / 48000 ), tmpBinaural, 1, cldfb_in_flag, diff --git a/lib_rend/lib_rend.h b/lib_rend/lib_rend.h index 211fd0d6e..f2cf92e08 100644 --- a/lib_rend/lib_rend.h +++ b/lib_rend/lib_rend.h @@ -380,7 +380,7 @@ ivas_error IVAS_REND_SetTotalNumberOfObjects( ivas_error IVAS_REND_SetIsmMetadataDelay( IVAS_REND_HANDLE hIvasRend, /* i/o: IVAS renderer handle */ - const float sync_md_delay /* i : Metadata Delay in ms to sync with audio delay */ + const int16_t sync_md_delay /* i : Metadata Delay in ms to sync with audio delay */ ); ivas_error IVAS_REND_GetNumAllObjects( -- GitLab From 27771584f23bdc88c8e7c1a75b76435f72cee10d Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Wed, 29 Oct 2025 14:48:01 +0100 Subject: [PATCH 2/3] make sure to store delay in ms and not already in subframe indexes --- lib_rend/lib_rend.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/lib_rend/lib_rend.c b/lib_rend/lib_rend.c index a479af20f..a21401e2f 100644 --- a/lib_rend/lib_rend.c +++ b/lib_rend/lib_rend.c @@ -1511,7 +1511,7 @@ static ivas_error alignInputDelay( if ( getAudioConfigType( inputBase->inConfig ) == IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED ) { inputIsm = (input_ism *) inputBase; - inputIsm->ism_metadata_delay_ms = (int16_t) roundf( inputIsm->ism_metadata_delay_ms + maxGlobalDelayNs / 1e6f / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) ); + inputIsm->ism_metadata_delay_ms = (int16_t) roundf( inputIsm->ism_metadata_delay_ms + maxGlobalDelayNs / 1e6f ); } } } @@ -5474,12 +5474,15 @@ static ivas_error renderIsmToBinaural( { float tmpTDRendBuffer[MAX_OUTPUT_CHANNELS][L_FRAME48k]; ivas_error error; + int16_t ism_md_subframe_update_ext; push_wmops( "renderIsmToBinaural" ); + /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */ + ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) ); copyBufferTo2dArray( ismInput->base.inputBuffer, tmpTDRendBuffer ); - if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb, ismInput->ism_metadata_delay_ms, + if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb, ism_md_subframe_update_ext, *ismInput->base.ctx.pOutSampleRate, outAudio.config.numSamplesPerChannel, tmpTDRendBuffer ) ) != IVAS_ERR_OK ) { return error; @@ -5674,13 +5677,16 @@ static ivas_error renderIsmToBinauralReverb( { float tmpRendBuffer[MAX_OUTPUT_CHANNELS][L_FRAME48k]; ivas_error error; + int16_t ism_md_subframe_update_ext; push_wmops( "renderIsmToBinauralRoom" ); + /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */ + ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) ); copyBufferTo2dArray( ismInput->base.inputBuffer, tmpRendBuffer ); if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb, - ismInput->ism_metadata_delay_ms, *ismInput->base.ctx.pOutSampleRate, outAudio.config.numSamplesPerChannel, tmpRendBuffer ) ) != IVAS_ERR_OK ) + ism_md_subframe_update_ext, *ismInput->base.ctx.pOutSampleRate, outAudio.config.numSamplesPerChannel, tmpRendBuffer ) ) != IVAS_ERR_OK ) { return error; } @@ -5847,12 +5853,16 @@ static ivas_error renderIsmToSplitBinaural( float tmpBinaural_CldfbIm[MAX_HEAD_ROT_POSES * BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX]; int16_t output_frame = ismInput->base.inputBuffer.config.numSamplesPerChannel; COMBINED_ORIENTATION_HANDLE pCombinedOrientationData; + int16_t ism_md_subframe_update_ext; push_wmops( "renderIsmToSplitBinaural" ); pSplitRendWrapper = ismInput->base.ctx.pSplitRendWrapper; pMultiBinPoseData = &pSplitRendWrapper->multiBinPoseData; + /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */ + ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) ); + pCombinedOrientationData = *ismInput->base.ctx.pCombinedOrientationData; if ( pMultiBinPoseData->poseCorrectionMode == ISAR_SPLIT_REND_POSE_CORRECTION_MODE_CLDFB ) @@ -5902,7 +5912,7 @@ static ivas_error renderIsmToSplitBinaural( /* Render */ if ( ( error = ivas_td_binaural_renderer_ext( ( pos_idx == 0 ) ? &ismInput->tdRendWrapper : &ismInput->splitTdRendWrappers[pos_idx - 1], ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, - NULL, ismInput->ism_metadata_delay_ms, *ismInput->base.ctx.pOutSampleRate, output_frame, tmpProcessing ) ) != IVAS_ERR_OK ) + NULL, ism_md_subframe_update_ext, *ismInput->base.ctx.pOutSampleRate, output_frame, tmpProcessing ) ) != IVAS_ERR_OK ) { return error; } @@ -7686,7 +7696,7 @@ ivas_error IVAS_REND_SetIsmMetadataDelay( for ( i = 0; i < RENDERER_MAX_ISM_INPUTS; ++i ) { - hIvasRend->inputsIsm[i].ism_metadata_delay_ms = (int16_t) roundf( sync_md_delay / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) ); + hIvasRend->inputsIsm[i].ism_metadata_delay_ms = sync_md_delay; } return IVAS_ERR_OK; -- GitLab From 07ef3ad1cb901d96fa038b1c225dba8d9904f39b Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Wed, 29 Oct 2025 15:07:13 +0100 Subject: [PATCH 3/3] use single constant instead of multiple ones --- lib_rend/lib_rend.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib_rend/lib_rend.c b/lib_rend/lib_rend.c index a21401e2f..a0da4e4b3 100644 --- a/lib_rend/lib_rend.c +++ b/lib_rend/lib_rend.c @@ -5479,7 +5479,7 @@ static ivas_error renderIsmToBinaural( push_wmops( "renderIsmToBinaural" ); /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */ - ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) ); + ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / (float) BINAURAL_RENDERING_FRAME_SIZE_MS ); copyBufferTo2dArray( ismInput->base.inputBuffer, tmpTDRendBuffer ); if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb, ism_md_subframe_update_ext, @@ -5682,7 +5682,7 @@ static ivas_error renderIsmToBinauralReverb( push_wmops( "renderIsmToBinauralRoom" ); /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */ - ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) ); + ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / (float) BINAURAL_RENDERING_FRAME_SIZE_MS ); copyBufferTo2dArray( ismInput->base.inputBuffer, tmpRendBuffer ); if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb, @@ -5861,7 +5861,7 @@ static ivas_error renderIsmToSplitBinaural( pMultiBinPoseData = &pSplitRendWrapper->multiBinPoseData; /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */ - ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) ); + ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / (float) BINAURAL_RENDERING_FRAME_SIZE_MS ); pCombinedOrientationData = *ismInput->base.ctx.pCombinedOrientationData; -- GitLab