From 058a764376b587dc239a9c3bd10ee645516795c9 Mon Sep 17 00:00:00 2001
From: Jan Kiene <jan.kiene@iis.fraunhofer.de>
Date: Fri, 24 Oct 2025 17:08:33 +0200
Subject: [PATCH 1/3] turn md delay renderer argument into integer type

---
 apps/renderer.c     |  5 +++--
 lib_rend/lib_rend.c | 33 ++++++++++++---------------------
 lib_rend/lib_rend.h |  2 +-
 3 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/apps/renderer.c b/apps/renderer.c
index 3f8c479d9..eccabd2a6 100644
--- a/apps/renderer.c
+++ b/apps/renderer.c
@@ -33,6 +33,7 @@
 #include "lib_rend.h"
 #include <assert.h>
 #include <math.h>
+#include <stdint.h>
 #include <string.h>
 #include "audio_file_reader.h"
 #include "audio_file_writer.h"
@@ -186,7 +187,7 @@ typedef struct
     float lfeConfigElevation;
     bool lfeCustomRoutingEnabled;
     char inLfePanningMatrixFile[RENDERER_MAX_CLI_ARG_LENGTH];
-    float syncMdDelay;
+    int16_t syncMdDelay;
     IVAS_RENDER_FRAMESIZE render_framesize;
     uint16_t directivityPatternId[RENDERER_MAX_ISM_INPUTS];
     AcousticEnvironmentSequence aeSequence;
@@ -2904,7 +2905,7 @@ static void parseOption(
         case CmdLnOptionId_syncMdDelay:
             assert( numOptionValues == 1 );
             /* Metadata Delay to sync with audio delay in ms */
-            args->syncMdDelay = strtof( optionValues[0], NULL );
+            args->syncMdDelay = (int16_t) strtol( optionValues[0], NULL, 10 );
             break;
         default:
             assert( 0 && "This should be unreachable - all command line options should be explicitly handled." );
diff --git a/lib_rend/lib_rend.c b/lib_rend/lib_rend.c
index be092691f..a479af20f 100644
--- a/lib_rend/lib_rend.c
+++ b/lib_rend/lib_rend.c
@@ -44,6 +44,7 @@
 #include <math.h>
 #include <stdbool.h>
 #include "wmc_auto.h"
+#include <stdint.h>
 
 
 /*-------------------------------------------------------------------*
@@ -125,7 +126,7 @@ typedef struct
 #ifdef NONBE_1377_REND_DIRATT_CONF
     int16_t object_id;
 #endif
-    float ism_metadata_delay_ms;
+    int16_t ism_metadata_delay_ms;
 } input_ism;
 
 typedef struct
@@ -1510,7 +1511,7 @@ static ivas_error alignInputDelay(
             if ( getAudioConfigType( inputBase->inConfig ) == IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED )
             {
                 inputIsm = (input_ism *) inputBase;
-                inputIsm->ism_metadata_delay_ms = maxGlobalDelayNs / 1e6f;
+                inputIsm->ism_metadata_delay_ms = (int16_t) roundf( inputIsm->ism_metadata_delay_ms + maxGlobalDelayNs / 1e6f / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) );
             }
         }
     }
@@ -5473,14 +5474,12 @@ static ivas_error renderIsmToBinaural(
 {
     float tmpTDRendBuffer[MAX_OUTPUT_CHANNELS][L_FRAME48k];
     ivas_error error;
-    int16_t ism_md_subframe_update_ext;
 
     push_wmops( "renderIsmToBinaural" );
-    /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */
-    ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) );
+
     copyBufferTo2dArray( ismInput->base.inputBuffer, tmpTDRendBuffer );
 
-    if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb, ism_md_subframe_update_ext,
+    if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb, ismInput->ism_metadata_delay_ms,
                                                   *ismInput->base.ctx.pOutSampleRate, outAudio.config.numSamplesPerChannel, tmpTDRendBuffer ) ) != IVAS_ERR_OK )
     {
         return error;
@@ -5675,17 +5674,13 @@ static ivas_error renderIsmToBinauralReverb(
 {
     float tmpRendBuffer[MAX_OUTPUT_CHANNELS][L_FRAME48k];
     ivas_error error;
-    int16_t ism_md_subframe_update_ext;
 
     push_wmops( "renderIsmToBinauralRoom" );
 
-    /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */
-    ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) );
-
     copyBufferTo2dArray( ismInput->base.inputBuffer, tmpRendBuffer );
 
     if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb,
-                                                  ism_md_subframe_update_ext, *ismInput->base.ctx.pOutSampleRate, outAudio.config.numSamplesPerChannel, tmpRendBuffer ) ) != IVAS_ERR_OK )
+                                                  ismInput->ism_metadata_delay_ms, *ismInput->base.ctx.pOutSampleRate, outAudio.config.numSamplesPerChannel, tmpRendBuffer ) ) != IVAS_ERR_OK )
     {
         return error;
     }
@@ -5852,16 +5847,12 @@ static ivas_error renderIsmToSplitBinaural(
     float tmpBinaural_CldfbIm[MAX_HEAD_ROT_POSES * BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
     int16_t output_frame = ismInput->base.inputBuffer.config.numSamplesPerChannel;
     COMBINED_ORIENTATION_HANDLE pCombinedOrientationData;
-    int16_t ism_md_subframe_update_ext;
 
     push_wmops( "renderIsmToSplitBinaural" );
 
     pSplitRendWrapper = ismInput->base.ctx.pSplitRendWrapper;
     pMultiBinPoseData = &pSplitRendWrapper->multiBinPoseData;
 
-    /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */
-    ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) );
-
     pCombinedOrientationData = *ismInput->base.ctx.pCombinedOrientationData;
 
     if ( pMultiBinPoseData->poseCorrectionMode == ISAR_SPLIT_REND_POSE_CORRECTION_MODE_CLDFB )
@@ -5911,7 +5902,7 @@ static ivas_error renderIsmToSplitBinaural(
 
         /* Render */
         if ( ( error = ivas_td_binaural_renderer_ext( ( pos_idx == 0 ) ? &ismInput->tdRendWrapper : &ismInput->splitTdRendWrappers[pos_idx - 1], ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos,
-                                                      NULL, ism_md_subframe_update_ext, *ismInput->base.ctx.pOutSampleRate, output_frame, tmpProcessing ) ) != IVAS_ERR_OK )
+                                                      NULL, ismInput->ism_metadata_delay_ms, *ismInput->base.ctx.pOutSampleRate, output_frame, tmpProcessing ) ) != IVAS_ERR_OK )
         {
             return error;
         }
@@ -7585,8 +7576,8 @@ ivas_error IVAS_REND_MergeMasaMetadata(
 )
 {
     MASA_DECODER_EXT_OUT_META_HANDLE inMeta2;
-    float( *inEne1 )[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
-    float( *inEne2 )[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
+    float ( *inEne1 )[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
+    float ( *inEne2 )[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
 
     if ( hIvasRend == NULL )
     {
@@ -7683,7 +7674,7 @@ ivas_error IVAS_REND_SetTotalNumberOfObjects(
 
 ivas_error IVAS_REND_SetIsmMetadataDelay(
     IVAS_REND_HANDLE hIvasRend, /* i/o: IVAS renderer handle    */
-    const float sync_md_delay   /* i  : ISM Metadata Delay in ms to sync with audio delay   */
+    const int16_t sync_md_delay /* i  : ISM Metadata Delay in ms to sync with audio delay   */
 )
 {
     int16_t i;
@@ -7695,7 +7686,7 @@ ivas_error IVAS_REND_SetIsmMetadataDelay(
 
     for ( i = 0; i < RENDERER_MAX_ISM_INPUTS; ++i )
     {
-        hIvasRend->inputsIsm[i].ism_metadata_delay_ms = sync_md_delay;
+        hIvasRend->inputsIsm[i].ism_metadata_delay_ms = (int16_t) roundf( sync_md_delay / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) );
     }
 
     return IVAS_ERR_OK;
@@ -7942,7 +7933,7 @@ ivas_error IVAS_REND_GetSplitBinauralBitstream(
                                                           &bits,
                                                           Cldfb_RealBuffer_Binaural,
                                                           Cldfb_ImagBuffer_Binaural,
-                                                          ( const int16_t )( ( BINAURAL_MAXBANDS * hIvasRend->sampleRateOut ) / 48000 ),
+                                                          (const int16_t) ( ( BINAURAL_MAXBANDS * hIvasRend->sampleRateOut ) / 48000 ),
                                                           tmpBinaural,
                                                           1,
                                                           cldfb_in_flag,
diff --git a/lib_rend/lib_rend.h b/lib_rend/lib_rend.h
index 211fd0d6e..f2cf92e08 100644
--- a/lib_rend/lib_rend.h
+++ b/lib_rend/lib_rend.h
@@ -380,7 +380,7 @@ ivas_error IVAS_REND_SetTotalNumberOfObjects(
 
 ivas_error IVAS_REND_SetIsmMetadataDelay(
     IVAS_REND_HANDLE hIvasRend,                     /* i/o: IVAS renderer handle                                */
-    const float sync_md_delay                       /* i  :   Metadata Delay in ms to sync with audio delay     */
+    const int16_t sync_md_delay                       /* i  :   Metadata Delay in ms to sync with audio delay     */
 );
 
 ivas_error IVAS_REND_GetNumAllObjects(
-- 
GitLab


From 27771584f23bdc88c8e7c1a75b76435f72cee10d Mon Sep 17 00:00:00 2001
From: Jan Kiene <jan.kiene@iis.fraunhofer.de>
Date: Wed, 29 Oct 2025 14:48:01 +0100
Subject: [PATCH 2/3] make sure to store the delay in ms rather than in
 subframe indices

---
 lib_rend/lib_rend.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/lib_rend/lib_rend.c b/lib_rend/lib_rend.c
index a479af20f..a21401e2f 100644
--- a/lib_rend/lib_rend.c
+++ b/lib_rend/lib_rend.c
@@ -1511,7 +1511,7 @@ static ivas_error alignInputDelay(
             if ( getAudioConfigType( inputBase->inConfig ) == IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED )
             {
                 inputIsm = (input_ism *) inputBase;
-                inputIsm->ism_metadata_delay_ms = (int16_t) roundf( inputIsm->ism_metadata_delay_ms + maxGlobalDelayNs / 1e6f / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) );
+                inputIsm->ism_metadata_delay_ms = (int16_t) roundf( inputIsm->ism_metadata_delay_ms + maxGlobalDelayNs / 1e6f );
             }
         }
     }
@@ -5474,12 +5474,15 @@ static ivas_error renderIsmToBinaural(
 {
     float tmpTDRendBuffer[MAX_OUTPUT_CHANNELS][L_FRAME48k];
     ivas_error error;
+    int16_t ism_md_subframe_update_ext;
 
     push_wmops( "renderIsmToBinaural" );
 
+    /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */
+    ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) );
     copyBufferTo2dArray( ismInput->base.inputBuffer, tmpTDRendBuffer );
 
-    if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb, ismInput->ism_metadata_delay_ms,
+    if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb, ism_md_subframe_update_ext,
                                                   *ismInput->base.ctx.pOutSampleRate, outAudio.config.numSamplesPerChannel, tmpTDRendBuffer ) ) != IVAS_ERR_OK )
     {
         return error;
@@ -5674,13 +5677,16 @@ static ivas_error renderIsmToBinauralReverb(
 {
     float tmpRendBuffer[MAX_OUTPUT_CHANNELS][L_FRAME48k];
     ivas_error error;
+    int16_t ism_md_subframe_update_ext;
 
     push_wmops( "renderIsmToBinauralRoom" );
 
+    /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */
+    ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) );
     copyBufferTo2dArray( ismInput->base.inputBuffer, tmpRendBuffer );
 
     if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb,
-                                                  ismInput->ism_metadata_delay_ms, *ismInput->base.ctx.pOutSampleRate, outAudio.config.numSamplesPerChannel, tmpRendBuffer ) ) != IVAS_ERR_OK )
+                                                  ism_md_subframe_update_ext, *ismInput->base.ctx.pOutSampleRate, outAudio.config.numSamplesPerChannel, tmpRendBuffer ) ) != IVAS_ERR_OK )
     {
         return error;
     }
@@ -5847,12 +5853,16 @@ static ivas_error renderIsmToSplitBinaural(
     float tmpBinaural_CldfbIm[MAX_HEAD_ROT_POSES * BINAURAL_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
     int16_t output_frame = ismInput->base.inputBuffer.config.numSamplesPerChannel;
     COMBINED_ORIENTATION_HANDLE pCombinedOrientationData;
+    int16_t ism_md_subframe_update_ext;
 
     push_wmops( "renderIsmToSplitBinaural" );
 
     pSplitRendWrapper = ismInput->base.ctx.pSplitRendWrapper;
     pMultiBinPoseData = &pSplitRendWrapper->multiBinPoseData;
 
+    /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */
+    ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) );
+
     pCombinedOrientationData = *ismInput->base.ctx.pCombinedOrientationData;
 
     if ( pMultiBinPoseData->poseCorrectionMode == ISAR_SPLIT_REND_POSE_CORRECTION_MODE_CLDFB )
@@ -5902,7 +5912,7 @@ static ivas_error renderIsmToSplitBinaural(
 
         /* Render */
         if ( ( error = ivas_td_binaural_renderer_ext( ( pos_idx == 0 ) ? &ismInput->tdRendWrapper : &ismInput->splitTdRendWrappers[pos_idx - 1], ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos,
-                                                      NULL, ismInput->ism_metadata_delay_ms, *ismInput->base.ctx.pOutSampleRate, output_frame, tmpProcessing ) ) != IVAS_ERR_OK )
+                                                      NULL, ism_md_subframe_update_ext, *ismInput->base.ctx.pOutSampleRate, output_frame, tmpProcessing ) ) != IVAS_ERR_OK )
         {
             return error;
         }
@@ -7686,7 +7696,7 @@ ivas_error IVAS_REND_SetIsmMetadataDelay(
 
     for ( i = 0; i < RENDERER_MAX_ISM_INPUTS; ++i )
     {
-        hIvasRend->inputsIsm[i].ism_metadata_delay_ms = (int16_t) roundf( sync_md_delay / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) );
+        hIvasRend->inputsIsm[i].ism_metadata_delay_ms = sync_md_delay;
     }
 
     return IVAS_ERR_OK;
-- 
GitLab


From 07ef3ad1cb901d96fa038b1c225dba8d9904f39b Mon Sep 17 00:00:00 2001
From: Jan Kiene <jan.kiene@iis.fraunhofer.de>
Date: Wed, 29 Oct 2025 15:07:13 +0100
Subject: [PATCH 3/3] use single constant instead of multiple ones

---
 lib_rend/lib_rend.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib_rend/lib_rend.c b/lib_rend/lib_rend.c
index a21401e2f..a0da4e4b3 100644
--- a/lib_rend/lib_rend.c
+++ b/lib_rend/lib_rend.c
@@ -5479,7 +5479,7 @@ static ivas_error renderIsmToBinaural(
     push_wmops( "renderIsmToBinaural" );
 
     /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */
-    ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) );
+    ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / (float) BINAURAL_RENDERING_FRAME_SIZE_MS );
     copyBufferTo2dArray( ismInput->base.inputBuffer, tmpTDRendBuffer );
 
     if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb, ism_md_subframe_update_ext,
@@ -5682,7 +5682,7 @@ static ivas_error renderIsmToBinauralReverb(
     push_wmops( "renderIsmToBinauralRoom" );
 
     /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */
-    ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) );
+    ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / (float) BINAURAL_RENDERING_FRAME_SIZE_MS );
     copyBufferTo2dArray( ismInput->base.inputBuffer, tmpRendBuffer );
 
     if ( ( error = ivas_td_binaural_renderer_ext( &ismInput->tdRendWrapper, ismInput->base.inConfig, NULL, ismInput->base.ctx.pCombinedOrientationData, &ismInput->currentPos, ismInput->hReverb,
@@ -5861,7 +5861,7 @@ static ivas_error renderIsmToSplitBinaural(
     pMultiBinPoseData = &pSplitRendWrapper->multiBinPoseData;
 
     /* Metadata Delay to sync with audio delay converted from ms to 5ms (1000/50/4) subframe index */
-    ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / ( 1000.f / FRAMES_PER_SEC / MAX_PARAM_SPATIAL_SUBFRAMES ) );
+    ism_md_subframe_update_ext = (int16_t) roundf( ismInput->ism_metadata_delay_ms / (float) BINAURAL_RENDERING_FRAME_SIZE_MS );
 
     pCombinedOrientationData = *ismInput->base.ctx.pCombinedOrientationData;
 
-- 
GitLab