From b0229483e57f1503e2908e56dfeba821e707d518 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Wed, 20 Aug 2025 19:19:11 +0200 Subject: [PATCH 1/4] support RIR convolution for SBA formats --- .../generation/generate_masa_items.py | 21 +-- .../generation/generate_omasa_items.py | 4 +- .../generation/generate_osba_items.py | 2 +- .../generation/generate_sba_items.py | 139 +++++++++++++++--- 4 files changed, 132 insertions(+), 34 deletions(-) diff --git a/ivas_processing_scripts/generation/generate_masa_items.py b/ivas_processing_scripts/generation/generate_masa_items.py index 6ddac870..5ecf2358 100644 --- a/ivas_processing_scripts/generation/generate_masa_items.py +++ b/ivas_processing_scripts/generation/generate_masa_items.py @@ -246,22 +246,18 @@ def generate_MASA_scene( # of the reference signal (0-based index) if isinstance(scene["shift"][i], str) and "(" in scene["shift"][i]: # extract X and i_ref - match = re.match( - r"([+-]?\d*\.?\d+)[\(\[]([+-]?\d+)[\)\]]", scene["shift"][i] - ) + match = re.match(r"([+-]?\d*\.?\d+)[\(\[]([+-]?\d+)[\)\]]", scene["shift"][i]) if match: overlap = float(match.group(1)) overlap_ref = int(match.group(2)) else: scene_shift_str = scene["shift"][i] - logger.error( - f"Unable to parse {scene_shift_str}. The specification of overlap or reference is incorrect!" - ) + logger.error(f"Unable to parse {scene_shift_str}. 
The specification of overlap or reference is incorrect!") sys.exit(-1) # calculate absolute shift of the source signal in seconds - source_shift = end_position[overlap_ref] - overlap + source_shift = end_position[overlap_ref] + overlap else: source_shift = 0.0 @@ -432,15 +428,14 @@ def generate_MASA_scene( # trim the output signal if the total duration exceeds X seconds if "duration" in cfg.__dict__: # convert from seconds to samples (ensure multiple of 20ms) - duration = int(np.floor(int(cfg.duration * cfg.fs) / frame_len) * frame_len) + duration = int( + np.floor(int(cfg.duration * cfg.fs) / frame_len) * frame_len + ) # check if the current length of the output signal exceeds the duration if len(y_int.audio) > duration: y_int.audio = audioarray.trim( - y_int.audio, - y_int.fs, - limits=[0, len(y_int.audio) - duration], - samples=True, + y_int.audio, y_int.fs, limits=[0, len(y_int.audio) - duration], samples=True ) # adjust the loudness of the output signal @@ -525,7 +520,7 @@ def generate_MASA_scene( y_int.audio = audioarray.window(y_int.audio, y_int.fs, cfg.fade_in_out * 1000) # generate MASA metadata filename (should end with .met) - y.metadata_file = output_filename.with_suffix(".met") + y.metadata_file = output_filename.with_suffix(output_filename.suffix + ".met") # convert the intermediate SBA output signal to MASA format render_sba_to_masa(y_int, y) diff --git a/ivas_processing_scripts/generation/generate_omasa_items.py b/ivas_processing_scripts/generation/generate_omasa_items.py index 0881c7ca..929fed63 100644 --- a/ivas_processing_scripts/generation/generate_omasa_items.py +++ b/ivas_processing_scripts/generation/generate_omasa_items.py @@ -474,7 +474,7 @@ def generate_OMASA_scene( # add ISM metadata .csv filename (should end with .wav.0.csv, .wav.1.csv, ...) 
y_int.metadata_files.insert( - i - 1, str(output_filename.with_suffix(f".{i - 1}.csv")) + i - 1, str(output_filename.with_suffix(output_filename.suffix + f".{i - 1}.csv")) ) # append pre-amble and post-amble @@ -520,7 +520,7 @@ def generate_OMASA_scene( y_int.audio = audioarray.window(y_int.audio, y_int.fs, cfg.fade_in_out * 1000) # generate and insert MASA metadata filename (should end with .met) - y.metadata_files.append(str(output_filename.with_suffix(".met"))) + y.metadata_files.append(str(output_filename.with_suffix(output_filename.suffix + ".met"))) # convert the intermediate OSBA object to OMASA object convert_osba(y_int, y) diff --git a/ivas_processing_scripts/generation/generate_osba_items.py b/ivas_processing_scripts/generation/generate_osba_items.py index 8d2ca0d8..7c1ee264 100644 --- a/ivas_processing_scripts/generation/generate_osba_items.py +++ b/ivas_processing_scripts/generation/generate_osba_items.py @@ -460,7 +460,7 @@ def generate_OSBA_scene( # add ISM metadata .csv filename (should end with .wav.0.csv, .wav.1.csv, ...) 
y.metadata_files.insert( - i - 1, str(output_filename.with_suffix(f".{i - 1}.csv")) + i - 1, str(output_filename.with_suffix(output_filename.suffix + f".{i - 1}.csv")) ) # append pre-amble and post-amble diff --git a/ivas_processing_scripts/generation/generate_sba_items.py b/ivas_processing_scripts/generation/generate_sba_items.py index 631d6165..297a332d 100644 --- a/ivas_processing_scripts/generation/generate_sba_items.py +++ b/ivas_processing_scripts/generation/generate_sba_items.py @@ -39,6 +39,7 @@ from pathlib import Path import numpy as np from ivas_processing_scripts.audiotools import audio, audioarray, audiofile +from ivas_processing_scripts.audiotools.convert.objectbased import convert_objectbased from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm from ivas_processing_scripts.audiotools.wrappers.reverb import ( @@ -208,13 +209,27 @@ def generate_sba_scene( source_file = ( scene["input"][i] if isinstance(scene["input"], list) else scene["input"] ) - IR_file = scene["IR"][i] if isinstance(scene["IR"], list) else scene["IR"] - # get input filename and IR filename input_filename = Path(source_file).parent / ( cfg.use_input_prefix + Path(source_file).name ) - IR_filename = Path(IR_file).parent / (cfg.use_IR_prefix + Path(IR_file).name) + + # get input filename and IR filename + if "IR" in scene.keys(): + IR_file = scene["IR"][i] if isinstance(scene["IR"], list) else scene["IR"] + IR_filename = Path(IR_file).parent / (cfg.use_IR_prefix + Path(IR_file).name) + else: + # read azimuth and elevation information + source_azi = ( + scene["azimuth"][i] + if isinstance(scene["azimuth"], list) + else scene["azimuth"] + ) + source_ele = ( + scene["elevation"][i] + if isinstance(scene["elevation"], list) + else scene["elevation"] + ) # read the source shift length (in seconds) if "shift" in scene.keys(): @@ -282,9 +297,14 @@ def generate_sba_scene( else: level = 
-26 - logger.info( - f"-- Convolving {source_file} with {IR_file} at {level} LKFS with shift of {source_shift_in_seconds} seconds" - ) + if "IR" in scene.keys(): + logger.info( + f"-- Convolving {source_file} with {IR_file} at {level} LKFS with shift of {source_shift_in_seconds} seconds" + ) + else: + logger.info( + f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LKFS with shift of {source_shift_in_seconds} seconds" + ) # read source file x = audio.fromfile("MONO", input_filename) @@ -301,17 +321,6 @@ def generate_sba_scene( x.audio = resampled_audio x.fs = cfg.fs - # read the IR file (!must be in target format!) - IR = audio.fromfile(cfg.format, IR_filename) - - # convolve MONO source audio with FOA/HOA2/HOA3 IR -> results in FOA/HOA2/HOA3 audio object - if cfg.format == "FOA": - x = reverb_foa(x, IR, mode=None) - elif cfg.format == "HOA2": - x = reverb_hoa2(x, IR, mode=None) - elif cfg.format == "HOA3": - x = reverb_hoa3(x, IR, mode=None) - # adjust the level of the FOA/HOA2/HOA3 signal if level is None: # do not change the level of the audio source signal @@ -331,6 +340,100 @@ def generate_sba_scene( x.audio, x.fs, limits=[0, -N_pad], samples=True ) + # get the number of frames (multiple of 20ms) + N_frames = int(len(x.audio) / frame_len) + + if "IR" in scene.keys(): + # read the IR file (!must be in target format!) 
+ IR = audio.fromfile(cfg.format, IR_filename) + + # convolve MONO source audio with FOA/HOA2/HOA3 IR -> results in FOA/HOA2/HOA3 audio object + if cfg.format == "FOA": + x = reverb_foa(x, IR, mode=None) + elif cfg.format == "HOA2": + x = reverb_hoa2(x, IR, mode=None) + elif cfg.format == "HOA3": + x = reverb_hoa3(x, IR, mode=None) + else: + # convert MONO to ISM1 + x_ism = audio.ObjectBasedAudio("ISM1") # ISM with 1 channel + x_ism.fs = cfg.fs + x_ism.audio = x.audio.copy() + + # convert azimuth information in case of moving object + if isinstance(source_azi, str): + if ":" in source_azi: + # convert into array (initial_value:step:stop_value) + start_str, step_str, stop_str = source_azi.split(":") + start = float(eval(start_str)) + step = float(eval(step_str)) + stop = float(eval(stop_str)) + azi = np.arange(start, stop, step) + + # adjust length to N_frames + if len(azi) > N_frames: + azi = azi[:N_frames] + elif len(azi) < N_frames: + azi = np.append(azi, np.full(N_frames - len(azi), azi[-1])) + else: + # replicate static azimuth value N_frames times + azi = np.repeat(float(eval(source_azi)), N_frames) + else: + # replicate static azimuth value N_frames times + azi = np.repeat(float(source_azi), N_frames) + + # convert azimuth from 0 .. 360 to -180 .. +180 + azi = (azi + 180) % 360 - 180 + + # check if azimuth is from -180 .. 
+180 + if any(azi > 180) or any(azi < -180): + logger.error( + f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}" + ) + + # convert elevation information in case of moving object + if isinstance(source_ele, str): + if ":" in source_ele: + # convert into array (initial_value:step:stop_value) + start_str, step_str, stop_str = source_ele.split(":") + start = float(eval(start_str)) + step = float(eval(step_str)) + stop = float(eval(stop_str)) + ele = np.arange(start, stop, step) + + # adjust length to N_frames + if len(ele) > N_frames: + ele = ele[:N_frames] + elif len(ele) < N_frames: + ele = np.append(ele, np.full(N_frames - len(ele), ele[-1])) + + else: + # replicate static elevation value N_frames times + ele = np.repeat(float(eval(source_ele)), N_frames) + else: + # replicate static elevation value N_frames times + ele = np.repeat(float(source_ele), N_frames) + + # wrap elevation angle to -90 .. +90 + ele = ((ele + 90) % 180) - 90 + + # check if elevation is from -90 .. 
+90 + if any(ele > 90) or any(ele < -90): + logger.error( + f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}" + ) + + # generate radius vector with all values equal to 1.0 + rad = np.ones(N_frames) + + # arrange all metadata fields column-wise into a matrix + x_ism.object_pos.append(np.column_stack((azi, ele, rad))) + + # convert ISM1 object to SBA + x_sba = audio.SceneBasedAudio(cfg.format) + convert_objectbased(x_ism, x_sba) + x = x_sba # replace x with the SBA object + # add the convolved FOA/HOA2/HOA3 audio source signal to the output signal if y.audio is None: # add source signal to the array of all source signals @@ -338,7 +441,7 @@ def generate_sba_scene( if source_shift > 0: # insert zeros to the new audio source signal to shift it right - y.audio = audioarray.trim_meta( + y.audio = audioarray.trim( y.audio, y.fs, limits=[-source_shift, 0], samples=True ) else: -- GitLab From d44605c80c329afce35cc18c349a87eb782997db Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 21 Aug 2025 09:03:29 +0200 Subject: [PATCH 2/4] small correction to suffix --- ivas_processing_scripts/generation/generate_ismN_items.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ivas_processing_scripts/generation/generate_ismN_items.py b/ivas_processing_scripts/generation/generate_ismN_items.py index 3c474309..8f9f292c 100644 --- a/ivas_processing_scripts/generation/generate_ismN_items.py +++ b/ivas_processing_scripts/generation/generate_ismN_items.py @@ -446,7 +446,7 @@ def generate_ismN_scene( y.object_pos.extend(x.object_pos) # add ISM metadata .csv filename (should end with .wav.0.csv, .wav.1.csv, ...) 
- y.metadata_files.insert(i, str(output_filename.with_suffix(f".{i}.csv"))) + y.metadata_files.insert(i, str(output_filename.with_suffix(output_filename.suffix + f".{i}.csv"))) # append pre-amble and post-amble if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__: -- GitLab From c57e1c57dbeb419b6d77283e051b6c485eca1efa Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 21 Aug 2025 09:05:26 +0200 Subject: [PATCH 3/4] formatting --- .../generation/generate_masa_items.py | 17 +++++++++++------ .../generation/generate_omasa_items.py | 11 +++++++++-- .../generation/generate_osba_items.py | 7 ++++++- .../generation/generate_sba_items.py | 4 +++- 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/ivas_processing_scripts/generation/generate_masa_items.py b/ivas_processing_scripts/generation/generate_masa_items.py index 5ecf2358..faba9112 100644 --- a/ivas_processing_scripts/generation/generate_masa_items.py +++ b/ivas_processing_scripts/generation/generate_masa_items.py @@ -246,14 +246,18 @@ def generate_MASA_scene( # of the reference signal (0-based index) if isinstance(scene["shift"][i], str) and "(" in scene["shift"][i]: # extract X and i_ref - match = re.match(r"([+-]?\d*\.?\d+)[\(\[]([+-]?\d+)[\)\]]", scene["shift"][i]) + match = re.match( + r"([+-]?\d*\.?\d+)[\(\[]([+-]?\d+)[\)\]]", scene["shift"][i] + ) if match: overlap = float(match.group(1)) overlap_ref = int(match.group(2)) else: scene_shift_str = scene["shift"][i] - logger.error(f"Unable to parse {scene_shift_str}. The specification of overlap or reference is incorrect!") + logger.error( + f"Unable to parse {scene_shift_str}. The specification of overlap or reference is incorrect!" 
+ ) sys.exit(-1) # calculate absolute shift of the source signal in seconds @@ -428,14 +432,15 @@ def generate_MASA_scene( # trim the output signal if the total duration exceeds X seconds if "duration" in cfg.__dict__: # convert from seconds to samples (ensure multiple of 20ms) - duration = int( - np.floor(int(cfg.duration * cfg.fs) / frame_len) * frame_len - ) + duration = int(np.floor(int(cfg.duration * cfg.fs) / frame_len) * frame_len) # check if the current length of the output signal exceeds the duration if len(y_int.audio) > duration: y_int.audio = audioarray.trim( - y_int.audio, y_int.fs, limits=[0, len(y_int.audio) - duration], samples=True + y_int.audio, + y_int.fs, + limits=[0, len(y_int.audio) - duration], + samples=True, ) # adjust the loudness of the output signal diff --git a/ivas_processing_scripts/generation/generate_omasa_items.py b/ivas_processing_scripts/generation/generate_omasa_items.py index 929fed63..b8aba2b6 100644 --- a/ivas_processing_scripts/generation/generate_omasa_items.py +++ b/ivas_processing_scripts/generation/generate_omasa_items.py @@ -474,7 +474,12 @@ def generate_OMASA_scene( # add ISM metadata .csv filename (should end with .wav.0.csv, .wav.1.csv, ...) 
y_int.metadata_files.insert( - i - 1, str(output_filename.with_suffix(output_filename.suffix + f".{i - 1}.csv")) + i - 1, + str( + output_filename.with_suffix( + output_filename.suffix + f".{i - 1}.csv" + ) + ), ) # append pre-amble and post-amble @@ -520,7 +525,9 @@ def generate_OMASA_scene( y_int.audio = audioarray.window(y_int.audio, y_int.fs, cfg.fade_in_out * 1000) # generate and insert MASA metadata filename (should end with .met) - y.metadata_files.append(str(output_filename.with_suffix(output_filename.suffix + ".met"))) + y.metadata_files.append( + str(output_filename.with_suffix(output_filename.suffix + ".met")) + ) # convert the intermediate OSBA object to OMASA object convert_osba(y_int, y) diff --git a/ivas_processing_scripts/generation/generate_osba_items.py b/ivas_processing_scripts/generation/generate_osba_items.py index 7c1ee264..8190f5ca 100644 --- a/ivas_processing_scripts/generation/generate_osba_items.py +++ b/ivas_processing_scripts/generation/generate_osba_items.py @@ -460,7 +460,12 @@ def generate_OSBA_scene( # add ISM metadata .csv filename (should end with .wav.0.csv, .wav.1.csv, ...) 
y.metadata_files.insert( - i - 1, str(output_filename.with_suffix(output_filename.suffix + f".{i - 1}.csv")) + i - 1, + str( + output_filename.with_suffix( + output_filename.suffix + f".{i - 1}.csv" + ) + ), ) # append pre-amble and post-amble diff --git a/ivas_processing_scripts/generation/generate_sba_items.py b/ivas_processing_scripts/generation/generate_sba_items.py index 297a332d..22aba8ab 100644 --- a/ivas_processing_scripts/generation/generate_sba_items.py +++ b/ivas_processing_scripts/generation/generate_sba_items.py @@ -217,7 +217,9 @@ def generate_sba_scene( # get input filename and IR filename if "IR" in scene.keys(): IR_file = scene["IR"][i] if isinstance(scene["IR"], list) else scene["IR"] - IR_filename = Path(IR_file).parent / (cfg.use_IR_prefix + Path(IR_file).name) + IR_filename = Path(IR_file).parent / ( + cfg.use_IR_prefix + Path(IR_file).name + ) else: # read azimuth and elevation information source_azi = ( -- GitLab From cee72a170eca9618e35572bf199d629e39fd6531 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 21 Aug 2025 09:49:37 +0200 Subject: [PATCH 4/4] formatting --- ivas_processing_scripts/generation/generate_ismN_items.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ivas_processing_scripts/generation/generate_ismN_items.py b/ivas_processing_scripts/generation/generate_ismN_items.py index 8f9f292c..948c6d48 100644 --- a/ivas_processing_scripts/generation/generate_ismN_items.py +++ b/ivas_processing_scripts/generation/generate_ismN_items.py @@ -446,7 +446,9 @@ def generate_ismN_scene( y.object_pos.extend(x.object_pos) # add ISM metadata .csv filename (should end with .wav.0.csv, .wav.1.csv, ...) - y.metadata_files.insert(i, str(output_filename.with_suffix(output_filename.suffix + f".{i}.csv"))) + y.metadata_files.insert( + i, str(output_filename.with_suffix(output_filename.suffix + f".{i}.csv")) + ) # append pre-amble and post-amble if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__: -- GitLab