From 66b1c883738079db474f840c9b27f65c15ef6467 Mon Sep 17 00:00:00 2001
From: Archit Tamarapu <archit.tamarapu@iis.fraunhofer.de>
Date: Mon, 24 Oct 2022 10:57:18 +0200
Subject: [PATCH 1/6] add a new generic function for multiprocessing and
 cleanup audio3dtools.py

---
 scripts/pyaudio3dtools/audio3dtools.py | 218 ++++++++++++-------------
 scripts/pyaudio3dtools/audioarray.py   |  17 +-
 2 files changed, 125 insertions(+), 110 deletions(-)

diff --git a/scripts/pyaudio3dtools/audio3dtools.py b/scripts/pyaudio3dtools/audio3dtools.py
index 99ae71351b..d459615eae 100644
--- a/scripts/pyaudio3dtools/audio3dtools.py
+++ b/scripts/pyaudio3dtools/audio3dtools.py
@@ -46,7 +46,114 @@ logger = main_logger.getChild(__name__)
 logger.setLevel(logging.DEBUG)
 
 
-def main():
+def main(args):
+    # Set up logging handlers
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.INFO)
+    console_handler.setFormatter(logging.Formatter("%(message)s"))
+
+    # Configure loggers
+    LOGGER_FORMAT = "%(asctime)s | %(name)-12s | %(levelname)-8s | %(message)s"
+    LOGGER_DATEFMT = "%m-%d %H:%M"
+    logging.basicConfig(
+        format=LOGGER_FORMAT,
+        datefmt=LOGGER_DATEFMT,
+        level=logging.INFO,
+        handlers=[console_handler],
+    )
+    logger.info("Audio3DTools")
+
+    if args.list is True or args.long is True:
+        logger.info("===Supported spatial audio formats===")
+        spatialaudioformat.Format.list_all(args.long)
+
+    elif args.infiles is not None:
+        logger.info("===Convert spatial audio file===")
+        # Input folder can be a path, a file or a list of files
+        if os.path.isdir(args.infiles):
+            path = args.infiles
+            audio_list = [
+                os.path.join(path, f) for f in os.listdir(path) if f.endswith((".wav"))
+            ]
+        else:
+            audio_list = [args.infiles]
+
+        outdir = args.outdir
+        _, output_ext = os.path.splitext(os.path.basename(outdir))
+        if (len(audio_list) == 1) and (
+            (output_ext.lower() == ".wav") or (output_ext.lower() == ".pcm")
+        ):
+            outfile = outdir
+        else:
+            outfile = None
+            if not os.path.exists(outdir):
+                os.makedirs(outdir)
+
+        for infile in audio_list:
+            logger.info(f"  process {infile}")
+
+            _, input_ext = os.path.splitext(os.path.basename(infile))
+
+            if outfile is None:
+                outfile = os.path.basename(infile)
+                if not args.dont_rename:
+                    if args.outformat is not None:
+                        outfile = outfile.replace(input_ext, f"_{args.outformat}.wav")
+                    else:
+                        outfile = outfile.replace(input_ext, ".out.wav")
+                outfile = os.path.join(outdir, outfile)
+
+            spatialaudioconvert.spatial_audio_convert(
+                infile,
+                outfile,
+                in_format=args.informat,
+                in_fs=args.infs,
+                in_nchans=args.inchan,
+                in_meta_files=args.metadata,
+                in_ls_layout_file=args.layoutfile,
+                out_format=args.outformat,
+                out_fs=args.outfs,
+                out_fc=args.outfc,
+                output_loudness=args.normalize,
+                loudness_tool=args.loudness_tool,
+                trajectory=args.trajectory,
+                binaural_dataset=args.binaural_dataset,
+            )
+
+            logger.info(f"  Output {outfile}")
+
+            if args.binaural:
+                if args.outformat.startswith("BINAURAL"):
+                    raise SystemExit(
+                        "BINAURAL output format can not be binauralized again!"
+                    )
+
+                _, output_ext = os.path.splitext(os.path.basename(outfile))
+                outfile_bin = outfile.replace(output_ext, "_BINAURAL.wav")
+                logger.info(f"  Output binaural {outfile_bin}")
+
+                spatialaudioconvert.spatial_audio_convert(
+                    in_file=outfile,
+                    out_file=outfile_bin,
+                    in_format=args.outformat,
+                    in_fs=args.outfs,
+                    in_meta_files=args.metadata,
+                    in_ls_layout_file=args.layoutfile,
+                    out_format="BINAURAL",
+                    output_loudness=args.normalize,
+                    loudness_tool=args.loudness_tool,
+                    trajectory=args.trajectory,
+                    binaural_dataset=args.binaural_dataset,
+                )
+
+            outfile = None
+    else:
+        raise Exception(
+            "Input file must be provided for conversion and audio manipulation."
+        )
+
+
+if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="Audio3DTools: Convert/Manipulate spatial audio files."
     )
@@ -186,111 +293,4 @@ def main():
     )
     args = parser.parse_args()
 
-    # Set up logging handlers
-    console_handler = logging.StreamHandler()
-    console_handler.setLevel(logging.INFO)
-    console_handler.setFormatter(logging.Formatter("%(message)s"))
-
-    # Configure loggers
-    LOGGER_FORMAT = "%(asctime)s | %(name)-12s | %(levelname)-8s | %(message)s"
-    LOGGER_DATEFMT = "%m-%d %H:%M"
-    logging.basicConfig(
-        format=LOGGER_FORMAT,
-        datefmt=LOGGER_DATEFMT,
-        level=logging.INFO,
-        handlers=[console_handler],
-    )
-    logger.info("Audio3DTools")
-
-    if args.list is True or args.long is True:
-        logger.info("===Supported spatial audio formats===")
-        spatialaudioformat.Format.list_all(args.long)
-
-    elif args.infiles is not None:
-        logger.info("===Convert spatial audio file===")
-        # Input folder can be a path, a file or a list of files
-        if os.path.isdir(args.infiles):
-            path = args.infiles
-            audio_list = [
-                os.path.join(path, f) for f in os.listdir(path) if f.endswith((".wav"))
-            ]
-        else:
-            audio_list = [args.infiles]
-
-        outdir = args.outdir
-        _, output_ext = os.path.splitext(os.path.basename(outdir))
-        if (len(audio_list) == 1) and (
-            (output_ext.lower() == ".wav") or (output_ext.lower() == ".pcm")
-        ):
-            outfile = outdir
-        else:
-            outfile = None
-            if not os.path.exists(outdir):
-                os.makedirs(outdir)
-
-        for infile in audio_list:
-            logger.info(f"  process {infile}")
-
-            _, input_ext = os.path.splitext(os.path.basename(infile))
-
-            if outfile is None:
-                outfile = os.path.basename(infile)
-                if not args.dont_rename:
-                    if args.outformat is not None:
-                        outfile = outfile.replace(input_ext, f"_{args.outformat}.wav")
-                    else:
-                        outfile = outfile.replace(input_ext, ".out.wav")
-                outfile = os.path.join(outdir, outfile)
-
-            spatialaudioconvert.spatial_audio_convert(
-                infile,
-                outfile,
-                in_format=args.informat,
-                in_fs=args.infs,
-                in_nchans=args.inchan,
-                in_meta_files=args.metadata,
-                in_ls_layout_file=args.layoutfile,
-                out_format=args.outformat,
-                out_fs=args.outfs,
-                out_fc=args.outfc,
-                output_loudness=args.normalize,
-                loudness_tool=args.loudness_tool,
-                trajectory=args.trajectory,
-                binaural_dataset=args.binaural_dataset,
-            )
-
-            logger.info(f"  Output {outfile}")
-
-            if args.binaural:
-                if args.outformat.startswith("BINAURAL"):
-                    raise SystemExit(
-                        "BINAURAL output format can not be binauralized again!"
-                    )
-
-                _, output_ext = os.path.splitext(os.path.basename(outfile))
-                outfile_bin = outfile.replace(output_ext, "_BINAURAL.wav")
-                logger.info(f"  Output binaural {outfile_bin}")
-
-                spatialaudioconvert.spatial_audio_convert(
-                    in_file=outfile,
-                    out_file=outfile_bin,
-                    in_format=args.outformat,
-                    in_fs=args.outfs,
-                    in_meta_files=args.metadata,
-                    in_ls_layout_file=args.layoutfile,
-                    out_format="BINAURAL",
-                    output_loudness=args.normalize,
-                    loudness_tool=args.loudness_tool,
-                    trajectory=args.trajectory,
-                    binaural_dataset=args.binaural_dataset,
-                )
-
-            outfile = None
-    else:
-        raise Exception(
-            "Input file must be provided for conversion and audio manipulation."
-        )
-
-
-if __name__ == "__main__":
-    main()
+    main(args)
diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py
index 16569e1ec7..5dbc43f769 100644
--- a/scripts/pyaudio3dtools/audioarray.py
+++ b/scripts/pyaudio3dtools/audioarray.py
@@ -32,9 +32,10 @@
 
 import logging
 import math
-from typing import Optional, Tuple
+from typing import Callable, Iterable, Optional, Tuple
 
 import numpy as np
+import multiprocessing as mp
 import scipy.signal as sig
 
 main_logger = logging.getLogger("__main__")
@@ -430,3 +431,17 @@ def get_framewise(x: np.ndarray, chunk_size: int) -> np.ndarray:
         yield x[i * chunk_size : (i + 1) * chunk_size, :]
     if x.shape[0] % chunk_size:
         yield x[n_frames * chunk_size :, :]
+
+
+def process_async(files: Iterable, func: Callable, **kwargs):
+    """Run func(f, **kwargs) for every f in files on a multiprocessing pool.
+
+    Returns the list of AsyncResult objects after all tasks have finished.
+    """
+    p = mp.Pool(mp.cpu_count())  # mp.pool is the module; mp.Pool makes a pool
+    results = [p.apply_async(func, args=(f,), kwds=kwargs) for f in files]
+    p.close()
+    p.join()
+    for r in results:
+        r.get()  # re-raises any exception raised inside a worker
+    return results
-- 
GitLab


From e3c5bb7fdfa409238898f38d7a7b25effeaf0694 Mon Sep 17 00:00:00 2001
From: Archit Tamarapu <archit.tamarapu@iis.fraunhofer.de>
Date: Wed, 26 Oct 2022 14:34:22 +0200
Subject: [PATCH 2/6] [pyaudio3dtools] fix PCM support for ISM input

---
 scripts/pyaudio3dtools/spatialaudioconvert.py | 2 +-
 scripts/pyaudio3dtools/spatialmetadata.py     | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/scripts/pyaudio3dtools/spatialaudioconvert.py b/scripts/pyaudio3dtools/spatialaudioconvert.py
index 3d71dc0e6f..29ac5f5d79 100644
--- a/scripts/pyaudio3dtools/spatialaudioconvert.py
+++ b/scripts/pyaudio3dtools/spatialaudioconvert.py
@@ -196,7 +196,7 @@ def spatial_audio_convert(
 
             # initialise metadata object for ISM
             metadata_obj = spatialmetadata.Metadata()
-            metadata_obj.init_for_ism(in_file, in_fs, in_meta_files)
+            metadata_obj.init_for_ism(in_file, in_fs, in_nchans, in_meta_files)
 
             # TODO alternative paths for binaural rendering for now
             if out_format.startswith("BINAURAL_ROOM"):
diff --git a/scripts/pyaudio3dtools/spatialmetadata.py b/scripts/pyaudio3dtools/spatialmetadata.py
index 829bd298bc..9fa42a2e06 100644
--- a/scripts/pyaudio3dtools/spatialmetadata.py
+++ b/scripts/pyaudio3dtools/spatialmetadata.py
@@ -211,13 +211,13 @@ class Metadata:
         for object_index in range(self.nb_objects):
             print(f"  Object #{object_index} Type: {self.objects[object_index]}")
 
-    def _append_audio_array(self, audio_wav=None, fs=48000, object_index=None):
+    def _append_audio_array(self, audio_wav=None, fs=48000, nchan=1, object_index=None):
         if audio_wav is None:
             audio_wav = self.audio_wav[-1]
         if object_index is None:
             object_index = -1
 
-        x, fs = audiofile.readfile(audio_wav, fs=fs)
+        x, fs = audiofile.readfile(audio_wav, fs=fs, nchannels=nchan)
         logger.debug(f"Append {audio_wav}: {x.shape[0]} by {x.shape[1]}")
 
         # Select appropriate channels & resample if necessary
@@ -245,6 +245,7 @@ class Metadata:
         self,
         in_file: str,
         in_fs: int,
+        in_nchan: int,
         metadata_files: list,
     ) -> None:
         self.audio_wav.append(in_file)
@@ -252,7 +253,7 @@ class Metadata:
         for csv in metadata_files:
             self.objects.append(read_ism_ivas_data(csv, object_index=self.nb_objects))
             self.objects[-1]["track_index"] = self.nb_objects
-            self._append_audio_array(self.audio_wav[-1], fs=in_fs)
+            self._append_audio_array(self.audio_wav[-1], fs=in_fs, nchan=in_nchan)
             self.nb_objects += 1
 
     # Get audio array with sampling rate
-- 
GitLab


From 81a87199b60d10b575f1818c1f8a49111f314e90 Mon Sep 17 00:00:00 2001
From: Archit Tamarapu <archit.tamarapu@iis.fraunhofer.de>
Date: Wed, 26 Oct 2022 14:47:49 +0200
Subject: [PATCH 3/6] [pyaudio3dtools] add a warning and automatically adjust
 when a very low sampling rate is specified

---
 scripts/pyaudio3dtools/audio3dtools.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/scripts/pyaudio3dtools/audio3dtools.py b/scripts/pyaudio3dtools/audio3dtools.py
index d459615eae..e9feccce80 100644
--- a/scripts/pyaudio3dtools/audio3dtools.py
+++ b/scripts/pyaudio3dtools/audio3dtools.py
@@ -33,6 +33,7 @@
 import argparse
 import logging
 import os
+import warnings
 
 from pyaudio3dtools import (
     audiofile,
@@ -63,6 +64,18 @@ def main(args):
     )
     logger.info("Audio3DTools")
 
+    if args.infs is not None and args.infs < 1000:
+        warnings.warn(
+            f"Input sampling rate specified as {args.infs} Hz! Assuming {args.infs*1000} Hz"
+        )
+        args.infs *= 1000
+
+    if args.outfs is not None and args.outfs < 1000:
+        warnings.warn(
+            f"Output sampling rate specified as {args.outfs} Hz! Assuming {args.outfs*1000} Hz"
+        )
+        args.outfs *= 1000
+
     if args.list is True or args.long is True:
         logger.info("===Supported spatial audio formats===")
         spatialaudioformat.Format.list_all(args.long)
-- 
GitLab


From e5ef00eeac582354b52f6e9d0b63c31a5e1c9d0f Mon Sep 17 00:00:00 2001
From: Archit Tamarapu <archit.tamarapu@iis.fraunhofer.de>
Date: Fri, 28 Oct 2022 10:59:17 +0200
Subject: [PATCH 4/6] fix for binaural rendering in generate_test_items.py

---
 scripts/pyprocessing/prepost_processing.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/pyprocessing/prepost_processing.py b/scripts/pyprocessing/prepost_processing.py
index dee95b77a6..a2c014b6a9 100644
--- a/scripts/pyprocessing/prepost_processing.py
+++ b/scripts/pyprocessing/prepost_processing.py
@@ -161,7 +161,8 @@ class PostProcessing(Processing):
             out_sig, fs = audiofile.readfile(output_path)
             bin_sig = binauralrenderer.binaural_rendering(
                 out_sig,
-                self.out_spfmt.name,
+                self.out_spfmt,
+                spatialaudioformat.Format("BINAURAL"),
                 fs=fs,
                 include_LFE=self.bin_rend_include_LFE,
                 LFE_gain=self.bin_rend_LFE_gain,
-- 
GitLab


From e33c9bf6735fa3a65d5bf0c72fde4a24edfb5b03 Mon Sep 17 00:00:00 2001
From: Archit Tamarapu <archit.tamarapu@iis.fraunhofer.de>
Date: Fri, 11 Nov 2022 16:34:46 +0100
Subject: [PATCH 5/6] [scripts] fix for ISM rendering when input file has extra
 samples

- fix a missing comma in cicp13.txt ls layout file
- correctly set executable bit on some python files
---
 scripts/ls_layouts/cicp13.txt                 |  6 +++---
 scripts/pyaudio3dtools/audio3dtools.py        |  0
 scripts/pyaudio3dtools/audioarray.py          | 10 ++++++++--
 scripts/pyaudio3dtools/audiofile.py           |  0
 scripts/pyaudio3dtools/spatialaudioconvert.py |  7 +++++++
 scripts/pyivastest/IvasModeRunner.py          |  0
 6 files changed, 18 insertions(+), 5 deletions(-)
 mode change 100644 => 100755 scripts/pyaudio3dtools/audio3dtools.py
 mode change 100755 => 100644 scripts/pyaudio3dtools/audiofile.py
 mode change 100755 => 100644 scripts/pyivastest/IvasModeRunner.py

diff --git a/scripts/ls_layouts/cicp13.txt b/scripts/ls_layouts/cicp13.txt
index 5ff15f86c1..d0510c1d8f 100644
--- a/scripts/ls_layouts/cicp13.txt
+++ b/scripts/ls_layouts/cicp13.txt
@@ -1,3 +1,3 @@
-0, 30 -30, 60, -60, 90, -90, 135, -135, 180, 0,  45, -45, 90, -90, 0,  135, -135, 180,   0,  45, -45
-0,  0,  0,  0,   0,  0,   0,   0,    0,   0, 35, 35,  35, 35,  35, 90,  35,   35,  35, -15, -15, -15
-3, 9
\ No newline at end of file
+0, 30, -30, 60, -60, 90, -90, 135, -135, 180, 0,  45, -45, 90, -90, 0,  135, -135, 180,   0,  45, -45
+0,  0,   0,  0,   0,  0,   0,   0,    0,   0, 35, 35,  35, 35,  35, 90,  35,   35,  35, -15, -15, -15
+3, 9
diff --git a/scripts/pyaudio3dtools/audio3dtools.py b/scripts/pyaudio3dtools/audio3dtools.py
old mode 100644
new mode 100755
diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py
index 0a918e5f1d..1906e82033 100644
--- a/scripts/pyaudio3dtools/audioarray.py
+++ b/scripts/pyaudio3dtools/audioarray.py
@@ -410,7 +410,7 @@ def limiter(x: np.ndarray, fs: int):
         fr_sig[idx_min] = -32768
 
 
-def get_framewise(x: np.ndarray, chunk_size: int) -> np.ndarray:
+def get_framewise(x: np.ndarray, chunk_size: int, zero_pad=False) -> np.ndarray:
     """Generator to yield a signal frame by frame
         If array size is not a multiple of chunk_size, last frame contains the remainder
 
@@ -420,6 +420,8 @@ def get_framewise(x: np.ndarray, chunk_size: int) -> np.ndarray:
         Input reference array
     chunk_size: int
         Size of frames to yield
+    zero_pad: bool
+        Whether to zero pad the last chunk if there are not enough samples
 
     Yields
     -------
@@ -430,7 +432,11 @@ def get_framewise(x: np.ndarray, chunk_size: int) -> np.ndarray:
     for i in range(n_frames):
         yield x[i * chunk_size : (i + 1) * chunk_size, :]
     if x.shape[0] % chunk_size:
-        yield x[n_frames * chunk_size :, :]
+        last_chunk = x[n_frames * chunk_size :, :]
+        if zero_pad:
+            yield np.pad(last_chunk, [[0, x.shape[0] % chunk_size], [0, 0]])
+        else:
+            yield last_chunk
 
 
 def process_async(files: Iterable, func: Callable, **kwargs):
diff --git a/scripts/pyaudio3dtools/audiofile.py b/scripts/pyaudio3dtools/audiofile.py
old mode 100755
new mode 100644
diff --git a/scripts/pyaudio3dtools/spatialaudioconvert.py b/scripts/pyaudio3dtools/spatialaudioconvert.py
index 430d362ab2..1ed144943f 100644
--- a/scripts/pyaudio3dtools/spatialaudioconvert.py
+++ b/scripts/pyaudio3dtools/spatialaudioconvert.py
@@ -426,6 +426,13 @@ def convert_ism(
             audioarray.get_framewise(out_sig, frame_len),
         )
     ):
+        # rebuild the crossfade ramps for a shorter (last) frame; max() avoids 0/0 for 1 sample
+        if out_frame.shape[0] != frame_len:
+            frame_size = out_frame.shape[0]
+            fade_in = np.arange(frame_size) / max(frame_size - 1, 1)
+            fade_in = fade_in[:, np.newaxis]
+            fade_out = 1.0 - fade_in
+
         pos = EFAP.wrap_angles(*pos_data[i_frame % pos_frames, :], clip_ele=True)
 
         # ISM -> MC
diff --git a/scripts/pyivastest/IvasModeRunner.py b/scripts/pyivastest/IvasModeRunner.py
old mode 100755
new mode 100644
-- 
GitLab


From 4759c7bb4aabbc11d952f2f5ec94c24863941463 Mon Sep 17 00:00:00 2001
From: Treffehn <anika.treffehn@iis.fraunhofer.de>
Date: Fri, 25 Nov 2022 16:19:38 +0100
Subject: [PATCH 6/6] fix zero-padding length in get_framewise

---
 scripts/pyaudio3dtools/audioarray.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py
index 1906e82033..917cdf59c6 100644
--- a/scripts/pyaudio3dtools/audioarray.py
+++ b/scripts/pyaudio3dtools/audioarray.py
@@ -434,7 +434,7 @@ def get_framewise(x: np.ndarray, chunk_size: int, zero_pad=False) -> np.ndarray:
     if x.shape[0] % chunk_size:
         last_chunk = x[n_frames * chunk_size :, :]
         if zero_pad:
-            yield np.pad(last_chunk, [[0, x.shape[0] % chunk_size], [0, 0]])
+            yield np.pad(last_chunk, [[0, chunk_size - (x.shape[0] % chunk_size)], [0, 0]])
         else:
             yield last_chunk
 
-- 
GitLab