From 323e4b86207e0a22f6edc8665e9766b03596a208 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 2 May 2023 15:21:36 +0200 Subject: [PATCH 01/27] Skeleton framework for item generation scripts --- item_generation_scripts/__init__.py | 101 ++ item_generation_scripts/__main__.py | 50 + item_generation_scripts/audiotools/EFAP.py | 922 ++++++++++++++++++ .../audiotools/__init__.py | 286 ++++++ .../audiotools/__main__.py | 36 + item_generation_scripts/audiotools/audio.py | 428 ++++++++ .../audiotools/audioarray.py | 690 +++++++++++++ .../audiotools/audiofile.py | 433 ++++++++ .../BRIR_IISofficialMPEG222UC_FULL.mat | 3 + .../BRIR_IISofficialMPEG222UC_LS.mat | 3 + .../HRIR_ORANGE53_Dolby_SBA1.mat | 3 + .../HRIR_ORANGE53_Dolby_SBA2.mat | 3 + .../HRIR_ORANGE53_Dolby_SBA3.mat | 3 + .../binaural_datasets/HRIR_ORANGE53_FULL.mat | 3 + .../binaural_datasets/HRIR_ORANGE53_LS.mat | 3 + .../audiotools/binaural_datasets/README.txt | 34 + .../audiotools/binaural_datasets/__init__.py | 31 + .../binaural_datasets/binaural_dataset.py | 288 ++++++ .../audiotools/binauralobjectrenderer.py | 652 +++++++++++++ .../audiotools/constants.py | 704 +++++++++++++ .../audiotools/convert/__init__.py | 323 ++++++ .../audiotools/convert/binaural.py | 108 ++ .../audiotools/convert/channelbased.py | 390 ++++++++ .../audiotools/convert/masa.py | 165 ++++ .../audiotools/convert/objectbased.py | 352 +++++++ .../audiotools/convert/scenebased.py | 429 ++++++++ .../audiotools/metadata.py | 571 +++++++++++ .../audiotools/rotation.py | 379 +++++++ item_generation_scripts/audiotools/utils.py | 71 ++ .../audiotools/wrappers/__init__.py | 31 + .../audiotools/wrappers/bs1770.py | 291 ++++++ .../audiotools/wrappers/eid_xor.py | 193 ++++ .../audiotools/wrappers/esdru.py | 130 +++ .../audiotools/wrappers/filter.py | 366 +++++++ .../audiotools/wrappers/gen_patt.py | 171 ++++ .../audiotools/wrappers/masaRenderer.py | 117 +++ .../audiotools/wrappers/networkSimulator.py | 224 +++++ .../audiotools/wrappers/p50fbmnru.py | 110 +++ .../audiotools/wrappers/random_seed.py | 92 ++ item_generation_scripts/binary_paths.yml | 30 + .../config/ISM1_CONFIG.yml | 338 +++++++ .../config/ISM2_CONFIG.yml | 338 +++++++ item_generation_scripts/constants.py | 80 ++ .../processing/__init__.py | 31 + item_generation_scripts/processing/config.py | 130 +++ .../processing/preprocessing_2.py | 155 +++ .../processing/process_ism_items.py | 221 +++++ .../processing/processing.py | 455 +++++++++ item_generation_scripts/utils.py | 297 ++++++ 49 files changed, 11264 insertions(+) create mode 100644 item_generation_scripts/__init__.py create mode 100644 item_generation_scripts/__main__.py create mode 100644 item_generation_scripts/audiotools/EFAP.py create mode 100644 item_generation_scripts/audiotools/__init__.py create mode 100644 item_generation_scripts/audiotools/__main__.py create mode 100644 item_generation_scripts/audiotools/audio.py create mode 100644 item_generation_scripts/audiotools/audioarray.py create mode 100644 item_generation_scripts/audiotools/audiofile.py create mode 100644 item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_FULL.mat create mode 100644 item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_LS.mat create mode 100644 item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA1.mat create mode 100644 item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA2.mat create mode 100644 
item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA3.mat create mode 100644 item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_FULL.mat create mode 100644 item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_LS.mat create mode 100644 item_generation_scripts/audiotools/binaural_datasets/README.txt create mode 100644 item_generation_scripts/audiotools/binaural_datasets/__init__.py create mode 100644 item_generation_scripts/audiotools/binaural_datasets/binaural_dataset.py create mode 100644 item_generation_scripts/audiotools/binauralobjectrenderer.py create mode 100644 item_generation_scripts/audiotools/constants.py create mode 100644 item_generation_scripts/audiotools/convert/__init__.py create mode 100644 item_generation_scripts/audiotools/convert/binaural.py create mode 100644 item_generation_scripts/audiotools/convert/channelbased.py create mode 100644 item_generation_scripts/audiotools/convert/masa.py create mode 100644 item_generation_scripts/audiotools/convert/objectbased.py create mode 100644 item_generation_scripts/audiotools/convert/scenebased.py create mode 100644 item_generation_scripts/audiotools/metadata.py create mode 100644 item_generation_scripts/audiotools/rotation.py create mode 100644 item_generation_scripts/audiotools/utils.py create mode 100644 item_generation_scripts/audiotools/wrappers/__init__.py create mode 100644 item_generation_scripts/audiotools/wrappers/bs1770.py create mode 100644 item_generation_scripts/audiotools/wrappers/eid_xor.py create mode 100644 item_generation_scripts/audiotools/wrappers/esdru.py create mode 100644 item_generation_scripts/audiotools/wrappers/filter.py create mode 100644 item_generation_scripts/audiotools/wrappers/gen_patt.py create mode 100644 item_generation_scripts/audiotools/wrappers/masaRenderer.py create mode 100644 item_generation_scripts/audiotools/wrappers/networkSimulator.py create mode 100644 item_generation_scripts/audiotools/wrappers/p50fbmnru.py create mode 100644 item_generation_scripts/audiotools/wrappers/random_seed.py create mode 100644 item_generation_scripts/binary_paths.yml create mode 100644 item_generation_scripts/config/ISM1_CONFIG.yml create mode 100644 item_generation_scripts/config/ISM2_CONFIG.yml create mode 100644 item_generation_scripts/constants.py create mode 100644 item_generation_scripts/processing/__init__.py create mode 100644 item_generation_scripts/processing/config.py create mode 100644 item_generation_scripts/processing/preprocessing_2.py create mode 100644 item_generation_scripts/processing/process_ism_items.py create mode 100644 item_generation_scripts/processing/processing.py create mode 100644 item_generation_scripts/utils.py diff --git a/item_generation_scripts/__init__.py b/item_generation_scripts/__init__.py new file mode 100644 index 00000000..989d61a6 --- /dev/null +++ b/item_generation_scripts/__init__.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. 
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository retain full ownership rights in their respective contributions in
+# the software. This notice grants no license of any kind, including but not limited to patent
+# license, nor is any license granted by implication, estoppel or otherwise.
+#
+# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+# contributions.
+#
+# This software is provided "AS IS", without any express or implied warranties. The software is in the
+# development stage. It is intended exclusively for experts who have experience with such software and
+# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+# and fitness for a particular purpose are hereby disclaimed and excluded.
+#
+# Any dispute, controversy or claim arising under or in relation to providing this software shall be
+# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+# the United Nations Convention on Contracts on the International Sales of Goods.
+#
+
+import logging
+import os
+
+import yaml
+
+from item_generation_scripts.constants import (
+    LOGGER_DATEFMT,
+    LOGGER_FORMAT,
+    LOGGER_SUFFIX,
+)
+from item_generation_scripts.processing import config, process_ism_items
+from item_generation_scripts.utils import create_dir
+
+
+def logging_init(args, cfg):
+    """set up logging for a test file"""
+    logger = logging.getLogger("__main__")
+    logger.setLevel(logging.DEBUG)
+
+    # console handler
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(logging.Formatter("%(message)s"))
+    console_handler.setLevel(logging.DEBUG if args.debug else logging.INFO)
+    logger.addHandler(console_handler)
+
+    # main log file
+    file_handler = logging.FileHandler(
+        cfg.output_path.joinpath(f"{cfg.format}{LOGGER_SUFFIX}"), mode="w"
+    )
+    file_handler.setFormatter(logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT))
+    file_handler.setLevel(logging.DEBUG if args.debug else logging.INFO)
+    logger.addHandler(file_handler)
+
+    logger.info(f"Processing test configuration file {args.config}")
+    logger.info(f"Input path: {cfg.input_path.absolute()}")
+    logger.info(f"Output path: {cfg.output_path.absolute()}")
+
+    return logger
+
+
+def main(args):
+
+    # parse configuration
+    cfg = config.TestConfig(args.config)
+
+    # create output directory, if not existing
+    if not os.path.exists(cfg.output_path):
+        create_dir(cfg.output_path)
+
+    # set up logging
+    logger = logging_init(args, cfg)
+
+    # generate input items
+    if cfg.format.startswith("ISM"):
+        # generate ISM items according to scene description
+        process_ism_items.generate_ism_items(
+            cfg.format,
+            cfg.loudness,
+            cfg.input_path,
+            cfg.output_path,
+            cfg.scenes,
+            logger
+        )
+
+    # copy configuration to output directory
+    with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f:
+        yaml.safe_dump(cfg._yaml_dump, f)
diff --git
a/item_generation_scripts/__main__.py b/item_generation_scripts/__main__.py new file mode 100644 index 00000000..b49109d3 --- /dev/null +++ b/item_generation_scripts/__main__.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +import argparse + +from item_generation_scripts import main + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="IVAS item generation scripts for listening tests. Please refer to README.md for usage." + ) + parser.add_argument( + "config", + help="YAML configuration file", + ) + parser.add_argument( + "--debug", help="Set logging level to debug", action="store_true", default=False + ) + args = parser.parse_args() + + main(args) diff --git a/item_generation_scripts/audiotools/EFAP.py b/item_generation_scripts/audiotools/EFAP.py new file mode 100644 index 00000000..b83d57e6 --- /dev/null +++ b/item_generation_scripts/audiotools/EFAP.py @@ -0,0 +1,922 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. 
+# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +import argparse +from enum import Enum +from itertools import combinations +from pathlib import Path +from typing import Optional, Tuple, Union + +import numpy as np + + +def wrap_angles( + azi: float, + ele: float, + clip_ele: Optional[bool] = False, +) -> Tuple[float, float]: + """ + Wrap angles to (-180, 180] azimuth and [-90, 90] elevation + Takes into account hemisphere flips from large elevation changes unless clip_ele is specified + """ + if clip_ele: + ele = min(max(ele, -90), 90) + + if ele % 90 == 0 and ele % 180 != 0: + # if elevation is a multiple of 90, azimuth is irrelevant since we are at a pole + azi = 0 + while np.abs(ele) > 90: + ele -= 360 + else: + # wrap elevation value + while np.abs(ele) > 90: + # flip azimuth to other hemisphere + azi += 180 + + # compensate elevation accordingly + if ele > 90: + ele = 180 - ele + elif ele < -90: + ele = -180 - ele + + # wrap azimuth value + while azi > 180: + azi -= 360 + while azi <= -180: + azi += 360 + + return azi, ele + + +class EfapDmxType(Enum): + NONE = 0 + AMPLITUDE = 1 + INTENSITY = 2 + + +class EfapVertex: + """ + Vertex data structure for EFAP + + Initialises a vertex from the given spherical coordinate pair, + with a flag specifying if it is a ghost loudspeaker + + Parameters + ---------- + azi : float + Azimuth of vertex + ele : float + Elevation of vertex + is_ghost : bool + Whether the vertex is a ghost, default is False + dmx_type : EfapDmxType + Downmix type for ghost vertices + """ + + def __init__( + self, + azi: float, + ele: float, + is_ghost: Optional[bool] = False, + dmx_type: Optional[EfapDmxType] = EfapDmxType.INTENSITY, + ): + self.azi, self.ele = wrap_angles(azi, ele) + self.pos = np.array( + [ + np.cos(np.deg2rad(azi)) * np.cos(np.deg2rad(ele)), + np.sin(np.deg2rad(azi)) * np.cos(np.deg2rad(ele)), + np.sin(np.deg2rad(ele)), + ] + ) + + idx_azi = np.round(np.abs(90 - 
np.abs(self.azi)))
+        idx_ele = 90 - np.round(np.abs(self.ele))
+        self.index = (
+            idx_azi + 181 * idx_ele
+        )  # vertices on the median plane have lowest index
+
+        self.is_ghost = is_ghost
+        self.dmx_type = dmx_type
+
+    def __str__(self):
+        str_ = f"a{self.azi}e{self.ele}"
+        if self.is_ghost:
+            str_ += "*"
+        return str_
+
+    def __lt__(self, other):
+        return self.index < other.index
+
+
+class EFAP:
+    """
+    EFAP data structure
+
+    Initialise EFAP data for computing panning gains
+
+    Parameters
+    ----------
+    azimuths : np.ndarray
+        Azimuth positions of the loudspeaker array
+    elevations : np.ndarray
+        Elevation positions of the loudspeaker array
+    intensity_panning : bool
+        Whether intensity panning is enabled or not
+
+    Examples
+    --------
+    >>> from EFAP import EFAP
+    >>> panner = EFAP([30, -30, 0, 110, -110], [0, 0, 0, 0, 0], False)
+    >>> panner.pan(15, 45)
+    array([0.66742381, 0.19069252, 0.66742381, 0.19069252, 0.19069252])
+    """
+
+    _EFAP_HULL_TOL = 1e-4  # tolerance for a point to be added to the convex hull
+    _EFAP_MAX_AZI_GAP = 160  # maximum allowed angular gap in the middle layer
+    _EFAP_POLAR_ELE = 90  # elevation of north / south poles (zenith / nadir)
+    _EFAP_THRESH_COPLANAR = 1e-3  # tolerance for points to be considered coplanar
+    _EFAP_THRESH_MID_LAYER = 45  # elevation threshold for loudspeakers to be considered part of the middle layer
+    _EFAP_THRESH_POLES = 1e-6  # tolerance for a vertex to be considered polar
+    _EFAP_THRESH_TRI = 1e-10  # tolerance for a point to be inside a triangle
+
+    def __init__(
+        self,
+        azimuths: Union[list, np.ndarray],
+        elevations: Union[list, np.ndarray],
+        intensity_panning: Optional[bool] = False,
+    ):
+        # validation
+        azimuths = np.array(azimuths)
+        elevations = np.array(elevations)
+        if np.squeeze(azimuths).ndim > 1:
+            raise ValueError("Too many dimensions for loudspeaker azimuth array")
+        if np.squeeze(elevations).ndim > 1:
+            raise ValueError("Too many dimensions for loudspeaker elevations array")
+        if azimuths.shape != elevations.shape:
+            raise ValueError("Mismatch between loudspeaker azimuths and elevations")
+
+        # set EFIP flag
+        self.intensity_panning = intensity_panning
+
+        # initialise vertices and add ghost loudspeakers if needed
+        self.verts = np.array(
+            [EfapVertex(azi, ele) for azi, ele in zip(azimuths, elevations)]
+        )
+        self._add_ghost_speakers()
+
+        # formulate initial tetrahedron for the convex hull
+        self._init_simplex()
+
+        # add the remaining vertices to the convex hull in order of their index
+        for i in np.argsort(self.verts):
+            if self.verts[i] not in self.verts[self.tris]:
+                self._add_vertex_to_hull(i)
+
+        # compute downmix matrix with remapped ghost speakers
+        self._remap_ghost_speakers()
+
+        # set vertices near poles to have NaN azimuth
+        for v in self.verts:
+            if (
+                v.ele > self._EFAP_POLAR_ELE - self._EFAP_THRESH_POLES
+                or v.ele < self._EFAP_THRESH_POLES - self._EFAP_POLAR_ELE
+            ):
+                v.azi = np.nan
+
+        # combine triangles into polygons
+        self._tri2poly()
+
+    def _add_ghost_speakers(self) -> None:
+        """
+        Add ghost loudspeakers at the poles, or to fill large horizontal gaps
+        """
+        ele = [v.ele for v in self.verts]
+
+        dmx_type = EfapDmxType.INTENSITY
+
+        # add ghost loudspeakers at the poles if necessary
+        if max(ele) < self._EFAP_POLAR_ELE:
+            if self.intensity_panning:
+                if max(ele) > self._EFAP_THRESH_MID_LAYER:
+                    dmx_type = EfapDmxType.NONE
+                else:
+                    dmx_type = EfapDmxType.AMPLITUDE
+
+            self.verts = np.append(self.verts, EfapVertex(0, 90, True, dmx_type))
+
+        if min(ele) > -self._EFAP_POLAR_ELE:
+            if self.intensity_panning:
+                if min(ele) < -self._EFAP_THRESH_MID_LAYER:
+                    dmx_type = EfapDmxType.NONE
+                else:
+                    dmx_type = EfapDmxType.AMPLITUDE
+
+            self.verts = np.append(self.verts, EfapVertex(0, -90, True, dmx_type))
+
+        # check for large gaps in the middle horizontal layer
+        mid_spkrs = [
+            v.azi for v in self.verts if np.abs(v.ele) < self._EFAP_THRESH_MID_LAYER
+        ]
+
+        # no speakers in middle layer; add a triangle of ghost speakers
+        if not mid_spkrs:
+            self.verts = np.append(
+                self.verts,
+                [
+                    EfapVertex(0, 0, True),
+                    EfapVertex(180, 0, True),
+                    EfapVertex(240, 0, True),
+                ],
+            )
+        # only one speaker in the middle layer; add two ghost speakers to form a triangle
+        elif len(mid_spkrs) == 1:
+            self.verts = np.append(
+                self.verts,
+                [
+                    EfapVertex(mid_spkrs[0] + 120, 0, True),
+                    EfapVertex(mid_spkrs[0] + 240, 0, True),
+                ],
+            )
+        # search for and fill gaps greater than MAX_AZI_GAP
+        else:
+            mid_spkrs = np.sort(mid_spkrs)
+            angle_diff = np.diff(np.concatenate([mid_spkrs, [mid_spkrs[0] + 360]]))
+            sectors = np.ceil(angle_diff / self._EFAP_MAX_AZI_GAP)
+
+            for i, s in enumerate(sectors):
+                if s > 1:
+                    new_diff = angle_diff[i] / s
+                    num_new = s - 1
+                    for k in range(int(num_new)):
+                        new_azi = mid_spkrs[i] + (k + 1) * new_diff
+                        self.verts = np.append(self.verts, EfapVertex(new_azi, 0, True))
+
+    def _init_simplex(self) -> None:
+        """
+        Create an initial tetrahedron / simplex for the convex hull from 4 vertices
+        """
+        # take the first vertex as seed
+        t = [0]
+
+        # attempt to form an edge with non-zero length
+        for i, v in enumerate(self.verts):
+            if (
+                v.azi != self.verts[t[0]].azi or v.ele != self.verts[t[0]].ele
+            ) and i not in t:
+                t.append(i)
+                break
+        else:
+            raise ValueError("Vertices are coincident!")
+
+        # attempt to form a triangle with non-zero area
+        for i, v in enumerate(self.verts):
+            if (
+                np.linalg.norm(
+                    np.cross(
+                        self.verts[t[1]].pos - self.verts[t[0]].pos,
+                        v.pos - self.verts[t[0]].pos,
+                    ),
+                    2,
+                )
+                > self._EFAP_HULL_TOL
+                and i not in t
+            ):
+                t.append(i)
+                break
+        else:
+            raise ValueError("Vertices are collinear!")
+
+        # attempt to form a tetrahedron with non-zero volume
+        for i, v in enumerate(self.verts):
+            if (
+                np.abs(
+                    np.dot(
+                        np.cross(
+                            self.verts[t[1]].pos - self.verts[t[0]].pos,
+                            self.verts[t[2]].pos - self.verts[t[0]].pos,
+                        ),
+                        v.pos - self.verts[t[0]].pos,
+                    )
+                )
+            ) > self._EFAP_HULL_TOL and i not in t:
+                t.append(i)
+                break
+        else:
+            raise ValueError("Vertices are coplanar!")
+
+        # create a list of the triangles of the initial simplex / tetrahedron
+        t = np.array(t)
+        self.tris = np.array([t[[0, 1, 2]], t[[0, 1, 3]], t[[0, 2, 3]], t[[1, 2, 3]]])
+
+        # orient the triangle surface planes outwards from the centroid
+        self.centroid = np.mean([self.verts[i].pos for i in t], axis=0)
+        for i, tri in enumerate(self.tris):
+            self.tris[i, :] = self._flip_plane(tri)
+
+    def _add_vertex_to_hull(self, idx_new_vert: int) -> None:
+        """
+        Add a vertex to the convex hull and update the list of triangles in the hull
+        """
+        # compute the centroid of the current convex hull
+        self.centroid = np.mean(
+            [self.verts[i].pos for i in np.unique(self.tris)], axis=0
+        )
+
+        tris_new = []
+        visible = []
+
+        # find which hull surfaces are visible from the new vertex
+        for i, tri in enumerate(self.tris):
+            if self._vertex_dist(tri, idx_new_vert) > -1e-6:
+                visible.append(i)
+            else:
+                tris_new.append(tri)
+
+        tris_new = np.array(tris_new)
+        visible = np.array(visible, dtype=int)
+
+        # find edges of the visible hull surfaces
+        max_vert = np.amax(self.tris[visible]) + 1
+
counter = np.zeros([max_vert, max_vert]) + for i, tri in enumerate(self.tris[visible]): + surface = np.append(tri, tri[0]) + for n in range(3): + a = surface[n] + b = surface[n + 1] + counter[a, b] = counter[a, b] + 1 + + counter += counter.T + + edges = [] + for a in range(max_vert - 1): + for b in range(a + 1, max_vert): + if counter[a, b] == 1: + edges.append([a, b]) + edges = np.vstack(edges) + + # break the edges visible from the new vertex and add the new triangle + for e in edges: + tris_new = np.vstack( + [tris_new, self._flip_plane(np.append(e, idx_new_vert))] + ) + + # update the list of triangles in the convex hull + self.tris = tris_new + + def _remap_ghost_speakers(self) -> None: + """ + Remove unused ghost speakers and compute a downmix matrix for the rest + """ + # find ghosts that are not part of the convex hull + ghosts = [i for i, v in enumerate(self.verts) if v.is_ghost] + unused_ghosts = np.compress( + np.isin(ghosts, np.unique(self.tris), invert=True), ghosts + ) + + if unused_ghosts.size > 0: + # remove the unused ghosts from the triangle array and also adjust indices + self.tris[self.tris > unused_ghosts.min()] -= unused_ghosts.size + # delete them from the vertex array + self.verts = np.delete(self.verts, unused_ghosts) + + # generate initial sound energy distribution matrix + n_vtx = len(self.verts) + n_ghost = len(ghosts) - len(unused_ghosts) + + M = np.eye(n_vtx) + for i, v in enumerate(self.verts): + if v.is_ghost: + neighbours = self._get_neighbours(i) + M[:, i] = np.zeros(n_vtx) + M[neighbours, i] = np.ones(len(neighbours)) / len(neighbours) + + # re-distribute sound energy from ghosts + M2 = M.copy() + for i, v in enumerate(self.verts): + if v.is_ghost: + vec = M[:, i] + while np.sum(vec[-n_ghost:]) > 1e-4: + vec = M @ vec + M2[:, i] = vec + + self.dmx_mat = M2[:-n_ghost, :] + + # amplitude downmix for real loudspeakers + self.dmx_mat[:, :-n_ghost] = np.sqrt(self.dmx_mat[:, :-n_ghost]) + + # distribute ghosts according to downmix type + for i, v in enumerate(self.verts): + if v.is_ghost: + if v.dmx_type == EfapDmxType.NONE: + self.dmx_mat[:, i] = 0 + elif v.dmx_type == EfapDmxType.AMPLITUDE: + pass + else: + self.dmx_mat[:, i] = np.sqrt(self.dmx_mat[:, i]) + + def _tri2poly(self) -> None: + """ + Merge hull triangles into polygons if they are coplanar + """ + polys = [] + + for tri in self.tris: + # find all vertices coplanar with this triangle (including those already in the triangle) + new_poly = np.array( + [ + i + for i, _ in enumerate(self.verts) + if np.abs(self._vertex_dist(tri, i)) < self._EFAP_THRESH_COPLANAR + ] + ) + + # check if we already found this polygon as a complete subset + is_subset = [ + i for i, poly in enumerate(polys) if np.all(np.isin(new_poly, poly)) + ] + is_superset = [ + i for i, poly in enumerate(polys) if np.all(np.isin(poly, new_poly)) + ] + + if is_subset: + continue + elif is_superset: + # remove the other polygon since it will be replaced by the superset polygon + polys_new = [p for i, p in enumerate(polys) if i not in is_superset] + polys = polys_new + + # orient the polygon plane in the same direction as the triangle + P1 = self.verts[tri[0]].pos + P2 = self.verts[tri[1]].pos + P3 = self.verts[tri[2]].pos + + # first base vector + U = P2 - P1 + U = U / np.linalg.norm(U) + + # second base vector + V = P3 - P2 + V = V - np.dot(U, V) * U + V = V / np.linalg.norm(V) + + # center of the first triangle + M = np.mean([P1, P2, P3], axis=0) + + # sort vertices + azi = np.zeros_like(new_poly, dtype=float) + for i, idx_v in 
enumerate(new_poly): + P = self.verts[idx_v].pos - M + X = np.dot(P, U) + Y = np.dot(P, V) + azi[i] = np.arctan2(Y, X) + + idx = np.argsort(azi) + new_poly = new_poly[idx] + + # add the polygon to the main list + polys.append(new_poly) + + self.polys = polys + + def _pan_EFAP_poly( + self, azimuth: float, elevation: float, poly: np.ndarray, mod: int + ) -> np.ndarray: + """ + Compute panning gains for each vertex in the given polygon + + Parameters + ---------- + azimuth : float + Azimuth of requested panning position + elevation : float + Elevation of requested panning position + poly : np.ndarray + Array of vertices defining the polygon + + Returns + ------- + poly_gain: np.ndarray + Gains for each vertex in the polygon + """ + poly_gain = np.zeros_like(poly, dtype=float) + + P = np.array([azimuth, elevation]) + # search for the triangle of the polygon in which P belongs + for i in range(1, poly.size + 1): + A = np.array([self.verts[poly[i - 1]].azi, self.verts[poly[i - 1]].ele]) + for j in range(i, poly.size - 2 + i): + idx1 = 1 + (j % poly.size) + idx2 = 1 + (idx1 % poly.size) + B = np.array( + [self.verts[poly[idx1 - 1]].azi, self.verts[poly[idx1 - 1]].ele] + ) + C = np.array( + [self.verts[poly[idx2 - 1]].azi, self.verts[poly[idx2 - 1]].ele] + ) + + if mod: + if not np.isnan(A[0]): + A[0] %= mod + if not np.isnan(B[0]): + B[0] %= mod + if not np.isnan(C[0]): + C[0] %= mod + + if self._in_triangle(P, A, B, C): + N = np.transpose([B[1] - C[1], C[0] - B[0]]) + N = N / np.dot(N, B - A) + poly_gain[i - 1] = 1 - np.dot(P - A, N) + + """ DEBUGGING / TODO """ + # set gains <= -60dB to 0 + poly_gain[np.abs(poly_gain) < 1e-6] = 0 + + return poly_gain + + """ geometric / math helper functions """ + + def _get_neighbours(self, idx_vert: int) -> np.ndarray: + """ + Find triangles containing the given vertex index (neighbouring vertices) + """ + n = self.tris[np.any(np.isin(self.tris, idx_vert), axis=1)] + return np.unique(n[n != idx_vert]) + + def _get_azi_ele(self, idx_vert: int) -> Tuple[float, float]: + """ + Return a tuple of (azi, ele) for a vertex at the given index + """ + return self.verts[idx_vert].azi, self.verts[idx_vert].ele + + def _in_polygon( + self, azimuth: float, elevation: float, poly: np.ndarray + ) -> Tuple[bool, int]: + """ + Determine whether the panning position lies within the given polygon + by iteratively checking its triangles + + Parameters + ---------- + azimuth : float + Azimuth of requested panning position + elevation : float + Elevation of requested panning position + poly : np.ndarray + Array of vertices defining the polygon + + Returns + ------- + in_polygon, mod: Tuple[bool, int] + Flag indicating whether the point is inside the given polygon + Value of wrapping required if used + """ + azi = [self.verts[v].azi for v in poly] + + P = np.array([azimuth, elevation]) + + for tri in combinations(poly, 3): + A = np.array(self._get_azi_ele(tri[0])) + B = np.array(self._get_azi_ele(tri[1])) + C = np.array(self._get_azi_ele(tri[2])) + if self._in_triangle(P, A, B, C): + return True, None + + # if the azimuth difference is large, perform the 2D check again with azimuths wrapped to (-360, 0] and [0, 360) + if np.nanmax(azi) - np.nanmin(azi) > 180: + for tri in combinations(poly, 3): + A = np.array(self._get_azi_ele(tri[0])) + B = np.array(self._get_azi_ele(tri[1])) + C = np.array(self._get_azi_ele(tri[2])) + if not np.isnan(A[0]): + A[0] %= 360 + if not np.isnan(B[0]): + B[0] %= 360 + if not np.isnan(C[0]): + C[0] %= 360 + if self._in_triangle(P, A, B, C): + return 
True, 360 + + for tri in combinations(poly, 3): + A = np.array(self._get_azi_ele(tri[0])) + B = np.array(self._get_azi_ele(tri[1])) + C = np.array(self._get_azi_ele(tri[2])) + if not np.isnan(A[0]): + A[0] %= -360 + if not np.isnan(B[0]): + B[0] %= -360 + if not np.isnan(C[0]): + C[0] %= -360 + if self._in_triangle(P, A, B, C): + return True, -360 + + return False, None + + def _in_triangle( + self, P: np.ndarray, A: np.ndarray, B: np.ndarray, C: np.ndarray + ) -> bool: + """ + Determine whether the panning position lies within the given triangle + + Parameters + ---------- + P : float + Point under test + A : float + First vertex of the triangle + B : float + Second vertex of the triangle + C : float + Third vertex of the triangle + + Returns + ------- + bool + Flag indicating whether the point is inside the given triangle + """ + if np.isnan(A[0]): + A[0] = P[0] + + if np.isnan(B[0]): + B[0] = P[0] + + if np.isnan(C[0]): + C[0] = P[0] + + tmpMat = np.transpose([B - A, C - A]) + if (1 / np.linalg.cond(tmpMat)) < self._EFAP_THRESH_TRI: + return False + + Minv = np.linalg.inv(tmpMat) + S = Minv @ (P - A) + + if ( + S[0] < -self._EFAP_THRESH_TRI + or S[1] < -self._EFAP_THRESH_TRI + or S[0] + S[1] > 1 + self._EFAP_THRESH_TRI + ): + return False + + return True + + def _vertex_dist(self, surface: np.ndarray, idx_vert: int) -> float: + """ + Compute the distance of a vertex from a given plane + + Parameters + ---------- + surface : np.ndarray + Array of 3 ordered vertices defining the plane and its orientation + idx_vert: int + Index of the vertex to compute the distance for + + Returns + ------- + float + Distance of the vertex from the given plane + """ + return self._point_plane_dist( + self.verts[surface[0]].pos, + self.verts[surface[1]].pos, + self.verts[surface[2]].pos, + self.verts[idx_vert].pos, + ) + + def _point_plane_dist( + self, P1: np.ndarray, P2: np.ndarray, P3: np.ndarray, X: np.ndarray + ) -> float: + """ + Compute the distance of a vertex from a plane defined by three points + + Parameters + ---------- + P1 : np.ndarray + Cartesian coordinates of the first point + P2 : np.ndarray + Cartesian coordinates of the second point + P3 : np.ndarray + Cartesian coordinates of the third point + X: np.ndarray + Cartesian coordinates of the vertex + + Returns + ------- + float + Distance of the vertex from the given plane + """ + + if np.all(X == P1) or np.all(X == P2) or np.all(X == P3): + return 0 + else: + N = np.cross(P1 - P2, P1 - P3) + eps = np.finfo(float).eps + return np.dot(X - P1, N / (np.linalg.norm(N) + eps)) + + def _flip_plane(self, surface: np.ndarray) -> np.ndarray: + """ + Flip the orientation of a plane (invert normal vector) + + Parameters + ---------- + surface : np.ndarray + Array of 3 ordered vertices defining the plane and its orientation + + Returns + ------- + surface : np.ndarray + Reordered vertices with plane normal pointing outwards from the hull centroid + """ + if ( + self._point_plane_dist( + self.verts[surface[0]].pos, + self.verts[surface[1]].pos, + self.verts[surface[2]].pos, + self.centroid, + ) + > 0 + ): + surface = np.flip(surface.copy()) + + return surface + + def _compute_gains_point(self, azimuth: float, elevation: float) -> np.ndarray: + """ + Compute gains for the requested panning position + + Parameters + ---------- + azimuth : float + Azimuth of requested panning position + elevation : float + Elevation of requested panning position + + Returns + ------- + gains: np.ndarray + Panning gains for the loudspeaker layout + """ + if 
np.isnan(azimuth) or np.isnan(elevation): + raise ValueError(f"Angles cannot be NaNs : ({azimuth}, {elevation})") + + azimuth, elevation = wrap_angles(azimuth, elevation) + point_pos = [ + np.cos(np.deg2rad(azimuth)) * np.cos(np.deg2rad(elevation)), + np.sin(np.deg2rad(azimuth)) * np.cos(np.deg2rad(elevation)), + np.sin(np.deg2rad(elevation)), + ] + + # filter the polygon list with a quick 2d check + found_polys = [] + for poly in self.polys: + in_poly, mod = self._in_polygon(azimuth, elevation, poly) + if in_poly: + found_polys.append((poly, mod)) + + if not found_polys: + raise AssertionError("Unexpected error during panning") + + # find a visible polygon with the smallest distance + dist = [] + + for poly, mod in found_polys: + surface = self.verts[poly] + d = self._point_plane_dist( + surface[0].pos, + surface[1].pos, + surface[2].pos, + point_pos, + ) + if d >= 0: + dist.append(d) + else: + dist.append(np.inf) + + found_poly, mod = found_polys[np.argmin(dist)] + + # compute gains for the polygon vertices + poly_gain = self._pan_EFAP_poly(azimuth, elevation, found_poly, mod) + + # downmix ghost loudspeakers + gains = np.zeros(self.verts.size) + gains[found_poly] = poly_gain / np.linalg.norm(poly_gain) + gains = gains @ self.dmx_mat.T + gains = gains / np.linalg.norm(gains) + + if self.intensity_panning: + gains = np.sqrt(gains / np.sum(gains)) + + return gains + + """ public functions """ + + def pan( + self, + azimuths: float, + elevations: float, + intensity_panning: Optional[bool] = False, + ) -> np.ndarray: + """ + Compute gains for the requested panning position + + Parameters + ---------- + azimuths : float + Azimuth of requested panning position + elevations : float + Elevation of requested panning position + intensity_panning : bool + Flag whether to use intensity panning (Default is False == amplitude panning) + + Returns + ------- + gains: np.ndarray + Panning gains for the loudspeaker layout + """ + azimuths = np.array(azimuths) + elevations = np.array(elevations) + if azimuths.size == 1 and elevations.size == 1: + return self._compute_gains_point(azimuths, elevations) + elif np.squeeze(azimuths).ndim == 1 and np.squeeze(elevations).ndim == 1: + gains = [] + for a, e in zip(azimuths, elevations): + gains.append(self._compute_gains_point(a, e)) + return np.vstack(gains) + else: + raise ValueError( + "Azimuth and Elevation arrays cannot have more than one dimension and must be of equal size" + ) + + +def main(args): + """ + Parses a speaker layout text file and prints the panning gains + for the requested position + + Parameters + ---------- + args : Namespace + Command line arguments + """ + + speaker_positions = np.loadtxt(Path(args.input), delimiter=",", max_rows=2) + panner = EFAP(speaker_positions[0, :], speaker_positions[1, :], args.efip) + print(panner.pan(args.azimuth, args.elevation)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Edge-Fading Amplitude Panning") + parser.add_argument( + "-i", + "--input", + metavar="layout_file", + required=True, + type=str, + help="IVAS compatible loudspeaker layout file (Loudspeaker azimuths in first line, elevations in second, subsequent lines are ignored)", + ) + parser.add_argument( + "-efip", + "-intensity_panning", + default=False, + action="store_true", + help="Intensity panning mode (EFIP)", + ) + parser.add_argument( + "azimuth", + type=float, + help="Azimuth of direction to compute panning gains for (positive-left)", + ) + parser.add_argument( + "elevation", + type=float, + help="Elevation 
of direction to compute panning gains for (positive-up)", + ) + args = parser.parse_args() + main(args) diff --git a/item_generation_scripts/audiotools/__init__.py b/item_generation_scripts/audiotools/__init__.py new file mode 100644 index 00000000..effc5a25 --- /dev/null +++ b/item_generation_scripts/audiotools/__init__.py @@ -0,0 +1,286 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+#
+
+import argparse
+from itertools import repeat
+from pathlib import Path
+
+from item_generation_scripts.audiotools.constants import AUDIO_FORMATS
+from item_generation_scripts.audiotools.convert import convert_file
+from item_generation_scripts.utils import apply_func_parallel
+
+
+def add_processing_args(group, input=True):
+    # set up prefixes to avoid argument collision
+    if input:
+        p = "in"
+        ps = "i"
+    else:
+        p = "out"
+        ps = "o"
+
+    group.add_argument(
+        f"-{ps}",
+        f"--{p}",
+        dest=f"{p}put",
+        required=True,
+        type=Path,
+        help="Path to *.{wav, pcm, raw} file or directory",
+    )
+    group.add_argument(
+        f"-{ps}f",
+        f"--{p}_fmt",
+        required=input,
+        type=str,
+        help="Audio format (use -l, --list for a list / -L, --long for a detailed list)",
+        default=None,
+    )
+    group.add_argument(
+        f"-{ps}s",
+        f"--{p}_fs",
+        type=int,
+        help="Sampling rate (Hz) (deduced for .wav input, same as input if output not specified, default = %(default)s)",
+        default=48000,
+    )
+    group.add_argument(
+        f"-{ps}fc",
+        f"--{p}_cutoff",
+        type=int,
+        help="Cut-off frequency for low-pass filtering (default = %(default)s)",
+        default=None,
+    )
+    group.add_argument(
+        f"-{ps}hp",
+        f"--{p}_hp50",
+        help="Apply 50 Hz high-pass filtering (default = %(default)s)",
+        action="store_true",
+    )
+    group.add_argument(
+        f"-{ps}w",
+        f"--{p}_window",
+        type=float,
+        help="Window the start/end of the signal by this amount in milliseconds (default = %(default)s)",
+        default=None,
+    )
+    group.add_argument(
+        f"-{ps}t",
+        f"--{p}_trim",
+        type=float,
+        nargs=2,
+        metavar=("PRE_TRIM", "POST_TRIM"),
+        help="Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence, default = %(default)s)",
+    )
+    group.add_argument(
+        f"-{ps}pn",
+        f"--{p}_pad_noise",
+        help="Flag for padding with noise instead of zeros",
+        action="store_true",
+    )
+    group.add_argument(
+        f"-{ps}d",
+        f"--{p}_delay",
+        type=float,
+        help="Delay the signal by this amount in milliseconds (negative values advance, default = %(default)s)",
+        default=None,
+    )
+    group.add_argument(
+        f"-{ps}l",
+        f"--{p}_loudness",
+        type=float,
+        help="Normalize to given loudness with BS 1770-4 (default = %(default)s)",
+        default=None,
+    )
+    group.add_argument(
+        f"-{ps}nf",
+        f"--{p}_loudness_fmt",
+        type=str,
+        help=f"Format used for loudness computation (only valid with -{ps}l/--{p}_loudness, default = {p.upper()}_FMT)",
+        default=None,
+    )
+
+
+def get_args():
+    parser = argparse.ArgumentParser(
+        description="Audiotools: Convert/Manipulate spatial audio files."
+    )
+
+    """ Input file arguments """
+    input_parser = parser.add_argument_group("Input (pre-) processing options")
+
+    # add common arguments
+    add_processing_args(input_parser)
+
+    # input only arguments
+    input_parser.add_argument(
+        "-im",
+        "--in_meta",
+        type=str,
+        nargs="+",
+        help="list of input metadata files (only relevant for ISM and MASA input)",
+        default=None,
+    )
+
+    """ Output file arguments """
+    output_parser = parser.add_argument_group("Output (post-) processing options")
+
+    # add common arguments
+    add_processing_args(output_parser, False)
+
+    # output only arguments
+    output_parser.add_argument(
+        "-lm",
+        "--limit",
+        help="Apply limiting to output (default = %(default)s)",
+        action="store_true",
+    )
+    output_parser.add_argument(
+        "-t",
+        "--trajectory",
+        type=str,
+        help="Head-tracking trajectory file for binaural output (default = %(default)s)",
+        default=None,
+    )
+    output_parser.add_argument(
+        "-bd",
+        "--bin_dataset",
+        type=str,
+        help="Use a custom binaural dataset (see README.md and audiotools/binaural_datasets/README.txt for further information)",
+        default=None,
+    )
+    output_parser.add_argument(
+        "-bl",
+        "--bin_lfe_gain",
+        type=float,
+        help="Render LFE to binaural output with the specified gain (only valid for channel-based input, default = %(default)s)",
+        default=None,
+    )
+    output_parser.add_argument(
+        "-mnru",
+        "--mnru_q",
+        type=float,
+        help="Apply MNRU processing with the given Q value (default = %(default)s)",
+        default=None,
+    )
+    output_parser.add_argument(
+        "-esdru",
+        "--esdru_alpha",
+        type=float,
+        help="Apply ESDRU processing with the given alpha value (default = %(default)s)",
+        default=None,
+    )
+
+    """ Miscellaneous or meta arguments """
+    misc_parser = parser.add_argument_group("General options")
+
+    misc_parser.add_argument(
+        "-l",
+        "--list",
+        help="list all supported audio formats and exit",
+        action="store_true",
+    )
+    misc_parser.add_argument(
+        "-L",
+        "--long",
+        help="list all supported audio formats with long description and exit",
+        action="store_true",
+    )
+    misc_parser.add_argument(
+        "-mp",
+        "--multiprocessing",
+        help="Enable multiprocessing (default = %(default)s)",
+        action="store_true",
+    )
+
+    return parser.parse_args()
+
+
+def main():
+    args = get_args()
+
+    if args.list or args.long:
+        for fmt in AUDIO_FORMATS:
+            if args.long:
+                for f, d in fmt.items():
+                    print(f)
+                    [print(f"\t{k}: {v}") for k, v in d.items()]
+            else:
+                print(", ".join(fmt.keys()))
+        exit()
+
+    elif args.input is not None:
+        if not args.out_fs:
+            args.out_fs = args.in_fs
+
+        if not args.out_fmt:
+            args.out_fmt = args.in_fmt
+
+        if not args.out_loudness_fmt:
+            args.out_loudness_fmt = args.out_fmt
+
+        # List input files
+        args.input = Path(args.input)
+        in_files = []
+        if args.input.exists():
+            if args.input.is_dir():
+                in_files.extend(args.input.glob("*.wav"))
+                in_files.extend(args.input.glob("*.pcm"))
+                in_files.extend(args.input.glob("*.raw"))
+            else:
+                in_files = [args.input]
+        else:
+            raise ValueError(f"Input path {args.input} does not exist!")
+
+        if len(in_files) == 0:
+            raise ValueError(f"Input directory {args.input} empty!")
+
+        # Create output directory
+        args.output = Path(args.output)
+
+        if len(in_files) == 1 and args.input.is_file():
+            out_files = [args.output]
+        else:
+            args.output.mkdir(exist_ok=True)
+            out_files = [args.output.joinpath(i.name) for i in in_files]
+
+        # Multiprocessing
+        enable_multiprocessing = args.multiprocessing
+
+        # Remove unneeded keys to avoid passing to convert_file()
+        for k in ["list", "long", "multiprocessing", "input", "output"]:
+            args.__dict__.pop(k)
+
+
apply_func_parallel( + convert_file, + zip(in_files, out_files), + repeat(args.__dict__), + "mp" if enable_multiprocessing else None, + ) diff --git a/item_generation_scripts/audiotools/__main__.py b/item_generation_scripts/audiotools/__main__.py new file mode 100644 index 00000000..9bdf64cd --- /dev/null +++ b/item_generation_scripts/audiotools/__main__.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +from item_generation_scripts.audiotools import main + +if __name__ == "__main__": + main() diff --git a/item_generation_scripts/audiotools/audio.py b/item_generation_scripts/audiotools/audio.py new file mode 100644 index 00000000..1804f5dd --- /dev/null +++ b/item_generation_scripts/audiotools/audio.py @@ -0,0 +1,428 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. 
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +import warnings +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Optional, Union + +import numpy as np + +from item_generation_scripts.audiotools.audiofile import read +from item_generation_scripts.audiotools.constants import ( + BINAURAL_AUDIO_FORMATS, + CHANNEL_BASED_AUDIO_ALTNAMES, + CHANNEL_BASED_AUDIO_FORMATS, + IVAS_FRAME_LEN_MS, + METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, + OBJECT_BASED_AUDIO_FORMATS, + SCENE_BASED_AUDIO_FORMATS, +) + +from .EFAP import wrap_angles + + +class Audio(ABC): + """Base class for audio data""" + + def __init__(self, name: str): + self.name = name.upper() + self.audio = None + self.fs = None + self.num_channels = None + # self.logger = None # TODO needed? + + def __repr__(self): + return f"{self.__class__} : {self.__dict__}" + + @classmethod + @abstractmethod + def _from_file(cls, name: str, filename: Path, fs: Optional[int] = None) -> "Audio": + """Create an Audio object from a file""" + out_audio = cls(name) + + filename = Path(filename) + if filename.suffix in [".pcm", ".raw"]: + if fs is None: + raise ValueError( + "Sampling rate must be specified for headerless files!" + ) + out_audio.audio, out_audio.fs = read(filename, out_audio.num_channels, fs) + elif filename.suffix == ".wav": + out_audio.audio, out_audio.fs = read(filename) + else: + raise NotImplementedError(f"Filetype {filename.suffix} is unsupported!") + + return out_audio + + @classmethod + @abstractmethod + def _from_filelist( + cls, name, files: list[Path], fs: Optional[int] = None + ) -> "Audio": + """Create an Audio object from a list of files with channels""" + out_audio = cls(name) + + for f in files: + f = Path(f) + + if f.suffix in [".pcm", ".raw"]: + if fs is None: + raise ValueError( + "Sampling rate must be specified for headerless files!" 
+ ) + channel, fs = read(f, out_audio.num_channels, fs) + elif f.suffix == ".wav": + channel, fs = read(f) + else: + raise NotImplementedError(f"Filetype {f.suffix} is unsupported!") + + if out_audio.audio is None: + out_audio.audio = channel + out_audio.fs = fs + else: + if fs != out_audio.fs: + raise ValueError( + f"Sampling rate mismatch between input audio files, expected {out_audio.fs}, encountered {fs} for {f}!" + ) + + if channel.shape[0] > out_audio.audio.shape[0]: + channel = channel[: out_audio.audio.shape[0], :] + elif channel.shape[0] < out_audio.audio.shape[0]: + out_audio.audio = out_audio.audio[: channel.shape[0], :] + out_audio.audio = np.column_stack([out_audio.audio, channel]) + + return out_audio + + def apply(self, func, **kwargs) -> None: + """Apply a function to the audio array""" + self.audio = func(self.audio, self.fs, **kwargs) + + +class BinauralAudio(Audio): + """Sub-class for binaural audio""" + + def __init__(self, name: str): + super().__init__(name) + try: + self.__dict__.update(BINAURAL_AUDIO_FORMATS[name.upper()]) + except KeyError: + raise ValueError(f"Unsupported binaural audio format {name}") + + @classmethod + def _from_file( + cls, name: str, filename: Path, fs: Optional[int] = None + ) -> "BinauralAudio": + return super()._from_file(name, filename, fs) + + @classmethod + def _from_filelist( + cls, name: str, filename: Path, fs: Optional[int] = None + ) -> "BinauralAudio": + return super()._from_filelist(name, filename, fs) + + +class ChannelBasedAudio(Audio): + """Sub-class for channel-based audio""" + + def __init__(self, name: str): + if Path(name).exists() and Path(name).suffix == ".txt": + self.parse_custom_layout(name) + else: + # remap configuration name to internal naming + if name.upper() in CHANNEL_BASED_AUDIO_ALTNAMES.keys(): + name = CHANNEL_BASED_AUDIO_ALTNAMES[name.upper()] + + super().__init__(name) + try: + self.__dict__.update(CHANNEL_BASED_AUDIO_FORMATS[name.upper()]) + except KeyError: + raise ValueError(f"Unsupported channel-based audio format {name}") + + self.is_planar = np.all([e == 0 for e in self.ls_ele]) + + def parse_custom_layout(self, layout_file: Union[Path, str]): + layout_file = Path(layout_file) + with open(layout_file) as f_ls: + self.ls_azi = [float(x.strip()) for x in f_ls.readline().strip().split(",")] + self.ls_ele = [float(x.strip()) for x in f_ls.readline().strip().split(",")] + try: + self.lfe_index = [ + int(x.strip()) for x in f_ls.readline().strip().split(",") + ] + except Exception: + self.lfe_index = [] + + if self.lfe_index: + [self.ls_azi.insert(i, 0.0) for i in self.lfe_index] + [self.ls_ele.insert(i, 0.0) for i in self.lfe_index] + + self.name = layout_file.stem + self.num_channels = len(self.ls_azi) + self.layout_file = layout_file + + @classmethod + def _from_file( + cls, name: str, filename: Path, fs: Optional[int] = None + ) -> "ChannelBasedAudio": + return super()._from_file(name, filename, fs) + + @classmethod + def _from_filelist( + cls, name: str, filename: Path, fs: Optional[int] = None + ) -> "ChannelBasedAudio": + return super()._from_filelist(name, filename, fs) + + +class MetadataAssistedSpatialAudio(Audio): + """Sub-class for metadata-assisted spatial audio""" + + def __init__(self, name: str): + super().__init__(name) + try: + self.__dict__.update(METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS[name.upper()]) + except KeyError: + raise ValueError( + f"Unsupported metadata assisted spatial audio format {name}" + ) + self.metadata_files = [] + + @classmethod + def _from_file( + cls, + name: str, + 
filename: Path,
+        metadata_files: list[str],
+        fs: Optional[int] = None,
+    ) -> "MetadataAssistedSpatialAudio":
+        obj = super()._from_file(name, filename, fs)
+        obj.metadata_file = Path(metadata_files[0])
+        return obj
+
+    @classmethod
+    def _from_filelist(
+        cls,
+        name: str,
+        filename: Path,
+        metadata_files: list[str],
+        fs: Optional[int] = None,
+    ) -> "MetadataAssistedSpatialAudio":
+        obj = super()._from_filelist(name, filename, fs)
+        obj.metadata_file = Path(metadata_files[0])
+        return obj
+
+
+class ObjectBasedAudio(Audio):
+    """Sub-class for object-based audio"""
+
+    def __init__(self, name: str):
+        super().__init__(name)
+        try:
+            self.__dict__.update(OBJECT_BASED_AUDIO_FORMATS[name.upper()])
+        except KeyError:
+            raise ValueError(f"Unsupported object-based audio format {name}")
+        self.object_pos = []
+        self.metadata_files = []
+
+    @classmethod
+    def _from_file(
+        cls,
+        name: str,
+        filename: Union[str, Path],
+        metadata_files: list[Union[str, Path]],
+        fs: Optional[int] = None,
+    ) -> "ObjectBasedAudio":
+        obj = super()._from_file(name, filename, fs)
+        filename = Path(filename)
+        if metadata_files is not None:
+            obj.metadata_files = [Path(f) for f in metadata_files]
+        else:
+            # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv
+            for obj_idx in range(obj.num_channels):
+                file_name_meta = filename.with_suffix(
+                    f"{filename.suffix}.{obj_idx}.csv"
+                )
+                if file_name_meta.is_file():
+                    obj.metadata_files.append(file_name_meta)
+                else:
+                    raise ValueError(f"Metadata file {file_name_meta} not found.")
+            warnings.warn(
+                f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}"
+            )
+
+        obj.init_metadata()
+        return obj
+
+    @classmethod
+    def _from_filelist(
+        cls,
+        name: str,
+        filename: Path,
+        metadata_files: list[Union[str, Path]],
+        fs: Optional[int] = None,
+    ) -> "ObjectBasedAudio":
+        obj = super()._from_filelist(name, filename, fs)
+        obj.metadata_files = [Path(f) for f in metadata_files]
+        obj.init_metadata()
+        return obj
+
+    def init_metadata(self):
+        if self.audio.shape[1] != len(self.metadata_files):
+            raise ValueError(
+                f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]"
+            )
+
+        self.object_pos = []
+        for f in self.metadata_files:
+            pos = np.genfromtxt(f, delimiter=",")
+
+            # check if metadata has the right number of columns
+            if pos.shape[1] < 5:
+                raise ValueError("Metadata incomplete. Columns are missing.")
+            elif pos.shape[1] > 5:
+                if pos.shape[1] == 7:
+                    pos = pos[:, :5]
+                else:
+                    raise ValueError(
+                        "Too many columns in metadata (possibly old version with frame index used)"
+                    )
+
+            # check if metadata is longer than file -> cut off
+            num_frames = int(
+                np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000))
+            )
+            if num_frames < pos.shape[0]:
+                pos = pos[:num_frames]
+            # check if metadata is shorter than file -> loop
+            elif num_frames > pos.shape[0]:
+                pos_loop = np.zeros((num_frames, pos.shape[1]))
+                pos_loop[: pos.shape[0]] = pos
+                for idx in range(pos.shape[0], num_frames):
+                    pos_loop[idx, :2] = pos[idx % pos.shape[0], :2]
+                pos = pos_loop
+
+            # wrap metadata to target value range
+            for j in range(num_frames):
+                pos[j, 0], pos[j, 1] = wrap_angles(pos[j, 0], pos[j, 1], clip_ele=True)
+
+            self.object_pos.append(pos)
+
+
+class SceneBasedAudio(Audio):
+    """Sub-class for scene-based audio"""
+
+    def __init__(self, name: str):
+        if name == "SBA1":
+            name = "FOA"
+        elif name == "SBA2":
+            name = "HOA2"
+        elif name == "SBA3":
+            name = "HOA3"
+
+        super().__init__(name)
+        try:
+            self.__dict__.update(SCENE_BASED_AUDIO_FORMATS[name.upper()])
+        except KeyError:
+            raise ValueError(f"Unsupported scene-based audio format {name}")
+
+        # self.ambi_order = ambi_order_from_nchan(self.num_channels)
+        self.ambi_order = int(np.sqrt(self.num_channels) - 1)
+
+    @classmethod
+    def _from_file(
+        cls, name: str, filename: Path, fs: Optional[int] = None
+    ) -> "SceneBasedAudio":
+        return super()._from_file(name, filename, fs)
+
+    @classmethod
+    def _from_filelist(
+        cls, name: str, filename: Path, fs: Optional[int] = None
+    ) -> "SceneBasedAudio":
+        return super()._from_filelist(name, filename, fs)
+
+
+def _get_audio_class(fmt) -> Audio:
+    """Return a child audio class corresponding to the specified format"""
+    if fmt in BINAURAL_AUDIO_FORMATS.keys():
+        return BinauralAudio
+    elif fmt in METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS.keys():
+        return MetadataAssistedSpatialAudio
+    elif fmt in OBJECT_BASED_AUDIO_FORMATS.keys():
+        return ObjectBasedAudio
+    elif fmt in SCENE_BASED_AUDIO_FORMATS.keys():
+        return SceneBasedAudio
+    elif (
+        fmt in CHANNEL_BASED_AUDIO_FORMATS.keys()
+        or fmt in CHANNEL_BASED_AUDIO_ALTNAMES.keys()
+    ):
+        return ChannelBasedAudio
+    elif Path(fmt).suffix == ".txt":
+        return ChannelBasedAudio
+    else:
+        raise ValueError(f"Unknown audio format {fmt}!")
+
+
+def fromtype(fmt: str) -> Audio:
+    return _get_audio_class(fmt)(fmt)
+
+
+def fromarray(fmt: str, x: np.ndarray, fs: int) -> Audio:
+    """Wrap the given array into an audio format"""
+    if x is None or not fs:
+        raise ValueError("Both array and sampling rate must be specified!")
+
+    output = _get_audio_class(fmt)(fmt)
+
+    output.audio = x
+    output.fs = fs
+
+    return output
+
+
+def fromfile(
+    fmt: str,
+    filename: Union[str, Path],
+    fs: Optional[int] = None,
+    in_meta: Optional[list[Union[str, Path]]] = None,
+) -> Audio:
+    """Create an Audio object of the specified format from the given file"""
+    filename = Path(filename)
+    fmt_cls = _get_audio_class(fmt)
+    if fmt_cls is ObjectBasedAudio or fmt_cls is MetadataAssistedSpatialAudio:
+        return fmt_cls._from_file(fmt, filename, in_meta, fs)
+    else:
+        return fmt_cls._from_file(fmt, filename, fs)
+
+
+def fromfilelist(
+    fmt: str, files: list[Union[str, Path]], fs: Optional[int] = None
+) -> Audio:
+    """Create an Audio object of the specified format from the given list of files"""
+    return _get_audio_class(fmt)._from_filelist(fmt, files, fs)
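
# --- Editorial usage sketch (not part of the patch). It shows how the factory
# --- helpers above are meant to be combined; the format names "STEREO" and
# --- "ISM2" and the file names are illustrative assumptions.
import numpy as np

from item_generation_scripts.audiotools import audio

# wrap an in-memory array into a typed Audio object
noise = np.random.randn(48000, 2).astype(np.float32)
stereo = audio.fromarray("STEREO", noise, fs=48000)

# load an object-based item together with its per-object metadata CSVs
ism = audio.fromfile(
    "ISM2", "item.wav", in_meta=["item.wav.0.csv", "item.wav.1.csv"]
)
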
diff --git
a/item_generation_scripts/audiotools/audioarray.py b/item_generation_scripts/audiotools/audioarray.py new file mode 100644 index 00000000..c0909c4c --- /dev/null +++ b/item_generation_scripts/audiotools/audioarray.py @@ -0,0 +1,690 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+# + +import logging +import warnings +from typing import Iterator, Optional, Tuple, Union + +import numpy as np +import scipy.signal as sig + +from .constants import DELAY_COMPENSATION_FOR_FILTERING, SEED_PADDING + +logger = logging.getLogger("__main__") +logger.setLevel(logging.DEBUG) + + +"""Functions used in this module""" + + +def trim( + x: np.ndarray, + fs: Optional[int] = 48000, + limits: Optional[Tuple[int, int]] = None, + pad_noise: Optional[bool] = False, + samples: Optional[bool] = False, +) -> np.ndarray: + """ + Trim an audio array + + Parameters + ---------- + x: np.ndarray + Input array + fs: Optional[int] + Input sampling rate in Hz, default = 48000 + limits: Optional[Tuple[int, int]] + Pre- and post-trim duration in milliseconds (negative values pad) + pad_noise: Optional[bool] + If true noise will be padded otherwise zeros will be padded + samples: Optional[bool] + If true limits are interpreted as samples, otherwise as ms + + Returns + ------- + y : np.ndarray + Output trimmed array + """ + + if not limits: + return x + + if not samples: + pre_trim = int(limits[0] * fs // 1000) + post_trim = int(limits[1] * fs // 1000) + else: + pre_trim = limits[0] + post_trim = limits[1] + + if pre_trim < 0: + if pad_noise: + # pad with uniformly distributed noise between -4 and 4 + np.random.seed(SEED_PADDING) + noise = np.random.randint( + low=-4, high=5, size=(np.abs(pre_trim), np.shape(x)[1]) + ).astype("float") + x = np.concatenate((noise, x), axis=0) + else: + x = np.pad(x, [[np.abs(pre_trim), 0], [0, 0]]) + elif pre_trim > 0: + x = x[pre_trim:, :] + + if post_trim < 0: + if pad_noise: + # pad with uniformly distributed noise between -4 and 4 + np.random.seed(SEED_PADDING) + noise = np.random.randint( + low=-4, high=5, size=(np.abs(post_trim), np.shape(x)[1]) + ).astype("float") + x = np.concatenate((x, noise), axis=0) + else: + x = np.pad(x, [[0, np.abs(post_trim)], [0, 0]]) + elif post_trim > 0: + x = x[:-post_trim, :] + + return x + + +def window( + x: np.ndarray, + fs: Optional[int] = 48000, + len_ms: Optional[float] = 100, +) -> np.ndarray: + """ + Apply windowing to the start and end + of an audio array + + + Parameters + ---------- + x: np.ndarray + Input audio array + fs: Optional[int] + Input sampling rate in Hz, default = 48000 + len_ms: Optional[float] + Window length used at start and end of array in milliseconds, default = 100 ms + + Returns + ------- + y: np.ndarray + Output windowed array + """ + + wlen_smp = int(len_ms * fs // 1000) + + # if requested window length is larger than the signal, simply window the signal + if wlen_smp > x.shape[0]: + wlen_smp = x.shape[0] // 2 + + window = sig.windows.hann(2 * wlen_smp) + + # we only need half of the window + window = window[:wlen_smp, np.newaxis] + + x[:wlen_smp, :] *= window + x[-wlen_smp:, :] *= window[::-1, :] + + return x + + +def delay_compensation( + x: np.ndarray, + flt_type: str, + fs: Optional[int] = 48000, + up: Optional[bool] = False, + down: Optional[bool] = False, +) -> np.ndarray: + """ + Compensation for a delayed signal + + Parameters + ---------- + x: np.ndarray + Input array + flt_type: str + Name of filter type used for filtering + fs: Optional[int] + Input sampling rate + up: Optional[bool] + Flag for up-sampling + down: Optional[bool] + Flag for down-sampling + + Returns + ------- + x: np.ndarray + Delay compensated test array + """ + + # Get the delay in number of samples + if flt_type == "SHQ2" and up: + d_samples = DELAY_COMPENSATION_FOR_FILTERING["SHQ2"]["up"] + elif flt_type == "SHQ2" and 
down:
+        d_samples = DELAY_COMPENSATION_FOR_FILTERING["SHQ2"]["down"]
+    elif flt_type == "SHQ3" and up:
+        d_samples = DELAY_COMPENSATION_FOR_FILTERING["SHQ3"]["up"]
+    elif flt_type == "SHQ3" and down:
+        d_samples = DELAY_COMPENSATION_FOR_FILTERING["SHQ3"]["down"]
+    else:
+        d_samples = DELAY_COMPENSATION_FOR_FILTERING[flt_type]
+    # Delay compensation
+    x = delay(x, fs, -d_samples, samples=True)
+
+    return x
+
+
+def delay(
+    x: np.ndarray,
+    fs: Optional[int] = 48000,
+    delay: Optional[float] = 0,
+    samples: Optional[bool] = False,
+) -> np.ndarray:
+    """
+    Delay a signal by a specified duration (ms) or number of samples
+
+    Parameters
+    ----------
+    x: np.ndarray
+        Input array
+    fs: Optional[int]
+        Sampling rate
+    delay: Optional[float]
+        Delay in milliseconds or samples (negative values advance the signal)
+    samples: Optional[bool]
+        If true delay is interpreted as samples, if false as milliseconds
+
+    Returns
+    -------
+    x: np.ndarray
+        Delayed audio signal
+    """
+
+    if not samples:
+        delay = int(delay * fs / 1000)
+
+    delay_abs = np.abs(delay)
+
+    x = np.roll(x, delay, axis=0)
+
+    if delay < 0:
+        x[-delay_abs:, :] = 0
+    elif delay > 0:
+        x[:delay_abs, :] = 0
+
+    return x
+
+
+def limiter(
+    x: np.ndarray,
+    fs: int,
+) -> np.ndarray:
+    """
+    Apply limiting to an audio signal
+
+    Parameters
+    ----------
+    x: np.ndarray
+        Input array
+    fs: int
+        Input sampling frequency
+
+    Returns
+    -------
+    x: np.ndarray
+        Limited audio signal
+    """
+
+    limiter_threshold = 32729  # -0.01dB FS
+    limiter_attack_seconds = 0.005
+    attack_constant = 0.01 ** (1.0 / (limiter_attack_seconds * fs))
+    release_heuristics_mem = 0.0
+    gain = 1.0
+    strong_saturation_cnt = 0
+    limited = False
+
+    # promote 1-D mono input to a 2-D column so the per-frame slicing below works
+    if x.ndim == 1:
+        x = x[:, np.newaxis]
+    n_samples_x, n_chan_x = x.shape
+
+    # framing
+    framesize = fs // 50
+    nframes = n_samples_x // framesize
+    for fr in range(nframes):
+        apply_limiting = True
+        fr_sig = x[fr * framesize : ((fr + 1) * framesize), :]
+        sig_max = np.amax(np.absolute(fr_sig))
+        release_heuristic = release_heuristics_mem
+        if sig_max > limiter_threshold:
+            frame_gain = limiter_threshold / sig_max
+            release_heuristic = min(1.0, release_heuristic + (4.0 * framesize / fs))
+        else:
+            release_heuristic = max(0.0, release_heuristic - (framesize / fs))
+            if gain >= 1.0 - 1e-10:
+                apply_limiting = False
+
+            frame_gain = 1.0
+
+        if sig_max > 3 * limiter_threshold and strong_saturation_cnt > 0:
+            apply_strong_limiting = True
+        elif sig_max > 10 * limiter_threshold:
+            strong_saturation_cnt += 20
+            apply_strong_limiting = True
+        else:
+            strong_saturation_cnt -= 1
+            if strong_saturation_cnt < 0:
+                strong_saturation_cnt = 0
+            apply_strong_limiting = False
+
+        if apply_strong_limiting is True:
+            if frame_gain < 0.3:
+                frame_gain /= 3.0
+            else:
+                apply_strong_limiting = False
+
+        if frame_gain < 0.1 and apply_strong_limiting is False:
+            frame_gain = 0.1
+
+        if apply_limiting is True:
+            if frame_gain < gain:
+                fac = attack_constant ** (np.arange(1, framesize + 1, dtype=np.float32))
+            else:
+                release_constant = 0.01 ** (
+                    1.0 / (0.005 * (200.0**release_heuristic) * fs)
+                )
+                fac = release_constant ** (
+                    np.arange(1, framesize + 1, dtype=np.float32)
+                )
+
+            fr_gain = np.tile(gain * fac + frame_gain * (1.0 - fac), (n_chan_x, 1)).T
+            fr_sig *= fr_gain
+            gain = fr_gain[-1, 0]
+            limited = True
+        else:
+            gain = 1.0
+
+        release_heuristics_mem = release_heuristic
+        # hard limiting for everything that still sticks out
+        if (fr_sig > 32767).any() or (fr_sig < -32768).any():
+            limited = True
+            idx_max = np.where(fr_sig > 32767)
+            fr_sig[idx_max] = 32767
+            idx_min = np.where(fr_sig < -32768)
+            fr_sig[idx_min] = -32768
+
+    if limited:
+        warnings.warn("Limiting had to be applied")
+    return x
+
+
+def get_framewise(
+    x: np.ndarray,
+    chunk_size: int,
+    zero_pad: Optional[bool] = False,
+) -> Iterator:
+    """
+    Generator to yield a signal frame by frame
+    If the array size is not a multiple of chunk_size, the last frame contains the remainder
+
+    Parameters
+    ----------
+    x: np.ndarray
+        Input array
+    chunk_size: int
+        Size of frames to yield
+    zero_pad: Optional[bool]
+        Whether to zero pad the last chunk if there are not enough samples
+
+    Yields
+    ------
+    frame: np.ndarray
+        One frame of the input audio signal
+    """
+
+    n_frames = x.shape[0] // chunk_size
+    for i in range(n_frames):
+        yield x[i * chunk_size : (i + 1) * chunk_size, :]
+    if x.shape[0] % chunk_size:
+        last_chunk = x[n_frames * chunk_size :, :]
+        if zero_pad:
+            yield np.pad(
+                last_chunk, [[0, chunk_size - (x.shape[0] % chunk_size)], [0, 0]]
+            )
+        else:
+            yield last_chunk
+
+
+def framewise_io(
+    i: np.ndarray, o: np.ndarray, chunk_size: int, zero_pad: Optional[bool] = False
+) -> Iterator:
+    """
+    Return an iterator over frame_index, input_frame and output_frame
+
+    Parameters
+    ----------
+    i: np.ndarray
+        Input array
+    o: np.ndarray
+        Output array
+    chunk_size: int
+        Size of frames to yield
+    zero_pad: Optional[bool]
+        Whether to zero pad the last chunk if there are not enough samples
+
+    Yields
+    ------
+    frame: Iterator
+        Frame index, one frame of the input and output audio signal
+    """
+
+    return enumerate(
+        zip(
+            get_framewise(i, chunk_size, zero_pad),
+            get_framewise(o, chunk_size, zero_pad),
+        )
+    )
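
# --- Editorial usage sketch (not part of the patch): iterating a signal in
# --- 20 ms frames with the generator above. The module path is assumed from
# --- this patch set.
import numpy as np

from item_generation_scripts.audiotools.audioarray import get_framewise

fs = 48000
x = np.zeros((fs, 2))  # one second of stereo silence
for frame in get_framewise(x, chunk_size=fs // 50, zero_pad=True):
    assert frame.shape == (fs // 50, 2)  # 960-sample frames at 48 kHz
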
+"""Deprecated functions (partly replaced by ITU binaries)"""
+
+
+def resample(
+    x: np.ndarray,
+    in_freq: int,
+    out_freq: int,
+) -> np.ndarray:
+    """
+    Resample a multi-channel audio array
+
+    Parameters
+    ----------
+    x: np.ndarray
+        Input array
+    in_freq: int
+        Input sampling rate
+    out_freq: int
+        Output sampling rate
+
+    Returns
+    -------
+    y: np.ndarray
+        Output resampled array
+    """
+
+    if in_freq == out_freq or out_freq is None:
+        y = x
+    else:
+        datatype = x.dtype
+        if datatype.name.startswith("int"):
+            # cast necessary due to bug in resample_poly() with input of type int
+            x = x.astype("float")
+
+        y = sig.resample_poly(x, out_freq, in_freq)
+
+        if datatype.name.startswith("int"):
+            # cast the resampled signal (not the input) back to the original dtype
+            y = y.astype(datatype)
+
+    return y
+
+
+def lpfilter(
+    x: np.ndarray,
+    fc: int,
+    fs: int,
+) -> np.ndarray:
+    """
+    Low-pass filter a multi-channel audio array
+
+    Parameters
+    ----------
+    x: np.ndarray
+        Input array
+    fc: int
+        Cut-off frequency in Hz
+    fs: int
+        Sampling rate in Hz
+
+    Returns
+    -------
+    y: np.ndarray
+        Output low-pass filtered array
+    """
+
+    if (fc + 500) < (fs / 2.0):
+        # Design a Chebyshev type II filter, band_pass-band_stop = 500 Hz
+        N, Wn = sig.cheb2ord(fc / (fs / 2), (fc + 500) / (fs / 2), 3, 60)
+        b, a = sig.cheby2(N, 60, Wn, "low")
+
+        # Apply the Chebyshev filter to each channel, across the time axis
+        # y = sig.lfilter(b, a, x, axis=0)  # non zero-phase filter
+        y = sig.filtfilt(b, a, x, axis=0)  # zero-phase filter, batch processing
+    else:
+        y = x
+
+    return y
+
+
+def cut(
+    x: np.ndarray,
+    limits: Optional[Tuple[int, int]],
+) -> np.ndarray:
+    """
+    Cut an audio array
+
+    Parameters
+    ----------
+    x: np.ndarray
+        Input array
+    limits: Tuple[int, int]
+        First and last samples to extract
+
+    Returns
+    -------
+    y: np.ndarray
+        Output cut array
+    """
+
+    in_samples, in_channels = x.shape
+ first_sample = limits[0] + last_sample = limits[1] + + if first_sample == 0 and (last_sample == -1 or last_sample == in_samples): + y = x + else: + if last_sample == -1: + last_sample = in_samples + + signal_start = first_sample + signal_end = last_sample + insert_start = 0 + insert_end = last_sample - first_sample + total_samples = last_sample - first_sample + if first_sample < 0: + samples_to_pad_begin = -first_sample + insert_start = samples_to_pad_begin + insert_end += samples_to_pad_begin + if last_sample > in_samples: + signal_end = in_samples + insert_end = insert_end - last_sample + in_samples + y = np.zeros([total_samples, in_channels], dtype=x.dtype) + y[insert_start:insert_end, :] = x[signal_start:signal_end, :] + + return y + + +def compare( + ref: np.ndarray, + test: np.ndarray, + fs: int, + per_frame: bool = False, +) -> dict: + """ + Compare two audio arrays + + Parameters + ---------- + ref: np.ndarray + Input reference array + test: np.ndarray + Input test array + fs: int + Input sampling rate in Hz + + Returns + ------- + result: dict + Comparison results + """ + + framesize = fs // 50 + diff = abs(test - ref) + max_diff = int(diff.max()) + result = { + "bitexact": True, + "max_abs_diff": 0, + "max_abs_diff_pos_sample": 0, + "max_abs_diff_pos_channel": 0, + "nsamples_diff": 0, + "nsamples_diff_percentage": 0.0, + "first_diff_pos_sample": -1, + "first_diff_pos_channel": -1, + "first_diff_pos_frame": -1, + } + if per_frame: + result["max_abs_diff_pos_frame"] = 0 + result["nframes_diff"] = 0 + result["nframes_diff_percentage"] = 0.0 + + if max_diff != 0: + if diff.ndim == 1: + nsamples_total = diff.shape + nchannels = 1 + else: + nsamples_total, nchannels = diff.shape + max_diff_pos = np.nonzero(diff == max_diff) + max_diff_pos = [ + max_diff_pos[0][0], + max_diff_pos[0][0] // framesize, + max_diff_pos[1][0], + ] + + first_diff_pos = np.nonzero(diff) + first_diff_pos = [ + first_diff_pos[0][0], + first_diff_pos[0][0] // framesize, + first_diff_pos[1][0], + ] + + nsamples_diff = np.nonzero(diff)[0].size + nsamples_diff_percentage = nsamples_diff / (nsamples_total * nchannels) * 100.0 + nframes = nsamples_total // framesize + nframes_diff = 0 + + result = { + "bitexact": False, + "max_abs_diff": max_diff, + "max_abs_diff_pos_sample": max_diff_pos[0], + "max_abs_diff_pos_channel": max_diff_pos[2], + "nsamples_diff": nsamples_diff, + "nsamples_diff_percentage": nsamples_diff_percentage, + "first_diff_pos_sample": first_diff_pos[0], + "first_diff_pos_channel": first_diff_pos[2], + "first_diff_pos_frame": first_diff_pos[1], + } + + if per_frame: + for fr in range(nframes): + diff_fr = diff[fr * framesize : ((fr + 1) * framesize), :] + nframes_diff += 1 if diff_fr.nonzero()[0].size > 0 else 0 + nframes_diff_percentage = nframes_diff / nframes * 100.0 + result["max_abs_diff_pos_frame"] = max_diff_pos[1] + result["nframes_diff"] = nframes_diff + result["nframes_diff_percentage"] = nframes_diff_percentage + + return result + + +def getdelay( + x: np.ndarray, + y: np.ndarray, +) -> int: + """ + Get the delay between two audio signals + + Parameters + ---------- + x: np.ndarray + Input reference array + y: np.ndarray + Input test array + + Returns + ------- + result: int + Delay of y in samples with respect to x (median of individual channel delays) + """ + + if x.ndim == 1: + n_samples_x = x.shape + n_chan_x = 1 + else: + n_samples_x, n_chan_x = x.shape + if y.ndim == 1: + n_samples_y = y.shape + n_chan_y = 1 + else: + n_samples_y, n_chan_y = y.shape + if n_chan_x != n_chan_y: + 
raise ValueError("Channel counts of x and y must match!")
+    lags = np.arange(-n_samples_x + 1, n_samples_y)
+    lag = np.zeros([n_chan_x, 1], dtype=int)
+    for chan in range(n_chan_x):
+        correlation = sig.correlate(y[:, chan], x[:, chan], mode="full")
+        lag[chan] = lags[np.argmax(correlation)]
+    return int(np.median(lag))
+
+
+def mono_downmix(x: np.ndarray) -> np.ndarray:
+    """
+    Create a passive mono downmix of a multi-channel audio signal
+    """
+    return np.sum(x, axis=1)
+
+
+def mute_channels(
+    x: np.ndarray, mute: Optional[Union[list, np.ndarray]] = None
+) -> np.ndarray:
+    """
+    Mute the given audio channels in the signal (no-op if mute is None or empty)
+    """
+    if mute is not None and len(mute) > 0:
+        x[:, mute] = 0
+    return x
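
# --- Editorial usage sketch (not part of the patch): estimating the lag of a
# --- delayed copy with getdelay(). Module path assumed from this patch set.
import numpy as np

from item_generation_scripts.audiotools.audioarray import delay, getdelay

fs = 48000
x = np.random.randn(fs, 1)  # reference
y = delay(x.copy(), fs, 120, samples=True)  # delay by 120 samples
print(getdelay(x, y))  # expected: 120
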
diff --git a/item_generation_scripts/audiotools/audiofile.py b/item_generation_scripts/audiotools/audiofile.py
new file mode 100644
index 00000000..954c91f8
--- /dev/null
+++ b/item_generation_scripts/audiotools/audiofile.py
@@ -0,0 +1,433 @@
+#!/usr/bin/env python3
+
+#
+# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository. All Rights Reserved.
+#
+# This software is protected by copyright law and by international treaties.
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository retain full ownership rights in their respective contributions in
+# the software. This notice grants no license of any kind, including but not limited to patent
+# license, nor is any license granted by implication, estoppel or otherwise.
+#
+# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+# contributions.
+#
+# This software is provided "AS IS", without any express or implied warranties. The software is in the
+# development stage. It is intended exclusively for experts who have experience with such software and
+# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+# and fitness for a particular purpose are hereby disclaimed and excluded.
+#
+# Any dispute, controversy or claim arising under or in relation to providing this software shall be
+# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+# the United Nations Convention on Contracts on the International Sales of Goods.
+#
+
+import logging
+import struct
+from pathlib import Path
+from typing import Optional, Tuple, Union
+
+import numpy as np
+import scipy.io.wavfile as wav
+
+from .audioarray import trim, window
+
+logger = logging.getLogger("__main__")
+logger.setLevel(logging.DEBUG)
+
+
+def read(
+    filename: Union[str, Path],
+    nchannels: Optional[int] = 1,
+    fs: Optional[int] = 48000,
+    outdtype: Optional[str] = "float",
+) -> Tuple[np.ndarray, int]:
+    """
+    Read audio file (.pcm, .wav or .raw)
+
+    Parameters
+    ----------
+    filename: Union[str, Path]
+        Input file path
+    nchannels: Optional[int]
+        Number of input channels, required for .pcm, otherwise default = 1
+    fs: Optional[int]
+        Input sampling rate, required for .pcm input files, otherwise default = 48000 (Hz)
+    outdtype: Optional[str]
+        Data type of output array, python builtin or np.dtype
+
+    Returns
+    -------
+    x: np.ndarray
+        Audio signal array
+    fs: int
+        Signal sampling frequency
+    """
+
+    file_extension = Path(filename).suffix
+
+    if file_extension == ".wav":
+        fs, data = wav.read(filename)
+        if data.dtype == np.int32:
+            data = np.interp(
+                data,
+                (np.iinfo(np.int32).min, np.iinfo(np.int32).max),
+                (np.iinfo(np.int16).min, np.iinfo(np.int16).max),
+            )
+        elif data.dtype == np.float32:
+            data = np.interp(
+                data,
+                (-1, 1),
+                (np.iinfo(np.int16).min, np.iinfo(np.int16).max),
+            )
+        x = np.array(data, dtype=outdtype)
+        file_len = x.shape[0]
+        if x.ndim == 1:
+            # force to be a mtx
+            x = np.reshape(x, (file_len, 1))
+    elif file_extension in [".pcm", ".raw"]:
+        x = np.fromfile(filename, dtype=np.int16).astype(outdtype)
+        signal_len = len(x) // nchannels
+        try:
+            x = x.reshape(signal_len, nchannels)
+        except ValueError:
+            raise ValueError("Wrong number of channels")
+    else:
+        raise ValueError("Wrong input format. Use wav, pcm or raw")
+
+    return x, fs
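
# --- Editorial usage sketch (not part of the patch): round-tripping headerless
# --- PCM through the read()/write() pair defined in this file.
import numpy as np

from item_generation_scripts.audiotools.audiofile import read, write

x = (np.random.randn(48000, 2) * 1000).astype("float")
write("tmp_item.pcm", x, fs=48000)  # interleaved 16-bit PCM
y, fs = read("tmp_item.pcm", nchannels=2, fs=48000)
assert y.shape == (48000, 2) and fs == 48000
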
Use wav, pcm or raw") + + +def concat( + in_filenames: list, + out_file: str, + silence_pre: Optional[int] = 0, + silence_post: Optional[int] = 0, + in_fs: Optional[int] = 48000, + num_channels: Optional[int] = None, + pad_noise: Optional[bool] = False, + preamble: Optional[int] = None, + pad_noise_preamble: Optional[bool] = False, +) -> list: + """ + Horizontally concatenates audio files into one long file + + Parameters + __________ + in_filenames: list + Input list of filenmames (.pcm, .raw or .wav) + out_file: str + Output multi-channel audio file name (.pcm, .raw or .wav) + silence_pre: int + Padded zeros before signal in samples + silence_post: int + Padded zeros after signal in samples + in_fs: Optional[int] + Input sampling rate, default 48000 Hz + pad_noise: Optional[bool] + If true noise will be padded otherwise zeros will be padded + + Returns + ------- + splits + List of sample indices to split the resulting file at + """ + + y = None + fs_compare = 0 + + # create a list of splits + splits = [0] + + # Read input files + for in_file in in_filenames: + x, fs = read(in_file, fs=in_fs, nchannels=num_channels) + if fs_compare and fs_compare != fs: + raise ValueError("Sampling rates of files to concatenate don't match") + else: + fs_compare = fs + + # pad with very low amplitude noise + x = trim( + x, in_fs, (-silence_pre, -silence_post), samples=True, pad_noise=pad_noise + ) + + # add the length to our splits list + splits.append(splits[-1] + x.shape[0]) + + # concatenate + y = np.concatenate([y, x]) if y is not None else x + + # add preamble + if preamble: + y = trim(y, in_fs, (-preamble, 0), pad_noise_preamble) + + write(out_file, y, fs=in_fs) + + return splits[1:] + + +def split( + in_filename: Union[str, Path], + out_folder: Union[str, Path], + split_filenames: list[Union[str, Path]], + splits: list[int], + in_fs: Optional[int] = 48000, + preamble: Optional[int] = 0, + loudness: Optional[float] = None, +) -> list[Union[str, Path]]: + """ + Horizontally splits audio files into multiple shorter files and applies windowing and scaling + + Parameters + __________ + in_filename: Union[str, Path] + Input filenmame (.pcm, .raw or .wav) + out_folder: Union[str, Path] + Output folder where to put the splits + split_filenames: list[Union[str, Path]] + List of names for the split files + splits: list[int] + List of sample indices where to cut the signal + in_fs: Optional[int] + Input sampling rate, default 48000 Hz + loudness: Optional[float] + Desired loudness of individual files + """ + + # create a list of output files + out_paths = [] + + # Read input file + x, fs = read(in_filename, fs=in_fs) + + # remove preamble + if preamble: + x = trim(x, fs, (preamble, 0)) + + split_old = 0 + for idx, split in enumerate(splits): + out_file = Path(out_folder) / Path(split_filenames[idx]).with_suffix( + in_filename.suffix + ) + + # add the path to our list + out_paths.append(out_file) + + # split + y = x[split_old:split, :] + + # windowing + y = window(y) + + # write file + write(out_file, y, fs=in_fs) + + split_old = split + + return out_paths + + +def combine( + in_filenames: list, + out_file: str, + in_fs: Optional[int] = 48000, +) -> None: + """ + Combines audio files into one multi-channel file + + Parameters + ---------- + in_filenames: list + Input list of filenmames (.pcm, .raw or .wav) + out_file: str + Output multi-channel audio file name (.pcm, .raw or .wav) + in_fs: Optional[int] + Input sampling rate, required for .pcm and .raw input file, default 48000 Hz + + Returns + ------- + None 
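
# --- Editorial usage sketch (not part of the patch): concatenating two items
# --- with padding, then recovering the pieces from the returned split indices.
# --- The file names are illustrative assumptions.
from item_generation_scripts.audiotools.audiofile import concat, split

splits = concat(["a.wav", "b.wav"], "long.wav", silence_pre=4800, silence_post=4800)
split("long.wav", "out_dir", ["a_cut", "b_cut"], splits)
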
+ """ + + y = None + fs_compare = 0 + + # Read input files + for in_file in in_filenames: + # assign correct channel + x, fs = read(in_file, fs=in_fs) + if fs_compare and fs_compare != in_fs: + raise ValueError("Sampling rates of files to combine don't match") + else: + fs_compare = fs + if y is None: + y = x + else: + if x.shape[0] > y.shape[0]: + x = x[: y.shape[0], :] + elif y.shape[0] > x.shape[0]: + y = y[: x.shape[0], :] + y = np.column_stack([y, x]) + + write(out_file, y, fs=in_fs) + + +def split_channels( + in_file: str, + out_filenames: list, + in_nchans: int, + in_fs: Optional[int] = 48000, +) -> None: + """ + Split multi-channel audio files into individual mono files + + Parameters + ---------- + in_file: str + Input file name (.pcm, .raw or .wav) + out_filenames: list + List of output file names (.pcm, .raw or .wav) + in_nchans: int + Input number of channels + in_fs: Optional[int] = 48000 + Input sampling rate, default 48000 Hz + + Returns + ------- + None + """ + + # validation + if in_nchans is None: + raise ValueError("Number of channels to split must be specified!") + if in_nchans != len(out_filenames): + print( + "Split: Mismatch between number of channels and output filenames length. Truncating output filenames list." + ) + out_filenames = out_filenames[:in_nchans] + + x, in_fs = read(in_file, nchannels=in_nchans, fs=in_fs) + + # Write output files + for idx, out_file in enumerate(out_filenames): + # extract correct channel + y = x[:, idx] + + write(out_file, y, fs=in_fs) + + +def parse_wave_header( + filename: str, +) -> dict: + """ + Get the format information from a WAV file. + Return a dictionary with the format information + + Parameters + ---------- + filename : string or open file handle + Input WAV file. + + Returns + ------- + Dictionary + """ + + with open(filename, "rb") as fid: + riff = fid.read(4) + + if riff == b"RIFF": + binary_format = "<" + elif riff == b"RIFX": + binary_format = ">" + else: + raise IOError("No RIFF chunk found!") + + wav_size = struct.unpack(f"{binary_format}I", fid.read(4))[0] + + wav_identifier = fid.read(4) + if wav_identifier != b"WAVE": + raise IOError("No WAVE chunk found!") + + fmt_chunk_id = fid.read(4) + + if fmt_chunk_id == b"fmt ": + fmt_size = struct.unpack(f"{binary_format}I", fid.read(4))[0] + wav_format = struct.unpack(f"{binary_format}H", fid.read(2))[0] + channels = struct.unpack(f"{binary_format}H", fid.read(2))[0] + fs = struct.unpack(f"{binary_format}I", fid.read(4))[0] + bytes_per_second = struct.unpack(f"{binary_format}I", fid.read(4))[0] + block_align = struct.unpack(f"{binary_format}H", fid.read(2))[0] + bit_depth = struct.unpack(f"{binary_format}H", fid.read(2))[0] + rem_bytes = fmt_size - 16 + ext_param_size = 0 + ext_param = None + if rem_bytes: + ext_param_size = struct.unpack(f"{binary_format}H", fid.read(2))[0] + + if ext_param_size: + ext_param = fid.read(ext_param_size) + else: + raise IOError("Missing or corrupt fmt chunk!") + + return { + "size": wav_size, + "format_tag": wav_format, + "channels": channels, + "fs": fs, + "bytes_per_second": bytes_per_second, + "block_align": block_align, + "bit_depth": bit_depth, + "ext_param_size": ext_param_size, + "ext_param": ext_param, + } diff --git a/item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_FULL.mat b/item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_FULL.mat new file mode 100644 index 00000000..42e702db --- /dev/null +++ 
b/item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_FULL.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3ddecef64dfcf8887904b5cc370c0d9723bd8fd1637e32232205cdcd739b80d +size 12623190 diff --git a/item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_LS.mat b/item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_LS.mat new file mode 100644 index 00000000..1d590edb --- /dev/null +++ b/item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_LS.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2c964b96d802532c0ecf1076092c7d246a54293a3a0c4c72995953c66bfec71 +size 6348499 diff --git a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA1.mat b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA1.mat new file mode 100644 index 00000000..4f59a8a9 --- /dev/null +++ b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA1.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a9ad5d8d874ac2fb851f5d2b0b303494f1d115612e9f6cab40e5eb33591b05c +size 4630 diff --git a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA2.mat b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA2.mat new file mode 100644 index 00000000..1ad2162a --- /dev/null +++ b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA2.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc2a15579b80493597a8096bd815e8b847fe1880bdba760d4405122878b0b0a +size 10323 diff --git a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA3.mat b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA3.mat new file mode 100644 index 00000000..0e7c3ef4 --- /dev/null +++ b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA3.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83822cfa090c345a6ece14d1ec1a92023626f467e2f8d982cf099c071dfc1080 +size 18229 diff --git a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_FULL.mat b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_FULL.mat new file mode 100644 index 00000000..a2ab24e5 --- /dev/null +++ b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_FULL.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf86a03f0b13932c5c138af22584f864b75c5733df1b01ac3fdf7750a1bdbe5f +size 14335913 diff --git a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_LS.mat b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_LS.mat new file mode 100644 index 00000000..65c2684c --- /dev/null +++ b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_LS.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e25ef101e9e72c5d70a55bc1451a07d041d29f96a803d7d3f968f20fe403316 +size 20190 diff --git a/item_generation_scripts/audiotools/binaural_datasets/README.txt b/item_generation_scripts/audiotools/binaural_datasets/README.txt new file mode 100644 index 00000000..9fd37c96 --- /dev/null +++ b/item_generation_scripts/audiotools/binaural_datasets/README.txt @@ -0,0 +1,34 @@ +Files in this directory should contain impulse responses for use in rendering in Matlab .mat format +Samplingrate of 48kHz is assumed + +Files should adhere to the following naming scheme: + 
+{HRIR|BRIR}_{DATASETNAME}_{FULL|LS|SBA(1-3)}.mat
+
+- HRIR or BRIR
+    specifies the type of impulse response which will be used
+    for either BINAURAL or BINAURAL_ROOM output respectively
+- DATASETNAME
+    specifies the name used with the binaural_dataset commandline argument
+    or YAML key to enable selection of this dataset
+- FULL or LS or SBA(1-3)
+    specifies the subset of impulse responses in the file:
+    FULL: all available measurements on the sphere
+    LS: superset of supported loudspeaker layouts
+        (see audiotools.constants.CHANNEL_BASED_AUDIO_FORMATS["LS"])
+    SBA(1-3): impulse responses transformed to Ambisonics by external conversion
+        If available, SBA1 is used for FOA, SBA2 for HOA2 and SBA3 for HOA3;
+        if not available, SBA3 is used and truncated for all Ambisonic formats
+
+Each Matlab file should contain the following variables:
+- IR
+    Impulse responses with dimensions [ir_length x n_ears x n_channels]
+- SourcePosition
+    Array of {azimuth, elevation, radius} with dimensions [n_channels x 3]
+    required for FULL, optional otherwise
+- latency_s
+    Latency of the dataset in samples
+    optional, will be estimated if not provided
+
+LICENSES:
+Please see HRIR.txt and BRIR.txt for license info
\ No newline at end of file
diff --git a/item_generation_scripts/audiotools/binaural_datasets/__init__.py b/item_generation_scripts/audiotools/binaural_datasets/__init__.py
new file mode 100644
index 00000000..aea270d8
--- /dev/null
+++ b/item_generation_scripts/audiotools/binaural_datasets/__init__.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+
+#
+# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository. All Rights Reserved.
+#
+# This software is protected by copyright law and by international treaties.
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository retain full ownership rights in their respective contributions in
+# the software. This notice grants no license of any kind, including but not limited to patent
+# license, nor is any license granted by implication, estoppel or otherwise.
+#
+# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+# contributions.
+#
+# This software is provided "AS IS", without any express or implied warranties. The software is in the
+# development stage. It is intended exclusively for experts who have experience with such software and
+# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+# and fitness for a particular purpose are hereby disclaimed and excluded.
+# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# diff --git a/item_generation_scripts/audiotools/binaural_datasets/binaural_dataset.py b/item_generation_scripts/audiotools/binaural_datasets/binaural_dataset.py new file mode 100644 index 00000000..e6c4dbe7 --- /dev/null +++ b/item_generation_scripts/audiotools/binaural_datasets/binaural_dataset.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+# + +import warnings +from pathlib import Path +from typing import Optional, Tuple, Union + +import numpy as np +from scipy.io import loadmat + +from item_generation_scripts.audiotools.audio import fromtype +from item_generation_scripts.audiotools.constants import ( + CHANNEL_BASED_AUDIO_FORMATS, + OBJECT_BASED_AUDIO_FORMATS, + SCENE_BASED_AUDIO_FORMATS, +) +from item_generation_scripts.audiotools.EFAP import wrap_angles + + +def load_hrtf( + filename: Union[str, Path], +) -> Tuple[np.ndarray, np.ndarray, int]: + """ + Read HRTFs from Matlab dictionary file mat + + Parameters + ---------- + filename: str + HRTFs file name (.mat) + + Returns + ------- + IR: np.ndarray + Array of impulse responses + SourcePosition: np.ndarray + Array of source positions corresponding to the impulse responses + latency_s: int + Latency in samples + """ + + if not filename.exists(): + raise FileNotFoundError( + f"File {filename.name} was not found in dataset folder!" + ) + + mat_contents = loadmat(filename) + + try: + IR = mat_contents["IR"] + except KeyError: + raise KeyError(f"Key 'IR' not found in .mat file: {filename} !") + + SourcePosition = mat_contents.get("SourcePosition") + latency_s = mat_contents.get("latency_s") + if latency_s is not None: + latency_s = latency_s.astype(np.int32)[0, 0] + + return IR, SourcePosition, latency_s + + +def load_ir( + in_fmt: str, + out_fmt: str, + dataset: Optional[str] = None, +) -> Tuple[np.ndarray, np.ndarray, int]: + """ + Load IRs for a specified rendering format + + Parameters + ---------- + in_fmt: str + Input format + out_fmt: str + Output format + dataset: Optional[str] + Name of desired dataset without prefix and suffix + + Returns + ------- + IR: np.ndarray + Array of impulse responses + SourcePosition: np.ndarray + Array of source positions corresponding to the impulse responses + latency_smp: int + Latency in samples + """ + + dataset_prefix = None + dataset_suffix = None + + if out_fmt.startswith("BINAURAL") and "ROOM" in out_fmt: + dataset_prefix = "BRIR" + if dataset is None: + dataset = "IISofficialMPEG222UC" + + if in_fmt.startswith("MOZART"): + dataset_suffix = "FULL" + elif in_fmt in CHANNEL_BASED_AUDIO_FORMATS.keys(): + dataset_suffix = "LS" + + elif out_fmt.startswith("BINAURAL"): + dataset_prefix = "HRIR" + if dataset is None: + dataset = "ORANGE53" + + if in_fmt in OBJECT_BASED_AUDIO_FORMATS.keys() or in_fmt.startswith( + "CUSTOM_LS" + ): + dataset_suffix = "FULL" + elif in_fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() and in_fmt != "MONO": + dataset_suffix = "LS" + elif in_fmt in SCENE_BASED_AUDIO_FORMATS.keys(): + dataset = "ORANGE53_Dolby" + if in_fmt == "SBA1" or in_fmt == "FOA": + dataset_suffix = "SBA1" + # Use truncated SBA3 dataset if no SBA1 or 2 dataset exists + if not ( + Path(__file__).parent.joinpath( + f"{dataset_prefix}_{dataset}_{dataset_suffix}.mat" + ) + ).is_file(): + dataset_suffix = "SBA3" + warnings.warn("No SBA1 dataset found -> use truncated SBA3 dataset") + elif in_fmt.endswith("2"): + dataset_suffix = "SBA2" + # Use truncated SBA3 dataset if no SBA1 or 2 dataset exists + if not ( + Path(__file__).parent.joinpath( + f"{dataset_prefix}_{dataset}_{dataset_suffix}.mat" + ) + ).is_file(): + dataset_suffix = "SBA3" + warnings.warn("No SBA2 dataset found -> use truncated SBA3 dataset") + else: + dataset_suffix = "SBA3" + + path_dataset = Path(__file__).parent.joinpath( + f"{dataset_prefix}_{dataset}_{dataset_suffix}.mat" + ) + IR, SourcePosition, latency_s = load_hrtf(path_dataset) + + if latency_s is not None: + latency_smp = 
latency_s + else: + latency_smp = int(np.min(np.argmax(np.sum(np.abs(IR), axis=1), axis=0))) + warnings.warn( + f"No latency of HRTF dataset specified in {path_dataset} file -> computed latency: {latency_smp} sample(s)" + ) + + if in_fmt.startswith("STEREO"): + IR = IR[:, :, :2] # use L and R channels. + elif ( + in_fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() + and not in_fmt.startswith("CUSTOM_LS") + and not in_fmt.startswith("MOZART") + ): + # extract positions from the loudspeaker file + in_fmt = fromtype(in_fmt) + tmp_fmt = fromtype("LS") + + IR_tmp = IR.copy() + IR = np.zeros([IR_tmp.shape[0], IR_tmp.shape[1], in_fmt.num_channels]) + + ir_index = 0 + for i in range(tmp_fmt.num_channels): + for j in range(in_fmt.num_channels): + if ( + tmp_fmt.ls_azi[i] == in_fmt.ls_azi[j] + and tmp_fmt.ls_ele[i] == in_fmt.ls_ele[j] + ): + if j != in_fmt.lfe_index[0]: + IR[:, :, ir_index] = IR_tmp[:, :, i] + ir_index += 1 + + return IR, SourcePosition, latency_smp + + +def find_ir( + SourcePosition: np.ndarray, + azi: float, + ele: float, + num_filter: Optional[int] = None, +) -> Tuple[np.ndarray, np.ndarray]: + """ + Find HRTF measurement closest to the selected direction + + Parameters + ---------- + SourcePosition: np.ndarray + Source IR positions + azi: float + Desired response azimuth + ele: float + Desired response elevation + num_filter: Optional[int] + Number of filters to return, if None return all + + Returns + ------- + i_dir: np.ndarray + Indices of nearest SourcePositions + dist_sort: np.ndarray + Distances corresponding to the indices + """ + + dist = dist_on_sphere(SourcePosition, azi, ele) + + if num_filter is None: + i_dir = np.argsort(dist) + dist_sort = np.sort(dist) + else: + i_dir = np.argsort(dist)[:num_filter] + dist_sort = np.sort(dist)[:num_filter] + + return i_dir, dist_sort + + +def dist_on_sphere( + positions: np.ndarray, + azi: float, + ele: float, +) -> np.ndarray: + """ + Compute great-circle distance + + Parameters + ---------- + positions: np.ndarray + Source IR positions + azi: float + Desired response azimuth + ele: float + Desired response elevation + + Returns + ------- + dist: np.ndarray + Distances from desired point + """ + + azi, ele = wrap_angles(azi, ele) + + delta_azi = np.deg2rad(np.abs(azi - positions[:, 0])) + + # compute great circle distance + a = np.sin(np.deg2rad(positions[:, 1])) * np.sin(np.deg2rad(ele)) + np.cos( + np.deg2rad(positions[:, 1]) + ) * np.cos(np.deg2rad(ele)) * np.cos(delta_azi) + if np.max(a) > 1.001 or np.min(a) < -1.001: + raise ValueError( + f"Absolute distance value larger than one! Min: {np.min(a)}, Max: {np.max(a)}" + ) + + # limiting to prevent errors in arccos due to numerical inaccuracies + a[a > 1] = 1 + a[a < -1] = -1 + dist = np.arccos(a) + + return dist diff --git a/item_generation_scripts/audiotools/binauralobjectrenderer.py b/item_generation_scripts/audiotools/binauralobjectrenderer.py new file mode 100644 index 00000000..548c4921 --- /dev/null +++ b/item_generation_scripts/audiotools/binauralobjectrenderer.py @@ -0,0 +1,652 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. 
+#
+# This software is protected by copyright law and by international treaties.
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository retain full ownership rights in their respective contributions in
+# the software. This notice grants no license of any kind, including but not limited to patent
+# license, nor is any license granted by implication, estoppel or otherwise.
+#
+# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+# contributions.
+#
+# This software is provided "AS IS", without any express or implied warranties. The software is in the
+# development stage. It is intended exclusively for experts who have experience with such software and
+# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+# and fitness for a particular purpose are hereby disclaimed and excluded.
+#
+# Any dispute, controversy or claim arising under or in relation to providing this software shall be
+# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+# the United Nations Convention on Contracts on the International Sales of Goods.
+#
+
+import itertools
+from itertools import repeat
+from typing import Optional, Tuple
+
+import numpy as np
+from scipy.signal import convolve
+
+from item_generation_scripts.audiotools.binaural_datasets.binaural_dataset import (
+    find_ir,
+)
+from item_generation_scripts.audiotools.constants import IVAS_FRAME_LEN_MS
+from item_generation_scripts.audiotools.EFAP import wrap_angles
+from item_generation_scripts.utils import apply_func_parallel
+
+
+def barycentric_weights(
+    azi_deg: np.ndarray,
+    ele_deg: np.ndarray,
+    pos_in: np.ndarray,
+    interp_1d: Optional[bool] = False,
+) -> Tuple[float, float, float]:
+    """
+    Computation of spherical barycentric weights
+    Implementation based on the paper "Spherical Barycentric Coordinates"
+    by T. Langer, A. Belyaev and H. Seidel
+
+    Parameters
+    ----------
+    azi_deg: np.ndarray
+        Azimuthal coordinates of three points that form a triangle in degrees
+    ele_deg: np.ndarray
+        Elevation coordinates of three points that form a triangle in degrees
+    pos_in: np.ndarray
+        Azimuthal and elevation coordinates in degrees of the point to compute weights for
+    interp_1d: bool
+        1d interpolation between two points
+
+    Returns
+    -------
+    W_1, W_2, W_3: scalar values
+        Barycentric weights for the corresponding vertices
+    """
+
+    # check if point is equal to vertex
+    for k in range(3):
+        if azi_deg[k] == pos_in[0] and ele_deg[k] == pos_in[1]:
+            output = np.zeros(3)
+            output[k] = 1
+            return tuple(output)
+
+    pos = np.copy(pos_in)
+
+    pos[0], pos[1] = wrap_angles(pos[0], pos[1])
+
+    # convert to rad
+    ele = (
+        -np.deg2rad(ele_deg, dtype="float64") + np.pi / 2
+    )  # different definition of elevation in metadata
+    azi = np.deg2rad(azi_deg, dtype="float64")
+    pos[0] = np.deg2rad(pos[0])
+    pos[1] = -np.deg2rad(pos[1]) + np.pi / 2
+
+    """ spherical barycentric coordinates """
+
+    # convert to cartesian coordinates
+    x = np.sin(ele) * np.cos(azi)
+    y = np.sin(ele) * np.sin(azi)
+    z = np.cos(ele)
+    pos_x = np.sin(pos[1]) * np.cos(pos[0])
+    pos_y = np.sin(pos[1]) * np.sin(pos[0])
+    pos_z = np.cos(pos[1])
+
+    pos_cart = np.array([pos_x, pos_y, pos_z])
+    v_1 = np.array([x[0], y[0], z[0]])
+    v_2 = np.array([x[1], y[1], z[1]])
+    v_3 = np.array([x[2], y[2], z[2]])
+
+    # rotate coordinate system
+    unit = np.array([0, 0, 1])
+    a = np.cross(pos_cart, unit)
+    b = np.dot(pos_cart, unit)
+    a_matrix = np.array([[0, -a[2], a[1]], [a[2], 0, -a[0]], [-a[1], a[0], 0]])
+    if b == -1:
+        rot_matrix = np.eye(3, 3)  # a and b point in opposite directions
+    else:
+        rot_matrix = np.eye(3, 3) + a_matrix + np.dot(a_matrix, a_matrix) / (1 + b)
+
+    v_1 = rot_matrix @ v_1
+    v_2 = rot_matrix @ v_2
+    v_3 = rot_matrix @ v_3
+    # test_vec = rot_matrix @ pos_cart  # should be [0, 0, 1]
+
+    # scale vertices to the tangent plane
+    v_1_plane = v_1 / v_1[2]
+    v_2_plane = v_2 / v_2[2]
+    v_3_plane = v_3 / v_3[2]
+    eps = 10**-10
+
+    # compute planar barycentric coordinates
+    denom = (v_2_plane[1] - v_3_plane[1]) * (v_1_plane[0] - v_3_plane[0]) + (
+        v_3_plane[0] - v_2_plane[0]
+    ) * (v_1_plane[1] - v_3_plane[1])
+    # denom is proportional to the area of the triangle -> when the area is zero, use linear 1d interpolation
+    if abs(denom) <= 10**-15:
+        interp_1d = True
+
+    if not interp_1d:
+        W_1_plane = (
+            (v_2_plane[1] - v_3_plane[1]) * (0 - v_3_plane[0])
+            + (v_3_plane[0] - v_2_plane[0]) * (0 - v_3_plane[1])
+        ) / (denom + eps)
+        W_2_plane = (
+            (v_3_plane[1] - v_1_plane[1]) * (0 - v_3_plane[0])
+            + (v_1_plane[0] - v_3_plane[0]) * (0 - v_3_plane[1])
+        ) / (denom + eps)
+        W_3_plane = 1 - W_1_plane - W_2_plane
+    else:
+        v_diff = np.array(
+            [v_1_plane[:-1], v_2_plane[:-1], v_3_plane[:-1]]
+        )  # z entry always one
+        dist_all = np.linalg.norm(v_diff, axis=1)
+        v_diff_norm = np.divide(v_diff, dist_all[:, None])
+        dot_v_ind = np.array(
+            [[0, 1], [1, 2], [2, 0]]
+        )  # the three possible combinations of points
+        # compute the dot product between all vertices to find pairs that lie in opposite directions w.r.t. the point
+        # in this case the dot product is -1 (due to normalization)
+        dot = np.empty(3)
+        k = 0
+        for ind_i, ind_j in dot_v_ind:
+            dot[k] = np.dot(v_diff_norm[ind_i], v_diff_norm[ind_j])
+            k += 1
+
+        margin = 10**-5
+        indices_minus_one = np.array(np.abs(dot + 1) < margin)
+        if indices_minus_one.any():  # test if one entry is -1
+            v_ind = dot_v_ind[indices_minus_one]
+            # use the vertex pair with the smallest distance from the origin (current position)
+            if np.shape(v_ind)[0] >= 2:
+                used_vertices = v_ind[
+                    np.argmin(
+                        np.array([sum(dist_all[v_ind[0]]), sum(dist_all[v_ind[1]])])
+                    )
+                ]
+            else:
+                used_vertices = v_ind[0]
+            dist = dist_all[used_vertices[0]] / sum(dist_all[used_vertices])
+            if 0 in used_vertices and 1 in used_vertices:
+                W_1_plane = 1 - dist
+                W_2_plane = dist
+                W_3_plane = 0
+            elif 1 in used_vertices and 2 in used_vertices:
+                W_1_plane = 0
+                W_2_plane = 1 - dist
+                W_3_plane = dist
+            elif 2 in used_vertices and 0 in used_vertices:
+                W_1_plane = dist
+                W_2_plane = 0
+                W_3_plane = 1 - dist
+            else:
+                raise ValueError("problem in 1d interpolation")
+        else:
+            # point does not lie on a line spanned by two of the points
+            W_1_plane = -1
+            W_2_plane = -1
+            W_3_plane = -1
+
+    # compute spherical weights from planar weights
+    W_1 = W_1_plane * np.dot(v_1, v_1_plane)
+    W_2 = W_2_plane * np.dot(v_2, v_2_plane)
+    W_3 = W_3_plane * np.dot(v_3, v_3_plane)
+
+    # avoid rejection of triangles due to numerical errors when the point lies on an edge of the triangle
+    threshold_error = -1 * 10**-8
+    if threshold_error < W_1 < 0:
+        W_1 = 0
+    if threshold_error < W_2 < 0:
+        W_2 = 0
+    if threshold_error < W_3 < 0:
+        W_3 = 0
+
+    return W_1, W_2, W_3
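
# --- Editorial usage sketch (not part of the patch): weights for a direction
# --- inside the spherical triangle spanned by three measurement points.
import numpy as np

from item_generation_scripts.audiotools.binauralobjectrenderer import (
    barycentric_weights,
)

azi = np.array([0.0, 30.0, 0.0])
ele = np.array([0.0, 0.0, 30.0])
w1, w2, w3 = barycentric_weights(azi, ele, np.array([10.0, 10.0]))
assert w1 >= 0 and w2 >= 0 and w3 >= 0  # point lies inside the triangle
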
the point
+        # in this case the dot product is -1 (due to normalization)
+        dot = np.empty(3)
+        k = 0
+        for ind_i, ind_j in dot_v_ind:
+            dot[k] = np.dot(v_diff_norm[ind_i], v_diff_norm[ind_j])
+            k += 1
+
+        margin = 10**-5
+        indices_minus_one = np.array(np.abs(dot + 1) < margin)
+        if indices_minus_one.any():  # test if one entry is -1
+            v_ind = dot_v_ind[indices_minus_one]
+            # use the vertex pair with the smallest distance from the origin (current position)
+            if np.shape(v_ind)[0] >= 2:
+                used_vertices = v_ind[
+                    np.argmin(
+                        np.array([sum(dist_all[v_ind[0]]), sum(dist_all[v_ind[1]])])
+                    )
+                ]
+            else:
+                used_vertices = v_ind[0]
+            dist = dist_all[used_vertices[0]] / sum(dist_all[used_vertices])
+            if 0 in used_vertices and 1 in used_vertices:
+                W_1_plane = 1 - dist
+                W_2_plane = dist
+                W_3_plane = 0
+            elif 1 in used_vertices and 2 in used_vertices:
+                W_1_plane = 0
+                W_2_plane = 1 - dist
+                W_3_plane = dist
+            elif 2 in used_vertices and 0 in used_vertices:
+                W_1_plane = dist
+                W_2_plane = 0
+                W_3_plane = 1 - dist
+            else:
+                raise ValueError("problem in 1d interpolation")
+        else:
+            # point does not lie on a line spanned by two of the points
+            W_1_plane = -1
+            W_2_plane = -1
+            W_3_plane = -1
+
+    # compute spherical weights from planar weights
+    W_1 = W_1_plane * np.dot(v_1, v_1_plane)
+    W_2 = W_2_plane * np.dot(v_2, v_2_plane)
+    W_3 = W_3_plane * np.dot(v_3, v_3_plane)
+
+    # avoid rejection of triangles due to numerical errors when the point lies on an edge of the triangle
+    threshold_error = -1 * 10**-8
+    if threshold_error < W_1 < 0:
+        W_1 = 0
+    if threshold_error < W_2 < 0:
+        W_2 = 0
+    if threshold_error < W_3 < 0:
+        W_3 = 0
+
+    return W_1, W_2, W_3
+
+
+def get_tri_weights(
+    pos: np.ndarray,
+    SourcePosition: np.ndarray,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Finds a suitable triangle of data points on the surface in which the given point lies
+
+    Parameters
+    ----------
+    pos: np.ndarray
+        Point of interest given as [azimuthal, elevation]
+    SourcePosition: np.ndarray
+        Positions of the source in the measurements in IR
+
+    Returns
+    -------
+    combination_vertices: np.ndarray
+        Indices of the three vertices in SourcePosition
+    W: np.ndarray
+        Barycentric weights of point in triangle;
+        if negative, no suitable triangle was found
+    """
+
+    W_1, W_2, W_3 = -1, -1, -1
+    index_triangle = 3
+    # get indices of source positions sorted by distance on the plane from pos
+    index_vertices, _ = find_ir(SourcePosition, pos[0], pos[1])
+    pos = np.array(wrap_angles(pos[0], pos[1]))
+    combination_vertices = None
+    while W_1 < 0 or W_2 < 0 or W_3 < 0:
+        if (
+            SourcePosition[index_vertices[0], 0] == pos[0]
+            and SourcePosition[index_vertices[0], 1] == pos[1]
+        ):
+            # if the position coincides with a position in the data set, take the first triangle that includes the point
+            combination_vertices = index_vertices[:3]
+            W_1, W_2, W_3 = (1, 0, 0)
+            break
+        index_HRIR = index_vertices[:index_triangle]  # get nearest positions
+        y_ele_all = SourcePosition[index_HRIR, 1]
+        if pos[1] > np.max(y_ele_all) or pos[1] < np.min(y_ele_all):
+            # no need to compute weights since all possible triangles lie completely above or below point
+            # attention: this can be problematic if no point is available at [0, +-90]
+            pass
+        else:
+            # test all triangle combinations with new point
+            for combination_vertices_tmp in itertools.combinations(index_HRIR[:-1], 2):
+                combination_vertices = np.concatenate(
+                    (index_HRIR[-1, None], combination_vertices_tmp), axis=0
+                )
+
+                x_azi = SourcePosition[combination_vertices, 0]
+                y_ele = SourcePosition[combination_vertices, 1]
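+                # A candidate triangle is accepted only if all three spherical
+                # barycentric weights come back non-negative, i.e. the query
+                # point lies inside the triangle or on one of its edges.
+                W_1, W_2, W_3 = 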
barycentric_weights(x_azi, y_ele, pos) + if W_1 >= 0 and W_2 >= 0 and W_3 >= 0: + # found suitable triangle + break + index_triangle += 1 + if index_triangle > 30: + # stop after too many iterations + return np.array(combination_vertices), np.array([-1, -1, -1]) + + W = np.array([W_1, W_2, W_3]) + return np.array(combination_vertices), W + + +def interpolate_2d( + azi_in: np.ndarray, + ele_in: np.ndarray, + values: np.ndarray, + pos: np.ndarray, + interp_1d: Optional[bool] = False, + weights: Optional[np.ndarray] = None, + ghost: Optional[list[bool]] = None, + SourcePosition: Optional[np.ndarray] = None, + IR: Optional[np.ndarray] = None, + phase: Optional[bool] = False, +) -> np.ndarray: + """ + Compute HRIR for point on surface spanned by three points via barycentric coordinates + + Parameters + ---------- + azi_in: np.ndarray + Azimuthal coordinates of three points that form a triangle in degrees + ele_in: np.ndarray + Elevation coordinates of three points that form a triangle in degrees + values: np.ndarray + Values to interpolate, here either HRIRs or magnitude or phase of HRTFs + pos: np.ndarray + Position of desired interpolation value + interp_1d: bool + 1d interpolation between two points + weights: tuple + If barycentric weights are already known these values are used + ghost: list of bool + If north and/or south pole is ghost source + SourcePosition: np.ndarray + Only necessary if at least one element in ghost is true + IR: np.ndarray + Only necessary if at least one element in ghost is true + phase: bool + If interpolated values are phases and should be wrapped + + Returns + ------- + HRIR: np.ndarray + Interpolated value at point pos + """ + + if ghost is None: + ghost = [False, False] + + if weights is None: + W_1, W_2, W_3 = barycentric_weights( + azi_in, ele_in, pos, interp_1d + ) # compute barycentric weights + else: + (W_1, W_2, W_3) = weights + + if ( + W_1 + W_2 + W_3 > 1.5 + ): # on sphere sum of weights is not necessarily equal to one! + raise ValueError( + f"Sum of positive barycentric weights larger than expected: {W_1 +W_2 +W_3}" + ) + + threshold_error = -1 * 10**-10 + if W_1 < threshold_error or W_2 < threshold_error or W_3 < threshold_error: + raise ValueError("Point lies outside of triangle! 
No interpolation possible") + + # do some phase unwrapping + if phase: + values = np.unwrap(values, axis=1) + + # treat potential ghost sources at the north and south pole + if (ghost[0] and 90 in ele_in) or (ghost[1] and -90 in ele_in): + if SourcePosition is None or IR is None: + raise ValueError( + "Source positions and IRs are required in interpolation if ghost source is used" + ) + ele_ghost = [] + additional_term = 0 + weights_copy = np.copy(weights) + if ghost[0] and 90 in ele_in: + ele_ghost.append(90) + if ghost[1] and -90 in ele_in: + ele_ghost.append(-90) + for ele_g in ele_ghost: + ind_dist, dist = find_ir(SourcePosition[: -len(ele_ghost)], 0, ele_g) + ind_dist = ind_dist[dist == dist[0]] + weight_spread = weights_copy[ele_in == ele_g] / len(ind_dist) + weights_copy[ele_in == ele_g] = 0 + additional_term += np.sum(IR[:, ind_dist], axis=1) * weight_spread + + HRIR = ( + values[:, 0] * W_1 + + values[:, 1] * W_2 + + values[:, 2] * W_3 + + additional_term + ) + + else: + HRIR = ( + values[:, 0] * W_1 + values[:, 1] * W_2 + values[:, 2] * W_3 + ) # apply weights + + return HRIR + + +def add_ghost_speaker_bary( + SourcePosition: np.ndarray, + IR: np.ndarray, +) -> Tuple[list[bool], np.ndarray, np.ndarray]: + """ + Adds a ghost speaker at the poles if necessary and indicates result by bool values + + Parameters + ---------- + SourcePosition: np.ndarray + All source positions + IR: np.ndarray + IRs at corresponding source positions + + Returns + ------- + ghost_pos: list of bool + If entry is True a ghost speaker is introduced at the north or south pole, respectively + SourcePosition: np.ndarray + All source positions plus poles if ghost_pos is True + IR: np.ndarray + IRs at corresponding source positions + """ + + ghost_pos = [False, False] + if 90 not in SourcePosition[:, 1]: + # if north pole is not in dataset add it + ghost_pos[0] = True + pole = np.array([0, 90, 1]) + SourcePosition = np.concatenate((SourcePosition, pole[None, :]), axis=0) + IR = np.concatenate((IR, np.zeros((*np.shape(IR)[:2], 1))), axis=2) + if -90 not in SourcePosition[:, 1]: + # if south pole is not in dataset add it + ghost_pos[1] = True + pole = np.array([0, -90, 1]) + SourcePosition = np.concatenate((SourcePosition, pole[None, :]), axis=0) + IR = np.concatenate((IR, np.zeros((*np.shape(IR)[:2], 1))), axis=2) + + return ghost_pos, SourcePosition, IR + + +def binaural_fftconv_framewise( + x: np.ndarray, + IR: np.ndarray, + SourcePosition: np.ndarray, + azi: Optional[np.ndarray] = None, + ele: Optional[np.ndarray] = None, + frame_len: Optional[int] = (IVAS_FRAME_LEN_MS // 4) * 48, +) -> np.ndarray: + """ + Binauralization using fft convolution with frame-wise processing + supports rotation on trajectories with interpolation between measured Source + positions, reimplemented roughly along the lines of ConvBinauralRenderer.m + + Parameters + ---------- + x: np.ndarray + Input multi-channel array + IR: np.ndarray + HRIRs array + SourcePosition: np.ndarray + Positions of the source in the measurements in IR + azi: np.ndarray + Azimuth angles for all frames + ele: np.ndarray + Elevation angles for all frames + frame_len: int + Frame length, optional, default = (IVAS_FRAME_LEN_MS // 4) * 48000 + + Returns + ------- + y: np.ndarray + Output binaural signal array + """ + + sig_len = x.shape[0] + N_frames = int( + sig_len / frame_len + ) # TODO add ceil function for non-integer frame length multiples + num_points_interp = 3 # interpolation in triangle + + N_HRIR_taps = IR.shape[0] + + if azi is None or ele is None: + 
azi = np.repeat([0.0], N_frames) + ele = np.repeat([0.0], N_frames) + elif len(azi) < N_frames or len(ele) < N_frames: + azi = np.concatenate( + [np.repeat(azi, N_frames // len(azi)), azi[: N_frames % len(azi)]] + ) + ele = np.concatenate( + [np.repeat(ele, N_frames // len(ele)), ele[: N_frames % len(ele)]] + ) + + indices_HRIR = np.empty([N_frames, num_points_interp], dtype=int) + IR_2d = np.empty((N_frames, N_HRIR_taps, 2, num_points_interp)) + Bary_weights = np.empty((N_frames, 3)) + + # find three points to form a triangle for interpolation + # test if point lies within triangle spanned by these points by checking the signas of barycentric coordinates + # if all weights are >= 0 the point lies within the triangle + for index in range(np.shape(SourcePosition)[0]): + SourcePosition[index, 0:2] = np.array( + wrap_angles(SourcePosition[index, 0], SourcePosition[index, 1]) + ) + + # add ghost speaker to poles if necessary + ghost_pos, SourcePosition, IR = add_ghost_speaker_bary(SourcePosition, IR) + for i_frame in range(N_frames): + if ( + i_frame + and azi[i_frame] == azi[i_frame - 1] + and ele[i_frame] == ele[i_frame - 1] + ): + IR_2d[i_frame] = IR_2d[i_frame - 1] + indices_HRIR[i_frame] = indices_HRIR[i_frame - 1] + Bary_weights[i_frame] = Bary_weights[i_frame - 1] + continue + pos = np.array([azi[i_frame], ele[i_frame]]) + combination_vertices, W = get_tri_weights(pos, SourcePosition) + if (W < 0).all(): + raise ValueError("No suitable triangle found in frame " + str(i_frame)) + IR_2d[i_frame] = IR[:, :, np.array(combination_vertices)] + indices_HRIR[i_frame] = combination_vertices + Bary_weights[i_frame] = W + + T_rev = frame_len + N_HRIR_taps - 1 + N_rev = int(np.ceil(T_rev / frame_len)) + + fade_in = np.arange(frame_len) / (frame_len - 1) + fade_in = fade_in[:, np.newaxis] + fade_out = 1.0 - fade_in + + # compute both ears in parallel + i_ear = list(range(2)) + result = apply_func_parallel( + render_ear, + zip( + i_ear, + repeat(frame_len), + repeat(N_frames), + repeat(N_rev), + repeat(T_rev), + repeat(fade_in), + repeat(fade_out), + repeat(x), + repeat(sig_len), + repeat(N_HRIR_taps), + repeat(azi), + repeat(ele), + repeat(SourcePosition), + repeat(IR_2d), + repeat(Bary_weights), + repeat(ghost_pos), + repeat(IR), + repeat(indices_HRIR), + ), + None, + "mp", + False, + ) + + y = np.stack(result, axis=1) + + return y[0:sig_len] + + +def render_ear( + i_ear, + frame_len, + N_frames, + N_rev, + T_rev, + fade_in, + fade_out, + x, + sig_len, + N_HRIR_taps, + azi, + ele, + SourcePosition, + IR_2d, + Bary_weights, + ghost_pos, + IR, + indices_HRIR, +) -> np.ndarray: + # function to process one ear used in multiprocessing + G = np.empty((N_frames, N_HRIR_taps)) + + for frame in range(N_frames): + pos = np.array([azi[frame], ele[frame]]) + # Interpolation of time-domain signals + G[frame] = interpolate_2d( + SourcePosition[indices_HRIR[frame], 0], + SourcePosition[indices_HRIR[frame], 1], + IR_2d[frame, :, i_ear], + pos, + weights=Bary_weights[frame], + ghost=ghost_pos, + SourcePosition=SourcePosition, + IR=IR[:, i_ear], + ) + + # frame wise parallel computation slow (many frames, small computational load per frame) + i_frame = list(range(N_frames)) + result = apply_func_parallel( + convolve_frame, + zip( + i_frame, + repeat(frame_len), + repeat(N_frames), + repeat(N_rev), + repeat(T_rev), + repeat(i_ear), + repeat(fade_in), + repeat(fade_out), + repeat(G), + repeat(x), + repeat(sig_len), + repeat(N_HRIR_taps), + ), + None, + "mt", + False, + ) + + return np.hstack(result) + + +def 
convolve_frame( + i_frame, + frame_len, + N_frames, + N_rev, + T_rev, + i_ear, + fade_in, + fade_out, + G, + x, + sig_len, + N_HRIR_taps, +) -> np.ndarray: + # function to process one frame used in multiprocessing + i1 = i_frame * frame_len + i2 = (i_frame + 1) * frame_len + + y0 = np.zeros([2, sig_len + N_HRIR_taps - 1, 2]) + + G0 = G[i_frame] + G1 = G[min(i_frame + 1, N_frames - 1)] + + for j_frame in range(max(0, i_frame - N_rev), min(i_frame + 1, N_frames)): + j1 = j_frame * frame_len + j2 = (j_frame + 1) * frame_len + j2p = j1 + T_rev + + y0[0, j1:j2p, i_ear] += convolve(np.squeeze(x[j1:j2]), G0) + y0[1, j1:j2p, i_ear] += convolve(np.squeeze(x[j1:j2]), G1) + + y_frame = ( + np.squeeze(fade_out) * y0[0, i1:i2, i_ear] + + np.squeeze(fade_in) * y0[1, i1:i2, i_ear] + ) + return y_frame diff --git a/item_generation_scripts/audiotools/constants.py b/item_generation_scripts/audiotools/constants.py new file mode 100644 index 00000000..c3af9d29 --- /dev/null +++ b/item_generation_scripts/audiotools/constants.py @@ -0,0 +1,704 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
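
For reference, a minimal sketch of the crossfaded overlap-add scheme behind binaural_fftconv_framewise / convolve_frame above. The 3-tap IRs and the white-noise input are invented for illustration and are not part of the patch:

import numpy as np
from scipy.signal import convolve

frame_len = (20 // 4) * 48                 # 5 ms subframe at 48 kHz
rng = np.random.default_rng(0)
x = rng.standard_normal(4 * frame_len)     # toy single-channel input
h_old = np.array([1.0, 0.5, 0.25])         # hypothetical previous-frame IR
h_new = np.array([0.8, 0.4, 0.2])          # hypothetical current-frame IR

fade_in = np.arange(frame_len) / (frame_len - 1)
fade_out = 1.0 - fade_in
tail = len(h_old) - 1

y_old = np.zeros(len(x) + tail)            # rendering with the old IR
y_new = np.zeros(len(x) + tail)            # rendering with the new IR
y = np.zeros(len(x))
for i in range(len(x) // frame_len):
    i1, i2 = i * frame_len, (i + 1) * frame_len
    # overlap-add: each frame convolution spills a tail into later frames
    y_old[i1 : i2 + tail] += convolve(x[i1:i2], h_old)
    y_new[i1 : i2 + tail] += convolve(x[i1:i2], h_new)
    # crossfade between the two renderings inside the frame window
    y[i1:i2] = fade_out * y_old[i1:i2] + fade_in * y_new[i1:i2]

In the actual renderer the IR pair changes every frame, so the tails of past frames are re-rendered with the current interpolated HRIR before the crossfade is applied.
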
+# + +import numpy as np + +BINAURAL_AUDIO_FORMATS = { + "BINAURAL": { + "num_channels": 2, + }, + "BINAURAL_ROOM": { + "num_channels": 2, + }, +} + +BINAURAL_LFE_GAIN = 10 ** (5.5 / 20) + +LFE_INDEX_DEFAULT = 3 + +LS_AZI_MONO = [0] +LS_ELE_MONO = [0] + +LS_AZI_STEREO = [30, -30] +LS_ELE_STEREO = [0, 0] + +LS_AZI_CICP6 = [30, -30, 0, 0, 110, -110] +LS_ELE_CICP6 = [0, 0, 0, 0, 0, 0] + +LS_AZI_CICP12 = [30, -30, 0, 0, 110, -110, 135, -135] +LS_ELE_CICP12 = [0, 0, 0, 0, 0, 0, 0, 0] + +LS_AZI_CICP14 = [30, -30, 0, 0, 110, -110, 30, -30] +LS_ELE_CICP14 = [0, 0, 0, 0, 0, 0, 35, 35] + +LS_AZI_CICP16 = [30, -30, 0, 0, 110, -110, 30, -30, 110, -110] +LS_ELE_CICP16 = [0, 0, 0, 0, 0, 0, 35, 35, 35, 35] + +LS_AZI_CICP19 = [30, -30, 0, 0, 135, -135, 90, -90, 30, -30, 135, -135] +LS_ELE_CICP19 = [0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 35] + + +CHANNEL_BASED_AUDIO_FORMATS = { + "MONO": { + "num_channels": 1, + "ls_azi": LS_AZI_MONO, + "ls_ele": LS_ELE_MONO, + "lfe_index": [], + }, + "STEREO": { + "num_channels": 2, + "ls_azi": LS_AZI_STEREO, + "ls_ele": LS_ELE_STEREO, + "lfe_index": [], + }, + "5_1": { + "num_channels": 6, + "ls_azi": LS_AZI_CICP6, + "ls_ele": LS_ELE_CICP6, + "lfe_index": [LFE_INDEX_DEFAULT], + }, + "5_1_2": { + "num_channels": 8, + "ls_azi": LS_AZI_CICP14, + "ls_ele": LS_ELE_CICP14, + "lfe_index": [LFE_INDEX_DEFAULT], + }, + "5_1_4": { + "num_channels": 10, + "ls_azi": LS_AZI_CICP16, + "ls_ele": LS_ELE_CICP16, + "lfe_index": [LFE_INDEX_DEFAULT], + }, + "7_1": { + "num_channels": 8, + "ls_azi": LS_AZI_CICP12, + "ls_ele": LS_ELE_CICP12, + "lfe_index": [LFE_INDEX_DEFAULT], + }, + "7_1_4": { + "num_channels": 12, + "ls_azi": LS_AZI_CICP19, + "ls_ele": LS_ELE_CICP19, + "lfe_index": [LFE_INDEX_DEFAULT], + }, + "LS": { + "num_channels": 15, + "ls_azi": [ + 30, + -30, + 0, + 135, + -135, + 110, + -110, + 90, + -90, + 30, + -30, + 110, + -110, + 135, + -135, + ], + "ls_ele": [0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 35, 35, 35], + "lfe_index": [], + }, + "MOZART": { + "num_channels": 30, + "ls_azi": [ + 0, + 0, + 135, + -135, + 30, + -30, + 180, + 0, + 90, + -90, + 45, + -45, + 0, + 0, + 135, + -135, + 90, + -90, + 180, + 0, + 45, + -45, + 60, + -60, + 110, + -110, + 30, + -30, + 110, + -110, + ], + "ls_ele": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 35, + 35, + 35, + 90, + 35, + 35, + 35, + 35, + 35, + -15, + -15, + -15, + 0, + 0, + 0, + 0, + 35, + 35, + 35, + 35, + ], + "lfe_index": [1, 7], + }, + "CUSTOM_LS": { + "num_channels": -1, + "ls_azi": None, + "ls_ele": None, + "lfe_index": None, + }, +} + +# Support a variety of names for multichannel configs +CHANNEL_BASED_AUDIO_ALTNAMES = { + # 5_1 + 51: "5_1", # YAML by default will interpret underscore delimited numbers as integers, similar to python + "5d1": "5_1", + "5.1": "5_1", + "CICP6": "5_1", + # 7_1 + 71: "7_1", + "7d1": "7_1", + "7.1": "7_1", + "CICP12": "7_1", + # 5_1_2 + 512: "5_1_2", + "5d1p2": "5_1_2", + "5.1+2": "5_1_2", + "5.1.2": "5_1_2", + "CICP14": "5_1_2", + # 5_1_4 + 514: "5_1_4", + "5d1p4": "5_1_4", + "5.1+4": "5_1_4", + "5.1.4": "5_1_4", + "CICP16": "5_1_4", + # 7_1_4 + 714: "7_1_4", + "7d1p4": "7_1_4", + "7.1+4": "7_1_4", + "7.1.4": "7_1_4", + "CICP19": "7_1_4", +} + +METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS = { + "MASA1": { + "num_channels": 1, + }, + "MASA2": { + "num_channels": 2, + }, +} +OBJECT_BASED_AUDIO_FORMATS = { + "ISM1": { + "num_channels": 1, + }, + "ISM2": { + "num_channels": 2, + }, + "ISM3": { + "num_channels": 3, + }, + "ISM4": { + "num_channels": 4, + }, +} + + +SCENE_BASED_AUDIO_FORMATS = { + 
"FOA": { + "num_channels": 4, + "is_planar": False, + }, + "HOA2": { + "num_channels": 9, + "is_planar": False, + }, + "HOA3": { + "num_channels": 16, + "is_planar": False, + }, + "PLANARFOA": { + "num_channels": 4, + "is_planar": True, + }, + "PLANARHOA2": { + "num_channels": 9, + "is_planar": True, + }, + "PLANARHOA3": { + "num_channels": 16, + "is_planar": True, + }, + "SBA1": { + "num_channels": 4, + "is_planar": False, + }, + "SBA2": { + "num_channels": 9, + "is_planar": False, + }, + "SBA3": { + "num_channels": 16, + "is_planar": False, + }, +} + +SCENE_METADATA_FORMATS = {"META"} + +AUDIO_FORMATS = [ + BINAURAL_AUDIO_FORMATS, + CHANNEL_BASED_AUDIO_FORMATS, + METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, + OBJECT_BASED_AUDIO_FORMATS, + SCENE_BASED_AUDIO_FORMATS, +] + + +IVAS_FRAME_LEN_MS = 20 + +IVAS_CICPX_TO_MONO = np.array( + [ + [ + 1, + 1, + 1, + 1, + 0.79999995, + 0.79999995, + 0.79999995, + 0.79999995, + 0.849999964, + 0.849999964, + 0.849999964, + 0.849999964, + ] + ] +).T + +IVAS_CICPX_TO_STEREO = np.array( + [ + [1, 0], + [0, 1], + [np.sqrt(0.5), np.sqrt(0.5)], + [np.sqrt(0.5), np.sqrt(0.5)], + [0.79999995, 0], + [0, 0.79999995], + [0.79999995, 0], + [0, 0.79999995], + [0.849999964, 0], + [0, 0.849999964], + [0.849999964, 0], + [0, 0.849999964], + ] +) + +# downmix matrices +IVAS_CICP12_TO_6 = np.zeros(8 * 6) +IVAS_CICP12_TO_6[[0, 7, 14, 21, 28, 35, 40, 47]] = 1 +IVAS_CICP12_TO_6 = IVAS_CICP12_TO_6.reshape(8, 6) + +IVAS_CICP14_TO_6 = np.zeros(8 * 6) +IVAS_CICP14_TO_6[[0, 7, 14, 21, 28, 35]] = 1 +IVAS_CICP14_TO_6[[36, 43]] = 0.849999964 +IVAS_CICP14_TO_6 = IVAS_CICP14_TO_6.reshape(8, 6) + +IVAS_CICP16_TO_6 = np.zeros(10 * 6) +IVAS_CICP16_TO_6[[0, 7, 14, 21, 28, 35]] = 1 +IVAS_CICP16_TO_6[[36, 43, 52, 59]] = 0.849999964 +IVAS_CICP16_TO_6 = IVAS_CICP16_TO_6.reshape(10, 6) + +IVAS_CICP16_TO_12 = np.zeros(10 * 8) +IVAS_CICP16_TO_12[[0, 9, 18, 27, 36, 45]] = 1 +IVAS_CICP16_TO_12[[48, 57, 68, 77]] = 0.849999964 +IVAS_CICP16_TO_12 = IVAS_CICP16_TO_12.reshape(10, 8) + +IVAS_CICP16_TO_14 = np.zeros(10 * 8) +IVAS_CICP16_TO_14[[0, 9, 18, 27, 36, 45, 54, 63]] = 1 +IVAS_CICP16_TO_14[[68, 77]] = 0.849999964 +IVAS_CICP16_TO_14 = IVAS_CICP16_TO_14.reshape(10, 8) + +IVAS_CICP19_TO_6 = np.zeros(12 * 6) +IVAS_CICP19_TO_6[[0, 7, 14, 21, 28, 35]] = 1 +IVAS_CICP19_TO_6[[36, 43]] = 0.367322683 +IVAS_CICP19_TO_6[[48, 55, 64, 71]] = 0.849999964 +IVAS_CICP19_TO_6[[40, 47]] = 0.930093586 +IVAS_CICP19_TO_6 = IVAS_CICP19_TO_6.reshape(12, 6) + +IVAS_CICP19_TO_12 = np.zeros(12 * 8) +IVAS_CICP19_TO_12[[0, 9, 18, 27, 38, 47]] = 1 +IVAS_CICP19_TO_12[[48, 57]] = 0.367322683 +IVAS_CICP19_TO_12[[64, 73, 84, 93]] = 0.849999964 +IVAS_CICP19_TO_12[[52, 61]] = 0.930093586 +IVAS_CICP19_TO_12 = IVAS_CICP19_TO_12.reshape(12, 8) + +IVAS_CICP19_TO_14 = np.zeros(12 * 8) +IVAS_CICP19_TO_14[[0, 9, 18, 27, 36, 45, 70, 79]] = 1 +IVAS_CICP19_TO_14[[48, 57]] = 0.367322683 +IVAS_CICP19_TO_14[[84, 93]] = 0.849999964 +IVAS_CICP19_TO_14[[52, 61]] = 0.930093586 +IVAS_CICP19_TO_14 = IVAS_CICP19_TO_14.reshape(12, 8) + +IVAS_CICP19_TO_16 = np.zeros(12 * 10) +IVAS_CICP19_TO_16[[0, 11, 22, 33, 44, 55, 86, 97, 108, 119]] = 1 +IVAS_CICP19_TO_16[[60, 71]] = 0.367322683 +IVAS_CICP19_TO_16[[64, 75]] = 0.930093586 +IVAS_CICP19_TO_16 = IVAS_CICP19_TO_16.reshape(12, 10) + +# upmix matrices +IVAS_MONO_TO_CICPX = np.zeros([1, 12]) +IVAS_MONO_TO_CICPX[0, 2] = 1 + +IVAS_STEREO_TO_CICPX = np.zeros([2, 12]) +IVAS_STEREO_TO_CICPX[0, 0] = 1 +IVAS_STEREO_TO_CICPX[1, 1] = 1 + +IVAS_CICP12_TO_14 = np.zeros(8 * 8) +IVAS_CICP12_TO_14[[0, 9, 18, 27, 36, 45, 52, 
61]] = 1 +IVAS_CICP12_TO_14 = IVAS_CICP12_TO_14.reshape(8, 8) + +IVAS_CICP12_TO_16 = np.zeros(8 * 10) +IVAS_CICP12_TO_16[[0, 11, 22, 33, 44, 55, 64, 75]] = 1 +IVAS_CICP12_TO_16 = IVAS_CICP12_TO_16.reshape(8, 10) + +IVAS_CICP12_TO_19 = np.zeros(8 * 12) +IVAS_CICP12_TO_19[[0, 13, 26, 39, 54, 67, 76, 89]] = 1 +IVAS_CICP12_TO_19 = IVAS_CICP12_TO_19.reshape(8, 12) + +IVAS_CICP14_TO_19 = np.zeros(8 * 12) +IVAS_CICP14_TO_19[[0, 13, 26, 39, 52, 65, 80, 93]] = 1 +IVAS_CICP14_TO_19 = IVAS_CICP14_TO_19.reshape(8, 12) + +IVAS_CICP16_TO_19 = np.zeros(10 * 12) +IVAS_CICP16_TO_19[[0, 13, 26, 39, 52, 65, 80, 93, 106, 119]] = 1 +IVAS_CICP16_TO_19 = IVAS_CICP16_TO_19.reshape(10, 12) + +# mapping dict +IVAS_MC_CONVERSION = { + "MONO": { + # upmix + "5_1": IVAS_MONO_TO_CICPX[:, :6], + "7_1": IVAS_MONO_TO_CICPX[:, :8], + "5_1_2": IVAS_MONO_TO_CICPX[:, :8], + "5_1_4": IVAS_MONO_TO_CICPX[:, :10], + "7_1_4": IVAS_MONO_TO_CICPX[:, :12], + }, + "STEREO": { + # upmix + "5_1": IVAS_STEREO_TO_CICPX[:, :6], + "7_1": IVAS_STEREO_TO_CICPX[:, :8], + "5_1_2": IVAS_STEREO_TO_CICPX[:, :8], + "5_1_4": IVAS_STEREO_TO_CICPX[:, :10], + "7_1_4": IVAS_STEREO_TO_CICPX[:, :12], + }, + "5_1": { + # downmix + "MONO": IVAS_CICPX_TO_MONO[:6, :], + "STEREO": IVAS_CICPX_TO_STEREO[:6, :], + # upmix + "7_1": np.pad(np.eye(6), [[0, 0], [0, 2]]), + "5_1_2": np.pad(np.eye(6), [[0, 0], [0, 2]]), + "5_1_4": np.pad(np.eye(6), [[0, 0], [0, 4]]), + "7_1_4": np.pad(np.eye(6), [[0, 0], [0, 6]]), + }, + "7_1": { + # downmix + "MONO": IVAS_CICPX_TO_MONO[:8, :], + "STEREO": IVAS_CICPX_TO_STEREO[:8, :], + "5_1": IVAS_CICP12_TO_6, + # upmix + "5_1_2": IVAS_CICP12_TO_14, + "5_1_4": IVAS_CICP12_TO_16, + "7_1_4": IVAS_CICP12_TO_19, + }, + "5_1_2": { + # downmix + "MONO": np.vstack([IVAS_CICPX_TO_MONO[:6, :], IVAS_CICPX_TO_MONO[-2:, :]]), + "STEREO": np.vstack( + [IVAS_CICPX_TO_STEREO[:6, :], IVAS_CICPX_TO_STEREO[-2:, :]] + ), + "5_1": IVAS_CICP14_TO_6, + "7_1": np.pad(IVAS_CICP14_TO_6, [[0, 0], [0, 2]]), + # upmix + "5_1_4": np.pad(np.eye(8), [[0, 0], [0, 2]]), + "7_1_4": IVAS_CICP14_TO_19, + }, + "5_1_4": { + # downmix + "MONO": np.vstack([IVAS_CICPX_TO_MONO[:6, :], IVAS_CICPX_TO_MONO[-4:, :]]), + "STEREO": np.vstack( + [IVAS_CICPX_TO_STEREO[:6, :], IVAS_CICPX_TO_STEREO[-4:, :]] + ), + "5_1": IVAS_CICP16_TO_6, + "7_1": IVAS_CICP16_TO_12, + "5_1_2": IVAS_CICP16_TO_14, + # upmix + "7_1_4": IVAS_CICP16_TO_19, + }, + "7_1_4": { + # downmix + "MONO": IVAS_CICPX_TO_MONO, + "STEREO": IVAS_CICPX_TO_STEREO, + "5_1": IVAS_CICP19_TO_6, + "7_1": IVAS_CICP19_TO_12, + "5_1_2": IVAS_CICP19_TO_14, + "5_1_4": IVAS_CICP19_TO_16, + }, +} + +# LFE 120 Hz LPF filter coefficients +IVAS_LPF_4_BUTTER_48K_SOS = np.array( + [ + [ + 5.12617881476274e-09, + 1.02523584294987e-08, + 5.12617879059970e-09, + 1, + -1.96875982668433, + 0.969044914826862, + ], + [ + 1, + 1.99999984394358, + 1.00000000471366, + 1, + -1.98677297369091, + 0.987060670205863, + ], + ] +) + +T_DESIGN_11_AZI = np.array( + [ + 132.927291884332, + -83.9349499672527, + 8.47410038634525, + -113.340833834572, + -103.265909909537, + -33.2370360923825, + 21.8564347471830, + -156.539486489880, + -64.2647531387317, + 165.779530068738, + -25.2028339893249, + -97.0037973959711, + 27.8546391256925, + 153.214218975132, + -155.061608694663, + -11.8421354925543, + 80.5387312016125, + -42.0561606270165, + -31.2233262205060, + 38.8379041944063, + 93.7606877469492, + -84.7560200078398, + 7.75536818082863, + -122.276883381108, + 46.8012705252113, + -24.7686335284573, + 99.8904719062334, + -134.783996960185, + -83.0880230164493, + 
60.1281736000420, + 152.644656278084, + 29.7576658909417, + 40.7793187974476, + 110.183927562412, + 165.652065916454, + -12.9926632105736, + 79.7359893585681, + -50.5245271190884, + 118.923930267733, + 47.2202861862577, + 171.925276523721, + -62.5145800558502, + -11.1156697680531, + 132.018041099963, + -135.355486412425, + 102.370921576708, + 112.739282398012, + -178.304963670831, + -122.319932198534, + 59.0763464570905, + 151.704200334501, + 21.3763364190503, + -169.005476417779, + 118.980811786769, + -116.089295979010, + 9.64767870353308, + 60.8933243657771, + -156.021526862757, + -63.4602993325163, + 174.929787427393, + -175.288768596346, + -105.951907934032, + -50.1928304519800, + 131.358266702971, + -136.296815007542, + 93.5644603506407, + -97.0840116473627, + -169.158278888619, + -44.1323835471345, + 81.4795403841382, + ] +) + +T_DESIGN_11_ELE = np.array( + [ + 7.69254738757899, + -23.7300652200871, + 23.5127556185301, + 70.4225940747938, + -9.89694439538752, + -70.7513316063095, + -26.4618527647561, + 47.7764936689044, + -7.72047049524459, + 44.5343602375216, + 26.3897904767450, + -44.6578850137166, + 9.76703456924600, + -47.7053318175498, + 7.45302934155972, + -23.5901209534773, + 23.7194484034707, + 70.4382693912270, + -9.83541588740259, + -70.4980825105727, + -26.2949218109204, + 47.6148028805222, + -7.51718499746626, + 44.2862347125773, + 26.6442619674660, + -44.5693707254340, + 9.91271928508000, + -47.9599550372574, + 7.29679922953795, + -23.3445981426306, + 23.6415261666079, + 70.6843143997832, + -9.58140351749889, + -70.3934534122902, + -26.4258159091605, + 47.7510668062369, + -7.30853603036844, + 44.2632768570349, + 26.7140614474957, + -44.3149733480527, + 9.75899721561506, + -48.0361913333593, + 7.43965099805872, + -23.3326075548841, + 23.3868959687598, + 70.8219078016791, + -9.48596399169388, + -70.5801867828491, + -26.6740262349265, + 47.9978414043199, + -7.38276167631068, + 44.4970603752708, + 26.5024990214418, + -44.2461913308458, + 9.51845076548334, + -47.8281351088411, + 7.68427447425834, + -23.5706842106942, + 23.3074499244045, + 70.6586472132300, + -9.68088860263008, + -70.8026785673948, + -26.6963451935976, + 48.0136296461397, + -7.63734823159200, + 44.6651234222196, + 26.3023490002159, + -44.4576351865647, + 9.52341455917443, + -47.6242211091394, + ] +) +PLANAR_HOA_CHANNELS_ACN = np.array([0, 1, 3, 4, 8, 9, 15]) +VERT_HOA_CHANNELS_ACN = np.array([2, 5, 6, 7, 10, 11, 12, 13, 14]) + +SEED_PADDING = 0 + +# delay in number of samples +DELAY_COMPENSATION_FOR_FILTERING = { + "SHQ2": { + "up": 436, + "down": 218, + }, + "SHQ3": { + "up": 436, + "down": 145, + }, + "MSIN": 92, + "LP1p5": 322, + "LP35": 232, + "LP7": 117, + "LP10": 82, + "LP12": 164, + "LP14": 234, + "LP20": 161, + "HP50_32KHZ": 559, + "HP50_48KHZ": 839, +} diff --git a/item_generation_scripts/audiotools/convert/__init__.py b/item_generation_scripts/audiotools/convert/__init__.py new file mode 100644 index 00000000..4ec23739 --- /dev/null +++ b/item_generation_scripts/audiotools/convert/__init__.py @@ -0,0 +1,323 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. 
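
As a usage note for the conversion matrices defined above: render_cba_to_cba (later in this patch) applies them as a plain matrix product on (samples x channels) arrays. A minimal sketch, with a silent one-frame buffer standing in for real audio:

import numpy as np

from item_generation_scripts.audiotools.constants import IVAS_MC_CONVERSION

x = np.zeros((960, 12))                    # hypothetical 20 ms 7.1.4 frame at 48 kHz
dmx = IVAS_MC_CONVERSION["7_1_4"]["5_1"]   # (12, 6) downmix matrix (IVAS_CICP19_TO_6)
y = x @ dmx                                # (960, 6) 5.1 downmix
assert y.shape == (960, 6)
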
+# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +import logging +from pathlib import Path, PurePath +from typing import Optional, Union + +from item_generation_scripts.audiotools import audio, audioarray, metadata +from item_generation_scripts.audiotools.audiofile import write +from item_generation_scripts.audiotools.convert.channelbased import convert_channelbased +from item_generation_scripts.audiotools.convert.masa import convert_masa +from item_generation_scripts.audiotools.convert.objectbased import convert_objectbased +from item_generation_scripts.audiotools.convert.scenebased import convert_scenebased +from item_generation_scripts.audiotools.wrappers.bs1770 import loudness_norm +from item_generation_scripts.audiotools.wrappers.esdru import esdru +from item_generation_scripts.audiotools.wrappers.filter import ( + hp50filter_itu, + lpfilter_itu, + resample_itu, +) +from item_generation_scripts.audiotools.wrappers.p50fbmnru import p50fbmnru + +from ..metadata import write_ISM_metadata_in_file + + +def convert_file( + in_file: Union[str, Path], + out_file: Union[str, Path], + in_fs: int, + in_fmt: Union[str, Path], + out_fmt: Optional[Union[str, Path]] = None, + out_fs: Optional[int] = None, + in_meta: Optional[list] = None, + logger: Optional[logging.Logger] = None, + **kwargs, +) -> None: + """Conversion function for one audio file""" + + if not in_fmt: + raise ValueError("Input audio format must be specified!") + + # get audio class object - can be either a regular single audio or scene description .txt + if not isinstance(in_fmt, PurePath) and in_fmt.startswith("META"): + input = metadata.Metadata(in_file) + else: + input = audio.fromfile(in_fmt, in_file, in_fs, in_meta) + + # try to set reasonable defaults if missing + if not in_fs: + in_fs = input.fs + if not out_fs: + out_fs = input.fs + + if not out_fmt: + if isinstance(input, metadata.Metadata): + raise ValueError( + "Output format must be specified 
for scene description files!" + ) + else: + out_fmt = input.name + + output = audio.fromtype(out_fmt) + if isinstance(output, audio.ObjectBasedAudio): + try: + output.object_pos = input.object_pos + output.metadata_files = input.metadata_files + except Exception: + raise ValueError( + "ISM is not supported as an output for rendering! Only usable as pass-through" + ) + + if isinstance(input, metadata.Metadata): + if logger: + logger.debug(f"Converting metadata to {out_fmt} : {in_file} -> {out_file}") + + # render each audio instance separately + for audio_in in input.audio: + output.fs = out_fs + tmp = audio.fromtype(out_fmt) + tmp.fs = in_fs # resampling not yet applied + convert(audio_in, tmp, in_fs=in_fs, out_fs=out_fs, logger=logger, **kwargs) + if output.audio is not None: + output.audio += tmp.audio + else: + output.audio = tmp.audio + else: + if logger: + logger.debug(f"Converting {in_fmt} to {out_fmt} : {in_file} -> {out_file}") + # run main conversion method + output.fs = in_fs # resampling not yet applied + convert(input, output, in_fs=in_fs, out_fs=out_fs, logger=logger, **kwargs) + + # write output + write(out_file, output.audio, output.fs) + if isinstance(output, audio.ObjectBasedAudio): + write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True) + + +def convert( + input: audio.Audio, + output: audio.Audio, + in_trim: Optional[list] = None, + in_pad_noise: Optional[bool] = False, + in_delay: Optional[float] = None, + in_fs: Optional[int] = None, + in_cutoff: Optional[int] = None, + in_hp50: Optional[bool] = None, + in_window: Optional[list] = None, + in_loudness: Optional[float] = None, + in_loudness_fmt: Optional[str] = None, + out_trim: Optional[list] = None, + out_pad_noise: Optional[bool] = False, + out_delay: Optional[float] = None, + out_fs: Optional[int] = None, + out_cutoff: Optional[int] = None, + out_hp50: Optional[bool] = None, + out_window: Optional[list] = None, + out_loudness: Optional[float] = None, + out_loudness_fmt: Optional[str] = None, + limit: Optional[bool] = False, + mnru_q: Optional[float] = None, + esdru_alpha: Optional[float] = None, + logger: Optional[logging.Logger] = None, + **kwargs, +) -> None: + """Perform pre-processing, conversion and post-processing""" + + """pre-processing""" + process_audio( + x=input, + trim=in_trim, + pad_noise=in_pad_noise, + delay=in_delay, + fs=in_fs, + fc=in_cutoff, + hp50=in_hp50, + window=in_window, + loudness=in_loudness, + loudness_fmt=in_loudness_fmt, + logger=logger, + ) + + """format conversion""" + format_conversion(input, output, logger=logger, **kwargs) + + """post-processing""" + process_audio( + x=output, + trim=out_trim, + pad_noise=out_pad_noise, + delay=out_delay, + fs=out_fs, + fc=out_cutoff, + hp50=out_hp50, + window=out_window, + loudness=out_loudness, + loudness_fmt=out_loudness_fmt, + limit=limit, + mnru_q=mnru_q, + esdru_alpha=esdru_alpha, + logger=logger, + ) + + +def process_audio( + x: audio.Audio, + trim: Optional[list] = None, + pad_noise: Optional[bool] = False, + delay: Optional[float] = None, + fs: Optional[int] = None, + fc: Optional[int] = None, + hp50: Optional[bool] = False, + window: Optional[float] = None, + loudness: Optional[float] = None, + loudness_fmt: Optional[str] = None, + limit: Optional[bool] = False, + mnru_q: Optional[float] = None, + esdru_alpha: Optional[float] = None, + logger: Optional[logging.Logger] = None, +) -> None: + """Perform (pre-/pos-) processing of audio""" + + if fs is None: + fs = x.fs + + """delay audio""" + if delay is not None: + 
if logger: + logger.debug(f"Delaying audio by {delay} ms") + x.audio = audioarray.delay(x.audio, x.fs, delay) + + """trim or pad audio""" + if trim is not None: + if isinstance(x, audio.ObjectBasedAudio): + # metadata concatenation necessary for ISM + metadata.trim_meta(x, tuple(trim), pad_noise) + else: + x.audio = audioarray.trim(x.audio, x.fs, tuple(trim), pad_noise) + + """windowing""" + if window is not None: + if logger: + logger.debug(f"Windowing audio with {window} ms Hann window") + x.audio = audioarray.window(x.audio, x.fs, window) + + """high-pass (50 Hz) filtering""" + if hp50: + if logger: + logger.debug("Applying 50 Hz high-pass filter using ITU STL filter") + x.audio = hp50filter_itu(x) + + """resampling""" + if x.fs != fs: + if logger: + logger.debug(f"Resampling from {x.fs} to {fs} using ITU STL filter") + x.audio = resample_itu(x, fs) + x.fs = fs + + """loudness normalization""" + if loudness is not None: + if logger: + logger.debug( + f"Applying loudness adjustment to {loudness} LKFS for format {loudness_fmt} using ITU STL bs1770demo" + ) + x.audio = loudness_norm(x, loudness, loudness_fmt) + + """low-pass filtering""" + if fc is not None: + if logger: + logger.debug( + f"Applying low-pass filter with cutoff {fc} Hz using ITU STL filter" + ) + x.audio = lpfilter_itu(x, fc) + + """MNRU""" + if mnru_q is not None: + if logger: + logger.debug("Applying P.50 Fullband MNRU") + x.audio = p50fbmnru(x, mnru_q) + + """ESDRU""" + if esdru_alpha is not None: + if logger: + logger.debug("Applying ESDRU Recommendation ITU-T P.811") + x.audio = esdru(x, esdru_alpha) + + """limiting""" + if limit: + if logger: + logger.debug("Applying limiter") + audioarray.limiter(x.audio, x.fs) + + +def format_conversion( + input: audio.Audio, + output: audio.Audio, + logger: Optional[logging.Logger] = None, + **kwargs, +) -> None: + """Convert one audio format to another""" + + # validation + if isinstance(output, audio.MetadataAssistedSpatialAudio): + raise NotImplementedError("MASA is not supported as an output for rendering!") + + if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name: + raise NotImplementedError( + "ISM is not supported as an output for rendering! Only usable as pass-through" + ) + + if logger: + logger.debug(f"Format conversion: {input.name} -> {output.name}") + + if input.name == output.name or ( + input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL") + ): + output.audio = input.audio + else: + if isinstance(input, audio.BinauralAudio): + raise NotImplementedError( + f"{input.name} is not supported as an input for rendering!" 
+ ) + elif isinstance(input, audio.ChannelBasedAudio): + convert_channelbased(input, output, **kwargs) + elif isinstance(input, audio.MetadataAssistedSpatialAudio): + convert_masa(input, output, **kwargs) + elif isinstance(input, audio.ObjectBasedAudio): + convert_objectbased(input, output, **kwargs) + elif isinstance(input, audio.SceneBasedAudio): + convert_scenebased(input, output, **kwargs) + else: + raise NotImplementedError( + f"Unknown or unsupported audio format {input.name}" + ) diff --git a/item_generation_scripts/audiotools/convert/binaural.py b/item_generation_scripts/audiotools/convert/binaural.py new file mode 100644 index 00000000..b23e69ee --- /dev/null +++ b/item_generation_scripts/audiotools/convert/binaural.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
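
For orientation, a hypothetical end-to-end invocation of the conversion pipeline above. The file names and option values are invented; the extra keyword arguments are forwarded through convert() to the post-processing stage:

from item_generation_scripts.audiotools.convert import convert_file

convert_file(
    in_file="item_714.wav",        # hypothetical input item
    out_file="item_bin.wav",
    in_fs=48000,
    in_fmt="7_1_4",
    out_fmt="BINAURAL",
    out_loudness=-26,              # loudness target in LKFS after rendering
    out_loudness_fmt="BINAURAL",
    limit=True,                    # limiting is the last post-processing step
)
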
+# + +from typing import Optional + +import numpy as np +from scipy.signal import fftconvolve + + +def NS2SA( + fs: float, + x: float, +) -> int: + """ + Converts from nanoseconds to number of samples + + Parameters + ---------- + fs: float + Sampling rate + x: float + Duration in nano seconds + + Returns + ------- + Number of samples + """ + + return int(int(fs / 100) * (x / 100) / 100000) + + +def binaural_fftconv( + x: np.ndarray, + IR: np.ndarray, + nchannels: int, + lfe_index: Optional[list[int]] = None, +) -> np.ndarray: + """ + Binauralization using fft convolution + + Parameters + ---------- + x: np.ndarray + Input multi-channel array + IR: np.ndarray + HRIRs array + nchannels: int + Maximum number of channels to process + lfe_index: Optional[list[int]] + List of LFE channel indices + + Returns + ------- + y: np.ndarray + Output convolved signal array + """ + + if lfe_index is None: + lfe_index = [] + + y = np.zeros([x.shape[0], 2]) + for chan_idx in range(min(x.shape[1], nchannels)): + if chan_idx not in lfe_index: + y[:, 0] = np.add( + y[:, 0], + fftconvolve(x[:, chan_idx].astype(float), IR[:, 0, chan_idx]).astype( + float + )[: x.shape[0]], + ) + y[:, 1] = np.add( + y[:, 1], + fftconvolve(x[:, chan_idx].astype(float), IR[:, 1, chan_idx]).astype( + float + )[: x.shape[0]], + ) + else: + ... + + return y diff --git a/item_generation_scripts/audiotools/convert/channelbased.py b/item_generation_scripts/audiotools/convert/channelbased.py new file mode 100644 index 00000000..a8d941e2 --- /dev/null +++ b/item_generation_scripts/audiotools/convert/channelbased.py @@ -0,0 +1,390 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. 
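
A small sanity check for the two helpers above; the random HRIR set is a stand-in for a real dataset and all values are invented:

import numpy as np

from item_generation_scripts.audiotools.convert.binaural import (
    NS2SA,
    binaural_fftconv,
)

assert NS2SA(48000, 20_000_000) == 960     # 20 ms in ns -> samples at 48 kHz

rng = np.random.default_rng(0)
x = rng.standard_normal((48000, 6))        # 1 s of a 6-channel (5.1) signal
IR = rng.standard_normal((256, 2, 6))      # (taps, ears, channels) toy HRIRs
y = binaural_fftconv(x, IR, nchannels=6, lfe_index=[3])
assert y.shape == (48000, 2)               # LFE (index 3) is skipped
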
+# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +from pathlib import Path +from typing import Optional, Tuple, Union + +import numpy as np + +from item_generation_scripts.audiotools import audio +from item_generation_scripts.audiotools.audioarray import delay, framewise_io +from item_generation_scripts.audiotools.binaural_datasets.binaural_dataset import ( + load_ir, +) +from item_generation_scripts.audiotools.constants import ( + BINAURAL_LFE_GAIN, + IVAS_FRAME_LEN_MS, + IVAS_MC_CONVERSION, +) +from item_generation_scripts.audiotools.convert import scenebased +from item_generation_scripts.audiotools.convert.binaural import binaural_fftconv +from item_generation_scripts.audiotools.EFAP import EFAP +from item_generation_scripts.audiotools.rotation import Quat2RotMat, rotateAziEle +from item_generation_scripts.audiotools.wrappers.filter import resample_itu + +""" ChannelBasedAudio functions """ + + +def convert_channelbased( + cba: audio.ChannelBasedAudio, + out: audio.Audio, + **kwargs, +) -> audio.Audio: + """Convert channel-based audio to the requested output format""" + # CBA -> Binaural + if isinstance(out, audio.BinauralAudio): + render_cba_to_binaural(cba, out, **kwargs) + + # CBA -> CBA + elif isinstance(out, audio.ChannelBasedAudio): + render_cba_to_cba(cba, out) + + # CBA -> SBA + elif isinstance(out, audio.SceneBasedAudio): + render_cba_to_sba(cba, out) + + else: + raise NotImplementedError( + f"Conversion from {cba.name} to {out.name} is unsupported!" 
+        )
+
+    return out
+
+
+def render_cba_to_binaural(
+    cba: audio.ChannelBasedAudio,
+    bin: audio.BinauralAudio,
+    trajectory: Optional[Union[str, Path]] = None,
+    bin_dataset: Optional[str] = None,
+    bin_lfe_gain: Optional[float] = None,
+    **kwargs,
+) -> None:
+    """
+    Binauralization of channel-based audio
+
+    Parameters
+    ----------
+    cba: audio.ChannelBasedAudio
+        Channel-based input audio
+    bin: audio.BinauralAudio
+        Binaural output audio
+    trajectory: Optional[Union[str, Path]]
+        Head rotation trajectory path
+    bin_dataset: Optional[str]
+        Name of binaural dataset without prefix or suffix
+    bin_lfe_gain: Optional[float]
+        LFE gain for binaural rendering
+    """
+
+    if cba.name == "MONO":
+        # mono cannot be binauralized directly -> render to stereo and treat it as a binaural signal
+        cba_stereo = audio.fromtype("STEREO")
+        cba_stereo.fs = bin.fs
+        render_cba_to_cba(cba, cba_stereo)
+        bin.audio = cba_stereo.audio
+        return
+
+    cba.audio = resample_itu(cba, 48000)
+    old_fs = cba.fs
+    cba.fs = 48000
+    bin.fs = 48000
+
+    if trajectory is not None:
+        cba.audio = rotate_cba(cba, trajectory)
+
+    IR, _, latency_smp = load_ir(cba.name, bin.name, bin_dataset)
+
+    # render LFE
+    if bin_lfe_gain is not None:
+        bin_lfe, lfe_delay_ns = render_lfe_to_binaural(
+            cba.audio, cba.fs, cba.lfe_index, bin_lfe_gain
+        )
+
+    # render rest of the signal
+    bin.audio = binaural_fftconv(cba.audio, IR, cba.num_channels, cba.lfe_index)
+    # compensate delay from binaural dataset
+    bin.audio = delay(bin.audio, bin.fs, -latency_smp, samples=True)
+
+    # add LFE back to the rendered signal
+    if bin_lfe_gain is not None:
+        bin.audio += bin_lfe
+
+    bin.audio = resample_itu(bin, old_fs)
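
A hypothetical call of the renderer above; the item name and the dataset identifier are invented (the dataset name is assumed to follow the HRIR_*.mat files shipped with this patch):

from item_generation_scripts.audiotools import audio

cba = audio.fromfile("5_1", "item_51.wav", 48000)   # hypothetical 5.1 item
bin_out = audio.fromtype("BINAURAL")
bin_out.fs = 48000
render_cba_to_binaural(cba, bin_out, bin_dataset="ORANGE53")
# bin_out.audio now holds the two-channel binaural rendering

+
+
+def render_custom_ls_binaural(
+    custom_ls: audio.ChannelBasedAudio,
+    output: audio.BinauralAudio,
+    IR: np.ndarray,
+    SourcePosition: np.ndarray,
+    trajectory: str,
+):
+    # TODO rework impl. 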
(with EFAP) + # logger.info(" Processing channels on custom LS layout") + # azis = ", ".join([f"{a:7.2f}" for a in ls_azi_all]) + # eles = ", ".join([f"{e:7.2f}" for e in ls_ele_all]) + # logger.info(f" azi: {azis}") + # logger.info(f" ele: {eles}") + # logger.info(f" lfe_index: {lfe_index_all}") + + # if output.name == "BINAURAL_ROOM": + # tmp = get_audio_type("MOZART") + # convert_channel_based(custom_ls, tmp) + # logger.info(f" {custom_ls.name} -> {tmp.name} -> {output.name}") + # custom_ls.audio = tmp.audio + # else: + # tmp = custom_ls + # + # ls_azi_all = tmp.ls_azi + # ls_ele_all = tmp.ls_ele + # lfe_index_all = tmp.lfe_index + # + # frame_len = (IVAS_FRAME_LEN_MS // 4) * (fs // 1000) + # sig_len = custom_ls.audio.shape[0] + # N_frames = int(sig_len / frame_len) + # + # i_ls = 0 + # y = np.zeros([sig_len, 2]) + # for i_chan in range(custom_ls.audio.shape[1]): + # + # # skip LFE + # if i_chan in lfe_index_all: + # continue + # + # # skip silent (or very low volume) channels + # if np.allclose(custom_ls.audio[:, i_chan], 0.0, atol=32.0): + # continue + # + # ls_azi = np.repeat(ls_azi_all[i_ls], N_frames) + # ls_ele = np.repeat(ls_ele_all[i_ls], N_frames) + # + # azi, ele = rotateISM(ls_azi, ls_ele, trajectory=trajectory) + # + # y += binaural_fftconv_framewise( + # custom_ls.audio[:, i_chan], + # IR, + # SourcePosition, + # frame_len=frame_len, + # azi=azi, + # ele=ele, + # ) + # i_ls += 1 + # + # return y + return + + +def render_cba_to_cba( + cba_in: audio.ChannelBasedAudio, cba_out: audio.ChannelBasedAudio +) -> None: + """ + Rendering of channel-based input signal to channel-based output + + Parameters + ---------- + cba_in: audio.ObjectBasedAudio + Channel-based input audio + cba_out: audio.ChannelBasedAudio + Channel-based output audio + """ + + # Stereo to Mono + if cba_in.name == "STEREO" and cba_out.name == "MONO": + render_mtx = np.vstack([[0.5], [0.5]]) + else: + try: + render_mtx = IVAS_MC_CONVERSION[cba_in.name][cba_out.name] + except KeyError: + # Use EFAP panning if no matrix was found + panner = EFAP( + np.delete(cba_out.ls_azi, cba_out.lfe_index).astype(float), + np.delete(cba_out.ls_ele, cba_out.lfe_index).astype(float), + ) + + render_mtx = np.vstack( + [ + panner.pan(a, e).T + for i, (a, e) in enumerate(zip(cba_in.ls_azi, cba_in.ls_ele)) + if i not in cba_in.lfe_index + ] + ) + + # pass-through for LFE + for index in np.sort(cba_in.lfe_index): + render_mtx = np.insert(render_mtx, index, 0, axis=0) + render_mtx = np.insert(render_mtx, cba_out.lfe_index, 0, axis=1) + render_mtx[cba_in.lfe_index, cba_out.lfe_index] = 1 + + if cba_out.num_channels <= 2: + render_mtx[cba_in.lfe_index, :] = 0 + + cba_out.audio = cba_in.audio @ render_mtx + + +def render_cba_to_sba(cba: audio.ChannelBasedAudio, sba: audio.SceneBasedAudio) -> None: + """ + Rendering of channel-based input signal to SBA output + + Parameters + ---------- + cba: audio.ObjectBasedAudio + Channel-based input audio + sba: audio.ChannelBasedAudio + SBA output audio + """ + + if cba.name == "MONO": + raise ValueError(f"Rendering from MONO to {sba.name} is not supported.") + + # SH response for loudspeaker positions + render_mtx = np.hstack( + [ + scenebased.getRSH(np.array([a]), np.array([e]), sba.ambi_order) + for a, e in zip(cba.ls_azi, cba.ls_ele) + ] + ).T + render_mtx[cba.lfe_index] = 0 + + sba.audio = cba.audio @ render_mtx + # do not add LFE to output + if sba.is_planar: + scenebased.zero_vert_channels(sba) + + +def rotate_cba( + cba: audio.ChannelBasedAudio, + trajectory: str, +) -> np.ndarray: + """ + 
Rotate MC signal by applying a rotation matrix calculated from the current quaternion + in each subframe + + Parameters: + ---------- + x: np.ndarray + Input multichannel signal + trajectory: str + Path to trajectory file + + Returns: + ---------- + y: np.ndarray + Rotated multichannel signal + """ + + trj_data = np.genfromtxt(trajectory, delimiter=",") + trj_frames = trj_data.shape[0] + + sig_len = cba.audio.shape[0] + sig_dim = cba.audio.shape[1] + frame_len = (IVAS_FRAME_LEN_MS // 4) * 48 + + out = np.zeros([sig_len, sig_dim]) + + panner = EFAP(cba.ls_azi, cba.ls_ele) + + fade_in = np.arange(frame_len) / (frame_len - 1) + fade_in = fade_in[:, np.newaxis] + fade_out = 1.0 - fade_in + + R_old = np.eye(cba.num_channels) + + for i, (frame_in, frame_out) in framewise_io(cba.audio, out, frame_len): + # update the crossfade if we have a smaller last frame + if frame_out.shape[0] != frame_len: + frame_size = frame_out.shape[0] + fade_in = np.arange(frame_size) / (frame_size - 1) + fade_in = fade_in[:, np.newaxis] + fade_out = 1.0 - fade_in + + q = trj_data[i % trj_frames, :] + rotated_pos = np.array( + [rotateAziEle(a, e, Quat2RotMat(q)) for a, e in zip(cba.ls_azi, cba.ls_ele)] + ) + R = panner.pan(rotated_pos[:, 0], rotated_pos[:, 1]) + R[:, [cba.lfe_index]] = 0 + R[[cba.lfe_index], :] = 0 + R[cba.lfe_index, cba.lfe_index] = 1 + + frame_out[:, :] = (fade_in * frame_in @ R) + (fade_out * frame_in @ R_old) + + R_old = R.copy() + + return out + + +""" Helper functions """ + + +def render_lfe_to_binaural( + x: np.ndarray, + fs: Optional[int] = 48000, + lfe_index: Optional[list] = None, + LFE_gain: Optional[float] = BINAURAL_LFE_GAIN, +) -> Tuple[np.ndarray, int]: + """ + Extract LFE from the given input and render + it binaurally, accounting for delay + """ + + lfe = x[:, lfe_index].copy() + + # if there is more than one LFE sum them into one + if lfe.shape[1] > 1: + lfe = np.sum(lfe, axis=1) + + """ + # 120 Hz low-pass filtering for LFE using IVAS filter coefficients + if fs == 48000: + lfe = sig.sosfilt(IVAS_LPF_4_BUTTER_48K_SOS, lfe, axis=0) + else: + raise NotImplementedError("Only 48 kHz supported at the moment!") + + # 3.5ms LP filter delay from IVAS ROM + lfe_delay_ns = 0.0035 * 1e9 + lfe_delay_smp = round(lfe_delay_ns * fs / 1e9) + + # Delay LFE by the same amount as the HRTF delay + lfe = np.roll(lfe, round(latency_smp), axis=0) + lfe[0 : round(latency_smp), :] = 0 + """ + lfe_delay_ns = 0 + + # apply gain + lfe *= LFE_gain + + # duplicate for each binaural channel + if len(np.shape(lfe)) < 2: + lfe = lfe[:, np.newaxis] + lfe = np.hstack([lfe, lfe]) + + return lfe, lfe_delay_ns diff --git a/item_generation_scripts/audiotools/convert/masa.py b/item_generation_scripts/audiotools/convert/masa.py new file mode 100644 index 00000000..15f1c683 --- /dev/null +++ b/item_generation_scripts/audiotools/convert/masa.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. 
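
The per-frame rotation step of rotate_cba above can be exercised in isolation. A minimal sketch with the 5.1 layout from the constants and an identity quaternion (assuming the w-first convention; a real trajectory file supplies one quaternion per subframe):

import numpy as np

from item_generation_scripts.audiotools.EFAP import EFAP
from item_generation_scripts.audiotools.rotation import Quat2RotMat, rotateAziEle

ls_azi = [30, -30, 0, 0, 110, -110]       # 5.1 loudspeaker azimuths
ls_ele = [0, 0, 0, 0, 0, 0]
panner = EFAP(ls_azi, ls_ele)

q = np.array([1.0, 0.0, 0.0, 0.0])        # identity quaternion -> no rotation
rotated = np.array(
    [rotateAziEle(a, e, Quat2RotMat(q)) for a, e in zip(ls_azi, ls_ele)]
)
R = panner.pan(rotated[:, 0], rotated[:, 1])   # re-panning gain matrix
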
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +from pathlib import Path +from typing import Optional, Union +from warnings import warn + +from item_generation_scripts.audiotools import audio +from item_generation_scripts.audiotools.convert import channelbased +from item_generation_scripts.audiotools.wrappers.masaRenderer import masaRenderer + +""" MetadataAssistedSpatialAudio functions """ + + +def convert_masa( + masa: audio.MetadataAssistedSpatialAudio, + out: audio.Audio, + **kwargs, +) -> audio.Audio: + """Convert Metadata Assisted Spatial audio to the requested output format""" + + # MASA -> Binaural + if isinstance(out, audio.BinauralAudio): + render_masa_to_binaural(masa, out, **kwargs) + + # MASA -> CBA + elif isinstance(out, audio.ChannelBasedAudio): + render_masa_to_cba(masa, out) + + # MASA -> SBA + elif isinstance(out, audio.SceneBasedAudio): + render_masa_to_sba(masa, out) + + else: + raise NotImplementedError( + f"Conversion from {masa.name} to {out.name} is unsupported!" + ) + + return out + + +def render_masa_to_binaural( + masa: audio.MetadataAssistedSpatialAudio, + bin: audio.BinauralAudio, + trajectory: Optional[Union[str, Path]] = None, + bin_dataset: Optional[str] = None, + **kwargs, +) -> None: + """ + Binauralization of MASA audio + + Parameters + ---------- + masa: audio.MetadataAssistedSpatialAudio + MASA input audio + bin: audio.BinauralAudio + Output binaural audio + trajectory: Optional[Union[str, Path]] + Head rotation trajectory path + bin_dataset: Optional[str] + Name of binaural dataset without prefix or suffix + """ + + if "ROOM" in bin.name: + cba_tmp = audio.fromtype("7_1_4") + cba_tmp.fs = masa.fs + + render_masa_to_cba(masa, cba_tmp) + + channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory) + else: + if trajectory is not None: + warn( + f"Head-rotation not supported by MasaRenderer! Trajectory {trajectory} will be ignored!" 
+ ) + if bin_dataset is not None: + warn( + "Binaural dataset selection not supported by MasaRenderer - please copy the required hrir.bin manually!" + ) + + bin.audio = masaRenderer(masa, "BINAURAL") + + +def render_masa_to_cba( + masa: audio.MetadataAssistedSpatialAudio, + cba: audio.ChannelBasedAudio, +) -> None: + """ + Rendering of MASA input signal to Channel-based format + + Parameters + ---------- + masa: audio.MetadataAssistedSpatialAudio + MASA input audio + cba: audio.ChannelBasedAudio + Channel-based output audio + """ + + if cba.name not in ["5_1", "7_1_4"]: + warn( + f"MasaRenderer does not support {cba.name} natively. Using 7_1_4 as an intermediate format." + ) + + cba_tmp = audio.fromtype("7_1_4") + cba_tmp.fs = masa.fs + cba_tmp.audio = masaRenderer(masa, cba_tmp.name) + + channelbased.render_cba_to_cba(cba_tmp, cba) + else: + cba.audio = masaRenderer(masa, cba.name) + + +def render_masa_to_sba( + masa: audio.MetadataAssistedSpatialAudio, + sba: audio.SceneBasedAudio, +) -> None: + """ + Rendering of MASA input signal to SBA format + + Parameters + ---------- + masa: audio.MetadataAssistedSpatialAudio + MASA input audio + sba: audio.SceneBasedAudio + SBA output audio + """ + + warn( + f"MasaRenderer does not support {sba.name} natively. Using 7_1_4 as an intermediate format." + ) + + cba_tmp = audio.fromtype("7_1_4") + cba_tmp.fs = masa.fs + cba_tmp.audio = masaRenderer(masa, cba_tmp.name) + + channelbased.render_cba_to_sba(cba_tmp, sba) diff --git a/item_generation_scripts/audiotools/convert/objectbased.py b/item_generation_scripts/audiotools/convert/objectbased.py new file mode 100644 index 00000000..9fb74ed1 --- /dev/null +++ b/item_generation_scripts/audiotools/convert/objectbased.py @@ -0,0 +1,352 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. 
+# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +from itertools import repeat +from pathlib import Path +from typing import Optional, Tuple, Union + +import numpy as np + +from item_generation_scripts.audiotools import audio +from item_generation_scripts.audiotools.audioarray import delay, framewise_io +from item_generation_scripts.audiotools.binaural_datasets.binaural_dataset import ( + load_ir, +) +from item_generation_scripts.audiotools.binauralobjectrenderer import ( + binaural_fftconv_framewise, +) +from item_generation_scripts.audiotools.constants import IVAS_FRAME_LEN_MS +from item_generation_scripts.audiotools.convert.channelbased import ( + render_cba_to_binaural, +) +from item_generation_scripts.audiotools.convert.scenebased import getRSH +from item_generation_scripts.audiotools.EFAP import EFAP, wrap_angles +from item_generation_scripts.audiotools.rotation import Quat2RotMat, rotateAziEle +from item_generation_scripts.audiotools.wrappers.filter import resample_itu +from item_generation_scripts.utils import apply_func_parallel + +""" ObjectBasedAudio functions """ + + +def convert_objectbased( + oba: audio.ObjectBasedAudio, + out: audio.Audio, + **kwargs, +) -> audio.Audio: + """Convert an ISM signal to the requested output format""" + + # OBA -> Binaural + if isinstance(out, audio.BinauralAudio): + render_oba_to_binaural(oba, out, **kwargs) + + # OBA -> CBA + elif isinstance(out, audio.ChannelBasedAudio): + render_oba_to_cba(oba, out) + + # OBA -> SBA + elif isinstance(out, audio.SceneBasedAudio): + render_oba_to_sba(oba, out) + else: + raise NotImplementedError( + f"Conversion from {oba.name} to {out.name} is unsupported!" 
+ ) + + return out + + +def render_oba_to_binaural( + oba: audio.ObjectBasedAudio, + bin: audio.BinauralAudio, + trajectory: Optional[Union[str, Path]] = None, + bin_dataset: Optional[str] = None, + **kwargs, +) -> None: + """ + Binauralization of ISM input signal + + Parameters + ---------- + oba: audio.ObjectBasedAudio + Object based input audio + bin: audio.BinauralAudio + Binaural output audio + trajectory: Optional[Union[str, Path]] + Head rotation trajectory + bin_dataset: Optional[str] + Name of binaural dataset, if None default dataset is used + """ + + # bin.audio = np.zeros([oba.audio.shape[0], bin.num_channels]) + + if "ROOM" in bin.name: + cba_tmp = audio.fromtype("7_1_4") + cba_tmp.fs = oba.fs + + render_oba_to_cba(oba, cba_tmp) + + render_cba_to_binaural(cba_tmp, bin, trajectory) + else: + IR, SourcePosition, latency_smp = load_ir(oba.name, bin.name, bin_dataset) + + oba.audio = resample_itu(oba, 48000) + fs_old = oba.fs + oba.fs = 48000 + + # apply processing for every object in parallel + obj_pos = oba.object_pos + obj_idx = list(range(oba.num_channels)) + result = apply_func_parallel( + render_object, + zip( + obj_idx, + obj_pos, + repeat(oba), + repeat(trajectory), + repeat(IR), + repeat(SourcePosition), + ), + None, + "mt", + False, + ) + + # sum results over all objects + bin.audio = np.sum(np.stack(result, axis=2), axis=2) + + # compensate delay from binaural dataset + bin.audio = delay(bin.audio, bin.fs, -latency_smp, samples=True) + + bin.audio = resample_itu(bin, fs_old) + bin.fs = fs_old + + +def render_oba_to_cba( + oba: audio.ObjectBasedAudio, + cba: audio.ChannelBasedAudio, +) -> None: + """ + Rendering of ISM input signal to channel-based format + + Parameters + ---------- + oba: audio.ObjectBasedAudio + Object based input audio + cba: audio.ChannelBasedAudio + Channel-based output audio + """ + + cba.audio = np.zeros([oba.audio.shape[0], cba.num_channels]) + + for obj_idx, obj_pos in enumerate(oba.object_pos): + obj_audio = oba.audio[:, [obj_idx]] + pos_frames = obj_pos.shape[0] + + frame_len = IVAS_FRAME_LEN_MS * (oba.fs // 1000) + + fade_in = np.arange(frame_len) / (frame_len - 1) + fade_in = fade_in[:, np.newaxis] + fade_out = 1.0 - fade_in + + # use EFAP for rendering + panner = EFAP( + np.delete(cba.ls_azi, cba.lfe_index), np.delete(cba.ls_ele, cba.lfe_index) + ) + gains_old = None + + for i, (frame_in, frame_out) in framewise_io(obj_audio, cba.audio, frame_len): + # update the crossfade if we have a smaller last frame + if frame_out.shape[0] != frame_len: + frame_size = frame_out.shape[0] + fade_in = np.arange(frame_size) / (frame_size - 1) + fade_in = fade_in[:, np.newaxis] + fade_out = 1.0 - fade_in + + azi, ele = wrap_angles(*obj_pos[i % pos_frames, :2], clip_ele=True) + gains = panner.pan(azi, ele) + for lfe in np.sort(cba.lfe_index): + gains = np.insert(gains, lfe, 0) + gains = gains[np.newaxis, :] + + if gains_old is None: + gains_old = gains.copy() + + frame_out[:] += (fade_in * frame_in @ gains) + ( + fade_out * frame_in @ gains_old + ) + + gains_old = gains.copy() + + +def render_oba_to_sba( + oba: audio.ObjectBasedAudio, + sba: audio.SceneBasedAudio, +) -> None: + """ + Rendering of ISM input signal to SBA format + + Parameters + ---------- + oba: audio.ObjectBasedAudio + Object based input audio + sba: audio.SceneBasedAudio + SBA output audio + """ + + sba.audio = np.zeros([oba.audio.shape[0], sba.num_channels]) + + for obj_idx, obj_pos in enumerate(oba.object_pos): + obj_audio = oba.audio[:, [obj_idx]] + pos_frames = obj_pos.shape[0] + + 
frame_len = IVAS_FRAME_LEN_MS * (oba.fs // 1000) + + fade_in = np.arange(frame_len) / (frame_len - 1) + fade_in = fade_in[:, np.newaxis] + fade_out = 1.0 - fade_in + + gains_old = None + + for i, (frame_in, frame_out) in framewise_io(obj_audio, sba.audio, frame_len): + # update the crossfade if we have a smaller last frame + if frame_out.shape[0] != frame_len: + frame_size = frame_out.shape[0] + fade_in = np.arange(frame_size) / (frame_size - 1) + fade_in = fade_in[:, np.newaxis] + fade_out = 1.0 - fade_in + + pos = obj_pos[i % pos_frames, :] + gains = getRSH(np.array([pos[0]]), np.array([pos[1]]), sba.ambi_order) + + if gains_old is None: + gains_old = gains.copy() + + frame_out[:] += (fade_in * frame_in @ gains.T) + ( + fade_out * frame_in @ gains_old.T + ) + + gains_old = gains.copy() + + +def rotate_oba( + azi: np.ndarray, + ele: np.ndarray, + trajectory: Optional[str] = None, +) -> Tuple[np.ndarray, np.ndarray]: + """ + Application of head tracking trajectory + + Parameters: + ---------- + azi: np.ndarray + Azimuth coordinates of objects + ele: np.ndarray + Elevation coordinates of objects + trajectory: str + Head-tracking trajectory path + + Returns: + ---------- + azi_rot: np.ndarray + Azimuth coordinates after application of trajectory + ele_rot: np.ndarray + Elevation coordinates after application of trajectory + """ + + if trajectory is None: + return azi, ele + + trj_data = np.genfromtxt(trajectory, delimiter=",") + trj_frames = trj_data.shape[0] + + N_frames = azi.shape[0] + if ele.shape[0] != azi.shape[0]: + raise ValueError("Inconsistent input in azi and ele") + + azi_rot = np.zeros([N_frames]) + ele_rot = np.zeros([N_frames]) + + for i_frame in range(N_frames): + q = trj_data[i_frame % trj_frames, :] + azi_rot[i_frame], ele_rot[i_frame] = rotateAziEle( + azi[i_frame], ele[i_frame], Quat2RotMat(q) + ) + + return azi_rot, ele_rot + + +def render_object( + obj_idx: int, + obj_pos: np.ndarray, + oba: audio.ObjectBasedAudio, + trajectory: str, + IR: np.ndarray, + SourcePosition: np.ndarray, +) -> np.ndarray: + """ + Binaural rendering for one ISM object + + Parameters: + ---------- + obj_idx: int + Index of object in list of all objects + obj_pos: np.ndarray + Position of object + oba: audio.ObjectBasedAudio + Input ISM audio object + trajectory: str + Head-tracking trajectory path + IR: np.ndarray + HRIRs for binauralization + SourcePosition: np.ndarray + Positions of HRIR measurements + + Returns: + ---------- + result_audio: np.ndarray + Binaurally rendered object + """ + + # repeat each value four times since head rotation data is on sub-frame basis + azi = np.repeat(obj_pos[:, 0], 4) + ele = np.repeat(obj_pos[:, 1], 4) + # apply head-rotation trajectory + obj_audio = oba.audio[:, [obj_idx]] + azi, ele = rotate_oba(azi, ele, trajectory) + # convolve signal with HRIRs + result_audio = binaural_fftconv_framewise( + obj_audio, + IR, + SourcePosition, + azi, + ele, + ) + return result_audio diff --git a/item_generation_scripts/audiotools/convert/scenebased.py b/item_generation_scripts/audiotools/convert/scenebased.py new file mode 100644 index 00000000..a7e89b4f --- /dev/null +++ b/item_generation_scripts/audiotools/convert/scenebased.py @@ -0,0 +1,429 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +from pathlib import Path +from typing import Optional, Union +from warnings import warn + +import numpy as np +from scipy.special import lpmv + +from item_generation_scripts.audiotools import audio +from item_generation_scripts.audiotools.audioarray import delay, framewise_io +from item_generation_scripts.audiotools.binaural_datasets.binaural_dataset import ( + load_ir, +) +from item_generation_scripts.audiotools.constants import ( + IVAS_FRAME_LEN_MS, + T_DESIGN_11_AZI, + T_DESIGN_11_ELE, + VERT_HOA_CHANNELS_ACN, +) +from item_generation_scripts.audiotools.convert import channelbased +from item_generation_scripts.audiotools.convert.binaural import binaural_fftconv +from item_generation_scripts.audiotools.EFAP import EFAP +from item_generation_scripts.audiotools.rotation import Quat2RotMat, SHrotmatgen +from item_generation_scripts.audiotools.wrappers.filter import resample_itu + +""" SceneBasedAudio functions """ + + +def convert_scenebased( + sba: audio.SceneBasedAudio, + out: audio.Audio, + **kwargs, +) -> audio.Audio: + """Convert scene-based audio to the requested output format""" + + # SBA -> Binaural + if isinstance(out, audio.BinauralAudio): + render_sba_to_binaural(sba, out, **kwargs) + + # SBA -> CBA + elif isinstance(out, audio.ChannelBasedAudio): + render_sba_to_cba(sba, out) + + # SBA -> SBA + elif isinstance(out, audio.SceneBasedAudio): + render_sba_to_sba(sba, out) + else: + raise NotImplementedError( + f"Conversion from {sba.name} to {out.name} is unsupported!" 
+        )
+
+    return out
+
+
+def render_sba_to_binaural(
+    sba: audio.SceneBasedAudio,
+    bin: audio.BinauralAudio,
+    trajectory: Optional[Union[str, Path]] = None,
+    bin_dataset: Optional[str] = None,
+    **kwargs,
+) -> None:
+    """
+    Binauralization of scene-based audio
+
+    Parameters
+    ----------
+    sba: audio.SceneBasedAudio
+        Input SBA audio
+    bin: audio.BinauralAudio
+        Output binaural audio
+    trajectory: Optional[Union[str, Path]]
+        Head rotation trajectory path
+    bin_dataset: Optional[str]
+        Name of binaural dataset without prefix or suffix
+    """
+
+    if trajectory is not None:
+        sba.audio = rotate_sba(sba, trajectory)
+
+    if "ROOM" in bin.name:
+        cba_tmp = audio.fromtype("7_1_4")
+        cba_tmp.fs = sba.fs
+
+        render_sba_to_cba(sba, cba_tmp)
+
+        channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory)
+    else:
+        IR, _, latency_smp = load_ir(sba.name, bin.name, bin_dataset)
+
+        sba.audio = resample_itu(sba, 48000)
+        fs_old = sba.fs
+        sba.fs = 48000
+
+        bin.audio = binaural_fftconv(sba.audio, IR, sba.num_channels)
+
+        # compensate delay from binaural dataset
+        bin.audio = delay(bin.audio, bin.fs, -latency_smp, samples=True)
+
+        bin.audio = resample_itu(bin, fs_old)
+        bin.fs = fs_old
+
+
+def render_sba_to_cba(
+    sba: audio.SceneBasedAudio,
+    cba: audio.ChannelBasedAudio,
+) -> None:
+    """
+    Rendering of SBA input signal to channel-based format
+
+    Parameters
+    ----------
+    sba: audio.SceneBasedAudio
+        Scene-based input audio
+    cba: audio.ChannelBasedAudio
+        Channel-based output audio
+    """
+
+    render_mtx = get_allrad_mtx(sba.ambi_order, cba)
+    cba.audio = sba.audio @ render_mtx.T
+
+
+def render_sba_to_sba(
+    sba_in: audio.SceneBasedAudio,
+    sba_out: audio.SceneBasedAudio,
+) -> None:
+    """
+    Rendering of SBA input signal to SBA output format
+
+    Parameters
+    ----------
+    sba_in: audio.SceneBasedAudio
+        Scene-based input audio
+    sba_out: audio.SceneBasedAudio
+        Scene-based output audio
+    """
+
+    if sba_out.ambi_order > sba_in.ambi_order:
+        sba_out.audio = np.pad(
+            sba_in.audio, [[0, 0], [0, sba_out.num_channels - sba_in.num_channels]]
+        )
+    elif sba_out.ambi_order < sba_in.ambi_order:
+        sba_out.audio = sba_in.audio[:, : sba_out.num_channels]
+    else:
+        # same ambisonics order: pass the signal through unchanged
+        sba_out.audio = sba_in.audio.copy()
+
+    if sba_out.is_planar:
+        zero_vert_channels(sba_out)
+
+
+def rotate_sba(
+    sba: audio.SceneBasedAudio,
+    trajectory: str,
+) -> np.ndarray:
+    """
+    Rotate HOA signal by applying a rotation matrix calculated from the current quaternion
+    in each subframe
+
+    Parameters:
+    ----------
+    sba: audio.SceneBasedAudio
+        Input SBA audio, up to HOA3
+    trajectory: str
+        Path to trajectory file
+
+    Returns:
+    ----------
+    out: np.ndarray
+        Rotated HOA signal
+    """
+
+    trj_data = np.genfromtxt(trajectory, delimiter=",")
+    trj_frames = trj_data.shape[0]
+
+    sig_len = sba.audio.shape[0]
+    sig_dim = sba.audio.shape[1]
+    frame_len = (IVAS_FRAME_LEN_MS // 4) * 48
+
+    if sig_dim not in [4, 9, 16]:
+        raise ValueError("rotate_sba can only handle FOA, HOA2 or HOA3 signals!")
+
+    out = np.zeros([sig_len, sig_dim])
+
+    fade_in = np.arange(frame_len) / (frame_len - 1)
+    fade_in = fade_in[:, np.newaxis]
+    fade_out = 1.0 - fade_in
+
+    R = np.eye(sig_dim)
+    R_old = np.eye(sig_dim)
+    for i, (frame_in, frame_out) in framewise_io(sba.audio, out, frame_len):
+        # update the crossfade if we have a smaller last frame
+        if frame_out.shape[0] != frame_len:
+            frame_size = frame_out.shape[0]
+            fade_in = np.arange(frame_size) / (frame_size - 1)
+            fade_in = fade_in[:, np.newaxis]
+            fade_out = 1.0 - fade_in
+
+        R_r = Quat2RotMat(trj_data[i % trj_frames, :])
+        R[:, :] = SHrotmatgen(R_r, 
order=ambi_order_from_nchan(sig_dim)) + + frame_out[:, :] = (fade_in * frame_in @ R.T) + (fade_out * frame_in @ R_old.T) + + R_old[:, :] = R.copy() + + return out + + +""" Helper functions """ + + +def zero_vert_channels(sba: audio.SceneBasedAudio) -> None: + """Remove all ambisonics parts with vertical components""" + sba.audio[:, VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < sba.num_channels]] = 0 + + +def nchan_from_ambi_order(ambi_order: int) -> int: + """Compute number of channels based on ambisonics order""" + return (ambi_order + 1) ** 2 + + +def ambi_order_from_nchan(nchan: int) -> int: + """Compute ambisonics order based on number of channels""" + return int(np.sqrt(nchan) - 1) + + +def rE_weight(order: int) -> np.ndarray: + """Compute max-rE weighting matrix""" + return np.array( + [ + lpmv(0, l, np.cos(np.deg2rad(137.9) / (order + 1.51))) + for l in range(order + 1) + for _ in range(-l, l + 1) + ] + ).T + + +def n2sn(order: int) -> np.ndarray: + """Compute conversion matrix for N3D to SN3D normalization""" + return np.array( + [1.0 / np.sqrt(2 * l + 1) for l in range(order + 1) for _ in range(-l, l + 1)] + ) + + +def sn2n(order: int) -> np.ndarray: + """Compute conversion matrix for SN3D to N3D normalization""" + return np.array( + [np.sqrt(2 * l + 1) for l in range(order + 1) for _ in range(-l, l + 1)] + ) + + +def getRSH( + azi: np.ndarray, + ele: np.ndarray, + ambi_order: int, + norm: Optional[str] = "sn3d", + degrees: Optional[bool] = True, +) -> np.ndarray: + """ + Returns real spherical harmonic response for the given position(s) + + Parameters: + ---------- + azi: np.ndarray + Azimuth angles + ele: np.ndarray + Elevation angles + ambi_order: int + Ambisonics order + norm: Optional[str] + Normalization of ambisonic bases. + Possible values: "sn3d", "n3d", everything else is interpreted as orthogonal + degrees: Optional[bool] + If true azi and ele are interpreted as angles in degrees, otherwise as radians + + Returns: + ---------- + response: np.ndarray + Real spherical harmonic response + """ + + if degrees: + azi = np.deg2rad(azi) + ele = np.deg2rad(ele) + + azi = azi.astype("float64") + ele = ele.astype("float64") + + LM = np.array([(l, m) for l in range(ambi_order + 1) for m in range(-l, l + 1)]) + + response = np.zeros([LM.shape[0], azi.shape[0]]) + + # trig_term * legendre * uncondon + for i, (l, m) in enumerate(LM): + # N3D norm + response[i, :] = np.sqrt( + ((2 * l + 1) * float(np.math.factorial(l - np.abs(m)))) + / (4 * np.pi * float(np.math.factorial(l + np.abs(m)))) + ) + + # trig term + if m < 0: + response[i, :] *= np.sqrt(2) * np.sin(azi * np.abs(m)) + elif m == 0: + pass # response[i,:] *= 1 + else: + response[i, :] *= np.sqrt(2) * np.cos(azi * m) + + # legendre polynomial + a = lpmv(np.abs(m), l, np.sin(ele)) * ((-1) ** np.abs(m)) + if np.inf in a or -np.inf in a: + a[a == np.inf] = np.finfo(np.float64).max + a[a == -np.inf] = np.finfo(np.float64).min + warn( + "Warning: order too large -> leads to overflow. Inf values are discarded!" 
+            )
+        response[i, :] *= a
+
+    if norm == "sn3d":
+        response *= np.sqrt(4 * np.pi)
+        response[:] = np.diag(n2sn(ambi_order)) @ response
+    elif norm == "n3d":
+        response *= np.sqrt(4 * np.pi)
+    else:
+        pass  # ortho
+
+    return response
+
+
+def get_allrad_mtx(
+    ambi_order: int,
+    cba: audio.ChannelBasedAudio,
+    norm: Optional[str] = "sn3d",
+    rE_weight_bool: Optional[bool] = False,
+    intensity_panning: Optional[bool] = True,
+) -> np.ndarray:
+    """
+    Returns ALLRAD matrix
+
+    Parameters:
+    ----------
+    ambi_order: int
+        Ambisonics order
+    cba: audio.ChannelBasedAudio
+        Channel-based audio object
+    norm: Optional[str]
+        Normalization of ambisonic bases.
+        Possible values: "sn3d", "ortho", everything else is interpreted as n3d
+    rE_weight_bool: Optional[bool]
+        Flag for max-rE weighting
+    intensity_panning: Optional[bool]
+        Flag for intensity panning
+
+    Returns:
+    ----------
+    hoa_dec: np.ndarray
+        ALLRAD matrix
+    """
+
+    n_harm = nchan_from_ambi_order(ambi_order)
+
+    if cba.name == "MONO":
+        hoa_dec = np.zeros([1, n_harm])
+        hoa_dec[0, 0] = 1
+    elif cba.name == "STEREO":
+        hoa_dec = np.zeros([2, n_harm])
+        # Cardioids +/- 90 degrees
+        hoa_dec[0, 0] = 0.5
+        hoa_dec[0, 1] = 0.5
+        hoa_dec[1, 0] = 0.5
+        hoa_dec[1, 1] = -0.5
+    else:
+        Y_td = getRSH(
+            T_DESIGN_11_AZI,
+            T_DESIGN_11_ELE,
+            ambi_order,
+            norm="ortho",
+        )
+        Y_td *= np.sqrt(4 * np.pi)
+
+        n_ls_woLFE = cba.num_channels - len(cba.lfe_index)
+        ls_azi_woLFE = np.delete(cba.ls_azi, cba.lfe_index).astype(float)
+        ls_ele_woLFE = np.delete(cba.ls_ele, cba.lfe_index).astype(float)
+
+        panner = EFAP(ls_azi_woLFE, ls_ele_woLFE, intensity_panning)
+        G_td = panner.pan(T_DESIGN_11_AZI, T_DESIGN_11_ELE)
+
+        hoa_dec = (G_td.T @ Y_td.T) / T_DESIGN_11_AZI.size
+
+    if norm == "sn3d":
+        hoa_dec = hoa_dec @ np.diag(sn2n(ambi_order))
+    elif norm == "ortho":
+        hoa_dec *= np.sqrt(4 * np.pi)
+
+    if rE_weight_bool:
+        a_n = rE_weight(ambi_order)
+        nrg_pre = np.sqrt(n_ls_woLFE / np.sum(a_n**2))
+        hoa_dec = hoa_dec @ np.diag(a_n) * nrg_pre
+
+    hoa_dec = np.insert(hoa_dec, cba.lfe_index, np.zeros(n_harm), axis=0)
+
+    return hoa_dec
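+
+
+# NOTE (editorial sketch, not part of the original IVAS scripts): the ALLRAD
+# decoder above samples the sphere with a t-design, pans each sampling point
+# into the loudspeaker layout with EFAP, and projects the gains back onto the
+# spherical-harmonic basis. A hedged usage sketch, assuming a 5.1 target and
+# a first-order input signal foa_audio of shape (samples x 4):
+#
+#     cba = audio.fromtype("5_1")                # target layout
+#     dec = get_allrad_mtx(1, cba, norm="sn3d")  # (num_ls x 4) decoding matrix
+#     ls_audio = foa_audio @ dec.T               # (samples x num_ls)
diff --git a/item_generation_scripts/audiotools/metadata.py b/item_generation_scripts/audiotools/metadata.py
new file mode 100644
index 00000000..0a4631ae
--- /dev/null
+++ b/item_generation_scripts/audiotools/metadata.py
@@ -0,0 +1,571 @@
+#!/usr/bin/env python3
+
+#
+# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository. All Rights Reserved.
+#
+# This software is protected by copyright law and by international treaties.
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository retain full ownership rights in their respective contributions in
+# the software. 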
This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +import csv +from pathlib import Path +from typing import Optional, TextIO, Tuple, Union + +import numpy as np + +from item_generation_scripts.audiotools import audio +from item_generation_scripts.audiotools.audio import fromtype +from item_generation_scripts.audiotools.audioarray import trim +from item_generation_scripts.audiotools.audiofile import read +from item_generation_scripts.audiotools.constants import IVAS_FRAME_LEN_MS + + +class Metadata: + def __init__(self, meta_file: Union[str, Path]): + self.meta_file = Path(meta_file) + + if not self.meta_file.exists(): + raise FileNotFoundError( + f"Scene description file {self.meta_file} does not exist!" + ) + + with open(self.meta_file) as f: + audio_file = self.meta_file.parent.joinpath(f.readline().strip()).absolute() + + if audio_file.suffix != ".wav": + raise ValueError( + "Scene description files can only be used with WAVE input!" 
+ ) + + self.audio_array, self.fs = read(audio_file) + self.audio = [] + + num_audio = int(f.readline().strip()) + for _ in range(num_audio): + in_fmt = f.readline().strip().upper() + + if in_fmt == "ISM": + self.parse_ism_input(f) + elif in_fmt == "MASA": + self.parse_masa_input(f) + elif in_fmt == "MC": + self.parse_mc_input(f) + elif in_fmt == "SBA": + self.parse_sba_input(f) + else: + raise KeyError(f"Unknown input type in metadata file {in_fmt}") + + def parse_ism_input(self, f: TextIO): + start = int(f.readline().strip()) - 1 + + ism = fromtype("ISM1") + ism.audio = self.audio_array[:, start : start + 1] + ism.fs = self.fs + + line = f.readline().strip() + tmp_path = self.meta_file.parent.joinpath(line).absolute() + if tmp_path.exists(): + # csv metadata + ism.metadata_files = [tmp_path] + ism.init_metadata() + else: + # manually specified metadata + positions = [f.readline().strip() for _ in range(int(line))] + positions = np.genfromtxt( + positions, delimiter="," + ) # TODO can use ndmin = 2 with numpy > 1.23.0; check support + if positions.ndim == 1: + positions = positions[np.newaxis, :] + + obj_pos = [] + # repeat based on first column + for p in positions: + repeats = int(p[0]) + obj_pos.append(np.tile(p[1:], [repeats, 1])) + obj_pos = np.vstack(obj_pos) + + ism.object_pos = [obj_pos] + + self.audio.append(ism) + + def parse_masa_input(self, f: TextIO): + start = int(f.readline().strip()) - 1 + masa_tc = int(f.readline().strip()) + + masa = fromtype(f"MASA{masa_tc}") + masa.audio = self.audio_array[:, start : start + masa_tc] + masa.fs = self.fs + masa.metadata_files = [ + self.meta_file.parent.joinpath(f.readline().strip()).absolute() + ] + masa.init_metadata() + + self.audio.append(masa) + + def parse_mc_input(self, f: TextIO): + start = int(f.readline().strip()) - 1 + mc_fmt = f.readline().strip() + + mc = fromtype(mc_fmt) + mc.audio = self.audio_array[:, start : start + mc.num_channels] + mc.fs = self.fs + + self.audio.append(mc) + + def parse_sba_input(self, f: TextIO): + start = int(f.readline().strip()) - 1 + sba_order = int(f.readline().strip()) + + sba = fromtype(f"SBA{sba_order}") + sba.audio = self.audio_array[:, start : start + sba.num_channels] + sba.fs = self.fs + + self.audio.append(sba) + + def parse_optional_values(self, f: TextIO): + raise NotImplementedError( + "Additional configuration keys in metadata currently unsupported!" + ) + + # opts = {} + # original_pos = f.tell() + # key_value = f.readline().strip() + + # try to parse a key, otherwise reset read pointer + # for key in OPT_KEYS: + # if key_value.startswith(key): + # opts[key] = key_value.replace(key, "").replace(":", "") + # original_pos = f.tell() + # key_value = f.readline.strip() + # else: + # f.seek(original_pos) + # + + +def write_ISM_metadata_in_file( + metadata: list[np.ndarray], + file_name: list[Union[str, Path]], + automatic_naming: Optional[bool] = False, +) -> list[str, Path]: + """ + Write ISM metadata into csv file(s) + + Parameters + ---------- + metadata: list[np.ndarray] + List of metadata arrays + file_name: list[Union[str, Path]] + List of file names for csv files + automatic_naming: Optional[bool] + If true files are named automatically name.0.csv, name.1.csv, ... 
with name as the first entry of file_name
+
+    Returns
+    ----------
+    file_names: list[Union[str, Path]]
+        List of actually used file names
+    """
+
+    if not automatic_naming and len(metadata) != len(file_name):
+        raise ValueError("Number of metadata objects and file names has to match")
+    number_objects = len(metadata)
+
+    if automatic_naming:
+        file_names = []
+        for m_object in range(number_objects):
+            file_names.append(f"{file_name[0]}.{m_object}.csv")
+    else:
+        file_names = file_name
+
+    for i, csv_file in enumerate(file_names):
+        number_frames = metadata[i].shape[0]
+        with open(csv_file, "w", newline="") as file:
+            writer = csv.writer(file)
+            for k in range(number_frames):
+                row_list = [
+                    "%+07.2f" % np.round(metadata[i][k, 0], 2),
+                    "%+06.2f" % np.round(metadata[i][k, 1], 2),
+                    "01.00",
+                    "000.00",
+                    "1.00",
+                ]
+                writer.writerow(row_list)
+
+    return file_names
+
+
+def trim_meta(
+    x: audio.ObjectBasedAudio,
+    limits: Optional[Tuple[int, int]] = None,
+    pad_noise: Optional[bool] = False,
+    samples: Optional[bool] = False,
+) -> None:
+    """
+    Trim or pad ISM including metadata;
+    positive limits trim, negative limits pad
+
+    Parameters
+    ----------
+    x: audio.ObjectBasedAudio
+        ISM audio object
+    limits: Optional[Tuple[int, int]]
+        Number of samples to trim or pad at beginning and end
+    pad_noise: Optional[bool]
+        Flag for padding noise instead of silence
+    samples: Optional[bool]
+        Flag for interpreting limits as samples, otherwise milliseconds
+    """
+
+    if not limits:
+        return
+
+    frame_length = int(IVAS_FRAME_LEN_MS * x.fs // 1000)
+
+    # check if trim values are multiples of the frame length
+    if not samples:
+        pre_trim = int(limits[0] * x.fs // 1000)
+        post_trim = int(limits[1] * x.fs // 1000)
+    else:
+        pre_trim = limits[0]
+        post_trim = limits[1]
+
+    if pre_trim % frame_length != 0 or post_trim % frame_length != 0:
+        raise ValueError(
+            f"ISM metadata padding and trimming only possible if pad/trim length is multiple of frame length. "
+            f"Frame length: {IVAS_FRAME_LEN_MS}ms"
+        )
+
+    # check if audio is multiple of frame length
+    if np.shape(x.audio)[0] % frame_length != 0:
+        raise ValueError(
+            f"ISM metadata padding and trimming only possible if audio length is multiple of frame length. "
+            f"Frame length: {IVAS_FRAME_LEN_MS}ms"
+        )
+
+    # check if metadata length fits exactly to audio length
+    for meta in x.object_pos:
+        if np.shape(meta)[0] * frame_length != np.shape(x.audio)[0]:
+            raise ValueError(
+                f"ISM metadata padding and trimming only possible if audio length is multiple of frame "
+                f"length and audio and metadata length match. 
Frame length: {IVAS_FRAME_LEN_MS}ms" + ) + + # trim audio + x.audio = trim(x.audio, x.fs, limits, pad_noise, samples) + + # trim metadata + trim_frames_pre = int(pre_trim / frame_length) + trim_frames_post = int(post_trim / frame_length) + for i in range(len(x.object_pos)): + x.object_pos[i] = trim( + x.object_pos[i], + limits=(trim_frames_pre, trim_frames_post), + pad_noise=False, + samples=True, + ) + + # add radius 1 + if trim_frames_pre < 0: + x.object_pos[i][: abs(trim_frames_pre), 2] = 1 + if trim_frames_post < 0: + x.object_pos[i][abs(trim_frames_post) :, 2] = 1 + + return + + +def concat_meta_from_file( + audio_files: list[str], + meta_files: list[list[str]], + out_file: list[str], + input_fmt: str, + silence_pre: Optional[int] = 0, + silence_post: Optional[int] = 0, + preamble: Optional[int] = None, +) -> None: + """ + Concatenate ISM metadata from files + + Parameters + ---------- + audio_files: list[str] + List of audio file names + meta_files: list[list[str]] + List of corresponding metadata file names + out_file: list[str] + Name of concatenated output file + input_fmt: str + Input audio format + silence_pre: Optional[int] + Silence inserted before each item + silence_post: Optional[int] + Silence inserted after each item + preamble: Optional[int] + Length of preamble in milliseconds + """ + + # create audio objects + audio_objects = [] + fs = None + for i, audio_file in enumerate(audio_files): + # metadata is cut/looped to signal length in init of audio object + audio_object = audio.fromfile(input_fmt, audio_file, in_meta=meta_files[i]) + audio_objects.append(audio_object) + if fs: + if audio_object.fs != fs: + raise ValueError("Sampling rates of files to concatenate don't match") + else: + fs = audio_object.fs + + frame_length = int(IVAS_FRAME_LEN_MS * audio_objects[0].fs // 1000) + + # pad and concatenate + concat_meta_all_obj = [None] * audio_objects[0].num_channels + + for audio_item in audio_objects: + # check if audio is multiple of frame length + if np.shape(audio_item.audio)[0] % frame_length != 0: + raise ValueError( + f"ISM metadata padding and trimming only possible if audio length is multiple of frame length. " + f"Frame length: {IVAS_FRAME_LEN_MS}ms" + ) + + # check if metadata length fits exactly to audio length + for meta in audio_item.object_pos: + if np.shape(meta)[0] * frame_length != np.shape(audio_item.audio)[0]: + raise ValueError( + f"ISM metadata padding and trimming only possible if audio length is multiple of frame " + f"length and audio and metadata length match. 
Frame length: {IVAS_FRAME_LEN_MS}ms"
+                )
+
+        # pad
+        trim_meta(
+            audio_item, (-silence_pre, -silence_post)
+        )  # use negative value since we want to pad, not trim
+
+        # concatenate
+        for idx, obj_pos in enumerate(audio_item.object_pos):
+            concat_meta_all_obj[idx] = (
+                np.concatenate([concat_meta_all_obj[idx], obj_pos])
+                if concat_meta_all_obj[idx] is not None
+                else obj_pos
+            )
+
+    # add preamble
+    if preamble:
+        concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble)
+
+    write_ISM_metadata_in_file(concat_meta_all_obj, out_file)
+
+    return
+
+
+def split_meta_in_file(
+    in_filename: Union[str, Path],
+    out_folder: Union[str, Path],
+    split_filenames: list[Union[str, Path]],
+    splits: list[int],
+    input_fmt: str,
+    meta_files: Optional[list[Union[str, Path]]] = None,
+    in_fs: Optional[int] = 48000,
+    preamble: Optional[int] = 0,
+):
+    """
+    Splits ISM metadata files into multiple shorter files
+
+    Parameters
+    ----------
+    in_filename: Union[str, Path]
+        Input filename (.pcm, .raw or .wav)
+    out_folder: Union[str, Path]
+        Output folder where to put the splits
+    split_filenames: list[Union[str, Path]]
+        List of names for the split files
+    splits: list[int]
+        List of sample indices where to cut the signal
+    input_fmt: str
+        Input audio format
+    meta_files: Optional[list[Union[str, Path]]]
+        List of corresponding metadata file names
+    in_fs: Optional[int]
+        Input sampling rate, default 48000 Hz
+    preamble: Optional[int]
+        Length of preamble in milliseconds
+    """
+
+    # create a list of output files
+    out_paths = []
+
+    # Read input file by creating ISM audio object
+    audio_object = audio.fromfile(input_fmt, in_filename, in_meta=meta_files, fs=in_fs)
+
+    split_old = 0
+    for idx, split in enumerate(splits):
+        out_paths_obj = []
+        for obj in range(audio_object.num_channels):
+            out_file = (
+                Path(out_folder)
+                / f"{Path(split_filenames[idx]).with_suffix(in_filename.suffix)}.{obj}.csv"
+            )
+
+            # add the path to our list
+            out_paths_obj.append(out_file)
+
+            # remove preamble
+            if preamble:
+                preamble_frames = int(preamble / IVAS_FRAME_LEN_MS)
+                y = trim(
+                    audio_object.object_pos[obj],
+                    audio_object.fs,
+                    (preamble_frames, 0),
+                    samples=True,
+                )
+            else:
+                y = audio_object.object_pos[obj]
+
+            # split
+            split_start = int(split_old / IVAS_FRAME_LEN_MS / audio_object.fs * 1000)
+            split_end = int(split / IVAS_FRAME_LEN_MS / audio_object.fs * 1000)
+            y = y[split_start:split_end, :]
+
+            # write file
+            write_ISM_metadata_in_file([y], [out_file])
+
+        out_paths.append(out_paths_obj)
+
+        split_old = split
+
+    return out_paths
+
+
+def check_ISM_metadata(
+    in_meta: dict,
+    num_objects: int,
+    num_items: int,
+    item_names: Optional[list] = None,
+) -> list:
+    """Find ISM metadata"""
+
+    list_meta = []
+    if in_meta is None:
+        for item in item_names:
+            list_item = metadata_search(Path(item).parent, [item], num_objects)
+            list_meta.append(list_item)
+    else:
+        if len(in_meta) == 1 and num_items != 1:
+            # automatic search for metadata files in folder for all items and objects
+            try:
+                path_meta = in_meta["all_items"]
+            except KeyError:
+                raise ValueError(
+                    'Only one metadata path is given but not with key "all_items".' 
+ ) + + list_meta = metadata_search(path_meta, item_names, num_objects) + + elif num_items == len(in_meta): + # search for every item individually + for item_idx in range(num_items): + # try to use item_names as keys + try: + if item_names: + current_item = in_meta[item_names[item_idx].name] + else: + raise KeyError + except KeyError: + current_item = in_meta[f"item{item_idx + 1}"] + + if len(current_item) == 1: + # automatic search in folder + list_item = metadata_search( + current_item[0], [item_names[item_idx]], num_objects + ) + + elif len(current_item) == num_objects: + # just read out + list_item = current_item + else: + raise ValueError("Number of objects and metadata does not match.") + list_meta.append(list_item) + else: + raise ValueError("Number of metadata inputs does not match number of items") + + # return list of lists of metadata files + return list_meta + + +def metadata_search( + in_meta_path: Union[str, Path], + item_names: list[Union[str, Path]], + num_objects: int, +) -> list[list[Union[Path, str]]]: + """Search for ISM metadata with structure item_name.{0-3}.csv in in_meta folder""" + + if not item_names: + raise ValueError("Item names not provided, can't search for metadata") + + list_meta = [] + for item in item_names: + list_item = [] + for obj_idx in range(num_objects): + file_name_meta = in_meta_path / Path(item.stem).with_suffix( + f"{item.suffix}.{obj_idx}.csv" + ) + # check if file exists and add to list + if file_name_meta.is_file(): + list_item.append(file_name_meta) + else: + raise ValueError(f"Metadata file {file_name_meta} not found.") + if len(item_names) == 1: + list_meta = list_item + else: + list_meta.append(list_item) + + return list_meta + + +def add_remove_preamble( + metadata, + preamble, + add: Optional[bool] = True, +): + preamble_frames = preamble / IVAS_FRAME_LEN_MS + if not preamble_frames.is_integer(): + raise ValueError( + f"Application of preamble for ISM metadata is only possible if preamble length is multiple of frame length. " + f"Frame length: {IVAS_FRAME_LEN_MS}ms" + ) + for obj_idx in range(len(metadata)): + if metadata is not None and metadata[obj_idx] is not None: + if add: + metadata[obj_idx] = trim( + metadata[obj_idx], + limits=(-int(preamble_frames), 0), + samples=True, + ) + + # add radius 1 + metadata[obj_idx][: int(preamble_frames), 2] = 1 + else: + metadata[obj_idx] = trim( + metadata[obj_idx], + limits=(int(preamble_frames), 0), + samples=True, + ) + + return metadata diff --git a/item_generation_scripts/audiotools/rotation.py b/item_generation_scripts/audiotools/rotation.py new file mode 100644 index 00000000..742548a8 --- /dev/null +++ b/item_generation_scripts/audiotools/rotation.py @@ -0,0 +1,379 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +from typing import Optional, Tuple + +import numpy as np + +""" +Helper functions used by Ruedenberg, +an implementation of the algorithm in +Ivanic, J. & Ruedenberg, K., J. Phys. Chem. 100, 6342 (1996) +translated from ivas_rotation.c +""" + + +def SHrot_p( + i: int, + l: int, + a: int, + b: int, + SHrotmat: np.ndarray, + R_lm1: np.ndarray, +) -> float: + """Helper function to calculate the ps""" + + ri1 = SHrotmat[i + 1 + 1][1 + 1 + 1] + rim1 = SHrotmat[i + 1 + 1][-1 + 1 + 1] + ri0 = SHrotmat[i + 1 + 1][0 + 1 + 1] + + if b == -l: + R_lm1_1 = R_lm1[a + l - 1][0] + R_lm1_2 = R_lm1[a + l - 1][2 * l - 2] + p = ri1 * R_lm1_1 + rim1 * R_lm1_2 + else: + if b == l: + R_lm1_1 = R_lm1[a + l - 1][2 * l - 2] + R_lm1_2 = R_lm1[a + l - 1][0] + p = ri1 * R_lm1_1 - rim1 * R_lm1_2 + else: + R_lm1_1 = R_lm1[a + l - 1][b + l - 1] + p = ri0 * R_lm1_1 + + return p + + +def SHrot_u( + l: int, + m: int, + n: int, + SHrotmat: np.ndarray, + R_lm1: np.ndarray, +) -> float: + """Helper function to calculate the us""" + + return SHrot_p(0, l, m, n, SHrotmat, R_lm1) + + +def SHrot_v( + l: int, + m: int, + n: int, + SHrotmat: np.ndarray, + R_lm1: np.ndarray, +) -> float: + """Helper function to calculate the vs""" + + if m == 0: + p0 = SHrot_p(1, l, 1, n, SHrotmat, R_lm1) + p1 = SHrot_p(-1, l, -1, n, SHrotmat, R_lm1) + return p0 + p1 + else: + if m > 0: + d = 1.0 if (m == 1) else 0.0 + p0 = SHrot_p(1, l, m - 1, n, SHrotmat, R_lm1) + p1 = SHrot_p(-1, l, -m + 1, n, SHrotmat, R_lm1) + return p0 * np.sqrt(1.0 + d) - p1 * (1.0 - d) + else: + d = 1.0 if (m == -1) else 0.0 + p0 = SHrot_p(1, l, m + 1, n, SHrotmat, R_lm1) + p1 = SHrot_p(-1, l, -m - 1, n, SHrotmat, R_lm1) + return p0 * (1.0 - d) + p1 * np.sqrt(1.0 + d) + + +def SHrot_w( + l: int, + m: int, + n: int, + SHrotmat: np.ndarray, + R_lm1: np.ndarray, +) -> float: + """Helper function to calculate the w""" + + if m == 0: + raise ValueError("ERROR should not be called\n") + else: + if m > 0: + p0 = SHrot_p(1, l, m + 1, n, SHrotmat, R_lm1) + p1 = SHrot_p(-1, l, -m - 1, n, SHrotmat, R_lm1) + return p0 + p1 + else: + p0 = SHrot_p(1, l, m - 1, n, SHrotmat, R_lm1) + p1 = SHrot_p(-1, l, -m + 1, n, SHrotmat, 
R_lm1) + return p0 - p1 + + +def SHrotmatgen( + R: np.ndarray, + order: Optional[int] = 3, +) -> np.ndarray: + """ + Calculate SHD rotation matrix from that in real space + translated from ivas_rotation.c + + Parameters: + ---------- + R: np.ndarray + real-space rotation matrix + order: Optional[int] + Ambisonics order, default = 3 + + Returns: + ---------- + SHrotmat: np.ndarray + SHD rotation matrix + """ + + dim = (order + 1) * (order + 1) + + SHrotmat = np.zeros([dim, dim]) + R_lm1 = np.zeros([dim, dim]) + R_l = np.zeros([dim, dim]) + + SHrotmat[0][0] = 1.0 + + SHrotmat[1][1] = R[1][1] + SHrotmat[1][2] = R[1][2] + SHrotmat[1][3] = R[1][0] + + SHrotmat[2][1] = R[2][1] + SHrotmat[2][2] = R[2][2] + SHrotmat[2][3] = R[2][0] + + SHrotmat[3][1] = R[0][1] + SHrotmat[3][2] = R[0][2] + SHrotmat[3][3] = R[0][0] + + for i in range(2 * 1 + 1): + for j in range(2 * 1 + 1): + R_lm1[i][j] = SHrotmat[i + 1][j + 1] + + band_idx = 4 + for l in range(2, order + 1): + R_l[:, :] = 0.0 + + for m in range(-l, l + 1): + d = 1 if (m == 0) else 0 + absm = abs(m) + sql2mm2 = np.sqrt((l * l - m * m)) + sqdabsm = np.sqrt(((1 + d) * (l + absm - 1) * (l + absm))) + sqlabsm = np.sqrt(((l - absm - 1) * (l - absm))) + + for n in range(-l, l + 1): + if abs(n) == l: + sqdenom = np.sqrt((2 * l) * (2 * l - 1)) + else: + sqdenom = np.sqrt(l * l - n * n) + + u = sql2mm2 / sqdenom + v = sqdabsm / sqdenom * (1 - 2 * d) * 0.5 + w = sqlabsm / sqdenom * (1 - d) * (-0.5) + + if u != 0: + u = u * SHrot_u(l, m, n, SHrotmat, R_lm1) + if v != 0: + v = v * SHrot_v(l, m, n, SHrotmat, R_lm1) + if w != 0: + w = w * SHrot_w(l, m, n, SHrotmat, R_lm1) + R_l[m + l][n + l] = u + v + w + + for i in range(2 * l + 1): + for j in range(2 * l + 1): + SHrotmat[band_idx + i][band_idx + j] = R_l[i][j] + + for i in range(2 * l + 1): + for j in range(2 * l + 1): + R_lm1[i][j] = R_l[i][j] + + band_idx += 2 * l + 1 + + return SHrotmat + + +def Quat2Euler( + quat: np.ndarray, + degrees: bool = True, +) -> np.ndarray: + """Convert Quaternion to Euler angles""" + + sinr = +2.0 * (quat[..., 0] * quat[..., 1] + quat[..., 2] * quat[..., 3]) + cosr = +1.0 - 2.0 * (quat[..., 1] * quat[..., 1] + quat[..., 2] * quat[..., 2]) + roll = np.arctan2(sinr, cosr) + + sinp = +2.0 * (quat[..., 0] * quat[..., 2] - quat[..., 3] * quat[..., 1]) + pitch = np.where(np.fabs(sinp) >= 1, np.copysign(np.pi / 2, sinp), np.arcsin(sinp)) + + siny = +2.0 * (quat[..., 0] * quat[..., 3] + quat[..., 1] * quat[..., 2]) + cosy = +1.0 - 2.0 * (quat[..., 2] * quat[..., 2] + quat[..., 3] * quat[..., 3]) + yaw = np.arctan2(siny, cosy) + + ypr = np.array([yaw, pitch, roll]).T + + if degrees: + ypr = np.rad2deg(ypr) + + return ypr + + +def Euler2Quat( + ypr: np.ndarray, + degrees: bool = True, +) -> np.ndarray: + """Convert Euler angles to Quaternion""" + + if degrees: + ypr = np.deg2rad(ypr) + + if len(ypr.shape) == 2: + N_quat = ypr.shape[0] + quat = np.zeros([N_quat, 4]) + yaw = ypr[:, 0] + pitch = ypr[:, 1] + roll = ypr[:, 2] + else: + quat = np.zeros([4]) + yaw = ypr[0] + pitch = ypr[1] + roll = ypr[2] + + c1 = np.cos(0.5 * yaw) + c2 = np.cos(0.5 * pitch) + c3 = np.cos(0.5 * roll) + + s1 = np.sin(0.5 * yaw) + s2 = np.sin(0.5 * pitch) + s3 = np.sin(0.5 * roll) + + quat[..., 0] = c3 * c2 * c1 + s3 * s2 * s1 + quat[..., 1] = s3 * c2 * c1 - c3 * s2 * s1 + quat[..., 2] = s3 * c2 * s1 + c3 * s2 * c1 + quat[..., 3] = c3 * c2 * s1 - s3 * s2 * c1 + + return quat + + +def Quat2RotMat( + quat: np.ndarray, +) -> np.ndarray: + """Convert quaternion to rotation matrix""" + + R = np.zeros([3, 3]) + + if 
quat[0] != -3: + # Quaternions + # formula taken from ivas_rotation.c + + R[0, 0] = ( + quat[0] * quat[0] + + quat[1] * quat[1] + - quat[2] * quat[2] + - quat[3] * quat[3] + ) + R[0, 1] = 2.0 * (quat[1] * quat[2] - quat[0] * quat[3]) + R[0, 2] = 2.0 * (quat[1] * quat[3] + quat[0] * quat[2]) + + R[1, 0] = 2.0 * (quat[1] * quat[2] + quat[0] * quat[3]) + R[1, 1] = ( + quat[0] * quat[0] + - quat[1] * quat[1] + + quat[2] * quat[2] + - quat[3] * quat[3] + ) + R[1, 2] = 2.0 * (quat[2] * quat[3] - quat[0] * quat[1]) + + R[2, 0] = 2.0 * (quat[1] * quat[3] - quat[0] * quat[2]) + R[2, 1] = 2.0 * (quat[2] * quat[3] + quat[0] * quat[1]) + R[2, 2] = ( + quat[0] * quat[0] + - quat[1] * quat[1] + - quat[2] * quat[2] + + quat[3] * quat[3] + ) + + else: + # Euler angles in R_X(roll)*R_Y(pitch)*R_Z(yaw) convention + # + # yaw: rotate scene counter-clockwise in the horizontal plane + # pitch: rotate scene in the median plane, increase elevation with positive values + # roll: rotate scene from the right ear to the top + # + # formula taken from ivas_rotation.c + + c1 = np.cos(quat[3] / 180.0 * np.pi) + c2 = np.cos(quat[2] / 180.0 * np.pi) + c3 = np.cos(quat[1] / 180.0 * np.pi) + + s1 = np.sin(quat[3] / 180.0 * np.pi) + s2 = np.sin(-quat[2] / 180.0 * np.pi) + s3 = np.sin(quat[1] / 180.0 * np.pi) + + R[0, 0] = c2 * c3 + R[0, 1] = -c2 * s3 + R[0, 2] = s2 + + R[1, 0] = c1 * s3 + c3 * s1 * s2 + R[1, 1] = c1 * c3 - s1 * s2 * s3 + R[1, 2] = -c2 * s1 + + R[2, 0] = s1 * s3 - c1 * c3 * s2 + R[2, 1] = c3 * s1 + c1 * s2 * s3 + R[2, 2] = c1 * c2 + + return R + + +def rotateAziEle( + azi: float, + ele: float, + R: np.ndarray, + is_planar: bool = False, +) -> Tuple[float, float]: + """Rotate azimuth and elevation angles with rotation matrix""" + + w = np.cos(np.deg2rad(ele)) + dv = np.array( + [ + w * np.cos(np.deg2rad(azi)), + w * np.sin(np.deg2rad(azi)), + np.sin(np.deg2rad(ele)), + ] + ) + + dv_rot = R @ dv + + azi = np.rad2deg(np.arctan2(dv_rot[1], dv_rot[0])) + if is_planar: + ele = 0 + else: + ele = np.rad2deg(np.arctan2(dv_rot[2], np.sqrt(np.sum(dv_rot[:2] ** 2)))) + + return azi, ele diff --git a/item_generation_scripts/audiotools/utils.py b/item_generation_scripts/audiotools/utils.py new file mode 100644 index 00000000..6aaf5fa9 --- /dev/null +++ b/item_generation_scripts/audiotools/utils.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. 
This notice grants no license of any kind, including but not limited to patent
+# license, nor is any license granted by implication, estoppel or otherwise.
+#
+# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+# contributions.
+#
+# This software is provided "AS IS", without any express or implied warranties. The software is in the
+# development stage. It is intended exclusively for experts who have experience with such software and
+# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+# and fitness for a particular purpose are hereby disclaimed and excluded.
+#
+# Any dispute, controversy or claim arising under or in relation to providing this software shall be
+# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+# the United Nations Convention on Contracts on the International Sales of Goods.
+#
+
+from pathlib import Path
+
+import numpy as np
+
+from item_generation_scripts.audiotools.rotation import Euler2Quat, Quat2Euler
+
+
+def read_trajectory(trj_file: Path, return_quat=True):
+    trj = np.genfromtxt(trj_file, delimiter=",")
+
+    if np.all(trj[:, 0] == -3):
+        # Euler
+        if return_quat:
+            return Euler2Quat(trj[:, 1:])
+        else:
+            return trj[:, 1:]
+    else:
+        # Quat
+        if return_quat:
+            return trj
+        else:
+            return Quat2Euler(trj)
+
+
+def write_trajectory(trj, out_file, write_quat=True):
+    if trj.shape[1] == 3:
+        # Euler
+        if write_quat:
+            trj = Euler2Quat(trj)
+        else:
+            trj = np.insert(trj, 0, -3.0, axis=1)
+    elif not write_quat:
+        trj = Quat2Euler(trj)
+        trj = np.insert(trj, 0, -3.0, axis=1)
+
+    with open(out_file, "w") as f:
+        for pos in trj:
+            f.write(", ".join([f"{q:.6f}" for q in pos]))
+            f.write("\n")
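+
+
+# NOTE (editorial sketch, not part of the original IVAS scripts): trajectory
+# files are CSV, holding either one quaternion (w, x, y, z) per row, or Euler
+# angles flagged by -3 in the first column (yaw, pitch, roll follow). A hedged
+# round-trip example with hypothetical file names:
+#
+#     ypr = np.array([[10.0, 0.0, 0.0], [20.0, 0.0, 0.0]])  # yaw sweep
+#     write_trajectory(ypr, "traj.csv")          # stored as quaternions
+#     quat = read_trajectory(Path("traj.csv"))   # read back as quaternions
diff --git a/item_generation_scripts/audiotools/wrappers/__init__.py b/item_generation_scripts/audiotools/wrappers/__init__.py
new file mode 100644
index 00000000..aea270d8
--- /dev/null
+++ b/item_generation_scripts/audiotools/wrappers/__init__.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+
+#
+# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository. All Rights Reserved.
+#
+# This software is protected by copyright law and by international treaties.
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository retain full ownership rights in their respective contributions in
+# the software. This notice grants no license of any kind, including but not limited to patent
+# license, nor is any license granted by implication, estoppel or otherwise.
+#
+# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+# contributions. 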
+# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# diff --git a/item_generation_scripts/audiotools/wrappers/bs1770.py b/item_generation_scripts/audiotools/wrappers/bs1770.py new file mode 100644 index 00000000..d238bec3 --- /dev/null +++ b/item_generation_scripts/audiotools/wrappers/bs1770.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
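+
+"""
+Usage sketch for the loudness helpers in this module (illustrative only; the
+file name, the "STEREO" format string and the 48 kHz rate are assumed example
+values):
+
+    from item_generation_scripts.audiotools import audio
+    from item_generation_scripts.audiotools.wrappers.bs1770 import (
+        get_loudness,
+        loudness_norm,
+    )
+
+    # measure loudness and the scale factor needed to reach -26 LKFS
+    item = audio.fromfile("STEREO", "item.wav", 48000)
+    measured, scale = get_loudness(item, target_loudness=-26)
+
+    # iteratively normalize the item to -26 LKFS in place
+    item.audio = loudness_norm(item, target_loudness=-26)
+"""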
+#
+
+import copy
+import logging
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Optional, Tuple, Union
+from warnings import warn
+
+import numpy as np
+
+from item_generation_scripts.audiotools import audio, convert
+from item_generation_scripts.audiotools.audiofile import write
+from item_generation_scripts.audiotools.wrappers.filter import resample_itu
+from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES
+from item_generation_scripts.utils import find_binary, get_devnull, run
+
+logger = logging.getLogger("__main__")
+logger.setLevel(logging.DEBUG)
+
+
+def bs1770demo(
+    input: audio.Audio,
+    target_loudness: Optional[float] = -26,
+) -> Tuple[float, float]:
+    """
+    Wrapper for ITU-R BS.1770-4, requires bs1770demo binary
+
+    Parameters
+    ----------
+    input: Audio
+        Input audio
+    target_loudness: Optional[float]
+        Desired loudness in LKFS
+
+    Returns
+    -------
+    measured_loudness : float
+        Measured loudness of input
+    scale_factor: float
+        Scale factor to achieve desired loudness
+    """
+
+    null_file = get_devnull()
+
+    if "bs1770demo" in DEFAULT_CONFIG_BINARIES["binary_paths"]:
+        binary = find_binary(
+            DEFAULT_CONFIG_BINARIES["binary_paths"]["bs1770demo"].name,
+            binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["bs1770demo"].parent,
+        )
+    else:
+        binary = find_binary("bs1770demo")
+
+    if not isinstance(input, audio.BinauralAudio) and not isinstance(
+        input, audio.ChannelBasedAudio
+    ):
+        raise NotImplementedError(f"{input.name} is unsupported in ITU-R BS.1770-4.")
+
+    if input.fs != 48000:
+        warn(
+            "ITU-R BS.1770-4 only supports 48kHz sampling rate. Temporarily resampling signal for measurement."
+        )
+        tmp_sig = resample_itu(input, 48000)
+    else:
+        tmp_sig = input.audio
+
+    with TemporaryDirectory() as tmp_dir:
+        tmp_dir = Path(tmp_dir)
+        tmp_file = tmp_dir.joinpath("tmp_loudness.pcm")
+
+        """
+        ITU-R BS.1770-4
+        """
+
+        cmd = [
+            str(binary),
+            "-nchan",
+            str(input.num_channels),  # input nchan
+            "-lev",
+            str(target_loudness),  # level
+            "-conf",
+            "",  # config string
+            str(tmp_file),
+            null_file,
+        ]
+
+        if isinstance(input, audio.BinauralAudio):
+            cmd[6] = "00"  # -conf
+        elif isinstance(input, audio.ChannelBasedAudio):
+            # if a loudspeaker position fulfills the criteria, set the config string to 1 for that index
+            conf_str = [
+                str(int(abs(e) < 30 and (60 <= abs(a) <= 120)))
+                for a, e in zip(input.ls_azi, input.ls_ele)
+            ]
+            for lfe in input.lfe_index:
+                conf_str[lfe] = "L"
+
+            cmd[6] = "".join(conf_str)
+
+        # write temporary file
+        write(tmp_file, tmp_sig, 48000)
+
+        # run command
+        result = run(cmd, logger=logger)
+
+        # parse output
+        measured_loudness = float(result.stdout.splitlines()[3].split(":")[1])
+        scale_factor = float(result.stdout.splitlines()[-3].split(":")[1])
+
+    return measured_loudness, scale_factor
+
+
+def get_loudness(
+    input: audio.Audio,
+    target_loudness: Optional[float] = -26,
+    loudness_format: Optional[str] = None,
+) -> Tuple[float, float]:
+    """
+    Loudness measurement using ITU-R BS.1770-4
+
+    Parameters
+    ----------
+    input : Audio
+        Input audio
+    target_loudness: float
+        Desired loudness in LKFS
+    loudness_format: str
+        Loudness format to render to for loudness computation (default: input format if possible)
+
+    Returns
+    -------
+    measured_loudness : float
+        Measured loudness (after conversion to loudness_format if specified)
+    scale_factor: float
+        Scale factor to achieve desired loudness
+    """
+
+    if target_loudness > 0:
+        raise ValueError("Desired loudness is too high!")
+
+    if loudness_format is None:
+        # for some formats rendering is necessary prior to loudness measurement
+        if isinstance(input, audio.SceneBasedAudio) or isinstance(
+            input, audio.MetadataAssistedSpatialAudio
+        ):
+            loudness_format = "7_1_4"
+        elif isinstance(input, audio.ObjectBasedAudio):
+            loudness_format = "BINAURAL"
+        elif hasattr(input, "layout_file"):
+            loudness_format = input.layout_file
+        else:
+            # default: use input format
+            loudness_format = input.name
+
+    # configure intermediate format
+    tmp = audio.fromtype(loudness_format)
+    tmp.fs = input.fs
+
+    if input.name != loudness_format:
+        convert.format_conversion(input, tmp)
+    else:
+        tmp.audio = input.audio
+
+    return bs1770demo(tmp, target_loudness)
+
+
+def loudness_norm(
+    input: audio.Audio,
+    target_loudness: Optional[float] = -26,
+    loudness_format: Optional[str] = None,
+) -> np.ndarray:
+    """
+    Iterative loudness normalization using ITU-R BS.1770-4
+    The signal is iteratively scaled after rendering to the specified format
+    until its loudness converges to the target value
+
+    Parameters
+    ----------
+    input : Audio
+        Input audio
+    target_loudness: Optional[float]
+        Desired loudness level in LKFS
+    loudness_format: Optional[str]
+        Loudness format to render to for loudness computation (default: input format)
+
+    Returns
+    -------
+    norm : np.ndarray
+        Normalized audio array
+    """
+
+    # repeat until convergence of loudness
+    measured_loudness = np.inf
+    scale_factor = 1
+    num_iter = 1
+
+    while np.abs(measured_loudness - target_loudness) > 0.5 and num_iter < 10:
+        measured_loudness, scale_factor_new = get_loudness(
+            input, target_loudness, loudness_format
+        )
+
+        # scale input
+        input.audio *= scale_factor_new
+
+        # update scale factor
+        scale_factor *= scale_factor_new
+
+        num_iter += 1
+
+    # warn only if the iteration limit was hit without convergence
+    if num_iter >= 10 and np.abs(measured_loudness - target_loudness) > 0.5:
+        warn(
+            f"Loudness did not converge to desired value, stopping at: {measured_loudness:.2f}"
+        )
+
+    return input.audio
+
+
+def scale_files(
+    file_list: list[list[Union[Path, str]]],
+    fmt: str,
+    loudness: float,
+    fs: Optional[int] = 48000,
+    in_meta: Optional[list] = None,
+) -> None:
+    """
+    Scales audio files to the desired loudness
+
+    Parameters
+    ----------
+    file_list : list[list[Union[Path, str]]]
+        List of file paths in a list of the condition folders
+    fmt: str
+        Audio format of files in list
+    loudness: float
+        Desired loudness level in LKFS/dBov
+    in_meta: Optional[list]
+        Metadata for ISM; same structure as file_list, with one additional
+        nesting level holding the list of metadata files for each item
+    fs: Optional[int]
+        Sampling rate
+    """
+
+    if fmt.startswith("ISM") and in_meta:
+        meta_bool = True
+    else:
+        in_meta = copy.copy(file_list)
+        meta_bool = False
+
+    for folder, meta_folder in zip(file_list, in_meta):
+        for file, meta in zip(folder, meta_folder):
+            # create audio object
+            if meta_bool:
+                audio_obj = audio.fromfile(fmt, file, fs, meta)
+            else:
+                audio_obj = audio.fromfile(fmt, file, fs)
+
+            # adjust loudness
+            scaled_audio = loudness_norm(audio_obj, loudness)
+
+            # write into file
+            write(file, scaled_audio, audio_obj.fs)
diff --git a/item_generation_scripts/audiotools/wrappers/eid_xor.py b/item_generation_scripts/audiotools/wrappers/eid_xor.py
new file mode 100644
index 00000000..0b807d94
--- /dev/null
+++ b/item_generation_scripts/audiotools/wrappers/eid_xor.py
@@ -0,0 +1,193 @@
+#!/usr/bin/env python3
+
+#
+# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
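+
+"""
+Usage sketch for the error-insertion helpers in this module (paths, frame
+count and the 3% frame error rate are assumed example values):
+
+    from pathlib import Path
+
+    from item_generation_scripts.audiotools.wrappers.eid_xor import (
+        create_and_apply_error_pattern,
+    )
+
+    # generate a 3% FER pattern for a 1500-frame item and XOR it into the bitstream
+    create_and_apply_error_pattern(
+        Path("item.192"),
+        Path("item_fer.192"),
+        len_sig=1500,
+        error_rate=3.0,
+    )
+"""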
+#
+
+import os.path
+from pathlib import Path
+from typing import Optional, Union
+
+from item_generation_scripts.audiotools.wrappers.gen_patt import create_error_pattern
+from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES
+from item_generation_scripts.utils import find_binary, run
+
+
+def eid_xor(
+    error_pattern: Union[str, Path],
+    in_bitstream: Union[str, Path],
+    out_bitstream: Union[str, Path],
+) -> None:
+    """
+    Wrapper for eid-xor binary to apply error patterns for the bitstream processing
+
+    Parameters
+    ----------
+    error_pattern: Union[str, Path]
+        Path to error pattern file
+    in_bitstream: Union[str, Path]
+        Path to input bitstream file
+    out_bitstream: Union[str, Path]
+        Output path for modified bitstream
+    """
+
+    # find binary
+    if "eid-xor" in DEFAULT_CONFIG_BINARIES["binary_paths"]:
+        binary = find_binary(
+            DEFAULT_CONFIG_BINARIES["binary_paths"]["eid-xor"].name,
+            binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["eid-xor"].parent,
+        )
+    else:
+        binary = find_binary("eid-xor")
+
+    # check for valid inputs
+    if not Path(in_bitstream).is_file():
+        raise ValueError(
+            f"Input bitstream file {in_bitstream} for bitstream processing does not exist"
+        )
+    elif not Path(error_pattern).is_file():
+        raise ValueError(
+            f"Error pattern file {error_pattern} for bitstream processing does not exist"
+        )
+
+    # set up command line
+    cmd = [
+        str(binary),
+        "-vbr",  # Enables variable bit rate operation
+        "-fer",  # Error pattern is a frame erasure pattern
+        in_bitstream,
+        error_pattern,
+        out_bitstream,
+    ]
+
+    # run command
+    run(cmd)
+
+    return
+
+
+def create_and_apply_error_pattern(
+    in_bitstream: Union[Path, str],
+    out_bitstream: Union[Path, str],
+    len_sig: int,
+    error_pattern: Optional[Union[Path, str]] = None,
+    error_rate: Optional[float] = None,
+    preamble: Optional[int] = 0,
+    master_seed: Optional[int] = 0,
+    prerun_seed: Optional[int] = 0,
+) -> None:
+    """
+    Function to create (or use an existing) frame error pattern for bitstream processing
+
+    Parameters
+    ----------
+    in_bitstream: Union[Path, str]
+        Path of input bitstream
+    out_bitstream: Union[Path, str]
+        Path of output bitstream
+    len_sig: int
+        Length of signal in frames
+    error_pattern: Optional[Union[Path, str]]
+        Path to existing error pattern
+    error_rate: Optional[float]
+        Error rate in percent
+    preamble: Optional[int]
+        Length of preamble in frames
+    master_seed: Optional[int]
+        Master seed for error pattern generation
+    prerun_seed: Optional[int]
+        Number of preruns in seed generation
+    """
+
+    if error_pattern is None:
+        # create error pattern
+        if error_rate is not None:
+            error_pattern = (
+                Path(in_bitstream).parent.joinpath("error_pattern").with_suffix(".192")
+            )
+            create_error_pattern(
+                len_sig, error_pattern, error_rate, preamble, master_seed, prerun_seed
+            )
+        else:
+            raise ValueError(
+                "Either error pattern or error rate has to be specified for bitstream processing"
+            )
+    elif error_rate is not None:
+        raise ValueError(
+            "Error pattern and error rate are specified for bitstream processing. Can't use both"
+        )
+
+    # apply error pattern
+    eid_xor(error_pattern, in_bitstream, out_bitstream)
+
+    return
+
+
+def validate_error_pattern_application(
+    error_pattern: Optional[Union[Path, str]] = None,
+    error_rate: Optional[int] = None,
+) -> None:
+    """
+    Validate settings for frame error pattern application
+
+    Parameters
+    ----------
+    error_pattern: Optional[Union[Path, str]]
+        Path to existing error pattern
+    error_rate: Optional[int]
+        Frame error rate
+    """
+
+    if find_binary("gen-patt") is None:
+        raise FileNotFoundError(
+            "The binary gen-patt for error pattern generation was not found! Please check the configuration."
+        )
+    if find_binary("eid-xor") is None:
+        raise FileNotFoundError(
+            "The binary eid-xor for error pattern application was not found! Please check the configuration."
+        )
+    if error_pattern is not None:
+        if not os.path.exists(os.path.realpath(error_pattern)):
+            raise FileNotFoundError(
+                f"The frame error profile file {error_pattern} was not found! Please check the configuration."
+            )
+        if error_rate is not None:
+            raise ValueError(
+                "Frame error pattern and error rate are specified for bitstream processing. Can't use both! Please check the configuration."
+            )
+    else:
+        if error_rate is None:
+            raise ValueError(
+                "Either error rate or error pattern has to be specified for FER bitstream processing."
+            )
+        elif error_rate < 0 or error_rate > 100:
+            raise ValueError(
+                f"Specified error rate of {error_rate}% is either too large or too small."
+            )
+    return
diff --git a/item_generation_scripts/audiotools/wrappers/esdru.py b/item_generation_scripts/audiotools/wrappers/esdru.py
new file mode 100644
index 00000000..7785a586
--- /dev/null
+++ b/item_generation_scripts/audiotools/wrappers/esdru.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+
+#
+# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository. All Rights Reserved.
+#
+# This software is protected by copyright law and by international treaties.
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository retain full ownership rights in their respective contributions in
+# the software. This notice grants no license of any kind, including but not limited to patent
+# license, nor is any license granted by implication, estoppel or otherwise.
+#
+# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+# contributions.
+#
+# This software is provided "AS IS", without any express or implied warranties. The software is in the
+# development stage. It is intended exclusively for experts who have experience with such software and
+# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+# and fitness for a particular purpose are hereby disclaimed and excluded.
+# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import Optional + +import numpy as np + +from item_generation_scripts.audiotools import audio +from item_generation_scripts.audiotools.audiofile import read, write +from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES +from item_generation_scripts.utils import find_binary, run + + +def esdru( + input: audio.Audio, + alpha: float, + sf: Optional[int] = 48000, + e_step: Optional[float] = 0.5, + seed: Optional[int] = 1, +) -> np.ndarray: + """ + Wrapper for ESDRU (Ericsson spatial distortion reference unit) Recommendation ITU-T P.811, requires esdru binary + + Parameters + ---------- + input : Audio + Input audio (16 bit Stereo PCM) + alpha: float + Alpha value [0.0 ... 1.0] + sf: Optional[int] + Sampling frequency FS Hz (Default: 48000 Hz) + e_step: Optional[float] + Max step S during high energy [0.0 ... 1.0] (Default: 0.5) + seed: Optional[int] + Set random seed I [unsigned int] (Default: 1) + + Returns + ------- + output: np.ndarray + Output array (16 bit Stereo PCM) + """ + if "esdru" in DEFAULT_CONFIG_BINARIES["binary_paths"]: + binary = find_binary( + DEFAULT_CONFIG_BINARIES["binary_paths"]["esdru"].name, + binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["esdru"].parent, + ) + else: + binary = find_binary("esdru") + + if not isinstance(input, audio.BinauralAudio) and not input.name == "STEREO": + raise Exception( + "ESDRU condition only available for STEREO or BINAURAL output format" + ) + + if alpha < 0.0 or alpha > 1.0: + raise Exception( + "Alpha value is out of bounds. Please choose a value between 0.0 and 1.0." + ) + + if e_step < 0.0 or e_step > 1.0: + raise Exception( + "Step value is out of bounds. Please choose a value between 0.0 and 1.0." + ) + + tmp_input_signal = input.audio + tmp_output_signal = np.ones((48000, 2)) + + with TemporaryDirectory() as tmp_dir: + tmp_dir = Path(tmp_dir) + tmp_input_file = tmp_dir.joinpath("tmp_input_signal.raw") + tmp_output_file = tmp_dir.joinpath("tmp_output_signal.raw") + + """ + ITU-T Recommendation P.811, ESDRU + """ + + cmd = [ + str(binary), + "-sf", + str(sf), + "-e_step", + str(e_step), + "-seed", + str(seed), + str(alpha), + str(tmp_input_file), + str(tmp_output_file), + ] + + # write temporary file + write(tmp_input_file, tmp_input_signal, sf) + write(tmp_output_file, tmp_output_signal, sf) + + # run command + run(cmd) + + tmp_output_signal, out_fs = read(tmp_output_file, 2, sf) + + return tmp_output_signal diff --git a/item_generation_scripts/audiotools/wrappers/filter.py b/item_generation_scripts/audiotools/wrappers/filter.py new file mode 100644 index 00000000..4c7b61b4 --- /dev/null +++ b/item_generation_scripts/audiotools/wrappers/filter.py @@ -0,0 +1,366 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
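+
+"""
+Usage sketch for the ITU-T STL filter wrappers in this module (file name and
+rates are assumed example values). Note that the wrappers return plain arrays
+and do not update the fs attribute of the input object:
+
+    from item_generation_scripts.audiotools import audio
+    from item_generation_scripts.audiotools.wrappers.filter import (
+        hp50filter_itu,
+        resample_itu,
+    )
+
+    item = audio.fromfile("STEREO", "item.wav", 48000)
+
+    # 50 Hz high-pass at 48 kHz, then downsample to 32 kHz
+    item.audio = hp50filter_itu(item)
+    item.audio = resample_itu(item, 32000)
+    item.fs = 32000
+"""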
+#
+
+import re
+from copy import copy
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Optional
+from warnings import warn
+
+import numpy as np
+
+from item_generation_scripts.audiotools.audio import Audio, ChannelBasedAudio
+from item_generation_scripts.audiotools.audioarray import delay_compensation
+from item_generation_scripts.audiotools.audiofile import read, write
+from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES
+from item_generation_scripts.utils import find_binary, run
+
+FILTER_TYPES_REGEX = r"[\n][\s]{3}[A-Z0-9]\w+\s+"
+
+
+def filter_itu(
+    input: Audio,
+    flt_type: str,
+    block_size: Optional[int] = None,
+    mod: Optional[bool] = False,
+    up: Optional[bool] = False,
+    down: Optional[bool] = False,
+    is_async: Optional[bool] = False,
+    delay: Optional[int] = None,
+    skip_channel: Optional[list[int]] = None,
+) -> np.ndarray:
+    """
+    Filter a multi-channel audio array using the ITU-T STL filter binary
+
+    Parameters
+    ----------
+    input: Audio
+        Input audio
+    flt_type: str
+        Name of filter type used for filtering
+    block_size: Optional[int]
+        Processing block size in number of samples (default 256 samples)
+    mod: Optional[bool]
+        Flag for using the modified IRS characteristic
+    up: Optional[bool]
+        Flag for up-sampling
+    down: Optional[bool]
+        Flag for down-sampling
+    is_async: Optional[bool]
+        Flag for asynchronization operation
+    delay: Optional[int]
+        Delay in number of samples
+    skip_channel: Optional[list[int]]
+        List of channel indices which should not be filtered
+
+    Returns
+    -------
+    output: np.ndarray
+        Output filtered array
+    """
+
+    if "filter" in DEFAULT_CONFIG_BINARIES["binary_paths"]:
+        binary = find_binary(
+            DEFAULT_CONFIG_BINARIES["binary_paths"]["filter"].name,
+            binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["filter"].parent,
+        )
+    else:
+        binary = find_binary("filter")
+
+    # check if filter type is supported
+    tmp = run([binary], check=False)
+
+    FILTER_TYPES = [
+        f.group().strip() for f in re.finditer(FILTER_TYPES_REGEX, tmp.stdout)
+    ]
+
+    if flt_type not in FILTER_TYPES:
+        raise ValueError(
+            f"Filter type {flt_type} does not seem to be supported by the binary: {FILTER_TYPES}"
+        )
+
+    # create command line
+    cmd = [
+        binary,
+        "-q",
+    ]
+
+    if mod:
+        cmd.append("-mod")
+    if up and down:
+        raise ValueError("Up-sampling and down-sampling cannot be selected at the same time")
+    if up:
+        cmd.append("-up")
+    elif down:
+        cmd.append("-down")
+    if is_async:
+        cmd.append("-async")
+    if delay:
+        cmd.extend(["-delay", str(delay)])
+
+    cmd.append(str(flt_type))
+
+    # create output array of the according size
+    if up:
+        # upsampling -> size increases
+        if flt_type == "SHQ2":
+            output = np.zeros((np.shape(input.audio)[0] * 2, np.shape(input.audio)[1]))
+        elif flt_type == "SHQ3":
+            output = np.zeros((np.shape(input.audio)[0] * 3, np.shape(input.audio)[1]))
+        else:
+            raise ValueError(f"No upsampling with {flt_type} possible")
+    elif down:
+        # downsampling -> size decreases
+        if flt_type == "SHQ2":
+            output = np.zeros(
+                (int(np.ceil(np.shape(input.audio)[0] / 2)), np.shape(input.audio)[1])
+            )
+        elif flt_type == "SHQ3":
+            output = np.zeros(
+                (int(np.ceil(np.shape(input.audio)[0] / 3)), np.shape(input.audio)[1])
+            )
+        else:
+            raise ValueError(f"No downsampling with {flt_type} possible")
+    else:
+        # normal filtering -> size remains
+        output = np.zeros_like(input.audio)
+
+    with TemporaryDirectory() as tmp_dir:
+        tmp_dir = Path(tmp_dir)
+
+        # process channels separately
+        for channel in range(input.num_channels):
+            if skip_channel and channel in skip_channel:
+                continue
+
+            cmd_in_out = cmd.copy()
+
+            tmp_in = tmp_dir.joinpath(f"tmp_filterIn{channel}.pcm")
+            tmp_out = tmp_dir.joinpath(f"tmp_filterOut{channel}.pcm")
+
+            cmd_in_out.append(str(tmp_in))
+            cmd_in_out.append(str(tmp_out))
+
+            if block_size:
+                cmd_in_out.append(str(block_size))
+
+            write(tmp_in, input.audio[:, channel], input.fs)
+
+            run(cmd_in_out)
+
+            a, _ = read(tmp_out, nchannels=1, fs=input.fs)
+            output[:, channel][:, None] = a
+
+    return output
+
+
+def lpfilter_itu(
+    x: Audio,
+    fc: int,
+) -> np.ndarray:
+    """
+    Low-pass filter a multi-channel audio array
+
+    Parameters
+    ----------
+    x: Audio
+        Input audio
+    fc: int
+        Cut-off frequency in Hz
+
+    Returns
+    -------
+    y: np.ndarray
+        Output low-pass filtered array
+    """
+
+    # find the right filter type for the cut-off frequency
+    flt_types = ["LP1p5", "LP35", "LP7", "LP10", "LP12", "LP14", "LP20"]
+    flt_vals = [1500, 3500, 7000, 10000, 12000, 14000, 20000]
+    try:
+        flt_type = flt_types[flt_vals.index(fc)]
+    except Exception:
+        raise ValueError(f"LP cut-off frequency {fc}Hz not supported.")
+
+    # resample if the sampling rate is not supported
+    old_fs = None
+    tmp = copy(x)
+    if x.fs != 48000:
+        warn(
+            f"Filter type {flt_type} only supported for 48kHz sampling rate, not for {x.fs}Hz -> resampling"
+        )
+        old_fs = x.fs
+        tmp.audio = resample_itu(tmp, 48000)
+        tmp.fs = 48000
+
+    # apply filter
+    y = filter_itu(tmp, flt_type=flt_type, block_size=960)
+
+    # delay compensation
+    y = delay_compensation(y, flt_type=flt_type, fs=tmp.fs)
+
+    # reverse resampling
+    if old_fs:
+        tmp.audio = y
+        y = resample_itu(tmp, old_fs)
+
+    return y
+
+
+def hp50filter_itu(
+    x: Audio,
+) -> np.ndarray:
+    """
+    High-pass 50Hz filter a multi-channel audio array
+
+    Parameters
+    ----------
+    x: Audio
+        Input audio
+
+    Returns
+    -------
+    y: np.ndarray
+        Output high-pass filtered array
+    """
+
+    # set filter type and check if the sampling rate is supported
+    old_fs = None
+    tmp = copy(x)
+    if x.fs == 48000:
+        flt_type = "HP50_48KHZ"
+    elif x.fs == 32000:
+        flt_type = "HP50_32KHZ"
+    else:
+        # resample if the sampling rate is not supported
+        warn(
+            f"Filter type HP50 only supported for 48kHz and 32kHz sampling rate, not for {x.fs}Hz -> resampling"
+        )
+        flt_type = "HP50_48KHZ"
+        old_fs = x.fs
+        tmp.audio = resample_itu(tmp, 48000)
+        tmp.fs = 48000
+
+    # don't apply high-pass filtering to the LFE channel
+    if isinstance(x, ChannelBasedAudio):
+        skip_channel = x.lfe_index
+    else:
+        skip_channel = None
+
+    # apply filter
+    y = filter_itu(tmp, flt_type=flt_type, skip_channel=skip_channel)
+
+    # delay compensation
+    y = delay_compensation(y, flt_type=flt_type, fs=tmp.fs)
+
+    # reverse resampling
+    if old_fs:
+        tmp.audio = y
+        y = resample_itu(tmp, old_fs)
+
+    return y
+
+
+def resample_itu(
+    x: Audio,
+    fs_new: int,
+) -> np.ndarray:
+    """
+    Resampling of a multi-channel audio array
+
+    Parameters
+    ----------
+    x: Audio
+        Input audio
+    fs_new: int
+        Target sampling rate in Hz
+
+    Returns
+    -------
+    y: np.ndarray
+        Output resampled array
+    """
+
+    fs_old = x.fs
+
+    # if the sampling rate is the same, do nothing
+    if fs_new == fs_old:
+        return x.audio
+
+    ratio_fs = fs_new / fs_old
+    up = [False]
+    down = [False]
+
+    # select suitable processing to achieve the target sampling rate
+    if ratio_fs == 2:
+        flt_type = ["SHQ2"]
+        up = [True]
+    elif ratio_fs == 0.5:
+        flt_type = ["SHQ2"]
+        down = [True]
+    elif ratio_fs == 3:
+        flt_type = ["SHQ3"]
+        up = [True]
+    elif ratio_fs == 1 / 3:
+        flt_type = ["SHQ3"]
+        down = [True]
+    elif ratio_fs == 2 / 3:
+        flt_type = ["SHQ2", "SHQ3"]
+        up = [True, False]
+        down = [False, True]
+    elif ratio_fs == 3 / 2:
+        flt_type = ["SHQ3", "SHQ2"]
+        up = [True, False]
+        down = [False, True]
+    else:
+        raise ValueError("Ratio of input and output sampling frequency not supported")
+
+    # apply filter
+    y = copy(x)
+    for i, flt in enumerate(flt_type):
+        y.audio = filter_itu(y, flt_type=flt, up=up[i], down=down[i])
+        y.audio = delay_compensation(
+            y.audio, flt_type=flt, fs=y.fs, up=up[i], down=down[i]
+        )
+        # if up[i]:
+        #     if flt == "SHQ2":
+        #         y.fs = y.fs * 2
+        #     elif flt == "SHQ3":
+        #         y.fs = y.fs * 3
+        # elif down[i]:
+        #     if flt == "SHQ2":
+        #         y.fs = int(y.fs / 2)
+        #     elif flt == "SHQ3":
+        #         y.fs = int(y.fs / 3)
+
+    return y.audio
diff --git a/item_generation_scripts/audiotools/wrappers/gen_patt.py b/item_generation_scripts/audiotools/wrappers/gen_patt.py
new file mode 100644
index 00000000..a68706a7
--- /dev/null
+++ b/item_generation_scripts/audiotools/wrappers/gen_patt.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+
+#
+# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository. All Rights Reserved.
+#
+# This software is protected by copyright law and by international treaties.
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository retain full ownership rights in their respective contributions in
+# the software. This notice grants no license of any kind, including but not limited to patent
+# license, nor is any license granted by implication, estoppel or otherwise.
+#
+# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+# contributions.
+#
+# This software is provided "AS IS", without any express or implied warranties. The software is in the
+# development stage. It is intended exclusively for experts who have experience with such software and
+# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+# and fitness for a particular purpose are hereby disclaimed and excluded.
+#
+# Any dispute, controversy or claim arising under or in relation to providing this software shall be
+# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+# the United Nations Convention on Contracts on the International Sales of Goods.
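+
+"""
+Usage sketch for the pattern-generation helpers in this module (output path,
+frame count, error rate and preamble length are assumed example values):
+
+    from pathlib import Path
+
+    from item_generation_scripts.audiotools.wrappers.gen_patt import (
+        create_error_pattern,
+    )
+
+    # 3% FER pattern for 1500 frames, leaving a 50-frame preamble error-free
+    create_error_pattern(
+        len_sig=1500,
+        path_pattern=Path("fer_3percent.192"),
+        frame_error_rate=3.0,
+        preamble=50,
+    )
+"""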
+#
+
+from os import getcwd
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Optional, Union
+
+from item_generation_scripts.audiotools.wrappers.random_seed import random_seed
+from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES
+from item_generation_scripts.utils import find_binary, run
+
+ERROR_PATTERNS_DIR = Path(__file__).parent.parent.parent.joinpath("error_patterns")
+
+
+def gen_patt(
+    len_sig: int,
+    path_pattern: Union[Path, str],
+    error_rate: float,
+    start: Optional[int] = 0,
+    working_dir: Optional[Union[Path, str]] = None,
+) -> None:
+    """
+    Wrapper for gen-patt binary to create error patterns for the bitstream processing
+
+    Parameters
+    ----------
+    len_sig: int
+        Length of signal in frames
+    path_pattern: Union[Path, str]
+        Path of output pattern
+    error_rate: float
+        Error rate in percent
+    start: Optional[int]
+        Start frame of error pattern (length of the preamble)
+    working_dir: Optional[Union[Path, str]]
+        Directory in which the binary is called (the sta file has to be in this directory if used)
+    """
+
+    # find binary
+    if "gen-patt" in DEFAULT_CONFIG_BINARIES["binary_paths"]:
+        binary = find_binary(
+            DEFAULT_CONFIG_BINARIES["binary_paths"]["gen-patt"].name,
+            binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["gen-patt"].parent,
+        )
+    else:
+        binary = find_binary("gen-patt")
+
+    if working_dir is None:
+        working_dir = getcwd()
+
+    # set up command line
+    cmd = [
+        str(binary),
+        "-tailstat",  # Statistics performed on the tail
+        "-fer",  # Frame erasure mode using Gilbert model
+        "-g192",  # Save error pattern in 16-bit G.192 format
+        "-gamma",  # Correlation for BER|FER modes
+        str(0),
+        "-rate",
+        str(error_rate / 100),
+        "-tol",  # Max deviation of specified BER/FER/BFER
+        str(0.001),
+        "-reset",  # Reset EID state in between iterations
+        "-n",
+        str(int(len_sig)),
+        "-start",
+        str(int(start) + 1),
+        path_pattern,
+    ]
+
+    # run command
+    run(cmd, cwd=working_dir)
+
+    return
+
+
+def create_error_pattern(
+    len_sig: int,
+    path_pattern: Union[Path, str],
+    frame_error_rate: float,
+    preamble: Optional[int] = 0,
+    master_seed: Optional[int] = 0,
+    prerun_seed: Optional[int] = 0,
+) -> None:
+    """
+    Creates an error pattern with the desired frame error rate for bitstream processing
+
+    Parameters
+    ----------
+    len_sig: int
+        Length of signal in frames
+    path_pattern: Union[Path, str]
+        Path of output pattern
+    frame_error_rate: float
+        Error rate in percent
+    preamble: Optional[int]
+        Length of preamble in frames
+    master_seed: Optional[int]
+        Master seed for error pattern generation
+    prerun_seed: Optional[int]
+        Number of preruns in seed generation
+    """
+
+    with TemporaryDirectory() as tmp_dir:
+        tmp_dir = Path(tmp_dir)
+
+        sta_file = ERROR_PATTERNS_DIR.joinpath("sta_template")
+        tmp_sta_file = tmp_dir.joinpath("sta")
+
+        # compute seed
+        seed = random_seed((0, 99999999), master_seed, prerun_seed)
+
+        # open file and modify
+        lines = []
+        with open(sta_file, "r") as sta_file_txt:
+            lines.append(sta_file_txt.readline())  # not changed
+            lines.append(f"{sta_file_txt.readline()[:-2]}{frame_error_rate/100}\n")
+            lines.append(sta_file_txt.readline())  # not changed
+            lines.append(f"{sta_file_txt.readline()[:-2]}{seed}\n")
+            lines.append(sta_file_txt.readline())  # not changed
+            lines.append(
+                f"{sta_file_txt.readline()[:-2]}{1-(frame_error_rate/100*2)}\n"
+            )
+            lines.append(sta_file_txt.readline())  # not changed
+            lines.append(
+                f"{sta_file_txt.readline()[:-2]}{1-(frame_error_rate/100*2)}\n"
+            )
+            lines.append(sta_file_txt.readline()) 
# not changed + + with open(tmp_sta_file, "w") as tmp_sta_file_txt: + tmp_sta_file_txt.write("".join(lines)) + + gen_patt( + len_sig=len_sig, + error_rate=frame_error_rate, + path_pattern=path_pattern, + start=preamble, + working_dir=tmp_dir, + ) + + return diff --git a/item_generation_scripts/audiotools/wrappers/masaRenderer.py b/item_generation_scripts/audiotools/wrappers/masaRenderer.py new file mode 100644 index 00000000..a5987b1e --- /dev/null +++ b/item_generation_scripts/audiotools/wrappers/masaRenderer.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
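+
+"""
+Usage sketch for the MASA rendering wrapper in this module. It assumes that
+masa_item is an existing audio.MetadataAssistedSpatialAudio instance whose
+audio, fs and metadata_files attributes are already set; how the item was
+created is up to the caller:
+
+    from item_generation_scripts.audiotools.wrappers.masaRenderer import (
+        masaRenderer,
+    )
+
+    # render to binaural; the result is an (N, 2) array at masa_item.fs
+    binaural = masaRenderer(masa_item, "BINAURAL")
+"""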
+# + +from pathlib import Path +from tempfile import TemporaryDirectory + +import numpy as np + +from item_generation_scripts.audiotools import audio +from item_generation_scripts.audiotools.audiofile import read, write +from item_generation_scripts.audiotools.wrappers.filter import resample_itu +from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES +from item_generation_scripts.utils import find_binary, run + + +def masaRenderer( + masa: audio.MetadataAssistedSpatialAudio, + out_fmt: str, +) -> np.ndarray: + """ + Wrapper for masaRenderer (from MASA reference software) + + Parameters + ---------- + masa : MetadataAssistedSpatialAudio + Input MASA audio + out_fmt: str + Desired output format (only 5_1, 7_1_4 and BINAURAL supported) + + Returns + ------- + output : np.ndarray + MASA rendered to out_fmt + """ + + if "masaRenderer" in DEFAULT_CONFIG_BINARIES["binary_paths"]: + binary = find_binary( + DEFAULT_CONFIG_BINARIES["binary_paths"]["masaRenderer"].name, + binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["masaRenderer"].parent, + ) + else: + binary = find_binary("masaRenderer") + + if out_fmt not in ["5_1", "7_1_4", "BINAURAL"]: + raise ValueError(f"Output format {out_fmt} is not supported by MasaRenderer!") + + if out_fmt == "5_1": + output_mode = "-LS51" + num_channels = 6 + elif out_fmt == "7_1_4": + output_mode = "-LS714" + num_channels = 12 + else: + output_mode = "-BINAURAL" + num_channels = 2 + + cmd = [ + str(binary), + output_mode, + "", # 2 -> inputPcm + str(masa.metadata_files.resolve()), + "", # 4 -> outputPcm + ] + + with TemporaryDirectory() as tmp_dir: + tmp_dir = Path(tmp_dir) + tmp_in = tmp_dir.joinpath("tmp_masaRendIn.pcm") + tmp_out = tmp_dir.joinpath("tmp_masaRendOut.pcm") + + cmd[2] = str(tmp_in) + cmd[4] = str(tmp_out) + + tmp_audio = resample_itu(masa, 48000) + old_fs = masa.fs + + write(tmp_in, tmp_audio, 48000) + + # we need to run in the masaRenderer directory to use the .bin files it requires + run(cmd, cwd=binary.resolve().parent) + + output, _ = read(tmp_out, num_channels) + + output_audio = audio.fromtype(out_fmt) + output_audio.audio = output + output_audio.fs = 48000 + output = resample_itu(output_audio, old_fs) + + return output diff --git a/item_generation_scripts/audiotools/wrappers/networkSimulator.py b/item_generation_scripts/audiotools/wrappers/networkSimulator.py new file mode 100644 index 00000000..4e74c3ce --- /dev/null +++ b/item_generation_scripts/audiotools/wrappers/networkSimulator.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository retain full ownership rights in their respective contributions in
+# the software. This notice grants no license of any kind, including but not limited to patent
+# license, nor is any license granted by implication, estoppel or otherwise.
+#
+# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+# contributions.
+#
+# This software is provided "AS IS", without any express or implied warranties. The software is in the
+# development stage. It is intended exclusively for experts who have experience with such software and
+# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+# and fitness for a particular purpose are hereby disclaimed and excluded.
+#
+# Any dispute, controversy or claim arising under or in relation to providing this software shall be
+# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+# the United Nations Convention on Contracts on the International Sales of Goods.
+#
+
+import logging
+import os.path
+from pathlib import Path
+from typing import Optional, Union
+
+from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES
+from item_generation_scripts.utils import find_binary, run
+
+LIST_JBM_PROFILES = range(12)
+ERROR_PATTERNS_DIR = Path(__file__).parent.parent.parent.joinpath("dly_error_profiles")
+
+
+def validate_network_simulator(
+    error_pattern: Optional[Union[Path, str]] = None,
+    error_profile: Optional[int] = None,
+    n_frames_per_packet: Optional[int] = None,
+) -> None:
+    """
+    Validate settings for the network simulator
+
+    Parameters
+    ----------
+    error_pattern: Optional[Union[Path, str]]
+        Path to existing error pattern
+    error_profile: Optional[int]
+        Index of existing error pattern
+    n_frames_per_packet: Optional[int]
+        Number of frames per packet
+    """
+
+    if "networkSimulator_g192" in DEFAULT_CONFIG_BINARIES["binary_paths"]:
+        binary = find_binary(
+            DEFAULT_CONFIG_BINARIES["binary_paths"]["networkSimulator_g192"].name,
+            binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"][
+                "networkSimulator_g192"
+            ].parent,
+        )
+    else:
+        binary = find_binary("networkSimulator_g192")
+
+    if binary is None:
+        raise FileNotFoundError(
+            "The network simulator binary was not found! Please check the configuration."
+        )
+    if error_pattern is not None:
+        if not os.path.exists(os.path.realpath(error_pattern)):
+            raise FileNotFoundError(
+                f"The network simulator error profile file {error_pattern} was not found! Please check the configuration."
+            )
+        if error_profile is not None:
+            raise ValueError(
+                "JBM pattern and JBM profile number are specified for bitstream processing. Can't use both! Please check the configuration."
+            )
+    elif error_profile is not None:
+        if error_profile not in LIST_JBM_PROFILES:
+            raise ValueError(
+                f"JBM profile number {error_profile} does not exist, should be between {LIST_JBM_PROFILES[0]} and {LIST_JBM_PROFILES[-1]}"
+            )
+    if n_frames_per_packet is not None and n_frames_per_packet not in [1, 2]:
+        raise ValueError(
+            f"n_frames_per_packet is {n_frames_per_packet}. Should be 1 or 2. Please check your configuration."
+        )
+
+    return
+
+
+def network_simulator(
+    error_pattern: Union[str, Path],
+    in_bitstream: Union[str, Path],
+    out_bitstream: Union[str, Path],
+    n_frames_per_packet: int,
+    offset: int,
+    logger: Optional[logging.Logger] = None,
+) -> None:
+    """
+    Wrapper for networkSimulator_g192 binary to apply error patterns for the bitstream processing
+
+    Parameters
+    ----------
+    error_pattern: Union[str, Path]
+        Path to error pattern file
+    in_bitstream: Union[str, Path]
+        Path to input bitstream file
+    out_bitstream: Union[str, Path]
+        Output path for modified bitstream
+    n_frames_per_packet: int
+        Number of frames per packet [1, 2]
+    offset: int
+        Delay offset
+    logger: Optional[logging.Logger]
+        Logger
+    """
+
+    # find binary
+    if "networkSimulator_g192" in DEFAULT_CONFIG_BINARIES["binary_paths"]:
+        binary = find_binary(
+            DEFAULT_CONFIG_BINARIES["binary_paths"]["networkSimulator_g192"].name,
+            binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"][
+                "networkSimulator_g192"
+            ].parent,
+        )
+    else:
+        binary = find_binary("networkSimulator_g192")
+
+    # check for valid inputs
+    if not Path(in_bitstream).is_file():
+        raise ValueError(
+            f"Input bitstream file {in_bitstream} for bitstream processing does not exist"
+        )
+    elif not Path(error_pattern).is_file():
+        raise ValueError(
+            f"Error pattern file {error_pattern} for bitstream processing does not exist"
+        )
+
+    # set up command line
+    cmd = [
+        str(binary),
+        error_pattern,
+        in_bitstream,
+        out_bitstream,
+        f"{out_bitstream}_tracefile_sim",
+        str(n_frames_per_packet),
+        str(offset),
+    ]
+
+    # run command
+    run(cmd, logger=logger)
+
+    return
+
+
+def apply_network_simulator(
+    in_bitstream: Union[Path, str],
+    out_bitstream: Union[Path, str],
+    error_pattern: Optional[Union[Path, str]] = None,
+    error_profile: Optional[int] = None,
+    n_frames_per_packet: Optional[int] = None,
+    offset: Optional[int] = 0,
+    logger: Optional[logging.Logger] = None,
+) -> None:
+    """
+    Function to apply a network simulator profile to a bitstream
+
+    Parameters
+    ----------
+    in_bitstream: Union[Path, str]
+        Path of input bitstream
+    out_bitstream: Union[Path, str]
+        Path of output bitstream
+    error_pattern: Optional[Union[Path, str]]
+        Path to existing error pattern
+    error_profile: Optional[int]
+        Index of existing error pattern
+    n_frames_per_packet: Optional[int]
+        Number of frames per packet
+    offset: Optional[int]
+        Delay offset
+    logger: Optional[logging.Logger]
+        Logger
+    """
+
+    if error_pattern is None:
+        # create error pattern
+        if error_profile is not None:
+            if error_profile in LIST_JBM_PROFILES:
+                error_pattern = ERROR_PATTERNS_DIR.joinpath(
+                    f"dly_error_profile_{error_profile}.dat"
+                )
+            else:
+                raise ValueError(
+                    f"JBM profile number {error_profile} does not exist, should be between {LIST_JBM_PROFILES[0]} and {LIST_JBM_PROFILES[-1]}"
+                )
+        else:
+            raise ValueError(
+                "Either error pattern or error profile number has to be specified for network simulator bitstream processing"
+            )
+    elif error_profile is not None:
+        raise ValueError(
+            "JBM pattern and JBM profile number are specified for bitstream processing. 
Can't use both" + ) + + if n_frames_per_packet is None: + n_frames_per_packet = 1 + if error_profile is not None and error_profile == 5: + n_frames_per_packet = 2 + + # apply error pattern + network_simulator( + error_pattern, in_bitstream, out_bitstream, n_frames_per_packet, offset, logger + ) + + return diff --git a/item_generation_scripts/audiotools/wrappers/p50fbmnru.py b/item_generation_scripts/audiotools/wrappers/p50fbmnru.py new file mode 100644 index 00000000..2f4c19ef --- /dev/null +++ b/item_generation_scripts/audiotools/wrappers/p50fbmnru.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
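+
+"""
+Usage sketch for the MNRU wrapper in this module (file name, format string
+and Q value are assumed example values):
+
+    from item_generation_scripts.audiotools import audio
+    from item_generation_scripts.audiotools.wrappers.p50fbmnru import p50fbmnru
+
+    # apply P.50 fullband MNRU with a speech-to-noise ratio of Q = 20 dB
+    item = audio.fromfile("STEREO", "item.wav", 48000)
+    item.audio = p50fbmnru(item, 20.0)
+"""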
+# + +from pathlib import Path +from tempfile import TemporaryDirectory +from warnings import warn + +import numpy as np + +from item_generation_scripts.audiotools import audio +from item_generation_scripts.audiotools.audiofile import read, write +from item_generation_scripts.audiotools.wrappers.filter import resample_itu +from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES +from item_generation_scripts.utils import find_binary, run + + +def p50fbmnru( + input: audio.Audio, + q_db: float, +) -> np.ndarray: + """ + Wrapper for P.50 Fullband MNRU (Modulated Noise Reference Unit), requires p50fbmnru binary + The mode is M (Modulated Noise) as specified in section 5.2.1 of S4-141392 - EVS-7c Processing functions for characterization phase v110.doc + + Parameters + ---------- + input : Audio + Input audio + q_db: float + The ratio, in dB, of speech power to modulated noise power + + Returns + ------- + output: np.ndarray + Output array + """ + + if "p50fbmnru" in DEFAULT_CONFIG_BINARIES["binary_paths"]: + binary = find_binary( + DEFAULT_CONFIG_BINARIES["binary_paths"]["p50fbmnru"].name, + binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["p50fbmnru"].parent, + ) + else: + binary = find_binary("p50fbmnru") + + if input.fs != 48000: + warn("P.50 Fullband MNRU requires a sampling rate of 48kHz.") + tmp_sig = resample_itu(input, 48000) + else: + tmp_sig = input.audio + + tmp_input_signal = tmp_sig + tmp_output_signal = np.ones((48000, input.num_channels)) + + with TemporaryDirectory() as tmp_dir: + tmp_dir = Path(tmp_dir) + tmp_input_file = tmp_dir.joinpath("tmp_input_signal.raw") + tmp_output_file = tmp_dir.joinpath("tmp_output_signal.raw") + + """ + P.50 Fullband MNRU + """ + + cmd = [ + str(binary), + str(tmp_input_file), + str(tmp_output_file), + str(q_db), + "M", + ] + + # write temporary file + write(tmp_input_file, tmp_input_signal) + write(tmp_output_file, tmp_output_signal) + + # run command + run(cmd) + + tmp_output_signal, out_fs = read(tmp_output_file, input.num_channels) + + return tmp_output_signal diff --git a/item_generation_scripts/audiotools/wrappers/random_seed.py b/item_generation_scripts/audiotools/wrappers/random_seed.py new file mode 100644 index 00000000..01cf0870 --- /dev/null +++ b/item_generation_scripts/audiotools/wrappers/random_seed.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. 
This notice grants no license of any kind, including but not limited to patent
+# license, nor is any license granted by implication, estoppel or otherwise.
+#
+# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+# contributions.
+#
+# This software is provided "AS IS", without any express or implied warranties. The software is in the
+# development stage. It is intended exclusively for experts who have experience with such software and
+# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+# and fitness for a particular purpose are hereby disclaimed and excluded.
+#
+# Any dispute, controversy or claim arising under or in relation to providing this software shall be
+# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+# the United Nations Convention on Contracts on the International Sales of Goods.
+#
+
+from typing import Optional, Tuple, Union
+
+from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES
+from item_generation_scripts.utils import find_binary, run
+
+
+def random_seed(
+    range: Tuple[int, int],
+    master_seed: Optional[int] = 0,
+    prerun_seed: Optional[int] = 0,
+    hexa: Optional[bool] = True,
+) -> Union[int, str]:
+    """
+    Wrapper for the random binary to draw one random seed value
+
+    Parameters
+    ----------
+    range: Tuple[int, int]
+        Value range (min, max) for the generated seed
+    master_seed: Optional[int]
+        Master seed for error pattern generation
+    prerun_seed: Optional[int]
+        Number of preruns in seed generation
+    hexa: Optional[bool]
+        Flag if output should be in hexadecimal or decimal format
+
+    Returns
+    -------
+    result: Union[int, str]
+        One random value (a hexadecimal string if hexa is set, an int otherwise)
+    """
+
+    # find binary
+    if "random" in DEFAULT_CONFIG_BINARIES["binary_paths"]:
+        binary = find_binary(
+            DEFAULT_CONFIG_BINARIES["binary_paths"]["random"].name,
+            binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["random"].parent,
+        )
+    else:
+        binary = find_binary("random")
+
+    # set up command line
+    cmd = [
+        str(binary),
+        "-n",  # Number of items
+        str(1),
+        "-s",
+        str(master_seed),
+        "-d",
+        str(prerun_seed),
+        "-r",  # value range for results
+        str(range[0]),
+        str(range[1]),
+    ]
+
+    # run command
+    result = run(cmd)
+    result = int(result.stdout[:-1])
+
+    if hexa:
+        result = hex(result)
+
+    return result
diff --git a/item_generation_scripts/binary_paths.yml b/item_generation_scripts/binary_paths.yml
new file mode 100644
index 00000000..bafcacfc
--- /dev/null
+++ b/item_generation_scripts/binary_paths.yml
@@ -0,0 +1,30 @@
+---
+################################################
+# Binary paths
+################################################
+### Custom binary paths and names can be specified here.
+### If not defined here, the binaries in item_generation_scripts/bin will be used.
+### If binaries are neither specified here nor found in the bin folder, the scripts will look for them in $PATH.
+### DO NOT change the location of this file.
+### DO NOT USE relative paths. The paths have to be absolute.
+### DO NOT change the default keys. 
diff --git a/item_generation_scripts/binary_paths.yml b/item_generation_scripts/binary_paths.yml
new file mode 100644
index 00000000..bafcacfc
--- /dev/null
+++ b/item_generation_scripts/binary_paths.yml
@@ -0,0 +1,30 @@
+---
+################################################
+# Binary paths
+################################################
+### Custom binary paths and names can be specified here.
+### If not defined here, the binaries in item_generation_scripts/bin will be used
+### If binaries are neither specified here nor found in the bin folder, the scripts will look for them in $PATH
+### DO NOT change the location of this file.
+### DO NOT USE relative paths. The paths have to be absolute.
+### DO NOT change the default keys.
+### For example, if the user has renamed the 'filter' binary to 'foo' then use --> filter: path/to/binary/foo
+
+# ### Binary for resampling and filtering
+# filter: "path/to/binary/filter_new"
+# ### Binary for loudness adjustment
+# bs1770demo: "path/to/binary/bs1880"
+# ### Binary for MNRU
+# p50fbmnru: "path/to/binary/p50fbmnru"
+# ### Binary for ESDRU
+# esdru: "path/to/binary/esdru"
+# ### Binary for frame error pattern application
+# eid-xor: "path/to/binary/eid-xor"
+# ### Binary for error pattern generation
+# gen-patt: "path/to/binary/gen-patt"
+# ### Binary for random offset/seed generation
+# random: "path/to/binary/random"
+# ### Binary for JBM network simulator
+# networkSimulator_g192: "path/to/binary/networkSimulator_g192"
+# ### Binary for MASA rendering
+# masaRenderer: "path/to/binary/masaRenderer"
\ No newline at end of file
diff --git a/item_generation_scripts/config/ISM1_CONFIG.yml b/item_generation_scripts/config/ISM1_CONFIG.yml
new file mode 100644
index 00000000..f4e1ee31
--- /dev/null
+++ b/item_generation_scripts/config/ISM1_CONFIG.yml
@@ -0,0 +1,338 @@
+---
+################################################
+# General configuration
+################################################
+
+### Output format
+format: "ISM1"
+
+### Date; default = YYYYMMDD_HH.MM.SS
+# date: 2023.06.30
+
+### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false
+# delete_tmp: true
+
+### Output sampling rate in Hz needed for headerless audio files; default = 48000
+# fs: 32000
+
+### Any relative paths will be interpreted relative to the working directory the script is called from!
+### Usage of absolute paths is recommended.
+### Do not use file names with dots "." in them! This is not supported, use "_" instead
+### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions
+
+### Input path to mono files
+input_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/items_mono"
+
+### Output path for generated test items and metadata files
+output_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/output"
+
+### Target loudness in LKFS; default = null (no loudness normalization applied)
+loudness: -26
+
+
+################################################
+### Scene description
+################################################
+
+### Each scene must start with a unique scene tag (e.g. a1, a2, ...)
+### Specify the mono source filename (the program will search for it in the input_path folder)
+### Specify azimuth and elevation for each input source
+### Note 1: use [val1, val2, ...] for multiple sources in a scene
+### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames
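+### Example (illustrative): azimuth: "0:1:360" starts at 0 degrees and advances
+### by 1 degree per 20ms frame, i.e. one full revolution in 360 frames = 7.2 s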
+
+### Note 3: we're using a right-handed coordinate system with azi = 0 pointing from the nose to the screen
+### azimuth: float, [-180,180]; positive indicates left
+### elevation: float, [-90,90]; positive indicates up
+### distance: float, tbd; default: 1
+### spread: float, [0,360]; spread in angles from 0 ... 360˚
+### gain: float, [0,1]
+
+scenes:
+  a1:
+    name: "G1S1.wav"
+    description: "Talker sitting at a table"
+    source: "f2s5a_Talker1.wav"
+    azimuth: 0
+    elevation: 0
+    delay: 0
+
+  a2:
+    name: "G6S2.wav"
+    description: "Talker sitting at a table"
+    source: "f5s10a_Talker1.wav"
+    azimuth: 60
+    elevation: 0
+    delay: 0
+
+  a3:
+    name: "G5S3.wav"
+    description: "Talker sitting at a table"
+    source: "f2s5a_Talker1.wav"
+    azimuth: 120
+    elevation: 0
+    delay: 0
+
+  a4:
+    name: "G4S4.wav"
+    description: "Talker sitting at a table"
+    source: "m4s11b_Talker1.wav"
+    azimuth: 180
+    elevation: 0
+    delay: 0
+
+  a5:
+    name: "G3S5.wav"
+    description: "Talker sitting at a table"
+    source: "m1s4a_Talker1.wav"
+    azimuth: 240
+    elevation: 0
+    delay: 0
+
+  a6:
+    name: "G2S6.wav"
+    description: "Talker sitting at a table"
+    source: "f5s10a_Talker1.wav"
+    azimuth: 300
+    elevation: 0
+    delay: 0
+
+  b1:
+    name: "G2S1.wav"
+    description: "Standing talker."
+    source: "f5s10b_Talker1.wav"
+    azimuth: 120
+    elevation: 35
+    delay: 0
+
+  b2:
+    name: "G1S2.wav"
+    description: "Standing talker."
+    source: "f2s1a_Talker1.wav"
+    azimuth: 180
+    elevation: 35
+    delay: 0
+
+  b3:
+    name: "G6S3.wav"
+    description: "Standing talker."
+    source: "f5s10b_Talker1.wav"
+    azimuth: 240
+    elevation: 35
+    delay: 0
+
+  b4:
+    name: "G5S4.wav"
+    description: "Standing talker."
+    source: "f2s1a_Talker1.wav"
+    azimuth: 300
+    elevation: 35
+    delay: 0
+
+  b5:
+    name: "G4S5.wav"
+    description: "Standing talker."
+    source: "m4s11a_Talker1.wav"
+    azimuth: 0
+    elevation: 35
+    delay: 0
+
+  b6:
+    name: "G3S6.wav"
+    description: "Standing talker."
+    source: "m1s2b_Talker1.wav"
+    azimuth: 60
+    elevation: 35
+    delay: 0
+
+  c1:
+    name: "G3S1.wav"
+    description: "Smaller talker (child) walking around a table."
+    source: "m1s6b_Talker1.wav"
+    azimuth: "0:1:360"
+    elevation: 0
+    delay: 0
+
+  c2:
+    name: "G2S2.wav"
+    description: "Smaller talker (child) walking around a table."
+    source: "f5s14a_Talker1.wav"
+    azimuth: "60:1:60+360"
+    elevation: 0
+    delay: 0
+
+  c3:
+    name: "G1S3.wav"
+    description: "Smaller talker (child) walking around a table."
+    source: "f2s6a_Talker1.wav"
+    azimuth: "120:1:120+360"
+    elevation: 0
+    delay: 0
+
+  c4:
+    name: "G6S4.wav"
+    description: "Smaller talker (child) walking around a table."
+    source: "f5s14a_Talker1.wav"
+    azimuth: "180:1:180+360"
+    elevation: 0
+    delay: 0
+
+  c5:
+    name: "G5S5.wav"
+    description: "Smaller talker (child) walking around a table."
+    source: "f2s6a_Talker1.wav"
+    azimuth: "240:1:240+360"
+    elevation: 0
+    delay: 0
+
+  c6:
+    name: "G4S6.wav"
+    description: "Smaller talker (child) walking around a table."
+    source: "m4s13a_Talker1.wav"
+    azimuth: "300:1:300+360"
+    elevation: 0
+    delay: 0
+
+  d1:
+    name: "G4S1.wav"
+    description: "Talker walking around the table."
+    source: "m4s12b_Talker1.wav"
+    azimuth: "0:-1:-360"
+    elevation: 35
+    delay: 0
+
+  d2:
+    name: "G3S2.wav"
+    description: "Talker walking around the table."
+    source: "m1s12a_Talker1.wav"
+    azimuth: "60:-1:60-360"
+    elevation: 35
+    delay: 0
+
+  d3:
+    name: "G2S3.wav"
+    description: "Talker walking around the table."
+    source: "f5s15b_Talker1.wav"
+    azimuth: "120:-1:120-360"
+    elevation: 35
+    delay: 0
+
+  d4:
+    name: "G1S4.wav"
+    description: "Talker walking around the table."
+    source: "f2s3b_Talker1.wav"
+    azimuth: "180:-1:180-360"
+    elevation: 35
+    delay: 0
+
+  d5:
+    name: "G6S5.wav"
+    description: "Talker walking around the table."
+ source: "f5s15b_Talker1.wav" + azimuth: "240:-1:240-360" + elevation: 35 + delay: 0 + + d6: + name: "G5S6.wav" + description: "Talker walking around the table." + source: "f2s3b_Talker1.wav" + azimuth: "300:-1:300-360" + elevation: 35 + delay: 0 + + e1: + name: "G5S1.wav" + description: "Elevation displacement." + source: "f2s4a_Talker1.wav" + azimuth: 240 + elevation: "-90:0.5:90" + delay: 0 + + e2: + name: "G4S2.wav" + description: "Elevation displacement." + source: "m4s16a_Talker1.wav" + azimuth: 300 + elevation: 0 + delay: 0 + + e3: + name: "G3S3.wav" + description: "Elevation displacement." + source: "m1s16b_Talker1.wav" + azimuth: 0 + elevation: "-90:0.5:90" + delay: 0 + + e4: + name: "G2S4.wav" + description: "Elevation displacement." + source: "f5s19a_Talker1.wav" + azimuth: 60 + elevation: "-90:0.5:90" + delay: 0 + + e5: + name: "G1S5.wav" + description: "Elevation displacement." + source: "f2s4a_Talker1.wav" + azimuth: 120 + elevation: "-90:0.5:90" + delay: 0 + + e6: + name: "G6S6.wav" + description: "Elevation displacement." + source: "f5s19a_Talker1.wav" + azimuth: 180 + elevation: "-90:0.5:90" + delay: 0 + + f1: + name: "G6S1.wav" + description: "Azimuth and elevation displacement." + source: "f5s15a_Talker1.wav" + azimuth: "60:0.5:60+180" + elevation: "35:-0.2:-35" + delay: 0 + + f2: + name: "G5S2.wav" + description: "Azimuth and elevation displacement." + source: "f2s7b_Talker1.wav" + azimuth: "120:0.5:120+180" + elevation: "35:-0.2:-35" + delay: 0 + + f3: + name: "G4S3.wav" + description: "Azimuth and elevation displacement." + source: "m4s14a_Talker1.wav" + azimuth: "180:0.5:180+180" + elevation: "35:-0.2:-35" + delay: 0 + + f4: + name: "G3S4.wav" + description: "Azimuth and elevation displacement." + source: "m1s7a_Talker1.wav" + azimuth: "240:0.5:240+180" + elevation: "35:-0.2:-35" + delay: 0 + + f5: + name: "G2S5.wav" + description: "Azimuth and elevation displacement." + source: "f5s15a_Talker1.wav" + azimuth: "300:0.5:300+180" + elevation: "35:-0.2:-35" + delay: 0 + + f6: + name: "G1S6.wav" + description: "Azimuth and elevation displacement." + source: "f2s7b_Talker1.wav" + azimuth: "0:0.5:0+180" + elevation: "35:-0.2:-35" + delay: 0 + \ No newline at end of file diff --git a/item_generation_scripts/config/ISM2_CONFIG.yml b/item_generation_scripts/config/ISM2_CONFIG.yml new file mode 100644 index 00000000..c4a65c07 --- /dev/null +++ b/item_generation_scripts/config/ISM2_CONFIG.yml @@ -0,0 +1,338 @@ +--- +################################################ +# General configuration +################################################ + +### Output format +format: "ISM2" + +### Date; default = YYYYMMDD_HH.MM.SS +# date: 2023.06.30 + +### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false +# delete_tmp: true + +### Output sampling rate in Hz needed for headerless audio files; default = 48000 +# fs: 32000 + +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! 
This is not supported, use "_" instead
+### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions
+
+### Input path to mono files
+input_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/items_mono"
+
+### Output path for generated test items and metadata files
+output_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/output"
+
+### Target loudness in LKFS; default = null (no loudness normalization applied)
+loudness: -26
+
+
+################################################
+### Scene description
+################################################
+
+### Each scene must start with a unique scene tag (e.g. a1, a2, ...)
+### Specify the mono source filename (the program will search for it in the input_path folder)
+### Specify azimuth and elevation for each input source
+### Note 1: use [val1, val2, ...] for multiple sources in a scene
+### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames
+
+### Note 3: we're using a right-handed coordinate system with azi = 0 pointing from the nose to the screen
+### azimuth: float, [-180,180]; positive indicates left
+### elevation: float, [-90,90]; positive indicates up
+### distance: float, tbd; default: 1
+### spread: float, [0,360]; spread in angles from 0 ... 360˚
+### gain: float, [0,1]
+
+scenes:
+  a1:
+    name: "G1S1.wav"
+    description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
+    source: ["f2s5a_Talker1.wav", "m2s16b_Talker2.wav"]
+    azimuth: [0, 50]
+    elevation: [0, 0]
+    delay: [0, 0]
+
+  a2:
+    name: "G6S2.wav"
+    description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
+    source: ["f5s10a_Talker1.wav", "m3s2a_Talker2.wav"]
+    azimuth: [50, 350]
+    elevation: [0, 0]
+    delay: [0, 0]
+
+  a3:
+    name: "G5S3.wav"
+    description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
+    source: ["f2s5a_Talker1.wav", "m2s16b_Talker2.wav"]
+    azimuth: [40, 290]
+    elevation: [0, 0]
+    delay: [0, 0]
+
+  a4:
+    name: "G4S4.wav"
+    description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
+    source: ["m4s11b_Talker1.wav", "f1s4b_Talker2.wav"]
+    azimuth: [30, 230]
+    elevation: [15, 15]
+    delay: [0, 0]
+
+  a5:
+    name: "G3S5.wav"
+    description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
+    source: ["m1s4a_Talker1.wav", "f3s3a_Talker2.wav"]
+    azimuth: [20, 170]
+    elevation: [15, 15]
+    delay: [0, 0]
+
+  a6:
+    name: "G2S6.wav"
+    description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
+    source: ["f5s10a_Talker1.wav", "m3s2a_Talker2.wav"]
+    azimuth: [10, 110]
+    elevation: [15, 15]
+    delay: [0, 0]
+
+  b1:
+    name: "G2S1.wav"
+    description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
+    source: ["f5s10b_Talker1.wav", "m3s2b_Talker2.wav"]
+    azimuth: [20, 170]
+    elevation: [30, 30]
+    delay: [0, 0]
+
+  b2:
+    name: "G1S2.wav"
+    description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
+    source: ["f2s1a_Talker1.wav", "m2s10a_Talker2.wav"]
+    azimuth: [10, 110]
+    elevation: [30, 30]
+    delay: [0, 0]
+
+  b3:
+    name: "G6S3.wav"
+    description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
+    source: ["f5s10b_Talker1.wav", "m3s2b_Talker2.wav"]
+    azimuth: [0, 50]
+    elevation: [30, 30]
+    delay: [0, 0]
+
+  b4:
+    name: "G5S4.wav"
+    description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
+    source: ["f2s1a_Talker1.wav", "m2s10a_Talker2.wav"]
+    azimuth: [50, 350]
+    elevation: [60, 60]
+    delay: [0, 0]
+
+  b5:
+    name: "G4S5.wav"
+    description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
+    source: ["m4s11a_Talker1.wav", "f1s6a_Talker2.wav"]
+    azimuth: [40, 290]
+    elevation: [60, 60]
+    delay: [0, 0]
+
+  b6:
+    name: "G3S6.wav"
+    description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
+    source: ["m1s2b_Talker1.wav", "f3s5a_Talker2.wav"]
+    azimuth: [30, 230]
+    elevation: [60, 60]
+    delay: [0, 0]
+
+  c1:
+    name: "G3S1.wav"
+    description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances."
+    source: ["m1s6b_Talker1.wav", "f3s5b_Talker2.wav"]
+    azimuth: [40, 290]
+    elevation: [0, 60]
+    delay: [0, 0]
+
+  c2:
+    name: "G2S2.wav"
+    description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances."
+    source: ["f5s14a_Talker1.wav", "m3s8a_Talker2.wav"]
+    azimuth: [30, 230]
+    elevation: [0, 60]
+    delay: [0, 0]
+
+  c3:
+    name: "G1S3.wav"
+    description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances."
+    source: ["f2s6a_Talker1.wav", "m2s13a_Talker2.wav"]
+    azimuth: [20, 170]
+    elevation: [0, 60]
+    delay: [0, 0]
+
+  c4:
+    name: "G6S4.wav"
+    description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances."
+    source: ["f5s14a_Talker1.wav", "m3s8a_Talker2.wav"]
+    azimuth: [10, 110]
+    elevation: [0, 60]
+    delay: [0, 0]
+
+  c5:
+    name: "G5S5.wav"
+    description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances."
+    source: ["f2s6a_Talker1.wav", "m2s13a_Talker2.wav"]
+    azimuth: [0, 50]
+    elevation: [0, 60]
+    delay: [0, 0]
+
+  c6:
+    name: "G4S6.wav"
+    description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances."
+    source: ["m4s13a_Talker1.wav", "f1s20a_Talker2.wav"]
+    azimuth: [50, 350]
+    elevation: [0, 60]
+    delay: [0, 0]
+
+  d1:
+    name: "G4S1.wav"
+    description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
+    source: ["m4s12b_Talker1.wav", "f1s12b_Talker2.wav"]
+    azimuth: [50, "180:1:120 + 360"]
+    elevation: [0, 60]
+    delay: [0, 0]
+
+  d2:
+    name: "G3S2.wav"
+    description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
+    source: ["m1s12a_Talker1.wav", "f3s20a_Talker2.wav"]
+    azimuth: [300, "-70:-1:-10 - 360"]
+    elevation: [0, 60]
+    delay: [0, 0]
+
+  d3:
+    name: "G2S3.wav"
+    description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
+ source: ["f5s15b_Talker1.wav", "m3s1a_Talker2.wav"] + azimuth: [250, "-20:-1:-320"] + elevation: [0, 60] + delay: [0, 0] + + d4: + name: "G1S4.wav" + description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." + source: ["f2s3b_Talker1.wav", "m2s15a_Talker2.wav"] + azimuth: [200, "30:-1:-270"] + elevation: [0, 60] + delay: [0, 0] + + d5: + name: "G6S5.wav" + description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." + source: ["f5s15b_Talker1.wav", "m3s1a_Talker2.wav"] + azimuth: [150, "80:1:20 + 360"] + elevation: [0, 60] + delay: [0, 0] + + d6: + name: "G5S6.wav" + description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." + source: ["f2s3b_Talker1.wav", "m2s15a_Talker2.wav"] + azimuth: [100, "130:1:70 + 360"] + elevation: [0, 60] + delay: [0, 0] + + e1: + name: "G5S1.wav" + description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["f2s4a_Talker1.wav", "m2s17b_Talker2.wav"] + azimuth: ["80:1:20 + 360", "80:1:20 + 360"] + elevation: [10, 60] + delay: [0, 0] + + e2: + name: "G4S2.wav" + description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["m4s16a_Talker1.wav", "f1s16b_Talker2.wav"] + azimuth: ["130:1:70 + 360", "130:1:70 + 360"] + elevation: [10, 60] + delay: [0, 0] + + e3: + name: "G3S3.wav" + description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["m1s16b_Talker1.wav", "f3s10b_Talker2.wav"] + azimuth: ["180:1:120 + 360", "180:1:120 + 360"] + elevation: [10, 60] + delay: [0, 0] + + e4: + name: "G2S4.wav" + description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["f5s19a_Talker1.wav", "m3s1b_Talker2.wav"] + azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] + elevation: [10, 60] + delay: [0, 0] + + e5: + name: "G1S5.wav" + description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["f2s4a_Talker1.wav", "m2s17b_Talker2.wav"] + azimuth: ["-20:-1:-320", "-20:-1:-320"] + elevation: [10, 60] + delay: [0, 0] + + e6: + name: "G6S6.wav" + description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["f5s19a_Talker1.wav", "m3s1b_Talker2.wav"] + azimuth: ["30:-1:-270", "30:-1:-270"] + elevation: [10, 60] + delay: [0, 0] + + f1: + name: "G6S1.wav" + description: "two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["f5s15a_Talker1.wav", "m3s8b_Talker2.wav"] + azimuth: ["60:1:0 + 360", "60:-1:120 - 360"] + elevation: [20, 50] + delay: [0, 0] + + f2: + name: "G5S2.wav" + description: "two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["f2s7b_Talker1.wav", "m2s6b_Talker2.wav"] + azimuth: ["0:1:300", "0:-1:60 - 360"] + elevation: [20, 50] + delay: [0, 0] + + f3: + name: "G4S3.wav" + description: "two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["m4s14a_Talker1.wav", "f1s7a_Talker2.wav"] + azimuth: ["300:1:240 + 360", "300:-1:0"] + elevation: [20, 50] + delay: [0, 0] + + f4: + name: "G3S4.wav" + description: "two talkers walking around the table in opposite directions, non-overlapping utterances." 
+ source: ["m1s7a_Talker1.wav", "f3s7a_Talker2.wav"] + azimuth: ["240:1:180 + 360", "240:-1:-60"] + elevation: [20, 50] + delay: [0, 0] + + f5: + name: "G2S5.wav" + description: "two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["f5s15a_Talker1.wav", "m3s8b_Talker2.wav"] + azimuth: ["180:1:120 + 360", "180:-1:-120"] + elevation: [20, 50] + delay: [0, 0] + + f6: + name: "G1S6.wav" + description: "two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["f2s7b_Talker1.wav", "m2s6b_Talker2.wav"] + azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] + elevation: [20, 50] + delay: [0, 0] + \ No newline at end of file diff --git a/item_generation_scripts/constants.py b/item_generation_scripts/constants.py new file mode 100644 index 00000000..3b554800 --- /dev/null +++ b/item_generation_scripts/constants.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+# + +from datetime import datetime +from pathlib import Path + +from item_generation_scripts.utils import find_binary, get_binary_paths + +LOGGER_SUFFIX = ".log" +LOGGER_FORMAT = ( + "%(levelname)-8s:%(processName)-10s | %(name)s | %(asctime)s | %(message)s" +) +LOGGER_DATEFMT = "%m-%d %H:%M:%S" + +SUPPORTED_FORMATS = { + "ISM1", + "ISM2", + "ISM3", + "ISM4", +} + +DEFAULT_CONFIG = { + # general options + "date": f"{datetime.now().strftime('%Y%m%d_%H.%M.%S')}", + "delete_tmp": False, +} + +DEFAULT_CONFIG_ISM2 = { + "format": "ISM2", + "input_path" : "./input", + "output_path": "./output", + # "cod": { + # "bin": find_binary("IVAS_cod", raise_error=False), + # }, + # "dec": { + # "bin": find_binary("IVAS_dec", raise_error=False), + # }, +} + +DEFAULT_CONFIG_BINARIES = { + "binary_paths": get_binary_paths( + Path(__file__).parent.joinpath("binary_paths.yml") + ), +} + +REQUIRED_KEYS = [ + "format", + "input_path", + "output_path", + "scenes", +] diff --git a/item_generation_scripts/processing/__init__.py b/item_generation_scripts/processing/__init__.py new file mode 100644 index 00000000..aea270d8 --- /dev/null +++ b/item_generation_scripts/processing/__init__.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+# diff --git a/item_generation_scripts/processing/config.py b/item_generation_scripts/processing/config.py new file mode 100644 index 00000000..926689c4 --- /dev/null +++ b/item_generation_scripts/processing/config.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +from copy import deepcopy +from pathlib import Path + +import yaml + +from item_generation_scripts.constants import ( + DEFAULT_CONFIG, + DEFAULT_CONFIG_ISM2, + REQUIRED_KEYS +) + + +def merge_dicts(base: dict, other: dict) -> None: + """ + updates base with new keys from other + overrides existing keys + """ + for k in other.keys(): + if k in base and isinstance(base[k], dict) and isinstance(other[k], dict): + merge_dicts(base[k], other[k]) + # explicitly check for None here; + # if the user accidentally specifies only the parent but no sub-keys we don't want to overwrite the default + # however we do want to set non-truthy values e.g. 
False
+        elif other[k] is not None:
+            base[k] = other[k]
+
+
+class TestConfig:
+    def __init__(self, filename: str):
+        """Parse a YAML or JSON configuration file"""
+        # init lists of conditions and associated dirs
+        self.out_dirs = []
+        self.tmp_dirs = []
+
+        # get a copy of the default config (avoid mutating the module-level dict)
+        cfg = deepcopy(DEFAULT_CONFIG)
+
+        # parse configuration file
+        file_cfg = self._parse_yaml(filename)
+
+        # validate configuration from file
+        self._validate(file_cfg)
+
+        # merge dictionaries, overriding from config file
+        merge_dicts(cfg, file_cfg)
+
+        # set attributes from merged dictionary
+        self.__dict__.update(cfg)
+
+        # store the merged config for writing to file later
+        self._yaml_dump = self._dump_yaml(cfg)
+
+        # convert to Path
+        self.input_path = Path(self.input_path)
+        self.output_path = Path(self.output_path)
+
+    def _parse_yaml(self, filename):
+        """parse configuration file"""
+        with open(filename) as fp:
+            return yaml.safe_load(fp)
+
+    def _dump_yaml(self, cfg: dict):
+        """convert objects to strings to avoid YAML dump as object"""
+        cfg = deepcopy(cfg)
+
+        def format(d: dict):
+            for k, v in d.items():
+                if isinstance(v, dict):
+                    format(v)
+                else:
+                    d[k] = str(v)
+
+        format(cfg)
+
+        return cfg
+
+    def _validate(self, cfg: dict):
+        """ensure configuration contains required keys"""
+        MISSING_KEYS = []
+        # check required keys
+        for r in REQUIRED_KEYS:
+            # if there was a tuple, we have a list of subkeys to check
+            if isinstance(r, tuple):
+                req_key, req_values = r
+                if not cfg.get(req_key):
+                    MISSING_KEYS.append(req_key)
+                else:
+                    # check all required values
+                    for v in req_values:
+                        if not cfg.get(req_key).get(v):
+                            MISSING_KEYS.append(f"{req_key} : {v}")
+            elif not cfg.get(r):
+                MISSING_KEYS.append(r)
+
+        # Report missing keys to the user
+        if MISSING_KEYS:
+            raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}")
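+
+# Illustrative usage (a sketch; the file name stands in for any valid config):
+#
+#   cfg = TestConfig("item_generation_scripts/config/ISM1_CONFIG.yml")
+#   print(cfg.format, cfg.input_path, cfg.output_path)
+#
+# A config missing one of the required keys ("format", "input_path",
+# "output_path", "scenes") raises a KeyError during construction.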
diff --git a/item_generation_scripts/processing/preprocessing_2.py b/item_generation_scripts/processing/preprocessing_2.py
new file mode 100644
index 00000000..1152ccc7
--- /dev/null
+++ b/item_generation_scripts/processing/preprocessing_2.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+
+#
+# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository. All Rights Reserved.
+#
+# This software is protected by copyright law and by international treaties.
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository retain full ownership rights in their respective contributions in
+# the software. This notice grants no license of any kind, including but not limited to patent
+# license, nor is any license granted by implication, estoppel or otherwise.
+#
+# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+# contributions.
+#
+# This software is provided "AS IS", without any express or implied warranties. The software is in the
+# development stage. It is intended exclusively for experts who have experience with such software and
+# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+# and fitness for a particular purpose are hereby disclaimed and excluded.
+#
+# Any dispute, controversy or claim arising under or in relation to providing this software shall be
+# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+# the United Nations Convention on Contracts on the International Sales of Goods.
+#
+
+import logging
+from pathlib import Path
+from warnings import warn
+
+import numpy as np
+
+from item_generation_scripts.audiotools import audio
+from item_generation_scripts.audiotools.audioarray import delay, trim
+from item_generation_scripts.audiotools.audiofile import write
+from item_generation_scripts.audiotools.metadata import (
+    add_remove_preamble,
+    write_ISM_metadata_in_file,
+)
+from item_generation_scripts.audiotools.wrappers.bs1770 import (
+    get_loudness,
+    loudness_norm,
+)
+from item_generation_scripts.audiotools.wrappers.random_seed import random_seed
+from item_generation_scripts.processing.processing import Processing
+
+
+class Preprocessing2(Processing):
+    def __init__(self, attrs: dict):
+        super().__init__(attrs)
+        self.name = "pre_2"
+
+    def process(self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger):
+        logger.debug(f"Preprocessing2 configuration : {self.__dict__}")
+        logger.debug(f"Preprocessing2 {in_file.absolute()} -> {out_file.absolute()}")
+
+        # load in file
+        audio_object = audio.fromfile(
+            self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta
+        )
+
+        # add preamble
+        if self.preamble:
+            # also apply preamble to ISM metadata
+            if self.in_fmt.startswith("ISM"):
+                # read out old
+                metadata = []
+                for meta in in_meta:
+                    metadata.append(np.genfromtxt(meta, delimiter=","))
+
+                # modify metadata
+                metadata = add_remove_preamble(metadata, self.preamble)
+                meta_files = write_ISM_metadata_in_file(metadata, [out_file], True)
+
+                # modify audio object
+                audio_object.metadata_files = meta_files
+                audio_object.object_pos = metadata
+
+            # add preamble to actual signal
+            audio_object.audio = trim(
+                audio_object.audio,
+                audio_object.fs,
+                (-self.preamble, 0),
+                self.pad_noise_preamble,
+            )
+
+        # add background noise
+        if self.background_noise:
+            audio_object.audio = self.add_background_noise(audio_object, in_meta)
+
+        # save file
+        write(out_file, audio_object.audio, fs=audio_object.fs)
+
+        return
+
+    def add_background_noise(self, audio_object: audio.Audio, in_meta) -> np.ndarray:
+        # range for random delay
+        range_delay = (1, 2400000)
+
+        # load background noise
+        noise_object = audio.fromfile(
+            self.in_fmt,
+            self.background_noise["background_noise_path"],
+            fs=self.in_fs,
+            in_meta=in_meta,
+        )
+
+        # if noise is too short raise error
+        if len(noise_object.audio) < len(audio_object.audio):
+            raise ValueError("Background noise too short for audio signal")
+        if len(noise_object.audio) - range_delay[1] < len(audio_object.audio):
+            warn(
+                "Background noise may be too short for audio signal when considering the random delay"
+            )
+
+        # measure loudness of audio signal based on output format
+        tmp_object = audio.fromtype(self.out_fmt)
+        if (
+            isinstance(tmp_object,
audio.ObjectBasedAudio) + or isinstance(tmp_object, audio.SceneBasedAudio) + or isinstance(tmp_object, audio.MetadataAssistedSpatialAudio) + ): + out_format = None + else: + out_format = self.out_fmt + + loudness_signal, _ = get_loudness(audio_object, loudness_format=out_format) + + # compute desired loudness of background noise + loudness_noise = loudness_signal - self.background_noise["snr"] + + # apply random delay and cut signal + rand_delay = random_seed( + range=range_delay, + master_seed=self.background_noise["master_seed"], + prerun_seed=self.background_noise["seed_delay"], + hexa=False, + ) + noise_object.audio = delay( + noise_object.audio, delay=-rand_delay, samples=True, fs=noise_object.fs + )[: len(audio_object.audio)] + + # scale background noise to desired loudness based on output format + noise_object.audio = loudness_norm(noise_object, loudness_noise, out_format) + + # add array to signal + audio_object.audio = noise_object.audio + audio_object.audio + + return audio_object.audio diff --git a/item_generation_scripts/processing/process_ism_items.py b/item_generation_scripts/processing/process_ism_items.py new file mode 100644 index 00000000..95bfb159 --- /dev/null +++ b/item_generation_scripts/processing/process_ism_items.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+#
+
+
+import os
+import csv
+import logging
+from pathlib import Path
+
+import numpy as np
+
+from item_generation_scripts.audiotools import audio, audiofile
+from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness
+
+
+# helper generator that formats a 2D numpy array as strings with 2 decimal digits
+def csv_formatdata(data):
+    for row in data:
+        yield ["%0.2f" % v for v in row]
+
+
+def generate_ism_items(
+    format: str,
+    target_level: int,
+    input_path: Path,
+    output_path: Path,
+    scenes: dict,
+    logger: logging.Logger
+):
+
+    """Generate ISM items with metadata from mono items based on scene description"""
+
+    # get the number of scenes
+    N_scenes = len(scenes)
+
+    for scene_name, scene in scenes.items():
+        logger.info(f"Processing scene {scene_name} ({N_scenes} scenes in total)")
+
+        # extract the number of audio sources
+        N_sources = len(np.atleast_1d(scene['source']))
+
+        y = None
+        y_meta = None
+        for i in range(N_sources):
+
+            source_file = np.atleast_1d(scene['source'])[i]
+            source_azi = np.atleast_1d(scene['azimuth'])[i]
+            source_ele = np.atleast_1d(scene['elevation'])[i]
+            source_type = 'speech'  #### !!!! TBD - support generic audio + background noise and speech in the .yml file
+            source_delay = np.atleast_1d(scene['delay'])[i]
+
+            logger.info(f"Encoding {source_file} at position(s) {source_azi},{source_ele}")
+
+            # read source file
+            # x, fs = audiofile.read(os.path.join(input_path, source_file))  #### !!!! TBD - check the support for headerless .raw files
+            audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file))
+
+            x = audio_object.audio
+            fs = audio_object.fs
+
+            # find the number of 20ms metadata frames (50 frames per second, plus one)
+            N_frames = int(len(x) / fs * 50 + 1)
+
+            # adjust the level of the source file
+            _, scale_factor = get_loudness(audio_object, target_level, "MONO")
+            x *= scale_factor
+
+            # read azimuth information and create array
+            # (the start/step/stop fields may contain simple arithmetic, e.g. "60+360", hence eval)
+            if isinstance(source_azi, str):
+                if ':' in source_azi:
+                    source_azi = source_azi.split(':')
+                    azi = np.arange(float(eval(source_azi[0])), float(eval(source_azi[2])), float(eval(source_azi[1])))
+                else:
+                    azi = np.array(float(eval(source_azi)), ndmin=1)[:N_frames]
+            else:
+                azi = np.array(source_azi, ndmin=1)[:N_frames]
+
+            # ensure that azimuth array has N_frames values
+            if len(azi) > N_frames:
+                # cut the array of azimuth values
+                azi = azi[:N_frames]
+            elif len(azi) < N_frames:
+                # replicate the last azimuth
+                azi = np.append(azi, np.full(N_frames - len(azi), azi[-1]))
+
+            # convert azimuth from 0 .. 360 to -180 .. +180
+            azi = (azi + 180) % 360 - 180
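+            # worked example (illustrative): azimuth "60:1:60+360" parses to
+            # np.arange(60.0, 420.0, 1.0), i.e. 360 values (one per 20ms frame),
+            # which the wrap-around above maps into [-180, +180)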
+
+            # check if azimuth is from -180 .. +180
+            if any(azi > 180) or any(azi < -180):
+                logger.error(f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}")
+
+            # read elevation information and create array
+            if isinstance(source_ele, str):
+                if ':' in source_ele:
+                    source_ele = source_ele.split(':')
+                    ele = np.arange(float(eval(source_ele[0])), float(eval(source_ele[2])), float(eval(source_ele[1])))
+                else:
+                    ele = np.array(float(eval(source_ele)), ndmin=1)[:N_frames]
+            else:
+                ele = np.array(source_ele, ndmin=1)[:N_frames]
+
+            # ensure that elevation array has N_frames values
+            if len(ele) > N_frames:
+                # cut the array of elevation values
+                ele = ele[:N_frames]
+            elif len(ele) < N_frames:
+                # replicate the last elevation
+                ele = np.append(ele, np.full(N_frames - len(ele), ele[-1]))
+
+            # check if elevation is from -90 .. +90
+            if any(ele > 90) or any(ele < -90):
+                logger.error(f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}")
+
+            # additional metadata
+            dist = np.ones(N_frames)  #### !!!! TBD - check what to do with these metadata
+            spread = np.zeros(N_frames)
+            gain = np.ones(N_frames)
+
+            # arrange all metadata fields column-wise into a matrix
+            x_meta = np.column_stack((azi, ele, dist, spread, gain))
+
+            # delay the source file
+            if source_delay > 0:
+                pre = np.zeros((int(source_delay * fs), x.shape[1]))
+                x = np.concatenate([pre, x])
+
+                # apply delay to metadata as well (neutral rows: azi 0, ele 0, dist 1, spread 0, gain 1)
+                pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (int(source_delay * 50), 1))
+                x_meta = np.concatenate([pre, x_meta])
+
+            # add source signal to the array of source signals
+            if y is None:
+                y = x
+            else:
+                # append zeros to have equal length of all source signals
+                if x.shape[0] > y.shape[0]:
+                    y = np.vstack((y, np.zeros((x.shape[0]-y.shape[0], y.shape[1]))))
+                elif y.shape[0] > x.shape[0]:
+                    x = np.vstack((x, np.zeros((y.shape[0]-x.shape[0], x.shape[1]))))
+                y = np.hstack((y, x))
+
+            # add metadata to the array of all metadata
+            x_meta = x_meta[np.newaxis, :]  # make sure x_meta is a 3d array
+            if y_meta is None:
+                y_meta = x_meta
+            else:
+                N_srcs = y_meta.shape[0]
+                N_meta_features = y_meta.shape[2]
+
+                # append postamble (created by repeating the last row of metadata) to have equal length of all metadata
+                if x_meta.shape[1] > y_meta.shape[1]:
+                    N_delta = x_meta.shape[1] - y_meta.shape[1]
+                    y_meta = y_meta.reshape(y_meta.shape[1], -1)  # reshape to 2d array
+                    y_meta = np.vstack((y_meta, np.tile(y_meta[-1, :], (N_delta, 1))))  # repeat last row N_delta times and append to the array
+                    y_meta = y_meta.reshape(N_srcs, -1, N_meta_features)  # reshape back to 3d array
+                elif y_meta.shape[1] > x_meta.shape[1]:
+                    N_delta = y_meta.shape[1] - x_meta.shape[1]
+                    x_meta = x_meta.reshape(x_meta.shape[1], -1)  # reshape to 2d array
+                    x_meta = np.vstack((x_meta, np.tile(x_meta[-1, :], (N_delta, 1))))  # repeat last row N_delta times and append to the array
+                    x_meta = np.expand_dims(x_meta, axis=0)  # reshape back to 3d array
+
+                y_meta = np.concatenate([y_meta, x_meta])
+
+        # write individual ISM audio streams to the output file in an interleaved format
+        output_filename = scene['name']
+        audiofile.write(os.path.join(output_path, output_filename), y, fs)  ### !!!! replace all os.path.xxx operations with the Path object
+
+        # write individual ISM metadata to output files in .csv format
+        for i in range(N_sources):
+            # generate .csv filename (should end with .0.csv, .1.csv, ...)
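+            # e.g. a scene named "G1S1.wav" with two objects yields
+            # "G1S1.wav.0.csv" and "G1S1.wav.1.csv" next to the audio file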
+            csv_filename = os.path.normpath(f"{output_filename}.{i}.csv")
+
+            # newline="" prevents the csv module from writing blank rows on Windows
+            with open(os.path.join(output_path, csv_filename), 'w', newline='') as f:
+                # create csv writer
+                writer = csv.writer(f)
+
+                # write all rows to the .csv file
+                writer.writerows(csv_formatdata(y_meta[i]))
diff --git a/item_generation_scripts/processing/processing.py b/item_generation_scripts/processing/processing.py
new file mode 100644
index 00000000..ad2cf272
--- /dev/null
+++ b/item_generation_scripts/processing/processing.py
@@ -0,0 +1,455 @@
+#!/usr/bin/env python3
+
+#
+# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository. All Rights Reserved.
+#
+# This software is protected by copyright law and by international treaties.
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository retain full ownership rights in their respective contributions in
+# the software. This notice grants no license of any kind, including but not limited to patent
+# license, nor is any license granted by implication, estoppel or otherwise.
+#
+# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+# contributions.
+#
+# This software is provided "AS IS", without any express or implied warranties. The software is in the
+# development stage. It is intended exclusively for experts who have experience with such software and
+# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+# and fitness for a particular purpose are hereby disclaimed and excluded.
+#
+# Any dispute, controversy or claim arising under or in relation to providing this software shall be
+# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+# the United Nations Convention on Contracts on the International Sales of Goods.
+# + +import logging +from abc import ABC, abstractmethod +from itertools import repeat +from pathlib import Path +from shutil import copyfile +from typing import Iterable, Union +from warnings import warn + +import numpy as np + +from item_generation_scripts.audiotools import audio +from item_generation_scripts.audiotools.audiofile import ( + concat, + read, + split, + trim, + write, +) +from item_generation_scripts.audiotools.metadata import ( + add_remove_preamble, + concat_meta_from_file, + metadata_search, + split_meta_in_file, + write_ISM_metadata_in_file, +) +from item_generation_scripts.audiotools.wrappers.bs1770 import scale_files +from item_generation_scripts.constants import LOGGER_DATEFMT, LOGGER_FORMAT +from item_generation_scripts.processing.config import TestConfig +from item_generation_scripts.utils import apply_func_parallel, list_audio, pairwise + + +class Processing(ABC): + def __init__(self, attrs: dict): + self.__dict__.update(attrs) + + @abstractmethod + def process( + self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger + ) -> None: + pass + + +def reorder_items_list(items_list: list, concatenation_order: list) -> list: + name_to_full = {Path(full_file).name: full_file for full_file in items_list} + ordered_full_files = [ + name_to_full[name] for name in concatenation_order if name in name_to_full + ] + return ordered_full_files + + +def concat_setup(cfg: TestConfig, chain, logger: logging.Logger): + n_items_list = len(cfg.items_list) + cfg_pre2 = chain[0] + + # check for text files + if any([i for i in cfg.items_list if i.suffix == ".txt"]): + raise SystemExit("Concatenation for text files is unsupported") + + # apply concatenation order + if cfg_pre2.concatenation_order is not None: + n_concatenation_order = len(cfg_pre2.concatenation_order) + if n_concatenation_order != n_items_list: + warn( + f"Warning: Mismatch in specified concatenation order and number of items to process!\n" + f"Number of items specified in concatenation order: {n_concatenation_order}\n" + f"Number of items in the directory: {n_items_list}\n" + f"Concatenation will use the following order:\n{cfg_pre2.concatenation_order}" + ) + + logger.info(f"Concatenating input files in directory {cfg.input_path}") + + # concatenate ISM metadata + if cfg.input["fmt"].startswith("ISM"): + cfg.concat_meta = [] + for obj_idx in range(len(cfg.metadata_path[0])): + cfg.concat_meta.append( + cfg.tmp_dirs[0].joinpath( + f"{cfg.input_path.name}_concatenated.wav.{obj_idx}.csv" + ) + ) + concat_meta_from_file( + cfg.items_list, + cfg.metadata_path, + cfg.concat_meta, + cfg.input["fmt"], + ) + + # set input to the concatenated file we have just written to the output dir + cfg.metadata_path = [cfg.concat_meta] + + # concatenate audio + cfg.concat_file = cfg.tmp_dirs[0].joinpath( + f"{cfg.input_path.name}_concatenated.wav" + ) + + # determine number of channels for pcm and raw files + tmp_audio = audio.fromtype(cfg_pre2.in_fmt) + tmp_num_chans = tmp_audio.num_channels + + cfg.splits = concat( + cfg.items_list, + cfg.concat_file, + in_fs=cfg.input.get("fs", 48000), + num_channels=tmp_num_chans, + ) + + # save item naming for splits naming in the end + cfg.split_names = [] + for name in cfg.items_list: + cfg.split_names.append(Path(name).stem.split(".")[0]) + # set input to the concatenated file we have just written to the output dir + cfg.items_list = [cfg.concat_file] + + # write out splits + with open(cfg.concat_file.with_suffix(".splits.log"), "w") as f: + print(", ".join([str(s) for s in cfg.splits]), 
file=f) + print(", ".join([str(sn) for sn in cfg.split_names]), file=f) + print(", ".join([str(i.stem) for i in cfg.items_list]), file=f) + + logger.info(f"Splits written to file {cfg.concat_file.with_suffix('.splits.log')}") + + +def concat_teardown(cfg: TestConfig, logger: logging.Logger): + if not cfg.splits: + raise ValueError("Splitting not possible without split marker") + + output_format = cfg.postprocessing["fmt"] + + out_files = [] + out_meta = [] + + logger.info(f"Splitting output file in directory {cfg.output_path}") + + for odir in cfg.out_dirs: + path_input = odir / cfg.items_list[0].name + out_paths = split( + path_input, + odir, + cfg.split_names, + cfg.splits, + in_fs=cfg.postprocessing["fs"], + ) + + logger.debug( + f"Resulting split files condition {odir.name}: {', '.join([str(op) for op in out_paths])}" + ) + out_files.append(out_paths) + + # split ISM metadata + if output_format.startswith("ISM"): + for odir in cfg.out_dirs: + path_input = odir / cfg.items_list[0].name + out_meta_paths = split_meta_in_file( + path_input, + odir, + cfg.split_names, + cfg.splits, + output_format, + meta_files=cfg.metadata_path[0], + ) + out_meta.append(out_meta_paths) + + # remove concatenated file + if cfg.delete_tmp: + cfg.concat_file.unlink(missing_ok=True) + + return out_files, out_meta + + +def preprocess(cfg, logger): + preprocessing = cfg.proc_chains[0] + chain = preprocessing["processes"] + + logger.info(f" Generating condition: {preprocessing['name']}") + + # run preprocessing + apply_func_parallel( + process_item, + zip( + cfg.items_list, + repeat(cfg.tmp_dirs[0]), + repeat(cfg.out_dirs[0]), + repeat(chain), + repeat(logger), + cfg.metadata_path, + ), + None, + "mp" if cfg.multiprocessing else None, + ) + + # update the configuration to use preprocessing outputs as new inputs + cfg.items_list = list_audio( + cfg.out_dirs[0], absolute=False, select_list=getattr(cfg, "input_select", None) + ) + + # Re-ordering items based on concatenation order + if ( + hasattr(cfg, "preprocessing_2") + and cfg.preprocessing_2.get("concatenate_input", False) + and cfg.preprocessing_2.get("concatenation_order", None) is not None + ): + cfg.items_list = reorder_items_list(cfg.items_list, cfg.concatenation_order) + + if cfg.metadata_path[0] is not None: + for item_idx in range(len(cfg.metadata_path)): + for obj_idx in range(len(cfg.metadata_path[item_idx])): + if cfg.metadata_path[item_idx][obj_idx]: + cfg.metadata_path[item_idx][obj_idx] = cfg.out_dirs[0] / Path( + f"{cfg.items_list[item_idx].stem}.wav.{obj_idx}.csv" + ) + # remove already applied processing stage + cfg.proc_chains = cfg.proc_chains[1:] + cfg.tmp_dirs = cfg.tmp_dirs[1:] + cfg.out_dirs = cfg.out_dirs[1:] + + +def preprocess_2(cfg, logger): + preprocessing_2 = cfg.proc_chains[0] + chain = preprocessing_2["processes"] + + logger.info(f" Generating condition: {preprocessing_2['name']}") + + # concatenate items if required + if chain[0].concatenate_input: + concat_setup(cfg, chain, logger) + + # run preprocessing 2 + apply_func_parallel( + process_item, + zip( + cfg.items_list, + repeat(cfg.tmp_dirs[0]), + repeat(cfg.out_dirs[0]), + repeat(chain), + repeat(logger), + cfg.metadata_path, + ), + None, + "mp" if cfg.multiprocessing else None, + ) + + # update the configuration to use preprocessing 2 outputs as new inputs + cfg.items_list = list_audio( + cfg.out_dirs[0], absolute=False, select_list=getattr(cfg, "input_select", None) + ) + + # Re-ordering items based on concatenation order + if ( + hasattr(cfg, "preprocessing_2") + and 
cfg.preprocessing_2.get("concatenate_input", False) + and cfg.preprocessing_2.get("concatenation_order", None) is not None + ): + cfg.items_list = reorder_items_list(cfg.items_list, cfg.concatenation_order) + + if cfg.metadata_path[0] is not None: + for item_idx in range(len(cfg.metadata_path)): + for obj_idx in range(len(cfg.metadata_path[item_idx])): + if cfg.metadata_path[item_idx][obj_idx]: + cfg.metadata_path[item_idx][obj_idx] = cfg.out_dirs[0] / Path( + f"{cfg.items_list[item_idx].stem}.wav.{obj_idx}.csv" + ) + # remove already applied processing stage + cfg.proc_chains = cfg.proc_chains[1:] + cfg.tmp_dirs = cfg.tmp_dirs[1:] + cfg.out_dirs = cfg.out_dirs[1:] + + return + + +def reverse_process_2(cfg, logger): + # remove preamble + if cfg.pre2.preamble: + remove_preamble(cfg) + + # reverse concatenation + if cfg.pre2.concatenate_input: + # write out the splits, optionally remove file + out_paths_splits, out_meta_splits = concat_teardown(cfg, logger) + else: + # if no concatenation read files from folder + out_paths_splits = [] + for out_dir in cfg.out_dirs: + list_audio_dir = list_audio(out_dir, absolute=True) + out_paths_splits.append(list_audio_dir) + if cfg.postprocessing["fmt"].startswith("ISM"): + out_meta_splits = [] + for i, condition in enumerate(out_paths_splits): + meta_condition = metadata_search( + cfg.out_dirs[i], + condition, + num_objects=int(cfg.postprocessing["fmt"][-1]), + ) + out_meta_splits.append(meta_condition) + else: + out_meta_splits = None + + # scale individual files + if cfg.postprocessing.get("loudness", False): + scale_files( + out_paths_splits, + cfg.postprocessing["fmt"], + cfg.postprocessing["loudness"], + cfg.postprocessing["fs"], + out_meta_splits, + ) + return + + +def process_item( + in_file: Union[Path, str], + tmp_dir: Union[Path, str], + out_dir: Union[Path, str], + chain: Iterable, + logger: logging.Logger, + in_meta, +) -> None: + tmp_file = tmp_dir.joinpath(in_file.name) + tmp_file_meta = [] + if in_meta: + for im in in_meta: + tmp_file_meta.append(tmp_dir.joinpath(Path(im).name)) + + # assemble a list of files to be used during the processing chain + out_dir_wav = False + processing_paths = [in_file] + processing_paths_meta = [in_meta] + for p in chain: + if Path(in_file.name).suffix == ".txt" and p.out_fmt is not None: + processing_paths.append(tmp_file.with_suffix(f".{p.name}.wav")) + out_dir_wav = True + else: + processing_paths.append(tmp_file.with_suffix(f".{p.name}{tmp_file.suffix}")) + try: + out_format = p.out_fmt + except AttributeError: + # EVS has no attribute out_fmt + out_format = p.in_fmt + try: + bool_ism = out_format.startswith("ISM") + except Exception: + bool_ism = out_format.name.startswith("ISM") + + if bool_ism: + list_meta_step = [] + for idx, tfm in enumerate(tmp_file_meta): + list_meta_step.append( + tfm.parent + / f"{in_file.stem.split('.')[0]}.{p.name}.wav.{idx}.csv" + ) + processing_paths_meta.append(list_meta_step) + else: + processing_paths_meta.append(None) + # TODO: support txt file writing for META pass-through + + if out_dir_wav: + out_file = out_dir.joinpath(in_file.name).with_suffix(".wav") + else: + out_file = out_dir.joinpath(in_file.name) + + out_meta = [] + if in_meta: + for im in range(len(in_meta)): + out_meta.append(out_dir.joinpath(f"{Path(out_file).stem}.wav.{im}.csv")) + + # execute each process sequentially, feed output into input of next process + for p, (input, output), input_meta in zip( + chain, pairwise(processing_paths), processing_paths_meta[:-1] + ): + # setup logging for the output + 
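+        # every intermediate output gets its own DEBUG-level log file next to it
+        # (e.g. "item.pre_2.log" for the pre_2 stage)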
item_logger = logger.getChild(output.stem) + fh = logging.FileHandler(output.with_suffix(".log"), mode="w") + fh.setLevel(logging.DEBUG) + fh.setFormatter(logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT)) + item_logger.addHandler(fh) + + p.process(input, output, input_meta, item_logger) + + # copy output and metadata from final process to output file + copyfile(processing_paths[-1], out_file) + if processing_paths_meta[-1]: + for idx, ppm in enumerate(processing_paths_meta[-1]): + copyfile(ppm, out_meta[idx]) + + +def remove_preamble(cfg): + # get number of channels from output format + num_channels = audio.fromtype(cfg.postprocessing["fmt"]).num_channels + for odir in cfg.out_dirs: + for item in cfg.items_list: + path_input = odir / item.name + + # remove preamble for ISM metadata + if cfg.postprocessing["fmt"].startswith("ISM"): + # search for metadata + meta_item = metadata_search( + odir, [Path(item.name)], num_objects=num_channels + ) + metadata_array = [] + for meta_i in meta_item: + metadata_array.append(np.genfromtxt(meta_i, delimiter=",")) + + # remove preamble + metadata_array = add_remove_preamble( + metadata_array, cfg.pre2.preamble, add=False + ) + + # write csv files + write_ISM_metadata_in_file( + metadata_array, [path_input], automatic_naming=True + ) + + # read file + x, fs = read( + path_input, nchannels=num_channels, fs=cfg.postprocessing["fs"] + ) + + # remove preamble + x = trim(x, fs, (cfg.pre2.preamble, 0)) + + # write file + write(path_input, x, fs) + + return diff --git a/item_generation_scripts/utils.py b/item_generation_scripts/utils.py new file mode 100644 index 00000000..1e21b0db --- /dev/null +++ b/item_generation_scripts/utils.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. 
+# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +import logging +import shutil +import subprocess as sp +import sys +from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor +from itertools import repeat, tee +from os import devnull +from pathlib import Path +from shutil import which +from typing import Callable, Iterable, Optional, Union + +import yaml + +ALLOWED_INPUT_EXT = (".wav", ".pcm", ".txt", ".raw") +BIN_DIR = Path(__file__).parent.joinpath("bin") + + +""" +Directory/path handling +""" + + +def create_dir(p: str) -> None: + p = Path(p) + p.mkdir(exist_ok=True, parents=True) + + +def delete_dir(p: str) -> None: + p = Path(p) + if p.exists() and p.is_dir(): + shutil.rmtree(p) + + +class DirManager: + """ + Context manager that creates directories if not already present and + automatically cleans up (i.e. deletes) all specified paths + """ + + def __init__( + self, create_paths: Union[str, list], delete_paths: Union[str, list] = list() + ): + self.create_paths = ( + create_paths if isinstance(create_paths, list) else [create_paths] + ) + self.delete_paths = ( + delete_paths if isinstance(delete_paths, list) else [delete_paths] + ) + + def __enter__(self): + for path in self.create_paths: + create_dir(path) + + def __exit__(self, exc_type, exc_value, exc_traceback): + for path in self.delete_paths: + if path in self.create_paths: + delete_dir(path) + else: + print( + f"Tmp dir '{path}' was not present in creation paths - skipping deletion." + ) + + +def list_audio(path: str, absolute: bool = False, select_list: list = None) -> list: + """ + Return a list of all files with ALLOWED_INPUT_EXT found under the given path. + + If path is a directory, all files in it are included; if it is a file, only that file + will be in the list. If a select list is provided, files are filtered accordingly.
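+ + Example (illustrative; assumes a directory ./items containing item1.wav and item2.pcm): + >>> list_audio("items") + [PosixPath('items/item1.wav'), PosixPath('items/item2.pcm')]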
+ """ + path = Path(path) + audio_list = [] + + if path.exists(): + if path.is_dir(): + if absolute: + [audio_list.extend(list(path.glob(ext))) for ext in ALLOWED_INPUT_EXT] + audio_list = [ + path.joinpath(f) + for f in path.iterdir() + if f.suffix in ALLOWED_INPUT_EXT + ] + else: + audio_list = [ + f for f in path.iterdir() if f.suffix in ALLOWED_INPUT_EXT + ] + else: + if not absolute: + path = path.name + ext = path.suffix + if ext in ALLOWED_INPUT_EXT: + audio_list.append(path) + + # filter according to select list + if select_list: + select_set = set([Path(i).stem for i in select_list]) + audio_list = [ + f for f in audio_list if any([pattern in f.stem for pattern in select_set]) + ] + + return audio_list + + +def get_nickname(p: Path) -> str: + return f"{p.parent.name}/{p.name}" + + +""" +System interaction +""" + + +def find_binary( + binary: str, + raise_error: Optional[bool] = True, + logger: Optional[logging.Logger] = None, + binary_path: Optional[str] = None, +) -> Union[Path, None]: + """Attempt to find and return the path to the given binary""" + # prioritise binaries placed in the directory over $PATH + if binary_path is not None: + bin = which(binary, path=binary_path) + else: + bin = which(binary, path=BIN_DIR) + if not bin: + bin = which(binary) + + if not bin and raise_error: + raise FileNotFoundError( + f"Binary {binary} was neither found in {binary_path.absolute()} nor in {BIN_DIR.absolute()} or in $PATH!" + ) + elif not bin: + if logger: + logger.debug(f"Couldn't find binary {binary}") + return None + else: + if logger: + logger.debug(f"Found binary {bin}") + return Path(bin) + + +def get_devnull(): + return devnull + + +def get_gitsha(): + try: + git_sha = sp.check_output( + ["git", "rev-parse", "HEAD"], stderr=sp.STDOUT, text=True + ).strip() + except sp.CalledProcessError: + git_sha = "git repository not found!" 
+ + return git_sha + + +def run(cmd, cwd=None, check=True, logger: Optional[logging.Logger] = None): + if logger: + logger.debug(f"Running command {' '.join([str(c) for c in cmd])}; cwd = {cwd}") + + try: + result = sp.run(cmd, check=check, capture_output=True, text=True, cwd=cwd) + except sp.CalledProcessError as e: + raise SystemError( + f"Command returned non-zero exit status ({e.returncode}): {' '.join([str(c) for c in e.cmd])}\n{e.stderr}\n{e.stdout}" + ) + + if logger: + logger.debug(result.stderr.strip()) + logger.debug(result.stdout.strip()) + + return result + + +""" +Utility functions +""" + + +def apply_func_parallel( + func: Callable, + args: Iterable, + kwargs: Optional[Iterable] = None, + type: Optional[str] = None, + show_progress: Optional[bool] = True, +) -> list: + """ + Apply a function iteratively to a list of arguments and keyword arguments + Optionally with multiprocessing or multithreading + + Parameters + ---------- + func : Callable + Function to use + args : Iterable + List of positional arguments + kwargs: Optional[Iterable] + List of keyword arguments + type: Optional[str] + Type of parallel processing to use, "mp" for multiprocessing or "mt" for threading, default = None (sequential processing) + show_progress: Optional[bool] + Flag whether to show progress bar + + Returns + ------- + List of function results + """ + + # if no kwargs are specified, repeat the empty dict to avoid issues with zipping and unpacking + if not kwargs: + kwargs = repeat({}) + + args_zip = zip(args, kwargs) + + if type == "mp": + executor = ProcessPoolExecutor + elif type == "mt": + executor = ThreadPoolExecutor + else: + return [ + func(*a, **k) + for a, k in (progressbar(list(args_zip)) if show_progress else args_zip) + ] + + with executor() as e: + results = [e.submit(func, *a, **k) for a, k in args_zip] + return [ + r.result() for r in (progressbar(results) if show_progress else results) + ] + + +def pairwise(iter): + """itertools.pairwise() for python < 3.10""" + a, b = tee(iter) + next(b, None) + return zip(a, b) + + +def progressbar(iter: Iterable, width=80): + """simple unicode progressbar""" + count = len(iter) + + def update(progress): + fill = int(width * progress / count) + print( + f"{int(progress/count*100):3d}%{u'│'}{u'█'*fill}{(u'░'*(width-fill))}{u'│'}{progress}/{count}", + end="\r", + file=sys.stdout, + flush=True, + ) + + update(0) + for i, item in enumerate(iter): + yield item + update(i + 1) + print("\n", flush=True, file=sys.stdout) + + +def get_binary_paths(yaml_file_with_binary_paths): + with open(yaml_file_with_binary_paths, "r") as f: + data = yaml.safe_load(f) + if data is None: + return {} + else: + return {key: Path(value) for key, value in data.items()} -- GitLab From f2d3f6e9a2d6383a7a29d943f4599eba783af71c Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 2 May 2023 15:28:16 +0200 Subject: [PATCH 02/27] formatting --- item_generation_scripts/__init__.py | 9 +- item_generation_scripts/constants.py | 6 +- item_generation_scripts/processing/config.py | 3 +- .../processing/process_ism_items.py | 167 ++++++++++-------- 4 files changed, 103 insertions(+), 82 deletions(-) diff --git a/item_generation_scripts/__init__.py b/item_generation_scripts/__init__.py index 989d61a6..64efb46d 100644 --- a/item_generation_scripts/__init__.py +++ b/item_generation_scripts/__init__.py @@ -30,11 +30,12 @@ # the United Nations Convention on Contracts on the International Sales of Goods. 
# -import os import logging +import os +import pdb from itertools import repeat + import yaml -import pdb from item_generation_scripts.constants import ( LOGGER_DATEFMT, @@ -42,7 +43,6 @@ from item_generation_scripts.constants import ( LOGGER_SUFFIX, ) from item_generation_scripts.processing import config, process_ism_items -from item_generation_scripts.processing import config from item_generation_scripts.utils import create_dir @@ -73,7 +73,6 @@ def logging_init(args, cfg): def main(args): - # parse configuration cfg = config.TestConfig(args.config) @@ -93,7 +92,7 @@ def main(args): cfg.input_path, cfg.output_path, cfg.scenes, - logger + logger, ) # copy configuration to output directory diff --git a/item_generation_scripts/constants.py b/item_generation_scripts/constants.py index 3b554800..c3d5061f 100644 --- a/item_generation_scripts/constants.py +++ b/item_generation_scripts/constants.py @@ -56,13 +56,13 @@ DEFAULT_CONFIG = { DEFAULT_CONFIG_ISM2 = { "format": "ISM2", - "input_path" : "./input", + "input_path": "./input", "output_path": "./output", # "cod": { - # "bin": find_binary("IVAS_cod", raise_error=False), + # "bin": find_binary("IVAS_cod", raise_error=False), # }, # "dec": { - # "bin": find_binary("IVAS_dec", raise_error=False), + # "bin": find_binary("IVAS_dec", raise_error=False), # }, } diff --git a/item_generation_scripts/processing/config.py b/item_generation_scripts/processing/config.py index 926689c4..0fa1fa5e 100644 --- a/item_generation_scripts/processing/config.py +++ b/item_generation_scripts/processing/config.py @@ -38,7 +38,7 @@ import yaml from item_generation_scripts.constants import ( DEFAULT_CONFIG, DEFAULT_CONFIG_ISM2, - REQUIRED_KEYS + REQUIRED_KEYS, ) @@ -127,4 +127,3 @@ class TestConfig: # Report missing keys to the user if MISSING_KEYS: raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}") - diff --git a/item_generation_scripts/processing/process_ism_items.py b/item_generation_scripts/processing/process_ism_items.py index 95bfb159..f4f58fc1 100644 --- a/item_generation_scripts/processing/process_ism_items.py +++ b/item_generation_scripts/processing/process_ism_items.py @@ -31,27 +31,27 @@ # +import csv +import logging import os -import sys import shutil -import numpy as np -import logging -import csv import subprocess as sp +import sys from pathlib import Path +import numpy as np + from item_generation_scripts.audiotools import ( audio, audioarray, audiofile, binauralobjectrenderer, - metadata + metadata, ) - from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness -from item_generation_scripts.audiotools import audio -# function for converting nd numpy array to strings with 2 decimal digits + +# function for converting nd numpy array to strings with 2 decimal digits def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] @@ -63,159 +63,182 @@ def generate_ism_items( input_path: Path, output_path: Path, scenes: dict, - logger: logging.Logger + logger: logging.Logger, ): + """Generate ISM items with metadata from mono items based on scene description""" - """Generate ISM items with metadata from mono items based on scene description """ - # get the number of scenes N_scenes = len(scenes) - + for scene_name, scene in scenes.items(): logger.info(f"Processing {scene_name} out of {N_scenes} scenes") - + # extract the number of audio sources - N_sources = len(np.atleast_1d(scene['source'])) + N_sources = len(np.atleast_1d(scene["source"])) y = None y_meta = None for i in range(N_sources): - - 
source_file = np.atleast_1d(scene['source'])[i] - source_azi = np.atleast_1d(scene['azimuth'])[i] - source_ele = np.atleast_1d(scene['elevation'])[i] - source_type = 'speech' #### !!!! TBD - support generic audio + background noise and speech in the .yml file - source_delay = np.atleast_1d(scene['delay'])[i] - - logger.info(f"Encoding {source_file} at position(s) {source_azi},{source_ele}") - + source_file = np.atleast_1d(scene["source"])[i] + source_azi = np.atleast_1d(scene["azimuth"])[i] + source_ele = np.atleast_1d(scene["elevation"])[i] + source_type = "speech" #### !!!! TBD - support generic audio + background noise and speech in the .yml file + source_delay = np.atleast_1d(scene["delay"])[i] + + logger.info( + f"Encoding {source_file} at position(s) {source_azi},{source_ele}" + ) + # read source file # x, fs = audiofile.read(os.path.join(input_path, source_file)) #### !!!! TBD - check the support for headerless .raw files # pdb.set_trace() audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file)) - - + x = audio_object.audio fs = audio_object.fs - + # find the number of frames N_frames = int(len(x) / fs * 50 + 1) - + # adjust the level of the source file - _, scale_factor = get_loudness(audio_object, target_level, "MONO") + _, scale_factor = get_loudness(audio_object, target_level, "MONO") # print(f"Scaling loudness with factor: {scale_factor}") x *= scale_factor - + # read azimuth information and create array if isinstance(source_azi, str): - if ':' in source_azi: - source_azi = source_azi.split(':') - azi = np.arange(float(eval(source_azi[0])), float(eval(source_azi[2])), float(eval(source_azi[1]))) + if ":" in source_azi: + source_azi = source_azi.split(":") + azi = np.arange( + float(eval(source_azi[0])), + float(eval(source_azi[2])), + float(eval(source_azi[1])), + ) else: azi = np.array(float(eval(source_azi)), ndmin=1)[:N_frames] else: azi = np.array(source_azi, ndmin=1)[:N_frames] - + # ensure that azimuth array has N_frames values if len(azi) > N_frames: # cut the array of azimuth values azi = azi[:N_frames] elif len(azi) < N_frames: # replicate the last azimuth - azi = np.append(azi, np.full( N_frames - len(azi), azi[-1])) - + azi = np.append(azi, np.full(N_frames - len(azi), azi[-1])) + # convert azimuth from 0 .. 360 to -180 .. +180 - azi = (azi + 180) % 360 - 180 + azi = (azi + 180) % 360 - 180 # check if azimuth is from -180 .. +180 if any(azi > 180) or any(azi < -180): - logger.error(f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}") - + logger.error( + f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}" + ) + # read elevation information and create array if isinstance(source_ele, str): - if ':' in source_ele: - source_ele = source_ele.split(':') - ele = np.arange(float(eval(source_ele[0])), float(eval(source_ele[2])), float(eval(source_ele[1]))) + if ":" in source_ele: + source_ele = source_ele.split(":") + ele = np.arange( + float(eval(source_ele[0])), + float(eval(source_ele[2])), + float(eval(source_ele[1])), + ) else: ele = np.array(float(eval(source_ele)), ndmin=1)[:N_frames] else: ele = np.array(source_ele, ndmin=1)[:N_frames] - + # ensure that elevation array has N_frames values if len(ele) > N_frames: # cut the array of elevation values ele = ele[:N_frames] elif len(ele) < N_frames: # replicate the last elevation - ele = np.append(ele, np.full( N_frames - len(ele), ele[-1])) + ele = np.append(ele, np.full(N_frames - len(ele), ele[-1])) # check if elevation is from -90 .. 
+90 if any(ele > 90) or any(ele < -90): - logger.error(f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}") - + logger.error( + f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}" + ) + # additional metadata - dist = np.ones(N_frames) #### !!!! TBD - check what to do with these metadata + dist = np.ones( + N_frames + ) #### !!!! TBD - check what to do with these metadata spread = np.zeros(N_frames) gain = np.ones(N_frames) - + # arrange all metadata fields column-wise into a matrix x_meta = np.column_stack((azi, ele, dist, spread, gain)) - + # delay the source file if source_delay > 0: pre = np.zeros((int(source_delay * fs), x.shape[1])) x = np.concatenate([pre, x]) - + # apply delay to metadata as well - pre = np.tile([0.00,0.00,1.00,0.00,1.00], (int(source_delay * 50), 1)) + pre = np.tile( + [0.00, 0.00, 1.00, 0.00, 1.00], (int(source_delay * 50), 1) + ) # pre = np.zeros((int(source_delay * 50), x_meta.shape[1])) x_meta = np.concatenate([pre, x_meta]) - + # add source signal to the array of source signals if y is None: y = x else: # append zeros to have equal length of all source signals if x.shape[0] > y.shape[0]: - y = np.vstack((y, np.zeros((x.shape[0]-y.shape[0], y.shape[1])))) + y = np.vstack((y, np.zeros((x.shape[0] - y.shape[0], y.shape[1])))) elif y.shape[0] > x.shape[0]: - x = np.vstack((x, np.zeros((y.shape[0]-x.shape[0], x.shape[1])))) + x = np.vstack((x, np.zeros((y.shape[0] - x.shape[0], x.shape[1])))) y = np.hstack((y, x)) - + # add metadata to the array of all metadata - x_meta = x_meta[np.newaxis, :] # make sure x_meta is a 3d array + x_meta = x_meta[np.newaxis, :] # make sure x_meta is a 3d array if y_meta is None: y_meta = x_meta else: N_srcs = y_meta.shape[0] N_meta_features = y_meta.shape[2] - + # append postamble (create by repeating the last row of metadata) to have equal length of all metadata if x_meta.shape[1] > y_meta.shape[1]: N_delta = x_meta.shape[1] - y_meta.shape[1] - y_meta = y_meta.reshape(y_meta.shape[1], -1) # reshape to 2d array - y_meta = np.vstack((y_meta, np.tile(y_meta[-1,:], (N_delta, 1)))) # repeat last row N_delta times and append to the array - y_meta = y_meta.reshape(N_srcs, -1, N_meta_features) # reshape back to 3d array + y_meta = y_meta.reshape(y_meta.shape[1], -1) # reshape to 2d array + y_meta = np.vstack( + (y_meta, np.tile(y_meta[-1, :], (N_delta, 1))) + ) # repeat last row N_delta times and append to the array + y_meta = y_meta.reshape( + N_srcs, -1, N_meta_features + ) # reshape back to 3d array elif y_meta.shape[1] > x_meta.shape[1]: N_delta = y_meta.shape[1] - x_meta.shape[1] - x_meta = x_meta.reshape(x_meta.shape[1], -1) # reshape to 2d array - x_meta = np.vstack((x_meta, np.tile(x_meta[-1,:], (N_delta, 1)))) # repeat last row N_delta times and append to the array - x_meta = np.expand_dims(x_meta, axis=0) # reshape back to 3d array - + x_meta = x_meta.reshape(x_meta.shape[1], -1) # reshape to 2d array + x_meta = np.vstack( + (x_meta, np.tile(x_meta[-1, :], (N_delta, 1))) + ) # repeat last row N_delta times and append to the array + x_meta = np.expand_dims(x_meta, axis=0) # reshape back to 3d array + y_meta = np.concatenate([y_meta, x_meta]) - + # write individual ISM audio streams to the output file in an interleaved format - output_filename = scene['name'] - audiofile.write(os.path.join(output_path, output_filename), y, fs) ### !!!! 
replace all os.path.xxx operations with the Path object - - # write individual ISM metadata to output files in .csv format + output_filename = scene["name"] + audiofile.write( + os.path.join(output_path, output_filename), y, fs + ) ### !!!! replace all os.path.xxx operations with the Path object + + # write individual ISM metadata to output files in .csv format for i in range(N_sources): # generate .csv filename (should end with .0.csv, .1.csv, ...) csv_filename = os.path.normpath(f"{output_filename}.{i}.csv") - - with open(os.path.join(output_path, csv_filename), 'w') as f: + + with open(os.path.join(output_path, csv_filename), "w") as f: # create csv writer writer = csv.writer(f) - + # write all rows to the .csv file - writer.writerows(csv_formatdata(y_meta[i])) + writer.writerows(csv_formatdata(y_meta[i])) -- GitLab From 8d7b16e85bbf11c9c8fc28a899a600ae6cc1821a Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 2 May 2023 15:37:19 +0200 Subject: [PATCH 03/27] formatting --- ivas_processing_scripts/audiotools/wrappers/gen_patt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ivas_processing_scripts/audiotools/wrappers/gen_patt.py b/ivas_processing_scripts/audiotools/wrappers/gen_patt.py index f801b07b..d8737ef4 100644 --- a/ivas_processing_scripts/audiotools/wrappers/gen_patt.py +++ b/ivas_processing_scripts/audiotools/wrappers/gen_patt.py @@ -138,7 +138,7 @@ def create_error_pattern( gen_patt(100, "ep.g192", 5, working_dir=tmp_dir_test) if not tmp_sta_file_test.exists(): raise RuntimeError( - "Used version of gen-patt was detected to be faulty (unable to write \"sta\"-file). See bin/README.md for details." + 'Used version of gen-patt was detected to be faulty (unable to write "sta"-file). See bin/README.md for details.' 
) with TemporaryDirectory() as tmp_dir: -- GitLab From 872d533c7ce23e3d18649d1e71fab3b1b81fb24f Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 2 May 2023 15:46:39 +0200 Subject: [PATCH 04/27] formatting --- item_generation_scripts/__init__.py | 2 -- item_generation_scripts/constants.py | 2 +- item_generation_scripts/processing/config.py | 1 - .../processing/process_ism_items.py | 19 +++++-------------- 4 files changed, 6 insertions(+), 18 deletions(-) diff --git a/item_generation_scripts/__init__.py b/item_generation_scripts/__init__.py index 64efb46d..88951e80 100644 --- a/item_generation_scripts/__init__.py +++ b/item_generation_scripts/__init__.py @@ -32,8 +32,6 @@ import logging import os -import pdb -from itertools import repeat import yaml diff --git a/item_generation_scripts/constants.py b/item_generation_scripts/constants.py index c3d5061f..9509d069 100644 --- a/item_generation_scripts/constants.py +++ b/item_generation_scripts/constants.py @@ -33,7 +33,7 @@ from datetime import datetime from pathlib import Path -from item_generation_scripts.utils import find_binary, get_binary_paths +from item_generation_scripts.utils import get_binary_paths LOGGER_SUFFIX = ".log" LOGGER_FORMAT = ( diff --git a/item_generation_scripts/processing/config.py b/item_generation_scripts/processing/config.py index 0fa1fa5e..06f828bb 100644 --- a/item_generation_scripts/processing/config.py +++ b/item_generation_scripts/processing/config.py @@ -37,7 +37,6 @@ import yaml from item_generation_scripts.constants import ( DEFAULT_CONFIG, - DEFAULT_CONFIG_ISM2, REQUIRED_KEYS, ) diff --git a/item_generation_scripts/processing/process_ism_items.py b/item_generation_scripts/processing/process_ism_items.py index f4f58fc1..73267607 100644 --- a/item_generation_scripts/processing/process_ism_items.py +++ b/item_generation_scripts/processing/process_ism_items.py @@ -34,20 +34,11 @@ import csv import logging import os -import shutil -import subprocess as sp -import sys from pathlib import Path import numpy as np -from item_generation_scripts.audiotools import ( - audio, - audioarray, - audiofile, - binauralobjectrenderer, - metadata, -) +from item_generation_scripts.audiotools import audio, audiofile from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness @@ -82,7 +73,7 @@ def generate_ism_items( source_file = np.atleast_1d(scene["source"])[i] source_azi = np.atleast_1d(scene["azimuth"])[i] source_ele = np.atleast_1d(scene["elevation"])[i] - source_type = "speech" #### !!!! TBD - support generic audio + background noise and speech in the .yml file + # source_type = "speech" # !!!! TBD - support generic audio + background noise and speech in the .yml file source_delay = np.atleast_1d(scene["delay"])[i] logger.info( @@ -90,7 +81,7 @@ def generate_ism_items( ) # read source file - # x, fs = audiofile.read(os.path.join(input_path, source_file)) #### !!!! TBD - check the support for headerless .raw files + # x, fs = audiofile.read(os.path.join(input_path, source_file)) # !!!! TBD - check the support for headerless .raw files # pdb.set_trace() audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file)) @@ -167,7 +158,7 @@ def generate_ism_items( # additional metadata dist = np.ones( N_frames - ) #### !!!! TBD - check what to do with these metadata + ) # !!!! 
TBD - check what to do with these metadata spread = np.zeros(N_frames) gain = np.ones(N_frames) @@ -229,7 +220,7 @@ def generate_ism_items( output_filename = scene["name"] audiofile.write( os.path.join(output_path, output_filename), y, fs - ) ### !!!! replace all os.path.xxx operations with the Path object + ) # !!!! TBD: replace all os.path.xxx operations with the Path object # write individual ISM metadata to output files in .csv format for i in range(N_sources): -- GitLab From 81628b69aa93028d232c242895bdc0e205c8b25c Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 2 May 2023 16:34:12 +0200 Subject: [PATCH 05/27] support of .raw format --- item_generation_scripts/__init__.py | 1 + .../config/ISM1_CONFIG.yml | 78 +++++++++---------- .../config/ISM2_CONFIG.yml | 6 +- item_generation_scripts/processing/config.py | 5 +- .../processing/process_ism_items.py | 13 +--- 5 files changed, 48 insertions(+), 55 deletions(-) diff --git a/item_generation_scripts/__init__.py b/item_generation_scripts/__init__.py index 88951e80..c08820ea 100644 --- a/item_generation_scripts/__init__.py +++ b/item_generation_scripts/__init__.py @@ -91,6 +91,7 @@ def main(args): cfg.output_path, cfg.scenes, logger, + fs=cfg.fs ) # copy configuration to output directory diff --git a/item_generation_scripts/config/ISM1_CONFIG.yml b/item_generation_scripts/config/ISM1_CONFIG.yml index f4e1ee31..cbe4eb71 100644 --- a/item_generation_scripts/config/ISM1_CONFIG.yml +++ b/item_generation_scripts/config/ISM1_CONFIG.yml @@ -13,7 +13,7 @@ format: "ISM1" # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 -# fs: 32000 +fs: 48000 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. @@ -21,10 +21,10 @@ format: "ISM1" ### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions ### Input path to mono files -input_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/items_mono" +input_path: "./items_mono" ### Output path for generated test items and metadata files -output_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/output" +output_path: "./output" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 @@ -51,7 +51,7 @@ scenes: a1: name: "G1S1.wav" description: "Talker sitting at a table" - source: "f2s5a_Talker1.wav" + source: "test_single.wav" azimuth: 0 elevation: 0 delay: 0 @@ -59,7 +59,7 @@ scenes: a2: name: "G6S2.wav" description: "Talker sitting at a table" - source: "f5s10a_Talker1.wav" + source: "test_single.wav" azimuth: 60 elevation: 0 delay: 0 @@ -67,7 +67,7 @@ scenes: a3: name: "G5S3.wav" description: "Talker sitting at a table" - source: "f2s5a_Talker1.wav" + source: "test_single.wav" azimuth: 120 elevation: 0 delay: 0 @@ -75,7 +75,7 @@ scenes: a4: name: "G4S4.wav" description: "Talker sitting at a table" - source: "m4s11b_Talker1.wav" + source: "test_single.wav" azimuth: 180 elevation: 0 delay: 0 @@ -83,7 +83,7 @@ scenes: a5: name: "G3S5.wav" description: "Talker sitting at a table" - source: "m1s4a_Talker1.wav" + source: "test_single.wav" azimuth: 240 elevation: 0 delay: 0 @@ -91,7 +91,7 @@ scenes: a6: name: "G2S6.wav" description: "Talker sitting at a table" - source: "f5s10a_Talker1.wav" + source: "test_single.wav" azimuth: 300 elevation: 0 delay: 0 @@ -99,7 +99,7 @@ scenes: b1: name: "G2S1.wav" description: "standing talker." 
- source: "f5s10b_Talker1.wav" + source: "test_single.wav" azimuth: 120 elevation: 35 delay: 0 @@ -107,7 +107,7 @@ scenes: b2: name: "G1S2.wav" description: "standing talker." - source: "f2s1a_Talker1.wav" + source: "test_single.wav" azimuth: 180 elevation: 35 delay: 0 @@ -115,7 +115,7 @@ scenes: b3: name: "G6S3.wav" description: "standing talker." - source: "f5s10b_Talker1.wav" + source: "test_single.wav" azimuth: 240 elevation: 35 delay: 0 @@ -123,7 +123,7 @@ scenes: b4: name: "G5S4.wav" description: "standing talker." - source: "f2s1a_Talker1.wav" + source: "test_single.wav" azimuth: 300 elevation: 35 delay: 0 @@ -131,7 +131,7 @@ scenes: b5: name: "G4S5.wav" description: "standing talker." - source: "m4s11a_Talker1.wav" + source: "test_single.wav" azimuth: 0 elevation: 35 delay: 0 @@ -139,7 +139,7 @@ scenes: b6: name: "G3S6.wav" description: "standing talker." - source: "m1s2b_Talker1.wav" + source: "test_single.wav" azimuth: 60 elevation: 35 delay: 0 @@ -147,7 +147,7 @@ scenes: c1: name: "G3S1.wav" description: "Smaller talker (child) walking around a table." - source: "m1s6b_Talker1.wav" + source: "test_single.wav" azimuth: "0:1:360" elevation: 0 delay: 0 @@ -155,7 +155,7 @@ scenes: c2: name: "G2S2.wav" description: "Smaller talker (child) walking around a table." - source: "f5s14a_Talker1.wav" + source: "test_single.wav" azimuth: "60:1:60+360" elevation: 0 delay: 0 @@ -163,7 +163,7 @@ scenes: c3: name: "G1S3.wav" description: "Smaller talker (child) walking around a table." - source: "f2s6a_Talker1.wav" + source: "test_single.wav" azimuth: "120:1:120+360" elevation: 0 delay: 0 @@ -171,7 +171,7 @@ scenes: c4: name: "G6S4.wav" description: "Smaller talker (child) walking around a table." - source: "f5s14a_Talker1.wav" + source: "test_single.wav" azimuth: "180:1:180+360" elevation: 0 delay: 0 @@ -179,7 +179,7 @@ scenes: c5: name: "G5S5.wav" description: "Smaller talker (child) walking around a table." - source: "f2s6a_Talker1.wav" + source: "test_single.wav" azimuth: "240:1:240+360" elevation: 0 delay: 0 @@ -187,7 +187,7 @@ scenes: c6: name: "G4S6.wav" description: "Smaller talker (child) walking around a table." - source: "m4s13a_Talker1.wav" + source: "test_single.wav" azimuth: "300:1:300+360" elevation: 0 delay: 0 @@ -195,7 +195,7 @@ scenes: d1: name: "G4S1.wav" description: "Talker walking around the table." - source: "m4s12b_Talker1.wav" + source: "test_single.wav" azimuth: "0:-1:-360" elevation: 35 delay: 0 @@ -203,7 +203,7 @@ scenes: d2: name: "G3S2.wav" description: "Talker walking around the table." - source: "m1s12a_Talker1.wav" + source: "test_single.wav" azimuth: "60:-1:60-360" elevation: 35 delay: 0 @@ -211,7 +211,7 @@ scenes: d3: name: "G3S2.wav" description: "Talker walking around the table." - source: "f5s15b_Talker1.wav" + source: "test_single.wav" azimuth: "120:-1:120-360" elevation: 35 delay: 0 @@ -219,7 +219,7 @@ scenes: d4: name: "G1S4.wav" description: "Talker walking around the table." - source: "f2s3b_Talker1.wav" + source: "test_single.wav" azimuth: "180:-1:180-360" elevation: 35 delay: 0 @@ -227,7 +227,7 @@ scenes: d5: name: "G6S5.wav" description: "Talker walking around the table." - source: "f5s15b_Talker1.wav" + source: "test_single.wav" azimuth: "240:-1:240-360" elevation: 35 delay: 0 @@ -235,7 +235,7 @@ scenes: d6: name: "G5S6.wav" description: "Talker walking around the table." 
- source: "f2s3b_Talker1.wav" + source: "test_single.wav" azimuth: "300:-1:300-360" elevation: 35 delay: 0 @@ -243,7 +243,7 @@ scenes: e1: name: "G5S1.wav" description: "Elevation displacement." - source: "f2s4a_Talker1.wav" + source: "test_single.wav" azimuth: 240 elevation: "-90:0.5:90" delay: 0 @@ -251,7 +251,7 @@ scenes: e2: name: "G4S2.wav" description: "Elevation displacement." - source: "m4s16a_Talker1.wav" + source: "test_single.wav" azimuth: 300 elevation: 0 delay: 0 @@ -259,7 +259,7 @@ scenes: e3: name: "G3S3.wav" description: "Elevation displacement." - source: "m1s16b_Talker1.wav" + source: "test_single.wav" azimuth: 0 elevation: "-90:0.5:90" delay: 0 @@ -267,7 +267,7 @@ scenes: e4: name: "G2S4.wav" description: "Elevation displacement." - source: "f5s19a_Talker1.wav" + source: "test_single.wav" azimuth: 60 elevation: "-90:0.5:90" delay: 0 @@ -275,7 +275,7 @@ scenes: e5: name: "G1S5.wav" description: "Elevation displacement." - source: "f2s4a_Talker1.wav" + source: "test_single.wav" azimuth: 120 elevation: "-90:0.5:90" delay: 0 @@ -283,7 +283,7 @@ scenes: e6: name: "G6S6.wav" description: "Elevation displacement." - source: "f5s19a_Talker1.wav" + source: "test_single.wav" azimuth: 180 elevation: "-90:0.5:90" delay: 0 @@ -291,7 +291,7 @@ scenes: f1: name: "G6S1.wav" description: "Azimuth and elevation displacement." - source: "f5s15a_Talker1.wav" + source: "test_single.wav" azimuth: "60:0.5:60+180" elevation: "35:-0.2:-35" delay: 0 @@ -299,7 +299,7 @@ scenes: f2: name: "G5S2.wav" description: "Azimuth and elevation displacement." - source: "f2s7b_Talker1.wav" + source: "test_single.wav" azimuth: "120:0.5:120+180" elevation: "35:-0.2:-35" delay: 0 @@ -307,7 +307,7 @@ scenes: f3: name: "G4S3.wav" description: "Azimuth and elevation displacement." - source: "m4s14a_Talker1.wav" + source: "test_single.wav" azimuth: "180:0.5:180+180" elevation: "35:-0.2:-35" delay: 0 @@ -315,7 +315,7 @@ scenes: f4: name: "G3S4.wav" description: "Azimuth and elevation displacement." - source: "m1s7a_Talker1.wav" + source: "test_single.wav" azimuth: "240:0.5:240+180" elevation: "35:-0.2:-35" delay: 0 @@ -323,7 +323,7 @@ scenes: f5: name: "G2S5.wav" description: "Azimuth and elevation displacement." - source: "f5s15a_Talker1.wav" + source: "test_single.wav" azimuth: "300:0.5:300+180" elevation: "35:-0.2:-35" delay: 0 @@ -331,7 +331,7 @@ scenes: f6: name: "G1S6.wav" description: "Azimuth and elevation displacement." - source: "f2s7b_Talker1.wav" + source: "test_single.wav" azimuth: "0:0.5:0+180" elevation: "35:-0.2:-35" delay: 0 diff --git a/item_generation_scripts/config/ISM2_CONFIG.yml b/item_generation_scripts/config/ISM2_CONFIG.yml index c4a65c07..8886f562 100644 --- a/item_generation_scripts/config/ISM2_CONFIG.yml +++ b/item_generation_scripts/config/ISM2_CONFIG.yml @@ -13,7 +13,7 @@ format: "ISM2" # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 -# fs: 32000 +fs: 48000 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. 
@@ -21,10 +21,10 @@ format: "ISM2" ### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions ### Input path to mono files -input_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/items_mono" +input_path: "./items_mono" ### Output path for generated test items and metadata files -output_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/output" +output_path: "./output" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 diff --git a/item_generation_scripts/processing/config.py b/item_generation_scripts/processing/config.py index 06f828bb..3e9aaaa5 100644 --- a/item_generation_scripts/processing/config.py +++ b/item_generation_scripts/processing/config.py @@ -35,10 +35,7 @@ from pathlib import Path import yaml -from item_generation_scripts.constants import ( - DEFAULT_CONFIG, - REQUIRED_KEYS, -) +from item_generation_scripts.constants import DEFAULT_CONFIG, REQUIRED_KEYS def merge_dicts(base: dict, other: dict) -> None: diff --git a/item_generation_scripts/processing/process_ism_items.py b/item_generation_scripts/processing/process_ism_items.py index 73267607..8f69a4c6 100644 --- a/item_generation_scripts/processing/process_ism_items.py +++ b/item_generation_scripts/processing/process_ism_items.py @@ -35,6 +35,7 @@ import csv import logging import os from pathlib import Path +from typing import Optional import numpy as np @@ -55,6 +56,7 @@ def generate_ism_items( output_path: Path, scenes: dict, logger: logging.Logger, + fs: Optional[int] = 48000, ): """Generate ISM items with metadata from mono items based on scene description""" @@ -73,7 +75,6 @@ def generate_ism_items( source_file = np.atleast_1d(scene["source"])[i] source_azi = np.atleast_1d(scene["azimuth"])[i] source_ele = np.atleast_1d(scene["elevation"])[i] - # source_type = "speech" # !!!! TBD - support generic audio + background noise and speech in the .yml file source_delay = np.atleast_1d(scene["delay"])[i] logger.info( @@ -81,10 +82,7 @@ def generate_ism_items( ) # read source file - # x, fs = audiofile.read(os.path.join(input_path, source_file)) # !!!! TBD - check the support for headerless .raw files - # pdb.set_trace() - audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file)) - + audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs) x = audio_object.audio fs = audio_object.fs @@ -93,7 +91,6 @@ def generate_ism_items( # adjust the level of the source file _, scale_factor = get_loudness(audio_object, target_level, "MONO") - # print(f"Scaling loudness with factor: {scale_factor}") x *= scale_factor # read azimuth information and create array @@ -156,9 +153,7 @@ def generate_ism_items( ) # additional metadata - dist = np.ones( - N_frames - ) # !!!! TBD - check what to do with these metadata + dist = np.ones(N_frames) # !!!! 
TBD - check what to do with these metadata spread = np.zeros(N_frames) gain = np.ones(N_frames) -- GitLab From 086b23096564833d75d51c4374ed24195884df98 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 2 May 2023 17:40:30 +0200 Subject: [PATCH 06/27] support delay of mono items to crate some overlap --- .../config/ISM1_CONFIG.yml | 36 ----- .../config/ISM2_CONFIG.yml | 145 +++++++++--------- .../processing/process_ism_items.py | 9 +- 3 files changed, 80 insertions(+), 110 deletions(-) diff --git a/item_generation_scripts/config/ISM1_CONFIG.yml b/item_generation_scripts/config/ISM1_CONFIG.yml index cbe4eb71..8d85906b 100644 --- a/item_generation_scripts/config/ISM1_CONFIG.yml +++ b/item_generation_scripts/config/ISM1_CONFIG.yml @@ -54,7 +54,6 @@ scenes: source: "test_single.wav" azimuth: 0 elevation: 0 - delay: 0 a2: name: "G6S2.wav" @@ -62,7 +61,6 @@ scenes: source: "test_single.wav" azimuth: 60 elevation: 0 - delay: 0 a3: name: "G5S3.wav" @@ -70,7 +68,6 @@ scenes: source: "test_single.wav" azimuth: 120 elevation: 0 - delay: 0 a4: name: "G4S4.wav" @@ -78,7 +75,6 @@ scenes: source: "test_single.wav" azimuth: 180 elevation: 0 - delay: 0 a5: name: "G3S5.wav" @@ -86,7 +82,6 @@ scenes: source: "test_single.wav" azimuth: 240 elevation: 0 - delay: 0 a6: name: "G2S6.wav" @@ -94,7 +89,6 @@ scenes: source: "test_single.wav" azimuth: 300 elevation: 0 - delay: 0 b1: name: "G2S1.wav" @@ -102,7 +96,6 @@ scenes: source: "test_single.wav" azimuth: 120 elevation: 35 - delay: 0 b2: name: "G1S2.wav" @@ -110,7 +103,6 @@ scenes: source: "test_single.wav" azimuth: 180 elevation: 35 - delay: 0 b3: name: "G6S3.wav" @@ -118,7 +110,6 @@ scenes: source: "test_single.wav" azimuth: 240 elevation: 35 - delay: 0 b4: name: "G5S4.wav" @@ -126,7 +117,6 @@ scenes: source: "test_single.wav" azimuth: 300 elevation: 35 - delay: 0 b5: name: "G4S5.wav" @@ -134,7 +124,6 @@ scenes: source: "test_single.wav" azimuth: 0 elevation: 35 - delay: 0 b6: name: "G3S6.wav" @@ -142,7 +131,6 @@ scenes: source: "test_single.wav" azimuth: 60 elevation: 35 - delay: 0 c1: name: "G3S1.wav" @@ -150,7 +138,6 @@ scenes: source: "test_single.wav" azimuth: "0:1:360" elevation: 0 - delay: 0 c2: name: "G2S2.wav" @@ -158,7 +145,6 @@ scenes: source: "test_single.wav" azimuth: "60:1:60+360" elevation: 0 - delay: 0 c3: name: "G1S3.wav" @@ -166,7 +152,6 @@ scenes: source: "test_single.wav" azimuth: "120:1:120+360" elevation: 0 - delay: 0 c4: name: "G6S4.wav" @@ -174,7 +159,6 @@ scenes: source: "test_single.wav" azimuth: "180:1:180+360" elevation: 0 - delay: 0 c5: name: "G5S5.wav" @@ -182,7 +166,6 @@ scenes: source: "test_single.wav" azimuth: "240:1:240+360" elevation: 0 - delay: 0 c6: name: "G4S6.wav" @@ -190,7 +173,6 @@ scenes: source: "test_single.wav" azimuth: "300:1:300+360" elevation: 0 - delay: 0 d1: name: "G4S1.wav" @@ -198,7 +180,6 @@ scenes: source: "test_single.wav" azimuth: "0:-1:-360" elevation: 35 - delay: 0 d2: name: "G3S2.wav" @@ -206,7 +187,6 @@ scenes: source: "test_single.wav" azimuth: "60:-1:60-360" elevation: 35 - delay: 0 d3: name: "G3S2.wav" @@ -214,7 +194,6 @@ scenes: source: "test_single.wav" azimuth: "120:-1:120-360" elevation: 35 - delay: 0 d4: name: "G1S4.wav" @@ -222,7 +201,6 @@ scenes: source: "test_single.wav" azimuth: "180:-1:180-360" elevation: 35 - delay: 0 d5: name: "G6S5.wav" @@ -230,7 +208,6 @@ scenes: source: "test_single.wav" azimuth: "240:-1:240-360" elevation: 35 - delay: 0 d6: name: "G5S6.wav" @@ -238,7 +215,6 @@ scenes: source: "test_single.wav" azimuth: "300:-1:300-360" elevation: 35 - delay: 0 e1: name: 
"G5S1.wav" @@ -246,7 +222,6 @@ scenes: source: "test_single.wav" azimuth: 240 elevation: "-90:0.5:90" - delay: 0 e2: name: "G4S2.wav" @@ -254,7 +229,6 @@ scenes: source: "test_single.wav" azimuth: 300 elevation: 0 - delay: 0 e3: name: "G3S3.wav" @@ -262,7 +236,6 @@ scenes: source: "test_single.wav" azimuth: 0 elevation: "-90:0.5:90" - delay: 0 e4: name: "G2S4.wav" @@ -270,7 +243,6 @@ scenes: source: "test_single.wav" azimuth: 60 elevation: "-90:0.5:90" - delay: 0 e5: name: "G1S5.wav" @@ -278,7 +250,6 @@ scenes: source: "test_single.wav" azimuth: 120 elevation: "-90:0.5:90" - delay: 0 e6: name: "G6S6.wav" @@ -286,7 +257,6 @@ scenes: source: "test_single.wav" azimuth: 180 elevation: "-90:0.5:90" - delay: 0 f1: name: "G6S1.wav" @@ -294,7 +264,6 @@ scenes: source: "test_single.wav" azimuth: "60:0.5:60+180" elevation: "35:-0.2:-35" - delay: 0 f2: name: "G5S2.wav" @@ -302,7 +271,6 @@ scenes: source: "test_single.wav" azimuth: "120:0.5:120+180" elevation: "35:-0.2:-35" - delay: 0 f3: name: "G4S3.wav" @@ -310,7 +278,6 @@ scenes: source: "test_single.wav" azimuth: "180:0.5:180+180" elevation: "35:-0.2:-35" - delay: 0 f4: name: "G3S4.wav" @@ -318,7 +285,6 @@ scenes: source: "test_single.wav" azimuth: "240:0.5:240+180" elevation: "35:-0.2:-35" - delay: 0 f5: name: "G2S5.wav" @@ -326,7 +292,6 @@ scenes: source: "test_single.wav" azimuth: "300:0.5:300+180" elevation: "35:-0.2:-35" - delay: 0 f6: name: "G1S6.wav" @@ -334,5 +299,4 @@ scenes: source: "test_single.wav" azimuth: "0:0.5:0+180" elevation: "35:-0.2:-35" - delay: 0 \ No newline at end of file diff --git a/item_generation_scripts/config/ISM2_CONFIG.yml b/item_generation_scripts/config/ISM2_CONFIG.yml index 8886f562..798da9d0 100644 --- a/item_generation_scripts/config/ISM2_CONFIG.yml +++ b/item_generation_scripts/config/ISM2_CONFIG.yml @@ -37,6 +37,7 @@ loudness: -26 ### Each scene must start with the sceneN tag ### Specify the mono source filename (the program will search for it in the input_path folder) ### Specify azimuth and elevation for each input source +### Specify the delay in seconds for each input source ### Note 1: use [val1, val2, ...] for multiple sources in a scene ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames @@ -51,288 +52,288 @@ scenes: a1: name: "G1S1.wav" description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["f2s5a_Talker1.wav", "m2s16b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [0, 50] elevation: [0, 0] - delay: [0, 0] + delay: [0, 1] a2: name: "G6S2.wav" description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["f5s10a_Talker1.wav", "m3s2a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [50, 350] elevation: [0, 0] - delay: [0, 0] + delay: [0, 1] a3: name: "G5S3.wav" description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["f2s5a_Talker1.wav", "m2s16b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [40, 290] elevation: [0, 0] - delay: [0, 0] + delay: [0, 1] a4: name: "G4S4.wav" description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." 
- source: ["m4s11b_Talker1.wav", "f1s4b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [30, 230] elevation: [15, 15] - delay: [0, 0] + delay: [0, 1] a5: name: "G3S5.wav" description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["m1s4a_Talker1.wav", "f3s3a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [20, 170] elevation: [15, 15] - delay: [0, 0] + delay: [0, 1] a6: name: "G2S6.wav" description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["f5s10a_Talker1.wav", "m3s2a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [10, 110] elevation: [15, 15] - delay: [0, 0] + delay: [0, 1] b1: name: "G2S1.wav" description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["f5s10b_Talker1.wav", "m3s2b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [20, 170] elevation: [30, 30] - delay: [0, 0] + delay: [0, 1] b2: name: "G1S2.wav" description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["f2s1a_Talker1.wav", "m2s10a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [10, 110] elevation: [30, 30] - delay: [0, 0] + delay: [0, 1] b3: name: "G6S3.wav" description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["f5s10b_Talker1.wav", "m3s2b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [0, 50] elevation: [30, 30] - delay: [0, 0] + delay: [0, 1] b4: name: "G5S4.wav" description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["f2s1a_Talker1.wav", "m2s10a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [50, 350] elevation: [60, 60] - delay: [0, 0] + delay: [0, 1] b5: name: "G4S5.wav" description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["m4s11a_Talker1.wav", "f1s6a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [40, 290] elevation: [60, 60] - delay: [0, 0] + delay: [0, 1] b6: name: "G3S6.wav" description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["m1s2b_Talker1.wav", "f3s5a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [30, 230] elevation: [60, 60] - delay: [0, 0] + delay: [0, 1] c1: name: "G3S1.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["m1s6b_Talker1.wav", "f3s5b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [40, 290] elevation: [0, 60] - delay: [0, 0] + delay: [0, 1] c2: name: "G2S2.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["f5s14a_Talker1.wav", "m3s8a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [30, 230] elevation: [0, 60] - delay: [0, 0] + delay: [0, 1] c3: name: "G1S3.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." 
- source: ["f2s6a_Talker1.wav", "m2s13a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [20, 170] elevation: [0, 60] - delay: [0, 0] + delay: [0, 1] c4: name: "G6S4.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["f5s14a_Talker1.wav", "m3s8a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [10, 110] elevation: [0, 60] - delay: [0, 0] + delay: [0, 1] c5: name: "G5S5.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["f2s6a_Talker1.wav", "m2s13a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [0, 50] elevation: [0, 60] - delay: [0, 0] + delay: [0, 1] c6: name: "G4S6.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["m4s13a_Talker1.wav", "f1s20a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [50, 350] elevation: [0, 60] - delay: [0, 0] + delay: [0, 1] d1: name: "G4S1.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["m4s12b_Talker1.wav", "f1s12b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [50, "180:1:120 + 360"] elevation: [0, 60] - delay: [0, 0] + delay: [0, 1] d2: name: "G3S2.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["m1s12a_Talker1.wav", "f3s20a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [300, "-70:-1:-10 - 360"] elevation: [0, 60] - delay: [0, 0] + delay: [0, 1] d3: name: "G3S2.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["f5s15b_Talker1.wav", "m3s1a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [250, "-20:-1:-320"] elevation: [0, 60] - delay: [0, 0] + delay: [0, 1] d4: name: "G1S4.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["f2s3b_Talker1.wav", "m2s15a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [200, "30:-1:-270"] elevation: [0, 60] - delay: [0, 0] + delay: [0, 1] d5: name: "G6S5.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["f5s15b_Talker1.wav", "m3s1a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [150, "80:1:20 + 360"] elevation: [0, 60] - delay: [0, 0] + delay: [0, 1] d6: name: "G5S6.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
- source: ["f2s3b_Talker1.wav", "m2s15a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: [100, "130:1:70 + 360"] elevation: [0, 60] - delay: [0, 0] + delay: [0, 1] e1: name: "G5S1.wav" description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["f2s4a_Talker1.wav", "m2s17b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: ["80:1:20 + 360", "80:1:20 + 360"] elevation: [10, 60] - delay: [0, 0] + delay: [0, 1] e2: name: "G4S2.wav" description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["m4s16a_Talker1.wav", "f1s16b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: ["130:1:70 + 360", "130:1:70 + 360"] elevation: [10, 60] - delay: [0, 0] + delay: [0, 1] e3: name: "G3S3.wav" description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["m1s16b_Talker1.wav", "f3s10b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: ["180:1:120 + 360", "180:1:120 + 360"] elevation: [10, 60] - delay: [0, 0] + delay: [0, 1] e4: name: "G2S4.wav" description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["f5s19a_Talker1.wav", "m3s1b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] elevation: [10, 60] - delay: [0, 0] + delay: [0, 1] e5: name: "G1S5.wav" description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["f2s4a_Talker1.wav", "m2s17b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: ["-20:-1:-320", "-20:-1:-320"] elevation: [10, 60] - delay: [0, 0] + delay: [0, 1] e6: name: "G6S6.wav" description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["f5s19a_Talker1.wav", "m3s1b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: ["30:-1:-270", "30:-1:-270"] elevation: [10, 60] - delay: [0, 0] + delay: [0, 1] f1: name: "G6S1.wav" description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["f5s15a_Talker1.wav", "m3s8b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: ["60:1:0 + 360", "60:-1:120 - 360"] elevation: [20, 50] - delay: [0, 0] + delay: [0, 1] f2: name: "G5S2.wav" description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["f2s7b_Talker1.wav", "m2s6b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: ["0:1:300", "0:-1:60 - 360"] elevation: [20, 50] - delay: [0, 0] + delay: [0, 1] f3: name: "G4S3.wav" description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["m4s14a_Talker1.wav", "f1s7a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: ["300:1:240 + 360", "300:-1:0"] elevation: [20, 50] - delay: [0, 0] + delay: [0, 1] f4: name: "G3S4.wav" description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["m1s7a_Talker1.wav", "f3s7a_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: ["240:1:180 + 360", "240:-1:-60"] elevation: [20, 50] - delay: [0, 0] + delay: [0, 1] f5: name: "G2S5.wav" description: "two talkers walking around the table in opposite directions, non-overlapping utterances." 
- source: ["f5s15a_Talker1.wav", "m3s8b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: ["180:1:120 + 360", "180:-1:-120"] elevation: [20, 50] - delay: [0, 0] + delay: [0, 1] f6: name: "G1S6.wav" description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["f2s7b_Talker1.wav", "m2s6b_Talker2.wav"] + source: ["test_double.wav", "test_double.wav"] azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] elevation: [20, 50] - delay: [0, 0] + delay: [0, 1] \ No newline at end of file diff --git a/item_generation_scripts/processing/process_ism_items.py b/item_generation_scripts/processing/process_ism_items.py index 8f69a4c6..cf6ade22 100644 --- a/item_generation_scripts/processing/process_ism_items.py +++ b/item_generation_scripts/processing/process_ism_items.py @@ -72,11 +72,16 @@ def generate_ism_items( y = None y_meta = None for i in range(N_sources): + + # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] source_azi = np.atleast_1d(scene["azimuth"])[i] source_ele = np.atleast_1d(scene["elevation"])[i] - source_delay = np.atleast_1d(scene["delay"])[i] - + if 'delay' in scene.keys(): + source_delay = np.atleast_1d(scene["delay"])[i] + else: + source_delay = np.array([0]) + logger.info( f"Encoding {source_file} at position(s) {source_azi},{source_ele}" ) -- GitLab From 9db60d12ee647e33a07c77c446ddc670909d3000 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 4 May 2023 08:42:25 +0200 Subject: [PATCH 07/27] fix extra CRLF in .csv files on Windows --- .../config/ISM1_CONFIG.yml | 2 +- .../config/ISM2_CONFIG.yml | 72 ++--- .../config/STEREO_CONFIG.yml | 306 ++++++++++++++++++ .../processing/process_ism_items.py | 2 +- 4 files changed, 344 insertions(+), 38 deletions(-) create mode 100644 item_generation_scripts/config/STEREO_CONFIG.yml diff --git a/item_generation_scripts/config/ISM1_CONFIG.yml b/item_generation_scripts/config/ISM1_CONFIG.yml index 8d85906b..66f81617 100644 --- a/item_generation_scripts/config/ISM1_CONFIG.yml +++ b/item_generation_scripts/config/ISM1_CONFIG.yml @@ -189,7 +189,7 @@ scenes: elevation: 35 d3: - name: "G3S2.wav" + name: "G2S3.wav" description: "Talker walking around the table." 
source: "test_single.wav" azimuth: "120:-1:120-360" diff --git a/item_generation_scripts/config/ISM2_CONFIG.yml b/item_generation_scripts/config/ISM2_CONFIG.yml index 798da9d0..3bb200e2 100644 --- a/item_generation_scripts/config/ISM2_CONFIG.yml +++ b/item_generation_scripts/config/ISM2_CONFIG.yml @@ -55,7 +55,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [0, 50] elevation: [0, 0] - delay: [0, 1] + delay: [0, 0] a2: name: "G6S2.wav" @@ -63,7 +63,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [50, 350] elevation: [0, 0] - delay: [0, 1] + delay: [0, 0] a3: name: "G5S3.wav" @@ -71,7 +71,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [40, 290] elevation: [0, 0] - delay: [0, 1] + delay: [0, 0] a4: name: "G4S4.wav" @@ -79,7 +79,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [30, 230] elevation: [15, 15] - delay: [0, 1] + delay: [0, 0] a5: name: "G3S5.wav" @@ -87,7 +87,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [20, 170] elevation: [15, 15] - delay: [0, 1] + delay: [0, 0] a6: name: "G2S6.wav" @@ -95,7 +95,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [10, 110] elevation: [15, 15] - delay: [0, 1] + delay: [0, 0] b1: name: "G2S1.wav" @@ -103,7 +103,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [20, 170] elevation: [30, 30] - delay: [0, 1] + delay: [0, 1.5] b2: name: "G1S2.wav" @@ -111,7 +111,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [10, 110] elevation: [30, 30] - delay: [0, 1] + delay: [0, 1.5] b3: name: "G6S3.wav" @@ -119,7 +119,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [0, 50] elevation: [30, 30] - delay: [0, 1] + delay: [0, 1.5] b4: name: "G5S4.wav" @@ -127,7 +127,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [50, 350] elevation: [60, 60] - delay: [0, 1] + delay: [0, 1.5] b5: name: "G4S5.wav" @@ -135,7 +135,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [40, 290] elevation: [60, 60] - delay: [0, 1] + delay: [0, 1.5] b6: name: "G3S6.wav" @@ -143,7 +143,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [30, 230] elevation: [60, 60] - delay: [0, 1] + delay: [0, 1.5] c1: name: "G3S1.wav" @@ -151,7 +151,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [40, 290] elevation: [0, 60] - delay: [0, 1] + delay: [0, 0] c2: name: "G2S2.wav" @@ -159,7 +159,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [30, 230] elevation: [0, 60] - delay: [0, 1] + delay: [0, 0] c3: name: "G1S3.wav" @@ -167,7 +167,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [20, 170] elevation: [0, 60] - delay: [0, 1] + delay: [0, 0] c4: name: "G6S4.wav" @@ -183,7 +183,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [0, 50] elevation: [0, 60] - delay: [0, 1] + delay: [0, 0] c6: name: "G4S6.wav" @@ -191,7 +191,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [50, 350] elevation: [0, 60] - delay: [0, 1] + delay: [0, 0] d1: name: "G4S1.wav" @@ -199,7 +199,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [50, "180:1:120 + 360"] elevation: [0, 60] - delay: [0, 1] + delay: [0, 1.5] d2: name: "G3S2.wav" @@ -207,15 +207,15 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [300, "-70:-1:-10 - 360"] elevation: [0, 60] - delay: [0, 1] + delay: [0, 1.5] d3: - name: "G3S2.wav" + name: "G2S3.wav" description: "one talker sitting at a table, 
second talker walking around the table, ~30% overlapping utterances." source: ["test_double.wav", "test_double.wav"] azimuth: [250, "-20:-1:-320"] elevation: [0, 60] - delay: [0, 1] + delay: [0, 1.5] d4: name: "G1S4.wav" @@ -223,7 +223,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [200, "30:-1:-270"] elevation: [0, 60] - delay: [0, 1] + delay: [0, 1.5] d5: name: "G6S5.wav" @@ -231,7 +231,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [150, "80:1:20 + 360"] elevation: [0, 60] - delay: [0, 1] + delay: [0, 1.5] d6: name: "G5S6.wav" @@ -239,7 +239,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [100, "130:1:70 + 360"] elevation: [0, 60] - delay: [0, 1] + delay: [0, 1.5] e1: name: "G5S1.wav" @@ -247,7 +247,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["80:1:20 + 360", "80:1:20 + 360"] elevation: [10, 60] - delay: [0, 1] + delay: [0, 1.5] e2: name: "G4S2.wav" @@ -255,7 +255,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["130:1:70 + 360", "130:1:70 + 360"] elevation: [10, 60] - delay: [0, 1] + delay: [0, 1.5] e3: name: "G3S3.wav" @@ -263,7 +263,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["180:1:120 + 360", "180:1:120 + 360"] elevation: [10, 60] - delay: [0, 1] + delay: [0, 1.5] e4: name: "G2S4.wav" @@ -271,7 +271,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] elevation: [10, 60] - delay: [0, 1] + delay: [0, 1.5] e5: name: "G1S5.wav" @@ -279,7 +279,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["-20:-1:-320", "-20:-1:-320"] elevation: [10, 60] - delay: [0, 1] + delay: [0, 1.5] e6: name: "G6S6.wav" @@ -287,7 +287,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["30:-1:-270", "30:-1:-270"] elevation: [10, 60] - delay: [0, 1] + delay: [0, 1.5] f1: name: "G6S1.wav" @@ -295,7 +295,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["60:1:0 + 360", "60:-1:120 - 360"] elevation: [20, 50] - delay: [0, 1] + delay: [0, 0] f2: name: "G5S2.wav" @@ -303,7 +303,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["0:1:300", "0:-1:60 - 360"] elevation: [20, 50] - delay: [0, 1] + delay: [0, 0] f3: name: "G4S3.wav" @@ -311,7 +311,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["300:1:240 + 360", "300:-1:0"] elevation: [20, 50] - delay: [0, 1] + delay: [0, 0] f4: name: "G3S4.wav" @@ -319,7 +319,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["240:1:180 + 360", "240:-1:-60"] elevation: [20, 50] - delay: [0, 1] + delay: [0, 0] f5: name: "G2S5.wav" @@ -327,7 +327,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["180:1:120 + 360", "180:-1:-120"] elevation: [20, 50] - delay: [0, 1] + delay: [0, 0] f6: name: "G1S6.wav" @@ -335,5 +335,5 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] elevation: [20, 50] - delay: [0, 1] + delay: [0, 0] \ No newline at end of file diff --git a/item_generation_scripts/config/STEREO_CONFIG.yml b/item_generation_scripts/config/STEREO_CONFIG.yml new file mode 100644 index 00000000..adc08b4c --- /dev/null +++ b/item_generation_scripts/config/STEREO_CONFIG.yml @@ -0,0 +1,306 @@ +--- +################################################ +# General configuration +################################################ + +### Output format +format: "STEREO" + +### Date; default = YYYYMMDD_HH.MM.SS +# date: 
2023.06.30 + +### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false +# delete_tmp: true + +### Output sampling rate in Hz needed for headerless audio files; default = 48000 +fs: 48000 + +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + +### Input path to mono files +input_path: "./items_mono" + +### Input path to stereo impulse response files +input_path_IR: "./IR" + +### Output path for generated test items and metadata files +output_path: "./output" + +### Target loudness in LKFS; default = null (no loudness normalization applied) +loudness: -26 + + +################################################ +### Scene description +################################################ + +### Each scene must start with the sceneN tag +### Specify the mono source filename (the program will search for it in the input_path folder) +### Specify azimuth and elevation for each input source +### Specify the delay in seconds for each input source +### Note 1: use [val1, val2, ...] for multiple sources in a scene +### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames + +### Note 3: we're using right-handed coordinate system with azi = 0 pointing from the nose to the screen +### azimuth: float, [-180,180]; positive indicates left +### elevation: float, [-90,90]; positive indicates up +### distance: float, tbd: default: 1 +### spread: float, [0,360]; spread in angles from 0 ... 360˚ +### gain: float, [0,1] + +scenes: + a1: + name: "G1S1.wav" + description: "Large anechoic room with AB microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["LAABP01.L.IR32", "LAABP01.R.IR32"] + delay: [0, 0] + + a2: + name: "G6S2.wav" + description: "Large anechoic room with AB microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["LAABP02.L.IR32", "LAABP02.R.IR32"] + delay: [0, 0] + + a3: + name: "G5S3.wav" + description: "Large anechoic room with AB microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["LAABP03.L.IR32", "LAABP03.R.IR32"] + delay: [0, 0] + + a4: + name: "G4S4.wav" + description: "Large anechoic room with AB microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["LAABP04.L.IR32", "LAABP04.R.IR32"] + delay: [0, 0] + + a5: + name: "G3S5.wav" + description: "Large anechoic room with AB microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["LAABP05.L.IR32", "LAABP05.R.IR32"] + delay: [0, 0] + + a6: + name: "G2S6.wav" + description: "Large anechoic room with AB microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["LAABP06.L.IR32", "LAABP06.R.IR32"] + delay: [0, 0] + + b1: + name: "G2S1.wav" + description: "Small anechoic room with AB microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] + delay: [0, 1.5] + + b2: + name: "G1S2.wav" + description: "Small anechoic room with AB microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] + delay: [0, 1.5] + + b3: + name: "G6S3.wav" + description: "Small anechoic room with AB microphone pickup." 
+ source: ["test_double.wav", "test_double.wav"] + IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] + delay: [0, 1.5] + + b4: + name: "G5S4.wav" + description: "Small anechoic room with AB microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] + delay: [0, 1.5] + + b5: + name: "G4S5.wav" + description: "Small anechoic room with AB microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] + delay: [0, 1.5] + + b6: + name: "G3S6.wav" + description: "Small anechoic room with AB microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] + delay: [0, 1.5] + + c1: + name: "G3S1.wav" + description: "Small anechoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] + delay: [0, 0] + + c2: + name: "G2S2.wav" + description: "Small anechoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] + delay: [0, 0] + + c3: + name: "G1S3.wav" + description: "Small anechoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] + delay: [0, 0] + + c4: + name: "G6S4.wav" + description: "Small anechoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] + delay: [0, 1] + + c5: + name: "G5S5.wav" + description: "Small anechoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] + delay: [0, 0] + + c6: + name: "G4S6.wav" + description: "Small anechoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] + delay: [0, 0] + + d1: + name: "G4S1.wav" + description: "Small echoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + delay: [0, 1.5] + + d2: + name: "G3S2.wav" + description: "Small echoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + delay: [0, 1.5] + + d3: + name: "G3S2.wav" + description: "Small echoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + delay: [0, 1.5] + + d4: + name: "G1S4.wav" + description: "Small echoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + delay: [0, 1.5] + + d5: + name: "G6S5.wav" + description: "Small echoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + delay: [0, 1.5] + + d6: + name: "G5S6.wav" + description: "Small echoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + delay: [0, 1.5] + + e1: + name: "G5S1.wav" + description: "Small echoic room with binaural microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] + delay: [0, 1.5] + + e2: + name: "G4S2.wav" + description: "Small echoic room with binaural microphone pickup." 
+ source: ["test_double.wav", "test_double.wav"] + IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] + delay: [0, 1.5] + + e3: + name: "G3S3.wav" + description: "Small echoic room with binaural microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] + delay: [0, 1.5] + + e4: + name: "G2S4.wav" + description: "Small echoic room with binaural microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] + delay: [0, 1.5] + + e5: + name: "G1S5.wav" + description: "Small echoic room with binaural microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] + delay: [0, 1.5] + + e6: + name: "G6S6.wav" + description: "Small echoic room with binaural microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] + delay: [0, 1.5] + + f1: + name: "G6S1.wav" + description: "Small echoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + delay: [0, 0] + + f2: + name: "G5S2.wav" + description: "Small echoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + delay: [0, 0] + + f3: + name: "G4S3.wav" + description: "Small echoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + delay: [0, 0] + + f4: + name: "G3S4.wav" + description: "Small echoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + delay: [0, 0] + + f5: + name: "G2S5.wav" + description: "Small echoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + delay: [0, 0] + + f6: + name: "G1S6.wav" + description: "Small echoic room with MS microphone pickup." + source: ["test_double.wav", "test_double.wav"] + IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + delay: [0, 0] + \ No newline at end of file diff --git a/item_generation_scripts/processing/process_ism_items.py b/item_generation_scripts/processing/process_ism_items.py index cf6ade22..f1b84cda 100644 --- a/item_generation_scripts/processing/process_ism_items.py +++ b/item_generation_scripts/processing/process_ism_items.py @@ -227,7 +227,7 @@ def generate_ism_items( # generate .csv filename (should end with .0.csv, .1.csv, ...) 
csv_filename = os.path.normpath(f"{output_filename}.{i}.csv") - with open(os.path.join(output_path, csv_filename), "w") as f: + with open(os.path.join(output_path, csv_filename), 'w', newline='', encoding='utf-8') as f: # create csv writer writer = csv.writer(f) -- GitLab From e0fbcf7a0eb7fd4ef3d48749e78611fecb5785c8 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Mon, 8 May 2023 18:36:16 +0200 Subject: [PATCH 08/27] fix 20ms frame alignment --- .../processing/process_ism_items.py | 83 ++++++++++++------- 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/item_generation_scripts/processing/process_ism_items.py b/item_generation_scripts/processing/process_ism_items.py index f1b84cda..4b33a84e 100644 --- a/item_generation_scripts/processing/process_ism_items.py +++ b/item_generation_scripts/processing/process_ism_items.py @@ -36,8 +36,8 @@ import logging import os from pathlib import Path from typing import Optional - import numpy as np +from math import floor from item_generation_scripts.audiotools import audio, audiofile from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness @@ -69,8 +69,14 @@ def generate_ism_items( # extract the number of audio sources N_sources = len(np.atleast_1d(scene["source"])) - y = None + # initialize output variables + if format == "ISM2": + y = audio.ChannelBasedAudio("STEREO") + else: + y = audio.ChannelBasedAudio("MONO") y_meta = None + + # repeat for all source files for i in range(N_sources): # parse parameters from the scene description @@ -87,16 +93,18 @@ def generate_ism_items( ) # read source file - audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs) - x = audio_object.audio - fs = audio_object.fs + x = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs) - # find the number of frames - N_frames = int(len(x) / fs * 50 + 1) + # get the number of frames (multiple of 20ms) + N_frames = int(len(x.audio) / x.fs * 50) + + # trim the source signal to align to 20ms boundary + len = int(N_frames * x.fs / 50) + x.audio = x.audio[:len] # adjust the level of the source file - _, scale_factor = get_loudness(audio_object, target_level, "MONO") - x *= scale_factor + _, scale_factor = get_loudness(x, target_level, "MONO") + x.audio *= scale_factor # read azimuth information and create array if isinstance(source_azi, str): @@ -167,59 +175,70 @@ def generate_ism_items( # delay the source file if source_delay > 0: - pre = np.zeros((int(source_delay * fs), x.shape[1])) - x = np.concatenate([pre, x]) + # ensure delay is a multiple of 20ms + N_delay = int(floor(source_delay * 50) / 50 * x.fs) + + # insert all-zero preamble + pre = np.zeros((N_delay, x.audio.shape[1])) + x.audio = np.concatenate([pre, x.audio]) - # apply delay to metadata as well + # insert neutral position as a pre-amble pre = np.tile( - [0.00, 0.00, 1.00, 0.00, 1.00], (int(source_delay * 50), 1) - ) - # pre = np.zeros((int(source_delay * 50), x_meta.shape[1])) + [0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1) + ) # !!!! 
TBD - check whether we should insert the neutral position or the first position of the metadata
 x_meta = np.concatenate([pre, x_meta])
- # add source signal to the array of source signals
- if y is None:
- y = x
+ # add source signal to the array of all source signals
+ y.fs = x.fs
+ if y.audio is None:
+ y.audio = x.audio
 else:
 # append zeros to have equal length of all source signals
- if x.shape[0] > y.shape[0]:
- y = np.vstack((y, np.zeros((x.shape[0] - y.shape[0], y.shape[1]))))
- elif y.shape[0] > x.shape[0]:
- x = np.vstack((x, np.zeros((y.shape[0] - x.shape[0], x.shape[1]))))
- y = np.hstack((y, x))
+ if x.audio.shape[0] > y.audio.shape[0]:
+ y.audio = np.vstack((y.audio, np.zeros((x.audio.shape[0] - y.audio.shape[0], y.audio.shape[1]))))
+ elif y.audio.shape[0] > x.audio.shape[0]:
+ x.audio = np.vstack((x.audio, np.zeros((y.audio.shape[0] - x.audio.shape[0], x.audio.shape[1]))))
+ y.audio = np.hstack((y.audio, x.audio))
 # add metadata to the array of all metadata
- x_meta = x_meta[np.newaxis, :] # make sure x_meta is a 3d array
+ # make sure x_meta is a 3d array
+ x_meta = x_meta[np.newaxis, :]
 if y_meta is None:
 y_meta = x_meta
 else:
 N_srcs = y_meta.shape[0]
 N_meta_features = y_meta.shape[2]
- # append postamble (created by repeating the last row of metadata) to have equal length of all metadata
+ # append the last position of the metadata to have equal length of all metadata
 if x_meta.shape[1] > y_meta.shape[1]:
 N_delta = x_meta.shape[1] - y_meta.shape[1]
- y_meta = y_meta.reshape(y_meta.shape[1], -1) # reshape to 2d array
+ # reshape to 2d array
+ y_meta = y_meta.reshape(y_meta.shape[1], -1)
+ # repeat last row N_delta times and append to the array
 y_meta = np.vstack(
 (y_meta, np.tile(y_meta[-1, :], (N_delta, 1)))
- ) # repeat last row N_delta times and append to the array
+ )
+ # reshape back to 3d array
 y_meta = y_meta.reshape(
 N_srcs, -1, N_meta_features
- ) # reshape back to 3d array
+ )
 elif y_meta.shape[1] > x_meta.shape[1]:
 N_delta = y_meta.shape[1] - x_meta.shape[1]
- x_meta = x_meta.reshape(x_meta.shape[1], -1) # reshape to 2d array
+ # reshape to 2d array
+ x_meta = x_meta.reshape(x_meta.shape[1], -1)
+ # repeat last row N_delta times and append to the array
 x_meta = np.vstack(
 (x_meta, np.tile(x_meta[-1, :], (N_delta, 1)))
- ) # repeat last row N_delta times and append to the array
+ )
+ # reshape back to 3d array
+ x_meta = np.expand_dims(x_meta, axis=0)
- x_meta = np.expand_dims(x_meta, axis=0) # reshape back to 3d array
 y_meta = np.concatenate([y_meta, x_meta])
 # write individual ISM audio streams to the output file in an interleaved format
 output_filename = scene["name"]
 audiofile.write(
- os.path.join(output_path, output_filename), y, fs
+ os.path.join(output_path, output_filename), y.audio, y.fs
 ) # !!!! TBD: replace all os.path.xxx operations with the Path object
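Note on the 20 ms alignment introduced by this patch: at 50 frames per second the source is trimmed to a whole number of frames, and the start delay is rounded down to a frame multiple before the all-zero preamble is prepended. A minimal standalone sketch of the same arithmetic (the helper name is illustrative and not part of the scripts; it assumes a (samples, channels) float array as above):

import numpy as np
from math import floor

def align_to_20ms(x: np.ndarray, fs: int, delay_s: float = 0.0) -> np.ndarray:
    # trim to a whole number of 20 ms frames (50 frames per second)
    n_frames = int(x.shape[0] / fs * 50)
    x = x[: int(n_frames * fs / 50)]
    # round the delay down to a multiple of 20 ms, then prepend silence
    n_delay = int(floor(delay_s * 50) / 50 * fs)
    return np.concatenate([np.zeros((n_delay, x.shape[1])), x])

# e.g. fs=48000 and delay_s=1.5 give n_delay = 72000 samples, i.e. exactly 75 frames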
 # write individual ISM metadata to output files in .csv format
--
GitLab

From 9b9dead85aea5e0510521221614b50cff7598ff7 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky
Date: Mon, 8 May 2023 18:36:50 +0200
Subject: [PATCH 09/27] stereo item generation

---
 item_generation_scripts/__init__.py | 19 +-
 .../audiotools/audiofile.py | 5 +-
 .../audiotools/wrappers/reverb.py | 186 ++++++++++++++++++
 .../config/STEREO_CONFIG.yml | 164 ++++++++-------
 item_generation_scripts/constants.py | 15 +-
 .../processing/process_stereo_items.py | 144 ++++++++++++++
 6 files changed, 431 insertions(+), 102 deletions(-)
 create mode 100644 item_generation_scripts/audiotools/wrappers/reverb.py
 create mode 100644 item_generation_scripts/processing/process_stereo_items.py

diff --git a/item_generation_scripts/__init__.py b/item_generation_scripts/__init__.py
index c08820ea..5afa3cc6 100644
--- a/item_generation_scripts/__init__.py
+++ b/item_generation_scripts/__init__.py
@@ -40,7 +40,7 @@ from item_generation_scripts.constants import (
 LOGGER_FORMAT,
 LOGGER_SUFFIX,
 )
-from item_generation_scripts.processing import config, process_ism_items
+from item_generation_scripts.processing import config, process_ism_items, process_stereo_items
 from item_generation_scripts.utils import create_dir
@@ -83,7 +83,7 @@ def main(args):
 # generate input items
 if cfg.format.startswith("ISM"):
- # generate ISM items according to scene description
+ # generate ISM items with metadata according to scene description
 process_ism_items.generate_ism_items(
 cfg.format,
 cfg.loudness,
@@ -93,7 +93,20 @@ def main(args):
 logger,
 fs=cfg.fs
 )
-
+ elif cfg.format == "STEREO":
+ # generate STEREO items according to scene description
+ process_stereo_items.generate_stereo_items(
+ cfg.format,
+ cfg.loudness,
+ cfg.input_path,
+ cfg.IR_path,
+ cfg.output_path,
+ cfg.scenes,
+ logger,
+ fs=cfg.fs,
+ IR_fs=cfg.IR_fs,
+ )
+
 # copy configuration to output directory
 with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f:
 yaml.safe_dump(cfg._yaml_dump, f)

diff --git a/item_generation_scripts/audiotools/audiofile.py b/item_generation_scripts/audiotools/audiofile.py
index 954c91f8..d5687a89 100644
--- a/item_generation_scripts/audiotools/audiofile.py
+++ b/item_generation_scripts/audiotools/audiofile.py
@@ -110,6 +110,7 @@ def write(
 filename: Union[str, Path],
 x: np.ndarray,
 fs: Optional[int] = 48000,
+ dtype: Optional[str] = "int16",
 ) -> None:
 """
 Write audio file (.pcm, .wav or .raw)
@@ -122,6 +123,8 @@ def write(
 Numpy 2D array of dimension: number of channels x number of samples
 fs: Optional[int]
 Sampling rate, required for .pcm or .raw input file, default = 48000 (Hz)
+ dtype: Optional[str]
+ Data type used when writing .pcm or .raw output files, default = 'int16'
 Returns
 -------
@@ -141,7 +144,7 @@ def write(
 x = x.astype(np.int16)
 wav.write(filename, fs, x)
 elif file_extension == ".pcm" or file_extension == ".raw":
- x = x.astype("int16").reshape(-1, 1)
+ x = x.astype(dtype).reshape(-1, 1)
 x.tofile(filename)
 else:
 raise ValueError("Wrong input format.
Use wav, pcm or raw") diff --git a/item_generation_scripts/audiotools/wrappers/reverb.py b/item_generation_scripts/audiotools/wrappers/reverb.py new file mode 100644 index 00000000..97fae8f5 --- /dev/null +++ b/item_generation_scripts/audiotools/wrappers/reverb.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +import os.path +import numpy as np +from scipy.fft import fft +from copy import copy +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import Optional, Union + +from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES +from item_generation_scripts.utils import find_binary, run +from item_generation_scripts.audiotools.audio import Audio +from item_generation_scripts.audiotools.audiofile import read, write +from item_generation_scripts.audiotools.wrappers.filter import resample_itu + + +def reverb( + input: Audio, + IR: Audio, + align: Optional[float] = None, +) -> Audio: + """ + Wrapper for the ITU-T reverb binary to convolve mono audio signal with an impulse response + Note: The 'reverb' binary tool expects that the IR file is written in the 32b IEEE Standard 754 floating-point representation. 
+
+ Parameters
+ ----------
+ input: Audio
+ Input audio signal
+ IR: Audio
+ Impulse response
+ align: float
+ multiplicative factor applied to the reverberated signal to align its energy level with a reference signal
+
+ Returns
+ -------
+ output: Audio
+ Audio signal convolved with the IR
+ """
+
+ # find binary
+ if "reverb" in DEFAULT_CONFIG_BINARIES["binary_paths"]:
+ binary = find_binary(
+ DEFAULT_CONFIG_BINARIES["binary_paths"]["reverb"].name,
+ binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["reverb"].parent,
+ )
+ else:
+ binary = find_binary("reverb")
+
+ with TemporaryDirectory(dir="./tmp_reverb") as tmp_dir:
+ tmp_dir = Path(tmp_dir)
+
+ # resample input audio signal to that of the IR
+ old_fs = None
+ tmp_input = copy(input)
+ if input.fs != IR.fs:
+ old_fs = input.fs
+ tmp_input.audio = resample_itu(tmp_input, IR.fs)
+ tmp_input.fs = IR.fs
+
+ # write input audio signal to temporary file in .pcm format
+ tmp_input_file = tmp_dir.joinpath(f"tmp_reverbIn.pcm")
+ write(tmp_input_file, tmp_input.audio, tmp_input.fs)
+
+ # down-scale IR to prevent saturation
+ # max_value = np.max(np.abs(IR.audio))
+ # if max_value > 1.0:
+ # IR.audio = IR.audio / max_value
+
+ # write IR to temporary file in .pcm format
+ # note: the reverb tool expects 32b float format
+ tmp_IR_file = tmp_dir.joinpath(f"tmp_IR.pcm")
+ write(tmp_IR_file, IR.audio.astype("float32"), IR.fs, dtype="float32")
+
+ # set up the 'reverb' command line
+ cmd = [
+ str(binary),
+ ]
+
+ # append multiplicative factor, if provided
+ if align:
+ cmd.extend(["-align", str(align)])
+
+ # append temporary filenames
+ tmp_output_file = tmp_dir.joinpath(f"tmp_reverbOut.pcm")
+ cmd.extend([tmp_input_file, tmp_IR_file, tmp_output_file])
+
+ # run the 'reverb' command
+ run(cmd)
+
+ # read the reverberated output file
+ output = copy(tmp_input)
+ output.audio, _ = read(tmp_output_file, nchannels=1, fs=tmp_input.fs)
+
+ # reverse the resampling
+ if old_fs:
+ output.audio = resample_itu(output, old_fs)
+ output.fs = old_fs
+
+ return output
+
+def reverb_stereo(
+ input: Audio,
+ stereo_IR: Audio,
+ align: Optional[float] = None,
+) -> Audio:
+ """
+ Wrapper for the ITU-T reverb binary to convolve a mono audio signal with a stereo impulse response
+
+ Parameters
+ ----------
+ input: Audio
+ Input audio signal
+ stereo_IR: Audio
+ Stereo impulse response
+ align: float
+ multiplicative factor applied to the reverberated signal to align its energy level with a reference signal
+
+ Returns
+ -------
+ output: Audio
+ Audio signal convolved with the stereo IR
+ """
+
+ # convert to float32
+ stereo_IR.audio = np.float32(stereo_IR.audio)
+
+ # separate into left and right IR
+ IR_left = copy(stereo_IR)
+ IR_left.name = "MONO"
+ IR_left.num_channels = 1
+ IR_left.audio = np.reshape(stereo_IR.audio[:,0], (-1, 1))
+
+ IR_right = copy(stereo_IR)
+ IR_right.name = "MONO"
+ IR_right.num_channels = 1
+ IR_right.audio = np.reshape(stereo_IR.audio[:,1], (-1, 1))
+
+ # calculate the scaling factor such that the maximum gain of the IR filter across all frequencies is 0dB
+ if align is None:
+ H = fft(stereo_IR.audio, axis=0)
+ align = 1.0 / np.max(np.abs(H))
+ # stereo_IR.audio *= align
+
+ # convolve mono input with left and right IR
+ y_left = reverb(input, IR_left, align=align)
+ y_right = reverb(input, IR_right, align=align)
+
+ # combine into stereo output
+ y = copy(input)
+ y.name = "STEREO"
+ y.num_channels = 2
+ y.audio = np.column_stack([y_left.audio, y_right.audio])
+
+ return y
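Note on the align factor above: when no factor is given, reverb_stereo() normalizes the stereo IR so that its peak gain across all frequencies is 0 dB, using one shared factor for both channels so the left/right balance is preserved. The same computation in isolation (the function name and the random test IR are illustrative only, not part of the scripts):

import numpy as np
from scipy.fft import fft

def max_gain_align(stereo_ir: np.ndarray) -> float:
    # frequency response of each IR channel (FFT along the sample axis)
    H = fft(stereo_ir, axis=0)
    # one shared factor so the loudest bin of either channel ends up at 0 dB
    return float(1.0 / np.max(np.abs(H)))

# illustrative usage with a random 2-channel IR
ir = np.random.randn(32000, 2).astype("float32")
align = max_gain_align(ir)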
diff --git a/item_generation_scripts/config/STEREO_CONFIG.yml b/item_generation_scripts/config/STEREO_CONFIG.yml
index adc08b4c..65b9b7e4 100644
--- a/item_generation_scripts/config/STEREO_CONFIG.yml
+++ b/item_generation_scripts/config/STEREO_CONFIG.yml
@@ -15,6 +15,9 @@ format: "STEREO"
 ### Output sampling rate in Hz needed for headerless audio files; default = 48000
 fs: 48000
+### IR sampling rate in Hz needed for headerless audio files; default = 48000
+IR_fs: 32000
+
 ### Any relative paths will be interpreted relative to the working directory the script is called from!
 ### Usage of absolute paths is recommended.
 ### Do not use file names with dots "." in them! This is not supported, use "_" instead
@@ -24,7 +27,7 @@ fs: 48000
 input_path: "./items_mono"
 ### Input path to stereo impulse response files
-input_path_IR: "./IR"
+IR_path: "./IR"
 ### Output path for generated test items and metadata files
 output_path: "./output"
@@ -39,268 +42,261 @@ loudness: -26
 ### Each scene must start with the sceneN tag
 ### Specify the mono source filename (the program will search for it in the input_path folder)
-### Specify azimuth and elevation for each input source
+### Specify the stereo IR source filename (the program will search for it in the IR_path folder)
 ### Specify the delay in seconds for each input source
 ### Note 1: use [val1, val2, ...] for multiple sources in a scene
 ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames
-### Note 3: we're using right-handed coordinate system with azi = 0 pointing from the nose to the screen
-### azimuth: float, [-180,180]; positive indicates left
-### elevation: float, [-90,90]; positive indicates up
-### distance: float, tbd: default: 1
-### spread: float, [0,360]; spread in angles from 0 ... 360˚
-### gain: float, [0,1]
-
 scenes:
 a1:
 name: "G1S1.wav"
- description: "Large anechoic room with AB microphone pickup."
- source: ["test_double.wav", "test_double.wav"]
- IR: ["LAABP01.L.IR32", "LAABP01.R.IR32"]
- delay: [0, 0]
+ description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room."
+ source: ["test_single.wav", "test_single.wav"]
+ IR: ["LEABP05.wav", "LEABP11.wav"]
+ delay: [0, 3]
 a2:
 name: "G6S2.wav"
 description: "Large anechoic room with AB microphone pickup."
- source: ["test_double.wav", "test_double.wav"]
- IR: ["LAABP02.L.IR32", "LAABP02.R.IR32"]
- delay: [0, 0]
+ source: ["test_single.wav", "test_single.wav"]
+ IR: ["LEABP05.wav", "LEABP11.wav"]
+ delay: [0, 3]
 a3:
 name: "G5S3.wav"
 description: "Large anechoic room with AB microphone pickup."
- source: ["test_double.wav", "test_double.wav"]
- IR: ["LAABP03.L.IR32", "LAABP03.R.IR32"]
+ source: ["test_single.wav", "test_single.wav"]
+ IR: ["LAABP03.wav", "LAABP03.wav"]
 delay: [0, 0]
 a4:
 name: "G4S4.wav"
 description: "Large anechoic room with AB microphone pickup."
- source: ["test_double.wav", "test_double.wav"]
- IR: ["LAABP04.L.IR32", "LAABP04.R.IR32"]
+ source: ["test_single.wav", "test_single.wav"]
+ IR: ["LAABP04.wav", "LAABP04.wav"]
 delay: [0, 0]
 a5:
 name: "G3S5.wav"
 description: "Large anechoic room with AB microphone pickup."
- source: ["test_double.wav", "test_double.wav"]
- IR: ["LAABP05.L.IR32", "LAABP05.R.IR32"]
+ source: ["test_single.wav", "test_single.wav"]
+ IR: ["LAABP05.wav", "LAABP05.wav"]
 delay: [0, 0]
 a6:
 name: "G2S6.wav"
 description: "Large anechoic room with AB microphone pickup."
- source: ["test_double.wav", "test_double.wav"] - IR: ["LAABP06.L.IR32", "LAABP06.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["LAABP06.wav", "LAABP06.wav"] delay: [0, 0] b1: name: "G2S1.wav" description: "Small anechoic room with AB microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SAABP01.wav", "SAABP01.wav"] delay: [0, 1.5] b2: name: "G1S2.wav" description: "Small anechoic room with AB microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SAABP01.wav", "SAABP01.wav"] delay: [0, 1.5] b3: name: "G6S3.wav" description: "Small anechoic room with AB microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SAABP01.wav", "SAABP01.wav"] delay: [0, 1.5] b4: name: "G5S4.wav" description: "Small anechoic room with AB microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SAABP01.wav", "SAABP01.wav"] delay: [0, 1.5] b5: name: "G4S5.wav" description: "Small anechoic room with AB microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SAABP01.wav", "SAABP01.wav"] delay: [0, 1.5] b6: name: "G3S6.wav" description: "Small anechoic room with AB microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SAABP01.wav", "SAABP01.wav"] delay: [0, 1.5] c1: name: "G3S1.wav" description: "Small anechoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SAMSP01.wav", "SAMSP01.wav"] delay: [0, 0] c2: name: "G2S2.wav" description: "Small anechoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SAMSP01.wav", "SAMSP01.wav"] delay: [0, 0] c3: name: "G1S3.wav" description: "Small anechoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SAMSP01.wav", "SAMSP01.wav"] delay: [0, 0] c4: name: "G6S4.wav" description: "Small anechoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SAMSP01.wav", "SAMSP01.wav"] delay: [0, 1] c5: name: "G5S5.wav" description: "Small anechoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SAMSP01.wav", "SAMSP01.wav"] delay: [0, 0] c6: name: "G4S6.wav" description: "Small anechoic room with MS microphone pickup." 
- source: ["test_double.wav", "test_double.wav"] - IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SAMSP01.wav", "SAMSP01.wav"] delay: [0, 0] d1: name: "G4S1.wav" description: "Small echoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEMSP01.wav", "SEMSP01.wav"] delay: [0, 1.5] d2: name: "G3S2.wav" description: "Small echoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEMSP01.wav", "SEMSP01.wav"] delay: [0, 1.5] d3: name: "G3S2.wav" description: "Small echoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEMSP01.wav", "SEMSP01.wav"] delay: [0, 1.5] d4: name: "G1S4.wav" description: "Small echoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEMSP01.wav", "SEMSP01.wav"] delay: [0, 1.5] d5: name: "G6S5.wav" description: "Small echoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEMSP01.wav", "SEMSP01.wav"] delay: [0, 1.5] d6: name: "G5S6.wav" description: "Small echoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEMSP01.wav", "SEMSP01.wav"] delay: [0, 1.5] e1: name: "G5S1.wav" description: "Small echoic room with binaural microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEBIP01.wav", "SEBIP01.wav"] delay: [0, 1.5] e2: name: "G4S2.wav" description: "Small echoic room with binaural microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEBIP01.wav", "SEBIP01.wav"] delay: [0, 1.5] e3: name: "G3S3.wav" description: "Small echoic room with binaural microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEBIP01.wav", "SEBIP01.wav"] delay: [0, 1.5] e4: name: "G2S4.wav" description: "Small echoic room with binaural microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEBIP01.wav", "SEBIP01.wav"] delay: [0, 1.5] e5: name: "G1S5.wav" description: "Small echoic room with binaural microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEBIP01.wav", "SEBIP01.wav"] delay: [0, 1.5] e6: name: "G6S6.wav" description: "Small echoic room with binaural microphone pickup." 
- source: ["test_double.wav", "test_double.wav"] - IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEBIP01.wav", "SEBIP01.wav"] delay: [0, 1.5] f1: name: "G6S1.wav" description: "Small echoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEMSP01.wav", "SEMSP01.wav"] delay: [0, 0] f2: name: "G5S2.wav" description: "Small echoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEMSP01.wav", "SEMSP01.wav"] delay: [0, 0] f3: name: "G4S3.wav" description: "Small echoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEMSP01.wav", "SEMSP01.wav"] delay: [0, 0] f4: name: "G3S4.wav" description: "Small echoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEMSP01.wav", "SEMSP01.wav"] delay: [0, 0] f5: name: "G2S5.wav" description: "Small echoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEMSP01.wav", "SEMSP01.wav"] delay: [0, 0] f6: name: "G1S6.wav" description: "Small echoic room with MS microphone pickup." - source: ["test_double.wav", "test_double.wav"] - IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] + source: ["test_single.wav", "test_single.wav"] + IR: ["SEMSP01.wav", "SEMSP01.wav"] delay: [0, 0] \ No newline at end of file diff --git a/item_generation_scripts/constants.py b/item_generation_scripts/constants.py index 9509d069..6b0d0681 100644 --- a/item_generation_scripts/constants.py +++ b/item_generation_scripts/constants.py @@ -42,10 +42,9 @@ LOGGER_FORMAT = ( LOGGER_DATEFMT = "%m-%d %H:%M:%S" SUPPORTED_FORMATS = { + "STEREO", "ISM1", "ISM2", - "ISM3", - "ISM4", } DEFAULT_CONFIG = { @@ -54,18 +53,6 @@ DEFAULT_CONFIG = { "delete_tmp": False, } -DEFAULT_CONFIG_ISM2 = { - "format": "ISM2", - "input_path": "./input", - "output_path": "./output", - # "cod": { - # "bin": find_binary("IVAS_cod", raise_error=False), - # }, - # "dec": { - # "bin": find_binary("IVAS_dec", raise_error=False), - # }, -} - DEFAULT_CONFIG_BINARIES = { "binary_paths": get_binary_paths( Path(__file__).parent.joinpath("binary_paths.yml") diff --git a/item_generation_scripts/processing/process_stereo_items.py b/item_generation_scripts/processing/process_stereo_items.py new file mode 100644 index 00000000..f8dcc43d --- /dev/null +++ b/item_generation_scripts/processing/process_stereo_items.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. 
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + + +import csv +import logging +import os +from pathlib import Path +from typing import Optional +from copy import copy +import numpy as np +from math import floor + + +from item_generation_scripts.audiotools import audio, audiofile +from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness +from item_generation_scripts.audiotools.wrappers.reverb import reverb_stereo + + +# function for converting nd numpy array to strings with 2 decimal digits +def csv_formatdata(data): + for row in data: + yield ["%0.2f" % v for v in row] + + +def generate_stereo_items( + format: str, + target_level: int, + input_path: Path, + IR_path: Path, + output_path: Path, + scenes: dict, + logger: logging.Logger, + fs: Optional[int] = 48000, + IR_fs: Optional[int] = 48000, +): + """Generate STEREO items from mono items based on scene description""" + + # get the number of scenes + N_scenes = len(scenes) + + for scene_name, scene in scenes.items(): + logger.info(f"Processing scene: {scene_name} out of {N_scenes} scenes") + + # extract the number of audio sources + N_sources = len(np.atleast_1d(scene["source"])) + + # read the IR (check if stereo or two mono files were provided) + source_IR = np.atleast_1d(scene["IR"]) + + y = audio.ChannelBasedAudio("STEREO") + for i in range(N_sources): + + # parse parameters from the scene description + source_file = np.atleast_1d(scene["source"])[i] + IR_file = np.atleast_1d(scene["IR"])[i] + if 'delay' in scene.keys(): + source_delay = np.atleast_1d(scene["delay"])[i] + else: + source_delay = np.array([0]) + + logger.info( + f"Convolving {source_file} with {source_IR}" + ) + + # read source file + x = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs) + + # get the number of frames (multiple of 20ms) + N_frames = int(len(x.audio) / x.fs * 50) + + # trim the source signal to align to 20ms boundary + N_trim = int(N_frames * x.fs / 50) + x.audio = x.audio[:N_trim] + + # read the IR file + IR = 
audio.fromfile("STEREO", os.path.join(IR_path, IR_file), fs=IR_fs) + + # delay the source file + if source_delay > 0: + # ensure delay is a multiple of 20ms + N_delay = int(floor(source_delay * 50) / 50 * x.fs) + + # insert all-zero preamble + pre = np.zeros((N_delay, x.audio.shape[1])) + x.audio = np.concatenate([pre, x.audio]) + + # convolve with stereo IR + x_rev = reverb_stereo(x, IR) + + # adjust the level of the stereo signal + _, scale_factor = get_loudness(x_rev, target_level, "STEREO") + x_rev.audio *= scale_factor + + # add source signal to the array of source signals + y.fs = x.fs + if y.audio is None: + y.audio = x_rev.audio + else: + # append zeros to have equal length of all source signals + if x_rev.audio.shape[0] > y.audio.shape[0]: + y.audio = np.vstack((y.audio, np.zeros((x_rev.audio.shape[0] - y.audio.shape[0], y.audio.shape[1])))) + elif y.audio.shape[0] > x_rev.audio.shape[0]: + x_rev.audio = np.vstack((x_rev.audio, np.zeros((y.audio.shape[0] - x_rev.audio.shape[0], x_rev.audio.shape[1])))) + + # superimpose + y.audio += x_rev.audio + + # write the reverberated audio into output file + output_filename = scene["name"] + audiofile.write( + os.path.join(output_path, output_filename), y.audio, y.fs + ) # !!!! TBD: replace all os.path.xxx operations with the Path object + + return \ No newline at end of file -- GitLab From 714ab327aa7c82e82a984b8047f2aed200efc116 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 9 May 2023 09:25:04 +0200 Subject: [PATCH 10/27] fix incorrect usage of the len keyword --- item_generation_scripts/audiotools/wrappers/reverb.py | 5 ++--- item_generation_scripts/config/ISM1_CONFIG.yml | 2 +- item_generation_scripts/config/ISM2_CONFIG.yml | 2 +- item_generation_scripts/config/STEREO_CONFIG.yml | 2 +- item_generation_scripts/processing/process_ism_items.py | 5 ++--- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/item_generation_scripts/audiotools/wrappers/reverb.py b/item_generation_scripts/audiotools/wrappers/reverb.py index 97fae8f5..1c4491bd 100644 --- a/item_generation_scripts/audiotools/wrappers/reverb.py +++ b/item_generation_scripts/audiotools/wrappers/reverb.py @@ -78,7 +78,7 @@ def reverb( else: binary = find_binary("reverb") - with TemporaryDirectory(dir="./tmp_reverb") as tmp_dir: + with TemporaryDirectory() as tmp_dir: tmp_dir = Path(tmp_dir) # resample input audio signal to that of the IR @@ -167,11 +167,10 @@ def reverb_stereo( IR_right.num_channels = 1 IR_right.audio = np.reshape(stereo_IR.audio[:,1], (-1, 1)) - # calculate the scaling factor such that the maximum gain of the IR filter across all frequencies is 0dB + # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB if align is None: H = fft(stereo_IR.audio, axis=0) align = 1.0 / np.max(np.abs(H)) - # stereo_IR.audio *= align # convolve mono input with left and right IR y_left = reverb(input, IR_left, align=align) diff --git a/item_generation_scripts/config/ISM1_CONFIG.yml b/item_generation_scripts/config/ISM1_CONFIG.yml index 66f81617..560c48fe 100644 --- a/item_generation_scripts/config/ISM1_CONFIG.yml +++ b/item_generation_scripts/config/ISM1_CONFIG.yml @@ -24,7 +24,7 @@ fs: 48000 input_path: "./items_mono" ### Output path for generated test items and metadata files -output_path: "./output" +output_path: "./items_ISM1" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 diff --git a/item_generation_scripts/config/ISM2_CONFIG.yml 
b/item_generation_scripts/config/ISM2_CONFIG.yml index 3bb200e2..3329b440 100644 --- a/item_generation_scripts/config/ISM2_CONFIG.yml +++ b/item_generation_scripts/config/ISM2_CONFIG.yml @@ -24,7 +24,7 @@ fs: 48000 input_path: "./items_mono" ### Output path for generated test items and metadata files -output_path: "./output" +output_path: "./items_ISM2" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 diff --git a/item_generation_scripts/config/STEREO_CONFIG.yml b/item_generation_scripts/config/STEREO_CONFIG.yml index 65b9b7e4..b1095a4b 100644 --- a/item_generation_scripts/config/STEREO_CONFIG.yml +++ b/item_generation_scripts/config/STEREO_CONFIG.yml @@ -30,7 +30,7 @@ input_path: "./items_mono" IR_path: "./IR" ### Output path for generated test items and metadata files -output_path: "./output" +output_path: "./items_STEREO" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 diff --git a/item_generation_scripts/processing/process_ism_items.py b/item_generation_scripts/processing/process_ism_items.py index 4b33a84e..db931d48 100644 --- a/item_generation_scripts/processing/process_ism_items.py +++ b/item_generation_scripts/processing/process_ism_items.py @@ -30,7 +30,6 @@ # the United Nations Convention on Contracts on the International Sales of Goods. # - import csv import logging import os @@ -99,8 +98,8 @@ def generate_ism_items( N_frames = int(len(x.audio) / x.fs * 50) # trim the source signal to align to 20ms boundary - len = int(N_frames * x.fs / 50) - x.audio = x.audio[:len] + N_trim = int(N_frames * x.fs / 50) + x.audio = x.audio[:N_trim] # adjust the level of the source file _, scale_factor = get_loudness(x, target_level, "MONO") -- GitLab From bab5d25fe3aba6bdc20fc4fc95e9097a1332df80 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 9 May 2023 17:05:04 +0200 Subject: [PATCH 11/27] update of the example .yml config file for STEREO --- .../config/STEREO_CONFIG.yml | 226 +++++++++--------- 1 file changed, 113 insertions(+), 113 deletions(-) diff --git a/item_generation_scripts/config/STEREO_CONFIG.yml b/item_generation_scripts/config/STEREO_CONFIG.yml index b1095a4b..0933b1da 100644 --- a/item_generation_scripts/config/STEREO_CONFIG.yml +++ b/item_generation_scripts/config/STEREO_CONFIG.yml @@ -52,251 +52,251 @@ scenes: name: "G1S1.wav" description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." source: ["test_single.wav", "test_single.wav"] - IR: ["LEABP05.wav", "LEABP11.wav"] + IR: ["LEABP04.wav", "LEABP11.wav"] delay: [0, 3] a2: name: "G6S2.wav" - description: "Large anechoic room with AB microphone pickup." + description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." source: ["test_single.wav", "test_single.wav"] IR: ["LEABP05.wav", "LEABP11.wav"] delay: [0, 3] a3: name: "G5S3.wav" - description: "Large anechoic room with AB microphone pickup." + description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." source: ["test_single.wav", "test_single.wav"] - IR: ["LAABP03.wav", "LAABP03.wav"] - delay: [0, 0] + IR: ["LEABP06.wav", "LEABP11.wav"] + delay: [0, 3] a4: name: "G4S4.wav" - description: "Large anechoic room with AB microphone pickup." + description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." 
source: ["test_single.wav", "test_single.wav"] - IR: ["LAABP04.wav", "LAABP04.wav"] - delay: [0, 0] + IR: ["LEABP05.wav", "LEABP10.wav"] + delay: [0, 1.5] a5: name: "G3S5.wav" - description: "Large anechoic room with AB microphone pickup." + description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." source: ["test_single.wav", "test_single.wav"] - IR: ["LAABP05.wav", "LAABP05.wav"] - delay: [0, 0] + IR: ["LEABP05.wav", "LEABP11.wav"] + delay: [0, 1.5] a6: name: "G2S6.wav" - description: "Large anechoic room with AB microphone pickup." + description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." source: ["test_single.wav", "test_single.wav"] - IR: ["LAABP06.wav", "LAABP06.wav"] - delay: [0, 0] + IR: ["LEABP05.wav", "LEABP12.wav"] + delay: [0, 1.5] b1: name: "G2S1.wav" - description: "Small anechoic room with AB microphone pickup." + description: "Two speakers sitting at oval table side by side in a large anechoic conference room." source: ["test_single.wav", "test_single.wav"] - IR: ["SAABP01.wav", "SAABP01.wav"] - delay: [0, 1.5] + IR: ["LAABP05.wav", "LAABP06.wav"] + delay: [0, 35] b2: name: "G1S2.wav" - description: "Small anechoic room with AB microphone pickup." + description: "Two speakers sitting at oval table side by side in a large anechoic conference room." source: ["test_single.wav", "test_single.wav"] - IR: ["SAABP01.wav", "SAABP01.wav"] - delay: [0, 1.5] + IR: ["LAABP07.wav", "LAABP08.wav"] + delay: [0, 3] b3: name: "G6S3.wav" - description: "Small anechoic room with AB microphone pickup." + description: "Two speakers sitting at oval table side by side in a large anechoic conference room." source: ["test_single.wav", "test_single.wav"] - IR: ["SAABP01.wav", "SAABP01.wav"] - delay: [0, 1.5] + IR: ["LAABP09.wav", "LAABP10.wav"] + delay: [0, 3] b4: name: "G5S4.wav" - description: "Small anechoic room with AB microphone pickup." + description: "Two speakers sitting at oval table side by side in a large anechoic conference room." source: ["test_single.wav", "test_single.wav"] - IR: ["SAABP01.wav", "SAABP01.wav"] + IR: ["LAABP11.wav", "LAABP12.wav"] delay: [0, 1.5] b5: name: "G4S5.wav" - description: "Small anechoic room with AB microphone pickup." + description: "Two speakers sitting at oval table side by side in a large anechoic conference room." source: ["test_single.wav", "test_single.wav"] - IR: ["SAABP01.wav", "SAABP01.wav"] + IR: ["LAABP01.wav", "LAABP02.wav"] delay: [0, 1.5] b6: name: "G3S6.wav" - description: "Small anechoic room with AB microphone pickup." + description: "Two speakers sitting at oval table side by side in a large anechoic conference room." source: ["test_single.wav", "test_single.wav"] - IR: ["SAABP01.wav", "SAABP01.wav"] + IR: ["LAABP03.wav", "LAABP04.wav"] delay: [0, 1.5] c1: name: "G3S1.wav" - description: "Small anechoic room with MS microphone pickup." - source: ["test_single.wav", "test_single.wav"] - IR: ["SAMSP01.wav", "SAMSP01.wav"] - delay: [0, 0] + description: "One talker sitting at table in a small anechoic conference room." + source: ["test_single.wav"] + IR: ["SAMSP01.wav"] + delay: [0] c2: name: "G2S2.wav" - description: "Small anechoic room with MS microphone pickup." - source: ["test_single.wav", "test_single.wav"] - IR: ["SAMSP01.wav", "SAMSP01.wav"] - delay: [0, 0] + description: "One talker sitting at table in a small anechoic conference room." 
+ source: ["test_single.wav"] + IR: ["SAMSP04.wav"] + delay: [0] c3: name: "G1S3.wav" - description: "Small anechoic room with MS microphone pickup." - source: ["test_single.wav", "test_single.wav"] - IR: ["SAMSP01.wav", "SAMSP01.wav"] - delay: [0, 0] + description: "One talker sitting at table in a small anechoic conference room." + source: ["test_single.wav"] + IR: ["SAMSP07.wav"] + delay: [0] c4: name: "G6S4.wav" - description: "Small anechoic room with MS microphone pickup." - source: ["test_single.wav", "test_single.wav"] - IR: ["SAMSP01.wav", "SAMSP01.wav"] - delay: [0, 1] + description: "One talker sitting at table in a small echoic conference room." + source: ["test_single.wav"] + IR: ["SEABP01.wav"] + delay: [0] c5: name: "G5S5.wav" - description: "Small anechoic room with MS microphone pickup." - source: ["test_single.wav", "test_single.wav"] - IR: ["SAMSP01.wav", "SAMSP01.wav"] - delay: [0, 0] + description: "One talker sitting at table in a small echoic conference room." + source: ["test_single.wav"] + IR: ["SEABP03.wav"] + delay: [0] c6: name: "G4S6.wav" - description: "Small anechoic room with MS microphone pickup." - source: ["test_single.wav", "test_single.wav"] - IR: ["SAMSP01.wav", "SAMSP01.wav"] - delay: [0, 0] + description: "One talker sitting at table in a small echoic conference room." + source: ["test_single.wav"] + IR: ["SEABP06.wav"] + delay: [0] d1: name: "G4S1.wav" - description: "Small echoic room with MS microphone pickup." - source: ["test_single.wav", "test_single.wav"] - IR: ["SEMSP01.wav", "SEMSP01.wav"] - delay: [0, 1.5] + description: "One talker sitting at table in a small anechoic conference room." + source: ["test_single.wav"] + IR: ["SEBIP01.wav"] + delay: [0] d2: name: "G3S2.wav" - description: "Small echoic room with MS microphone pickup." - source: ["test_single.wav", "test_single.wav"] - IR: ["SEMSP01.wav", "SEMSP01.wav"] - delay: [0, 1.5] + description: "One talker sitting at table in a small anechoic conference room." + source: ["test_single.wav"] + IR: ["SEBIP04.wav"] + delay: [0] d3: name: "G3S2.wav" - description: "Small echoic room with MS microphone pickup." - source: ["test_single.wav", "test_single.wav"] - IR: ["SEMSP01.wav", "SEMSP01.wav"] - delay: [0, 1.5] + description: "One talker sitting at table in a small anechoic conference room." + source: ["test_single.wav"] + IR: ["SEBIP07.wav"] + delay: [0] d4: name: "G1S4.wav" - description: "Small echoic room with MS microphone pickup." - source: ["test_single.wav", "test_single.wav"] - IR: ["SEMSP01.wav", "SEMSP01.wav"] - delay: [0, 1.5] + description: "One talker sitting at table in a small echoic conference room." + source: ["test_single.wav"] + IR: ["SEBIP07.wav"] + delay: [0] d5: name: "G6S5.wav" - description: "Small echoic room with MS microphone pickup." - source: ["test_single.wav", "test_single.wav"] - IR: ["SEMSP01.wav", "SEMSP01.wav"] - delay: [0, 1.5] + description: "One talker sitting at table in a small echoic conference room." + source: ["test_single.wav"] + IR: ["SEBIP07.wav"] + delay: [0] d6: name: "G5S6.wav" - description: "Small echoic room with MS microphone pickup." - source: ["test_single.wav", "test_single.wav"] - IR: ["SEMSP01.wav", "SEMSP01.wav"] - delay: [0, 1.5] + description: "One talker sitting at table in a small echoic conference room." + source: ["test_single.wav"] + IR: ["SEBIP07.wav"] + delay: [0] e1: name: "G5S1.wav" - description: "Small echoic room with binaural microphone pickup." + description: "Two talkers sitting in a room." 
source: ["test_single.wav", "test_single.wav"] - IR: ["SEBIP01.wav", "SEBIP01.wav"] - delay: [0, 1.5] + IR: ["SEMSP01.wav", "SEMSP03.wav"] + delay: [0, 3] e2: name: "G4S2.wav" - description: "Small echoic room with binaural microphone pickup." + description: "Two talkers sitting in a room." source: ["test_single.wav", "test_single.wav"] - IR: ["SEBIP01.wav", "SEBIP01.wav"] - delay: [0, 1.5] + IR: ["SEMSP01.wav", "SEMSP05.wav"] + delay: [0, 3] e3: name: "G3S3.wav" - description: "Small echoic room with binaural microphone pickup." + description: "Two talkers sitting in a room." source: ["test_single.wav", "test_single.wav"] - IR: ["SEBIP01.wav", "SEBIP01.wav"] - delay: [0, 1.5] + IR: ["SEMSP01.wav", "SEMSP07.wav"] + delay: [0, 3] e4: name: "G2S4.wav" - description: "Small echoic room with binaural microphone pickup." + description: "Two talkers sitting in a room." source: ["test_single.wav", "test_single.wav"] - IR: ["SEBIP01.wav", "SEBIP01.wav"] - delay: [0, 1.5] + IR: ["SEMSP03.wav", "SEMSP04.wav"] + delay: [0, 1.5] e5: name: "G1S5.wav" - description: "Small echoic room with binaural microphone pickup." + description: "Two talkers sitting in a room." source: ["test_single.wav", "test_single.wav"] - IR: ["SEBIP01.wav", "SEBIP01.wav"] - delay: [0, 1.5] + IR: ["SEMSP05.wav", "SEMSP07.wav"] + delay: [0, 1.5] e6: name: "G6S6.wav" - description: "Small echoic room with binaural microphone pickup." + description: "Two talkers sitting in a room." source: ["test_single.wav", "test_single.wav"] - IR: ["SEBIP01.wav", "SEBIP01.wav"] - delay: [0, 1.5] + IR: ["SEMSP06.wav", "SEMSP02.wav"] + delay: [0, 1.5] f1: name: "G6S1.wav" - description: "Small echoic room with MS microphone pickup." + description: "Two talkers sitting in a room." source: ["test_single.wav", "test_single.wav"] - IR: ["SEMSP01.wav", "SEMSP01.wav"] - delay: [0, 0] + IR: ["SEBIP05.wav", "SEBIP01.wav"] + delay: [0, 3] f2: name: "G5S2.wav" - description: "Small echoic room with MS microphone pickup." + description: "Two talkers sitting in a room." source: ["test_single.wav", "test_single.wav"] - IR: ["SEMSP01.wav", "SEMSP01.wav"] - delay: [0, 0] + IR: ["SEBIP07.wav", "SEBIP01.wav"] + delay: [0, 3] f3: name: "G4S3.wav" - description: "Small echoic room with MS microphone pickup." + description: "Two talkers sitting in a room." source: ["test_single.wav", "test_single.wav"] - IR: ["SEMSP01.wav", "SEMSP01.wav"] - delay: [0, 0] + IR: ["SEBIP04.wav", "SEBIP01.wav"] + delay: [0, 3] f4: name: "G3S4.wav" - description: "Small echoic room with MS microphone pickup." + description: "Two talkers sitting in a room." source: ["test_single.wav", "test_single.wav"] - IR: ["SEMSP01.wav", "SEMSP01.wav"] - delay: [0, 0] + IR: ["SEBIP02.wav", "SEBIP06.wav"] + delay: [0, 1.5] f5: name: "G2S5.wav" - description: "Small echoic room with MS microphone pickup." + description: "Two talkers sitting in a room." source: ["test_single.wav", "test_single.wav"] - IR: ["SEMSP01.wav", "SEMSP01.wav"] - delay: [0, 0] + IR: ["SEBIP02.wav", "SEBIP06.wav"] + delay: [0, 1.5] f6: name: "G1S6.wav" - description: "Small echoic room with MS microphone pickup." + description: "Two talkers sitting in a room." 
source: ["test_single.wav", "test_single.wav"] - IR: ["SEMSP01.wav", "SEMSP01.wav"] - delay: [0, 0] + IR: ["SEBIP03.wav", "SEBIP04.wav"] + delay: [0, 1.5] \ No newline at end of file -- GitLab From 8a6542d4b6907ca378f9631b3be613cf065c97d1 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 11 May 2023 11:08:15 +0200 Subject: [PATCH 12/27] support for +- overlap in ISM items, expect trimmed sentences, support for low-level random noise addition --- item_generation_scripts/__init__.py | 7 +- .../config/ISM1_CONFIG.yml | 4 + .../config/ISM2_CONFIG.yml | 152 +++++++++--------- .../processing/process_ism_items.py | 92 +++++++++-- 4 files changed, 170 insertions(+), 85 deletions(-) diff --git a/item_generation_scripts/__init__.py b/item_generation_scripts/__init__.py index 5afa3cc6..8b3d8bae 100644 --- a/item_generation_scripts/__init__.py +++ b/item_generation_scripts/__init__.py @@ -91,7 +91,10 @@ def main(args): cfg.output_path, cfg.scenes, logger, - fs=cfg.fs + fs=cfg.fs, + preamble=cfg.preamble, + postamble=cfg.postamble, + add_low_level_random_noise=cfg.add_low_level_random_noise, ) elif cfg.format == "STEREO": # generate STEREO items according to scene description @@ -105,6 +108,8 @@ def main(args): logger, fs=cfg.fs, IR_fs=cfg.IR_fs, + preamble=cfg.preamble, + postamble=cfg.postamble, ) # copy configuration to output directory diff --git a/item_generation_scripts/config/ISM1_CONFIG.yml b/item_generation_scripts/config/ISM1_CONFIG.yml index 560c48fe..9ba070f7 100644 --- a/item_generation_scripts/config/ISM1_CONFIG.yml +++ b/item_generation_scripts/config/ISM1_CONFIG.yml @@ -29,6 +29,10 @@ output_path: "./items_ISM1" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 +### Pre-amble and Post-amble length in seconds (default = None) +preamble: 0.5 +postamble: 0.5 + ################################################ ### Scene description diff --git a/item_generation_scripts/config/ISM2_CONFIG.yml b/item_generation_scripts/config/ISM2_CONFIG.yml index 3329b440..198571d2 100644 --- a/item_generation_scripts/config/ISM2_CONFIG.yml +++ b/item_generation_scripts/config/ISM2_CONFIG.yml @@ -29,6 +29,12 @@ output_path: "./items_ISM2" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 +### Pre-amble and Post-amble length in seconds (default = 0.0) +preamble: 0.5 +postamble: 0.5 + +### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) +add_low_level_random_noise: true ################################################ ### Scene description @@ -37,7 +43,7 @@ loudness: -26 ### Each scene must start with the sceneN tag ### Specify the mono source filename (the program will search for it in the input_path folder) ### Specify azimuth and elevation for each input source -### Specify the delay in seconds for each input source +### Specify the overlap length in seconds for each input source (negative value creates a gap) ### Note 1: use [val1, val2, ...] for multiple sources in a scene ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames @@ -52,288 +58,288 @@ scenes: a1: name: "G1S1.wav" description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." 
- source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [0, 50] elevation: [0, 0] - delay: [0, 0] + overlap: -0.5 a2: name: "G6S2.wav" description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [50, 350] elevation: [0, 0] - delay: [0, 0] + overlap: -0.5 a3: name: "G5S3.wav" description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [40, 290] elevation: [0, 0] - delay: [0, 0] + overlap: -0.5 a4: name: "G4S4.wav" description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [30, 230] elevation: [15, 15] - delay: [0, 0] + overlap: -0.5 a5: name: "G3S5.wav" description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [20, 170] elevation: [15, 15] - delay: [0, 0] + overlap: -0.5 a6: name: "G2S6.wav" description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [10, 110] elevation: [15, 15] - delay: [0, 0] + overlap: -0.5 b1: name: "G2S1.wav" description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [20, 170] elevation: [30, 30] - delay: [0, 1.5] + overlap: 0.5 b2: name: "G1S2.wav" description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [10, 110] elevation: [30, 30] - delay: [0, 1.5] + overlap: 0.5 b3: name: "G6S3.wav" description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [0, 50] elevation: [30, 30] - delay: [0, 1.5] + overlap: 0.5 b4: name: "G5S4.wav" description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [50, 350] elevation: [60, 60] - delay: [0, 1.5] + overlap: 0.5 b5: name: "G4S5.wav" description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." 
- source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [40, 290] elevation: [60, 60] - delay: [0, 1.5] + overlap: 0.5 b6: name: "G3S6.wav" description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [30, 230] elevation: [60, 60] - delay: [0, 1.5] + overlap: 0.5 c1: name: "G3S1.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [40, 290] elevation: [0, 60] - delay: [0, 0] + overlap: -0.5 c2: name: "G2S2.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [30, 230] elevation: [0, 60] - delay: [0, 0] + overlap: -0.5 c3: name: "G1S3.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [20, 170] elevation: [0, 60] - delay: [0, 0] + overlap: -0.5 c4: name: "G6S4.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [10, 110] elevation: [0, 60] - delay: [0, 1] + shift: [0, 1] c5: name: "G5S5.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [0, 50] elevation: [0, 60] - delay: [0, 0] + overlap: -0.5 c6: name: "G4S6.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [50, 350] elevation: [0, 60] - delay: [0, 0] + overlap: -0.5 d1: name: "G4S1.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [50, "180:1:120 + 360"] elevation: [0, 60] - delay: [0, 1.5] + overlap: 0.5 d2: name: "G3S2.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [300, "-70:-1:-10 - 360"] elevation: [0, 60] - delay: [0, 1.5] + overlap: 0.5 d3: name: "G2S3.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
- source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [250, "-20:-1:-320"] elevation: [0, 60] - delay: [0, 1.5] + overlap: 0.5 d4: name: "G1S4.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [200, "30:-1:-270"] elevation: [0, 60] - delay: [0, 1.5] + overlap: 0.5 d5: name: "G6S5.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [150, "80:1:20 + 360"] elevation: [0, 60] - delay: [0, 1.5] + overlap: 0.5 d6: name: "G5S6.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [100, "130:1:70 + 360"] elevation: [0, 60] - delay: [0, 1.5] + overlap: 0.5 e1: name: "G5S1.wav" description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["80:1:20 + 360", "80:1:20 + 360"] elevation: [10, 60] - delay: [0, 1.5] + overlap: 0.5 e2: name: "G4S2.wav" description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["130:1:70 + 360", "130:1:70 + 360"] elevation: [10, 60] - delay: [0, 1.5] + overlap: 0.5 e3: name: "G3S3.wav" description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["180:1:120 + 360", "180:1:120 + 360"] elevation: [10, 60] - delay: [0, 1.5] + overlap: 0.5 e4: name: "G2S4.wav" description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] elevation: [10, 60] - delay: [0, 1.5] + overlap: 0.5 e5: name: "G1S5.wav" description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["-20:-1:-320", "-20:-1:-320"] elevation: [10, 60] - delay: [0, 1.5] + overlap: 0.5 e6: name: "G6S6.wav" description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["30:-1:-270", "30:-1:-270"] elevation: [10, 60] - delay: [0, 1.5] + overlap: 0.5 f1: name: "G6S1.wav" description: "two talkers walking around the table in opposite directions, non-overlapping utterances." 
- source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["60:1:0 + 360", "60:-1:120 - 360"] elevation: [20, 50] - delay: [0, 0] + overlap: -0.5 f2: name: "G5S2.wav" description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["0:1:300", "0:-1:60 - 360"] elevation: [20, 50] - delay: [0, 0] + overlap: -0.5 f3: name: "G4S3.wav" description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["300:1:240 + 360", "300:-1:0"] elevation: [20, 50] - delay: [0, 0] + overlap: -0.5 f4: name: "G3S4.wav" description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["240:1:180 + 360", "240:-1:-60"] elevation: [20, 50] - delay: [0, 0] + overlap: -0.5 f5: name: "G2S5.wav" description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["180:1:120 + 360", "180:-1:-120"] elevation: [20, 50] - delay: [0, 0] + overlap: -0.5 f6: name: "G1S6.wav" description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["test_double.wav", "test_double.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] elevation: [20, 50] - delay: [0, 0] + overlap: -0.5 \ No newline at end of file diff --git a/item_generation_scripts/processing/process_ism_items.py b/item_generation_scripts/processing/process_ism_items.py index db931d48..fe62f048 100644 --- a/item_generation_scripts/processing/process_ism_items.py +++ b/item_generation_scripts/processing/process_ism_items.py @@ -41,6 +41,7 @@ from math import floor from item_generation_scripts.audiotools import audio, audiofile from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness +SEED_RANDOM_NOISE = 0 # function for converting nd numpy array to strings with 2 decimal digits def csv_formatdata(data): @@ -56,6 +57,9 @@ def generate_ism_items( scenes: dict, logger: logging.Logger, fs: Optional[int] = 48000, + preamble: Optional[float] = 0.0, + postamble: Optional[float] = 0.0, + add_low_level_random_noise: Optional[bool] = False, ): """Generate ISM items with metadata from mono items based on scene description""" @@ -75,6 +79,12 @@ def generate_ism_items( y = audio.ChannelBasedAudio("MONO") y_meta = None + # read the overlap length + if 'overlap' in scene.keys(): + source_overlap = float(scene["overlap"]) + else: + source_overlap = 0.0 + # repeat for all source files for i in range(N_sources): @@ -82,10 +92,6 @@ def generate_ism_items( source_file = np.atleast_1d(scene["source"])[i] source_azi = np.atleast_1d(scene["azimuth"])[i] source_ele = np.atleast_1d(scene["elevation"])[i] - if 'delay' in scene.keys(): - source_delay = np.atleast_1d(scene["delay"])[i] - else: - source_delay = np.array([0]) logger.info( f"Encoding {source_file} at position(s) {source_azi},{source_ele}" @@ -93,13 +99,16 @@ def generate_ism_items( # read 
diff --git a/item_generation_scripts/processing/process_ism_items.py b/item_generation_scripts/processing/process_ism_items.py
index db931d48..fe62f048 100644
--- a/item_generation_scripts/processing/process_ism_items.py
+++ b/item_generation_scripts/processing/process_ism_items.py
@@ -41,6 +41,7 @@ from math import floor
 from item_generation_scripts.audiotools import audio, audiofile
 from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness
 
+SEED_RANDOM_NOISE = 0
 
 # function for converting nd numpy array to strings with 2 decimal digits
 def csv_formatdata(data):
@@ -56,6 +57,9 @@ def generate_ism_items(
     scenes: dict,
     logger: logging.Logger,
     fs: Optional[int] = 48000,
+    preamble: Optional[float] = 0.0,
+    postamble: Optional[float] = 0.0,
+    add_low_level_random_noise: Optional[bool] = False,
 ):
     """Generate ISM items with metadata from mono items based on scene description"""
 
@@ -75,6 +79,12 @@ def generate_ism_items(
         y = audio.ChannelBasedAudio("MONO")
         y_meta = None
 
+        # read the overlap length
+        if 'overlap' in scene.keys():
+            source_overlap = float(scene["overlap"])
+        else:
+            source_overlap = 0.0
+
         # repeat for all source files
         for i in range(N_sources):
 
@@ -82,10 +92,6 @@ def generate_ism_items(
             source_file = np.atleast_1d(scene["source"])[i]
             source_azi = np.atleast_1d(scene["azimuth"])[i]
             source_ele = np.atleast_1d(scene["elevation"])[i]
-            if 'delay' in scene.keys():
-                source_delay = np.atleast_1d(scene["delay"])[i]
-            else:
-                source_delay = np.array([0])
 
             logger.info(
                 f"Encoding {source_file} at position(s) {source_azi},{source_ele}"
             )
 
             # read source file
             x = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs)
 
             # get the number of frames (multiple of 20ms)
             N_frames = int(len(x.audio) / x.fs * 50)
 
-            # trim the source signal to align to 20ms boundary
-            N_trim = int(N_frames * x.fs / 50)
-            x.audio = x.audio[:N_trim]
+            # trimming to the 20ms boundary is no longer done here; input sentences
+            # are expected to be pre-trimmed and padding to a 20ms boundary is applied below
+            # N_trim = int(N_frames * x.fs / 50)
+            # x.audio = x.audio[:N_trim]
 
             # adjust the level of the source file
             _, scale_factor = get_loudness(x, target_level, "MONO")
@@ -171,11 +180,17 @@ def generate_ism_items(
 
             # arrange all metadata fields column-wise into a matrix
             x_meta = np.column_stack((azi, ele, dist, spread, gain))
-
-            # delay the source file
-            if source_delay > 0:
-
-                # ensure delay is a multiple of 20ms
-                N_delay = int(floor(source_delay * 50) / 50 * x.fs)
+
+            # shift the second (and all subsequent) source files
+            # (a positive overlap value overlaps with the previous source, a negative value creates a gap)
+            if i > 0 and source_overlap != 0.0:
+                # get the current length of the accumulated source signals
+                N_delay = len(y.audio[:, 0])
+
+                # subtract the overlap; alignment of the total length to a 20ms
+                # boundary is ensured by the padding step below
+                N_delay -= int(source_overlap * x.fs)
 
                 # insert all-zero preamble
                 pre = np.zeros((N_delay, x.audio.shape[1]))
@@ -186,13 +201,28 @@
                     [0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1)
                 )  # !!!! TBD - check if we should insert neutral position or the first position of the metadata
                 x_meta = np.concatenate([pre, x_meta])
+
+            # pad with zeros to ensure that the signal length is a multiple of 20ms
+            N_frame = x.fs / 50
+            if len(x.audio) % N_frame != 0:
+                N_pad = int(N_frame - len(x.audio) % N_frame)
+
+                # insert all-zero preamble
+                pre = np.zeros((N_pad, x.audio.shape[1]))
+                x.audio = np.concatenate([pre, x.audio])
+
+                # insert neutral position as a pre-amble
+                pre = np.tile(
+                    [0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1)
+                )  # !!!! TBD - check if we should insert neutral position or the first position of the metadata
+                x_meta = np.concatenate([pre, x_meta])
 
             # add source signal to the array of all source signals
             y.fs = x.fs
             if y.audio is None:
                 y.audio = x.audio
             else:
-                # append zeros to have equal length of all source signals
+                # pad with zeros to have the same length of all source signals
                 if x.audio.shape[0] > y.audio.shape[0]:
                     y.audio = np.vstack((y.audio, np.zeros((x.audio.shape[0] - y.audio.shape[0], y.audio.shape[1]))))
                 elif y.audio.shape[0] > x.audio.shape[0]:
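As a numbers-only illustration of the offset and padding logic above (example signal lengths, fs = 48000; not part of the patch):

fs = 48000
N_frame = fs // 50                          # 960 samples = one 20 ms frame

len_prev = 216_000                          # 4.5 s of already accumulated signal
for overlap in (0.5, -0.5):
    N_delay = len_prev - int(overlap * fs)  # start offset of the next source
    # overlap = +0.5 -> 192000: starts 0.5 s before the previous source ends
    # overlap = -0.5 -> 240000: leaves a 0.5 s gap of silence (or low-level noise)

sig_len = 192_000 + 123_456                 # example length, not frame aligned
N_pad = (N_frame - sig_len % N_frame) % N_frame
assert (sig_len + N_pad) % N_frame == 0     # front-padding restores whole 20 ms frames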
@@ -234,6 +264,46 @@ def generate_ism_items(
 
         y_meta = np.concatenate([y_meta, x_meta])
 
+        # append pre-amble and post-amble to all sources
+        if preamble != 0.0:
+            # ensure that the preamble length is a multiple of 20ms
+            N_pre = int(floor(preamble * 50) / 50 * y.fs)
+
+            # insert all-zero preamble to all sources
+            pre = np.zeros((N_pre, y.audio.shape[1]))
+            y.audio = np.concatenate([pre, y.audio])
+
+            # insert neutral position as a pre-amble to all sources
+            pre = np.tile(
+                [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1)
+            )  # !!!! TBD - check if we should insert neutral position or the first position of the metadata
+            y_meta = np.concatenate([pre, y_meta], axis=1)
+
+        if postamble != 0.0:
+            # ensure that the postamble length is a multiple of 20ms
+            N_post = int(floor(postamble * 50) / 50 * y.fs)
+
+            # append all-zero postamble to all sources
+            post = np.zeros((N_post, y.audio.shape[1]))
+            y.audio = np.concatenate([y.audio, post])
+
+            # append neutral position as a post-amble to all sources
+            post = np.tile(
+                [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1)
+            )  # !!!! TBD - check if we should insert neutral position or the last position of the metadata
+            y_meta = np.concatenate([y_meta, post], axis=1)
+
+        # add random noise
+        if add_low_level_random_noise:
+            # create uniformly distributed noise between -4 and 4
+            np.random.seed(SEED_RANDOM_NOISE)
+            noise = np.random.randint(
+                low=-4, high=5, size=y.audio.shape
+            ).astype("float")
+
+            # superimpose
+            y.audio += noise
+
         # write individual ISM audio streams to the output file in an interleaved format
         output_filename = scene["name"]
         audiofile.write(
--
GitLab
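Each ISM stream written above is accompanied by a metadata matrix whose columns are (azimuth, elevation, distance, spread, gain); [0.00, 0.00, 1.00, 0.00, 1.00] is the neutral pose used for padding. A minimal sketch of dumping one such matrix as the two-decimal CSV that csv_formatdata produces; the file name and per-row granularity are illustrative assumptions:

import csv
import numpy as np

NEUTRAL = [0.00, 0.00, 1.00, 0.00, 1.00]        # azimuth, elevation, distance, spread, gain

meta = np.array(NEUTRAL * 25).reshape(25, 5)    # 25 rows of neutral padding
meta[:, 0] = np.linspace(0, 48, 25)             # e.g. sweep azimuth from 0 to 48 degrees

with open("G1S1.wav.0.csv", "w", newline="") as f:   # hypothetical metadata file name
    w = csv.writer(f)
    for row in meta:
        w.writerow([f"{v:.2f}" for v in row])        # 2 decimal digits, like csv_formatdata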
From 48039fc61c0295ff7e439005ff533defd0dc68dc Mon Sep 17 00:00:00 2001
From: Archit Tamarapu
Date: Thu, 11 May 2023 15:53:07 +0200
Subject: [PATCH 13/27] [cleanup] move item generation scripts into subfolder
 generation; see notes below

- created __init__.py and __main__.py for generation module
  !! now use python -m ivas_processing_scripts.generation !!
- moved reverb.py wrapper to main wrappers folder
- moved modified config.py to generation/config.py
- moved modified constants.py to generation/constants.py
- moved process_{ism,stereo}_items to generation/
- integrated modifications to audiotools.audiofile.py
---
 .../ISM1_CONFIG.yml                           |   0
 .../ISM2_CONFIG.yml                           |   0
 .../STEREO_CONFIG.yml                         |   0
 item_generation_scripts/audiotools/EFAP.py    | 922 ------
 .../audiotools/__init__.py                    | 286 ------
 .../audiotools/__main__.py                    |  36 -
 item_generation_scripts/audiotools/audio.py   | 428 --------
 .../audiotools/audioarray.py                  | 690 -------------
 .../audiotools/audiofile.py                   | 436 ---------
 .../BRIR_IISofficialMPEG222UC_FULL.mat        |   3 -
 .../BRIR_IISofficialMPEG222UC_LS.mat          |   3 -
 .../HRIR_ORANGE53_Dolby_SBA1.mat              |   3 -
 .../HRIR_ORANGE53_Dolby_SBA2.mat              |   3 -
 .../HRIR_ORANGE53_Dolby_SBA3.mat              |   3 -
 .../binaural_datasets/HRIR_ORANGE53_FULL.mat  |   3 -
 .../binaural_datasets/HRIR_ORANGE53_LS.mat    |   3 -
 .../audiotools/binaural_datasets/README.txt   |  34 -
 .../audiotools/binaural_datasets/__init__.py  |  31 -
 .../binaural_datasets/binaural_dataset.py     | 288 ------
 .../audiotools/binauralobjectrenderer.py      | 652 -------------
 .../audiotools/constants.py                   | 704 -------------
 .../audiotools/convert/__init__.py            | 323 ------
 .../audiotools/convert/binaural.py            | 108 --
 .../audiotools/convert/channelbased.py        | 390 --------
 .../audiotools/convert/masa.py                | 165 ----
 .../audiotools/convert/objectbased.py         | 352 -------
 .../audiotools/convert/scenebased.py          | 429 --------
 .../audiotools/metadata.py                    | 571 -----------
 .../audiotools/rotation.py                    | 379 -------
 item_generation_scripts/audiotools/utils.py   |  71 --
 .../audiotools/wrappers/__init__.py           |  31 -
 .../audiotools/wrappers/bs1770.py             | 291 ------
 .../audiotools/wrappers/eid_xor.py            | 193 ----
 .../audiotools/wrappers/esdru.py              | 130 ---
 .../audiotools/wrappers/filter.py             | 366 -------
 .../audiotools/wrappers/gen_patt.py           | 171 ----
 .../audiotools/wrappers/masaRenderer.py       | 117 ---
 .../audiotools/wrappers/networkSimulator.py   | 224 -----
 .../audiotools/wrappers/p50fbmnru.py          | 110 ---
 .../audiotools/wrappers/random_seed.py        |  92 --
 item_generation_scripts/binary_paths.yml      |  30 -
 .../processing/__init__.py                    |  31 -
 .../processing/preprocessing_2.py             | 155 ---
 .../processing/processing.py                  | 455 ---------
 item_generation_scripts/utils.py              | 297 ------
 .../audiotools/audiofile.py                   |   5 +-
 .../audiotools/wrappers/reverb.py             |  54 +-
 .../generation}/__init__.py                   |  12 +-
 .../generation}/__main__.py                   |   2 +-
 .../generation}/config.py                     |   4 +-
 .../generation}/constants.py                  |   6 +-
 .../generation}/process_ism_items.py          | 106 +-
 .../generation}/process_stereo_items.py       |  67 +-
 53 files changed, 147 insertions(+), 10118 deletions(-)
 rename {item_generation_scripts/config => item_gen_configs}/ISM1_CONFIG.yml (100%)
 rename {item_generation_scripts/config => item_gen_configs}/ISM2_CONFIG.yml (100%)
 rename {item_generation_scripts/config => item_gen_configs}/STEREO_CONFIG.yml (100%)
 delete mode 100644 item_generation_scripts/audiotools/EFAP.py
 delete mode 100644 item_generation_scripts/audiotools/__init__.py
 delete mode 100644 item_generation_scripts/audiotools/__main__.py
 delete mode 100644 item_generation_scripts/audiotools/audio.py
 delete mode 100644 item_generation_scripts/audiotools/audioarray.py
 delete mode 100644 item_generation_scripts/audiotools/audiofile.py
 delete mode 100644 item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_FULL.mat
 delete mode 100644 item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_LS.mat
 delete mode 100644 item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA1.mat
 delete mode 100644 item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA2.mat
 delete mode 100644 item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA3.mat
 delete mode 100644 item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_FULL.mat
 delete mode 100644 item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_LS.mat
 delete mode 100644 item_generation_scripts/audiotools/binaural_datasets/README.txt
 delete mode 100644 item_generation_scripts/audiotools/binaural_datasets/__init__.py
 delete mode 100644 item_generation_scripts/audiotools/binaural_datasets/binaural_dataset.py
 delete mode 100644 item_generation_scripts/audiotools/binauralobjectrenderer.py
 delete mode 100644 item_generation_scripts/audiotools/constants.py
 delete mode 100644 item_generation_scripts/audiotools/convert/__init__.py
 delete mode 100644 item_generation_scripts/audiotools/convert/binaural.py
 delete mode 100644 item_generation_scripts/audiotools/convert/channelbased.py
 delete mode 100644 item_generation_scripts/audiotools/convert/masa.py
 delete mode 100644 item_generation_scripts/audiotools/convert/objectbased.py
 delete mode 100644 item_generation_scripts/audiotools/convert/scenebased.py
 delete mode 100644 item_generation_scripts/audiotools/metadata.py
 delete mode 100644 item_generation_scripts/audiotools/rotation.py
 delete mode 100644 item_generation_scripts/audiotools/utils.py
 delete mode 100644 item_generation_scripts/audiotools/wrappers/__init__.py
 delete mode 100644 item_generation_scripts/audiotools/wrappers/bs1770.py
 delete mode 100644 item_generation_scripts/audiotools/wrappers/eid_xor.py
 delete mode 100644 item_generation_scripts/audiotools/wrappers/esdru.py
 delete mode 100644 item_generation_scripts/audiotools/wrappers/filter.py
 delete mode 100644 item_generation_scripts/audiotools/wrappers/gen_patt.py
 delete mode 100644 item_generation_scripts/audiotools/wrappers/masaRenderer.py
 delete mode 100644 item_generation_scripts/audiotools/wrappers/networkSimulator.py
 delete mode 100644 item_generation_scripts/audiotools/wrappers/p50fbmnru.py
 delete mode 100644 item_generation_scripts/audiotools/wrappers/random_seed.py
 delete mode 100644 item_generation_scripts/binary_paths.yml
 delete mode 100644 item_generation_scripts/processing/__init__.py
 delete mode 100644 item_generation_scripts/processing/preprocessing_2.py
 delete mode 100644 item_generation_scripts/processing/processing.py
 delete mode 100644 item_generation_scripts/utils.py
 rename {item_generation_scripts =>
ivas_processing_scripts}/audiotools/wrappers/reverb.py (90%) rename {item_generation_scripts => ivas_processing_scripts/generation}/__init__.py (90%) mode change 100644 => 100755 rename {item_generation_scripts => ivas_processing_scripts/generation}/__main__.py (98%) mode change 100644 => 100755 rename {item_generation_scripts/processing => ivas_processing_scripts/generation}/config.py (97%) rename {item_generation_scripts => ivas_processing_scripts/generation}/constants.py (95%) rename {item_generation_scripts/processing => ivas_processing_scripts/generation}/process_ism_items.py (86%) rename {item_generation_scripts/processing => ivas_processing_scripts/generation}/process_stereo_items.py (81%) diff --git a/item_generation_scripts/config/ISM1_CONFIG.yml b/item_gen_configs/ISM1_CONFIG.yml similarity index 100% rename from item_generation_scripts/config/ISM1_CONFIG.yml rename to item_gen_configs/ISM1_CONFIG.yml diff --git a/item_generation_scripts/config/ISM2_CONFIG.yml b/item_gen_configs/ISM2_CONFIG.yml similarity index 100% rename from item_generation_scripts/config/ISM2_CONFIG.yml rename to item_gen_configs/ISM2_CONFIG.yml diff --git a/item_generation_scripts/config/STEREO_CONFIG.yml b/item_gen_configs/STEREO_CONFIG.yml similarity index 100% rename from item_generation_scripts/config/STEREO_CONFIG.yml rename to item_gen_configs/STEREO_CONFIG.yml diff --git a/item_generation_scripts/audiotools/EFAP.py b/item_generation_scripts/audiotools/EFAP.py deleted file mode 100644 index b83d57e6..00000000 --- a/item_generation_scripts/audiotools/EFAP.py +++ /dev/null @@ -1,922 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. 
-# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -import argparse -from enum import Enum -from itertools import combinations -from pathlib import Path -from typing import Optional, Tuple, Union - -import numpy as np - - -def wrap_angles( - azi: float, - ele: float, - clip_ele: Optional[bool] = False, -) -> Tuple[float, float]: - """ - Wrap angles to (-180, 180] azimuth and [-90, 90] elevation - Takes into account hemisphere flips from large elevation changes unless clip_ele is specified - """ - if clip_ele: - ele = min(max(ele, -90), 90) - - if ele % 90 == 0 and ele % 180 != 0: - # if elevation is a multiple of 90, azimuth is irrelevant since we are at a pole - azi = 0 - while np.abs(ele) > 90: - ele -= 360 - else: - # wrap elevation value - while np.abs(ele) > 90: - # flip azimuth to other hemisphere - azi += 180 - - # compensate elevation accordingly - if ele > 90: - ele = 180 - ele - elif ele < -90: - ele = -180 - ele - - # wrap azimuth value - while azi > 180: - azi -= 360 - while azi <= -180: - azi += 360 - - return azi, ele - - -class EfapDmxType(Enum): - NONE = 0 - AMPLITUDE = 1 - INTENSITY = 2 - - -class EfapVertex: - """ - Vertex data structure for EFAP - - Initialises a vertex from the given spherical coordinate pair, - with a flag specifying if it is a ghost loudspeaker - - Parameters - ---------- - azi : float - Azimuth of vertex - ele : float - Elevation of vertex - is_ghost : bool - Whether the vertex is a ghost, default is False - dmx_type : EfapDmxType - Downmix type for ghost vertices - """ - - def __init__( - self, - azi: float, - ele: float, - is_ghost: Optional[bool] = False, - dmx_type: Optional[EfapDmxType] = EfapDmxType.INTENSITY, - ): - self.azi, self.ele = wrap_angles(azi, ele) - self.pos = np.array( - [ - np.cos(np.deg2rad(azi)) * np.cos(np.deg2rad(ele)), - np.sin(np.deg2rad(azi)) * np.cos(np.deg2rad(ele)), - np.sin(np.deg2rad(ele)), - ] - ) - - idx_azi = np.round(np.abs(90 - np.abs(self.azi))) - idx_ele = 90 - np.round(np.abs(self.ele)) - self.index = ( - idx_azi + 181 * idx_ele - ) # vertices on the median plane have lowest index - - self.is_ghost = is_ghost - self.dmx_type = dmx_type - - def __str__(self): - str_ = f"a{self.azi}e{self.ele}" - if self.is_ghost: - str_ += "*" - return str_ - - def __lt__(self, other): - return self.index < other.index - - -class EFAP: - """ - EFAP data structure - - Initialise EFAP data for computing panning gains - - Parameters - ---------- - azimuths : np.ndarray - Azimuth positions of the loudspeaker array - elevations : np.ndarray - Elevation postions of the loudspeaker array - intensity_panning : bool - Whether intensity panning is enabled or not - - Examples - -------- - >>> from EFAP import EFAP - >>> panner = EFAP([30, -30, 0, 110, -110], [0, 0, 0, 0, 0], False) - >>> panner.pan(15, 45) - array([0.66742381, 0.19069252, 0.66742381, 0.19069252, 0.19069252]) - """ - - _EFAP_HULL_TOL = 1e-4 # tolerance for a point to be added to the convex hull - _EFAP_MAX_AZI_GAP = 160 # maximum allowed angular gap in the middle layer - _EFAP_POLAR_ELE = 90 # elevation of north / south poles (zenith / nadir) - _EFAP_THRESH_COPLANAR = 1e-3 # tolerance for points to be considered coplanar - 
_EFAP_THRESH_MID_LAYER = 45 # elevation threshold for loudspeakers to be considered as in the middle layer - _EFAP_THRESH_POLES = 1e-6 # tolerance for a vertex to be considered polar - _EFAP_THRESH_TRI = 1e-10 # tolerance for a point to be inside a triangle - - def __init__( - self, - azimuths: Union[list, np.ndarray], - elevations: Union[list, np.ndarray], - intensity_panning: Optional[bool] = False, - ): - # validation - azimuths = np.array(azimuths) - elevations = np.array(elevations) - if np.squeeze(azimuths).ndim > 1: - raise ValueError("Too many dimensions for loudspeaker azimuth array") - if np.squeeze(elevations).ndim > 1: - raise ValueError("Too many dimensions for loudspeaker elevations array") - if azimuths.shape != elevations.shape: - raise ValueError("Mismatch between loudspeaker azimuths and elevations") - - # set EFIP flag - self.intensity_panning = intensity_panning - - # initialise vertices and add ghost loudspeakers if needed - self.verts = np.array( - [EfapVertex(azi, ele) for azi, ele in zip(azimuths, elevations)] - ) - self._add_ghost_speakers() - - # formulate initial tetrahedron for the convex hull - self._init_simplex() - - # add the remaining vertices to the convex hull in order of their index - for i in np.argsort(self.verts): - if self.verts[i] not in self.verts[self.tris]: - self._add_vertex_to_hull(i) - - # compute downmix matrix with remapped ghost speakers - self._remap_ghost_speakers() - - # set vertices near poles to have NaN azimuth - for v in self.verts: - if ( - v.ele > self._EFAP_POLAR_ELE - self._EFAP_THRESH_POLES - or v.ele < self._EFAP_THRESH_POLES - self._EFAP_POLAR_ELE - ): - v.azi = np.nan - - # combine triangles into polygons - self._tri2poly() - - def _add_ghost_speakers(self) -> None: - """ - Add ghost loudspeakers at the poles, or to fill large horizontal gaps - """ - ele = [v.ele for v in self.verts] - - dmx_type = EfapDmxType.INTENSITY - - # add ghost loudspeakers at the poles if necessary - if max(ele) < self._EFAP_POLAR_ELE: - if self.intensity_panning: - if max(ele) > self._EFAP_THRESH_MID_LAYER: - dmx_type = EfapDmxType.NONE - else: - dmx_type = EfapDmxType.AMPLITUDE - - self.verts = np.append(self.verts, EfapVertex(0, 90, True, dmx_type)) - - if min(ele) > -self._EFAP_POLAR_ELE: - if self.intensity_panning: - if min(ele) < -self._EFAP_THRESH_MID_LAYER: - dmx_type = EfapDmxType.NONE - else: - dmx_type = EfapDmxType.AMPLITUDE - - self.verts = np.append(self.verts, EfapVertex(0, -90, True, dmx_type)) - - # check for large gaps in the middle horizontal layer - mid_spkrs = [ - v.azi for v in self.verts if np.abs(v.ele) < self._EFAP_THRESH_MID_LAYER - ] - - # no speakers in middle layer; add a triangle of ghost speakers - if not mid_spkrs: - self.verts = np.append( - self.verts, - [ - EfapVertex(0, 0, True), - EfapVertex(180, 0, True), - EfapVertex(240, 0, True), - ], - ) - # only one speaker in the threshold; add two ghost speakers to form a triangle - elif len(mid_spkrs) == 1: - self.verts = np.append( - self.verts, - [ - EfapVertex(mid_spkrs[0] + 120, 0, True), - EfapVertex(mid_spkrs[0] + 240, 0, True), - ], - ) - # search for and fill gaps greater than MAX_AZI_GAP - else: - mid_spkrs = np.sort(mid_spkrs) - angle_diff = np.diff(np.concatenate([mid_spkrs, [mid_spkrs[0] + 360]])) - sectors = np.ceil(angle_diff / self._EFAP_MAX_AZI_GAP) - - for i, s in enumerate(sectors): - if s > 1: - new_diff = angle_diff[i] / s - num_new = s - 1 - for k in range(int(num_new)): - new_azi = mid_spkrs[i] + (k + 1) * new_diff - self.verts = 
np.append(self.verts, EfapVertex(new_azi, 0, True)) - - def _init_simplex(self) -> None: - """ - Create an initial tetrahedron / simplex for the convex hull from 4 vertices - """ - # take the first vertex as seed - t = [0] - - # attempt to form an edge with non-zero length - for i, v in enumerate(self.verts): - if ( - v.azi != self.verts[t[0]].azi or v.ele != self.verts[t[0]].ele - ) and i not in t: - t.append(i) - break - else: - raise ValueError("Vertices are conincident!") - - # attempt to form a triangle with non-zero area - for i, v in enumerate(self.verts): - if ( - np.linalg.norm( - np.cross( - self.verts[t[1]].pos - self.verts[t[0]].pos, - v.pos - self.verts[t[0]].pos, - ), - 2, - ) - > self._EFAP_HULL_TOL - and i not in t - ): - t.append(i) - break - else: - raise ValueError("Vertices are colinear!") - - # attempt to form a tetrahedron with non-zero volume - for i, v in enumerate(self.verts): - if ( - np.abs( - np.dot( - np.cross( - self.verts[t[1]].pos - self.verts[t[0]].pos, - self.verts[t[2]].pos - self.verts[t[0]].pos, - ), - v.pos - self.verts[t[0]].pos, - ) - ) - ) > self._EFAP_HULL_TOL and i not in t: - t.append(i) - break - else: - raise ValueError("Vertices are coplanar!") - - # create a list of the triangles of the initial simplex / tetrahedron - t = np.array(t) - self.tris = np.array([t[[0, 1, 2]], t[[0, 1, 3]], t[[0, 2, 3]], t[[1, 2, 3]]]) - - # orient the triangle surface planes outwards from the centroid - self.centroid = np.mean([self.verts[i].pos for i in t], axis=0) - for i, tri in enumerate(self.tris): - self.tris[i, :] = self._flip_plane(tri) - - def _add_vertex_to_hull(self, idx_new_vert: int) -> None: - """ - Add a vertex to the convex hull and update the list of triangles in the hull - """ - # compute the centroid of the current convex hull - self.centroid = np.mean( - [self.verts[i].pos for i in np.unique(self.tris)], axis=0 - ) - - tris_new = [] - visible = [] - - # find which hull surfaces are visible from the new vertex - for i, tri in enumerate(self.tris): - if self._vertex_dist(tri, idx_new_vert) > -1e-6: - visible.append(i) - else: - tris_new.append(tri) - - tris_new = np.array(tris_new) - visible = np.array(visible, dtype=int) - - # find edges of the visible hull surfaces - max_vert = np.amax(self.tris[visible]) + 1 - counter = np.zeros([max_vert, max_vert]) - for i, tri in enumerate(self.tris[visible]): - surface = np.append(tri, tri[0]) - for n in range(3): - a = surface[n] - b = surface[n + 1] - counter[a, b] = counter[a, b] + 1 - - counter += counter.T - - edges = [] - for a in range(max_vert - 1): - for b in range(a + 1, max_vert): - if counter[a, b] == 1: - edges.append([a, b]) - edges = np.vstack(edges) - - # break the edges visible from the new vertex and add the new triangle - for e in edges: - tris_new = np.vstack( - [tris_new, self._flip_plane(np.append(e, idx_new_vert))] - ) - - # update the list of triangles in the convex hull - self.tris = tris_new - - def _remap_ghost_speakers(self) -> None: - """ - Remove unused ghost speakers and compute a downmix matrix for the rest - """ - # find ghosts that are not part of the convex hull - ghosts = [i for i, v in enumerate(self.verts) if v.is_ghost] - unused_ghosts = np.compress( - np.isin(ghosts, np.unique(self.tris), invert=True), ghosts - ) - - if unused_ghosts.size > 0: - # remove the unused ghosts from the triangle array and also adjust indices - self.tris[self.tris > unused_ghosts.min()] -= unused_ghosts.size - # delete them from the vertex array - self.verts = np.delete(self.verts, 
unused_ghosts) - - # generate initial sound energy distribution matrix - n_vtx = len(self.verts) - n_ghost = len(ghosts) - len(unused_ghosts) - - M = np.eye(n_vtx) - for i, v in enumerate(self.verts): - if v.is_ghost: - neighbours = self._get_neighbours(i) - M[:, i] = np.zeros(n_vtx) - M[neighbours, i] = np.ones(len(neighbours)) / len(neighbours) - - # re-distribute sound energy from ghosts - M2 = M.copy() - for i, v in enumerate(self.verts): - if v.is_ghost: - vec = M[:, i] - while np.sum(vec[-n_ghost:]) > 1e-4: - vec = M @ vec - M2[:, i] = vec - - self.dmx_mat = M2[:-n_ghost, :] - - # amplitude downmix for real loudspeakers - self.dmx_mat[:, :-n_ghost] = np.sqrt(self.dmx_mat[:, :-n_ghost]) - - # distribute ghosts according to downmix type - for i, v in enumerate(self.verts): - if v.is_ghost: - if v.dmx_type == EfapDmxType.NONE: - self.dmx_mat[:, i] = 0 - elif v.dmx_type == EfapDmxType.AMPLITUDE: - pass - else: - self.dmx_mat[:, i] = np.sqrt(self.dmx_mat[:, i]) - - def _tri2poly(self) -> None: - """ - Merge hull triangles into polygons if they are coplanar - """ - polys = [] - - for tri in self.tris: - # find all vertices coplanar with this triangle (including those already in the triangle) - new_poly = np.array( - [ - i - for i, _ in enumerate(self.verts) - if np.abs(self._vertex_dist(tri, i)) < self._EFAP_THRESH_COPLANAR - ] - ) - - # check if we already found this polygon as a complete subset - is_subset = [ - i for i, poly in enumerate(polys) if np.all(np.isin(new_poly, poly)) - ] - is_superset = [ - i for i, poly in enumerate(polys) if np.all(np.isin(poly, new_poly)) - ] - - if is_subset: - continue - elif is_superset: - # remove the other polygon since it will be replaced by the superset polygon - polys_new = [p for i, p in enumerate(polys) if i not in is_superset] - polys = polys_new - - # orient the polygon plane in the same direction as the triangle - P1 = self.verts[tri[0]].pos - P2 = self.verts[tri[1]].pos - P3 = self.verts[tri[2]].pos - - # first base vector - U = P2 - P1 - U = U / np.linalg.norm(U) - - # second base vector - V = P3 - P2 - V = V - np.dot(U, V) * U - V = V / np.linalg.norm(V) - - # center of the first triangle - M = np.mean([P1, P2, P3], axis=0) - - # sort vertices - azi = np.zeros_like(new_poly, dtype=float) - for i, idx_v in enumerate(new_poly): - P = self.verts[idx_v].pos - M - X = np.dot(P, U) - Y = np.dot(P, V) - azi[i] = np.arctan2(Y, X) - - idx = np.argsort(azi) - new_poly = new_poly[idx] - - # add the polygon to the main list - polys.append(new_poly) - - self.polys = polys - - def _pan_EFAP_poly( - self, azimuth: float, elevation: float, poly: np.ndarray, mod: int - ) -> np.ndarray: - """ - Compute panning gains for each vertex in the given polygon - - Parameters - ---------- - azimuth : float - Azimuth of requested panning position - elevation : float - Elevation of requested panning position - poly : np.ndarray - Array of vertices defining the polygon - - Returns - ------- - poly_gain: np.ndarray - Gains for each vertex in the polygon - """ - poly_gain = np.zeros_like(poly, dtype=float) - - P = np.array([azimuth, elevation]) - # search for the triangle of the polygon in which P belongs - for i in range(1, poly.size + 1): - A = np.array([self.verts[poly[i - 1]].azi, self.verts[poly[i - 1]].ele]) - for j in range(i, poly.size - 2 + i): - idx1 = 1 + (j % poly.size) - idx2 = 1 + (idx1 % poly.size) - B = np.array( - [self.verts[poly[idx1 - 1]].azi, self.verts[poly[idx1 - 1]].ele] - ) - C = np.array( - [self.verts[poly[idx2 - 1]].azi, self.verts[poly[idx2 - 
1]].ele] - ) - - if mod: - if not np.isnan(A[0]): - A[0] %= mod - if not np.isnan(B[0]): - B[0] %= mod - if not np.isnan(C[0]): - C[0] %= mod - - if self._in_triangle(P, A, B, C): - N = np.transpose([B[1] - C[1], C[0] - B[0]]) - N = N / np.dot(N, B - A) - poly_gain[i - 1] = 1 - np.dot(P - A, N) - - """ DEBUGGING / TODO """ - # set gains <= -60dB to 0 - poly_gain[np.abs(poly_gain) < 1e-6] = 0 - - return poly_gain - - """ geometric / math helper functions """ - - def _get_neighbours(self, idx_vert: int) -> np.ndarray: - """ - Find triangles containing the given vertex index (neighbouring vertices) - """ - n = self.tris[np.any(np.isin(self.tris, idx_vert), axis=1)] - return np.unique(n[n != idx_vert]) - - def _get_azi_ele(self, idx_vert: int) -> Tuple[float, float]: - """ - Return a tuple of (azi, ele) for a vertex at the given index - """ - return self.verts[idx_vert].azi, self.verts[idx_vert].ele - - def _in_polygon( - self, azimuth: float, elevation: float, poly: np.ndarray - ) -> Tuple[bool, int]: - """ - Determine whether the panning position lies within the given polygon - by iteratively checking its triangles - - Parameters - ---------- - azimuth : float - Azimuth of requested panning position - elevation : float - Elevation of requested panning position - poly : np.ndarray - Array of vertices defining the polygon - - Returns - ------- - in_polygon, mod: Tuple[bool, int] - Flag indicating whether the point is inside the given polygon - Value of wrapping required if used - """ - azi = [self.verts[v].azi for v in poly] - - P = np.array([azimuth, elevation]) - - for tri in combinations(poly, 3): - A = np.array(self._get_azi_ele(tri[0])) - B = np.array(self._get_azi_ele(tri[1])) - C = np.array(self._get_azi_ele(tri[2])) - if self._in_triangle(P, A, B, C): - return True, None - - # if the azimuth difference is large, perform the 2D check again with azimuths wrapped to (-360, 0] and [0, 360) - if np.nanmax(azi) - np.nanmin(azi) > 180: - for tri in combinations(poly, 3): - A = np.array(self._get_azi_ele(tri[0])) - B = np.array(self._get_azi_ele(tri[1])) - C = np.array(self._get_azi_ele(tri[2])) - if not np.isnan(A[0]): - A[0] %= 360 - if not np.isnan(B[0]): - B[0] %= 360 - if not np.isnan(C[0]): - C[0] %= 360 - if self._in_triangle(P, A, B, C): - return True, 360 - - for tri in combinations(poly, 3): - A = np.array(self._get_azi_ele(tri[0])) - B = np.array(self._get_azi_ele(tri[1])) - C = np.array(self._get_azi_ele(tri[2])) - if not np.isnan(A[0]): - A[0] %= -360 - if not np.isnan(B[0]): - B[0] %= -360 - if not np.isnan(C[0]): - C[0] %= -360 - if self._in_triangle(P, A, B, C): - return True, -360 - - return False, None - - def _in_triangle( - self, P: np.ndarray, A: np.ndarray, B: np.ndarray, C: np.ndarray - ) -> bool: - """ - Determine whether the panning position lies within the given triangle - - Parameters - ---------- - P : float - Point under test - A : float - First vertex of the triangle - B : float - Second vertex of the triangle - C : float - Third vertex of the triangle - - Returns - ------- - bool - Flag indicating whether the point is inside the given triangle - """ - if np.isnan(A[0]): - A[0] = P[0] - - if np.isnan(B[0]): - B[0] = P[0] - - if np.isnan(C[0]): - C[0] = P[0] - - tmpMat = np.transpose([B - A, C - A]) - if (1 / np.linalg.cond(tmpMat)) < self._EFAP_THRESH_TRI: - return False - - Minv = np.linalg.inv(tmpMat) - S = Minv @ (P - A) - - if ( - S[0] < -self._EFAP_THRESH_TRI - or S[1] < -self._EFAP_THRESH_TRI - or S[0] + S[1] > 1 + self._EFAP_THRESH_TRI - ): - return False - 
- return True - - def _vertex_dist(self, surface: np.ndarray, idx_vert: int) -> float: - """ - Compute the distance of a vertex from a given plane - - Parameters - ---------- - surface : np.ndarray - Array of 3 ordered vertices defining the plane and its orientation - idx_vert: int - Index of the vertex to compute the distance for - - Returns - ------- - float - Distance of the vertex from the given plane - """ - return self._point_plane_dist( - self.verts[surface[0]].pos, - self.verts[surface[1]].pos, - self.verts[surface[2]].pos, - self.verts[idx_vert].pos, - ) - - def _point_plane_dist( - self, P1: np.ndarray, P2: np.ndarray, P3: np.ndarray, X: np.ndarray - ) -> float: - """ - Compute the distance of a vertex from a plane defined by three points - - Parameters - ---------- - P1 : np.ndarray - Cartesian coordinates of the first point - P2 : np.ndarray - Cartesian coordinates of the second point - P3 : np.ndarray - Cartesian coordinates of the third point - X: np.ndarray - Cartesian coordinates of the vertex - - Returns - ------- - float - Distance of the vertex from the given plane - """ - - if np.all(X == P1) or np.all(X == P2) or np.all(X == P3): - return 0 - else: - N = np.cross(P1 - P2, P1 - P3) - eps = np.finfo(float).eps - return np.dot(X - P1, N / (np.linalg.norm(N) + eps)) - - def _flip_plane(self, surface: np.ndarray) -> np.ndarray: - """ - Flip the orientation of a plane (invert normal vector) - - Parameters - ---------- - surface : np.ndarray - Array of 3 ordered vertices defining the plane and its orientation - - Returns - ------- - surface : np.ndarray - Reordered vertices with plane normal pointing outwards from the hull centroid - """ - if ( - self._point_plane_dist( - self.verts[surface[0]].pos, - self.verts[surface[1]].pos, - self.verts[surface[2]].pos, - self.centroid, - ) - > 0 - ): - surface = np.flip(surface.copy()) - - return surface - - def _compute_gains_point(self, azimuth: float, elevation: float) -> np.ndarray: - """ - Compute gains for the requested panning position - - Parameters - ---------- - azimuth : float - Azimuth of requested panning position - elevation : float - Elevation of requested panning position - - Returns - ------- - gains: np.ndarray - Panning gains for the loudspeaker layout - """ - if np.isnan(azimuth) or np.isnan(elevation): - raise ValueError(f"Angles cannot be NaNs : ({azimuth}, {elevation})") - - azimuth, elevation = wrap_angles(azimuth, elevation) - point_pos = [ - np.cos(np.deg2rad(azimuth)) * np.cos(np.deg2rad(elevation)), - np.sin(np.deg2rad(azimuth)) * np.cos(np.deg2rad(elevation)), - np.sin(np.deg2rad(elevation)), - ] - - # filter the polygon list with a quick 2d check - found_polys = [] - for poly in self.polys: - in_poly, mod = self._in_polygon(azimuth, elevation, poly) - if in_poly: - found_polys.append((poly, mod)) - - if not found_polys: - raise AssertionError("Unexpected error during panning") - - # find a visible polygon with the smallest distance - dist = [] - - for poly, mod in found_polys: - surface = self.verts[poly] - d = self._point_plane_dist( - surface[0].pos, - surface[1].pos, - surface[2].pos, - point_pos, - ) - if d >= 0: - dist.append(d) - else: - dist.append(np.inf) - - found_poly, mod = found_polys[np.argmin(dist)] - - # compute gains for the polygon vertices - poly_gain = self._pan_EFAP_poly(azimuth, elevation, found_poly, mod) - - # downmix ghost loudspeakers - gains = np.zeros(self.verts.size) - gains[found_poly] = poly_gain / np.linalg.norm(poly_gain) - gains = gains @ self.dmx_mat.T - gains = gains / 
np.linalg.norm(gains) - - if self.intensity_panning: - gains = np.sqrt(gains / np.sum(gains)) - - return gains - - """ public functions """ - - def pan( - self, - azimuths: float, - elevations: float, - intensity_panning: Optional[bool] = False, - ) -> np.ndarray: - """ - Compute gains for the requested panning position - - Parameters - ---------- - azimuths : float - Azimuth of requested panning position - elevations : float - Elevation of requested panning position - intensity_panning : bool - Flag whether to use intensity panning (Default is False == amplitude panning) - - Returns - ------- - gains: np.ndarray - Panning gains for the loudspeaker layout - """ - azimuths = np.array(azimuths) - elevations = np.array(elevations) - if azimuths.size == 1 and elevations.size == 1: - return self._compute_gains_point(azimuths, elevations) - elif np.squeeze(azimuths).ndim == 1 and np.squeeze(elevations).ndim == 1: - gains = [] - for a, e in zip(azimuths, elevations): - gains.append(self._compute_gains_point(a, e)) - return np.vstack(gains) - else: - raise ValueError( - "Azimuth and Elevation arrays cannot have more than one dimension and must be of equal size" - ) - - -def main(args): - """ - Parses a speaker layout text file and prints the panning gains - for the requested position - - Parameters - ---------- - args : Namespace - Command line arguments - """ - - speaker_positions = np.loadtxt(Path(args.input), delimiter=",", max_rows=2) - panner = EFAP(speaker_positions[0, :], speaker_positions[1, :], args.efip) - print(panner.pan(args.azimuth, args.elevation)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Edge-Fading Amplitude Panning") - parser.add_argument( - "-i", - "--input", - metavar="layout_file", - required=True, - type=str, - help="IVAS compatible loudspeaker layout file (Loudspeaker azimuths in first line, elevations in second, subsequent lines are ignored)", - ) - parser.add_argument( - "-efip", - "-intensity_panning", - default=False, - action="store_true", - help="Intensity panning mode (EFIP)", - ) - parser.add_argument( - "azimuth", - type=float, - help="Azimuth of direction to compute panning gains for (positive-left)", - ) - parser.add_argument( - "elevation", - type=float, - help="Elevation of direction to compute panning gains for (positive-up)", - ) - args = parser.parse_args() - main(args) diff --git a/item_generation_scripts/audiotools/__init__.py b/item_generation_scripts/audiotools/__init__.py deleted file mode 100644 index effc5a25..00000000 --- a/item_generation_scripts/audiotools/__init__.py +++ /dev/null @@ -1,286 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -import argparse -from itertools import repeat -from pathlib import Path - -from item_generation_scripts.audiotools.constants import AUDIO_FORMATS -from item_generation_scripts.audiotools.convert import convert_file -from item_generation_scripts.utils import apply_func_parallel - - -def add_processing_args(group, input=True): - # set up prefixes to avoid argument collision - if input: - p = "in" - ps = "i" - else: - p = "out" - ps = "o" - - group.add_argument( - f"-{ps}", - f"--{p}", - dest=f"{p}put", - required=True, - type=Path, - help="Path to *.{wav, pcm, raw} file or directory", - ) - group.add_argument( - f"-{ps}f", - f"--{p}_fmt", - required=input, - type=str, - help="Audio format (use -l, --list for a list / -L, --long for a detailed list)", - default=None, - ) - group.add_argument( - f"-{ps}s", - f"--{p}_fs", - type=int, - help="Sampling rate (Hz) (deduced for .wav input, same as input if output not specified, default = %(default)s)", - default=48000, - ) - group.add_argument( - f"-{ps}fc", - f"--{p}_cutoff", - type=int, - help="Cut-off frequency for low-pass filtering (default = %(default)s)", - default=None, - ) - group.add_argument( - f"-{ps}hp", - f"--{p}_hp50", - help="Apply 50 Hz high-pass filtering (default = %(default)s)", - action="store_true", - ) - group.add_argument( - f"-{ps}w", - f"--{p}_window", - type=float, - help="Window the start/end of the signal by this amount in milliseconds (default = %(default)s)", - default=None, - ) - group.add_argument( - f"-{ps}t", - f"--{p}_trim", - type=float, - nargs=2, - metavar=("PRE_TRIM", "POST_TRIM"), - help="Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence), (default = %(default)s)", - ) - group.add_argument( - f"-{ps}pn", - f"--{p}_pad_noise", - help="Flag for padding with noise instead of zeros", - action="store_true", - ) - group.add_argument( - f"-{ps}d", - f"--{p}_delay", - type=float, - help="Delay the signal by this amount in milliseconds (negative values advance, default = %(default)s)", - default=None, - ) - group.add_argument( - f"-{ps}l", - f"--{p}_loudness", - type=float, - help="Normalize to given loudness with BS 
1770-4 (default = %(default)s)",
-        default=None,
-    )
-    group.add_argument(
-        f"-{ps}nf",
-        f"--{p}_loudness_fmt",
-        type=str,
-        help=f"Format used for loudness computation (only valid with -{ps}l/--{p}_loudness, default = {p.upper()}_FMT)",
-        default=None,
-    )
-
-
-def get_args():
-    parser = argparse.ArgumentParser(
-        description="Audiotools: Convert/Manipulate spatial audio files."
-    )
-
-    """ Input file arguments """
-    input_parser = parser.add_argument_group("Input (pre-) processing options")
-
-    # add common arguments
-    add_processing_args(input_parser)
-
-    # input only arguments
-    input_parser.add_argument(
-        "-im",
-        "--in_meta",
-        type=str,
-        nargs="+",
-        help="list of input metadata files (only relevant for ISM and MASA input)",
-        default=None,
-    )
-
-    """ Output file arguments """
-    output_parser = parser.add_argument_group("Output (post-) processing options")
-
-    # add common arguments
-    add_processing_args(output_parser, False)
-
-    # output only arguments
-    output_parser.add_argument(
-        "-lm",
-        "--limit",
-        help="Apply limiting to output (default = %(default)s)",
-        action="store_true",
-    )
-    output_parser.add_argument(
-        "-t",
-        "--trajectory",
-        type=str,
-        help="Head-tracking trajectory file for binaural output (default = %(default)s)",
-        default=None,
-    )
-    output_parser.add_argument(
-        "-bd",
-        "--bin_dataset",
-        type=str,
-        help="Use a custom binaural dataset (see README.md and audiotools/binaural_datasets/README.txt for further information)",
-        default=None,
-    )
-    output_parser.add_argument(
-        "-bl",
-        "--bin_lfe_gain",
-        type=float,
-        help="Render LFE to binaural output with the specified gain (only valid for channel-based input, default = %(default)s)",
-        default=None,
-    )
-    output_parser.add_argument(
-        "-mnru",
-        "--mnru_q",
-        type=float,
-        help="Q value for MNRU processing (default = %(default)s)",
-        default=None,
-    )
-    output_parser.add_argument(
-        "-esdru",
-        "--esdru_alpha",
-        type=float,
-        help="Alpha value for ESDRU processing (default = %(default)s)",
-        default=None,
-    )
-
-    """ Miscellaneous or meta arguments """
-    misc_parser = parser.add_argument_group("General options")
-
-    misc_parser.add_argument(
-        "-l",
-        "--list",
-        help="list all supported audio formats and exit",
-        action="store_true",
-    )
-    misc_parser.add_argument(
-        "-L",
-        "--long",
-        help="list all supported audio formats with long description and exit",
-        action="store_true",
-    )
-    misc_parser.add_argument(
-        "-mp",
-        "--multiprocessing",
-        help="Enable multiprocessing (default = %(default)s)",
-        action="store_true",
-    )
-
-    return parser.parse_args()
-
-
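Since `add_processing_args` is called twice with mirrored `in`/`out` prefixes, every pre-processing flag has a post-processing twin. A small self-contained sketch of the pattern (simplified to a single option; the names are illustrative, not taken from the module):

    import argparse

    parser = argparse.ArgumentParser()
    for p, ps in (("in", "i"), ("out", "o")):
        group = parser.add_argument_group(f"{p.capitalize()}put options")
        group.add_argument(f"-{ps}s", f"--{p}_fs", type=int, default=48000)

    args = parser.parse_args(["-is", "32000"])
    print(args.in_fs, args.out_fs)  # 32000 48000

The destination names (`in_fs`, `out_fs`) fall out of the long option strings, which is what lets `main()` below copy unset output values from their input counterparts.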
-def main():
-    args = get_args()
-
-    if args.list is True or args.long is True:
-        for fmt in AUDIO_FORMATS:
-            if args.long:
-                for f, d in fmt.items():
-                    print(f)
-                    [print(f"\t{k}: {v}", end=None) for k, v in d.items()]
-            else:
-                print(", ".join(fmt.keys()))
-        exit()
-
-    elif args.input is not None:
-        if not args.out_fs:
-            args.out_fs = args.in_fs
-
-        if not args.out_fmt:
-            args.out_fmt = args.in_fmt
-
-        if not args.out_loudness_fmt:
-            args.out_loudness_fmt = args.out_fmt
-
-        # List input files
-        args.input = Path(args.input)
-        in_files = []
-        if args.input.exists():
-            if args.input.is_dir():
-                in_files.extend(args.input.glob("*.wav"))
-                in_files.extend(args.input.glob("*.pcm"))
-                in_files.extend(args.input.glob("*.raw"))
-            else:
-                in_files = [args.input]
-        else:
-            raise ValueError(f"Input path {args.input} does not exist!")
-
-        if len(in_files) == 0:
-            raise ValueError(f"Input directory {args.input} empty!")
-
-        # Create output directory
-        args.output = Path(args.output)
-
-        if len(in_files) == 1 and args.input.is_file():
-            out_files = [args.output]
-        else:
-            args.output.mkdir(exist_ok=True)
-            out_files = [args.output.joinpath(i.name) for i in in_files]
-
-        # Multiprocessing
-        enable_multiprocessing = args.multiprocessing
-
-        # Remove unneeded keys to avoid passing to convert_file()
-        for k in ["list", "long", "multiprocessing", "input", "output"]:
-            args.__dict__.pop(k)
-
-        apply_func_parallel(
-            convert_file,
-            zip(in_files, out_files),
-            repeat(args.__dict__),
-            "mp" if enable_multiprocessing else None,
-        )
diff --git a/item_generation_scripts/audiotools/__main__.py b/item_generation_scripts/audiotools/__main__.py
deleted file mode 100644
index 9bdf64cd..00000000
--- a/item_generation_scripts/audiotools/__main__.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env python3
-
-#
-# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
-# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-# contributors to this repository. All Rights Reserved.
-#
-# This software is protected by copyright law and by international treaties.
-# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
-# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-# contributors to this repository retain full ownership rights in their respective contributions in
-# the software. This notice grants no license of any kind, including but not limited to patent
-# license, nor is any license granted by implication, estoppel or otherwise.
-#
-# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
-# contributions.
-#
-# This software is provided "AS IS", without any express or implied warranties. The software is in the
-# development stage. It is intended exclusively for experts who have experience with such software and
-# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
-# and fitness for a particular purpose are hereby disclaimed and excluded.
-#
-# Any dispute, controversy or claim arising under or in relation to providing this software shall be
-# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
-# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
-# the United Nations Convention on Contracts on the International Sales of Goods.
-#
-
-from item_generation_scripts.audiotools import main
-
-if __name__ == "__main__":
-    main()
diff --git a/item_generation_scripts/audiotools/audio.py b/item_generation_scripts/audiotools/audio.py
deleted file mode 100644
index 1804f5dd..00000000
--- a/item_generation_scripts/audiotools/audio.py
+++ /dev/null
@@ -1,428 +0,0 @@
-#!/usr/bin/env python3
-
-#
-# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
-# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -import warnings -from abc import ABC, abstractmethod -from pathlib import Path -from typing import Optional, Union - -import numpy as np - -from item_generation_scripts.audiotools.audiofile import read -from item_generation_scripts.audiotools.constants import ( - BINAURAL_AUDIO_FORMATS, - CHANNEL_BASED_AUDIO_ALTNAMES, - CHANNEL_BASED_AUDIO_FORMATS, - IVAS_FRAME_LEN_MS, - METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, - OBJECT_BASED_AUDIO_FORMATS, - SCENE_BASED_AUDIO_FORMATS, -) - -from .EFAP import wrap_angles - - -class Audio(ABC): - """Base class for audio data""" - - def __init__(self, name: str): - self.name = name.upper() - self.audio = None - self.fs = None - self.num_channels = None - # self.logger = None # TODO needed? - - def __repr__(self): - return f"{self.__class__} : {self.__dict__}" - - @classmethod - @abstractmethod - def _from_file(cls, name: str, filename: Path, fs: Optional[int] = None) -> "Audio": - """Create an Audio object from a file""" - out_audio = cls(name) - - filename = Path(filename) - if filename.suffix in [".pcm", ".raw"]: - if fs is None: - raise ValueError( - "Sampling rate must be specified for headerless files!" 
- ) - out_audio.audio, out_audio.fs = read(filename, out_audio.num_channels, fs) - elif filename.suffix == ".wav": - out_audio.audio, out_audio.fs = read(filename) - else: - raise NotImplementedError(f"Filetype {filename.suffix} is unsupported!") - - return out_audio - - @classmethod - @abstractmethod - def _from_filelist( - cls, name, files: list[Path], fs: Optional[int] = None - ) -> "Audio": - """Create an Audio object from a list of files with channels""" - out_audio = cls(name) - - for f in files: - f = Path(f) - - if f.suffix in [".pcm", ".raw"]: - if fs is None: - raise ValueError( - "Sampling rate must be specified for headerless files!" - ) - channel, fs = read(f, out_audio.num_channels, fs) - elif f.suffix == ".wav": - channel, fs = read(f) - else: - raise NotImplementedError(f"Filetype {f.suffix} is unsupported!") - - if out_audio.audio is None: - out_audio.audio = channel - out_audio.fs = fs - else: - if fs != out_audio.fs: - raise ValueError( - f"Sampling rate mismatch between input audio files, expected {out_audio.fs}, encountered {fs} for {f}!" - ) - - if channel.shape[0] > out_audio.audio.shape[0]: - channel = channel[: out_audio.audio.shape[0], :] - elif channel.shape[0] < out_audio.audio.shape[0]: - out_audio.audio = out_audio.audio[: channel.shape[0], :] - out_audio.audio = np.column_stack([out_audio.audio, channel]) - - return out_audio - - def apply(self, func, **kwargs) -> None: - """Apply a function to the audio array""" - self.audio = func(self.audio, self.fs, **kwargs) - - -class BinauralAudio(Audio): - """Sub-class for binaural audio""" - - def __init__(self, name: str): - super().__init__(name) - try: - self.__dict__.update(BINAURAL_AUDIO_FORMATS[name.upper()]) - except KeyError: - raise ValueError(f"Unsupported binaural audio format {name}") - - @classmethod - def _from_file( - cls, name: str, filename: Path, fs: Optional[int] = None - ) -> "BinauralAudio": - return super()._from_file(name, filename, fs) - - @classmethod - def _from_filelist( - cls, name: str, filename: Path, fs: Optional[int] = None - ) -> "BinauralAudio": - return super()._from_filelist(name, filename, fs) - - -class ChannelBasedAudio(Audio): - """Sub-class for channel-based audio""" - - def __init__(self, name: str): - if Path(name).exists() and Path(name).suffix == ".txt": - self.parse_custom_layout(name) - else: - # remap configuration name to internal naming - if name.upper() in CHANNEL_BASED_AUDIO_ALTNAMES.keys(): - name = CHANNEL_BASED_AUDIO_ALTNAMES[name.upper()] - - super().__init__(name) - try: - self.__dict__.update(CHANNEL_BASED_AUDIO_FORMATS[name.upper()]) - except KeyError: - raise ValueError(f"Unsupported channel-based audio format {name}") - - self.is_planar = np.all([e == 0 for e in self.ls_ele]) - - def parse_custom_layout(self, layout_file: Union[Path, str]): - layout_file = Path(layout_file) - with open(layout_file) as f_ls: - self.ls_azi = [float(x.strip()) for x in f_ls.readline().strip().split(",")] - self.ls_ele = [float(x.strip()) for x in f_ls.readline().strip().split(",")] - try: - self.lfe_index = [ - int(x.strip()) for x in f_ls.readline().strip().split(",") - ] - except Exception: - self.lfe_index = [] - - if self.lfe_index: - [self.ls_azi.insert(i, 0.0) for i in self.lfe_index] - [self.ls_ele.insert(i, 0.0) for i in self.lfe_index] - - self.name = layout_file.stem - self.num_channels = len(self.ls_azi) - self.layout_file = layout_file - - @classmethod - def _from_file( - cls, name: str, filename: Path, fs: Optional[int] = None - ) -> "ChannelBasedAudio": - return 
super()._from_file(name, filename, fs)
-
-    @classmethod
-    def _from_filelist(
-        cls, name: str, filename: Path, fs: Optional[int] = None
-    ) -> "ChannelBasedAudio":
-        return super()._from_filelist(name, filename, fs)
-
-
-class MetadataAssistedSpatialAudio(Audio):
-    """Sub-class for metadata-assisted spatial audio"""
-
-    def __init__(self, name: str):
-        super().__init__(name)
-        try:
-            self.__dict__.update(METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS[name.upper()])
-        except KeyError:
-            raise ValueError(
-                f"Unsupported metadata assisted spatial audio format {name}"
-            )
-        self.metadata_files = []
-
-    @classmethod
-    def _from_file(
-        cls,
-        name: str,
-        filename: Path,
-        metadata_files: list[str],
-        fs: Optional[int] = None,
-    ) -> "MetadataAssistedSpatialAudio":
-        obj = super()._from_file(name, filename, fs)
-        obj.metadata_file = Path(metadata_files[0])
-        return obj
-
-    @classmethod
-    def _from_filelist(
-        cls,
-        name: str,
-        filename: Path,
-        metadata_files: list[str],
-        fs: Optional[int] = None,
-    ) -> "MetadataAssistedSpatialAudio":
-        obj = super()._from_filelist(name, filename, fs)
-        obj.metadata_file = Path(metadata_files[0])
-        return obj
-
-
-class ObjectBasedAudio(Audio):
-    """Sub-class for object-based audio"""
-
-    def __init__(self, name: str):
-        super().__init__(name)
-        try:
-            self.__dict__.update(OBJECT_BASED_AUDIO_FORMATS[name.upper()])
-        except KeyError:
-            raise ValueError(f"Unsupported object-based audio format {name}")
-        self.object_pos = []
-        self.metadata_files = []
-
-    @classmethod
-    def _from_file(
-        cls,
-        name: str,
-        filename: Union[str, Path],
-        metadata_files: list[Union[str, Path]],
-        fs: Optional[int] = None,
-    ) -> "ObjectBasedAudio":
-        obj = super()._from_file(name, filename, fs)
-        if metadata_files is not None:
-            obj.metadata_files = [Path(f) for f in metadata_files]
-        else:
-            # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv
-            for obj_idx in range(obj.num_channels):
-                file_name_meta = filename.with_suffix(
-                    f"{filename.suffix}.{obj_idx}.csv"
-                )
-                if file_name_meta.is_file():
-                    obj.metadata_files.append(file_name_meta)
-                else:
-                    raise ValueError(f"Metadata file {file_name_meta} not found.")
-            warnings.warn(
-                f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}"
-            )
-
-        obj.init_metadata()
-        return obj
-
-    @classmethod
-    def _from_filelist(
-        cls,
-        name: str,
-        filename: Path,
-        metadata_files: list[Union[str, Path]],
-        fs: Optional[int] = None,
-    ) -> "ObjectBasedAudio":
-        obj = super()._from_filelist(name, filename, fs)
-        obj.metadata_files = [Path(f) for f in metadata_files]
-        obj.init_metadata()
-        return obj
-
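For illustration, a hedged sketch of how an object-based item would be loaded through the module-level helper `fromfile` defined further below. The item and metadata file names are hypothetical, and `ISM2` is assumed to be a key in OBJECT_BASED_AUDIO_FORMATS:

    # two audio objects, one position CSV per object (hypothetical files)
    ism = fromfile(
        "ISM2",
        "item1.wav",
        in_meta=["item1.wav.0.csv", "item1.wav.1.csv"],
    )
    print(ism.num_channels, len(ism.object_pos))  # e.g. 2 2

If `in_meta` is omitted, `_from_file` above falls back to the `name.wav.N.csv` naming scheme and warns about the files it picked up.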
-    def init_metadata(self):
-        if self.audio.shape[1] != len(self.metadata_files):
-            raise ValueError(
-                f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]"
-            )
-
-        self.object_pos = []
-        for i, f in enumerate(self.metadata_files):
-            pos = np.genfromtxt(f, delimiter=",")
-
-            # check if metadata has right number of columns
-            if pos.shape[1] < 5:
-                raise ValueError("Metadata incomplete. Columns are missing.")
-            elif pos.shape[1] > 5:
-                if pos.shape[1] == 7:
-                    pos = pos[:, :5]
-                else:
-                    raise ValueError(
-                        "Too many columns in metadata (possibly old version with frame index used)"
-                    )
-
-            # check if metadata is longer than file -> cut off
-            num_frames = int(
-                np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000))
-            )
-            if num_frames < pos.shape[0]:
-                pos = pos[:num_frames]
-            # check if metadata is shorter than file -> loop
-            elif num_frames > pos.shape[0]:
-                pos_loop = np.zeros((num_frames, pos.shape[1]))
-                pos_loop[: pos.shape[0]] = pos
-                for idx in range(pos.shape[0], num_frames):
-                    pos_loop[idx, :2] = pos[idx % pos.shape[0], :2]
-                pos = pos_loop
-
-            # wrap metadata to target value range
-            for j in range(num_frames):
-                pos[j, 0], pos[j, 1] = wrap_angles(pos[j, 0], pos[j, 1], clip_ele=True)
-
-            self.object_pos.append(pos)
-
-
-class SceneBasedAudio(Audio):
-    """Sub-class for scene-based audio"""
-
-    def __init__(self, name: str):
-        if name == "SBA1":
-            name = "FOA"
-        elif name == "SBA2":
-            name = "HOA2"
-        elif name == "SBA3":
-            name = "HOA3"
-
-        super().__init__(name)
-        try:
-            self.__dict__.update(SCENE_BASED_AUDIO_FORMATS[name.upper()])
-        except KeyError:
-            raise ValueError(f"Unsupported scene-based audio format {name}")
-
-        # self.ambi_order = ambi_order_from_nchan(self.num_channels)
-        self.ambi_order = int(np.sqrt(self.num_channels) - 1)
-
-    @classmethod
-    def _from_file(
-        cls, name: str, filename: Path, fs: Optional[int] = None
-    ) -> "SceneBasedAudio":
-        return super()._from_file(name, filename, fs)
-
-    @classmethod
-    def _from_filelist(
-        cls, name: str, filename: Path, fs: Optional[int] = None
-    ) -> "SceneBasedAudio":
-        return super()._from_filelist(name, filename, fs)
-
-
-def _get_audio_class(fmt) -> Audio:
-    """Return a child audio class corresponding to the specified format"""
-    if fmt in BINAURAL_AUDIO_FORMATS.keys():
-        return BinauralAudio
-    elif fmt in METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS.keys():
-        return MetadataAssistedSpatialAudio
-    elif fmt in OBJECT_BASED_AUDIO_FORMATS.keys():
-        return ObjectBasedAudio
-    elif fmt in SCENE_BASED_AUDIO_FORMATS.keys():
-        return SceneBasedAudio
-    elif (
-        fmt in CHANNEL_BASED_AUDIO_FORMATS.keys()
-        or fmt in CHANNEL_BASED_AUDIO_ALTNAMES.keys()
-    ):
-        return ChannelBasedAudio
-    elif Path(fmt).suffix == ".txt":
-        return ChannelBasedAudio
-    else:
-        raise ValueError(f"Unknown audio format {fmt}!")
-
-
-def fromtype(fmt: str) -> Audio:
-    return _get_audio_class(fmt)(fmt)
-
-
-def fromarray(fmt: str, x: np.ndarray, fs: int) -> Audio:
-    """Wrap the given array into an audio format"""
-    if x is None or not fs:
-        raise ValueError("Both array and sampling rate must be specified!")
-
-    output = _get_audio_class(fmt)(fmt)
-
-    output.audio = x
-    output.fs = fs
-
-    return output
-
-
-def fromfile(
-    fmt: str,
-    filename: Union[str, Path],
-    fs: Optional[int] = None,
-    in_meta: Optional[list[Union[str, Path]]] = None,
-) -> Audio:
-    """Create an Audio object of the specified format from the given file"""
-    filename = Path(filename)
-    fmt_cls = _get_audio_class(fmt)
-    if fmt_cls is ObjectBasedAudio or fmt_cls is MetadataAssistedSpatialAudio:
-        return fmt_cls._from_file(fmt, filename, in_meta, fs)
-    else:
-        return fmt_cls._from_file(fmt, filename, fs)
-
-
-def fromfilelist(
-    fmt: str, files: list[Union[str, Path]], fs: Optional[int] = None
-) -> Audio:
-    """Create an Audio object of the specified format from the given list of files"""
-    return _get_audio_class(fmt)._from_filelist(fmt, files, fs)
diff --git 
a/item_generation_scripts/audiotools/audioarray.py b/item_generation_scripts/audiotools/audioarray.py deleted file mode 100644 index c0909c4c..00000000 --- a/item_generation_scripts/audiotools/audioarray.py +++ /dev/null @@ -1,690 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
-# - -import logging -import warnings -from typing import Iterator, Optional, Tuple, Union - -import numpy as np -import scipy.signal as sig - -from .constants import DELAY_COMPENSATION_FOR_FILTERING, SEED_PADDING - -logger = logging.getLogger("__main__") -logger.setLevel(logging.DEBUG) - - -"""Functions used in this module""" - - -def trim( - x: np.ndarray, - fs: Optional[int] = 48000, - limits: Optional[Tuple[int, int]] = None, - pad_noise: Optional[bool] = False, - samples: Optional[bool] = False, -) -> np.ndarray: - """ - Trim an audio array - - Parameters - ---------- - x: np.ndarray - Input array - fs: Optional[int] - Input sampling rate in Hz, default = 48000 - limits: Optional[Tuple[int, int]] - Pre- and post-trim duration in milliseconds (negative values pad) - pad_noise: Optional[bool] - If true noise will be padded otherwise zeros will be padded - samples: Optional[bool] - If true limits are interpreted as samples, otherwise as ms - - Returns - ------- - y : np.ndarray - Output trimmed array - """ - - if not limits: - return x - - if not samples: - pre_trim = int(limits[0] * fs // 1000) - post_trim = int(limits[1] * fs // 1000) - else: - pre_trim = limits[0] - post_trim = limits[1] - - if pre_trim < 0: - if pad_noise: - # pad with uniformly distributed noise between -4 and 4 - np.random.seed(SEED_PADDING) - noise = np.random.randint( - low=-4, high=5, size=(np.abs(pre_trim), np.shape(x)[1]) - ).astype("float") - x = np.concatenate((noise, x), axis=0) - else: - x = np.pad(x, [[np.abs(pre_trim), 0], [0, 0]]) - elif pre_trim > 0: - x = x[pre_trim:, :] - - if post_trim < 0: - if pad_noise: - # pad with uniformly distributed noise between -4 and 4 - np.random.seed(SEED_PADDING) - noise = np.random.randint( - low=-4, high=5, size=(np.abs(post_trim), np.shape(x)[1]) - ).astype("float") - x = np.concatenate((x, noise), axis=0) - else: - x = np.pad(x, [[0, np.abs(post_trim)], [0, 0]]) - elif post_trim > 0: - x = x[:-post_trim, :] - - return x - - -def window( - x: np.ndarray, - fs: Optional[int] = 48000, - len_ms: Optional[float] = 100, -) -> np.ndarray: - """ - Apply windowing to the start and end - of an audio array - - - Parameters - ---------- - x: np.ndarray - Input audio array - fs: Optional[int] - Input sampling rate in Hz, default = 48000 - len_ms: Optional[float] - Window length used at start and end of array in milliseconds, default = 100 ms - - Returns - ------- - y: np.ndarray - Output windowed array - """ - - wlen_smp = int(len_ms * fs // 1000) - - # if requested window length is larger than the signal, simply window the signal - if wlen_smp > x.shape[0]: - wlen_smp = x.shape[0] // 2 - - window = sig.windows.hann(2 * wlen_smp) - - # we only need half of the window - window = window[:wlen_smp, np.newaxis] - - x[:wlen_smp, :] *= window - x[-wlen_smp:, :] *= window[::-1, :] - - return x - - -def delay_compensation( - x: np.ndarray, - flt_type: str, - fs: Optional[int] = 48000, - up: Optional[bool] = False, - down: Optional[bool] = False, -) -> np.ndarray: - """ - Compensation for a delayed signal - - Parameters - ---------- - x: np.ndarray - Input array - flt_type: str - Name of filter type used for filtering - fs: Optional[int] - Input sampling rate - up: Optional[bool] - Flag for up-sampling - down: Optional[bool] - Flag for down-sampling - - Returns - ------- - x: np.ndarray - Delay compensated test array - """ - - # Get the delay in number of samples - if flt_type == "SHQ2" and up: - d_samples = DELAY_COMPENSATION_FOR_FILTERING["SHQ2"]["up"] - elif flt_type == "SHQ2" and 
down:
-        d_samples = DELAY_COMPENSATION_FOR_FILTERING["SHQ2"]["down"]
-    elif flt_type == "SHQ3" and up:
-        d_samples = DELAY_COMPENSATION_FOR_FILTERING["SHQ3"]["up"]
-    elif flt_type == "SHQ3" and down:
-        d_samples = DELAY_COMPENSATION_FOR_FILTERING["SHQ3"]["down"]
-    else:
-        d_samples = DELAY_COMPENSATION_FOR_FILTERING[flt_type]
-    # Delay compensation
-    x = delay(x, fs, -d_samples, samples=True)
-
-    return x
-
-
-def delay(
-    x: np.ndarray,
-    fs: Optional[int] = 48000,
-    delay: Optional[float] = 0,
-    samples: Optional[bool] = False,
-) -> np.ndarray:
-    """
-    Delay a signal by a specified duration (ms) or number of samples
-
-    Parameters
-    ----------
-    x: np.ndarray
-        Input array
-    fs: Optional[int]
-        Sampling rate
-    delay: Optional[float]
-        Delay in milliseconds or samples (negative values advance the signal)
-    samples: Optional[bool]
-        If true, delay is interpreted as samples, if false as milliseconds
-
-    Returns
-    -------
-    x: np.ndarray
-        Delayed audio signal
-    """
-
-    if not samples:
-        delay = int(delay * fs / 1000)
-
-    delay_abs = np.abs(delay)
-
-    x = np.roll(x, delay, axis=0)
-
-    if delay < 0:
-        x[-delay_abs:, :] = 0
-    elif delay > 0:
-        x[:delay_abs, :] = 0
-
-    return x
-
-
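`delay` is essentially `np.roll` plus zero-fill of the wrapped-around region; a quick check of the sign convention (positive delays, negative advances), using toy data:

    import numpy as np

    x = np.arange(1.0, 6.0).reshape(-1, 1)    # one channel: 1..5
    d = np.roll(x, 2, axis=0); d[:2, :] = 0   # delayed by 2 samples
    a = np.roll(x, -2, axis=0); a[-2:, :] = 0 # advanced by 2 samples
    print(d.ravel(), a.ravel())               # [0. 0. 1. 2. 3.] [3. 4. 5. 0. 0.]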
-def limiter(
-    x: np.ndarray,
-    fs: int,
-) -> np.ndarray:
-    """
-    Apply limiting to an audio signal
-
-    Parameters
-    ----------
-    x: np.ndarray
-        Input reference array
-    fs: int
-        Input sampling frequency
-
-    Returns
-    -------
-    x: np.ndarray
-        Limited audio signal
-    """
-
-    limiter_threshold = 32729  # -0.01 dB FS
-    limiter_attack_seconds = 0.005
-    attack_constant = 0.01 ** (1.0 / (limiter_attack_seconds * fs))
-    release_heuristics_mem = 0.0
-    gain = 1.0
-    strong_saturation_cnt = 0
-    limited = False
-
-    if x.ndim == 1:
-        n_samples_x = x.shape
-        n_chan_x = 1
-    else:
-        n_samples_x, n_chan_x = x.shape
-    # framing
-    framesize = fs // 50
-    nframes = n_samples_x // framesize
-    for fr in range(nframes):
-        apply_limiting = True
-        fr_sig = x[fr * framesize : ((fr + 1) * framesize), :]
-        sig_max = np.amax(np.absolute(fr_sig))
-        release_heuristic = release_heuristics_mem
-        if sig_max > limiter_threshold:
-            frame_gain = limiter_threshold / sig_max
-            release_heuristic = min(1.0, release_heuristic + (4.0 * framesize / fs))
-        else:
-            release_heuristic = max(0.0, release_heuristic - (framesize / fs))
-            if gain >= 1.0 - 1e-10:
-                apply_limiting = False
-
-            frame_gain = 1.0
-
-        if sig_max > 3 * limiter_threshold and strong_saturation_cnt > 0:
-            apply_strong_limiting = True
-        elif sig_max > 10 * limiter_threshold:
-            strong_saturation_cnt += 20
-            apply_strong_limiting = True
-        else:
-            strong_saturation_cnt -= 1
-            if strong_saturation_cnt < 0:
-                strong_saturation_cnt = 0
-            apply_strong_limiting = False
-
-        if apply_strong_limiting is True:
-            if frame_gain < 0.3:
-                frame_gain /= 3.0
-            else:
-                apply_strong_limiting = False
-
-        if frame_gain < 0.1 and apply_strong_limiting is False:
-            frame_gain = 0.1
-
-        if apply_limiting is True:
-            if frame_gain < gain:
-                fac = attack_constant ** (np.arange(1, framesize + 1, dtype=np.float32))
-            else:
-                release_constant = 0.01 ** (
-                    1.0 / (0.005 * (200.0**release_heuristic) * fs)
-                )
-                fac = release_constant ** (
-                    np.arange(1, framesize + 1, dtype=np.float32)
-                )
-
-            fr_gain = np.tile(gain * fac + frame_gain * (1.0 - fac), (n_chan_x, 1)).T
-            fr_sig *= fr_gain
-            gain = fr_gain[-1, 0]
-            limited = True
-        else:
-            gain = 1.0
-
-        release_heuristics_mem = release_heuristic
-        # hard limiting for everything that still sticks out
-        if (fr_sig > 32767).any() or (fr_sig < -32768).any():
-            limited = True
-            idx_max = np.where(fr_sig > 32767)
-            fr_sig[idx_max] = 32767
-            idx_min = np.where(fr_sig < -32768)
-            fr_sig[idx_min] = -32768
-
-    if limited:
-        warnings.warn("Limiting had to be applied")
-    return x
-
-
-def get_framewise(
-    x: np.ndarray,
-    chunk_size: int,
-    zero_pad: Optional[bool] = False,
-) -> Iterator:
-    """
-    Generator to yield a signal frame by frame
-    If the array size is not a multiple of chunk_size, the last frame contains the remainder
-
-    Parameters
-    ----------
-    x: np.ndarray
-        Input reference array
-    chunk_size: int
-        Size of frames to yield
-    zero_pad: Optional[bool]
-        Whether to zero pad the last chunk if there are not enough samples
-
-    Yields
-    -------
-    frame : np.ndarray
-        One frame of the input audio signal
-    """
-
-    n_frames = x.shape[0] // chunk_size
-    for i in range(n_frames):
-        yield x[i * chunk_size : (i + 1) * chunk_size, :]
-    if x.shape[0] % chunk_size:
-        last_chunk = x[n_frames * chunk_size :, :]
-        if zero_pad:
-            yield np.pad(
-                last_chunk, [[0, chunk_size - (x.shape[0] % chunk_size)], [0, 0]]
-            )
-        else:
-            yield last_chunk
-
-
-def framewise_io(
-    i: np.ndarray, o: np.ndarray, chunk_size: int, zero_pad: Optional[bool] = False
-) -> Iterator:
-    """
-    Return an iterator over frame_index, input_frame and output_frame
-
-    Parameters
-    ----------
-    i: np.ndarray
-        Input array
-    o: np.ndarray
-        Output array
-    chunk_size: int
-        Size of frames to yield
-    zero_pad: Optional[bool]
-        Whether to zero pad the last chunk if there are not enough samples
-
-    Yields
-    -------
-    frame : Iterator
-        Frame index, one frame of the input and output audio signal
-    """
-
-    return enumerate(
-        zip(
-            get_framewise(i, chunk_size, zero_pad),
-            get_framewise(o, chunk_size, zero_pad),
-        )
-    )
-
-
-"""Deprecated functions (partly replaced by ITU binaries)"""
-
-
-def resample(
-    x: np.ndarray,
-    in_freq: int,
-    out_freq: int,
-) -> np.ndarray:
-    """
-    Resample a multi-channel audio array
-
-    Parameters
-    ----------
-    x: np.ndarray
-        Input array
-    in_freq: int
-        Input sampling rate
-    out_freq: int
-        Output sampling rate
-
-    Returns
-    -------
-    y: np.ndarray
-        Output resampled array
-    """
-
-    if in_freq == out_freq or out_freq is None:
-        y = x
-    else:
-        datatype = x.dtype
-        if datatype.name.startswith("int"):
-            # cast necessary due to bug in resample_poly() with input of type int
-            x = x.astype("float")
-
-        y = sig.resample_poly(x, out_freq, in_freq)
-
-        if datatype.name.startswith("int"):
-            y = y.astype(datatype)
-
-    return y
-
-
-def lpfilter(
-    x: np.ndarray,
-    fc: int,
-    fs: int,
-) -> np.ndarray:
-    """
-    Low-pass filter a multi-channel audio array
-
-    Parameters
-    ----------
-    x: np.ndarray
-        Input array
-    fc: int
-        Cut-off frequency in Hz
-    fs: int
-        Sampling rate in Hz
-
-    Returns
-    -------
-    y: np.ndarray
-        Output low-pass filtered array
-    """
-
-    if (fc + 500) < (fs / 2.0):
-        # Design a Chebyshev Type II filter, band_pass-band_stop = 500 Hz
-        N, Wn = sig.cheb2ord(fc / (fs / 2), (fc + 500) / (fs / 2), 3, 60)
-        b, a = sig.cheby2(N, 60, Wn, "low")
-
-        # Apply the Chebyshev filter to each channel, across the time axis
-        # y = sig.lfilter(b, a, x, axis=0)  # non zero-phase filter
-        y = sig.filtfilt(b, a, x, axis=0)  # zero-phase filter, batch processing
-    else:
-        y = x
-
-    return y
-
-
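A short usage sketch for the `get_framewise` generator above (assuming the audioarray module shown here is importable; the array is toy data). With `zero_pad=True`, the trailing partial frame is padded to full length:

    import numpy as np

    x = np.arange(10).reshape(-1, 1)  # 10 samples, 1 channel
    frames = [f.ravel().tolist() for f in get_framewise(x, 4, zero_pad=True)]
    print(frames)  # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 0, 0]]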
-def cut(
-    x: np.ndarray,
-    limits: Optional[Tuple[int, int]],
-) -> np.ndarray:
-    """
-    Cut an audio array
-
-    Parameters
-    ----------
-    x: np.ndarray
-        Input array
-    limits: Tuple[int, int]
-        First and last samples to extract
-
-    Returns
-    -------
-    y: np.ndarray
-        Output cut array
-    """
-
-    in_samples, in_channels = x.shape
-
-    first_sample = limits[0]
-    last_sample = limits[1]
-
-    if first_sample == 0 and (last_sample == -1 or last_sample == in_samples):
-        y = x
-    else:
-        if last_sample == -1:
-            last_sample = in_samples
-
-        signal_start = first_sample
-        signal_end = last_sample
-        insert_start = 0
-        insert_end = last_sample - first_sample
-        total_samples = last_sample - first_sample
-        if first_sample < 0:
-            samples_to_pad_begin = -first_sample
-            insert_start = samples_to_pad_begin
-            insert_end += samples_to_pad_begin
-        if last_sample > in_samples:
-            signal_end = in_samples
-            insert_end = insert_end - last_sample + in_samples
-        y = np.zeros([total_samples, in_channels], dtype=x.dtype)
-        y[insert_start:insert_end, :] = x[signal_start:signal_end, :]
-
-    return y
-
-
-def compare(
-    ref: np.ndarray,
-    test: np.ndarray,
-    fs: int,
-    per_frame: bool = False,
-) -> dict:
-    """
-    Compare two audio arrays
-
-    Parameters
-    ----------
-    ref: np.ndarray
-        Input reference array
-    test: np.ndarray
-        Input test array
-    fs: int
-        Input sampling rate in Hz
-    per_frame: bool
-        If true, also report per-frame difference statistics
-
-    Returns
-    -------
-    result: dict
-        Comparison results
-    """
-
-    framesize = fs // 50
-    diff = abs(test - ref)
-    max_diff = int(diff.max())
-    result = {
-        "bitexact": True,
-        "max_abs_diff": 0,
-        "max_abs_diff_pos_sample": 0,
-        "max_abs_diff_pos_channel": 0,
-        "nsamples_diff": 0,
-        "nsamples_diff_percentage": 0.0,
-        "first_diff_pos_sample": -1,
-        "first_diff_pos_channel": -1,
-        "first_diff_pos_frame": -1,
-    }
-    if per_frame:
-        result["max_abs_diff_pos_frame"] = 0
-        result["nframes_diff"] = 0
-        result["nframes_diff_percentage"] = 0.0
-
-    if max_diff != 0:
-        if diff.ndim == 1:
-            nsamples_total = diff.shape
-            nchannels = 1
-        else:
-            nsamples_total, nchannels = diff.shape
-        max_diff_pos = np.nonzero(diff == max_diff)
-        max_diff_pos = [
-            max_diff_pos[0][0],
-            max_diff_pos[0][0] // framesize,
-            max_diff_pos[1][0],
-        ]
-
-        first_diff_pos = np.nonzero(diff)
-        first_diff_pos = [
-            first_diff_pos[0][0],
-            first_diff_pos[0][0] // framesize,
-            first_diff_pos[1][0],
-        ]
-
-        nsamples_diff = np.nonzero(diff)[0].size
-        nsamples_diff_percentage = nsamples_diff / (nsamples_total * nchannels) * 100.0
-        nframes = nsamples_total // framesize
-        nframes_diff = 0
-
-        result = {
-            "bitexact": False,
-            "max_abs_diff": max_diff,
-            "max_abs_diff_pos_sample": max_diff_pos[0],
-            "max_abs_diff_pos_channel": max_diff_pos[2],
-            "nsamples_diff": nsamples_diff,
-            "nsamples_diff_percentage": nsamples_diff_percentage,
-            "first_diff_pos_sample": first_diff_pos[0],
-            "first_diff_pos_channel": first_diff_pos[2],
-            "first_diff_pos_frame": first_diff_pos[1],
-        }
-
-        if per_frame:
-            for fr in range(nframes):
-                diff_fr = diff[fr * framesize : ((fr + 1) * framesize), :]
-                nframes_diff += 1 if diff_fr.nonzero()[0].size > 0 else 0
-            nframes_diff_percentage = nframes_diff / nframes * 100.0
-            result["max_abs_diff_pos_frame"] = max_diff_pos[1]
-            result["nframes_diff"] = nframes_diff
-            result["nframes_diff_percentage"] = nframes_diff_percentage
-
-    return result
-
-
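A sketch of `compare` on a pair of toy arrays with a single injected difference (again assuming this module is importable):

    import numpy as np

    ref = np.zeros((960, 2), dtype=int)  # one 20 ms frame at 48 kHz, 2 channels
    test = ref.copy()
    test[100, 1] = 3                     # single-sample deviation
    r = compare(ref, test, fs=48000, per_frame=True)
    print(r["bitexact"], r["max_abs_diff"], r["nframes_diff"])  # False 3 1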
-def getdelay(
-    x: np.ndarray,
-    y: np.ndarray,
-) -> int:
-    """
-    Get the delay between two audio signals
-
-    Parameters
-    ----------
-    x: np.ndarray
-        Input reference array
-    y: np.ndarray
-        Input test array
-
-    Returns
-    -------
-    result: int
-        Delay of y in samples with respect to x (median of individual channel delays)
-    """
-
-    if x.ndim == 1:
-        n_samples_x = x.shape
-        n_chan_x = 1
-    else:
-        n_samples_x, n_chan_x = x.shape
-    if y.ndim == 1:
-        n_samples_y = y.shape
-        n_chan_y = 1
-    else:
-        n_samples_y, n_chan_y = y.shape
-    if n_chan_x != n_chan_y:
-        raise ValueError("Channel count mismatch between x and y")
-    lags = np.arange(-n_samples_x + 1, n_samples_y)
-    lag = np.zeros([n_chan_x, 1], dtype=int)
-    for chan in range(n_chan_x):
-        correlation = sig.correlate(y[:, chan], x[:, chan], mode="full")
-        lag[chan] = lags[np.argmax(correlation)]
-    return int(np.median(lag))
-
-
-def mono_downmix(x: np.ndarray) -> np.ndarray:
-    """
-    Creates a passive mono downmix for a multi-channel audio signal
-    """
-    return np.sum(x, axis=1)
-
-
-def mute_channels(
-    x: np.ndarray, mute: Optional[Union[list, np.ndarray]] = None
-) -> np.ndarray:
-    """
-    Mute audio channels in signal
-    """
-    if mute is not None:
-        x[:, mute] = 0
-    return x
diff --git a/item_generation_scripts/audiotools/audiofile.py b/item_generation_scripts/audiotools/audiofile.py
deleted file mode 100644
index d5687a89..00000000
--- a/item_generation_scripts/audiotools/audiofile.py
+++ /dev/null
@@ -1,436 +0,0 @@
-#!/usr/bin/env python3
-
-#
-# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
-# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-# contributors to this repository. All Rights Reserved.
-#
-# This software is protected by copyright law and by international treaties.
-# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
-# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-# contributors to this repository retain full ownership rights in their respective contributions in
-# the software. This notice grants no license of any kind, including but not limited to patent
-# license, nor is any license granted by implication, estoppel or otherwise.
-#
-# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
-# contributions.
-#
-# This software is provided "AS IS", without any express or implied warranties. The software is in the
-# development stage. It is intended exclusively for experts who have experience with such software and
-# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
-# and fitness for a particular purpose are hereby disclaimed and excluded.
-#
-# Any dispute, controversy or claim arising under or in relation to providing this software shall be
-# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
-# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
-# the United Nations Convention on Contracts on the International Sales of Goods.
-#
-
-import logging
-import struct
-from pathlib import Path
-from typing import Optional, Tuple, Union
-
-import numpy as np
-import scipy.io.wavfile as wav
-
-from .audioarray import trim, window
-
-logger = logging.getLogger("__main__")
-logger.setLevel(logging.DEBUG)
-
-
-def read(
-    filename: Union[str, Path],
-    nchannels: Optional[int] = 1,
-    fs: Optional[int] = 48000,
-    outdtype: Optional[str] = "float",
-) -> Tuple[np.ndarray, int]:
-    """
-    Read audio file (.pcm, .wav or .raw)
-
-    Parameters
-    ----------
-    filename: str
-        Input file path
-    nchannels: Optional[int]
-        Number of input channels, required for .pcm, otherwise default = 1
-    fs: Optional[int]
-        Input sampling rate, required for .pcm input file, otherwise default = 48000 (Hz)
-    outdtype: Optional[str]
-        Data type of output array, python builtin or np.dtype
-
-    Returns
-    -------
-    x: np.ndarray
-        audio signal array
-    fs: int
-        signal sampling frequency
-    """
-
-    file_extension = Path(filename).suffix
-
-    if file_extension == ".wav":
-        fs, data = wav.read(filename)
-        if data.dtype == np.int32:
-            data = np.interp(
-                data,
-                (np.iinfo(np.int32).min, np.iinfo(np.int32).max),
-                (np.iinfo(np.int16).min, np.iinfo(np.int16).max),
-            )
-        elif data.dtype == np.float32:
-            data = np.interp(
-                data,
-                (-1, 1),
-                (np.iinfo(np.int16).min, np.iinfo(np.int16).max),
-            )
-        x = np.array(data, dtype=outdtype)
-        file_len = x.shape[0]
-        if x.ndim == 1:
-            # force to be a matrix
-            x = np.reshape(x, (file_len, 1))
-    elif file_extension in [".pcm", ".raw"]:
-        x = np.fromfile(filename, dtype=np.int16).astype(outdtype)
-        signal_len = len(x) // nchannels
-        try:
-            x = x.reshape(signal_len, nchannels)
-        except ValueError:
-            raise ValueError("Wrong number of channels")
-    else:
-        raise ValueError("Wrong input format. Use wav, pcm or raw")
-
-    return x, fs
-
-
-def write(
-    filename: Union[str, Path],
-    x: np.ndarray,
-    fs: Optional[int] = 48000,
-    dtype: Optional[str] = "int16",
-) -> None:
-    """
-    Write audio file (.pcm, .wav or .raw)
-
-    Parameters
-    ----------
-    filename: str
-        Output file path (.pcm, .wav or .raw)
-    x: np.ndarray
-        Numpy 2D array of dimension: number of samples x number of channels
-    fs: Optional[int]
-        Sampling rate, only used for .wav output, default = 48000 (Hz)
-    dtype: Optional[str]
-        Data type format used for .pcm or .raw output, default = 'int16'
-
-    Returns
-    -------
-    None
-    """
-
-    file_extension = Path(filename).suffix
-
-    clipped_samples = np.sum(
-        np.logical_or(x < np.iinfo(np.int16).min, x > np.iinfo(np.int16).max)
-    )
-    if clipped_samples > 0:
-        logger.warning(f"  Warning: {clipped_samples} samples clipped")
-        x = np.clip(x, np.iinfo(np.int16).min, np.iinfo(np.int16).max)
-
-    if file_extension == ".wav":
-        x = x.astype(np.int16)
-        wav.write(filename, fs, x)
-    elif file_extension == ".pcm" or file_extension == ".raw":
-        x = x.astype(dtype).reshape(-1, 1)
-        x.tofile(filename)
-    else:
-        raise ValueError("Wrong input format. Use wav, pcm or raw")
-
-
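A minimal read/write round trip (hypothetical scratch file; `write` clips and converts to 16-bit, `read` returns an (n_samples, n_channels) float array):

    import numpy as np

    x = np.random.randn(48000, 2) * 1000.0  # 1 s of quiet stereo noise
    write("scratch.wav", x, fs=48000)
    y, fs = read("scratch.wav")
    print(fs, y.shape)                      # 48000 (48000, 2)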
Use wav, pcm or raw") - - -def concat( - in_filenames: list, - out_file: str, - silence_pre: Optional[int] = 0, - silence_post: Optional[int] = 0, - in_fs: Optional[int] = 48000, - num_channels: Optional[int] = None, - pad_noise: Optional[bool] = False, - preamble: Optional[int] = None, - pad_noise_preamble: Optional[bool] = False, -) -> list: - """ - Horizontally concatenates audio files into one long file - - Parameters - __________ - in_filenames: list - Input list of filenmames (.pcm, .raw or .wav) - out_file: str - Output multi-channel audio file name (.pcm, .raw or .wav) - silence_pre: int - Padded zeros before signal in samples - silence_post: int - Padded zeros after signal in samples - in_fs: Optional[int] - Input sampling rate, default 48000 Hz - pad_noise: Optional[bool] - If true noise will be padded otherwise zeros will be padded - - Returns - ------- - splits - List of sample indices to split the resulting file at - """ - - y = None - fs_compare = 0 - - # create a list of splits - splits = [0] - - # Read input files - for in_file in in_filenames: - x, fs = read(in_file, fs=in_fs, nchannels=num_channels) - if fs_compare and fs_compare != fs: - raise ValueError("Sampling rates of files to concatenate don't match") - else: - fs_compare = fs - - # pad with very low amplitude noise - x = trim( - x, in_fs, (-silence_pre, -silence_post), samples=True, pad_noise=pad_noise - ) - - # add the length to our splits list - splits.append(splits[-1] + x.shape[0]) - - # concatenate - y = np.concatenate([y, x]) if y is not None else x - - # add preamble - if preamble: - y = trim(y, in_fs, (-preamble, 0), pad_noise_preamble) - - write(out_file, y, fs=in_fs) - - return splits[1:] - - -def split( - in_filename: Union[str, Path], - out_folder: Union[str, Path], - split_filenames: list[Union[str, Path]], - splits: list[int], - in_fs: Optional[int] = 48000, - preamble: Optional[int] = 0, - loudness: Optional[float] = None, -) -> list[Union[str, Path]]: - """ - Horizontally splits audio files into multiple shorter files and applies windowing and scaling - - Parameters - __________ - in_filename: Union[str, Path] - Input filenmame (.pcm, .raw or .wav) - out_folder: Union[str, Path] - Output folder where to put the splits - split_filenames: list[Union[str, Path]] - List of names for the split files - splits: list[int] - List of sample indices where to cut the signal - in_fs: Optional[int] - Input sampling rate, default 48000 Hz - loudness: Optional[float] - Desired loudness of individual files - """ - - # create a list of output files - out_paths = [] - - # Read input file - x, fs = read(in_filename, fs=in_fs) - - # remove preamble - if preamble: - x = trim(x, fs, (preamble, 0)) - - split_old = 0 - for idx, split in enumerate(splits): - out_file = Path(out_folder) / Path(split_filenames[idx]).with_suffix( - in_filename.suffix - ) - - # add the path to our list - out_paths.append(out_file) - - # split - y = x[split_old:split, :] - - # windowing - y = window(y) - - # write file - write(out_file, y, fs=in_fs) - - split_old = split - - return out_paths - - -def combine( - in_filenames: list, - out_file: str, - in_fs: Optional[int] = 48000, -) -> None: - """ - Combines audio files into one multi-channel file - - Parameters - ---------- - in_filenames: list - Input list of filenmames (.pcm, .raw or .wav) - out_file: str - Output multi-channel audio file name (.pcm, .raw or .wav) - in_fs: Optional[int] - Input sampling rate, required for .pcm and .raw input file, default 48000 Hz - - Returns - ------- - None 
- """ - - y = None - fs_compare = 0 - - # Read input files - for in_file in in_filenames: - # assign correct channel - x, fs = read(in_file, fs=in_fs) - if fs_compare and fs_compare != in_fs: - raise ValueError("Sampling rates of files to combine don't match") - else: - fs_compare = fs - if y is None: - y = x - else: - if x.shape[0] > y.shape[0]: - x = x[: y.shape[0], :] - elif y.shape[0] > x.shape[0]: - y = y[: x.shape[0], :] - y = np.column_stack([y, x]) - - write(out_file, y, fs=in_fs) - - -def split_channels( - in_file: str, - out_filenames: list, - in_nchans: int, - in_fs: Optional[int] = 48000, -) -> None: - """ - Split multi-channel audio files into individual mono files - - Parameters - ---------- - in_file: str - Input file name (.pcm, .raw or .wav) - out_filenames: list - List of output file names (.pcm, .raw or .wav) - in_nchans: int - Input number of channels - in_fs: Optional[int] = 48000 - Input sampling rate, default 48000 Hz - - Returns - ------- - None - """ - - # validation - if in_nchans is None: - raise ValueError("Number of channels to split must be specified!") - if in_nchans != len(out_filenames): - print( - "Split: Mismatch between number of channels and output filenames length. Truncating output filenames list." - ) - out_filenames = out_filenames[:in_nchans] - - x, in_fs = read(in_file, nchannels=in_nchans, fs=in_fs) - - # Write output files - for idx, out_file in enumerate(out_filenames): - # extract correct channel - y = x[:, idx] - - write(out_file, y, fs=in_fs) - - -def parse_wave_header( - filename: str, -) -> dict: - """ - Get the format information from a WAV file. - Return a dictionary with the format information - - Parameters - ---------- - filename : string or open file handle - Input WAV file. - - Returns - ------- - Dictionary - """ - - with open(filename, "rb") as fid: - riff = fid.read(4) - - if riff == b"RIFF": - binary_format = "<" - elif riff == b"RIFX": - binary_format = ">" - else: - raise IOError("No RIFF chunk found!") - - wav_size = struct.unpack(f"{binary_format}I", fid.read(4))[0] - - wav_identifier = fid.read(4) - if wav_identifier != b"WAVE": - raise IOError("No WAVE chunk found!") - - fmt_chunk_id = fid.read(4) - - if fmt_chunk_id == b"fmt ": - fmt_size = struct.unpack(f"{binary_format}I", fid.read(4))[0] - wav_format = struct.unpack(f"{binary_format}H", fid.read(2))[0] - channels = struct.unpack(f"{binary_format}H", fid.read(2))[0] - fs = struct.unpack(f"{binary_format}I", fid.read(4))[0] - bytes_per_second = struct.unpack(f"{binary_format}I", fid.read(4))[0] - block_align = struct.unpack(f"{binary_format}H", fid.read(2))[0] - bit_depth = struct.unpack(f"{binary_format}H", fid.read(2))[0] - rem_bytes = fmt_size - 16 - ext_param_size = 0 - ext_param = None - if rem_bytes: - ext_param_size = struct.unpack(f"{binary_format}H", fid.read(2))[0] - - if ext_param_size: - ext_param = fid.read(ext_param_size) - else: - raise IOError("Missing or corrupt fmt chunk!") - - return { - "size": wav_size, - "format_tag": wav_format, - "channels": channels, - "fs": fs, - "bytes_per_second": bytes_per_second, - "block_align": block_align, - "bit_depth": bit_depth, - "ext_param_size": ext_param_size, - "ext_param": ext_param, - } diff --git a/item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_FULL.mat b/item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_FULL.mat deleted file mode 100644 index 42e702db..00000000 --- 
a/item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_FULL.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a3ddecef64dfcf8887904b5cc370c0d9723bd8fd1637e32232205cdcd739b80d -size 12623190 diff --git a/item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_LS.mat b/item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_LS.mat deleted file mode 100644 index 1d590edb..00000000 --- a/item_generation_scripts/audiotools/binaural_datasets/BRIR_IISofficialMPEG222UC_LS.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e2c964b96d802532c0ecf1076092c7d246a54293a3a0c4c72995953c66bfec71 -size 6348499 diff --git a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA1.mat b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA1.mat deleted file mode 100644 index 4f59a8a9..00000000 --- a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA1.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a9ad5d8d874ac2fb851f5d2b0b303494f1d115612e9f6cab40e5eb33591b05c -size 4630 diff --git a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA2.mat b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA2.mat deleted file mode 100644 index 1ad2162a..00000000 --- a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA2.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6fc2a15579b80493597a8096bd815e8b847fe1880bdba760d4405122878b0b0a -size 10323 diff --git a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA3.mat b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA3.mat deleted file mode 100644 index 0e7c3ef4..00000000 --- a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_Dolby_SBA3.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:83822cfa090c345a6ece14d1ec1a92023626f467e2f8d982cf099c071dfc1080 -size 18229 diff --git a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_FULL.mat b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_FULL.mat deleted file mode 100644 index a2ab24e5..00000000 --- a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_FULL.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bf86a03f0b13932c5c138af22584f864b75c5733df1b01ac3fdf7750a1bdbe5f -size 14335913 diff --git a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_LS.mat b/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_LS.mat deleted file mode 100644 index 65c2684c..00000000 --- a/item_generation_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_LS.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2e25ef101e9e72c5d70a55bc1451a07d041d29f96a803d7d3f968f20fe403316 -size 20190 diff --git a/item_generation_scripts/audiotools/binaural_datasets/README.txt b/item_generation_scripts/audiotools/binaural_datasets/README.txt deleted file mode 100644 index 9fd37c96..00000000 --- a/item_generation_scripts/audiotools/binaural_datasets/README.txt +++ /dev/null @@ -1,34 +0,0 @@ -Files in this directory should contain impulse responses for use in rendering, in Matlab .mat format -A sampling rate of 48 kHz is assumed - -Files should adhere to the following naming scheme: - -{HRIR|BRIR}_{DATASETNAME}_{FULL|LS|SBA(1-3)}.mat - -- HRIR or BRIR - specifies the type of impulse response which will be used - for either BINAURAL or BINAURAL_ROOM output, respectively -- DATASETNAME - specifies the name used with the binaural_dataset command-line argument - or YAML key to enable selection of this dataset -- FULL or LS or SBA(1-3) - specifies the subset of impulse responses in the file: - FULL: all available measurements on the sphere - LS: superset of supported loudspeaker layouts - (see audiotools.constants.CHANNEL_BASED_AUDIO_FORMATS["LS"]) - SBA(1-3): impulse responses transformed to ambisonics by external conversion - if available, SBA1 is used for FOA, SBA2 for HOA2 and SBA3 for HOA3 - if not available, SBA3 is used and truncated for all Ambisonic formats - -Each Matlab file should contain the following variables: -- IR - Impulse responses with dimensions [ir_length x n_ears x n_channels] -- SourcePosition - array of {azimuth, elevation, radius} of dimensions [n_channels x 3] - required for FULL, optional otherwise -- latency_s - latency of the dataset in samples - optional, will be estimated if not provided - -LICENSES: -Please see HRIR.txt and BRIR.txt for license info \ No newline at end of file diff --git a/item_generation_scripts/audiotools/binaural_datasets/__init__.py b/item_generation_scripts/audiotools/binaural_datasets/__init__.py deleted file mode 100644 index aea270d8..00000000 --- a/item_generation_scripts/audiotools/binaural_datasets/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded.
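To illustrate the naming scheme and the required .mat variables from the README above, here is a minimal validation sketch; the helper check_binaural_dataset is hypothetical and not part of this patch:

    from pathlib import Path
    from scipy.io import loadmat

    def check_binaural_dataset(path: Path) -> None:
        # Hypothetical helper: check a dataset file against the README conventions.
        parts = path.stem.split("_")  # e.g. HRIR_ORANGE53_Dolby_SBA1
        prefix, dataset, subset = parts[0], "_".join(parts[1:-1]), parts[-1]
        assert prefix in ("HRIR", "BRIR"), "type must be HRIR or BRIR"
        assert subset in ("FULL", "LS", "SBA1", "SBA2", "SBA3"), "unknown subset"

        mat = loadmat(path)
        ir = mat["IR"]  # [ir_length x n_ears x n_channels], mandatory
        assert ir.ndim == 3 and ir.shape[1] == 2, "expected impulse responses for two ears"
        if subset == "FULL":
            # SourcePosition is required for FULL subsets, optional otherwise
            assert mat["SourcePosition"].shape == (ir.shape[2], 3)
        # latency_s is optional; loaders may estimate it from the IR peaks if absent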
-# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# diff --git a/item_generation_scripts/audiotools/binaural_datasets/binaural_dataset.py b/item_generation_scripts/audiotools/binaural_datasets/binaural_dataset.py deleted file mode 100644 index e6c4dbe7..00000000 --- a/item_generation_scripts/audiotools/binaural_datasets/binaural_dataset.py +++ /dev/null @@ -1,288 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
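Before the implementation, a brief usage sketch of the loaders defined in this file, assuming the package and its bundled ORANGE53 dataset are available (the call site itself is illustrative):

    from item_generation_scripts.audiotools.binaural_datasets.binaural_dataset import (
        find_ir,
        load_ir,
    )

    # Load HRIRs for rendering one audio object (FULL sphere subset) to BINAURAL,
    # then pick the measurement closest to 30 degrees azimuth, 0 degrees elevation.
    IR, SourcePosition, latency_smp = load_ir("ISM1", "BINAURAL")
    idx, dist = find_ir(SourcePosition, azi=30.0, ele=0.0, num_filter=1)
    hrir_pair = IR[:, :, idx[0]]  # [ir_length x 2]; latency_smp samples of delay apply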
-# - -import warnings -from pathlib import Path -from typing import Optional, Tuple, Union - -import numpy as np -from scipy.io import loadmat - -from item_generation_scripts.audiotools.audio import fromtype -from item_generation_scripts.audiotools.constants import ( - CHANNEL_BASED_AUDIO_FORMATS, - OBJECT_BASED_AUDIO_FORMATS, - SCENE_BASED_AUDIO_FORMATS, -) -from item_generation_scripts.audiotools.EFAP import wrap_angles - - -def load_hrtf( - filename: Union[str, Path], -) -> Tuple[np.ndarray, np.ndarray, int]: - """ - Read HRTFs from Matlab dictionary file mat - - Parameters - ---------- - filename: str - HRTFs file name (.mat) - - Returns - ------- - IR: np.ndarray - Array of impulse responses - SourcePosition: np.ndarray - Array of source positions corresponding to the impulse responses - latency_s: int - Latency in samples - """ - - if not filename.exists(): - raise FileNotFoundError( - f"File {filename.name} was not found in dataset folder!" - ) - - mat_contents = loadmat(filename) - - try: - IR = mat_contents["IR"] - except KeyError: - raise KeyError(f"Key 'IR' not found in .mat file: {filename} !") - - SourcePosition = mat_contents.get("SourcePosition") - latency_s = mat_contents.get("latency_s") - if latency_s is not None: - latency_s = latency_s.astype(np.int32)[0, 0] - - return IR, SourcePosition, latency_s - - -def load_ir( - in_fmt: str, - out_fmt: str, - dataset: Optional[str] = None, -) -> Tuple[np.ndarray, np.ndarray, int]: - """ - Load IRs for a specified rendering format - - Parameters - ---------- - in_fmt: str - Input format - out_fmt: str - Output format - dataset: Optional[str] - Name of desired dataset without prefix and suffix - - Returns - ------- - IR: np.ndarray - Array of impulse responses - SourcePosition: np.ndarray - Array of source positions corresponding to the impulse responses - latency_smp: int - Latency in samples - """ - - dataset_prefix = None - dataset_suffix = None - - if out_fmt.startswith("BINAURAL") and "ROOM" in out_fmt: - dataset_prefix = "BRIR" - if dataset is None: - dataset = "IISofficialMPEG222UC" - - if in_fmt.startswith("MOZART"): - dataset_suffix = "FULL" - elif in_fmt in CHANNEL_BASED_AUDIO_FORMATS.keys(): - dataset_suffix = "LS" - - elif out_fmt.startswith("BINAURAL"): - dataset_prefix = "HRIR" - if dataset is None: - dataset = "ORANGE53" - - if in_fmt in OBJECT_BASED_AUDIO_FORMATS.keys() or in_fmt.startswith( - "CUSTOM_LS" - ): - dataset_suffix = "FULL" - elif in_fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() and in_fmt != "MONO": - dataset_suffix = "LS" - elif in_fmt in SCENE_BASED_AUDIO_FORMATS.keys(): - dataset = "ORANGE53_Dolby" - if in_fmt == "SBA1" or in_fmt == "FOA": - dataset_suffix = "SBA1" - # Use truncated SBA3 dataset if no SBA1 or 2 dataset exists - if not ( - Path(__file__).parent.joinpath( - f"{dataset_prefix}_{dataset}_{dataset_suffix}.mat" - ) - ).is_file(): - dataset_suffix = "SBA3" - warnings.warn("No SBA1 dataset found -> use truncated SBA3 dataset") - elif in_fmt.endswith("2"): - dataset_suffix = "SBA2" - # Use truncated SBA3 dataset if no SBA1 or 2 dataset exists - if not ( - Path(__file__).parent.joinpath( - f"{dataset_prefix}_{dataset}_{dataset_suffix}.mat" - ) - ).is_file(): - dataset_suffix = "SBA3" - warnings.warn("No SBA2 dataset found -> use truncated SBA3 dataset") - else: - dataset_suffix = "SBA3" - - path_dataset = Path(__file__).parent.joinpath( - f"{dataset_prefix}_{dataset}_{dataset_suffix}.mat" - ) - IR, SourcePosition, latency_s = load_hrtf(path_dataset) - - if latency_s is not None: - latency_smp = 
latency_s - else: - latency_smp = int(np.min(np.argmax(np.sum(np.abs(IR), axis=1), axis=0))) - warnings.warn( - f"No latency of HRTF dataset specified in {path_dataset} file -> computed latency: {latency_smp} sample(s)" - ) - - if in_fmt.startswith("STEREO"): - IR = IR[:, :, :2] # use L and R channels. - elif ( - in_fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() - and not in_fmt.startswith("CUSTOM_LS") - and not in_fmt.startswith("MOZART") - ): - # extract positions from the loudspeaker file - in_fmt = fromtype(in_fmt) - tmp_fmt = fromtype("LS") - - IR_tmp = IR.copy() - IR = np.zeros([IR_tmp.shape[0], IR_tmp.shape[1], in_fmt.num_channels]) - - ir_index = 0 - for i in range(tmp_fmt.num_channels): - for j in range(in_fmt.num_channels): - if ( - tmp_fmt.ls_azi[i] == in_fmt.ls_azi[j] - and tmp_fmt.ls_ele[i] == in_fmt.ls_ele[j] - ): - if j != in_fmt.lfe_index[0]: - IR[:, :, ir_index] = IR_tmp[:, :, i] - ir_index += 1 - - return IR, SourcePosition, latency_smp - - -def find_ir( - SourcePosition: np.ndarray, - azi: float, - ele: float, - num_filter: Optional[int] = None, -) -> Tuple[np.ndarray, np.ndarray]: - """ - Find HRTF measurement closest to the selected direction - - Parameters - ---------- - SourcePosition: np.ndarray - Source IR positions - azi: float - Desired response azimuth - ele: float - Desired response elevation - num_filter: Optional[int] - Number of filters to return, if None return all - - Returns - ------- - i_dir: np.ndarray - Indices of nearest SourcePositions - dist_sort: np.ndarray - Distances corresponding to the indices - """ - - dist = dist_on_sphere(SourcePosition, azi, ele) - - if num_filter is None: - i_dir = np.argsort(dist) - dist_sort = np.sort(dist) - else: - i_dir = np.argsort(dist)[:num_filter] - dist_sort = np.sort(dist)[:num_filter] - - return i_dir, dist_sort - - -def dist_on_sphere( - positions: np.ndarray, - azi: float, - ele: float, -) -> np.ndarray: - """ - Compute great-circle distance - - Parameters - ---------- - positions: np.ndarray - Source IR positions - azi: float - Desired response azimuth - ele: float - Desired response elevation - - Returns - ------- - dist: np.ndarray - Distances from desired point - """ - - azi, ele = wrap_angles(azi, ele) - - delta_azi = np.deg2rad(np.abs(azi - positions[:, 0])) - - # compute great circle distance - a = np.sin(np.deg2rad(positions[:, 1])) * np.sin(np.deg2rad(ele)) + np.cos( - np.deg2rad(positions[:, 1]) - ) * np.cos(np.deg2rad(ele)) * np.cos(delta_azi) - if np.max(a) > 1.001 or np.min(a) < -1.001: - raise ValueError( - f"Absolute distance value larger than one! Min: {np.min(a)}, Max: {np.max(a)}" - ) - - # limiting to prevent errors in arccos due to numerical inaccuracies - a[a > 1] = 1 - a[a < -1] = -1 - dist = np.arccos(a) - - return dist diff --git a/item_generation_scripts/audiotools/binauralobjectrenderer.py b/item_generation_scripts/audiotools/binauralobjectrenderer.py deleted file mode 100644 index 548c4921..00000000 --- a/item_generation_scripts/audiotools/binauralobjectrenderer.py +++ /dev/null @@ -1,652 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. 
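As a quick numerical sanity check of the great-circle formula in dist_on_sphere above (the spherical law of cosines): two points 90 degrees apart on the equator should be pi/2 radians apart.

    import numpy as np

    positions = np.array([[90.0, 0.0, 1.0]])  # one measurement at azimuth 90, elevation 0
    azi, ele = 0.0, 0.0                       # desired direction

    delta_azi = np.deg2rad(np.abs(azi - positions[:, 0]))
    a = np.sin(np.deg2rad(positions[:, 1])) * np.sin(np.deg2rad(ele)) + np.cos(
        np.deg2rad(positions[:, 1])
    ) * np.cos(np.deg2rad(ele)) * np.cos(delta_azi)
    dist = np.arccos(np.clip(a, -1.0, 1.0))
    print(dist)  # [1.5707963...], i.e. pi/2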
-# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -import itertools -from itertools import repeat -from typing import Optional, Tuple - -import numpy as np -from scipy.signal import convolve - -from item_generation_scripts.audiotools.binaural_datasets.binaural_dataset import ( - find_ir, -) -from item_generation_scripts.audiotools.constants import IVAS_FRAME_LEN_MS -from item_generation_scripts.audiotools.EFAP import wrap_angles -from item_generation_scripts.utils import apply_func_parallel - - -def barycentric_weights( - azi_deg: np.ndarray, - ele_deg: np.ndarray, - pos_in: np.ndarray, - interp_1d: Optional[bool] = False, -) -> Tuple[int, int, int]: - """ - Computation of spherical barycentric weights - Implementation based on the paper "Spherical Barycentric Coordinates" - by T. Langer, A. Belyaev and H.
Seidel - - Parameters - ---------- - azi_deg: np.ndarray - Azimuthal coordinates of three points that form a triangle in degrees - ele_deg: np.ndarray - Elevation coordinates of three points that form a triangle in degrees - pos_in: np.ndarray - Azimuthal and elevation coordinates in degrees for point to compute weights - interp_1d: bool - 1d interpolation between two points - - Returns - ------- - W_1, W_2, W_3: scalar values - Barycentric weights for corresponding vertices - """ - - # check if point is equal to vertex - for k in range(3): - if azi_deg[k] == pos_in[0] and ele_deg[k] == pos_in[1]: - output = np.zeros(3) - output[k] = 1 - return tuple(output) - - pos = np.copy(pos_in) - - pos[0], pos[1] = wrap_angles(pos[0], pos[1]) - - # convert to radians - ele = ( - -np.deg2rad(ele_deg, dtype="float64") + np.pi / 2 - ) # different definition of elevation in metadata - azi = np.deg2rad(azi_deg, dtype="float64") - pos[0] = np.deg2rad(pos[0]) - pos[1] = -np.deg2rad(pos[1]) + np.pi / 2 - - """ spherical barycentric coordinates """ - - # convert to cartesian coordinates - x = np.sin(ele) * np.cos(azi) - y = np.sin(ele) * np.sin(azi) - z = np.cos(ele) - pos_x = np.sin(pos[1]) * np.cos(pos[0]) - pos_y = np.sin(pos[1]) * np.sin(pos[0]) - pos_z = np.cos(pos[1]) - - pos_cart = np.array([pos_x, pos_y, pos_z]) - v_1 = np.array([x[0], y[0], z[0]]) - v_2 = np.array([x[1], y[1], z[1]]) - v_3 = np.array([x[2], y[2], z[2]]) - - # rotate coordinate system - unit = np.array([0, 0, 1]) - a = np.cross(pos_cart, unit) - b = np.dot(pos_cart, unit) - a_matrix = np.array([[0, -a[2], a[1]], [a[2], 0, -a[0]], [-a[1], a[0], 0]]) - if b == -1: - rot_matrix = np.eye(3, 3) # a and b point to opposite directions - else: - rot_matrix = np.eye(3, 3) + a_matrix + np.dot(a_matrix, a_matrix) / (1 + b) - - v_1 = rot_matrix @ v_1 - v_2 = rot_matrix @ v_2 - v_3 = rot_matrix @ v_3 - # test_vec = rot_matrix @ pos_cart # should be [0, 0, 1] - - # scale vertices to tangent plane - v_1_plane = v_1 / v_1[2] - v_2_plane = v_2 / v_2[2] - v_3_plane = v_3 / v_3[2] - eps = 10**-10 - - # compute planar barycentric coordinates - denom = (v_2_plane[1] - v_3_plane[1]) * (v_1_plane[0] - v_3_plane[0]) + ( - v_3_plane[0] - v_2_plane[0] - ) * (v_1_plane[1] - v_3_plane[1]) - # denom is proportional to area of triangle -> when area is zero, use linear 1d interpolation - if abs(denom) <= 10**-15: - interp_1d = True - - if not interp_1d: - W_1_plane = ( - (v_2_plane[1] - v_3_plane[1]) * (0 - v_3_plane[0]) - + (v_3_plane[0] - v_2_plane[0]) * (0 - v_3_plane[1]) - ) / (denom + eps) - W_2_plane = ( - (v_3_plane[1] - v_1_plane[1]) * (0 - v_3_plane[0]) - + (v_1_plane[0] - v_3_plane[0]) * (0 - v_3_plane[1]) - ) / (denom + eps) - W_3_plane = 1 - W_1_plane - W_2_plane - else: - v_diff = np.array( - [v_1_plane[:-1], v_2_plane[:-1], v_3_plane[:-1]] - ) # z entry always one - dist_all = np.linalg.norm(v_diff, axis=1) - v_diff_norm = np.divide(v_diff, dist_all[:, None]) - dot_v_ind = np.array( - [[0, 1], [1, 2], [2, 0]] - ) # the three possible combinations of points - # compute dot product between all vertices to find pairs that lie in opposite directions w.r.t.
the point - # in this case the dot product is -1 (due to normalization) - dot = np.empty(3) - k = 0 - for ind_i, ind_j in dot_v_ind: - dot[k] = np.dot(v_diff_norm[ind_i], v_diff_norm[ind_j]) - k += 1 - - margin = 10**-5 - indices_minus_one = np.array(np.abs(dot + 1) < margin) - if indices_minus_one.any(): # test if one entry is -1 - v_ind = dot_v_ind[indices_minus_one] - # use vertex pair with smallest distance from origin (current position) - if np.shape(v_ind)[0] >= 2: - used_vertices = v_ind[ - np.argmin( - np.array([sum(dist_all[v_ind[0]]), sum(dist_all[v_ind[1]])]) - ) - ] - else: - used_vertices = v_ind[0] - dist = dist_all[used_vertices[0]] / sum(dist_all[used_vertices]) - if 0 in used_vertices and 1 in used_vertices: - W_1_plane = 1 - dist - W_2_plane = dist - W_3_plane = 0 - elif 1 in used_vertices and 2 in used_vertices: - W_1_plane = 0 - W_2_plane = 1 - dist - W_3_plane = dist - elif 2 in used_vertices and 0 in used_vertices: - W_1_plane = dist - W_2_plane = 0 - W_3_plane = 1 - dist - else: - raise ValueError("problem in 1d interpolation") - else: - # point does not lie on line spanned by two of the points - W_1_plane = -1 - W_2_plane = -1 - W_3_plane = -1 - - # compute spherical weights from planar weights - W_1 = W_1_plane * np.dot(v_1, v_1_plane) - W_2 = W_2_plane * np.dot(v_2, v_2_plane) - W_3 = W_3_plane * np.dot(v_3, v_3_plane) - - # avoid rejection of triangles due to numerical errors since point lies on edge of triangle - threshold_error = -1 * 10**-8 - if threshold_error < W_1 < 0: - W_1 = 0 - if threshold_error < W_2 < 0: - W_2 = 0 - if threshold_error < W_3 < 0: - W_3 = 0 - - return W_1, W_2, W_3 - - -def get_tri_weights( - pos: np.ndarray, - SourcePosition: np.ndarray, -) -> Tuple[np.ndarray, np.ndarray]: - """ - Finds a suitable triangle of data points on the surface in which the defined point lies - - Parameters - ---------- - pos: np.ndarray - Point of interest given as [azimuthal, elevation] - SourcePosition: np.ndarray - Positions of the source in the measurements in IR - - Returns - ------- - combination_vertices: np.ndarray - Indices of the three vertices in SourcePosition - W: np.ndarray - Barycentric weights of point in triangle; - if negative, no suitable triangle was found - """ - - W_1, W_2, W_3 = -1, -1, -1 - index_triangle = 3 - # get indices of source positions sorted by distance on the plane from pos - index_vertices, _ = find_ir(SourcePosition, pos[0], pos[1]) - pos = np.array(wrap_angles(pos[0], pos[1])) - combination_vertices = None - while W_1 < 0 or W_2 < 0 or W_3 < 0: - if ( - SourcePosition[index_vertices[0], 0] == pos[0] - and SourcePosition[index_vertices[0], 1] == pos[1] - ): - # if the position coincides with a position in the data set, take the first triangle that includes the point - combination_vertices = index_vertices[:3] - W_1, W_2, W_3 = (1, 0, 0) - break - index_HRIR = index_vertices[:index_triangle] # get nearest positions - y_ele_all = SourcePosition[index_HRIR, 1] - if pos[1] > np.max(y_ele_all) or pos[1] < np.min(y_ele_all): - # no need to compute weights since all possible triangles lie completely above or below point - # attention: this can be problematic if no point is available at [0, +-90] - pass - else: - # test all triangle combinations with new point - for combination_vertices_tmp in itertools.combinations(index_HRIR[:-1], 2): - combination_vertices = np.concatenate( - (index_HRIR[-1, None], combination_vertices_tmp), axis=0 - ) - - x_azi = SourcePosition[combination_vertices, 0] - y_ele = SourcePosition[combination_vertices, 1] - W_1, W_2, W_3 =
barycentric_weights(x_azi, y_ele, pos) - if W_1 >= 0 and W_2 >= 0 and W_3 >= 0: - # found suitable triangle - break - index_triangle += 1 - if index_triangle > 30: - # stop after too many iterations - return np.array(combination_vertices), np.array([-1, -1, -1]) - - W = np.array([W_1, W_2, W_3]) - return np.array(combination_vertices), W - - -def interpolate_2d( - azi_in: np.ndarray, - ele_in: np.ndarray, - values: np.ndarray, - pos: np.ndarray, - interp_1d: Optional[bool] = False, - weights: Optional[np.ndarray] = None, - ghost: Optional[list[bool]] = None, - SourcePosition: Optional[np.ndarray] = None, - IR: Optional[np.ndarray] = None, - phase: Optional[bool] = False, -) -> np.ndarray: - """ - Compute HRIR for point on surface spanned by three points via barycentric coordinates - - Parameters - ---------- - azi_in: np.ndarray - Azimuthal coordinates of three points that form a triangle in degrees - ele_in: np.ndarray - Elevation coordinates of three points that form a triangle in degrees - values: np.ndarray - Values to interpolate, here either HRIRs or magnitude or phase of HRTFs - pos: np.ndarray - Position of desired interpolation value - interp_1d: bool - 1d interpolation between two points - weights: tuple - If barycentric weights are already known these values are used - ghost: list of bool - If north and/or south pole is ghost source - SourcePosition: np.ndarray - Only necessary if at least one element in ghost is true - IR: np.ndarray - Only necessary if at least one element in ghost is true - phase: bool - If interpolated values are phases and should be wrapped - - Returns - ------- - HRIR: np.ndarray - Interpolated value at point pos - """ - - if ghost is None: - ghost = [False, False] - - if weights is None: - W_1, W_2, W_3 = barycentric_weights( - azi_in, ele_in, pos, interp_1d - ) # compute barycentric weights - else: - (W_1, W_2, W_3) = weights - - if ( - W_1 + W_2 + W_3 > 1.5 - ): # on sphere sum of weights is not necessarily equal to one! - raise ValueError( - f"Sum of positive barycentric weights larger than expected: {W_1 +W_2 +W_3}" - ) - - threshold_error = -1 * 10**-10 - if W_1 < threshold_error or W_2 < threshold_error or W_3 < threshold_error: - raise ValueError("Point lies outside of triangle! 
No interpolation possible") - - # do some phase unwrapping - if phase: - values = np.unwrap(values, axis=1) - - # treat potential ghost sources at the north and south pole - if (ghost[0] and 90 in ele_in) or (ghost[1] and -90 in ele_in): - if SourcePosition is None or IR is None: - raise ValueError( - "Source positions and IRs are required in interpolation if ghost source is used" - ) - ele_ghost = [] - additional_term = 0 - weights_copy = np.copy(weights) - if ghost[0] and 90 in ele_in: - ele_ghost.append(90) - if ghost[1] and -90 in ele_in: - ele_ghost.append(-90) - for ele_g in ele_ghost: - ind_dist, dist = find_ir(SourcePosition[: -len(ele_ghost)], 0, ele_g) - ind_dist = ind_dist[dist == dist[0]] - weight_spread = weights_copy[ele_in == ele_g] / len(ind_dist) - weights_copy[ele_in == ele_g] = 0 - additional_term += np.sum(IR[:, ind_dist], axis=1) * weight_spread - - HRIR = ( - values[:, 0] * W_1 - + values[:, 1] * W_2 - + values[:, 2] * W_3 - + additional_term - ) - - else: - HRIR = ( - values[:, 0] * W_1 + values[:, 1] * W_2 + values[:, 2] * W_3 - ) # apply weights - - return HRIR - - -def add_ghost_speaker_bary( - SourcePosition: np.ndarray, - IR: np.ndarray, -) -> Tuple[list[bool], np.ndarray, np.ndarray]: - """ - Adds a ghost speaker at the poles if necessary and indicates the result by bool values - - Parameters - ---------- - SourcePosition: np.ndarray - All source positions - IR: np.ndarray - IRs at corresponding source positions - - Returns - ------- - ghost_pos: list of bool - If entry is True a ghost speaker is introduced at the north or south pole, respectively - SourcePosition: np.ndarray - All source positions plus poles if ghost_pos is True - IR: np.ndarray - IRs at corresponding source positions - """ - - ghost_pos = [False, False] - if 90 not in SourcePosition[:, 1]: - # if north pole is not in dataset add it - ghost_pos[0] = True - pole = np.array([0, 90, 1]) - SourcePosition = np.concatenate((SourcePosition, pole[None, :]), axis=0) - IR = np.concatenate((IR, np.zeros((*np.shape(IR)[:2], 1))), axis=2) - if -90 not in SourcePosition[:, 1]: - # if south pole is not in dataset add it - ghost_pos[1] = True - pole = np.array([0, -90, 1]) - SourcePosition = np.concatenate((SourcePosition, pole[None, :]), axis=0) - IR = np.concatenate((IR, np.zeros((*np.shape(IR)[:2], 1))), axis=2) - - return ghost_pos, SourcePosition, IR - - -def binaural_fftconv_framewise( - x: np.ndarray, - IR: np.ndarray, - SourcePosition: np.ndarray, - azi: Optional[np.ndarray] = None, - ele: Optional[np.ndarray] = None, - frame_len: Optional[int] = (IVAS_FRAME_LEN_MS // 4) * 48, -) -> np.ndarray: - """ - Binauralization using fft convolution with frame-wise processing - supports rotation on trajectories with interpolation between measured source - positions, reimplemented roughly along the lines of ConvBinauralRenderer.m - - Parameters - ---------- - x: np.ndarray - Input multi-channel array - IR: np.ndarray - HRIRs array - SourcePosition: np.ndarray - Positions of the source in the measurements in IR - azi: np.ndarray - Azimuth angles for all frames - ele: np.ndarray - Elevation angles for all frames - frame_len: int - Frame length in samples, optional, default = (IVAS_FRAME_LEN_MS // 4) * 48 (240 samples, i.e. a 5 ms subframe at 48 kHz) - - Returns - ------- - y: np.ndarray - Output binaural signal array - """ - - sig_len = x.shape[0] - N_frames = int( - sig_len / frame_len - ) # TODO add ceil function for non-integer frame length multiples - num_points_interp = 3 # interpolation in triangle - - N_HRIR_taps = IR.shape[0] - - if azi is None or ele is None: -
azi = np.repeat([0.0], N_frames) - ele = np.repeat([0.0], N_frames) - elif len(azi) < N_frames or len(ele) < N_frames: - azi = np.concatenate( - [np.repeat(azi, N_frames // len(azi)), azi[: N_frames % len(azi)]] - ) - ele = np.concatenate( - [np.repeat(ele, N_frames // len(ele)), ele[: N_frames % len(ele)]] - ) - - indices_HRIR = np.empty([N_frames, num_points_interp], dtype=int) - IR_2d = np.empty((N_frames, N_HRIR_taps, 2, num_points_interp)) - Bary_weights = np.empty((N_frames, 3)) - - # find three points to form a triangle for interpolation - # test if point lies within triangle spanned by these points by checking the signs of the barycentric coordinates - # if all weights are >= 0 the point lies within the triangle - for index in range(np.shape(SourcePosition)[0]): - SourcePosition[index, 0:2] = np.array( - wrap_angles(SourcePosition[index, 0], SourcePosition[index, 1]) - ) - - # add ghost speaker to poles if necessary - ghost_pos, SourcePosition, IR = add_ghost_speaker_bary(SourcePosition, IR) - for i_frame in range(N_frames): - if ( - i_frame - and azi[i_frame] == azi[i_frame - 1] - and ele[i_frame] == ele[i_frame - 1] - ): - IR_2d[i_frame] = IR_2d[i_frame - 1] - indices_HRIR[i_frame] = indices_HRIR[i_frame - 1] - Bary_weights[i_frame] = Bary_weights[i_frame - 1] - continue - pos = np.array([azi[i_frame], ele[i_frame]]) - combination_vertices, W = get_tri_weights(pos, SourcePosition) - if (W < 0).all(): - raise ValueError("No suitable triangle found in frame " + str(i_frame)) - IR_2d[i_frame] = IR[:, :, np.array(combination_vertices)] - indices_HRIR[i_frame] = combination_vertices - Bary_weights[i_frame] = W - - T_rev = frame_len + N_HRIR_taps - 1 - N_rev = int(np.ceil(T_rev / frame_len)) - - fade_in = np.arange(frame_len) / (frame_len - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - # compute both ears in parallel - i_ear = list(range(2)) - result = apply_func_parallel( - render_ear, - zip( - i_ear, - repeat(frame_len), - repeat(N_frames), - repeat(N_rev), - repeat(T_rev), - repeat(fade_in), - repeat(fade_out), - repeat(x), - repeat(sig_len), - repeat(N_HRIR_taps), - repeat(azi), - repeat(ele), - repeat(SourcePosition), - repeat(IR_2d), - repeat(Bary_weights), - repeat(ghost_pos), - repeat(IR), - repeat(indices_HRIR), - ), - None, - "mp", - False, - ) - - y = np.stack(result, axis=1) - - return y[0:sig_len] - - -def render_ear( - i_ear, - frame_len, - N_frames, - N_rev, - T_rev, - fade_in, - fade_out, - x, - sig_len, - N_HRIR_taps, - azi, - ele, - SourcePosition, - IR_2d, - Bary_weights, - ghost_pos, - IR, - indices_HRIR, -) -> np.ndarray: - # function to process one ear used in multiprocessing - G = np.empty((N_frames, N_HRIR_taps)) - - for frame in range(N_frames): - pos = np.array([azi[frame], ele[frame]]) - # Interpolation of time-domain signals - G[frame] = interpolate_2d( - SourcePosition[indices_HRIR[frame], 0], - SourcePosition[indices_HRIR[frame], 1], - IR_2d[frame, :, i_ear], - pos, - weights=Bary_weights[frame], - ghost=ghost_pos, - SourcePosition=SourcePosition, - IR=IR[:, i_ear], - ) - - # frame-wise parallel computation is slow (many frames, small computational load per frame) - i_frame = list(range(N_frames)) - result = apply_func_parallel( - convolve_frame, - zip( - i_frame, - repeat(frame_len), - repeat(N_frames), - repeat(N_rev), - repeat(T_rev), - repeat(i_ear), - repeat(fade_in), - repeat(fade_out), - repeat(G), - repeat(x), - repeat(sig_len), - repeat(N_HRIR_taps), - ), - None, - "mt", - False, - ) - - return np.hstack(result) - - -def
convolve_frame( - i_frame, - frame_len, - N_frames, - N_rev, - T_rev, - i_ear, - fade_in, - fade_out, - G, - x, - sig_len, - N_HRIR_taps, -) -> np.ndarray: - # function to process one frame used in multiprocessing - i1 = i_frame * frame_len - i2 = (i_frame + 1) * frame_len - - y0 = np.zeros([2, sig_len + N_HRIR_taps - 1, 2]) - - G0 = G[i_frame] - G1 = G[min(i_frame + 1, N_frames - 1)] - - for j_frame in range(max(0, i_frame - N_rev), min(i_frame + 1, N_frames)): - j1 = j_frame * frame_len - j2 = (j_frame + 1) * frame_len - j2p = j1 + T_rev - - y0[0, j1:j2p, i_ear] += convolve(np.squeeze(x[j1:j2]), G0) - y0[1, j1:j2p, i_ear] += convolve(np.squeeze(x[j1:j2]), G1) - - y_frame = ( - np.squeeze(fade_out) * y0[0, i1:i2, i_ear] - + np.squeeze(fade_in) * y0[1, i1:i2, i_ear] - ) - return y_frame diff --git a/item_generation_scripts/audiotools/constants.py b/item_generation_scripts/audiotools/constants.py deleted file mode 100644 index c3af9d29..00000000 --- a/item_generation_scripts/audiotools/constants.py +++ /dev/null @@ -1,704 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
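The tables that follow map format names to channel counts and loudspeaker geometry; a small sketch of a typical lookup (alternate spellings are first normalized via CHANNEL_BASED_AUDIO_ALTNAMES, defined below):

    from item_generation_scripts.audiotools.constants import (
        CHANNEL_BASED_AUDIO_ALTNAMES,
        CHANNEL_BASED_AUDIO_FORMATS,
    )

    fmt = "5.1"  # user-facing spelling
    fmt = CHANNEL_BASED_AUDIO_ALTNAMES.get(fmt, fmt)  # canonical name "5_1"
    layout = CHANNEL_BASED_AUDIO_FORMATS[fmt]
    print(layout["num_channels"], layout["lfe_index"])  # 6 [3]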
-# - -import numpy as np - -BINAURAL_AUDIO_FORMATS = { - "BINAURAL": { - "num_channels": 2, - }, - "BINAURAL_ROOM": { - "num_channels": 2, - }, -} - -BINAURAL_LFE_GAIN = 10 ** (5.5 / 20) - -LFE_INDEX_DEFAULT = 3 - -LS_AZI_MONO = [0] -LS_ELE_MONO = [0] - -LS_AZI_STEREO = [30, -30] -LS_ELE_STEREO = [0, 0] - -LS_AZI_CICP6 = [30, -30, 0, 0, 110, -110] -LS_ELE_CICP6 = [0, 0, 0, 0, 0, 0] - -LS_AZI_CICP12 = [30, -30, 0, 0, 110, -110, 135, -135] -LS_ELE_CICP12 = [0, 0, 0, 0, 0, 0, 0, 0] - -LS_AZI_CICP14 = [30, -30, 0, 0, 110, -110, 30, -30] -LS_ELE_CICP14 = [0, 0, 0, 0, 0, 0, 35, 35] - -LS_AZI_CICP16 = [30, -30, 0, 0, 110, -110, 30, -30, 110, -110] -LS_ELE_CICP16 = [0, 0, 0, 0, 0, 0, 35, 35, 35, 35] - -LS_AZI_CICP19 = [30, -30, 0, 0, 135, -135, 90, -90, 30, -30, 135, -135] -LS_ELE_CICP19 = [0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 35] - - -CHANNEL_BASED_AUDIO_FORMATS = { - "MONO": { - "num_channels": 1, - "ls_azi": LS_AZI_MONO, - "ls_ele": LS_ELE_MONO, - "lfe_index": [], - }, - "STEREO": { - "num_channels": 2, - "ls_azi": LS_AZI_STEREO, - "ls_ele": LS_ELE_STEREO, - "lfe_index": [], - }, - "5_1": { - "num_channels": 6, - "ls_azi": LS_AZI_CICP6, - "ls_ele": LS_ELE_CICP6, - "lfe_index": [LFE_INDEX_DEFAULT], - }, - "5_1_2": { - "num_channels": 8, - "ls_azi": LS_AZI_CICP14, - "ls_ele": LS_ELE_CICP14, - "lfe_index": [LFE_INDEX_DEFAULT], - }, - "5_1_4": { - "num_channels": 10, - "ls_azi": LS_AZI_CICP16, - "ls_ele": LS_ELE_CICP16, - "lfe_index": [LFE_INDEX_DEFAULT], - }, - "7_1": { - "num_channels": 8, - "ls_azi": LS_AZI_CICP12, - "ls_ele": LS_ELE_CICP12, - "lfe_index": [LFE_INDEX_DEFAULT], - }, - "7_1_4": { - "num_channels": 12, - "ls_azi": LS_AZI_CICP19, - "ls_ele": LS_ELE_CICP19, - "lfe_index": [LFE_INDEX_DEFAULT], - }, - "LS": { - "num_channels": 15, - "ls_azi": [ - 30, - -30, - 0, - 135, - -135, - 110, - -110, - 90, - -90, - 30, - -30, - 110, - -110, - 135, - -135, - ], - "ls_ele": [0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 35, 35, 35], - "lfe_index": [], - }, - "MOZART": { - "num_channels": 30, - "ls_azi": [ - 0, - 0, - 135, - -135, - 30, - -30, - 180, - 0, - 90, - -90, - 45, - -45, - 0, - 0, - 135, - -135, - 90, - -90, - 180, - 0, - 45, - -45, - 60, - -60, - 110, - -110, - 30, - -30, - 110, - -110, - ], - "ls_ele": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 35, - 35, - 35, - 90, - 35, - 35, - 35, - 35, - 35, - -15, - -15, - -15, - 0, - 0, - 0, - 0, - 35, - 35, - 35, - 35, - ], - "lfe_index": [1, 7], - }, - "CUSTOM_LS": { - "num_channels": -1, - "ls_azi": None, - "ls_ele": None, - "lfe_index": None, - }, -} - -# Support a variety of names for multichannel configs -CHANNEL_BASED_AUDIO_ALTNAMES = { - # 5_1 - 51: "5_1", # YAML by default will interpret underscore delimited numbers as integers, similar to python - "5d1": "5_1", - "5.1": "5_1", - "CICP6": "5_1", - # 7_1 - 71: "7_1", - "7d1": "7_1", - "7.1": "7_1", - "CICP12": "7_1", - # 5_1_2 - 512: "5_1_2", - "5d1p2": "5_1_2", - "5.1+2": "5_1_2", - "5.1.2": "5_1_2", - "CICP14": "5_1_2", - # 5_1_4 - 514: "5_1_4", - "5d1p4": "5_1_4", - "5.1+4": "5_1_4", - "5.1.4": "5_1_4", - "CICP16": "5_1_4", - # 7_1_4 - 714: "7_1_4", - "7d1p4": "7_1_4", - "7.1+4": "7_1_4", - "7.1.4": "7_1_4", - "CICP19": "7_1_4", -} - -METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS = { - "MASA1": { - "num_channels": 1, - }, - "MASA2": { - "num_channels": 2, - }, -} -OBJECT_BASED_AUDIO_FORMATS = { - "ISM1": { - "num_channels": 1, - }, - "ISM2": { - "num_channels": 2, - }, - "ISM3": { - "num_channels": 3, - }, - "ISM4": { - "num_channels": 4, - }, -} - - -SCENE_BASED_AUDIO_FORMATS = { - 
"FOA": { - "num_channels": 4, - "is_planar": False, - }, - "HOA2": { - "num_channels": 9, - "is_planar": False, - }, - "HOA3": { - "num_channels": 16, - "is_planar": False, - }, - "PLANARFOA": { - "num_channels": 4, - "is_planar": True, - }, - "PLANARHOA2": { - "num_channels": 9, - "is_planar": True, - }, - "PLANARHOA3": { - "num_channels": 16, - "is_planar": True, - }, - "SBA1": { - "num_channels": 4, - "is_planar": False, - }, - "SBA2": { - "num_channels": 9, - "is_planar": False, - }, - "SBA3": { - "num_channels": 16, - "is_planar": False, - }, -} - -SCENE_METADATA_FORMATS = {"META"} - -AUDIO_FORMATS = [ - BINAURAL_AUDIO_FORMATS, - CHANNEL_BASED_AUDIO_FORMATS, - METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, - OBJECT_BASED_AUDIO_FORMATS, - SCENE_BASED_AUDIO_FORMATS, -] - - -IVAS_FRAME_LEN_MS = 20 - -IVAS_CICPX_TO_MONO = np.array( - [ - [ - 1, - 1, - 1, - 1, - 0.79999995, - 0.79999995, - 0.79999995, - 0.79999995, - 0.849999964, - 0.849999964, - 0.849999964, - 0.849999964, - ] - ] -).T - -IVAS_CICPX_TO_STEREO = np.array( - [ - [1, 0], - [0, 1], - [np.sqrt(0.5), np.sqrt(0.5)], - [np.sqrt(0.5), np.sqrt(0.5)], - [0.79999995, 0], - [0, 0.79999995], - [0.79999995, 0], - [0, 0.79999995], - [0.849999964, 0], - [0, 0.849999964], - [0.849999964, 0], - [0, 0.849999964], - ] -) - -# downmix matrices -IVAS_CICP12_TO_6 = np.zeros(8 * 6) -IVAS_CICP12_TO_6[[0, 7, 14, 21, 28, 35, 40, 47]] = 1 -IVAS_CICP12_TO_6 = IVAS_CICP12_TO_6.reshape(8, 6) - -IVAS_CICP14_TO_6 = np.zeros(8 * 6) -IVAS_CICP14_TO_6[[0, 7, 14, 21, 28, 35]] = 1 -IVAS_CICP14_TO_6[[36, 43]] = 0.849999964 -IVAS_CICP14_TO_6 = IVAS_CICP14_TO_6.reshape(8, 6) - -IVAS_CICP16_TO_6 = np.zeros(10 * 6) -IVAS_CICP16_TO_6[[0, 7, 14, 21, 28, 35]] = 1 -IVAS_CICP16_TO_6[[36, 43, 52, 59]] = 0.849999964 -IVAS_CICP16_TO_6 = IVAS_CICP16_TO_6.reshape(10, 6) - -IVAS_CICP16_TO_12 = np.zeros(10 * 8) -IVAS_CICP16_TO_12[[0, 9, 18, 27, 36, 45]] = 1 -IVAS_CICP16_TO_12[[48, 57, 68, 77]] = 0.849999964 -IVAS_CICP16_TO_12 = IVAS_CICP16_TO_12.reshape(10, 8) - -IVAS_CICP16_TO_14 = np.zeros(10 * 8) -IVAS_CICP16_TO_14[[0, 9, 18, 27, 36, 45, 54, 63]] = 1 -IVAS_CICP16_TO_14[[68, 77]] = 0.849999964 -IVAS_CICP16_TO_14 = IVAS_CICP16_TO_14.reshape(10, 8) - -IVAS_CICP19_TO_6 = np.zeros(12 * 6) -IVAS_CICP19_TO_6[[0, 7, 14, 21, 28, 35]] = 1 -IVAS_CICP19_TO_6[[36, 43]] = 0.367322683 -IVAS_CICP19_TO_6[[48, 55, 64, 71]] = 0.849999964 -IVAS_CICP19_TO_6[[40, 47]] = 0.930093586 -IVAS_CICP19_TO_6 = IVAS_CICP19_TO_6.reshape(12, 6) - -IVAS_CICP19_TO_12 = np.zeros(12 * 8) -IVAS_CICP19_TO_12[[0, 9, 18, 27, 38, 47]] = 1 -IVAS_CICP19_TO_12[[48, 57]] = 0.367322683 -IVAS_CICP19_TO_12[[64, 73, 84, 93]] = 0.849999964 -IVAS_CICP19_TO_12[[52, 61]] = 0.930093586 -IVAS_CICP19_TO_12 = IVAS_CICP19_TO_12.reshape(12, 8) - -IVAS_CICP19_TO_14 = np.zeros(12 * 8) -IVAS_CICP19_TO_14[[0, 9, 18, 27, 36, 45, 70, 79]] = 1 -IVAS_CICP19_TO_14[[48, 57]] = 0.367322683 -IVAS_CICP19_TO_14[[84, 93]] = 0.849999964 -IVAS_CICP19_TO_14[[52, 61]] = 0.930093586 -IVAS_CICP19_TO_14 = IVAS_CICP19_TO_14.reshape(12, 8) - -IVAS_CICP19_TO_16 = np.zeros(12 * 10) -IVAS_CICP19_TO_16[[0, 11, 22, 33, 44, 55, 86, 97, 108, 119]] = 1 -IVAS_CICP19_TO_16[[60, 71]] = 0.367322683 -IVAS_CICP19_TO_16[[64, 75]] = 0.930093586 -IVAS_CICP19_TO_16 = IVAS_CICP19_TO_16.reshape(12, 10) - -# upmix matrices -IVAS_MONO_TO_CICPX = np.zeros([1, 12]) -IVAS_MONO_TO_CICPX[0, 2] = 1 - -IVAS_STEREO_TO_CICPX = np.zeros([2, 12]) -IVAS_STEREO_TO_CICPX[0, 0] = 1 -IVAS_STEREO_TO_CICPX[1, 1] = 1 - -IVAS_CICP12_TO_14 = np.zeros(8 * 8) -IVAS_CICP12_TO_14[[0, 9, 18, 27, 36, 45, 52, 
61]] = 1 -IVAS_CICP12_TO_14 = IVAS_CICP12_TO_14.reshape(8, 8) - -IVAS_CICP12_TO_16 = np.zeros(8 * 10) -IVAS_CICP12_TO_16[[0, 11, 22, 33, 44, 55, 64, 75]] = 1 -IVAS_CICP12_TO_16 = IVAS_CICP12_TO_16.reshape(8, 10) - -IVAS_CICP12_TO_19 = np.zeros(8 * 12) -IVAS_CICP12_TO_19[[0, 13, 26, 39, 54, 67, 76, 89]] = 1 -IVAS_CICP12_TO_19 = IVAS_CICP12_TO_19.reshape(8, 12) - -IVAS_CICP14_TO_19 = np.zeros(8 * 12) -IVAS_CICP14_TO_19[[0, 13, 26, 39, 52, 65, 80, 93]] = 1 -IVAS_CICP14_TO_19 = IVAS_CICP14_TO_19.reshape(8, 12) - -IVAS_CICP16_TO_19 = np.zeros(10 * 12) -IVAS_CICP16_TO_19[[0, 13, 26, 39, 52, 65, 80, 93, 106, 119]] = 1 -IVAS_CICP16_TO_19 = IVAS_CICP16_TO_19.reshape(10, 12) - -# mapping dict -IVAS_MC_CONVERSION = { - "MONO": { - # upmix - "5_1": IVAS_MONO_TO_CICPX[:, :6], - "7_1": IVAS_MONO_TO_CICPX[:, :8], - "5_1_2": IVAS_MONO_TO_CICPX[:, :8], - "5_1_4": IVAS_MONO_TO_CICPX[:, :10], - "7_1_4": IVAS_MONO_TO_CICPX[:, :12], - }, - "STEREO": { - # upmix - "5_1": IVAS_STEREO_TO_CICPX[:, :6], - "7_1": IVAS_STEREO_TO_CICPX[:, :8], - "5_1_2": IVAS_STEREO_TO_CICPX[:, :8], - "5_1_4": IVAS_STEREO_TO_CICPX[:, :10], - "7_1_4": IVAS_STEREO_TO_CICPX[:, :12], - }, - "5_1": { - # downmix - "MONO": IVAS_CICPX_TO_MONO[:6, :], - "STEREO": IVAS_CICPX_TO_STEREO[:6, :], - # upmix - "7_1": np.pad(np.eye(6), [[0, 0], [0, 2]]), - "5_1_2": np.pad(np.eye(6), [[0, 0], [0, 2]]), - "5_1_4": np.pad(np.eye(6), [[0, 0], [0, 4]]), - "7_1_4": np.pad(np.eye(6), [[0, 0], [0, 6]]), - }, - "7_1": { - # downmix - "MONO": IVAS_CICPX_TO_MONO[:8, :], - "STEREO": IVAS_CICPX_TO_STEREO[:8, :], - "5_1": IVAS_CICP12_TO_6, - # upmix - "5_1_2": IVAS_CICP12_TO_14, - "5_1_4": IVAS_CICP12_TO_16, - "7_1_4": IVAS_CICP12_TO_19, - }, - "5_1_2": { - # downmix - "MONO": np.vstack([IVAS_CICPX_TO_MONO[:6, :], IVAS_CICPX_TO_MONO[-2:, :]]), - "STEREO": np.vstack( - [IVAS_CICPX_TO_STEREO[:6, :], IVAS_CICPX_TO_STEREO[-2:, :]] - ), - "5_1": IVAS_CICP14_TO_6, - "7_1": np.pad(IVAS_CICP14_TO_6, [[0, 0], [0, 2]]), - # upmix - "5_1_4": np.pad(np.eye(8), [[0, 0], [0, 2]]), - "7_1_4": IVAS_CICP14_TO_19, - }, - "5_1_4": { - # downmix - "MONO": np.vstack([IVAS_CICPX_TO_MONO[:6, :], IVAS_CICPX_TO_MONO[-4:, :]]), - "STEREO": np.vstack( - [IVAS_CICPX_TO_STEREO[:6, :], IVAS_CICPX_TO_STEREO[-4:, :]] - ), - "5_1": IVAS_CICP16_TO_6, - "7_1": IVAS_CICP16_TO_12, - "5_1_2": IVAS_CICP16_TO_14, - # upmix - "7_1_4": IVAS_CICP16_TO_19, - }, - "7_1_4": { - # downmix - "MONO": IVAS_CICPX_TO_MONO, - "STEREO": IVAS_CICPX_TO_STEREO, - "5_1": IVAS_CICP19_TO_6, - "7_1": IVAS_CICP19_TO_12, - "5_1_2": IVAS_CICP19_TO_14, - "5_1_4": IVAS_CICP19_TO_16, - }, -} - -# LFE 120 Hz LPF filter coefficients -IVAS_LPF_4_BUTTER_48K_SOS = np.array( - [ - [ - 5.12617881476274e-09, - 1.02523584294987e-08, - 5.12617879059970e-09, - 1, - -1.96875982668433, - 0.969044914826862, - ], - [ - 1, - 1.99999984394358, - 1.00000000471366, - 1, - -1.98677297369091, - 0.987060670205863, - ], - ] -) - -T_DESIGN_11_AZI = np.array( - [ - 132.927291884332, - -83.9349499672527, - 8.47410038634525, - -113.340833834572, - -103.265909909537, - -33.2370360923825, - 21.8564347471830, - -156.539486489880, - -64.2647531387317, - 165.779530068738, - -25.2028339893249, - -97.0037973959711, - 27.8546391256925, - 153.214218975132, - -155.061608694663, - -11.8421354925543, - 80.5387312016125, - -42.0561606270165, - -31.2233262205060, - 38.8379041944063, - 93.7606877469492, - -84.7560200078398, - 7.75536818082863, - -122.276883381108, - 46.8012705252113, - -24.7686335284573, - 99.8904719062334, - -134.783996960185, - -83.0880230164493, - 
60.1281736000420, - 152.644656278084, - 29.7576658909417, - 40.7793187974476, - 110.183927562412, - 165.652065916454, - -12.9926632105736, - 79.7359893585681, - -50.5245271190884, - 118.923930267733, - 47.2202861862577, - 171.925276523721, - -62.5145800558502, - -11.1156697680531, - 132.018041099963, - -135.355486412425, - 102.370921576708, - 112.739282398012, - -178.304963670831, - -122.319932198534, - 59.0763464570905, - 151.704200334501, - 21.3763364190503, - -169.005476417779, - 118.980811786769, - -116.089295979010, - 9.64767870353308, - 60.8933243657771, - -156.021526862757, - -63.4602993325163, - 174.929787427393, - -175.288768596346, - -105.951907934032, - -50.1928304519800, - 131.358266702971, - -136.296815007542, - 93.5644603506407, - -97.0840116473627, - -169.158278888619, - -44.1323835471345, - 81.4795403841382, - ] -) - -T_DESIGN_11_ELE = np.array( - [ - 7.69254738757899, - -23.7300652200871, - 23.5127556185301, - 70.4225940747938, - -9.89694439538752, - -70.7513316063095, - -26.4618527647561, - 47.7764936689044, - -7.72047049524459, - 44.5343602375216, - 26.3897904767450, - -44.6578850137166, - 9.76703456924600, - -47.7053318175498, - 7.45302934155972, - -23.5901209534773, - 23.7194484034707, - 70.4382693912270, - -9.83541588740259, - -70.4980825105727, - -26.2949218109204, - 47.6148028805222, - -7.51718499746626, - 44.2862347125773, - 26.6442619674660, - -44.5693707254340, - 9.91271928508000, - -47.9599550372574, - 7.29679922953795, - -23.3445981426306, - 23.6415261666079, - 70.6843143997832, - -9.58140351749889, - -70.3934534122902, - -26.4258159091605, - 47.7510668062369, - -7.30853603036844, - 44.2632768570349, - 26.7140614474957, - -44.3149733480527, - 9.75899721561506, - -48.0361913333593, - 7.43965099805872, - -23.3326075548841, - 23.3868959687598, - 70.8219078016791, - -9.48596399169388, - -70.5801867828491, - -26.6740262349265, - 47.9978414043199, - -7.38276167631068, - 44.4970603752708, - 26.5024990214418, - -44.2461913308458, - 9.51845076548334, - -47.8281351088411, - 7.68427447425834, - -23.5706842106942, - 23.3074499244045, - 70.6586472132300, - -9.68088860263008, - -70.8026785673948, - -26.6963451935976, - 48.0136296461397, - -7.63734823159200, - 44.6651234222196, - 26.3023490002159, - -44.4576351865647, - 9.52341455917443, - -47.6242211091394, - ] -) -PLANAR_HOA_CHANNELS_ACN = np.array([0, 1, 3, 4, 8, 9, 15]) -VERT_HOA_CHANNELS_ACN = np.array([2, 5, 6, 7, 10, 11, 12, 13, 14]) - -SEED_PADDING = 0 - -# delay in number of samples -DELAY_COMPENSATION_FOR_FILTERING = { - "SHQ2": { - "up": 436, - "down": 218, - }, - "SHQ3": { - "up": 436, - "down": 145, - }, - "MSIN": 92, - "LP1p5": 322, - "LP35": 232, - "LP7": 117, - "LP10": 82, - "LP12": 164, - "LP14": 234, - "LP20": 161, - "HP50_32KHZ": 559, - "HP50_48KHZ": 839, -} diff --git a/item_generation_scripts/audiotools/convert/__init__.py b/item_generation_scripts/audiotools/convert/__init__.py deleted file mode 100644 index 4ec23739..00000000 --- a/item_generation_scripts/audiotools/convert/__init__.py +++ /dev/null @@ -1,323 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. 
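PLANAR_HOA_CHANNELS_ACN and VERT_HOA_CHANNELS_ACN above partition the ACN channel order into horizontal-only and vertically dependent components. A plausible use, sketched here as an assumption (this patch does not show the call site), is zeroing the vertical channels to derive the PLANAR* ambisonic formats:

    import numpy as np

    from item_generation_scripts.audiotools.constants import VERT_HOA_CHANNELS_ACN

    def make_planar(hoa: np.ndarray) -> np.ndarray:
        # hoa: [n_samples x n_channels] in ACN order; zero vertically dependent channels
        planar = hoa.copy()
        vert = VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < hoa.shape[1]]
        planar[:, vert] = 0.0
        return planar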
All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -import logging -from pathlib import Path, PurePath -from typing import Optional, Union - -from item_generation_scripts.audiotools import audio, audioarray, metadata -from item_generation_scripts.audiotools.audiofile import write -from item_generation_scripts.audiotools.convert.channelbased import convert_channelbased -from item_generation_scripts.audiotools.convert.masa import convert_masa -from item_generation_scripts.audiotools.convert.objectbased import convert_objectbased -from item_generation_scripts.audiotools.convert.scenebased import convert_scenebased -from item_generation_scripts.audiotools.wrappers.bs1770 import loudness_norm -from item_generation_scripts.audiotools.wrappers.esdru import esdru -from item_generation_scripts.audiotools.wrappers.filter import ( - hp50filter_itu, - lpfilter_itu, - resample_itu, -) -from item_generation_scripts.audiotools.wrappers.p50fbmnru import p50fbmnru - -from ..metadata import write_ISM_metadata_in_file - - -def convert_file( - in_file: Union[str, Path], - out_file: Union[str, Path], - in_fs: int, - in_fmt: Union[str, Path], - out_fmt: Optional[Union[str, Path]] = None, - out_fs: Optional[int] = None, - in_meta: Optional[list] = None, - logger: Optional[logging.Logger] = None, - **kwargs, -) -> None: - """Conversion function for one audio file""" - - if not in_fmt: - raise ValueError("Input audio format must be specified!") - - # get audio class object - can be either a regular single audio or scene description .txt - if not isinstance(in_fmt, PurePath) and in_fmt.startswith("META"): - input = metadata.Metadata(in_file) - else: - input = audio.fromfile(in_fmt, in_file, in_fs, in_meta) - - # try to set reasonable defaults if missing - if not in_fs: - in_fs = input.fs - if not out_fs: - out_fs = input.fs - - if not out_fmt: - if isinstance(input, metadata.Metadata): - raise ValueError( - "Output 
format must be specified for scene description files!" - ) - else: - out_fmt = input.name - - output = audio.fromtype(out_fmt) - if isinstance(output, audio.ObjectBasedAudio): - try: - output.object_pos = input.object_pos - output.metadata_files = input.metadata_files - except Exception: - raise ValueError( - "ISM is not supported as an output for rendering! Only usable as pass-through" - ) - - if isinstance(input, metadata.Metadata): - if logger: - logger.debug(f"Converting metadata to {out_fmt} : {in_file} -> {out_file}") - - # render each audio instance separately - for audio_in in input.audio: - output.fs = out_fs - tmp = audio.fromtype(out_fmt) - tmp.fs = in_fs # resampling not yet applied - convert(audio_in, tmp, in_fs=in_fs, out_fs=out_fs, logger=logger, **kwargs) - if output.audio is not None: - output.audio += tmp.audio - else: - output.audio = tmp.audio - else: - if logger: - logger.debug(f"Converting {in_fmt} to {out_fmt} : {in_file} -> {out_file}") - # run main conversion method - output.fs = in_fs # resampling not yet applied - convert(input, output, in_fs=in_fs, out_fs=out_fs, logger=logger, **kwargs) - - # write output - write(out_file, output.audio, output.fs) - if isinstance(output, audio.ObjectBasedAudio): - write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True) - - -def convert( - input: audio.Audio, - output: audio.Audio, - in_trim: Optional[list] = None, - in_pad_noise: Optional[bool] = False, - in_delay: Optional[float] = None, - in_fs: Optional[int] = None, - in_cutoff: Optional[int] = None, - in_hp50: Optional[bool] = None, - in_window: Optional[list] = None, - in_loudness: Optional[float] = None, - in_loudness_fmt: Optional[str] = None, - out_trim: Optional[list] = None, - out_pad_noise: Optional[bool] = False, - out_delay: Optional[float] = None, - out_fs: Optional[int] = None, - out_cutoff: Optional[int] = None, - out_hp50: Optional[bool] = None, - out_window: Optional[list] = None, - out_loudness: Optional[float] = None, - out_loudness_fmt: Optional[str] = None, - limit: Optional[bool] = False, - mnru_q: Optional[float] = None, - esdru_alpha: Optional[float] = None, - logger: Optional[logging.Logger] = None, - **kwargs, -) -> None: - """Perform pre-processing, conversion and post-processing""" - - """pre-processing""" - process_audio( - x=input, - trim=in_trim, - pad_noise=in_pad_noise, - delay=in_delay, - fs=in_fs, - fc=in_cutoff, - hp50=in_hp50, - window=in_window, - loudness=in_loudness, - loudness_fmt=in_loudness_fmt, - logger=logger, - ) - - """format conversion""" - format_conversion(input, output, logger=logger, **kwargs) - - """post-processing""" - process_audio( - x=output, - trim=out_trim, - pad_noise=out_pad_noise, - delay=out_delay, - fs=out_fs, - fc=out_cutoff, - hp50=out_hp50, - window=out_window, - loudness=out_loudness, - loudness_fmt=out_loudness_fmt, - limit=limit, - mnru_q=mnru_q, - esdru_alpha=esdru_alpha, - logger=logger, - ) - - -def process_audio( - x: audio.Audio, - trim: Optional[list] = None, - pad_noise: Optional[bool] = False, - delay: Optional[float] = None, - fs: Optional[int] = None, - fc: Optional[int] = None, - hp50: Optional[bool] = False, - window: Optional[float] = None, - loudness: Optional[float] = None, - loudness_fmt: Optional[str] = None, - limit: Optional[bool] = False, - mnru_q: Optional[float] = None, - esdru_alpha: Optional[float] = None, - logger: Optional[logging.Logger] = None, -) -> None: - """Perform (pre-/post-) processing of audio""" - - if fs is None: - fs = x.fs - - """delay audio"""
- if delay is not None: - if logger: - logger.debug(f"Delaying audio by {delay} ms") - x.audio = audioarray.delay(x.audio, x.fs, delay) - - """trim or pad audio""" - if trim is not None: - if isinstance(x, audio.ObjectBasedAudio): - # metadata concatenation necessary for ISM - metadata.trim_meta(x, tuple(trim), pad_noise) - else: - x.audio = audioarray.trim(x.audio, x.fs, tuple(trim), pad_noise) - - """windowing""" - if window is not None: - if logger: - logger.debug(f"Windowing audio with {window} ms Hann window") - x.audio = audioarray.window(x.audio, x.fs, window) - - """high-pass (50 Hz) filtering""" - if hp50: - if logger: - logger.debug("Applying 50 Hz high-pass filter using ITU STL filter") - x.audio = hp50filter_itu(x) - - """resampling""" - if x.fs != fs: - if logger: - logger.debug(f"Resampling from {x.fs} to {fs} using ITU STL filter") - x.audio = resample_itu(x, fs) - x.fs = fs - - """loudness normalization""" - if loudness is not None: - if logger: - logger.debug( - f"Applying loudness adjustment to {loudness} LKFS for format {loudness_fmt} using ITU STL bs1770demo" - ) - x.audio = loudness_norm(x, loudness, loudness_fmt) - - """low-pass filtering""" - if fc is not None: - if logger: - logger.debug( - f"Applying low-pass filter with cutoff {fc} Hz using ITU STL filter" - ) - x.audio = lpfilter_itu(x, fc) - - """MNRU""" - if mnru_q is not None: - if logger: - logger.debug("Applying P.50 Fullband MNRU") - x.audio = p50fbmnru(x, mnru_q) - - """ESDRU""" - if esdru_alpha is not None: - if logger: - logger.debug("Applying ESDRU Recommendation ITU-T P.811") - x.audio = esdru(x, esdru_alpha) - - """limiting""" - if limit: - if logger: - logger.debug("Applying limiter") - audioarray.limiter(x.audio, x.fs) - - -def format_conversion( - input: audio.Audio, - output: audio.Audio, - logger: Optional[logging.Logger] = None, - **kwargs, -) -> None: - """Convert one audio format to another""" - - # validation - if isinstance(output, audio.MetadataAssistedSpatialAudio): - raise NotImplementedError("MASA is not supported as an output for rendering!") - - if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name: - raise NotImplementedError( - "ISM is not supported as an output for rendering! Only usable as pass-through" - ) - - if logger: - logger.debug(f"Format conversion: {input.name} -> {output.name}") - - if input.name == output.name or ( - input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL") - ): - output.audio = input.audio - else: - if isinstance(input, audio.BinauralAudio): - raise NotImplementedError( - f"{input.name} is not supported as an input for rendering!" 
- ) - elif isinstance(input, audio.ChannelBasedAudio): - convert_channelbased(input, output, **kwargs) - elif isinstance(input, audio.MetadataAssistedSpatialAudio): - convert_masa(input, output, **kwargs) - elif isinstance(input, audio.ObjectBasedAudio): - convert_objectbased(input, output, **kwargs) - elif isinstance(input, audio.SceneBasedAudio): - convert_scenebased(input, output, **kwargs) - else: - raise NotImplementedError( - f"Unknown or unsupported audio format {input.name}" - ) diff --git a/item_generation_scripts/audiotools/convert/binaural.py b/item_generation_scripts/audiotools/convert/binaural.py deleted file mode 100644 index b23e69ee..00000000 --- a/item_generation_scripts/audiotools/convert/binaural.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
-# - -from typing import Optional - -import numpy as np -from scipy.signal import fftconvolve - - -def NS2SA( - fs: float, - x: float, -) -> int: - """ - Converts from nanoseconds to number of samples - - Parameters - ---------- - fs: float - Sampling rate - x: float - Duration in nano seconds - - Returns - ------- - Number of samples - """ - - return int(int(fs / 100) * (x / 100) / 100000) - - -def binaural_fftconv( - x: np.ndarray, - IR: np.ndarray, - nchannels: int, - lfe_index: Optional[list[int]] = None, -) -> np.ndarray: - """ - Binauralization using fft convolution - - Parameters - ---------- - x: np.ndarray - Input multi-channel array - IR: np.ndarray - HRIRs array - nchannels: int - Maximum number of channels to process - lfe_index: Optional[list[int]] - List of LFE channel indices - - Returns - ------- - y: np.ndarray - Output convolved signal array - """ - - if lfe_index is None: - lfe_index = [] - - y = np.zeros([x.shape[0], 2]) - for chan_idx in range(min(x.shape[1], nchannels)): - if chan_idx not in lfe_index: - y[:, 0] = np.add( - y[:, 0], - fftconvolve(x[:, chan_idx].astype(float), IR[:, 0, chan_idx]).astype( - float - )[: x.shape[0]], - ) - y[:, 1] = np.add( - y[:, 1], - fftconvolve(x[:, chan_idx].astype(float), IR[:, 1, chan_idx]).astype( - float - )[: x.shape[0]], - ) - else: - ... - - return y diff --git a/item_generation_scripts/audiotools/convert/channelbased.py b/item_generation_scripts/audiotools/convert/channelbased.py deleted file mode 100644 index a8d941e2..00000000 --- a/item_generation_scripts/audiotools/convert/channelbased.py +++ /dev/null @@ -1,390 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. 
-# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -from pathlib import Path -from typing import Optional, Tuple, Union - -import numpy as np - -from item_generation_scripts.audiotools import audio -from item_generation_scripts.audiotools.audioarray import delay, framewise_io -from item_generation_scripts.audiotools.binaural_datasets.binaural_dataset import ( - load_ir, -) -from item_generation_scripts.audiotools.constants import ( - BINAURAL_LFE_GAIN, - IVAS_FRAME_LEN_MS, - IVAS_MC_CONVERSION, -) -from item_generation_scripts.audiotools.convert import scenebased -from item_generation_scripts.audiotools.convert.binaural import binaural_fftconv -from item_generation_scripts.audiotools.EFAP import EFAP -from item_generation_scripts.audiotools.rotation import Quat2RotMat, rotateAziEle -from item_generation_scripts.audiotools.wrappers.filter import resample_itu - -""" ChannelBasedAudio functions """ - - -def convert_channelbased( - cba: audio.ChannelBasedAudio, - out: audio.Audio, - **kwargs, -) -> audio.Audio: - """Convert channel-based audio to the requested output format""" - # CBA -> Binaural - if isinstance(out, audio.BinauralAudio): - render_cba_to_binaural(cba, out, **kwargs) - - # CBA -> CBA - elif isinstance(out, audio.ChannelBasedAudio): - render_cba_to_cba(cba, out) - - # CBA -> SBA - elif isinstance(out, audio.SceneBasedAudio): - render_cba_to_sba(cba, out) - - else: - raise NotImplementedError( - f"Conversion from {cba.name} to {out.name} is unsupported!" 
- ) - - return out - - -def render_cba_to_binaural( - cba: audio.ChannelBasedAudio, - bin: audio.BinauralAudio, - trajectory: Optional[Union[str, Path]] = None, - bin_dataset: Optional[str] = None, - bin_lfe_gain: Optional[float] = None, - **kwargs, -) -> None: - """ - Binauralization of channel-based audio - - Parameters - ---------- - cba: audio.ChannelBasedAudio - Channel-based input audio - bin: audio.BinauralAudio - Binaural output audio - trajectory: Optional[Union[str, Path]] - Head rotation trajectory path - bin_dataset: Optional[str] - Name of binaural dataset without prefix or suffix - bin_lfe_gain: Optional[float] - LFE gain for binaural rendering - """ - - if cba.name == "MONO": - # no binauralization possible for mono -> render to stereo and assume binaural signal - cba_stereo = audio.fromtype("STEREO") - cba_stereo.fs = bin.fs - render_cba_to_cba(cba, cba_stereo) - bin.audio = cba_stereo.audio - return - - cba.audio = resample_itu(cba, 48000) - old_fs = cba.fs - cba.fs = 48000 - bin.fs = 48000 - - if trajectory is not None: - cba.audio = rotate_cba(cba, trajectory) - - IR, _, latency_smp = load_ir(cba.name, bin.name, bin_dataset) - - # render LFE - if bin_lfe_gain is not None: - bin_lfe, lfe_delay_ns = render_lfe_to_binaural( - cba.audio, cba.fs, cba.lfe_index, bin_lfe_gain - ) - - # render rest of the signal - bin.audio = binaural_fftconv(cba.audio, IR, cba.num_channels, cba.lfe_index) - # compensate delay from binaural dataset - bin.audio = delay(bin.audio, bin.fs, -latency_smp, samples=True) - - # add LFE and rest - if bin_lfe_gain is not None: - bin.audio += bin_lfe - - bin.audio = resample_itu(bin, old_fs) - bin.fs = old_fs - - -def render_custom_ls_binaural( - custom_ls: audio.ChannelBasedAudio, - output: audio.BinauralAudio, - IR: np.ndarray, - SourcePosition: np.ndarray, - trajectory: str, -): - # TODO rework impl.
(with EFAP) - # logger.info(" Processing channels on custom LS layout") - # azis = ", ".join([f"{a:7.2f}" for a in ls_azi_all]) - # eles = ", ".join([f"{e:7.2f}" for e in ls_ele_all]) - # logger.info(f" azi: {azis}") - # logger.info(f" ele: {eles}") - # logger.info(f" lfe_index: {lfe_index_all}") - - # if output.name == "BINAURAL_ROOM": - # tmp = get_audio_type("MOZART") - # convert_channel_based(custom_ls, tmp) - # logger.info(f" {custom_ls.name} -> {tmp.name} -> {output.name}") - # custom_ls.audio = tmp.audio - # else: - # tmp = custom_ls - # - # ls_azi_all = tmp.ls_azi - # ls_ele_all = tmp.ls_ele - # lfe_index_all = tmp.lfe_index - # - # frame_len = (IVAS_FRAME_LEN_MS // 4) * (fs // 1000) - # sig_len = custom_ls.audio.shape[0] - # N_frames = int(sig_len / frame_len) - # - # i_ls = 0 - # y = np.zeros([sig_len, 2]) - # for i_chan in range(custom_ls.audio.shape[1]): - # - # # skip LFE - # if i_chan in lfe_index_all: - # continue - # - # # skip silent (or very low volume) channels - # if np.allclose(custom_ls.audio[:, i_chan], 0.0, atol=32.0): - # continue - # - # ls_azi = np.repeat(ls_azi_all[i_ls], N_frames) - # ls_ele = np.repeat(ls_ele_all[i_ls], N_frames) - # - # azi, ele = rotateISM(ls_azi, ls_ele, trajectory=trajectory) - # - # y += binaural_fftconv_framewise( - # custom_ls.audio[:, i_chan], - # IR, - # SourcePosition, - # frame_len=frame_len, - # azi=azi, - # ele=ele, - # ) - # i_ls += 1 - # - # return y - return - - -def render_cba_to_cba( - cba_in: audio.ChannelBasedAudio, cba_out: audio.ChannelBasedAudio -) -> None: - """ - Rendering of channel-based input signal to channel-based output - - Parameters - ---------- - cba_in: audio.ObjectBasedAudio - Channel-based input audio - cba_out: audio.ChannelBasedAudio - Channel-based output audio - """ - - # Stereo to Mono - if cba_in.name == "STEREO" and cba_out.name == "MONO": - render_mtx = np.vstack([[0.5], [0.5]]) - else: - try: - render_mtx = IVAS_MC_CONVERSION[cba_in.name][cba_out.name] - except KeyError: - # Use EFAP panning if no matrix was found - panner = EFAP( - np.delete(cba_out.ls_azi, cba_out.lfe_index).astype(float), - np.delete(cba_out.ls_ele, cba_out.lfe_index).astype(float), - ) - - render_mtx = np.vstack( - [ - panner.pan(a, e).T - for i, (a, e) in enumerate(zip(cba_in.ls_azi, cba_in.ls_ele)) - if i not in cba_in.lfe_index - ] - ) - - # pass-through for LFE - for index in np.sort(cba_in.lfe_index): - render_mtx = np.insert(render_mtx, index, 0, axis=0) - render_mtx = np.insert(render_mtx, cba_out.lfe_index, 0, axis=1) - render_mtx[cba_in.lfe_index, cba_out.lfe_index] = 1 - - if cba_out.num_channels <= 2: - render_mtx[cba_in.lfe_index, :] = 0 - - cba_out.audio = cba_in.audio @ render_mtx - - -def render_cba_to_sba(cba: audio.ChannelBasedAudio, sba: audio.SceneBasedAudio) -> None: - """ - Rendering of channel-based input signal to SBA output - - Parameters - ---------- - cba: audio.ObjectBasedAudio - Channel-based input audio - sba: audio.ChannelBasedAudio - SBA output audio - """ - - if cba.name == "MONO": - raise ValueError(f"Rendering from MONO to {sba.name} is not supported.") - - # SH response for loudspeaker positions - render_mtx = np.hstack( - [ - scenebased.getRSH(np.array([a]), np.array([e]), sba.ambi_order) - for a, e in zip(cba.ls_azi, cba.ls_ele) - ] - ).T - render_mtx[cba.lfe_index] = 0 - - sba.audio = cba.audio @ render_mtx - # do not add LFE to output - if sba.is_planar: - scenebased.zero_vert_channels(sba) - - -def rotate_cba( - cba: audio.ChannelBasedAudio, - trajectory: str, -) -> np.ndarray: - """ - 
Rotate MC signal by applying a rotation matrix calculated from the current quaternion - in each subframe - - Parameters: - ---------- - x: np.ndarray - Input multichannel signal - trajectory: str - Path to trajectory file - - Returns: - ---------- - y: np.ndarray - Rotated multichannel signal - """ - - trj_data = np.genfromtxt(trajectory, delimiter=",") - trj_frames = trj_data.shape[0] - - sig_len = cba.audio.shape[0] - sig_dim = cba.audio.shape[1] - frame_len = (IVAS_FRAME_LEN_MS // 4) * 48 - - out = np.zeros([sig_len, sig_dim]) - - panner = EFAP(cba.ls_azi, cba.ls_ele) - - fade_in = np.arange(frame_len) / (frame_len - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - R_old = np.eye(cba.num_channels) - - for i, (frame_in, frame_out) in framewise_io(cba.audio, out, frame_len): - # update the crossfade if we have a smaller last frame - if frame_out.shape[0] != frame_len: - frame_size = frame_out.shape[0] - fade_in = np.arange(frame_size) / (frame_size - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - q = trj_data[i % trj_frames, :] - rotated_pos = np.array( - [rotateAziEle(a, e, Quat2RotMat(q)) for a, e in zip(cba.ls_azi, cba.ls_ele)] - ) - R = panner.pan(rotated_pos[:, 0], rotated_pos[:, 1]) - R[:, [cba.lfe_index]] = 0 - R[[cba.lfe_index], :] = 0 - R[cba.lfe_index, cba.lfe_index] = 1 - - frame_out[:, :] = (fade_in * frame_in @ R) + (fade_out * frame_in @ R_old) - - R_old = R.copy() - - return out - - -""" Helper functions """ - - -def render_lfe_to_binaural( - x: np.ndarray, - fs: Optional[int] = 48000, - lfe_index: Optional[list] = None, - LFE_gain: Optional[float] = BINAURAL_LFE_GAIN, -) -> Tuple[np.ndarray, int]: - """ - Extract LFE from the given input and render - it binaurally, accounting for delay - """ - - lfe = x[:, lfe_index].copy() - - # if there is more than one LFE sum them into one - if lfe.shape[1] > 1: - lfe = np.sum(lfe, axis=1) - - """ - # 120 Hz low-pass filtering for LFE using IVAS filter coefficients - if fs == 48000: - lfe = sig.sosfilt(IVAS_LPF_4_BUTTER_48K_SOS, lfe, axis=0) - else: - raise NotImplementedError("Only 48 kHz supported at the moment!") - - # 3.5ms LP filter delay from IVAS ROM - lfe_delay_ns = 0.0035 * 1e9 - lfe_delay_smp = round(lfe_delay_ns * fs / 1e9) - - # Delay LFE by the same amount as the HRTF delay - lfe = np.roll(lfe, round(latency_smp), axis=0) - lfe[0 : round(latency_smp), :] = 0 - """ - lfe_delay_ns = 0 - - # apply gain - lfe *= LFE_gain - - # duplicate for each binaural channel - if len(np.shape(lfe)) < 2: - lfe = lfe[:, np.newaxis] - lfe = np.hstack([lfe, lfe]) - - return lfe, lfe_delay_ns diff --git a/item_generation_scripts/audiotools/convert/masa.py b/item_generation_scripts/audiotools/convert/masa.py deleted file mode 100644 index 15f1c683..00000000 --- a/item_generation_scripts/audiotools/convert/masa.py +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. 
-# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -from pathlib import Path -from typing import Optional, Union -from warnings import warn - -from item_generation_scripts.audiotools import audio -from item_generation_scripts.audiotools.convert import channelbased -from item_generation_scripts.audiotools.wrappers.masaRenderer import masaRenderer - -""" MetadataAssistedSpatialAudio functions """ - - -def convert_masa( - masa: audio.MetadataAssistedSpatialAudio, - out: audio.Audio, - **kwargs, -) -> audio.Audio: - """Convert Metadata Assisted Spatial audio to the requested output format""" - - # MASA -> Binaural - if isinstance(out, audio.BinauralAudio): - render_masa_to_binaural(masa, out, **kwargs) - - # MASA -> CBA - elif isinstance(out, audio.ChannelBasedAudio): - render_masa_to_cba(masa, out) - - # MASA -> SBA - elif isinstance(out, audio.SceneBasedAudio): - render_masa_to_sba(masa, out) - - else: - raise NotImplementedError( - f"Conversion from {masa.name} to {out.name} is unsupported!" - ) - - return out - - -def render_masa_to_binaural( - masa: audio.MetadataAssistedSpatialAudio, - bin: audio.BinauralAudio, - trajectory: Optional[Union[str, Path]] = None, - bin_dataset: Optional[str] = None, - **kwargs, -) -> None: - """ - Binauralization of MASA audio - - Parameters - ---------- - masa: audio.MetadataAssistedSpatialAudio - MASA input audio - bin: audio.BinauralAudio - Output binaural audio - trajectory: Optional[Union[str, Path]] - Head rotation trajectory path - bin_dataset: Optional[str] - Name of binaural dataset without prefix or suffix - """ - - if "ROOM" in bin.name: - cba_tmp = audio.fromtype("7_1_4") - cba_tmp.fs = masa.fs - - render_masa_to_cba(masa, cba_tmp) - - channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory) - else: - if trajectory is not None: - warn( - f"Head-rotation not supported by MasaRenderer! Trajectory {trajectory} will be ignored!" 
- ) - if bin_dataset is not None: - warn( - "Binaural dataset selection not supported by MasaRenderer - please copy the required hrir.bin manually!" - ) - - bin.audio = masaRenderer(masa, "BINAURAL") - - -def render_masa_to_cba( - masa: audio.MetadataAssistedSpatialAudio, - cba: audio.ChannelBasedAudio, -) -> None: - """ - Rendering of MASA input signal to Channel-based format - - Parameters - ---------- - masa: audio.MetadataAssistedSpatialAudio - MASA input audio - cba: audio.ChannelBasedAudio - Channel-based output audio - """ - - if cba.name not in ["5_1", "7_1_4"]: - warn( - f"MasaRenderer does not support {cba.name} natively. Using 7_1_4 as an intermediate format." - ) - - cba_tmp = audio.fromtype("7_1_4") - cba_tmp.fs = masa.fs - cba_tmp.audio = masaRenderer(masa, cba_tmp.name) - - channelbased.render_cba_to_cba(cba_tmp, cba) - else: - cba.audio = masaRenderer(masa, cba.name) - - -def render_masa_to_sba( - masa: audio.MetadataAssistedSpatialAudio, - sba: audio.SceneBasedAudio, -) -> None: - """ - Rendering of MASA input signal to SBA format - - Parameters - ---------- - masa: audio.MetadataAssistedSpatialAudio - MASA input audio - sba: audio.SceneBasedAudio - SBA output audio - """ - - warn( - f"MasaRenderer does not support {sba.name} natively. Using 7_1_4 as an intermediate format." - ) - - cba_tmp = audio.fromtype("7_1_4") - cba_tmp.fs = masa.fs - cba_tmp.audio = masaRenderer(masa, cba_tmp.name) - - channelbased.render_cba_to_sba(cba_tmp, sba) diff --git a/item_generation_scripts/audiotools/convert/objectbased.py b/item_generation_scripts/audiotools/convert/objectbased.py deleted file mode 100644 index 9fb74ed1..00000000 --- a/item_generation_scripts/audiotools/convert/objectbased.py +++ /dev/null @@ -1,352 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. 
-# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -from itertools import repeat -from pathlib import Path -from typing import Optional, Tuple, Union - -import numpy as np - -from item_generation_scripts.audiotools import audio -from item_generation_scripts.audiotools.audioarray import delay, framewise_io -from item_generation_scripts.audiotools.binaural_datasets.binaural_dataset import ( - load_ir, -) -from item_generation_scripts.audiotools.binauralobjectrenderer import ( - binaural_fftconv_framewise, -) -from item_generation_scripts.audiotools.constants import IVAS_FRAME_LEN_MS -from item_generation_scripts.audiotools.convert.channelbased import ( - render_cba_to_binaural, -) -from item_generation_scripts.audiotools.convert.scenebased import getRSH -from item_generation_scripts.audiotools.EFAP import EFAP, wrap_angles -from item_generation_scripts.audiotools.rotation import Quat2RotMat, rotateAziEle -from item_generation_scripts.audiotools.wrappers.filter import resample_itu -from item_generation_scripts.utils import apply_func_parallel - -""" ObjectBasedAudio functions """ - - -def convert_objectbased( - oba: audio.ObjectBasedAudio, - out: audio.Audio, - **kwargs, -) -> audio.Audio: - """Convert an ISM signal to the requested output format""" - - # OBA -> Binaural - if isinstance(out, audio.BinauralAudio): - render_oba_to_binaural(oba, out, **kwargs) - - # OBA -> CBA - elif isinstance(out, audio.ChannelBasedAudio): - render_oba_to_cba(oba, out) - - # OBA -> SBA - elif isinstance(out, audio.SceneBasedAudio): - render_oba_to_sba(oba, out) - else: - raise NotImplementedError( - f"Conversion from {oba.name} to {out.name} is unsupported!" 
- ) - - return out - - -def render_oba_to_binaural( - oba: audio.ObjectBasedAudio, - bin: audio.BinauralAudio, - trajectory: Optional[Union[str, Path]] = None, - bin_dataset: Optional[str] = None, - **kwargs, -) -> None: - """ - Binauralization of ISM input signal - - Parameters - ---------- - oba: audio.ObjectBasedAudio - Object based input audio - bin: audio.BinauralAudio - Binaural output audio - trajectory: Optional[Union[str, Path]] - Head rotation trajectory - bin_dataset: Optional[str] - Name of binaural dataset, if None default dataset is used - """ - - # bin.audio = np.zeros([oba.audio.shape[0], bin.num_channels]) - - if "ROOM" in bin.name: - cba_tmp = audio.fromtype("7_1_4") - cba_tmp.fs = oba.fs - - render_oba_to_cba(oba, cba_tmp) - - render_cba_to_binaural(cba_tmp, bin, trajectory) - else: - IR, SourcePosition, latency_smp = load_ir(oba.name, bin.name, bin_dataset) - - oba.audio = resample_itu(oba, 48000) - fs_old = oba.fs - oba.fs = 48000 - - # apply processing for every object in parallel - obj_pos = oba.object_pos - obj_idx = list(range(oba.num_channels)) - result = apply_func_parallel( - render_object, - zip( - obj_idx, - obj_pos, - repeat(oba), - repeat(trajectory), - repeat(IR), - repeat(SourcePosition), - ), - None, - "mt", - False, - ) - - # sum results over all objects - bin.audio = np.sum(np.stack(result, axis=2), axis=2) - - # compensate delay from binaural dataset - bin.audio = delay(bin.audio, bin.fs, -latency_smp, samples=True) - - bin.audio = resample_itu(bin, fs_old) - bin.fs = fs_old - - -def render_oba_to_cba( - oba: audio.ObjectBasedAudio, - cba: audio.ChannelBasedAudio, -) -> None: - """ - Rendering of ISM input signal to channel-based format - - Parameters - ---------- - oba: audio.ObjectBasedAudio - Object based input audio - cba: audio.ChannelBasedAudio - Channel-based output audio - """ - - cba.audio = np.zeros([oba.audio.shape[0], cba.num_channels]) - - for obj_idx, obj_pos in enumerate(oba.object_pos): - obj_audio = oba.audio[:, [obj_idx]] - pos_frames = obj_pos.shape[0] - - frame_len = IVAS_FRAME_LEN_MS * (oba.fs // 1000) - - fade_in = np.arange(frame_len) / (frame_len - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - # use EFAP for rendering - panner = EFAP( - np.delete(cba.ls_azi, cba.lfe_index), np.delete(cba.ls_ele, cba.lfe_index) - ) - gains_old = None - - for i, (frame_in, frame_out) in framewise_io(obj_audio, cba.audio, frame_len): - # update the crossfade if we have a smaller last frame - if frame_out.shape[0] != frame_len: - frame_size = frame_out.shape[0] - fade_in = np.arange(frame_size) / (frame_size - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - azi, ele = wrap_angles(*obj_pos[i % pos_frames, :2], clip_ele=True) - gains = panner.pan(azi, ele) - for lfe in np.sort(cba.lfe_index): - gains = np.insert(gains, lfe, 0) - gains = gains[np.newaxis, :] - - if gains_old is None: - gains_old = gains.copy() - - frame_out[:] += (fade_in * frame_in @ gains) + ( - fade_out * frame_in @ gains_old - ) - - gains_old = gains.copy() - - -def render_oba_to_sba( - oba: audio.ObjectBasedAudio, - sba: audio.SceneBasedAudio, -) -> None: - """ - Rendering of ISM input signal to SBA format - - Parameters - ---------- - oba: audio.ObjectBasedAudio - Object based input audio - sba: audio.SceneBasedAudio - SBA output audio - """ - - sba.audio = np.zeros([oba.audio.shape[0], sba.num_channels]) - - for obj_idx, obj_pos in enumerate(oba.object_pos): - obj_audio = oba.audio[:, [obj_idx]] - pos_frames = obj_pos.shape[0] - - 
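# Worked example for the frame arithmetic below (assuming the usual IVAS frame
# length of 20 ms): at fs = 48000, frame_len = 20 * (48000 // 1000) = 960
# samples, so the spherical-harmonic gains are crossfaded once per 20 ms frame.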
frame_len = IVAS_FRAME_LEN_MS * (oba.fs // 1000) - - fade_in = np.arange(frame_len) / (frame_len - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - gains_old = None - - for i, (frame_in, frame_out) in framewise_io(obj_audio, sba.audio, frame_len): - # update the crossfade if we have a smaller last frame - if frame_out.shape[0] != frame_len: - frame_size = frame_out.shape[0] - fade_in = np.arange(frame_size) / (frame_size - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - pos = obj_pos[i % pos_frames, :] - gains = getRSH(np.array([pos[0]]), np.array([pos[1]]), sba.ambi_order) - - if gains_old is None: - gains_old = gains.copy() - - frame_out[:] += (fade_in * frame_in @ gains.T) + ( - fade_out * frame_in @ gains_old.T - ) - - gains_old = gains.copy() - - -def rotate_oba( - azi: np.ndarray, - ele: np.ndarray, - trajectory: Optional[str] = None, -) -> Tuple[np.ndarray, np.ndarray]: - """ - Application of head tracking trajectory - - Parameters: - ---------- - azi: np.ndarray - Azimuth coordinates of objects - ele: np.ndarray - Elevation coordinates of objects - trajectory: str - Head-tracking trajectory path - - Returns: - ---------- - azi_rot: np.ndarray - Azimuth coordinates after application of trajectory - ele_rot: np.ndarray - Elevation coordinates after application of trajectory - """ - - if trajectory is None: - return azi, ele - - trj_data = np.genfromtxt(trajectory, delimiter=",") - trj_frames = trj_data.shape[0] - - N_frames = azi.shape[0] - if ele.shape[0] != azi.shape[0]: - raise ValueError("Inconsistent input in azi and ele") - - azi_rot = np.zeros([N_frames]) - ele_rot = np.zeros([N_frames]) - - for i_frame in range(N_frames): - q = trj_data[i_frame % trj_frames, :] - azi_rot[i_frame], ele_rot[i_frame] = rotateAziEle( - azi[i_frame], ele[i_frame], Quat2RotMat(q) - ) - - return azi_rot, ele_rot - - -def render_object( - obj_idx: int, - obj_pos: np.ndarray, - oba: audio.ObjectBasedAudio, - trajectory: str, - IR: np.ndarray, - SourcePosition: np.ndarray, -) -> np.ndarray: - """ - Binaural rendering for one ISM object - - Parameters: - ---------- - obj_idx: int - Index of object in list of all objects - obj_pos: np.ndarray - Position of object - oba: audio.ObjectBasedAudio - Input ISM audio object - trajectory: str - Head-tracking trajectory path - IR: np.ndarray - HRIRs for binauralization - SourcePosition: np.ndarray - Positions of HRIR measurements - - Returns: - ---------- - result_audio: np.ndarray - Binaurally rendered object - """ - - # repeat each value four times since head rotation data is on sub-frame basis - azi = np.repeat(obj_pos[:, 0], 4) - ele = np.repeat(obj_pos[:, 1], 4) - # apply head-rotation trajectory - obj_audio = oba.audio[:, [obj_idx]] - azi, ele = rotate_oba(azi, ele, trajectory) - # convolve signal with HRIRs - result_audio = binaural_fftconv_framewise( - obj_audio, - IR, - SourcePosition, - azi, - ele, - ) - return result_audio diff --git a/item_generation_scripts/audiotools/convert/scenebased.py b/item_generation_scripts/audiotools/convert/scenebased.py deleted file mode 100644 index a7e89b4f..00000000 --- a/item_generation_scripts/audiotools/convert/scenebased.py +++ /dev/null @@ -1,429 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -from pathlib import Path -from typing import Optional, Union -from warnings import warn - -import numpy as np -from scipy.special import lpmv - -from item_generation_scripts.audiotools import audio -from item_generation_scripts.audiotools.audioarray import delay, framewise_io -from item_generation_scripts.audiotools.binaural_datasets.binaural_dataset import ( - load_ir, -) -from item_generation_scripts.audiotools.constants import ( - IVAS_FRAME_LEN_MS, - T_DESIGN_11_AZI, - T_DESIGN_11_ELE, - VERT_HOA_CHANNELS_ACN, -) -from item_generation_scripts.audiotools.convert import channelbased -from item_generation_scripts.audiotools.convert.binaural import binaural_fftconv -from item_generation_scripts.audiotools.EFAP import EFAP -from item_generation_scripts.audiotools.rotation import Quat2RotMat, SHrotmatgen -from item_generation_scripts.audiotools.wrappers.filter import resample_itu - -""" SceneBasedAudio functions """ - - -def convert_scenebased( - sba: audio.SceneBasedAudio, - out: audio.Audio, - **kwargs, -) -> audio.Audio: - """Convert scene-based audio to the requested output format""" - - # SBA -> Binaural - if isinstance(out, audio.BinauralAudio): - render_sba_to_binaural(sba, out, **kwargs) - - # SBA -> CBA - elif isinstance(out, audio.ChannelBasedAudio): - render_sba_to_cba(sba, out) - - # SBA -> SBA - elif isinstance(out, audio.SceneBasedAudio): - render_sba_to_sba(sba, out) - else: - raise NotImplementedError( - f"Conversion from {sba.name} to {out.name} is unsupported!" 
- ) - - return out - - -def render_sba_to_binaural( - sba: audio.SceneBasedAudio, - bin: audio.BinauralAudio, - trajectory: Optional[Union[str, Path]] = None, - bin_dataset: Optional[str] = None, - **kwargs, -) -> None: - """ - Binauralization of scene-based audio - - Parameters - ---------- - sba: audio.SceneBasedAudio - Input SBA audio - bin: audio.BinauralAudio - Output binaural audio - trajectory: Optional[Union[str, Path]] - Head rotation trajectory path - bin_dataset: Optional[str] - Name of binaural dataset without prefix or suffix - """ - - if trajectory is not None: - sba.audio = rotate_sba(sba, trajectory) - - if "ROOM" in bin.name: - cba_tmp = audio.fromtype("7_1_4") - cba_tmp.fs = sba.fs - - render_sba_to_cba(sba, cba_tmp) - - channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory) - else: - IR, _, latency_smp = load_ir(sba.name, bin.name, bin_dataset) - - sba.audio = resample_itu(sba, 48000) - fs_old = sba.fs - sba.fs = 48000 - - bin.audio = binaural_fftconv(sba.audio, IR, sba.num_channels) - - # compensate delay from binaural dataset - bin.audio = delay(bin.audio, bin.fs, -latency_smp, samples=True) - - bin.audio = resample_itu(bin, fs_old) - bin.fs = fs_old - - -def render_sba_to_cba( - sba: audio.SceneBasedAudio, - cba: audio.ChannelBasedAudio, -) -> None: - """ - Rendering of SBA input signal to channel-based format - - Parameters - ---------- - sba: audio.SceneBasedAudio - Scene-based input audio - cba: audio.ChannelBasedAudio - Channel-based output audio - """ - - render_mtx = get_allrad_mtx(sba.ambi_order, cba) - cba.audio = sba.audio @ render_mtx.T - - -def render_sba_to_sba( - sba_in: audio.SceneBasedAudio, - sba_out: audio.SceneBasedAudio, -) -> None: - """ - Rendering of SBA input signal to SBA output format - - Parameters - ---------- - sba_in: audio.SceneBasedAudio - Scene-based input audio - sba_out: audio.SceneBasedAudio - Scene-based output audio - """ - - if sba_out.ambi_order > sba_in.ambi_order: - sba_out.audio = np.pad( - sba_in.audio, [[0, 0], [0, sba_out.num_channels - sba_in.num_channels]] - ) - elif sba_out.ambi_order < sba_in.ambi_order: - sba_out.audio = sba_in.audio[:, : sba_out.num_channels] - else: - # equal ambisonics orders: pass audio through unchanged - sba_out.audio = sba_in.audio - - if sba_out.is_planar: - zero_vert_channels(sba_out) - - -def rotate_sba( - sba: audio.SceneBasedAudio, - trajectory: str, -) -> np.ndarray: - """ - Rotate HOA signal by applying a rotation matrix calculated from the current quaternion - in each subframe - - Parameters: - ---------- - sba: audio.SceneBasedAudio - Input SBA signal, up to HOA3 - trajectory: str - Path to trajectory file - - Returns: - ---------- - y: np.ndarray - Rotated HOA signal - """ - - trj_data = np.genfromtxt(trajectory, delimiter=",") - trj_frames = trj_data.shape[0] - - sig_len = sba.audio.shape[0] - sig_dim = sba.audio.shape[1] - frame_len = (IVAS_FRAME_LEN_MS // 4) * 48 - - if sig_dim not in [4, 9, 16]: - raise ValueError("rotate_sba can only handle FOA, HOA2 or HOA3 signals!") - - out = np.zeros([sig_len, sig_dim]) - - fade_in = np.arange(frame_len) / (frame_len - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - R = np.eye(sig_dim) - R_old = np.eye(sig_dim) - for i, (frame_in, frame_out) in framewise_io(sba.audio, out, frame_len): - # update the crossfade if we have a smaller last frame - if frame_out.shape[0] != frame_len: - frame_size = frame_out.shape[0] - fade_in = np.arange(frame_size) / (frame_size - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - R_r = Quat2RotMat(trj_data[i % trj_frames, :]) - R[:, :] = SHrotmatgen(R_r,
order=ambi_order_from_nchan(sig_dim)) - - frame_out[:, :] = (fade_in * frame_in @ R.T) + (fade_out * frame_in @ R_old.T) - - R_old[:, :] = R.copy() - - return out - - -""" Helper functions """ - - -def zero_vert_channels(sba: audio.SceneBasedAudio) -> None: - """Remove all ambisonics parts with vertical components""" - sba.audio[:, VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < sba.num_channels]] = 0 - - -def nchan_from_ambi_order(ambi_order: int) -> int: - """Compute number of channels based on ambisonics order""" - return (ambi_order + 1) ** 2 - - -def ambi_order_from_nchan(nchan: int) -> int: - """Compute ambisonics order based on number of channels""" - return int(np.sqrt(nchan) - 1) - - -def rE_weight(order: int) -> np.ndarray: - """Compute max-rE weighting matrix""" - return np.array( - [ - lpmv(0, l, np.cos(np.deg2rad(137.9) / (order + 1.51))) - for l in range(order + 1) - for _ in range(-l, l + 1) - ] - ).T - - -def n2sn(order: int) -> np.ndarray: - """Compute conversion matrix for N3D to SN3D normalization""" - return np.array( - [1.0 / np.sqrt(2 * l + 1) for l in range(order + 1) for _ in range(-l, l + 1)] - ) - - -def sn2n(order: int) -> np.ndarray: - """Compute conversion matrix for SN3D to N3D normalization""" - return np.array( - [np.sqrt(2 * l + 1) for l in range(order + 1) for _ in range(-l, l + 1)] - ) - - -def getRSH( - azi: np.ndarray, - ele: np.ndarray, - ambi_order: int, - norm: Optional[str] = "sn3d", - degrees: Optional[bool] = True, -) -> np.ndarray: - """ - Returns real spherical harmonic response for the given position(s) - - Parameters: - ---------- - azi: np.ndarray - Azimuth angles - ele: np.ndarray - Elevation angles - ambi_order: int - Ambisonics order - norm: Optional[str] - Normalization of ambisonic bases. - Possible values: "sn3d", "n3d", everything else is interpreted as orthogonal - degrees: Optional[bool] - If true azi and ele are interpreted as angles in degrees, otherwise as radians - - Returns: - ---------- - response: np.ndarray - Real spherical harmonic response - """ - - if degrees: - azi = np.deg2rad(azi) - ele = np.deg2rad(ele) - - azi = azi.astype("float64") - ele = ele.astype("float64") - - LM = np.array([(l, m) for l in range(ambi_order + 1) for m in range(-l, l + 1)]) - - response = np.zeros([LM.shape[0], azi.shape[0]]) - - # trig_term * legendre * uncondon - for i, (l, m) in enumerate(LM): - # N3D norm - response[i, :] = np.sqrt( - ((2 * l + 1) * float(np.math.factorial(l - np.abs(m)))) - / (4 * np.pi * float(np.math.factorial(l + np.abs(m)))) - ) - - # trig term - if m < 0: - response[i, :] *= np.sqrt(2) * np.sin(azi * np.abs(m)) - elif m == 0: - pass # response[i,:] *= 1 - else: - response[i, :] *= np.sqrt(2) * np.cos(azi * m) - - # legendre polynomial - a = lpmv(np.abs(m), l, np.sin(ele)) * ((-1) ** np.abs(m)) - if np.inf in a or -np.inf in a: - a[a == np.inf] = np.finfo(np.float64).max - a[a == -np.inf] = np.finfo(np.float64).min - warn( - "Warning: order too large -> leads to overflow. Inf values are discarded!" 
- ) - response[i, :] *= a - - if norm == "sn3d": - response *= np.sqrt(4 * np.pi) - response[:] = np.diag(n2sn(ambi_order)) @ response - elif norm == "n3d": - response *= np.sqrt(4 * np.pi) - else: - pass # ortho - - return response - - -def get_allrad_mtx( - ambi_order: int, - cba: audio.ChannelBasedAudio, - norm: Optional[str] = "sn3d", - rE_weight_bool: Optional[bool] = False, - intensity_panning: Optional[bool] = True, -) -> np.ndarray: - """ - Returns ALLRAD matrix - - Parameters: - ---------- - ambi_order: int - Ambisonics order - cba: audio.ChannelBasedAudio - Channel-based audio object - norm: Optional[str] - Normalization of ambisonic bases. - Possible values: "sn3d", "ortho", everything else is interpreted as n3d - rE_weight_bool: Optional[bool] - Flag for max-rE weighting - intensity_panning: Optional[bool] - Flag for intensity panning - - Returns: - ---------- - hoa_dec: np.ndarray - ALLRAD matrix - """ - - n_harm = nchan_from_ambi_order(ambi_order) - - if cba.name == "MONO": - hoa_dec = np.zeros([1, n_harm]) - hoa_dec[0, 0] = 1 - elif cba.name == "STEREO": - hoa_dec = np.zeros([2, n_harm]) - # Cardioids +/- 90 degrees - hoa_dec[0, 0] = 0.5 - hoa_dec[0, 1] = 0.5 - hoa_dec[1, 0] = 0.5 - hoa_dec[1, 1] = -0.5 - else: - Y_td = getRSH( - T_DESIGN_11_AZI, - T_DESIGN_11_ELE, - ambi_order, - norm="ortho", - ) - Y_td *= np.sqrt(4 * np.pi) - - n_ls_woLFE = cba.num_channels - len(cba.lfe_index) - ls_azi_woLFE = np.delete(cba.ls_azi, cba.lfe_index).astype(float) - ls_ele_woLFE = np.delete(cba.ls_ele, cba.lfe_index).astype(float) - - panner = EFAP(ls_azi_woLFE, ls_ele_woLFE, intensity_panning) - G_td = panner.pan(T_DESIGN_11_AZI, T_DESIGN_11_ELE) - - hoa_dec = (G_td.T @ Y_td.T) / T_DESIGN_11_AZI.size - - if norm == "sn3d": - hoa_dec = hoa_dec @ np.diag(sn2n(ambi_order)) - elif norm == "ortho": - hoa_dec *= np.sqrt(4 * np.pi) - - if rE_weight_bool: - a_n = rE_weight(ambi_order) - nrg_pre = np.sqrt(n_ls_woLFE / np.sum(a_n**2)) - hoa_dec = hoa_dec @ np.diag(a_n) * nrg_pre - - hoa_dec = np.insert(hoa_dec, cba.lfe_index, np.zeros(n_harm), axis=0) - - return hoa_dec diff --git a/item_generation_scripts/audiotools/metadata.py b/item_generation_scripts/audiotools/metadata.py deleted file mode 100644 index 0a4631ae..00000000 --- a/item_generation_scripts/audiotools/metadata.py +++ /dev/null @@ -1,571 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software.
This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -import csv -from pathlib import Path -from typing import Optional, TextIO, Tuple, Union - -import numpy as np - -from item_generation_scripts.audiotools import audio -from item_generation_scripts.audiotools.audio import fromtype -from item_generation_scripts.audiotools.audioarray import trim -from item_generation_scripts.audiotools.audiofile import read -from item_generation_scripts.audiotools.constants import IVAS_FRAME_LEN_MS - - -class Metadata: - def __init__(self, meta_file: Union[str, Path]): - self.meta_file = Path(meta_file) - - if not self.meta_file.exists(): - raise FileNotFoundError( - f"Scene description file {self.meta_file} does not exist!" - ) - - with open(self.meta_file) as f: - audio_file = self.meta_file.parent.joinpath(f.readline().strip()).absolute() - - if audio_file.suffix != ".wav": - raise ValueError( - "Scene description files can only be used with WAVE input!" 
- ) - - self.audio_array, self.fs = read(audio_file) - self.audio = [] - - num_audio = int(f.readline().strip()) - for _ in range(num_audio): - in_fmt = f.readline().strip().upper() - - if in_fmt == "ISM": - self.parse_ism_input(f) - elif in_fmt == "MASA": - self.parse_masa_input(f) - elif in_fmt == "MC": - self.parse_mc_input(f) - elif in_fmt == "SBA": - self.parse_sba_input(f) - else: - raise KeyError(f"Unknown input type in metadata file {in_fmt}") - - def parse_ism_input(self, f: TextIO): - start = int(f.readline().strip()) - 1 - - ism = fromtype("ISM1") - ism.audio = self.audio_array[:, start : start + 1] - ism.fs = self.fs - - line = f.readline().strip() - tmp_path = self.meta_file.parent.joinpath(line).absolute() - if tmp_path.exists(): - # csv metadata - ism.metadata_files = [tmp_path] - ism.init_metadata() - else: - # manually specified metadata - positions = [f.readline().strip() for _ in range(int(line))] - positions = np.genfromtxt( - positions, delimiter="," - ) # TODO can use ndmin = 2 with numpy > 1.23.0; check support - if positions.ndim == 1: - positions = positions[np.newaxis, :] - - obj_pos = [] - # repeat based on first column - for p in positions: - repeats = int(p[0]) - obj_pos.append(np.tile(p[1:], [repeats, 1])) - obj_pos = np.vstack(obj_pos) - - ism.object_pos = [obj_pos] - - self.audio.append(ism) - - def parse_masa_input(self, f: TextIO): - start = int(f.readline().strip()) - 1 - masa_tc = int(f.readline().strip()) - - masa = fromtype(f"MASA{masa_tc}") - masa.audio = self.audio_array[:, start : start + masa_tc] - masa.fs = self.fs - masa.metadata_files = [ - self.meta_file.parent.joinpath(f.readline().strip()).absolute() - ] - masa.init_metadata() - - self.audio.append(masa) - - def parse_mc_input(self, f: TextIO): - start = int(f.readline().strip()) - 1 - mc_fmt = f.readline().strip() - - mc = fromtype(mc_fmt) - mc.audio = self.audio_array[:, start : start + mc.num_channels] - mc.fs = self.fs - - self.audio.append(mc) - - def parse_sba_input(self, f: TextIO): - start = int(f.readline().strip()) - 1 - sba_order = int(f.readline().strip()) - - sba = fromtype(f"SBA{sba_order}") - sba.audio = self.audio_array[:, start : start + sba.num_channels] - sba.fs = self.fs - - self.audio.append(sba) - - def parse_optional_values(self, f: TextIO): - raise NotImplementedError( - "Additional configuration keys in metadata currently unsupported!" - ) - - # opts = {} - # original_pos = f.tell() - # key_value = f.readline().strip() - - # try to parse a key, otherwise reset read pointer - # for key in OPT_KEYS: - # if key_value.startswith(key): - # opts[key] = key_value.replace(key, "").replace(":", "") - # original_pos = f.tell() - # key_value = f.readline.strip() - # else: - # f.seek(original_pos) - # - - -def write_ISM_metadata_in_file( - metadata: list[np.ndarray], - file_name: list[Union[str, Path]], - automatic_naming: Optional[bool] = False, -) -> list[str, Path]: - """ - Write ISM metadata into csv file(s) - - Parameters - ---------- - metadata: list[np.ndarray] - List of metadata arrays - file_name: list[Union[str, Path]] - List of file names for csv files - automatic_naming: Optional[bool] - If true files are named automatically name.0.csv, name.1.csv, ... 
with name as the first entry of file_name - - Returns - ---------- - file_names: list[Union[str, Path]] - List of actually used file names - """ - - if not automatic_naming and len(metadata) != len(file_name): - raise ValueError("Number of metadata objects and file names has to match") - number_objects = len(metadata) - - if automatic_naming: - file_names = [] - for m_object in range(number_objects): - file_names.append(f"{file_name[0]}.{m_object}.csv") - else: - file_names = file_name - - for i, csv_file in enumerate(file_names): - number_frames = metadata[i].shape[0] - with open(csv_file, "w", newline="") as file: - writer = csv.writer(file) - for k in range(number_frames): - row_list = [ - "%+07.2f" % np.round(metadata[i][k, 0], 2), - "%+06.2f" % np.round(metadata[i][k, 1], 2), - "01.00", - "000.00", - "1.00", - ] - writer.writerow(row_list) - - return file_names - - -def trim_meta( - x: audio.ObjectBasedAudio, - limits: Optional[Tuple[int, int]] = None, - pad_noise: Optional[bool] = False, - samples: Optional[bool] = False, -) -> None: - """ - Trim or pad ISM including metadata - positive limits trim, negative limits pad - - Parameters - ---------- - x: audio.ObjectBasedAudio - ISM audio object - limits: Optional[Tuple[int, int]] - Number of samples to trim or pad at beginning and end - pad_noise: Optional[bool] - Flag for padding noise instead of silence - samples: Optional[bool] - Flag for interpreting limits as samples, otherwise milliseconds - """ - - if not limits: - return - - frame_length = int(IVAS_FRAME_LEN_MS * x.fs // 1000) - - # check if trim values are multiples of the frame length - if not samples: - pre_trim = int(limits[0] * x.fs // 1000) - post_trim = int(limits[1] * x.fs // 1000) - else: - pre_trim = limits[0] - post_trim = limits[1] - - if pre_trim % frame_length != 0 or post_trim % frame_length != 0: - raise ValueError( - f"ISM metadata padding and trimming only possible if pad/trim length is multiple of frame length. " - f"Frame length: {IVAS_FRAME_LEN_MS}ms" - ) - - # check if audio is multiple of frame length - if np.shape(x.audio)[0] % frame_length != 0: - raise ValueError( - f"ISM metadata padding and trimming only possible if audio length is multiple of frame length. " - f"Frame length: {IVAS_FRAME_LEN_MS}ms" - ) - - # check if metadata length fits exactly to audio length - for meta in x.object_pos: - if np.shape(meta)[0] * frame_length != np.shape(x.audio)[0]: - raise ValueError( - f"ISM metadata padding and trimming only possible if audio length is multiple of frame " - f"length and audio and metadata length match.
Frame length: {IVAS_FRAME_LEN_MS}ms" - ) - - # trim audio - x.audio = trim(x.audio, x.fs, limits, pad_noise, samples) - - # trim metadata - trim_frames_pre = int(pre_trim / frame_length) - trim_frames_post = int(post_trim / frame_length) - for i in range(len(x.object_pos)): - x.object_pos[i] = trim( - x.object_pos[i], - limits=(trim_frames_pre, trim_frames_post), - pad_noise=False, - samples=True, - ) - - # add radius 1 - if trim_frames_pre < 0: - x.object_pos[i][: abs(trim_frames_pre), 2] = 1 - if trim_frames_post < 0: - x.object_pos[i][abs(trim_frames_post) :, 2] = 1 - - return - - -def concat_meta_from_file( - audio_files: list[str], - meta_files: list[list[str]], - out_file: list[str], - input_fmt: str, - silence_pre: Optional[int] = 0, - silence_post: Optional[int] = 0, - preamble: Optional[int] = None, -) -> None: - """ - Concatenate ISM metadata from files - - Parameters - ---------- - audio_files: list[str] - List of audio file names - meta_files: list[list[str]] - List of corresponding metadata file names - out_file: list[str] - Name of concatenated output file - input_fmt: str - Input audio format - silence_pre: Optional[int] - Silence inserted before each item - silence_post: Optional[int] - Silence inserted after each item - preamble: Optional[int] - Length of preamble in milliseconds - """ - - # create audio objects - audio_objects = [] - fs = None - for i, audio_file in enumerate(audio_files): - # metadata is cut/looped to signal length in init of audio object - audio_object = audio.fromfile(input_fmt, audio_file, in_meta=meta_files[i]) - audio_objects.append(audio_object) - if fs: - if audio_object.fs != fs: - raise ValueError("Sampling rates of files to concatenate don't match") - else: - fs = audio_object.fs - - frame_length = int(IVAS_FRAME_LEN_MS * audio_objects[0].fs // 1000) - - # pad and concatenate - concat_meta_all_obj = [None] * audio_objects[0].num_channels - - for audio_item in audio_objects: - # check if audio is multiple of frame length - if np.shape(audio_item.audio)[0] % frame_length != 0: - raise ValueError( - f"ISM metadata padding and trimming only possible if audio length is multiple of frame length. " - f"Frame length: {IVAS_FRAME_LEN_MS}ms" - ) - - # check if metadata length fits exactly to audio length - for meta in audio_item.object_pos: - if np.shape(meta)[0] * frame_length != np.shape(audio_item.audio)[0]: - raise ValueError( - f"ISM metadata padding and trimming only possible if audio length is multiple of frame " - f"length and audio and metadata length match. 
Frame length: {IVAS_FRAME_LEN_MS}ms" - ) - - # pad - trim_meta( - audio_item, (-silence_pre, -silence_post) - ) # use negative value since we want to pad, not trim - - # concatenate - for idx, obj_pos in enumerate(audio_item.object_pos): - concat_meta_all_obj[idx] = ( - np.concatenate([concat_meta_all_obj[idx], obj_pos]) - if concat_meta_all_obj[idx] is not None - else obj_pos - ) - - # add preamble - if preamble: - concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble) - - write_ISM_metadata_in_file(concat_meta_all_obj, out_file) - - return - - -def split_meta_in_file( - in_filename: Union[str, Path], - out_folder: Union[str, Path], - split_filenames: list[Union[str, Path]], - splits: list[int], - input_fmt: str, - meta_files: Optional[list[Union[str, Path]]] = None, - in_fs: Optional[int] = 48000, - preamble: Optional[int] = 0, -): - """ - Splits ISM metadata files into multiple shorter files - - Parameters - ---------- - in_filename: Union[str, Path] - Input filename (.pcm, .raw or .wav) - out_folder: Union[str, Path] - Output folder where to put the splits - split_filenames: list[Union[str, Path]] - List of names for the split files - splits: list[int] - List of sample indices where to cut the signal - input_fmt: str - Input audio format - meta_files: Optional[list[Union[str, Path]]] - List of ISM metadata file names - in_fs: Optional[int] - Input sampling rate, default 48000 Hz - preamble: Optional[int] - Length of preamble in milliseconds to remove before splitting - """ - - # create a list of output files - out_paths = [] - - # Read input file by creating ISM audio object - audio_object = audio.fromfile(input_fmt, in_filename, in_meta=meta_files, fs=in_fs) - - split_old = 0 - for idx, split in enumerate(splits): - out_paths_obj = [] - for obj in range(audio_object.num_channels): - out_file = ( - Path(out_folder) - / f"{Path(split_filenames[idx]).with_suffix(in_filename.suffix)}.{obj}.csv" - ) - - # add the path to our list - out_paths_obj.append(out_file) - - # remove preamble - if preamble: - preamble_frames = int(preamble / IVAS_FRAME_LEN_MS) - y = trim( - audio_object.object_pos[obj], - audio_object.fs, - (preamble_frames, 0), - samples=True, - ) - else: - y = audio_object.object_pos[obj] - - # split - split_start = int(split_old / IVAS_FRAME_LEN_MS / audio_object.fs * 1000) - split_end = int(split / IVAS_FRAME_LEN_MS / audio_object.fs * 1000) - y = y[split_start:split_end, :] - - # write file - write_ISM_metadata_in_file([y], [out_file]) - - out_paths.append(out_paths_obj) - - split_old = split - - return out_paths - - -def check_ISM_metadata( - in_meta: dict, - num_objects: int, - num_items: int, - item_names: Optional[list] = None, -) -> list: - """Find ISM metadata""" - - list_meta = [] - if in_meta is None: - for item in item_names: - list_item = metadata_search(Path(item).parent, [item], num_objects) - list_meta.append(list_item) - else: - if len(in_meta) == 1 and num_items != 1: - # automatic search for metadata files in folder for all items and objects - try: - path_meta = in_meta["all_items"] - except KeyError: - raise ValueError( - 'Only one metadata path is given but not with key "all_items".'
- ) - - list_meta = metadata_search(path_meta, item_names, num_objects) - - elif num_items == len(in_meta): - # search for every item individually - for item_idx in range(num_items): - # try to use item_names as keys - try: - if item_names: - current_item = in_meta[item_names[item_idx].name] - else: - raise KeyError - except KeyError: - current_item = in_meta[f"item{item_idx + 1}"] - - if len(current_item) == 1: - # automatic search in folder - list_item = metadata_search( - current_item[0], [item_names[item_idx]], num_objects - ) - - elif len(current_item) == num_objects: - # just read out - list_item = current_item - else: - raise ValueError("Number of objects and metadata does not match.") - list_meta.append(list_item) - else: - raise ValueError("Number of metadata inputs does not match number of items") - - # return list of lists of metadata files - return list_meta - - -def metadata_search( - in_meta_path: Union[str, Path], - item_names: list[Union[str, Path]], - num_objects: int, -) -> list[list[Union[Path, str]]]: - """Search for ISM metadata with structure item_name.{0-3}.csv in in_meta folder""" - - if not item_names: - raise ValueError("Item names not provided, can't search for metadata") - - list_meta = [] - for item in item_names: - list_item = [] - for obj_idx in range(num_objects): - file_name_meta = in_meta_path / Path(item.stem).with_suffix( - f"{item.suffix}.{obj_idx}.csv" - ) - # check if file exists and add to list - if file_name_meta.is_file(): - list_item.append(file_name_meta) - else: - raise ValueError(f"Metadata file {file_name_meta} not found.") - if len(item_names) == 1: - list_meta = list_item - else: - list_meta.append(list_item) - - return list_meta - - -def add_remove_preamble( - metadata, - preamble, - add: Optional[bool] = True, -): - preamble_frames = preamble / IVAS_FRAME_LEN_MS - if not preamble_frames.is_integer(): - raise ValueError( - f"Application of preamble for ISM metadata is only possible if preamble length is multiple of frame length. " - f"Frame length: {IVAS_FRAME_LEN_MS}ms" - ) - for obj_idx in range(len(metadata)): - if metadata is not None and metadata[obj_idx] is not None: - if add: - metadata[obj_idx] = trim( - metadata[obj_idx], - limits=(-int(preamble_frames), 0), - samples=True, - ) - - # add radius 1 - metadata[obj_idx][: int(preamble_frames), 2] = 1 - else: - metadata[obj_idx] = trim( - metadata[obj_idx], - limits=(int(preamble_frames), 0), - samples=True, - ) - - return metadata diff --git a/item_generation_scripts/audiotools/rotation.py b/item_generation_scripts/audiotools/rotation.py deleted file mode 100644 index 742548a8..00000000 --- a/item_generation_scripts/audiotools/rotation.py +++ /dev/null @@ -1,379 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -from typing import Optional, Tuple - -import numpy as np - -""" -Helper functions used by Ruedenberg, -an implementation of the algorithm in -Ivanic, J. & Ruedenberg, K., J. Phys. Chem. 100, 6342 (1996) -translated from ivas_rotation.c -""" - - -def SHrot_p( - i: int, - l: int, - a: int, - b: int, - SHrotmat: np.ndarray, - R_lm1: np.ndarray, -) -> float: - """Helper function to calculate the ps""" - - ri1 = SHrotmat[i + 1 + 1][1 + 1 + 1] - rim1 = SHrotmat[i + 1 + 1][-1 + 1 + 1] - ri0 = SHrotmat[i + 1 + 1][0 + 1 + 1] - - if b == -l: - R_lm1_1 = R_lm1[a + l - 1][0] - R_lm1_2 = R_lm1[a + l - 1][2 * l - 2] - p = ri1 * R_lm1_1 + rim1 * R_lm1_2 - else: - if b == l: - R_lm1_1 = R_lm1[a + l - 1][2 * l - 2] - R_lm1_2 = R_lm1[a + l - 1][0] - p = ri1 * R_lm1_1 - rim1 * R_lm1_2 - else: - R_lm1_1 = R_lm1[a + l - 1][b + l - 1] - p = ri0 * R_lm1_1 - - return p - - -def SHrot_u( - l: int, - m: int, - n: int, - SHrotmat: np.ndarray, - R_lm1: np.ndarray, -) -> float: - """Helper function to calculate the us""" - - return SHrot_p(0, l, m, n, SHrotmat, R_lm1) - - -def SHrot_v( - l: int, - m: int, - n: int, - SHrotmat: np.ndarray, - R_lm1: np.ndarray, -) -> float: - """Helper function to calculate the vs""" - - if m == 0: - p0 = SHrot_p(1, l, 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -1, n, SHrotmat, R_lm1) - return p0 + p1 - else: - if m > 0: - d = 1.0 if (m == 1) else 0.0 - p0 = SHrot_p(1, l, m - 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -m + 1, n, SHrotmat, R_lm1) - return p0 * np.sqrt(1.0 + d) - p1 * (1.0 - d) - else: - d = 1.0 if (m == -1) else 0.0 - p0 = SHrot_p(1, l, m + 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -m - 1, n, SHrotmat, R_lm1) - return p0 * (1.0 - d) + p1 * np.sqrt(1.0 + d) - - -def SHrot_w( - l: int, - m: int, - n: int, - SHrotmat: np.ndarray, - R_lm1: np.ndarray, -) -> float: - """Helper function to calculate the w""" - - if m == 0: - raise ValueError("ERROR should not be called\n") - else: - if m > 0: - p0 = SHrot_p(1, l, m + 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -m - 1, n, SHrotmat, R_lm1) - return p0 + p1 - else: - p0 = SHrot_p(1, l, m - 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -m + 1, n, SHrotmat, 
R_lm1) - return p0 - p1 - - -def SHrotmatgen( - R: np.ndarray, - order: Optional[int] = 3, -) -> np.ndarray: - """ - Calculate SHD rotation matrix from that in real space - translated from ivas_rotation.c - - Parameters: - ---------- - R: np.ndarray - real-space rotation matrix - order: Optional[int] - Ambisonics order, default = 3 - - Returns: - ---------- - SHrotmat: np.ndarray - SHD rotation matrix - """ - - dim = (order + 1) * (order + 1) - - SHrotmat = np.zeros([dim, dim]) - R_lm1 = np.zeros([dim, dim]) - R_l = np.zeros([dim, dim]) - - SHrotmat[0][0] = 1.0 - - SHrotmat[1][1] = R[1][1] - SHrotmat[1][2] = R[1][2] - SHrotmat[1][3] = R[1][0] - - SHrotmat[2][1] = R[2][1] - SHrotmat[2][2] = R[2][2] - SHrotmat[2][3] = R[2][0] - - SHrotmat[3][1] = R[0][1] - SHrotmat[3][2] = R[0][2] - SHrotmat[3][3] = R[0][0] - - for i in range(2 * 1 + 1): - for j in range(2 * 1 + 1): - R_lm1[i][j] = SHrotmat[i + 1][j + 1] - - band_idx = 4 - for l in range(2, order + 1): - R_l[:, :] = 0.0 - - for m in range(-l, l + 1): - d = 1 if (m == 0) else 0 - absm = abs(m) - sql2mm2 = np.sqrt((l * l - m * m)) - sqdabsm = np.sqrt(((1 + d) * (l + absm - 1) * (l + absm))) - sqlabsm = np.sqrt(((l - absm - 1) * (l - absm))) - - for n in range(-l, l + 1): - if abs(n) == l: - sqdenom = np.sqrt((2 * l) * (2 * l - 1)) - else: - sqdenom = np.sqrt(l * l - n * n) - - u = sql2mm2 / sqdenom - v = sqdabsm / sqdenom * (1 - 2 * d) * 0.5 - w = sqlabsm / sqdenom * (1 - d) * (-0.5) - - if u != 0: - u = u * SHrot_u(l, m, n, SHrotmat, R_lm1) - if v != 0: - v = v * SHrot_v(l, m, n, SHrotmat, R_lm1) - if w != 0: - w = w * SHrot_w(l, m, n, SHrotmat, R_lm1) - R_l[m + l][n + l] = u + v + w - - for i in range(2 * l + 1): - for j in range(2 * l + 1): - SHrotmat[band_idx + i][band_idx + j] = R_l[i][j] - - for i in range(2 * l + 1): - for j in range(2 * l + 1): - R_lm1[i][j] = R_l[i][j] - - band_idx += 2 * l + 1 - - return SHrotmat - - -def Quat2Euler( - quat: np.ndarray, - degrees: bool = True, -) -> np.ndarray: - """Convert Quaternion to Euler angles""" - - sinr = +2.0 * (quat[..., 0] * quat[..., 1] + quat[..., 2] * quat[..., 3]) - cosr = +1.0 - 2.0 * (quat[..., 1] * quat[..., 1] + quat[..., 2] * quat[..., 2]) - roll = np.arctan2(sinr, cosr) - - sinp = +2.0 * (quat[..., 0] * quat[..., 2] - quat[..., 3] * quat[..., 1]) - pitch = np.where(np.fabs(sinp) >= 1, np.copysign(np.pi / 2, sinp), np.arcsin(sinp)) - - siny = +2.0 * (quat[..., 0] * quat[..., 3] + quat[..., 1] * quat[..., 2]) - cosy = +1.0 - 2.0 * (quat[..., 2] * quat[..., 2] + quat[..., 3] * quat[..., 3]) - yaw = np.arctan2(siny, cosy) - - ypr = np.array([yaw, pitch, roll]).T - - if degrees: - ypr = np.rad2deg(ypr) - - return ypr - - -def Euler2Quat( - ypr: np.ndarray, - degrees: bool = True, -) -> np.ndarray: - """Convert Euler angles to Quaternion""" - - if degrees: - ypr = np.deg2rad(ypr) - - if len(ypr.shape) == 2: - N_quat = ypr.shape[0] - quat = np.zeros([N_quat, 4]) - yaw = ypr[:, 0] - pitch = ypr[:, 1] - roll = ypr[:, 2] - else: - quat = np.zeros([4]) - yaw = ypr[0] - pitch = ypr[1] - roll = ypr[2] - - c1 = np.cos(0.5 * yaw) - c2 = np.cos(0.5 * pitch) - c3 = np.cos(0.5 * roll) - - s1 = np.sin(0.5 * yaw) - s2 = np.sin(0.5 * pitch) - s3 = np.sin(0.5 * roll) - - quat[..., 0] = c3 * c2 * c1 + s3 * s2 * s1 - quat[..., 1] = s3 * c2 * c1 - c3 * s2 * s1 - quat[..., 2] = s3 * c2 * s1 + c3 * s2 * c1 - quat[..., 3] = c3 * c2 * s1 - s3 * s2 * c1 - - return quat - - -def Quat2RotMat( - quat: np.ndarray, -) -> np.ndarray: - """Convert quaternion to rotation matrix""" - - R = np.zeros([3, 3]) - - if 
quat[0] != -3: - # Quaternions - # formula taken from ivas_rotation.c - - R[0, 0] = ( - quat[0] * quat[0] - + quat[1] * quat[1] - - quat[2] * quat[2] - - quat[3] * quat[3] - ) - R[0, 1] = 2.0 * (quat[1] * quat[2] - quat[0] * quat[3]) - R[0, 2] = 2.0 * (quat[1] * quat[3] + quat[0] * quat[2]) - - R[1, 0] = 2.0 * (quat[1] * quat[2] + quat[0] * quat[3]) - R[1, 1] = ( - quat[0] * quat[0] - - quat[1] * quat[1] - + quat[2] * quat[2] - - quat[3] * quat[3] - ) - R[1, 2] = 2.0 * (quat[2] * quat[3] - quat[0] * quat[1]) - - R[2, 0] = 2.0 * (quat[1] * quat[3] - quat[0] * quat[2]) - R[2, 1] = 2.0 * (quat[2] * quat[3] + quat[0] * quat[1]) - R[2, 2] = ( - quat[0] * quat[0] - - quat[1] * quat[1] - - quat[2] * quat[2] - + quat[3] * quat[3] - ) - - else: - # Euler angles in R_X(roll)*R_Y(pitch)*R_Z(yaw) convention - # - # yaw: rotate scene counter-clockwise in the horizontal plane - # pitch: rotate scene in the median plane, increase elevation with positive values - # roll: rotate scene from the right ear to the top - # - # formula taken from ivas_rotation.c - - c1 = np.cos(quat[3] / 180.0 * np.pi) - c2 = np.cos(quat[2] / 180.0 * np.pi) - c3 = np.cos(quat[1] / 180.0 * np.pi) - - s1 = np.sin(quat[3] / 180.0 * np.pi) - s2 = np.sin(-quat[2] / 180.0 * np.pi) - s3 = np.sin(quat[1] / 180.0 * np.pi) - - R[0, 0] = c2 * c3 - R[0, 1] = -c2 * s3 - R[0, 2] = s2 - - R[1, 0] = c1 * s3 + c3 * s1 * s2 - R[1, 1] = c1 * c3 - s1 * s2 * s3 - R[1, 2] = -c2 * s1 - - R[2, 0] = s1 * s3 - c1 * c3 * s2 - R[2, 1] = c3 * s1 + c1 * s2 * s3 - R[2, 2] = c1 * c2 - - return R - - -def rotateAziEle( - azi: float, - ele: float, - R: np.ndarray, - is_planar: bool = False, -) -> Tuple[float, float]: - """Rotate azimuth and elevation angles with rotation matrix""" - - w = np.cos(np.deg2rad(ele)) - dv = np.array( - [ - w * np.cos(np.deg2rad(azi)), - w * np.sin(np.deg2rad(azi)), - np.sin(np.deg2rad(ele)), - ] - ) - - dv_rot = R @ dv - - azi = np.rad2deg(np.arctan2(dv_rot[1], dv_rot[0])) - if is_planar: - ele = 0 - else: - ele = np.rad2deg(np.arctan2(dv_rot[2], np.sqrt(np.sum(dv_rot[:2] ** 2)))) - - return azi, ele diff --git a/item_generation_scripts/audiotools/utils.py b/item_generation_scripts/audiotools/utils.py deleted file mode 100644 index 6aaf5fa9..00000000 --- a/item_generation_scripts/audiotools/utils.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. 
This notice grants no license of any kind, including but not limited to patent
-# license, nor is any license granted by implication, estoppel or otherwise.
-#
-# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
-# contributions.
-#
-# This software is provided "AS IS", without any express or implied warranties. The software is in the
-# development stage. It is intended exclusively for experts who have experience with such software and
-# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
-# and fitness for a particular purpose are hereby disclaimed and excluded.
-#
-# Any dispute, controversy or claim arising under or in relation to providing this software shall be
-# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
-# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
-# the United Nations Convention on Contracts on the International Sales of Goods.
-#
-
-from pathlib import Path
-
-import numpy as np
-
-from item_generation_scripts.audiotools.rotation import Euler2Quat, Quat2Euler
-
-
-def read_trajectory(trj_file: Path, return_quat=True):
-    trj = np.genfromtxt(trj_file, delimiter=",")
-
-    if np.all(trj[:, 0] == -3):
-        # Euler
-        if return_quat:
-            return Euler2Quat(trj[:, 1:])
-        else:
-            return trj[:, 1:]
-    else:
-        # Quat
-        if return_quat:
-            return trj
-        else:
-            return Quat2Euler(trj)
-
-
-def write_trajectory(trj, out_file, write_quat=True):
-    if trj.shape[1] == 3:
-        # Euler
-        if write_quat:
-            trj = Euler2Quat(trj)
-        else:
-            trj = np.insert(trj, 0, -3.0, axis=1)
-    elif not write_quat:
-        trj = Quat2Euler(trj)
-        trj = np.insert(trj, 0, -3.0, axis=1)
-
-    with open(out_file, "w") as f:
-        for pos in trj:
-            f.write(", ".join([f"{q:.6f}" for q in pos]))
-            f.write("\n")
diff --git a/item_generation_scripts/audiotools/wrappers/__init__.py b/item_generation_scripts/audiotools/wrappers/__init__.py
deleted file mode 100644
index aea270d8..00000000
--- a/item_generation_scripts/audiotools/wrappers/__init__.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env python3
-
-#
-# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
-# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-# contributors to this repository. All Rights Reserved.
-#
-# This software is protected by copyright law and by international treaties.
-# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
-# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-# contributors to this repository retain full ownership rights in their respective contributions in
-# the software. This notice grants no license of any kind, including but not limited to patent
-# license, nor is any license granted by implication, estoppel or otherwise.
-#
-# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
-# contributions.
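A minimal usage sketch for the trajectory helpers above (file names are hypothetical; it assumes the CSV conventions handled by read_trajectory, i.e. quaternion rows, or Euler rows marked by a leading -3 column):

from pathlib import Path

from item_generation_scripts.audiotools.utils import read_trajectory, write_trajectory

# read a head-rotation trajectory as quaternions (N, 4), regardless of the on-disk convention
quat = read_trajectory(Path("head_rotation.csv"))

# write it back as Euler angles; write_trajectory prepends the -3.0 marker column
write_trajectory(quat, "head_rotation_euler.csv", write_quat=False)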
-# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# diff --git a/item_generation_scripts/audiotools/wrappers/bs1770.py b/item_generation_scripts/audiotools/wrappers/bs1770.py deleted file mode 100644 index d238bec3..00000000 --- a/item_generation_scripts/audiotools/wrappers/bs1770.py +++ /dev/null @@ -1,291 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
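A minimal usage sketch for the loudness wrappers below (hypothetical file name; it assumes the bs1770demo binary is discoverable via find_binary or configured in binary_paths):

from item_generation_scripts.audiotools import audio
from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness, loudness_norm

item = audio.fromfile("STEREO", "item1.wav")

# one-shot measurement: loudness in LKFS and the scale factor towards the target
measured, scale = get_loudness(item, target_loudness=-26)

# iterative normalization: scales item.audio until within 0.5 LKFS of the target
normalized = loudness_norm(item, target_loudness=-26)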
-#
-
-import copy
-import logging
-from pathlib import Path
-from tempfile import TemporaryDirectory
-from typing import Optional, Tuple, Union
-from warnings import warn
-
-import numpy as np
-
-from item_generation_scripts.audiotools import audio, convert
-from item_generation_scripts.audiotools.audiofile import write
-from item_generation_scripts.audiotools.wrappers.filter import resample_itu
-from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES
-from item_generation_scripts.utils import find_binary, get_devnull, run
-
-logger = logging.getLogger("__main__")
-logger.setLevel(logging.DEBUG)
-
-
-def bs1770demo(
-    input: audio.Audio,
-    target_loudness: Optional[float] = -26,
-) -> Tuple[float, float]:
-    """
-    Wrapper for ITU-R BS.1770-4, requires bs1770demo binary
-
-    Parameters
-    ----------
-    input: Audio
-        Input audio
-    target_loudness: Optional[float]
-        Desired loudness in LKFS
-
-    Returns
-    -------
-    measured_loudness: float
-        Measured loudness of input
-    scale_factor: float
-        Scale factor to achieve desired loudness
-    """
-
-    null_file = get_devnull()
-
-    if "bs1770demo" in DEFAULT_CONFIG_BINARIES["binary_paths"]:
-        binary = find_binary(
-            DEFAULT_CONFIG_BINARIES["binary_paths"]["bs1770demo"].name,
-            binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["bs1770demo"].parent,
-        )
-    else:
-        binary = find_binary("bs1770demo")
-
-    if not isinstance(input, audio.BinauralAudio) and not isinstance(
-        input, audio.ChannelBasedAudio
-    ):
-        raise NotImplementedError(f"{input.name} is unsupported in ITU-R BS.1770-4.")
-
-    if input.fs != 48000:
-        warn(
-            "ITU-R BS.1770-4 only supports 48kHz sampling rate. Temporarily resampling signal for measurement."
-        )
-        tmp_sig = resample_itu(input, 48000)
-    else:
-        tmp_sig = input.audio
-
-    with TemporaryDirectory() as tmp_dir:
-        tmp_dir = Path(tmp_dir)
-        tmp_file = tmp_dir.joinpath("tmp_loudness.pcm")
-
-        """
-        ITU-R BS.1770-4
-        """
-
-        cmd = [
-            str(binary),
-            "-nchan",
-            str(input.num_channels),  # input nchan
-            "-lev",
-            str(target_loudness),  # level
-            "-conf",
-            "",  # config string
-            str(tmp_file),
-            null_file,
-        ]
-
-        if isinstance(input, audio.BinauralAudio):
-            cmd[6] = "00"  # -conf
-        elif isinstance(input, audio.ChannelBasedAudio):
-            # if loudspeaker position fulfills the criteria, set the config string to 1 for that index
-            conf_str = [
-                str(int(abs(e) < 30 and (60 <= abs(a) <= 120)))
-                for a, e in zip(input.ls_azi, input.ls_ele)
-            ]
-            for lfe in input.lfe_index:
-                conf_str[lfe] = "L"
-
-            cmd[6] = "".join(conf_str)
-
-        # write temporary file
-        write(tmp_file, tmp_sig, 48000)
-
-        # run command
-        result = run(cmd, logger=logger)
-
-        # parse output
-        measured_loudness = float(result.stdout.splitlines()[3].split(":")[1])
-        scale_factor = float(result.stdout.splitlines()[-3].split(":")[1])
-
-    return measured_loudness, scale_factor
-
-
-def get_loudness(
-    input: audio.Audio,
-    target_loudness: Optional[float] = -26,
-    loudness_format: Optional[str] = None,
-) -> Tuple[float, float]:
-    """
-    Loudness measurement using ITU-R BS.1770-4
-
-    Parameters
-    ----------
-    input: Audio
-        Input audio
-    target_loudness: Optional[float]
-        Desired loudness in LKFS
-    loudness_format: Optional[str]
-        Loudness format to render to for loudness computation (default input format if possible)
-
-    Returns
-    -------
-    measured_loudness: float
-        Measured loudness (after conversion to loudness_format if specified)
-    scale_factor: float
-        Scale factor to achieve desired loudness
-    """
-
-    if target_loudness > 0:
-        raise ValueError("Desired loudness is too high!")
-
-    if
loudness_format is None: - # for some formats rendering is necessary prior to loudness measurement - if isinstance(input, audio.SceneBasedAudio) or isinstance( - input, audio.MetadataAssistedSpatialAudio - ): - loudness_format = "7_1_4" - elif isinstance(input, audio.ObjectBasedAudio): - loudness_format = "BINAURAL" - elif hasattr(input, "layout_file"): - loudness_format = input.layout_file - else: - # default use input format - loudness_format = input.name - - # configure intermediate format - tmp = audio.fromtype(loudness_format) - tmp.fs = input.fs - - if input.name != loudness_format: - convert.format_conversion(input, tmp) - else: - tmp.audio = input.audio - - return bs1770demo(tmp, target_loudness) - - -def loudness_norm( - input: audio.Audio, - target_loudness: Optional[float] = -26, - loudness_format: Optional[str] = None, -) -> np.ndarray: - """ - Iterative loudness normalization using ITU-R BS.1770-4 - Signal is iteratively scaled after rendering to the specified format - until loudness converges to the target value - - Parameters - ---------- - input : Audio - Input audio - target_loudness: Optional[float] - Desired loudness level in LKFS - loudness_format: Optional[str] - Loudness format to render to for loudness computation (default input format) - - Returns - ------- - norm : Audio - Normalized audio - """ - - # repeat until convergence of loudness - measured_loudness = np.inf - scale_factor = 1 - num_iter = 1 - - while np.abs(measured_loudness - target_loudness) > 0.5 and num_iter < 10: - measured_loudness, scale_factor_new = get_loudness( - input, target_loudness, loudness_format - ) - - # scale input - input.audio *= scale_factor_new - - # update scale factor - scale_factor *= scale_factor_new - - num_iter += 1 - - if num_iter >= 10: - warn( - f"Loudness did not converge to desired value, stopping at: {measured_loudness:.2f}" - ) - - return input.audio - - -def scale_files( - file_list: list[list[Union[Path, str]]], - fmt: str, - loudness: float, - fs: Optional[int] = 48000, - in_meta: Optional[list] = None, -) -> None: - """ - Scales audio files to desired loudness - - Parameters - ---------- - file_list : list[list[Union[Path, str]]] - List of file paths in a list of the condition folders - fmt: str - Audio format of files in list - loudness: float - Desired loudness level in LKFS/dBov - fs: Optional[int] - Sampling rate - in_meta: Optional[list] - Metadata for ISM with same structure as file_list but one layer more - for the list of metadata for one file - """ - - if fmt.startswith("ISM") and in_meta: - meta_bool = True - else: - in_meta = copy.copy(file_list) - meta_bool = False - - for folder, meta_folder in zip(file_list, in_meta): - for file, meta in zip(folder, meta_folder): - # create audio object - if meta_bool: - audio_obj = audio.fromfile(fmt, file, fs, meta) - else: - audio_obj = audio.fromfile(fmt, file, fs) - - # adjust loudness - scaled_audio = loudness_norm(audio_obj, loudness) - - # write into file - write(file, scaled_audio, audio_obj.fs) diff --git a/item_generation_scripts/audiotools/wrappers/eid_xor.py b/item_generation_scripts/audiotools/wrappers/eid_xor.py deleted file mode 100644 index 0b807d94..00000000 --- a/item_generation_scripts/audiotools/wrappers/eid_xor.py +++ /dev/null @@ -1,193 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
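A usage sketch for the FER wrappers below (paths are hypothetical; signal length and preamble are in frames, the error rate in percent):

from pathlib import Path

from item_generation_scripts.audiotools.wrappers.eid_xor import create_and_apply_error_pattern

# apply 5% frame erasures to a G.192 bitstream of 3000 frames,
# leaving a 500-frame preamble error-free
create_and_apply_error_pattern(
    Path("item1.192"),      # input bitstream
    Path("item1_fer.192"),  # output bitstream
    len_sig=3000,
    error_rate=5,
    preamble=500,
)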
-# - -import os.path -from pathlib import Path -from typing import Optional, Union - -from item_generation_scripts.audiotools.wrappers.gen_patt import create_error_pattern -from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES -from item_generation_scripts.utils import find_binary, run - - -def eid_xor( - error_pattern: Union[str, Path], - in_bitstream: Union[str, Path], - out_bitstream: Union[str, Path], -) -> None: - """ - Wrapper for eid-xor binary to apply error patterns for the bitstream processing - - Parameters - ---------- - error_pattern: Union[str, Path] - Path to error pattern file - in_bitstream: Union[str, Path] - Path to input bitstream file - out_bitstream: Union[str, Path] - Output path for modified bitstream - """ - - # find binary - if "eid-xor" in DEFAULT_CONFIG_BINARIES["binary_paths"]: - binary = find_binary( - DEFAULT_CONFIG_BINARIES["binary_paths"]["eid-xor"].name, - binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["eid-xor"].parent, - ) - else: - binary = find_binary("eid-xor") - - # check for valid inputs - if not Path(in_bitstream).is_file(): - raise ValueError( - f"Input bitstream file {in_bitstream} for bitstream processing does not exist" - ) - elif not Path(error_pattern).is_file(): - raise ValueError( - f"Error pattern file {error_pattern} for bitstream processing does not exist" - ) - - # set up command line - cmd = [ - str(binary), - "-vbr", # Enables variable bit rate operation - "-fer", # Error pattern is a frame erasure pattern - in_bitstream, - error_pattern, - out_bitstream, - ] - - # run command - run(cmd) - - return - - -def create_and_apply_error_pattern( - in_bitstream: Union[Path, str], - out_bitstream: Union[Path, str], - len_sig: int, - error_pattern: Optional[Union[Path, str]] = None, - error_rate: Optional[float] = None, - preamble: Optional[int] = 0, - master_seed: Optional[int] = 0, - prerun_seed: Optional[int] = 0, -) -> None: - """ - Function to create (or use existing) frame error pattern for bitstream processing - - Parameters - ---------- - in_bitstream: Union[Path, str] - Path of input bitstream - out_bitstream: Union[Path, str] - Path of output bitstream - len_sig: int - Length of signal in frames - error_pattern: Optional[Union[Path, str]] - Path to existing error pattern - error_rate: float - Error rate in percent - preamble: Optional[int] - Length of preamble in frames - master_seed: Optional[int] - Master seed for error pattern generation - prerun_seed: Optional[int] - Number of preruns in seed generation - """ - - if error_pattern is None: - # create error pattern - if error_rate is not None: - error_pattern = in_bitstream.parent.joinpath("error_pattern").with_suffix( - ".192" - ) - create_error_pattern( - len_sig, error_pattern, error_rate, preamble, master_seed, prerun_seed - ) - else: - raise ValueError( - "Either error pattern or error rate has to be specified for bitstream processing" - ) - elif error_rate is not None: - raise ValueError( - "Error pattern and error rate are specified for bitstream processing. 
Can't use both"
-        )
-
-    # apply error pattern
-    eid_xor(error_pattern, in_bitstream, out_bitstream)
-
-    return
-
-
-def validate_error_pattern_application(
-    error_pattern: Optional[Union[Path, str]] = None,
-    error_rate: Optional[int] = None,
-) -> None:
-    """
-    Validate settings for frame error pattern application
-
-    Parameters
-    ----------
-    error_pattern: Optional[Union[Path, str]]
-        Path to existing error pattern
-    error_rate: Optional[int]
-        Frame error rate
-    """
-
-    if find_binary("gen-patt") is None:
-        raise FileNotFoundError(
-            "The binary gen-patt for error pattern generation was not found! Please check the configuration."
-        )
-    if find_binary("eid-xor") is None:
-        raise FileNotFoundError(
-            "The binary eid-xor for error pattern application was not found! Please check the configuration."
-        )
-    if error_pattern is not None:
-        if not os.path.exists(os.path.realpath(error_pattern)):
-            raise FileNotFoundError(
-                f"The frame error profile file {error_pattern} was not found! Please check the configuration."
-            )
-        if error_rate is not None:
-            raise ValueError(
-                "Frame error pattern and error rate are specified for bitstream processing. Can't use both! Please check the configuration."
-            )
-    else:
-        if error_rate is None:
-            raise ValueError(
-                "Either error rate or error pattern has to be specified for FER bitstream processing."
-            )
-        elif error_rate < 0 or error_rate > 100:
-            raise ValueError(
-                f"Specified error rate of {error_rate}% is either too large or too small."
-            )
-    return
diff --git a/item_generation_scripts/audiotools/wrappers/esdru.py b/item_generation_scripts/audiotools/wrappers/esdru.py
deleted file mode 100644
index 7785a586..00000000
--- a/item_generation_scripts/audiotools/wrappers/esdru.py
+++ /dev/null
@@ -1,130 +0,0 @@
-#!/usr/bin/env python3
-
-#
-# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
-# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-# contributors to this repository. All Rights Reserved.
-#
-# This software is protected by copyright law and by international treaties.
-# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
-# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-# contributors to this repository retain full ownership rights in their respective contributions in
-# the software. This notice grants no license of any kind, including but not limited to patent
-# license, nor is any license granted by implication, estoppel or otherwise.
-#
-# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
-# contributions.
-#
-# This software is provided "AS IS", without any express or implied warranties. The software is in the
-# development stage. It is intended exclusively for experts who have experience with such software and
-# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
-# and fitness for a particular purpose are hereby disclaimed and excluded.
-# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -from pathlib import Path -from tempfile import TemporaryDirectory -from typing import Optional - -import numpy as np - -from item_generation_scripts.audiotools import audio -from item_generation_scripts.audiotools.audiofile import read, write -from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES -from item_generation_scripts.utils import find_binary, run - - -def esdru( - input: audio.Audio, - alpha: float, - sf: Optional[int] = 48000, - e_step: Optional[float] = 0.5, - seed: Optional[int] = 1, -) -> np.ndarray: - """ - Wrapper for ESDRU (Ericsson spatial distortion reference unit) Recommendation ITU-T P.811, requires esdru binary - - Parameters - ---------- - input : Audio - Input audio (16 bit Stereo PCM) - alpha: float - Alpha value [0.0 ... 1.0] - sf: Optional[int] - Sampling frequency FS Hz (Default: 48000 Hz) - e_step: Optional[float] - Max step S during high energy [0.0 ... 1.0] (Default: 0.5) - seed: Optional[int] - Set random seed I [unsigned int] (Default: 1) - - Returns - ------- - output: np.ndarray - Output array (16 bit Stereo PCM) - """ - if "esdru" in DEFAULT_CONFIG_BINARIES["binary_paths"]: - binary = find_binary( - DEFAULT_CONFIG_BINARIES["binary_paths"]["esdru"].name, - binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["esdru"].parent, - ) - else: - binary = find_binary("esdru") - - if not isinstance(input, audio.BinauralAudio) and not input.name == "STEREO": - raise Exception( - "ESDRU condition only available for STEREO or BINAURAL output format" - ) - - if alpha < 0.0 or alpha > 1.0: - raise Exception( - "Alpha value is out of bounds. Please choose a value between 0.0 and 1.0." - ) - - if e_step < 0.0 or e_step > 1.0: - raise Exception( - "Step value is out of bounds. Please choose a value between 0.0 and 1.0." - ) - - tmp_input_signal = input.audio - tmp_output_signal = np.ones((48000, 2)) - - with TemporaryDirectory() as tmp_dir: - tmp_dir = Path(tmp_dir) - tmp_input_file = tmp_dir.joinpath("tmp_input_signal.raw") - tmp_output_file = tmp_dir.joinpath("tmp_output_signal.raw") - - """ - ITU-T Recommendation P.811, ESDRU - """ - - cmd = [ - str(binary), - "-sf", - str(sf), - "-e_step", - str(e_step), - "-seed", - str(seed), - str(alpha), - str(tmp_input_file), - str(tmp_output_file), - ] - - # write temporary file - write(tmp_input_file, tmp_input_signal, sf) - write(tmp_output_file, tmp_output_signal, sf) - - # run command - run(cmd) - - tmp_output_signal, out_fs = read(tmp_output_file, 2, sf) - - return tmp_output_signal diff --git a/item_generation_scripts/audiotools/wrappers/filter.py b/item_generation_scripts/audiotools/wrappers/filter.py deleted file mode 100644 index 4c7b61b4..00000000 --- a/item_generation_scripts/audiotools/wrappers/filter.py +++ /dev/null @@ -1,366 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
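A usage sketch for the resampling wrapper below (hypothetical file name). Note that resample_itu returns the resampled samples but does not update input.fs, so the caller has to do that, as lpfilter_itu and hp50filter_itu do internally:

from item_generation_scripts.audiotools import audio
from item_generation_scripts.audiotools.wrappers.filter import resample_itu

item = audio.fromfile("STEREO", "item1_32k.wav", fs=32000)

# 32 kHz -> 48 kHz is the 3/2 ratio (SHQ3 up-sampling followed by SHQ2 down-sampling)
item.audio = resample_itu(item, 48000)
item.fs = 48000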
-#
-
-import re
-from copy import copy
-from pathlib import Path
-from tempfile import TemporaryDirectory
-from typing import Optional
-from warnings import warn
-
-import numpy as np
-
-from item_generation_scripts.audiotools.audio import Audio, ChannelBasedAudio
-from item_generation_scripts.audiotools.audioarray import delay_compensation
-from item_generation_scripts.audiotools.audiofile import read, write
-from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES
-from item_generation_scripts.utils import find_binary, run
-
-FILTER_TYPES_REGEX = r"[\n][\s]{3}[A-Z0-9]\w+\s+"
-
-
-def filter_itu(
-    input: Audio,
-    flt_type: str,
-    block_size: Optional[int] = None,
-    mod: Optional[bool] = False,
-    up: Optional[bool] = False,
-    down: Optional[bool] = False,
-    is_async: Optional[bool] = False,
-    delay: Optional[int] = None,
-    skip_channel: Optional[list[int]] = None,
-) -> np.ndarray:
-    """
-    Filter a multi-channel audio array with the ITU-T STL filter binary
-
-    Parameters
-    ----------
-    input: Audio
-        Input array
-    flt_type: str
-        Name of filter type used for filtering
-    block_size: Optional[int]
-        Processing block size in number of samples (default 256 samples)
-    mod: Optional[bool]
-        Flag for using the modified IRS characteristic
-    up: Optional[bool]
-        Flag for up-sampling
-    down: Optional[bool]
-        Flag for down-sampling
-    is_async: Optional[bool]
-        Flag for asynchronous operation
-    delay: Optional[int]
-        Delay in number of samples
-    skip_channel: Optional[list[int]]
-        List of channel indices which should not be filtered
-
-    Returns
-    -------
-    output: np.ndarray
-        Output filtered array
-    """
-
-    if "filter" in DEFAULT_CONFIG_BINARIES["binary_paths"]:
-        binary = find_binary(
-            DEFAULT_CONFIG_BINARIES["binary_paths"]["filter"].name,
-            binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["filter"].parent,
-        )
-    else:
-        binary = find_binary("filter")
-
-    # check if filter type is supported
-    tmp = run([binary], check=False)
-
-    FILTER_TYPES = [
-        f.group().strip() for f in re.finditer(FILTER_TYPES_REGEX, tmp.stdout)
-    ]
-
-    if flt_type not in FILTER_TYPES:
-        raise ValueError(
-            f"Filter type {flt_type} does not seem to be supported by the binary: {FILTER_TYPES}"
-        )
-
-    # create command line
-    cmd = [
-        binary,
-        "-q",
-    ]
-
-    if mod:
-        cmd.append("-mod")
-    if up and down:
-        raise ValueError("Either up-sampling or down-sampling has to be chosen")
-    if up:
-        cmd.append("-up")
-    elif down:
-        cmd.append("-down")
-    if is_async:
-        cmd.append("-async")
-    if delay:
-        cmd.extend(["-delay", str(delay)])
-
-    cmd.append(str(flt_type))
-
-    # create output array with according size
-    if up:
-        # upsampling -> size increases
-        if flt_type == "SHQ2":
-            output = np.zeros((np.shape(input.audio)[0] * 2, np.shape(input.audio)[1]))
-        elif flt_type == "SHQ3":
-            output = np.zeros((np.shape(input.audio)[0] * 3, np.shape(input.audio)[1]))
-        else:
-            raise ValueError(f"No upsampling with {flt_type} possible")
-    elif down:
-        # downsampling -> size decreases
-        if flt_type == "SHQ2":
-            output = np.zeros(
-                (int(np.ceil(np.shape(input.audio)[0] / 2)), np.shape(input.audio)[1])
-            )
-        elif flt_type == "SHQ3":
-            output = np.zeros(
-                (int(np.ceil(np.shape(input.audio)[0] / 3)), np.shape(input.audio)[1])
-            )
-        else:
-            raise ValueError(f"No downsampling with {flt_type} possible")
-    else:
-        # normal filtering -> size remains
-        output = np.zeros_like(input.audio)
-
-    with TemporaryDirectory() as tmp_dir:
-        tmp_dir = Path(tmp_dir)
-
-        # process channels separately
-        for channel in range(input.num_channels):
-            if skip_channel and channel
in skip_channel:
-                continue
-
-            cmd_in_out = cmd.copy()
-
-            tmp_in = tmp_dir.joinpath(f"tmp_filterIn{channel}.pcm")
-            tmp_out = tmp_dir.joinpath(f"tmp_filterOut{channel}.pcm")
-
-            cmd_in_out.append(str(tmp_in))
-            cmd_in_out.append(str(tmp_out))
-
-            if block_size:
-                cmd_in_out.append(str(block_size))
-
-            write(tmp_in, input.audio[:, channel], input.fs)
-
-            run(cmd_in_out)
-
-            a, _ = read(tmp_out, nchannels=1, fs=input.fs)
-            output[:, channel][:, None] = a
-
-    return output
-
-
-def lpfilter_itu(
-    x: Audio,
-    fc: int,
-) -> np.ndarray:
-    """
-    Low-pass filter a multi-channel audio array
-
-    Parameters
-    ----------
-    x: Audio
-        Input audio
-    fc: int
-        Cut-off frequency in Hz
-
-    Returns
-    -------
-    y: np.ndarray
-        Output low-pass filtered array
-    """
-
-    # find right filter type for cut-off frequency
-    flt_types = ["LP1p5", "LP35", "LP7", "LP10", "LP12", "LP14", "LP20"]
-    flt_vals = [1500, 3500, 7000, 10000, 12000, 14000, 20000]
-    try:
-        flt_type = flt_types[flt_vals.index(fc)]
-    except Exception:
-        raise ValueError(f"LP cut-off frequency {fc}Hz not supported.")
-
-    # resample if sampling rate is not supported
-    old_fs = None
-    tmp = copy(x)
-    if x.fs != 48000:
-        warn(
-            f"Filter type {flt_type} only supported for 48 kHz sampling rate, not for {x.fs}Hz -> resampling"
-        )
-        old_fs = x.fs
-        tmp.audio = resample_itu(tmp, 48000)
-        tmp.fs = 48000
-
-    # apply filter
-    y = filter_itu(tmp, flt_type=flt_type, block_size=960)
-
-    # delay compensation
-    y = delay_compensation(y, flt_type=flt_type, fs=tmp.fs)
-
-    # reverse resampling
-    if old_fs:
-        tmp.audio = y
-        y = resample_itu(tmp, old_fs)
-
-    return y
-
-
-def hp50filter_itu(
-    x: Audio,
-) -> np.ndarray:
-    """
-    High-pass 50Hz filter a multi-channel audio array
-
-    Parameters
-    ----------
-    x: Audio
-        Input audio
-
-    Returns
-    -------
-    y: np.ndarray
-        Output high-pass filtered array
-    """
-
-    # set filter type and check if sampling rate is supported
-    old_fs = None
-    tmp = copy(x)
-    if x.fs == 48000:
-        flt_type = "HP50_48KHZ"
-    elif x.fs == 32000:
-        flt_type = "HP50_32KHZ"
-    else:
-        # resample if sampling rate is not supported
-        warn(
-            f"Filter type HP50 only supported for 48 kHz and 32 kHz sampling rates, not for {x.fs}Hz -> resampling"
-        )
-        flt_type = "HP50_48KHZ"
-        old_fs = x.fs
-        tmp.audio = resample_itu(tmp, 48000)
-        tmp.fs = 48000
-
-    # don't apply high-pass filtering to LFE channel
-    if isinstance(x, ChannelBasedAudio):
-        skip_channel = x.lfe_index
-    else:
-        skip_channel = None
-
-    # apply filter
-    y = filter_itu(tmp, flt_type=flt_type, skip_channel=skip_channel)
-
-    # delay compensation
-    y = delay_compensation(y, flt_type=flt_type, fs=tmp.fs)
-
-    # reverse resampling
-    if old_fs:
-        tmp.audio = y
-        y = resample_itu(tmp, old_fs)
-
-    return y
-
-
-def resample_itu(
-    x: Audio,
-    fs_new: int,
-) -> np.ndarray:
-    """
-    Resampling of multi-channel audio array
-
-    Parameters
-    ----------
-    x: Audio
-        Input audio
-    fs_new: int
-        Target sampling rate in Hz
-
-    Returns
-    -------
-    y: np.ndarray
-        Output resampled array
-    """
-
-    fs_old = x.fs
-
-    # if the sampling rate is unchanged, do nothing
-    if fs_new == fs_old:
-        return x.audio
-
-    ratio_fs = fs_new / fs_old
-    up = [False]
-    down = [False]
-
-    # select suitable processing to achieve the target sampling rate
-    if ratio_fs == 2:
-        flt_type = ["SHQ2"]
-        up = [True]
-    elif ratio_fs == 0.5:
-        flt_type = ["SHQ2"]
-        down = [True]
-    elif ratio_fs == 3:
-        flt_type = ["SHQ3"]
-        up = [True]
-    elif ratio_fs == 1 / 3:
-        flt_type = ["SHQ3"]
-        down = [True]
-    elif ratio_fs == 2 / 3:
-        flt_type = ["SHQ2", "SHQ3"]
-        up = [True,
False]
-        down = [False, True]
-    elif ratio_fs == 3 / 2:
-        flt_type = ["SHQ3", "SHQ2"]
-        up = [True, False]
-        down = [False, True]
-    else:
-        raise ValueError("Ratio of input and output sampling frequency not supported")
-
-    # apply filter
-    y = copy(x)
-    for i, flt in enumerate(flt_type):
-        y.audio = filter_itu(y, flt_type=flt, up=up[i], down=down[i])
-        y.audio = delay_compensation(
-            y.audio, flt_type=flt, fs=y.fs, up=up[i], down=down[i]
-        )
-        # if up[i]:
-        #     if flt == "SHQ2":
-        #         y.fs = y.fs * 2
-        #     elif flt == "SHQ3":
-        #         y.fs = y.fs * 3
-        # elif down[i]:
-        #     if flt == "SHQ2":
-        #         y.fs = int(y.fs / 2)
-        #     elif flt == "SHQ3":
-        #         y.fs = int(y.fs / 3)
-
-    return y.audio
diff --git a/item_generation_scripts/audiotools/wrappers/gen_patt.py b/item_generation_scripts/audiotools/wrappers/gen_patt.py
deleted file mode 100644
index a68706a7..00000000
--- a/item_generation_scripts/audiotools/wrappers/gen_patt.py
+++ /dev/null
@@ -1,171 +0,0 @@
-#!/usr/bin/env python3
-
-#
-# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
-# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-# contributors to this repository. All Rights Reserved.
-#
-# This software is protected by copyright law and by international treaties.
-# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
-# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-# contributors to this repository retain full ownership rights in their respective contributions in
-# the software. This notice grants no license of any kind, including but not limited to patent
-# license, nor is any license granted by implication, estoppel or otherwise.
-#
-# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
-# contributions.
-#
-# This software is provided "AS IS", without any express or implied warranties. The software is in the
-# development stage. It is intended exclusively for experts who have experience with such software and
-# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
-# and fitness for a particular purpose are hereby disclaimed and excluded.
-#
-# Any dispute, controversy or claim arising under or in relation to providing this software shall be
-# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
-# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
-# the United Nations Convention on Contracts on the International Sales of Goods.
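A usage sketch for the pattern generator below (hypothetical output path; len_sig and preamble are in frames, the rate in percent):

from pathlib import Path

from item_generation_scripts.audiotools.wrappers.gen_patt import create_error_pattern

# 10% FER pattern for 3000 frames; the first 500 frames (preamble) stay error-free
create_error_pattern(
    len_sig=3000,
    path_pattern=Path("error_pattern.192"),
    frame_error_rate=10,
    preamble=500,
    master_seed=1234,
)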
-#
-
-from os import getcwd
-from pathlib import Path
-from tempfile import TemporaryDirectory
-from typing import Optional, Union
-
-from item_generation_scripts.audiotools.wrappers.random_seed import random_seed
-from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES
-from item_generation_scripts.utils import find_binary, run
-
-ERROR_PATTERNS_DIR = Path(__file__).parent.parent.parent.joinpath("error_patterns")
-
-
-def gen_patt(
-    len_sig: int,
-    path_pattern: Union[Path, str],
-    error_rate: float,
-    start: Optional[int] = 0,
-    working_dir: Optional[Union[Path, str]] = None,
-) -> None:
-    """
-    Wrapper for gen-patt binary to create error patterns for the bitstream processing
-
-    Parameters
-    ----------
-    len_sig: int
-        Length of signal in frames
-    path_pattern: Union[Path, str]
-        Path of output pattern
-    error_rate: float
-        Error rate in percent
-    start: Optional[int]
-        Start frame of error pattern (length of preamble)
-    working_dir: Optional[Union[Path, str]]
-        Directory where the binary should be called (the sta file has to be in this dir if desired)
-    """
-
-    # find binary
-    if "gen-patt" in DEFAULT_CONFIG_BINARIES["binary_paths"]:
-        binary = find_binary(
-            DEFAULT_CONFIG_BINARIES["binary_paths"]["gen-patt"].name,
-            binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["gen-patt"].parent,
-        )
-    else:
-        binary = find_binary("gen-patt")
-
-    if working_dir is None:
-        working_dir = getcwd()
-
-    # set up command line
-    cmd = [
-        str(binary),
-        "-tailstat",  # Statistics performed on the tail
-        "-fer",  # Frame erasure mode using Gilbert model
-        "-g192",  # Save error pattern in 16-bit G.192 format
-        "-gamma",  # Correlation for BER|FER modes
-        str(0),
-        "-rate",
-        str(error_rate / 100),
-        "-tol",  # Max deviation of specified BER/FER/BFER
-        str(0.001),
-        "-reset",  # Reset EID state in between iterations
-        "-n",
-        str(int(len_sig)),
-        "-start",
-        str(int(start) + 1),
-        path_pattern,
-    ]
-
-    # run command
-    run(cmd, cwd=working_dir)
-
-    return
-
-
-def create_error_pattern(
-    len_sig: int,
-    path_pattern: Union[Path, str],
-    frame_error_rate: float,
-    preamble: Optional[int] = 0,
-    master_seed: Optional[int] = 0,
-    prerun_seed: Optional[int] = 0,
-) -> None:
-    """
-    Creates error pattern with desired frame error rate for bitstream processing
-
-    Parameters
-    ----------
-    len_sig: int
-        Length of signal in frames
-    path_pattern: Union[Path, str]
-        Path of output pattern
-    frame_error_rate: float
-        Error rate in percent
-    preamble: Optional[int]
-        Length of preamble in frames
-    master_seed: Optional[int]
-        Master seed for error pattern generation
-    prerun_seed: Optional[int]
-        Number of preruns in seed generation
-    """
-
-    with TemporaryDirectory() as tmp_dir:
-        tmp_dir = Path(tmp_dir)
-
-        sta_file = ERROR_PATTERNS_DIR.joinpath("sta_template")
-        tmp_sta_file = tmp_dir.joinpath("sta")
-
-        # compute seed
-        seed = random_seed((0, 99999999), master_seed, prerun_seed)
-
-        # open file and modify
-        lines = []
-        with open(sta_file, "r") as sta_file_txt:
-            lines.append(sta_file_txt.readline())  # not changed
-            lines.append(f"{sta_file_txt.readline()[:-2]}{frame_error_rate/100}\n")
-            lines.append(sta_file_txt.readline())  # not changed
-            lines.append(f"{sta_file_txt.readline()[:-2]}{seed}\n")
-            lines.append(sta_file_txt.readline())  # not changed
-            lines.append(
-                f"{sta_file_txt.readline()[:-2]}{1-(frame_error_rate/100*2)}\n"
-            )
-            lines.append(sta_file_txt.readline())  # not changed
-            lines.append(
-                f"{sta_file_txt.readline()[:-2]}{1-(frame_error_rate/100*2)}\n"
-            )
-            lines.append(sta_file_txt.readline())
# not changed - - with open(tmp_sta_file, "w") as tmp_sta_file_txt: - tmp_sta_file_txt.write("".join(lines)) - - gen_patt( - len_sig=len_sig, - error_rate=frame_error_rate, - path_pattern=path_pattern, - start=preamble, - working_dir=tmp_dir, - ) - - return diff --git a/item_generation_scripts/audiotools/wrappers/masaRenderer.py b/item_generation_scripts/audiotools/wrappers/masaRenderer.py deleted file mode 100644 index a5987b1e..00000000 --- a/item_generation_scripts/audiotools/wrappers/masaRenderer.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
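A minimal sketch of how the MASA rendering wrapper below is typically called; it assumes an already constructed MetadataAssistedSpatialAudio object (see audiotools.audio) with its metadata_files attribute set:

import numpy as np

from item_generation_scripts.audiotools import audio
from item_generation_scripts.audiotools.wrappers.masaRenderer import masaRenderer


def render_masa(masa: audio.MetadataAssistedSpatialAudio) -> np.ndarray:
    # renders to a (N, 2) binaural array at the sampling rate of the input;
    # "5_1" and "7_1_4" are the other supported output formats
    return masaRenderer(masa, "BINAURAL")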
-# - -from pathlib import Path -from tempfile import TemporaryDirectory - -import numpy as np - -from item_generation_scripts.audiotools import audio -from item_generation_scripts.audiotools.audiofile import read, write -from item_generation_scripts.audiotools.wrappers.filter import resample_itu -from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES -from item_generation_scripts.utils import find_binary, run - - -def masaRenderer( - masa: audio.MetadataAssistedSpatialAudio, - out_fmt: str, -) -> np.ndarray: - """ - Wrapper for masaRenderer (from MASA reference software) - - Parameters - ---------- - masa : MetadataAssistedSpatialAudio - Input MASA audio - out_fmt: str - Desired output format (only 5_1, 7_1_4 and BINAURAL supported) - - Returns - ------- - output : np.ndarray - MASA rendered to out_fmt - """ - - if "masaRenderer" in DEFAULT_CONFIG_BINARIES["binary_paths"]: - binary = find_binary( - DEFAULT_CONFIG_BINARIES["binary_paths"]["masaRenderer"].name, - binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["masaRenderer"].parent, - ) - else: - binary = find_binary("masaRenderer") - - if out_fmt not in ["5_1", "7_1_4", "BINAURAL"]: - raise ValueError(f"Output format {out_fmt} is not supported by MasaRenderer!") - - if out_fmt == "5_1": - output_mode = "-LS51" - num_channels = 6 - elif out_fmt == "7_1_4": - output_mode = "-LS714" - num_channels = 12 - else: - output_mode = "-BINAURAL" - num_channels = 2 - - cmd = [ - str(binary), - output_mode, - "", # 2 -> inputPcm - str(masa.metadata_files.resolve()), - "", # 4 -> outputPcm - ] - - with TemporaryDirectory() as tmp_dir: - tmp_dir = Path(tmp_dir) - tmp_in = tmp_dir.joinpath("tmp_masaRendIn.pcm") - tmp_out = tmp_dir.joinpath("tmp_masaRendOut.pcm") - - cmd[2] = str(tmp_in) - cmd[4] = str(tmp_out) - - tmp_audio = resample_itu(masa, 48000) - old_fs = masa.fs - - write(tmp_in, tmp_audio, 48000) - - # we need to run in the masaRenderer directory to use the .bin files it requires - run(cmd, cwd=binary.resolve().parent) - - output, _ = read(tmp_out, num_channels) - - output_audio = audio.fromtype(out_fmt) - output_audio.audio = output - output_audio.fs = 48000 - output = resample_itu(output_audio, old_fs) - - return output diff --git a/item_generation_scripts/audiotools/wrappers/networkSimulator.py b/item_generation_scripts/audiotools/wrappers/networkSimulator.py deleted file mode 100644 index 4e74c3ce..00000000 --- a/item_generation_scripts/audiotools/wrappers/networkSimulator.py +++ /dev/null @@ -1,224 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -import logging -import os.path -from pathlib import Path -from typing import Optional, Union - -from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES -from item_generation_scripts.utils import find_binary, run - -LIST_JBM_PROFILES = range(12) -ERROR_PATTERNS_DIR = Path(__file__).parent.parent.parent.joinpath("dly_error_profiles") - - -def validate_network_simulator( - error_pattern: Optional[Union[Path, str]] = None, - error_profile: Optional[int] = None, - n_frames_per_packet: Optional[int] = None, -) -> None: - """ - Validate settings for the network simulator - - Parameters - ---------- - error_pattern: Optional[Union[Path, str]] - Path to existing error pattern - error_profile: Optional[int] - Index of existing error pattern - n_frames_per_packet: Optional[int] - Number of frames per packet - """ - - if "networkSimulator_g192" in DEFAULT_CONFIG_BINARIES["binary_paths"]: - binary = find_binary( - DEFAULT_CONFIG_BINARIES["binary_paths"]["networkSimulator_g192"].name, - binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"][ - "networkSimulator_g192" - ].parent, - ) - else: - binary = find_binary("networkSimulator_g192") - - if binary is None: - raise FileNotFoundError( - "The network simulator binary was not found! Please check the configuration." - ) - if error_pattern is not None: - if not os.path.exists(os.path.realpath(error_pattern)): - raise FileNotFoundError( - f"The network simulator error profile file {error_pattern} was not found! Please check the configuration." - ) - if error_profile is not None: - raise ValueError( - "JBM pattern and JBM profile number are specified for bitstream processing. Can't use both! Please check the configuration." - ) - elif error_profile is not None: - if error_profile not in LIST_JBM_PROFILES: - raise ValueError( - f"JBM profile number {error_profile} does not exist, should be between {LIST_JBM_PROFILES[0]} and {LIST_JBM_PROFILES[-1]}" - ) - if n_frames_per_packet is not None and n_frames_per_packet not in [1, 2]: - raise ValueError( - f"n_frames_per_packet is {n_frames_per_packet}. Should be 1 or 2. Please check your configuration."
- ) - - return - - -def network_simulator( - error_pattern: Union[str, Path], - in_bitstream: Union[str, Path], - out_bitstream: Union[str, Path], - n_frames_per_packet: int, - offset: int, - logger: Optional[logging.Logger] = None, -) -> None: - """ - Wrapper for the networkSimulator_g192 binary to apply error patterns for bitstream processing - - Parameters - ---------- - error_pattern: Union[str, Path] - Path to error pattern file - in_bitstream: Union[str, Path] - Path to input bitstream file - out_bitstream: Union[str, Path] - Output path for modified bitstream - n_frames_per_packet: int - Number of frames per packet [1, 2] - offset: int - Delay offset - logger: Optional[logging.Logger] - Logger instance - """ - - # find binary - if "networkSimulator_g192" in DEFAULT_CONFIG_BINARIES["binary_paths"]: - binary = find_binary( - DEFAULT_CONFIG_BINARIES["binary_paths"]["networkSimulator_g192"].name, - binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"][ - "networkSimulator_g192" - ].parent, - ) - else: - binary = find_binary("networkSimulator_g192") - - # check for valid inputs - if not Path(in_bitstream).is_file(): - raise ValueError( - f"Input bitstream file {in_bitstream} for bitstream processing does not exist" - ) - elif not Path(error_pattern).is_file(): - raise ValueError( - f"Error pattern file {error_pattern} for bitstream processing does not exist" - ) - - # set up command line - cmd = [ - str(binary), - error_pattern, - in_bitstream, - out_bitstream, - f"{out_bitstream}_tracefile_sim", - str(n_frames_per_packet), - str(offset), - ] - - # run command - run(cmd, logger=logger) - - return - - -def apply_network_simulator( - in_bitstream: Union[Path, str], - out_bitstream: Union[Path, str], - error_pattern: Optional[Union[Path, str]] = None, - error_profile: Optional[int] = None, - n_frames_per_packet: Optional[int] = None, - offset: Optional[int] = 0, - logger: Optional[logging.Logger] = None, -) -> None: - """ - Function to apply a network simulator profile to a bitstream - - Parameters - ---------- - in_bitstream: Union[Path, str] - Path of input bitstream - out_bitstream: Union[Path, str] - Path of output bitstream - error_pattern: Optional[Union[Path, str]] - Path to existing error pattern - error_profile: Optional[int] - Index of existing error pattern - n_frames_per_packet: Optional[int] - Number of frames per packet - offset: Optional[int] - Delay offset - logger: Optional[logging.Logger] - Logger instance - """ - - if error_pattern is None: - # create error pattern - if error_profile is not None: - if error_profile in LIST_JBM_PROFILES: - error_pattern = ERROR_PATTERNS_DIR.joinpath( - f"dly_error_profile_{error_profile}.dat" - ) - else: - raise ValueError( - f"JBM profile number {error_profile} does not exist, should be between {LIST_JBM_PROFILES[0]} and {LIST_JBM_PROFILES[-1]}" - ) - else: - raise ValueError( - "Either error pattern or error profile number has to be specified for network simulator bitstream processing" - ) - elif error_profile is not None: - raise ValueError( - "JBM pattern and JBM profile number are specified for bitstream processing.
Can't use both" - ) - - if n_frames_per_packet is None: - n_frames_per_packet = 1 - if error_profile is not None and error_profile == 5: - n_frames_per_packet = 2 - - # apply error pattern - network_simulator( - error_pattern, in_bitstream, out_bitstream, n_frames_per_packet, offset, logger - ) - - return diff --git a/item_generation_scripts/audiotools/wrappers/p50fbmnru.py b/item_generation_scripts/audiotools/wrappers/p50fbmnru.py deleted file mode 100644 index 2f4c19ef..00000000 --- a/item_generation_scripts/audiotools/wrappers/p50fbmnru.py +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
-# - -from pathlib import Path -from tempfile import TemporaryDirectory -from warnings import warn - -import numpy as np - -from item_generation_scripts.audiotools import audio -from item_generation_scripts.audiotools.audiofile import read, write -from item_generation_scripts.audiotools.wrappers.filter import resample_itu -from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES -from item_generation_scripts.utils import find_binary, run - - -def p50fbmnru( - input: audio.Audio, - q_db: float, -) -> np.ndarray: - """ - Wrapper for P.50 Fullband MNRU (Modulated Noise Reference Unit), requires p50fbmnru binary - The mode is M (Modulated Noise) as specified in section 5.2.1 of S4-141392 - EVS-7c Processing functions for characterization phase v110.doc - - Parameters - ---------- - input : Audio - Input audio - q_db: float - The ratio, in dB, of speech power to modulated noise power - - Returns - ------- - output: np.ndarray - Output array - """ - - if "p50fbmnru" in DEFAULT_CONFIG_BINARIES["binary_paths"]: - binary = find_binary( - DEFAULT_CONFIG_BINARIES["binary_paths"]["p50fbmnru"].name, - binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["p50fbmnru"].parent, - ) - else: - binary = find_binary("p50fbmnru") - - if input.fs != 48000: - warn("P.50 Fullband MNRU requires a sampling rate of 48kHz.") - tmp_sig = resample_itu(input, 48000) - else: - tmp_sig = input.audio - - tmp_input_signal = tmp_sig - tmp_output_signal = np.ones((48000, input.num_channels)) - - with TemporaryDirectory() as tmp_dir: - tmp_dir = Path(tmp_dir) - tmp_input_file = tmp_dir.joinpath("tmp_input_signal.raw") - tmp_output_file = tmp_dir.joinpath("tmp_output_signal.raw") - - """ - P.50 Fullband MNRU - """ - - cmd = [ - str(binary), - str(tmp_input_file), - str(tmp_output_file), - str(q_db), - "M", - ] - - # write temporary file - write(tmp_input_file, tmp_input_signal) - write(tmp_output_file, tmp_output_signal) - - # run command - run(cmd) - - tmp_output_signal, out_fs = read(tmp_output_file, input.num_channels) - - return tmp_output_signal diff --git a/item_generation_scripts/audiotools/wrappers/random_seed.py b/item_generation_scripts/audiotools/wrappers/random_seed.py deleted file mode 100644 index 01cf0870..00000000 --- a/item_generation_scripts/audiotools/wrappers/random_seed.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. 
This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -from typing import Optional, Tuple - -from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES -from item_generation_scripts.utils import find_binary, run - - -def random_seed( - range: Tuple[int, int], - master_seed: Optional[int] = 0, - prerun_seed: Optional[int] = 0, - hexa: Optional[bool] = True, -) -> int: - """ - Wrapper for the random binary to generate one random value within a range - - Parameters - ---------- - range: Tuple[int, int] - Inclusive range for the generated value - master_seed: Optional[int] - Master seed for error pattern generation - prerun_seed: Optional[int] - Number of preruns in seed generation - hexa: Optional[bool] - Flag if output should be in hexadecimal or decimal format - - Returns - ------- - result: int - One random value (a hexadecimal string if hexa is True) - """ - - # find binary - if "random" in DEFAULT_CONFIG_BINARIES["binary_paths"]: - binary = find_binary( - DEFAULT_CONFIG_BINARIES["binary_paths"]["random"].name, - binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["random"].parent, - ) - else: - binary = find_binary("random") - - # set up command line - cmd = [ - str(binary), - "-n", # Number of items - str(1), - "-s", - str(master_seed), - "-d", - str(prerun_seed), - "-r", # value range for results - str(range[0]), - str(range[1]), - ] - - # run command - result = run(cmd) - result = int(result.stdout[:-1]) - - if hexa: - result = hex(result) - - return result diff --git a/item_generation_scripts/binary_paths.yml b/item_generation_scripts/binary_paths.yml deleted file mode 100644 index bafcacfc..00000000 --- a/item_generation_scripts/binary_paths.yml +++ /dev/null @@ -1,30 +0,0 @@ ---- -################################################ -# Binary paths -################################################ -### Custom binary paths and names can be specified here. -### If not defined here, the binaries in item_generation_scripts/bin will be used -### If binaries are neither specified here nor found in the bin folder, the scripts will look for them in $PATH -### DO NOT change the location of this file. -### DO NOT USE relative paths. The paths have to be absolute. -### DO NOT change the default keys.
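# Editor's note: a short sketch of how these YAML entries are consumed by the
# scripts (see get_binary_paths() and find_binary() in utils.py; the 'filter'
# key is just an example):
#
#     paths = get_binary_paths("binary_paths.yml")  # {} if everything is commented out
#     if "filter" in paths:
#         binary = find_binary(paths["filter"].name, binary_path=paths["filter"].parent)
#     else:
#         binary = find_binary("filter")  # falls back to the bin folder, then $PATH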
-### For example, if the user has renamed the 'filter' binary to 'foo' then use --> filter: path/to/binary/foo - -# ### Binary for resampling and filtering -# filter: "path/to/binary/filter_new" -# ### Binary for loudness adjustment -# bs1770demo: "path/to/binary/bs1880" -# ### Binary for MNRU -# p50fbmnru: "path/to/binary/p50fbmnru" -# ### Binary for ESDRU -# esdru: "path/to/binary/esdru" -# ### Binary for frame error pattern application -# eid-xor: "path/to/binary/eid-xor" -# ### Binary for error pattern generation -# gen-patt: "path/to/binary/gen-patt" -# ### Binary for random offset/seed generation -# random: "path/to/binary/random" -# ### Binary for JBM network simulator -# networkSimulator_g192: "path/to/binary/networkSimulator_g192" -# ### Binary for MASA rendering -# masaRenderer: "path/to/binary/masaRenderer" \ No newline at end of file diff --git a/item_generation_scripts/processing/__init__.py b/item_generation_scripts/processing/__init__.py deleted file mode 100644 index aea270d8..00000000 --- a/item_generation_scripts/processing/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods.
-# diff --git a/item_generation_scripts/processing/preprocessing_2.py b/item_generation_scripts/processing/preprocessing_2.py deleted file mode 100644 index 1152ccc7..00000000 --- a/item_generation_scripts/processing/preprocessing_2.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
-# - -import logging -from pathlib import Path -from warnings import warn - -import numpy as np - -from item_generation_scripts.audiotools import audio -from item_generation_scripts.audiotools.audioarray import delay, trim -from item_generation_scripts.audiotools.audiofile import write -from item_generation_scripts.audiotools.metadata import ( - add_remove_preamble, - write_ISM_metadata_in_file, -) -from item_generation_scripts.audiotools.wrappers.bs1770 import ( - get_loudness, - loudness_norm, -) -from item_generation_scripts.audiotools.wrappers.random_seed import random_seed -from item_generation_scripts.processing.processing import Processing - - -class Preprocessing2(Processing): - def __init__(self, attrs: dict): - super().__init__(attrs) - self.name = "pre_2" - - def process(self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger): - logger.debug(f"Preprocessing2 configuration: {self.__dict__}") - logger.debug(f"Preprocessing2 {in_file.absolute()} -> {out_file.absolute()}") - - # load in file - audio_object = audio.fromfile( - self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta - ) - - # add preamble - if self.preamble: - # also apply preamble to ISM metadata - if self.in_fmt.startswith("ISM"): - # read out old - metadata = [] - for meta in in_meta: - metadata.append(np.genfromtxt(meta, delimiter=",")) - - # modify metadata - metadata = add_remove_preamble(metadata, self.preamble) - meta_files = write_ISM_metadata_in_file(metadata, [out_file], True) - - # modify audio object - audio_object.metadata_files = meta_files - audio_object.object_pos = metadata - - # add preamble to actual signal - audio_object.audio = trim( - audio_object.audio, - audio_object.fs, - (-self.preamble, 0), - self.pad_noise_preamble, - ) - - # add background noise - if self.background_noise: - audio_object.audio = self.add_background_noise(audio_object, in_meta) - - # save file - write(out_file, audio_object.audio, fs=audio_object.fs) - - return - - def add_background_noise(self, audio_object: audio.Audio, in_meta) -> np.ndarray: - # range for random delay - range_delay = (1, 2400000) - - # load background noise - noise_object = audio.fromfile( - self.in_fmt, - self.background_noise["background_noise_path"], - fs=self.in_fs, - in_meta=in_meta, - ) - - # if noise is too short raise error - if len(noise_object.audio) < len(audio_object.audio): - raise ValueError("Background noise too short for audio signal") - if len(noise_object.audio) - range_delay[1] < len(audio_object.audio): - warn( - "Background noise may be too short for audio signal when considering the random delay" - ) - - # measure loudness of audio signal based on output format - tmp_object = audio.fromtype(self.out_fmt) - if ( - isinstance(tmp_object, audio.ObjectBasedAudio) - or isinstance(tmp_object, audio.SceneBasedAudio) - or isinstance(tmp_object, audio.MetadataAssistedSpatialAudio) - ): - out_format = None - else: - out_format = self.out_fmt - - loudness_signal, _ = get_loudness(audio_object, loudness_format=out_format) - - # compute desired loudness of background noise - loudness_noise = loudness_signal - self.background_noise["snr"] - - # apply random delay and cut signal - rand_delay = random_seed( - range=range_delay, - master_seed=self.background_noise["master_seed"], - prerun_seed=self.background_noise["seed_delay"], - hexa=False, - ) - noise_object.audio = delay( - noise_object.audio, delay=-rand_delay, samples=True, fs=noise_object.fs - )[: len(audio_object.audio)] - - # scale background noise to desired loudness based on
output format - noise_object.audio = loudness_norm(noise_object, loudness_noise, out_format) - - # add array to signal - audio_object.audio = noise_object.audio + audio_object.audio - - return audio_object.audio diff --git a/item_generation_scripts/processing/processing.py b/item_generation_scripts/processing/processing.py deleted file mode 100644 index ad2cf272..00000000 --- a/item_generation_scripts/processing/processing.py +++ /dev/null @@ -1,455 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
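# Editor's note: an illustrative recap, not from the original sources, of the
# background-noise scaling in add_background_noise() above. Loudness values are
# in dB, so the noise target is the measured signal loudness minus the SNR:
#
#     loudness_signal = -26.0                 # measured via get_loudness()
#     snr = 20.0                              # self.background_noise["snr"]
#     loudness_noise = loudness_signal - snr  # -46.0, passed to loudness_norm()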
-# - -import logging -from abc import ABC, abstractmethod -from itertools import repeat -from pathlib import Path -from shutil import copyfile -from typing import Iterable, Union -from warnings import warn - -import numpy as np - -from item_generation_scripts.audiotools import audio -from item_generation_scripts.audiotools.audiofile import ( - concat, - read, - split, - trim, - write, -) -from item_generation_scripts.audiotools.metadata import ( - add_remove_preamble, - concat_meta_from_file, - metadata_search, - split_meta_in_file, - write_ISM_metadata_in_file, -) -from item_generation_scripts.audiotools.wrappers.bs1770 import scale_files -from item_generation_scripts.constants import LOGGER_DATEFMT, LOGGER_FORMAT -from item_generation_scripts.processing.config import TestConfig -from item_generation_scripts.utils import apply_func_parallel, list_audio, pairwise - - -class Processing(ABC): - def __init__(self, attrs: dict): - self.__dict__.update(attrs) - - @abstractmethod - def process( - self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger - ) -> None: - pass - - -def reorder_items_list(items_list: list, concatenation_order: list) -> list: - name_to_full = {Path(full_file).name: full_file for full_file in items_list} - ordered_full_files = [ - name_to_full[name] for name in concatenation_order if name in name_to_full - ] - return ordered_full_files - - -def concat_setup(cfg: TestConfig, chain, logger: logging.Logger): - n_items_list = len(cfg.items_list) - cfg_pre2 = chain[0] - - # check for text files - if any([i for i in cfg.items_list if i.suffix == ".txt"]): - raise SystemExit("Concatenation for text files is unsupported") - - # apply concatenation order - if cfg_pre2.concatenation_order is not None: - n_concatenation_order = len(cfg_pre2.concatenation_order) - if n_concatenation_order != n_items_list: - warn( - f"Warning: Mismatch in specified concatenation order and number of items to process!\n" - f"Number of items specified in concatenation order: {n_concatenation_order}\n" - f"Number of items in the directory: {n_items_list}\n" - f"Concatenation will use the following order:\n{cfg_pre2.concatenation_order}" - ) - - logger.info(f"Concatenating input files in directory {cfg.input_path}") - - # concatenate ISM metadata - if cfg.input["fmt"].startswith("ISM"): - cfg.concat_meta = [] - for obj_idx in range(len(cfg.metadata_path[0])): - cfg.concat_meta.append( - cfg.tmp_dirs[0].joinpath( - f"{cfg.input_path.name}_concatenated.wav.{obj_idx}.csv" - ) - ) - concat_meta_from_file( - cfg.items_list, - cfg.metadata_path, - cfg.concat_meta, - cfg.input["fmt"], - ) - - # set input to the concatenated file we have just written to the output dir - cfg.metadata_path = [cfg.concat_meta] - - # concatenate audio - cfg.concat_file = cfg.tmp_dirs[0].joinpath( - f"{cfg.input_path.name}_concatenated.wav" - ) - - # determine number of channels for pcm and raw files - tmp_audio = audio.fromtype(cfg_pre2.in_fmt) - tmp_num_chans = tmp_audio.num_channels - - cfg.splits = concat( - cfg.items_list, - cfg.concat_file, - in_fs=cfg.input.get("fs", 48000), - num_channels=tmp_num_chans, - ) - - # save item naming for splits naming in the end - cfg.split_names = [] - for name in cfg.items_list: - cfg.split_names.append(Path(name).stem.split(".")[0]) - # set input to the concatenated file we have just written to the output dir - cfg.items_list = [cfg.concat_file] - - # write out splits - with open(cfg.concat_file.with_suffix(".splits.log"), "w") as f: - print(", ".join([str(s) for s in cfg.splits]), 
file=f) - print(", ".join([str(sn) for sn in cfg.split_names]), file=f) - print(", ".join([str(i.stem) for i in cfg.items_list]), file=f) - - logger.info(f"Splits written to file {cfg.concat_file.with_suffix('.splits.log')}") - - -def concat_teardown(cfg: TestConfig, logger: logging.Logger): - if not cfg.splits: - raise ValueError("Splitting not possible without split marker") - - output_format = cfg.postprocessing["fmt"] - - out_files = [] - out_meta = [] - - logger.info(f"Splitting output file in directory {cfg.output_path}") - - for odir in cfg.out_dirs: - path_input = odir / cfg.items_list[0].name - out_paths = split( - path_input, - odir, - cfg.split_names, - cfg.splits, - in_fs=cfg.postprocessing["fs"], - ) - - logger.debug( - f"Resulting split files condition {odir.name}: {', '.join([str(op) for op in out_paths])}" - ) - out_files.append(out_paths) - - # split ISM metadata - if output_format.startswith("ISM"): - for odir in cfg.out_dirs: - path_input = odir / cfg.items_list[0].name - out_meta_paths = split_meta_in_file( - path_input, - odir, - cfg.split_names, - cfg.splits, - output_format, - meta_files=cfg.metadata_path[0], - ) - out_meta.append(out_meta_paths) - - # remove concatenated file - if cfg.delete_tmp: - cfg.concat_file.unlink(missing_ok=True) - - return out_files, out_meta - - -def preprocess(cfg, logger): - preprocessing = cfg.proc_chains[0] - chain = preprocessing["processes"] - - logger.info(f" Generating condition: {preprocessing['name']}") - - # run preprocessing - apply_func_parallel( - process_item, - zip( - cfg.items_list, - repeat(cfg.tmp_dirs[0]), - repeat(cfg.out_dirs[0]), - repeat(chain), - repeat(logger), - cfg.metadata_path, - ), - None, - "mp" if cfg.multiprocessing else None, - ) - - # update the configuration to use preprocessing outputs as new inputs - cfg.items_list = list_audio( - cfg.out_dirs[0], absolute=False, select_list=getattr(cfg, "input_select", None) - ) - - # Re-ordering items based on concatenation order - if ( - hasattr(cfg, "preprocessing_2") - and cfg.preprocessing_2.get("concatenate_input", False) - and cfg.preprocessing_2.get("concatenation_order", None) is not None - ): - cfg.items_list = reorder_items_list(cfg.items_list, cfg.concatenation_order) - - if cfg.metadata_path[0] is not None: - for item_idx in range(len(cfg.metadata_path)): - for obj_idx in range(len(cfg.metadata_path[item_idx])): - if cfg.metadata_path[item_idx][obj_idx]: - cfg.metadata_path[item_idx][obj_idx] = cfg.out_dirs[0] / Path( - f"{cfg.items_list[item_idx].stem}.wav.{obj_idx}.csv" - ) - # remove already applied processing stage - cfg.proc_chains = cfg.proc_chains[1:] - cfg.tmp_dirs = cfg.tmp_dirs[1:] - cfg.out_dirs = cfg.out_dirs[1:] - - -def preprocess_2(cfg, logger): - preprocessing_2 = cfg.proc_chains[0] - chain = preprocessing_2["processes"] - - logger.info(f" Generating condition: {preprocessing_2['name']}") - - # concatenate items if required - if chain[0].concatenate_input: - concat_setup(cfg, chain, logger) - - # run preprocessing 2 - apply_func_parallel( - process_item, - zip( - cfg.items_list, - repeat(cfg.tmp_dirs[0]), - repeat(cfg.out_dirs[0]), - repeat(chain), - repeat(logger), - cfg.metadata_path, - ), - None, - "mp" if cfg.multiprocessing else None, - ) - - # update the configuration to use preprocessing 2 outputs as new inputs - cfg.items_list = list_audio( - cfg.out_dirs[0], absolute=False, select_list=getattr(cfg, "input_select", None) - ) - - # Re-ordering items based on concatenation order - if ( - hasattr(cfg, "preprocessing_2") - and 
cfg.preprocessing_2.get("concatenate_input", False) - and cfg.preprocessing_2.get("concatenation_order", None) is not None - ): - cfg.items_list = reorder_items_list(cfg.items_list, cfg.concatenation_order) - - if cfg.metadata_path[0] is not None: - for item_idx in range(len(cfg.metadata_path)): - for obj_idx in range(len(cfg.metadata_path[item_idx])): - if cfg.metadata_path[item_idx][obj_idx]: - cfg.metadata_path[item_idx][obj_idx] = cfg.out_dirs[0] / Path( - f"{cfg.items_list[item_idx].stem}.wav.{obj_idx}.csv" - ) - # remove already applied processing stage - cfg.proc_chains = cfg.proc_chains[1:] - cfg.tmp_dirs = cfg.tmp_dirs[1:] - cfg.out_dirs = cfg.out_dirs[1:] - - return - - -def reverse_process_2(cfg, logger): - # remove preamble - if cfg.pre2.preamble: - remove_preamble(cfg) - - # reverse concatenation - if cfg.pre2.concatenate_input: - # write out the splits, optionally remove file - out_paths_splits, out_meta_splits = concat_teardown(cfg, logger) - else: - # if no concatenation read files from folder - out_paths_splits = [] - for out_dir in cfg.out_dirs: - list_audio_dir = list_audio(out_dir, absolute=True) - out_paths_splits.append(list_audio_dir) - if cfg.postprocessing["fmt"].startswith("ISM"): - out_meta_splits = [] - for i, condition in enumerate(out_paths_splits): - meta_condition = metadata_search( - cfg.out_dirs[i], - condition, - num_objects=int(cfg.postprocessing["fmt"][-1]), - ) - out_meta_splits.append(meta_condition) - else: - out_meta_splits = None - - # scale individual files - if cfg.postprocessing.get("loudness", False): - scale_files( - out_paths_splits, - cfg.postprocessing["fmt"], - cfg.postprocessing["loudness"], - cfg.postprocessing["fs"], - out_meta_splits, - ) - return - - -def process_item( - in_file: Union[Path, str], - tmp_dir: Union[Path, str], - out_dir: Union[Path, str], - chain: Iterable, - logger: logging.Logger, - in_meta, -) -> None: - tmp_file = tmp_dir.joinpath(in_file.name) - tmp_file_meta = [] - if in_meta: - for im in in_meta: - tmp_file_meta.append(tmp_dir.joinpath(Path(im).name)) - - # assemble a list of files to be used during the processing chain - out_dir_wav = False - processing_paths = [in_file] - processing_paths_meta = [in_meta] - for p in chain: - if Path(in_file.name).suffix == ".txt" and p.out_fmt is not None: - processing_paths.append(tmp_file.with_suffix(f".{p.name}.wav")) - out_dir_wav = True - else: - processing_paths.append(tmp_file.with_suffix(f".{p.name}{tmp_file.suffix}")) - try: - out_format = p.out_fmt - except AttributeError: - # EVS has no attribute out_fmt - out_format = p.in_fmt - try: - bool_ism = out_format.startswith("ISM") - except Exception: - bool_ism = out_format.name.startswith("ISM") - - if bool_ism: - list_meta_step = [] - for idx, tfm in enumerate(tmp_file_meta): - list_meta_step.append( - tfm.parent - / f"{in_file.stem.split('.')[0]}.{p.name}.wav.{idx}.csv" - ) - processing_paths_meta.append(list_meta_step) - else: - processing_paths_meta.append(None) - # TODO: support txt file writing for META pass-through - - if out_dir_wav: - out_file = out_dir.joinpath(in_file.name).with_suffix(".wav") - else: - out_file = out_dir.joinpath(in_file.name) - - out_meta = [] - if in_meta: - for im in range(len(in_meta)): - out_meta.append(out_dir.joinpath(f"{Path(out_file).stem}.wav.{im}.csv")) - - # execute each process sequentially, feed output into input of next process - for p, (input, output), input_meta in zip( - chain, pairwise(processing_paths), processing_paths_meta[:-1] - ): - # setup logging for the output - 
item_logger = logger.getChild(output.stem) - fh = logging.FileHandler(output.with_suffix(".log"), mode="w") - fh.setLevel(logging.DEBUG) - fh.setFormatter(logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT)) - item_logger.addHandler(fh) - - p.process(input, output, input_meta, item_logger) - - # copy output and metadata from final process to output file - copyfile(processing_paths[-1], out_file) - if processing_paths_meta[-1]: - for idx, ppm in enumerate(processing_paths_meta[-1]): - copyfile(ppm, out_meta[idx]) - - -def remove_preamble(cfg): - # get number of channels from output format - num_channels = audio.fromtype(cfg.postprocessing["fmt"]).num_channels - for odir in cfg.out_dirs: - for item in cfg.items_list: - path_input = odir / item.name - - # remove preamble for ISM metadata - if cfg.postprocessing["fmt"].startswith("ISM"): - # search for metadata - meta_item = metadata_search( - odir, [Path(item.name)], num_objects=num_channels - ) - metadata_array = [] - for meta_i in meta_item: - metadata_array.append(np.genfromtxt(meta_i, delimiter=",")) - - # remove preamble - metadata_array = add_remove_preamble( - metadata_array, cfg.pre2.preamble, add=False - ) - - # write csv files - write_ISM_metadata_in_file( - metadata_array, [path_input], automatic_naming=True - ) - - # read file - x, fs = read( - path_input, nchannels=num_channels, fs=cfg.postprocessing["fs"] - ) - - # remove preamble - x = trim(x, fs, (cfg.pre2.preamble, 0)) - - # write file - write(path_input, x, fs) - - return diff --git a/item_generation_scripts/utils.py b/item_generation_scripts/utils.py deleted file mode 100644 index 1e21b0db..00000000 --- a/item_generation_scripts/utils.py +++ /dev/null @@ -1,297 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. 
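# Editor's note: a minimal sketch (hypothetical file names) of how
# process_item() above chains the stages: pairwise() turns the path list into
# consecutive (input, output) pairs, so each stage reads its predecessor's
# output:
#
#     paths = [in_file, "item.pre_2.wav", "item.cod.wav"]
#     for p, (src, dst) in zip(chain, pairwise(paths)):
#         p.process(src, dst, in_meta, item_logger)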
-# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -import logging -import shutil -import subprocess as sp -import sys -from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor -from itertools import repeat, tee -from os import devnull -from pathlib import Path -from shutil import which -from typing import Callable, Iterable, Optional, Union - -import yaml - -ALLOWED_INPUT_EXT = (".wav", ".pcm", ".txt", ".raw") -BIN_DIR = Path(__file__).parent.joinpath("bin") - - -""" -Directory/path handling -""" - - -def create_dir(p: str) -> None: - p = Path(p) - p.mkdir(exist_ok=True, parents=True) - - -def delete_dir(p: str) -> None: - p = Path(p) - if p.exists() and p.is_dir(): - shutil.rmtree(p) - - -class DirManager: - """ - Context manager that creates directories if not already present and - automatically cleans up (i.e. deletes) all specified paths - """ - - def __init__( - self, create_paths: Union[str, list], delete_paths: Union[str, list] = list() - ): - self.create_paths = ( - create_paths if isinstance(create_paths, list) else [create_paths] - ) - self.delete_paths = ( - delete_paths if isinstance(delete_paths, list) else [delete_paths] - ) - - def __enter__(self): - for path in self.create_paths: - create_dir(path) - - def __exit__(self, exc_type, exc_value, exc_traceback): - for path in self.delete_paths: - if path in self.create_paths: - delete_dir(path) - else: - print( - f"Tmp dir '{path}' was not present in creation paths - skipping deletion." - ) - - -def list_audio(path: str, absolute: bool = False, select_list: list = None) -> list: - """ - Return list with all files with ALLOWED_INPUT_EXT found under the given path. - - If path is a directory, all files in it are included, if it is a file, just the file - will be in the list. If a select list is provided, files are filtered accordingly.
- """ - path = Path(path) - audio_list = [] - - if path.exists(): - if path.is_dir(): - if absolute: - [audio_list.extend(list(path.glob(ext))) for ext in ALLOWED_INPUT_EXT] - audio_list = [ - path.joinpath(f) - for f in path.iterdir() - if f.suffix in ALLOWED_INPUT_EXT - ] - else: - audio_list = [ - f for f in path.iterdir() if f.suffix in ALLOWED_INPUT_EXT - ] - else: - if not absolute: - path = path.name - ext = path.suffix - if ext in ALLOWED_INPUT_EXT: - audio_list.append(path) - - # filter according to select list - if select_list: - select_set = set([Path(i).stem for i in select_list]) - audio_list = [ - f for f in audio_list if any([pattern in f.stem for pattern in select_set]) - ] - - return audio_list - - -def get_nickname(p: Path) -> str: - return f"{p.parent.name}/{p.name}" - - -""" -System interaction -""" - - -def find_binary( - binary: str, - raise_error: Optional[bool] = True, - logger: Optional[logging.Logger] = None, - binary_path: Optional[str] = None, -) -> Union[Path, None]: - """Attempt to find and return the path to the given binary""" - # prioritise binaries placed in the directory over $PATH - if binary_path is not None: - bin = which(binary, path=binary_path) - else: - bin = which(binary, path=BIN_DIR) - if not bin: - bin = which(binary) - - if not bin and raise_error: - raise FileNotFoundError( - f"Binary {binary} was neither found in {binary_path.absolute()} nor in {BIN_DIR.absolute()} or in $PATH!" - ) - elif not bin: - if logger: - logger.debug(f"Couldn't find binary {binary}") - return None - else: - if logger: - logger.debug(f"Found binary {bin}") - return Path(bin) - - -def get_devnull(): - return devnull - - -def get_gitsha(): - try: - git_sha = sp.check_output( - ["git", "rev-parse", "HEAD"], stderr=sp.STDOUT, text=True - ).strip() - except sp.CalledProcessError: - git_sha = "git repository not found!" 
- - return git_sha - - -def run(cmd, cwd=None, check=True, logger: Optional[logging.Logger] = None): - if logger: - logger.debug(f"Running command {' '.join([str(c) for c in cmd])}; cwd = {cwd}") - - try: - result = sp.run(cmd, check=check, capture_output=True, text=True, cwd=cwd) - except sp.CalledProcessError as e: - raise SystemError( - f"Command returned non-zero exit status ({e.returncode}): {' '.join([str(c) for c in e.cmd])}\n{e.stderr}\n{e.stdout}" - ) - - if logger: - logger.debug(result.stderr.strip()) - logger.debug(result.stdout.strip()) - - return result - - -""" -Utility functions -""" - - -def apply_func_parallel( - func: Callable, - args: Iterable, - kwargs: Optional[Iterable] = None, - type: Optional[str] = None, - show_progress: Optional[bool] = True, -) -> list: - """ - Apply a function iteratively to a list of arguments and keyword arguments - Optionally with multiprocessing or multithreading - - Parameters - ---------- - func : Callable - Function to use - args : Iterable - List of positional arguments - kwargs: Optional[Iterable] - List of keyword arguments - type: Optional[str] - Type of parallel processing to use, "mp" for multiprocessing or "mt" for threading, default = None (sequential processing) - show_progress: Optional[bool] - Flag whether to show progress bar - - Returns - ------- - List of function results - """ - - # if no kwargs are specified, repeat the empty dict to avoid issues with zipping and unpacking - if not kwargs: - kwargs = repeat({}) - - args_zip = zip(args, kwargs) - - if type == "mp": - executor = ProcessPoolExecutor - elif type == "mt": - executor = ThreadPoolExecutor - else: - return [ - func(*a, **k) - for a, k in (progressbar(list(args_zip)) if show_progress else args_zip) - ] - - with executor() as e: - results = [e.submit(func, *a, **k) for a, k in args_zip] - return [ - r.result() for r in (progressbar(results) if show_progress else results) - ] - - -def pairwise(iter): - """itertools.pairwise() for python < 3.10""" - a, b = tee(iter) - next(b, None) - return zip(a, b) - - -def progressbar(iter: Iterable, width=80): - """simple unicode progressbar""" - count = len(iter) - - def update(progress): - fill = int(width * progress / count) - print( - f"{int(progress/count*100):3d}%{u'│'}{u'█'*fill}{(u'░'*(width-fill))}{u'│'}{progress}/{count}", - end="\r", - file=sys.stdout, - flush=True, - ) - - update(0) - for i, item in enumerate(iter): - yield item - update(i + 1) - print("\n", flush=True, file=sys.stdout) - - -def get_binary_paths(yaml_file_with_binary_paths): - with open(yaml_file_with_binary_paths, "r") as f: - data = yaml.safe_load(f) - if data is None: - return {} - else: - return {key: Path(value) for key, value in data.items()} diff --git a/ivas_processing_scripts/audiotools/audiofile.py b/ivas_processing_scripts/audiotools/audiofile.py index 954c91f8..d5687a89 100755 --- a/ivas_processing_scripts/audiotools/audiofile.py +++ b/ivas_processing_scripts/audiotools/audiofile.py @@ -110,6 +110,7 @@ def write( filename: Union[str, Path], x: np.ndarray, fs: Optional[int] = 48000, + dtype: Optional[str] = "int16", ) -> None: """ Write audio file (.pcm, .wav or .raw) @@ -122,6 +123,8 @@ def write( Numpy 2D array of dimension: number of channels x number of samples fs: Optional[int] Sampling rate, required for .pcm or .raw input file, default = 48000 (Hz) + dtype: Optional[str] + Data type format required for .pcm or .raw input file, default = 'int16' Returns ------- @@ -141,7 +144,7 @@ def write( x = x.astype(np.int16) wav.write(filename, 
fs, x) elif file_extension == ".pcm" or file_extension == ".raw": - x = x.astype("int16").reshape(-1, 1) + x = x.astype(dtype).reshape(-1, 1) x.tofile(filename) else: raise ValueError("Wrong input format. Use wav, pcm or raw") diff --git a/item_generation_scripts/audiotools/wrappers/reverb.py b/ivas_processing_scripts/audiotools/wrappers/reverb.py similarity index 90% rename from item_generation_scripts/audiotools/wrappers/reverb.py rename to ivas_processing_scripts/audiotools/wrappers/reverb.py index 1c4491bd..46f4ee33 100644 --- a/item_generation_scripts/audiotools/wrappers/reverb.py +++ b/ivas_processing_scripts/audiotools/wrappers/reverb.py @@ -31,18 +31,19 @@ # import os.path -import numpy as np -from scipy.fft import fft from copy import copy from pathlib import Path from tempfile import TemporaryDirectory from typing import Optional, Union -from item_generation_scripts.constants import DEFAULT_CONFIG_BINARIES -from item_generation_scripts.utils import find_binary, run -from item_generation_scripts.audiotools.audio import Audio -from item_generation_scripts.audiotools.audiofile import read, write -from item_generation_scripts.audiotools.wrappers.filter import resample_itu +import numpy as np +from scipy.fft import fft + +from ivas_processing_scripts.audiotools.audio import Audio +from ivas_processing_scripts.audiotools.audiofile import read, write +from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu +from ivas_processing_scripts.constants import DEFAULT_CONFIG_BINARIES +from ivas_processing_scripts.utils import find_binary, run def reverb( @@ -62,13 +63,13 @@ def reverb( Impulse response align: float multiplicative factor to apply to the reverberated sound in order to align its energy level with a second file - + Returns ------- output: Audio Convolved audio signal with IR """ - + # find binary if "reverb" in DEFAULT_CONFIG_BINARIES["binary_paths"]: binary = find_binary( @@ -77,10 +78,10 @@ def reverb( ) else: binary = find_binary("reverb") - + with TemporaryDirectory() as tmp_dir: tmp_dir = Path(tmp_dir) - + # resample input audio signal to that of the IR old_fs = None tmp_input = copy(input) @@ -92,12 +93,12 @@ def reverb( # write input audio signal to temporary file in .pcm format tmp_input_file = tmp_dir.joinpath(f"tmp_reverbIn.pcm") write(tmp_input_file, tmp_input.audio, tmp_input.fs) - + # down-scale IR to prevent saturation # max_value = np.max(np.abs(IR.audio)) # if max_value > 1.0: - # IR.audio = IR.audio / max_value - + # IR.audio = IR.audio / max_value + # write IR to temporary file in .pcm format # note: the reverb tool expects 32b float format tmp_IR_file = tmp_dir.joinpath(f"tmp_IR.pcm") @@ -111,7 +112,7 @@ def reverb( # append multiplicative factor, if provided if align: cmd.extend(["-align", str(align)]) - + # append temporary filenames tmp_output_file = tmp_dir.joinpath(f"tmp_reverbOut.pcm") cmd.extend([tmp_input_file, tmp_IR_file, tmp_output_file]) @@ -119,17 +120,18 @@ def reverb( # run the 'reverb' command run(cmd) - # read the reverberated output file + # read the reverberated output file output = copy(tmp_input) output.audio, _ = read(tmp_output_file, nchannels=1, fs=tmp_input.fs) - + # reverse the resampling if old_fs: output.audio = resample_itu(output, old_fs) output.fs = old_fs - + return output + def reverb_stereo( input: Audio, stereo_IR: Audio, align: Optional[float] = None, ) -> Audio: @@ -146,13 +148,13 @@ def reverb_stereo( Impulse response align: float multiplicative factor to apply to the reverberated sound in order to align its energy level with
the second file - + Returns ------- output: Audio Convolved audio signal with stereo IR """ - + # convert to float32 stereo_IR.audio = np.float32(stereo_IR.audio) @@ -160,26 +162,26 @@ def reverb_stereo( IR_left = copy(stereo_IR) IR_left.name = "MONO" IR_left.num_channels = 1 - IR_left.audio = np.reshape(stereo_IR.audio[:,0], (-1, 1)) - + IR_left.audio = np.reshape(stereo_IR.audio[:, 0], (-1, 1)) + IR_right = copy(stereo_IR) IR_right.name = "MONO" IR_right.num_channels = 1 - IR_right.audio = np.reshape(stereo_IR.audio[:,1], (-1, 1)) + IR_right.audio = np.reshape(stereo_IR.audio[:, 1], (-1, 1)) # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB if align is None: H = fft(stereo_IR.audio, axis=0) align = 1.0 / np.max(np.abs(H)) - + # convolve mono input with left and right IR y_left = reverb(input, IR_left, align=align) y_right = reverb(input, IR_right, align=align) - + # combine into stereo output y = copy(input) y.name = "STEREO" y.num_channels = 2 y.audio = np.column_stack([y_left.audio, y_right.audio]) - + return y diff --git a/item_generation_scripts/__init__.py b/ivas_processing_scripts/generation/__init__.py old mode 100644 new mode 100755 similarity index 90% rename from item_generation_scripts/__init__.py rename to ivas_processing_scripts/generation/__init__.py index 8b3d8bae..2c7c9bf3 --- a/item_generation_scripts/__init__.py +++ b/ivas_processing_scripts/generation/__init__.py @@ -35,13 +35,13 @@ import os import yaml -from item_generation_scripts.constants import ( +from ivas_processing_scripts.constants import ( LOGGER_DATEFMT, LOGGER_FORMAT, LOGGER_SUFFIX, ) -from item_generation_scripts.processing import config, process_ism_items, process_stereo_items -from item_generation_scripts.utils import create_dir +from ivas_processing_scripts.generation import config, process_ism_items, process_stereo_items +from ivas_processing_scripts.utils import create_dir def logging_init(args, cfg): @@ -94,7 +94,9 @@ def main(args): fs=cfg.fs, preamble=cfg.preamble, postamble=cfg.postamble, - add_low_level_random_noise=cfg.add_low_level_random_noise, + add_low_level_random_noise=cfg.get("add_low_level_random_noise", False), + # TODO@VM dict.get() can provide a default value if the key is not found + # please check if this is a viable solution - I kept getting "AttributeError: 'TestConfig' object has no attribute 'add_low_level_random_noise'" ) elif cfg.format == "STEREO": # generate STEREO items according to scene description @@ -111,7 +113,7 @@ def main(args): preamble=cfg.preamble, postamble=cfg.postamble, ) - + # copy configuration to output directory with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f: yaml.safe_dump(cfg._yaml_dump, f) diff --git a/item_generation_scripts/__main__.py b/ivas_processing_scripts/generation/__main__.py old mode 100644 new mode 100755 similarity index 98% rename from item_generation_scripts/__main__.py rename to ivas_processing_scripts/generation/__main__.py index b49109d3..9ba00fd5 --- a/item_generation_scripts/__main__.py +++ b/ivas_processing_scripts/generation/__main__.py @@ -32,7 +32,7 @@ import argparse -from item_generation_scripts import main +from ivas_processing_scripts.generation import main if __name__ == "__main__": parser = argparse.ArgumentParser( diff --git a/item_generation_scripts/processing/config.py b/ivas_processing_scripts/generation/config.py similarity index 97% rename from item_generation_scripts/processing/config.py rename to 
ivas_processing_scripts/generation/config.py index 3e9aaaa5..ca9dbcc2 100644 --- a/item_generation_scripts/processing/config.py +++ b/ivas_processing_scripts/generation/config.py @@ -35,7 +35,7 @@ from pathlib import Path import yaml -from item_generation_scripts.constants import DEFAULT_CONFIG, REQUIRED_KEYS +from ivas_processing_scripts.generation.constants import DEFAULT_CONFIG, REQUIRED_KEYS def merge_dicts(base: dict, other: dict) -> None: @@ -122,4 +122,4 @@ class TestConfig: # Report missing keys to the user if MISSING_KEYS: - raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}") + raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}") \ No newline at end of file diff --git a/item_generation_scripts/constants.py b/ivas_processing_scripts/generation/constants.py similarity index 95% rename from item_generation_scripts/constants.py rename to ivas_processing_scripts/generation/constants.py index 6b0d0681..34001207 100644 --- a/item_generation_scripts/constants.py +++ b/ivas_processing_scripts/generation/constants.py @@ -33,7 +33,7 @@ from datetime import datetime from pathlib import Path -from item_generation_scripts.utils import get_binary_paths +from ivas_processing_scripts.utils import get_binary_paths LOGGER_SUFFIX = ".log" LOGGER_FORMAT = ( @@ -55,7 +55,7 @@ DEFAULT_CONFIG = { DEFAULT_CONFIG_BINARIES = { "binary_paths": get_binary_paths( - Path(__file__).parent.joinpath("binary_paths.yml") + Path(__file__).parent.parent.joinpath("binary_paths.yml") ), } @@ -64,4 +64,4 @@ REQUIRED_KEYS = [ "input_path", "output_path", "scenes", -] +] \ No newline at end of file diff --git a/item_generation_scripts/processing/process_ism_items.py b/ivas_processing_scripts/generation/process_ism_items.py similarity index 86% rename from item_generation_scripts/processing/process_ism_items.py rename to ivas_processing_scripts/generation/process_ism_items.py index fe62f048..810f770b 100644 --- a/item_generation_scripts/processing/process_ism_items.py +++ b/ivas_processing_scripts/generation/process_ism_items.py @@ -33,16 +33,18 @@ import csv import logging import os +from math import floor from pathlib import Path from typing import Optional + import numpy as np -from math import floor -from item_generation_scripts.audiotools import audio, audiofile -from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness +from ivas_processing_scripts.audiotools import audio, audiofile +from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness SEED_RANDOM_NOISE = 0 + # function for converting nd numpy array to strings with 2 decimal digits def csv_formatdata(data): for row in data: @@ -78,34 +80,33 @@ def generate_ism_items( else: y = audio.ChannelBasedAudio("MONO") y_meta = None - + # read the overlap length - if 'overlap' in scene.keys(): + if "overlap" in scene.keys(): source_overlap = float(scene["overlap"]) else: source_overlap = 0.0 - + # repeat for all source files for i in range(N_sources): - # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] source_azi = np.atleast_1d(scene["azimuth"])[i] source_ele = np.atleast_1d(scene["elevation"])[i] - + logger.info( f"Encoding {source_file} at position(s) {source_azi},{source_ele}" ) # read source file x = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs) - + ############### DEBUG ############33 # x.audio = x.audio[:-10] # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) - + # trim the source 
signal to align to 20ms boundary # N_trim = int(N_frames * x.fs / 50) # x.audio = x.audio[:N_trim] @@ -180,18 +181,18 @@ def generate_ism_items( # arrange all metadata fields column-wise into a matrix x_meta = np.column_stack((azi, ele, dist, spread, gain)) - + # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) if i > 0 and source_overlap != 0.0: # get the length of the first source file - N_delay = len(y.audio[:,0]) - + N_delay = len(y.audio[:, 0]) + # add the shift N_delay += int(source_overlap * x.fs) - + # ensure delay is a multiple of 20ms # N_delay = int(floor(source_shift * 50) / 50 * x.fs) - + # insert all-zero preamble pre = np.zeros((N_delay, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) @@ -199,14 +200,14 @@ # insert neutral position as a pre-amble pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1) - ) # !!!! TBD - check if we should insert neutral position or the first position of the metadata + ) # !!!! TBD - check if we should insert neutral position or the first position of the metadata x_meta = np.concatenate([pre, x_meta]) - - # pad with zeros to ensure that the signal length is a multiple of 20ms + + # pad with zeros to ensure that the signal length is a multiple of 20ms N_frame = x.fs / 50 if len(x.audio) % N_frame != 0: N_pad = int(N_frame - len(x.audio) % N_frame) - + # insert all-zero preamble pre = np.zeros((N_pad, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) @@ -214,7 +215,7 @@ # insert neutral position as a pre-amble pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1) - ) # !!!!
TBD - check if we should insert neutral position or the first position of the metadata + ) # !!!! TBD - check if we should insert neutral position or the first position of the metadata x_meta = np.concatenate([pre, x_meta]) # add source signal to the array of all source signals @@ -224,14 +225,28 @@ else: # pad with zeros to have the same length of all source signals if x.audio.shape[0] > y.audio.shape[0]: - y.audio = np.vstack((y.audio, np.zeros((x.audio.shape[0] - y.audio.shape[0], y.audio.shape[1])))) + y.audio = np.vstack( + ( + y.audio, + np.zeros( + (x.audio.shape[0] - y.audio.shape[0], y.audio.shape[1]) + ), + ) + ) elif y.audio.shape[0] > x.audio.shape[0]: - x.audio = np.vstack((x.audio, np.zeros((y.audio.shape[0] - x.audio.shape[0], x.audio.shape[1])))) + x.audio = np.vstack( + ( + x.audio, + np.zeros( + (y.audio.shape[0] - x.audio.shape[0], x.audio.shape[1]) + ), + ) + ) y.audio = np.hstack((y.audio, x.audio)) # add metadata to the array of all metadata # make sure x_meta is a 3d array - x_meta = x_meta[np.newaxis, :] + x_meta = x_meta[np.newaxis, :] if y_meta is None: y_meta = x_meta else: @@ -242,25 +257,19 @@ if x_meta.shape[1] > y_meta.shape[1]: N_delta = x_meta.shape[1] - y_meta.shape[1] # reshape to 2d array - y_meta = y_meta.reshape(y_meta.shape[1], -1) + y_meta = y_meta.reshape(y_meta.shape[1], -1) # repeat last row N_delta times and append to the array - y_meta = np.vstack( - (y_meta, np.tile(y_meta[-1, :], (N_delta, 1))) - ) + y_meta = np.vstack((y_meta, np.tile(y_meta[-1, :], (N_delta, 1)))) # reshape back to 3d array - y_meta = y_meta.reshape( - N_srcs, -1, N_meta_features - ) + y_meta = y_meta.reshape(N_srcs, -1, N_meta_features) elif y_meta.shape[1] > x_meta.shape[1]: N_delta = y_meta.shape[1] - x_meta.shape[1] # reshape to 2d array - x_meta = x_meta.reshape(x_meta.shape[1], -1) + x_meta = x_meta.reshape(x_meta.shape[1], -1) # repeat last row N_delta times and append to the array - x_meta = np.vstack( - (x_meta, np.tile(x_meta[-1, :], (N_delta, 1))) - ) + x_meta = np.vstack((x_meta, np.tile(x_meta[-1, :], (N_delta, 1)))) # reshape back to 3d array - x_meta = np.expand_dims(x_meta, axis=0) + x_meta = np.expand_dims(x_meta, axis=0) y_meta = np.concatenate([y_meta, x_meta]) @@ -268,7 +277,7 @@ if preamble != 0.0: # ensure that pre-amble is a multiple of 20ms N_pre = int(floor(preamble * 50) / 50 * y.fs) - + # insert all-zero preamble to all sources pre = np.zeros((N_pre, y.audio.shape[1])) y.audio = np.concatenate([pre, y.audio]) @@ -276,13 +285,13 @@ # insert neutral position as a pre-amble to all sources pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1) - ) # !!!! TBD - check if we should insert neutral position or the first position of the metadata + ) # !!!! TBD - check if we should insert neutral position or the first position of the metadata y_meta = np.concatenate([pre, y_meta], axis=1) - + if postamble != 0.0: # ensure that post-amble is a multiple of 20ms N_post = int(floor(postamble * 50) / 50 * y.fs) - + # append all-zero postamble to all sources post = np.zeros((N_post, y.audio.shape[1])) y.audio = np.concatenate([y.audio, post]) @@ -290,17 +299,17 @@ # append neutral position as a post-amble to all sources post = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1) - ) # !!!!
TBD - check if we should insert neutral position or the last position of the metadata + ) # !!!! TBD - check if we should insert neutral position or the last position of the metadata y_meta = np.concatenate([y_meta, post], axis=1) - + # add random noise if add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint( - low=-4, high=5, size=y.audio.shape - ).astype("float") - + noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( + "float" + ) + # superimpose y.audio += noise @@ -315,7 +324,12 @@ # generate .csv filename (should end with .0.csv, .1.csv, ...) csv_filename = os.path.normpath(f"{output_filename}.{i}.csv") - with open(os.path.join(output_path, csv_filename), 'w', newline='', encoding='utf-8') as f: + with open( + os.path.join(output_path, csv_filename), + "w", + newline="", + encoding="utf-8", + ) as f: # create csv writer writer = csv.writer(f) diff --git a/item_generation_scripts/processing/process_stereo_items.py b/ivas_processing_scripts/generation/process_stereo_items.py similarity index 81% rename from item_generation_scripts/processing/process_stereo_items.py rename to ivas_processing_scripts/generation/process_stereo_items.py index f8dcc43d..aecc1a57 100644 --- a/item_generation_scripts/processing/process_stereo_items.py +++ b/ivas_processing_scripts/generation/process_stereo_items.py @@ -34,16 +34,16 @@ import csv import logging import os -from pathlib import Path -from typing import Optional from copy import copy -import numpy as np from math import floor +from pathlib import Path +from typing import Optional +import numpy as np -from item_generation_scripts.audiotools import audio, audiofile -from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness -from item_generation_scripts.audiotools.wrappers.reverb import reverb_stereo +from ivas_processing_scripts.audiotools import audio, audiofile +from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness +from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_stereo # function for converting nd numpy array to strings with 2 decimal digits @@ -67,60 +67,57 @@ def generate_stereo_items( # get the number of scenes N_scenes = len(scenes) - + for scene_name, scene in scenes.items(): logger.info(f"Processing scene: {scene_name} out of {N_scenes} scenes") # extract the number of audio sources N_sources = len(np.atleast_1d(scene["source"])) - + # read the IR (check if stereo or two mono files were provided) source_IR = np.atleast_1d(scene["IR"]) y = audio.ChannelBasedAudio("STEREO") for i in range(N_sources): - # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] IR_file = np.atleast_1d(scene["IR"])[i] - if 'delay' in scene.keys(): + if "delay" in scene.keys(): source_delay = np.atleast_1d(scene["delay"])[i] else: source_delay = np.array([0]) - - logger.info( - f"Convolving {source_file} with {source_IR}" - ) + + logger.info(f"Convolving {source_file} with {source_IR}") # read source file x = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs) # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) - + # trim the source signal to align to 20ms boundary N_trim = int(N_frames * x.fs / 50) x.audio = x.audio[:N_trim] # read the IR file IR = audio.fromfile("STEREO", os.path.join(IR_path, IR_file), fs=IR_fs) - + # delay the source file if source_delay > 0: # ensure delay is a multiple of 20ms N_delay = int(floor(source_delay * 50) / 50 * x.fs) - + # insert all-zero preamble
pre = np.zeros((N_delay, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) - + # convolve with stereo IR x_rev = reverb_stereo(x, IR) - + # adjust the level of the stereo signal _, scale_factor = get_loudness(x_rev, target_level, "STEREO") x_rev.audio *= scale_factor - + # add source signal to the array of source signals y.fs = x.fs if y.audio is None: @@ -128,11 +125,31 @@ def generate_stereo_items( else: # append zeros to have equal length of all source signals if x_rev.audio.shape[0] > y.audio.shape[0]: - y.audio = np.vstack((y.audio, np.zeros((x_rev.audio.shape[0] - y.audio.shape[0], y.audio.shape[1])))) + y.audio = np.vstack( + ( + y.audio, + np.zeros( + ( + x_rev.audio.shape[0] - y.audio.shape[0], + y.audio.shape[1], + ) + ), + ) + ) elif y.audio.shape[0] > x_rev.audio.shape[0]: - x_rev.audio = np.vstack((x_rev.audio, np.zeros((y.audio.shape[0] - x_rev.audio.shape[0], x_rev.audio.shape[1])))) - - # superimpose + x_rev.audio = np.vstack( + ( + x_rev.audio, + np.zeros( + ( + y.audio.shape[0] - x_rev.audio.shape[0], + x_rev.audio.shape[1], + ) + ), + ) + ) + + # superimpose y.audio += x_rev.audio # write the reverberated audio into output file @@ -141,4 +158,4 @@ def generate_stereo_items( os.path.join(output_path, output_filename), y.audio, y.fs ) # !!!! TBD: replace all os.path.xxx operations with the Path object - return \ No newline at end of file + return -- GitLab From 3616e6dfb65f49f9d59d946ba3b91f6633d1889b Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Thu, 11 May 2023 16:00:37 +0200 Subject: [PATCH 14/27] [fix] get() -> getattr() --- ivas_processing_scripts/generation/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ivas_processing_scripts/generation/__init__.py b/ivas_processing_scripts/generation/__init__.py index 2c7c9bf3..094bfe35 100755 --- a/ivas_processing_scripts/generation/__init__.py +++ b/ivas_processing_scripts/generation/__init__.py @@ -94,7 +94,7 @@ def main(args): fs=cfg.fs, preamble=cfg.preamble, postamble=cfg.postamble, - add_low_level_random_noise=cfg.get("add_low_level_random_noise", False), + add_low_level_random_noise=getattr(cfg, "add_low_level_random_noise", False), # TODO@VM dict.get() can provide a default value if the key is not found # please check if this is a viable solution - I kept getting "AttributeError: 'TestConfig' object has no attribute 'add_low_level_random_noise'" ) -- GitLab From f41efcb89708f821a70ea28a292cb6e173ca8719 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 11 May 2023 16:11:44 +0200 Subject: [PATCH 15/27] support for +- overlap in STEREO items, expect trimmed sentences, support for low-level random noise addition --- item_generation_scripts/__init__.py | 1 + .../config/STEREO_CONFIG.yml | 129 +++++++++--------- .../processing/process_ism_items.py | 5 +- .../processing/process_stereo_items.py | 85 +++++++++--- 4 files changed, 135 insertions(+), 85 deletions(-) diff --git a/item_generation_scripts/__init__.py b/item_generation_scripts/__init__.py index 8b3d8bae..93516464 100644 --- a/item_generation_scripts/__init__.py +++ b/item_generation_scripts/__init__.py @@ -110,6 +110,7 @@ def main(args): IR_fs=cfg.IR_fs, preamble=cfg.preamble, postamble=cfg.postamble, + add_low_level_random_noise=cfg.add_low_level_random_noise, ) # copy configuration to output directory diff --git a/item_generation_scripts/config/STEREO_CONFIG.yml b/item_generation_scripts/config/STEREO_CONFIG.yml index 0933b1da..cb14747d 100644 --- a/item_generation_scripts/config/STEREO_CONFIG.yml +++ 
b/item_generation_scripts/config/STEREO_CONFIG.yml @@ -35,6 +35,13 @@ output_path: "./items_STEREO" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 +### Pre-amble and Post-amble length in seconds (default = 0.0) +preamble: 0.5 +postamble: 0.5 + +### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) +add_low_level_random_noise: true + ################################################ ### Scene description @@ -43,7 +50,7 @@ loudness: -26 ### Each scene must start with the sceneN tag ### Specify the mono source filename (the program will search for it in the input_path folder) ### Specify the stereo IR source filename (the program will search for it in the input_path_IR folder) -### Specify the delay in seconds for each input source +### Specify the overlap length in seconds for each input source (negative value creates a gap) ### Note 1: use [val1, val2, ...] for multiple sources in a scene ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames @@ -51,252 +58,252 @@ scenes: a1: name: "G1S1.wav" description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LEABP04.wav", "LEABP11.wav"] - delay: [0, 3] + overlap: 0.5 a2: name: "G6S2.wav" description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LEABP05.wav", "LEABP11.wav"] - delay: [0, 3] + overlap: 0.5 a3: name: "G5S3.wav" description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LEABP06.wav", "LEABP11.wav"] - delay: [0, 3] + overlap: 0.5 a4: name: "G4S4.wav" description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LEABP05.wav", "LEABP10.wav"] - delay: [0, 1.5] + overlap: -0.5 a5: name: "G3S5.wav" description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LEABP05.wav", "LEABP11.wav"] - delay: [0, 1.5] + overlap: -0.5 a6: name: "G2S6.wav" description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LEABP05.wav", "LEABP12.wav"] - delay: [0, 1.5] + overlap: -0.5 b1: name: "G2S1.wav" description: "Two speakers sitting at oval table side by side in a large anechoic conference room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LAABP05.wav", "LAABP06.wav"] - delay: [0, 35] + overlap: -0.5 b2: name: "G1S2.wav" description: "Two speakers sitting at oval table side by side in a large anechoic conference room." 
- source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LAABP07.wav", "LAABP08.wav"] - delay: [0, 3] + overlap: 0.5 b3: name: "G6S3.wav" description: "Two speakers sitting at oval table side by side in a large anechoic conference room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LAABP09.wav", "LAABP10.wav"] - delay: [0, 3] + overlap: 0.5 b4: name: "G5S4.wav" description: "Two speakers sitting at oval table side by side in a large anechoic conference room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LAABP11.wav", "LAABP12.wav"] - delay: [0, 1.5] + overlap: -0.5 b5: name: "G4S5.wav" description: "Two speakers sitting at oval table side by side in a large anechoic conference room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LAABP01.wav", "LAABP02.wav"] - delay: [0, 1.5] + overlap: -0.5 b6: name: "G3S6.wav" description: "Two speakers sitting at oval table side by side in a large anechoic conference room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LAABP03.wav", "LAABP04.wav"] - delay: [0, 1.5] + overlap: -0.5 c1: name: "G3S1.wav" description: "One talker sitting at table in a small anechoic conference room." source: ["test_single.wav"] IR: ["SAMSP01.wav"] - delay: [0] + overlap: -0.5 c2: name: "G2S2.wav" description: "One talker sitting at table in a small anechoic conference room." source: ["test_single.wav"] IR: ["SAMSP04.wav"] - delay: [0] + overlap: -0.5 c3: name: "G1S3.wav" description: "One talker sitting at table in a small anechoic conference room." source: ["test_single.wav"] IR: ["SAMSP07.wav"] - delay: [0] + overlap: -0.5 c4: name: "G6S4.wav" description: "One talker sitting at table in a small echoic conference room." source: ["test_single.wav"] IR: ["SEABP01.wav"] - delay: [0] + overlap: -0.5 c5: name: "G5S5.wav" description: "One talker sitting at table in a small echoic conference room." source: ["test_single.wav"] IR: ["SEABP03.wav"] - delay: [0] + overlap: -0.5 c6: name: "G4S6.wav" description: "One talker sitting at table in a small echoic conference room." source: ["test_single.wav"] IR: ["SEABP06.wav"] - delay: [0] + overlap: -0.5 d1: name: "G4S1.wav" description: "One talker sitting at table in a small anechoic conference room." source: ["test_single.wav"] IR: ["SEBIP01.wav"] - delay: [0] + overlap: -0.5 d2: name: "G3S2.wav" description: "One talker sitting at table in a small anechoic conference room." source: ["test_single.wav"] IR: ["SEBIP04.wav"] - delay: [0] + overlap: -0.5 d3: name: "G3S2.wav" description: "One talker sitting at table in a small anechoic conference room." source: ["test_single.wav"] IR: ["SEBIP07.wav"] - delay: [0] + overlap: -0.5 d4: name: "G1S4.wav" description: "One talker sitting at table in a small echoic conference room." source: ["test_single.wav"] IR: ["SEBIP07.wav"] - delay: [0] + overlap: -0.5 d5: name: "G6S5.wav" description: "One talker sitting at table in a small echoic conference room." source: ["test_single.wav"] IR: ["SEBIP07.wav"] - delay: [0] + overlap: -0.5 d6: name: "G5S6.wav" description: "One talker sitting at table in a small echoic conference room." 
source: ["test_single.wav"] IR: ["SEBIP07.wav"] - delay: [0] + overlap: -0.5 e1: name: "G5S1.wav" description: "Two talkers sitting in a room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEMSP01.wav", "SEMSP03.wav"] - delay: [0, 3] + overlap: 0.5 e2: name: "G4S2.wav" description: "Two talkers sitting in a room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEMSP01.wav", "SEMSP05.wav"] - delay: [0, 3] + overlap: 0.5 e3: name: "G3S3.wav" description: "Two talkers sitting in a room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEMSP01.wav", "SEMSP07.wav"] - delay: [0, 3] + overlap: 0.5 e4: name: "G2S4.wav" description: "Two talkers sitting in a room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEMSP03.wav", "SEMSP04.wav"] - delay: [0, 1.5] + overlap: -0.5 e5: name: "G1S5.wav" description: "Two talkers sitting in a room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEMSP05.wav", "SEMSP07.wav"] - delay: [0, 1.5] + overlap: -0.5 e6: name: "G6S6.wav" description: "Two talkers sitting in a room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEMSP06.wav", "SEMSP02.wav"] - delay: [0, 1.5] + overlap: -0.5 f1: name: "G6S1.wav" description: "Two talkers sitting in a room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEBIP05.wav", "SEBIP01.wav"] - delay: [0, 3] + overlap: 0.5 f2: name: "G5S2.wav" description: "Two talkers sitting in a room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEBIP07.wav", "SEBIP01.wav"] - delay: [0, 3] + overlap: 0.5 f3: name: "G4S3.wav" description: "Two talkers sitting in a room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEBIP04.wav", "SEBIP01.wav"] - delay: [0, 3] + overlap: 0.5 f4: name: "G3S4.wav" description: "Two talkers sitting in a room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEBIP02.wav", "SEBIP06.wav"] - delay: [0, 1.5] + overlap: -0.5 f5: name: "G2S5.wav" description: "Two talkers sitting in a room." - source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEBIP02.wav", "SEBIP06.wav"] - delay: [0, 1.5] + overlap: -0.5 f6: name: "G1S6.wav" description: "Two talkers sitting in a room." 
- source: ["test_single.wav", "test_single.wav"] + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEBIP03.wav", "SEBIP04.wav"] - delay: [0, 1.5] + overlap: -0.5 \ No newline at end of file diff --git a/item_generation_scripts/processing/process_ism_items.py b/item_generation_scripts/processing/process_ism_items.py index fe62f048..b03468ec 100644 --- a/item_generation_scripts/processing/process_ism_items.py +++ b/item_generation_scripts/processing/process_ism_items.py @@ -99,10 +99,7 @@ def generate_ism_items( # read source file x = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs) - - ############### DEBUG ############33 - # x.audio = x.audio[:-10] - + # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) diff --git a/item_generation_scripts/processing/process_stereo_items.py b/item_generation_scripts/processing/process_stereo_items.py index f8dcc43d..a6ed6c8a 100644 --- a/item_generation_scripts/processing/process_stereo_items.py +++ b/item_generation_scripts/processing/process_stereo_items.py @@ -40,11 +40,12 @@ from copy import copy import numpy as np from math import floor - from item_generation_scripts.audiotools import audio, audiofile from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness from item_generation_scripts.audiotools.wrappers.reverb import reverb_stereo +SEED_RANDOM_NOISE = 0 + # function for converting nd numpy array to strings with 2 decimal digits def csv_formatdata(data): @@ -62,6 +63,9 @@ def generate_stereo_items( logger: logging.Logger, fs: Optional[int] = 48000, IR_fs: Optional[int] = 48000, + preamble: Optional[float] = 0.0, + postamble: Optional[float] = 0.0, + add_low_level_random_noise: Optional[bool] = False, ): """Generate STEREO items from mono items based on scene description""" @@ -76,6 +80,12 @@ def generate_stereo_items( # read the IR (check if stereo or two mono files were provided) source_IR = np.atleast_1d(scene["IR"]) + + # read the overlap length + if 'overlap' in scene.keys(): + source_overlap = float(scene["overlap"]) + else: + source_overlap = 0.0 y = audio.ChannelBasedAudio("STEREO") for i in range(N_sources): @@ -83,10 +93,6 @@ def generate_stereo_items( # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] IR_file = np.atleast_1d(scene["IR"])[i] - if 'delay' in scene.keys(): - source_delay = np.atleast_1d(scene["delay"])[i] - else: - source_delay = np.array([0]) logger.info( f"Convolving {source_file} with {source_IR}" @@ -98,22 +104,9 @@ def generate_stereo_items( # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) - # trim the source signal to align to 20ms boundary - N_trim = int(N_frames * x.fs / 50) - x.audio = x.audio[:N_trim] - # read the IR file IR = audio.fromfile("STEREO", os.path.join(IR_path, IR_file), fs=IR_fs) - - # delay the source file - if source_delay > 0: - # ensure delay is a multiple of 20ms - N_delay = int(floor(source_delay * 50) / 50 * x.fs) - - # insert all-zero preamble - pre = np.zeros((N_delay, x.audio.shape[1])) - x.audio = np.concatenate([pre, x.audio]) - + # convolve with stereo IR x_rev = reverb_stereo(x, IR) @@ -121,12 +114,36 @@ def generate_stereo_items( _, scale_factor = get_loudness(x_rev, target_level, "STEREO") x_rev.audio *= scale_factor + # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) + if i > 0 and source_overlap != 0.0: + # get the length of the first source file + 
N_delay = len(y.audio[:,0]) + + # add the shift + N_delay += int(source_overlap * x.fs) + + # ensure delay is a multiple of 20ms + # N_delay = int(floor(source_shift * 50) / 50 * x.fs) + + # insert all-zero preamble + pre = np.zeros((N_delay, x.audio.shape[1])) + x.audio = np.concatenate([pre, x.audio]) + + # pad with zeros to ensure that the signal length is a multiple of 20ms + N_frame = x.fs / 50 + if len(x.audio) % N_frame != 0: + N_pad = int(N_frame - len(x.audio) % N_frame) + + # insert all-zero preamble + pre = np.zeros((N_pad, x.audio.shape[1])) + x.audio = np.concatenate([pre, x.audio]) + # add source signal to the array of source signals y.fs = x.fs if y.audio is None: y.audio = x_rev.audio else: - # append zeros to have equal length of all source signals + # pad with zeros to have equal length of all source signals if x_rev.audio.shape[0] > y.audio.shape[0]: y.audio = np.vstack((y.audio, np.zeros((x_rev.audio.shape[0] - y.audio.shape[0], y.audio.shape[1])))) elif y.audio.shape[0] > x_rev.audio.shape[0]: @@ -135,6 +152,34 @@ def generate_stereo_items( # superimpose y.audio += x_rev.audio + # append pre-amble and post-amble to all sources + if preamble != 0.0: + # ensure that pre-amble is a multiple of 20ms + N_pre = int(floor(preamble * 50) / 50 * y.fs) + + # insert all-zero preamble to all sources + pre = np.zeros((N_pre, y.audio.shape[1])) + y.audio = np.concatenate([pre, y.audio]) + + if postamble != 0.0: + # ensure that post-amble is a multiple of 20ms + N_post = int(floor(postamble * 50) / 50 * y.fs) + + # append all-zero postamble to all sources + post = np.zeros((N_post, y.audio.shape[1])) + y.audio = np.concatenate([y.audio, post]) + + # add random noise + if add_low_level_random_noise: + # create uniformly distributed noise between -4 and 4 + np.random.seed(SEED_RANDOM_NOISE) + noise = np.random.randint( + low=-4, high=5, size=y.audio.shape + ).astype("float") + + # superimpose + y.audio += noise + # write the reverberated audio into output file output_filename = scene["name"] audiofile.write( -- GitLab From 7c0bac405bfe5a40f0aa29ce91663ae441976c67 Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Fri, 12 May 2023 10:20:09 +0200 Subject: [PATCH 16/27] [fix] audiofile.py: write out specified dtype for .wav output too (function still clips to int16 range!)
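
A condensed sketch of the resulting write() behaviour (illustrative only; it is abridged from the two hunks below and omits the format check that raises ValueError for unsupported extensions):

    import numpy as np
    from scipy.io import wavfile as wav

    def write_sketch(filename, x, fs=48000, dtype="int16"):
        # samples are clipped to the int16 range up front,
        # regardless of the dtype requested for the output file
        x = np.clip(x, np.iinfo(np.int16).min, np.iinfo(np.int16).max)
        if str(filename).endswith(".wav"):
            # the requested dtype is now honoured for .wav output too
            wav.write(filename, fs, x.astype(dtype))
        else:
            # the ".pcm"/".raw" branch already honoured the requested dtype
            x.astype(dtype).reshape(-1, 1).tofile(filename)

This is also why reverb.py can pass dtype=np.float32 for the impulse response file: IR samples within the int16 limits pass through the clipping stage unchanged and are written as the 32-bit floats that the reverb tool expects.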
--- ivas_processing_scripts/audiotools/audiofile.py | 2 +- ivas_processing_scripts/audiotools/wrappers/reverb.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ivas_processing_scripts/audiotools/audiofile.py b/ivas_processing_scripts/audiotools/audiofile.py index d5687a89..d6f39f65 100755 --- a/ivas_processing_scripts/audiotools/audiofile.py +++ b/ivas_processing_scripts/audiotools/audiofile.py @@ -141,7 +141,7 @@ def write( x = np.clip(x, np.iinfo(np.int16).min, np.iinfo(np.int16).max) if file_extension == ".wav": - x = x.astype(np.int16) + x = x.astype(dtype) wav.write(filename, fs, x) elif file_extension == ".pcm" or file_extension == ".raw": x = x.astype(dtype).reshape(-1, 1) diff --git a/ivas_processing_scripts/audiotools/wrappers/reverb.py b/ivas_processing_scripts/audiotools/wrappers/reverb.py index 46f4ee33..4f4de5dd 100644 --- a/ivas_processing_scripts/audiotools/wrappers/reverb.py +++ b/ivas_processing_scripts/audiotools/wrappers/reverb.py @@ -102,7 +102,7 @@ def reverb( # write IR to temporary file in .pcm format # note: the reverb tool expects 32b float format tmp_IR_file = tmp_dir.joinpath(f"tmp_IR.pcm") - write(tmp_IR_file, IR.audio.astype("float32"), IR.fs, dtype="float32") + write(tmp_IR_file, IR.audio.astype(np.float32), IR.fs, dtype=np.float32) # set up the 'reverb' command line cmd = [ -- GitLab From 18b3e256c665ca8bf85aa9cc894f67f22fc26e58 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Fri, 12 May 2023 12:43:24 +0200 Subject: [PATCH 17/27] stereo IR files of the ITU-T reverb package in int16 32kHz WAVE format --- .gitignore | 1 - .../generation/IR/LAABP01.wav | 3 + .../generation/IR/LAABP02.wav | 3 + .../generation/IR/LAABP03.wav | 3 + .../generation/IR/LAABP04.wav | 3 + .../generation/IR/LAABP05.wav | 3 + .../generation/IR/LAABP06.wav | 3 + .../generation/IR/LAABP07.wav | 3 + .../generation/IR/LAABP08.wav | 3 + .../generation/IR/LAABP09.wav | 3 + .../generation/IR/LAABP10.wav | 3 + .../generation/IR/LAABP11.wav | 3 + .../generation/IR/LAABP12.wav | 3 + .../generation/IR/LEABP01.wav | 3 + .../generation/IR/LEABP02.wav | 3 + .../generation/IR/LEABP03.wav | 3 + .../generation/IR/LEABP04.wav | 3 + .../generation/IR/LEABP05.wav | 3 + .../generation/IR/LEABP06.wav | 3 + .../generation/IR/LEABP07.wav | 3 + .../generation/IR/LEABP08.wav | 3 + .../generation/IR/LEABP09.wav | 3 + .../generation/IR/LEABP10.wav | 3 + .../generation/IR/LEABP11.wav | 3 + .../generation/IR/LEABP12.wav | 3 + .../generation/IR/README.TXT | 56 +++++++++++++++++++ .../generation/IR/SAABP01.wav | 3 + .../generation/IR/SAABP02.wav | 3 + .../generation/IR/SAABP03.wav | 3 + .../generation/IR/SAABP04.wav | 3 + .../generation/IR/SAABP05.wav | 3 + .../generation/IR/SAABP06.wav | 3 + .../generation/IR/SAABP07.wav | 3 + .../generation/IR/SAMSP01.wav | 3 + .../generation/IR/SAMSP02.wav | 3 + .../generation/IR/SAMSP03.wav | 3 + .../generation/IR/SAMSP04.wav | 3 + .../generation/IR/SAMSP05.wav | 3 + .../generation/IR/SAMSP06.wav | 3 + .../generation/IR/SAMSP07.wav | 3 + .../generation/IR/SEABP01.wav | 3 + .../generation/IR/SEABP02.wav | 3 + .../generation/IR/SEABP03.wav | 3 + .../generation/IR/SEABP04.wav | 3 + .../generation/IR/SEABP05.wav | 3 + .../generation/IR/SEABP06.wav | 3 + .../generation/IR/SEABP07.wav | 3 + .../generation/IR/SEBIP01.wav | 3 + .../generation/IR/SEBIP02.wav | 3 + .../generation/IR/SEBIP03.wav | 3 + .../generation/IR/SEBIP04.wav | 3 + .../generation/IR/SEBIP05.wav | 3 + .../generation/IR/SEBIP06.wav | 3 + .../generation/IR/SEBIP07.wav | 3 + 
.../generation/IR/SEMSP01.wav | 3 + .../generation/IR/SEMSP02.wav | 3 + .../generation/IR/SEMSP03.wav | 3 + .../generation/IR/SEMSP04.wav | 3 + .../generation/IR/SEMSP05.wav | 3 + .../generation/IR/SEMSP06.wav | 3 + .../generation/IR/SEMSP07.wav | 3 + 61 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 ivas_processing_scripts/generation/IR/LAABP01.wav create mode 100644 ivas_processing_scripts/generation/IR/LAABP02.wav create mode 100644 ivas_processing_scripts/generation/IR/LAABP03.wav create mode 100644 ivas_processing_scripts/generation/IR/LAABP04.wav create mode 100644 ivas_processing_scripts/generation/IR/LAABP05.wav create mode 100644 ivas_processing_scripts/generation/IR/LAABP06.wav create mode 100644 ivas_processing_scripts/generation/IR/LAABP07.wav create mode 100644 ivas_processing_scripts/generation/IR/LAABP08.wav create mode 100644 ivas_processing_scripts/generation/IR/LAABP09.wav create mode 100644 ivas_processing_scripts/generation/IR/LAABP10.wav create mode 100644 ivas_processing_scripts/generation/IR/LAABP11.wav create mode 100644 ivas_processing_scripts/generation/IR/LAABP12.wav create mode 100644 ivas_processing_scripts/generation/IR/LEABP01.wav create mode 100644 ivas_processing_scripts/generation/IR/LEABP02.wav create mode 100644 ivas_processing_scripts/generation/IR/LEABP03.wav create mode 100644 ivas_processing_scripts/generation/IR/LEABP04.wav create mode 100644 ivas_processing_scripts/generation/IR/LEABP05.wav create mode 100644 ivas_processing_scripts/generation/IR/LEABP06.wav create mode 100644 ivas_processing_scripts/generation/IR/LEABP07.wav create mode 100644 ivas_processing_scripts/generation/IR/LEABP08.wav create mode 100644 ivas_processing_scripts/generation/IR/LEABP09.wav create mode 100644 ivas_processing_scripts/generation/IR/LEABP10.wav create mode 100644 ivas_processing_scripts/generation/IR/LEABP11.wav create mode 100644 ivas_processing_scripts/generation/IR/LEABP12.wav create mode 100644 ivas_processing_scripts/generation/IR/README.TXT create mode 100644 ivas_processing_scripts/generation/IR/SAABP01.wav create mode 100644 ivas_processing_scripts/generation/IR/SAABP02.wav create mode 100644 ivas_processing_scripts/generation/IR/SAABP03.wav create mode 100644 ivas_processing_scripts/generation/IR/SAABP04.wav create mode 100644 ivas_processing_scripts/generation/IR/SAABP05.wav create mode 100644 ivas_processing_scripts/generation/IR/SAABP06.wav create mode 100644 ivas_processing_scripts/generation/IR/SAABP07.wav create mode 100644 ivas_processing_scripts/generation/IR/SAMSP01.wav create mode 100644 ivas_processing_scripts/generation/IR/SAMSP02.wav create mode 100644 ivas_processing_scripts/generation/IR/SAMSP03.wav create mode 100644 ivas_processing_scripts/generation/IR/SAMSP04.wav create mode 100644 ivas_processing_scripts/generation/IR/SAMSP05.wav create mode 100644 ivas_processing_scripts/generation/IR/SAMSP06.wav create mode 100644 ivas_processing_scripts/generation/IR/SAMSP07.wav create mode 100644 ivas_processing_scripts/generation/IR/SEABP01.wav create mode 100644 ivas_processing_scripts/generation/IR/SEABP02.wav create mode 100644 ivas_processing_scripts/generation/IR/SEABP03.wav create mode 100644 ivas_processing_scripts/generation/IR/SEABP04.wav create mode 100644 ivas_processing_scripts/generation/IR/SEABP05.wav create mode 100644 ivas_processing_scripts/generation/IR/SEABP06.wav create mode 100644 ivas_processing_scripts/generation/IR/SEABP07.wav create mode 100644 ivas_processing_scripts/generation/IR/SEBIP01.wav create 
mode 100644 ivas_processing_scripts/generation/IR/SEBIP02.wav create mode 100644 ivas_processing_scripts/generation/IR/SEBIP03.wav create mode 100644 ivas_processing_scripts/generation/IR/SEBIP04.wav create mode 100644 ivas_processing_scripts/generation/IR/SEBIP05.wav create mode 100644 ivas_processing_scripts/generation/IR/SEBIP06.wav create mode 100644 ivas_processing_scripts/generation/IR/SEBIP07.wav create mode 100644 ivas_processing_scripts/generation/IR/SEMSP01.wav create mode 100644 ivas_processing_scripts/generation/IR/SEMSP02.wav create mode 100644 ivas_processing_scripts/generation/IR/SEMSP03.wav create mode 100644 ivas_processing_scripts/generation/IR/SEMSP04.wav create mode 100644 ivas_processing_scripts/generation/IR/SEMSP05.wav create mode 100644 ivas_processing_scripts/generation/IR/SEMSP06.wav create mode 100644 ivas_processing_scripts/generation/IR/SEMSP07.wav diff --git a/.gitignore b/.gitignore index 7855f81e..77abd26a 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,6 @@ venv/ .vscode/ .idea/ .DS_Store -*.wav !tests/data/**/*.wav *.pcm *.bs diff --git a/ivas_processing_scripts/generation/IR/LAABP01.wav b/ivas_processing_scripts/generation/IR/LAABP01.wav new file mode 100644 index 00000000..aeaa9eeb --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP01.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a4e959d347d3f99468dbe75bce9853eb9d66af6cb22cf3ea9ad2dc4c9e84a2a +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP02.wav b/ivas_processing_scripts/generation/IR/LAABP02.wav new file mode 100644 index 00000000..41586c2f --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP02.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2658ddec94aa86e2fa0ed365686daded586a6a46436dff1c6d8dba6d17d0182c +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP03.wav b/ivas_processing_scripts/generation/IR/LAABP03.wav new file mode 100644 index 00000000..c4ec38f9 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP03.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5616c8bcf3959aeee246a96a9f2ce6793d4087bfce3dfd1d97e313e3717b5bd6 +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP04.wav b/ivas_processing_scripts/generation/IR/LAABP04.wav new file mode 100644 index 00000000..1c50022f --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP04.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f433047f7fdba568183873d11c7f4423550a675b3e0677b6d846137227862bac +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP05.wav b/ivas_processing_scripts/generation/IR/LAABP05.wav new file mode 100644 index 00000000..e3bd1916 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791b69ca22d15226e5e2f6c5a39d3d40af04264523f3373d842a070ea4d40862 +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP06.wav b/ivas_processing_scripts/generation/IR/LAABP06.wav new file mode 100644 index 00000000..1c50022f --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f433047f7fdba568183873d11c7f4423550a675b3e0677b6d846137227862bac +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP07.wav b/ivas_processing_scripts/generation/IR/LAABP07.wav new file mode 100644 index 00000000..c4ec38f9 --- /dev/null +++ 
b/ivas_processing_scripts/generation/IR/LAABP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5616c8bcf3959aeee246a96a9f2ce6793d4087bfce3dfd1d97e313e3717b5bd6 +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP08.wav b/ivas_processing_scripts/generation/IR/LAABP08.wav new file mode 100644 index 00000000..41586c2f --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP08.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2658ddec94aa86e2fa0ed365686daded586a6a46436dff1c6d8dba6d17d0182c +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP09.wav b/ivas_processing_scripts/generation/IR/LAABP09.wav new file mode 100644 index 00000000..aeaa9eeb --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP09.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a4e959d347d3f99468dbe75bce9853eb9d66af6cb22cf3ea9ad2dc4c9e84a2a +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP10.wav b/ivas_processing_scripts/generation/IR/LAABP10.wav new file mode 100644 index 00000000..37693eb5 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP10.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9271410ecad011fbcf22fb8f7af5b0f19f02510ef0f198ef6c6d9e33e64d38da +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP11.wav b/ivas_processing_scripts/generation/IR/LAABP11.wav new file mode 100644 index 00000000..482a0e76 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP11.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cda11409aae6b99f6ccb4d20db24b065b7b2bda004dddd7659607215568d90b6 +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP12.wav b/ivas_processing_scripts/generation/IR/LAABP12.wav new file mode 100644 index 00000000..37693eb5 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP12.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9271410ecad011fbcf22fb8f7af5b0f19f02510ef0f198ef6c6d9e33e64d38da +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LEABP01.wav b/ivas_processing_scripts/generation/IR/LEABP01.wav new file mode 100644 index 00000000..424ddfb5 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP01.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d810da26d72e818444c6ee16a3a59a77eabf74df3aaebd2b021696fa7fdd610f +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP02.wav b/ivas_processing_scripts/generation/IR/LEABP02.wav new file mode 100644 index 00000000..784caa2d --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP02.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c21239ff8bbf0e465a175f7ea5125c03f02568a8dbc9b4b63e064955529c489 +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP03.wav b/ivas_processing_scripts/generation/IR/LEABP03.wav new file mode 100644 index 00000000..c81bce1f --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP03.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96e5b25de682dc8e0c1f036bbb0c193cfef574a48621069584d48cdd40f520ed +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP04.wav b/ivas_processing_scripts/generation/IR/LEABP04.wav new file mode 100644 index 00000000..87d97879 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP04.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bfd86b594612a319e30676e4e3c0d177f01ee5626379864610df9796532e7024 +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP05.wav b/ivas_processing_scripts/generation/IR/LEABP05.wav new file mode 100644 index 00000000..5e01d3be --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e31f9bf16791af9b3e01e75316d5bfe32115a5dec8a4b820d253e78e0b84edb +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP06.wav b/ivas_processing_scripts/generation/IR/LEABP06.wav new file mode 100644 index 00000000..a1027066 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65be054317c4dfd5cb0f9bef1d9fc90f35df6ae841e223280946e435c7b6b0c7 +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP07.wav b/ivas_processing_scripts/generation/IR/LEABP07.wav new file mode 100644 index 00000000..3bfe1b97 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78da36e2a0652cc9c7f77279ba1342d0f58b4a879ef4e3038da38580c9bfd07d +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP08.wav b/ivas_processing_scripts/generation/IR/LEABP08.wav new file mode 100644 index 00000000..7ac86eb1 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP08.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa78fae31221631fd31d251ea6ad5f7369bbcc054c84e8b82dca7c8613f3867a +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP09.wav b/ivas_processing_scripts/generation/IR/LEABP09.wav new file mode 100644 index 00000000..010be6fb --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP09.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd7a9ca0ff37a58455414d8e66efb9aa6d8f686af7459751e24f40eb3c2d6415 +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP10.wav b/ivas_processing_scripts/generation/IR/LEABP10.wav new file mode 100644 index 00000000..4fbadb40 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP10.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7149eb3558db62f34e4f476c85a57733e0ca153a297aa183ebeb550878a5ab40 +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP11.wav b/ivas_processing_scripts/generation/IR/LEABP11.wav new file mode 100644 index 00000000..156d4156 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP11.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2665ed857b1e3f095581c591e400e9ef532ff9e130a414bc2cc939c37b829c8a +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP12.wav b/ivas_processing_scripts/generation/IR/LEABP12.wav new file mode 100644 index 00000000..e84b30b8 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP12.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad942c2d19303a80ccadab2289172c514c83397096e8317476d6e8dd6463f0f4 +size 82068 diff --git a/ivas_processing_scripts/generation/IR/README.TXT b/ivas_processing_scripts/generation/IR/README.TXT new file mode 100644 index 00000000..ba5b2281 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/README.TXT @@ -0,0 +1,56 @@ +---------------------------------------------------------------------------------------------- + This set of stereo impulse responses for superwideband audio signals has been measured 
+ by France Telecom/Orange + Copyright (c) 2008-2023 + + Authors: Claude Marro, David Virette, France Telecom/Orange, France + + WARRANTIES: + This set of stereo impulse responses is made available by Orange in the hope they will be useful, + but without any warranty. + France Telecom/Orange is not liable for any consequence related to the use of the provided data. + ---------------------------------------------------------------------------------------------- + +The naming of stereo impulse responses is defined as +[Room][Reverb][Mic]P[Position].WAV + +where: +Room is S=Small or L=Large, +Reverb is E=Echoic or A=Anechoic, +Mic is AB or MS or BI=binaural, +Position is a two digit position number + + +---------------------------------------------------------------------------------------------------------- +|Scenario | Main Characteristics | Naming of Impulse response pair +| | | (with example positions): +---------------------------------------------------------------------------------------------------------- +|Scenario 1, Large conf. room, 12 positions, | Large, Anechoic, AB | LAABP12.WAV +|AB microphone, no reverb, anechoic. | | +---------------------------------------------------------------------------------------------------------- +|Scenario 1, Large conf. room, 12 positions, | Large, Echoic, AB | LEABP01.WAV +|AB microphone, including reverberation. | | +---------------------------------------------------------------------------------------------------------- +|Scenario 2, small conf room, 7 positions, | Small, Anechoic, AB | SAABP01.WAV +|AB microphone, no reverb, anechoic. | | +---------------------------------------------------------------------------------------------------------- +|Scenario 2, small conf room, 7 positions, | Small, Anechoic, MS | SAMSP05.WAV +|MS microphone, no reverb, anechoic. | | +---------------------------------------------------------------------------------------------------------- +|Scenario 2, small conf room, 7 positions, | Small, Echoic, AB | SEABP02.WAV +|AB microphone, including reverberation. | | +---------------------------------------------------------------------------------------------------------- +|Scenario 2, small conf room, 7 positions, | Small, Echoic, Binaural | SEBIP04.WAV +|Binaural microphone, including reverberation.| | +---------------------------------------------------------------------------------------------------------- +|Scenario 2, small conf room, 7 positions, | Small, Echoic, MS | SEMSP07.WAV +|MS microphone, including reverberation. | | +---------------------------------------------------------------------------------------------------------- + +Stereo impulse responses are stored in WAV format (16-bit integer, 32 kHz). +WARNING : All these impulse responses were measured with a sampling frequency of 32kHz. +They are for use with 32 kHz sampled speech files. 
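+
+The naming scheme above can be decoded mechanically. As an illustration only (this
+helper is not part of the IR package or of the processing scripts):
+
+    import re
+
+    IR_NAME = re.compile(
+        r"(?P<room>[SL])(?P<reverb>[EA])(?P<mic>AB|MS|BI)P(?P<pos>[0-9]{2})\.WAV",
+        re.IGNORECASE,
+    )
+    ROOM = {"S": "small", "L": "large"}
+    REVERB = {"E": "echoic", "A": "anechoic"}
+    MIC = {"AB": "AB", "MS": "MS", "BI": "binaural"}
+
+    def describe_ir(filename):
+        m = IR_NAME.fullmatch(filename)
+        if m is None:
+            raise ValueError(f"not a valid IR name: {filename}")
+        return (f"{ROOM[m['room'].upper()]} room, {REVERB[m['reverb'].upper()]}, "
+                f"{MIC[m['mic'].upper()]} microphone, position {int(m['pos'])}")
+
+    # describe_ir("LEABP04.wav") -> 'large room, echoic, AB microphone, position 4'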
+ +References: +[1] original description, http://ties.itu.int/u/tsg16/sg16/xchange/wp3/0809-Geneva/q10/AC-0809-Q10-22-Ericsson_STL_updates.doc +[2] original IRs, https://www.itu.int/u/tsg16/sg16/xchange/wp3/q23/g729.1_g718_swbst_qualification/impulse_resp/stereo/FT/ \ No newline at end of file diff --git a/ivas_processing_scripts/generation/IR/SAABP01.wav b/ivas_processing_scripts/generation/IR/SAABP01.wav new file mode 100644 index 00000000..180b682a --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP01.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd68dd01200bbfd25bebec4dfc63b8f528a03c88d1307e75d7a6c91eeec8be6e +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP02.wav b/ivas_processing_scripts/generation/IR/SAABP02.wav new file mode 100644 index 00000000..f0acab78 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP02.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f145f6f8eb8324c7f3e18c5af5047641e82952603a787e0b7e069d26d5c4ca6 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP03.wav b/ivas_processing_scripts/generation/IR/SAABP03.wav new file mode 100644 index 00000000..1efea8d6 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP03.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8493653f497915b35377984c6d79e04aa344ccf44e0d5b8e286fbec492c9c31 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP04.wav b/ivas_processing_scripts/generation/IR/SAABP04.wav new file mode 100644 index 00000000..ec788896 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP04.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36c04e66154b91979160d18faaf02dc226f6d2ed61f63d19227d777bb3459987 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP05.wav b/ivas_processing_scripts/generation/IR/SAABP05.wav new file mode 100644 index 00000000..3098f0b4 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dae758e6b7b3fd8ef3a8d76fa3210f5f412f3286056085e68a1f9ca7a13e9bab +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP06.wav b/ivas_processing_scripts/generation/IR/SAABP06.wav new file mode 100644 index 00000000..a4553381 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d52622ceb146c340c8a52689468c355c09bd3f71ef1f2f5dae9fb5d217b27e +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP07.wav b/ivas_processing_scripts/generation/IR/SAABP07.wav new file mode 100644 index 00000000..8e641a98 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf6c77ccfa239f5a0cb44a071dcb0d0ca92da0bbc858e4cc060af814ab3ffe3e +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP01.wav b/ivas_processing_scripts/generation/IR/SAMSP01.wav new file mode 100644 index 00000000..7d59592a --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAMSP01.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba6e8d380d91e5492338ac98df45e444532b92ff84a71f569673610e59cde136 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP02.wav b/ivas_processing_scripts/generation/IR/SAMSP02.wav new file mode 100644 index 00000000..b8b62cef --- /dev/null +++ 
b/ivas_processing_scripts/generation/IR/SAMSP02.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd991ef690a9c86fa00064c56ad3df3ef726d9b6232efaf256b33cbc1ad3ac32 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP03.wav b/ivas_processing_scripts/generation/IR/SAMSP03.wav new file mode 100644 index 00000000..feab358d --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAMSP03.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a996729d0c2573d4f219d72db60273b986280fac7ae0f5fe0a35524b83a0d95 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP04.wav b/ivas_processing_scripts/generation/IR/SAMSP04.wav new file mode 100644 index 00000000..0f29ec53 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAMSP04.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad01524476b6f8a5fc2d4d31f8c1b7589a836d9b98cc4d27201e42481931962 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP05.wav b/ivas_processing_scripts/generation/IR/SAMSP05.wav new file mode 100644 index 00000000..71293903 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAMSP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f0b5f91b292924c4e1eb1e2d884059720ab5c3eaae05d22230d786f19de7879 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP06.wav b/ivas_processing_scripts/generation/IR/SAMSP06.wav new file mode 100644 index 00000000..0d51fc62 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAMSP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06e0d7f97b4ce56065d143d19a45ad8c757ed21cf0fe3f8ed05cbedbd966084e +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP07.wav b/ivas_processing_scripts/generation/IR/SAMSP07.wav new file mode 100644 index 00000000..a20ac5f9 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAMSP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb825349bec07813ea7ccb936948783aed31683805a3daae867568445820f8ea +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SEABP01.wav b/ivas_processing_scripts/generation/IR/SEABP01.wav new file mode 100644 index 00000000..6120c6a0 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP01.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a934da1fee82c8131c427680304c9102a3289179697318735b87536d2db6261e +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP02.wav b/ivas_processing_scripts/generation/IR/SEABP02.wav new file mode 100644 index 00000000..3dc413d8 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP02.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21bd1f242bf459bda18ea9e444eedbdf97db20e0956e3600c4e3c03870f1a877 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP03.wav b/ivas_processing_scripts/generation/IR/SEABP03.wav new file mode 100644 index 00000000..27d2af1c --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP03.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bd27e370e9fff391ef37a9e45e3f1583cdcef6ce23cef6135368fb6964674f2 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP04.wav b/ivas_processing_scripts/generation/IR/SEABP04.wav new file mode 100644 index 00000000..ed3c9918 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP04.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c4399629b729b0ceb8b30f3c994b736557bd8b35a968cb80cba486833b7c54d1 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP05.wav b/ivas_processing_scripts/generation/IR/SEABP05.wav new file mode 100644 index 00000000..2e990d65 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c7af3d46eea2d738cb1c6e25a351489f9daff2976c365251595cec719b7ebe +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP06.wav b/ivas_processing_scripts/generation/IR/SEABP06.wav new file mode 100644 index 00000000..3d1397a0 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb284bd97e306b890b9ccdd2e7649c602f6fd78774c1b2140b29051126a1fece +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP07.wav b/ivas_processing_scripts/generation/IR/SEABP07.wav new file mode 100644 index 00000000..075da1a1 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a4fce653f7d80f389f3114a1e07688c5ad292e1419a59c6c4630a3bb8f2bf74 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP01.wav b/ivas_processing_scripts/generation/IR/SEBIP01.wav new file mode 100644 index 00000000..a6068236 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP01.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a349cb20898415609ea49187f871ba2dc980d07a1fa36fb655efde96208b4c +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP02.wav b/ivas_processing_scripts/generation/IR/SEBIP02.wav new file mode 100644 index 00000000..10f8a62c --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP02.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b35e9171ceaeb3e4e00f1e73b337c39c6c933620c39394e3a5ff095535db657a +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP03.wav b/ivas_processing_scripts/generation/IR/SEBIP03.wav new file mode 100644 index 00000000..fd0ec69f --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP03.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:524e0505f83bc579774e5e36f730b40fcb62b9b10f3a7767cec4389f4689d87b +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP04.wav b/ivas_processing_scripts/generation/IR/SEBIP04.wav new file mode 100644 index 00000000..30be4326 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP04.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa2e8e18ef82a299d142fcbfc462b2370472fa202cbe361e1d661c20e21cd4c8 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP05.wav b/ivas_processing_scripts/generation/IR/SEBIP05.wav new file mode 100644 index 00000000..91e57937 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d9cb43b175c2cf94eb861780e48fdb56da0bc4a2dd4f6034b179fa17dd09ab +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP06.wav b/ivas_processing_scripts/generation/IR/SEBIP06.wav new file mode 100644 index 00000000..eb589f49 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca350c682655d3ba8b075e3744adde034783dc87036b8fa9aaf9ccb3500f9286 +size 42112 diff --git 
a/ivas_processing_scripts/generation/IR/SEBIP07.wav b/ivas_processing_scripts/generation/IR/SEBIP07.wav new file mode 100644 index 00000000..d8a20381 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79e4520ada475e1c37b8707da8062bbbfb26e617261e0130b7344b2bc1a937c5 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP01.wav b/ivas_processing_scripts/generation/IR/SEMSP01.wav new file mode 100644 index 00000000..4dab142a --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP01.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66a37fc3855a0929cf4a4702301bf231fe346f1964b845b9cf464a5bfd3e29ad +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP02.wav b/ivas_processing_scripts/generation/IR/SEMSP02.wav new file mode 100644 index 00000000..d59419c5 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP02.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cf267a42add5770e08b756d5577e95459b3efc5e49076ac910bb00aabe879b1 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP03.wav b/ivas_processing_scripts/generation/IR/SEMSP03.wav new file mode 100644 index 00000000..0e2e8205 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP03.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4947d0762a6d653690d164c1a0dc09acc9c2bf38e8c28f33b9661d899094cd7 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP04.wav b/ivas_processing_scripts/generation/IR/SEMSP04.wav new file mode 100644 index 00000000..dc665c65 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP04.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f8a703057836541f8ca3e1e788d95302adcf983e12e1f6481e0743548559eeb +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP05.wav b/ivas_processing_scripts/generation/IR/SEMSP05.wav new file mode 100644 index 00000000..aec9c66f --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12613e8b6f43d6a8df2a4b78961fcacc2956d3b0bd8e3321fdea487ab00679ab +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP06.wav b/ivas_processing_scripts/generation/IR/SEMSP06.wav new file mode 100644 index 00000000..84f990ed --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b107956649319df472cfe311e278f73735708957b44f7af6a6e444a33b7cb9d0 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP07.wav b/ivas_processing_scripts/generation/IR/SEMSP07.wav new file mode 100644 index 00000000..bf89445a --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef5a76c1026510861b8cac697415e6e08810857252b9ce52e0157c6024400bf2 +size 42112 -- GitLab From 8947746678efb2bfac403b7c4e1a331e46dc056d Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Fri, 12 May 2023 14:03:13 +0200 Subject: [PATCH 18/27] simplification of the top-level functions generate_[ism|stereo]_items() --- item_gen_configs/ISM1_CONFIG.yml | 11 ++-- item_gen_configs/ISM2_CONFIG.yml | 6 -- item_gen_configs/STEREO_CONFIG.yml | 10 +-- .../generation/__init__.py | 30 +-------- .../generation/process_ism_items.py | 59 ++++++++++------- .../generation/process_stereo_items.py | 64 
++++++++++++------- 6 files changed, 83 insertions(+), 97 deletions(-) diff --git a/item_gen_configs/ISM1_CONFIG.yml b/item_gen_configs/ISM1_CONFIG.yml index 9ba070f7..0f26866a 100644 --- a/item_gen_configs/ISM1_CONFIG.yml +++ b/item_gen_configs/ISM1_CONFIG.yml @@ -6,12 +6,6 @@ ### Output format format: "ISM1" -### Date; default = YYYYMMDD_HH.MM.SS -# date: 2023.06.30 - -### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false -# delete_tmp: true - ### Output sampling rate in Hz needed for headerless audio files; default = 48000 fs: 48000 @@ -29,10 +23,13 @@ output_path: "./items_ISM1" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 -### Pre-amble and Post-amble length in seconds (default = None) +### Pre-amble and Post-amble length in seconds (default = 0.0) preamble: 0.5 postamble: 0.5 +### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) +add_low_level_random_noise: true + ################################################ ### Scene description diff --git a/item_gen_configs/ISM2_CONFIG.yml b/item_gen_configs/ISM2_CONFIG.yml index 198571d2..cbbb8b60 100644 --- a/item_gen_configs/ISM2_CONFIG.yml +++ b/item_gen_configs/ISM2_CONFIG.yml @@ -6,12 +6,6 @@ ### Output format format: "ISM2" -### Date; default = YYYYMMDD_HH.MM.SS -# date: 2023.06.30 - -### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false -# delete_tmp: true - ### Output sampling rate in Hz needed for headerless audio files; default = 48000 fs: 48000 diff --git a/item_gen_configs/STEREO_CONFIG.yml b/item_gen_configs/STEREO_CONFIG.yml index cb14747d..8f6cccc3 100644 --- a/item_gen_configs/STEREO_CONFIG.yml +++ b/item_gen_configs/STEREO_CONFIG.yml @@ -6,12 +6,6 @@ ### Output format format: "STEREO" -### Date; default = YYYYMMDD_HH.MM.SS -# date: 2023.06.30 - -### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false -# delete_tmp: true - ### Output sampling rate in Hz needed for headerless audio files; default = 48000 fs: 48000 @@ -26,8 +20,8 @@ IR_fs: 32000 ### Input path to mono files input_path: "./items_mono" -### Input path to stereo impulse response files -IR_path: "./IR" +### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' +# IR_path: "./IR" ### Output path for generated test items and metadata files output_path: "./items_STEREO" diff --git a/ivas_processing_scripts/generation/__init__.py b/ivas_processing_scripts/generation/__init__.py index 76d10610..98591883 100755 --- a/ivas_processing_scripts/generation/__init__.py +++ b/ivas_processing_scripts/generation/__init__.py @@ -84,36 +84,10 @@ def main(args): # generate input items if cfg.format.startswith("ISM"): # generate ISM items with metadata according to scene description - process_ism_items.generate_ism_items( - cfg.format, - cfg.loudness, - cfg.input_path, - cfg.output_path, - cfg.scenes, - logger, - fs=cfg.fs, - preamble=cfg.preamble, - postamble=cfg.postamble, - add_low_level_random_noise=getattr(cfg, "add_low_level_random_noise", False), - # TODO@VM dict.get() can provide a default value if the key is not found - # please check if this is a viable solution - I kept getting "AttributeError: 'TestConfig' object has no attribute 'add_low_level_random_noise'" - ) + process_ism_items.generate_ism_items(cfg, logger) elif cfg.format == "STEREO": # 
generate STEREO items according to scene description - process_stereo_items.generate_stereo_items( - cfg.format, - cfg.loudness, - cfg.input_path, - cfg.IR_path, - cfg.output_path, - cfg.scenes, - logger, - fs=cfg.fs, - IR_fs=cfg.IR_fs, - preamble=cfg.preamble, - postamble=cfg.postamble, - add_low_level_random_noise=cfg.add_low_level_random_noise, - ) + process_stereo_items.generate_stereo_items(cfg, logger) # copy configuration to output directory with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f: diff --git a/ivas_processing_scripts/generation/process_ism_items.py b/ivas_processing_scripts/generation/process_ism_items.py index d788da34..a8c7e228 100644 --- a/ivas_processing_scripts/generation/process_ism_items.py +++ b/ivas_processing_scripts/generation/process_ism_items.py @@ -33,12 +33,11 @@ import csv import logging import os +import numpy as np from math import floor from pathlib import Path -from typing import Optional - -import numpy as np +from ivas_processing_scripts.generation import config from ivas_processing_scripts.audiotools import audio, audiofile from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness @@ -52,23 +51,34 @@ def csv_formatdata(data): def generate_ism_items( - format: str, - target_level: int, - input_path: Path, - output_path: Path, - scenes: dict, + cfg : config.TestConfig, logger: logging.Logger, - fs: Optional[int] = 48000, - preamble: Optional[float] = 0.0, - postamble: Optional[float] = 0.0, - add_low_level_random_noise: Optional[bool] = False, ): """Generate ISM items with metadata from mono items based on scene description""" # get the number of scenes - N_scenes = len(scenes) - - for scene_name, scene in scenes.items(): + N_scenes = len(cfg.scenes) + + # set the target level + if "loudness" not in cfg.__dict__: + cfg.loudness = -26 + + # set the fs + if "fs" not in cfg.__dict__: + cfg.fs = 48000 + + # set the pre-amble and post-amble + if "preamble" not in cfg.__dict__: + cfg.preamble = 0.0 + + if "postamble" not in cfg.__dict__: + cfg.postamble = 0.0 + + # set the pre-amble and post-amble + if "add_low_level_random_noise" not in cfg.__dict__: + cfg.add_low_level_random_noise = False + + for scene_name, scene in cfg.scenes.items(): logger.info(f"Processing {scene_name} out of {N_scenes} scenes") # extract the number of audio sources @@ -89,6 +99,7 @@ def generate_ism_items( # repeat for all source files for i in range(N_sources): + # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] source_azi = np.atleast_1d(scene["azimuth"])[i] @@ -99,7 +110,7 @@ def generate_ism_items( ) # read source file - x = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs) + x = audio.fromfile("MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs) # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) @@ -109,7 +120,7 @@ def generate_ism_items( # x.audio = x.audio[:N_trim] # adjust the level of the source file - _, scale_factor = get_loudness(x, target_level, "MONO") + _, scale_factor = get_loudness(x, cfg.loudness, "MONO") x.audio *= scale_factor # read azimuth information and create array @@ -271,9 +282,9 @@ def generate_ism_items( y_meta = np.concatenate([y_meta, x_meta]) # append pre-amble and post-amble to all sources - if preamble != 0.0: + if cfg.preamble != 0.0: # ensure that pre-mable is a multiple of 20ms - N_pre = int(floor(preamble * 50) / 50 * y.fs) + N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) # insert all-zero preamble to 
all sources pre = np.zeros((N_pre, y.audio.shape[1])) @@ -285,9 +296,9 @@ def generate_ism_items( ) # !!!! TBD - check if we should insert netrual position or the first position of the metadata y_meta = np.concatenate([pre, y_meta], axis=1) - if postamble != 0.0: + if cfg.postamble != 0.0: # ensure that post-mable is a multiple of 20ms - N_post = int(floor(postamble * 50) / 50 * y.fs) + N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) # append all-zero postamble to all sources post = np.zeros((N_post, y.audio.shape[1])) @@ -300,7 +311,7 @@ def generate_ism_items( y_meta = np.concatenate([y_meta, post], axis=1) # add random noise - if add_low_level_random_noise: + if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( @@ -313,7 +324,7 @@ def generate_ism_items( # write individual ISM audio streams to the output file in an interleaved format output_filename = scene["name"] audiofile.write( - os.path.join(output_path, output_filename), y.audio, y.fs + os.path.join(cfg.output_path, output_filename), y.audio, y.fs ) # !!!! TBD: replace all os.path.xxx operations with the Path object # write individual ISM metadata to output files in .csv format @@ -322,7 +333,7 @@ def generate_ism_items( csv_filename = os.path.normpath(f"{output_filename}.{i}.csv") with open( - os.path.join(output_path, csv_filename), + os.path.join(cfg.output_path, csv_filename), "w", newline="", encoding="utf-8", diff --git a/ivas_processing_scripts/generation/process_stereo_items.py b/ivas_processing_scripts/generation/process_stereo_items.py index feae1b26..109d0b08 100644 --- a/ivas_processing_scripts/generation/process_stereo_items.py +++ b/ivas_processing_scripts/generation/process_stereo_items.py @@ -30,17 +30,15 @@ # the United Nations Convention on Contracts on the International Sales of Goods. 
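For reference: the `if "key" not in cfg.__dict__` blocks introduced above fill in defaults for optional configuration keys, and the TODO removed earlier in this patch asked whether a dict.get()-style lookup could do the same. A minimal sketch of that alternative; the apply_config_defaults helper and the SimpleNamespace stand-in for config.TestConfig are illustrative assumptions, not part of the patch:

from types import SimpleNamespace


def apply_config_defaults(cfg, defaults):
    # Fill in every optional key that the YAML config did not set;
    # getattr() with a fallback avoids the AttributeError mentioned in the TODO.
    for key, value in defaults.items():
        setattr(cfg, key, getattr(cfg, key, value))


cfg = SimpleNamespace(loudness=-26)  # stand-in for a parsed test config
apply_config_defaults(
    cfg,
    {
        "fs": 48000,
        "preamble": 0.0,
        "postamble": 0.0,
        "add_low_level_random_noise": False,
    },
)
assert cfg.loudness == -26 and cfg.fs == 48000  # explicit values win over defaults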
# - import csv import logging import os +import numpy as np from copy import copy from math import floor from pathlib import Path -from typing import Optional - -import numpy as np +from ivas_processing_scripts.generation import config from ivas_processing_scripts.audiotools import audio, audiofile from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_stereo @@ -55,30 +53,48 @@ def csv_formatdata(data): def generate_stereo_items( - format: str, - target_level: int, - input_path: Path, - IR_path: Path, - output_path: Path, - scenes: dict, + cfg : config.TestConfig, logger: logging.Logger, - fs: Optional[int] = 48000, - IR_fs: Optional[int] = 48000, - preamble: Optional[float] = 0.0, - postamble: Optional[float] = 0.0, - add_low_level_random_noise: Optional[bool] = False, ): """Generate STEREO items from mono items based on scene description""" # get the number of scenes - N_scenes = len(scenes) + N_scenes = len(cfg.scenes) + + # set the target level + if "loudness" not in cfg.__dict__: + cfg.loudness = -26 + + # set the fs + if "fs" not in cfg.__dict__: + cfg.fs = 48000 + + # set the IR fs + if "IR_fs" not in cfg.__dict__: + cfg.IR_fs = 48000 + + # set the pre-amble and post-amble + if "preamble" not in cfg.__dict__: + cfg.preamble = 0.0 + + if "postamble" not in cfg.__dict__: + cfg.postamble = 0.0 + + # set the IR path + if "IR_path" not in cfg.__dict__: + cfg.IR_path = os.path.join(os.path.dirname(__file__), "IR") + + # set the pre-amble and post-amble + if "add_low_level_random_noise" not in cfg.__dict__: + cfg.add_low_level_random_noise = False - for scene_name, scene in scenes.items(): + # repeat for all source files + for scene_name, scene in cfg.scenes.items(): logger.info(f"Processing scene: {scene_name} out of {N_scenes} scenes") # extract the number of audio sources N_sources = len(np.atleast_1d(scene["source"])) - + # read the IR (check if stereo or two mono files were provided) source_IR = np.atleast_1d(scene["IR"]) @@ -99,19 +115,19 @@ def generate_stereo_items( ) # read source file - x = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs) + x = audio.fromfile("MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs) # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) # read the IR file - IR = audio.fromfile("STEREO", os.path.join(IR_path, IR_file), fs=IR_fs) + IR = audio.fromfile("STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs) # convolve with stereo IR x_rev = reverb_stereo(x, IR) # adjust the level of the stereo signal - _, scale_factor = get_loudness(x_rev, target_level, "STEREO") + _, scale_factor = get_loudness(x_rev, cfg.loudness, "STEREO") x_rev.audio *= scale_factor # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) @@ -173,9 +189,9 @@ def generate_stereo_items( y.audio += x_rev.audio # append pre-amble and post-amble to all sources - if preamble != 0.0: + if cfg.preamble != 0.0: # ensure that pre-mable is a multiple of 20ms - N_pre = int(floor(preamble * 50) / 50 * y.fs) + N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) # insert all-zero preamble to all sources pre = np.zeros((N_pre, y.audio.shape[1])) @@ -203,7 +219,7 @@ def generate_stereo_items( # write the reverberated audio into output file output_filename = scene["name"] audiofile.write( - os.path.join(output_path, output_filename), y.audio, y.fs + os.path.join(cfg.output_path, output_filename), 
y.audio, y.fs ) # !!!! TBD: replace all os.path.xxx operations with the Path object return -- GitLab From 67085bed4e1045319c76c6417e53295a34d63688 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Fri, 12 May 2023 15:15:04 +0200 Subject: [PATCH 19/27] formatting --- .../generation/__init__.py | 6 +- ivas_processing_scripts/generation/config.py | 2 +- .../generation/constants.py | 2 +- .../generation/process_ism_items.py | 20 ++--- .../generation/process_stereo_items.py | 75 ++++++++++--------- 5 files changed, 57 insertions(+), 48 deletions(-) diff --git a/ivas_processing_scripts/generation/__init__.py b/ivas_processing_scripts/generation/__init__.py index 98591883..27ff9021 100755 --- a/ivas_processing_scripts/generation/__init__.py +++ b/ivas_processing_scripts/generation/__init__.py @@ -40,7 +40,11 @@ from ivas_processing_scripts.constants import ( LOGGER_FORMAT, LOGGER_SUFFIX, ) -from ivas_processing_scripts.generation import config, process_ism_items, process_stereo_items +from ivas_processing_scripts.generation import ( + config, + process_ism_items, + process_stereo_items, +) from ivas_processing_scripts.utils import create_dir diff --git a/ivas_processing_scripts/generation/config.py b/ivas_processing_scripts/generation/config.py index ca9dbcc2..a84b156c 100644 --- a/ivas_processing_scripts/generation/config.py +++ b/ivas_processing_scripts/generation/config.py @@ -122,4 +122,4 @@ class TestConfig: # Report missing keys to the user if MISSING_KEYS: - raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}") \ No newline at end of file + raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}") diff --git a/ivas_processing_scripts/generation/constants.py b/ivas_processing_scripts/generation/constants.py index 34001207..c1454730 100644 --- a/ivas_processing_scripts/generation/constants.py +++ b/ivas_processing_scripts/generation/constants.py @@ -64,4 +64,4 @@ REQUIRED_KEYS = [ "input_path", "output_path", "scenes", -] \ No newline at end of file +] diff --git a/ivas_processing_scripts/generation/process_ism_items.py b/ivas_processing_scripts/generation/process_ism_items.py index a8c7e228..b2e09151 100644 --- a/ivas_processing_scripts/generation/process_ism_items.py +++ b/ivas_processing_scripts/generation/process_ism_items.py @@ -33,13 +33,14 @@ import csv import logging import os -import numpy as np from math import floor from pathlib import Path -from ivas_processing_scripts.generation import config +import numpy as np + from ivas_processing_scripts.audiotools import audio, audiofile from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness +from ivas_processing_scripts.generation import config SEED_RANDOM_NOISE = 0 @@ -51,7 +52,7 @@ def csv_formatdata(data): def generate_ism_items( - cfg : config.TestConfig, + cfg: config.TestConfig, logger: logging.Logger, ): """Generate ISM items with metadata from mono items based on scene description""" @@ -70,14 +71,14 @@ def generate_ism_items( # set the pre-amble and post-amble if "preamble" not in cfg.__dict__: cfg.preamble = 0.0 - + if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 - + # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False - + for scene_name, scene in cfg.scenes.items(): logger.info(f"Processing {scene_name} out of {N_scenes} scenes") @@ -99,7 +100,6 @@ def generate_ism_items( # repeat for all source files for i in range(N_sources): - # parse parameters from the scene description 
source_file = np.atleast_1d(scene["source"])[i] source_azi = np.atleast_1d(scene["azimuth"])[i] @@ -110,8 +110,10 @@ def generate_ism_items( ) # read source file - x = audio.fromfile("MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs) - + x = audio.fromfile( + "MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs + ) + # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) diff --git a/ivas_processing_scripts/generation/process_stereo_items.py b/ivas_processing_scripts/generation/process_stereo_items.py index 109d0b08..82ba54ca 100644 --- a/ivas_processing_scripts/generation/process_stereo_items.py +++ b/ivas_processing_scripts/generation/process_stereo_items.py @@ -33,15 +33,16 @@ import csv import logging import os -import numpy as np from copy import copy from math import floor from pathlib import Path -from ivas_processing_scripts.generation import config +import numpy as np + from ivas_processing_scripts.audiotools import audio, audiofile from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_stereo +from ivas_processing_scripts.generation import config SEED_RANDOM_NOISE = 0 @@ -53,14 +54,14 @@ def csv_formatdata(data): def generate_stereo_items( - cfg : config.TestConfig, + cfg: config.TestConfig, logger: logging.Logger, ): """Generate STEREO items from mono items based on scene description""" # get the number of scenes N_scenes = len(cfg.scenes) - + # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 @@ -76,14 +77,14 @@ def generate_stereo_items( # set the pre-amble and post-amble if "preamble" not in cfg.__dict__: cfg.preamble = 0.0 - + if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 - + # set the IR path if "IR_path" not in cfg.__dict__: cfg.IR_path = os.path.join(os.path.dirname(__file__), "IR") - + # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False @@ -94,12 +95,12 @@ def generate_stereo_items( # extract the number of audio sources N_sources = len(np.atleast_1d(scene["source"])) - + # read the IR (check if stereo or two mono files were provided) source_IR = np.atleast_1d(scene["IR"]) - + # read the overlap length - if 'overlap' in scene.keys(): + if "overlap" in scene.keys(): source_overlap = float(scene["overlap"]) else: source_overlap = 0.0 @@ -109,51 +110,53 @@ def generate_stereo_items( # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] IR_file = np.atleast_1d(scene["IR"])[i] - - logger.info( - f"Convolving {source_file} with {source_IR}" - ) + + logger.info(f"Convolving {source_file} with {source_IR}") # read source file - x = audio.fromfile("MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs) + x = audio.fromfile( + "MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs + ) # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) - + # read the IR file - IR = audio.fromfile("STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs) - + IR = audio.fromfile( + "STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs + ) + # convolve with stereo IR x_rev = reverb_stereo(x, IR) # adjust the level of the stereo signal _, scale_factor = get_loudness(x_rev, cfg.loudness, "STEREO") x_rev.audio *= scale_factor - + # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) if i > 0 and 
source_overlap != 0.0: # get the length of the first source file - N_delay = len(y.audio[:,0]) - + N_delay = len(y.audio[:, 0]) + # add the shift N_delay += int(source_overlap * x.fs) - + # ensure delay is a multiple of 20ms # N_delay = int(floor(source_shift * 50) / 50 * x.fs) - + # insert all-zero preamble pre = np.zeros((N_delay, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) - - # pad with zeros to ensure that the signal length is a multiple of 20ms + + # pad with zeros to ensure that the signal length is a multiple of 20ms N_frame = x.fs / 50 if len(x.audio) % N_frame != 0: N_pad = int(N_frame - len(x.audio) % N_frame) - + # insert all-zero preamble pre = np.zeros((N_pad, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) - + # add source signal to the array of source signals y.fs = x.fs if y.audio is None: @@ -192,30 +195,30 @@ def generate_stereo_items( if cfg.preamble != 0.0: # ensure that pre-mable is a multiple of 20ms N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) - + # insert all-zero preamble to all sources pre = np.zeros((N_pre, y.audio.shape[1])) y.audio = np.concatenate([pre, y.audio]) - + if postamble != 0.0: # ensure that post-mable is a multiple of 20ms N_post = int(floor(postamble * 50) / 50 * y.fs) - + # append all-zero postamble to all sources post = np.zeros((N_post, y.audio.shape[1])) y.audio = np.concatenate([y.audio, post]) - + # add random noise if add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint( - low=-4, high=5, size=y.audio.shape - ).astype("float") - + noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( + "float" + ) + # superimpose y.audio += noise - + # write the reverberated audio into output file output_filename = scene["name"] audiofile.write( -- GitLab From 1fa207776bc0b272f1b845dd6a3fa66afabff519 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Fri, 12 May 2023 15:19:57 +0200 Subject: [PATCH 20/27] moving legal notice of using ITU-T IR responses to the proper place --- .../IR/README.TXT => thirdPartyLegalNotices/REVERB_IR.TXT | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename ivas_processing_scripts/generation/IR/README.TXT => thirdPartyLegalNotices/REVERB_IR.TXT (100%) diff --git a/ivas_processing_scripts/generation/IR/README.TXT b/thirdPartyLegalNotices/REVERB_IR.TXT similarity index 100% rename from ivas_processing_scripts/generation/IR/README.TXT rename to thirdPartyLegalNotices/REVERB_IR.TXT -- GitLab From fa6708f39eb4c1f2e212d5638dbfc00500601a12 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Fri, 12 May 2023 15:29:10 +0200 Subject: [PATCH 21/27] cleanup + fix unknown variables --- .../audiotools/wrappers/reverb.py | 9 ++++----- ivas_processing_scripts/generation/config.py | 1 + .../generation/process_ism_items.py | 5 ----- .../generation/process_stereo_items.py | 15 ++++----------- 4 files changed, 9 insertions(+), 21 deletions(-) diff --git a/ivas_processing_scripts/audiotools/wrappers/reverb.py b/ivas_processing_scripts/audiotools/wrappers/reverb.py index 4f4de5dd..d0f04677 100644 --- a/ivas_processing_scripts/audiotools/wrappers/reverb.py +++ b/ivas_processing_scripts/audiotools/wrappers/reverb.py @@ -30,11 +30,10 @@ # the United Nations Convention on Contracts on the International Sales of Goods. 
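For reference: these generators keep every signal on a 20 ms grid (50 frames per second), so lengths are repeatedly padded or trimmed to a multiple of fs / 50 samples, as in the padding block above. A self-contained sketch of that arithmetic; the helper name is illustrative, and unlike some places in the patch (which prepend zeros) this version appends them:

import numpy as np


def pad_to_20ms_grid(x: np.ndarray, fs: int) -> np.ndarray:
    # One 20 ms frame is fs / 50 samples (960 samples at 48 kHz).
    frame_len = fs // 50
    remainder = len(x) % frame_len
    if remainder == 0:
        return x
    # Append zeros so the total length lands on the next frame boundary.
    pad = np.zeros((frame_len - remainder, x.shape[1]))
    return np.concatenate([x, pad])


x = np.ones((1000, 2))  # 1000 samples of stereo audio at 48 kHz
assert len(pad_to_20ms_grid(x, 48000)) == 1920  # rounded up to two frames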
# -import os.path from copy import copy from pathlib import Path from tempfile import TemporaryDirectory -from typing import Optional, Union +from typing import Optional import numpy as np from scipy.fft import fft @@ -91,7 +90,7 @@ def reverb( tmp_input.fs = IR.fs # write input audio signal to temporary file in .pcm format - tmp_input_file = tmp_dir.joinpath(f"tmp_reverbIn.pcm") + tmp_input_file = tmp_dir.joinpath("tmp_reverbIn.pcm") write(tmp_input_file, tmp_input.audio, tmp_input.fs) # down-scale IR to prevent saturation @@ -101,7 +100,7 @@ def reverb( # write IR to temporary file in .pcm format # note: the reverb tool expects 32b float format - tmp_IR_file = tmp_dir.joinpath(f"tmp_IR.pcm") + tmp_IR_file = tmp_dir.joinpath("tmp_IR.pcm") write(tmp_IR_file, IR.audio.astype(np.float32), IR.fs, dtype=np.float32) # set up the 'reverb' command line @@ -114,7 +113,7 @@ def reverb( cmd.extend(["-align", str(align)]) # append temporary filenames - tmp_output_file = tmp_dir.joinpath(f"tmp_reverbOut.pcm") + tmp_output_file = tmp_dir.joinpath("tmp_reverbOut.pcm") cmd.extend([tmp_input_file, tmp_IR_file, tmp_output_file]) # run the 'reverb' command diff --git a/ivas_processing_scripts/generation/config.py b/ivas_processing_scripts/generation/config.py index a84b156c..1947f8d5 100644 --- a/ivas_processing_scripts/generation/config.py +++ b/ivas_processing_scripts/generation/config.py @@ -123,3 +123,4 @@ class TestConfig: # Report missing keys to the user if MISSING_KEYS: raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}") + diff --git a/ivas_processing_scripts/generation/process_ism_items.py b/ivas_processing_scripts/generation/process_ism_items.py index b2e09151..bb2e6523 100644 --- a/ivas_processing_scripts/generation/process_ism_items.py +++ b/ivas_processing_scripts/generation/process_ism_items.py @@ -34,7 +34,6 @@ import csv import logging import os from math import floor -from pathlib import Path import numpy as np @@ -117,10 +116,6 @@ def generate_ism_items( # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) - # trim the source signal to align to 20ms boundary - # N_trim = int(N_frames * x.fs / 50) - # x.audio = x.audio[:N_trim] - # adjust the level of the source file _, scale_factor = get_loudness(x, cfg.loudness, "MONO") x.audio *= scale_factor diff --git a/ivas_processing_scripts/generation/process_stereo_items.py b/ivas_processing_scripts/generation/process_stereo_items.py index 82ba54ca..98b4129d 100644 --- a/ivas_processing_scripts/generation/process_stereo_items.py +++ b/ivas_processing_scripts/generation/process_stereo_items.py @@ -30,14 +30,10 @@ # the United Nations Convention on Contracts on the International Sales of Goods. 
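For reference: the reverb wrapper cleaned up above follows the same pattern as the other tool wrappers, namely write the input signal and the impulse response into a temporary directory as raw PCM, invoke the external binary, and read the result back. A stripped-down sketch of that flow; the run_reverb name, the argument order and the 16-bit input format are assumptions for illustration, and the optional -align argument shown in the patch is omitted:

import subprocess
from pathlib import Path
from tempfile import TemporaryDirectory

import numpy as np


def run_reverb(x: np.ndarray, ir: np.ndarray, binary: str = "reverb") -> np.ndarray:
    with TemporaryDirectory() as tmp:
        tmp = Path(tmp)
        in_file, ir_file, out_file = tmp / "in.pcm", tmp / "ir.pcm", tmp / "out.pcm"
        x.astype(np.int16).tofile(in_file)  # input as 16-bit raw PCM
        ir.astype(np.float32).tofile(ir_file)  # the tool expects 32-bit float IRs
        # Run the external tool; the real wrapper also rescales the IR first.
        subprocess.run([binary, str(in_file), str(ir_file), str(out_file)], check=True)
        return np.fromfile(out_file, dtype=np.int16)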
# -import csv import logging import os -from copy import copy -from math import floor -from pathlib import Path - import numpy as np +from math import floor from ivas_processing_scripts.audiotools import audio, audiofile from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness @@ -118,9 +114,6 @@ def generate_stereo_items( "MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs ) - # get the number of frames (multiple of 20ms) - N_frames = int(len(x.audio) / x.fs * 50) - # read the IR file IR = audio.fromfile( "STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs @@ -200,16 +193,16 @@ def generate_stereo_items( pre = np.zeros((N_pre, y.audio.shape[1])) y.audio = np.concatenate([pre, y.audio]) - if postamble != 0.0: + if cfg.postamble != 0.0: # ensure that post-mable is a multiple of 20ms - N_post = int(floor(postamble * 50) / 50 * y.fs) + N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) # append all-zero postamble to all sources post = np.zeros((N_post, y.audio.shape[1])) y.audio = np.concatenate([y.audio, post]) # add random noise - if add_low_level_random_noise: + if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( -- GitLab From a221268b0db9a0700c9b94f46bdcea19da87bc42 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Fri, 12 May 2023 16:08:50 +0200 Subject: [PATCH 22/27] formatting --- ivas_processing_scripts/generation/config.py | 1 - ivas_processing_scripts/generation/process_stereo_items.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ivas_processing_scripts/generation/config.py b/ivas_processing_scripts/generation/config.py index 1947f8d5..a84b156c 100644 --- a/ivas_processing_scripts/generation/config.py +++ b/ivas_processing_scripts/generation/config.py @@ -123,4 +123,3 @@ class TestConfig: # Report missing keys to the user if MISSING_KEYS: raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}") - diff --git a/ivas_processing_scripts/generation/process_stereo_items.py b/ivas_processing_scripts/generation/process_stereo_items.py index 98b4129d..11b19b43 100644 --- a/ivas_processing_scripts/generation/process_stereo_items.py +++ b/ivas_processing_scripts/generation/process_stereo_items.py @@ -32,9 +32,10 @@ import logging import os -import numpy as np from math import floor +import numpy as np + from ivas_processing_scripts.audiotools import audio, audiofile from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_stereo -- GitLab From 6a6e89e96f412af6c59c45c231157f85924b3400 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Mon, 15 May 2023 10:10:07 +0200 Subject: [PATCH 23/27] fix incorrect overlap handling --- .../generation/process_ism_items.py | 2 +- .../generation/process_stereo_items.py | 26 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/ivas_processing_scripts/generation/process_ism_items.py b/ivas_processing_scripts/generation/process_ism_items.py index bb2e6523..7bd682a7 100644 --- a/ivas_processing_scripts/generation/process_ism_items.py +++ b/ivas_processing_scripts/generation/process_ism_items.py @@ -193,7 +193,7 @@ def generate_ism_items( N_delay = len(y.audio[:, 0]) # add the shift - N_delay += int(source_overlap * x.fs) + N_delay += int(-source_overlap * x.fs) # ensure delay is a multiple of 20ms # N_delay = 
int(floor(source_shift * 50) / 50 * x.fs) diff --git a/ivas_processing_scripts/generation/process_stereo_items.py b/ivas_processing_scripts/generation/process_stereo_items.py index 11b19b43..9498e2cf 100644 --- a/ivas_processing_scripts/generation/process_stereo_items.py +++ b/ivas_processing_scripts/generation/process_stereo_items.py @@ -121,11 +121,11 @@ def generate_stereo_items( ) # convolve with stereo IR - x_rev = reverb_stereo(x, IR) + x = reverb_stereo(x, IR) # adjust the level of the stereo signal - _, scale_factor = get_loudness(x_rev, cfg.loudness, "STEREO") - x_rev.audio *= scale_factor + _, scale_factor = get_loudness(x, cfg.loudness, "STEREO") + x.audio *= scale_factor # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) if i > 0 and source_overlap != 0.0: @@ -133,7 +133,7 @@ def generate_stereo_items( N_delay = len(y.audio[:, 0]) # add the shift - N_delay += int(source_overlap * x.fs) + N_delay += int(-source_overlap * x.fs) # ensure delay is a multiple of 20ms # N_delay = int(floor(source_shift * 50) / 50 * x.fs) @@ -154,36 +154,36 @@ def generate_stereo_items( # add source signal to the array of source signals y.fs = x.fs if y.audio is None: - y.audio = x_rev.audio + y.audio = x.audio else: # pad with zeros to have equal length of all source signals - if x_rev.audio.shape[0] > y.audio.shape[0]: + if x.audio.shape[0] > y.audio.shape[0]: y.audio = np.vstack( ( y.audio, np.zeros( ( - x_rev.audio.shape[0] - y.audio.shape[0], + x.audio.shape[0] - y.audio.shape[0], y.audio.shape[1], ) ), ) ) - elif y.audio.shape[0] > x_rev.audio.shape[0]: - x_rev.audio = np.vstack( + elif y.audio.shape[0] > x.audio.shape[0]: + x.audio = np.vstack( ( - x_rev.audio, + x.audio, np.zeros( ( - y.audio.shape[0] - x_rev.audio.shape[0], - x_rev.audio.shape[1], + y.audio.shape[0] - x.audio.shape[0], + x.audio.shape[1], ) ), ) ) # superimpose - y.audio += x_rev.audio + y.audio += x.audio # append pre-amble and post-amble to all sources if cfg.preamble != 0.0: -- GitLab From eb7fbabcd0b0bb63c741c3a9d084466d50d9e656 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 16 May 2023 09:53:49 +0200 Subject: [PATCH 24/27] fix typo in the .yml file --- item_gen_configs/ISM2_CONFIG.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/item_gen_configs/ISM2_CONFIG.yml b/item_gen_configs/ISM2_CONFIG.yml index cbbb8b60..3f3c4fb8 100644 --- a/item_gen_configs/ISM2_CONFIG.yml +++ b/item_gen_configs/ISM2_CONFIG.yml @@ -175,7 +175,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [10, 110] elevation: [0, 60] - shift: [0, 1] + overlap: [0, 1] c5: name: "G5S5.wav" -- GitLab From 7bcde6443393bd1512cda75414dc455325c438f4 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 16 May 2023 09:56:24 +0200 Subject: [PATCH 25/27] change overlap/gap to -1.0/+1.0s --- item_gen_configs/ISM2_CONFIG.yml | 76 +++++++++++++++--------------- item_gen_configs/STEREO_CONFIG.yml | 76 +++++++++++++++--------------- 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/item_gen_configs/ISM2_CONFIG.yml b/item_gen_configs/ISM2_CONFIG.yml index 3f3c4fb8..c9b749a5 100644 --- a/item_gen_configs/ISM2_CONFIG.yml +++ b/item_gen_configs/ISM2_CONFIG.yml @@ -24,8 +24,8 @@ output_path: "./items_ISM2" loudness: -26 ### Pre-amble and Post-amble length in seconds (default = 0.0) -preamble: 0.5 -postamble: 0.5 +preamble: 1.0 +postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of 
silence; default = false (silence) add_low_level_random_noise: true @@ -55,7 +55,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [0, 50] elevation: [0, 0] - overlap: -0.5 + overlap: -1.0 a2: name: "G6S2.wav" @@ -63,7 +63,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [50, 350] elevation: [0, 0] - overlap: -0.5 + overlap: -1.0 a3: name: "G5S3.wav" @@ -71,7 +71,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [40, 290] elevation: [0, 0] - overlap: -0.5 + overlap: -1.0 a4: name: "G4S4.wav" @@ -79,7 +79,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [30, 230] elevation: [15, 15] - overlap: -0.5 + overlap: -1.0 a5: name: "G3S5.wav" @@ -87,7 +87,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [20, 170] elevation: [15, 15] - overlap: -0.5 + overlap: -1.0 a6: name: "G2S6.wav" @@ -95,7 +95,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [10, 110] elevation: [15, 15] - overlap: -0.5 + overlap: -1.0 b1: name: "G2S1.wav" @@ -103,7 +103,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [20, 170] elevation: [30, 30] - overlap: 0.5 + overlap: 1.0 b2: name: "G1S2.wav" @@ -111,7 +111,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [10, 110] elevation: [30, 30] - overlap: 0.5 + overlap: 1.0 b3: name: "G6S3.wav" @@ -119,7 +119,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [0, 50] elevation: [30, 30] - overlap: 0.5 + overlap: 1.0 b4: name: "G5S4.wav" @@ -127,7 +127,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [50, 350] elevation: [60, 60] - overlap: 0.5 + overlap: 1.0 b5: name: "G4S5.wav" @@ -135,7 +135,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [40, 290] elevation: [60, 60] - overlap: 0.5 + overlap: 1.0 b6: name: "G3S6.wav" @@ -143,7 +143,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [30, 230] elevation: [60, 60] - overlap: 0.5 + overlap: 1.0 c1: name: "G3S1.wav" @@ -151,7 +151,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [40, 290] elevation: [0, 60] - overlap: -0.5 + overlap: -1.0 c2: name: "G2S2.wav" @@ -159,7 +159,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [30, 230] elevation: [0, 60] - overlap: -0.5 + overlap: -1.0 c3: name: "G1S3.wav" @@ -167,7 +167,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [20, 170] elevation: [0, 60] - overlap: -0.5 + overlap: -1.0 c4: name: "G6S4.wav" @@ -175,7 +175,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [10, 110] elevation: [0, 60] - overlap: [0, 1] + overlap: -1.0 c5: name: "G5S5.wav" @@ -183,7 +183,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [0, 50] elevation: [0, 60] - overlap: -0.5 + overlap: -1.0 c6: name: "G4S6.wav" @@ -191,7 +191,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [50, 350] elevation: [0, 60] - overlap: -0.5 + overlap: -1.0 d1: name: "G4S1.wav" @@ -199,7 +199,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [50, "180:1:120 + 360"] elevation: [0, 60] - overlap: 0.5 + overlap: 1.0 d2: name: "G3S2.wav" 
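For reference: the overlap values in these scenes are interpreted so that a positive overlap starts the next source before the previous one ends, while a negative overlap leaves a gap. With the sign fix from patch 23, the next source is delayed by the length of the previous one minus the overlap, rounded to the 20 ms grid as in patch 26. A worked sketch of that computation (the function name is illustrative):

from math import floor


def start_sample(prev_len_samples: int, overlap_s: float, fs: int) -> int:
    # Positive overlap moves the next source earlier, negative inserts a gap;
    # floor(-overlap * 50) / 50 keeps the shift on the 20 ms frame grid.
    return prev_len_samples + int(floor(-overlap_s * 50) / 50 * fs)


fs = 48000
assert start_sample(10 * fs, 1.0, fs) == 9 * fs  # 1.0 s overlap
assert start_sample(10 * fs, -1.0, fs) == 11 * fs  # 1.0 s gap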
@@ -207,7 +207,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [300, "-70:-1:-10 - 360"] elevation: [0, 60] - overlap: 0.5 + overlap: 1.0 d3: name: "G2S3.wav" @@ -215,7 +215,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [250, "-20:-1:-320"] elevation: [0, 60] - overlap: 0.5 + overlap: 1.0 d4: name: "G1S4.wav" @@ -223,7 +223,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [200, "30:-1:-270"] elevation: [0, 60] - overlap: 0.5 + overlap: 1.0 d5: name: "G6S5.wav" @@ -231,7 +231,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [150, "80:1:20 + 360"] elevation: [0, 60] - overlap: 0.5 + overlap: 1.0 d6: name: "G5S6.wav" @@ -239,7 +239,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: [100, "130:1:70 + 360"] elevation: [0, 60] - overlap: 0.5 + overlap: 1.0 e1: name: "G5S1.wav" @@ -247,7 +247,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["80:1:20 + 360", "80:1:20 + 360"] elevation: [10, 60] - overlap: 0.5 + overlap: 1.0 e2: name: "G4S2.wav" @@ -255,7 +255,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["130:1:70 + 360", "130:1:70 + 360"] elevation: [10, 60] - overlap: 0.5 + overlap: 1.0 e3: name: "G3S3.wav" @@ -263,7 +263,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["180:1:120 + 360", "180:1:120 + 360"] elevation: [10, 60] - overlap: 0.5 + overlap: 1.0 e4: name: "G2S4.wav" @@ -271,7 +271,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] elevation: [10, 60] - overlap: 0.5 + overlap: 1.0 e5: name: "G1S5.wav" @@ -279,7 +279,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["-20:-1:-320", "-20:-1:-320"] elevation: [10, 60] - overlap: 0.5 + overlap: 1.0 e6: name: "G6S6.wav" @@ -287,7 +287,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["30:-1:-270", "30:-1:-270"] elevation: [10, 60] - overlap: 0.5 + overlap: 1.0 f1: name: "G6S1.wav" @@ -295,7 +295,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["60:1:0 + 360", "60:-1:120 - 360"] elevation: [20, 50] - overlap: -0.5 + overlap: -1.0 f2: name: "G5S2.wav" @@ -303,7 +303,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["0:1:300", "0:-1:60 - 360"] elevation: [20, 50] - overlap: -0.5 + overlap: -1.0 f3: name: "G4S3.wav" @@ -311,7 +311,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["300:1:240 + 360", "300:-1:0"] elevation: [20, 50] - overlap: -0.5 + overlap: -1.0 f4: name: "G3S4.wav" @@ -319,7 +319,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["240:1:180 + 360", "240:-1:-60"] elevation: [20, 50] - overlap: -0.5 + overlap: -1.0 f5: name: "G2S5.wav" @@ -327,7 +327,7 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["180:1:120 + 360", "180:-1:-120"] elevation: [20, 50] - overlap: -0.5 + overlap: -1.0 f6: name: "G1S6.wav" @@ -335,5 +335,5 @@ scenes: source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] elevation: [20, 50] - overlap: -0.5 + overlap: -1.0 \ No newline at end of file diff --git a/item_gen_configs/STEREO_CONFIG.yml 
b/item_gen_configs/STEREO_CONFIG.yml index 8f6cccc3..7dd1a956 100644 --- a/item_gen_configs/STEREO_CONFIG.yml +++ b/item_gen_configs/STEREO_CONFIG.yml @@ -30,8 +30,8 @@ output_path: "./items_STEREO" loudness: -26 ### Pre-amble and Post-amble length in seconds (default = 0.0) -preamble: 0.5 -postamble: 0.5 +preamble: 1.0 +postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true @@ -54,250 +54,250 @@ scenes: description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LEABP04.wav", "LEABP11.wav"] - overlap: 0.5 + overlap: 1.0 a2: name: "G6S2.wav" description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LEABP05.wav", "LEABP11.wav"] - overlap: 0.5 + overlap: 1.0 a3: name: "G5S3.wav" description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LEABP06.wav", "LEABP11.wav"] - overlap: 0.5 + overlap: 1.0 a4: name: "G4S4.wav" description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LEABP05.wav", "LEABP10.wav"] - overlap: -0.5 + overlap: -1.0 a5: name: "G3S5.wav" description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LEABP05.wav", "LEABP11.wav"] - overlap: -0.5 + overlap: -1.0 a6: name: "G2S6.wav" description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LEABP05.wav", "LEABP12.wav"] - overlap: -0.5 + overlap: -1.0 b1: name: "G2S1.wav" description: "Two speakers sitting at oval table side by side in a large anechoic conference room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LAABP05.wav", "LAABP06.wav"] - overlap: -0.5 + overlap: -1.0 b2: name: "G1S2.wav" description: "Two speakers sitting at oval table side by side in a large anechoic conference room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LAABP07.wav", "LAABP08.wav"] - overlap: 0.5 + overlap: 1.0 b3: name: "G6S3.wav" description: "Two speakers sitting at oval table side by side in a large anechoic conference room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LAABP09.wav", "LAABP10.wav"] - overlap: 0.5 + overlap: 1.0 b4: name: "G5S4.wav" description: "Two speakers sitting at oval table side by side in a large anechoic conference room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LAABP11.wav", "LAABP12.wav"] - overlap: -0.5 + overlap: -1.0 b5: name: "G4S5.wav" description: "Two speakers sitting at oval table side by side in a large anechoic conference room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LAABP01.wav", "LAABP02.wav"] - overlap: -0.5 + overlap: -1.0 b6: name: "G3S6.wav" description: "Two speakers sitting at oval table side by side in a large anechoic conference room." 
source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["LAABP03.wav", "LAABP04.wav"] - overlap: -0.5 + overlap: -1.0 c1: name: "G3S1.wav" description: "One talker sitting at table in a small anechoic conference room." source: ["test_single.wav"] IR: ["SAMSP01.wav"] - overlap: -0.5 + overlap: -1.0 c2: name: "G2S2.wav" description: "One talker sitting at table in a small anechoic conference room." source: ["test_single.wav"] IR: ["SAMSP04.wav"] - overlap: -0.5 + overlap: -1.0 c3: name: "G1S3.wav" description: "One talker sitting at table in a small anechoic conference room." source: ["test_single.wav"] IR: ["SAMSP07.wav"] - overlap: -0.5 + overlap: -1.0 c4: name: "G6S4.wav" description: "One talker sitting at table in a small echoic conference room." source: ["test_single.wav"] IR: ["SEABP01.wav"] - overlap: -0.5 + overlap: -1.0 c5: name: "G5S5.wav" description: "One talker sitting at table in a small echoic conference room." source: ["test_single.wav"] IR: ["SEABP03.wav"] - overlap: -0.5 + overlap: -1.0 c6: name: "G4S6.wav" description: "One talker sitting at table in a small echoic conference room." source: ["test_single.wav"] IR: ["SEABP06.wav"] - overlap: -0.5 + overlap: -1.0 d1: name: "G4S1.wav" description: "One talker sitting at table in a small anechoic conference room." source: ["test_single.wav"] IR: ["SEBIP01.wav"] - overlap: -0.5 + overlap: -1.0 d2: name: "G3S2.wav" description: "One talker sitting at table in a small anechoic conference room." source: ["test_single.wav"] IR: ["SEBIP04.wav"] - overlap: -0.5 + overlap: -1.0 d3: name: "G3S2.wav" description: "One talker sitting at table in a small anechoic conference room." source: ["test_single.wav"] IR: ["SEBIP07.wav"] - overlap: -0.5 + overlap: -1.0 d4: name: "G1S4.wav" description: "One talker sitting at table in a small echoic conference room." source: ["test_single.wav"] IR: ["SEBIP07.wav"] - overlap: -0.5 + overlap: -1.0 d5: name: "G6S5.wav" description: "One talker sitting at table in a small echoic conference room." source: ["test_single.wav"] IR: ["SEBIP07.wav"] - overlap: -0.5 + overlap: -1.0 d6: name: "G5S6.wav" description: "One talker sitting at table in a small echoic conference room." source: ["test_single.wav"] IR: ["SEBIP07.wav"] - overlap: -0.5 + overlap: -1.0 e1: name: "G5S1.wav" description: "Two talkers sitting in a room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEMSP01.wav", "SEMSP03.wav"] - overlap: 0.5 + overlap: 1.0 e2: name: "G4S2.wav" description: "Two talkers sitting in a room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEMSP01.wav", "SEMSP05.wav"] - overlap: 0.5 + overlap: 1.0 e3: name: "G3S3.wav" description: "Two talkers sitting in a room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEMSP01.wav", "SEMSP07.wav"] - overlap: 0.5 + overlap: 1.0 e4: name: "G2S4.wav" description: "Two talkers sitting in a room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEMSP03.wav", "SEMSP04.wav"] - overlap: -0.5 + overlap: -1.0 e5: name: "G1S5.wav" description: "Two talkers sitting in a room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEMSP05.wav", "SEMSP07.wav"] - overlap: -0.5 + overlap: -1.0 e6: name: "G6S6.wav" description: "Two talkers sitting in a room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEMSP06.wav", "SEMSP02.wav"] - overlap: -0.5 + overlap: -1.0 f1: name: "G6S1.wav" description: "Two talkers sitting in a room." 
source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEBIP05.wav", "SEBIP01.wav"] - overlap: 0.5 + overlap: 1.0 f2: name: "G5S2.wav" description: "Two talkers sitting in a room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEBIP07.wav", "SEBIP01.wav"] - overlap: 0.5 + overlap: 1.0 f3: name: "G4S3.wav" description: "Two talkers sitting in a room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEBIP04.wav", "SEBIP01.wav"] - overlap: 0.5 + overlap: 1.0 f4: name: "G3S4.wav" description: "Two talkers sitting in a room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEBIP02.wav", "SEBIP06.wav"] - overlap: -0.5 + overlap: -1.0 f5: name: "G2S5.wav" description: "Two talkers sitting in a room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEBIP02.wav", "SEBIP06.wav"] - overlap: -0.5 + overlap: -1.0 f6: name: "G1S6.wav" description: "Two talkers sitting in a room." source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] IR: ["SEBIP03.wav", "SEBIP04.wav"] - overlap: -0.5 + overlap: -1.0 \ No newline at end of file -- GitLab From 3ea660b926e92d9aaece18db73a3925a17bacef3 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 16 May 2023 11:01:05 +0200 Subject: [PATCH 26/27] fix incorrect length of .csv files; improving source code readability --- .../generation/process_ism_items.py | 47 ++++++++++--------- .../generation/process_stereo_items.py | 9 ++-- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/ivas_processing_scripts/generation/process_ism_items.py b/ivas_processing_scripts/generation/process_ism_items.py index 7bd682a7..54c7556a 100644 --- a/ivas_processing_scripts/generation/process_ism_items.py +++ b/ivas_processing_scripts/generation/process_ism_items.py @@ -115,10 +115,15 @@ def generate_ism_items( # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) + frame_len = int(x.fs / 50) + + # trim the samples from the end to ensure that the signal length is a multiple of 20ms + x.audio = x.audio[:N_frames * frame_len] # adjust the level of the source file _, scale_factor = get_loudness(x, cfg.loudness, "MONO") x.audio *= scale_factor + # read azimuth information and create array if isinstance(source_azi, str): @@ -192,36 +197,34 @@ def generate_ism_items( # get the length of the first source file N_delay = len(y.audio[:, 0]) - # add the shift - N_delay += int(-source_overlap * x.fs) + # add the shift value (ensure that the shift is a multiple of 20ms) + N_delay += int(floor(-source_overlap * 50) / 50 * x.fs) - # ensure delay is a multiple of 20ms - # N_delay = int(floor(source_shift * 50) / 50 * x.fs) - - # insert all-zero preamble + # insert all-zero signal pre = np.zeros((N_delay, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) # insert neutral position as a pre-amble + N_delay = int(N_delay / frame_len) pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1) ) # !!!! TBD - check if we should insert netrual position or the first position of the metadata x_meta = np.concatenate([pre, x_meta]) # pad with zeros to ensure that the signal length is a multiple of 20ms - N_frame = x.fs / 50 - if len(x.audio) % N_frame != 0: - N_pad = int(N_frame - len(x.audio) % N_frame) - - # insert all-zero preamble - pre = np.zeros((N_pad, x.audio.shape[1])) - x.audio = np.concatenate([pre, x.audio]) - - # insert neutral position as a pre-amble - pre = np.tile( - [0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1) - ) # !!!! 
From 3ea660b926e92d9aaece18db73a3925a17bacef3 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky
Date: Tue, 16 May 2023 11:01:05 +0200
Subject: [PATCH 26/27] fix incorrect length of .csv files; improving source code readability

---
 .../generation/process_ism_items.py    | 47 ++++++++++---------
 .../generation/process_stereo_items.py |  9 ++--
 2 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/ivas_processing_scripts/generation/process_ism_items.py b/ivas_processing_scripts/generation/process_ism_items.py
index 7bd682a7..54c7556a 100644
--- a/ivas_processing_scripts/generation/process_ism_items.py
+++ b/ivas_processing_scripts/generation/process_ism_items.py
@@ -115,10 +115,15 @@ def generate_ism_items(
 
         # get the number of frames (multiple of 20ms)
         N_frames = int(len(x.audio) / x.fs * 50)
+        frame_len = int(x.fs / 50)
+
+        # trim the samples from the end to ensure that the signal length is a multiple of 20ms
+        x.audio = x.audio[:N_frames * frame_len]
 
         # adjust the level of the source file
         _, scale_factor = get_loudness(x, cfg.loudness, "MONO")
         x.audio *= scale_factor
+
 
         # read azimuth information and create array
         if isinstance(source_azi, str):
@@ -192,36 +197,34 @@
         # get the length of the first source file
         N_delay = len(y.audio[:, 0])
 
-        # add the shift
-        N_delay += int(-source_overlap * x.fs)
+        # add the shift value (ensure that the shift is a multiple of 20ms)
+        N_delay += int(floor(-source_overlap * 50) / 50 * x.fs)
 
-        # ensure delay is a multiple of 20ms
-        # N_delay = int(floor(source_shift * 50) / 50 * x.fs)
-
-        # insert all-zero preamble
+        # insert all-zero signal
         pre = np.zeros((N_delay, x.audio.shape[1]))
         x.audio = np.concatenate([pre, x.audio])
 
         # insert neutral position as a pre-amble
+        N_delay = int(N_delay / frame_len)
         pre = np.tile(
             [0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1)
         )  # !!!! TBD - check if we should insert netrual position or the first position of the metadata
         x_meta = np.concatenate([pre, x_meta])
 
         # pad with zeros to ensure that the signal length is a multiple of 20ms
-        N_frame = x.fs / 50
-        if len(x.audio) % N_frame != 0:
-            N_pad = int(N_frame - len(x.audio) % N_frame)
-
-            # insert all-zero preamble
-            pre = np.zeros((N_pad, x.audio.shape[1]))
-            x.audio = np.concatenate([pre, x.audio])
-
-            # insert neutral position as a pre-amble
-            pre = np.tile(
-                [0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1)
-            )  # !!!! TBD - check if we should insert netrual position or the first position of the metadata
-            x_meta = np.concatenate([pre, x_meta])
+        if len(x.audio) % frame_len != 0:
+            # pad the source signal
+            N_pad = int(frame_len - len(x.audio) % frame_len)
+            post = np.zeros((N_pad, x.audio.shape[1]))
+            x.audio = np.concatenate([x.audio, post])
+
+        # pad the metadata
+        N_pad = int(len(x.audio) / frame_len) - len(x_meta)
+        if N_pad > 0:
+            post = np.tile(
+                [0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1)
+            )  # !!!! TBD - check if we should insert netrual position or the first position of the metadata
+            x_meta = np.concatenate([x_meta, post])
 
         # add source signal to the array of all source signals
         y.fs = x.fs
@@ -280,7 +283,7 @@
 
         # append pre-amble and post-amble to all sources
         if cfg.preamble != 0.0:
-            # ensure that pre-mable is a multiple of 20ms
+            # ensure that pre-amble is a multiple of 20ms
             N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)
 
             # insert all-zero preamble to all sources
@@ -288,6 +291,7 @@
             y.audio = np.concatenate([pre, y.audio])
 
             # insert neutral position as a pre-amble to all sources
+            N_pre = int(N_pre / frame_len)
            pre = np.tile(
                 [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1)
             )  # !!!! TBD - check if we should insert netrual position or the first position of the metadata
@@ -302,6 +306,7 @@
             y.audio = np.concatenate([y.audio, post])
 
             # append neutral position as a post-amble to all sources
+            N_post = int(N_post / frame_len)
             post = np.tile(
                 [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1)
             )  # !!!! TBD - check if we should insert netrual position or the last position of the metadata
@@ -319,7 +324,7 @@
         y.audio += noise
 
         # write individual ISM audio streams to the output file in an interleaved format
-        output_filename = scene["name"]
+        output_filename = scene_name
         audiofile.write(
             os.path.join(cfg.output_path, output_filename), y.audio, y.fs
         )  # !!!! TBD: replace all os.path.xxx operations with the Path object
diff --git a/ivas_processing_scripts/generation/process_stereo_items.py b/ivas_processing_scripts/generation/process_stereo_items.py
index 9498e2cf..bd7e5915 100644
--- a/ivas_processing_scripts/generation/process_stereo_items.py
+++ b/ivas_processing_scripts/generation/process_stereo_items.py
@@ -88,7 +88,7 @@ def generate_stereo_items(
 
     # repeat for all source files
     for scene_name, scene in cfg.scenes.items():
-        logger.info(f"Processing scene: {scene_name} out of {N_scenes} scenes")
+        logger.info(f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene_name}")
 
         # extract the number of audio sources
         N_sources = len(np.atleast_1d(scene["source"]))
@@ -135,9 +135,6 @@
         # add the shift
         N_delay += int(-source_overlap * x.fs)
 
-        # ensure delay is a multiple of 20ms
-        # N_delay = int(floor(source_shift * 50) / 50 * x.fs)
-
         # insert all-zero preamble
         pre = np.zeros((N_delay, x.audio.shape[1]))
         x.audio = np.concatenate([pre, x.audio])
@@ -187,7 +184,7 @@
 
         # append pre-amble and post-amble to all sources
         if cfg.preamble != 0.0:
-            # ensure that pre-mable is a multiple of 20ms
+            # ensure that pre-amble is a multiple of 20ms
             N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)
 
             # insert all-zero preamble to all sources
@@ -214,7 +211,7 @@
         y.audio += noise
 
         # write the reverberated audio into output file
-        output_filename = scene["name"]
+        output_filename = scene_name
         audiofile.write(
             os.path.join(cfg.output_path, output_filename), y.audio, y.fs
         )  # !!!! TBD: replace all os.path.xxx operations with the Path object
--
GitLab
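
The fix above keeps the audio and the per-frame metadata in lock-step: the source signal is trimmed to a whole number of 20 ms frames, any remainder created by shifting is padded at the end rather than prepended, and the metadata is padded with neutral-position rows until it has exactly one row per frame, which is what corrects the length of the .csv files. A self-contained sketch of that alignment, assuming mono audio and a 2-D metadata array with one row per 20 ms frame; align_to_frames is an illustrative name, not a function from the repository:

    import numpy as np

    NEUTRAL_POS = [0.00, 0.00, 1.00, 0.00, 1.00]  # filler row for missing metadata frames


    def align_to_frames(audio, meta, fs):
        frame_len = fs // 50                # samples per 20 ms frame
        n_frames = len(audio) // frame_len

        # trim trailing samples that do not fill a complete frame
        audio = audio[: n_frames * frame_len]

        # pad the metadata at the end so that it has one row per audio frame
        n_pad = n_frames - len(meta)
        if n_pad > 0:
            meta = np.concatenate([meta, np.tile(NEUTRAL_POS, (n_pad, 1))])
        return audio, meta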
From bfa648d433dad4ec6c6e577f1ec19bdc5b421ddf Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky
Date: Tue, 16 May 2023 11:44:15 +0200
Subject: [PATCH 27/27] formatting

---
 ivas_processing_scripts/generation/process_ism_items.py    | 5 ++---
 ivas_processing_scripts/generation/process_stereo_items.py | 4 +++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/ivas_processing_scripts/generation/process_ism_items.py b/ivas_processing_scripts/generation/process_ism_items.py
index 54c7556a..800f12a1 100644
--- a/ivas_processing_scripts/generation/process_ism_items.py
+++ b/ivas_processing_scripts/generation/process_ism_items.py
@@ -116,14 +116,13 @@ def generate_ism_items(
         # get the number of frames (multiple of 20ms)
         N_frames = int(len(x.audio) / x.fs * 50)
         frame_len = int(x.fs / 50)
-
+
         # trim the samples from the end to ensure that the signal length is a multiple of 20ms
-        x.audio = x.audio[:N_frames * frame_len]
+        x.audio = x.audio[: N_frames * frame_len]
 
         # adjust the level of the source file
         _, scale_factor = get_loudness(x, cfg.loudness, "MONO")
         x.audio *= scale_factor
-
 
         # read azimuth information and create array
         if isinstance(source_azi, str):
diff --git a/ivas_processing_scripts/generation/process_stereo_items.py b/ivas_processing_scripts/generation/process_stereo_items.py
index bd7e5915..ff3ec592 100644
--- a/ivas_processing_scripts/generation/process_stereo_items.py
+++ b/ivas_processing_scripts/generation/process_stereo_items.py
@@ -88,7 +88,9 @@ def generate_stereo_items(
 
     # repeat for all source files
     for scene_name, scene in cfg.scenes.items():
-        logger.info(f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene_name}")
+        logger.info(
+            f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene_name}"
+        )
 
         # extract the number of audio sources
         N_sources = len(np.atleast_1d(scene["source"]))
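
Both generators now derive the output filename from the key of the scene in the scenes dictionary (scene_name) instead of the scene's name field. A likely benefit, though the patches do not state it: dictionary keys are unique by construction, while name values can repeat (d2 and d3 in the configuration above both carry "G3S2.wav"), and writing by a repeated name would silently overwrite one generated item with another. A tiny illustration with a hypothetical scenes dictionary:

    scenes = {"d2": {"name": "G3S2.wav"}, "d3": {"name": "G3S2.wav"}}

    # keyed by the "name" field, two scenes collapse onto a single output file
    print(len({s["name"] for s in scenes.values()}))  # -> 1

    # keyed by the dictionary key, each scene gets its own output file
    print(len(set(scenes)))  # -> 2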
--
GitLab