From 81538a2cd52bf5b09504dca0c85e737612c345dc Mon Sep 17 00:00:00 2001
From: veeravt <vinit.veera@iis.fraunhofer.de>
Date: Wed, 3 May 2023 16:23:11 +0200
Subject: [PATCH 1/5] Added a function to check if the audio length is a
 multiple of frame size.

---
 ivas_processing_scripts/audiotools/audio.py   |  4 +-
 .../audiotools/audioarray.py                  | 44 ++++++++++++++++++-
 .../binaural_datasets/binaural_dataset.py     |  8 ++--
 .../audiotools/convert/__init__.py            |  3 ++
 4 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py
index f6c45fca..850c3c49 100755
--- a/ivas_processing_scripts/audiotools/audio.py
+++ b/ivas_processing_scripts/audiotools/audio.py
@@ -30,7 +30,7 @@
 #  the United Nations Convention on Contracts on the International Sales of Goods.
 #
 
-import warnings
+from warnings import warn
 from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Optional, Union
@@ -274,7 +274,7 @@ class ObjectBasedAudio(Audio):
                     obj.metadata_files.append(file_name_meta)
                 else:
                     raise ValueError(f"Metadata file {file_name_meta} not found.")
-            warnings.warn(
+            warn(
                 f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}"
             )
 
diff --git a/ivas_processing_scripts/audiotools/audioarray.py b/ivas_processing_scripts/audiotools/audioarray.py
index c0909c4c..5b431fee 100755
--- a/ivas_processing_scripts/audiotools/audioarray.py
+++ b/ivas_processing_scripts/audiotools/audioarray.py
@@ -31,7 +31,7 @@
 #
 
 import logging
-import warnings
+from warnings import warn
 from typing import Iterator, Optional, Tuple, Union
 
 import numpy as np
@@ -342,7 +342,7 @@ def limiter(
         fr_sig[idx_min] = -32768
 
     if limited:
-        warnings.warn("Limiting had to be applied")
+        warn("Limiting had to be applied")
     return x
 
 
@@ -688,3 +688,43 @@ def mute_channels(
     """
     x[:, mute] = 0
     return x
+
+
+def multiple_of_frame_size(
+    x: np.ndarray,
+    fs: int = 48000,
+    frame_size_in_ms: Optional[int] = 20,
+) -> np.ndarray:
+    """
+    Warn and pad audio if it isn't a multiple of frame size
+
+    Parameters
+    ----------
+    x: np.ndarray
+        Input array
+    fs: int
+        Input sampling rate in Hz; default = 48000
+    frame_size_in_ms: Optional[int]
+        Frame size in milliseconds; default = 20
+
+    Returns
+    -------
+    x: np.ndarray
+        Padded array
+    """
+
+    if x.ndim == 1:
+        n_samples_x = x.shape
+        n_chan_x = 1
+    else:
+        n_samples_x, n_chan_x = x.shape
+
+    frame_length_samples = (frame_size_in_ms / 1000) * fs
+    if n_samples_x % frame_length_samples != 0:
+        warn(
+            "Audio length is not a multiple of frame length (20 ms). Padding with zeros."
+        )
+        pad_length = int(frame_length_samples - (n_samples_x % frame_length_samples))
+        x = np.pad(x, ((0, int(pad_length)), (0, 0)), "constant")
+
+    return x
diff --git a/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py b/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py
index e5d5ac95..5b95f14f 100755
--- a/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py
+++ b/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py
@@ -30,7 +30,7 @@
 #  the United Nations Convention on Contracts on the International Sales of Goods.
 #
 
-import warnings
+from warnings import warn
 from pathlib import Path
 from typing import Optional, Tuple, Union
 
@@ -149,7 +149,7 @@ def load_ir(
                     )
                 ).is_file():
                     dataset_suffix = "SBA3"
-                    warnings.warn("No SBA1 dataset found -> use truncated SBA3 dataset")
+                    warn("No SBA1 dataset found -> use truncated SBA3 dataset")
             elif in_fmt.endswith("2"):
                 dataset_suffix = "SBA2"
                 # Use truncated SBA3 dataset if no SBA1 or 2 dataset exists
@@ -159,7 +159,7 @@ def load_ir(
                     )
                 ).is_file():
                     dataset_suffix = "SBA3"
-                    warnings.warn("No SBA2 dataset found -> use truncated SBA3 dataset")
+                    warn("No SBA2 dataset found -> use truncated SBA3 dataset")
             else:
                 dataset_suffix = "SBA3"
 
@@ -172,7 +172,7 @@ def load_ir(
         latency_smp = latency_s
     else:
         latency_smp = int(np.min(np.argmax(np.sum(np.abs(IR), axis=1), axis=0)))
-        warnings.warn(
+        warn(
             f"No latency of HRTF dataset specified in {path_dataset} file -> computed latency: {latency_smp} sample(s)"
         )
 
diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py
index 024faa47..0a35a92b 100755
--- a/ivas_processing_scripts/audiotools/convert/__init__.py
+++ b/ivas_processing_scripts/audiotools/convert/__init__.py
@@ -212,6 +212,9 @@ def process_audio(
     if fs is None:
         fs = x.fs
 
+    """making sure length is a multiple of the frame size"""
+    x.audio = audioarray.multiple_of_frame_size(x.audio, fs)
+
     """delay audio"""
     if delay is not None:
         if logger:
-- 
GitLab


From ce164d8ae5ad05a3509a655b2a36b176c7536f20 Mon Sep 17 00:00:00 2001
From: veeravt <vinit.veera@iis.fraunhofer.de>
Date: Thu, 4 May 2023 16:51:37 +0200
Subject: [PATCH 2/5] Applied format patch.

---
 ivas_processing_scripts/audiotools/audio.py                     | 2 +-
 ivas_processing_scripts/audiotools/audioarray.py                | 2 +-
 .../audiotools/binaural_datasets/binaural_dataset.py            | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py
index 850c3c49..3bc10c53 100755
--- a/ivas_processing_scripts/audiotools/audio.py
+++ b/ivas_processing_scripts/audiotools/audio.py
@@ -30,10 +30,10 @@
 #  the United Nations Convention on Contracts on the International Sales of Goods.
 #
 
-from warnings import warn
 from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Optional, Union
+from warnings import warn
 
 import numpy as np
 
diff --git a/ivas_processing_scripts/audiotools/audioarray.py b/ivas_processing_scripts/audiotools/audioarray.py
index 5b431fee..999381fe 100755
--- a/ivas_processing_scripts/audiotools/audioarray.py
+++ b/ivas_processing_scripts/audiotools/audioarray.py
@@ -31,8 +31,8 @@
 #
 
 import logging
-from warnings import warn
 from typing import Iterator, Optional, Tuple, Union
+from warnings import warn
 
 import numpy as np
 import scipy.signal as sig
diff --git a/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py b/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py
index 5b95f14f..37008adf 100755
--- a/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py
+++ b/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py
@@ -30,9 +30,9 @@
 #  the United Nations Convention on Contracts on the International Sales of Goods.
 #
 
-from warnings import warn
 from pathlib import Path
 from typing import Optional, Tuple, Union
+from warnings import warn
 
 import numpy as np
 from scipy.io import loadmat
-- 
GitLab


From 8184202801f4dd8c8385b75b8026f9d318267cd0 Mon Sep 17 00:00:00 2001
From: veeravt <vinit.veera@iis.fraunhofer.de>
Date: Mon, 8 May 2023 11:22:12 +0200
Subject: [PATCH 3/5] Modified the function and its placement in the scripts.

---
 ivas_processing_scripts/__init__.py           |  4 ++
 .../audiotools/audioarray.py                  | 40 ---------------
 .../audiotools/convert/__init__.py            |  3 --
 .../processing/processing.py                  | 49 ++++++++++++++++++-
 4 files changed, 52 insertions(+), 44 deletions(-)

diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py
index a16309e0..6c76d636 100755
--- a/ivas_processing_scripts/__init__.py
+++ b/ivas_processing_scripts/__init__.py
@@ -43,6 +43,7 @@ from ivas_processing_scripts.constants import (
 )
 from ivas_processing_scripts.processing import chains, config
 from ivas_processing_scripts.processing.processing import (
+    multiple_of_frame_size,
     preprocess,
     preprocess_2,
     preprocess_background_noise,
@@ -126,6 +127,9 @@ def main(args):
 
         cfg.metadata_path = metadata
 
+        # checking if audio is a multiple of frame size
+        multiple_of_frame_size(cfg.items_list, cfg.input["fmt"])
+
         # run preprocessing only once
         if hasattr(cfg, "preprocessing"):
             # save process info for background noise
diff --git a/ivas_processing_scripts/audiotools/audioarray.py b/ivas_processing_scripts/audiotools/audioarray.py
index 999381fe..2c770ce4 100755
--- a/ivas_processing_scripts/audiotools/audioarray.py
+++ b/ivas_processing_scripts/audiotools/audioarray.py
@@ -688,43 +688,3 @@ def mute_channels(
     """
     x[:, mute] = 0
     return x
-
-
-def multiple_of_frame_size(
-    x: np.ndarray,
-    fs: int = 48000,
-    frame_size_in_ms: Optional[int] = 20,
-) -> np.ndarray:
-    """
-    Warn and pad audio if it isn't a multiple of frame size
-
-    Parameters
-    ----------
-    x: np.ndarray
-        Input array
-    fs: int
-        Input sampling rate in Hz; default = 48000
-    frame_size_in_ms: Optional[int]
-        Frame size in milliseconds; default = 20
-
-    Returns
-    -------
-    x: np.ndarray
-        Padded array
-    """
-
-    if x.ndim == 1:
-        n_samples_x = x.shape
-        n_chan_x = 1
-    else:
-        n_samples_x, n_chan_x = x.shape
-
-    frame_length_samples = (frame_size_in_ms / 1000) * fs
-    if n_samples_x % frame_length_samples != 0:
-        warn(
-            "Audio length is not a multiple of frame length (20 ms). Padding with zeros."
-        )
-        pad_length = int(frame_length_samples - (n_samples_x % frame_length_samples))
-        x = np.pad(x, ((0, int(pad_length)), (0, 0)), "constant")
-
-    return x
diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py
index 0a35a92b..024faa47 100755
--- a/ivas_processing_scripts/audiotools/convert/__init__.py
+++ b/ivas_processing_scripts/audiotools/convert/__init__.py
@@ -212,9 +212,6 @@ def process_audio(
     if fs is None:
         fs = x.fs
 
-    """making sure length is a multiple of the frame size"""
-    x.audio = audioarray.multiple_of_frame_size(x.audio, fs)
-
     """delay audio"""
     if delay is not None:
         if logger:
diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py
index 0d2097fa..3121fb03 100755
--- a/ivas_processing_scripts/processing/processing.py
+++ b/ivas_processing_scripts/processing/processing.py
@@ -35,7 +35,7 @@ from abc import ABC, abstractmethod
 from itertools import repeat
 from pathlib import Path
 from shutil import copyfile
-from typing import Iterable, Union
+from typing import Iterable, Optional, Union
 from warnings import warn
 
 import numpy as np
@@ -74,6 +74,21 @@ class Processing(ABC):
 
 
 def reorder_items_list(items_list: list, concatenation_order: list) -> list:
+    """
+    Reorder input items list based on concatenation order
+
+    Parameters
+    ----------
+    items_list: list
+        List of input items
+    concatenation_order: list
+        Concatenation order
+
+    Returns
+    -------
+    ordered_full_files: list
+        Re-ordered list of input items
+    """
     name_to_full = {Path(full_file).name: full_file for full_file in items_list}
     ordered_full_files = [
         name_to_full[name] for name in concatenation_order if name in name_to_full
@@ -487,3 +502,35 @@ def preprocess_background_noise(cfg):
     ] = output_audio
 
     return
+
+
+def multiple_of_frame_size(
+    items_list: list,
+    input_format: str,
+    frame_size_in_ms: Optional[int] = 20,
+) -> np.ndarray:
+    """
+    Warn and pad audio if it isn't a multiple of frame size
+
+    Parameters
+    ----------
+    items_list: list
+        List of input items
+    frame_size_in_ms: Optional[int]
+        Frame size in milliseconds; default = 20
+    """
+    for item in items_list:
+        # read file
+        x, fs = read(item)
+        # warning if audio length not a multiple of frame lenght
+        n_samples_x, n_chan_x = x.shape
+        frame_length_samples = (frame_size_in_ms / 1000) * fs
+        if n_samples_x % frame_length_samples != 0:
+            if input_format.startswith("ISM") or input_format.startswith("MASA"):
+                raise ValueError(
+                    f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)."
+                )
+            else:
+                warn(
+                    f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)."
+                )
-- 
GitLab


From 33c84ad6c00768169bf8ed79b0f78e7601fd8a99 Mon Sep 17 00:00:00 2001
From: veeravt <vinit.veera@iis.fraunhofer.de>
Date: Mon, 8 May 2023 16:59:33 +0200
Subject: [PATCH 4/5] Added checks for sampling rate and number of channels.

---
 ivas_processing_scripts/__init__.py           |  2 +-
 .../processing/processing.py                  | 34 ++++++++++++++-----
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py
index 6c76d636..16ac8b93 100755
--- a/ivas_processing_scripts/__init__.py
+++ b/ivas_processing_scripts/__init__.py
@@ -128,7 +128,7 @@ def main(args):
         cfg.metadata_path = metadata
 
         # checking if audio is a multiple of frame size
-        multiple_of_frame_size(cfg.items_list, cfg.input["fmt"])
+        multiple_of_frame_size(cfg)
 
         # run preprocessing only once
         if hasattr(cfg, "preprocessing"):
diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py
index 3121fb03..d72e97b3 100755
--- a/ivas_processing_scripts/processing/processing.py
+++ b/ivas_processing_scripts/processing/processing.py
@@ -505,25 +505,41 @@ def preprocess_background_noise(cfg):
 
 
 def multiple_of_frame_size(
-    items_list: list,
-    input_format: str,
+    cfg: TestConfig,
     frame_size_in_ms: Optional[int] = 20,
 ) -> np.ndarray:
     """
-    Warn and pad audio if it isn't a multiple of frame size
+    Warn/Exit if audio length isn't a multiple of frame size
 
     Parameters
     ----------
-    items_list: list
-        List of input items
+    cfg: TestConfig
+        Input configuration
     frame_size_in_ms: Optional[int]
         Frame size in milliseconds; default = 20
     """
-    for item in items_list:
-        # read file
-        x, fs = read(item)
-        # warning if audio length not a multiple of frame lenght
+    # get the number of channels from the input format
+    input_format = cfg.input["fmt"]
+    num_channels = audio.fromtype(input_format).num_channels
+    for item in cfg.items_list:
+        # read the audio file
+        if "fs" in cfg.input:
+            sampling_rate = cfg.input["fs"]
+        elif item.suffix == ".wav":
+            sampling_rate = None
+        elif item.suffix == ".pcm" or item.suffix == ".raw":
+            raise ValueError("Sampling rate must be specified for headerless files!")
+        x, fs = read(item, nchannels=num_channels, fs=sampling_rate)
         n_samples_x, n_chan_x = x.shape
+        if fs != sampling_rate:
+            raise ValueError(
+                f"Sampling rate of the file ({fs}) does NOT match with that ({sampling_rate}) specified in the config yaml."
+            )
+        if n_chan_x != num_channels:
+            raise ValueError(
+                f"The number of channels in the file ({n_chan_x}) do NOT match with those of format ({num_channels}, {input_format}) specified in the config yaml."
+            )
+        # warn if audio length not a multiple of frame length
         frame_length_samples = (frame_size_in_ms / 1000) * fs
         if n_samples_x % frame_length_samples != 0:
             if input_format.startswith("ISM") or input_format.startswith("MASA"):
-- 
GitLab


From 50ef8834edb013357db7ddc785321c39ec928111 Mon Sep 17 00:00:00 2001
From: veeravt <vinit.veera@iis.fraunhofer.de>
Date: Tue, 9 May 2023 12:10:10 +0200
Subject: [PATCH 5/5] Attempt 1 to fix the broken pipeline #16138

---
 ivas_processing_scripts/processing/processing.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py
index d72e97b3..50538f59 100755
--- a/ivas_processing_scripts/processing/processing.py
+++ b/ivas_processing_scripts/processing/processing.py
@@ -525,11 +525,12 @@ def multiple_of_frame_size(
         # read the audio file
         if "fs" in cfg.input:
             sampling_rate = cfg.input["fs"]
-        elif item.suffix == ".wav":
-            sampling_rate = None
+            x, fs = read(item, nchannels=num_channels, fs=sampling_rate)
         elif item.suffix == ".pcm" or item.suffix == ".raw":
             raise ValueError("Sampling rate must be specified for headerless files!")
-        x, fs = read(item, nchannels=num_channels, fs=sampling_rate)
+        elif item.suffix == ".wav":
+            x, fs = read(item)
+            sampling_rate = fs
         n_samples_x, n_chan_x = x.shape
         if fs != sampling_rate:
             raise ValueError(
-- 
GitLab