From 35e1cc0036173cd8b447a4da8ae0341d49542288 Mon Sep 17 00:00:00 2001
From: veeravt <vinit.veera@iis.fraunhofer.de>
Date: Thu, 25 May 2023 17:17:21 +0200
Subject: [PATCH 1/4] Extended the pre-existing function to pad and align the
 audio to 20ms.

---
 ivas_processing_scripts/__init__.py           |  9 ++-
 .../processing/processing.py                  | 75 ++++++++++++++++++-
 2 files changed, 77 insertions(+), 7 deletions(-)

diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py
index 1a3fa594..d719a156 100755
--- a/ivas_processing_scripts/__init__.py
+++ b/ivas_processing_scripts/__init__.py
@@ -43,6 +43,7 @@ from ivas_processing_scripts.constants import (
 )
 from ivas_processing_scripts.processing import chains, config
 from ivas_processing_scripts.processing.processing import (
+    compare_wav_lengths,
     multiple_of_frame_size,
     preprocess,
     preprocess_2,
@@ -96,6 +97,9 @@ def main(args):
         # set up logging
         logger = logging_init(args, cfg)
 
+        # checking if audio is a multiple of frame size
+        multiple_of_frame_size(cfg, logger)
+
         # Re-ordering items based on concatenation order
         if hasattr(cfg, "preprocessing_2"):
             if (
@@ -128,9 +132,6 @@ def main(args):
 
         cfg.metadata_path = metadata
 
-        # checking if audio is a multiple of frame size
-        multiple_of_frame_size(cfg)
-
         # run preprocessing only once
         if hasattr(cfg, "preprocessing"):
             # save process info for background noise
@@ -180,6 +181,8 @@ def main(args):
         if cfg.condition_in_output_filename:
             rename_generated_conditions(cfg.output_path)
 
+        compare_wav_lengths(cfg.input_path, cfg.output_path, logger)
+
     # copy configuration to output directory
     with open(cfg.output_path.joinpath(f"{cfg.name}.yml"), "w") as f:
         yaml.safe_dump(cfg._yaml_dump, f)
diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py
index cfc5a381..dfb65e3d 100755
--- a/ivas_processing_scripts/processing/processing.py
+++ b/ivas_processing_scripts/processing/processing.py
@@ -319,6 +319,9 @@ def preprocess_2(cfg, logger):
     cfg.tmp_dirs = cfg.tmp_dirs[1:]
     cfg.out_dirs = cfg.out_dirs[1:]
 
+    # Copy the concatenated file to the 20ms_aligned_files folder
+    copyfile(cfg.concat_file, cfg.input_path / cfg.concat_file.name)
+
     return
 
 
@@ -519,10 +522,13 @@ def preprocess_background_noise(cfg):
 
 def multiple_of_frame_size(
     cfg: TestConfig,
+    logger: logging.Logger,
     frame_size_in_ms: Optional[int] = 20,
 ) -> np.ndarray:
     """
-    Warn/Exit if audio if it isn't a multiple of frame size
+    Check whether the length of each multi-channel audio file in the items list is a multiple of the frame size.
+    If a file's length isn't a multiple, the function pads it to the next integer multiple of the frame size and writes the file to an output directory.
+    It also copies the already aligned files to the output directory.
 
     Parameters
     ----------
@@ -534,7 +540,14 @@ def multiple_of_frame_size(
     # get the number of channels from the input format
     input_format = cfg.input["fmt"]
     num_channels = audio.fromtype(input_format).num_channels
-    for item in cfg.items_list:
+
+    # Create output directory if it doesn't exist
+    output_dir = cfg.output_path / "20ms_aligned_files"
+    output_dir.mkdir(exist_ok=True)
+
+    # padded_audio_files_list = []
+
+    for i, item in enumerate(cfg.items_list):
         # read the audio file
         if "fs" in cfg.input:
             sampling_rate = cfg.input["fs"]
@@ -555,15 +568,42 @@ def multiple_of_frame_size(
             )
         # warn if audio length not a multiple of frame length
         frame_length_samples = (frame_size_in_ms / 1000) * fs
-        if n_samples_x % frame_length_samples != 0:
+        remainder = n_samples_x % frame_length_samples
+        if remainder != 0:
+            # Calculate number of samples needed for padding
+            padding_samples = int(frame_length_samples - remainder)
+
             if input_format.startswith("ISM") or input_format.startswith("MASA"):
                 raise ValueError(
                     f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)."
                 )
             else:
                 warn(
-                    f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)."
+                    f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms). Padding to the nearest integer multiple."
+                )
+                logger.info(
+                    f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms). Padding to the nearest integer multiple."
                 )
+                # Create and appending zeros
+                padding_array = np.zeros((padding_samples, n_chan_x))
+                padded_data = np.vstack((x, padding_array))
+                # Write padded data to output directory
+                write(output_dir / item.name, padded_data, fs)
+        else:
+            copyfile(item, output_dir / item.name)
+
+        # Update audio file path in list
+        cfg.items_list[i] = output_dir / item.name
+
+    # Check if all files are present in output directory
+    all_files_present = all(
+        [(output_dir / item.name).exists() for audio_file in cfg.items_list]
+    )
+    if not all_files_present:
+        raise Exception("Not all files are present in the output directory")
+
+    # Make the output path as the new input path
+    cfg.input_path = output_dir
 
 
 def rename_generated_conditions(output_path: Path):
@@ -583,3 +623,30 @@ def rename_generated_conditions(output_path: Path):
             for file_path in subdirectory.iterdir():
                 new_filename = f"{file_path.stem}.{subdirectory.name}{file_path.suffix}"
                 file_path.rename(subdirectory / new_filename)
+
+
+def compare_wav_lengths(input_path: Path, output_path: Path, logger: logging.Logger):
+    """
+    The function compares the number of samples of the files present in the input directory
+    to the corresponding output files in the subdirectories which start with "c" followed by 2 digits.
+
+    Parameters
+    ----------
+    input_path: Path
+        Path to input directory
+    output_path: Path
+        Path to output directory
+    """
+    for subdir in output_path.iterdir():
+        if subdir.is_dir() and subdir.name.startswith("c"):
+            for file in subdir.glob("*.wav"):
+                input_file = input_path / file.name
+                output_file = file
+                input_array, input_fs = read(str(input_file))
+                output_array, output_fs = read(str(output_file))
+                input_length, input_channels = input_array.shape
+                output_length, output_channles = output_array.shape
+                if input_length != output_length:
+                    logger.info(
+                        f"The {input_file.name} has {input_length} samples and the output condition {subdir.name} {output_file.name} has {output_length} samples and the difference between the two is {input_length - output_length} samples.\n"
+                    )
-- 
GitLab


From 369dba9325f5ef91347950be7ca482ee2f06d605 Mon Sep 17 00:00:00 2001
From: veeravt <vinit.veera@iis.fraunhofer.de>
Date: Fri, 26 May 2023 15:03:43 +0200
Subject: [PATCH 2/4] Updating the metadata paths and copying the metadata to
 the new folder.

---
 ivas_processing_scripts/__init__.py              | 8 ++++----
 ivas_processing_scripts/processing/processing.py | 8 ++++++++
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py
index d719a156..b1b31a5a 100755
--- a/ivas_processing_scripts/__init__.py
+++ b/ivas_processing_scripts/__init__.py
@@ -97,9 +97,6 @@ def main(args):
         # set up logging
         logger = logging_init(args, cfg)
 
-        # checking if audio is a multiple of frame size
-        multiple_of_frame_size(cfg, logger)
-
         # Re-ordering items based on concatenation order
         if hasattr(cfg, "preprocessing_2"):
             if (
@@ -132,6 +129,9 @@ def main(args):
 
         cfg.metadata_path = metadata
 
+        # checking if audio is a multiple of frame size
+        multiple_of_frame_size(cfg, logger)
+
         # run preprocessing only once
         if hasattr(cfg, "preprocessing"):
             # save process info for background noise
@@ -181,7 +181,7 @@ def main(args):
         if cfg.condition_in_output_filename:
             rename_generated_conditions(cfg.output_path)
 
-        compare_wav_lengths(cfg.input_path, cfg.output_path, logger)
+        # compare_wav_lengths(cfg.input_path, cfg.output_path, logger)
 
     # copy configuration to output directory
     with open(cfg.output_path.joinpath(f"{cfg.name}.yml"), "w") as f:
diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py
index dfb65e3d..fbf512a2 100755
--- a/ivas_processing_scripts/processing/processing.py
+++ b/ivas_processing_scripts/processing/processing.py
@@ -595,6 +595,14 @@ def multiple_of_frame_size(
         # Update audio file path in list
         cfg.items_list[i] = output_dir / item.name
 
+        # Copy metadata and update path
+        if input_format.startswith("ISM") or input_format.startswith("MASA"):
+            for j in range(int(cfg.input["fmt"][3])):
+                copyfile(
+                    cfg.metadata_path[i][j], output_dir / cfg.metadata_path[i][j].name
+                )
+                cfg.metadata_path[i][j] = output_dir / cfg.metadata_path[i][j].name
+
     # Check if all files are present in output directory
     all_files_present = all(
         [(output_dir / item.name).exists() for audio_file in cfg.items_list]
-- 
GitLab


From fa793acf01c7a3d22041605e0dcf8090dd30a872 Mon Sep 17 00:00:00 2001
From: Treffehn <anika.treffehn@iis.fraunhofer.de>
Date: Mon, 5 Jun 2023 13:48:43 +0200
Subject: [PATCH 3/4] moved test for item length to processing splitting
 scaling

---
 ivas_processing_scripts/__init__.py           |  5 +--
 .../processing/processing.py                  | 39 ++++---------------
 .../processing_splitting_scaling.py           |  8 ++++
 3 files changed, 17 insertions(+), 35 deletions(-)

diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py
index 2656b5d1..51ba68a2 100755
--- a/ivas_processing_scripts/__init__.py
+++ b/ivas_processing_scripts/__init__.py
@@ -43,7 +43,6 @@ from ivas_processing_scripts.constants import (
 )
 from ivas_processing_scripts.processing import chains, config
 from ivas_processing_scripts.processing.processing import (
-    compare_wav_lengths,
     multiple_of_frame_size,
     preprocess,
     preprocess_2,
@@ -131,7 +130,7 @@ def main(args):
         cfg.metadata_path = metadata
 
         # checking if audio is a multiple of frame size
-        multiple_of_frame_size(cfg, logger)
+        multiple_of_frame_size(cfg)
 
         # run preprocessing only once
         if hasattr(cfg, "preprocessing"):
@@ -176,8 +175,6 @@ def main(args):
                 "mp" if cfg.multiprocessing else None,
             )
 
-        compare_wav_lengths(cfg.input_path, cfg.output_path, logger)
-
     # copy configuration to output directory
     with open(cfg.output_path.joinpath(f"{cfg.name}.yml"), "w") as f:
         yaml.safe_dump(cfg._yaml_dump, f)
diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py
index 07437314..092ecb2d 100755
--- a/ivas_processing_scripts/processing/processing.py
+++ b/ivas_processing_scripts/processing/processing.py
@@ -177,6 +177,12 @@ def concat_teardown(x, splits, out_fmt, fs, in_fs, meta, logger: logging.Logger)
         new_splits.append(int(float(split_i) * relative_fs_change))
     splits = new_splits
 
+    # check if last split ending coincides with last sample of signal
+    if splits[-1] > len(x):
+        raise ValueError(f"Last split index {splits[-1]} is larger than the signal length {len(x)}")
+    elif splits[-1] < len(x):
+        warn(f"Last split index {splits[-1]} is smaller that the signal length {len(x)}")
+
     split_old = 0
     split_signals = []
     split_meta = []
@@ -448,7 +454,6 @@ def preprocess_background_noise(cfg):
 
 def multiple_of_frame_size(
     cfg: TestConfig,
-    logger: logging.Logger,
     frame_size_in_ms: Optional[int] = 20,
 ) -> np.ndarray:
     """
@@ -515,9 +520,8 @@ def multiple_of_frame_size(
                     f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms). Padding to the nearest integer multiple."
                 )
 
-                # Create and appending zeros
-                padding_array = np.zeros((padding_samples, n_chan_x))
-                padded_data = np.vstack((x, padding_array))
+                # Create and append zeros
+                padded_data = trim(x, sampling_rate, (0, -padding_samples), pad_noise=True, samples=True)
                 # Write padded data to output directory
                 write(output_dir / item.name, padded_data, fs)
         else:
@@ -545,30 +549,3 @@ def multiple_of_frame_size(
 
     # Make the output path as the new input path
     cfg.input_path = output_dir
-    
-
-def compare_wav_lengths(input_path: Path, output_path: Path, logger: logging.Logger):
-    """
-    The function compares the number of samples of the files present in the input directory
-    to the corresponding output files in the subdirectories which start with "c" followed by 2 digits.
-
-    Parameters
-    ----------
-    input_path: Path
-        Path to input directory
-    output_path: Path
-        Path to output directory
-    """
-    for subdir in output_path.iterdir():
-        if subdir.is_dir() and subdir.name.startswith("c"):
-            for file in subdir.glob("*.wav"):
-                input_file = input_path / file.name
-                output_file = file
-                input_array, input_fs = read(str(input_file))
-                output_array, output_fs = read(str(output_file))
-                input_length, input_channels = input_array.shape
-                output_length, output_channles = output_array.shape
-                if input_length != output_length:
-                    logger.info(
-                        f"The {input_file.name} has {input_length} samples and the output condition {subdir.name} {output_file.name} has {output_length} samples and the difference between the two is {input_length - output_length} samples.\n"
-                    )
diff --git a/ivas_processing_scripts/processing/processing_splitting_scaling.py b/ivas_processing_scripts/processing/processing_splitting_scaling.py
index af359b49..4aa54c2d 100644
--- a/ivas_processing_scripts/processing/processing_splitting_scaling.py
+++ b/ivas_processing_scripts/processing/processing_splitting_scaling.py
@@ -3,6 +3,7 @@ import logging
 import re
 from itertools import repeat
 from pathlib import Path
+from warnings import warn
 
 import numpy as np
 
@@ -239,6 +240,13 @@ class Processing_splitting_scaling(Processing):
                 out_meta = repeat(None)
 
         else:
+            # check length of output signals
+            input_aligned_file = in_file.parent.parent / "20ms_aligned_files" / f"{Path(in_file.stem).stem}.wav"
+            input_aligned_array, _ = read(input_aligned_file)
+            if (len_inp := len(input_aligned_array)) != (len_out := len(x)):
+                warn(f"For file {out_file} the length is {len_out} and does not match the (frame aligned) input length {len_inp}.")
+
+            # set output values
             out_files = [out_file]
             file_splits = [x]
             if isinstance(audio.fromtype(self.out_fmt), audio.ObjectBasedAudio):
-- 
GitLab


From 8e03ba519660a50638ce2e3c684ce363169785af Mon Sep 17 00:00:00 2001
From: Treffehn <anika.treffehn@iis.fraunhofer.de>
Date: Mon, 5 Jun 2023 14:10:40 +0200
Subject: [PATCH 4/4] formatting

---
 ivas_processing_scripts/processing/processing.py     | 12 +++++++++---
 .../processing/processing_splitting_scaling.py       | 10 ++++++++--
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py
index 092ecb2d..093fa91b 100755
--- a/ivas_processing_scripts/processing/processing.py
+++ b/ivas_processing_scripts/processing/processing.py
@@ -179,9 +179,13 @@ def concat_teardown(x, splits, out_fmt, fs, in_fs, meta, logger: logging.Logger)
 
     # check if last split ending coincides with last sample of signal
     if splits[-1] > len(x):
-        raise ValueError(f"Last split index {splits[-1]} is larger than the signal length {len(x)}")
+        raise ValueError(
+            f"Last split index {splits[-1]} is larger than the signal length {len(x)}"
+        )
     elif splits[-1] < len(x):
-        warn(f"Last split index {splits[-1]} is smaller that the signal length {len(x)}")
+        warn(
+            f"Last split index {splits[-1]} is smaller that the signal length {len(x)}"
+        )
 
     split_old = 0
     split_signals = []
@@ -477,7 +481,9 @@ def multiple_of_frame_size(
     try:
         output_dir.mkdir(exist_ok=False)
     except FileExistsError:
-        raise ValueError("Folder for 20ms aligned files already exists. Please move or delete folder")
+        raise ValueError(
+            "Folder for 20ms aligned files already exists. Please move or delete folder"
+        )
 
     # iterate over input files
     for i, item in enumerate(cfg.items_list):
diff --git a/ivas_processing_scripts/processing/processing_splitting_scaling.py b/ivas_processing_scripts/processing/processing_splitting_scaling.py
index 4aa54c2d..b49454ff 100644
--- a/ivas_processing_scripts/processing/processing_splitting_scaling.py
+++ b/ivas_processing_scripts/processing/processing_splitting_scaling.py
@@ -241,10 +241,16 @@ class Processing_splitting_scaling(Processing):
 
         else:
             # check length of output signals
-            input_aligned_file = in_file.parent.parent / "20ms_aligned_files" / f"{Path(in_file.stem).stem}.wav"
+            input_aligned_file = (
+                in_file.parent.parent
+                / "20ms_aligned_files"
+                / f"{Path(in_file.stem).stem}.wav"
+            )
             input_aligned_array, _ = read(input_aligned_file)
             if (len_inp := len(input_aligned_array)) != (len_out := len(x)):
-                warn(f"For file {out_file} the length is {len_out} and does not match the (frame aligned) input length {len_inp}.")
+                warn(
+                    f"For file {out_file} the length is {len_out} and does not match the (frame aligned) input length {len_inp}."
+                )
 
             # set output values
             out_files = [out_file]
-- 
GitLab