From 81538a2cd52bf5b09504dca0c85e737612c345dc Mon Sep 17 00:00:00 2001 From: veeravt Date: Wed, 3 May 2023 16:23:11 +0200 Subject: [PATCH 1/5] Added a function to check if the audio length is a multiple of frame size. --- ivas_processing_scripts/audiotools/audio.py | 4 +- .../audiotools/audioarray.py | 44 ++++++++++++++++++- .../binaural_datasets/binaural_dataset.py | 8 ++-- .../audiotools/convert/__init__.py | 3 ++ 4 files changed, 51 insertions(+), 8 deletions(-) diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index f6c45fca..850c3c49 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -30,7 +30,7 @@ # the United Nations Convention on Contracts on the International Sales of Goods. # -import warnings +from warnings import warn from abc import ABC, abstractmethod from pathlib import Path from typing import Optional, Union @@ -274,7 +274,7 @@ class ObjectBasedAudio(Audio): obj.metadata_files.append(file_name_meta) else: raise ValueError(f"Metadata file {file_name_meta} not found.") - warnings.warn( + warn( f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}" ) diff --git a/ivas_processing_scripts/audiotools/audioarray.py b/ivas_processing_scripts/audiotools/audioarray.py index c0909c4c..5b431fee 100755 --- a/ivas_processing_scripts/audiotools/audioarray.py +++ b/ivas_processing_scripts/audiotools/audioarray.py @@ -31,7 +31,7 @@ # import logging -import warnings +from warnings import warn from typing import Iterator, Optional, Tuple, Union import numpy as np @@ -342,7 +342,7 @@ def limiter( fr_sig[idx_min] = -32768 if limited: - warnings.warn("Limiting had to be applied") + warn("Limiting had to be applied") return x @@ -688,3 +688,43 @@ def mute_channels( """ x[:, mute] = 0 return x + + +def multiple_of_frame_size( + x: np.ndarray, + fs: int = 48000, + frame_size_in_ms: Optional[int] = 20, +) -> np.ndarray: + """ + Warn and pad audio if it isn't a multiple of frame size + + Parameters + ---------- + x: np.ndarray + Input array + fs: int + Input sampling rate in Hz; default = 48000 + frame_size_in_ms: Optional[int] + Frame size in milliseconds; default = 20 + + Returns + ------- + x: np.ndarray + Padded array + """ + + if x.ndim == 1: + n_samples_x = x.shape + n_chan_x = 1 + else: + n_samples_x, n_chan_x = x.shape + + frame_length_samples = (frame_size_in_ms / 1000) * fs + if n_samples_x % frame_length_samples != 0: + warn( + "Audio length is not a multiple of frame length (20 ms). Padding with zeros." + ) + pad_length = int(frame_length_samples - (n_samples_x % frame_length_samples)) + x = np.pad(x, ((0, int(pad_length)), (0, 0)), "constant") + + return x diff --git a/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py b/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py index e5d5ac95..5b95f14f 100755 --- a/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py +++ b/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py @@ -30,7 +30,7 @@ # the United Nations Convention on Contracts on the International Sales of Goods. # -import warnings +from warnings import warn from pathlib import Path from typing import Optional, Tuple, Union @@ -149,7 +149,7 @@ def load_ir( ) ).is_file(): dataset_suffix = "SBA3" - warnings.warn("No SBA1 dataset found -> use truncated SBA3 dataset") + warn("No SBA1 dataset found -> use truncated SBA3 dataset") elif in_fmt.endswith("2"): dataset_suffix = "SBA2" # Use truncated SBA3 dataset if no SBA1 or 2 dataset exists @@ -159,7 +159,7 @@ def load_ir( ) ).is_file(): dataset_suffix = "SBA3" - warnings.warn("No SBA2 dataset found -> use truncated SBA3 dataset") + warn("No SBA2 dataset found -> use truncated SBA3 dataset") else: dataset_suffix = "SBA3" @@ -172,7 +172,7 @@ def load_ir( latency_smp = latency_s else: latency_smp = int(np.min(np.argmax(np.sum(np.abs(IR), axis=1), axis=0))) - warnings.warn( + warn( f"No latency of HRTF dataset specified in {path_dataset} file -> computed latency: {latency_smp} sample(s)" ) diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 024faa47..0a35a92b 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -212,6 +212,9 @@ def process_audio( if fs is None: fs = x.fs + """making sure length is a multiple of the frame size""" + x.audio = audioarray.multiple_of_frame_size(x.audio, fs) + """delay audio""" if delay is not None: if logger: -- GitLab From ce164d8ae5ad05a3509a655b2a36b176c7536f20 Mon Sep 17 00:00:00 2001 From: veeravt Date: Thu, 4 May 2023 16:51:37 +0200 Subject: [PATCH 2/5] Applied format patch. --- ivas_processing_scripts/audiotools/audio.py | 2 +- ivas_processing_scripts/audiotools/audioarray.py | 2 +- .../audiotools/binaural_datasets/binaural_dataset.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index 850c3c49..3bc10c53 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -30,10 +30,10 @@ # the United Nations Convention on Contracts on the International Sales of Goods. # -from warnings import warn from abc import ABC, abstractmethod from pathlib import Path from typing import Optional, Union +from warnings import warn import numpy as np diff --git a/ivas_processing_scripts/audiotools/audioarray.py b/ivas_processing_scripts/audiotools/audioarray.py index 5b431fee..999381fe 100755 --- a/ivas_processing_scripts/audiotools/audioarray.py +++ b/ivas_processing_scripts/audiotools/audioarray.py @@ -31,8 +31,8 @@ # import logging -from warnings import warn from typing import Iterator, Optional, Tuple, Union +from warnings import warn import numpy as np import scipy.signal as sig diff --git a/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py b/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py index 5b95f14f..37008adf 100755 --- a/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py +++ b/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py @@ -30,9 +30,9 @@ # the United Nations Convention on Contracts on the International Sales of Goods. # -from warnings import warn from pathlib import Path from typing import Optional, Tuple, Union +from warnings import warn import numpy as np from scipy.io import loadmat -- GitLab From 8184202801f4dd8c8385b75b8026f9d318267cd0 Mon Sep 17 00:00:00 2001 From: veeravt Date: Mon, 8 May 2023 11:22:12 +0200 Subject: [PATCH 3/5] Modified the function and it's placement in the scripts. --- ivas_processing_scripts/__init__.py | 4 ++ .../audiotools/audioarray.py | 40 --------------- .../audiotools/convert/__init__.py | 3 -- .../processing/processing.py | 49 ++++++++++++++++++- 4 files changed, 52 insertions(+), 44 deletions(-) diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index a16309e0..6c76d636 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -43,6 +43,7 @@ from ivas_processing_scripts.constants import ( ) from ivas_processing_scripts.processing import chains, config from ivas_processing_scripts.processing.processing import ( + multiple_of_frame_size, preprocess, preprocess_2, preprocess_background_noise, @@ -126,6 +127,9 @@ def main(args): cfg.metadata_path = metadata + # checking if audio is a multiple of frame size + multiple_of_frame_size(cfg.items_list, cfg.input["fmt"]) + # run preprocessing only once if hasattr(cfg, "preprocessing"): # save process info for background noise diff --git a/ivas_processing_scripts/audiotools/audioarray.py b/ivas_processing_scripts/audiotools/audioarray.py index 999381fe..2c770ce4 100755 --- a/ivas_processing_scripts/audiotools/audioarray.py +++ b/ivas_processing_scripts/audiotools/audioarray.py @@ -688,43 +688,3 @@ def mute_channels( """ x[:, mute] = 0 return x - - -def multiple_of_frame_size( - x: np.ndarray, - fs: int = 48000, - frame_size_in_ms: Optional[int] = 20, -) -> np.ndarray: - """ - Warn and pad audio if it isn't a multiple of frame size - - Parameters - ---------- - x: np.ndarray - Input array - fs: int - Input sampling rate in Hz; default = 48000 - frame_size_in_ms: Optional[int] - Frame size in milliseconds; default = 20 - - Returns - ------- - x: np.ndarray - Padded array - """ - - if x.ndim == 1: - n_samples_x = x.shape - n_chan_x = 1 - else: - n_samples_x, n_chan_x = x.shape - - frame_length_samples = (frame_size_in_ms / 1000) * fs - if n_samples_x % frame_length_samples != 0: - warn( - "Audio length is not a multiple of frame length (20 ms). Padding with zeros." - ) - pad_length = int(frame_length_samples - (n_samples_x % frame_length_samples)) - x = np.pad(x, ((0, int(pad_length)), (0, 0)), "constant") - - return x diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 0a35a92b..024faa47 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -212,9 +212,6 @@ def process_audio( if fs is None: fs = x.fs - """making sure length is a multiple of the frame size""" - x.audio = audioarray.multiple_of_frame_size(x.audio, fs) - """delay audio""" if delay is not None: if logger: diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 0d2097fa..3121fb03 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -35,7 +35,7 @@ from abc import ABC, abstractmethod from itertools import repeat from pathlib import Path from shutil import copyfile -from typing import Iterable, Union +from typing import Iterable, Optional, Union from warnings import warn import numpy as np @@ -74,6 +74,21 @@ class Processing(ABC): def reorder_items_list(items_list: list, concatenation_order: list) -> list: + """ + Reorder input items list based on conactenation order + + Parameters + ---------- + items_list: list + List of input items + concatenation_order: list + Concatenation order + + Returns + ------- + ordered_full_files: list + Re-ordered list of input items + """ name_to_full = {Path(full_file).name: full_file for full_file in items_list} ordered_full_files = [ name_to_full[name] for name in concatenation_order if name in name_to_full @@ -487,3 +502,35 @@ def preprocess_background_noise(cfg): ] = output_audio return + + +def multiple_of_frame_size( + items_list: list, + input_format: str, + frame_size_in_ms: Optional[int] = 20, +) -> np.ndarray: + """ + Warn and pad audio if it isn't a multiple of frame size + + Parameters + ---------- + items_list: list + List of input items + frame_size_in_ms: Optional[int] + Frame size in milliseconds; default = 20 + """ + for item in items_list: + # read file + x, fs = read(item) + # warning if audio length not a multiple of frame lenght + n_samples_x, n_chan_x = x.shape + frame_length_samples = (frame_size_in_ms / 1000) * fs + if n_samples_x % frame_length_samples != 0: + if input_format.startswith("ISM") or input_format.startswith("MASA"): + raise ValueError( + f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)." + ) + else: + warn( + f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)." + ) -- GitLab From 33c84ad6c00768169bf8ed79b0f78e7601fd8a99 Mon Sep 17 00:00:00 2001 From: veeravt Date: Mon, 8 May 2023 16:59:33 +0200 Subject: [PATCH 4/5] Added checks for sampling rate and number of channels. --- ivas_processing_scripts/__init__.py | 2 +- .../processing/processing.py | 34 ++++++++++++++----- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index 6c76d636..16ac8b93 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -128,7 +128,7 @@ def main(args): cfg.metadata_path = metadata # checking if audio is a multiple of frame size - multiple_of_frame_size(cfg.items_list, cfg.input["fmt"]) + multiple_of_frame_size(cfg) # run preprocessing only once if hasattr(cfg, "preprocessing"): diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 3121fb03..d72e97b3 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -505,25 +505,41 @@ def preprocess_background_noise(cfg): def multiple_of_frame_size( - items_list: list, - input_format: str, + cfg: TestConfig, frame_size_in_ms: Optional[int] = 20, ) -> np.ndarray: """ - Warn and pad audio if it isn't a multiple of frame size + Warn/Exit if audio if it isn't a multiple of frame size Parameters ---------- - items_list: list - List of input items + cfg: TestConfig + Input configuration frame_size_in_ms: Optional[int] Frame size in milliseconds; default = 20 """ - for item in items_list: - # read file - x, fs = read(item) - # warning if audio length not a multiple of frame lenght + # get the number of channels from the input format + input_format = cfg.input["fmt"] + num_channels = audio.fromtype(input_format).num_channels + for item in cfg.items_list: + # read the audio file + if "fs" in cfg.input: + sampling_rate = cfg.input["fs"] + elif item.suffix == ".wav": + sampling_rate = None + elif item.suffix == ".pcm" or item.suffix == ".raw": + raise ValueError("Sampling rate must be specified for headerless files!") + x, fs = read(item, nchannels=num_channels, fs=sampling_rate) n_samples_x, n_chan_x = x.shape + if fs != sampling_rate: + raise ValueError( + f"Sampling rate of the file ({fs}) does NOT match with that ({sampling_rate}) specified in the config yaml." + ) + if n_chan_x != num_channels: + raise ValueError( + f"The number of channels in the file ({n_chan_x}) do NOT match with those of format ({num_channels}, {input_format}) specified in the config yaml." + ) + # warn if audio length not a multiple of frame length frame_length_samples = (frame_size_in_ms / 1000) * fs if n_samples_x % frame_length_samples != 0: if input_format.startswith("ISM") or input_format.startswith("MASA"): -- GitLab From 50ef8834edb013357db7ddc785321c39ec928111 Mon Sep 17 00:00:00 2001 From: veeravt Date: Tue, 9 May 2023 12:10:10 +0200 Subject: [PATCH 5/5] Attempt 1 to fix the broken pipeline #16138 --- ivas_processing_scripts/processing/processing.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index d72e97b3..50538f59 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -525,11 +525,12 @@ def multiple_of_frame_size( # read the audio file if "fs" in cfg.input: sampling_rate = cfg.input["fs"] - elif item.suffix == ".wav": - sampling_rate = None + x, fs = read(item, nchannels=num_channels, fs=sampling_rate) elif item.suffix == ".pcm" or item.suffix == ".raw": raise ValueError("Sampling rate must be specified for headerless files!") - x, fs = read(item, nchannels=num_channels, fs=sampling_rate) + elif item.suffix == ".wav": + x, fs = read(item) + sampling_rate = fs n_samples_x, n_chan_x = x.shape if fs != sampling_rate: raise ValueError( -- GitLab