From 35e1cc0036173cd8b447a4da8ae0341d49542288 Mon Sep 17 00:00:00 2001 From: veeravt Date: Thu, 25 May 2023 17:17:21 +0200 Subject: [PATCH 1/4] Extended the pre-existing function to pad and align the audio to 20ms. --- ivas_processing_scripts/__init__.py | 9 ++- .../processing/processing.py | 75 ++++++++++++++++++- 2 files changed, 77 insertions(+), 7 deletions(-) diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index 1a3fa594..d719a156 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -43,6 +43,7 @@ from ivas_processing_scripts.constants import ( ) from ivas_processing_scripts.processing import chains, config from ivas_processing_scripts.processing.processing import ( + compare_wav_lengths, multiple_of_frame_size, preprocess, preprocess_2, @@ -96,6 +97,9 @@ def main(args): # set up logging logger = logging_init(args, cfg) + # checking if audio is a multiple of frame size + multiple_of_frame_size(cfg, logger) + # Re-ordering items based on concatenation order if hasattr(cfg, "preprocessing_2"): if ( @@ -128,9 +132,6 @@ def main(args): cfg.metadata_path = metadata - # checking if audio is a multiple of frame size - multiple_of_frame_size(cfg) - # run preprocessing only once if hasattr(cfg, "preprocessing"): # save process info for background noise @@ -180,6 +181,8 @@ def main(args): if cfg.condition_in_output_filename: rename_generated_conditions(cfg.output_path) + compare_wav_lengths(cfg.input_path, cfg.output_path, logger) + # copy configuration to output directory with open(cfg.output_path.joinpath(f"{cfg.name}.yml"), "w") as f: yaml.safe_dump(cfg._yaml_dump, f) diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index cfc5a381..dfb65e3d 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -319,6 +319,9 @@ def preprocess_2(cfg, logger): cfg.tmp_dirs = 
cfg.tmp_dirs[1:] cfg.out_dirs = cfg.out_dirs[1:] + # Copy the concatenated file to the 20ms_aligned_files folder + copyfile(cfg.concat_file, cfg.input_path / cfg.concat_file.name) + return @@ -519,10 +522,13 @@ def preprocess_background_noise(cfg): def multiple_of_frame_size( cfg: TestConfig, + logger: logging.Logger, frame_size_in_ms: Optional[int] = 20, ) -> np.ndarray: """ - Warn/Exit if audio if it isn't a multiple of frame size + This function checks if the list of multi channel audio files is a multiple of frame size. + If the file isn't a multiple then the function pads it to the next integer multiple of the frame size and writes the file to an output directory. + It also copies the already aligned files to the output directory. Parameters ---------- @@ -534,7 +540,14 @@ def multiple_of_frame_size( # get the number of channels from the input format input_format = cfg.input["fmt"] num_channels = audio.fromtype(input_format).num_channels - for item in cfg.items_list: + + # Create output directory if it doesn't exist + output_dir = cfg.output_path / "20ms_aligned_files" + output_dir.mkdir(exist_ok=True) + + # padded_audio_files_list = [] + + for i, item in enumerate(cfg.items_list): # read the audio file if "fs" in cfg.input: sampling_rate = cfg.input["fs"] @@ -555,15 +568,42 @@ def multiple_of_frame_size( ) # warn if audio length not a multiple of frame length frame_length_samples = (frame_size_in_ms / 1000) * fs - if n_samples_x % frame_length_samples != 0: + remainder = n_samples_x % frame_length_samples + if remainder != 0: + # Calculate number of samples needed for padding + padding_samples = int(frame_length_samples - remainder) + if input_format.startswith("ISM") or input_format.startswith("MASA"): raise ValueError( f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)." ) else: warn( - f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)." 
+ f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms). Padding to the nearest integer multiple." + ) + logger.info( + f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms). Padding to the nearest integer multiple." ) + # Create and appending zeros + padding_array = np.zeros((padding_samples, n_chan_x)) + padded_data = np.vstack((x, padding_array)) + # Write padded data to output directory + write(output_dir / item.name, padded_data, fs) + else: + copyfile(item, output_dir / item.name) + + # Update audio file path in list + cfg.items_list[i] = output_dir / item.name + + # Check if all files are present in output directory + all_files_present = all( + [(output_dir / item.name).exists() for audio_file in cfg.items_list] + ) + if not all_files_present: + raise Exception("Not all files are present in the output directory") + + # Make the output path as the new input path + cfg.input_path = output_dir def rename_generated_conditions(output_path: Path): @@ -583,3 +623,30 @@ def rename_generated_conditions(output_path: Path): for file_path in subdirectory.iterdir(): new_filename = f"{file_path.stem}.{subdirectory.name}{file_path.suffix}" file_path.rename(subdirectory / new_filename) + + +def compare_wav_lengths(input_path: Path, output_path: Path, logger: logging.Logger): + """ + The function compares the number of samples of the files present in the input directory + to the corresponding output files in the subdirectories which start with "c" followed by 2 digits. 
+ + Parameters + ---------- + input_path: Path + Path to input directory + output_path: Path + Path to output directory + """ + for subdir in output_path.iterdir(): + if subdir.is_dir() and subdir.name.startswith("c"): + for file in subdir.glob("*.wav"): + input_file = input_path / file.name + output_file = file + input_array, input_fs = read(str(input_file)) + output_array, output_fs = read(str(output_file)) + input_length, input_channels = input_array.shape + output_length, output_channles = output_array.shape + if input_length != output_length: + logger.info( + f"The {input_file.name} has {input_length} samples and the output condition {subdir.name} {output_file.name} has {output_length} samples and the difference between the two is {input_length - output_length} samples.\n" + ) -- GitLab From 369dba9325f5ef91347950be7ca482ee2f06d605 Mon Sep 17 00:00:00 2001 From: veeravt Date: Fri, 26 May 2023 15:03:43 +0200 Subject: [PATCH 2/4] Updating the metadata paths and copying the metadata to the new folder. 
--- ivas_processing_scripts/__init__.py | 8 ++++---- ivas_processing_scripts/processing/processing.py | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index d719a156..b1b31a5a 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -97,9 +97,6 @@ def main(args): # set up logging logger = logging_init(args, cfg) - # checking if audio is a multiple of frame size - multiple_of_frame_size(cfg, logger) - # Re-ordering items based on concatenation order if hasattr(cfg, "preprocessing_2"): if ( @@ -132,6 +129,9 @@ def main(args): cfg.metadata_path = metadata + # checking if audio is a multiple of frame size + multiple_of_frame_size(cfg, logger) + # run preprocessing only once if hasattr(cfg, "preprocessing"): # save process info for background noise @@ -181,7 +181,7 @@ def main(args): if cfg.condition_in_output_filename: rename_generated_conditions(cfg.output_path) - compare_wav_lengths(cfg.input_path, cfg.output_path, logger) + # compare_wav_lengths(cfg.input_path, cfg.output_path, logger) # copy configuration to output directory with open(cfg.output_path.joinpath(f"{cfg.name}.yml"), "w") as f: diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index dfb65e3d..fbf512a2 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -595,6 +595,14 @@ def multiple_of_frame_size( # Update audio file path in list cfg.items_list[i] = output_dir / item.name + # Copy metadata and update path + if input_format.startswith("ISM") or input_format.startswith("MASA"): + for j in range(int(cfg.input["fmt"][3])): + copyfile( + cfg.metadata_path[i][j], output_dir / cfg.metadata_path[i][j].name + ) + cfg.metadata_path[i][j] = output_dir / cfg.metadata_path[i][j].name + # Check if all files are present in output directory 
all_files_present = all( [(output_dir / item.name).exists() for audio_file in cfg.items_list] -- GitLab From fa793acf01c7a3d22041605e0dcf8090dd30a872 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Mon, 5 Jun 2023 13:48:43 +0200 Subject: [PATCH 3/4] moved test for item length to processing splitting scaling --- ivas_processing_scripts/__init__.py | 5 +-- .../processing/processing.py | 39 ++++--------------- .../processing_splitting_scaling.py | 8 ++++ 3 files changed, 17 insertions(+), 35 deletions(-) diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index 2656b5d1..51ba68a2 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -43,7 +43,6 @@ from ivas_processing_scripts.constants import ( ) from ivas_processing_scripts.processing import chains, config from ivas_processing_scripts.processing.processing import ( - compare_wav_lengths, multiple_of_frame_size, preprocess, preprocess_2, @@ -131,7 +130,7 @@ def main(args): cfg.metadata_path = metadata # checking if audio is a multiple of frame size - multiple_of_frame_size(cfg, logger) + multiple_of_frame_size(cfg) # run preprocessing only once if hasattr(cfg, "preprocessing"): @@ -176,8 +175,6 @@ def main(args): "mp" if cfg.multiprocessing else None, ) - compare_wav_lengths(cfg.input_path, cfg.output_path, logger) - # copy configuration to output directory with open(cfg.output_path.joinpath(f"{cfg.name}.yml"), "w") as f: yaml.safe_dump(cfg._yaml_dump, f) diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 07437314..092ecb2d 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -177,6 +177,12 @@ def concat_teardown(x, splits, out_fmt, fs, in_fs, meta, logger: logging.Logger) new_splits.append(int(float(split_i) * relative_fs_change)) splits = new_splits + # check if last split ending coincides with last sample of 
signal + if splits[-1] > len(x): + raise ValueError(f"Last split index {splits[-1]} is larger than the signal length {len(x)}") + elif splits[-1] < len(x): + warn(f"Last split index {splits[-1]} is smaller that the signal length {len(x)}") + split_old = 0 split_signals = [] split_meta = [] @@ -448,7 +454,6 @@ def preprocess_background_noise(cfg): def multiple_of_frame_size( cfg: TestConfig, - logger: logging.Logger, frame_size_in_ms: Optional[int] = 20, ) -> np.ndarray: """ @@ -515,9 +520,8 @@ def multiple_of_frame_size( f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms). Padding to the nearest integer multiple." ) - # Create and appending zeros - padding_array = np.zeros((padding_samples, n_chan_x)) - padded_data = np.vstack((x, padding_array)) + # Create and append zeros + padded_data = trim(x, sampling_rate, (0, -padding_samples), pad_noise=True, samples=True) # Write padded data to output directory write(output_dir / item.name, padded_data, fs) else: @@ -545,30 +549,3 @@ def multiple_of_frame_size( # Make the output path as the new input path cfg.input_path = output_dir - - -def compare_wav_lengths(input_path: Path, output_path: Path, logger: logging.Logger): - """ - The function compares the number of samples of the files present in the input directory - to the corresponding output files in the subdirectories which start with "c" followed by 2 digits. 
- - Parameters - ---------- - input_path: Path - Path to input directory - output_path: Path - Path to output directory - """ - for subdir in output_path.iterdir(): - if subdir.is_dir() and subdir.name.startswith("c"): - for file in subdir.glob("*.wav"): - input_file = input_path / file.name - output_file = file - input_array, input_fs = read(str(input_file)) - output_array, output_fs = read(str(output_file)) - input_length, input_channels = input_array.shape - output_length, output_channles = output_array.shape - if input_length != output_length: - logger.info( - f"The {input_file.name} has {input_length} samples and the output condition {subdir.name} {output_file.name} has {output_length} samples and the difference between the two is {input_length - output_length} samples.\n" - ) diff --git a/ivas_processing_scripts/processing/processing_splitting_scaling.py b/ivas_processing_scripts/processing/processing_splitting_scaling.py index af359b49..4aa54c2d 100644 --- a/ivas_processing_scripts/processing/processing_splitting_scaling.py +++ b/ivas_processing_scripts/processing/processing_splitting_scaling.py @@ -3,6 +3,7 @@ import logging import re from itertools import repeat from pathlib import Path +from warnings import warn import numpy as np @@ -239,6 +240,13 @@ class Processing_splitting_scaling(Processing): out_meta = repeat(None) else: + # check length of output signals + input_aligned_file = in_file.parent.parent / "20ms_aligned_files" / f"{Path(in_file.stem).stem}.wav" + input_aligned_array, _ = read(input_aligned_file) + if (len_inp := len(input_aligned_array)) != (len_out := len(x)): + warn(f"For file {out_file} the length is {len_out} and does not match the (frame aligned) input length {len_inp}.") + + # set output values out_files = [out_file] file_splits = [x] if isinstance(audio.fromtype(self.out_fmt), audio.ObjectBasedAudio): -- GitLab From 8e03ba519660a50638ce2e3c684ce363169785af Mon Sep 17 00:00:00 2001 From: Treffehn Date: Mon, 5 Jun 2023 14:10:40 
+0200 Subject: [PATCH 4/4] formatting --- ivas_processing_scripts/processing/processing.py | 12 +++++++++--- .../processing/processing_splitting_scaling.py | 10 ++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 092ecb2d..093fa91b 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -179,9 +179,13 @@ def concat_teardown(x, splits, out_fmt, fs, in_fs, meta, logger: logging.Logger) # check if last split ending coincides with last sample of signal if splits[-1] > len(x): - raise ValueError(f"Last split index {splits[-1]} is larger than the signal length {len(x)}") + raise ValueError( + f"Last split index {splits[-1]} is larger than the signal length {len(x)}" + ) elif splits[-1] < len(x): - warn(f"Last split index {splits[-1]} is smaller that the signal length {len(x)}") + warn( + f"Last split index {splits[-1]} is smaller that the signal length {len(x)}" + ) split_old = 0 split_signals = [] @@ -477,7 +481,9 @@ def multiple_of_frame_size( try: output_dir.mkdir(exist_ok=False) except FileExistsError: - raise ValueError("Folder for 20ms aligned files already exists. Please move or delete folder") + raise ValueError( + "Folder for 20ms aligned files already exists. 
Please move or delete folder" + ) # iterate over input files for i, item in enumerate(cfg.items_list): diff --git a/ivas_processing_scripts/processing/processing_splitting_scaling.py b/ivas_processing_scripts/processing/processing_splitting_scaling.py index 4aa54c2d..b49454ff 100644 --- a/ivas_processing_scripts/processing/processing_splitting_scaling.py +++ b/ivas_processing_scripts/processing/processing_splitting_scaling.py @@ -241,10 +241,16 @@ class Processing_splitting_scaling(Processing): else: # check length of output signals - input_aligned_file = in_file.parent.parent / "20ms_aligned_files" / f"{Path(in_file.stem).stem}.wav" + input_aligned_file = ( + in_file.parent.parent + / "20ms_aligned_files" + / f"{Path(in_file.stem).stem}.wav" + ) input_aligned_array, _ = read(input_aligned_file) if (len_inp := len(input_aligned_array)) != (len_out := len(x)): - warn(f"For file {out_file} the length is {len_out} and does not match the (frame aligned) input length {len_inp}.") + warn( + f"For file {out_file} the length is {len_out} and does not match the (frame aligned) input length {len_inp}." + ) # set output values out_files = [out_file] -- GitLab