diff --git a/examples/TEMPLATE.yml b/examples/TEMPLATE.yml index 0dcd1ae86990535ddef5ec071a562a1922652eda..89415ceeb261e2bf0b9def3a20c35d626778f6c0 100755 --- a/examples/TEMPLATE.yml +++ b/examples/TEMPLATE.yml @@ -65,6 +65,13 @@ input: fmt: "HOA3" ### Input sampling rate in Hz needed for headerless audio files; default = 48000 # fs: 32000 + ### Enable check for input files being aligned to an integer multiple of a given length in ms. + ### If a file is not aligned, a warning will be issued. If the input format has metadata or force is true, an error is raised instead. + # aligned_to: + ### alignment length in ms; required if aligned_to is used + # len: 20 + ### default: false + # force: true ################################################ ### Pre-processing on individual items @@ -112,6 +119,10 @@ input: # preamble: 10000 ### Flag wheter to use noise (amplitude +-4) for the preamble or silence; default = false (silence) # preamble_noise: true + ### Specify postamble duration in ms. Postamble is added after concatenation and possible signal repetition. 
default = 0 + # postamble: 20 + ### Flag whether to use noise (amplitude +-4) for the postamble or silence; default = false (silence) + # postamble_noise: true ### Additive background noise # background_noise: ### SNR for background noise in dB; REQUIRED for prerecorded background noise and ignored for low level noise diff --git a/experiments/selection/BS1534-1a/config/BS1534-1a.yml b/experiments/selection/BS1534-1a/config/BS1534-1a.yml index 4ed1bf738ec493de1a4a0b7378fd894a23ddda0c..ec8a6aab9b3b68d7e32ccfe071a66f6687a89d2e 100644 --- a/experiments/selection/BS1534-1a/config/BS1534-1a.yml +++ b/experiments/selection/BS1534-1a/config/BS1534-1a.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "STEREO" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -33,6 +35,8 @@ preprocessing: preprocessing_2: concatenate_input: false preamble_noise: false + postamble: 20 + postamble_noise: true repeat_signal: true ################################################# diff --git a/experiments/selection/BS1534-1b/config/BS1534-1b.yml b/experiments/selection/BS1534-1b/config/BS1534-1b.yml index 742b61e481434cc2f5fad1cbf47eb68a77c8d0c7..5c06316a6356b5d77eef011e1bdeebaae51dc427 100644 --- a/experiments/selection/BS1534-1b/config/BS1534-1b.yml +++ b/experiments/selection/BS1534-1b/config/BS1534-1b.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "STEREO" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -33,6 +35,8 @@ preprocessing: preprocessing_2: concatenate_input: false preamble_noise: false + postamble: 20 + postamble_noise: true repeat_signal: true ################################################# diff --git a/experiments/selection/BS1534-2a/config/BS1534-2a.yml b/experiments/selection/BS1534-2a/config/BS1534-2a.yml index 
543adb698e9a44d80fc8611d9ca74f998e80d7e4..66e9a41c24bd940f62783928c67a9d75dbeda01f 100644 --- a/experiments/selection/BS1534-2a/config/BS1534-2a.yml +++ b/experiments/selection/BS1534-2a/config/BS1534-2a.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "5_1" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -33,6 +35,8 @@ preprocessing: preprocessing_2: concatenate_input: false preamble_noise: false + postamble: 20 + postamble_noise: true repeat_signal: true ################################################# diff --git a/experiments/selection/BS1534-2b/config/BS1534-2b.yml b/experiments/selection/BS1534-2b/config/BS1534-2b.yml index fcaf4dfacfe5e93511e16eb9fc45e3acda971dfa..1728956963ee383ade685be1ce5b78a2c50f85ff 100644 --- a/experiments/selection/BS1534-2b/config/BS1534-2b.yml +++ b/experiments/selection/BS1534-2b/config/BS1534-2b.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "5_1" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -33,6 +35,8 @@ preprocessing: preprocessing_2: concatenate_input: false preamble_noise: false + postamble: 20 + postamble_noise: true repeat_signal: true ################################################# diff --git a/experiments/selection/BS1534-3a/config/BS1534-3a.yml b/experiments/selection/BS1534-3a/config/BS1534-3a.yml index dfaf1808c78d1b047995c23735f6026a92c9714b..c7e384d7f43f259d4e88a89e54f0f729c7aa253c 100644 --- a/experiments/selection/BS1534-3a/config/BS1534-3a.yml +++ b/experiments/selection/BS1534-3a/config/BS1534-3a.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "7_1_4" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -33,6 +35,8 @@ preprocessing: preprocessing_2: concatenate_input: false preamble_noise: false + postamble: 20 + 
postamble_noise: true repeat_signal: true ################################################# diff --git a/experiments/selection/BS1534-3b/config/BS1534-3b.yml b/experiments/selection/BS1534-3b/config/BS1534-3b.yml index 7c15efd7260e9b91b69e0af842dccce2a4a516b3..889c06d64dc07abe5b35c0520b507ac81b156741 100644 --- a/experiments/selection/BS1534-3b/config/BS1534-3b.yml +++ b/experiments/selection/BS1534-3b/config/BS1534-3b.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "7_1_4" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -33,6 +35,8 @@ preprocessing: preprocessing_2: concatenate_input: false preamble_noise: false + postamble: 20 + postamble_noise: true repeat_signal: true ################################################# diff --git a/experiments/selection/BS1534-4a/config/BS1534-4a.yml b/experiments/selection/BS1534-4a/config/BS1534-4a.yml index 917482b6ee3fe0ce814daba5f457fcb5ebc9e125..a1901c1d8443a388451b05481bc9ce168c69d943 100644 --- a/experiments/selection/BS1534-4a/config/BS1534-4a.yml +++ b/experiments/selection/BS1534-4a/config/BS1534-4a.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "HOA3" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -33,6 +35,8 @@ preprocessing: preprocessing_2: concatenate_input: false preamble_noise: false + postamble: 20 + postamble_noise: true repeat_signal: true ################################################# diff --git a/experiments/selection/BS1534-4b/config/BS1534-4b.yml b/experiments/selection/BS1534-4b/config/BS1534-4b.yml index a549c07dcb83b4b7054ba034a24494b471cce1f9..619fa75b4c1bf83b99f6fd177ba3f6e0cf66e2e9 100644 --- a/experiments/selection/BS1534-4b/config/BS1534-4b.yml +++ b/experiments/selection/BS1534-4b/config/BS1534-4b.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "HOA3" fs: 48000 + aligned_to: + 
len: 20 ################################################ ### Pre-processing on individual items @@ -33,6 +35,8 @@ preprocessing: preprocessing_2: concatenate_input: false preamble_noise: false + postamble: 20 + postamble_noise: true repeat_signal: true ################################################# diff --git a/experiments/selection/BS1534-5a/config/BS1534-5a.yml b/experiments/selection/BS1534-5a/config/BS1534-5a.yml index 8ac50c057d45ebc0146a416612d5d57a2f9c3062..5186df641d2878fb29cb9933e6887e1e74d408c5 100644 --- a/experiments/selection/BS1534-5a/config/BS1534-5a.yml +++ b/experiments/selection/BS1534-5a/config/BS1534-5a.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "HOA3" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -33,6 +35,8 @@ preprocessing: preprocessing_2: concatenate_input: false preamble_noise: false + postamble: 20 + postamble_noise: true repeat_signal: true ################################################# diff --git a/experiments/selection/BS1534-5b/config/BS1534-5b.yml b/experiments/selection/BS1534-5b/config/BS1534-5b.yml index 50ca255304d181812bfd22d56aae9dce51afe33e..49eaa813f0c754a4f48f0cedfc2a0ad8811bf70d 100644 --- a/experiments/selection/BS1534-5b/config/BS1534-5b.yml +++ b/experiments/selection/BS1534-5b/config/BS1534-5b.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "HOA3" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -33,6 +35,8 @@ preprocessing: preprocessing_2: concatenate_input: false preamble_noise: false + postamble: 20 + postamble_noise: true repeat_signal: true ################################################# diff --git a/experiments/selection/BS1534-6a/config/BS1534-6a.yml b/experiments/selection/BS1534-6a/config/BS1534-6a.yml index 8d69f1f7cd62b4eeff0e4015900a2950d6cb88c1..d3363f81970cf52a270d27842480eea43fb6b3c4 100644 --- 
a/experiments/selection/BS1534-6a/config/BS1534-6a.yml +++ b/experiments/selection/BS1534-6a/config/BS1534-6a.yml @@ -18,6 +18,9 @@ condition_in_output_filename: true input: fmt: "ISM3" fs: 48000 + aligned_to: + len: 20 + force: true ################################################ ### Pre-processing on individual items diff --git a/experiments/selection/BS1534-6b/config/BS1534-6b.yml b/experiments/selection/BS1534-6b/config/BS1534-6b.yml index d7d5763a53c12ce8590c99e2c946d573f917c0a8..63d00c7b40842f1b5e241fbef1f48806821de237 100644 --- a/experiments/selection/BS1534-6b/config/BS1534-6b.yml +++ b/experiments/selection/BS1534-6b/config/BS1534-6b.yml @@ -18,6 +18,9 @@ condition_in_output_filename: true input: fmt: "ISM4" fs: 48000 + aligned_to: + len: 20 + force: true ################################################ ### Pre-processing on individual items diff --git a/experiments/selection/BS1534-7a/config/BS1534-7a.yml b/experiments/selection/BS1534-7a/config/BS1534-7a.yml index 062830a0393b97833684da7a7508a55128c2fdb2..69574c294b55020321f3989df2303c13090c5bc5 100644 --- a/experiments/selection/BS1534-7a/config/BS1534-7a.yml +++ b/experiments/selection/BS1534-7a/config/BS1534-7a.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "FOA" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -33,6 +35,8 @@ preprocessing: preprocessing_2: concatenate_input: false preamble_noise: false + postamble: 20 + postamble_noise: true repeat_signal: true ################################################# diff --git a/experiments/selection/BS1534-7b/config/BS1534-7b.yml b/experiments/selection/BS1534-7b/config/BS1534-7b.yml index a6cad5d61cec4d971503dc044b44bb8cd04f423a..5aea94738dcdb767269a99afaa386ee0ef813fb2 100644 --- a/experiments/selection/BS1534-7b/config/BS1534-7b.yml +++ b/experiments/selection/BS1534-7b/config/BS1534-7b.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: 
"FOA" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -33,6 +35,8 @@ preprocessing: preprocessing_2: concatenate_input: false preamble_noise: false + postamble: 20 + postamble_noise: true repeat_signal: true ################################################# diff --git a/experiments/selection/P800-1/config/P800-1.yml b/experiments/selection/P800-1/config/P800-1.yml index f596a06681ea946ef2b8212c681ba817f7e7ec6a..238484b99a06590b7eef6c3c0008cfe8072cb53a 100644 --- a/experiments/selection/P800-1/config/P800-1.yml +++ b/experiments/selection/P800-1/config/P800-1.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "STEREO" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -35,6 +37,8 @@ preprocessing_2: # concatenation_order: [] preamble: 10000 preamble_noise: true + postamble: 20 + postamble_noise: true repeat_signal: true background_noise: ### REQUIRED: SNR for background noise in dB diff --git a/experiments/selection/P800-2/config/P800-2.yml b/experiments/selection/P800-2/config/P800-2.yml index 99b8494f00a0d60be0ec1a9241cfefa5aed26683..97e3d19632c195eebab5820495c14f498e9ddb31 100644 --- a/experiments/selection/P800-2/config/P800-2.yml +++ b/experiments/selection/P800-2/config/P800-2.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "STEREO" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -35,6 +37,8 @@ preprocessing_2: # concatenation_order: [] preamble: 10000 preamble_noise: true + postamble: 20 + postamble_noise: true ### Additive background noise background_noise: ### REQUIRED: SNR for background noise in dB diff --git a/experiments/selection/P800-3/config/P800-3.yml b/experiments/selection/P800-3/config/P800-3.yml index 0b454c00901c17aa3c0650a481a546c0ed86e641..762831f2caff5829d90ffc8a496db992c548d5c0 
100644 --- a/experiments/selection/P800-3/config/P800-3.yml +++ b/experiments/selection/P800-3/config/P800-3.yml @@ -17,7 +17,10 @@ condition_in_output_filename: true ################################################ input: fmt: "STEREO" - + fs: 48000 + aligned_to: + len: 20 + ################################################ ### Pre-processing on individual items ################################################ @@ -34,6 +37,8 @@ preprocessing_2: # concatenation_order: [] preamble: 10000 preamble_noise: true + postamble: 20 + postamble_noise: true repeat_signal: true ################################################# diff --git a/experiments/selection/P800-4/config/P800-4.yml b/experiments/selection/P800-4/config/P800-4.yml index bc6e53c7accc6398a18dfd730184a2cff8828be4..9a760d9903a72be347f1e6c33d07a68ba0654c43 100644 --- a/experiments/selection/P800-4/config/P800-4.yml +++ b/experiments/selection/P800-4/config/P800-4.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "FOA" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -36,6 +38,8 @@ preprocessing_2: # concatenation_order: [] preamble: 10000 preamble_noise: true + postamble: 20 + postamble_noise: true repeat_signal: true background_noise: ### REQUIRED: SNR for background noise in dB diff --git a/experiments/selection/P800-5/config/P800-5.yml b/experiments/selection/P800-5/config/P800-5.yml index 24e6783921da75772b044ca2d7b112d1952d8ee1..96c39191f27e2b3b8feb7844cf896389f17a29b5 100644 --- a/experiments/selection/P800-5/config/P800-5.yml +++ b/experiments/selection/P800-5/config/P800-5.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "FOA" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -36,6 +38,8 @@ preprocessing_2: # concatenation_order: [] preamble: 10000 preamble_noise: true + postamble: 20 + postamble_noise: true 
repeat_signal: true background_noise: ### REQUIRED: SNR for background noise in dB diff --git a/experiments/selection/P800-6/config/P800-6.yml b/experiments/selection/P800-6/config/P800-6.yml index afd34dd18870970dec61a712112cad90c3e91ae8..5032d715174852e7ebbbc52d87e33d398bbf7ee3 100644 --- a/experiments/selection/P800-6/config/P800-6.yml +++ b/experiments/selection/P800-6/config/P800-6.yml @@ -18,6 +18,9 @@ condition_in_output_filename: true input: fmt: "ISM1" fs: 48000 + aligned_to: + len: 20 + force: true ################################################ ### Pre-processing on individual items diff --git a/experiments/selection/P800-7/config/P800-7.yml b/experiments/selection/P800-7/config/P800-7.yml index 8560b7dbafdc4ddf34541f2168814673b060fab8..aa4adb6a1a20904594c6de26f54548e532e73166 100644 --- a/experiments/selection/P800-7/config/P800-7.yml +++ b/experiments/selection/P800-7/config/P800-7.yml @@ -18,6 +18,9 @@ condition_in_output_filename: true input: fmt: "ISM2" fs: 48000 + aligned_to: + len: 20 + force: true ################################################ ### Pre-processing on individual items diff --git a/experiments/selection/P800-8/config/P800-8.yml b/experiments/selection/P800-8/config/P800-8.yml index a271a384adb5cb374ab6796cf1e855044c6574ac..c4eb1d52585547d37a9d4ac6c036b0c5c653de47 100644 --- a/experiments/selection/P800-8/config/P800-8.yml +++ b/experiments/selection/P800-8/config/P800-8.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "FOA" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -35,6 +37,8 @@ preprocessing_2: # concatenation_order: [] preamble: 10000 preamble_noise: true + postamble: 20 + postamble_noise: true background_noise: ### REQUIRED: SNR for background noise in dB snr: 45 diff --git a/experiments/selection/P800-9/config/P800-9.yml b/experiments/selection/P800-9/config/P800-9.yml index 
fe941a8a0ccc84bdc7452c26f99e96d1cc211346..8cfb2b37806fb5541d327f4a8dde419f43d2e139 100644 --- a/experiments/selection/P800-9/config/P800-9.yml +++ b/experiments/selection/P800-9/config/P800-9.yml @@ -18,6 +18,8 @@ condition_in_output_filename: true input: fmt: "FOA" fs: 48000 + aligned_to: + len: 20 ################################################ ### Pre-processing on individual items @@ -35,6 +37,8 @@ preprocessing_2: # concatenation_order: [] preamble: 10000 preamble_noise: true + postamble: 20 + postamble_noise: true background_noise: ### REQUIRED: SNR for background noise in dB snr: 10 diff --git a/generate_test.py b/generate_test.py index c393dd7380c274cc55c3b42b4e5c4b7a9dd1f906..c187dbc2d76e298c4990f965f7449c297e5ddbb9 100644 --- a/generate_test.py +++ b/generate_test.py @@ -96,7 +96,7 @@ def create_experiment_setup(experiment, lab) -> list[Path]: experiments = EXPERIMENTS_P800 + EXPERIMENTS_BS1534 seed = 101 + experiments.index(experiment) * 4 + LAB_IDS.index(lab) - base_path = Path(HERE.name).joinpath(f"experiments/selection/{experiment}") + base_path = HERE.joinpath(f"experiments/selection/{experiment}") cfgs = list() for cat in categories: diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index 1b7eed979552e63a2a64e3b8ec1f23538a18d84b..15424a3a36d74396fc4bbbcab3025475ace59838 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -41,7 +41,6 @@ from ivas_processing_scripts.constants import ( ) from ivas_processing_scripts.processing import chains, config from ivas_processing_scripts.processing.processing import ( - multiple_of_frame_size, preprocess, preprocess_2, preprocess_background_noise, @@ -127,9 +126,6 @@ def main(args): cfg.metadata_path = metadata - # checking if audio is a multiple of frame size - multiple_of_frame_size(cfg) - # run preprocessing only once if hasattr(cfg, "preprocessing"): # save process info for background noise diff --git 
a/ivas_processing_scripts/processing/chains.py b/ivas_processing_scripts/processing/chains.py index cf7f43b63385193e7db40449327d457dff3ee484..3d7e04912a97edd6461579883ef580f43a603c12 100755 --- a/ivas_processing_scripts/processing/chains.py +++ b/ivas_processing_scripts/processing/chains.py @@ -33,6 +33,8 @@ from typing import Optional from warnings import warn +from ivas_processing_scripts.audiotools import audio +from ivas_processing_scripts.audiotools.audiofile import read from ivas_processing_scripts.processing.config import TestConfig from ivas_processing_scripts.processing.evs import EVS from ivas_processing_scripts.processing.ivas import IVAS @@ -95,11 +97,14 @@ def init_processing_chains(cfg: TestConfig) -> None: cfg.items_list = list_audio( cfg.input_path, select_list=getattr(cfg, "input_select", None) ) - if not cfg.items_list: + if len(cfg.items_list) == 0: raise SystemExit( f"Directory {cfg.input_path} does not exist, contains no audio files or all files were filtered out." ) + # validate input files for correct format and sampling rate + validate_input_files(cfg) + # assemble a list of output and temporary directories to create for chain in cfg.proc_chains: cfg.out_dirs.append(cfg.output_path.joinpath(chain["name"])) @@ -184,6 +189,8 @@ def get_preprocessing_2(cfg: TestConfig) -> dict: "concatenation_order": pre2_cfg.get("concatenation_order", None), "preamble": pre2_cfg.get("preamble", 0), "pad_noise_preamble": pre2_cfg.get("preamble_noise", False), + "postamble": pre2_cfg.get("postamble", 0), + "pad_noise_postamble": pre2_cfg.get("postamble_noise", False), "background_noise": background, "in_mask": pre2_cfg.get("mask", None), "multiprocessing": cfg.multiprocessing, @@ -495,6 +502,7 @@ def get_processing_chain( "out_fmt": post_fmt, # no rendering here "concatenate_input": pre2_cfg.get("concatenate_input", False), "preamble": pre2_cfg.get("preamble", 0), + "postamble": pre2_cfg.get("postamble", 0), "repeat_signal": pre2_cfg.get("repeat_signal", False), 
"loudness": post_cfg.get("loudness", None), "loudness_fmt": post_cfg.get("loudness_fmt", None), @@ -505,3 +513,52 @@ def get_processing_chain( ) return chain + + +def validate_input_files(cfg: TestConfig): + """ + Go through list of input files and check whether they match the sampling rate and format + (by checking number of channels) specified in the config and are aligned to the given + input block size. + """ + input_format = cfg.input["fmt"] + num_chan_expected = audio.fromtype(input_format).num_channels + + for item in cfg.items_list: + if "fs" in cfg.input: + sampling_rate = cfg.input["fs"] + x, fs = read(item, nchannels=num_chan_expected, fs=sampling_rate) + elif item.suffix == ".pcm" or item.suffix == ".raw": + raise ValueError("Sampling rate must be specified for headerless files!") + elif item.suffix == ".wav": + x, fs = read(item) + sampling_rate = fs + else: + raise ValueError(f"Unsupported input file type {item.suffix}") + n_samples_x, n_chan_x = x.shape + + # check for number of channels and sampling rate + if fs != sampling_rate: + raise ValueError( + f"Sampling rate of the file ({fs}) does NOT match with that ({sampling_rate}) specified in the config yaml." + ) + if n_chan_x != num_chan_expected: + raise ValueError( + f"The number of channels in the file ({n_chan_x}) do NOT match with those of format ({num_chan_expected}, {input_format}) specified in the config yaml." + ) + + if (input_aligned_cfg := cfg.input.get("aligned_to", None)) is not None: + input_fmt_has_metadata = input_format.startswith( + "ISM" + ) or input_format.startswith("MASA") + force_alignment = ( + input_aligned_cfg.get("force", False) or input_fmt_has_metadata + ) + + alignment_len_samples = (input_aligned_cfg["len"] / 1000) * fs + if n_samples_x % alignment_len_samples != 0: + msg = f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of given alignment length ({input_aligned_cfg['len']} ms)." 
+ if force_alignment: + raise ValueError(msg) + else: + warn(msg) diff --git a/ivas_processing_scripts/processing/preprocessing_2.py b/ivas_processing_scripts/processing/preprocessing_2.py index 2d2fd5003b1152acb2e73f496061f5297a071536..b894dd963d72b4679518ad0d76bf27538138b9a5 100644 --- a/ivas_processing_scripts/processing/preprocessing_2.py +++ b/ivas_processing_scripts/processing/preprocessing_2.py @@ -85,9 +85,9 @@ class Preprocessing2(Processing): audio_object.metadata_files = meta_files audio_object.obect_pos = metadata - if self.preamble: + # add preamble + if self.preamble > 0: logger.debug(f"Add preamble of length {self.preamble}ms") - # add preamble to actual signal audio_object.audio = trim( audio_object.audio, audio_object.fs, @@ -111,6 +111,16 @@ class Preprocessing2(Processing): (audio_object.audio, audio_object.audio), axis=0 ) + # add postamble - do after signal repetition as this is just for ensuring equal lengths between in- and output signals + if self.postamble > 0: + logger.debug(f"Add postamble of length {self.postamble}ms") + audio_object.audio = trim( + audio_object.audio, + audio_object.fs, + (0, -self.postamble), + self.pad_noise_postamble, + ) + # save file write(out_file, audio_object.audio, fs=audio_object.fs) diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 6a480b7f374ff2c9fda82d815dd203e119ec0f13..29797295cc205b6fb5b2ad726211fba53a3d693e 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -35,14 +35,12 @@ from abc import ABC, abstractmethod from itertools import repeat from pathlib import Path from shutil import copyfile -from typing import Iterable, Optional, Union +from typing import Iterable, Union from warnings import warn -import numpy as np - from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audioarray import window -from 
ivas_processing_scripts.audiotools.audiofile import concat, read, trim, write +from ivas_processing_scripts.audiotools.audiofile import concat, trim from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS from ivas_processing_scripts.audiotools.convert.__init__ import convert from ivas_processing_scripts.audiotools.metadata import ( @@ -161,7 +159,9 @@ def concat_setup(cfg: TestConfig, chain, logger: logging.Logger): logger.info(f"Splits written to file {splits_info_file}") -def concat_teardown(x, splits, out_fmt, fs, in_fs, meta, logger: logging.Logger): +def concat_teardown( + x, splits, out_fmt, fs, in_fs, meta, len_postamble_ms, logger: logging.Logger +): if not splits: raise ValueError("Splitting not possible without split marker") @@ -182,9 +182,14 @@ def concat_teardown(x, splits, out_fmt, fs, in_fs, meta, logger: logging.Logger) raise ValueError( f"Last split index {splits[-1]} is larger than the signal length {len(x)}" ) - elif splits[-1] < len(x): + elif splits[-1] < len(x) - ( + postamble_len_samples := (len_postamble_ms * fs_old) // 1000 + ): + msg_file_len = len(x) + if len_postamble_ms > 0: + msg_file_len = f"(minus postamble length of {postamble_len_samples}): {len(x) - postamble_len_samples}" warn( - f"Last split index {splits[-1]} is smaller that the signal length {len(x)}" + f"Last split index {splits[-1]} is smaller than the signal length {msg_file_len}" ) split_old = 0 @@ -397,7 +402,9 @@ def process_item( copyfile(ppm, out_meta[idx]) -def remove_preamble(x, out_fmt, fs, repeat_signal, preamble, meta, logger): +def remove_pre_and_postamble( + x, out_fmt, fs, repeat_signal, preamble_len_ms, postamble_len_ms, meta, logger +): # remove preamble for ISM metadata if out_fmt.startswith("ISM"): # cut first half of the metadata @@ -405,20 +412,23 @@ def remove_preamble(x, out_fmt, fs, repeat_signal, preamble, meta, logger): meta = [m[int(len(m) / 2) :, :] for m in meta] # remove preamble - if preamble: - meta = 
add_remove_preamble(meta, preamble, add=False) + if preamble_len_ms > 0: + meta = add_remove_preamble(meta, preamble_len_ms, add=False) - # remove first half of signal + # get number of samples to cut from start + trim_len_samples = (preamble_len_ms * fs) // 1000 + postamble_len_samples = (postamble_len_ms * fs) // 1000 if repeat_signal: if logger: logger.debug("Remove first half of signal") - x = x[int(len(x) / 2) :, :] - # remove preamble - if preamble: - if logger: - logger.debug("Remove preamble") - x = trim(x, fs, (preamble, 0)) + # need to subtract the postamble length before getting half of signal length - it was added after concatenation + trim_len_samples += (len(x) - postamble_len_samples) // 2 + + if trim_len_samples > 0 and logger: + logger.debug("Remove preamble") + + x = trim(x, fs, (trim_len_samples, postamble_len_samples), samples=True) return x, meta @@ -455,109 +465,3 @@ def preprocess_background_noise(cfg): return - -def multiple_of_frame_size( - cfg: TestConfig, - frame_size_in_ms: Optional[int] = 20, -) -> np.ndarray: - """ - This function checks if the list of multi channel audio files is a multiple of frame size. - If the file isn't a multiple then the function pads it to the next integer of frame size and writes the file to an output directory. - It also copies the already aligned files to the output directory. - - Parameters - ---------- - cfg: TestConfig - Input configuration - frame_size_in_ms: Optional[int] - Frame size in milliseconds; default = 20 - """ - # get the number of channels from the input format - input_format = cfg.input["fmt"] - num_channels = audio.fromtype(input_format).num_channels - - # Create output directory - output_dir = cfg.output_path / "20ms_aligned_files" - try: - output_dir.mkdir(exist_ok=False) - except FileExistsError: - raise ValueError( - "Folder for 20ms aligned files already exists. 
Please move or delete folder" - ) - - # iterate over input files - for i, item in enumerate(cfg.items_list): - # read the audio file - if "fs" in cfg.input: - sampling_rate = cfg.input["fs"] - x, fs = read(item, nchannels=num_channels, fs=sampling_rate) - elif item.suffix == ".pcm" or item.suffix == ".raw": - raise ValueError("Sampling rate must be specified for headerless files!") - elif item.suffix == ".wav": - x, fs = read(item) - sampling_rate = fs - else: - raise ValueError(f"Unsupported input file type {item.suffix}") - n_samples_x, n_chan_x = x.shape - - # check for number of channels and sampling rate - if fs != sampling_rate: - raise ValueError( - f"Sampling rate of the file ({fs}) does NOT match with that ({sampling_rate}) specified in the config yaml." - ) - if n_chan_x != num_channels: - raise ValueError( - f"The number of channels in the file ({n_chan_x}) do NOT match with those of format ({num_channels}, {input_format}) specified in the config yaml." - ) - - # warn if audio length not a multiple of frame length - frame_length_samples = (frame_size_in_ms / 1000) * fs - remainder = n_samples_x % frame_length_samples - if remainder != 0: - # Calculate number of samples needed for padding - padding_samples = int(frame_length_samples - remainder) - - if input_format.startswith("ISM") or input_format.startswith("MASA"): - raise ValueError( - f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)." - ) - else: - warn( - f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms). Padding to the nearest integer multiple." 
- ) - - # Create and append zeros - padded_data = trim( - x, - sampling_rate, - (0, -padding_samples), - pad_noise=True, - samples=True, - ) - # Write padded data to output directory - write(output_dir / item.name, padded_data, fs) - else: - copyfile(item, output_dir / item.name) - - # Update audio file path in list - cfg.items_list[i] = output_dir / item.name - - # Copy metadata and update path - if input_format.startswith("ISM"): - for j in range(int(cfg.input["fmt"][3])): - copyfile( - cfg.metadata_path[i][j], output_dir / cfg.metadata_path[i][j].name - ) - cfg.metadata_path[i][j] = output_dir / cfg.metadata_path[i][j].name - elif input_format.startswith("MASA"): - raise ValueError("MASA as input format not implemented yet") - - # Check if all files are present in output directory - all_files_present = all( - [(output_dir / audio_file.name).exists() for audio_file in cfg.items_list] - ) - if not all_files_present: - raise Exception("Not all files are present in the output directory") - - # Make the output path as the new input path - cfg.input_path = output_dir diff --git a/ivas_processing_scripts/processing/processing_splitting_scaling.py b/ivas_processing_scripts/processing/processing_splitting_scaling.py index b49454ff58de90a6f695f9d762f1ab87dddba380..86b893b26ee88aae7cbe9f88ced87fbbde6eca9a 100644 --- a/ivas_processing_scripts/processing/processing_splitting_scaling.py +++ b/ivas_processing_scripts/processing/processing_splitting_scaling.py @@ -3,7 +3,6 @@ import logging import re from itertools import repeat from pathlib import Path -from warnings import warn import numpy as np @@ -14,7 +13,7 @@ from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm from ivas_processing_scripts.processing.processing import ( Processing, concat_teardown, - remove_preamble, + remove_pre_and_postamble, ) # @@ -191,13 +190,14 @@ class Processing_splitting_scaling(Processing): self, x, fs, in_file, out_file, in_meta, noerror=False, logger=None ): # remove 
preamble and first half of signal due to repetition - if self.preamble or self.repeat_signal: - x, in_meta = remove_preamble( + if self.preamble or self.postamble or self.repeat_signal: + x, in_meta = remove_pre_and_postamble( x, self.out_fmt, self.fs, self.repeat_signal, self.preamble, + self.postamble, in_meta, logger, ) @@ -214,7 +214,7 @@ class Processing_splitting_scaling(Processing): # split file file_splits, meta_splits = concat_teardown( - x, splits, self.out_fmt, fs, split_fs, in_meta, logger + x, splits, self.out_fmt, fs, split_fs, in_meta, self.postamble, logger ) # set new out_files @@ -241,16 +241,15 @@ class Processing_splitting_scaling(Processing): else: # check length of output signals - input_aligned_file = ( - in_file.parent.parent - / "20ms_aligned_files" - / f"{Path(in_file.stem).stem}.wav" - ) - input_aligned_array, _ = read(input_aligned_file) - if (len_inp := len(input_aligned_array)) != (len_out := len(x)): - warn( - f"For file {out_file} the length is {len_out} and does not match the (frame aligned) input length {len_inp}." - ) + # input_aligned_file = ( + # in_file.parent.parent + # / f"{Path(in_file.stem).stem}.wav" + # ) + # input_aligned_array, _ = read(input_aligned_file) + # if (len_inp := len(input_aligned_array)) != (len_out := len(x)): + # warn( + # f"For file {out_file} the length is {len_out} and does not match the (frame aligned) input length {len_inp}." 
+ # ) # set output values out_files = [out_file] diff --git a/tests/test_experiments.py b/tests/test_experiments.py index a073b2e98a7a1ba0ef1b516bfc751c544cbec1bd..b8e62114fde2b4867a56dfb7edcc11d6183a9c05 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -39,8 +39,9 @@ from numpy.random import random, seed from ivas_processing_scripts import main as generate_test from ivas_processing_scripts.audiotools import audio -from ivas_processing_scripts.audiotools.audiofile import concat, write +from ivas_processing_scripts.audiotools.audiofile import concat, read, write from ivas_processing_scripts.processing.config import TestConfig +from ivas_processing_scripts.utils import list_audio from tests.constants import ( FORMAT_TO_METADATA_FILES, INPUT_EXPERIMENT_NAMES, @@ -98,6 +99,28 @@ def setup_input_files_for_config(config): write(bg_noise_path, noise) +def all_lengths_equal(cfg): + output_folder = cfg.output_path + + all_lengths_equal = True + for condition in cfg.conditions_to_generate.keys(): + output_condition_folder = output_folder.joinpath(condition) + for input_file in list_audio(cfg.input_path): + output_file = output_condition_folder.joinpath(input_file.name).with_suffix( + f".{condition}.wav" + ) + in_signal, _ = read(input_file) + out_signal, _ = read(output_file) + lengths_equal = in_signal.shape[0] == out_signal.shape[0] + if not lengths_equal: + print( + f"Unequal file length for {input_file.name} in condition {condition} - in len {in_signal.shape[0]} vs. out len {out_signal.shape[0]}" + ) + all_lengths_equal = False + + return all_lengths_equal + + @pytest.mark.parametrize( "exp_lab_pair", zip(INPUT_EXPERIMENT_NAMES, LAB_IDS_FOR_EXPERIMENTS) ) @@ -108,5 +131,9 @@ def test_generate_test_items(exp_lab_pair): args = Arguments(str(cfg)) config = TestConfig(cfg) + setup_input_files_for_config(config) generate_test(args) + + if not all_lengths_equal(config): + raise RuntimeError("Unequal lengths between input and output files detected")