diff --git a/README.md b/README.md index d718979fbcd534c5e96ba91465579c75056ff1e3..3a74688dd51693742ede87c986aeef93cb9cd3bf 100755 --- a/README.md +++ b/README.md @@ -239,10 +239,13 @@ input: # preamble_noise: true ### Additive background noise # background_noise: - ### REQUIRED: SNR for background noise in dB + ### SNR for background noise in dB; REQUIRED for prerecorded background noise and ignored for low level noise # snr: 10 - ### REQUIRED: Path to background noise, must have same format and sampling rate as input signal(s) + ### REQUIRED: Either background noise path or low level noise flag + ### Path to background noise, must have same format and sampling rate as input signal(s); default = null # background_noise_path: ".../noise.wav" + ### Flag for using low level [-4,+4] background noise; default = false + # low_level_noise: true ``` diff --git a/examples/TEMPLATE.yml b/examples/TEMPLATE.yml index b570718f50c3f7099fba1de7b6ea91ee145a308f..9252a44df02ffde494929694a26a916e173171ff 100755 --- a/examples/TEMPLATE.yml +++ b/examples/TEMPLATE.yml @@ -106,10 +106,13 @@ input: # preamble_noise: true ### Additive background noise # background_noise: - ### REQUIRED: SNR for background noise in dB + ### SNR for background noise in dB; REQUIRED for prerecorded background noise and ignored for low level noise # snr: 10 - ### REQUIRED: Path to background noise, must have same format and sampling rate as input signal(s) + ### REQUIRED: Either background noise path or low level noise flag + ### Path to background noise, must have same format and sampling rate as input signal(s); default = null # background_noise_path: ".../noise.wav" + ### Flag for using low level [-4,+4] background noise; default = false + # low_level_noise: true ################################################# ### Bitstream processing diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index 
4a6fa6e8736f8c54246530cd9cfc45b5e3f89ffb..62b26ec99e10fb7109de88cc01747a4f4624cda1 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -145,6 +145,7 @@ def main(args): hasattr(cfg, "preprocessing") and hasattr(cfg.pre2, "background_noise") and cfg.pre2.background_noise is not None + and cfg.pre2.background_noise.get("background_noise_path") ): preprocess_background_noise(cfg) # preprocess 2 diff --git a/ivas_processing_scripts/audiotools/audioarray.py b/ivas_processing_scripts/audiotools/audioarray.py index 2c770ce4902ee6cacb1aeb22a43e7cd335297292..9278b2402a3cf9a69c7decd6929b2d92b35f8d03 100755 --- a/ivas_processing_scripts/audiotools/audioarray.py +++ b/ivas_processing_scripts/audiotools/audioarray.py @@ -52,6 +52,7 @@ def trim( limits: Optional[Tuple[int, int]] = None, pad_noise: Optional[bool] = False, samples: Optional[bool] = False, + seed: Optional[int] = None, ) -> np.ndarray: """ Trim an audio array @@ -88,7 +89,10 @@ def trim( if pre_trim < 0: if pad_noise: # pad with uniformly distributed noise between -4 and 4 - np.random.seed(SEED_PADDING) + if seed is not None:  # an explicit seed of 0 is valid and must be honoured + np.random.seed(seed) + else: + np.random.seed(SEED_PADDING) noise = np.random.randint( low=-4, high=5, size=(np.abs(pre_trim), np.shape(x)[1]) ).astype("float") @@ -101,7 +105,10 @@ def trim( if post_trim < 0: if pad_noise: # pad with uniformly distributed noise between -4 and 4 - np.random.seed(SEED_PADDING) + if seed is not None:  # an explicit seed of 0 is valid and must be honoured + np.random.seed(seed) + else: + np.random.seed(SEED_PADDING) noise = np.random.randint( low=-4, high=5, size=(np.abs(post_trim), np.shape(x)[1]) ).astype("float") diff --git a/ivas_processing_scripts/processing/chains.py b/ivas_processing_scripts/processing/chains.py index 60b7aa72bc157cc8e412b47ba86f2540759131d5..338b4e8ea2999c978bdfb5f7c310130c285110ba 100755 --- a/ivas_processing_scripts/processing/chains.py +++ b/ivas_processing_scripts/processing/chains.py @@ -30,7 +30,6 @@ # the United Nations Convention on Contracts on the International 
Sales of Goods. # -from pathlib import Path from typing import Optional from warnings import warn @@ -40,7 +39,7 @@ from ivas_processing_scripts.processing.ivas import IVAS from ivas_processing_scripts.processing.postprocessing import Postprocessing from ivas_processing_scripts.processing.preprocessing import Preprocessing from ivas_processing_scripts.processing.preprocessing_2 import Preprocessing2 -from ivas_processing_scripts.utils import list_audio +from ivas_processing_scripts.utils import get_abs_path, list_audio def init_processing_chains(cfg: TestConfig) -> None: @@ -133,6 +132,8 @@ def get_preprocessing_2(cfg: TestConfig) -> dict: } pre2_cfg = cfg.preprocessing_2 + + # set up background noise background_cfg = pre2_cfg.get("background_noise", None) if background_cfg: background = { @@ -140,6 +141,7 @@ def get_preprocessing_2(cfg: TestConfig) -> dict: "background_noise_path": get_abs_path( background_cfg.get("background_noise_path", None) ), + "low_level_noise": background_cfg.get("low_level_noise", False), "seed_delay": cfg.prerun_seed, "master_seed": cfg.master_seed, "output_fmt": cfg.postprocessing["fmt"], @@ -414,11 +416,3 @@ def get_processing_chain( ) return chain - - -def get_abs_path(rel_path): - if rel_path is not None: - abs_path = Path(rel_path).resolve().absolute() - else: - abs_path = None - return abs_path diff --git a/ivas_processing_scripts/processing/config.py b/ivas_processing_scripts/processing/config.py index a90487e6121545eb03322d2af7078c9208e2576d..1fef013bafddc9642d6ce29fca050ca99e52cd4f 100755 --- a/ivas_processing_scripts/processing/config.py +++ b/ivas_processing_scripts/processing/config.py @@ -46,6 +46,7 @@ from ivas_processing_scripts.constants import ( REQUIRED_KEYS_MNRU, SUPPORTED_CONDITIONS, ) +from ivas_processing_scripts.utils import get_abs_path def merge_dicts(base: dict, other: dict) -> None: @@ -139,16 +140,36 @@ class TestConfig: raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}") # validate 
preprocessing on concatenated file stage - if (pre_proc_2 := getattr(cfg, "preprocessing_2", None)) is not None: - bg_noise_folder = Path(pre_proc_2["background_noise_path"]).parent - if ( - bg_noise_folder.resolve().absolute() - == cfg.input_path.resolve().absolute() + if (pre_proc_2 := cfg.get("preprocessing_2", None)) is not None and ( + bg_noise := pre_proc_2.get("background_noise", None) + ) is not None: + # check if low level flag or path is given + if not bg_noise.get("background_noise_path", None) and not bg_noise.get( + "low_level_noise", False ): raise ValueError( - "Background noise file has to be placed outside the input folder!" + "Path to prerecorded noise or low level noise flag has to be provided for background noise" ) + if bg_noise.get("background_noise_path", None) and bg_noise.get( + "low_level_noise", False + ): + raise ValueError( + "Only prerecorded or low level background noise possible, not both" + ) + + if bg_noise.get("background_noise_path", None): + # check snr; test for None explicitly because an SNR of 0 dB is a valid setting + if bg_noise.get("snr", None) is None: + raise ValueError("SNR has to be specified for background noise") + + # check if path of background noise is in input folder + bg_noise_folder = Path(bg_noise["background_noise_path"]).parent + if get_abs_path(bg_noise_folder) == get_abs_path(cfg["input_path"]): + raise ValueError( + "Background noise file has to be placed outside the input folder!" 
+ ) + for cond_name, cond_cfg in cfg.get("conditions_to_generate").items(): type = cond_cfg.get("type") if not type: diff --git a/ivas_processing_scripts/processing/preprocessing_2.py b/ivas_processing_scripts/processing/preprocessing_2.py index 0da4a2fc4e1cef745326db5fa5858d91e864004a..ac1f124fcee526841919c659c08f7e95b1b29e1f 100644 --- a/ivas_processing_scripts/processing/preprocessing_2.py +++ b/ivas_processing_scripts/processing/preprocessing_2.py @@ -32,6 +32,7 @@ import logging from pathlib import Path +from warnings import warn import numpy as np @@ -107,76 +108,87 @@ class Preprocessing2(Processing): def add_background_noise( self, audio_object: audio.Audio, in_meta, logger ) -> np.ndarray: - # check if SNR and background noise are given - if ( - not self.background_noise["background_noise_path"] - or not Path(self.background_noise["background_noise_path"]).exists() - ): - raise ValueError("Background noise does not exist") - if not self.background_noise["snr"]: - raise ValueError("SNR value needed for using background noise") - # range for random delay - range_delay = (1, 2400000) - - # load background noise - if self.background_noise["background_object"] is not None: - noise_object = self.background_noise["background_object"] - else: - noise_object = audio.fromfile( - self.in_fmt, - self.background_noise["background_noise_path"], - fs=self.in_fs, - in_meta=in_meta, + max_delay = int(2400000 * audio_object.fs / 48000) + + if self.background_noise.get("background_noise_path"): + if not self.background_noise.get("background_noise_path").exists(): + raise ValueError( + f"Background noise path {self.background_noise.get('background_noise_path')} does not exist" + ) + # load background noise + if self.background_noise["background_object"] is not None: + noise_object = self.background_noise["background_object"] + else: + noise_object = audio.fromfile( + self.in_fmt, + self.background_noise["background_noise_path"], + fs=self.in_fs, + in_meta=in_meta, + ) + + # 
if noise is too short raise error + if len(noise_object.audio) < len(audio_object.audio): + raise ValueError("Background noise too short for audio signal") + if len(noise_object.audio) - max_delay < len(audio_object.audio): + raise ValueError( + "Background noise may be to short for audio signal when considering the random delay" + ) + + # measure loudness of audio signal based on output format + tmp_object = audio.fromtype(self.out_fmt) + if ( + isinstance(tmp_object, audio.ObjectBasedAudio) + or isinstance(tmp_object, audio.SceneBasedAudio) + or isinstance(tmp_object, audio.MetadataAssistedSpatialAudio) + ): + out_format = None + else: + out_format = self.out_fmt + + loudness_signal, _ = get_loudness(audio_object, loudness_format=out_format) + logger.debug(f"Loudness of audio signal: {loudness_signal}LKFS") + + # compute desired loudness of background noise + loudness_noise = loudness_signal - self.background_noise["snr"] + + # apply random delay and cut signal + rand_delay = random_seed( + range=(1, max_delay), + master_seed=self.background_noise["master_seed"], + prerun_seed=self.background_noise["seed_delay"], + hexa=False, ) + noise_object.audio = delay( + noise_object.audio, delay=-rand_delay, samples=True, fs=noise_object.fs + )[: len(audio_object.audio)] - # if noise is too short raise error - if len(noise_object.audio) < len(audio_object.audio): - raise ValueError("Background noise too short for audio signal") - if len(noise_object.audio) - range_delay[1] < len(audio_object.audio): - raise ValueError( - "Background noise may be to short for audio signal when considering the random delay" + # scale background noise to desired loudness based on output format + logger.debug( + f"Scaling of background noise to {self.background_noise['snr']}dB SNR" + ) + noise_object.audio = loudness_norm( + noise_object, + loudness_noise, + out_format, + rms=True, + logger=logger, + ) + elif self.background_noise.get("low_level_noise"): + # use low level noise instead of 
prerecoded background noise + if self.background_noise["snr"]: + warn("SNR will be ignored for low level background noise") + + noise_array = trim( + np.zeros((0, audio_object.num_channels)), + samples=True, + limits=(0, -len(audio_object.audio)), + pad_noise=True, + seed=self.background_noise["master_seed"], + ) + noise_object = audio.fromarray( + x=noise_array, fs=self.in_fs, fmt=self.in_fmt ) - - # measure loudness of audio signal based on output format - tmp_object = audio.fromtype(self.out_fmt) - if ( - isinstance(tmp_object, audio.ObjectBasedAudio) - or isinstance(tmp_object, audio.SceneBasedAudio) - or isinstance(tmp_object, audio.MetadataAssistedSpatialAudio) - ): - out_format = None - else: - out_format = self.out_fmt - - loudness_signal, _ = get_loudness(audio_object, loudness_format=out_format) - logger.debug(f"Loudness of audio signal: {loudness_signal}LKFS") - - # compute desired loudness of background noise - loudness_noise = loudness_signal - self.background_noise["snr"] - - # apply random delay and cut signal - rand_delay = random_seed( - range=range_delay, - master_seed=self.background_noise["master_seed"], - prerun_seed=self.background_noise["seed_delay"], - hexa=False, - ) - noise_object.audio = delay( - noise_object.audio, delay=-rand_delay, samples=True, fs=noise_object.fs - )[: len(audio_object.audio)] - - # scale background noise to desired loudness based on output format - logger.debug( - f"Scaling of background noise to {self.background_noise['snr']}dB SNR" - ) - noise_object.audio = loudness_norm( - noise_object, - loudness_noise, - out_format, - rms=True, - logger=logger, - ) # add array to signal audio_object.audio = noise_object.audio + audio_object.audio diff --git a/ivas_processing_scripts/utils.py b/ivas_processing_scripts/utils.py index a858a6c054677bcef026cbb87707736d305872de..0ce6696e00e4078812310b42bc5c37aa04b62b52 100755 --- a/ivas_processing_scripts/utils.py +++ b/ivas_processing_scripts/utils.py @@ -290,3 +290,11 @@ def 
get_binary_paths(yaml_file_with_binary_paths): return {} else: return {key: Path(value) for key, value in data.items()} + + +def get_abs_path(rel_path): + """Return rel_path as a resolved, absolute Path; None is passed through unchanged.""" + if rel_path is None: + return None + # resolve the given path first, then make sure the result is absolute + return Path(rel_path).resolve().absolute()