diff --git a/examples/TEMPLATE.yml b/examples/TEMPLATE.yml index 68ffcb5e38b0034e26e912c9fa70140d58fea44e..8c5fdd8e9e0df61640a48cb16147c03ff3accd21 100755 --- a/examples/TEMPLATE.yml +++ b/examples/TEMPLATE.yml @@ -16,17 +16,15 @@ # delete_tmp: true ### Master seed for random processes like bitstream error pattern generation; default = 0 # master_seed: 5 -### Additional seed to specify number of preruns; default = 0 -# prerun_seed: 2 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. ### Do not use file names with dots "." in them! This is not supported, use "_" instead ### For Windows user: please use double back slash '\\' in paths and add '.exe' to executable definitions ### REQUIRED: Input path or file -input_path: "~/ivas/items/HOA3" +input_path: ".../ivas/items/HOA3" ### REQUIRED: Output path or file -output_path: "./tmp_output" +output_path: ".../tmp_output" ### Metadata path or file(s) ### If input format is ISM{1-4} a path for the metadata files can be specified; ### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherise ignored) @@ -49,17 +47,6 @@ output_path: "./tmp_output" # input_select: # - "48kHz" -### Horizontally concatenate input items into one long file; default = false -# concatenate_input: true -### Specify the concatenation order in a list of strings. 
If not specified, the concatenation order would be -### as per the filesystem on the users' device -### Should only be used if concatenate_input = true -# concatenation_order: [] -### Specify preamble duration in ms; default = 0 -# preamble: 40 -### Flag wheter to use noise (amplitude +-4) for the preamble or silence; default = false (silence) -# pad_noise_preamble: true - ################################################ ### Input configuration ################################################ @@ -70,7 +57,7 @@ input: # fs: 32000 ################################################ -### Pre-processing +### Pre-processing on individual items ################################################ ### Pre-processing step performed prior to core processing for all conditions ### If not defined, preprocessing step is skipped @@ -96,6 +83,31 @@ input: # delay: 20 ### Length of window used at start/end of signal (ms); default = 0 # window: 100 + +################################################ +### Pre-processing on whole signal(s) +################################################ +# preprocessing_2: + ### Options for processing of the concatenated item (concatenate_input: true) or + ### the individual items (concatenate_input: false) after previous pre-processing step + ### Horizontally concatenate input items into one long file; default = false + # concatenate_input: true + ### Specify the concatenation order in a list of strings. 
If not specified, the concatenation order would be + ### as per the filesystem on the users' device + ### Should only be used if concatenate_input = true + # concatenation_order: [] + ### Specify preamble duration in ms; default = 0 + # preamble: 10000 + ### Flag whether to use noise (amplitude +-4) for the preamble or silence; default = false (silence) + # preamble_noise: true + ### Additive background noise + # background_noise: + ### REQUIRED: SNR for background noise in dB + # snr: 10 + ### REQUIRED: Path to background noise + # background_noise_path: ".../noise.wav" + ### Seed for delay offset; default = 0 + # seed_delay: 10 ################################################# ### Bitstream processing @@ -105,7 +117,7 @@ input: ### can be given globally here or in individual conditions of type ivas or evs # tx: ### REQUIRED: Type of bitstream processing; possible types: "JBM" or "FER" - #type: "JBM" + # type: "JBM" ### JBM ### REQUIRED: either error_pattern or error_profile @@ -122,6 +134,8 @@ input: # error_pattern: "path/pattern.192" ### Error rate in percent # error_rate: 5 + ### Additional seed to specify number of preruns; default = 0 + # prerun_seed: 2 ################################################ ### Configuration for conditions under test @@ -209,7 +223,7 @@ conditions_to_generate: ### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary) bin: ~/git/ivas-codec/IVAS_dec ### Decoder output format; default = postprocessing fmt - fmt: "CICP19" + fmt: "7_1_4" ### Decoder output sampling rate; default = null (same as input) # fs: 48000 ### Additional commandline options; default = null @@ -244,8 +258,8 @@ conditions_to_generate: postprocessing: ### REQUIRED: Target format for output fmt: "BINAURAL" - ### Target sampling rate in Hz for resampling; default = null (no resampling) - # fs: 16000 + ### REQUIRED: Target sampling rate in Hz for resampling + fs: 48000 ### Low-pass cut-off frequency in Hz; default = null (no 
filtering) # lp_cutoff: 24000 ### Target loudness in LKFS; default = null (no loudness change applied) diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index 5fedfc3252f0d81c935b886f2918e92a887ee713..036ddd6b9e9e6631827cb226546cbbbc71ec8496 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -36,7 +36,6 @@ from itertools import repeat import yaml from ivas_processing_scripts.audiotools.metadata import check_ISM_metadata -from ivas_processing_scripts.audiotools.wrappers.bs1770 import scale_files from ivas_processing_scripts.constants import ( LOGGER_DATEFMT, LOGGER_FORMAT, @@ -44,11 +43,11 @@ from ivas_processing_scripts.constants import ( ) from ivas_processing_scripts.processing import chains, config from ivas_processing_scripts.processing.processing import ( - concat_setup, - concat_teardown, preprocess, + preprocess_2, process_item, reorder_items_list, + reverse_process_2, ) from ivas_processing_scripts.utils import DirManager, apply_func_parallel @@ -95,8 +94,15 @@ def main(args): logger = logging_init(args, cfg) # Re-ordering items based on concatenation order - if cfg.concatenate_input and cfg.concatenation_order is not None: - cfg.items_list = reorder_items_list(cfg.items_list, cfg.concatenation_order) + if hasattr(cfg, "preprocessing_2"): + if ( + cfg.preprocessing_2.get("concatenate_input") + and cfg.preprocessing_2.get("concatenation_order", None) is not None + ): + cfg.items_list = reorder_items_list( + cfg.items_list, cfg.preprocessing_2["concatenation_order"] + ) + # check for ISM metadata if cfg.input["fmt"].startswith("ISM"): metadata = check_ISM_metadata( @@ -121,12 +127,15 @@ def main(args): # run preprocessing only once if hasattr(cfg, "preprocessing"): - preprocess(cfg, cfg.metadata_path, logger) + preprocess(cfg, logger) - if cfg.concatenate_input: - # concatenate items if required - concat_setup(cfg, logger) + # preprocessing on whole signal(s) + if hasattr(cfg, 
"preprocessing_2"): + # save process info to revert it later + cfg.pre2 = cfg.proc_chains[0]["processes"][0] + preprocess_2(cfg, logger) + # run conditions for condition, out_dir, tmp_dir in zip( cfg.proc_chains, cfg.out_dirs, cfg.tmp_dirs ): @@ -134,11 +143,6 @@ def main(args): logger.info(f" Generating condition: {condition['name']}") - # # TODO: what happens when no concatenation or only one file for concatenation? - # if condition["processes"][0].name == "ivas": # TODO: check if 0 index sufficient - # a = {"number_frames": cfg.num_frames, "number_frames_preamble": cfg.num_frames_preamble} - # condition["processes"][0].tx.update(a) - apply_func_parallel( process_item, zip( @@ -153,19 +157,8 @@ def main(args): "mp" if cfg.multiprocessing else None, ) - if cfg.concatenate_input: - # write out the splits, optionally remove file - out_paths_splits, out_meta_splits = concat_teardown(cfg, logger) - # scale individual files - if cfg.postprocessing.get("loudness", False): - # TODO: take care of samplingrate - scale_files( - out_paths_splits, - cfg.postprocessing["fmt"], - cfg.postprocessing["loudness"], - cfg.postprocessing.get("fs", None), - out_meta_splits, - ) + if hasattr(cfg, "preprocessing_2"): + reverse_process_2(cfg, logger) # copy configuration to output directory with open(cfg.output_path.joinpath(f"{cfg.name}.yml"), "w") as f: diff --git a/ivas_processing_scripts/audiotools/audiofile.py b/ivas_processing_scripts/audiotools/audiofile.py index a3c84e7eb98adae913f33e4201d96c1ff3020bf8..954c91f8441a7a3fad3ae58794537af4a357742f 100755 --- a/ivas_processing_scripts/audiotools/audiofile.py +++ b/ivas_processing_scripts/audiotools/audiofile.py @@ -150,8 +150,8 @@ def write( def concat( in_filenames: list, out_file: str, - silence_pre: int, - silence_post: int, + silence_pre: Optional[int] = 0, + silence_post: Optional[int] = 0, in_fs: Optional[int] = 48000, num_channels: Optional[int] = None, pad_noise: Optional[bool] = False, diff --git 
a/ivas_processing_scripts/audiotools/metadata.py b/ivas_processing_scripts/audiotools/metadata.py index d7fd167d668385c74de88ca352cbd04a60c4aefc..d0d44502ceaf3427cd4d24a7203fd2c211a7137d 100755 --- a/ivas_processing_scripts/audiotools/metadata.py +++ b/ivas_processing_scripts/audiotools/metadata.py @@ -188,7 +188,7 @@ def write_ISM_metadata_in_file( List of acutally used file names """ - if len(metadata) != len(file_name) and not automatic_naming: + if not automatic_naming and len(metadata) != len(file_name): raise ValueError("Number of metadata objects and file names has to match") number_objects = len(metadata) @@ -299,9 +299,9 @@ def concat_meta_from_file( audio_files: list[str], meta_files: list[list[str]], out_file: list[str], - silence_pre: int, - silence_post: int, input_fmt: str, + silence_pre: Optional[int] = 0, + silence_post: Optional[int] = 0, preamble: Optional[int] = None, ) -> None: """ @@ -315,12 +315,12 @@ def concat_meta_from_file( List of corresponding metadata file names out_file: list[str] Name of concatenated output file - silence_pre: int - Silence inserted before each item - silence_post: int - Silence inserted after each item input_fmt: str Input audio format + silence_pre: Optional[int] + Silence inserted before each item + silence_post: Optional[int] + Silence inserted after each item preamble: Optional[int] Length of preamble in milliseconds """ @@ -362,7 +362,7 @@ def concat_meta_from_file( # pad trim_meta( audio_item, (-silence_pre, -silence_post) - ) # use negative value since we wante to pad, not trim + ) # use negative value since we want to pad, not trim # concatenate for idx, obj_pos in enumerate(audio_item.object_pos): @@ -374,26 +374,7 @@ def concat_meta_from_file( # add preamble if preamble: - preamble_frames = preamble / IVAS_FRAME_LEN_MS - if not preamble_frames.is_integer(): - raise ValueError( - f"ISM metadata padding and trimming only possible if pad/trim length is multiple of frame length. 
" - f"Frame length: {IVAS_FRAME_LEN_MS}ms" - ) - for obj_idx in range(len(concat_meta_all_obj)): - if ( - concat_meta_all_obj is not None - and concat_meta_all_obj[obj_idx] is not None - ): - concat_meta_all_obj[obj_idx] = trim( - concat_meta_all_obj[obj_idx], - limits=(-int(preamble_frames), 0), - samples=True, - ) - - # add radius 1 - concat_meta_all_obj[obj_idx][: int(preamble_frames), 2] = 1 - pass + concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble) write_ISM_metadata_in_file(concat_meta_all_obj, out_file) @@ -529,7 +510,7 @@ def check_ISM_metadata( def metadata_search( - in_meta: Union[str, Path], + in_meta_path: Union[str, Path], item_names: list[Union[str, Path]], num_objects: int, ) -> list[list[Union[Path, str]]]: @@ -542,7 +523,7 @@ def metadata_search( for item in item_names: list_item = [] for obj_idx in range(num_objects): - file_name_meta = in_meta / Path(item.stem).with_suffix( + file_name_meta = in_meta_path / Path(item.stem).with_suffix( f"{item.suffix}.{obj_idx}.csv" ) # check if file exists and add to list @@ -556,3 +537,35 @@ def metadata_search( list_meta.append(list_item) return list_meta + + +def add_remove_preamble( + metadata, + preamble, + add: Optional[bool] = True, +): + preamble_frames = preamble / IVAS_FRAME_LEN_MS + if not preamble_frames.is_integer(): + raise ValueError( + f"Application of preamble for ISM metadata is only possible if preamble length is multiple of frame length. 
" + f"Frame length: {IVAS_FRAME_LEN_MS}ms" + ) + for obj_idx in range(len(metadata)): + if metadata is not None and metadata[obj_idx] is not None: + if add: + metadata[obj_idx] = trim( + metadata[obj_idx], + limits=(-int(preamble_frames), 0), + samples=True, + ) + + # add radius 1 + metadata[obj_idx][: int(preamble_frames), 2] = 1 + else: + metadata[obj_idx] = trim( + metadata[obj_idx], + limits=(int(preamble_frames), 0), + samples=True, + ) + + return metadata diff --git a/ivas_processing_scripts/audiotools/wrappers/gen_patt.py b/ivas_processing_scripts/audiotools/wrappers/gen_patt.py index aa480af1103e972ba9173d31df1055bf53a47277..cfe5d5520482d3ab20faf60a7d2fa1a098b145e0 100644 --- a/ivas_processing_scripts/audiotools/wrappers/gen_patt.py +++ b/ivas_processing_scripts/audiotools/wrappers/gen_patt.py @@ -131,7 +131,7 @@ def create_error_pattern( tmp_sta_file = tmp_dir.joinpath("sta") # compute seed - seed = random_seed(master_seed, prerun_seed) + seed = random_seed((0, 99999999), master_seed, prerun_seed) # open file and modify lines = [] diff --git a/ivas_processing_scripts/audiotools/wrappers/random_seed.py b/ivas_processing_scripts/audiotools/wrappers/random_seed.py index 802f68b9e78ff95d8426e44bb0e8a837279149e7..fd5b0cdd3424b856e7447ca6313cb949c91fcafd 100644 --- a/ivas_processing_scripts/audiotools/wrappers/random_seed.py +++ b/ivas_processing_scripts/audiotools/wrappers/random_seed.py @@ -30,12 +30,13 @@ # the United Nations Convention on Contracts on the International Sales of Goods. 
# -from typing import Optional +from typing import Optional, Tuple from ivas_processing_scripts.utils import find_binary, run def random_seed( + range: Tuple[int, int], master_seed: Optional[int] = 0, prerun_seed: Optional[int] = 0, hexa: Optional[bool] = True, @@ -70,8 +71,8 @@ def random_seed( "-d", str(prerun_seed), "-r", # value range for results - str(0), - str(99999999), + str(range[0]), + str(range[1]), ] # run command diff --git a/ivas_processing_scripts/constants.py b/ivas_processing_scripts/constants.py index 5d92d19209143eaad21e3967567746a2998ffc97..7f02695706eeab181239540346f1af68318ede79 100755 --- a/ivas_processing_scripts/constants.py +++ b/ivas_processing_scripts/constants.py @@ -59,15 +59,6 @@ DEFAULT_CONFIG = { "multiprocessing": True, "delete_tmp": False, "master_seed": 0, - "prerun_seed": 0, - "concatenate_input": False, - "concatenation_order": None, - "concat_silence": { - "pre": 0, - "post": 0, - }, - "preamble": None, - "pad_noise_preamble": False, "metadata_path": None, # postprocessing "postprocessing": { @@ -98,6 +89,7 @@ REQUIRED_KEYS = [ "input_path", "output_path", ("postprocessing", {"fmt"}), + ("postprocessing", {"fs"}), "conditions_to_generate", ] REQUIRED_KEYS_EVS = {"bitrates"} diff --git a/ivas_processing_scripts/processing/chains.py b/ivas_processing_scripts/processing/chains.py index 7f54330d7c634d9839db5d33d177166966b2334d..f830a18b15f55508bf5d49b1f082b29b1cc92855 100755 --- a/ivas_processing_scripts/processing/chains.py +++ b/ivas_processing_scripts/processing/chains.py @@ -37,6 +37,7 @@ from ivas_processing_scripts.processing.evs import EVS from ivas_processing_scripts.processing.ivas import IVAS from ivas_processing_scripts.processing.postprocessing import Postprocessing from ivas_processing_scripts.processing.preprocessing import Preprocessing +from ivas_processing_scripts.processing.preprocessing_2 import Preprocessing2 from ivas_processing_scripts.utils import list_audio @@ -46,6 +47,9 @@ def init_processing_chains(cfg: 
TestConfig) -> None: if hasattr(cfg, "preprocessing"): cfg.proc_chains.append(get_preprocessing(cfg)) + if hasattr(cfg, "preprocessing_2"): + cfg.proc_chains.append(get_preprocessing_2(cfg)) + # other processing chains for cond_name, cond_cfg in cfg.conditions_to_generate.items(): bitrates = cond_cfg.get("bitrates") @@ -119,6 +123,51 @@ def get_preprocessing(cfg: TestConfig) -> dict: return chain +def get_preprocessing_2(cfg: TestConfig) -> dict: + """Mapping from test configuration to preprocessing 2 keyword arguments""" + chain = { + "name": "preprocessing_2", + "processes": [], + } + + pre2_cfg = cfg.preprocessing_2 + background_cfg = pre2_cfg.get("background_noise", None) + if background_cfg: + background = { + "snr": background_cfg.get("snr", None), + "background_noise_path": background_cfg.get("background_noise_path", None), + "seed_delay": background_cfg.get("seed_delay", 0), + "master_seed": cfg.master_seed, + "output_fmt": cfg.postprocessing["fmt"], + } + else: + background = None + + # default to input values if preprocessing was not requested + pre_cfg = getattr(cfg, "preprocessing", {}) + tmp_in_fs = pre_cfg.get("fs", cfg.input.get("fs")) + tmp_in_fmt = pre_cfg.get("fmt", cfg.input["fmt"]) + + chain["processes"].append( + Preprocessing2( + { + "in_fs": tmp_in_fs, + "in_fmt": tmp_in_fmt, + "out_fmt": cfg.postprocessing["fmt"], + "concatenate_input": pre2_cfg.get("concatenate_input", False), + "concatenation_order": pre2_cfg.get("concatenation_order", None), + "preamble": pre2_cfg.get("preamble", 0), + "pad_noise_preamble": pre2_cfg.get("preamble_noise", False), + "background_noise": background, + "in_hp50": pre2_cfg.get("hp50", False), + "multiprocessing": cfg.multiprocessing, + } + ) + ) + + return chain + + def get_processing_chain( condition: str, cfg: TestConfig, bitrate: Optional[int] = None ) -> dict: @@ -199,7 +248,7 @@ def get_processing_chain( "error_pattern": cfg.tx.get("error_pattern", None), "error_rate": cfg.tx.get("error_rate", None), 
"master_seed": cfg.master_seed, - "prerun_seed": cfg.prerun_seed, + "prerun_seed": cfg.tx.get("prerun_seed", 0), } elif cfg.tx.get("type", None) == "JBM": tx_cfg = { @@ -215,6 +264,11 @@ def get_processing_chain( else: tx_cfg = None + if hasattr(cfg, "preprocessing_2"): + preamble = cfg.preprocessing_2.get("preamble", 0) + else: + preamble = 0 + chain["processes"].append( EVS( { @@ -228,7 +282,7 @@ def get_processing_chain( "dec_opts": dec_cfg.get("opts"), "multiprocessing": cfg.multiprocessing, "tx": tx_cfg, - "preamble": cfg.preamble, + "preamble": preamble, } ) ) @@ -249,7 +303,7 @@ def get_processing_chain( "error_pattern": cfg.tx.get("error_pattern", None), "error_rate": cfg.tx.get("error_rate", None), "master_seed": cfg.master_seed, - "prerun_seed": cfg.prerun_seed, + "prerun_seed": cfg.tx.get("prerun_seed", 0), } elif cfg.tx.get("type", None) == "JBM": tx_cfg = { @@ -265,6 +319,11 @@ def get_processing_chain( else: tx_cfg = None + if hasattr(cfg, "preprocessing_2"): + preamble = cfg.preprocessing_2.get("preamble", 0) + else: + preamble = 0 + chain["processes"].append( IVAS( { @@ -279,7 +338,7 @@ def get_processing_chain( "dec_opts": dec_cfg.get("opts"), "multiprocessing": cfg.multiprocessing, "tx": tx_cfg, - "preamble": cfg.preamble, + "preamble": preamble, } ) ) diff --git a/ivas_processing_scripts/processing/preprocessing_2.py b/ivas_processing_scripts/processing/preprocessing_2.py new file mode 100644 index 0000000000000000000000000000000000000000..425dbd23cb20b4c8731823872741c4d3f3f072f5 --- /dev/null +++ b/ivas_processing_scripts/processing/preprocessing_2.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+# + +import logging +from pathlib import Path +from warnings import warn + +import numpy as np + +from ivas_processing_scripts.audiotools import audio +from ivas_processing_scripts.audiotools.audioarray import delay, trim +from ivas_processing_scripts.audiotools.audiofile import write +from ivas_processing_scripts.audiotools.metadata import ( + add_remove_preamble, + write_ISM_metadata_in_file, +) +from ivas_processing_scripts.audiotools.wrappers.bs1770 import ( + get_loudness, + loudness_norm, +) +from ivas_processing_scripts.audiotools.wrappers.random_seed import random_seed +from ivas_processing_scripts.processing.processing import Processing + + +class Preprocessing2(Processing): + def __init__(self, attrs: dict): + super().__init__(attrs) + self.name = "pre_2" + + def process(self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger): + logger.debug(f"Preprocessing2 configuration : {self.__dict__}") + logger.debug(f"Preprocessing2 {in_file.absolute()} -> {out_file.absolute()}") + + # load in file + audio_object = audio.fromfile( + self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta + ) + + # add preamble + if self.preamble: + # also apply preamble to ISM metadata + if self.in_fmt.startswith("ISM"): + # read out old + metadata = [] + for meta in in_meta: + metadata.append(np.genfromtxt(meta, delimiter=",")) + + # modify metadata + metadata = add_remove_preamble(metadata, self.preamble) + meta_files = write_ISM_metadata_in_file(metadata, [out_file], True) + + # modify audio object + audio_object.metadata_files = meta_files + audio_object.obect_pos = metadata + + # add preamble to actual signal + audio_object.audio = trim( + audio_object.audio, + audio_object.fs, + (-self.preamble, 0), + self.pad_noise_preamble, + ) + + # add background noise + if self.background_noise: + audio_object.audio = self.add_background_noise(audio_object, in_meta) + + # save file + write(out_file, audio_object.audio, fs=audio_object.fs) + + return + + def 
add_background_noise(self, audio_object: audio.Audio, in_meta) -> np.ndarray: + # range for random delay + range_delay = (1, 2400000) + + # load background noise + noise_object = audio.fromfile( + self.in_fmt, + self.background_noise["background_noise_path"], + fs=self.in_fs, + in_meta=in_meta, + ) + + # if noise is too short raise error + if len(noise_object.audio) < len(audio_object.audio): + raise ValueError("Background noise too short for audio signal") + if len(noise_object.audio) - range_delay[1] < len(audio_object.audio): + warn( + "Background noise may be to short for audio signal when considering the random delay" + ) + + # measure loudness of audio signal based on output format + tmp_object = audio.fromtype(self.out_fmt) + if ( + isinstance(tmp_object, audio.ObjectBasedAudio) + or isinstance(tmp_object, audio.SceneBasedAudio) + or isinstance(tmp_object, audio.MetadataAssistedSpatialAudio) + ): + out_format = None + else: + out_format = self.out_fmt + + loudness_signal, _ = get_loudness(audio_object, loudness_format=out_format) + + # compute desired loudness of background noise + loudness_noise = loudness_signal - self.background_noise["snr"] + + # apply random delay and cut signal + rand_delay = random_seed( + range=range_delay, + master_seed=self.background_noise["master_seed"], + prerun_seed=self.background_noise["seed_delay"], + hexa=False, + ) + noise_object.audio = delay( + noise_object.audio, delay=-rand_delay, samples=True, fs=noise_object.fs + )[: len(audio_object.audio)] + + # scale background noise to desired loudness based on output format + noise_object.audio = loudness_norm(noise_object, loudness_noise, out_format) + + # add array to signal + audio_object.audio = noise_object.audio + audio_object.audio + + return audio_object.audio diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 99f33564d32debd2f45201b2e53b74abc56f5ba2..4b367606abf120a0ebd97aa3635b36c9ea5b42b6 100755 --- 
a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -38,12 +38,24 @@ from shutil import copyfile from typing import Iterable, Union from warnings import warn +import numpy as np + from ivas_processing_scripts.audiotools import audio -from ivas_processing_scripts.audiotools.audiofile import concat, split +from ivas_processing_scripts.audiotools.audiofile import ( + concat, + read, + split, + trim, + write, +) from ivas_processing_scripts.audiotools.metadata import ( + add_remove_preamble, concat_meta_from_file, + metadata_search, split_meta_in_file, + write_ISM_metadata_in_file, ) +from ivas_processing_scripts.audiotools.wrappers.bs1770 import scale_files from ivas_processing_scripts.constants import LOGGER_DATEFMT, LOGGER_FORMAT from ivas_processing_scripts.processing.config import TestConfig from ivas_processing_scripts.utils import apply_func_parallel, list_audio, pairwise @@ -68,16 +80,24 @@ def reorder_items_list(items_list: list, concatenation_order: list) -> list: return ordered_full_files -def concat_setup(cfg: TestConfig, logger: logging.Logger): +def concat_setup(cfg: TestConfig, chain, logger: logging.Logger): n_items_list = len(cfg.items_list) - if cfg.concatenation_order is not None: - n_concatenation_order = len(cfg.concatenation_order) + cfg_pre2 = chain[0] + + # check for text files + if any([i for i in cfg.items_list if i.suffix == ".txt"]): + raise SystemExit("Concatenation for text files is unsupported") + + # apply concatenation order + if cfg_pre2.concatenation_order is not None: + n_concatenation_order = len(cfg_pre2.concatenation_order) if n_concatenation_order != n_items_list: warn( - f"Warning: Mismatch in specified concatenation order and number of items to process!\nNumber of items specified in concatenation order: {n_concatenation_order}\nNumber of items in the directory: {n_items_list}\nConcatenation will use the following order:\n{cfg.concatenation_order}" + f"Warning: Mismatch in 
specified concatenation order and number of items to process!\n" + f"Number of items specified in concatenation order: {n_concatenation_order}\n" + f"Number of items in the directory: {n_items_list}\n" + f"Concatenation will use the following order:\n{cfg_pre2.concatenation_order}" ) - if any([i for i in cfg.items_list if i.suffix == ".txt"]): - raise SystemExit("Concatenation for text files is unsupported") logger.info(f"Concatenating input files in directory {cfg.input_path}") @@ -86,46 +106,34 @@ def concat_setup(cfg: TestConfig, logger: logging.Logger): cfg.concat_meta = [] for obj_idx in range(len(cfg.metadata_path[0])): cfg.concat_meta.append( - cfg.output_path.joinpath( + cfg.tmp_dirs[0].joinpath( f"{cfg.input_path.name}_concatenated.wav.{obj_idx}.csv" ) ) - concat_meta_from_file( cfg.items_list, cfg.metadata_path, cfg.concat_meta, - cfg.concat_silence.get("pre", 0), - cfg.concat_silence.get("post", 0), cfg.input["fmt"], - preamble=cfg.preamble, ) # set input to the concatenated file we have just written to the output dir cfg.metadata_path = [cfg.concat_meta] # concatenate audio - cfg.concat_file = cfg.output_path.joinpath( + cfg.concat_file = cfg.tmp_dirs[0].joinpath( f"{cfg.input_path.name}_concatenated.wav" ) # determine number of channels for pcm and raw files - if hasattr(cfg, "preprocessing"): - tmp_in_fmt = cfg.preprocessing.get("fmt", cfg.input["fmt"]) - else: - tmp_in_fmt = cfg.input["fmt"] - tmp_audio = audio.fromtype(tmp_in_fmt) + tmp_audio = audio.fromtype(cfg_pre2.in_fmt) tmp_num_chans = tmp_audio.num_channels cfg.splits = concat( cfg.items_list, cfg.concat_file, - cfg.concat_silence.get("pre", 0), - cfg.concat_silence.get("post", 0), in_fs=cfg.input.get("fs", 48000), num_channels=tmp_num_chans, - preamble=cfg.preamble, - pad_noise_preamble=cfg.pad_noise_preamble, ) # save item naming for splits naming in the end @@ -158,7 +166,11 @@ def concat_teardown(cfg: TestConfig, logger: logging.Logger): for odir in cfg.out_dirs: path_input = odir / 
cfg.items_list[0].name out_paths = split( - path_input, odir, cfg.split_names, cfg.splits, preamble=cfg.preamble + path_input, + odir, + cfg.split_names, + cfg.splits, + in_fs=cfg.postprocessing["fs"], ) logger.debug( @@ -176,7 +188,7 @@ def concat_teardown(cfg: TestConfig, logger: logging.Logger): cfg.split_names, cfg.splits, output_format, - preamble=cfg.preamble, + meta_files=cfg.metadata_path[0], ) out_meta.append(out_meta_paths) @@ -187,7 +199,7 @@ def concat_teardown(cfg: TestConfig, logger: logging.Logger): return out_files, out_meta -def preprocess(cfg, in_meta, logger): +def preprocess(cfg, logger): preprocessing = cfg.proc_chains[0] chain = preprocessing["processes"] @@ -202,7 +214,7 @@ def preprocess(cfg, in_meta, logger): repeat(cfg.out_dirs[0]), repeat(chain), repeat(logger), - in_meta, + cfg.metadata_path, ), None, "mp" if cfg.multiprocessing else None, @@ -214,7 +226,11 @@ def preprocess(cfg, in_meta, logger): ) # Re-ordering items based on concatenation order - if cfg.concatenate_input and cfg.concatenation_order is not None: + if ( + hasattr(cfg, "preprocessing_2") + and cfg.preprocessing_2.get("concatenate_input", False) + and cfg.preprocessing_2.get("concatenation_order", None) is not None + ): cfg.items_list = reorder_items_list(cfg.items_list, cfg.concatenation_order) if cfg.metadata_path[0] is not None: @@ -230,6 +246,98 @@ def preprocess(cfg, in_meta, logger): cfg.out_dirs = cfg.out_dirs[1:] +def preprocess_2(cfg, logger): + preprocessing_2 = cfg.proc_chains[0] + chain = preprocessing_2["processes"] + + logger.info(f" Generating condition: {preprocessing_2['name']}") + + # concatenate items if required + if chain[0].concatenate_input: + concat_setup(cfg, chain, logger) + + # run preprocessing 2 + apply_func_parallel( + process_item, + zip( + cfg.items_list, + repeat(cfg.tmp_dirs[0]), + repeat(cfg.out_dirs[0]), + repeat(chain), + repeat(logger), + cfg.metadata_path, + ), + None, + "mp" if cfg.multiprocessing else None, + ) + + # update the 
configuration to use preprocessing 2 outputs as new inputs + cfg.items_list = list_audio( + cfg.out_dirs[0], absolute=False, select_list=getattr(cfg, "input_select", None) + ) + + # Re-ordering items based on concatenation order + if ( + hasattr(cfg, "preprocessing_2") + and cfg.preprocessing_2.get("concatenate_input", False) + and cfg.preprocessing_2.get("concatenation_order", None) is not None + ): + cfg.items_list = reorder_items_list(cfg.items_list, cfg.concatenation_order) + + if cfg.metadata_path[0] is not None: + for item_idx in range(len(cfg.metadata_path)): + for obj_idx in range(len(cfg.metadata_path[item_idx])): + if cfg.metadata_path[item_idx][obj_idx]: + cfg.metadata_path[item_idx][obj_idx] = cfg.out_dirs[0] / Path( + f"{cfg.items_list[item_idx].stem}.wav.{obj_idx}.csv" + ) + # remove already applied processing stage + cfg.proc_chains = cfg.proc_chains[1:] + cfg.tmp_dirs = cfg.tmp_dirs[1:] + cfg.out_dirs = cfg.out_dirs[1:] + + return + + +def reverse_process_2(cfg, logger): + # remove preamble + if cfg.pre2.preamble: + remove_preamble(cfg) + + # reverse concatenation + if cfg.pre2.concatenate_input: + # write out the splits, optionally remove file + out_paths_splits, out_meta_splits = concat_teardown(cfg, logger) + else: + # if no concatenation read files from folder + out_paths_splits = [] + for out_dir in cfg.out_dirs: + list_audio_dir = list_audio(out_dir, absolute=True) + out_paths_splits.append(list_audio_dir) + if cfg.postprocessing["fmt"].startswith("ISM"): + out_meta_splits = [] + for i, condition in enumerate(out_paths_splits): + meta_condition = metadata_search( + cfg.out_dirs[i], + condition, + num_objects=int(cfg.postprocessing["fmt"][-1]), + ) + out_meta_splits.append(meta_condition) + else: + out_meta_splits = None + + # scale individual files + if cfg.postprocessing.get("loudness", False): + scale_files( + out_paths_splits, + cfg.postprocessing["fmt"], + cfg.postprocessing["loudness"], + cfg.postprocessing["fs"], + out_meta_splits, + ) + 
return + + def process_item( in_file: Union[Path, str], tmp_dir: Union[Path, str], @@ -304,3 +412,44 @@ def process_item( if processing_paths_meta[-1]: for idx, ppm in enumerate(processing_paths_meta[-1]): copyfile(ppm, out_meta[idx]) + + +def remove_preamble(cfg): + # get number of channels from output format + num_channels = audio.fromtype(cfg.postprocessing["fmt"]).num_channels + for odir in cfg.out_dirs: + for item in cfg.items_list: + path_input = odir / item.name + + # remove preamble for ISM metadata + if cfg.postprocessing["fmt"].startswith("ISM"): + # search for metadata + meta_item = metadata_search( + odir, [Path(item.name)], num_objects=num_channels + ) + metadata_array = [] + for meta_i in meta_item: + metadata_array.append(np.genfromtxt(meta_i, delimiter=",")) + + # remove preamble + metadata_array = add_remove_preamble( + metadata_array, cfg.pre2.preamble, add=False + ) + + # write csv files + write_ISM_metadata_in_file( + metadata_array, [path_input], automatic_naming=True + ) + + # read file + x, fs = read( + path_input, nchannels=num_channels, fs=cfg.postprocessing["fs"] + ) + + # remove preamble + x = trim(x, fs, (cfg.pre2.preamble, 0)) + + # write file + write(path_input, x, fs) + + return diff --git a/tests/data/test_ISM.yml b/tests/data/test_ISM.yml index 66753c9fdb3cb95862db7d65ea56e905ad2b406c..6be2b37766db1dade33d22d0fa6fa8176c023405 100644 --- a/tests/data/test_ISM.yml +++ b/tests/data/test_ISM.yml @@ -216,7 +216,7 @@ postprocessing: ### REQUIRED: Target format for output fmt: "BINAURAL" ### Target sampling rate in Hz for resampling; default = null (no resampling) - # fs: 16000 + fs: 48000 ### Low-pass cut-off frequency in Hz; default = null (no filtering) # lp_cutoff: 24000 ### Target loudness in LKFS; default = null (no loudness change applied) diff --git a/tests/data/test_MC.yml b/tests/data/test_MC.yml index c8c3cd1b0674e5f140d77d676853484063065702..81cb34467afbcc1a7e335aa6868efedb8bd9a38a 100644 --- a/tests/data/test_MC.yml +++ 
b/tests/data/test_MC.yml @@ -211,7 +211,7 @@ postprocessing: ### REQUIRED: Target format for output fmt: "BINAURAL" ### Target sampling rate in Hz for resampling; default = null (no resampling) - # fs: 16000 + fs: 48000 ### Low-pass cut-off frequency in Hz; default = null (no filtering) # lp_cutoff: 24000 ### Target loudness in LKFS; default = null (no loudness change applied) diff --git a/tests/data/test_SBA.yml b/tests/data/test_SBA.yml index cbdeecd4ef86d137654e34aaa702dc95460e32ff..da243a2142c283d023bf7320b0cb2c7101bb5b3a 100644 --- a/tests/data/test_SBA.yml +++ b/tests/data/test_SBA.yml @@ -213,7 +213,7 @@ postprocessing: ### REQUIRED: Target format for output fmt: "BINAURAL" ### Target sampling rate in Hz for resampling; default = null (no resampling) - # fs: 16000 + fs: 48000 ### Low-pass cut-off frequency in Hz; default = null (no filtering) # lp_cutoff: 24000 ### Target loudness in LKFS; default = null (no loudness change applied)