diff --git a/.gitignore b/.gitignore index 3f2d086e47f62e9957daa6d3f2ad1d08beb0bf72..29582b6d9c3b4da9f18dad2e32cbe0fd1a3afc4d 100644 --- a/.gitignore +++ b/.gitignore @@ -15,7 +15,7 @@ experiments/selection/*/proc_input/*.wav experiments/selection/*/proc_input/*.pcm experiments/selection/*/proc_output/ *~ -tests/tmp_output_* +tests/temp_output_* tests/cut tests/ref tests/concatenation_folder \ No newline at end of file diff --git a/examples/TEMPLATE.yml b/examples/TEMPLATE.yml index 7439db3f11359f26fe6e3ea04eccd24ddb5cd368..0dcd1ae86990535ddef5ec071a562a1922652eda 100755 --- a/examples/TEMPLATE.yml +++ b/examples/TEMPLATE.yml @@ -25,11 +25,12 @@ ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. ### Do not use file names with dots "." in them! This is not supported, use "_" instead -### For Windows user: please use double back slash '\\' in paths and add '.exe' to executable definitions +### Do not use "tmp_" in file or folder names ("temp_" is fine) +### For Windows user: please use double back slash '\\' in paths ### REQUIRED: Input path or file input_path: ".../ivas/items/HOA3" ### REQUIRED: Output path or file -output_path: ".../tmp_output" +output_path: ".../temp_output" ### Metadata path or file(s) ### If input format is ISM{1-4} a path for the metadata files can be specified; ### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherise ignored) diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index 29d47f82d64e5b6cddd3b45c16c44a85856e30a1..51ba68a2d4c60b8e013ccc2a5fc74f41cc64f9d6 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -48,10 +48,7 @@ from ivas_processing_scripts.processing.processing import ( preprocess_2, preprocess_background_noise, process_item, - rename_generated_conditions, reorder_items_list, - reverse_process_2, - scale_resulting_files, ) from ivas_processing_scripts.utils import DirManager, apply_func_parallel @@ -178,18 +175,6 @@ def main(args): "mp" if cfg.multiprocessing else None, ) - # remove preamble and split signals - if hasattr(cfg, "preprocessing_2"): - reverse_process_2(cfg, logger) - - # scale individual files - if cfg.postprocessing.get("loudness", False): - scale_resulting_files(cfg, logger) - - # rename output with condition name - if cfg.condition_in_output_filename: - rename_generated_conditions(cfg.output_path) - # copy configuration to output directory with open(cfg.output_path.joinpath(f"{cfg.name}.yml"), "w") as f: yaml.safe_dump(cfg._yaml_dump, f) diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index adc6e6d00fbd4259df4aed7460fae8e66ad781b4..e26c8c6fc95a4c0182a719aeb40d59d41ce2ddce 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -293,7 +293,7 @@ def process_audio( logger.debug( f"Applying loudness adjustment to {loudness} LKFS for format {loudness_fmt} using ITU STL bs1770demo" ) - x.audio = loudness_norm(x, loudness, loudness_fmt, logger=logger) + x.audio, _ = loudness_norm(x, loudness, loudness_fmt, logger=logger) """limiting""" if limit: diff --git a/ivas_processing_scripts/audiotools/wrappers/bs1770.py b/ivas_processing_scripts/audiotools/wrappers/bs1770.py index 0d9c8ff47ae86d39c857a731f2668d8ea1db671b..f8052bc41b940576bef4ac0732f5f1318f21514c 100755 --- 
a/ivas_processing_scripts/audiotools/wrappers/bs1770.py +++ b/ivas_processing_scripts/audiotools/wrappers/bs1770.py @@ -225,7 +225,7 @@ def loudness_norm( rms: Optional[bool] = False, logger: Optional[logging.Logger] = None, file_name_logging: Optional[Union[str, Path]] = None, -) -> np.ndarray: +) -> Tuple[np.ndarray, float]: """ Iterative loudness normalization using ITU-R BS.1770-4 Signal is iteratively scaled after rendering to the specified format @@ -248,13 +248,16 @@ def loudness_norm( Returns ------- - norm : Audio + norm: Audio Normalized audio + scaling_factor: float + applied scaling factor """ measured_loudness = np.inf num_iter = 1 scaled_input = copy.deepcopy(input) + scaling_factor = 1.0 # save loudness before and after scaling for the logger info loudness_before, scale_factor_new, loundness_fmt_used = get_loudness( @@ -265,6 +268,7 @@ def loudness_norm( while np.abs(measured_loudness - target_loudness) > 0.5 and num_iter <= 10: # scale input scaled_input.audio *= scale_factor_new + scaling_factor *= scale_factor_new # measure loudness and get scaling factor measured_loudness, scale_factor_new, _ = get_loudness( @@ -291,65 +295,4 @@ def loudness_norm( f"Loudness did not converge to desired value, stopping at: {loudness_after:.2f}" ) - return scaled_input.audio - - -def scale_files( - file_list: list[list[Union[Path, str]]], - fmt: str, - loudness: float, - loudness_format: Optional[str] = None, - fs: Optional[int] = 48000, - in_meta: Optional[list] = None, - logger: Optional[logging.Logger] = None, -) -> None: - """ - Scales audio files to desired loudness - - Parameters - ---------- - file_list : list[list[Union[Path, str]]] - List of file paths in a list of the condition folders - fmt: str - Audio format of files in list - loudness: float - Desired loudness level in LKFS/dBov - loudness_format: Optional[str] - Format for loudness measurement - fs: Optional[int] - Sampling rate - in_meta: Optional[list] - Metadata for ISM with same structure as file_list but one layer more - for the list of metadata for one file - logger: Optional[logging.Logger] - Logger to log loudness information - """ - - if fmt.startswith("ISM"): - if in_meta: - meta_bool = True - else: - raise ValueError("No metadata available for loudness measurement") - else: - in_meta = copy.copy(file_list) - meta_bool = False - - for folder, meta_folder in zip(file_list, in_meta): - for file, meta in zip(folder, meta_folder): - # create audio object - if meta_bool: - audio_obj = audio.fromfile(fmt, file, fs, meta) - else: - audio_obj = audio.fromfile(fmt, file, fs) - - # adjust loudness - scaled_audio = loudness_norm( - audio_obj, - loudness, - loudness_format, - logger=logger, - file_name_logging=file, - ) - - # write into file - write(file, scaled_audio, audio_obj.fs) + return scaled_input.audio, scaling_factor diff --git a/ivas_processing_scripts/processing/chains.py b/ivas_processing_scripts/processing/chains.py index c25dd06208eec9b482ca06dfa7e71704e09e1ec4..cf7f43b63385193e7db40449327d457dff3ee484 100755 --- a/ivas_processing_scripts/processing/chains.py +++ b/ivas_processing_scripts/processing/chains.py @@ -39,6 +39,9 @@ from ivas_processing_scripts.processing.ivas import IVAS from ivas_processing_scripts.processing.postprocessing import Postprocessing from ivas_processing_scripts.processing.preprocessing import Preprocessing from ivas_processing_scripts.processing.preprocessing_2 import Preprocessing2 +from ivas_processing_scripts.processing.processing_splitting_scaling import ( + 
Processing_splitting_scaling, +) from ivas_processing_scripts.utils import get_abs_path, list_audio @@ -54,9 +57,6 @@ def init_processing_chains(cfg: TestConfig) -> None: # other processing chains for cond_name, cond_cfg in cfg.conditions_to_generate.items(): bitrates = cond_cfg.get("bitrates") - # TODO we may need to change this to ensure it is only one value for IVAS and a possible list for EVS - # condition naming will also need to be checked since we rename to {cond_name}_{bitrate} - # this may not be desired if bitrates is not None and len(bitrates) > 1: multiple_bitrates_flag = True else: @@ -203,12 +203,11 @@ def get_processing_chain( ) -> dict: """Mapping from test configuration to condition and postprocessing keyword arguments""" name = f"{condition}" - # TODO related to naming TODO above if bitrate: - if isinstance(bitrate, list): - name += f"_{sum(bitrate)}" - else: - if multiple_bitrates is True: + if multiple_bitrates: + if isinstance(bitrate, list): + name += f"_{sum(bitrate)}" + else: name += f"_{bitrate}" chain = { @@ -219,6 +218,7 @@ def get_processing_chain( # get pre and post processing configurations pre_cfg = getattr(cfg, "preprocessing", {}) post_cfg = cfg.postprocessing + pre2_cfg = getattr(cfg, "preprocessing_2", {}) # default to input values if preprocessing was not requested tmp_in_fs = pre_cfg.get("fs", cfg.input.get("fs")) @@ -230,6 +230,7 @@ def get_processing_chain( tmp_lp_cutoff = post_cfg.get("lp_cutoff") tmp_mnru_q = None tmp_esdru_alpha = None + tx_condition = False # override / add values based on specific conditions cond_cfg = cfg.conditions_to_generate[condition] @@ -245,6 +246,7 @@ def get_processing_chain( elif cond_cfg["type"] == "esdru": tmp_esdru_alpha = cond_cfg["alpha"] elif cond_cfg["type"] == "mono_dmx": + # add another postprocessing from in_fmt to mono chain["processes"].append( Postprocessing( { @@ -252,12 +254,12 @@ def get_processing_chain( "in_fmt": tmp_in_fmt, "out_fmt": "MONO", "multiprocessing": cfg.multiprocessing, + "tx_condition": False, }, name="mono_dmx", ) ) tmp_in_fmt = "MONO" - # add another postprocessing from in_fmt to mono elif cond_cfg["type"] == "evs": cod_cfg = cond_cfg["cod"] dec_cfg = cond_cfg["dec"] @@ -267,6 +269,9 @@ def get_processing_chain( # Frame error pattern bitstream modification if "tx" in cond_cfg.keys() or hasattr(cfg, "tx"): + # postprocess also signal without error if there is loudness scaling + if post_cfg.get("loudness"): + tx_condition = True # local specification overwrites global one if "tx" in cond_cfg.keys(): tx_cfg_tmp = cond_cfg["tx"] @@ -314,7 +319,9 @@ def get_processing_chain( "out_fs": tmp_in_fs, "out_fmt": cod_fmt, "multiprocessing": cfg.multiprocessing, - } + "tx_condition": False, + }, + name="cod_fmt", ) ) tmp_in_fmt = cod_fmt @@ -335,12 +342,12 @@ def get_processing_chain( "preamble": preamble, "evs_lfe_9k6bps_nb": evs_lfe_9k6bps_nb, "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + "tx_condition": tx_condition, } ) ) # update values to reflect decoder output tmp_in_fs = dec_cfg.get("fs", tmp_in_fs) - elif cond_cfg["type"] == "ivas": cod_cfg = cond_cfg["cod"] dec_cfg = cond_cfg["dec"] @@ -355,6 +362,9 @@ def get_processing_chain( # Frame error pattern bitstream modification if "tx" in cond_cfg.keys() or hasattr(cfg, "tx"): + # postprocess also signal without error if there is loudness scaling + if post_cfg.get("loudness"): + tx_condition = True # local specification overwrites global one if "tx" in cond_cfg.keys(): tx_cfg_tmp = cond_cfg["tx"] @@ -400,7 +410,9 @@ def 
get_processing_chain( "out_fs": tmp_in_fs, "out_fmt": cod_fmt, "multiprocessing": cfg.multiprocessing, - } + "tx_condition": False, + }, + name="cod_fmt", ) ) tmp_in_fmt = cod_fmt @@ -421,13 +433,13 @@ def get_processing_chain( "tx": tx_cfg, "preamble": preamble, "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + "tx_condition": tx_condition, } ) ) # update values to reflect decoder output tmp_in_fs = dec_cfg.get("fs", tmp_in_fs) tmp_in_fmt = dec_cfg.get("fmt", tmp_out_fmt) - else: raise SystemExit(f"Unknown condition {condition}!") @@ -440,16 +452,19 @@ def get_processing_chain( # add Postprocessing with only format conversion for each format except the last fmts = [tmp_in_fmt] + pre_fmts for fmt_in, fmt_out in zip(fmts[:-1], fmts[1:]): - chain["processes"].append( - Postprocessing( - { - "in_fs": tmp_in_fs, - "in_fmt": fmt_in, - "out_fs": tmp_in_fs, - "out_fmt": fmt_out, - } + if fmt_in != fmt_out: + chain["processes"].append( + Postprocessing( + { + "in_fs": tmp_in_fs, + "in_fmt": fmt_in, + "out_fs": tmp_in_fs, + "out_fmt": fmt_out, + "tx_condition": tx_condition, + }, + name=f"post_{fmt_out}", + ) ) - ) tmp_in_fmt = fmt_out chain["processes"].append( @@ -467,6 +482,24 @@ def get_processing_chain( "multiprocessing": cfg.multiprocessing, "mnru_q": tmp_mnru_q, "esdru_alpha": tmp_esdru_alpha, + "tx_condition": tx_condition, + } + ) + ) + # add splitting and scaling for all conditions + chain["processes"].append( + Processing_splitting_scaling( + { + "fs": post_cfg["fs"], + "in_fmt": post_cfg["fmt"], + "out_fmt": post_fmt, # no rendering here + "concatenate_input": pre2_cfg.get("concatenate_input", False), + "preamble": pre2_cfg.get("preamble", 0), + "repeat_signal": pre2_cfg.get("repeat_signal", False), + "loudness": post_cfg.get("loudness", None), + "loudness_fmt": post_cfg.get("loudness_fmt", None), + "tx_condition": tx_condition, + "condition_in_output_filename": cfg.condition_in_output_filename, } ) ) diff --git a/ivas_processing_scripts/processing/evs.py b/ivas_processing_scripts/processing/evs.py index d646e7a9e46042195f18332d37cce593a749cd45..6697a71ac1be1f1ea74a11a5f6ed1f391c55a086 100755 --- a/ivas_processing_scripts/processing/evs.py +++ b/ivas_processing_scripts/processing/evs.py @@ -36,7 +36,7 @@ import platform from itertools import repeat from pathlib import Path from shutil import copyfile -from typing import Optional, Union +from typing import Optional, Tuple, Union from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audiofile import ( @@ -195,6 +195,9 @@ class EVS(Processing): # run processing split_chan_bs = [f.with_suffix(".192") for f in split_chan_files] split_chan_out = [f.with_suffix(".pcm") for f in split_chan_files] + split_chan_out_noerror = [ + f.with_suffix(".noerror.pcm") for f in split_chan_files + ] # run all encoders logger.debug(f"Running EVS encoders for {out_file.stem.split('.')[0]}") @@ -212,6 +215,8 @@ class EVS(Processing): show_progress=False, ) + # apply bitstream processing and save unprocessed bitstream + split_chan_bs_unprocessed = split_chan_bs split_chan_bs = apply_func_parallel( self.simulate_tx, zip(split_chan_files, split_chan_bs, repeat(logger)), @@ -219,33 +224,69 @@ class EVS(Processing): "mt" if self.multiprocessing else None, show_progress=False, ) + voip = [scb[1] for scb in split_chan_bs] + split_chan_bs = [scb[0] for scb in split_chan_bs] - # run all decoders + # run all decoders twice with and without bitstream errors logger.debug(f"Running EVS decoders for 
{out_file.stem.split('.')[0]}") apply_func_parallel( self.dec, - zip(split_chan_bs, split_chan_out, repeat(logger)), + zip(split_chan_bs, split_chan_out, voip, repeat(logger)), None, "mt" if self.multiprocessing else None, show_progress=False, ) + if split_chan_bs_unprocessed != split_chan_bs: + apply_func_parallel( + self.dec, + zip( + split_chan_bs_unprocessed, + split_chan_out_noerror, + repeat(False), + repeat(logger), + ), + None, + "mt" if self.multiprocessing else None, + show_progress=False, + ) # combine the decoded channels into the output file if out_file.suffix in [".wav", ".pcm"]: combine(split_chan_out, out_file, in_fs=self.out_fs, is_planar=is_planar) + if split_chan_bs_unprocessed != split_chan_bs and self.tx_condition: + out_file_unprocessed = f"{Path(out_file.parent).joinpath(Path(out_file.name).with_suffix(''))}.noerror{out_file.suffix}" + combine( + split_chan_out_noerror, + out_file_unprocessed, + in_fs=self.out_fs, + is_planar=is_planar, + ) # copy ISM metadata for ISM pass-through - if in_meta: + if isinstance(self.in_fmt, audio.ObjectBasedAudio): for idx in range(len(in_meta)): out_file_meta = ( out_file.parent / f"{out_file.stem.split('.')[0]}.evs{out_file.suffix}.{idx}.csv" ) copyfile(in_meta[idx], out_file_meta) + if split_chan_bs_unprocessed != split_chan_bs and self.tx_condition: + out_file_meta_unprocessed = ( + out_file.parent + / f"{out_file.stem.split('.')[0]}.evs.noerror{out_file.suffix}.{idx}.csv" + ) + copyfile(in_meta[idx], out_file_meta_unprocessed) + # copy MASA metadata for MASA pass-through if isinstance(self.in_fmt, audio.MetadataAssistedSpatialAudio): md_file_in = in_file.parent / (in_file.name + ".met") md_file_out = out_file.parent / (out_file.name + ".met") copyfile(md_file_in, md_file_out) + if split_chan_bs_unprocessed != split_chan_bs and self.tx_condition: + md_file_out_noerror = out_file.parent / ( + Path(out_file_unprocessed).name + ".met" + ) + copyfile(md_file_in, md_file_out_noerror) + elif out_file.suffix == ".txt": raise NotImplementedError(".txt file support is WIP") # output_wav = out_file.replace(output_ext, ".wav") @@ -312,6 +353,9 @@ class EVS(Processing): cmd.extend(cod_opts) + # use quiet mode + cmd.extend(["-q"]) + cmd.extend( [ str(bitrate), @@ -328,7 +372,7 @@ class EVS(Processing): in_file: Union[Path, str], bitstream: Path, logger: Optional[logging.Logger] = None, - ) -> Union[Path, str]: + ) -> Tuple[Union[Path, str], bool]: if self.tx is not None: if self.tx["type"] == "JBM": bs, ext = os.path.splitext(bitstream) @@ -341,14 +385,8 @@ class EVS(Processing): self.tx["error_profile"], self.tx["n_frames_per_packet"], ) - # add -voip cmdline option to the decoder - # TODO: tracefile also? 
- if self.dec_opts: - if "-voip" not in self.dec_opts: - self.dec_opts.extend(["-voip"]) - else: - self.dec_opts = ["-voip"] - return bitstream_processed + voip = True + return bitstream_processed, voip elif self.tx["type"] == "FER": bs, ext = os.path.splitext(bitstream) @@ -372,24 +410,33 @@ class EVS(Processing): master_seed=self.tx["master_seed"], prerun_seed=self.tx["prerun_seed"], ) - - return bitstream_processed + voip = False + return bitstream_processed, voip else: - return bitstream + voip = False + return bitstream, voip def dec( self, bitstream: Path, out_pcm_file: Path, + voip: bool = False, logger: Optional[logging.Logger] = None, ) -> None: cmd = [self.dec_bin] if self._use_wine: cmd.insert(0, "wine") + # add -voip cmdline option to the decoder + if voip: + cmd.extend(["-voip"]) + if self.dec_opts: cmd.extend(self.dec_opts) + # use quiet mode + cmd.extend(["-q"]) + cmd.extend([str(self.out_fs // 1000), str(bitstream), str(out_pcm_file)]) run(cmd, logger=logger) diff --git a/ivas_processing_scripts/processing/ivas.py b/ivas_processing_scripts/processing/ivas.py index fc7d756d554dbd7f332462c79a028b253c4f382a..217f47ad87cbc723dcab2d7304f9102845e3ca9f 100755 --- a/ivas_processing_scripts/processing/ivas.py +++ b/ivas_processing_scripts/processing/ivas.py @@ -34,7 +34,7 @@ import logging import os.path import platform from pathlib import Path -from typing import Optional, Union +from typing import Optional, Tuple, Union from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audiofile import parse_wave_header, read @@ -122,17 +122,20 @@ class IVAS(Processing): bitstream = out_file.with_suffix(".192") + # encode signal self.enc(in_file, bitstream, in_meta, logger) - bitstream = self.simulate_tx(in_file, bitstream, logger) + # apply bitstream processing and save unprocessed bitstream + bitstream_noerror = bitstream + bitstream, voip = self.simulate_tx(in_file, bitstream, logger) - self.dec(bitstream, out_file, logger) - - if self.out_fmt == "EXT": - for i in range(self.in_fmt.num_channels): # TODO treffehn: check - # we need to read out_file.0.csv, out_file.1.csv ... - # self.out.object_pos = np.genfromtxt(out_file.with_suffix(f"{i}.0.csv"), delimiter=",") - ... + # decode twice with and without bitstream errors + self.dec(bitstream, out_file, voip=voip, logger=logger) + if bitstream_noerror != bitstream and self.tx_condition: + out_file_unprocessed = Path( + f"{out_file.parent.joinpath(out_file.stem)}.noerror{out_file.suffix}" + ) + self.dec(bitstream_noerror, out_file_unprocessed, voip=False, logger=logger) def enc( self, @@ -194,6 +197,9 @@ class IVAS(Processing): if self.cod_opts: cmd.extend(self.cod_opts) + # use quiet mode + cmd.extend(["-q"]) + fmt_codec = IVAS.parse_config(self.in_fmt, metadata_files) if fmt_codec == [""]: cmd.extend( @@ -222,7 +228,7 @@ class IVAS(Processing): in_file: Union[Path, str], bitstream: Path, logger: Optional[logging.Logger] = None, - ) -> Union[Path, str]: + ) -> Tuple[Union[Path, str], bool]: if self.tx is not None: if self.tx["type"] == "JBM": bs, ext = os.path.splitext(bitstream) @@ -236,15 +242,8 @@ class IVAS(Processing): self.tx["error_profile"], self.tx["n_frames_per_packet"], logger=logger, ) - # add -voip cmdline option to the decoder - # TODO: tracefile also?
- if self.dec_opts: - if "-voip" not in self.dec_opts: - self.dec_opts.extend(["-voip"]) - - else: - self.dec_opts = ["-voip"] - return bitstream_processed + voip = True + return bitstream_processed, voip elif self.tx["type"] == "FER": bs, ext = os.path.splitext(bitstream) @@ -270,13 +269,17 @@ class IVAS(Processing): master_seed=self.tx["master_seed"], prerun_seed=self.tx["prerun_seed"], ) - - return bitstream_processed + voip = False + return bitstream_processed, voip else: - return bitstream + return bitstream, False def dec( - self, bitstream: Path, out_file: Path, logger: Optional[logging.Logger] = None + self, + bitstream: Path, + out_file: Path, + voip: bool = False, + logger: Optional[logging.Logger] = None, ) -> None: logger.debug(f"IVAS decoder {bitstream} -> {out_file}") @@ -287,9 +290,15 @@ class IVAS(Processing): if hasattr(self, "trajectory"): cmd.extend(["-T", self.trajectory]) + # add -voip cmdline option to the decoder + if voip: + cmd.extend(["-voip"]) if self.dec_opts: cmd.extend(self.dec_opts) + # use quiet mode + cmd.extend(["-q"]) + if self.out_fmt.name.startswith("ISM") or self.out_fmt.name.startswith("MASA"): output_format = "EXT" elif self.in_fmt.name == "MONO": diff --git a/ivas_processing_scripts/processing/postprocessing.py b/ivas_processing_scripts/processing/postprocessing.py index 1d2f224014ca62efae633a3e8e36ab4d4a6b02d6..b96d6b5c6e81342b35b6d90c384c3b6df035cfc4 100755 --- a/ivas_processing_scripts/processing/postprocessing.py +++ b/ivas_processing_scripts/processing/postprocessing.py @@ -52,3 +52,24 @@ class Postprocessing(Processing): convert.convert_file( in_file, out_file, logger=logger, in_meta=in_meta, **self.__dict__ ) + # additional postprocessing of signal without error modification + if self.tx_condition: + in_file_no_error = Path(f"{in_file.with_suffix('')}.noerror.wav") + out_file_no_error = Path(f"{out_file.with_suffix('')}.noerror.wav") + + if in_meta: + in_meta_noerror = [] + for meta in in_meta: + path_parts = str(meta).split(".") + suffix = ".".join(path_parts[-3:]) + name = ".".join(path_parts[:-3]) + in_meta_noerror.append(Path(f"{name}.noerror.{suffix}")) + else: + in_meta_noerror = None + convert.convert_file( + in_file_no_error, + out_file_no_error, + logger=logger, + in_meta=in_meta_noerror, + **self.__dict__, + ) diff --git a/ivas_processing_scripts/processing/preprocessing_2.py b/ivas_processing_scripts/processing/preprocessing_2.py index b0f60519a87a6bb7f9dfc895dd9b20948a5a19e4..2d2fd5003b1152acb2e73f496061f5297a071536 100644 --- a/ivas_processing_scripts/processing/preprocessing_2.py +++ b/ivas_processing_scripts/processing/preprocessing_2.py @@ -65,7 +65,6 @@ class Preprocessing2(Processing): ) # add preamble - # also apply preamble to ISM metadata if self.in_fmt.startswith("ISM"): if not self.preamble: @@ -74,9 +73,7 @@ class Preprocessing2(Processing): preamble = self.preamble # read out old - metadata = [] - for meta in in_meta: - metadata.append(np.genfromtxt(meta, delimiter=",")) + metadata = audio_object.object_pos # modify metadata metadata = add_remove_preamble(metadata, preamble) @@ -171,7 +168,7 @@ class Preprocessing2(Processing): logger.debug( f"Scaling of background noise to {self.background_noise['snr']}dB SNR" ) - noise_object.audio = loudness_norm( + noise_object.audio, _ = loudness_norm( noise_object, loudness_noise, out_format, diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 
703bc381d06efd0ec7cd00468315dee67985d91a..b2424cbdce65addeacaae0745eb3f9c55497d5c5 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -31,7 +31,6 @@ # import logging -import re from abc import ABC, abstractmethod from itertools import repeat from pathlib import Path @@ -42,22 +41,14 @@ from warnings import warn import numpy as np from ivas_processing_scripts.audiotools import audio -from ivas_processing_scripts.audiotools.audiofile import ( - concat, - read, - split, - trim, - write, -) +from ivas_processing_scripts.audiotools.audioarray import window +from ivas_processing_scripts.audiotools.audiofile import concat, read, trim +from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS from ivas_processing_scripts.audiotools.convert.__init__ import convert from ivas_processing_scripts.audiotools.metadata import ( add_remove_preamble, concat_meta_from_file, - metadata_search, - split_meta_in_file, - write_ISM_metadata_in_file, ) -from ivas_processing_scripts.audiotools.wrappers.bs1770 import scale_files from ivas_processing_scripts.constants import LOGGER_DATEFMT, LOGGER_FORMAT from ivas_processing_scripts.processing.config import TestConfig from ivas_processing_scripts.utils import apply_func_parallel, list_audio, pairwise @@ -160,74 +151,66 @@ def concat_setup(cfg: TestConfig, chain, logger: logging.Logger): # set input to the concatenated file we have just written to the output dir cfg.items_list = [cfg.concat_file] - # save input sampling rate for splitting at the end - cfg.pre2.in_fs = fs - # write out splits - with open(cfg.concat_file.with_suffix(".splits.log"), "w") as f: + splits_info_file = cfg.concat_file.parent.joinpath(Path("undo_concat.txt")) + with open(splits_info_file, "w") as f: print(", ".join([str(s) for s in cfg.splits]), file=f) print(", ".join([str(sn) for sn in cfg.split_names]), file=f) - print(", ".join([str(i.stem) for i in cfg.items_list]), file=f) + print(f"{fs}", file=f) - logger.info(f"Splits written to file {cfg.concat_file.with_suffix('.splits.log')}") + logger.info(f"Splits written to file {splits_info_file}") -def concat_teardown(cfg: TestConfig, logger: logging.Logger): - if not cfg.splits: +def concat_teardown(x, splits, out_fmt, fs, in_fs, meta, logger: logging.Logger): + if not splits: raise ValueError("Splitting not possible without split marker") - output_format = cfg.postprocessing["fmt"] - if isinstance(output_format, list): - output_format = output_format[-1] - - out_files = [] - out_meta = [] - - logger.info(f"Splitting output file in directory {cfg.output_path}") + if logger: + logger.debug("Split files") # if sampling rate changed, adjust splits - fs_new = float(cfg.postprocessing["fs"]) - fs_old = float(cfg.pre2.in_fs) + fs_new = float(fs) + fs_old = float(in_fs) relative_fs_change = fs_new / fs_old new_splits = [] - for split_i in cfg.splits: + for split_i in splits: new_splits.append(int(float(split_i) * relative_fs_change)) - cfg.splits = new_splits - - for odir in cfg.out_dirs: - path_input = odir / cfg.items_list[0].name - out_paths = split( - path_input, - odir, - cfg.split_names, - cfg.splits, - in_fs=cfg.postprocessing["fs"], - ) + splits = new_splits - logger.debug( - f"Resulting split files condition {odir.name}: {', '.join([str(op) for op in out_paths])}" - ) - out_files.append(out_paths) - - # split ISM metadata - if output_format.startswith("ISM"): - for odir in cfg.out_dirs: - path_input = odir / cfg.items_list[0].name - out_meta_paths = 
split_meta_in_file( - path_input, - odir, - cfg.split_names, - cfg.splits, - output_format, - meta_files=cfg.metadata_path[0], - ) - out_meta.append(out_meta_paths) + split_old = 0 + split_signals = [] + split_meta = [] + for idx, split in enumerate(splits): + # split + y = x[split_old:split, :] + + # windowing + y = window(y) + + # add signal to list + split_signals.append(y) - # remove concatenated file - if cfg.delete_tmp: - cfg.concat_file.unlink(missing_ok=True) + # split ISM metadata + if out_fmt.startswith("ISM"): + split_meta_object = [] + for obj_meta in meta: + # compute number of frames per split + split_old_frames = int(split_old / IVAS_FRAME_LEN_MS / fs * 1000) + split_frames = int(split / IVAS_FRAME_LEN_MS / fs * 1000) - return out_files, out_meta + # split + obj_meta = obj_meta[split_old_frames:split_frames, :] + + # add signal to list + split_meta_object.append(obj_meta) + + split_meta.append(split_meta_object) + else: + split_meta = repeat(None) + + split_old = split + + return split_signals, split_meta def preprocess(cfg, logger): @@ -324,42 +307,6 @@ def preprocess_2(cfg, logger): return -def reverse_process_2(cfg, logger): - # remove preamble and first half of signal due to repetition - if cfg.pre2.preamble or cfg.pre2.repeat_signal: - remove_preamble(cfg, logger) - - fmt = cfg.postprocessing["fmt"] - if isinstance(fmt, list): - fmt = fmt[-1] - - # reverse concatenation - if cfg.pre2.concatenate_input: - # write out the splits, optionally remove file - out_paths_splits, out_meta_splits = concat_teardown(cfg, logger) - else: - # if no concatenation read files from folder - out_paths_splits = [] - for out_dir in cfg.out_dirs: - list_audio_dir = list_audio(out_dir) - out_paths_splits.append(list_audio_dir) - if fmt.startswith("ISM"): - out_meta_splits = [] - for i, condition in enumerate(out_paths_splits): - meta_condition = metadata_search( - cfg.out_dirs[i], - condition, - num_objects=int(fmt[-1]), - ) - out_meta_splits.append(meta_condition) - else: - out_meta_splits = None - - cfg.pre2.out_paths_splits = out_paths_splits - cfg.pre2.out_meta_splits = out_meta_splits - return - - def process_item( in_file: Union[Path, str], tmp_dir: Union[Path, str], @@ -433,67 +380,37 @@ def process_item( p.process(input, output, input_meta, item_logger) # copy output and metadata from final process to output file - copyfile(processing_paths[-1], out_file) - if processing_paths_meta[-1]: - for idx, ppm in enumerate(processing_paths_meta[-1]): - copyfile(ppm, out_meta[idx]) - - -def remove_preamble(cfg, logger): - # get number of channels from output format - fmt = cfg.postprocessing["fmt"] - if isinstance(cfg.postprocessing["fmt"], list): - fmt = fmt[-1] - - num_channels = audio.fromtype(fmt).num_channels - for odir in cfg.out_dirs: - for item in cfg.items_list: - path_input = odir / item.name - - # remove preamble for ISM metadata - if fmt.startswith("ISM"): - # search for metadata - meta_item = metadata_search( - odir, [Path(item.name)], num_objects=num_channels - ) - metadata_array = [] - for meta_i in meta_item: - metadata_array.append(np.genfromtxt(meta_i, delimiter=",")) - - # cut first half of the metadata - if cfg.pre2.repeat_signal: - metadata_array = [m[int(len(m) / 2) :, :] for m in metadata_array] - - # remove preamble - if cfg.pre2.preamble: - metadata_array = add_remove_preamble( - metadata_array, cfg.pre2.preamble, add=False - ) + if not chain[-1].name == "processing_splitting_scaling": + copyfile(processing_paths[-1], out_file) + if processing_paths_meta[-1]: + for idx, 
ppm in enumerate(processing_paths_meta[-1]): + copyfile(ppm, out_meta[idx]) - # write csv files - write_ISM_metadata_in_file( - metadata_array, [path_input], automatic_naming=True - ) - # read file - x, fs = read( - path_input, nchannels=num_channels, fs=cfg.postprocessing["fs"] - ) +def remove_preamble(x, out_fmt, fs, repeat_signal, preamble, meta, logger): + # remove preamble for ISM metadata + if out_fmt.startswith("ISM"): + # cut first half of the metadata + if repeat_signal: + meta = [m[int(len(m) / 2) :, :] for m in meta] - # remove first half of signal - if cfg.pre2.repeat_signal: - logger.info("Remove first half of signal") - x = x[int(len(x) / 2) :, :] + # remove preamble + if preamble: + meta = add_remove_preamble(meta, preamble, add=False) - # remove preamble - if cfg.pre2.preamble: - logger.info("Remove preamble") - x = trim(x, fs, (cfg.pre2.preamble, 0)) + # remove first half of signal + if repeat_signal: + if logger: + logger.debug("Remove first half of signal") + x = x[int(len(x) / 2) :, :] - # write file - write(path_input, x, fs) + # remove preamble + if preamble: + if logger: + logger.debug("Remove preamble") + x = trim(x, fs, (preamble, 0)) - return + return x, meta def preprocess_background_noise(cfg): @@ -576,60 +493,3 @@ def multiple_of_frame_size( warn( f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)." ) - - -def rename_generated_conditions(output_path: Path): - """ - Rename the output files. Only renames the files in directories that contain "cXX" in thier names. - The "XX" in "cXX" stands for the condition number, for example "c01" - - Parameters - ---------- - output_path: Path - Path to output directory - """ - directory = output_path - pattern = re.compile(r"^c\d{2}") - for subdirectory in directory.iterdir(): - if subdirectory.is_dir() and pattern.match(subdirectory.name): - for file_path in subdirectory.iterdir(): - new_filename = f"{file_path.stem}.{subdirectory.name}{file_path.suffix}" - file_path.rename(subdirectory / new_filename) - - -def scale_resulting_files(cfg, logger): - if hasattr(cfg, "preprocessing_2"): - out_paths_splits = cfg.pre2.out_paths_splits - out_meta_splits = cfg.pre2.out_meta_splits - else: - num_obj = audio.fromtype(cfg.postprocessing["fmt"]).num_channels - out_paths_splits = [] - if isinstance( - audio.fromtype(cfg.postprocessing["fmt"]), audio.ObjectBasedAudio - ): - out_meta_splits = [] - else: - out_meta_splits = None - item_names = [Path(i.name) for i in cfg.items_list] - for out_dir in cfg.out_dirs: - condition_list = [] - for item in item_names: - condition_list.append(out_dir.joinpath(item)) - out_paths_splits.append(condition_list) - if isinstance( - audio.fromtype(cfg.postprocessing["fmt"]), audio.ObjectBasedAudio - ): - out_meta_splits.append(metadata_search(out_dir, item_names, num_obj)) - - post_fmt = cfg.postprocessing["fmt"] - if isinstance(post_fmt, list): - post_fmt = post_fmt[-1] - scale_files( - out_paths_splits, - post_fmt, - cfg.postprocessing["loudness"], - cfg.postprocessing.get("loudness_fmt", None), - cfg.postprocessing["fs"], - out_meta_splits, - logger, - ) diff --git a/ivas_processing_scripts/processing/processing_splitting_scaling.py b/ivas_processing_scripts/processing/processing_splitting_scaling.py new file mode 100644 index 0000000000000000000000000000000000000000..af359b49ae92e5e1ced3c1517f08705431210097 --- /dev/null +++ b/ivas_processing_scripts/processing/processing_splitting_scaling.py @@ -0,0 +1,301 @@ +#!/usr/bin/env python3 +import logging 
+import re +from itertools import repeat +from pathlib import Path + +import numpy as np + +from ivas_processing_scripts.audiotools import audio +from ivas_processing_scripts.audiotools.audiofile import read, write +from ivas_processing_scripts.audiotools.metadata import write_ISM_metadata_in_file +from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm +from ivas_processing_scripts.processing.processing import ( + Processing, + concat_teardown, + remove_preamble, +) + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+# + + +class Processing_splitting_scaling(Processing): + def __init__(self, attrs: dict): + super().__init__(attrs) + self.name = "processing_splitting_scaling" + + def process(self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger): + logger.debug(f"Processing splitting scaling configuration : {self.__dict__}") + logger.debug(f"Processing splitting scaling {in_file.absolute()}") + + # get number of channels from output format + num_channels = audio.fromtype(self.out_fmt).num_channels + + # read file and metadata + x, fs = read(in_file, nchannels=num_channels, fs=self.fs) + if isinstance(audio.fromtype(self.out_fmt), audio.ObjectBasedAudio): + meta_arrays = [] + for meta in in_meta: + meta_arrays.append(np.genfromtxt(meta, delimiter=",")) + else: + meta_arrays = None + + # read file and metadata for signal with no bitstream errors + if self.tx_condition: + in_file_noerror = Path(f"{in_file.with_suffix('')}.noerror.wav") + out_file_noerror = Path(f"{out_file.with_suffix('')}.noerror.wav") + x_noerror, _ = read(in_file_noerror, nchannels=num_channels, fs=self.fs) + if isinstance(audio.fromtype(self.out_fmt), audio.ObjectBasedAudio): + meta_arrays_noerror = [] + for meta in in_meta: + path_parts = str(meta).split(".") + suffix = ".".join(path_parts[-3:]) + name = ".".join(path_parts[:-3]) + meta_noerror = Path(f"{name}.noerror.{suffix}") + meta_arrays_noerror.append( + np.genfromtxt(meta_noerror, delimiter=",") + ) + else: + meta_arrays_noerror = None + + # cut preamble and split file + ( + out_files, + file_splits, + out_meta, + meta_splits, + ) = self.revert_preamble_concatenation( + x, fs, in_file, out_file, meta_arrays, False, logger + ) + if self.tx_condition: + ( + out_files_noerror, + file_splits_noerror, + out_meta_noerror, + meta_splits_noerror, + ) = self.revert_preamble_concatenation( + x_noerror, + fs, + in_file_noerror, + out_file_noerror, + meta_arrays_noerror, + True, + logger=None, + ) + + # scale splitted files + if self.loudness: + if self.tx_condition: + # do special scaling -> measure noerror loudness and apply scaling to signal with error + scaling_splits = measure_loudness( + file_splits_noerror, + self.out_fmt, + fs, + self.loudness, + self.loudness_fmt, + meta_splits_noerror, + logger, + ) + file_splits = [f * loud for f, loud in zip(file_splits, scaling_splits)] + else: + # do normal scaling + file_splits = adjust_loudness( + file_splits, + self.out_fmt, + fs, + self.loudness, + self.loudness_fmt, + meta_splits, + logger, + ) + + # derive output folder names + out_out_files = [] + for f in out_files: + regex_filter = r"[/\\][t][m][p][_]\w+[/\\]" + tmp_name = re.search(regex_filter, str(f)).group().strip()[1:-1] + out_name = tmp_name.replace("tmp_", "") + if self.condition_in_output_filename: + f_out = Path( + str(f) + .replace(tmp_name, out_name) + .replace(f".{self.name}.", f".{out_name}.") + ) + else: + f_out = Path( + str(f).replace(tmp_name, out_name).replace(f".{self.name}.", ".") + ) + out_out_files.append(f_out) + if isinstance(audio.fromtype(self.out_fmt), audio.ObjectBasedAudio): + out_out_meta = [] + for f in out_out_files: + oom = [] + for idx in range(num_channels): + f_out = f.with_suffix(f".wav.{idx}.csv") + oom.append(f_out) + out_out_meta.append(oom) + else: + out_out_meta = repeat(None) + + # write file(s) in tmp and output folder + for of, oof, file_s, om, oom, meta_s in zip( + out_files, out_out_files, file_splits, out_meta, out_out_meta, meta_splits + ): + write(of, file_s, fs) + write(oof, file_s, fs) + if 
isinstance(audio.fromtype(self.out_fmt), audio.ObjectBasedAudio): + write_ISM_metadata_in_file(meta_s, om) + write_ISM_metadata_in_file(meta_s, oom) + # write noerror files in tmp folder + if self.tx_condition: + for ofne, file_sne, omne, meta_sne in zip( + out_files_noerror, + file_splits_noerror, + out_meta_noerror, + meta_splits_noerror, + ): + write(ofne, file_sne, fs) + if isinstance(audio.fromtype(self.out_fmt), audio.ObjectBasedAudio): + write_ISM_metadata_in_file(meta_sne, omne) + + def revert_preamble_concatenation( + self, x, fs, in_file, out_file, in_meta, noerror=False, logger=None + ): + # remove preamble and first half of signal due to repetition + if self.preamble or self.repeat_signal: + x, in_meta = remove_preamble( + x, + self.out_fmt, + self.fs, + self.repeat_signal, + self.preamble, + in_meta, + logger, + ) + + # reverse concatenation + if self.concatenate_input: + # read out splits file -> start/end, names, sampling rate + splits_info_file = Path( + in_file.parent.parent.joinpath("tmp_preprocessing_2").joinpath( + "undo_concat.txt" + ) + ) + splits, split_names, split_fs = read_splits_file(splits_info_file) + + # split file + file_splits, meta_splits = concat_teardown( + x, splits, self.out_fmt, fs, split_fs, in_meta, logger + ) + + # set new out_files + if noerror: + out_files = [ + in_file.parent.joinpath(sn).with_suffix(f".{self.name}.noerror.wav") + for sn in split_names + ] + else: + out_files = [ + in_file.parent.joinpath(sn).with_suffix(f".{self.name}.wav") + for sn in split_names + ] + + if isinstance(audio.fromtype(self.out_fmt), audio.ObjectBasedAudio): + out_meta = [] + for of in out_files: + of_list = [] + for idx in range(x.shape[1]): + of_list.append(of.with_suffix(f".wav.{idx}.csv")) + out_meta.append(of_list) + else: + out_meta = repeat(None) + + else: + out_files = [out_file] + file_splits = [x] + if isinstance(audio.fromtype(self.out_fmt), audio.ObjectBasedAudio): + meta_splits = [in_meta] + out_meta = [ + [ + out_file.with_suffix(f".wav.{idx}.csv") + for idx in range(x.shape[1]) + ] + ] + else: + meta_splits = repeat(None) + out_meta = repeat(None) + + return out_files, file_splits, out_meta, meta_splits + + +def read_splits_file(splits_file): + """Read out splitting information from split log in preprocessing 2 temp folder""" + with open(splits_file, "r") as f: + splits = f.readline()[:-1].split(", ") + names = f.readline()[:-1].split(", ") + fs = f.readline()[:-1] + return splits, names, fs + + +def adjust_loudness( + file_splits, out_fmt, fs, loudness, loudness_fmt, meta, logger=None +): + if logger: + logger.debug( + "Apply normal loudness scaling. The following loudness values are in the concatenation order." + ) + scaled_signals = [] + for f, m in zip(file_splits, meta): + audio_object = audio.fromarray(fmt=out_fmt, x=f, fs=fs) + if isinstance(audio_object, audio.ObjectBasedAudio): + audio_object.object_pos = m + scaled_signal, _ = loudness_norm( + audio_object, loudness, loudness_fmt, logger=logger + ) + scaled_signals.append(scaled_signal) + return scaled_signals + + +def measure_loudness(file_splits, out_fmt, fs, loudness, loudness_fmt, meta, logger): + if logger: + logger.debug( + "Apply special scaling for bitstream error conditions. 
The following values are based on the signal without error" + ) + scaling_splits = [] + for f, m in zip(file_splits, meta): + audio_object = audio.fromarray(fmt=out_fmt, x=f, fs=fs) + if isinstance(audio_object, audio.ObjectBasedAudio): + audio_object.object_pos = m + _, scale_factor = loudness_norm( + audio_object, loudness, loudness_fmt, logger=logger + ) + scaling_splits.append(scale_factor) + return scaling_splits diff --git a/ivas_processing_scripts/utils.py b/ivas_processing_scripts/utils.py index 0ce6696e00e4078812310b42bc5c37aa04b62b52..b46a104e1535e8bdfe4bda5939c6be22f1621c91 100755 --- a/ivas_processing_scripts/utils.py +++ b/ivas_processing_scripts/utils.py @@ -52,9 +52,9 @@ Directory/path handling """ -def create_dir(p: str) -> None: +def create_dir(p: str, exist_ok=True) -> None: p = Path(p) - p.mkdir(exist_ok=True, parents=True) + p.mkdir(exist_ok=exist_ok, parents=True) def delete_dir(p: str) -> None: @@ -80,8 +80,13 @@ class DirManager: ) def __enter__(self): - for path in self.create_paths: - create_dir(path) + try: + for path in self.create_paths: + create_dir(path, exist_ok=False) + except FileExistsError: + raise ValueError( + "At least one of the output folders already exists. Please delete or move this folder." + ) def __exit__(self, exc_type, exc_value, exc_traceback): for path in self.delete_paths: diff --git a/tests/constants.py b/tests/constants.py index 971e38286f043bced9b34f2ac9a2b4718b9e6b36..8e9abad4b425f730c981a815192fe9b8dec87e4a 100644 --- a/tests/constants.py +++ b/tests/constants.py @@ -197,7 +197,7 @@ HR_TRAJECTORIES_TO_TEST = [ """ Generate Test Items Configs """ INPUT_CONFIG_FILES = [ str(TEST_VECTOR_DIR.joinpath("test_ISM.yml")), - # str(TEST_VECTOR_DIR.joinpath("test_MASA.yml")), # TODO + str(TEST_VECTOR_DIR.joinpath("test_MASA.yml")), str(TEST_VECTOR_DIR.joinpath("test_MC.yml")), str(TEST_VECTOR_DIR.joinpath("test_SBA.yml")), ] diff --git a/tests/data/test_ISM.yml b/tests/data/test_ISM.yml index 7122ab4e7a7c48c1ee8c31fa331f755ed0c51f04..9f1fe7992bff4f2daad21780055c6a6c17c4714b 100644 --- a/tests/data/test_ISM.yml +++ b/tests/data/test_ISM.yml @@ -24,7 +24,7 @@ ### REQUIRED: Input path or file input_path: "./tests/concatenation_folder/ISM" ### REQUIRED: Output path or file -output_path: "./tests/tmp_output_ISM" +output_path: "./tests/temp_output_ISM" ### Metadata path or file(s) ### If input format is ISM{1-4} a path for the metadata files can be specified; ### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherise ignored) @@ -51,6 +51,7 @@ metadata_path: ### searches for the specified substring in found filenames; default = null # input_select: # - "48kHz" +condition_in_output_filename: true ################################################ ### Input configuration @@ -70,11 +71,11 @@ preprocessing: ### Target format used in rendering from input format; default = null (no rendering) # fmt: "7_1_4" ### Define mask (HP50 or 20KBP) for input signal filtering; default = null - # mask: "HP50" + mask: "HP50" ### Target sampling rate in Hz for resampling; default = null (no resampling) fs: 48000 ### Target loudness in LKFS; default = null (no loudness change applied) - loudness: -26 + loudness: -30 ### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null); ### default = null (uses preprocessing fmt if possible) # loudness_fmt: "BINAURAL" diff --git a/tests/data/test_MASA.yml b/tests/data/test_MASA.yml new file mode 100644 index 
0000000000000000000000000000000000000000..b4a3eebd6c929d106412890c16f95d9e9b2070e1 --- /dev/null +++ b/tests/data/test_MASA.yml @@ -0,0 +1,295 @@ +--- +################################################ +# General configuration +################################################ +### Name of test; default = YYYYMMDD_HH.MM.SS_listening_test +# name: test SBA +### Date; default = YYYYMMDD_HH.MM.SS +# date: 2023.06.30 +### git commit SHA; default = git rev-parse HEAD +# git_sha: abc123 + +### Whether to use multiprocessing; default = true +# multiprocessing: false +### Deletion of temporary directories containing +### intermediate processing files, bitstreams etc.; default = false +# delete_tmp: true +### Master seed for random processes like bitstream error pattern generation; default = 0 +master_seed: 5 + +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! This is not supported, use "_" instead +### For Windows user: please use double back slash '\\' in paths and add '.exe' to executable definitions +### REQUIRED: Input path or file +input_path: "./tests/concatenation_folder/MASA" +### REQUIRED: Output path or file +output_path: "./tests/temp_output_MASA" +### Metadata path or file(s) +### If input format is ISM{1-4} a path for the metadata files can be specified; +### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherise ignored) +# metadata_path: + ### Path can be set for all items with the 'all_items' key (automatic search for item_name.{wav, raw, pcm}.{0-3}.csv within this folder) + # all_items: ".../metadata_folder" + ### Path can be set for all items individually with 'item{1-4}' keys + ### 'item{1-4}' keys can also be renamed to the input file names including extension {wav, raw, pcm} + ### Either list individual files for all objects or name folder for automatic search for one item + # item1: + # - ".../meta_all_obj" + # item2: + # - ".../meta_obj1.csv" + # - ".../meta_ob2.csv" + # noise.wav: + # - ".../metadata_folder_for_noise_item" + +### Select only a subset of items +### searches for the specified substring in found filenames; default = null +# input_select: +# - "48kHz" + +################################################ +### Input configuration +################################################ +input: + ### REQUIRED: Input format + fmt: "FOA" + ### Input sampling rate in Hz needed for headerless audio files; default = 48000 + # fs: 32000 + +################################################ +### Pre-processing on individual items +################################################ +### Pre-processing step performed prior to core processing for all conditions +### If not defined, preprocessing step is skipped +preprocessing: + ### Target format used in rendering from input format; default = null (no rendering) + # fmt: "7_1_4" + ### Define mask (HP50 or 20KBP) for input signal filtering; default = null + mask: "HP50" + ### Target sampling rate in Hz for resampling; default = null (no resampling) + # fs: 32000 + ### Target loudness in LKFS; default = null (no loudness change applied) + loudness: -30 + ### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null); + ### default = null (uses preprocessing fmt if possible) + # loudness_fmt: "MONO" + ### Pre-/post-trim individual signal(s) (ms) (negative values pad silence); default = 0 + # trim: + # - 50 + # 
- -50 + ### Flag for using noise (amplitude +-4) instead of silence for padding; default = false (silence) + # pad_noise: true + ### Value for application of delay (ms) (negative values advance); default = 0 + # delay: 20 + ### Length of window used at start/end of signal (ms); default = 0 + window: 100 + +################################################ +### Pre-processing on whole signal(s) +################################################ +preprocessing_2: + ### Options for processing of the concatenated item (concatenate_input: true) or + ### the individual items (concatenate_input: false) after previous pre-processing step + ### Horizontally concatenate input items into one long file; default = false + concatenate_input: true + ### Specify the concatenation order in a list of strings. If not specified, the concatenation order would be + ### as per the filesystem on the users' device + ### Should only be used if concatenate_input = true + ### Specify the filename with extension. + ### For example, concatenation_order: ["file3.wav", "file1.wav", "file4.wav", "file2.wav"] + # concatenation_order: [] + ### Specify preamble duration in ms; default = 0 + preamble: 10000 + ### Flag wheter to use noise (amplitude +-4) for the preamble or silence; default = false (silence) + preamble_noise: true + ### Additive background noise + # background_noise: + ### REQUIRED: SNR for background noise in dB + # snr: 10 + ### REQUIRED: Path to background noise, must have same format and sampling rate as input signal(s) + # background_noise_path: ".../noise.wav" + ### Seed for delay offest; default = 0 + # seed_delay: 10 + +################################################# +### Bitstream processing +################################################# +### Bitstream processing (transport simulation) done after encoding and before decoding +### e.g. 
frame error insertion or transport simulation for JBM testing +### can be given globally here or in individual conditions of type ivas or evs +# tx: + ### REQUIRED: Type of bitstream processing; possible types: "JBM" or "FER" + # type: "JBM" + + ### JBM + ### REQUIRED: either error_pattern or error_profile + ### delay error profile file + # error_pattern: ".../dly_error_profile.dat" + ### Index of one of the existing delay error profile files to use (1-11) + # error_profile: 5 + ## nFramesPerPacket parameter for the network simulator; default = 1 + # n_frames_per_packet: 2 + + ### FER + ### REQUIRED: either error_pattern or error_rate + ### Frame error pattern file + # error_pattern: "path/pattern.192" + ### Error rate in percent + # error_rate: 5 + ### Additional seed to specify number of preruns; default = 0 + # prerun_seed: 2 + +################################################ +### Configuration for conditions under test +################################################ +### List of conditions to generate +### Name of the key will be used as output directory name +### conditions must specify the "type" key which may be one of the following options: +### ref generate the reference condition +### lp3k5 generate a low-pass anchor with cut-off frequency 3.5 kHz +### lp7k generate a low-pass anchor with cut-off frequency 7 kHz +### mnru generate MNRU condition +### esdru generate ESDRU condition +### mono_dmx generate mono downmix condition +### evs generate an EVS coded condition (see below examples for additional required keys) +### ivas generate an IVAS coded condition (see below examples for additional required keys) +conditions_to_generate: + ### Reference and anchor conditions ########################## + c01: + ### REQUIRED: type of condition + type: ref + ### optional low-pass cut-off frequency in Hz; default = null + # out_fc: 22500 + c02: + ### REQUIRED: type of condition + type: lp7k + c03: + ### REQUIRED: type of condition + type: mnru + ### REQUIRED: the ratio of speech power to modulated noise power in dB + q: 20 + c04: + ### REQUIRED: type of condition + type: esdru + ### REQUIRED: spatial degradation value between 0 and 1 + alpha: 0.5 + + ### IVAS condition ############################### + c06: + ### REQUIRED: type of condition + type: ivas + ### REQUIRED: Bitrates to use for coding + bitrates: + - 160000 + # - 32000 + ### Encoder options + cod: + fmt: "MASA2" + ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary) + #bin: ~/git/ivas-codec/IVAS_cod + ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) + # fs: 32000 + ### Additional commandline options; default = null + # opts: ["-q", "-dtx", 4] + ### Decoder options + dec: + ### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary) + #bin: ~/git/ivas-codec/IVAS_dec + ### Decoder output format; default = postprocessing fmt + fmt: "MASA2" + ### Decoder output sampling rate; default = null (same as input) + # fs: 48000 + ### Additional commandline options; default = null + # opts: ["-q", "-no_delay_cmp"] + ### Bitstream options + # tx: + ### For possible arguments see overall bitstream modification + + ### IVAS condition ############################### + c07: + ### REQUIRED: type of condition + type: ivas + ### REQUIRED: Bitrates to use for coding + bitrates: + - 160000 + # - 32000 + ### Encoder options + cod: + fmt: "MASA2" + ### Path to encoder binary; default search for 
IVAS_cod in bin folder (primary) and PATH (secondary) + #bin: ~/git/ivas-codec/IVAS_cod + ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) + # fs: 32000 + ### Additional commandline options; default = null + # opts: ["-q", "-dtx", 4] + ### Decoder options + dec: + ### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary) + #bin: ~/git/ivas-codec/IVAS_dec + ### Decoder output format; default = postprocessing fmt + # fmt: "CICP19" + ### Decoder output sampling rate; default = null (same as input) + # fs: 48000 + ### Additional commandline options; default = null + # opts: ["-q", "-no_delay_cmp"] + ### Bitstream options + tx: + ### For possible arguments see overall bitstream modification + type: "FER" + error_rate: 3 + prerun_seed: 2 + + ### EVS condition ################################ + c08: + ### REQUIRED: type of condition + type: evs + ### REQUIRED: Bitrates to use for coding + ### For EVS mono, this may be a per-channel bitrate configuration (must match input/preprocessing format!) + ### the last value will be repeated if too few are specified + bitrates: + # - 9600 + - [13200, 13200, 8000, 13200, 9600] + cod: + fmt: "MASA2" + ### Path to encoder binary; default search for EVS_cod in bin folder (primary) and PATH (secondary) + #bin: EVS_cod + ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) + # fs: 32000 + dec: + fmt: "MASA2" + ### Path to encoder binary; default search for EVS_dec in bin folder (primary) and PATH (secondary) + #bin: EVS_dec + ### Decoder output sampling rate; default = null (same as input) + # fs: 48000 + tx: + ### For possible arguments see overall bitstream modification + type: "FER" + error_rate: 3 + prerun_seed: 2 + +################################################ +### Post-processing +################################################ +### Post-processing step performed after core processing for all conditions +### Post-processing is required and can not be omitted +postprocessing: + ### REQUIRED: Target format for output + fmt: ["MASA2", "BINAURAL"] + ### REQUIRED: Target sampling rate in Hz for resampling + fs: 48000 + ### Low-pass cut-off frequency in Hz; default = null (no filtering) + # lp_cutoff: 24000 + ### Target loudness in LKFS; default = null (no loudness change applied) + loudness: -26 + ### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null); + ### default = null (uses postprocessing fmt if possible) + # loudness_fmt: null + ### Name of custom binaural dataset (without prefix or suffix); + ### default = null (ORANGE53(_Dolby) for BINAURAL, IISofficialMPEG222UC for BINAURAL_ROOM) + # bin_dataset: SADIE + ### Render LFE to binaural output with the specified gain (only valid for channel-based input); default = null + # bin_lfe_gain: 1 + ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false + limit: true + ### Head-tracking trajectory file for binaural output; default = null + # trajectory: "path/to/file" diff --git a/tests/data/test_MC.yml b/tests/data/test_MC.yml index 9e2c748e0eae3fc9b2478b568639283ab70cb9b2..4e1ea6f1ff62f31b83ab502f8d332c7279627635 100644 --- a/tests/data/test_MC.yml +++ b/tests/data/test_MC.yml @@ -24,7 +24,7 @@ master_seed: 5 ### REQUIRED: Input path or file input_path: "./tests/concatenation_folder/MC" ### REQUIRED: Output path or file -output_path: 
"./tests/tmp_output_MC" +output_path: "./tests/temp_output_MC" ### Metadata path or file(s) ### If input format is ISM{1-4} a path for the metadata files can be specified; ### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherise ignored) @@ -119,25 +119,25 @@ preprocessing_2: ### can be given globally here or in individual conditions of type ivas or evs tx: ### REQUIRED: Type of bitstream processing; possible types: "JBM" or "FER" - type: "JBM" + type: "FER" ### JBM ### REQUIRED: either error_pattern or error_profile ### delay error profile file # error_pattern: ".../dly_error_profile.dat" ### Index of one of the existing delay error profile files to use (1-11) - error_profile: 5 + #error_profile: 5 ## nFramesPerPacket parameter for the network simulator; default = 1 - n_frames_per_packet: 2 + #n_frames_per_packet: 2 ### FER ### REQUIRED: either error_pattern or error_rate ### Frame error pattern file # error_pattern: "path/pattern.192" ### Error rate in percent - # error_rate: 5 + error_rate: 5 ### Additional seed to specify number of preruns; default = 0 - # prerun_seed: 2 + prerun_seed: 2 ################################################ ### Configuration for conditions under test diff --git a/tests/data/test_SBA.yml b/tests/data/test_SBA.yml index da6bfe2251935afddef095823ec7cd01facac209..e5fedc5d2642e58f8c4bf1f12488a7b2a7b5df94 100644 --- a/tests/data/test_SBA.yml +++ b/tests/data/test_SBA.yml @@ -24,7 +24,7 @@ master_seed: 5 ### REQUIRED: Input path or file input_path: "./tests/concatenation_folder/SBA" ### REQUIRED: Output path or file -output_path: "./tests/tmp_output_SBA" +output_path: "./tests/temp_output_SBA" ### Metadata path or file(s) ### If input format is ISM{1-4} a path for the metadata files can be specified; ### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherise ignored) @@ -251,6 +251,7 @@ conditions_to_generate: # - 9600 - [13200, 13200, 8000, 13200, 9600] cod: + fmt: "PLANARFOA" ### Path to encoder binary; default search for EVS_cod in bin folder (primary) and PATH (secondary) #bin: EVS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) @@ -260,12 +261,6 @@ conditions_to_generate: #bin: EVS_dec ### Decoder output sampling rate; default = null (same as input) # fs: 48000 - ### Bitstream options - tx: - ### For possible arguments see overall bitstream modification - type: "JBM" - error_profile: 3 - sba_fmt: "PLANARFOA" ################################################ ### Post-processing diff --git a/tests/test_processing.py b/tests/test_processing.py index edf9f031c8279ff89afd66f9c92d30a8da0cef02..e9bdddc9b1babcb560945c50322bb8bdfb02c8b2 100644 --- a/tests/test_processing.py +++ b/tests/test_processing.py @@ -61,10 +61,12 @@ def test_generate_test_items(cfg): num_channels = 4 # test ISM4 elif "SBA" in cfg: num_channels = 4 # test FOA + elif "MASA" in cfg: + num_channels = 4 # test MASA with FOA input else: raise ValueError("Test setup missing") - # create input folder for MC, SBA and ISM tests with concatenation + # create input folder for MC, SBA, MASA and ISM tests with concatenation input_path.mkdir(exist_ok=True, parents=True) # copy items to folder -> pink noise and spectral test