From ae40b7cb365fef82ebb2bd90e3a8e8b7d2b36565 Mon Sep 17 00:00:00 2001 From: veeravt Date: Mon, 8 May 2023 09:57:56 +0200 Subject: [PATCH 1/6] Checking for nan and inf --- .../audiotools/wrappers/bs1770.py | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/ivas_processing_scripts/audiotools/wrappers/bs1770.py b/ivas_processing_scripts/audiotools/wrappers/bs1770.py index fe756c07..3a44ccf4 100755 --- a/ivas_processing_scripts/audiotools/wrappers/bs1770.py +++ b/ivas_processing_scripts/audiotools/wrappers/bs1770.py @@ -134,8 +134,12 @@ def bs1770demo( result = run(cmd, logger=logger) # parse output - measured_loudness = float(result.stdout.splitlines()[3].split(":")[1]) - scale_factor = float(result.stdout.splitlines()[-3].split(":")[1]) + measured_loudness = check_for_nan_and_inf( + result.stdout.splitlines()[3].split(":")[1] + ) + scale_factor = check_for_nan_and_inf( + result.stdout.splitlines()[-3].split(":")[1] + ) return measured_loudness, scale_factor @@ -289,3 +293,20 @@ def scale_files( # write into file write(file, scaled_audio, audio_obj.fs) + + +def check_for_nan_and_inf(input_string: str): + """ + Checks the input string for nan and inf + + Parameters + ---------- + input_string: str + Input string + """ + if "nan" in input_string.lower(): + raise ValueError("Too quiet.") + elif "inf" in input_string.lower(): + raise ValueError("All zeros.") + else: + return float(input_string) -- GitLab From 5e0de9ed560c668cf6d9ed95c5bf0ad0e1df79d3 Mon Sep 17 00:00:00 2001 From: veeravt Date: Wed, 10 May 2023 12:42:08 +0200 Subject: [PATCH 2/6] merged main into branch --- .gitignore | 4 + .gitlab-ci.yml | 13 ++ README.md | 18 +- examples/TEMPLATE.yml | 16 +- ivas_processing_scripts/__init__.py | 4 + ivas_processing_scripts/audiotools/audio.py | 4 +- .../audiotools/audioarray.py | 4 +- .../audiotools/audiofile.py | 27 ++- .../binaural_datasets/binaural_dataset.py | 8 +- .../audiotools/convert/__init__.py | 2 + 
.../audiotools/convert/channelbased.py | 1 + .../audiotools/convert/scenebased.py | 1 + .../audiotools/wrappers/bs1770.py | 10 +- .../audiotools/wrappers/esdru.py | 4 +- .../audiotools/wrappers/p50fbmnru.py | 26 ++- ivas_processing_scripts/bin/README.txt | 14 -- ivas_processing_scripts/constants.py | 4 +- ivas_processing_scripts/processing/chains.py | 2 + ivas_processing_scripts/processing/config.py | 3 + ivas_processing_scripts/processing/evs.py | 23 +- .../processing/processing.py | 94 +++++++- ivas_processing_scripts/utils.py | 11 +- tests/constants.py | 2 +- tests/data/test_ISM.yml | 207 +++++++++++------- tests/data/test_MC.yml | 202 +++++++++++------ tests/data/test_SBA.yml | 195 +++++++++++------ tests/test_binaries_present.py | 52 +++++ tests/test_processing.py | 35 +++ 28 files changed, 692 insertions(+), 294 deletions(-) delete mode 100755 ivas_processing_scripts/bin/README.txt create mode 100755 tests/test_binaries_present.py diff --git a/.gitignore b/.gitignore index 94274513..7855f81e 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,7 @@ proc_input/*.wav proc_input/*.pcm proc_output/ *~ +tests/tmp_output_* +tests/cut +tests/ref +tests/concatenation_folder \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a4cfa7c1..5c654af3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,6 +7,7 @@ default: stages: + - check - test - analyze @@ -31,6 +32,18 @@ stages: - cd $dir +# ------------------------------------ +# check pre-conditions are met +# ------------------------------------ +check_for_binaries: + stage: check + tags: + - linux + script: + - *print-common-info + - python3 -m pytest tests/test_binaries_present.py + + # ------------------------------------ # functionality tests # ------------------------------------ diff --git a/README.md b/README.md index 76e717d5..c8732be1 100755 --- a/README.md +++ b/README.md @@ -103,9 +103,9 @@ conditions_to_generate: bitrates: - 9600 cod: - bin: ~/git/ivas-codec/IVAS_cod + bin: 
~/git/ivas-codec/EVS_cod dec: - bin: ~/git/ivas-codec/IVAS_dec + bin: ~/git/ivas-codec/EVS_dec postprocessing: fmt: "BINAURAL" fs: 48000 @@ -296,7 +296,7 @@ input: ### mnru generate MNRU condition ### esdru generate ESDRU condition ### mono_dmx generate mono downmix condition -### evs generate an EVS coded condition (see below examples for additional required keys) (currently uses IVAS EVS mode) +### evs generate an EVS coded condition (see below examples for additional required keys) ### ivas generate an IVAS coded condition (see below examples for additional required keys) conditions_to_generate: ### Reference and anchor conditions ########################## @@ -389,15 +389,17 @@ conditions_to_generate: ### for multi-channel configs, code LFE with 9.6 kbps NB (as mandated by IVAS-3) evs_lfe_9k6bps_nb: true cod: - ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary) - bin: ~/git/ivas-codec/IVAS_cod + ### Path to encoder binary; default search for EVS_cod in bin folder (primary) and PATH (secondary) + bin: ~/git/ivas-codec/EVS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) # fs: 32000 dec: - ### Path to encoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary) - bin: ~/git/ivas-codec/IVAS_dec + ### Path to encoder binary; default search for EVS_dec in bin folder (primary) and PATH (secondary) + bin: ~/git/ivas-codec/EVS_dec ### Decoder output sampling rate; default = null (same as input) # fs: 48000 + ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats + # sba_format: "PLANARFOA" ``` @@ -471,6 +473,7 @@ For the encoding stage `cod` and the decoding stage `dec`, the path to the IVAS_ Additionally some resampling can be applied by using the key `fs` followed by the desired sampling rate. 
The general bitstream processing configuration can be locally overwritten for each EVS and IVAS condition with the key `tx`. The additional key `evs_lfe_9k6bps_nb` is only available for EVS conditions and ensures a bitrate of 9.6kbps and narrow band processing of the LFE channel(s). +Also only available for EVS conditions is the `sba_format` key where a SBA format of lower order than the input can be specified for SBA input formats. #### IVAS The configuration of the IVAS condition is similar to the EVS condition. However, only one bitrate for all channels (and metadata) can be specified. In addition to that, the encoder and decoder take some additional arguments defined by the key `opts`. @@ -521,6 +524,7 @@ The following additional executables are needed for the different processing ste | Random offset/seed generation (necessary for background noise and FER bitstream processing) | random | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip | | JBM network simulator | networkSimulator_g192 | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip | | MASA rendering (also used in loudness measurement of MASA items) | masaRenderer | https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/TSGS4_122_Athens/Docs/S4-230221.zip | +| EVS reference conditions | EVS_cod, EVS_dec | https://www.3gpp.org/ftp/Specs/archive/26_series/26.443/26443-h00.zip | The necessary binaries have to be either placed in the [ivas_processing_scripts/bin](./ivas_processing_scripts/bin) folder or the path has to be specified in [ivas_processing_scripts/binary_paths.yml](./ivas_processing_scripts/binary_paths.yml). 
diff --git a/examples/TEMPLATE.yml b/examples/TEMPLATE.yml index f99384fc..83901013 100755 --- a/examples/TEMPLATE.yml +++ b/examples/TEMPLATE.yml @@ -151,7 +151,7 @@ input: ### mnru generate MNRU condition ### esdru generate ESDRU condition ### mono_dmx generate mono downmix condition -### evs generate an EVS coded condition (see below examples for additional required keys) (currently uses IVAS EVS mode) +### evs generate an EVS coded condition (see below examples for additional required keys) ### ivas generate an IVAS coded condition (see below examples for additional required keys) conditions_to_generate: ### Reference and anchor conditions ########################## @@ -251,19 +251,21 @@ conditions_to_generate: evs_lfe_9k6bps_nb: true ### Encoder options cod: - ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary) - bin: ~/git/ivas-codec/IVAS_cod + ### Path to encoder binary; default search for EVS_cod in bin folder (primary) and PATH (secondary) + bin: EVS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) # fs: 32000 ### Decoder options dec: - ### Path to encoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary) - bin: ~/git/ivas-codec/IVAS_dec + ### Path to encoder binary; default search for EVS_dec in bin folder (primary) and PATH (secondary) + bin: EVS_dec ### Decoder output sampling rate; default = null (same as input) # fs: 48000 ### Bitstream options # tx: ### For possible arguments see overall bitstream modification + ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats + # sba_format: "PLANARFOA" ################################################ ### Post-processing @@ -287,7 +289,7 @@ postprocessing: # bin_dataset: SADIE ### Render LFE to binaural output with the specified gain (only valid for channel-based input); default = null # bin_lfe_gain: 1 - ### Flag whether 
output should be limited to avoid clipping (can alter target loudness); default = true - # limit: false + ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false + # limit: true ### Head-tracking trajectory file for binaural output; default = null # trajectory: "path/to/file" diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index a16309e0..16ac8b93 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -43,6 +43,7 @@ from ivas_processing_scripts.constants import ( ) from ivas_processing_scripts.processing import chains, config from ivas_processing_scripts.processing.processing import ( + multiple_of_frame_size, preprocess, preprocess_2, preprocess_background_noise, @@ -126,6 +127,9 @@ def main(args): cfg.metadata_path = metadata + # checking if audio is a multiple of frame size + multiple_of_frame_size(cfg) + # run preprocessing only once if hasattr(cfg, "preprocessing"): # save process info for background noise diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index f6c45fca..3bc10c53 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -30,10 +30,10 @@ # the United Nations Convention on Contracts on the International Sales of Goods. 
# -import warnings from abc import ABC, abstractmethod from pathlib import Path from typing import Optional, Union +from warnings import warn import numpy as np @@ -274,7 +274,7 @@ class ObjectBasedAudio(Audio): obj.metadata_files.append(file_name_meta) else: raise ValueError(f"Metadata file {file_name_meta} not found.") - warnings.warn( + warn( f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}" ) diff --git a/ivas_processing_scripts/audiotools/audioarray.py b/ivas_processing_scripts/audiotools/audioarray.py index c0909c4c..2c770ce4 100755 --- a/ivas_processing_scripts/audiotools/audioarray.py +++ b/ivas_processing_scripts/audiotools/audioarray.py @@ -31,8 +31,8 @@ # import logging -import warnings from typing import Iterator, Optional, Tuple, Union +from warnings import warn import numpy as np import scipy.signal as sig @@ -342,7 +342,7 @@ def limiter( fr_sig[idx_min] = -32768 if limited: - warnings.warn("Limiting had to be applied") + warn("Limiting had to be applied") return x diff --git a/ivas_processing_scripts/audiotools/audiofile.py b/ivas_processing_scripts/audiotools/audiofile.py index 954c91f8..2d6ee25a 100755 --- a/ivas_processing_scripts/audiotools/audiofile.py +++ b/ivas_processing_scripts/audiotools/audiofile.py @@ -34,11 +34,13 @@ import logging import struct from pathlib import Path from typing import Optional, Tuple, Union +from warnings import warn import numpy as np import scipy.io.wavfile as wav from .audioarray import trim, window +from .constants import VERT_HOA_CHANNELS_ACN logger = logging.getLogger("__main__") logger.setLevel(logging.DEBUG) @@ -157,7 +159,7 @@ def concat( pad_noise: Optional[bool] = False, preamble: Optional[int] = None, pad_noise_preamble: Optional[bool] = False, -) -> list: +) -> Tuple[list, int]: """ Horizontally concatenates audio files into one long file @@ -198,7 +200,7 @@ def concat( # pad with very low amplitude noise x = trim( - x, in_fs, (-silence_pre, 
-silence_post), samples=True, pad_noise=pad_noise + x, fs, (-silence_pre, -silence_post), samples=True, pad_noise=pad_noise ) # add the length to our splits list @@ -209,11 +211,11 @@ def concat( # add preamble if preamble: - y = trim(y, in_fs, (-preamble, 0), pad_noise_preamble) + y = trim(y, fs, (-preamble, 0), pad_noise_preamble) - write(out_file, y, fs=in_fs) + write(out_file, y, fs=fs) - return splits[1:] + return splits[1:], fs def split( @@ -326,6 +328,8 @@ def split_channels( in_file: str, out_filenames: list, in_nchans: int, + out_nchans: int, + is_planar: Optional[bool] = False, in_fs: Optional[int] = 48000, ) -> None: """ @@ -339,6 +343,10 @@ def split_channels( List of output file names (.pcm, .raw or .wav) in_nchans: int Input number of channels + out_nchans: int + Number of channels to be split + is_planar: Optional[bool] + If true vertical SBA channels are set to zero in_fs: Optional[int] = 48000 Input sampling rate, default 48000 Hz @@ -350,14 +358,17 @@ def split_channels( # validation if in_nchans is None: raise ValueError("Number of channels to split must be specified!") - if in_nchans != len(out_filenames): - print( + if out_nchans != len(out_filenames): + warn( "Split: Mismatch between number of channels and output filenames length. Truncating output filenames list." 
) - out_filenames = out_filenames[:in_nchans] + out_filenames = out_filenames[:out_nchans] x, in_fs = read(in_file, nchannels=in_nchans, fs=in_fs) + if is_planar: + x[:, VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < in_nchans]] = 0 + # Write output files for idx, out_file in enumerate(out_filenames): # extract correct channel diff --git a/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py b/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py index e5d5ac95..37008adf 100755 --- a/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py +++ b/ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py @@ -30,9 +30,9 @@ # the United Nations Convention on Contracts on the International Sales of Goods. # -import warnings from pathlib import Path from typing import Optional, Tuple, Union +from warnings import warn import numpy as np from scipy.io import loadmat @@ -149,7 +149,7 @@ def load_ir( ) ).is_file(): dataset_suffix = "SBA3" - warnings.warn("No SBA1 dataset found -> use truncated SBA3 dataset") + warn("No SBA1 dataset found -> use truncated SBA3 dataset") elif in_fmt.endswith("2"): dataset_suffix = "SBA2" # Use truncated SBA3 dataset if no SBA1 or 2 dataset exists @@ -159,7 +159,7 @@ def load_ir( ) ).is_file(): dataset_suffix = "SBA3" - warnings.warn("No SBA2 dataset found -> use truncated SBA3 dataset") + warn("No SBA2 dataset found -> use truncated SBA3 dataset") else: dataset_suffix = "SBA3" @@ -172,7 +172,7 @@ def load_ir( latency_smp = latency_s else: latency_smp = int(np.min(np.argmax(np.sum(np.abs(IR), axis=1), axis=0))) - warnings.warn( + warn( f"No latency of HRTF dataset specified in {path_dataset} file -> computed latency: {latency_smp} sample(s)" ) diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 9698dd4f..8ca2d2f7 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ 
b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -214,6 +214,8 @@ def process_audio( """delay audio""" if delay is not None: + if isinstance(x, audio.ObjectBasedAudio): + raise ValueError("Delay not possible for ISM input") if logger: logger.debug(f"Delaying audio by {delay} ms") x.audio = audioarray.delay(x.audio, x.fs, delay) diff --git a/ivas_processing_scripts/audiotools/convert/channelbased.py b/ivas_processing_scripts/audiotools/convert/channelbased.py index 6bdd6b33..480d635e 100755 --- a/ivas_processing_scripts/audiotools/convert/channelbased.py +++ b/ivas_processing_scripts/audiotools/convert/channelbased.py @@ -139,6 +139,7 @@ def render_cba_to_binaural( bin.audio += bin_lfe bin.audio = resample_itu(bin, old_fs) + bin.fs = old_fs def render_custom_ls_binaural( diff --git a/ivas_processing_scripts/audiotools/convert/scenebased.py b/ivas_processing_scripts/audiotools/convert/scenebased.py index b8295808..ab22713d 100755 --- a/ivas_processing_scripts/audiotools/convert/scenebased.py +++ b/ivas_processing_scripts/audiotools/convert/scenebased.py @@ -123,6 +123,7 @@ def render_sba_to_binaural( sba.fs = 48000 bin.audio = binaural_fftconv(sba.audio, IR, sba.num_channels) + bin.fs = 48000 # compensate delay from binaural dataset bin.audio = delay(bin.audio, bin.fs, -latency_smp, samples=True) diff --git a/ivas_processing_scripts/audiotools/wrappers/bs1770.py b/ivas_processing_scripts/audiotools/wrappers/bs1770.py index 3a44ccf4..1cd9c753 100755 --- a/ivas_processing_scripts/audiotools/wrappers/bs1770.py +++ b/ivas_processing_scripts/audiotools/wrappers/bs1770.py @@ -32,6 +32,7 @@ import copy import logging +import re from pathlib import Path from tempfile import TemporaryDirectory from typing import Optional, Tuple, Union @@ -134,11 +135,12 @@ def bs1770demo( result = run(cmd, logger=logger) # parse output - measured_loudness = check_for_nan_and_inf( - result.stdout.splitlines()[3].split(":")[1] + # we are looking for the (floating-point) number after 
the search string - '( )' around the number denotes the first group + measured_loudness = float( + re.search(r"Input level:\s+([-+]?(?:\d*\.*\d+))", result.stdout).group(1) ) - scale_factor = check_for_nan_and_inf( - result.stdout.splitlines()[-3].split(":")[1] + scale_factor = float( + re.search(r"Scaling factor:\s+([-+]?(?:\d*\.*\d+))", result.stdout).group(1) ) return measured_loudness, scale_factor diff --git a/ivas_processing_scripts/audiotools/wrappers/esdru.py b/ivas_processing_scripts/audiotools/wrappers/esdru.py index d7fbd925..92c9a653 100755 --- a/ivas_processing_scripts/audiotools/wrappers/esdru.py +++ b/ivas_processing_scripts/audiotools/wrappers/esdru.py @@ -94,7 +94,6 @@ def esdru( ) tmp_input_signal = input.audio - tmp_output_signal = np.ones((48000, 2)) with TemporaryDirectory() as tmp_dir: tmp_dir = Path(tmp_dir) @@ -120,10 +119,9 @@ def esdru( # write temporary file write(tmp_input_file, tmp_input_signal, sf) - write(tmp_output_file, tmp_output_signal, sf) # run command - run(cmd) + run(cmd, cwd=tmp_dir) tmp_output_signal, out_fs = read(tmp_output_file, 2, sf) diff --git a/ivas_processing_scripts/audiotools/wrappers/p50fbmnru.py b/ivas_processing_scripts/audiotools/wrappers/p50fbmnru.py index 4bc6b046..c31d0aae 100755 --- a/ivas_processing_scripts/audiotools/wrappers/p50fbmnru.py +++ b/ivas_processing_scripts/audiotools/wrappers/p50fbmnru.py @@ -30,6 +30,7 @@ # the United Nations Convention on Contracts on the International Sales of Goods. 
# +from copy import copy from pathlib import Path from tempfile import TemporaryDirectory from warnings import warn @@ -72,14 +73,15 @@ def p50fbmnru( else: binary = find_binary("p50fbmnru") - if input.fs != 48000: - warn("P.50 Fullband MNRU requires a sampling rate of 48kHz.") - tmp_sig = resample_itu(input, 48000) - else: - tmp_sig = input.audio + tmp_audio_obj = copy(input) - tmp_input_signal = tmp_sig - tmp_output_signal = np.ones((48000, input.num_channels)) + # resample signal to 48kHz + if input.fs != 48000: + warn( + "P.50 Fullband MNRU requires a sampling rate of 48kHz. Resampling will be applied" + ) + tmp_audio_obj.audio = resample_itu(tmp_audio_obj, 48000) + tmp_audio_obj.fs = 48000 with TemporaryDirectory() as tmp_dir: tmp_dir = Path(tmp_dir) @@ -99,12 +101,16 @@ def p50fbmnru( ] # write temporary file - write(tmp_input_file, tmp_input_signal) - write(tmp_output_file, tmp_output_signal) + write(tmp_input_file, tmp_audio_obj.audio) # run command run(cmd) - tmp_output_signal, out_fs = read(tmp_output_file, input.num_channels) + tmp_output_signal, _ = read(tmp_output_file, input.num_channels) + tmp_audio_obj.audio = tmp_output_signal + + # revert resampling + if input.fs != 48000: + tmp_output_signal = resample_itu(tmp_audio_obj, input.fs) return tmp_output_signal diff --git a/ivas_processing_scripts/bin/README.txt b/ivas_processing_scripts/bin/README.txt deleted file mode 100755 index 19519057..00000000 --- a/ivas_processing_scripts/bin/README.txt +++ /dev/null @@ -1,14 +0,0 @@ - -Necessary additional executables: - -| Processing step | Executable | Where to find | -|-------------------------------------------------|-----------------------|-------------------------------------------------------------------------------------------------------------| -| Loudness measurement and adjustment | bs1770demo | https://github.com/openitu/STL | -| MNRU | p50fbmnru | https://github.com/openitu/STL | -| ESDRU | esdru | https://github.com/openitu/STL | -| Frame error 
pattern application | eid-xor | https://github.com/openitu/STL | -| Error pattern generation | gen-patt | https://www.itu.int/rec/T-REC-G.191-201003-S/en (Note: Version in https://github.com/openitu/STL is buggy!) | -| Filtering, Resampling | filter | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip | -| Random offset/seed generation (necessary for background noise and FER bitstream processing) | random | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip | -| JBM network simulator | networkSimulator_g192 | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip | -| MASA rendering (also used in loudness measurement of MASA items) | masaRenderer | https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/TSGS4_122_Athens/Docs/S4-230221.zip | diff --git a/ivas_processing_scripts/constants.py b/ivas_processing_scripts/constants.py index fbadd523..f89e8589 100755 --- a/ivas_processing_scripts/constants.py +++ b/ivas_processing_scripts/constants.py @@ -69,10 +69,10 @@ DEFAULT_CONFIG = { } DEFAULT_CONFIG_EVS = { "cod": { - "bin": find_binary("IVAS_cod", raise_error=False), + "bin": find_binary("EVS_cod", raise_error=False), }, "dec": { - "bin": find_binary("IVAS_dec", raise_error=False), + "bin": find_binary("EVS_dec", raise_error=False), }, } DEFAULT_CONFIG_IVAS = { diff --git a/ivas_processing_scripts/processing/chains.py b/ivas_processing_scripts/processing/chains.py index accf1014..bfa80451 100755 --- a/ivas_processing_scripts/processing/chains.py +++ b/ivas_processing_scripts/processing/chains.py @@ -300,11 +300,13 @@ def get_processing_chain( "tx": tx_cfg, "preamble": preamble, "evs_lfe_9k6bps_nb": evs_lfe_9k6bps_nb, + "sba_format": cond_cfg.get("sba_format", tmp_in_fmt), } ) ) # update values to reflect decoder output tmp_in_fs = dec_cfg.get("fs", tmp_in_fs) + tmp_in_fmt = cond_cfg.get("sba_format", cfg.input["fmt"]) elif cond_cfg["type"] == "ivas": cod_cfg = cond_cfg["cod"] diff --git 
a/ivas_processing_scripts/processing/config.py b/ivas_processing_scripts/processing/config.py index 386f542e..77ab4e63 100755 --- a/ivas_processing_scripts/processing/config.py +++ b/ivas_processing_scripts/processing/config.py @@ -64,6 +64,9 @@ def merge_dicts(base: dict, other: dict) -> None: class TestConfig: + # avoid confusion with pytest tests due to naming + __test__ = False + def __init__(self, filename: str): """Parse a YAML or JSON configuration file""" # init lists of conditions and associated dirs diff --git a/ivas_processing_scripts/processing/evs.py b/ivas_processing_scripts/processing/evs.py index 7c52eaab..605e61df 100755 --- a/ivas_processing_scripts/processing/evs.py +++ b/ivas_processing_scripts/processing/evs.py @@ -167,16 +167,33 @@ class EVS(Processing): if not self.out_fs: self.out_fs = self.in_fs + # consider SBA format for condition + condition_fmt = audio.fromtype(self.sba_format) + if isinstance(self.in_fmt, audio.SceneBasedAudio): + if not isinstance(condition_fmt, audio.SceneBasedAudio) or ( + condition_fmt.num_channels > self.in_fmt.num_channels + ): + raise ValueError( + "SBA format has to be SBA with lower order than input signal" + ) + is_planar = condition_fmt.is_planar + else: + if self.in_fmt.name != condition_fmt.name: + raise ValueError("Cannot specify SBA format for non SBA input formats") + is_planar = False + # Split the channels to prepare for multi-mono coding split_chan_files = [ - out_file.with_suffix(f".{i}.pcm") for i in range(self.in_fmt.num_channels) + out_file.with_suffix(f".{i}.pcm") for i in range(condition_fmt.num_channels) ] split_channels( in_file, split_chan_files, - self.in_fmt.num_channels, - self.in_fs, + in_nchans=self.in_fmt.num_channels, + out_nchans=condition_fmt.num_channels, + in_fs=self.in_fs, + is_planar=is_planar, ) # run processing diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 0c1c72c0..6c4fabc0 100755 --- 
a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -35,7 +35,7 @@ from abc import ABC, abstractmethod from itertools import repeat from pathlib import Path from shutil import copyfile -from typing import Iterable, Union +from typing import Iterable, Optional, Union from warnings import warn import numpy as np @@ -74,6 +74,21 @@ class Processing(ABC): def reorder_items_list(items_list: list, concatenation_order: list) -> list: + """ + Reorder input items list based on conactenation order + + Parameters + ---------- + items_list: list + List of input items + concatenation_order: list + Concatenation order + + Returns + ------- + ordered_full_files: list + Re-ordered list of input items + """ name_to_full = {Path(full_file).name: full_file for full_file in items_list} ordered_full_files = [ name_to_full[name] for name in concatenation_order if name in name_to_full @@ -130,10 +145,10 @@ def concat_setup(cfg: TestConfig, chain, logger: logging.Logger): tmp_audio = audio.fromtype(cfg_pre2.in_fmt) tmp_num_chans = tmp_audio.num_channels - cfg.splits = concat( + cfg.splits, fs = concat( cfg.items_list, cfg.concat_file, - in_fs=cfg.input.get("fs", 48000), + in_fs=cfg_pre2.in_fs, num_channels=tmp_num_chans, ) @@ -144,6 +159,9 @@ def concat_setup(cfg: TestConfig, chain, logger: logging.Logger): # set input to the concatenated file we have just written to the output dir cfg.items_list = [cfg.concat_file] + # save input sampling rate for splitting at the end + cfg.pre2.in_fs = fs + # write out splits with open(cfg.concat_file.with_suffix(".splits.log"), "w") as f: print(", ".join([str(s) for s in cfg.splits]), file=f) @@ -164,6 +182,15 @@ def concat_teardown(cfg: TestConfig, logger: logging.Logger): logger.info(f"Splitting output file in directory {cfg.output_path}") + # if sampling rate changed, adjust splits + fs_new = float(cfg.postprocessing["fs"]) + fs_old = float(cfg.pre2.in_fs) + relative_fs_change = fs_new / 
fs_old + new_splits = [] + for split_i in cfg.splits: + new_splits.append(int(float(split_i) * relative_fs_change)) + cfg.splits = new_splits + for odir in cfg.out_dirs: path_input = odir / cfg.items_list[0].name out_paths = split( @@ -232,7 +259,9 @@ def preprocess(cfg, logger): and cfg.preprocessing_2.get("concatenate_input", False) and cfg.preprocessing_2.get("concatenation_order", None) is not None ): - cfg.items_list = reorder_items_list(cfg.items_list, cfg.concatenation_order) + cfg.items_list = reorder_items_list( + cfg.items_list, cfg.preprocessing_2["concatenation_order"] + ) if cfg.metadata_path[0] is not None: for item_idx in range(len(cfg.metadata_path)): @@ -277,14 +306,6 @@ def preprocess_2(cfg, logger): cfg.out_dirs[0], select_list=getattr(cfg, "input_select", None) ) - # Re-ordering items based on concatenation order - if ( - hasattr(cfg, "preprocessing_2") - and cfg.preprocessing_2.get("concatenate_input", False) - and cfg.preprocessing_2.get("concatenation_order", None) is not None - ): - cfg.items_list = reorder_items_list(cfg.items_list, cfg.concatenation_order) - if cfg.metadata_path[0] is not None: for item_idx in range(len(cfg.metadata_path)): for obj_idx in range(len(cfg.metadata_path[item_idx])): @@ -490,3 +511,52 @@ def preprocess_background_noise(cfg): ] = output_audio return + + +def multiple_of_frame_size( + cfg: TestConfig, + frame_size_in_ms: Optional[int] = 20, +) -> np.ndarray: + """ + Warn/Exit if audio if it isn't a multiple of frame size + + Parameters + ---------- + cfg: TestConfig + Input configuration + frame_size_in_ms: Optional[int] + Frame size in milliseconds; default = 20 + """ + # get the number of channels from the input format + input_format = cfg.input["fmt"] + num_channels = audio.fromtype(input_format).num_channels + for item in cfg.items_list: + # read the audio file + if "fs" in cfg.input: + sampling_rate = cfg.input["fs"] + x, fs = read(item, nchannels=num_channels, fs=sampling_rate) + elif item.suffix == ".pcm" 
or item.suffix == ".raw": + raise ValueError("Sampling rate must be specified for headerless files!") + elif item.suffix == ".wav": + x, fs = read(item) + sampling_rate = fs + n_samples_x, n_chan_x = x.shape + if fs != sampling_rate: + raise ValueError( + f"Sampling rate of the file ({fs}) does NOT match with that ({sampling_rate}) specified in the config yaml." + ) + if n_chan_x != num_channels: + raise ValueError( + f"The number of channels in the file ({n_chan_x}) do NOT match with those of format ({num_channels}, {input_format}) specified in the config yaml." + ) + # warn if audio length not a multiple of frame length + frame_length_samples = (frame_size_in_ms / 1000) * fs + if n_samples_x % frame_length_samples != 0: + if input_format.startswith("ISM") or input_format.startswith("MASA"): + raise ValueError( + f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)." + ) + else: + warn( + f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)." + ) diff --git a/ivas_processing_scripts/utils.py b/ivas_processing_scripts/utils.py index 916db8a4..a858a6c0 100755 --- a/ivas_processing_scripts/utils.py +++ b/ivas_processing_scripts/utils.py @@ -138,7 +138,7 @@ def find_binary( binary: str, raise_error: Optional[bool] = True, logger: Optional[logging.Logger] = None, - binary_path: Optional[str] = None, + binary_path: Optional[Path] = None, ) -> Union[Path, None]: """Attempt to find and return the path to the given binary""" # prioritise binaries placed in the directory over $PATH @@ -150,9 +150,12 @@ def find_binary( bin = which(binary) if not bin and raise_error: - raise FileNotFoundError( - f"Binary {binary} was neither found in {binary_path.absolute()} nor in {BIN_DIR.absolute()} or in $PATH!" - ) + msg = f"Binary {binary} was not found - neither in {BIN_DIR.absolute()} nor in $PATH" + if binary_path is None: + msg += " and binary_paths.yml file contains no items." 
+ else: + msg += " nor in {binary_path.absolute()}." + raise FileNotFoundError(msg) elif not bin: if logger: logger.debug(f"Couldn't find binary {binary}") diff --git a/tests/constants.py b/tests/constants.py index 11c48dc2..56a531a4 100644 --- a/tests/constants.py +++ b/tests/constants.py @@ -42,6 +42,7 @@ SCENE_DESC_DIR = TEST_VECTOR_DIR.joinpath("scene_description") OUTPUT_PATH_REF = TESTS_DIR.joinpath("ref") OUTPUT_PATH_CUT = TESTS_DIR.joinpath("cut") +INPUT_PATH_CONCATENATION = TESTS_DIR.joinpath("concatenation_folder") CUSTOM_LAYOUT_DIR = TEST_VECTOR_DIR.joinpath("ls_layouts") HR_TRAJECTORY_DIR = TEST_VECTOR_DIR.joinpath("trajectories") @@ -193,7 +194,6 @@ HR_TRAJECTORIES_TO_TEST = [ ] """ Generate Test Items Configs """ -# TODO reorganize later INPUT_CONFIG_FILES = [ str(TEST_VECTOR_DIR.joinpath("test_ISM.yml")), # str(TEST_VECTOR_DIR.joinpath("test_MASA.yml")), # TODO diff --git a/tests/data/test_ISM.yml b/tests/data/test_ISM.yml index 8543d180..7122ab4e 100644 --- a/tests/data/test_ISM.yml +++ b/tests/data/test_ISM.yml @@ -14,32 +14,37 @@ ### Deletion of temporary directories containing ### intermediate processing files, bitstreams etc.; default = false # delete_tmp: true +### Master seed for random processes like bitstream error pattern generation; default = 0 +# master_seed: 5 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. ### Do not use file names with dots "." in them! 
This is not supported, use "_" instead +### For Windows user: please use double back slash '\\' in paths and add '.exe' to executable definitions ### REQUIRED: Input path or file -input_path: "./tests/data/pinknoise/pink_noise_4ch_48kHz.wav" +input_path: "./tests/concatenation_folder/ISM" ### REQUIRED: Output path or file -output_path: "./tmp_output_ISM" +output_path: "./tests/tmp_output_ISM" ### Metadata path or file(s) ### If input format is ISM{1-4} a path for the metadata files can be specified; ### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherise ignored) metadata_path: ### Path can be set for all items with the 'all_items' key (automatic search for item_name.{wav, raw, pcm}.{0-3}.csv within this folder) - # all_items: "../path" + # all_items: ".../metadata_folder" ### Path can be set for all items individually with 'item{1-4}' keys - ### 'item{1-4}' keys can also be renamed to the input file names with extension + ### 'item{1-4}' keys can also be renamed to the input file names including extension {wav, raw, pcm} + ### Either list individual files for all objects or name folder for automatic search for one item pink_noise_4ch_48kHz.wav: - ### Either list individual files for all objects or give folder for automatic search - "./tests/data/ism_metadata/stvISM1.csv" - "./tests/data/ism_metadata/stvISM2.csv" - "./tests/data/ism_metadata/stvISM3.csv" - "./tests/data/ism_metadata/stvISM4.csv" - # item2: - # - ".../meta_name.csv" - # - ".../different_name.csv" - # noise: + spectral_test_4ch_48kHz.wav: + - "./tests/data/ism_metadata/ism_0a_0e.csv" + - "./tests/data/ism_metadata/ism_90a_0e.csv" + - "./tests/data/ism_metadata/ism_-90a_0e.csv" + - "./tests/data/ism_metadata/ism_180a_0e.csv" + # noise.wav: # - ".../metadata_folder_for_noise_item" ### Select only a subset of items @@ -47,13 +52,6 @@ metadata_path: # input_select: # - "48kHz" -### Horizontally concatenate input items into one long file; default = false -# 
concatenate_input: true -### Specify silence duration (ms) to add in between concatenated items; default = null -# concat_silence: - # pre: 0 - # post: 0 - ################################################ ### Input configuration ################################################ @@ -64,7 +62,7 @@ input: # fs: 32000 ################################################ -### Pre-processing +### Pre-processing on individual items ################################################ ### Pre-processing step performed prior to core processing for all conditions ### If not defined, preprocessing step is skipped @@ -74,54 +72,94 @@ preprocessing: ### Define mask (HP50 or 20KBP) for input signal filtering; default = null # mask: "HP50" ### Target sampling rate in Hz for resampling; default = null (no resampling) - # fs: 16000 + fs: 48000 ### Target loudness in LKFS; default = null (no loudness change applied) - # loudness: -26 + loudness: -26 ### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null); ### default = null (uses preprocessing fmt if possible) - # loudness_fmt: BINAURAL - ### Pre-/post-trim signal (ms) (negative values pad silence); default = 0 - trim: - - 40 - - -60 - ### Flag for using noise instead of silence for padding - pad_noise: true + # loudness_fmt: "BINAURAL" + ### Pre-/post-trim individual signal(s) (ms) (negative values pad silence); default = 0 + # trim: + # - 50 + # - -50 + ### Flag for using noise (amplitude +-4) instead of silence for padding; default = false (silence) + # pad_noise: true ### Value for application of delay (ms) (negative values advance); default = 0 # delay: 20 ### Length of window used at start/end of signal (ms); default = 0 # window: 100 + +################################################ +### Pre-processing on whole signal(s) +################################################ +preprocessing_2: + ### Options for processing of the concatenated item (concatenate_input: true) or + ### the 
individual items (concatenate_input: false) after previous pre-processing step + ### Horizontally concatenate input items into one long file; default = false + concatenate_input: true + ### Specify the concatenation order in a list of strings. If not specified, the concatenation order would be + ### as per the filesystem on the users' device + ### Should only be used if concatenate_input = true + ### Specify the filename with extension. + ### For example, concatenation_order: ["file3.wav", "file1.wav", "file4.wav", "file2.wav"] + # concatenation_order: [] + ### Specify preamble duration in ms; default = 0 + preamble: 10000 + ### Flag whether to use noise (amplitude +-4) for the preamble or silence; default = false (silence) + preamble_noise: true + ### Additive background noise + # background_noise: + ### REQUIRED: SNR for background noise in dB + # snr: 10 + ### REQUIRED: Path to background noise, must have same format and sampling rate as input signal(s) + # background_noise_path: ".../noise.wav" + ### Seed for delay offset; default = 0 + # seed_delay: 10 ################################################# -### bitstream processing +### Bitstream processing ################################################# -### bistream processing (transport simulation) done after encoding and before decoding +### Bitstream processing (transport simulation) done after encoding and before decoding ### e.g. frame error insertion or transport simulation for JBM testing -# tx: - ### other options for generating error files,.... 
- ### maybe directly give something like fer and generate error_pottern and bs_proc_opts internally - # fer: - # error_rate: 5 ### in percent - ### Path to network simulation binary (mandatory) - # bs_proc_bin: C:/local/bay/ivas/VR/src/ivas_python_testscripts/networkSimulator_g192.exe - ### Path to error pattern (mandatory if no information for generating the error pattern is given) - # error_pattern: C:/local/bay/ivas/VR/src/ivas_python_testscripts/dly_error_profiles/dly_error_profile_6.dat - ### options for the binary, possible placeholders are {error_pattern} for the error pattern, - ### {bitstream} for the bitstream to process and {bitstream_processed} for the processed bitstream - # bs_proc_opts: [ "{error_pattern}", "{bitstream}", "{processed_bitstream}", "{processed_bitstream}_tracefile_sim", "2", "0" ] - +### can be given globally here or in individual conditions of type ivas or evs +tx: + ### REQUIRED: Type of bitstream processing; possible types: "JBM" or "FER" + type: "FER" + + ### JBM + ### REQUIRED: either error_pattern or error_profile + ### delay error profile file + # error_pattern: ".../dly_error_profile.dat" + ### Index of one of the existing delay error profile files to use (1-11) + # error_profile: 5 + ## nFramesPerPacket parameter for the network simulator; default = 1 + # n_frames_per_packet: 2 + + ### FER + ### REQUIRED: either error_pattern or error_rate + ### Frame error pattern file + # error_pattern: "path/pattern.192" + ### Error rate in percent + error_rate: 5 + ### Additional seed to specify number of preruns; default = 0 + # prerun_seed: 2 + ################################################ ### Configuration for conditions under test ################################################ ### List of conditions to generate ### Name of the key will be used as output directory name ### conditions must specify the "type" key which may be one of the following options: -### ref generate the reference condition -### lp3k5 generate a low-pass anchor 
with cut-off frequency 3.5 kHz -### lp7k generate a low-pass anchor with cut-off frequency 7 kHz -### evs generate an EVS coded condition (see below examples for additional required keys) -### ivas generate an IVAS coded condition (see below examples for additional required keys) +### ref generate the reference condition +### lp3k5 generate a low-pass anchor with cut-off frequency 3.5 kHz +### lp7k generate a low-pass anchor with cut-off frequency 7 kHz +### mnru generate MNRU condition +### esdru generate ESDRU condition +### mono_dmx generate mono downmix condition +### evs generate an EVS coded condition (see below examples for additional required keys) +### ivas generate an IVAS coded condition (see below examples for additional required keys) conditions_to_generate: - ### Reference condition ########################## + ### Reference and anchor conditions ########################## c01: ### REQUIRED: type of condition type: ref @@ -129,19 +167,29 @@ conditions_to_generate: # out_fc: 22500 c02: ### REQUIRED: type of condition - type: lp3k5 - ### optional low-pass cut-off frequency in Hz; default = null - # out_fc: 22500 + type: lp7k + c03: + ### REQUIRED: type of condition + type: mnru + ### REQUIRED: the ratio of speech power to modulated noise power in dB + q: 20 + c04: + ### REQUIRED: type of condition + type: esdru + ### REQUIRED: spatial degradation value between 0 and 1 + alpha: 0.5 + c05: + ### REQUIRED: type of condition + type: mono_dmx ### IVAS condition ############################### - c03: + c06: ### REQUIRED: type of condition type: ivas ### REQUIRED: Bitrates to use for coding bitrates: - 160000 - # - 32000 - # TODO bitstream corruption + - 32000 ### Encoder options cod: ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary) @@ -155,22 +203,27 @@ conditions_to_generate: ### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary) #bin: ~/git/ivas-codec/IVAS_dec 
### Decoder output format; default = postprocessing fmt - #fmt: HOA3 + fmt: "ISM4" ### Decoder output sampling rate; default = null (same as input) # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] + ### Bitstream options + tx: + ### For possible arguments see overall bitstream modification + type: "FER" + error_rate: 3 ### IVAS condition ############################### - #c04: + c07: ### REQUIRED: type of condition - #type: ivas + type: ivas ### REQUIRED: Bitrates to use for coding - #bitrates: - #- 160000 + bitrates: + - 160000 # - 32000 ### Encoder options - #cod: + cod: ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary) #bin: ~/git/ivas-codec/IVAS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) @@ -178,36 +231,42 @@ conditions_to_generate: ### Additional commandline options; default = null # opts: ["-q", "-dtx", 4] ### Decoder options - #dec: + dec: ### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary) #bin: ~/git/ivas-codec/IVAS_dec ### Decoder output format; default = postprocessing fmt - #fmt: CICP19 + #fmt: "7_1_4" ### Decoder output sampling rate; default = null (same as input) # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] + ### Bitstream options + # tx: + ### For possible arguments see overall bitstream modification ### EVS condition ################################ - c05: + c08: ### REQUIRED: type of condition type: evs ### REQUIRED: Bitrates to use for coding ### For EVS mono, this may be a per-channel bitrate configuration (must match input/preprocessing format!) 
### the last value will be repeated if too few are specified bitrates: - - 9600 - #- [13200, 13200, 8000, 13200, 9600] + # - 9600 + - [13200, 13200, 8000, 13200, 9600] cod: ### Path to encoder binary; default search for EVS_cod in bin folder (primary) and PATH (secondary) - #bin: ~/git/ivas-codec/IVAS_cod + #bin: EVS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) # fs: 32000 dec: ### Path to encoder binary; default search for EVS_dec in bin folder (primary) and PATH (secondary) - #bin: ~/git/ivas-codec/IVAS_dec + #bin: EVS_dec ### Decoder output sampling rate; default = null (same as input) # fs: 48000 + ### Bitstream options + # tx: + ### For possible arguments see overall bitstream modification ################################################ ### Post-processing @@ -217,21 +276,21 @@ conditions_to_generate: postprocessing: ### REQUIRED: Target format for output fmt: "BINAURAL" - ### Target sampling rate in Hz for resampling; default = null (no resampling) - fs: 48000 + ### REQUIRED: Target sampling rate in Hz for resampling + fs: 32000 ### Low-pass cut-off frequency in Hz; default = null (no filtering) - # lp_cutoff: 24000 + lp_cutoff: 10000 ### Target loudness in LKFS; default = null (no loudness change applied) - # loudness: -26 + loudness: -26 ### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null); ### default = null (uses postprocessing fmt if possible) # loudness_fmt: null ### Name of custom binaural dataset (without prefix or suffix); - ### default = null (ORANGE53 for BINAURAL, IISofficialMPEG222UC for BINAURAL_ROOM) + ### default = null (ORANGE53(_Dolby) for BINAURAL, IISofficialMPEG222UC for BINAURAL_ROOM) # bin_dataset: SADIE ### Render LFE to binaural output with the specified gain (only valid for channel-based input); default = null # bin_lfe_gain: 1 - ### Flag whether output should be limited to avoid clipping (can alter target 
loudness); default = true - # limit: false + ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false + # limit: true ### Head-tracking trajectory file for binaural output; default = null - # trajectory: path/to/file + # trajectory: "path/to/file" diff --git a/tests/data/test_MC.yml b/tests/data/test_MC.yml index 414c2cdf..9e2c748e 100644 --- a/tests/data/test_MC.yml +++ b/tests/data/test_MC.yml @@ -14,43 +14,39 @@ ### Deletion of temporary directories containing ### intermediate processing files, bitstreams etc.; default = false # delete_tmp: true +### Master seed for random processes like bitstream error pattern generation; default = 0 +master_seed: 5 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. ### Do not use file names with dots "." in them! This is not supported, use "_" instead +### For Windows user: please use double back slash '\\' in paths and add '.exe' to executable definitions ### REQUIRED: Input path or file -input_path: "./tests/data/pinknoise/pink_noise_2ch_48kHz.wav" +input_path: "./tests/concatenation_folder/MC" ### REQUIRED: Output path or file -output_path: "./tmp_output_MC" +output_path: "./tests/tmp_output_MC" ### Metadata path or file(s) ### If input format is ISM{1-4} a path for the metadata files can be specified; ### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherise ignored) # metadata_path: ### Path can be set for all items with the 'all_items' key (automatic search for item_name.{wav, raw, pcm}.{0-3}.csv within this folder) - # all_items: "..\path" + # all_items: ".../metadata_folder" ### Path can be set for all items individually with 'item{1-4}' keys - ### 'item{1-4}' keys can also be renamed to the input file names with extension + ### 'item{1-4}' keys can also be renamed to the input file names including extension {wav, raw, pcm} + ### 
Either list individual files for all objects or name folder for automatic search for one item # item1: - ### Either list individual files for all objects or give folder for automatic search - # - "...\\metadata_folder" + # - ".../meta_all_obj" # item2: - # - "...\\meta_name.csv" - # - "...\\different_name.csv" - # noise: - # - "...\\metadata_folder_for_noise_item" + # - ".../meta_obj1.csv" + # - ".../meta_ob2.csv" + # noise.wav: + # - ".../metadata_folder_for_noise_item" ### Select only a subset of items ### searches for the specified substring in found filenames; default = null # input_select: # - "48kHz" -### Horizontally concatenate input items into one long file; default = false -# concatenate_input: true -### Specify silence duration (ms) to add in between concatenated items; default = null -# concat_silence: - # pre: 0 - # post: 0 - ################################################ ### Input configuration ################################################ @@ -61,7 +57,7 @@ input: # fs: 32000 ################################################ -### Pre-processing +### Pre-processing on individual items ################################################ ### Pre-processing step performed prior to core processing for all conditions ### If not defined, preprocessing step is skipped @@ -69,56 +65,96 @@ preprocessing: ### Target format used in rendering from input format; default = null (no rendering) fmt: "7_1_4" ### Define mask (HP50 or 20KBP) for input signal filtering; default = null - # mask: "HP50" + mask: "HP50" ### Target sampling rate in Hz for resampling; default = null (no resampling) - fs: 16000 + fs: 32000 ### Target loudness in LKFS; default = null (no loudness change applied) loudness: -26 ### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null); ### default = null (uses preprocessing fmt if possible) - # loudness_fmt: STEREO - ### Pre-\\post-trim signal (ms) (negative values pad silence); default = 0 + 
loudness_fmt: "BINAURAL" + ### Pre-/post-trim individual signal(s) (ms) (negative values pad silence); default = 0 # trim: # - 50 # - -50 - ### Flag for using noise instead of silence for padding + ### Flag for using noise (amplitude +-4) instead of silence for padding; default = false (silence) # pad_noise: true ### Value for application of delay (ms) (negative values advance); default = 0 delay: 20 - ### Length of window used at start\\end of signal (ms); default = 0 + ### Length of window used at start/end of signal (ms); default = 0 window: 100 + +################################################ +### Pre-processing on whole signal(s) +################################################ +preprocessing_2: + ### Options for processing of the concatenated item (concatenate_input: true) or + ### the individual items (concatenate_input: false) after previous pre-processing step + ### Horizontally concatenate input items into one long file; default = false + concatenate_input: true + ### Specify the concatenation order in a list of strings. If not specified, the concatenation order would be + ### as per the filesystem on the users' device + ### Should only be used if concatenate_input = true + ### Specify the filename with extension. 
+ ### For example, concatenation_order: ["file3.wav", "file1.wav", "file4.wav", "file2.wav"] + concatenation_order: ["spectral_test_2ch_48kHz.wav", "pink_noise_2ch_48kHz.wav"] + ### Specify preamble duration in ms; default = 0 + preamble: 10000 + ### Flag whether to use noise (amplitude +-4) for the preamble or silence; default = false (silence) + preamble_noise: true + ### Additive background noise + # background_noise: + ### REQUIRED: SNR for background noise in dB + # snr: 10 + ### REQUIRED: Path to background noise, must have same format and sampling rate as input signal(s) + # background_noise_path: ".../noise.wav" + ### Seed for delay offset; default = 0 + # seed_delay: 10 ################################################# -### bitstream processing +### Bitstream processing ################################################# -### bistream processing (transport simulation) done after encoding and before decoding +### Bitstream processing (transport simulation) done after encoding and before decoding ### e.g. frame error insertion or transport simulation for JBM testing -# tx: - ### other options for generating error files,.... 
- ### maybe directly give something like fer and generate error_pottern and bs_proc_opts internally - # fer: - # error_rate: 5 ### in percent - ### Path to network simulation binary (mandatory) - # bs_proc_bin: C:\\local\\bay\\ivas\\VR\\src\\ivas_python_testscripts\\networkSimulator_g192.exe - ### Path to error pattern (mandatory if no information for generating the error pattern is given) - # error_pattern: C:\\local\\bay\\ivas\\VR\\src\\ivas_python_testscripts\\dly_error_profiles\\dly_error_profile_6.dat - ### options for the binary, possible placeholders are {error_pattern} for the error pattern, - ### {bitstream} for the bitstream to process and {bitstream_processed} for the processed bitstream - # bs_proc_opts: [ "{error_pattern}", "{bitstream}", "{processed_bitstream}", "{processed_bitstream}_tracefile_sim", "2", "0" ] - +### can be given globally here or in individual conditions of type ivas or evs +tx: + ### REQUIRED: Type of bitstream processing; possible types: "JBM" or "FER" + type: "JBM" + + ### JBM + ### REQUIRED: either error_pattern or error_profile + ### delay error profile file + # error_pattern: ".../dly_error_profile.dat" + ### Index of one of the existing delay error profile files to use (1-11) + error_profile: 5 + ## nFramesPerPacket parameter for the network simulator; default = 1 + n_frames_per_packet: 2 + + ### FER + ### REQUIRED: either error_pattern or error_rate + ### Frame error pattern file + # error_pattern: "path/pattern.192" + ### Error rate in percent + # error_rate: 5 + ### Additional seed to specify number of preruns; default = 0 + # prerun_seed: 2 + ################################################ ### Configuration for conditions under test ################################################ ### List of conditions to generate ### Name of the key will be used as output directory name ### conditions must specify the "type" key which may be one of the following options: -### ref generate the reference condition -### lp3k5 generate a 
low-pass anchor with cut-off frequency 3.5 kHz -### lp7k generate a low-pass anchor with cut-off frequency 7 kHz -### evs generate an EVS coded condition (see below examples for additional required keys) -### ivas generate an IVAS coded condition (see below examples for additional required keys) +### ref generate the reference condition +### lp3k5 generate a low-pass anchor with cut-off frequency 3.5 kHz +### lp7k generate a low-pass anchor with cut-off frequency 7 kHz +### mnru generate MNRU condition +### esdru generate ESDRU condition +### mono_dmx generate mono downmix condition +### evs generate an EVS coded condition (see below examples for additional required keys) +### ivas generate an IVAS coded condition (see below examples for additional required keys) conditions_to_generate: - ### Reference condition ########################## + ### Reference and anchor conditions ########################## c01: ### REQUIRED: type of condition type: ref @@ -126,21 +162,33 @@ conditions_to_generate: # out_fc: 22500 c02: ### REQUIRED: type of condition - type: lp3k5 + type: lp7k + c03: + ### REQUIRED: type of condition + type: mnru + ### REQUIRED: the ratio of speech power to modulated noise power in dB + q: 20 + c04: + ### REQUIRED: type of condition + type: esdru + ### REQUIRED: spatial degradation value between 0 and 1 + alpha: 0.5 + c05: + ### REQUIRED: type of condition + type: mono_dmx ### IVAS condition ############################### - c03: + c06: ### REQUIRED: type of condition type: ivas ### REQUIRED: Bitrates to use for coding bitrates: - 160000 - 32000 - # TODO bitstream corruption ### Encoder options cod: ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary) - #bin: ~\\git\\ivas-codec\\IVAS_cod + #bin: ~/git/ivas-codec/IVAS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) # fs: 32000 ### Additional commandline options; default = null @@ -148,61 
+196,73 @@ conditions_to_generate: ### Decoder options dec: ### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary) - #bin: ~\\git\\ivas-codec\\IVAS_dec + #bin: ~/git/ivas-codec/IVAS_dec ### Decoder output format; default = postprocessing fmt fmt: "5_1" ### Decoder output sampling rate; default = null (same as input) # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] + ### Bitstream options + tx: + ### For possible arguments see overall bitstream modification + type: "FER" + error_rate: 3 + prerun_seed: 2 ### IVAS condition ############################### - #c04: + c07: ### REQUIRED: type of condition - #type: ivas + type: ivas ### REQUIRED: Bitrates to use for coding - #bitrates: - #- 160000 + bitrates: + - 160000 # - 32000 ### Encoder options - #cod: + cod: ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary) - #bin: ~\\git\\ivas-codec\\IVAS_cod + #bin: ~/git/ivas-codec/IVAS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) # fs: 32000 ### Additional commandline options; default = null # opts: ["-q", "-dtx", 4] ### Decoder options - #dec: + dec: ### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary) - #bin: ~\\git\\ivas-codec\\IVAS_dec + #bin: ~/git/ivas-codec/IVAS_dec ### Decoder output format; default = postprocessing fmt - #fmt: CICP19 + fmt: "7_1_4" ### Decoder output sampling rate; default = null (same as input) # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] + ### Bitstream options + # tx: + ### For possible arguments see overall bitstream modification ### EVS condition ################################ - c05: + c08: ### REQUIRED: type of condition type: evs ### REQUIRED: Bitrates to use for coding - ### For EVS mono, this may be a per-channel bitrate configuration 
(must match input\\preprocessing format!) + ### For EVS mono, this may be a per-channel bitrate configuration (must match input/preprocessing format!) ### the last value will be repeated if too few are specified bitrates: - - 9600 - #- [13200, 13200, 8000, 13200, 9600] + # - 9600 + - [13200, 13200, 8000, 13200, 9600] cod: ### Path to encoder binary; default search for EVS_cod in bin folder (primary) and PATH (secondary) - #bin: ~\\git\\ivas-codec\\IVAS_cod + #bin: EVS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) # fs: 32000 dec: ### Path to encoder binary; default search for EVS_dec in bin folder (primary) and PATH (secondary) - #bin: ~\\git\\ivas-codec\\IVAS_dec + #bin: EVS_dec ### Decoder output sampling rate; default = null (same as input) # fs: 48000 + ### Bitstream options + # tx: + ### For possible arguments see overall bitstream modification ################################################ ### Post-processing @@ -212,21 +272,21 @@ conditions_to_generate: postprocessing: ### REQUIRED: Target format for output fmt: "BINAURAL" - ### Target sampling rate in Hz for resampling; default = null (no resampling) + ### REQUIRED: Target sampling rate in Hz for resampling fs: 48000 ### Low-pass cut-off frequency in Hz; default = null (no filtering) # lp_cutoff: 24000 ### Target loudness in LKFS; default = null (no loudness change applied) - # loudness: -26 + loudness: -26 ### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null); ### default = null (uses postprocessing fmt if possible) # loudness_fmt: null ### Name of custom binaural dataset (without prefix or suffix); - ### default = null (ORANGE53 for BINAURAL, IISofficialMPEG222UC for BINAURAL_ROOM) + ### default = null (ORANGE53(_Dolby) for BINAURAL, IISofficialMPEG222UC for BINAURAL_ROOM) # bin_dataset: SADIE ### Render LFE to binaural output with the specified gain (only valid for channel-based 
input); default = null # bin_lfe_gain: 1 - ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = true - # limit: false + ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false + # limit: true ### Head-tracking trajectory file for binaural output; default = null - # trajectory: path\\to\\file + # trajectory: "path/to/file" diff --git a/tests/data/test_SBA.yml b/tests/data/test_SBA.yml index 2fc2adfe..679e57c4 100644 --- a/tests/data/test_SBA.yml +++ b/tests/data/test_SBA.yml @@ -14,54 +14,50 @@ ### Deletion of temporary directories containing ### intermediate processing files, bitstreams etc.; default = false # delete_tmp: true +### Master seed for random processes like bitstream error pattern generation; default = 0 +master_seed: 5 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. ### Do not use file names with dots "." in them! 
This is not supported, use "_" instead +### For Windows user: please use double back slash '\\' in paths and add '.exe' to executable definitions ### REQUIRED: Input path or file -input_path: "./tests/data/pinknoise/pink_noise_4ch_48kHz.wav" +input_path: "./tests/concatenation_folder/SBA" ### REQUIRED: Output path or file -output_path: "./tmp_output_SBA" +output_path: "./tests/tmp_output_SBA" ### Metadata path or file(s) ### If input format is ISM{1-4} a path for the metadata files can be specified; ### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherise ignored) # metadata_path: ### Path can be set for all items with the 'all_items' key (automatic search for item_name.{wav, raw, pcm}.{0-3}.csv within this folder) - # all_items: "../path" + # all_items: ".../metadata_folder" ### Path can be set for all items individually with 'item{1-4}' keys - ### 'item{1-4}' keys can also be renamed to the input file names with extension + ### 'item{1-4}' keys can also be renamed to the input file names including extension {wav, raw, pcm} + ### Either list individual files for all objects or name folder for automatic search for one item # item1: - ### Either list individual files for all objects or give folder for automatic search - # - ".../metadata_folder" + # - ".../meta_all_obj" # item2: - # - ".../meta_name.csv" - # - ".../different_name.csv" - # noise: - # - ".../metadata_folder_for_noise_item" + # - ".../meta_obj1.csv" + # - ".../meta_ob2.csv" + # noise.wav: + # - ".../metadata_folder_for_noise_item" ### Select only a subset of items ### searches for the specified substring in found filenames; default = null # input_select: # - "48kHz" -### Horizontally concatenate input items into one long file; default = false -# concatenate_input: true -### Specify silence duration (ms) to add in between concatenated items; default = null -# concat_silence: - # pre: 0 - # post: 0 - ################################################ ### Input 
configuration ################################################ input: ### REQUIRED: Input format - fmt: FOA + fmt: "FOA" ### Input sampling rate in Hz needed for headerless audio files; default = 48000 # fs: 32000 ################################################ -### Pre-processing +### Pre-processing on individual items ################################################ ### Pre-processing step performed prior to core processing for all conditions ### If not defined, preprocessing step is skipped @@ -71,54 +67,94 @@ preprocessing: ### Define mask (HP50 or 20KBP) for input signal filtering; default = null mask: "HP50" ### Target sampling rate in Hz for resampling; default = null (no resampling) - # fs: 16000 + fs: 32000 ### Target loudness in LKFS; default = null (no loudness change applied) - # loudness: -26 + loudness: -26 ### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null); ### default = null (uses preprocessing fmt if possible) - # loudness_fmt: BINAURAL - ### Pre-/post-trim signal (ms) (negative values pad silence); default = 0 - trim: - - 50 - - -50 - ### Flag for using noise instead of silence for padding - pad_noise: true + loudness_fmt: "MONO" + ### Pre-/post-trim individual signal(s) (ms) (negative values pad silence); default = 0 + # trim: + # - 50 + # - -50 + ### Flag for using noise (amplitude +-4) instead of silence for padding; default = false (silence) + # pad_noise: true ### Value for application of delay (ms) (negative values advance); default = 0 - # delay: 20 + delay: 20 ### Length of window used at start/end of signal (ms); default = 0 window: 100 + +################################################ +### Pre-processing on whole signal(s) +################################################ +preprocessing_2: + ### Options for processing of the concatenated item (concatenate_input: true) or + ### the individual items (concatenate_input: false) after previous pre-processing step + ### Horizontally 
concatenate input items into one long file; default = false + concatenate_input: true + ### Specify the concatenation order in a list of strings. If not specified, the concatenation order would be + ### as per the filesystem on the users' device + ### Should only be used if concatenate_input = true + ### Specify the filename with extension. + ### For example, concatenation_order: ["file3.wav", "file1.wav", "file4.wav", "file2.wav"] + # concatenation_order: [] + ### Specify preamble duration in ms; default = 0 + preamble: 10000 + ### Flag whether to use noise (amplitude +-4) for the preamble or silence; default = false (silence) + # preamble_noise: true + ### Additive background noise + # background_noise: + ### REQUIRED: SNR for background noise in dB + # snr: 10 + ### REQUIRED: Path to background noise, must have same format and sampling rate as input signal(s) + # background_noise_path: ".../noise.wav" + ### Seed for delay offset; default = 0 + # seed_delay: 10 ################################################# -### bitstream processing +### Bitstream processing ################################################# -### bistream processing (transport simulation) done after encoding and before decoding +### Bitstream processing (transport simulation) done after encoding and before decoding ### e.g. frame error insertion or transport simulation for JBM testing +### can be given globally here or in individual conditions of type ivas or evs # tx: - ### other options for generating error files,....
- ### maybe directly give something like fer and generate error_pottern and bs_proc_opts internally - # fer: - # error_rate: 5 ### in percent - ### Path to network simulation binary (mandatory) - # bs_proc_bin: C:\local\bay\ivas\VR\src\ivas_python_testscripts\networkSimulator_g192.exe - ### Path to error pattern (mandatory if no information for generating the error pattern is given) - # error_pattern: C:\local\bay\ivas\VR\src\ivas_python_testscripts\dly_error_profiles\dly_error_profile_6.dat - ### options for the binary, possible placeholders are {error_pattern} for the error pattern, - ### {bitstream} for the bitstream to process and {bitstream_processed} for the processed bitstream - # bs_proc_opts: [ "{error_pattern}", "{bitstream}", "{processed_bitstream}", "{processed_bitstream}_tracefile_sim", "2", "0" ] - + ### REQUIRED: Type of bitstream processing; possible types: "JBM" or "FER" + # type: "JBM" + + ### JBM + ### REQUIRED: either error_pattern or error_profile + ### delay error profile file + # error_pattern: ".../dly_error_profile.dat" + ### Index of one of the existing delay error profile files to use (1-11) + # error_profile: 5 + ## nFramesPerPacket parameter for the network simulator; default = 1 + # n_frames_per_packet: 2 + + ### FER + ### REQUIRED: either error_pattern or error_rate + ### Frame error pattern file + # error_pattern: "path/pattern.192" + ### Error rate in percent + # error_rate: 5 + ### Additional seed to specify number of preruns; default = 0 + # prerun_seed: 2 + ################################################ ### Configuration for conditions under test ################################################ ### List of conditions to generate ### Name of the key will be used as output directory name ### conditions must specify the "type" key which may be one of the following options: -### ref generate the reference condition -### lp3k5 generate a low-pass anchor with cut-off frequency 3.5 kHz -### lp7k generate a low-pass anchor with cut-off 
frequency 7 kHz -### evs generate an EVS coded condition (see below examples for additional required keys) -### ivas generate an IVAS coded condition (see below examples for additional required keys) +### ref generate the reference condition +### lp3k5 generate a low-pass anchor with cut-off frequency 3.5 kHz +### lp7k generate a low-pass anchor with cut-off frequency 7 kHz +### mnru generate MNRU condition +### esdru generate ESDRU condition +### mono_dmx generate mono downmix condition +### evs generate an EVS coded condition (see below examples for additional required keys) +### ivas generate an IVAS coded condition (see below examples for additional required keys) conditions_to_generate: - ### Reference condition ########################## + ### Reference and anchor conditions ########################## c01: ### REQUIRED: type of condition type: ref @@ -127,20 +163,32 @@ conditions_to_generate: c02: ### REQUIRED: type of condition type: lp3k5 + c03: + ### REQUIRED: type of condition + type: mnru + ### REQUIRED: the ratio of speech power to modulated noise power in dB + q: 20 + c04: + ### REQUIRED: type of condition + type: esdru + ### REQUIRED: spatial degradation value between 0 and 1 + alpha: 0.5 + c05: + ### REQUIRED: type of condition + type: mono_dmx ### IVAS condition ############################### - c03: + c06: ### REQUIRED: type of condition type: ivas ### REQUIRED: Bitrates to use for coding bitrates: - 160000 # - 32000 - # TODO bitstream corruption ### Encoder options cod: ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary) - # bin: ~/git/ivas-codec/IVAS_cod + #bin: ~/git/ivas-codec/IVAS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) # fs: 32000 ### Additional commandline options; default = null @@ -148,16 +196,19 @@ conditions_to_generate: ### Decoder options dec: ### Path to decoder binary; default search for IVAS_dec in bin folder 
(primary) and PATH (secondary) - # bin: ~/git/ivas-codec/IVAS_dec + #bin: ~/git/ivas-codec/IVAS_dec ### Decoder output format; default = postprocessing fmt - fmt: FOA + fmt: "HOA3" ### Decoder output sampling rate; default = null (same as input) # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] + ### Bitstream options + # tx: + ### For possible arguments see overall bitstream modification ### IVAS condition ############################### - c04: + c07: ### REQUIRED: type of condition type: ivas ### REQUIRED: Bitrates to use for coding @@ -167,7 +218,7 @@ conditions_to_generate: ### Encoder options cod: ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary) - # bin: ~/git/ivas-codec/IVAS_cod + #bin: ~/git/ivas-codec/IVAS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) # fs: 32000 ### Additional commandline options; default = null @@ -175,16 +226,22 @@ conditions_to_generate: ### Decoder options dec: ### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary) - # bin: ~/git/ivas-codec/IVAS_dec + #bin: ~/git/ivas-codec/IVAS_dec ### Decoder output format; default = postprocessing fmt - fmt: CICP19 + fmt: "CICP19" ### Decoder output sampling rate; default = null (same as input) # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] + ### Bitstream options + tx: + ### For possible arguments see overall bitstream modification + type: "FER" + error_rate: 3 + prerun_seed: 2 ### EVS condition ################################ - c05: + c08: ### REQUIRED: type of condition type: evs ### REQUIRED: Bitrates to use for coding @@ -195,14 +252,20 @@ conditions_to_generate: - [13200, 13200, 8000, 13200, 9600] cod: ### Path to encoder binary; default search for EVS_cod in bin folder (primary) and PATH (secondary) - # bin: ~/git/ivas-codec/IVAS_cod 
+ #bin: EVS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) # fs: 32000 dec: ### Path to encoder binary; default search for EVS_dec in bin folder (primary) and PATH (secondary) - # bin: ~/git/ivas-codec/IVAS_dec + #bin: EVS_dec ### Decoder output sampling rate; default = null (same as input) # fs: 48000 + ### Bitstream options + tx: + ### For possible arguments see overall bitstream modification + type: "JBM" + error_profile: 3 + sba_format: "PLANARFOA" ################################################ ### Post-processing @@ -212,7 +275,7 @@ conditions_to_generate: postprocessing: ### REQUIRED: Target format for output fmt: "BINAURAL" - ### Target sampling rate in Hz for resampling; default = null (no resampling) + ### REQUIRED: Target sampling rate in Hz for resampling fs: 48000 ### Low-pass cut-off frequency in Hz; default = null (no filtering) # lp_cutoff: 24000 @@ -222,11 +285,11 @@ postprocessing: ### default = null (uses postprocessing fmt if possible) # loudness_fmt: null ### Name of custom binaural dataset (without prefix or suffix); - ### default = null (ORANGE53 for BINAURAL, IISofficialMPEG222UC for BINAURAL_ROOM) + ### default = null (ORANGE53(_Dolby) for BINAURAL, IISofficialMPEG222UC for BINAURAL_ROOM) # bin_dataset: SADIE ### Render LFE to binaural output with the specified gain (only valid for channel-based input); default = null # bin_lfe_gain: 1 - ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = true - # limit: false + ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false + limit: true ### Head-tracking trajectory file for binaural output; default = null - # trajectory: path/to/file + # trajectory: "path/to/file" diff --git a/tests/test_binaries_present.py b/tests/test_binaries_present.py new file mode 100755 index 00000000..0ba0d4bb --- /dev/null +++ 
b/tests/test_binaries_present.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. 
+# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +import pytest + +from ivas_processing_scripts.utils import find_binary + +BINARIES = [ + "bs1770demo", + "p50fbmnru", + "esdru", + "eid-xor", + "gen-patt", + "filter", + "random", + "networkSimulator_g192", + "masaRenderer", +] + + +@pytest.mark.parametrize("binary", BINARIES) +def test_find_binary(binary): + assert find_binary(binary, raise_error=False) is not None diff --git a/tests/test_processing.py b/tests/test_processing.py index b0576bc0..edf9f031 100644 --- a/tests/test_processing.py +++ b/tests/test_processing.py @@ -30,9 +30,13 @@ # the United Nations Convention on Contracts on the International Sales of Goods. 
# +import shutil +from pathlib import Path + import pytest from ivas_processing_scripts import main as generate_test +from ivas_processing_scripts.processing.config import TestConfig from tests.constants import INPUT_CONFIG_FILES @@ -46,4 +50,35 @@ class Arguments: def test_generate_test_items(cfg): args = Arguments(cfg) + # read out input path + config = TestConfig(cfg) + input_path = Path(config.input_path).resolve().absolute() + + # deduce number of channels based on name + if "MC" in cfg: + num_channels = 2 # test stereo + elif "ISM" in cfg: + num_channels = 4 # test ISM4 + elif "SBA" in cfg: + num_channels = 4 # test FOA + else: + raise ValueError("Test setup missing") + + # create input folder for MC, SBA and ISM tests with concatenation + input_path.mkdir(exist_ok=True, parents=True) + + # copy items to folder -> pink noise and spectral test + pink_noise = ( + Path(f"./tests/data/pinknoise/pink_noise_{num_channels}ch_48kHz.wav") + .resolve() + .absolute() + ) + shutil.copy(pink_noise, input_path.joinpath(pink_noise.name)) + spectral = ( + Path(f"./tests/data/spectral/spectral_test_{num_channels}ch_48kHz.wav") + .resolve() + .absolute() + ) + shutil.copy(spectral, input_path.joinpath(spectral.name)) + generate_test(args) -- GitLab From d0bb519f8d28a9d1d35ec37a3c3bf495710aa5c6 Mon Sep 17 00:00:00 2001 From: veeravt Date: Wed, 10 May 2023 16:26:51 +0200 Subject: [PATCH 3/6] Adding support for the new binary. 
--- .../audiotools/wrappers/bs1770.py | 37 ++++++++++++++++--- .../processing/preprocessing_2.py | 4 +- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/ivas_processing_scripts/audiotools/wrappers/bs1770.py b/ivas_processing_scripts/audiotools/wrappers/bs1770.py index 1cd9c753..a87d20ef 100755 --- a/ivas_processing_scripts/audiotools/wrappers/bs1770.py +++ b/ivas_processing_scripts/audiotools/wrappers/bs1770.py @@ -33,6 +33,7 @@ import copy import logging import re +import subprocess as sp from pathlib import Path from tempfile import TemporaryDirectory from typing import Optional, Tuple, Union @@ -53,6 +54,7 @@ logger.setLevel(logging.DEBUG) def bs1770demo( input: audio.Audio, target_loudness: Optional[float] = -26, + rms: Optional[bool] = False, ) -> Tuple[float, float]: """ Wrapper for ITU-R BS.1770-4, requires bs1770demo binary @@ -82,6 +84,16 @@ def bs1770demo( else: binary = find_binary("bs1770demo") + # checking if the new binary (with '-rms') is used + with TemporaryDirectory() as tmp_dir_test: + tmp_dir_test = Path(tmp_dir_test) + result = sp.run(str(binary), shell=True, stdout=sp.PIPE, stderr=sp.PIPE) + result_stdout = result.stdout.decode("utf-8") + if "-rms" not in result_stdout: + raise RuntimeError( + "An old version of bs1770demo binary was detected. Use the new one. See bin/README.md for details." 
+ ) + if not isinstance(input, audio.BinauralAudio) and not isinstance( input, audio.ChannelBasedAudio ): @@ -131,6 +143,9 @@ def bs1770demo( # write temporary file write(tmp_file, tmp_sig, 48000) + # using rms if true + if rms: + cmd.insert(1, "-rms") # run command result = run(cmd, logger=logger) @@ -150,6 +165,7 @@ def get_loudness( input: audio.Audio, target_loudness: Optional[float] = -26, loudness_format: Optional[str] = None, + rms: Optional[bool] = False, ) -> Tuple[float, float]: """ Loudness measurement using ITU-R BS.1770-4 @@ -196,14 +212,17 @@ def get_loudness( convert.format_conversion(input, tmp) else: tmp.audio = input.audio - - return bs1770demo(tmp, target_loudness) + if rms: + return bs1770demo(tmp, target_loudness, rms=True) + else: + return bs1770demo(tmp, target_loudness) def loudness_norm( input: audio.Audio, target_loudness: Optional[float] = -26, loudness_format: Optional[str] = None, + rms: Optional[bool] = False, ) -> np.ndarray: """ Iterative loudness normalization using ITU-R BS.1770-4 @@ -231,9 +250,17 @@ def loudness_norm( num_iter = 1 while np.abs(measured_loudness - target_loudness) > 0.5 and num_iter < 10: - measured_loudness, scale_factor_new = get_loudness( - input, target_loudness, loudness_format - ) + if rms: + measured_loudness, scale_factor_new = get_loudness( + input, + target_loudness, + loudness_format, + rms=True, + ) + else: + measured_loudness, scale_factor_new = get_loudness( + input, target_loudness, loudness_format + ) # scale input input.audio *= scale_factor_new diff --git a/ivas_processing_scripts/processing/preprocessing_2.py b/ivas_processing_scripts/processing/preprocessing_2.py index 0bacc8ee..5d6c51a3 100644 --- a/ivas_processing_scripts/processing/preprocessing_2.py +++ b/ivas_processing_scripts/processing/preprocessing_2.py @@ -149,7 +149,9 @@ class Preprocessing2(Processing): )[: len(audio_object.audio)] # scale background noise to desired loudness based on output format - noise_object.audio = 
loudness_norm(noise_object, loudness_noise, out_format) + noise_object.audio = loudness_norm( + noise_object, loudness_noise, out_format, rms=True + ) # add array to signal audio_object.audio = noise_object.audio + audio_object.audio -- GitLab From fabe0d61dc7d3e1f3ecfb9f611cbef5d67a77871 Mon Sep 17 00:00:00 2001 From: veeravt Date: Wed, 10 May 2023 17:22:49 +0200 Subject: [PATCH 4/6] Remove redundant if-else --- .../audiotools/wrappers/bs1770.py | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/ivas_processing_scripts/audiotools/wrappers/bs1770.py b/ivas_processing_scripts/audiotools/wrappers/bs1770.py index a87d20ef..a6484563 100755 --- a/ivas_processing_scripts/audiotools/wrappers/bs1770.py +++ b/ivas_processing_scripts/audiotools/wrappers/bs1770.py @@ -250,17 +250,12 @@ def loudness_norm( num_iter = 1 while np.abs(measured_loudness - target_loudness) > 0.5 and num_iter < 10: - if rms: - measured_loudness, scale_factor_new = get_loudness( - input, - target_loudness, - loudness_format, - rms=True, - ) - else: - measured_loudness, scale_factor_new = get_loudness( - input, target_loudness, loudness_format - ) + measured_loudness, scale_factor_new = get_loudness( + input, + target_loudness, + loudness_format, + rms=rms, + ) # scale input input.audio *= scale_factor_new @@ -284,6 +279,7 @@ def scale_files( loudness: float, fs: Optional[int] = 48000, in_meta: Optional[list] = None, + rms: Optional[bool] = False, ) -> None: """ Scales audio files to desired loudness @@ -318,7 +314,10 @@ def scale_files( audio_obj = audio.fromfile(fmt, file, fs) # adjust loudness - scaled_audio = loudness_norm(audio_obj, loudness) + if rms: + scaled_audio = loudness_norm(audio_obj, loudness, rms=True) + else: + scaled_audio = loudness_norm(audio_obj, loudness) # write into file write(file, scaled_audio, audio_obj.fs) -- GitLab From 6270f6ee35c1e66af66e3636cc74667a7dba70aa Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Wed, 10 May 2023 17:41:21 
+0200 Subject: [PATCH 5/6] rephrase error message --- ivas_processing_scripts/audiotools/wrappers/bs1770.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ivas_processing_scripts/audiotools/wrappers/bs1770.py b/ivas_processing_scripts/audiotools/wrappers/bs1770.py index a6484563..3a711f47 100755 --- a/ivas_processing_scripts/audiotools/wrappers/bs1770.py +++ b/ivas_processing_scripts/audiotools/wrappers/bs1770.py @@ -91,7 +91,7 @@ def bs1770demo( result_stdout = result.stdout.decode("utf-8") if "-rms" not in result_stdout: raise RuntimeError( - "An old version of bs1770demo binary was detected. Use the new one. See bin/README.md for details." + 'An bs1770demo executable without RMS support (cmdl option "-rms") was detected. Please update the bs1770demo executable. See bin/README.md for details.' ) if not isinstance(input, audio.BinauralAudio) and not isinstance( -- GitLab From e6cf789794df6e938870816ba40d3e955aa6bc8b Mon Sep 17 00:00:00 2001 From: veeravt Date: Thu, 11 May 2023 14:34:23 +0200 Subject: [PATCH 6/6] Removed the unused check_inf_and_nan function. --- .../audiotools/wrappers/bs1770.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/ivas_processing_scripts/audiotools/wrappers/bs1770.py b/ivas_processing_scripts/audiotools/wrappers/bs1770.py index 3a711f47..20bc86b4 100755 --- a/ivas_processing_scripts/audiotools/wrappers/bs1770.py +++ b/ivas_processing_scripts/audiotools/wrappers/bs1770.py @@ -321,20 +321,3 @@ def scale_files( # write into file write(file, scaled_audio, audio_obj.fs) - - -def check_for_nan_and_inf(input_string: str): - """ - Checks the input string for nan and inf - - Parameters - ---------- - input_string: str - Input string - """ - if "nan" in input_string.lower(): - raise ValueError("Too quiet.") - elif "inf" in input_string.lower(): - raise ValueError("All zeros.") - else: - return float(input_string) -- GitLab