Skip to content
---
################################################
# General configuration
################################################
name: P800-5
master_seed: 5
prerun_seed: 2
input_path: "experiments/selection/P800-5/proc_input"
output_path: "experiments/selection/P800-5/proc_output"

################################################
### Input configuration
################################################
input:
  fmt: "FOA"
  # TODO: to be clarified in Test Plan
  fs: 48000

################################################
### Pre-processing on individual items
################################################
preprocessing:
  mask: "HP50"
  loudness: -26
  loudness_fmt: "BINAURAL"
  window: 100

################################################
### Pre-processing on whole signal(s)
################################################
preprocessing_2:
  concatenate_input: true
  # concatenation_order: []
  preamble: 10000
  preamble_noise: true
  background_noise:
    ### REQUIRED: SNR for background noise in dB
    snr: 15
    ### REQUIRED: Path to background noise,
    ### must have same format and sampling rate as input signal(s)
    background_noise_path: "experiments/selection/P800-5/background_noise/background_noise.wav"

#################################################
### Bitstream processing
#################################################

################################################
### Configuration for conditions under test
################################################
conditions_to_generate:
  ### Reference and anchor conditions ##########################
  c01:
    type: ref
  c02:
    type: mnru
    q: 28
  c03:
    type: mnru
    q: 24
  c04:
    type: mnru
    q: 20
  c05:
    type: mnru
    q: 16
  c06:
    type: esdru
    alpha: 0.7
  c07:
    type: esdru
    alpha: 0.4
  c08:
    type: esdru
    alpha: 0.1
  ### EVS condition ################################
  # NOTE(review): c09 has no `dec:` key (unlike c10-c22) and shares bitrate 7200 with c10;
  # `sba_format` is placed at condition level here - confirm the intended nesting.
  c09:
    type: evs
    bitrates:
      - 7200
    cod:
      opts: ["-max_band", "FB"]
    sba_format: "PLANARFOA"
  c10:
    type: evs
    bitrates:
      - 7200
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c11:
    type: evs
    bitrates:
      - 8000
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c12:
    type: evs
    bitrates:
      - 9600
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c13:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c14:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c15:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c16:
    type: evs
    bitrates:
      - 32000
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c17:
    type: evs
    bitrates:
      - 7200
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c18:
    type: evs
    bitrates:
      - 8000
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c19:
    type: evs
    bitrates:
      - 9600
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c20:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c21:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c22:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  ### IVAS condition ###############################
  c23:
    type: ivas
    bitrates:
      - 13200
    cod:
    dec:
      fmt: "FOA"
  c24:
    type: ivas
    bitrates:
      - 16400
    cod:
    dec:
      fmt: "FOA"
  c25:
    type: ivas
    bitrates:
      - 24400
    cod:
    dec:
      fmt: "FOA"
  c26:
    type: ivas
    bitrates:
      - 32000
    cod:
    dec:
      fmt: "FOA"
  c27:
    type: ivas
    bitrates:
      - 48000
    cod:
    dec:
      fmt: "FOA"
  c28:
    type: ivas
    bitrates:
      - 64000
    cod:
    dec:
      fmt: "FOA"
  c29:
    type: ivas
    bitrates:
      - 80000
    cod:
    dec:
      fmt: "FOA"
  c30:
    type: ivas
    bitrates:
      - 96000
    cod:
    dec:
      fmt: "FOA"
  c31:
    type: ivas
    bitrates:
      - 16400
    cod:
      opts: ["-dtx"]
    dec:
      fmt: "FOA"
  c32:
    type: ivas
    bitrates:
      - 24400
    cod:
      opts: ["-dtx"]
    dec:
      fmt: "FOA"
  c33:
    type: ivas
    bitrates:
      - 32000
    cod:
      opts: ["-dtx"]
    dec:
      fmt: "FOA"
  c34:
    type: ivas
    bitrates:
      - 48000
    cod:
      opts: ["-dtx"]
    dec:
      fmt: "FOA"
  c35:
    type: ivas
    bitrates:
      - 64000
    cod:
      opts: ["-dtx"]
    dec:
      fmt: "FOA"
  c36:
    type: ivas
    bitrates:
      - 80000
    cod:
      opts: ["-dtx"]
    dec:
      fmt: "FOA"

################################################
### Post-processing
################################################
postprocessing:
  fmt: "BINAURAL"
  fs: 48000
  loudness: -26
---
################################################
# General configuration
################################################
name: P800-6
master_seed: 5
prerun_seed: 2
input_path: "experiments/selection/P800-6/proc_input"
output_path: "experiments/selection/P800-6/proc_output"

################################################
### Input configuration
################################################
input:
  fmt: "ISM1"
  # TODO: to be clarified in Test Plan
  fs: 48000

################################################
### Pre-processing on individual items
################################################
preprocessing:
  mask: "HP50"
  loudness: -26
  window: 100

################################################
### Pre-processing on whole signal(s)
################################################
preprocessing_2:
  concatenate_input: true
  # concatenation_order: []
  preamble: 10000
  preamble_noise: true

#################################################
### Bitstream processing
#################################################

################################################
### Configuration for conditions under test
################################################
conditions_to_generate:
  ### Reference and anchor conditions ##########################
  c01:
    type: ref
  c02:
    type: mnru
    q: 15
  c03:
    type: mnru
    q: 23
  c04:
    type: mnru
    q: 31
  c05:
    type: mnru
    q: 39
  c06:
    type: mnru
    q: 47
  c07:
    type: esdru
    alpha: 0.1
  c08:
    type: esdru
    alpha: 0.3
  c09:
    type: esdru
    alpha: 0.5
  c10:
    type: esdru
    alpha: 0.7
  ### EVS condition ################################
  c11:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c12:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c13:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c14:
    type: evs
    bitrates:
      - 32000
    cod:
      opts: ["-max_band", "FB"]
    dec:
  # NOTE(review): `tx` is nested at condition level (sibling of `cod`/`dec`) - confirm.
  c15:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-max_band", "FB"]
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c16:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-max_band", "FB"]
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c17:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-max_band", "FB"]
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c18:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c19:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c20:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  ### IVAS condition ###############################
  c21:
    type: ivas
    bitrates:
      - 13200
    cod:
    dec:
  c22:
    type: ivas
    bitrates:
      - 16400
    cod:
    dec:
  c23:
    type: ivas
    bitrates:
      - 24400
    cod:
    dec:
  c24:
    type: ivas
    bitrates:
      - 32000
    cod:
    dec:
  c25:
    type: ivas
    bitrates:
      - 13200
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c26:
    type: ivas
    bitrates:
      - 16400
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c27:
    type: ivas
    bitrates:
      - 24400
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c28:
    type: ivas
    bitrates:
      - 13200
    cod:
      opts: ["-dtx"]
    dec:
  c29:
    type: ivas
    bitrates:
      - 16400
    cod:
      opts: ["-dtx"]
    dec:
  c30:
    type: ivas
    bitrates:
      - 24400
    cod:
      opts: ["-dtx"]
    dec:

################################################
### Post-processing
################################################
postprocessing:
  fmt: "BINAURAL"
  fs: 48000
  loudness: -26
---
################################################
# General configuration
################################################
name: P800-7
master_seed: 5
prerun_seed: 2
input_path: "experiments/selection/P800-7/proc_input"
output_path: "experiments/selection/P800-7/proc_output"

################################################
### Input configuration
################################################
input:
  fmt: "ISM2"
  # TODO: to be clarified in Test Plan
  fs: 48000

################################################
### Pre-processing on individual items
################################################
preprocessing:
  mask: "HP50"
  loudness: -26
  window: 100

################################################
### Pre-processing on whole signal(s)
################################################
preprocessing_2:
  concatenate_input: true
  # concatenation_order: []
  preamble: 10000
  preamble_noise: true

#################################################
### Bitstream processing
#################################################

################################################
### Configuration for conditions under test
################################################
conditions_to_generate:
  ### Reference and anchor conditions ##########################
  c01:
    type: ref
  c02:
    type: mnru
    q: 15
  c03:
    type: mnru
    q: 23
  c04:
    type: mnru
    q: 31
  c05:
    type: mnru
    q: 39
  c06:
    type: mnru
    q: 47
  c07:
    type: esdru
    alpha: 0.1
  c08:
    type: esdru
    alpha: 0.3
  c09:
    type: esdru
    alpha: 0.5
  c10:
    type: esdru
    alpha: 0.7
  ### EVS condition ################################
  c11:
    type: evs
    bitrates:
      - 8000
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c12:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c13:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c14:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-max_band", "FB"]
    dec:
  # NOTE(review): `tx` is nested at condition level (sibling of `cod`/`dec`) - confirm.
  c15:
    type: evs
    bitrates:
      - 8000
    cod:
      opts: ["-max_band", "FB"]
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c16:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-max_band", "FB"]
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c17:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-max_band", "FB"]
    dec:
    tx:
      type: "FER"
      error_rate: 5
  # NOTE(review): c18 is identical to c22 (24400 with -dtx); the c15-c17 pattern suggests
  # a 24400 FER condition may have been intended - confirm against the test plan.
  c18:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c19:
    type: evs
    bitrates:
      - 8000
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c20:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c21:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c22:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  ### IVAS condition ###############################
  c23:
    type: ivas
    bitrates:
      - 16400
    cod:
    dec:
  c24:
    type: ivas
    bitrates:
      - 24400
    cod:
    dec:
  c25:
    type: ivas
    bitrates:
      - 32000
    cod:
    dec:
  c26:
    type: ivas
    bitrates:
      - 48000
    cod:
    dec:
  c27:
    type: ivas
    bitrates:
      - 16400
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c28:
    type: ivas
    bitrates:
      - 24400
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c29:
    type: ivas
    bitrates:
      - 32000
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c30:
    type: ivas
    bitrates:
      - 48000
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c31:
    type: ivas
    bitrates:
      - 16400
    cod:
      opts: ["-dtx"]
    dec:
  c32:
    type: ivas
    bitrates:
      - 24400
    cod:
      opts: ["-dtx"]
    dec:
  c33:
    type: ivas
    bitrates:
      - 32000
    cod:
      opts: ["-dtx"]
    dec:
  c34:
    type: ivas
    bitrates:
      - 48000
    cod:
      opts: ["-dtx"]
    dec:

################################################
### Post-processing
################################################
postprocessing:
  fmt: "BINAURAL"
  fs: 48000
  loudness: -26
......@@ -71,7 +71,7 @@ def logging_init(args, cfg):
cfg.output_path.joinpath(f"{cfg.name}{LOGGER_SUFFIX}"), mode="w"
)
file_handler.setFormatter(logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT))
file_handler.setLevel(logging.DEBUG if args.debug else logging.INFO)
file_handler.setLevel(logging.DEBUG)
logger.addHandler(file_handler)
logger.info(f"Processing test configuration file {args.config}")
......@@ -146,6 +146,7 @@ def main(args):
hasattr(cfg, "preprocessing")
and hasattr(cfg.pre2, "background_noise")
and cfg.pre2.background_noise is not None
and cfg.pre2.background_noise.get("background_noise_path")
):
preprocess_background_noise(cfg)
# preprocess 2
......
......@@ -308,7 +308,8 @@ class ObjectBasedAudio(Audio):
if pos.shape[1] < 5:
raise ValueError("Metadata incomplete. Columns are missing.")
elif pos.shape[1] > 5:
if pos.shape[1] == 7:
if pos.shape[1] <= 8:
# TODO: FIXME
pos = pos[:, :5]
else:
raise ValueError(
......
......@@ -52,6 +52,7 @@ def trim(
limits: Optional[Tuple[int, int]] = None,
pad_noise: Optional[bool] = False,
samples: Optional[bool] = False,
seed: Optional[int] = None,
) -> np.ndarray:
"""
Trim an audio array
......@@ -88,6 +89,9 @@ def trim(
if pre_trim < 0:
if pad_noise:
# pad with uniformly distributed noise between -4 and 4
if seed:
np.random.seed(seed)
else:
np.random.seed(SEED_PADDING)
noise = np.random.randint(
low=-4, high=5, size=(np.abs(pre_trim), np.shape(x)[1])
......@@ -101,6 +105,9 @@ def trim(
if post_trim < 0:
if pad_noise:
# pad with uniformly distributed noise between -4 and 4
if seed:
np.random.seed(seed)
else:
np.random.seed(SEED_PADDING)
noise = np.random.randint(
low=-4, high=5, size=(np.abs(post_trim), np.shape(x)[1])
......
......@@ -253,7 +253,7 @@ def process_audio(
logger.debug(
f"Applying loudness adjustment to {loudness} LKFS for format {loudness_fmt} using ITU STL bs1770demo"
)
x.audio = loudness_norm(x, loudness, loudness_fmt)
x.audio = loudness_norm(x, loudness, loudness_fmt, logger=logger)
"""low-pass filtering"""
if fc is not None:
......
......@@ -47,9 +47,6 @@ from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu
from ivas_processing_scripts.constants import DEFAULT_CONFIG_BINARIES
from ivas_processing_scripts.utils import find_binary, get_devnull, run
logger = logging.getLogger("__main__")
logger.setLevel(logging.DEBUG)
def bs1770demo(
input: audio.Audio,
......@@ -65,6 +62,8 @@ def bs1770demo(
Input audio
target_loudness: Optional[float]
Desired loudness in LKFS
rms: Optional[bool]
Flag for using rms argument in bs1770demo tool (for low level signals)
Returns
-------
......@@ -85,8 +84,6 @@ def bs1770demo(
binary = find_binary("bs1770demo")
# checking if the new binary (with '-rms') is used
with TemporaryDirectory() as tmp_dir_test:
tmp_dir_test = Path(tmp_dir_test)
result = sp.run(str(binary), shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
result_stdout = result.stdout.decode("utf-8")
if "-rms" not in result_stdout:
......@@ -146,8 +143,9 @@ def bs1770demo(
# using rms if true
if rms:
cmd.insert(1, "-rms")
# run command
result = run(cmd, logger=logger)
result = run(cmd)
# parse output
# we are looking for the (floating-point) number after the search string - '( )' around the number denotes the first group
......@@ -174,10 +172,12 @@ def get_loudness(
----------
input : Audio
Input audio
target_loudness: float
target_loudness: Optional[float]
Desired loudness in LKFS
loudness_format: str
loudness_format: Optional[str]
Loudness format to render to for loudness computation (default input format if possible)
rms: Optional[bool]
Flag for using rms argument in bs1770demo tool (for low level signals)
Returns
-------
......@@ -212,10 +212,7 @@ def get_loudness(
convert.format_conversion(input, tmp)
else:
tmp.audio = input.audio
if rms:
return bs1770demo(tmp, target_loudness, rms=True)
else:
return bs1770demo(tmp, target_loudness)
return bs1770demo(tmp, target_loudness, rms)
def loudness_norm(
......@@ -223,6 +220,8 @@ def loudness_norm(
target_loudness: Optional[float] = -26,
loudness_format: Optional[str] = None,
rms: Optional[bool] = False,
logger: Optional[logging.Logger] = None,
file_name_logging: Optional[Union[str, Path]] = None,
) -> np.ndarray:
"""
Iterative loudness normalization using ITU-R BS.1770-4
......@@ -237,6 +236,12 @@ def loudness_norm(
Desired loudness level in LKFS
loudness_format: Optional[str]
Loudness format to render to for loudness computation (default input format)
rms: Optional[bool]
Flag for using rms argument in bs1770demo tool (for low level signals)
logger: Optional[logging.Logger]
Logger to log loudness information
file_name_logging: Optional[Union[str, Path]]
Name of processed file for logging information
Returns
-------
......@@ -244,42 +249,56 @@ def loudness_norm(
Normalized audio
"""
# repeat until convergence of loudness
measured_loudness = np.inf
scale_factor = 1
num_iter = 1
scaled_input = copy.deepcopy(input)
while np.abs(measured_loudness - target_loudness) > 0.5 and num_iter < 10:
measured_loudness, scale_factor_new = get_loudness(
input,
target_loudness,
loudness_format,
rms=rms,
# save loudness before and after scaling for the logger info
loudness_before, scale_factor_new = get_loudness(
scaled_input, target_loudness, loudness_format, rms
)
# repeat until convergence of loudness
while np.abs(measured_loudness - target_loudness) > 0.5 and num_iter < 10:
# scale input
input.audio *= scale_factor_new
scaled_input.audio *= scale_factor_new
# update scale factor
scale_factor *= scale_factor_new
# measure loudness and get scaling factor
measured_loudness, scale_factor_new = get_loudness(
scaled_input, target_loudness, loudness_format, rms
)
num_iter += 1
loudness_after = measured_loudness
# log loudness before and after adjustment
if logger:
if file_name_logging:
logger.debug(
f"File {file_name_logging} loudness; before: {loudness_before}, after: {loudness_after}"
)
else:
logger.debug(
f"Loudness; before: {loudness_before}, after: {loudness_after}"
)
if num_iter >= 10:
warn(
f"Loudness did not converge to desired value, stopping at: {measured_loudness:.2f}"
f"Loudness did not converge to desired value, stopping at: {loudness_after:.2f}"
)
return input.audio
return scaled_input.audio
def scale_files(
file_list: list[list[Union[Path, str]]],
fmt: str,
loudness: float,
loudness_format: Optional[str] = None,
fs: Optional[int] = 48000,
in_meta: Optional[list] = None,
rms: Optional[bool] = False,
logger: Optional[logging.Logger] = None,
) -> None:
"""
Scales audio files to desired loudness
......@@ -292,15 +311,22 @@ def scale_files(
Audio format of files in list
loudness: float
Desired loudness level in LKFS/dBov
loudness_format: Optional[str]
Format for loudness measurement
fs: Optional[int]
Sampling rate
in_meta: Optional[list]
Metadata for ISM with same structure as file_list but one layer more
for the list of metadata for one file
logger: Optional[logging.Logger]
Logger to log loudness information
"""
if fmt.startswith("ISM") and in_meta:
if fmt.startswith("ISM"):
if in_meta:
meta_bool = True
else:
raise ValueError("No metadata available for loudness measurement")
else:
in_meta = copy.copy(file_list)
meta_bool = False
......@@ -314,10 +340,13 @@ def scale_files(
audio_obj = audio.fromfile(fmt, file, fs)
# adjust loudness
if rms:
scaled_audio = loudness_norm(audio_obj, loudness, rms=True)
else:
scaled_audio = loudness_norm(audio_obj, loudness)
scaled_audio = loudness_norm(
audio_obj,
loudness,
loudness_format,
logger=logger,
file_name_logging=file,
)
# write into file
write(file, scaled_audio, audio_obj.fs)
......@@ -3,7 +3,7 @@ Necessary additional executables:
| Processing step | Executable | Where to find |
|-------------------------------------------------|-----------------------|-------------------------------------------------------------------------------------------------------------|
| Loudness measurement and adjustment | bs1770demo | https://github.com/openitu/STL |
| Loudness measurement and adjustment | bs1770demo | https://github.com/ErikNorvell-Ericsson/STL (Note branch) |
| MNRU | p50fbmnru | https://github.com/openitu/STL |
| ESDRU | esdru | https://github.com/openitu/STL |
| Frame error pattern application | eid-xor | https://github.com/openitu/STL |
......
......@@ -30,7 +30,6 @@
# the United Nations Convention on Contracts on the International Sales of Goods.
#
from pathlib import Path
from typing import Optional
from warnings import warn
......@@ -40,7 +39,7 @@ from ivas_processing_scripts.processing.ivas import IVAS
from ivas_processing_scripts.processing.postprocessing import Postprocessing
from ivas_processing_scripts.processing.preprocessing import Preprocessing
from ivas_processing_scripts.processing.preprocessing_2 import Preprocessing2
from ivas_processing_scripts.utils import list_audio
from ivas_processing_scripts.utils import get_abs_path, list_audio
def init_processing_chains(cfg: TestConfig) -> None:
......@@ -146,6 +145,8 @@ def get_preprocessing_2(cfg: TestConfig) -> dict:
}
pre2_cfg = cfg.preprocessing_2
# set up background noise
background_cfg = pre2_cfg.get("background_noise", None)
if background_cfg:
background = {
......@@ -153,6 +154,7 @@ def get_preprocessing_2(cfg: TestConfig) -> dict:
"background_noise_path": get_abs_path(
background_cfg.get("background_noise_path", None)
),
"low_level_noise": background_cfg.get("low_level_noise", False),
"seed_delay": cfg.prerun_seed,
"master_seed": cfg.master_seed,
"output_fmt": cfg.postprocessing["fmt"],
......@@ -209,6 +211,7 @@ def get_processing_chain(
# get pre and post processing configurations
pre_cfg = getattr(cfg, "preprocessing", {})
pre2_cfg = getattr(cfg, "preprocessing_2", {})
post_cfg = cfg.postprocessing
# default to input values if preprocessing was not requested
......@@ -400,6 +403,14 @@ def get_processing_chain(
raise SystemExit(f"Unknown condition {condition}!")
# add postprocessing step based on condition
# if the input is concatenated and split again later, apply the loudness adjustment only to the split files
if pre2_cfg.get("concatenate_input", False):
loudness_postprocessing = None
loudness_fmt_postprocessing = None
else:
loudness_postprocessing = post_cfg.get("loudness")
loudness_fmt_postprocessing = post_cfg.get("loudness_fmt")
chain["processes"].append(
Postprocessing(
{
......@@ -408,8 +419,8 @@ def get_processing_chain(
"out_fs": post_cfg.get("fs"),
"out_fmt": post_cfg.get("fmt"),
"out_cutoff": tmp_lp_cutoff,
"out_loudness": post_cfg.get("loudness"),
"out_loudness_fmt": post_cfg.get("loudness_fmt"),
"out_loudness": loudness_postprocessing,
"out_loudness_fmt": loudness_fmt_postprocessing,
"bin_dataset": post_cfg.get("bin_dataset"),
"bin_lfe_gain": post_cfg.get("bin_lfe_gain"),
"limit": post_cfg.get("limit", True),
......@@ -422,11 +433,3 @@ def get_processing_chain(
)
return chain
def get_abs_path(rel_path):
if rel_path is not None:
abs_path = Path(rel_path).resolve().absolute()
else:
abs_path = None
return abs_path
......@@ -46,6 +46,7 @@ from ivas_processing_scripts.constants import (
REQUIRED_KEYS_MNRU,
SUPPORTED_CONDITIONS,
)
from ivas_processing_scripts.utils import get_abs_path
def merge_dicts(base: dict, other: dict) -> None:
......@@ -139,14 +140,34 @@ class TestConfig:
raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}")
# validate preprocessing on concatenated file stage
if (pre_proc_2 := getattr(cfg, "preprocessing_2", None)) is not None:
bg_noise_folder = Path(pre_proc_2["background_noise_path"]).parent
if (
bg_noise_folder.resolve().absolute()
== cfg.input_path.resolve().absolute()
if (pre_proc_2 := cfg.get("preprocessing_2", None)) is not None and (
bg_noise := pre_proc_2.get("background_noise", None)
) is not None:
# check if low level flag or path is given
if not bg_noise.get("background_noise_path", None) and not bg_noise.get(
"low_level_noise", False
):
raise ValueError(
f"Background noise file has to be placed outside the input folder!"
"Path to prerecorded noise or low level noise flag has to be provided for background noise"
)
if bg_noise.get("background_noise_path", None) and bg_noise.get(
"low_level_noise", False
):
raise ValueError(
"Only prerecorded or low level background noise possible, not both"
)
if bg_noise.get("background_noise_path", None):
# check snr
if not bg_noise.get("snr", None):
raise ValueError("SNR has to be specified for background noise")
# check if path of background noise is in input folder
bg_noise_folder = Path(bg_noise["background_noise_path"]).parent
if get_abs_path(bg_noise_folder) == get_abs_path(cfg["input_path"]):
raise ValueError(
"Background noise file has to be placed outside the input folder!"
)
for cond_name, cond_cfg in cfg.get("conditions_to_generate").items():
......
......@@ -32,6 +32,7 @@
import logging
from pathlib import Path
from warnings import warn
import numpy as np
......@@ -66,6 +67,7 @@ class Preprocessing2(Processing):
# add preamble
if self.preamble:
logger.debug(f"Add preamble of length {self.preamble}ms")
# also apply preamble to ISM metadata
if self.in_fmt.startswith("ISM"):
# read out old
......@@ -91,17 +93,29 @@ class Preprocessing2(Processing):
# add background noise
if self.background_noise:
audio_object.audio = self.add_background_noise(audio_object, in_meta)
logger.debug(
f"Add background noise from file {self.background_noise.get('background_noise_path', 'file missing')} and SNR {self.background_noise.get('snr', 'snr missing')}"
)
audio_object.audio = self.add_background_noise(
audio_object, in_meta, logger
)
# save file
write(out_file, audio_object.audio, fs=audio_object.fs)
return
def add_background_noise(self, audio_object: audio.Audio, in_meta) -> np.ndarray:
def add_background_noise(
self, audio_object: audio.Audio, in_meta, logger
) -> np.ndarray:
# range for random delay
range_delay = (1, 2400000)
max_delay = int(2400000 * audio_object.fs / 48000)
if self.background_noise.get("background_noise_path"):
if not self.background_noise.get("background_noise_path").exists():
raise ValueError(
f"Background noise path {self.background_noise.get('background_noise_path')} does not exist"
)
# load background noise
if self.background_noise["background_object"] is not None:
noise_object = self.background_noise["background_object"]
......@@ -116,7 +130,7 @@ class Preprocessing2(Processing):
# if noise is too short raise error
if len(noise_object.audio) < len(audio_object.audio):
raise ValueError("Background noise too short for audio signal")
if len(noise_object.audio) - range_delay[1] < len(audio_object.audio):
if len(noise_object.audio) - max_delay < len(audio_object.audio):
raise ValueError(
"Background noise may be to short for audio signal when considering the random delay"
)
......@@ -133,13 +147,14 @@ class Preprocessing2(Processing):
out_format = self.out_fmt
loudness_signal, _ = get_loudness(audio_object, loudness_format=out_format)
logger.debug(f"Loudness of audio signal: {loudness_signal}LKFS")
# compute desired loudness of background noise
loudness_noise = loudness_signal - self.background_noise["snr"]
# apply random delay and cut signal
rand_delay = random_seed(
range=range_delay,
range=(1, max_delay),
master_seed=self.background_noise["master_seed"],
prerun_seed=self.background_noise["seed_delay"],
hexa=False,
......@@ -149,8 +164,30 @@ class Preprocessing2(Processing):
)[: len(audio_object.audio)]
# scale background noise to desired loudness based on output format
logger.debug(
f"Scaling of background noise to {self.background_noise['snr']}dB SNR"
)
noise_object.audio = loudness_norm(
noise_object, loudness_noise, out_format, rms=True
noise_object,
loudness_noise,
out_format,
rms=True,
logger=logger,
)
elif self.background_noise.get("low_level_noise"):
# use low level noise instead of prerecorded background noise
if self.background_noise["snr"]:
warn("SNR will be ignored for low level background noise")
noise_array = trim(
np.zeros((0, audio_object.num_channels)),
samples=True,
limits=(0, -len(audio_object.audio)),
pad_noise=True,
seed=self.background_noise["master_seed"],
)
noise_object = audio.fromarray(
x=noise_array, fs=self.in_fs, fmt=self.in_fmt
)
# add array to signal
......
......@@ -325,6 +325,7 @@ def preprocess_2(cfg, logger):
def reverse_process_2(cfg, logger):
# remove preamble
if cfg.pre2.preamble:
logger.info("Remove preamble")
remove_preamble(cfg)
# reverse concatenation
......@@ -355,8 +356,10 @@ def reverse_process_2(cfg, logger):
out_paths_splits,
cfg.postprocessing["fmt"],
cfg.postprocessing["loudness"],
cfg.postprocessing.get("loudness_fmt", None),
cfg.postprocessing["fs"],
out_meta_splits,
logger,
)
return
......
......@@ -290,3 +290,11 @@ def get_binary_paths(yaml_file_with_binary_paths):
return {}
else:
return {key: Path(value) for key, value in data.items()}
def get_abs_path(rel_path):
    """
    Convert a path to its resolved, absolute form

    Parameters
    ----------
    rel_path
        Path (string or Path object) to convert, or None

    Returns
    -------
    Resolved absolute Path, or None if no path was given
    """
    # nothing to convert: pass the missing value through unchanged
    if rel_path is None:
        return None
    return Path(rel_path).resolve().absolute()
......@@ -35,6 +35,7 @@ from pathlib import PurePath
""" Set up paths """
TESTS_DIR = PurePath(__file__).parent
TEST_VECTOR_DIR = TESTS_DIR.joinpath("data")
EXPERIMENTS_DIR = "../experiments/selection"
ISM_METADATA_DIR = TEST_VECTOR_DIR.joinpath("ism_metadata")
MASA_METADATA_DIR = TEST_VECTOR_DIR.joinpath("masa")
......@@ -200,3 +201,29 @@ INPUT_CONFIG_FILES = [
str(TEST_VECTOR_DIR.joinpath("test_MC.yml")),
str(TEST_VECTOR_DIR.joinpath("test_SBA.yml")),
]
# Names of the selection-test experiments used to parametrize the experiment tests.
# Each name presumably matches a folder below EXPERIMENTS_DIR - TODO confirm layout.
# NOTE(review): the commented-out entries are excluded; the reason is not visible here.
INPUT_EXPERIMENT_NAMES = [
"BS1534-1a",
"BS1534-1b",
"BS1534-2a",
"BS1534-2b",
"BS1534-3a",
"BS1534-3b",
"BS1534-4a",
"BS1534-4b",
"BS1534-5a",
"BS1534-5b",
"BS1534-6a",
"BS1534-6b",
# "BS1534-7a",
# "BS1534-7b",
"P800-1",
"P800-2",
"P800-3",
"P800-4",
"P800-5",
"P800-6",
"P800-7",
# "P800-8",
# "P800-9",
]
#!/usr/bin/env python3
#
# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
# contributors to this repository. All Rights Reserved.
#
# This software is protected by copyright law and by international treaties.
# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
# contributors to this repository retain full ownership rights in their respective contributions in
# the software. This notice grants no license of any kind, including but not limited to patent
# license, nor is any license granted by implication, estoppel or otherwise.
#
# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
# contributions.
#
# This software is provided "AS IS", without any express or implied warranties. The software is in the
# development stage. It is intended exclusively for experts who have experience with such software and
# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
# and fitness for a particular purpose are hereby disclaimed and excluded.
#
# Any dispute, controversy or claim arising under or in relation to providing this software shall be
# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
# the United Nations Convention on Contracts on the International Sales of Goods.
#
import shutil
from pathlib import Path
import pytest
from ivas_processing_scripts import main as generate_test
from ivas_processing_scripts.audiotools import audio
from ivas_processing_scripts.audiotools.audiofile import concat
from ivas_processing_scripts.processing.config import TestConfig
from tests.constants import (
EXPERIMENTS_DIR,
FORMAT_TO_METADATA_FILES,
INPUT_EXPERIMENT_NAMES,
NCHAN_TO_FILE,
TESTS_DIR,
)
# Background-noise test vectors, keyed by the experiment that needs one.
# The file name is the lower-cased experiment name plus "_background_noise.wav".
BG_NOISE_FOR_EXPERIMENT = {
    exp: Path(TESTS_DIR) / "data" / f"{exp.lower()}_background_noise.wav"
    for exp in ("P800-2", "P800-5")
}
# File name the noise is copied to inside the experiment's background_noise folder
BG_NOISE_NAME = "background_noise.wav"
class Arguments:
    """Minimal stand-in for the command-line arguments handed to the main entry point."""

    def __init__(self, config):
        # debug is always switched on for the tests
        self.debug = True
        # path of the experiment configuration file to process
        self.config = config
@pytest.mark.parametrize("exp_name", INPUT_EXPERIMENT_NAMES)
def test_generate_test_items(exp_name):
    """
    Run the full processing for one experiment configuration on dummy input

    Dummy audio (and, where the input format has entries in
    FORMAT_TO_METADATA_FILES, metadata) is copied into the input folder named
    in the experiment config, background noise is provided for the experiments
    listed in BG_NOISE_FOR_EXPERIMENT, and then the main entry point is run.

    Parameters
    ----------
    exp_name: str
        Name of the experiment; the config is read from
        <EXPERIMENTS_DIR>/<exp_name>/config/<exp_name>.yml

    Raises
    ------
    ValueError
        If the input folder named in the experiment config does not exist
    """
    cfg_dir = Path(TESTS_DIR).joinpath(EXPERIMENTS_DIR)
    cfg = Path(cfg_dir).joinpath(Path(exp_name)).resolve().absolute()
    cfg = cfg.joinpath(f"config/{exp_name}.yml")

    args = Arguments(cfg)

    # read out input path
    config = TestConfig(cfg)
    input_path = Path(config.input_path).resolve().absolute()
    input_fmt = config.input["fmt"]

    # deduce number of channels based on input format
    input_object = audio.fromtype(input_fmt)
    num_channels = input_object.num_channels

    # check if folder exists
    if not input_path.exists():
        raise ValueError("Input path from experiments config does not exist")

    # get dummy input files:
    spectral_dummy = NCHAN_TO_FILE[num_channels]
    pink_noise_dummy = Path(
        str(spectral_dummy).replace("spectral/spectral_test", "pinknoise/pink_noise")
    )
    dummy_input_files = [spectral_dummy, pink_noise_dummy]

    # get dummy metadata files
    dummy_md_files = FORMAT_TO_METADATA_FILES.get(input_fmt, list())

    # copy input files
    for f in dummy_input_files:
        f_out = input_path.joinpath(f.name).resolve().absolute()

        # need at least 2s of input files for gen-patt to be happy
        # (can not keep the tolerance for 50 frames only), so each file is doubled
        concat([str(f)] * 2, str(f_out))

        # metadata files are named <audio file>.<index>.csv next to the audio file
        for i, md_f in enumerate(dummy_md_files):
            suffix = f"{i}.csv"
            md_f_out = ".".join([str(f_out), suffix])
            shutil.copy(md_f, md_f_out)

    # copy background noise if needed
    if (bg_noise := BG_NOISE_FOR_EXPERIMENT.get(exp_name)) is not None:
        bg_noise_folder = input_path.parent.joinpath("background_noise")
        # shutil.copy does not create missing folders, so make sure the target exists
        bg_noise_folder.mkdir(parents=True, exist_ok=True)
        bg_noise_out = bg_noise_folder.joinpath(BG_NOISE_NAME)
        shutil.copy(bg_noise, bg_noise_out)

    generate_test(args)