Skip to content
---
################################################
# General configuration
################################################
# Experiment name; the processing scripts combine it with a logger suffix
# to name the log file written below output_path.
name: P800-5
# Seeds forwarded to the processing configuration.
master_seed: 5
prerun_seed: 2
# Folder with the items to process. A background noise file must not be
# placed inside this folder.
input_path: 'experiments/selection/P800-5/proc_input'
# Folder receiving the processed output and the log file.
output_path: 'experiments/selection/P800-5/proc_output'
################################################
### Input configuration
################################################
# Format and sampling rate of the input items.
input:
  fmt: "FOA"
  # TODO: to be clarified in Test Plan
  fs: 48000
################################################
### Pre-processing on individual items
################################################
# Settings applied to each item individually.
preprocessing:
  mask: "HP50"
  # Target loudness (LKFS) and the format the loudness is measured in.
  loudness: -26
  loudness_fmt: "BINAURAL"
  window: 100
################################################
### Pre-processing on whole signal(s)
################################################
# Settings applied to the whole (concatenated) signal.
preprocessing_2:
  concatenate_input: true
  # concatenation_order: []
  # Preamble length in ms.
  preamble: 10000
  preamble_noise: true
  background_noise:
    ### REQUIRED: SNR for background noise in dB
    snr: 15
    ### REQUIRED: Path to background noise, must have same format and sampling rate as input signal(s)
    ### The file has to be located outside of input_path.
    background_noise_path: "experiments/selection/P800-5/background_noise/background_noise.wav"
#################################################
### Bitstream processing
#################################################
################################################
### Configuration for conditions under test
################################################
# One entry per test condition; each key (cNN) maps to that condition's settings.
conditions_to_generate:
  ### Reference and anchor conditions ##########################
  c01:
    type: ref
  c02:
    type: mnru
    q: 28
  c03:
    type: mnru
    q: 24
  c04:
    type: mnru
    q: 20
  c05:
    type: mnru
    q: 16
  c06:
    type: esdru
    alpha: 0.7
  c07:
    type: esdru
    alpha: 0.4
  c08:
    type: esdru
    alpha: 0.1
  ### EVS condition ################################
  # NOTE(review): c09 and c10 use the same bitrate and encoder options; c09
  # differs only by sba_format and by having no dec key. The nesting level of
  # sba_format (condition level rather than under cod) is assumed - confirm.
  c09:
    type: evs
    bitrates:
      - 7200
    cod:
      opts: ["-max_band", "FB"]
    sba_format: "PLANARFOA"
  c10:
    type: evs
    bitrates:
      - 7200
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c11:
    type: evs
    bitrates:
      - 8000
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c12:
    type: evs
    bitrates:
      - 9600
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c13:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c14:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c15:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c16:
    type: evs
    bitrates:
      - 32000
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c17:
    type: evs
    bitrates:
      - 7200
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c18:
    type: evs
    bitrates:
      - 8000
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c19:
    type: evs
    bitrates:
      - 9600
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c20:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c21:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c22:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  ### IVAS condition ###############################
  c23:
    type: ivas
    bitrates:
      - 13200
    cod:
    dec:
      fmt: "FOA"
  c24:
    type: ivas
    bitrates:
      - 16400
    cod:
    dec:
      fmt: "FOA"
  c25:
    type: ivas
    bitrates:
      - 24400
    cod:
    dec:
      fmt: "FOA"
  c26:
    type: ivas
    bitrates:
      - 32000
    cod:
    dec:
      fmt: "FOA"
  c27:
    type: ivas
    bitrates:
      - 48000
    cod:
    dec:
      fmt: "FOA"
  c28:
    type: ivas
    bitrates:
      - 64000
    cod:
    dec:
      fmt: "FOA"
  c29:
    type: ivas
    bitrates:
      - 80000
    cod:
    dec:
      fmt: "FOA"
  c30:
    type: ivas
    bitrates:
      - 96000
    cod:
    dec:
      fmt: "FOA"
  c31:
    type: ivas
    bitrates:
      - 16400
    cod:
      opts: ["-dtx"]
    dec:
      fmt: "FOA"
  c32:
    type: ivas
    bitrates:
      - 24400
    cod:
      opts: ["-dtx"]
    dec:
      fmt: "FOA"
  c33:
    type: ivas
    bitrates:
      - 32000
    cod:
      opts: ["-dtx"]
    dec:
      fmt: "FOA"
  c34:
    type: ivas
    bitrates:
      - 48000
    cod:
      opts: ["-dtx"]
    dec:
      fmt: "FOA"
  c35:
    type: ivas
    bitrates:
      - 64000
    cod:
      opts: ["-dtx"]
    dec:
      fmt: "FOA"
  c36:
    type: ivas
    bitrates:
      - 80000
    cod:
      opts: ["-dtx"]
    dec:
      fmt: "FOA"
################################################
### Post-processing
################################################
# Output format, sampling rate and target loudness of the processed items.
postprocessing:
  fmt: "BINAURAL"
  fs: 48000
  loudness: -26
---
################################################
# General configuration
################################################
# Experiment name; the processing scripts combine it with a logger suffix
# to name the log file written below output_path.
name: P800-6
# Seeds forwarded to the processing configuration.
master_seed: 5
prerun_seed: 2
# Folder with the items to process.
input_path: 'experiments/selection/P800-6/proc_input'
# Folder receiving the processed output and the log file.
output_path: 'experiments/selection/P800-6/proc_output'
################################################
### Input configuration
################################################
# Format and sampling rate of the input items.
input:
  fmt: "ISM1"
  # TODO: to be clarified in Test Plan
  fs: 48000
################################################
### Pre-processing on individual items
################################################
# Settings applied to each item individually.
preprocessing:
  mask: "HP50"
  # Target loudness (LKFS).
  loudness: -26
  window: 100
################################################
### Pre-processing on whole signal(s)
################################################
# Settings applied to the whole (concatenated) signal.
preprocessing_2:
  concatenate_input: true
  # concatenation_order: []
  # Preamble length in ms.
  preamble: 10000
  preamble_noise: true
#################################################
### Bitstream processing
#################################################
################################################
### Configuration for conditions under test
################################################
# One entry per test condition; each key (cNN) maps to that condition's settings.
conditions_to_generate:
  ### Reference and anchor conditions ##########################
  c01:
    type: ref
  c02:
    type: mnru
    q: 15
  c03:
    type: mnru
    q: 23
  c04:
    type: mnru
    q: 31
  c05:
    type: mnru
    q: 39
  c06:
    type: mnru
    q: 47
  c07:
    type: esdru
    alpha: 0.1
  c08:
    type: esdru
    alpha: 0.3
  c09:
    type: esdru
    alpha: 0.5
  c10:
    type: esdru
    alpha: 0.7
  ### EVS condition ################################
  c11:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c12:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c13:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c14:
    type: evs
    bitrates:
      - 32000
    cod:
      opts: ["-max_band", "FB"]
    dec:
  # NOTE(review): tx is assumed to sit at condition level (sibling of cod/dec)
  # in all FER conditions below - confirm against the configuration schema.
  c15:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-max_band", "FB"]
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c16:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-max_band", "FB"]
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c17:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-max_band", "FB"]
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c18:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c19:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c20:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  ### IVAS condition ###############################
  c21:
    type: ivas
    bitrates:
      - 13200
    cod:
    dec:
  c22:
    type: ivas
    bitrates:
      - 16400
    cod:
    dec:
  c23:
    type: ivas
    bitrates:
      - 24400
    cod:
    dec:
  c24:
    type: ivas
    bitrates:
      - 32000
    cod:
    dec:
  c25:
    type: ivas
    bitrates:
      - 13200
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c26:
    type: ivas
    bitrates:
      - 16400
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c27:
    type: ivas
    bitrates:
      - 24400
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c28:
    type: ivas
    bitrates:
      - 13200
    cod:
      opts: ["-dtx"]
    dec:
  c29:
    type: ivas
    bitrates:
      - 16400
    cod:
      opts: ["-dtx"]
    dec:
  c30:
    type: ivas
    bitrates:
      - 24400
    cod:
      opts: ["-dtx"]
    dec:
################################################
### Post-processing
################################################
# Output format, sampling rate and target loudness of the processed items.
postprocessing:
  fmt: "BINAURAL"
  fs: 48000
  loudness: -26
---
################################################
# General configuration
################################################
# Experiment name; the processing scripts combine it with a logger suffix
# to name the log file written below output_path.
name: P800-7
# Seeds forwarded to the processing configuration.
master_seed: 5
prerun_seed: 2
# Folder with the items to process.
input_path: 'experiments/selection/P800-7/proc_input'
# Folder receiving the processed output and the log file.
output_path: 'experiments/selection/P800-7/proc_output'
################################################
### Input configuration
################################################
# Format and sampling rate of the input items.
input:
  fmt: "ISM2"
  # TODO: to be clarified in Test Plan
  fs: 48000
################################################
### Pre-processing on individual items
################################################
# Settings applied to each item individually.
preprocessing:
  mask: "HP50"
  # Target loudness (LKFS).
  loudness: -26
  window: 100
################################################
### Pre-processing on whole signal(s)
################################################
# Settings applied to the whole (concatenated) signal.
preprocessing_2:
  concatenate_input: true
  # concatenation_order: []
  # Preamble length in ms.
  preamble: 10000
  preamble_noise: true
#################################################
### Bitstream processing
#################################################
################################################
### Configuration for conditions under test
################################################
# One entry per test condition; each key (cNN) maps to that condition's settings.
conditions_to_generate:
  ### Reference and anchor conditions ##########################
  c01:
    type: ref
  c02:
    type: mnru
    q: 15
  c03:
    type: mnru
    q: 23
  c04:
    type: mnru
    q: 31
  c05:
    type: mnru
    q: 39
  c06:
    type: mnru
    q: 47
  c07:
    type: esdru
    alpha: 0.1
  c08:
    type: esdru
    alpha: 0.3
  c09:
    type: esdru
    alpha: 0.5
  c10:
    type: esdru
    alpha: 0.7
  ### EVS condition ################################
  c11:
    type: evs
    bitrates:
      - 8000
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c12:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c13:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-max_band", "FB"]
    dec:
  c14:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-max_band", "FB"]
    dec:
  # NOTE(review): tx is assumed to sit at condition level (sibling of cod/dec)
  # in all FER conditions below - confirm against the configuration schema.
  c15:
    type: evs
    bitrates:
      - 8000
    cod:
      opts: ["-max_band", "FB"]
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c16:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-max_band", "FB"]
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c17:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-max_band", "FB"]
    dec:
    tx:
      type: "FER"
      error_rate: 5
  # NOTE(review): c18 is identical to c22 (24400 with -dtx). The IVAS block
  # below has a FER condition for each of its four bitrates, so c18 may have
  # been intended as the 24400 FER condition - confirm against the Test Plan.
  c18:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c19:
    type: evs
    bitrates:
      - 8000
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c20:
    type: evs
    bitrates:
      - 13200
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c21:
    type: evs
    bitrates:
      - 16400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  c22:
    type: evs
    bitrates:
      - 24400
    cod:
      opts: ["-dtx", "-max_band", "FB"]
    dec:
  ### IVAS condition ###############################
  c23:
    type: ivas
    bitrates:
      - 16400
    cod:
    dec:
  c24:
    type: ivas
    bitrates:
      - 24400
    cod:
    dec:
  c25:
    type: ivas
    bitrates:
      - 32000
    cod:
    dec:
  c26:
    type: ivas
    bitrates:
      - 48000
    cod:
    dec:
  c27:
    type: ivas
    bitrates:
      - 16400
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c28:
    type: ivas
    bitrates:
      - 24400
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c29:
    type: ivas
    bitrates:
      - 32000
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c30:
    type: ivas
    bitrates:
      - 48000
    cod:
    dec:
    tx:
      type: "FER"
      error_rate: 5
  c31:
    type: ivas
    bitrates:
      - 16400
    cod:
      opts: ["-dtx"]
    dec:
  c32:
    type: ivas
    bitrates:
      - 24400
    cod:
      opts: ["-dtx"]
    dec:
  c33:
    type: ivas
    bitrates:
      - 32000
    cod:
      opts: ["-dtx"]
    dec:
  c34:
    type: ivas
    bitrates:
      - 48000
    cod:
      opts: ["-dtx"]
    dec:
################################################
### Post-processing
################################################
# Output format, sampling rate and target loudness of the processed items.
postprocessing:
  fmt: "BINAURAL"
  fs: 48000
  loudness: -26
...@@ -71,7 +71,7 @@ def logging_init(args, cfg): ...@@ -71,7 +71,7 @@ def logging_init(args, cfg):
cfg.output_path.joinpath(f"{cfg.name}{LOGGER_SUFFIX}"), mode="w" cfg.output_path.joinpath(f"{cfg.name}{LOGGER_SUFFIX}"), mode="w"
) )
file_handler.setFormatter(logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT)) file_handler.setFormatter(logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT))
file_handler.setLevel(logging.DEBUG if args.debug else logging.INFO) file_handler.setLevel(logging.DEBUG)
logger.addHandler(file_handler) logger.addHandler(file_handler)
logger.info(f"Processing test configuration file {args.config}") logger.info(f"Processing test configuration file {args.config}")
...@@ -146,6 +146,7 @@ def main(args): ...@@ -146,6 +146,7 @@ def main(args):
hasattr(cfg, "preprocessing") hasattr(cfg, "preprocessing")
and hasattr(cfg.pre2, "background_noise") and hasattr(cfg.pre2, "background_noise")
and cfg.pre2.background_noise is not None and cfg.pre2.background_noise is not None
and cfg.pre2.background_noise.get("background_noise_path")
): ):
preprocess_background_noise(cfg) preprocess_background_noise(cfg)
# preprocess 2 # preprocess 2
......
...@@ -308,7 +308,8 @@ class ObjectBasedAudio(Audio): ...@@ -308,7 +308,8 @@ class ObjectBasedAudio(Audio):
if pos.shape[1] < 5: if pos.shape[1] < 5:
raise ValueError("Metadata incomplete. Columns are missing.") raise ValueError("Metadata incomplete. Columns are missing.")
elif pos.shape[1] > 5: elif pos.shape[1] > 5:
if pos.shape[1] == 7: if pos.shape[1] <= 8:
# TODO: FIXME
pos = pos[:, :5] pos = pos[:, :5]
else: else:
raise ValueError( raise ValueError(
......
...@@ -52,6 +52,7 @@ def trim( ...@@ -52,6 +52,7 @@ def trim(
limits: Optional[Tuple[int, int]] = None, limits: Optional[Tuple[int, int]] = None,
pad_noise: Optional[bool] = False, pad_noise: Optional[bool] = False,
samples: Optional[bool] = False, samples: Optional[bool] = False,
seed: Optional[int] = None,
) -> np.ndarray: ) -> np.ndarray:
""" """
Trim an audio array Trim an audio array
...@@ -88,6 +89,9 @@ def trim( ...@@ -88,6 +89,9 @@ def trim(
if pre_trim < 0: if pre_trim < 0:
if pad_noise: if pad_noise:
# pad with uniformly distributed noise between -4 and 4 # pad with uniformly distributed noise between -4 and 4
if seed:
np.random.seed(seed)
else:
np.random.seed(SEED_PADDING) np.random.seed(SEED_PADDING)
noise = np.random.randint( noise = np.random.randint(
low=-4, high=5, size=(np.abs(pre_trim), np.shape(x)[1]) low=-4, high=5, size=(np.abs(pre_trim), np.shape(x)[1])
...@@ -101,6 +105,9 @@ def trim( ...@@ -101,6 +105,9 @@ def trim(
if post_trim < 0: if post_trim < 0:
if pad_noise: if pad_noise:
# pad with uniformly distributed noise between -4 and 4 # pad with uniformly distributed noise between -4 and 4
if seed:
np.random.seed(seed)
else:
np.random.seed(SEED_PADDING) np.random.seed(SEED_PADDING)
noise = np.random.randint( noise = np.random.randint(
low=-4, high=5, size=(np.abs(post_trim), np.shape(x)[1]) low=-4, high=5, size=(np.abs(post_trim), np.shape(x)[1])
......
...@@ -253,7 +253,7 @@ def process_audio( ...@@ -253,7 +253,7 @@ def process_audio(
logger.debug( logger.debug(
f"Applying loudness adjustment to {loudness} LKFS for format {loudness_fmt} using ITU STL bs1770demo" f"Applying loudness adjustment to {loudness} LKFS for format {loudness_fmt} using ITU STL bs1770demo"
) )
x.audio = loudness_norm(x, loudness, loudness_fmt) x.audio = loudness_norm(x, loudness, loudness_fmt, logger=logger)
"""low-pass filtering""" """low-pass filtering"""
if fc is not None: if fc is not None:
......
...@@ -47,9 +47,6 @@ from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu ...@@ -47,9 +47,6 @@ from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu
from ivas_processing_scripts.constants import DEFAULT_CONFIG_BINARIES from ivas_processing_scripts.constants import DEFAULT_CONFIG_BINARIES
from ivas_processing_scripts.utils import find_binary, get_devnull, run from ivas_processing_scripts.utils import find_binary, get_devnull, run
logger = logging.getLogger("__main__")
logger.setLevel(logging.DEBUG)
def bs1770demo( def bs1770demo(
input: audio.Audio, input: audio.Audio,
...@@ -65,6 +62,8 @@ def bs1770demo( ...@@ -65,6 +62,8 @@ def bs1770demo(
Input audio Input audio
target_loudness: Optional[float] target_loudness: Optional[float]
Desired loudness in LKFS Desired loudness in LKFS
rms: Optional[bool]
Flag for using rms argument in bs1770demo tool (for low level signals)
Returns Returns
------- -------
...@@ -85,8 +84,6 @@ def bs1770demo( ...@@ -85,8 +84,6 @@ def bs1770demo(
binary = find_binary("bs1770demo") binary = find_binary("bs1770demo")
# checking if the new binary (with '-rms') is used # checking if the new binary (with '-rms') is used
with TemporaryDirectory() as tmp_dir_test:
tmp_dir_test = Path(tmp_dir_test)
result = sp.run(str(binary), shell=True, stdout=sp.PIPE, stderr=sp.PIPE) result = sp.run(str(binary), shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
result_stdout = result.stdout.decode("utf-8") result_stdout = result.stdout.decode("utf-8")
if "-rms" not in result_stdout: if "-rms" not in result_stdout:
...@@ -146,8 +143,9 @@ def bs1770demo( ...@@ -146,8 +143,9 @@ def bs1770demo(
# using rms if true # using rms if true
if rms: if rms:
cmd.insert(1, "-rms") cmd.insert(1, "-rms")
# run command # run command
result = run(cmd, logger=logger) result = run(cmd)
# parse output # parse output
# we are looking for the (floating-point) number after the search string - '( )' around the number denotes the first group # we are looking for the (floating-point) number after the search string - '( )' around the number denotes the first group
...@@ -174,10 +172,12 @@ def get_loudness( ...@@ -174,10 +172,12 @@ def get_loudness(
---------- ----------
input : Audio input : Audio
Input audio Input audio
target_loudness: float target_loudness: Optional[float]
Desired loudness in LKFS Desired loudness in LKFS
loudness_format: str loudness_format: Optional[str]
Loudness format to render to for loudness computation (default input format if possible) Loudness format to render to for loudness computation (default input format if possible)
rms: Optional[bool]
Flag for using rms argument in bs1770demo tool (for low level signals)
Returns Returns
------- -------
...@@ -212,10 +212,7 @@ def get_loudness( ...@@ -212,10 +212,7 @@ def get_loudness(
convert.format_conversion(input, tmp) convert.format_conversion(input, tmp)
else: else:
tmp.audio = input.audio tmp.audio = input.audio
if rms: return bs1770demo(tmp, target_loudness, rms)
return bs1770demo(tmp, target_loudness, rms=True)
else:
return bs1770demo(tmp, target_loudness)
def loudness_norm( def loudness_norm(
...@@ -223,6 +220,8 @@ def loudness_norm( ...@@ -223,6 +220,8 @@ def loudness_norm(
target_loudness: Optional[float] = -26, target_loudness: Optional[float] = -26,
loudness_format: Optional[str] = None, loudness_format: Optional[str] = None,
rms: Optional[bool] = False, rms: Optional[bool] = False,
logger: Optional[logging.Logger] = None,
file_name_logging: Optional[Union[str, Path]] = None,
) -> np.ndarray: ) -> np.ndarray:
""" """
Iterative loudness normalization using ITU-R BS.1770-4 Iterative loudness normalization using ITU-R BS.1770-4
...@@ -237,6 +236,12 @@ def loudness_norm( ...@@ -237,6 +236,12 @@ def loudness_norm(
Desired loudness level in LKFS Desired loudness level in LKFS
loudness_format: Optional[str] loudness_format: Optional[str]
Loudness format to render to for loudness computation (default input format) Loudness format to render to for loudness computation (default input format)
rms: Optional[bool]
Flag for using rms argument in bs1770demo tool (for low level signals)
logger: Optional[logging.Logger]
Logger to log loudness information
file_name_logging: Optional[Union[str, Path]]
Name of processed file for logging information
Returns Returns
------- -------
...@@ -244,42 +249,56 @@ def loudness_norm( ...@@ -244,42 +249,56 @@ def loudness_norm(
Normalized audio Normalized audio
""" """
# repeat until convergence of loudness
measured_loudness = np.inf measured_loudness = np.inf
scale_factor = 1
num_iter = 1 num_iter = 1
scaled_input = copy.deepcopy(input)
while np.abs(measured_loudness - target_loudness) > 0.5 and num_iter < 10: # save loudness before and after scaling for the logger info
measured_loudness, scale_factor_new = get_loudness( loudness_before, scale_factor_new = get_loudness(
input, scaled_input, target_loudness, loudness_format, rms
target_loudness,
loudness_format,
rms=rms,
) )
# repeat until convergence of loudness
while np.abs(measured_loudness - target_loudness) > 0.5 and num_iter < 10:
# scale input # scale input
input.audio *= scale_factor_new scaled_input.audio *= scale_factor_new
# update scale factor # measure loudness and get scaling factor
scale_factor *= scale_factor_new measured_loudness, scale_factor_new = get_loudness(
scaled_input, target_loudness, loudness_format, rms
)
num_iter += 1 num_iter += 1
loudness_after = measured_loudness
# log loudness before and after adjustment
if logger:
if file_name_logging:
logger.debug(
f"File {file_name_logging} loudness; before: {loudness_before}, after: {loudness_after}"
)
else:
logger.debug(
f"Loudness; before: {loudness_before}, after: {loudness_after}"
)
if num_iter >= 10: if num_iter >= 10:
warn( warn(
f"Loudness did not converge to desired value, stopping at: {measured_loudness:.2f}" f"Loudness did not converge to desired value, stopping at: {loudness_after:.2f}"
) )
return input.audio return scaled_input.audio
def scale_files( def scale_files(
file_list: list[list[Union[Path, str]]], file_list: list[list[Union[Path, str]]],
fmt: str, fmt: str,
loudness: float, loudness: float,
loudness_format: Optional[str] = None,
fs: Optional[int] = 48000, fs: Optional[int] = 48000,
in_meta: Optional[list] = None, in_meta: Optional[list] = None,
rms: Optional[bool] = False, logger: Optional[logging.Logger] = None,
) -> None: ) -> None:
""" """
Scales audio files to desired loudness Scales audio files to desired loudness
...@@ -292,15 +311,22 @@ def scale_files( ...@@ -292,15 +311,22 @@ def scale_files(
Audio format of files in list Audio format of files in list
loudness: float loudness: float
Desired loudness level in LKFS/dBov Desired loudness level in LKFS/dBov
loudness_format: Optional[str]
Format for loudness measurement
fs: Optional[int] fs: Optional[int]
Sampling rate Sampling rate
in_meta: Optional[list] in_meta: Optional[list]
Metadata for ISM with same structure as file_list but one layer more Metadata for ISM with same structure as file_list but one layer more
for the list of metadata for one file for the list of metadata for one file
logger: Optional[logging.Logger]
Logger to log loudness information
""" """
if fmt.startswith("ISM") and in_meta: if fmt.startswith("ISM"):
if in_meta:
meta_bool = True meta_bool = True
else:
raise ValueError("No metadata available for loudness measurement")
else: else:
in_meta = copy.copy(file_list) in_meta = copy.copy(file_list)
meta_bool = False meta_bool = False
...@@ -314,10 +340,13 @@ def scale_files( ...@@ -314,10 +340,13 @@ def scale_files(
audio_obj = audio.fromfile(fmt, file, fs) audio_obj = audio.fromfile(fmt, file, fs)
# adjust loudness # adjust loudness
if rms: scaled_audio = loudness_norm(
scaled_audio = loudness_norm(audio_obj, loudness, rms=True) audio_obj,
else: loudness,
scaled_audio = loudness_norm(audio_obj, loudness) loudness_format,
logger=logger,
file_name_logging=file,
)
# write into file # write into file
write(file, scaled_audio, audio_obj.fs) write(file, scaled_audio, audio_obj.fs)
...@@ -3,7 +3,7 @@ Necessary additional executables: ...@@ -3,7 +3,7 @@ Necessary additional executables:
| Processing step | Executable | Where to find | | Processing step | Executable | Where to find |
|-------------------------------------------------|-----------------------|-------------------------------------------------------------------------------------------------------------| |-------------------------------------------------|-----------------------|-------------------------------------------------------------------------------------------------------------|
| Loudness measurement and adjustment | bs1770demo | https://github.com/openitu/STL | | Loudness measurement and adjustment | bs1770demo | https://github.com/ErikNorvell-Ericsson/STL (Note branch) |
| MNRU | p50fbmnru | https://github.com/openitu/STL | | MNRU | p50fbmnru | https://github.com/openitu/STL |
| ESDRU | esdru | https://github.com/openitu/STL | | ESDRU | esdru | https://github.com/openitu/STL |
| Frame error pattern application | eid-xor | https://github.com/openitu/STL | | Frame error pattern application | eid-xor | https://github.com/openitu/STL |
......
...@@ -30,7 +30,6 @@ ...@@ -30,7 +30,6 @@
# the United Nations Convention on Contracts on the International Sales of Goods. # the United Nations Convention on Contracts on the International Sales of Goods.
# #
from pathlib import Path
from typing import Optional from typing import Optional
from warnings import warn from warnings import warn
...@@ -40,7 +39,7 @@ from ivas_processing_scripts.processing.ivas import IVAS ...@@ -40,7 +39,7 @@ from ivas_processing_scripts.processing.ivas import IVAS
from ivas_processing_scripts.processing.postprocessing import Postprocessing from ivas_processing_scripts.processing.postprocessing import Postprocessing
from ivas_processing_scripts.processing.preprocessing import Preprocessing from ivas_processing_scripts.processing.preprocessing import Preprocessing
from ivas_processing_scripts.processing.preprocessing_2 import Preprocessing2 from ivas_processing_scripts.processing.preprocessing_2 import Preprocessing2
from ivas_processing_scripts.utils import list_audio from ivas_processing_scripts.utils import get_abs_path, list_audio
def init_processing_chains(cfg: TestConfig) -> None: def init_processing_chains(cfg: TestConfig) -> None:
...@@ -146,6 +145,8 @@ def get_preprocessing_2(cfg: TestConfig) -> dict: ...@@ -146,6 +145,8 @@ def get_preprocessing_2(cfg: TestConfig) -> dict:
} }
pre2_cfg = cfg.preprocessing_2 pre2_cfg = cfg.preprocessing_2
# set up background noise
background_cfg = pre2_cfg.get("background_noise", None) background_cfg = pre2_cfg.get("background_noise", None)
if background_cfg: if background_cfg:
background = { background = {
...@@ -153,6 +154,7 @@ def get_preprocessing_2(cfg: TestConfig) -> dict: ...@@ -153,6 +154,7 @@ def get_preprocessing_2(cfg: TestConfig) -> dict:
"background_noise_path": get_abs_path( "background_noise_path": get_abs_path(
background_cfg.get("background_noise_path", None) background_cfg.get("background_noise_path", None)
), ),
"low_level_noise": background_cfg.get("low_level_noise", False),
"seed_delay": cfg.prerun_seed, "seed_delay": cfg.prerun_seed,
"master_seed": cfg.master_seed, "master_seed": cfg.master_seed,
"output_fmt": cfg.postprocessing["fmt"], "output_fmt": cfg.postprocessing["fmt"],
...@@ -209,6 +211,7 @@ def get_processing_chain( ...@@ -209,6 +211,7 @@ def get_processing_chain(
# get pre and post processing configurations # get pre and post processing configurations
pre_cfg = getattr(cfg, "preprocessing", {}) pre_cfg = getattr(cfg, "preprocessing", {})
pre2_cfg = getattr(cfg, "preprocessing_2", {})
post_cfg = cfg.postprocessing post_cfg = cfg.postprocessing
# default to input values if preprocessing was not requested # default to input values if preprocessing was not requested
...@@ -400,6 +403,14 @@ def get_processing_chain( ...@@ -400,6 +403,14 @@ def get_processing_chain(
raise SystemExit(f"Unknown condition {condition}!") raise SystemExit(f"Unknown condition {condition}!")
# add postprocessing step based on condition # add postprocessing step based on condition
# if concatenation and splitting do loudness adjustment only on splitted files
if pre2_cfg.get("concatenate_input", False):
loudness_postprocessing = None
loudness_fmt_postprocessing = None
else:
loudness_postprocessing = post_cfg.get("loudness")
loudness_fmt_postprocessing = post_cfg.get("loudness_fmt")
chain["processes"].append( chain["processes"].append(
Postprocessing( Postprocessing(
{ {
...@@ -408,8 +419,8 @@ def get_processing_chain( ...@@ -408,8 +419,8 @@ def get_processing_chain(
"out_fs": post_cfg.get("fs"), "out_fs": post_cfg.get("fs"),
"out_fmt": post_cfg.get("fmt"), "out_fmt": post_cfg.get("fmt"),
"out_cutoff": tmp_lp_cutoff, "out_cutoff": tmp_lp_cutoff,
"out_loudness": post_cfg.get("loudness"), "out_loudness": loudness_postprocessing,
"out_loudness_fmt": post_cfg.get("loudness_fmt"), "out_loudness_fmt": loudness_fmt_postprocessing,
"bin_dataset": post_cfg.get("bin_dataset"), "bin_dataset": post_cfg.get("bin_dataset"),
"bin_lfe_gain": post_cfg.get("bin_lfe_gain"), "bin_lfe_gain": post_cfg.get("bin_lfe_gain"),
"limit": post_cfg.get("limit", True), "limit": post_cfg.get("limit", True),
...@@ -422,11 +433,3 @@ def get_processing_chain( ...@@ -422,11 +433,3 @@ def get_processing_chain(
) )
return chain return chain
def get_abs_path(rel_path):
    """
    Convert a path to its resolved, absolute form

    Parameters
    ----------
    rel_path
        Path (string or Path object) to convert, or None

    Returns
    -------
    abs_path
        Resolved absolute Path, or None if no path was given
    """
    # optional config entries are passed in as None and must stay None
    if rel_path is None:
        return None
    return Path(rel_path).resolve().absolute()
...@@ -46,6 +46,7 @@ from ivas_processing_scripts.constants import ( ...@@ -46,6 +46,7 @@ from ivas_processing_scripts.constants import (
REQUIRED_KEYS_MNRU, REQUIRED_KEYS_MNRU,
SUPPORTED_CONDITIONS, SUPPORTED_CONDITIONS,
) )
from ivas_processing_scripts.utils import get_abs_path
def merge_dicts(base: dict, other: dict) -> None: def merge_dicts(base: dict, other: dict) -> None:
...@@ -139,14 +140,34 @@ class TestConfig: ...@@ -139,14 +140,34 @@ class TestConfig:
raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}") raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}")
# validate preprocessing on concatenated file stage # validate preprocessing on concatenated file stage
if (pre_proc_2 := getattr(cfg, "preprocessing_2", None)) is not None: if (pre_proc_2 := cfg.get("preprocessing_2", None)) is not None and (
bg_noise_folder = Path(pre_proc_2["background_noise_path"]).parent bg_noise := pre_proc_2.get("background_noise", None)
if ( ) is not None:
bg_noise_folder.resolve().absolute() # check if low level flag or path is given
== cfg.input_path.resolve().absolute() if not bg_noise.get("background_noise_path", None) and not bg_noise.get(
"low_level_noise", False
): ):
raise ValueError( raise ValueError(
f"Background noise file has to be placed outside the input folder!" "Path to prerecorded noise or low level noise flag has to be provided for background noise"
)
if bg_noise.get("background_noise_path", None) and bg_noise.get(
"low_level_noise", False
):
raise ValueError(
"Only prerecorded or low level background noise possible, not both"
)
if bg_noise.get("background_noise_path", None):
# check snr
if not bg_noise.get("snr", None):
raise ValueError("SNR has to be specified for background noise")
# check if path of background noise is in input folder
bg_noise_folder = Path(bg_noise["background_noise_path"]).parent
if get_abs_path(bg_noise_folder) == get_abs_path(cfg["input_path"]):
raise ValueError(
"Background noise file has to be placed outside the input folder!"
) )
for cond_name, cond_cfg in cfg.get("conditions_to_generate").items(): for cond_name, cond_cfg in cfg.get("conditions_to_generate").items():
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
import logging import logging
from pathlib import Path from pathlib import Path
from warnings import warn
import numpy as np import numpy as np
...@@ -66,6 +67,7 @@ class Preprocessing2(Processing): ...@@ -66,6 +67,7 @@ class Preprocessing2(Processing):
# add preamble # add preamble
if self.preamble: if self.preamble:
logger.debug(f"Add preamble of length {self.preamble}ms")
# also apply preamble to ISM metadata # also apply preamble to ISM metadata
if self.in_fmt.startswith("ISM"): if self.in_fmt.startswith("ISM"):
# read out old # read out old
...@@ -91,17 +93,29 @@ class Preprocessing2(Processing): ...@@ -91,17 +93,29 @@ class Preprocessing2(Processing):
# add background noise # add background noise
if self.background_noise: if self.background_noise:
audio_object.audio = self.add_background_noise(audio_object, in_meta) logger.debug(
f"Add background noise from file {self.background_noise.get('background_noise_path', 'file missing')} and SNR {self.background_noise.get('snr', 'snr missing')}"
)
audio_object.audio = self.add_background_noise(
audio_object, in_meta, logger
)
# save file # save file
write(out_file, audio_object.audio, fs=audio_object.fs) write(out_file, audio_object.audio, fs=audio_object.fs)
return return
def add_background_noise(self, audio_object: audio.Audio, in_meta) -> np.ndarray: def add_background_noise(
self, audio_object: audio.Audio, in_meta, logger
) -> np.ndarray:
# range for random delay # range for random delay
range_delay = (1, 2400000) max_delay = int(2400000 * audio_object.fs / 48000)
if self.background_noise.get("background_noise_path"):
if not self.background_noise.get("background_noise_path").exists():
raise ValueError(
f"Background noise path {self.background_noise.get('background_noise_path')} does not exist"
)
# load background noise # load background noise
if self.background_noise["background_object"] is not None: if self.background_noise["background_object"] is not None:
noise_object = self.background_noise["background_object"] noise_object = self.background_noise["background_object"]
...@@ -116,7 +130,7 @@ class Preprocessing2(Processing): ...@@ -116,7 +130,7 @@ class Preprocessing2(Processing):
# if noise is too short raise error # if noise is too short raise error
if len(noise_object.audio) < len(audio_object.audio): if len(noise_object.audio) < len(audio_object.audio):
raise ValueError("Background noise too short for audio signal") raise ValueError("Background noise too short for audio signal")
if len(noise_object.audio) - range_delay[1] < len(audio_object.audio): if len(noise_object.audio) - max_delay < len(audio_object.audio):
raise ValueError( raise ValueError(
"Background noise may be to short for audio signal when considering the random delay" "Background noise may be to short for audio signal when considering the random delay"
) )
...@@ -133,13 +147,14 @@ class Preprocessing2(Processing): ...@@ -133,13 +147,14 @@ class Preprocessing2(Processing):
out_format = self.out_fmt out_format = self.out_fmt
loudness_signal, _ = get_loudness(audio_object, loudness_format=out_format) loudness_signal, _ = get_loudness(audio_object, loudness_format=out_format)
logger.debug(f"Loudness of audio signal: {loudness_signal}LKFS")
# compute desired loudness of background noise # compute desired loudness of background noise
loudness_noise = loudness_signal - self.background_noise["snr"] loudness_noise = loudness_signal - self.background_noise["snr"]
# apply random delay and cut signal # apply random delay and cut signal
rand_delay = random_seed( rand_delay = random_seed(
range=range_delay, range=(1, max_delay),
master_seed=self.background_noise["master_seed"], master_seed=self.background_noise["master_seed"],
prerun_seed=self.background_noise["seed_delay"], prerun_seed=self.background_noise["seed_delay"],
hexa=False, hexa=False,
...@@ -149,8 +164,30 @@ class Preprocessing2(Processing): ...@@ -149,8 +164,30 @@ class Preprocessing2(Processing):
)[: len(audio_object.audio)] )[: len(audio_object.audio)]
# scale background noise to desired loudness based on output format # scale background noise to desired loudness based on output format
logger.debug(
f"Scaling of background noise to {self.background_noise['snr']}dB SNR"
)
noise_object.audio = loudness_norm( noise_object.audio = loudness_norm(
noise_object, loudness_noise, out_format, rms=True noise_object,
loudness_noise,
out_format,
rms=True,
logger=logger,
)
elif self.background_noise.get("low_level_noise"):
# use low level noise instead of prerecorded background noise
if self.background_noise["snr"]:
warn("SNR will be ignored for low level background noise")
noise_array = trim(
np.zeros((0, audio_object.num_channels)),
samples=True,
limits=(0, -len(audio_object.audio)),
pad_noise=True,
seed=self.background_noise["master_seed"],
)
noise_object = audio.fromarray(
x=noise_array, fs=self.in_fs, fmt=self.in_fmt
) )
# add array to signal # add array to signal
......
...@@ -325,6 +325,7 @@ def preprocess_2(cfg, logger): ...@@ -325,6 +325,7 @@ def preprocess_2(cfg, logger):
def reverse_process_2(cfg, logger): def reverse_process_2(cfg, logger):
# remove preamble # remove preamble
if cfg.pre2.preamble: if cfg.pre2.preamble:
logger.info("Remove preamble")
remove_preamble(cfg) remove_preamble(cfg)
# reverse concatenation # reverse concatenation
...@@ -355,8 +356,10 @@ def reverse_process_2(cfg, logger): ...@@ -355,8 +356,10 @@ def reverse_process_2(cfg, logger):
out_paths_splits, out_paths_splits,
cfg.postprocessing["fmt"], cfg.postprocessing["fmt"],
cfg.postprocessing["loudness"], cfg.postprocessing["loudness"],
cfg.postprocessing.get("loudness_fmt", None),
cfg.postprocessing["fs"], cfg.postprocessing["fs"],
out_meta_splits, out_meta_splits,
logger,
) )
return return
......
...@@ -290,3 +290,11 @@ def get_binary_paths(yaml_file_with_binary_paths): ...@@ -290,3 +290,11 @@ def get_binary_paths(yaml_file_with_binary_paths):
return {} return {}
else: else:
return {key: Path(value) for key, value in data.items()} return {key: Path(value) for key, value in data.items()}
def get_abs_path(rel_path):
    """Turn a (possibly relative) path into a resolved, absolute ``Path``.

    Args:
        rel_path: Anything accepted by ``pathlib.Path`` or ``None``.

    Returns:
        The resolved absolute ``Path``, or ``None`` when ``rel_path`` is ``None``.
    """
    # Nothing to resolve: propagate the missing value unchanged.
    if rel_path is None:
        return None
    return Path(rel_path).resolve().absolute()
...@@ -35,6 +35,7 @@ from pathlib import PurePath ...@@ -35,6 +35,7 @@ from pathlib import PurePath
""" Set up paths """ """ Set up paths """
TESTS_DIR = PurePath(__file__).parent TESTS_DIR = PurePath(__file__).parent
TEST_VECTOR_DIR = TESTS_DIR.joinpath("data") TEST_VECTOR_DIR = TESTS_DIR.joinpath("data")
EXPERIMENTS_DIR = "../experiments/selection"
ISM_METADATA_DIR = TEST_VECTOR_DIR.joinpath("ism_metadata") ISM_METADATA_DIR = TEST_VECTOR_DIR.joinpath("ism_metadata")
MASA_METADATA_DIR = TEST_VECTOR_DIR.joinpath("masa") MASA_METADATA_DIR = TEST_VECTOR_DIR.joinpath("masa")
...@@ -200,3 +201,29 @@ INPUT_CONFIG_FILES = [ ...@@ -200,3 +201,29 @@ INPUT_CONFIG_FILES = [
str(TEST_VECTOR_DIR.joinpath("test_MC.yml")), str(TEST_VECTOR_DIR.joinpath("test_MC.yml")),
str(TEST_VECTOR_DIR.joinpath("test_SBA.yml")), str(TEST_VECTOR_DIR.joinpath("test_SBA.yml")),
] ]
# Names of the experiments whose configurations are exercised by the
# parametrized experiment test (one test case per entry).
# NOTE(review): the commented-out entries are presumably experiments that are
# not ready to be tested yet -- confirm before re-enabling them.
INPUT_EXPERIMENT_NAMES = [
    "BS1534-1a",
    "BS1534-1b",
    "BS1534-2a",
    "BS1534-2b",
    "BS1534-3a",
    "BS1534-3b",
    "BS1534-4a",
    "BS1534-4b",
    "BS1534-5a",
    "BS1534-5b",
    "BS1534-6a",
    "BS1534-6b",
    # "BS1534-7a",
    # "BS1534-7b",
    "P800-1",
    "P800-2",
    "P800-3",
    "P800-4",
    "P800-5",
    "P800-6",
    "P800-7",
    # "P800-8",
    # "P800-9",
]
#!/usr/bin/env python3
#
# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
# contributors to this repository. All Rights Reserved.
#
# This software is protected by copyright law and by international treaties.
# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
# contributors to this repository retain full ownership rights in their respective contributions in
# the software. This notice grants no license of any kind, including but not limited to patent
# license, nor is any license granted by implication, estoppel or otherwise.
#
# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
# contributions.
#
# This software is provided "AS IS", without any express or implied warranties. The software is in the
# development stage. It is intended exclusively for experts who have experience with such software and
# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
# and fitness for a particular purpose are hereby disclaimed and excluded.
#
# Any dispute, controversy or claim arising under or in relation to providing this software shall be
# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
# the United Nations Convention on Contracts on the International Sales of Goods.
#
import shutil
from pathlib import Path
import pytest
from ivas_processing_scripts import main as generate_test
from ivas_processing_scripts.audiotools import audio
from ivas_processing_scripts.audiotools.audiofile import concat
from ivas_processing_scripts.processing.config import TestConfig
from tests.constants import (
EXPERIMENTS_DIR,
FORMAT_TO_METADATA_FILES,
INPUT_EXPERIMENT_NAMES,
NCHAN_TO_FILE,
TESTS_DIR,
)
# Experiments that need a background noise file, mapped to the dummy noise
# recording (stored in the tests' data folder) that is copied in for them.
BG_NOISE_FOR_EXPERIMENT = {
    "P800-2": Path(TESTS_DIR) / "data" / "p800-2_background_noise.wav",
    "P800-5": Path(TESTS_DIR) / "data" / "p800-5_background_noise.wav",
}

# File name the dummy noise is given inside an experiment's background_noise folder.
BG_NOISE_NAME = "background_noise.wav"
class Arguments:
    """Lightweight argument container passed to ``generate_test`` by the test below.

    Attributes are set directly in ``__init__``:
        config: the configuration (path) given by the caller.
        debug: always ``True``.
    """

    def __init__(self, config):
        # debug is hard-wired on; only the config varies between test cases
        self.debug = True
        self.config = config
@pytest.mark.parametrize("exp_name", INPUT_EXPERIMENT_NAMES)
def test_generate_test_items(exp_name):
    """Run the full item generation for one experiment configuration.

    The test locates ``<EXPERIMENTS_DIR>/<exp_name>/config/<exp_name>.yml``,
    fills the experiment's input folder with dummy audio (and metadata, where
    the input format has any), provides a dummy background noise file for the
    experiments listed in ``BG_NOISE_FOR_EXPERIMENT`` and finally calls
    ``generate_test`` with that configuration.

    Args:
        exp_name: Name of the experiment, supplied by the parametrization.

    Raises:
        ValueError: If the input path named in the experiment config does not exist.
    """
    cfg_dir = Path(TESTS_DIR).joinpath(EXPERIMENTS_DIR)
    cfg = Path(cfg_dir).joinpath(Path(exp_name)).resolve().absolute()
    cfg = cfg.joinpath(f"config/{exp_name}.yml")
    args = Arguments(cfg)

    # read out input path
    config = TestConfig(cfg)
    input_path = Path(config.input_path).resolve().absolute()
    input_fmt = config.input["fmt"]

    # deduce number of channels based on input format
    input_object = audio.fromtype(input_fmt)
    num_channels = input_object.num_channels

    # check if folder exists
    if not input_path.exists():
        raise ValueError("Input path from experiments config does not exist")

    # get dummy input files: the spectral test file for this channel count and
    # the pink noise file whose path is derived from it by name substitution
    spectral_dummy = NCHAN_TO_FILE[num_channels]
    pink_noise_dummy = Path(
        str(spectral_dummy).replace("spectral/spectral_test", "pinknoise/pink_noise")
    )
    dummy_input_files = [spectral_dummy, pink_noise_dummy]

    # get dummy metadata files (none for formats without an entry)
    dummy_md_files = FORMAT_TO_METADATA_FILES.get(input_fmt, list())

    # copy input files
    for f in dummy_input_files:
        f_out = input_path.joinpath(f.name).resolve().absolute()
        # need at least 2s of input files for gen-patt to be happy
        # (cannot keep the tolerance for 50 frames only), so each dummy file
        # is concatenated with itself
        concat([str(f)] * 2, str(f_out))

        # metadata files are named after the audio file: <audio file>.<index>.csv
        for i, md_f in enumerate(dummy_md_files):
            suffix = f"{i}.csv"
            md_f_out = ".".join([str(f_out), suffix])
            shutil.copy(md_f, md_f_out)

    # copy background noise if needed
    if (bg_noise := BG_NOISE_FOR_EXPERIMENT.get(exp_name)) is not None:
        bg_noise_folder = input_path.parent.joinpath("background_noise")
        # shutil.copy does not create missing directories; make sure the target
        # folder exists so the copy cannot fail with FileNotFoundError
        bg_noise_folder.mkdir(parents=True, exist_ok=True)
        bg_noise_out = bg_noise_folder.joinpath(BG_NOISE_NAME)
        shutil.copy(bg_noise, bg_noise_out)

    generate_test(args)