diff --git a/examples/ITEM_GENERATION_5_1_4.yml b/examples/ITEM_GENERATION_5_1_4.yml
new file mode 100644
index 0000000000000000000000000000000000000000..4670d1979eefe3b9fc0a0aaeb6521c9eb1aadf8c
--- /dev/null
+++ b/examples/ITEM_GENERATION_5_1_4.yml
@@ -0,0 +1,177 @@
+---
+################################################
+# Item generation - General configuration
+################################################
+
+### Any relative paths will be interpreted relative to the working directory the script is called from!
+### Usage of absolute paths is recommended.
+### Do not use file names with dots "." in them! This is not supported, use "_" instead
+### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions
+
+### Output format
+format: "5_1_4"
+# masa_tc: 1 # applicable only to MASA/OMASA format
+# masa_dirs: 1 # applicable only to MASA/OMASA format
+# sba_order: 2 # applicable only to OSBA format
+
+### Output sampling rate in Hz
+fs: 48000
+
+### Generate BINAURAL output (_BINAURAL will be appended to the output filename)
+binaural_output: true
+
+### Normalize to a target loudness of X LKFS
+loudness: -26
+
+### Apply pre-amble and post-amble of X seconds
+preamble: 0.0
+postamble: 0.0
+
+### Apply fade-in and fade-out of X seconds
+fade_in_out: 0.5
+
+### Trim the output such that the total duration is X seconds
+duration: 8
+
+### Add low-level random background noise (amplitude +-4) instead of silence; default = false (silence)
+add_low_level_random_noise: false
+
+### Process with parallel streams
+multiprocessing: false
+
+################################################
+### Item generation - Filename conventions
+################################################
+
+### Naming convention for the input mono files
+### The input filenames are represented by:
+### lLLeeettszz.wav
+### where:
+### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com)
+### LL stands for the language: JP, FR, GE, MA, DA, EN
+### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09
+### tt stands for the talker ID: f1, f2, f3, m1, m2, m3
+### s stands for 'sample' and zz is the sample number; 01, ..., 14
+
+### Naming convention for the generated output files
+### The output filenames are represented by:
+### leeeayszz.wav
+### The filenames of the accompanying output metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by:
+### leeeayszz.met for metadata-assisted spatial audio
+### leeeayszz.wav.o.csv for object-based audio
+### where:
+### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com)
+### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09
+### a stands for 'audio'
+### y is the per-experiment category according to IVAS-8a: 01, 02, 03, 04, 05, 06
+### s stands for 'sample' and zz is the sample number; 01, 02, 03, 04, 05, 06, 07 (07 is the preliminary sample)
+### o stands for the object number; 0, 1, 2, 3
+
+### File designators, default is "l" for listening lab, "EN" for language, "p07" for experiment and "g" for company
+listening_lab: "l"
+language: "EN"
+exp: "p01"
+provider: "va"
+
+### Insert prefix for all input filenames (default: "")
+### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'experiment'
+### the number of consecutive letters defines the length of each field
+# use_input_prefix: "lLLeee"
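+### Example (illustrative only; these comments are not read by the scripts):
+### with the designators above, the prefix "lLLeee" expands to "lENp01", so a
+### scene input like "f1s4b_Talker2.wav" is looked up as "lENp01f1s4b_Talker2.wav"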
+
+### Insert prefix for all output filenames (default: "")
+### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'experiment'
+### the number of consecutive letters defines the length of each field
+# use_output_prefix: "leee"
+
+################################################
+### Item generation - Scene description
+################################################
+
+### Each scene shall be described using the following parameters/properties:
+### output: output filename
+### description: textual description of the scene
+### input: input filename(s)
+### IR: filename(s) of the input IRs
+### azimuth: azimuth in the range [-180,180]; positive values point to the left
+### elevation: elevation in the range [-90,90]; positive values indicate up
+### shift: time adjustment of the input signal (negative value delays the signal)
+### level: per-source loudness of the input signal(s) in LKFS; optional, default: -26 (see the commented example scene below)
+### background: background noise filename (if used, the 'add_low_level_random_noise' parameter is ignored)
+### background_level: normalize the background noise loudness to X LKFS
+###
+### Note 0: you can use relative paths in filenames (the program assumes that the root directory is the parent directory of the ivas_processing_scripts subfolder)
+### Note 1: use brackets [val1, val2, ...] when specifying multiple values
+### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames
+### Note 3: we're using a right-handed coordinate system with azimuth = 0 pointing from the nose to the screen
+
+scenes:
+  "01":
+    output: "out/s01.wav"
+    description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
+    input: ["items_mono/untrimmed/f1s4b_Talker2.wav", "items_mono/untrimmed/f2s1a_Talker1.wav"]
+    IR: ["IRs/IR_do_p04_e_01_01_FOA.wav", "IRs/IR_do_p04_e_02_01_FOA.wav"]
+    shift: [0.0, -1.0]
+    background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
+    background_level: -46
+
+  "02":
+    output: "out/s02.wav"
+    description: "Car with AB microphone pickup, overlap between the talkers, car noise."
+    input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"]
+    IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
+    shift: [0.0, +1.0]
+    background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
+    background_level: -46
+
+  "03":
+    output: "out/s03.wav"
+    description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
+    input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"]
+    IR: ["IRs/IR_do_p04_e_05_01_FOA.wav", "IRs/IR_do_p04_e_06_01_FOA.wav"]
+    shift: [0.0, -1.0]
+    background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
+    background_level: -46
+
+  "04":
+    output: "out/s04.wav"
+    description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
+    input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"]
+    IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_08_01_FOA.wav"]
+    shift: [0.0, -1.0]
+    background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
+    background_level: -46
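+
+  ### Hypothetical example (commented out; not generated): illustrates the
+  ### optional per-source 'level' parameter read by generate_mc_items.py
+  ### (LKFS, default -26). All filenames here are placeholders.
+  # "ex":
+  #   output: "out/example.wav"
+  #   description: "Example: second talker attenuated to -31 LKFS."
+  #   input: ["items_mono/untrimmed/talker1.wav", "items_mono/untrimmed/talker2.wav"]
+  #   IR: ["IRs/IR_example_1_FOA.wav", "IRs/IR_example_2_FOA.wav"]
+  #   level: [-26, -31]
+  #   shift: [0.0, -1.0]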
+
+  "05":
+    output: "out/s05.wav"
+    description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
+    input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"]
+    IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
+    shift: [0.0, -1.0]
+    background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
+    background_level: -46
+
+  "06":
+    output: "out/s06.wav"
+    description: "Car with AB microphone pickup, no overlap between the talkers."
+    input: ["items_mono/untrimmed/m3s8a_Talker2.wav", "items_mono/untrimmed/m4s13a_Talker1.wav"]
+    IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
+    shift: [0.0, -1.0]
+    background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
+    background_level: -46
+
+  "07":
+    output: "out/s07.wav"
+    description: "Preliminary: Car with AB microphone pickup, no overlap between the talkers."
+    input: ["items_mono/untrimmed/f1s20a_Talker2.wav", "items_mono/untrimmed/f5s15b_Talker1.wav"]
+    IR: ["IRs/IR_do_p04_e_02_01_FOA.wav", "IRs/IR_do_p04_e_07_01_FOA.wav"]
+    shift: [0.0, -1.0]
+    background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
+    background_level: -46
+
+  "08":
+    output: "out/s08.wav"
+    description: "Car with AB microphone pickup, overlap between the talkers."
+    input: ["items_mono/untrimmed/m2s6b_Talker2.wav", "items_mono/untrimmed/f5s14a_Talker1.wav"]
+    IR: ["IRs/IR_do_p04_e_08_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
+    shift: [0.0, +1.0]
+    background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
+    background_level: -46
diff --git a/ivas_processing_scripts/generation/__init__.py b/ivas_processing_scripts/generation/__init__.py
index 1a5c313df7c6b75596baf9972c44423bac3e3b86..58c75189bee0be5dacd6edb7d9f23f4af7625e5d 100755
--- a/ivas_processing_scripts/generation/__init__.py
+++ b/ivas_processing_scripts/generation/__init__.py
@@ -32,6 +32,10 @@
 
 import logging
 
+from ivas_processing_scripts.audiotools.constants import (
+    CHANNEL_BASED_AUDIO_ALTNAMES,
+    CHANNEL_BASED_AUDIO_FORMATS,
+)
 from ivas_processing_scripts.constants import (
     LOGGER_DATEFMT,
     LOGGER_FORMAT,
@@ -41,6 +45,7 @@ from ivas_processing_scripts.generation import (
     config,
     generate_ismN_items,
     generate_masa_items,
+    generate_mc_items,
     generate_omasa_items,
     generate_osba_items,
     generate_sba_items,
@@ -100,5 +105,11 @@ def main(args):
     elif "OSBA" in cfg.format:
         # generate OSBA items from FOA/HOA2/HOA3 and MONO items according to scene description
         generate_osba_items.generate_osba_items(cfg, logger)
+    elif (
+        cfg.format in CHANNEL_BASED_AUDIO_FORMATS
+        or cfg.format in CHANNEL_BASED_AUDIO_ALTNAMES
+    ):
+        # generate MC items from MONO items according to scene description
+        generate_mc_items.generate_mc_items(cfg, logger)
 
     logger.handlers.clear()
diff --git a/ivas_processing_scripts/generation/generate_mc_items.py b/ivas_processing_scripts/generation/generate_mc_items.py
new file mode 100644
index 0000000000000000000000000000000000000000..bac49e0c8de1720c46334cb9d576426da279e06b
--- /dev/null
+++ b/ivas_processing_scripts/generation/generate_mc_items.py
@@ -0,0 +1,485 @@
+#!/usr/bin/env python3
+
+#
+# (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository. All Rights Reserved.
+#
+# This software is protected by copyright law and by international treaties.
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+# contributors to this repository retain full ownership rights in their respective contributions in
+# the software. This notice grants no license of any kind, including but not limited to patent
+# license, nor is any license granted by implication, estoppel or otherwise.
+#
+# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+# contributions.
+#
+# This software is provided "AS IS", without any express or implied warranties. The software is in the
+# development stage. It is intended exclusively for experts who have experience with such software and
+# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+# and fitness for a particular purpose are hereby disclaimed and excluded.
+#
+# Any dispute, controversy or claim arising under or in relation to providing this software shall be
+# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+# the United Nations Convention on Contracts on the International Sales of Goods.
+#
+
+import logging
+import sys
+from itertools import groupby, repeat
+from pathlib import Path
+
+import numpy as np
+
+from ivas_processing_scripts.audiotools import audio, audioarray, audiofile
+from ivas_processing_scripts.audiotools.convert.channelbased import (
+    render_cba_to_binaural,
+)
+from ivas_processing_scripts.audiotools.convert.scenebased import render_sba_to_cba
+from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm
+from ivas_processing_scripts.audiotools.wrappers.reverb import (
+    reverb_foa,
+    reverb_hoa2,
+    reverb_hoa3,
+)
+from ivas_processing_scripts.generation import config
+from ivas_processing_scripts.utils import apply_func_parallel
+
+SEED_RANDOM_NOISE = 0
+
+
+# function for searching sequences of the same character and replacing them with another string
+def replace_char_seq_with_string(s, char_seq, repl_str):
+    result = []
+
+    # find groups of consecutive letters
+    groups = ["".join(list(g)) for k, g in groupby(s)]
+
+    # limit the length of the replacement string by the length of the character sequence
+    repl_str = repl_str[: len(char_seq)]
+
+    # replace each occurrence of the sequence of characters
+    for g in groups:
+        if char_seq in g:
+            result.append(repl_str)
+        else:
+            result.append(g)
+
+    return "".join(result)
+
+
+# function for appending a string to a filename before the file extension
+def append_str_filename(filename, str_to_append):
+    p = Path(filename)
+    # combine the stem, the string to append, and the suffix
+    return p.parent / (p.stem + str_to_append + p.suffix)
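+
+
+# Illustration (comments only; nothing here is executed): with the default
+# designators, replace_char_seq_with_string("lLLeee", "LL", "EN") returns
+# "lENeee", and append_str_filename("out/s01.wav", "_BINAURAL") returns
+# Path("out/s01_BINAURAL.wav")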
+
+
+def generate_mc_items(
+    cfg: config.TestConfig,
+    logger: logging.Logger,
+):
+    """Generate MC items from MONO items convolved with FOA/HOA2/HOA3 IRs based on the scene description"""
+
+    # set the fs
+    if "fs" not in cfg.__dict__:
+        cfg.fs = 48000
+
+    # set the listening lab designator
+    if "listening_lab" not in cfg.__dict__:
+        cfg.listening_lab = "l"
+
+    # set the language designator
+    if "language" not in cfg.__dict__:
+        cfg.language = "EN"
+
+    # set the experiment designator
+    if "exp" not in cfg.__dict__:
+        cfg.exp = "p07"
+
+    # set the provider
+    if "provider" not in cfg.__dict__:
+        cfg.provider = "g"
+
+    # set the prefix for all input filenames
+    if "use_input_prefix" not in cfg.__dict__:
+        cfg.use_input_prefix = ""
+    else:
+        # replace file designators
+        cfg.use_input_prefix = replace_char_seq_with_string(
+            cfg.use_input_prefix, "l", cfg.listening_lab
+        )
+        cfg.use_input_prefix = replace_char_seq_with_string(
+            cfg.use_input_prefix, "LL", cfg.language
+        )
+        cfg.use_input_prefix = replace_char_seq_with_string(
+            cfg.use_input_prefix, "eee", cfg.exp
+        )
+
+    # set the prefix for all IR filenames
+    if "use_IR_prefix" not in cfg.__dict__:
+        cfg.use_IR_prefix = ""
+    else:
+        # replace file designators
+        cfg.use_IR_prefix = replace_char_seq_with_string(
+            cfg.use_IR_prefix, "p", cfg.provider
+        )
+        cfg.use_IR_prefix = replace_char_seq_with_string(
+            cfg.use_IR_prefix, "LL", cfg.language
+        )
+        cfg.use_IR_prefix = replace_char_seq_with_string(
+            cfg.use_IR_prefix, "eee", cfg.exp
+        )
+
+    # set the prefix for all output filenames
+    if "use_output_prefix" not in cfg.__dict__:
+        cfg.use_output_prefix = ""
+    else:
+        # replace file designators
+        cfg.use_output_prefix = replace_char_seq_with_string(
+            cfg.use_output_prefix, "l", cfg.listening_lab
+        )
+        cfg.use_output_prefix = replace_char_seq_with_string(
+            cfg.use_output_prefix, "eee", cfg.exp
+        )
+
+    # set multiprocessing
+    if "multiprocessing" not in cfg.__dict__:
+        cfg.multiprocessing = False
+
+    apply_func_parallel(
+        generate_MC_scene,
+        zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)),
+        type="mp" if cfg.multiprocessing else None,
+        show_progress=None,
+    )
+
+    return
+
+
+def generate_MC_scene(
+    scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger
+):
+    """
+    Processes a single scene to generate an MC item.
+
+    Args:
+        scene_name (str): The name of the scene being processed.
+        scene (dict): A dictionary containing the scene description, including source files, IRs, shifts, levels, and background noise.
+        cfg (config.TestConfig): Configuration object containing settings for processing, such as input/output paths, sampling rate, and loudness levels.
+        logger (logging.Logger): Logger instance for logging information and errors.
+
+    Expected Behavior:
+        - Reads audio source files and processes them based on the scene description.
+        - Writes the processed audio to the output file (plus an optional BINAURAL render).
+        - Handles MONO sources convolved with FOA/HOA2/HOA3 IRs and applies transformations like loudness normalization, trimming, and padding.
+    """
+
+    scenes = list(cfg.scenes.keys())
+    logger.info(
+        f"Processing scene \"{scene_name}\" ({scenes.index(scene_name) + 1} out of {len(scenes)}), output file: {scene['output']}"
+    )
+
+    # extract the number of audio sources
+    N_inputs = len(np.atleast_1d(scene["input"]))
+
+    # get output filename
+    output_filename = Path(scene["output"]).parent / (
+        cfg.use_output_prefix + Path(scene["output"]).name
+    )
+
+    # initialize output dirs
+    dir_path = output_filename.parent
+    if dir_path and not dir_path.exists():
+        dir_path.mkdir(parents=True, exist_ok=True)
+
+    # initialize output MC object
+    y = audio.ChannelBasedAudio(cfg.format)
+    y.fs = cfg.fs
+
+    # set the frame length
+    frame_len = int(cfg.fs / 50)
+
+    # repeat for all source files
+    offset = 0
+    for i in range(N_inputs):
+        # parse parameters from the scene description
+        source_file = (
+            scene["input"][i] if isinstance(scene["input"], list) else scene["input"]
+        )
+        IR_file = scene["IR"][i] if isinstance(scene["IR"], list) else scene["IR"]
+
+        # get input filename and IR filename
+        input_filename = Path(source_file).parent / (
+            cfg.use_input_prefix + Path(source_file).name
+        )
+        IR_filename = Path(IR_file).parent / (cfg.use_IR_prefix + Path(IR_file).name)
+
+        # read the shift time in seconds
+        if "shift" in scene.keys():
+            source_shift = (
+                scene["shift"][i]
+                if isinstance(scene["shift"], list)
+                else scene["shift"]
+            )
+        else:
+            source_shift = 0.0
+
+        # convert the shift from seconds to samples and round it towards zero to a multiple of 20ms frames
+        source_shift_in_seconds = source_shift
+        source_shift = source_shift * cfg.fs
+        if source_shift >= 0:
+            source_shift = int(np.floor(source_shift / frame_len) * frame_len)
+        else:
+            source_shift = int(np.ceil(source_shift / frame_len) * frame_len)
+ ) + resampled_audio = audioarray.resample(x.audio, x.fs, cfg.fs) + x.audio = resampled_audio + x.fs = cfg.fs + + # read the IR file + IR = audio.fromfile(IR_fmt, IR_filename) + + # convolve MONO source audio with FOA/HOA2/HOA3 IR -> results in FOA/HOA2/HOA3 audio object + if IR_fmt == "FOA": + x = reverb_foa(x, IR) + elif IR_fmt == "HOA2": + x = reverb_hoa2(x, IR) + elif IR_fmt == "HOA3": + x = reverb_hoa3(x, IR) + + # adjust the level of the FOA/HOA2/HOA3 signal + x.audio, _ = loudness_norm(x, level, loudness_format="STEREO") + + # ensure the length of the audio source signal is a multiple of 20ms + if len(x.audio) % frame_len != 0: + # pad with zeros to ensure that the signal length is a multiple of 20ms + if len(x.audio) % frame_len != 0: + N_pad = int(frame_len - len(x.audio) % frame_len) + x.audio = audioarray.trim( + x.audio, x.fs, limits=[0, -N_pad], samples=True + ) + + # add the convolved FOA/HOA2/HOA3 audio source signal to the intermediate SBA output signal + if y_int.audio is None: + # this is the first SBA source signal + y_int.audio = x.audio.copy() + + if source_shift < 0: + # insert zeros to the first SBA source signal to shift it right + y_int.audio = audioarray.trim( + y_int.audio, y_int.fs, limits=[source_shift, 0], samples=True + ) + else: + offset = source_shift + else: + # shift the beginning of the audio source signal + delta_offset = source_shift - offset + if delta_offset > 0: + # insert zeros to the output SBA signal to shift it right + audioarray.trim( + y_int.audio, y_int.fs, limits=[-delta_offset, 0], samples=True + ) + offset = source_shift + else: + # insert zeros to the new SBA source signal to shift it right + audioarray.trim(x.audio, x.fs, limits=[delta_offset, 0], samples=True) + + # adjust the length of the audio source signal + delta_length = len(x.audio) - len(y_int.audio) + if delta_length > 0: + # pad zeros to the output SBA signal + y_int.audio = audioarray.trim( + y_int.audio, y_int.fs, limits=[0, -delta_length], samples=True + ) + else: + # pad zeros to the new SBA source signal + x.audio = audioarray.trim( + x.audio, x.fs, limits=[0, delta_length], samples=True + ) + + # superimpose + y_int.audio += x.audio + + # append pre-amble and post-amble + if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__: + preamble = int( + np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len + ) # convert to samples and ensure multiple of 20ms + postamble = int( + np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len + ) # convert to samples and ensure multiple of 20ms + if preamble != 0 or postamble != 0: + logger.info( + f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds" + ) + y_int.audio = audioarray.trim( + y_int.audio, y_int.fs, limits=[-preamble, -postamble], samples=True + ) + + # adjust the length of the output signal + if "duration" in cfg.__dict__: + # trim the output signal such that the total duration is X seconds + duration = int(cfg.duration * cfg.fs) # convert to samples + else: + # do not change the length of the audio signal + duration = len(y_int.audio) + duration = int( + np.floor(duration / frame_len) * frame_len + ) # ensure multiple of 20ms + if len(y_int.audio) != duration: + y_int.audio = audioarray.trim( + y_int.audio, y_int.fs, limits=[0, len(y_int.audio) - duration], samples=True + ) + + # adjust the loudness of the output signal + if "loudness" in cfg.__dict__: + logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LKFS") + y_int.audio, _ = loudness_norm(y_int, cfg.loudness, 
+
+    # add background noise in FOA/HOA2/HOA3 format
+    if "background" in scene.keys():
+        # check if [] are used in the background noise file name
+        if isinstance(scene["background"], list):
+            # if so, use the first element
+            background_filename = scene["background"][0]
+        else:
+            background_filename = scene["background"]
+
+        # prepend the input prefix to the background noise filename
+        background_filename = Path(background_filename).parent / (
+            cfg.use_input_prefix + Path(background_filename).name
+        )
+        logger.info(f"-- Adding background noise from {background_filename}")
+
+        # read the background noise file (assumed to be in the same ambisonics format as the last IR)
+        background = audio.fromfile(IR_fmt, background_filename)
+
+        # resample to the target fs if necessary
+        if background.fs != cfg.fs:
+            logger.warning(
+                f"Warning: Sample rate of the background noise is {background.fs} Hz and needs to be resampled to {cfg.fs}!"
+            )
+            resampled_audio = audioarray.resample(
+                background.audio, background.fs, cfg.fs
+            )
+            background.audio = resampled_audio
+            background.fs = cfg.fs
+
+        # adjust the length of the background noise signal
+        if len(background.audio) != len(y_int.audio):
+            background.audio = audioarray.trim(
+                background.audio,
+                background.fs,
+                limits=[0, len(background.audio) - len(y_int.audio)],
+                samples=True,
+            )
+
+        # adjust the loudness of the background noise signal
+        if "background_level" in scene.keys():
+            logger.info(
+                f"-- Rescaling background noise to target loudness: {scene['background_level']} LKFS"
+            )
+
+            # check if [] are used in the background level
+            if isinstance(scene["background_level"], list):
+                # if so, use the first element
+                scene["background_level"] = scene["background_level"][0]
+
+            # convert to float if the background level was entered in string format
+            if not isinstance(scene["background_level"], (int, float)):
+                scene["background_level"] = float(scene["background_level"])
+        else:
+            logger.warning(
+                "-- Warning: No target loudness for background noise specified, using default value of -26 LKFS"
+            )
+            scene["background_level"] = -26
+        background.audio, _ = loudness_norm(
+            background, scene["background_level"], loudness_format="STEREO", rms=True
+        )
+
+        # add the background noise to the output signal
+        y_int.audio += background.audio
+    elif (
+        "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise
+    ):
+        # create uniformly distributed noise between -4 and 4
+        np.random.seed(SEED_RANDOM_NOISE)
+        noise = np.random.randint(low=-4, high=5, size=y_int.audio.shape).astype(
+            "float"
+        )
+        y_int.audio += noise
+
+    # apply fade-in and fade-out
+    if "fade_in_out" in cfg.__dict__ and cfg.fade_in_out > 0:
+        logger.info(f"-- Applying fade-in and fade-out with {cfg.fade_in_out} seconds")
+        y_int.audio = audioarray.window(y_int.audio, y_int.fs, cfg.fade_in_out * 1000)
+
+    # convert the intermediate SBA output signal to MC format
+    render_sba_to_cba(y_int, y)
+
+    # write the MC audio signal to the output file
+    audiofile.write(output_filename, y.audio, y.fs)
+
+    # convert the MC audio signal to BINAURAL, if the option was chosen
+    if "binaural_output" in cfg.__dict__ and cfg.binaural_output:
+        binaural_output_filename = output_filename.with_name(
+            output_filename.stem + "_BINAURAL" + output_filename.suffix
+        )
+        logger.info(
+            f"-- Converting to BINAURAL output file: {binaural_output_filename}"
+        )
+        binaudio = audio.fromtype("BINAURAL")
+        binaudio.fs = y.fs
+        render_cba_to_binaural(y, binaudio)
+        audiofile.write(
+            binaural_output_filename,
+            binaudio.audio,
+            binaudio.fs,
+        )