diff --git a/ivas_processing_scripts/audiotools/wrappers/reverb.py b/ivas_processing_scripts/audiotools/wrappers/reverb.py index bfbe39eb8ce696008a73069128fb1358add6f062..f8fcfaa529b47c1e51b6b4ba3cb2cbfa8534a232 100644 --- a/ivas_processing_scripts/audiotools/wrappers/reverb.py +++ b/ivas_processing_scripts/audiotools/wrappers/reverb.py @@ -50,6 +50,7 @@ def reverb( input: Audio, IR: Audio, align: Optional[float] = None, + mode: Optional[str] = None, ) -> Audio: """ Wrapper for the ITU-T reverb binary to convolve mono audio signal with an impulse response @@ -63,6 +64,9 @@ def reverb( Impulse response align: float multiplicative factor to apply to the reverberated sound in order to align its energy level with a second filePath to the output file + mode: str, optional + Mode of operation, None - no operation on the output, "same" centers the output signals by trimming left and right edge effects equally + "trim_left" - trims the left edge, "trim_right" - trims the right edge. Returns ------- @@ -122,15 +126,25 @@ def reverb( tmp_output_file = tmp_dir.joinpath("tmp_reverbOut.pcm") cmd.extend([tmp_input_file, tmp_IR_file, tmp_output_file]) - # run the 'reverb' command + # run the 'reverb' command (automatically prepends N zeros to the input signal to keep the output length the same as the input) run(cmd) # read the reverberated output file output = copy(tmp_input) output.audio, _ = read(tmp_output_file, nchannels=1, fs=tmp_input.fs) - # remove trailing part (to ensure that the length of the output is the same as the input) - output.audio = output.audio[: -(IR.audio.shape[0] - 1), :] + # trim the output + if mode == "same": + # center the output by trimming left and right edge effects equally + output.audio = output.audio[ + (IR.audio.shape[0] - 1) // 2 : -(IR.audio.shape[0] - 1) // 2, : + ] + elif mode == "trim_left": + # trim the left edge + output.audio = output.audio[(IR.audio.shape[0] - 1) :, :] + elif mode == "trim_right": + # trim the right edge + output.audio = output.audio[: -(IR.audio.shape[0] - 1), :] if old_fs: output.audio = resample_itu(output, old_fs) @@ -143,6 +157,7 @@ def reverb_stereo( input: Audio, stereo_IR: Audio, align: Optional[float] = None, + mode: Optional[str] = None, ) -> Audio: """ Wrapper for the ITU-T reverb binary to convolve mono audio signal with a stereo impulse response @@ -155,6 +170,9 @@ def reverb_stereo( Impulse response align: float multiplicative factor to apply to the reverberated sound in order to align its energy level with the second file + mode: str, optional + Mode of operation, None - no operation on the output, "same" centers the output signals by trimming left and right edge effects equally + "trim_left" - trims the left edge, "trim_right" - trims the right edge. Returns ------- @@ -182,8 +200,8 @@ def reverb_stereo( align = 1.0 / np.max(np.abs(H)) # convolve mono input with left and right IR - y_left = reverb(input, IR_left, align=align) - y_right = reverb(input, IR_right, align=align) + y_left = reverb(input, IR_left, align=align, mode=mode) + y_right = reverb(input, IR_right, align=align, mode=mode) # combine into stereo output y = audio.fromtype("STEREO") @@ -197,6 +215,7 @@ def reverb_foa( input: Audio, foa_IR: Audio, align: Optional[float] = None, + mode: Optional[str] = None, ) -> Audio: """ Convolve mono audio signal with an FOA impulse response @@ -209,6 +228,9 @@ def reverb_foa( FOA impulse response align: float multiplicative factor to apply to the reverberated sound in order to align its energy level with the second file + mode: str, optional + Mode of operation, None - no operation on the output, "same" centers the output signals by trimming left and right edge effects equally + "trim_left" - trims the left edge, "trim_right" - trims the right edge. Returns ------- @@ -246,10 +268,10 @@ def reverb_foa( align = 1.0 / np.max(np.abs(H)) # convolve mono input with FOA IR - y_w = reverb(input, IR_w, align=align) - y_x = reverb(input, IR_x, align=align) - y_y = reverb(input, IR_y, align=align) - y_z = reverb(input, IR_z, align=align) + y_w = reverb(input, IR_w, align=align, mode=mode) + y_x = reverb(input, IR_x, align=align, mode=mode) + y_y = reverb(input, IR_y, align=align, mode=mode) + y_z = reverb(input, IR_z, align=align, mode=mode) # combine into FOA output y = audio.fromtype("FOA") diff --git a/ivas_processing_scripts/generation/generate_masa_items.py b/ivas_processing_scripts/generation/generate_masa_items.py index 9d71d7c4d2ce6662bb7634fac101cd998e41e23f..57ab3cbd74c702a225796fc9ee69e8f88f704418 100644 --- a/ivas_processing_scripts/generation/generate_masa_items.py +++ b/ivas_processing_scripts/generation/generate_masa_items.py @@ -292,11 +292,11 @@ def generate_MASA_scene( # convolve MONO source audio with FOA/HOA2/HOA3 IR -> results in FOA/HOA2/HOA3 audio object if IR_fmt == "FOA": - x = reverb_foa(x, IR) + x = reverb_foa(x, IR, mode=None) elif IR_fmt == "HOA2": - x = reverb_hoa2(x, IR) + x = reverb_hoa2(x, IR, mode=None) elif IR_fmt == "HOA3": - x = reverb_hoa3(x, IR) + x = reverb_hoa3(x, IR, mode=None) # adjust the level of the FOA/HOA2/HOA3 signal x.audio, _ = loudness_norm(x, level, loudness_format="STEREO") diff --git a/ivas_processing_scripts/generation/generate_mc_items.py b/ivas_processing_scripts/generation/generate_mc_items.py index 2b2da206b3b832ca821f6a2bb3ed0bad6729ff23..1b627e73f203862179d48fc896c22700cbd2871d 100644 --- a/ivas_processing_scripts/generation/generate_mc_items.py +++ b/ivas_processing_scripts/generation/generate_mc_items.py @@ -292,11 +292,11 @@ def generate_MC_scene( # convolve MONO source audio with FOA/HOA2/HOA3 IR -> results in FOA/HOA2/HOA3 audio object if IR_fmt == "FOA": - x = reverb_foa(x, IR) + x = reverb_foa(x, IR, mode=None) elif IR_fmt == "HOA2": - x = reverb_hoa2(x, IR) + x = reverb_hoa2(x, IR, mode=None) elif IR_fmt == "HOA3": - x = reverb_hoa3(x, IR) + x = reverb_hoa3(x, IR, mode=None) # adjust the level of the FOA/HOA2/HOA3 signal x.audio, _ = loudness_norm(x, level, loudness_format="STEREO") diff --git a/ivas_processing_scripts/generation/generate_sba_items.py b/ivas_processing_scripts/generation/generate_sba_items.py index 96b542d22e0ffec8d9ec7eb0e5a005c1da55abce..feadf510bd09f2a1ca00c74dbd4ee033c5c3fea4 100644 --- a/ivas_processing_scripts/generation/generate_sba_items.py +++ b/ivas_processing_scripts/generation/generate_sba_items.py @@ -262,11 +262,11 @@ def generate_sba_scene( # convolve MONO source audio with FOA/HOA2/HOA3 IR -> results in FOA/HOA2/HOA3 audio object if cfg.format == "FOA": - x = reverb_foa(x, IR) + x = reverb_foa(x, IR, mode=None) elif cfg.format == "HOA2": - x = reverb_hoa2(x, IR) + x = reverb_hoa2(x, IR, mode=None) elif cfg.format == "HOA3": - x = reverb_hoa3(x, IR) + x = reverb_hoa3(x, IR, mode=None) # adjust the level of the FOA/HOA2/HOA3 signal x.audio, _ = loudness_norm(x, level, loudness_format="STEREO") diff --git a/ivas_processing_scripts/generation/generate_stereo_items.py b/ivas_processing_scripts/generation/generate_stereo_items.py index bea865de2a294e07bb65548f2b1b692a54ff5893..2cd80fe58979e7841f02c8a34923389516031908 100644 --- a/ivas_processing_scripts/generation/generate_stereo_items.py +++ b/ivas_processing_scripts/generation/generate_stereo_items.py @@ -267,7 +267,7 @@ def generate_stereo_scene( IR = audio.fromfile("STEREO", IR_filename) # convolve MONO source audio with STEREO IR -> results in STEREO audio object - x = reverb_stereo(x, IR) + x = reverb_stereo(x, IR, mode=None) # adjust the level of the STEREO signal x.audio, _ = loudness_norm(x, level, loudness_format="STEREO") diff --git a/ivas_processing_scripts/generation/process_ambi_items.py b/ivas_processing_scripts/generation/process_ambi_items.py deleted file mode 100644 index f2b8982e15c9fdd81cc41f3add3ad9133f64684c..0000000000000000000000000000000000000000 --- a/ivas_processing_scripts/generation/process_ambi_items.py +++ /dev/null @@ -1,353 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -import logging -import os -from itertools import groupby, repeat -from math import floor - -import numpy as np - -from ivas_processing_scripts.audiotools import audio, audiofile, convert -from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness -from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_foa, reverb_hoa2 -from ivas_processing_scripts.generation import config -from ivas_processing_scripts.utils import apply_func_parallel - -SEED_RANDOM_NOISE = 0 - - -# function for converting nd numpy array to strings with 2 decimal digits -def csv_formatdata(data): - for row in data: - yield ["%0.2f" % v for v in row] - - -# function for searching sequences of same the same character and replacing it by another string -def replace_char_seq_with_string(str, char_seq, repl_str): - result = [] - - # find groups of consecutive letters - groups = ["".join(list(g)) for k, g in groupby(str)] - - # limit the length of the replacement string by the length of the character sequence - repl_str = repl_str[: len(char_seq)] - - # replace each occurence of the sequence of characters - for g in groups: - if char_seq in g: - result.append(repl_str) - else: - result.append(g) - - return "".join(result) - - -def generate_ambi_items( - cfg: config.TestConfig, - logger: logging.Logger, -): - """Generate FOA/HOA2 items from mono items based on scene description""" - - # set the target level - if "loudness" not in cfg.__dict__: - cfg.loudness = -26 - - # set the fs - if "fs" not in cfg.__dict__: - cfg.fs = 48000 - - # set the IR fs - if "IR_fs" not in cfg.__dict__: - cfg.IR_fs = 48000 - - # set the pre-amble and post-amble - if "preamble" not in cfg.__dict__: - cfg.preamble = 0.0 - - if "postamble" not in cfg.__dict__: - cfg.postamble = 0.0 - - # set the IR path - if "IR_path" not in cfg.__dict__: - cfg.IR_path = os.path.join(os.path.dirname(__file__), "IR") - - # set the pre-amble and post-amble - if "add_low_level_random_noise" not in cfg.__dict__: - cfg.add_low_level_random_noise = False - - # setup binaural rendering - if "binaural_path" not in cfg.__dict__: - cfg.binaural_path = "" - - # set the listening lab designator - if "listening_lab" not in cfg.__dict__: - cfg.listening_lab = "l" - - # set the language designator - if "language" not in cfg.__dict__: - cfg.language = "EN" - - # set the experiment designator - if "exp" not in cfg.__dict__: - cfg.exp = "p04" - - # set the provider - if "provider" not in cfg.__dict__: - cfg.provider = "g" - - # set the prefix for all input filenames - if "use_input_prefix" not in cfg.__dict__: - cfg.use_input_prefix = "" - else: - # replace file designators - cfg.use_input_prefix = replace_char_seq_with_string( - cfg.use_input_prefix, "l", cfg.listening_lab - ) - cfg.use_input_prefix = replace_char_seq_with_string( - cfg.use_input_prefix, "LL", cfg.language - ) - cfg.use_input_prefix = replace_char_seq_with_string( - cfg.use_input_prefix, "eee", cfg.exp - ) - - # set the prefix for all IR filenames - if "use_IR_prefix" not in cfg.__dict__: - cfg.use_IR_prefix = "" - else: - # replace file designators - cfg.use_IR_prefix = replace_char_seq_with_string( - cfg.use_IR_prefix, "p", cfg.provider - ) - cfg.use_IR_prefix = replace_char_seq_with_string( - cfg.use_IR_prefix, "LL", cfg.language - ) - cfg.use_IR_prefix = replace_char_seq_with_string( - cfg.use_IR_prefix, "eee", cfg.exp - ) - - # set the prefix for all output filenames - if "use_output_prefix" not in cfg.__dict__: - cfg.use_output_prefix = None - else: - # replace file designators - cfg.use_output_prefix = replace_char_seq_with_string( - cfg.use_output_prefix, "l", cfg.listening_lab - ) - cfg.use_output_prefix = replace_char_seq_with_string( - cfg.use_output_prefix, "eee", cfg.exp - ) - - # set multiprocessing - if "multiprocessing" not in cfg.__dict__: - cfg.multiprocessing = True - - apply_func_parallel( - generate_ambi_scene, - zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), - None, - "mp" if cfg.multiprocessing else None, - ) - - return - - -def generate_ambi_scene( - scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger -): - logger.info( - f"Processing scene: {scene_name} out of {len(cfg.scenes)} scenes, name: {scene['name']}" - ) - - # extract the number of audio sources - N_sources = len(np.atleast_1d(scene["source"])) - - # read the shift time in seconds - if "overlap" in scene.keys(): - source_overlap = float(scene["overlap"]) - else: - source_overlap = 0.0 - - # read the ambi format - if "format" in scene.keys(): - ambi_format = scene["format"] - else: - ambi_format = "FOA" - - len_s1 = 0 - y = audio.SceneBasedAudio(ambi_format) - for i in range(N_sources): - # parse parameters from the scene description - source_file = np.atleast_1d(scene["source"])[i] - IR_file = np.atleast_1d(scene["IR"])[i] - - logger.info(f"Convolving {source_file} with {IR_file}") - - # read source file - x = audio.fromfile( - "MONO", - os.path.join( - cfg.input_path, - os.path.dirname(source_file), - cfg.use_input_prefix + os.path.basename(source_file), - ), - fs=cfg.fs, - ) - - # read the IR file - IR = audio.fromfile( - ambi_format, - os.path.join( - cfg.IR_path, - os.path.dirname(IR_file), - cfg.use_IR_prefix + os.path.basename(IR_file), - ), - fs=cfg.IR_fs, - ) - - if i == 0: - len_s1 = x.audio.shape[0] - - # convolve with the FOA/HOA2 IR - if ambi_format == "FOA": - x = reverb_foa(x, IR) - elif ambi_format == "HOA2": - x = reverb_hoa2(x, IR) - - # adjust the level of the foa signal - _, scale_factor, _ = get_loudness(x, cfg.loudness, "BINAURAL") - x.audio *= scale_factor - - # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) - if i > 0: - # get the length of the first source file - N_delay = len_s1 - - # add the shift - N_delay += int(-source_overlap * x.fs) - - # insert all-zero preamble - pre = np.zeros((N_delay, x.audio.shape[1])) - x.audio = np.concatenate([pre, x.audio]) - - # pad with zeros to ensure that the signal length is a multiple of 20ms - N_frame = x.fs / 50 - if len(x.audio) % N_frame != 0: - N_pad = int(N_frame - len(x.audio) % N_frame) - - # insert all-zero preamble - pre = np.zeros((N_pad, x.audio.shape[1])) - x.audio = np.concatenate([pre, x.audio]) - - # add source signal to the array of source signals - y.fs = x.fs - if y.audio is None: - y.audio = x.audio.copy() - else: - # pad with zeros to have equal length of all source signals - if x.audio.shape[0] > y.audio.shape[0]: - y.audio = np.vstack( - ( - y.audio, - np.zeros( - ( - x.audio.shape[0] - y.audio.shape[0], - y.audio.shape[1], - ) - ), - ) - ) - elif y.audio.shape[0] > x.audio.shape[0]: - x.audio = np.vstack( - ( - x.audio, - np.zeros( - ( - y.audio.shape[0] - x.audio.shape[0], - x.audio.shape[1], - ) - ), - ) - ) - - # superimpose - y.audio += x.audio - - # append pre-amble and post-amble to all sources - if cfg.preamble != 0.0: - # ensure that pre-amble is a multiple of 20ms - N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) - - # insert all-zero preamble to all sources - pre = np.zeros((N_pre, y.audio.shape[1])) - y.audio = np.concatenate([pre, y.audio]) - - if cfg.postamble != 0.0: - # ensure that post-mable is a multiple of 20ms - N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) - - # append all-zero postamble to all sources - post = np.zeros((N_post, y.audio.shape[1])) - y.audio = np.concatenate([y.audio, post]) - - # add random noise - if cfg.add_low_level_random_noise: - # create uniformly distributed noise between -4 and 4 - np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") - - # superimpose - y.audio += noise - - # write the reverberated audio into output file - audiofile.write( - os.path.join( - cfg.output_path, - os.path.dirname(scene["name"]), - cfg.use_output_prefix + os.path.basename(scene["name"]), - ), - y.audio, - y.fs, - ) - - # convert to binaural if option chosen - if cfg.binaural_path != "": - binaudio = audio.fromtype("BINAURAL") - binaudio.fs = y.fs - convert.format_conversion(y, binaudio) - audiofile.write( - os.path.join(cfg.binaural_path, scene["name"]), - binaudio.audio, - binaudio.fs, - ) - - return diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py deleted file mode 100644 index 2177f09bc8227e860544f0e659e9b1a7d6e7203a..0000000000000000000000000000000000000000 --- a/ivas_processing_scripts/generation/process_ism1_items.py +++ /dev/null @@ -1,356 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -import csv -import logging -import os -from itertools import groupby, repeat -from math import floor - -import numpy as np - -from ivas_processing_scripts.audiotools import audio, audiofile -from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness -from ivas_processing_scripts.generation import config -from ivas_processing_scripts.utils import apply_func_parallel - -SEED_RANDOM_NOISE = 0 - - -# function for converting nd numpy array to strings with 2 decimal digits -def csv_formatdata(data): - for row in data: - yield ["%0.2f" % v for v in row] - - -# function for searching sequences of same the same character and replacing it by another string -def replace_char_seq_with_string(str, char_seq, repl_str): - result = [] - - # find groups of consecutive letters - groups = ["".join(list(g)) for k, g in groupby(str)] - - # limit the length of the replacement string by the length of the character sequence - repl_str = repl_str[: len(char_seq)] - - # replace each occurence of the sequence of characters - for g in groups: - if char_seq in g: - result.append(repl_str) - else: - result.append(g) - - return "".join(result) - - -def generate_ism1_items( - cfg: config.TestConfig, - logger: logging.Logger, -): - """Generate ISM2 items with metadata from mono items based on scene description""" - - # set the target level - if "loudness" not in cfg.__dict__: - cfg.loudness = -26 - - # set the fs - if "fs" not in cfg.__dict__: - cfg.fs = 48000 - - # set the pre-amble and post-amble - if "preamble" not in cfg.__dict__: - cfg.preamble = 0.0 - - if "postamble" not in cfg.__dict__: - cfg.postamble = 0.0 - - # set the pre-amble and post-amble - if "add_low_level_random_noise" not in cfg.__dict__: - cfg.add_low_level_random_noise = False - - # set the listening lab designator - if "listening_lab" not in cfg.__dict__: - cfg.listening_lab = "l" - - # set the language designator - if "language" not in cfg.__dict__: - cfg.language = "EN" - - # set the experiment designator - if "exp" not in cfg.__dict__: - cfg.exp = "p06" - - # set the provider - if "provider" not in cfg.__dict__: - cfg.provider = "g" - - # set the prefix for all input filenames - if "use_input_prefix" not in cfg.__dict__: - cfg.use_input_prefix = "" - else: - # replace file designators - cfg.use_input_prefix = replace_char_seq_with_string( - cfg.use_input_prefix, "l", cfg.listening_lab - ) - cfg.use_input_prefix = replace_char_seq_with_string( - cfg.use_input_prefix, "LL", cfg.language - ) - cfg.use_input_prefix = replace_char_seq_with_string( - cfg.use_input_prefix, "eee", cfg.exp - ) - - # set the prefix for all output filenames - if "use_output_prefix" not in cfg.__dict__: - cfg.use_output_prefix = None - else: - # replace file designators - cfg.use_output_prefix = replace_char_seq_with_string( - cfg.use_output_prefix, "l", cfg.listening_lab - ) - cfg.use_output_prefix = replace_char_seq_with_string( - cfg.use_output_prefix, "eee", cfg.exp - ) - - # set multiprocessing - if "multiprocessing" not in cfg.__dict__: - cfg.multiprocessing = True - - apply_func_parallel( - generate_ism1_scene, - zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), - None, - "mp" if cfg.multiprocessing else None, - ) - - return - - -def generate_ism1_scene( - scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger -): - logger.info( - f"Processing {scene_name} out of {len(cfg.scenes)} scenes, name: {scene['name']}" - ) - - # extract the number of audio sources - N_sources = len(np.atleast_1d(scene["source"])) - - # initialize output arrays - y = audio.ChannelBasedAudio("MONO") - y_meta = None - - # read the shift time in seconds - if "overlap" in scene.keys(): - source_overlap = float(scene["overlap"]) - else: - source_overlap = 0.0 - - logger.info( - f"Encoding {scene['source']} at position(s) {scene['azimuth']},{scene['elevation']}" - ) - - # repeat for all source files - for i in range(N_sources): - # parse parameters from the scene description - source_file = ( - scene["source"][i] if isinstance(scene["source"], list) else scene["source"] - ) - - # read source file - x = audio.fromfile( - "MONO", - os.path.join( - cfg.input_path, - os.path.dirname(source_file), - cfg.use_input_prefix + os.path.basename(source_file), - ), - fs=cfg.fs, - ) - - # get the number of frames (multiple of 20ms) - N_frames = int(len(x.audio) / x.fs * 50) - frame_len = int(x.fs / 50) - - # trim the samples from the end to ensure that the signal length is a multiple of 20ms - x.audio = x.audio[: N_frames * frame_len] - - # adjust the level of the source file - _, scale_factor, _ = get_loudness(x, cfg.loudness, "MONO") - x.audio *= scale_factor - - # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) - if i > 0: - # get the length of the first source file - N_delay = len(y.audio) - - # add the shift value (ensure that the shift is a multiple of 20ms) - N_delay += int(floor(-source_overlap * 50) / 50 * x.fs) - - # insert all-zero signal - pre = np.zeros((N_delay, 1)) - x.audio = np.concatenate([pre, x.audio]) - - # pad with zeros to ensure that the signal length is a multiple of 20ms - if len(x.audio) % frame_len != 0: - # pad the source signal - N_pad = int(frame_len - len(x.audio) % frame_len) - post = np.zeros((N_pad, 1)) - x.audio = np.concatenate([x.audio, post]) - - # superimpose all source signals together - y.fs = x.fs - if y.audio is None: - y.audio = x.audio.copy() - else: - y.audio.resize(x.audio.shape, refcheck=False) - y.audio += x.audio - - # append pre-amble and post-amble to all sources - if cfg.preamble != 0.0: - # ensure that pre-amble is a multiple of 20ms - N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) - - # insert all-zero preamble to all sources - pre = np.zeros((N_pre, y.audio.shape[1])) - y.audio = np.concatenate([pre, y.audio]) - - if cfg.postamble != 0.0: - # ensure that post-amble is a multiple of 20ms - N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) - - # append all-zero postamble to all sources - post = np.zeros((N_post, y.audio.shape[1])) - y.audio = np.concatenate([y.audio, post]) - - # add random noise - if cfg.add_low_level_random_noise: - # create uniformly distributed noise between -4 and 4 - np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") - - # superimpose - y.audio += noise - - # process azimuth and elevation - source_azi = scene["azimuth"] - source_ele = scene["elevation"] - - N_frames = int(len(y.audio) / y.fs * 50) - - # read azimuth information and convert to an array - if isinstance(source_azi, str): - if ":" in source_azi: - # start with the initial azimuth value and apply step N_frames times - source_azi = source_azi.split(":") - azi = np.arange( - float(eval(source_azi[0])), - float(eval(source_azi[0])) + N_frames * float(eval(source_azi[1])), - float(eval(source_azi[1])), - ) - else: - # replicate static azimuth value N_frames times - azi = np.repeat(float(eval(source_azi)), N_frames) - else: - # replicate static azimuth value N_frames times - azi = np.repeat(float(source_azi), N_frames) - - # convert azimuth from 0 .. 360 to -180 .. +180 - azi = (azi + 180) % 360 - 180 - - # check, if azimuth is from -180 .. +180 - if any(azi > 180) or any(azi < -180): - logger.error( - f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}" - ) - - # read elevation information and convert to an array - if isinstance(source_ele, str): - if ":" in source_ele: - # convert into array (initial_value:step:stop_value) - # note: the stop_value value is +-90 degrees depending on the sign of the step - source_ele = source_ele.split(":") - ele = np.arange( - float(eval(source_ele[0])), - np.sign(float(eval(source_ele[1]))) * 90, - float(eval(source_ele[1])), - )[:N_frames] - - # repeat the last elevation value, if array is shorter than N_frames - if len(ele) < N_frames: - ele = np.append(ele, np.full(N_frames - len(ele), ele[-1])) - else: - # replicate static elevation value N_frames times - ele = np.repeat(float(eval(source_ele)), N_frames) - else: - # replicate static elevation value N_frames times - ele = np.repeat(float(source_ele), N_frames) - - # check if elevation is from -90 .. +90 - if any(ele > 90) or any(ele < -90): - logger.error( - f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}" - ) - - # arrange all metadata fields column-wise into a matrix - y_meta = np.column_stack((azi, ele)) - - # write ISM audio stream to the output file - audiofile.write( - os.path.join( - cfg.output_path, - os.path.dirname(scene["name"]), - cfg.use_output_prefix + os.path.basename(scene["name"]), - ), - y.audio, - y.fs, - ) - - # write ISM metadata to the output file in .0.csv format - csv_filename = os.path.join( - cfg.output_path, - os.path.dirname(scene["name"]), - cfg.use_output_prefix + os.path.basename(scene["name"]) + ".0.csv", - ) - - with open( - csv_filename, - "w", - newline="", - encoding="utf-8", - ) as f: - # create csv writer - writer = csv.writer(f) - - # write all rows to the .csv file - writer.writerows(csv_formatdata(y_meta)) - - return diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py deleted file mode 100644 index 83bd59e4f6fd009889c888b84fa08341bf109581..0000000000000000000000000000000000000000 --- a/ivas_processing_scripts/generation/process_ism2_items.py +++ /dev/null @@ -1,400 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -import csv -import logging -import os -from itertools import groupby, repeat -from math import floor - -import numpy as np - -from ivas_processing_scripts.audiotools import audio, audiofile -from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness -from ivas_processing_scripts.generation import config -from ivas_processing_scripts.utils import apply_func_parallel - -SEED_RANDOM_NOISE = 0 - - -# function for converting nd numpy array to strings with 2 decimal digits -def csv_formatdata(data): - for row in data: - yield ["%0.2f" % v for v in row] - - -# function for searching sequences of same the same character and replacing it by another string -def replace_char_seq_with_string(str, char_seq, repl_str): - result = [] - - # find groups of consecutive letters - groups = ["".join(list(g)) for k, g in groupby(str)] - - # limit the length of the replacement string by the length of the character sequence - repl_str = repl_str[: len(char_seq)] - - # replace each occurence of the sequence of characters - for g in groups: - if char_seq in g: - result.append(repl_str) - else: - result.append(g) - - return "".join(result) - - -def generate_ism2_items( - cfg: config.TestConfig, - logger: logging.Logger, -): - """Generate ISM2 items with metadata from mono items based on scene description""" - - # set the target level - if "loudness" not in cfg.__dict__: - cfg.loudness = -26 - - # set the fs - if "fs" not in cfg.__dict__: - cfg.fs = 48000 - - # set the pre-amble and post-amble - if "preamble" not in cfg.__dict__: - cfg.preamble = 0.0 - - if "postamble" not in cfg.__dict__: - cfg.postamble = 0.0 - - # set the pre-amble and post-amble - if "add_low_level_random_noise" not in cfg.__dict__: - cfg.add_low_level_random_noise = False - - # set the listening lab designator - if "listening_lab" not in cfg.__dict__: - cfg.listening_lab = "l" - - # set the language designator - if "language" not in cfg.__dict__: - cfg.language = "EN" - - # set the experiment designator - if "exp" not in cfg.__dict__: - cfg.exp = "p07" - - # set the provider - if "provider" not in cfg.__dict__: - cfg.provider = "g" - - # set the prefix for all input filenames - if "use_input_prefix" not in cfg.__dict__: - cfg.use_input_prefix = "" - else: - # replace file designators - cfg.use_input_prefix = replace_char_seq_with_string( - cfg.use_input_prefix, "l", cfg.listening_lab - ) - cfg.use_input_prefix = replace_char_seq_with_string( - cfg.use_input_prefix, "LL", cfg.language - ) - cfg.use_input_prefix = replace_char_seq_with_string( - cfg.use_input_prefix, "eee", cfg.exp - ) - - # set the prefix for all output filenames - if "use_output_prefix" not in cfg.__dict__: - cfg.use_output_prefix = None - else: - # replace file designators - cfg.use_output_prefix = replace_char_seq_with_string( - cfg.use_output_prefix, "l", cfg.listening_lab - ) - cfg.use_output_prefix = replace_char_seq_with_string( - cfg.use_output_prefix, "eee", cfg.exp - ) - - # set multiprocessing - if "multiprocessing" not in cfg.__dict__: - cfg.multiprocessing = True - - apply_func_parallel( - generate_ism2_scene, - zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), - None, - "mp" if cfg.multiprocessing else None, - ) - - return - - -def generate_ism2_scene( - scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger -): - logger.info( - f"Processing {scene_name} out of {len(cfg.scenes)} scenes, name: {scene['name']}" - ) - - # extract the number of audio sources - N_sources = len(np.atleast_1d(scene["source"])) - - # initialize output arrays - y = audio.ChannelBasedAudio("STEREO") - y_meta = None - - # read the shift time in seconds - if "overlap" in scene.keys(): - source_overlap = float(scene["overlap"]) - else: - source_overlap = 0.0 - - # repeat for all source files - for i in range(N_sources): - # parse parameters from the scene description - source_file = ( - scene["source"][i] if isinstance(scene["source"], list) else scene["source"] - ) - source_azi = ( - scene["azimuth"][i] - if isinstance(scene["azimuth"], list) - else scene["azimuth"] - ) - source_ele = ( - scene["elevation"][i] - if isinstance(scene["elevation"], list) - else scene["elevation"] - ) - - logger.info(f"Encoding {source_file} at position(s) {source_azi},{source_ele}") - - # read source file - x = audio.fromfile( - "MONO", - os.path.join( - cfg.input_path, - os.path.dirname(source_file), - cfg.use_input_prefix + os.path.basename(source_file), - ), - fs=cfg.fs, - ) - - # get the number of frames (multiple of 20ms) - N_frames = int(len(x.audio) / x.fs * 50) - frame_len = int(x.fs / 50) - - # trim the samples from the end to ensure that the signal length is a multiple of 20ms - x.audio = x.audio[: N_frames * frame_len] - - # adjust the level of the source file - _, scale_factor, _ = get_loudness(x, cfg.loudness, "MONO") - x.audio *= scale_factor - - # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) - if i > 0: - # get the length of the first source file - N_delay = len(y.audio[:, 0]) - - # add the shift value (ensure that the shift is a multiple of 20ms) - N_delay += int(floor(-source_overlap * 50) / 50 * x.fs) - - # insert all-zero signal - pre = np.zeros((N_delay, x.audio.shape[1])) - x.audio = np.concatenate([pre, x.audio]) - - # pad with zeros to ensure that the signal length is a multiple of 20ms - if len(x.audio) % frame_len != 0: - # pad the source signal - N_pad = int(frame_len - len(x.audio) % frame_len) - post = np.zeros((N_pad, x.audio.shape[1])) - x.audio = np.concatenate([x.audio, post]) - - # add source signal to the array of all source signals - y.fs = x.fs - if y.audio is None: - y.audio = x.audio.copy() - else: - # pad with zeros to have the same length of all source signals - if x.audio.shape[0] > y.audio.shape[0]: - y.audio = np.vstack( - ( - y.audio, - np.zeros( - (x.audio.shape[0] - y.audio.shape[0], y.audio.shape[1]) - ), - ) - ) - elif y.audio.shape[0] > x.audio.shape[0]: - x.audio = np.vstack( - ( - x.audio, - np.zeros( - (y.audio.shape[0] - x.audio.shape[0], x.audio.shape[1]) - ), - ) - ) - y.audio = np.hstack((y.audio, x.audio)) - - # append pre-amble and post-amble to all sources - if cfg.preamble != 0.0: - # ensure that pre-amble is a multiple of 20ms - N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) - - # insert all-zero preamble to all sources - pre = np.zeros((N_pre, y.audio.shape[1])) - y.audio = np.concatenate([pre, y.audio]) - - if cfg.postamble != 0.0: - # ensure that post-mable is a multiple of 20ms - N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) - - # append all-zero postamble to all sources - post = np.zeros((N_post, y.audio.shape[1])) - y.audio = np.concatenate([y.audio, post]) - - # add random noise - if cfg.add_low_level_random_noise: - # create uniformly distributed noise between -4 and 4 - np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") - - # superimpose - y.audio += noise - - # create metadata files - for i in range(N_sources): - # parse metadata parameters from the scene description - source_azi = ( - scene["azimuth"][i] - if isinstance(scene["azimuth"], list) - else scene["azimuth"] - ) - source_ele = ( - scene["elevation"][i] - if isinstance(scene["elevation"], list) - else scene["elevation"] - ) - - N_frames = int(len(y.audio) / y.fs * 50) - - # read azimuth information and convert to an array - if isinstance(source_azi, str): - if ":" in source_azi: - # start with the initial azimuth value and apply step N_frames times - source_azi = source_azi.split(":") - azi = np.arange( - float(eval(source_azi[0])), - float(eval(source_azi[0])) + N_frames * float(eval(source_azi[1])), - float(eval(source_azi[1])), - ) - else: - # replicate static azimuth value N_frames times - azi = np.repeat(float(eval(source_azi)), N_frames) - else: - # replicate static azimuth value N_frames times - azi = np.repeat(float(source_azi), N_frames) - - # convert azimuth from 0 .. 360 to -180 .. +180 - azi = (azi + 180) % 360 - 180 - - # check if azimuth is from -180 .. +180 - if any(azi > 180) or any(azi < -180): - logger.error( - f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}" - ) - - # read elevation information and convert to an array - if isinstance(source_ele, str): - if ":" in source_ele: - # convert into array (initial_value:step:stop_value) - # note: the stop_value value is +-90 degrees depending on the sign of the step - source_ele = source_ele.split(":") - ele = np.arange( - float(eval(source_ele[0])), - np.sign(float(eval(source_ele[1]))) * 90, - float(eval(source_ele[1])), - )[:N_frames] - - # repeat the last elevation value, if array is shorter than N_frames - if len(ele) < N_frames: - ele = np.append(ele, np.full(N_frames - len(ele), ele[-1])) - else: - # replicate static elevation value N_frames times - ele = np.repeat(float(eval(source_ele)), N_frames) - else: - # replicate static elevation value N_frames times - ele = np.repeat(float(source_ele), N_frames) - - # check if elevation is from -90 .. +90 - if any(ele > 90) or any(ele < -90): - logger.error( - f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}" - ) - - # arrange all metadata fields column-wise into a matrix - x_meta = np.column_stack((azi, ele)) - - x_meta = x_meta[np.newaxis, :] - if y_meta is None: - y_meta = x_meta - else: - y_meta = np.concatenate([y_meta, x_meta]) - - # write individual ISM audio streams to the output file in an interleaved format - audiofile.write( - os.path.join( - cfg.output_path, - os.path.dirname(scene["name"]), - cfg.use_output_prefix + os.path.basename(scene["name"]), - ), - y.audio, - y.fs, - ) - - # write individual ISM metadata to output files in .csv format - for i in range(N_sources): - # generate .csv filename (should end with .0.csv, .1.csv, ...) - csv_filename = os.path.join( - cfg.output_path, - os.path.dirname(scene["name"]), - cfg.use_output_prefix + os.path.basename(scene["name"]) + f".{i}.csv", - ) - - with open( - csv_filename, - "w", - newline="", - encoding="utf-8", - ) as f: - # create csv writer - writer = csv.writer(f) - - # write all rows to the .csv file - writer.writerows(csv_formatdata(y_meta[i])) - - return diff --git a/ivas_processing_scripts/generation/process_stereo_items.py b/ivas_processing_scripts/generation/process_stereo_items.py deleted file mode 100644 index 7d05de54bc203964a52e7dc6c35bef6ca3c97b37..0000000000000000000000000000000000000000 --- a/ivas_processing_scripts/generation/process_stereo_items.py +++ /dev/null @@ -1,327 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. -# - -import logging -import os -from itertools import groupby, repeat -from math import floor - -import numpy as np - -from ivas_processing_scripts.audiotools import audio, audiofile -from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness -from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_stereo -from ivas_processing_scripts.generation import config -from ivas_processing_scripts.utils import apply_func_parallel - -SEED_RANDOM_NOISE = 0 - - -# function for converting nd numpy array to strings with 2 decimal digits -def csv_formatdata(data): - for row in data: - yield ["%0.2f" % v for v in row] - - -# function for searching sequences of same the same character and replacing it by another string -def replace_char_seq_with_string(str, char_seq, repl_str): - result = [] - - # find groups of consecutive letters - groups = ["".join(list(g)) for k, g in groupby(str)] - - # limit the length of the replacement string by the length of the character sequence - repl_str = repl_str[: len(char_seq)] - - # replace each occurence of the sequence of characters - for g in groups: - if char_seq in g: - result.append(repl_str) - else: - result.append(g) - - return "".join(result) - - -def generate_stereo_items( - cfg: config.TestConfig, - logger: logging.Logger, -): - """Generate STEREO items from mono items based on scene description""" - - # set the target level - if "loudness" not in cfg.__dict__: - cfg.loudness = -26 - - # set the fs - if "fs" not in cfg.__dict__: - cfg.fs = 48000 - - # set the IR fs - if "IR_fs" not in cfg.__dict__: - cfg.IR_fs = 48000 - - # set the pre-amble and post-amble - if "preamble" not in cfg.__dict__: - cfg.preamble = 0.0 - - if "postamble" not in cfg.__dict__: - cfg.postamble = 0.0 - - # set the IR path - if "IR_path" not in cfg.__dict__: - cfg.IR_path = os.path.join(os.path.dirname(__file__), "IRs") - - # set the pre-amble and post-amble - if "add_low_level_random_noise" not in cfg.__dict__: - cfg.add_low_level_random_noise = False - - # set the listening lab designator - if "listening_lab" not in cfg.__dict__: - cfg.listening_lab = "l" - - # set the language designator - if "language" not in cfg.__dict__: - cfg.language = "EN" - - # set the experiment designator - if "exp" not in cfg.__dict__: - cfg.exp = "p01" - - # set the provider - if "provider" not in cfg.__dict__: - cfg.provider = "g" - - # set the prefix for all input filenames - if "use_input_prefix" not in cfg.__dict__: - cfg.use_input_prefix = "" - else: - # replace file designators - cfg.use_input_prefix = replace_char_seq_with_string( - cfg.use_input_prefix, "l", cfg.listening_lab - ) - cfg.use_input_prefix = replace_char_seq_with_string( - cfg.use_input_prefix, "LL", cfg.language - ) - cfg.use_input_prefix = replace_char_seq_with_string( - cfg.use_input_prefix, "eee", cfg.exp - ) - - # set the prefix for all IR filenames - if "use_IR_prefix" not in cfg.__dict__: - cfg.use_IR_prefix = "" - else: - # replace file designators - cfg.use_IR_prefix = replace_char_seq_with_string( - cfg.use_IR_prefix, "p", cfg.provider - ) - cfg.use_IR_prefix = replace_char_seq_with_string( - cfg.use_IR_prefix, "LL", cfg.language - ) - cfg.use_IR_prefix = replace_char_seq_with_string( - cfg.use_IR_prefix, "eee", cfg.exp - ) - - # set the prefix for all output filenames - if "use_output_prefix" not in cfg.__dict__: - cfg.use_output_prefix = None - else: - # replace file designators - cfg.use_output_prefix = replace_char_seq_with_string( - cfg.use_output_prefix, "l", cfg.listening_lab - ) - cfg.use_output_prefix = replace_char_seq_with_string( - cfg.use_output_prefix, "eee", cfg.exp - ) - - # set multiprocessing - if "multiprocessing" not in cfg.__dict__: - cfg.multiprocessing = True - - apply_func_parallel( - generate_stereo_scene, - zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), - None, - "mp" if cfg.multiprocessing else None, - ) - - return - - -def generate_stereo_scene( - scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger -): - logger.info( - f"Processing scene: {scene_name} out of {len(cfg.scenes)} scenes, name: {scene['name']}" - ) - - # extract the number of audio sources - N_sources = len(np.atleast_1d(scene["source"])) - - # read the shift time in seconds - if "overlap" in scene.keys(): - source_overlap = float(scene["overlap"]) - else: - source_overlap = 0.0 - - len_s1 = 0 - y = audio.ChannelBasedAudio("STEREO") - for i in range(N_sources): - # parse parameters from the scene description - source_file = np.atleast_1d(scene["source"])[i] - IR_file = np.atleast_1d(scene["IR"])[i] - - logger.info(f"Convolving {source_file} with {IR_file}") - - # read source file - x = audio.fromfile( - "MONO", - os.path.join( - cfg.input_path, - os.path.dirname(source_file), - cfg.use_input_prefix + os.path.basename(source_file), - ), - fs=cfg.fs, - ) - - # read the IR file - IR = audio.fromfile( - "STEREO", - os.path.join( - cfg.IR_path, - os.path.dirname(IR_file), - cfg.use_IR_prefix + os.path.basename(IR_file), - ), - fs=cfg.IR_fs, - ) - - if i == 0: - len_s1 = x.audio.shape[0] - - # convolve with stereo IR - x = reverb_stereo(x, IR) - - # adjust the level of the stereo signal - _, scale_factor, _ = get_loudness(x, cfg.loudness, "STEREO") - x.audio *= scale_factor - - # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) - if i > 0: - # get the length of the first source file - N_delay = len_s1 - - # add the shift - N_delay += int(-source_overlap * x.fs) - - # insert all-zero preamble - pre = np.zeros((N_delay, x.audio.shape[1])) - x.audio = np.concatenate([pre, x.audio]) - - # add source signal to the array of source signals - y.fs = x.fs - if y.audio is None: - y.audio = x.audio.copy() - else: - # pad with zeros to have equal length of all source signals - if x.audio.shape[0] > y.audio.shape[0]: - y.audio = np.vstack( - ( - y.audio, - np.zeros( - ( - x.audio.shape[0] - y.audio.shape[0], - y.audio.shape[1], - ) - ), - ) - ) - elif y.audio.shape[0] > x.audio.shape[0]: - x.audio = np.vstack( - ( - x.audio, - np.zeros( - ( - y.audio.shape[0] - x.audio.shape[0], - x.audio.shape[1], - ) - ), - ) - ) - - # superimpose - y.audio += x.audio - - # append pre-amble and post-amble to all sources - if cfg.preamble != 0.0: - # ensure that pre-amble is a multiple of 20ms - N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) - - # insert all-zero preamble to all sources - pre = np.zeros((N_pre, y.audio.shape[1])) - y.audio = np.concatenate([pre, y.audio]) - - if cfg.postamble != 0.0: - # ensure that post-mable is a multiple of 20ms - N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) - - # append all-zero postamble to all sources - post = np.zeros((N_post, y.audio.shape[1])) - y.audio = np.concatenate([y.audio, post]) - - # pad with zeros to ensure that the signal length is a multiple of 20ms - N_frame = y.fs / 50 - if y.audio.shape[0] % N_frame != 0: - N_pad = int(N_frame - y.audio.shape[0] % N_frame) - - # insert all-zero postamble - post = np.zeros((N_pad, y.audio.shape[1])) - y.audio = np.concatenate([y.audio, post]) - - # add random noise - if cfg.add_low_level_random_noise: - # create uniformly distributed noise between -4 and 4 - np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") - - # superimpose - y.audio += noise - - # write the reverberated audio into output file - audiofile.write( - os.path.join( - cfg.output_path, - os.path.dirname(scene["name"]), - cfg.use_output_prefix + os.path.basename(scene["name"]), - ), - y.audio, - y.fs, - )