Loading item_gen_configs/P800-1.yml +92 −73 Original line number Diff line number Diff line Loading @@ -30,12 +30,31 @@ output_path: "experiments/selection/P800-1/proc_input" loudness: -26 ### Pre-amble and Post-amble length in seconds (default = 0.0) preamble: 1.0 preamble: 0.5 postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true ### File designators listening_lab: "a" language: "JP" exp: "p01" ################################################ ### Input files ################################################ ### <LL><Lang><Exp><TalkerID>s<Sample>.wav ### ### With ### ### <LL> = a (Force Technology), b (HEAD acoustics), ### c (MQ University), d (Mesaqin.com) ### <Lang> = JP, FR, GE, MA, DA, EN ### <Exp> = p01, p02, p04, p05, p06, p07, p08, p09 ### <TalkerID> = f1, f2, f3, m1, m2, m3 ### <Sample> = 01, …, 14 ################################################ ### Scene description Loading Loading @@ -64,254 +83,254 @@ add_low_level_random_noise: true scenes: cat1_1: name: "lp01a1s01" name: "a1s01" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["f1_s1.wav", "m1_s1.wav"] source: ["f1s01.wav", "m1s01.wav"] IR: ["SAABP01.wav", "SAABP07.wav"] overlap: 1.0 cat1_2: name: "lp01a1s02" name: "a1s02" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["m2_s1.wav", "f2_s1.wav"] source: ["m2s01.wav", "f2s01.wav"] IR: ["SAABP05.wav", "SAABP03.wav"] overlap: 1.0 cat1_3: name: "lp01a1s03" name: "a1s03" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["f3_s1.wav", "m3_s1.wav"] source: ["f3s01.wav", "m3s01.wav"] IR: ["SAABP02.wav", "SAABP06.wav"] overlap: 1.0 cat1_4: name: "lp01a1s04" name: "a1s04" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["m1_s2.wav", "f1_s2.wav"] source: ["m1s02.wav", "f1s02.wav"] IR: ["SAABP04.wav", "SAABP01.wav"] overlap: 1.0 cat1_5: name: "lp01a1s05" name: "a1s05" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["f2_s2.wav", "m2_s2.wav"] source: ["f2s02.wav", "m2s02.wav"] IR: ["SAABP03.wav", "SAABP04.wav"] overlap: 1.0 cat1_6: name: "lp01a1s06" name: "a1s06" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["m3_s2.wav", "f3_s2.wav"] source: ["m3s02.wav", "f3s02.wav"] IR: ["SAABP07.wav", "SAABP02.wav"] overlap: 1.0 cat2_1: name: "lp01a2s01" name: "a2s01" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["m3_s3.wav", "f3_s3.wav"] source: ["m3s03.wav", "f3s03.wav"] IR: ["LAABP05.wav", "LAABP11.wav"] overlap: -1.0 cat2_2: name: "lp01a2s02" name: "a2s02" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["f1_s3.wav", "m1.wav"] source: ["f1s03.wav", "m1s03.wav"] IR: ["LAABP01.wav", "LAABP06.wav"] overlap: -1.0 cat2_3: name: "lp01a2s03" name: "a2s03" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["m2_s3.wav", "f2_s3.wav"] source: ["m2s03.wav", "f2s03.wav"] IR: ["LAABP03.wav", "LAABP07.wav"] overlap: -1.0 cat2_4: name: "lp01a2s04" name: "a2s04" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["f3_s4.wav", "m3_s4.wav"] source: ["f3s04.wav", "m3s04.wav"] IR: ["LAABP05.wav", "LAABP08.wav"] overlap: -1.0 cat2_5: name: "lp01a2s05" name: "a2s05" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["m1_s4.wav", "f1_s4.wav"] source: ["m1s04.wav", "f1s04.wav"] IR: ["LAABP09.wav", "LAABP07.wav"] overlap: -1.0 cat2_6: name: "lp01a2s06" name: "a2s06" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["f2_s4.wav", "m2_s4.wav"] source: ["f2s04.wav", "m2s04.wav"] IR: ["LAABP10.wav", "LAABP09.wav"] overlap: -1.0 cat3_1: name: "lp01a3s01" name: "a3s01" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["f2_s5.wav", "m2_s5.wav"] source: ["f2s05.wav", "m2s05.wav"] IR: ["SAMSP01.wav", "SAMSP07.wav"] overlap: -1.0 cat3_2: name: "lp01a3s02" name: "a3s02" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["m3_s5.wav", "f3_s5.wav"] source: ["m3s05.wav", "f3s05.wav"] IR: ["SAMSP05.wav", "SAMSP03.wav"] overlap: -1.0 cat3_3: name: "lp01a3s03" name: "a3s03" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["f1_s5.wav", "m1_s5.wav"] source: ["f1s05.wav", "m1s05.wav"] IR: ["SAMSP02.wav", "SAMSP06.wav"] overlap: -1.0 cat3_4: name: "lp01a3s04" name: "a3s04" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["m2_s6.wav", "f2_s6.wav"] source: ["m2s06.wav", "f2s06.wav"] IR: ["SAMSP04.wav", "SAMSP01.wav"] overlap: -1.0 cat3_5: name: "lp01a3s05" name: "a3s05" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["f3_s6.wav", "m3_s6.wav"] source: ["f3s06.wav", "m3s06.wav"] IR: ["SAMSP03.wav", "SAMSP04.wav"] overlap: -1.0 cat3_6: name: "lp01a3s06" name: "a3s06" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["m1_s6.wav", "f1_s6.wav"] source: ["m1s06.wav", "f1s06.wav"] IR: ["SAMSP07.wav", "SAMSP02.wav"] overlap: -1.0 cat4_1: name: "lp01a4s01" name: "a4s01" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m1_s7.wav", "f1_s7.wav"] source: ["m1s07.wav", "f1s07.wav"] IR: ["SEABP01.wav", "SEABP07.wav"] overlap: 1.0 cat4_2: name: "lp01a4s02" name: "a4s02" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f2_s7.wav", "m2_s7.wav"] source: ["f2s07.wav", "m2s07.wav"] IR: ["SEABP05.wav", "SEABP03.wav"] overlap: 1.0 cat4_3: name: "lp01a4s03" name: "a4s03" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m3_s7.wav", "f3_s7.wav"] source: ["m3s07.wav", "f3s07.wav"] IR: ["SEABP02.wav", "SEABP06.wav"] overlap: 1.0 cat4_4: name: "lp01a4s04" name: "a4s04" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f1_s8.wav", "m1_s8.wav"] source: ["f1s08.wav", "m1s08.wav"] IR: ["SEABP04.wav", "SEABP01.wav"] overlap: 1.0 cat4_5: name: "lp01a4s05" name: "a4s05" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m2_s8.wav", "f2_s8.wav"] source: ["m2s08.wav", "f2s08.wav"] IR: ["SEABP03.wav", "SEABP04.wav"] overlap: 1.0 cat4_6: name: "lp01a4s06" name: "a4s06" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f3_s8.wav", "m3_s8.wav"] source: ["f3s08.wav", "m3s08.wav"] IR: ["SEABP07.wav", "SEABP02.wav"] overlap: 1.0 cat5_1: name: "lp01a5s01" name: "a5s01" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f3_s9.wav", "m3_s9.wav"] source: ["f3s09.wav", "m3s09.wav"] IR: ["LEABP02.wav", "LEABP08.wav"] overlap: 1.0 cat5_2: name: "lp01a5s02" name: "a5s02" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m1_s9.wav", "f1_s9.wav"] source: ["m1s09.wav", "f1s09.wav"] IR: ["LEABP09.wav", "LEABP04.wav"] overlap: 1.0 cat5_3: name: "lp01a5s03" name: "a5s03" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f2_s9.wav", "m2_s9.wav"] source: ["f2s09.wav", "m2s09.wav"] IR: ["LEABP06.wav", "LEABP10.wav"] overlap: 1.0 cat5_4: name: "lp01a5s04" name: "a5s04" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m3_s10.wav", "f3_s10.wav"] source: ["m3s10.wav", "f3s10.wav"] IR: ["LEABP11.wav", "LEABP08.wav"] overlap: 1.0 cat5_5: name: "lp01a5s05" name: "a5s05" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f1_s10.wav", "m1_s10.wav"] source: ["f1s10.wav", "m1s10.wav"] IR: ["LEABP10.wav", "LEABP12.wav"] overlap: 1.0 cat5_6: name: "lp01a5s06" name: "a5s06" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m2_s10.wav", "f2_s10.wav"] source: ["m2s10.wav", "f2s10.wav"] IR: ["LEABP12.wav", "LEABP01.wav"] overlap: 1.0 cat6_1: name: "lp01a6s01" name: "a6s01" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["m2_s11.wav", "f2_s11.wav"] source: ["m2s11.wav", "f2s11.wav"] IR: ["SEABP01.wav", "SEABP07.wav"] overlap: -1.0 cat6_2: name: "lp01a6s02" name: "a6s02" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["f3_s11.wav", "m3_s11.wav"] source: ["f3s11.wav", "m3s11.wav"] IR: ["SEABP05.wav", "SEABP03.wav"] overlap: -1.0 cat6_3: name: "lp01a6s03" name: "a6s03" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["m1_s11.wav", "f1_s11.wav"] source: ["m1s11.wav", "f1s11.wav"] IR: ["SEABP02.wav", "SEABP06.wav"] overlap: -1.0 cat6_4: name: "lp01a6s04" name: "a6s04" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["f2_s12.wav", "m2_s12.wav"] source: ["f2s12.wav", "m2s12.wav"] IR: ["SEABP04.wav", "SEABP01.wav"] overlap: -1.0 cat6_5: name: "lp01a6s05" name: "a6s05" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["m3_s12.wav", "f3_s12.wav"] source: ["m3s12.wav", "f3s12.wav"] IR: ["SEABP03.wav", "SEABP04.wav"] overlap: -1.0 cat6_6: name: "lp01a6s06" name: "a6s06" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["f1_s12.wav", "m1_s12.wav"] source: ["f1s12.wav", "m1s12.wav"] IR: ["SEABP07.wav", "SEABP02.wav"] overlap: -1.0 item_gen_configs/P800-2.yml +94 −75 File changed.Preview size limit exceeded, changes collapsed. Show changes ivas_processing_scripts/generation/process_stereo_items.py +132 −124 Original line number Diff line number Diff line Loading @@ -32,6 +32,7 @@ import logging import os from itertools import repeat from math import floor import numpy as np Loading @@ -40,6 +41,7 @@ from ivas_processing_scripts.audiotools import audio, audiofile from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_stereo from ivas_processing_scripts.generation import config from ivas_processing_scripts.utils import apply_func_parallel SEED_RANDOM_NOISE = 0 Loading @@ -56,9 +58,6 @@ def generate_stereo_items( ): """Generate STEREO items from mono items based on scene description""" # get the number of scenes N_scenes = len(cfg.scenes) # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 Loading Loading @@ -86,10 +85,24 @@ def generate_stereo_items( if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False # repeat for all source files for scene_name, scene in cfg.scenes.items(): # set multiprocessing if "multiprocessing" not in cfg.__dict__: cfg.multiprocessing = True apply_func_parallel( generate_stereo_scene, zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), None, "mp" if cfg.multiprocessing else None, ) return def generate_stereo_scene( scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger ): logger.info( f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene['name']}" f"Processing scene: {scene_name} out of {len(cfg.scenes)} scenes, name: {scene['name']}" ) # extract the number of audio sources Loading @@ -106,21 +119,18 @@ def generate_stereo_items( y = audio.ChannelBasedAudio("STEREO") for i in range(N_sources): source_prefix = cfg.listening_lab + cfg.language + cfg.exp # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] source_file = source_prefix + np.atleast_1d(scene["source"])[i] IR_file = np.atleast_1d(scene["IR"])[i] logger.info(f"Convolving {source_file} with {source_IR}") # read source file x = audio.fromfile( "MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs ) x = audio.fromfile("MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs) # read the IR file IR = audio.fromfile( "STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs ) IR = audio.fromfile("STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs) # convolve with stereo IR x = reverb_stereo(x, IR) Loading @@ -141,15 +151,6 @@ def generate_stereo_items( pre = np.zeros((N_delay, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) # pad with zeros to ensure that the signal length is a multiple of 20ms N_frame = x.fs / 50 if len(x.audio) % N_frame != 0: N_pad = int(N_frame - len(x.audio) % N_frame) # insert all-zero preamble pre = np.zeros((N_pad, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) # add source signal to the array of source signals y.fs = x.fs if y.audio is None: Loading Loading @@ -201,21 +202,28 @@ def generate_stereo_items( post = np.zeros((N_post, y.audio.shape[1])) y.audio = np.concatenate([y.audio, post]) # pad with zeros to ensure that the signal length is a multiple of 20ms N_frame = y.fs / 50 if y.audio.shape[0] % N_frame != 0: N_pad = int(N_frame - y.audio.shape[0] % N_frame) # insert all-zero postamble post = np.zeros((N_pad, y.audio.shape[1])) y.audio = np.concatenate([y.audio, post]) # add random noise if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( "float" ) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") # superimpose y.audio += noise # write the reverberated audio into output file output_filename = scene["name"] output_filename = cfg.listening_lab + cfg.exp + scene["name"] + ".wav" audiofile.write( os.path.join(cfg.output_path, output_filename), y.audio, y.fs os.path.join(cfg.output_path, scene_name.split("_")[0], output_filename), y.audio, y.fs, ) # !!!! TBD: replace all os.path.xxx operations with the Path object return Loading
item_gen_configs/P800-1.yml +92 −73 Original line number Diff line number Diff line Loading @@ -30,12 +30,31 @@ output_path: "experiments/selection/P800-1/proc_input" loudness: -26 ### Pre-amble and Post-amble length in seconds (default = 0.0) preamble: 1.0 preamble: 0.5 postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true ### File designators listening_lab: "a" language: "JP" exp: "p01" ################################################ ### Input files ################################################ ### <LL><Lang><Exp><TalkerID>s<Sample>.wav ### ### With ### ### <LL> = a (Force Technology), b (HEAD acoustics), ### c (MQ University), d (Mesaqin.com) ### <Lang> = JP, FR, GE, MA, DA, EN ### <Exp> = p01, p02, p04, p05, p06, p07, p08, p09 ### <TalkerID> = f1, f2, f3, m1, m2, m3 ### <Sample> = 01, …, 14 ################################################ ### Scene description Loading Loading @@ -64,254 +83,254 @@ add_low_level_random_noise: true scenes: cat1_1: name: "lp01a1s01" name: "a1s01" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["f1_s1.wav", "m1_s1.wav"] source: ["f1s01.wav", "m1s01.wav"] IR: ["SAABP01.wav", "SAABP07.wav"] overlap: 1.0 cat1_2: name: "lp01a1s02" name: "a1s02" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["m2_s1.wav", "f2_s1.wav"] source: ["m2s01.wav", "f2s01.wav"] IR: ["SAABP05.wav", "SAABP03.wav"] overlap: 1.0 cat1_3: name: "lp01a1s03" name: "a1s03" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["f3_s1.wav", "m3_s1.wav"] source: ["f3s01.wav", "m3s01.wav"] IR: ["SAABP02.wav", "SAABP06.wav"] overlap: 1.0 cat1_4: name: "lp01a1s04" name: "a1s04" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["m1_s2.wav", "f1_s2.wav"] source: ["m1s02.wav", "f1s02.wav"] IR: ["SAABP04.wav", "SAABP01.wav"] overlap: 1.0 cat1_5: name: "lp01a1s05" name: "a1s05" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["f2_s2.wav", "m2_s2.wav"] source: ["f2s02.wav", "m2s02.wav"] IR: ["SAABP03.wav", "SAABP04.wav"] overlap: 1.0 cat1_6: name: "lp01a1s06" name: "a1s06" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["m3_s2.wav", "f3_s2.wav"] source: ["m3s02.wav", "f3s02.wav"] IR: ["SAABP07.wav", "SAABP02.wav"] overlap: 1.0 cat2_1: name: "lp01a2s01" name: "a2s01" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["m3_s3.wav", "f3_s3.wav"] source: ["m3s03.wav", "f3s03.wav"] IR: ["LAABP05.wav", "LAABP11.wav"] overlap: -1.0 cat2_2: name: "lp01a2s02" name: "a2s02" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["f1_s3.wav", "m1.wav"] source: ["f1s03.wav", "m1s03.wav"] IR: ["LAABP01.wav", "LAABP06.wav"] overlap: -1.0 cat2_3: name: "lp01a2s03" name: "a2s03" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["m2_s3.wav", "f2_s3.wav"] source: ["m2s03.wav", "f2s03.wav"] IR: ["LAABP03.wav", "LAABP07.wav"] overlap: -1.0 cat2_4: name: "lp01a2s04" name: "a2s04" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["f3_s4.wav", "m3_s4.wav"] source: ["f3s04.wav", "m3s04.wav"] IR: ["LAABP05.wav", "LAABP08.wav"] overlap: -1.0 cat2_5: name: "lp01a2s05" name: "a2s05" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["m1_s4.wav", "f1_s4.wav"] source: ["m1s04.wav", "f1s04.wav"] IR: ["LAABP09.wav", "LAABP07.wav"] overlap: -1.0 cat2_6: name: "lp01a2s06" name: "a2s06" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["f2_s4.wav", "m2_s4.wav"] source: ["f2s04.wav", "m2s04.wav"] IR: ["LAABP10.wav", "LAABP09.wav"] overlap: -1.0 cat3_1: name: "lp01a3s01" name: "a3s01" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["f2_s5.wav", "m2_s5.wav"] source: ["f2s05.wav", "m2s05.wav"] IR: ["SAMSP01.wav", "SAMSP07.wav"] overlap: -1.0 cat3_2: name: "lp01a3s02" name: "a3s02" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["m3_s5.wav", "f3_s5.wav"] source: ["m3s05.wav", "f3s05.wav"] IR: ["SAMSP05.wav", "SAMSP03.wav"] overlap: -1.0 cat3_3: name: "lp01a3s03" name: "a3s03" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["f1_s5.wav", "m1_s5.wav"] source: ["f1s05.wav", "m1s05.wav"] IR: ["SAMSP02.wav", "SAMSP06.wav"] overlap: -1.0 cat3_4: name: "lp01a3s04" name: "a3s04" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["m2_s6.wav", "f2_s6.wav"] source: ["m2s06.wav", "f2s06.wav"] IR: ["SAMSP04.wav", "SAMSP01.wav"] overlap: -1.0 cat3_5: name: "lp01a3s05" name: "a3s05" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["f3_s6.wav", "m3_s6.wav"] source: ["f3s06.wav", "m3s06.wav"] IR: ["SAMSP03.wav", "SAMSP04.wav"] overlap: -1.0 cat3_6: name: "lp01a3s06" name: "a3s06" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["m1_s6.wav", "f1_s6.wav"] source: ["m1s06.wav", "f1s06.wav"] IR: ["SAMSP07.wav", "SAMSP02.wav"] overlap: -1.0 cat4_1: name: "lp01a4s01" name: "a4s01" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m1_s7.wav", "f1_s7.wav"] source: ["m1s07.wav", "f1s07.wav"] IR: ["SEABP01.wav", "SEABP07.wav"] overlap: 1.0 cat4_2: name: "lp01a4s02" name: "a4s02" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f2_s7.wav", "m2_s7.wav"] source: ["f2s07.wav", "m2s07.wav"] IR: ["SEABP05.wav", "SEABP03.wav"] overlap: 1.0 cat4_3: name: "lp01a4s03" name: "a4s03" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m3_s7.wav", "f3_s7.wav"] source: ["m3s07.wav", "f3s07.wav"] IR: ["SEABP02.wav", "SEABP06.wav"] overlap: 1.0 cat4_4: name: "lp01a4s04" name: "a4s04" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f1_s8.wav", "m1_s8.wav"] source: ["f1s08.wav", "m1s08.wav"] IR: ["SEABP04.wav", "SEABP01.wav"] overlap: 1.0 cat4_5: name: "lp01a4s05" name: "a4s05" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m2_s8.wav", "f2_s8.wav"] source: ["m2s08.wav", "f2s08.wav"] IR: ["SEABP03.wav", "SEABP04.wav"] overlap: 1.0 cat4_6: name: "lp01a4s06" name: "a4s06" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f3_s8.wav", "m3_s8.wav"] source: ["f3s08.wav", "m3s08.wav"] IR: ["SEABP07.wav", "SEABP02.wav"] overlap: 1.0 cat5_1: name: "lp01a5s01" name: "a5s01" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f3_s9.wav", "m3_s9.wav"] source: ["f3s09.wav", "m3s09.wav"] IR: ["LEABP02.wav", "LEABP08.wav"] overlap: 1.0 cat5_2: name: "lp01a5s02" name: "a5s02" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m1_s9.wav", "f1_s9.wav"] source: ["m1s09.wav", "f1s09.wav"] IR: ["LEABP09.wav", "LEABP04.wav"] overlap: 1.0 cat5_3: name: "lp01a5s03" name: "a5s03" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f2_s9.wav", "m2_s9.wav"] source: ["f2s09.wav", "m2s09.wav"] IR: ["LEABP06.wav", "LEABP10.wav"] overlap: 1.0 cat5_4: name: "lp01a5s04" name: "a5s04" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m3_s10.wav", "f3_s10.wav"] source: ["m3s10.wav", "f3s10.wav"] IR: ["LEABP11.wav", "LEABP08.wav"] overlap: 1.0 cat5_5: name: "lp01a5s05" name: "a5s05" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f1_s10.wav", "m1_s10.wav"] source: ["f1s10.wav", "m1s10.wav"] IR: ["LEABP10.wav", "LEABP12.wav"] overlap: 1.0 cat5_6: name: "lp01a5s06" name: "a5s06" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m2_s10.wav", "f2_s10.wav"] source: ["m2s10.wav", "f2s10.wav"] IR: ["LEABP12.wav", "LEABP01.wav"] overlap: 1.0 cat6_1: name: "lp01a6s01" name: "a6s01" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["m2_s11.wav", "f2_s11.wav"] source: ["m2s11.wav", "f2s11.wav"] IR: ["SEABP01.wav", "SEABP07.wav"] overlap: -1.0 cat6_2: name: "lp01a6s02" name: "a6s02" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["f3_s11.wav", "m3_s11.wav"] source: ["f3s11.wav", "m3s11.wav"] IR: ["SEABP05.wav", "SEABP03.wav"] overlap: -1.0 cat6_3: name: "lp01a6s03" name: "a6s03" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["m1_s11.wav", "f1_s11.wav"] source: ["m1s11.wav", "f1s11.wav"] IR: ["SEABP02.wav", "SEABP06.wav"] overlap: -1.0 cat6_4: name: "lp01a6s04" name: "a6s04" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["f2_s12.wav", "m2_s12.wav"] source: ["f2s12.wav", "m2s12.wav"] IR: ["SEABP04.wav", "SEABP01.wav"] overlap: -1.0 cat6_5: name: "lp01a6s05" name: "a6s05" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["m3_s12.wav", "f3_s12.wav"] source: ["m3s12.wav", "f3s12.wav"] IR: ["SEABP03.wav", "SEABP04.wav"] overlap: -1.0 cat6_6: name: "lp01a6s06" name: "a6s06" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["f1_s12.wav", "m1_s12.wav"] source: ["f1s12.wav", "m1s12.wav"] IR: ["SEABP07.wav", "SEABP02.wav"] overlap: -1.0
item_gen_configs/P800-2.yml +94 −75 File changed.Preview size limit exceeded, changes collapsed. Show changes
ivas_processing_scripts/generation/process_stereo_items.py +132 −124 Original line number Diff line number Diff line Loading @@ -32,6 +32,7 @@ import logging import os from itertools import repeat from math import floor import numpy as np Loading @@ -40,6 +41,7 @@ from ivas_processing_scripts.audiotools import audio, audiofile from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_stereo from ivas_processing_scripts.generation import config from ivas_processing_scripts.utils import apply_func_parallel SEED_RANDOM_NOISE = 0 Loading @@ -56,9 +58,6 @@ def generate_stereo_items( ): """Generate STEREO items from mono items based on scene description""" # get the number of scenes N_scenes = len(cfg.scenes) # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 Loading Loading @@ -86,10 +85,24 @@ def generate_stereo_items( if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False # repeat for all source files for scene_name, scene in cfg.scenes.items(): # set multiprocessing if "multiprocessing" not in cfg.__dict__: cfg.multiprocessing = True apply_func_parallel( generate_stereo_scene, zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), None, "mp" if cfg.multiprocessing else None, ) return def generate_stereo_scene( scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger ): logger.info( f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene['name']}" f"Processing scene: {scene_name} out of {len(cfg.scenes)} scenes, name: {scene['name']}" ) # extract the number of audio sources Loading @@ -106,21 +119,18 @@ def generate_stereo_items( y = audio.ChannelBasedAudio("STEREO") for i in range(N_sources): source_prefix = cfg.listening_lab + cfg.language + cfg.exp # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] source_file = source_prefix + np.atleast_1d(scene["source"])[i] IR_file = np.atleast_1d(scene["IR"])[i] logger.info(f"Convolving {source_file} with {source_IR}") # read source file x = audio.fromfile( "MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs ) x = audio.fromfile("MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs) # read the IR file IR = audio.fromfile( "STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs ) IR = audio.fromfile("STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs) # convolve with stereo IR x = reverb_stereo(x, IR) Loading @@ -141,15 +151,6 @@ def generate_stereo_items( pre = np.zeros((N_delay, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) # pad with zeros to ensure that the signal length is a multiple of 20ms N_frame = x.fs / 50 if len(x.audio) % N_frame != 0: N_pad = int(N_frame - len(x.audio) % N_frame) # insert all-zero preamble pre = np.zeros((N_pad, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) # add source signal to the array of source signals y.fs = x.fs if y.audio is None: Loading Loading @@ -201,21 +202,28 @@ def generate_stereo_items( post = np.zeros((N_post, y.audio.shape[1])) y.audio = np.concatenate([y.audio, post]) # pad with zeros to ensure that the signal length is a multiple of 20ms N_frame = y.fs / 50 if y.audio.shape[0] % N_frame != 0: N_pad = int(N_frame - y.audio.shape[0] % N_frame) # insert all-zero postamble post = np.zeros((N_pad, y.audio.shape[1])) y.audio = np.concatenate([y.audio, post]) # add random noise if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( "float" ) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") # superimpose y.audio += noise # write the reverberated audio into output file output_filename = scene["name"] output_filename = cfg.listening_lab + cfg.exp + scene["name"] + ".wav" audiofile.write( os.path.join(cfg.output_path, output_filename), y.audio, y.fs os.path.join(cfg.output_path, scene_name.split("_")[0], output_filename), y.audio, y.fs, ) # !!!! TBD: replace all os.path.xxx operations with the Path object return