Loading ivas_processing_scripts/generation/__init__.py +4 −4 Original line number Diff line number Diff line Loading @@ -42,10 +42,10 @@ from ivas_processing_scripts.constants import ( ) from ivas_processing_scripts.generation import ( config, process_ambi_items, process_ism1_items, process_ism2_items, process_stereo_items, process_ambi_items, ) from ivas_processing_scripts.utils import create_dir Loading ivas_processing_scripts/generation/process_ambi_items.py +58 −23 Original line number Diff line number Diff line Loading @@ -32,8 +32,9 @@ import logging import os from itertools import groupby, repeat from math import floor from itertools import repeat, groupby import numpy as np from ivas_processing_scripts.audiotools import audio, audiofile, convert Loading @@ -50,6 +51,7 @@ def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] # function for searching sequences of same the same character and replacing it by another string def replace_char_seq_with_string(str, char_seq, repl_str): result = [] Loading Loading @@ -131,26 +133,42 @@ def generate_ambi_items( cfg.use_input_prefix = "" else: # replace file designators cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "eee", cfg.exp ) # set the prefix for all IR filenames if "use_IR_prefix" not in cfg.__dict__: cfg.use_IR_prefix = "" else: # replace file designators cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "p", cfg.provider ) cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "LL", cfg.language ) cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "eee", cfg.exp ) cfg.use_IR_prefix = replace_char_seq_with_string( cfg.use_IR_prefix, "p", cfg.provider ) cfg.use_IR_prefix = replace_char_seq_with_string( cfg.use_IR_prefix, "LL", cfg.language ) cfg.use_IR_prefix = replace_char_seq_with_string( cfg.use_IR_prefix, "eee", cfg.exp ) # set the prefix for all output filenames if "use_output_prefix" not in cfg.__dict__: cfg.use_output_prefix = None else: # replace file designators cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "eee", cfg.exp ) # set multiprocessing if "multiprocessing" not in cfg.__dict__: Loading Loading @@ -190,7 +208,6 @@ def generate_ambi_scene( y = audio.SceneBasedAudio(ambi_format) for i in range(N_sources): # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] IR_file = np.atleast_1d(scene["IR"])[i] Loading @@ -198,10 +215,26 @@ def generate_ambi_scene( logger.info(f"Convolving {source_file} with {IR_file}") # read source file x = audio.fromfile("MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs) x = audio.fromfile( "MONO", os.path.join( cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file), ), fs=cfg.fs, ) # read the IR file IR = audio.fromfile(ambi_format, os.path.join(cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file)), fs=cfg.IR_fs) IR = audio.fromfile( ambi_format, os.path.join( cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file), ), fs=cfg.IR_fs, ) # convolve with the FOA/HOA2 IR if ambi_format == "FOA": Loading Loading @@ -289,16 +322,18 @@ def generate_ambi_scene( if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( "float" ) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") # superimpose y.audio += noise # write the reverberated audio into output file audiofile.write( os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"]), ), y.audio, y.fs, ) Loading ivas_processing_scripts/generation/process_ism1_items.py +52 −30 Original line number Diff line number Diff line Loading @@ -33,8 +33,8 @@ import csv import logging import os from itertools import groupby, repeat from math import floor from itertools import repeat, groupby import numpy as np Loading @@ -51,6 +51,7 @@ def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] # function for searching sequences of same the same character and replacing it by another string def replace_char_seq_with_string(str, char_seq, repl_str): result = [] Loading Loading @@ -117,17 +118,27 @@ def generate_ism1_items( cfg.use_input_prefix = "" else: # replace file designators cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "eee", cfg.exp ) # set the prefix for all output filenames if "use_output_prefix" not in cfg.__dict__: cfg.use_output_prefix = None else: # replace file designators cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "eee", cfg.exp ) # set multiprocessing if "multiprocessing" not in cfg.__dict__: Loading @@ -142,6 +153,7 @@ def generate_ism1_items( return def generate_ism1_scene( scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger ): Loading Loading @@ -170,14 +182,18 @@ def generate_ism1_scene( for i in range(N_sources): # parse parameters from the scene description source_file = ( scene["source"][i] if isinstance(scene["source"], list) else scene["source"] scene["source"][i] if isinstance(scene["source"], list) else scene["source"] ) # read source file x = audio.fromfile( "MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs "MONO", os.path.join( cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file), ), fs=cfg.fs, ) # get the number of frames (multiple of 20ms) Loading Loading @@ -302,9 +318,7 @@ def generate_ism1_scene( # insert neutral position as a pre-amble to all sources N_pre = int(N_pre / frame_len) pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_pre, 1) ) pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pre, 1)) y_meta = np.concatenate([pre, y_meta], axis=0) if cfg.postamble != 0.0: Loading @@ -317,29 +331,37 @@ def generate_ism1_scene( # append neutral position as a post-amble to all sources N_post = int(N_post / frame_len) post = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_post, 1) ) post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_post, 1)) y_meta = np.concatenate([y_meta, post], axis=0) # add random noise if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( "float" ) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") # superimpose y.audio += noise # write ISM audio stream to the output file audiofile.write( os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), y.audio, y.fs os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"]), ), y.audio, y.fs, ) # write ISM metadata to the output file in .0.csv format csv_filename = os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.splitext(os.path.basename(scene["name"]))[0] + f".0.csv") csv_filename = os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.splitext(os.path.basename(scene["name"]))[0] + f".0.csv", ) with open( csv_filename, Loading ivas_processing_scripts/generation/process_ism2_items.py +55 −39 Original line number Diff line number Diff line Loading @@ -29,12 +29,12 @@ # accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and # the United Nations Convention on Contracts on the International Sales of Goods. # import pdb import csv import logging import os import pdb from itertools import groupby, repeat from math import floor from itertools import repeat, groupby import numpy as np Loading @@ -51,6 +51,7 @@ def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] # function for searching sequences of same the same character and replacing it by another string def replace_char_seq_with_string(str, char_seq, repl_str): result = [] Loading Loading @@ -117,17 +118,27 @@ def generate_ism2_items( cfg.use_input_prefix = "" else: # replace file designators cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "eee", cfg.exp ) # set the prefix for all output filenames if "use_output_prefix" not in cfg.__dict__: cfg.use_output_prefix = None else: # replace file designators cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "eee", cfg.exp ) # set multiprocessing if "multiprocessing" not in cfg.__dict__: Loading @@ -142,6 +153,7 @@ def generate_ism2_items( return def generate_ism2_scene( scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger ): Loading @@ -166,9 +178,7 @@ def generate_ism2_scene( for i in range(N_sources): # parse parameters from the scene description source_file = ( scene["source"][i] if isinstance(scene["source"], list) else scene["source"] scene["source"][i] if isinstance(scene["source"], list) else scene["source"] ) source_azi = ( scene["azimuth"][i] Loading @@ -181,13 +191,17 @@ def generate_ism2_scene( else scene["elevation"] ) logger.info( f"Encoding {source_file} at position(s) {source_azi},{source_ele}" ) logger.info(f"Encoding {source_file} at position(s) {source_azi},{source_ele}") # read source file x = audio.fromfile( "MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs "MONO", os.path.join( cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file), ), fs=cfg.fs, ) # get the number of frames (multiple of 20ms) Loading Loading @@ -283,9 +297,7 @@ def generate_ism2_scene( # insert neutral position as a pre-amble N_delay = int(N_delay / frame_len) # use neutral position for padding pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1) ) pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1)) x_meta = np.concatenate([pre, x_meta]) # pad with zeros to ensure that the signal length is a multiple of 20ms Loading @@ -299,9 +311,7 @@ def generate_ism2_scene( N_pad = int(len(x.audio) / frame_len) - len(x_meta) if N_pad > 0: # use neutral position for padding post = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1) ) post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1)) x_meta = np.concatenate([x_meta, post]) # add source signal to the array of all source signals Loading Loading @@ -370,9 +380,7 @@ def generate_ism2_scene( # insert neutral position as a pre-amble to all sources N_pre = int(N_pre / frame_len) pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1) ) pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1)) y_meta = np.concatenate([pre, y_meta], axis=1) if cfg.postamble != 0.0: Loading @@ -385,31 +393,39 @@ def generate_ism2_scene( # append neutral position as a post-amble to all sources N_post = int(N_post / frame_len) post = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1) ) post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1)) y_meta = np.concatenate([y_meta, post], axis=1) # add random noise if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( "float" ) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") # superimpose y.audio += noise # write individual ISM audio streams to the output file in an interleaved format audiofile.write( os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), y.audio, y.fs os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"]), ), y.audio, y.fs, ) # write individual ISM metadata to output files in .csv format for i in range(N_sources): # generate .csv filename (should end with .0.csv, .1.csv, ...) csv_filename = os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.splitext(os.path.basename(scene["name"]))[0] + f".{i}.csv") csv_filename = os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.splitext(os.path.basename(scene["name"]))[0] + f".{i}.csv", ) with open( csv_filename, Loading ivas_processing_scripts/generation/process_stereo_items.py +55 −19 Original line number Diff line number Diff line Loading @@ -32,7 +32,7 @@ import logging import os from itertools import repeat, groupby from itertools import groupby, repeat from math import floor import numpy as np Loading @@ -51,6 +51,7 @@ def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] # function for searching sequences of same the same character and replacing it by another string def replace_char_seq_with_string(str, char_seq, repl_str): result = [] Loading Loading @@ -125,26 +126,42 @@ def generate_stereo_items( cfg.use_input_prefix = "" else: # replace file designators cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "eee", cfg.exp ) # set the prefix for all IR filenames if "use_IR_prefix" not in cfg.__dict__: cfg.use_IR_prefix = "" else: # replace file designators cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "p", cfg.provider ) cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "LL", cfg.language ) cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "eee", cfg.exp ) cfg.use_IR_prefix = replace_char_seq_with_string( cfg.use_IR_prefix, "p", cfg.provider ) cfg.use_IR_prefix = replace_char_seq_with_string( cfg.use_IR_prefix, "LL", cfg.language ) cfg.use_IR_prefix = replace_char_seq_with_string( cfg.use_IR_prefix, "eee", cfg.exp ) # set the prefix for all output filenames if "use_output_prefix" not in cfg.__dict__: cfg.use_output_prefix = None else: # replace file designators cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "eee", cfg.exp ) # set multiprocessing if "multiprocessing" not in cfg.__dict__: Loading Loading @@ -178,7 +195,6 @@ def generate_stereo_scene( y = audio.ChannelBasedAudio("STEREO") for i in range(N_sources): # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] IR_file = np.atleast_1d(scene["IR"])[i] Loading @@ -186,10 +202,26 @@ def generate_stereo_scene( logger.info(f"Convolving {source_file} with {IR_file}") # read source file x = audio.fromfile("MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs) x = audio.fromfile( "MONO", os.path.join( cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file), ), fs=cfg.fs, ) # read the IR file IR = audio.fromfile("STEREO", os.path.join(cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file)), fs=cfg.IR_fs) IR = audio.fromfile( "STEREO", os.path.join( cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file), ), fs=cfg.IR_fs, ) # convolve with stereo IR x = reverb_stereo(x, IR) Loading Loading @@ -281,7 +313,11 @@ def generate_stereo_scene( # write the reverberated audio into output file audiofile.write( os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"]), ), y.audio, y.fs, ) Loading
ivas_processing_scripts/generation/__init__.py +4 −4 Original line number Diff line number Diff line Loading @@ -42,10 +42,10 @@ from ivas_processing_scripts.constants import ( ) from ivas_processing_scripts.generation import ( config, process_ambi_items, process_ism1_items, process_ism2_items, process_stereo_items, process_ambi_items, ) from ivas_processing_scripts.utils import create_dir Loading
ivas_processing_scripts/generation/process_ambi_items.py +58 −23 Original line number Diff line number Diff line Loading @@ -32,8 +32,9 @@ import logging import os from itertools import groupby, repeat from math import floor from itertools import repeat, groupby import numpy as np from ivas_processing_scripts.audiotools import audio, audiofile, convert Loading @@ -50,6 +51,7 @@ def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] # function for searching sequences of same the same character and replacing it by another string def replace_char_seq_with_string(str, char_seq, repl_str): result = [] Loading Loading @@ -131,26 +133,42 @@ def generate_ambi_items( cfg.use_input_prefix = "" else: # replace file designators cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "eee", cfg.exp ) # set the prefix for all IR filenames if "use_IR_prefix" not in cfg.__dict__: cfg.use_IR_prefix = "" else: # replace file designators cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "p", cfg.provider ) cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "LL", cfg.language ) cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "eee", cfg.exp ) cfg.use_IR_prefix = replace_char_seq_with_string( cfg.use_IR_prefix, "p", cfg.provider ) cfg.use_IR_prefix = replace_char_seq_with_string( cfg.use_IR_prefix, "LL", cfg.language ) cfg.use_IR_prefix = replace_char_seq_with_string( cfg.use_IR_prefix, "eee", cfg.exp ) # set the prefix for all output filenames if "use_output_prefix" not in cfg.__dict__: cfg.use_output_prefix = None else: # replace file designators cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "eee", cfg.exp ) # set multiprocessing if "multiprocessing" not in cfg.__dict__: Loading Loading @@ -190,7 +208,6 @@ def generate_ambi_scene( y = audio.SceneBasedAudio(ambi_format) for i in range(N_sources): # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] IR_file = np.atleast_1d(scene["IR"])[i] Loading @@ -198,10 +215,26 @@ def generate_ambi_scene( logger.info(f"Convolving {source_file} with {IR_file}") # read source file x = audio.fromfile("MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs) x = audio.fromfile( "MONO", os.path.join( cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file), ), fs=cfg.fs, ) # read the IR file IR = audio.fromfile(ambi_format, os.path.join(cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file)), fs=cfg.IR_fs) IR = audio.fromfile( ambi_format, os.path.join( cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file), ), fs=cfg.IR_fs, ) # convolve with the FOA/HOA2 IR if ambi_format == "FOA": Loading Loading @@ -289,16 +322,18 @@ def generate_ambi_scene( if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( "float" ) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") # superimpose y.audio += noise # write the reverberated audio into output file audiofile.write( os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"]), ), y.audio, y.fs, ) Loading
ivas_processing_scripts/generation/process_ism1_items.py +52 −30 Original line number Diff line number Diff line Loading @@ -33,8 +33,8 @@ import csv import logging import os from itertools import groupby, repeat from math import floor from itertools import repeat, groupby import numpy as np Loading @@ -51,6 +51,7 @@ def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] # function for searching sequences of same the same character and replacing it by another string def replace_char_seq_with_string(str, char_seq, repl_str): result = [] Loading Loading @@ -117,17 +118,27 @@ def generate_ism1_items( cfg.use_input_prefix = "" else: # replace file designators cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "eee", cfg.exp ) # set the prefix for all output filenames if "use_output_prefix" not in cfg.__dict__: cfg.use_output_prefix = None else: # replace file designators cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "eee", cfg.exp ) # set multiprocessing if "multiprocessing" not in cfg.__dict__: Loading @@ -142,6 +153,7 @@ def generate_ism1_items( return def generate_ism1_scene( scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger ): Loading Loading @@ -170,14 +182,18 @@ def generate_ism1_scene( for i in range(N_sources): # parse parameters from the scene description source_file = ( scene["source"][i] if isinstance(scene["source"], list) else scene["source"] scene["source"][i] if isinstance(scene["source"], list) else scene["source"] ) # read source file x = audio.fromfile( "MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs "MONO", os.path.join( cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file), ), fs=cfg.fs, ) # get the number of frames (multiple of 20ms) Loading Loading @@ -302,9 +318,7 @@ def generate_ism1_scene( # insert neutral position as a pre-amble to all sources N_pre = int(N_pre / frame_len) pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_pre, 1) ) pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pre, 1)) y_meta = np.concatenate([pre, y_meta], axis=0) if cfg.postamble != 0.0: Loading @@ -317,29 +331,37 @@ def generate_ism1_scene( # append neutral position as a post-amble to all sources N_post = int(N_post / frame_len) post = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_post, 1) ) post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_post, 1)) y_meta = np.concatenate([y_meta, post], axis=0) # add random noise if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( "float" ) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") # superimpose y.audio += noise # write ISM audio stream to the output file audiofile.write( os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), y.audio, y.fs os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"]), ), y.audio, y.fs, ) # write ISM metadata to the output file in .0.csv format csv_filename = os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.splitext(os.path.basename(scene["name"]))[0] + f".0.csv") csv_filename = os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.splitext(os.path.basename(scene["name"]))[0] + f".0.csv", ) with open( csv_filename, Loading
ivas_processing_scripts/generation/process_ism2_items.py +55 −39 Original line number Diff line number Diff line Loading @@ -29,12 +29,12 @@ # accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and # the United Nations Convention on Contracts on the International Sales of Goods. # import pdb import csv import logging import os import pdb from itertools import groupby, repeat from math import floor from itertools import repeat, groupby import numpy as np Loading @@ -51,6 +51,7 @@ def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] # function for searching sequences of same the same character and replacing it by another string def replace_char_seq_with_string(str, char_seq, repl_str): result = [] Loading Loading @@ -117,17 +118,27 @@ def generate_ism2_items( cfg.use_input_prefix = "" else: # replace file designators cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "eee", cfg.exp ) # set the prefix for all output filenames if "use_output_prefix" not in cfg.__dict__: cfg.use_output_prefix = None else: # replace file designators cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "eee", cfg.exp ) # set multiprocessing if "multiprocessing" not in cfg.__dict__: Loading @@ -142,6 +153,7 @@ def generate_ism2_items( return def generate_ism2_scene( scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger ): Loading @@ -166,9 +178,7 @@ def generate_ism2_scene( for i in range(N_sources): # parse parameters from the scene description source_file = ( scene["source"][i] if isinstance(scene["source"], list) else scene["source"] scene["source"][i] if isinstance(scene["source"], list) else scene["source"] ) source_azi = ( scene["azimuth"][i] Loading @@ -181,13 +191,17 @@ def generate_ism2_scene( else scene["elevation"] ) logger.info( f"Encoding {source_file} at position(s) {source_azi},{source_ele}" ) logger.info(f"Encoding {source_file} at position(s) {source_azi},{source_ele}") # read source file x = audio.fromfile( "MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs "MONO", os.path.join( cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file), ), fs=cfg.fs, ) # get the number of frames (multiple of 20ms) Loading Loading @@ -283,9 +297,7 @@ def generate_ism2_scene( # insert neutral position as a pre-amble N_delay = int(N_delay / frame_len) # use neutral position for padding pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1) ) pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1)) x_meta = np.concatenate([pre, x_meta]) # pad with zeros to ensure that the signal length is a multiple of 20ms Loading @@ -299,9 +311,7 @@ def generate_ism2_scene( N_pad = int(len(x.audio) / frame_len) - len(x_meta) if N_pad > 0: # use neutral position for padding post = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1) ) post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1)) x_meta = np.concatenate([x_meta, post]) # add source signal to the array of all source signals Loading Loading @@ -370,9 +380,7 @@ def generate_ism2_scene( # insert neutral position as a pre-amble to all sources N_pre = int(N_pre / frame_len) pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1) ) pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1)) y_meta = np.concatenate([pre, y_meta], axis=1) if cfg.postamble != 0.0: Loading @@ -385,31 +393,39 @@ def generate_ism2_scene( # append neutral position as a post-amble to all sources N_post = int(N_post / frame_len) post = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1) ) post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1)) y_meta = np.concatenate([y_meta, post], axis=1) # add random noise if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( "float" ) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") # superimpose y.audio += noise # write individual ISM audio streams to the output file in an interleaved format audiofile.write( os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), y.audio, y.fs os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"]), ), y.audio, y.fs, ) # write individual ISM metadata to output files in .csv format for i in range(N_sources): # generate .csv filename (should end with .0.csv, .1.csv, ...) csv_filename = os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.splitext(os.path.basename(scene["name"]))[0] + f".{i}.csv") csv_filename = os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.splitext(os.path.basename(scene["name"]))[0] + f".{i}.csv", ) with open( csv_filename, Loading
ivas_processing_scripts/generation/process_stereo_items.py +55 −19 Original line number Diff line number Diff line Loading @@ -32,7 +32,7 @@ import logging import os from itertools import repeat, groupby from itertools import groupby, repeat from math import floor import numpy as np Loading @@ -51,6 +51,7 @@ def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] # function for searching sequences of same the same character and replacing it by another string def replace_char_seq_with_string(str, char_seq, repl_str): result = [] Loading Loading @@ -125,26 +126,42 @@ def generate_stereo_items( cfg.use_input_prefix = "" else: # replace file designators cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "l", cfg.listening_lab ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "LL", cfg.language ) cfg.use_input_prefix = replace_char_seq_with_string( cfg.use_input_prefix, "eee", cfg.exp ) # set the prefix for all IR filenames if "use_IR_prefix" not in cfg.__dict__: cfg.use_IR_prefix = "" else: # replace file designators cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "p", cfg.provider ) cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "LL", cfg.language ) cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "eee", cfg.exp ) cfg.use_IR_prefix = replace_char_seq_with_string( cfg.use_IR_prefix, "p", cfg.provider ) cfg.use_IR_prefix = replace_char_seq_with_string( cfg.use_IR_prefix, "LL", cfg.language ) cfg.use_IR_prefix = replace_char_seq_with_string( cfg.use_IR_prefix, "eee", cfg.exp ) # set the prefix for all output filenames if "use_output_prefix" not in cfg.__dict__: cfg.use_output_prefix = None else: # replace file designators cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "l", cfg.listening_lab ) cfg.use_output_prefix = replace_char_seq_with_string( cfg.use_output_prefix, "eee", cfg.exp ) # set multiprocessing if "multiprocessing" not in cfg.__dict__: Loading Loading @@ -178,7 +195,6 @@ def generate_stereo_scene( y = audio.ChannelBasedAudio("STEREO") for i in range(N_sources): # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] IR_file = np.atleast_1d(scene["IR"])[i] Loading @@ -186,10 +202,26 @@ def generate_stereo_scene( logger.info(f"Convolving {source_file} with {IR_file}") # read source file x = audio.fromfile("MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs) x = audio.fromfile( "MONO", os.path.join( cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file), ), fs=cfg.fs, ) # read the IR file IR = audio.fromfile("STEREO", os.path.join(cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file)), fs=cfg.IR_fs) IR = audio.fromfile( "STEREO", os.path.join( cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file), ), fs=cfg.IR_fs, ) # convolve with stereo IR x = reverb_stereo(x, IR) Loading Loading @@ -281,7 +313,11 @@ def generate_stereo_scene( # write the reverberated audio into output file audiofile.write( os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"]), ), y.audio, y.fs, )