Loading item_gen_configs/ISM1_CONFIG.yml +4 −7 Original line number Diff line number Diff line Loading @@ -6,12 +6,6 @@ ### Output format format: "ISM1" ### Date; default = YYYYMMDD_HH.MM.SS # date: 2023.06.30 ### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 fs: 48000 Loading @@ -29,10 +23,13 @@ output_path: "./items_ISM1" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 ### Pre-amble and Post-amble length in seconds (default = None) ### Pre-amble and Post-amble length in seconds (default = 0.0) preamble: 0.5 postamble: 0.5 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true ################################################ ### Scene description Loading item_gen_configs/ISM2_CONFIG.yml +0 −6 Original line number Diff line number Diff line Loading @@ -6,12 +6,6 @@ ### Output format format: "ISM2" ### Date; default = YYYYMMDD_HH.MM.SS # date: 2023.06.30 ### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 fs: 48000 Loading item_gen_configs/STEREO_CONFIG.yml +2 −8 Original line number Diff line number Diff line Loading @@ -6,12 +6,6 @@ ### Output format format: "STEREO" ### Date; default = YYYYMMDD_HH.MM.SS # date: 2023.06.30 ### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 fs: 48000 Loading @@ -26,8 +20,8 @@ IR_fs: 32000 ### Input path to mono files input_path: "./items_mono" ### Input path to stereo impulse response files IR_path: "./IR" ### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' # IR_path: "./IR" ### Output path for generated test items and metadata files output_path: "./items_STEREO" Loading ivas_processing_scripts/generation/__init__.py +2 −28 Original line number Diff line number Diff line Loading @@ -84,36 +84,10 @@ def main(args): # generate input items if cfg.format.startswith("ISM"): # generate ISM items with metadata according to scene description process_ism_items.generate_ism_items( cfg.format, cfg.loudness, cfg.input_path, cfg.output_path, cfg.scenes, logger, fs=cfg.fs, preamble=cfg.preamble, postamble=cfg.postamble, add_low_level_random_noise=getattr(cfg, "add_low_level_random_noise", False), # TODO@VM dict.get() can provide a default value if the key is not found # please check if this is a viable solution - I kept getting "AttributeError: 'TestConfig' object has no attribute 'add_low_level_random_noise'" ) process_ism_items.generate_ism_items(cfg, logger) elif cfg.format == "STEREO": # generate STEREO items according to scene description process_stereo_items.generate_stereo_items( cfg.format, cfg.loudness, cfg.input_path, cfg.IR_path, cfg.output_path, cfg.scenes, logger, fs=cfg.fs, IR_fs=cfg.IR_fs, preamble=cfg.preamble, postamble=cfg.postamble, add_low_level_random_noise=cfg.add_low_level_random_noise, ) process_stereo_items.generate_stereo_items(cfg, logger) # copy configuration to output directory with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f: Loading ivas_processing_scripts/generation/process_ism_items.py +35 −24 Original line number Diff line number Diff line Loading @@ -33,12 +33,11 @@ import csv import logging import os import numpy as np from math import floor from pathlib import Path from typing import Optional import numpy as np from ivas_processing_scripts.generation import config from ivas_processing_scripts.audiotools import audio, audiofile from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness Loading @@ -52,23 +51,34 @@ def csv_formatdata(data): def generate_ism_items( format: str, target_level: int, input_path: Path, output_path: Path, scenes: dict, cfg : config.TestConfig, logger: logging.Logger, fs: Optional[int] = 48000, preamble: Optional[float] = 0.0, postamble: Optional[float] = 0.0, add_low_level_random_noise: Optional[bool] = False, ): """Generate ISM items with metadata from mono items based on scene description""" # get the number of scenes N_scenes = len(scenes) N_scenes = len(cfg.scenes) # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 # set the fs if "fs" not in cfg.__dict__: cfg.fs = 48000 for scene_name, scene in scenes.items(): # set the pre-amble and post-amble if "preamble" not in cfg.__dict__: cfg.preamble = 0.0 if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False for scene_name, scene in cfg.scenes.items(): logger.info(f"Processing {scene_name} out of {N_scenes} scenes") # extract the number of audio sources Loading @@ -89,6 +99,7 @@ def generate_ism_items( # repeat for all source files for i in range(N_sources): # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] source_azi = np.atleast_1d(scene["azimuth"])[i] Loading @@ -99,7 +110,7 @@ def generate_ism_items( ) # read source file x = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs) x = audio.fromfile("MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs) # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) Loading @@ -109,7 +120,7 @@ def generate_ism_items( # x.audio = x.audio[:N_trim] # adjust the level of the source file _, scale_factor = get_loudness(x, target_level, "MONO") _, scale_factor = get_loudness(x, cfg.loudness, "MONO") x.audio *= scale_factor # read azimuth information and create array Loading Loading @@ -271,9 +282,9 @@ def generate_ism_items( y_meta = np.concatenate([y_meta, x_meta]) # append pre-amble and post-amble to all sources if preamble != 0.0: if cfg.preamble != 0.0: # ensure that pre-mable is a multiple of 20ms N_pre = int(floor(preamble * 50) / 50 * y.fs) N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) # insert all-zero preamble to all sources pre = np.zeros((N_pre, y.audio.shape[1])) Loading @@ -285,9 +296,9 @@ def generate_ism_items( ) # !!!! TBD - check if we should insert netrual position or the first position of the metadata y_meta = np.concatenate([pre, y_meta], axis=1) if postamble != 0.0: if cfg.postamble != 0.0: # ensure that post-mable is a multiple of 20ms N_post = int(floor(postamble * 50) / 50 * y.fs) N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) # append all-zero postamble to all sources post = np.zeros((N_post, y.audio.shape[1])) Loading @@ -300,7 +311,7 @@ def generate_ism_items( y_meta = np.concatenate([y_meta, post], axis=1) # add random noise if add_low_level_random_noise: if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( Loading @@ -313,7 +324,7 @@ def generate_ism_items( # write individual ISM audio streams to the output file in an interleaved format output_filename = scene["name"] audiofile.write( os.path.join(output_path, output_filename), y.audio, y.fs os.path.join(cfg.output_path, output_filename), y.audio, y.fs ) # !!!! TBD: replace all os.path.xxx operations with the Path object # write individual ISM metadata to output files in .csv format Loading @@ -322,7 +333,7 @@ def generate_ism_items( csv_filename = os.path.normpath(f"{output_filename}.{i}.csv") with open( os.path.join(output_path, csv_filename), os.path.join(cfg.output_path, csv_filename), "w", newline="", encoding="utf-8", Loading Loading
item_gen_configs/ISM1_CONFIG.yml +4 −7 Original line number Diff line number Diff line Loading @@ -6,12 +6,6 @@ ### Output format format: "ISM1" ### Date; default = YYYYMMDD_HH.MM.SS # date: 2023.06.30 ### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 fs: 48000 Loading @@ -29,10 +23,13 @@ output_path: "./items_ISM1" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 ### Pre-amble and Post-amble length in seconds (default = None) ### Pre-amble and Post-amble length in seconds (default = 0.0) preamble: 0.5 postamble: 0.5 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true ################################################ ### Scene description Loading
item_gen_configs/ISM2_CONFIG.yml +0 −6 Original line number Diff line number Diff line Loading @@ -6,12 +6,6 @@ ### Output format format: "ISM2" ### Date; default = YYYYMMDD_HH.MM.SS # date: 2023.06.30 ### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 fs: 48000 Loading
item_gen_configs/STEREO_CONFIG.yml +2 −8 Original line number Diff line number Diff line Loading @@ -6,12 +6,6 @@ ### Output format format: "STEREO" ### Date; default = YYYYMMDD_HH.MM.SS # date: 2023.06.30 ### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 fs: 48000 Loading @@ -26,8 +20,8 @@ IR_fs: 32000 ### Input path to mono files input_path: "./items_mono" ### Input path to stereo impulse response files IR_path: "./IR" ### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' # IR_path: "./IR" ### Output path for generated test items and metadata files output_path: "./items_STEREO" Loading
ivas_processing_scripts/generation/__init__.py +2 −28 Original line number Diff line number Diff line Loading @@ -84,36 +84,10 @@ def main(args): # generate input items if cfg.format.startswith("ISM"): # generate ISM items with metadata according to scene description process_ism_items.generate_ism_items( cfg.format, cfg.loudness, cfg.input_path, cfg.output_path, cfg.scenes, logger, fs=cfg.fs, preamble=cfg.preamble, postamble=cfg.postamble, add_low_level_random_noise=getattr(cfg, "add_low_level_random_noise", False), # TODO@VM dict.get() can provide a default value if the key is not found # please check if this is a viable solution - I kept getting "AttributeError: 'TestConfig' object has no attribute 'add_low_level_random_noise'" ) process_ism_items.generate_ism_items(cfg, logger) elif cfg.format == "STEREO": # generate STEREO items according to scene description process_stereo_items.generate_stereo_items( cfg.format, cfg.loudness, cfg.input_path, cfg.IR_path, cfg.output_path, cfg.scenes, logger, fs=cfg.fs, IR_fs=cfg.IR_fs, preamble=cfg.preamble, postamble=cfg.postamble, add_low_level_random_noise=cfg.add_low_level_random_noise, ) process_stereo_items.generate_stereo_items(cfg, logger) # copy configuration to output directory with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f: Loading
ivas_processing_scripts/generation/process_ism_items.py +35 −24 Original line number Diff line number Diff line Loading @@ -33,12 +33,11 @@ import csv import logging import os import numpy as np from math import floor from pathlib import Path from typing import Optional import numpy as np from ivas_processing_scripts.generation import config from ivas_processing_scripts.audiotools import audio, audiofile from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness Loading @@ -52,23 +51,34 @@ def csv_formatdata(data): def generate_ism_items( format: str, target_level: int, input_path: Path, output_path: Path, scenes: dict, cfg : config.TestConfig, logger: logging.Logger, fs: Optional[int] = 48000, preamble: Optional[float] = 0.0, postamble: Optional[float] = 0.0, add_low_level_random_noise: Optional[bool] = False, ): """Generate ISM items with metadata from mono items based on scene description""" # get the number of scenes N_scenes = len(scenes) N_scenes = len(cfg.scenes) # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 # set the fs if "fs" not in cfg.__dict__: cfg.fs = 48000 for scene_name, scene in scenes.items(): # set the pre-amble and post-amble if "preamble" not in cfg.__dict__: cfg.preamble = 0.0 if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False for scene_name, scene in cfg.scenes.items(): logger.info(f"Processing {scene_name} out of {N_scenes} scenes") # extract the number of audio sources Loading @@ -89,6 +99,7 @@ def generate_ism_items( # repeat for all source files for i in range(N_sources): # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] source_azi = np.atleast_1d(scene["azimuth"])[i] Loading @@ -99,7 +110,7 @@ def generate_ism_items( ) # read source file x = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs) x = audio.fromfile("MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs) # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) Loading @@ -109,7 +120,7 @@ def generate_ism_items( # x.audio = x.audio[:N_trim] # adjust the level of the source file _, scale_factor = get_loudness(x, target_level, "MONO") _, scale_factor = get_loudness(x, cfg.loudness, "MONO") x.audio *= scale_factor # read azimuth information and create array Loading Loading @@ -271,9 +282,9 @@ def generate_ism_items( y_meta = np.concatenate([y_meta, x_meta]) # append pre-amble and post-amble to all sources if preamble != 0.0: if cfg.preamble != 0.0: # ensure that pre-mable is a multiple of 20ms N_pre = int(floor(preamble * 50) / 50 * y.fs) N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) # insert all-zero preamble to all sources pre = np.zeros((N_pre, y.audio.shape[1])) Loading @@ -285,9 +296,9 @@ def generate_ism_items( ) # !!!! TBD - check if we should insert netrual position or the first position of the metadata y_meta = np.concatenate([pre, y_meta], axis=1) if postamble != 0.0: if cfg.postamble != 0.0: # ensure that post-mable is a multiple of 20ms N_post = int(floor(postamble * 50) / 50 * y.fs) N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) # append all-zero postamble to all sources post = np.zeros((N_post, y.audio.shape[1])) Loading @@ -300,7 +311,7 @@ def generate_ism_items( y_meta = np.concatenate([y_meta, post], axis=1) # add random noise if add_low_level_random_noise: if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( Loading @@ -313,7 +324,7 @@ def generate_ism_items( # write individual ISM audio streams to the output file in an interleaved format output_filename = scene["name"] audiofile.write( os.path.join(output_path, output_filename), y.audio, y.fs os.path.join(cfg.output_path, output_filename), y.audio, y.fs ) # !!!! TBD: replace all os.path.xxx operations with the Path object # write individual ISM metadata to output files in .csv format Loading @@ -322,7 +333,7 @@ def generate_ism_items( csv_filename = os.path.normpath(f"{output_filename}.{i}.csv") with open( os.path.join(output_path, csv_filename), os.path.join(cfg.output_path, csv_filename), "w", newline="", encoding="utf-8", Loading