Loading ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py +4 −0 Original line number Diff line number Diff line Loading @@ -72,6 +72,10 @@ def masaAnalyzer( else: binary = find_binary("masaAnalyzer") # enforce metadata_out_filename to be a Path object if metadata_out_filename is not None and not isinstance(metadata_out_filename, Path): metadata_out_filename = Path(metadata_out_filename) if num_tcs not in [1, 2]: raise ValueError(f"Only 1 or 2 TCs supported, but {num_tcs} was given.") Loading ivas_processing_scripts/audiotools/wrappers/masaRenderer.py +6 −1 Original line number Diff line number Diff line Loading @@ -83,11 +83,16 @@ def masaRenderer( output_mode = "-BINAURAL" num_channels = 2 # enforce masa_metadata_file to be a Path object masa_metadata_file = masa.metadata_file if masa_metadata_file is not None and not isinstance(masa_metadata_file, Path): masa_metadata_file = Path(masa_metadata_file) cmd = [ str(binary), output_mode, "", # 2 -> inputPcm str(masa.metadata_file.resolve()), str(masa_metadata_file.resolve()), "", # 4 -> outputPcm ] Loading ivas_processing_scripts/generation/generate_omasa_items.py +31 −14 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ # accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and # the United Nations Convention on Contracts on the International Sales of Goods. # import pdb import csv import logging import os Loading @@ -47,8 +47,6 @@ from ivas_processing_scripts.audiotools.convert.omasa import convert_omasa from ivas_processing_scripts.generation import config from ivas_processing_scripts.utils import apply_func_parallel import pdb SEED_RANDOM_NOISE = 0 Loading Loading @@ -152,14 +150,13 @@ def generate_omasa_items( # set multiprocessing if "multiprocessing" not in cfg.__dict__: cfg.multiprocessing = True cfg.multiprocessing = False apply_func_parallel( generate_scene, zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), None, # "mp" if cfg.multiprocessing else None, None, type = "mp" if cfg.multiprocessing else None, show_progress = None, ) return Loading @@ -168,15 +165,36 @@ def generate_omasa_items( def generate_scene( scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger ): """ Processes a single scene to generate OMASA items with metadata. Args: scene_name (str): The name of the scene being processed. scene (dict): A dictionary containing scene description, including source files, azimuth, elevation, and other parameters. cfg (config.TestConfig): Configuration object containing settings for processing, such as input/output paths, sampling rate, and loudness levels. logger (logging.Logger): Logger instance for logging information and errors. Expected Behavior: - Reads audio source files and processes them based on the scene description. - Generates metadata files and appends them to the OMASA object. - Writes the processed audio and metadata to output files. - Handles various audio formats (e.g., MONO, FOA, HOA2) and applies transformations like loudness normalization, trimming, and padding. """ logger.info( f"Processing scene {scene_name}:") # extract the number of audio sources N_sources = len(np.atleast_1d(scene["source"])) N_ISMs = N_sources-1 # initialize output array # initialize output dirs omasa_format = f"ISM{N_ISMs}MASA{cfg.masa_tc}DIR{cfg.masa_dirs}" output_filename = os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + append_str_filename(os.path.basename(scene["name"]), f"_s{scene_name}_{omasa_format}") ) dir_path = os.path.dirname(output_filename) if dir_path and not os.path.exists(dir_path): os.makedirs(dir_path, exist_ok=True) # initialize output OMASA object y = audio.OMASAAudio(omasa_format) # repeat for all source files Loading Loading @@ -236,7 +254,7 @@ def generate_scene( logger.info(f"Error: Input format of the source file with {N_channels} channels is not supported!") sys.exit(-1) if fmt in ["FOA", "HOA2"]: if fmt in ["FOA", "HOA2", "HOA3"]: # generate MASA metadata .met filename (should end with .met) y.metadata_files.append(os.path.splitext(output_filename)[0]+".met") elif fmt == "MONO": Loading @@ -262,7 +280,7 @@ def generate_scene( audioarray.cut(x.audio, [0, N_frames * frame_len]) # adjust the level of the source file if fmt in ["FOA", "HOA2"]: if fmt in ["FOA", "HOA2", "HOA3"]: x.audio, _ = loudness_norm(x, level, loudness_format="STEREO", rms=True) else: x.audio, _ = loudness_norm(x, level, loudness_format="MONO") Loading @@ -277,9 +295,9 @@ def generate_scene( N_pad = int(frame_len - len(x.audio) % frame_len) x.audio = audioarray.trim(x.audio, x.fs, limits=[0, -N_pad], samples=True) # convert FOA to MASA if fmt in ["FOA", "HOA2"]: x_masa = audio.MetadataAssistedSpatialAudio(f"MASA2DIR1") # convert FOA/HOA2/HOA3 to MASA if fmt in ["FOA", "HOA2", "HOA3"]: x_masa = audio.MetadataAssistedSpatialAudio(f"MASA{cfg.masa_tc}DIR{cfg.masa_dirs}") x_masa.metadata_file = y.metadata_files[i] render_sba_to_masa(x, x_masa) y.audio = x_masa.audio Loading Loading @@ -320,7 +338,6 @@ def generate_scene( else scene["elevation"] ) # pdb.set_trace() N_frames = int(np.rint((len(y.audio) / y.fs * 50))) # read azimuth information and convert to an array Loading Loading
ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py +4 −0 Original line number Diff line number Diff line Loading @@ -72,6 +72,10 @@ def masaAnalyzer( else: binary = find_binary("masaAnalyzer") # enforce metadata_out_filename to be a Path object if metadata_out_filename is not None and not isinstance(metadata_out_filename, Path): metadata_out_filename = Path(metadata_out_filename) if num_tcs not in [1, 2]: raise ValueError(f"Only 1 or 2 TCs supported, but {num_tcs} was given.") Loading
ivas_processing_scripts/audiotools/wrappers/masaRenderer.py +6 −1 Original line number Diff line number Diff line Loading @@ -83,11 +83,16 @@ def masaRenderer( output_mode = "-BINAURAL" num_channels = 2 # enforce masa_metadata_file to be a Path object masa_metadata_file = masa.metadata_file if masa_metadata_file is not None and not isinstance(masa_metadata_file, Path): masa_metadata_file = Path(masa_metadata_file) cmd = [ str(binary), output_mode, "", # 2 -> inputPcm str(masa.metadata_file.resolve()), str(masa_metadata_file.resolve()), "", # 4 -> outputPcm ] Loading
ivas_processing_scripts/generation/generate_omasa_items.py +31 −14 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ # accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and # the United Nations Convention on Contracts on the International Sales of Goods. # import pdb import csv import logging import os Loading @@ -47,8 +47,6 @@ from ivas_processing_scripts.audiotools.convert.omasa import convert_omasa from ivas_processing_scripts.generation import config from ivas_processing_scripts.utils import apply_func_parallel import pdb SEED_RANDOM_NOISE = 0 Loading Loading @@ -152,14 +150,13 @@ def generate_omasa_items( # set multiprocessing if "multiprocessing" not in cfg.__dict__: cfg.multiprocessing = True cfg.multiprocessing = False apply_func_parallel( generate_scene, zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), None, # "mp" if cfg.multiprocessing else None, None, type = "mp" if cfg.multiprocessing else None, show_progress = None, ) return Loading @@ -168,15 +165,36 @@ def generate_omasa_items( def generate_scene( scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger ): """ Processes a single scene to generate OMASA items with metadata. Args: scene_name (str): The name of the scene being processed. scene (dict): A dictionary containing scene description, including source files, azimuth, elevation, and other parameters. cfg (config.TestConfig): Configuration object containing settings for processing, such as input/output paths, sampling rate, and loudness levels. logger (logging.Logger): Logger instance for logging information and errors. Expected Behavior: - Reads audio source files and processes them based on the scene description. - Generates metadata files and appends them to the OMASA object. - Writes the processed audio and metadata to output files. - Handles various audio formats (e.g., MONO, FOA, HOA2) and applies transformations like loudness normalization, trimming, and padding. """ logger.info( f"Processing scene {scene_name}:") # extract the number of audio sources N_sources = len(np.atleast_1d(scene["source"])) N_ISMs = N_sources-1 # initialize output array # initialize output dirs omasa_format = f"ISM{N_ISMs}MASA{cfg.masa_tc}DIR{cfg.masa_dirs}" output_filename = os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + append_str_filename(os.path.basename(scene["name"]), f"_s{scene_name}_{omasa_format}") ) dir_path = os.path.dirname(output_filename) if dir_path and not os.path.exists(dir_path): os.makedirs(dir_path, exist_ok=True) # initialize output OMASA object y = audio.OMASAAudio(omasa_format) # repeat for all source files Loading Loading @@ -236,7 +254,7 @@ def generate_scene( logger.info(f"Error: Input format of the source file with {N_channels} channels is not supported!") sys.exit(-1) if fmt in ["FOA", "HOA2"]: if fmt in ["FOA", "HOA2", "HOA3"]: # generate MASA metadata .met filename (should end with .met) y.metadata_files.append(os.path.splitext(output_filename)[0]+".met") elif fmt == "MONO": Loading @@ -262,7 +280,7 @@ def generate_scene( audioarray.cut(x.audio, [0, N_frames * frame_len]) # adjust the level of the source file if fmt in ["FOA", "HOA2"]: if fmt in ["FOA", "HOA2", "HOA3"]: x.audio, _ = loudness_norm(x, level, loudness_format="STEREO", rms=True) else: x.audio, _ = loudness_norm(x, level, loudness_format="MONO") Loading @@ -277,9 +295,9 @@ def generate_scene( N_pad = int(frame_len - len(x.audio) % frame_len) x.audio = audioarray.trim(x.audio, x.fs, limits=[0, -N_pad], samples=True) # convert FOA to MASA if fmt in ["FOA", "HOA2"]: x_masa = audio.MetadataAssistedSpatialAudio(f"MASA2DIR1") # convert FOA/HOA2/HOA3 to MASA if fmt in ["FOA", "HOA2", "HOA3"]: x_masa = audio.MetadataAssistedSpatialAudio(f"MASA{cfg.masa_tc}DIR{cfg.masa_dirs}") x_masa.metadata_file = y.metadata_files[i] render_sba_to_masa(x, x_masa) y.audio = x_masa.audio Loading Loading @@ -320,7 +338,6 @@ def generate_scene( else scene["elevation"] ) # pdb.set_trace() N_frames = int(np.rint((len(y.audio) / y.fs * 50))) # read azimuth information and convert to an array Loading