fixes in OMASA item generation script (0774f358) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py

+4 −0

Original line number	Diff line number	Diff line
		@@ -72,6 +72,10 @@ def masaAnalyzer(
		else:
		binary = find_binary("masaAnalyzer")

		# enforce metadata_out_filename to be a Path object
		if metadata_out_filename is not None and not isinstance(metadata_out_filename, Path):
		metadata_out_filename = Path(metadata_out_filename)

		if num_tcs not in [1, 2]:
		raise ValueError(f"Only 1 or 2 TCs supported, but {num_tcs} was given.")

ivas_processing_scripts/audiotools/wrappers/masaRenderer.py

+6 −1

Original line number	Diff line number	Diff line
		@@ -83,11 +83,16 @@ def masaRenderer(
		output_mode = "-BINAURAL"
		num_channels = 2

		# enforce masa_metadata_file to be a Path object
		masa_metadata_file = masa.metadata_file
		if masa_metadata_file is not None and not isinstance(masa_metadata_file, Path):
		masa_metadata_file = Path(masa_metadata_file)

		cmd = [
		str(binary),
		output_mode,
		"", # 2 -> inputPcm
		str(masa.metadata_file.resolve()),
		str(masa_metadata_file.resolve()),
		"", # 4 -> outputPcm
		]

ivas_processing_scripts/generation/generate_omasa_items.py

+31 −14

Original line number	Diff line number	Diff line
		@@ -29,7 +29,7 @@
		# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
		# the United Nations Convention on Contracts on the International Sales of Goods.
		#
		import pdb

		import csv
		import logging
		import os
		@@ -47,8 +47,6 @@ from ivas_processing_scripts.audiotools.convert.omasa import convert_omasa
		from ivas_processing_scripts.generation import config
		from ivas_processing_scripts.utils import apply_func_parallel

		import pdb

		SEED_RANDOM_NOISE = 0


		@@ -152,14 +150,13 @@ def generate_omasa_items(

		# set multiprocessing
		if "multiprocessing" not in cfg.__dict__:
		cfg.multiprocessing = True
		cfg.multiprocessing = False

		apply_func_parallel(
		generate_scene,
		zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)),
		None,
		# "mp" if cfg.multiprocessing else None,
		None,
		type = "mp" if cfg.multiprocessing else None,
		show_progress = None,
		)

		return
		@@ -168,15 +165,36 @@ def generate_omasa_items(
		def generate_scene(
		scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger
		):
		"""
		Processes a single scene to generate OMASA items with metadata.

		Args:
		scene_name (str): The name of the scene being processed.
		scene (dict): A dictionary containing scene description, including source files, azimuth, elevation, and other parameters.
		cfg (config.TestConfig): Configuration object containing settings for processing, such as input/output paths, sampling rate, and loudness levels.
		logger (logging.Logger): Logger instance for logging information and errors.

		Expected Behavior:
		- Reads audio source files and processes them based on the scene description.
		- Generates metadata files and appends them to the OMASA object.
		- Writes the processed audio and metadata to output files.
		- Handles various audio formats (e.g., MONO, FOA, HOA2, HOA3) and applies transformations like loudness normalization, trimming, and padding.
		"""
		logger.info( f"Processing scene {scene_name}:")

		# extract the number of audio sources
		N_sources = len(np.atleast_1d(scene["source"]))
		N_ISMs = N_sources-1

		# initialize output array
		# initialize output dirs
		omasa_format = f"ISM{N_ISMs}MASA{cfg.masa_tc}DIR{cfg.masa_dirs}"
		output_filename = os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + append_str_filename(os.path.basename(scene["name"]), f"_s{scene_name}_{omasa_format}") )

		dir_path = os.path.dirname(output_filename)
		if dir_path and not os.path.exists(dir_path):
		os.makedirs(dir_path, exist_ok=True)

		# initialize output OMASA object
		y = audio.OMASAAudio(omasa_format)

		# repeat for all source files
		@@ -236,7 +254,7 @@ def generate_scene(
		logger.info(f"Error: Input format of the source file with {N_channels} channels is not supported!")
		sys.exit(-1)

		if fmt in ["FOA", "HOA2"]:
		if fmt in ["FOA", "HOA2", "HOA3"]:
		# generate MASA metadata .met filename (should end with .met)
		y.metadata_files.append(os.path.splitext(output_filename)[0]+".met")
		elif fmt == "MONO":
		@@ -262,7 +280,7 @@ def generate_scene(
		audioarray.cut(x.audio, [0, N_frames * frame_len])

		# adjust the level of the source file
		if fmt in ["FOA", "HOA2"]:
		if fmt in ["FOA", "HOA2", "HOA3"]:
		x.audio, _ = loudness_norm(x, level, loudness_format="STEREO", rms=True)
		else:
		x.audio, _ = loudness_norm(x, level, loudness_format="MONO")
		@@ -277,9 +295,9 @@ def generate_scene(
		N_pad = int(frame_len - len(x.audio) % frame_len)
		x.audio = audioarray.trim(x.audio, x.fs, limits=[0, -N_pad], samples=True)

		# convert FOA to MASA
		if fmt in ["FOA", "HOA2"]:
		x_masa = audio.MetadataAssistedSpatialAudio(f"MASA2DIR1")
		# convert FOA/HOA2/HOA3 to MASA
		if fmt in ["FOA", "HOA2", "HOA3"]:
		x_masa = audio.MetadataAssistedSpatialAudio(f"MASA{cfg.masa_tc}DIR{cfg.masa_dirs}")
		x_masa.metadata_file = y.metadata_files[i]
		render_sba_to_masa(x, x_masa)
		y.audio = x_masa.audio
		@@ -320,7 +338,6 @@ def generate_scene(
		else scene["elevation"]
		)

		# pdb.set_trace()
		N_frames = int(np.rint((len(y.audio) / y.fs * 50)))

		# read azimuth information and convert to an array