formatting (c57e1c57) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

ivas_processing_scripts/generation/generate_masa_items.py

+11 −6

Original line number	Diff line number	Diff line
		@@ -246,14 +246,18 @@ def generate_MASA_scene(
		# of the reference signal (0-based index)
		if isinstance(scene["shift"][i], str) and "(" in scene["shift"][i]:
		# extract X and i_ref
		match = re.match(r"([+-]?\d*\.?\d+)[\(\[]([+-]?\d+)[\)\]]", scene["shift"][i])
		match = re.match(
		r"([+-]?\d*\.?\d+)[\(\[]([+-]?\d+)[\)\]]", scene["shift"][i]
		)

		if match:
		overlap = float(match.group(1))
		overlap_ref = int(match.group(2))
		else:
		scene_shift_str = scene["shift"][i]
		logger.error(f"Unable to parse {scene_shift_str}. The specification of overlap or reference is incorrect!")
		logger.error(
		f"Unable to parse {scene_shift_str}. The specification of overlap or reference is incorrect!"
		)
		sys.exit(-1)

		# calculate absolute shift of the source signal in seconds
		@@ -428,14 +432,15 @@ def generate_MASA_scene(
		# trim the output signal if the total duration exceeds X seconds
		if "duration" in cfg.__dict__:
		# convert from seconds to samples (ensure multiple of 20ms)
		duration = int(
		np.floor(int(cfg.duration * cfg.fs) / frame_len) * frame_len
		)
		duration = int(np.floor(int(cfg.duration * cfg.fs) / frame_len) * frame_len)

		# check if the current length of the output signal exceeds the duration
		if len(y_int.audio) > duration:
		y_int.audio = audioarray.trim(
		y_int.audio, y_int.fs, limits=[0, len(y_int.audio) - duration], samples=True
		y_int.audio,
		y_int.fs,
		limits=[0, len(y_int.audio) - duration],
		samples=True,
		)

		# adjust the loudness of the output signal

+9 −2

Original line number	Diff line number	Diff line
		@@ -474,7 +474,12 @@ def generate_OMASA_scene(

		# add ISM metadata .csv filename (should end with .wav.0.csv, .wav.1.csv, ...)
		y_int.metadata_files.insert(
		i - 1, str(output_filename.with_suffix(output_filename.suffix + f".{i - 1}.csv"))
		i - 1,
		str(
		output_filename.with_suffix(
		output_filename.suffix + f".{i - 1}.csv"
		)
		),
		)

		# append pre-amble and post-amble
		@@ -520,7 +525,9 @@ def generate_OMASA_scene(
		y_int.audio = audioarray.window(y_int.audio, y_int.fs, cfg.fade_in_out * 1000)

		# generate and insert MASA metadata filename (should end with .met)
		y.metadata_files.append(str(output_filename.with_suffix(output_filename.suffix + ".met")))
		y.metadata_files.append(
		str(output_filename.with_suffix(output_filename.suffix + ".met"))
		)

		# convert the intermediate OSBA object to OMASA object
		convert_osba(y_int, y)

+6 −1

Original line number	Diff line number	Diff line
		@@ -460,7 +460,12 @@ def generate_OSBA_scene(

		# add ISM metadata .csv filename (should end with .wav.0.csv, .wav.1.csv, ...)
		y.metadata_files.insert(
		i - 1, str(output_filename.with_suffix(output_filename.suffix + f".{i - 1}.csv"))
		i - 1,
		str(
		output_filename.with_suffix(
		output_filename.suffix + f".{i - 1}.csv"
		)
		),
		)

		# append pre-amble and post-amble

+3 −1

Original line number	Diff line number	Diff line
		@@ -217,7 +217,9 @@ def generate_sba_scene(
		# get input filename and IR filename
		if "IR" in scene.keys():
		IR_file = scene["IR"][i] if isinstance(scene["IR"], list) else scene["IR"]
		IR_filename = Path(IR_file).parent / (cfg.use_IR_prefix + Path(IR_file).name)
		IR_filename = Path(IR_file).parent / (
		cfg.use_IR_prefix + Path(IR_file).name
		)
		else:
		# read azimuth and elevation information
		source_azi = (