add support for rotation on input format (pre-rotation) (be5bf7c5) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

ivas_processing_scripts/audiotools/__init__.py

+8 −8

Original line number	Diff line number	Diff line
		@@ -96,13 +96,20 @@ def add_processing_args(group, input=True):
		default=None,
		)
		group.add_argument(
		f"-{ps}t",
		f"-{ps}x",
		f"--{p}_trim",
		type=float,
		nargs=2,
		metavar=("PRE_TRIM", "POST_TRIM"),
		help="Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence), (default = %(default)s)",
		)
		group.add_argument(
		f"-{ps}t",
		f"--{p}_trajectory",
		type=str,
		help="Head-tracking trajectory file for input pre-rotation or binaural output (default = %(default)s)",
		default=None,
		)
		group.add_argument(
		f"-{ps}pn",
		f"--{p}_pad_noise",
		@@ -166,13 +173,6 @@ def get_args():
		help="Apply limiting to output (default = %(default)s)",
		action="store_true",
		)
		output_parser.add_argument(
		"-t",
		"--trajectory",
		type=str,
		help="Head-tracking trajectory file for binaural output (default = %(default)s)",
		default=None,
		)
		output_parser.add_argument(
		"-bd",
		"--bin_dataset",

ivas_processing_scripts/audiotools/convert/__init__.py

+4 −4

Original line number	Diff line number	Diff line
		@@ -227,7 +227,7 @@ def process_audio(
		esdru_alpha: Optional[float] = None,
		logger: Optional[logging.Logger] = None,
		) -> None:
		"""Perform (pre-/pos-) processing of audio"""
		"""Perform (pre-/post-) processing of audio"""

		if fs is None:
		fs = x.fs
		@@ -325,9 +325,9 @@ def format_conversion(
		if logger:
		logger.debug(f"Format conversion: {input.name} -> {output.name}")

		if (fmt := input.name) == output.name or (
		input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL")
		):
		if (
		((fmt := input.name) == output.name) and kwargs.get("in_trajectory") is None
		) or (input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL")):
		output.audio = input.audio
		if fmt.startswith("MASA"):
		output.metadata_file = input.metadata_file

ivas_processing_scripts/audiotools/convert/channelbased.py

+9 −3

Original line number	Diff line number	Diff line
		@@ -57,9 +57,15 @@ from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu
		def convert_channelbased(
		cba: audio.ChannelBasedAudio,
		out: audio.Audio,
		in_trajectory: Optional[Union[str, Path]] = None,
		**kwargs,
		) -> audio.Audio:
		"""Convert channel-based audio to the requested output format"""

		# pre-rotation if specified
		if in_trajectory is not None:
		cba.audio = rotate_cba(cba, in_trajectory)

		# CBA -> Binaural
		if isinstance(out, audio.BinauralAudio):
		render_cba_to_binaural(cba, out, **kwargs)
		@@ -83,7 +89,7 @@ def convert_channelbased(
		def render_cba_to_binaural(
		cba: audio.ChannelBasedAudio,
		bin: audio.BinauralAudio,
		trajectory: Optional[Union[str, Path]] = None,
		out_trajectory: Optional[Union[str, Path]] = None,
		bin_dataset: Optional[str] = None,
		bin_lfe_gain: Optional[float] = None,
		**kwargs,
		@@ -122,8 +128,8 @@ def render_cba_to_binaural(
		cba.fs = 48000
		bin.fs = 48000

		if trajectory is not None:
		cba.audio = rotate_cba(cba, trajectory)
		if out_trajectory is not None:
		cba.audio = rotate_cba(cba, out_trajectory)

		IR, _, latency_smp = load_ir(cba.name, bin.name, bin_dataset)

ivas_processing_scripts/audiotools/convert/masa.py

+5 −4

Original line number	Diff line number	Diff line
		@@ -71,7 +71,8 @@ def convert_masa(
		def render_masa_to_binaural(
		masa: audio.MetadataAssistedSpatialAudio,
		bin: audio.BinauralAudio,
		trajectory: Optional[Union[str, Path]] = None,
		in_trajectory: Optional[Union[str, Path]] = None,
		out_trajectory: Optional[Union[str, Path]] = None,
		bin_dataset: Optional[str] = None,
		**kwargs,
		) -> None:
		@@ -96,11 +97,11 @@ def render_masa_to_binaural(

		render_masa_to_cba(masa, cba_tmp)

		channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory)
		channelbased.render_cba_to_binaural(cba_tmp, bin, out_trajectory)
		else:
		if trajectory is not None:
		if in_trajectory is not None or out_trajectory is not None:
		warn(
		f"Head-rotation not supported by MasaRenderer! Trajectory {trajectory} will be ignored!"
		f"Head-rotation not supported by MasaRenderer! Trajectory {in_trajectory or out_trajectory} will be ignored!"
		)
		if bin_dataset is not None:
		warn(

ivas_processing_scripts/audiotools/convert/objectbased.py

+17 −5

Original line number	Diff line number	Diff line
		@@ -60,10 +60,24 @@ from ivas_processing_scripts.utils import apply_func_parallel
		def convert_objectbased(
		oba: audio.ObjectBasedAudio,
		out: audio.Audio,
		in_trajectory: Optional[Union[str, Path]] = None,
		**kwargs,
		) -> audio.Audio:
		"""Convert an ISM signal to the requested output format"""

		# pre-rotation if specified - 20ms only!
		if in_trajectory is not None:
		# repeat each value four times since head rotation data is on sub-frame basis
		azi = np.repeat(oba.obj_pos[:, 0], 4)
		ele = np.repeat(oba.obj_pos[:, 1], 4)

		# apply head-rotation trajectory
		azi, ele = rotate_oba(azi, ele, in_trajectory)

		# update object metadata
		oba.obj_pos[:, 0] = azi[:, ::4]
		oba.obj_pos[:, 1] = ele[:, ::4]

		# OBA -> Binaural
		if isinstance(out, audio.BinauralAudio):
		render_oba_to_binaural(oba, out, **kwargs)
		@@ -86,7 +100,7 @@ def convert_objectbased(
		def render_oba_to_binaural(
		oba: audio.ObjectBasedAudio,
		bin: audio.BinauralAudio,
		trajectory: Optional[Union[str, Path]] = None,
		out_trajectory: Optional[Union[str, Path]] = None,
		bin_dataset: Optional[str] = None,
		**kwargs,
		) -> None:
		@@ -105,15 +119,13 @@ def render_oba_to_binaural(
		Name of binaural dataset, if None default dataset is used
		"""

		# bin.audio = np.zeros([oba.audio.shape[0], bin.num_channels])

		if "ROOM" in bin.name:
		cba_tmp = audio.fromtype("7_1_4")
		cba_tmp.fs = oba.fs

		render_oba_to_cba(oba, cba_tmp)

		render_cba_to_binaural(cba_tmp, bin, trajectory)
		render_cba_to_binaural(cba_tmp, bin, out_trajectory)
		else:
		IR, SourcePosition, latency_smp = load_ir(oba.name, bin.name, bin_dataset)

		@@ -130,7 +142,7 @@ def render_oba_to_binaural(
		obj_idx,
		obj_pos,
		repeat(oba),
		repeat(trajectory),
		repeat(out_trajectory),
		repeat(IR),
		repeat(SourcePosition),
		),