Loading ivas_processing_scripts/audiotools/__init__.py +8 −8 Original line number Diff line number Diff line Loading @@ -96,13 +96,20 @@ def add_processing_args(group, input=True): default=None, ) group.add_argument( f"-{ps}t", f"-{ps}x", f"--{p}_trim", type=float, nargs=2, metavar=("PRE_TRIM", "POST_TRIM"), help="Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence), (default = %(default)s)", ) group.add_argument( f"-{ps}t", f"--{p}_trajectory", type=str, help="Head-tracking trajectory file for input pre-rotation or binaural output (default = %(default)s)", default=None, ) group.add_argument( f"-{ps}pn", f"--{p}_pad_noise", Loading Loading @@ -166,13 +173,6 @@ def get_args(): help="Apply limiting to output (default = %(default)s)", action="store_true", ) output_parser.add_argument( "-t", "--trajectory", type=str, help="Head-tracking trajectory file for binaural output (default = %(default)s)", default=None, ) output_parser.add_argument( "-bd", "--bin_dataset", Loading ivas_processing_scripts/audiotools/convert/__init__.py +4 −4 Original line number Diff line number Diff line Loading @@ -227,7 +227,7 @@ def process_audio( esdru_alpha: Optional[float] = None, logger: Optional[logging.Logger] = None, ) -> None: """Perform (pre-/pos-) processing of audio""" """Perform (pre-/post-) processing of audio""" if fs is None: fs = x.fs Loading Loading @@ -325,9 +325,9 @@ def format_conversion( if logger: logger.debug(f"Format conversion: {input.name} -> {output.name}") if (fmt := input.name) == output.name or ( input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL") ): if ( ((fmt := input.name) == output.name) and kwargs.get("in_trajectory") is None ) or (input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL")): output.audio = input.audio if fmt.startswith("MASA"): output.metadata_file = input.metadata_file Loading ivas_processing_scripts/audiotools/convert/channelbased.py +9 −3 Original line number Diff line number Diff line Loading @@ -57,9 +57,15 @@ from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu def convert_channelbased( cba: audio.ChannelBasedAudio, out: audio.Audio, in_trajectory: Optional[Union[str, Path]] = None, **kwargs, ) -> audio.Audio: """Convert channel-based audio to the requested output format""" # pre-rotation if specified if in_trajectory is not None: cba.audio = rotate_cba(cba, in_trajectory) # CBA -> Binaural if isinstance(out, audio.BinauralAudio): render_cba_to_binaural(cba, out, **kwargs) Loading @@ -83,7 +89,7 @@ def convert_channelbased( def render_cba_to_binaural( cba: audio.ChannelBasedAudio, bin: audio.BinauralAudio, trajectory: Optional[Union[str, Path]] = None, out_trajectory: Optional[Union[str, Path]] = None, bin_dataset: Optional[str] = None, bin_lfe_gain: Optional[float] = None, **kwargs, Loading Loading @@ -122,8 +128,8 @@ def render_cba_to_binaural( cba.fs = 48000 bin.fs = 48000 if trajectory is not None: cba.audio = rotate_cba(cba, trajectory) if out_trajectory is not None: cba.audio = rotate_cba(cba, out_trajectory) IR, _, latency_smp = load_ir(cba.name, bin.name, bin_dataset) Loading ivas_processing_scripts/audiotools/convert/masa.py +5 −4 Original line number Diff line number Diff line Loading @@ -71,7 +71,8 @@ def convert_masa( def render_masa_to_binaural( masa: audio.MetadataAssistedSpatialAudio, bin: audio.BinauralAudio, trajectory: Optional[Union[str, Path]] = None, in_trajectory: Optional[Union[str, Path]] = None, out_trajectory: Optional[Union[str, Path]] = None, bin_dataset: Optional[str] = None, **kwargs, ) -> None: Loading @@ -96,11 +97,11 @@ def render_masa_to_binaural( render_masa_to_cba(masa, cba_tmp) channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory) channelbased.render_cba_to_binaural(cba_tmp, bin, out_trajectory) else: if trajectory is not None: if in_trajectory is not None or out_trajectory is not None: warn( f"Head-rotation not supported by MasaRenderer! Trajectory {trajectory} will be ignored!" f"Head-rotation not supported by MasaRenderer! Trajectory {in_trajectory or out_trajectory} will be ignored!" ) if bin_dataset is not None: warn( Loading ivas_processing_scripts/audiotools/convert/objectbased.py +17 −5 Original line number Diff line number Diff line Loading @@ -60,10 +60,24 @@ from ivas_processing_scripts.utils import apply_func_parallel def convert_objectbased( oba: audio.ObjectBasedAudio, out: audio.Audio, in_trajectory: Optional[Union[str, Path]] = None, **kwargs, ) -> audio.Audio: """Convert an ISM signal to the requested output format""" # pre-rotation if specified - 20ms only! if in_trajectory is not None: # repeat each value four times since head rotation data is on sub-frame basis azi = np.repeat(oba.obj_pos[:, 0], 4) ele = np.repeat(oba.obj_pos[:, 1], 4) # apply head-rotation trajectory azi, ele = rotate_oba(azi, ele, in_trajectory) # update object metadata oba.obj_pos[:, 0] = azi[:, ::4] oba.obj_pos[:, 1] = ele[:, ::4] # OBA -> Binaural if isinstance(out, audio.BinauralAudio): render_oba_to_binaural(oba, out, **kwargs) Loading @@ -86,7 +100,7 @@ def convert_objectbased( def render_oba_to_binaural( oba: audio.ObjectBasedAudio, bin: audio.BinauralAudio, trajectory: Optional[Union[str, Path]] = None, out_trajectory: Optional[Union[str, Path]] = None, bin_dataset: Optional[str] = None, **kwargs, ) -> None: Loading @@ -105,15 +119,13 @@ def render_oba_to_binaural( Name of binaural dataset, if None default dataset is used """ # bin.audio = np.zeros([oba.audio.shape[0], bin.num_channels]) if "ROOM" in bin.name: cba_tmp = audio.fromtype("7_1_4") cba_tmp.fs = oba.fs render_oba_to_cba(oba, cba_tmp) render_cba_to_binaural(cba_tmp, bin, trajectory) render_cba_to_binaural(cba_tmp, bin, out_trajectory) else: IR, SourcePosition, latency_smp = load_ir(oba.name, bin.name, bin_dataset) Loading @@ -130,7 +142,7 @@ def render_oba_to_binaural( obj_idx, obj_pos, repeat(oba), repeat(trajectory), repeat(out_trajectory), repeat(IR), repeat(SourcePosition), ), Loading Loading
ivas_processing_scripts/audiotools/__init__.py +8 −8 Original line number Diff line number Diff line Loading @@ -96,13 +96,20 @@ def add_processing_args(group, input=True): default=None, ) group.add_argument( f"-{ps}t", f"-{ps}x", f"--{p}_trim", type=float, nargs=2, metavar=("PRE_TRIM", "POST_TRIM"), help="Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence), (default = %(default)s)", ) group.add_argument( f"-{ps}t", f"--{p}_trajectory", type=str, help="Head-tracking trajectory file for input pre-rotation or binaural output (default = %(default)s)", default=None, ) group.add_argument( f"-{ps}pn", f"--{p}_pad_noise", Loading Loading @@ -166,13 +173,6 @@ def get_args(): help="Apply limiting to output (default = %(default)s)", action="store_true", ) output_parser.add_argument( "-t", "--trajectory", type=str, help="Head-tracking trajectory file for binaural output (default = %(default)s)", default=None, ) output_parser.add_argument( "-bd", "--bin_dataset", Loading
ivas_processing_scripts/audiotools/convert/__init__.py +4 −4 Original line number Diff line number Diff line Loading @@ -227,7 +227,7 @@ def process_audio( esdru_alpha: Optional[float] = None, logger: Optional[logging.Logger] = None, ) -> None: """Perform (pre-/pos-) processing of audio""" """Perform (pre-/post-) processing of audio""" if fs is None: fs = x.fs Loading Loading @@ -325,9 +325,9 @@ def format_conversion( if logger: logger.debug(f"Format conversion: {input.name} -> {output.name}") if (fmt := input.name) == output.name or ( input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL") ): if ( ((fmt := input.name) == output.name) and kwargs.get("in_trajectory") is None ) or (input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL")): output.audio = input.audio if fmt.startswith("MASA"): output.metadata_file = input.metadata_file Loading
ivas_processing_scripts/audiotools/convert/channelbased.py +9 −3 Original line number Diff line number Diff line Loading @@ -57,9 +57,15 @@ from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu def convert_channelbased( cba: audio.ChannelBasedAudio, out: audio.Audio, in_trajectory: Optional[Union[str, Path]] = None, **kwargs, ) -> audio.Audio: """Convert channel-based audio to the requested output format""" # pre-rotation if specified if in_trajectory is not None: cba.audio = rotate_cba(cba, in_trajectory) # CBA -> Binaural if isinstance(out, audio.BinauralAudio): render_cba_to_binaural(cba, out, **kwargs) Loading @@ -83,7 +89,7 @@ def convert_channelbased( def render_cba_to_binaural( cba: audio.ChannelBasedAudio, bin: audio.BinauralAudio, trajectory: Optional[Union[str, Path]] = None, out_trajectory: Optional[Union[str, Path]] = None, bin_dataset: Optional[str] = None, bin_lfe_gain: Optional[float] = None, **kwargs, Loading Loading @@ -122,8 +128,8 @@ def render_cba_to_binaural( cba.fs = 48000 bin.fs = 48000 if trajectory is not None: cba.audio = rotate_cba(cba, trajectory) if out_trajectory is not None: cba.audio = rotate_cba(cba, out_trajectory) IR, _, latency_smp = load_ir(cba.name, bin.name, bin_dataset) Loading
ivas_processing_scripts/audiotools/convert/masa.py +5 −4 Original line number Diff line number Diff line Loading @@ -71,7 +71,8 @@ def convert_masa( def render_masa_to_binaural( masa: audio.MetadataAssistedSpatialAudio, bin: audio.BinauralAudio, trajectory: Optional[Union[str, Path]] = None, in_trajectory: Optional[Union[str, Path]] = None, out_trajectory: Optional[Union[str, Path]] = None, bin_dataset: Optional[str] = None, **kwargs, ) -> None: Loading @@ -96,11 +97,11 @@ def render_masa_to_binaural( render_masa_to_cba(masa, cba_tmp) channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory) channelbased.render_cba_to_binaural(cba_tmp, bin, out_trajectory) else: if trajectory is not None: if in_trajectory is not None or out_trajectory is not None: warn( f"Head-rotation not supported by MasaRenderer! Trajectory {trajectory} will be ignored!" f"Head-rotation not supported by MasaRenderer! Trajectory {in_trajectory or out_trajectory} will be ignored!" ) if bin_dataset is not None: warn( Loading
ivas_processing_scripts/audiotools/convert/objectbased.py +17 −5 Original line number Diff line number Diff line Loading @@ -60,10 +60,24 @@ from ivas_processing_scripts.utils import apply_func_parallel def convert_objectbased( oba: audio.ObjectBasedAudio, out: audio.Audio, in_trajectory: Optional[Union[str, Path]] = None, **kwargs, ) -> audio.Audio: """Convert an ISM signal to the requested output format""" # pre-rotation if specified - 20ms only! if in_trajectory is not None: # repeat each value four times since head rotation data is on sub-frame basis azi = np.repeat(oba.obj_pos[:, 0], 4) ele = np.repeat(oba.obj_pos[:, 1], 4) # apply head-rotation trajectory azi, ele = rotate_oba(azi, ele, in_trajectory) # update object metadata oba.obj_pos[:, 0] = azi[:, ::4] oba.obj_pos[:, 1] = ele[:, ::4] # OBA -> Binaural if isinstance(out, audio.BinauralAudio): render_oba_to_binaural(oba, out, **kwargs) Loading @@ -86,7 +100,7 @@ def convert_objectbased( def render_oba_to_binaural( oba: audio.ObjectBasedAudio, bin: audio.BinauralAudio, trajectory: Optional[Union[str, Path]] = None, out_trajectory: Optional[Union[str, Path]] = None, bin_dataset: Optional[str] = None, **kwargs, ) -> None: Loading @@ -105,15 +119,13 @@ def render_oba_to_binaural( Name of binaural dataset, if None default dataset is used """ # bin.audio = np.zeros([oba.audio.shape[0], bin.num_channels]) if "ROOM" in bin.name: cba_tmp = audio.fromtype("7_1_4") cba_tmp.fs = oba.fs render_oba_to_cba(oba, cba_tmp) render_cba_to_binaural(cba_tmp, bin, trajectory) render_cba_to_binaural(cba_tmp, bin, out_trajectory) else: IR, SourcePosition, latency_smp = load_ir(oba.name, bin.name, bin_dataset) Loading @@ -130,7 +142,7 @@ def render_oba_to_binaural( obj_idx, obj_pos, repeat(oba), repeat(trajectory), repeat(out_trajectory), repeat(IR), repeat(SourcePosition), ), Loading