Commit be5bf7c5 authored by Archit Tamarapu
Browse files

add support for rotation on input format (pre-rotation)

parent 17644dd4
Loading
Loading
Loading
Loading
+8 −8
Original line number Diff line number Diff line
@@ -96,13 +96,20 @@ def add_processing_args(group, input=True):
        default=None,
    )
    group.add_argument(
        f"-{ps}t",
        f"-{ps}x",
        f"--{p}_trim",
        type=float,
        nargs=2,
        metavar=("PRE_TRIM", "POST_TRIM"),
        help="Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence), (default = %(default)s)",
    )
    group.add_argument(
        f"-{ps}t",
        f"--{p}_trajectory",
        type=str,
        help="Head-tracking trajectory file for input pre-rotation or binaural output (default = %(default)s)",
        default=None,
    )
    group.add_argument(
        f"-{ps}pn",
        f"--{p}_pad_noise",
@@ -166,13 +173,6 @@ def get_args():
        help="Apply limiting to output (default = %(default)s)",
        action="store_true",
    )
    output_parser.add_argument(
        "-t",
        "--trajectory",
        type=str,
        help="Head-tracking trajectory file for binaural output (default = %(default)s)",
        default=None,
    )
    output_parser.add_argument(
        "-bd",
        "--bin_dataset",
+4 −4
Original line number Diff line number Diff line
@@ -227,7 +227,7 @@ def process_audio(
    esdru_alpha: Optional[float] = None,
    logger: Optional[logging.Logger] = None,
) -> None:
    """Perform (pre-/pos-) processing of audio"""
    """Perform (pre-/post-) processing of audio"""

    if fs is None:
        fs = x.fs
@@ -325,9 +325,9 @@ def format_conversion(
    if logger:
        logger.debug(f"Format conversion: {input.name} -> {output.name}")

    if (fmt := input.name) == output.name or (
        input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL")
    ):
    if (
        ((fmt := input.name) == output.name) and kwargs.get("in_trajectory") is None
    ) or (input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL")):
        output.audio = input.audio
        if fmt.startswith("MASA"):
            output.metadata_file = input.metadata_file
+9 −3
Original line number Diff line number Diff line
@@ -57,9 +57,15 @@ from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu
def convert_channelbased(
    cba: audio.ChannelBasedAudio,
    out: audio.Audio,
    in_trajectory: Optional[Union[str, Path]] = None,
    **kwargs,
) -> audio.Audio:
    """Convert channel-based audio to the requested output format"""

    # pre-rotation if specified
    if in_trajectory is not None:
        cba.audio = rotate_cba(cba, in_trajectory)

    # CBA -> Binaural
    if isinstance(out, audio.BinauralAudio):
        render_cba_to_binaural(cba, out, **kwargs)
@@ -83,7 +89,7 @@ def convert_channelbased(
def render_cba_to_binaural(
    cba: audio.ChannelBasedAudio,
    bin: audio.BinauralAudio,
    trajectory: Optional[Union[str, Path]] = None,
    out_trajectory: Optional[Union[str, Path]] = None,
    bin_dataset: Optional[str] = None,
    bin_lfe_gain: Optional[float] = None,
    **kwargs,
@@ -122,8 +128,8 @@ def render_cba_to_binaural(
    cba.fs = 48000
    bin.fs = 48000

    if trajectory is not None:
        cba.audio = rotate_cba(cba, trajectory)
    if out_trajectory is not None:
        cba.audio = rotate_cba(cba, out_trajectory)

    IR, _, latency_smp = load_ir(cba.name, bin.name, bin_dataset)

+5 −4
Original line number Diff line number Diff line
@@ -71,7 +71,8 @@ def convert_masa(
def render_masa_to_binaural(
    masa: audio.MetadataAssistedSpatialAudio,
    bin: audio.BinauralAudio,
    trajectory: Optional[Union[str, Path]] = None,
    in_trajectory: Optional[Union[str, Path]] = None,
    out_trajectory: Optional[Union[str, Path]] = None,
    bin_dataset: Optional[str] = None,
    **kwargs,
) -> None:
@@ -96,11 +97,11 @@ def render_masa_to_binaural(

        render_masa_to_cba(masa, cba_tmp)

        channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory)
        channelbased.render_cba_to_binaural(cba_tmp, bin, out_trajectory)
    else:
        if trajectory is not None:
        if in_trajectory is not None or out_trajectory is not None:
            warn(
                f"Head-rotation not supported by MasaRenderer! Trajectory {trajectory} will be ignored!"
                f"Head-rotation not supported by MasaRenderer! Trajectory {in_trajectory or out_trajectory} will be ignored!"
            )
        if bin_dataset is not None:
            warn(
+17 −5
Original line number Diff line number Diff line
@@ -60,10 +60,24 @@ from ivas_processing_scripts.utils import apply_func_parallel
def convert_objectbased(
    oba: audio.ObjectBasedAudio,
    out: audio.Audio,
    in_trajectory: Optional[Union[str, Path]] = None,
    **kwargs,
) -> audio.Audio:
    """Convert an ISM signal to the requested output format"""

    # pre-rotation if specified - 20ms only!
    if in_trajectory is not None:
        # repeat each value four times since head rotation data is on sub-frame basis
        azi = np.repeat(oba.obj_pos[:, 0], 4)
        ele = np.repeat(oba.obj_pos[:, 1], 4)

        # apply head-rotation trajectory
        azi, ele = rotate_oba(azi, ele, in_trajectory)

        # update object metadata
        oba.obj_pos[:, 0] = azi[:, ::4]
        oba.obj_pos[:, 1] = ele[:, ::4]

    # OBA -> Binaural
    if isinstance(out, audio.BinauralAudio):
        render_oba_to_binaural(oba, out, **kwargs)
@@ -86,7 +100,7 @@ def convert_objectbased(
def render_oba_to_binaural(
    oba: audio.ObjectBasedAudio,
    bin: audio.BinauralAudio,
    trajectory: Optional[Union[str, Path]] = None,
    out_trajectory: Optional[Union[str, Path]] = None,
    bin_dataset: Optional[str] = None,
    **kwargs,
) -> None:
@@ -105,15 +119,13 @@ def render_oba_to_binaural(
        Name of binaural dataset, if None default dataset is used
    """

    # bin.audio = np.zeros([oba.audio.shape[0], bin.num_channels])

    if "ROOM" in bin.name:
        cba_tmp = audio.fromtype("7_1_4")
        cba_tmp.fs = oba.fs

        render_oba_to_cba(oba, cba_tmp)

        render_cba_to_binaural(cba_tmp, bin, trajectory)
        render_cba_to_binaural(cba_tmp, bin, out_trajectory)
    else:
        IR, SourcePosition, latency_smp = load_ir(oba.name, bin.name, bin_dataset)

@@ -130,7 +142,7 @@ def render_oba_to_binaural(
                obj_idx,
                obj_pos,
                repeat(oba),
                repeat(trajectory),
                repeat(out_trajectory),
                repeat(IR),
                repeat(SourcePosition),
            ),
Loading