Commit 55afb266 authored by Jan Kiene
Browse files

add multiple formats in postprocessing

- allows pre-conversion to MASA so that masaRenderer can be used
parent 9d2a29fc
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -302,6 +302,6 @@ conditions_to_generate:
### Post-processing
################################################
postprocessing:
    fmt: "BINAURAL"
    fmt: ["MASA2", "BINAURAL"]
    fs: 48000
    loudness: -26
+13 −3
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@ import logging
from pathlib import Path, PurePath
from typing import Optional, Union
from numpy import empty
from shutil import copyfile

from ivas_processing_scripts.audiotools import audio, audioarray, metadata
from ivas_processing_scripts.audiotools.audiofile import write
@@ -100,7 +101,7 @@ def convert_file(
        output.audio = empty((1, num_tcs))

        # fabricate metadata file name
        output.metadata_files = [Path(out_file).with_suffix(".met")]
        output.metadata_file = Path(out_file).with_suffix(".met")
    if isinstance(output, audio.ObjectBasedAudio):
        try:
            output.object_pos = input.object_pos
@@ -135,6 +136,11 @@ def convert_file(
    write(out_file, output.audio, output.fs)
    if isinstance(output, audio.ObjectBasedAudio):
        write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True)
    elif isinstance(output, audio.MetadataAssistedSpatialAudio) and in_fmt == out_fmt:
        # audio objects point to same MD file, create new one with default naming for output
        out_md_name = out_file.parent / (out_file.name + ".met")
        copyfile(output.metadata_file, out_md_name)
        output.metadata_file = out_md_name


def convert(
@@ -303,7 +309,7 @@ def format_conversion(
    """Convert one audio format to another"""

    # validation
    if isinstance(output, audio.MetadataAssistedSpatialAudio) and not isinstance(input, audio.SceneBasedAudio):
    if isinstance(output, audio.MetadataAssistedSpatialAudio) and not (isinstance(input, audio.SceneBasedAudio) or isinstance(input, audio.MetadataAssistedSpatialAudio )):
        raise NotImplementedError("Can only convert to MASA from SBA")

    if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name:
@@ -314,10 +320,14 @@ def format_conversion(
    if logger:
        logger.debug(f"Format conversion: {input.name} -> {output.name}")

    if input.name == output.name or (
    if ( fmt := input.name ) == output.name or (
        input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL")
    ):
        output.audio = input.audio
        if fmt.startswith("MASA"):
            output.metadata_file = input.metadata_file
        elif fmt.startswith("ISM"):
            output.metadata_files = list(output.metadata_files)
    else:
        if isinstance(input, audio.BinauralAudio):
            raise NotImplementedError(
+1 −1
Original line number Diff line number Diff line
@@ -192,7 +192,7 @@ def render_sba_to_masa(
    # two dir only possible from HOA2
    num_dirs = 1
    num_tcs = masa_out.audio.shape[1]
    md_out_path = masa_out.metadata_files[0]
    md_out_path = masa_out.metadata_file
    
    masa = masaAnalyzer(sba_in, num_tcs, num_dirs, md_out_path)
    masa_out.audio = masa.audio
+20 −1
Original line number Diff line number Diff line
@@ -409,13 +409,32 @@ def get_processing_chain(
        loudness_postprocessing = post_cfg.get("loudness")
        loudness_fmt_postprocessing = post_cfg.get("loudness_fmt")

    post_fmt = post_cfg.get("fmt")
    if isinstance(post_fmt, list):
        pre_fmts = post_fmt[:-1]
        post_fmt = post_fmt[-1]

        # add Postprocessing with only format conversion for each format except the last
        fmts = [tmp_in_fmt] + pre_fmts
        for fmt_in, fmt_out in zip(fmts[:-1], fmts[1:]):
            chain["processes"].append(
                Postprocessing(
                    {
                        "in_fs": tmp_in_fs,
                        "in_fmt": fmt_in,
                        "out_fs": tmp_in_fs,
                        "out_fmt": fmt_out
                    }
                )
            )

    chain["processes"].append(
        Postprocessing(
            {
                "in_fs": tmp_in_fs,
                "in_fmt": tmp_in_fmt,
                "out_fs": post_cfg.get("fs"),
                "out_fmt": post_cfg.get("fmt"),
                "out_fmt": post_fmt,
                "out_cutoff": tmp_lp_cutoff,
                "out_loudness": loudness_postprocessing,
                "out_loudness_fmt": loudness_fmt_postprocessing,
+15 −5
Original line number Diff line number Diff line
@@ -176,6 +176,8 @@ def concat_teardown(cfg: TestConfig, logger: logging.Logger):
        raise ValueError("Splitting not possible without split marker")

    output_format = cfg.postprocessing["fmt"]
    if isinstance(output_format, list):
        output_format = output_format[-1]

    out_files = []
    out_meta = []
@@ -327,6 +329,10 @@ def reverse_process_2(cfg, logger):
        logger.info("Remove preamble")
        remove_preamble(cfg)

    fmt = cfg.postprocessing["fmt"]
    if isinstance(fmt, list):
        fmt = fmt[-1]

    # reverse concatenation
    if cfg.pre2.concatenate_input:
        # write out the splits, optionally remove file
@@ -337,13 +343,13 @@ def reverse_process_2(cfg, logger):
        for out_dir in cfg.out_dirs:
            list_audio_dir = list_audio(out_dir)
            out_paths_splits.append(list_audio_dir)
        if cfg.postprocessing["fmt"].startswith("ISM"):
        if fmt.startswith("ISM"):
            out_meta_splits = []
            for i, condition in enumerate(out_paths_splits):
                meta_condition = metadata_search(
                    cfg.out_dirs[i],
                    condition,
                    num_objects=int(cfg.postprocessing["fmt"][-1]),
                    num_objects=int(fmt[-1]),
                )
                out_meta_splits.append(meta_condition)
        else:
@@ -353,7 +359,7 @@ def reverse_process_2(cfg, logger):
    if cfg.postprocessing.get("loudness", False):
        scale_files(
            out_paths_splits,
            cfg.postprocessing["fmt"],
            fmt,
            cfg.postprocessing["loudness"],
            cfg.postprocessing.get("loudness_fmt", None),
            cfg.postprocessing["fs"],
@@ -444,13 +450,17 @@ def process_item(

def remove_preamble(cfg):
    # get number of channels from output format
    num_channels = audio.fromtype(cfg.postprocessing["fmt"]).num_channels
    fmt = cfg.postprocessing["fmt"]
    if isinstance(cfg.postprocessing["fmt"], list):
        fmt = fmt[-1]

    num_channels = audio.fromtype(fmt).num_channels
    for odir in cfg.out_dirs:
        for item in cfg.items_list:
            path_input = odir / item.name

            # remove preamble for ISM metadata
            if cfg.postprocessing["fmt"].startswith("ISM"):
            if fmt.startswith("ISM"):
                # search for metadata
                meta_item = metadata_search(
                    odir, [Path(item.name)], num_objects=num_channels