Commit b8de52cb authored by Anika Treffehn's avatar Anika Treffehn
Browse files

enabled osba to omasa conversion

parent 07ee6f4d
Loading
Loading
Loading
Loading
+10 −6
Original line number Diff line number Diff line
@@ -218,18 +218,18 @@ class MetadataAssistedSpatialAudio(Audio):
            raise ValueError(
                f"Unsupported metadata assisted spatial audio format {name}"
            )
        self.metadata_files = []
        self.metadata_file = None

    @classmethod
    def _from_file(
        cls,
        name: str,
        filename: Path,
        metadata_files: list[str],
        metadata_file: str,
        fs: Optional[int] = None,
    ) -> "MetadataAssistedSpatialAudio":
        """Create an instance from an audio file and attach its metadata path.

        The object itself is built by the parent class's ``_from_file``;
        this method only adds the ``metadata_file`` attribute on top.

        NOTE(review): this span appears to come from a rendered diff in which
        removed and added lines are shown together. ``metadata_files`` and the
        first ``obj.metadata_file`` assignment look like the pre-change lines,
        ``metadata_file`` and the second assignment like their replacements.
        Confirm against the actual file before relying on this signature.

        Args:
            name: Forwarded unchanged to the parent ``_from_file``.
            filename: Forwarded unchanged to the parent ``_from_file``.
            metadata_files: List of metadata paths; only the first entry is
                read, and the value derived from it is overwritten right away.
            metadata_file: Metadata path; wrapped in ``Path`` and stored as
                ``obj.metadata_file``.
            fs: Forwarded unchanged to the parent ``_from_file``; defaults to
                ``None``.

        Returns:
            The object returned by the parent ``_from_file`` with
            ``metadata_file`` set.
        """
        # Delegate construction to the parent implementation.
        obj = super()._from_file(name, filename, fs)
        # This value is overwritten by the next statement; indexing still
        # raises IndexError if ``metadata_files`` is an empty list.
        obj.metadata_file = Path(metadata_files[0])
        # Final value of the attribute: the single metadata path as a Path.
        obj.metadata_file = Path(metadata_file)
        return obj

    @classmethod
@@ -237,11 +237,11 @@ class MetadataAssistedSpatialAudio(Audio):
        cls,
        name: str,
        filename: Path,
        metadata_files: list[str],
        metadata_file: str,
        fs: Optional[int] = None,
    ) -> "MetadataAssistedSpatialAudio":
        obj = super()._from_file(name, filename, fs)
        obj.metadata_file = Path(metadata_files[0])
        obj.metadata_file = Path(metadata_file)
        return obj


@@ -432,13 +432,17 @@ class OMASAAudio(Audio):

    def init_metadata(self):
        # check if number of metadata files matches format
        if self.num_ism_channels != len(self.metadata_files):
        if self.num_ism_channels != len(self.metadata_files)-1:
            raise ValueError(
                f"Mismatch between number of ism channels [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]"
            )

        self.object_pos = []
        for i, f in enumerate(self.metadata_files):
            if i >= self.num_ism_channels:
                # only read ISM metadata, not MASA metadata
                break

            pos = np.genfromtxt(f, delimiter=",")

            # check if metadata has right number of columns
+42 −13
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@ import logging
from pathlib import Path, PurePath
from shutil import copyfile
from typing import Optional, Union
from copy import copy

from numpy import empty

@@ -79,9 +80,13 @@ def convert_file(
    else:
        # first check prevents crash on custom_ls setup formats
        if isinstance(in_fmt, str) and in_fmt.startswith("MASA") and in_meta is None:
            # TODO treffehn: also consider OMASA
            # if no MD file is provided, default to name (including .wav or .pcm!!!) + ".met"
            in_meta = [in_file.parent / (in_file.name + ".met")]
        elif isinstance(in_fmt, str) and "MASA" in in_fmt and "ISM" in in_fmt:
            if isinstance(in_meta, list) and len(in_meta) < (int(in_fmt[3])+int(in_fmt[8])):
                in_meta_masa = in_file.parent / (in_file.name + ".met")
                in_meta.append(in_meta_masa)

        input = audio.fromfile(in_fmt, in_file, in_fs, in_meta)

    # try to set reasonable defaults if missing
@@ -100,22 +105,40 @@ def convert_file(

    output = audio.fromtype(out_fmt)

    # handle metadata for outputs with metadata (MASA, ISM, OMASA, OSBA)
    if isinstance(output, audio.MetadataAssistedSpatialAudio):
        # create dummy audio array to allow inference of MASA mode
        num_tcs = int(output.name[-1])
        output.audio = empty((1, num_tcs))

        if isinstance(input, audio.MetadataAssistedSpatialAudio):
            # use existing metadata file
            output.metadata_file = input.metadata_file
        else:
            # fabricate metadata file name
        output.metadata_file = Path(out_file).parent / (Path(out_file).name + ".met")
    if isinstance(output, audio.ObjectBasedAudio):
            masa_meta_file_name = Path(out_file).parent / (Path(out_file).name + ".met")
            output.metadata_file = masa_meta_file_name

    elif isinstance(output, audio.ObjectBasedAudio):
        try:
            output.object_pos = input.object_pos
            output.metadata_files = input.metadata_files
            output.object_pos = copy(input.object_pos)
            output.metadata_files = copy(input.metadata_files)
        except Exception:
            raise ValueError(
                "ISM is not supported as an output for rendering! Only usable as pass-through"
            )
    elif isinstance(output, audio.OMASAAudio):
        if isinstance(input, audio.OMASAAudio):
            # use existing metadata files
            output.metadata_files = copy(input.metadata_files)
        else:
            # fabricate metadata file name
            masa_meta_file_name = Path(out_file).parent / (Path(out_file).name + ".met")
            output.metadata_files = copy(input.metadata_files)
            output.metadata_files.append(masa_meta_file_name)
        output.object_pos = copy(input.object_pos)

    elif isinstance(output, audio.OSBAAudio):
        output.object_pos = copy(input.object_pos)
        output.metadata_files = copy(input.metadata_files)

    # apply actual conversion
    if isinstance(input, metadata.Metadata):
        if logger:
            logger.debug(f"Converting metadata to {out_fmt} : {in_file} -> {out_file}")
@@ -137,16 +160,22 @@ def convert_file(
        output.fs = in_fs  # resampling not yet applied
        convert(input, output, in_fs=in_fs, out_fs=out_fs, logger=logger, **kwargs)

    # write output
    # write output audio
    write(out_file, output.audio, output.fs)
    if isinstance(output, audio.ObjectBasedAudio):
    # write metadata
    if isinstance(output, audio.ObjectBasedAudio) or isinstance(output, audio.OSBAAudio):
        write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True)
    elif isinstance(output, audio.MetadataAssistedSpatialAudio) and in_fmt == out_fmt:
        # audio objects point to same MD file, create new one with default naming for output
        out_md_name = out_file.parent / (out_file.name + ".met")
        copyfile(output.metadata_file, out_md_name)
        output.metadata_file = out_md_name

    elif isinstance(output, audio.OMASAAudio):
        write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True)
        if in_fmt == out_fmt:
            # audio objects point to same MD file, create new one with default naming for output
            out_md_name = out_file.parent / (out_file.name + ".met")
            copyfile(output.metadata_files[-1], out_md_name)

def convert(
    input: audio.Audio,
+14 −12
Original line number Diff line number Diff line
@@ -29,7 +29,8 @@
#  the United Nations Convention on Contracts on the International Sales of Goods.
#

import copy
from copy import copy, deepcopy
import numpy as np

from ivas_processing_scripts.audiotools import audio
from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \
@@ -49,19 +50,19 @@ def convert_osba(

    # split OSBA object in ISM and SBA object
    oba = audio.fromarray("ISM" + str(osba.num_ism_channels), osba.audio[:, :osba.num_ism_channels], osba.fs)
    oba.metadata_files = osba.metadata_files
    oba.object_pos = osba.object_pos
    oba.metadata_files = copy(osba.metadata_files)
    oba.object_pos = copy(osba.object_pos)
    sba = audio.fromarray("SBA" + str(osba.ambi_order), osba.audio[:, osba.num_ism_channels:], osba.fs)

    # OSBA -> Binaural
    if isinstance(out, audio.BinauralAudio):
        # render SBA and ISM part separately
        # ISM
        out_ism = copy.deepcopy(out)
        out_ism = deepcopy(out)
        render_oba_to_binaural(oba, out_ism, **kwargs)

        # SBA
        out_sba = copy.deepcopy(out)
        out_sba = deepcopy(out)
        render_sba_to_binaural(sba, out_sba, **kwargs)

        # combine results
@@ -71,11 +72,11 @@ def convert_osba(
    elif isinstance(out, audio.ChannelBasedAudio):
        # render SBA and ISM part separately
        # ISM
        out_ism = copy.deepcopy(out)
        out_ism = deepcopy(out)
        render_oba_to_cba(oba, out_ism)

        # SBA
        out_sba = copy.deepcopy(out)
        out_sba = deepcopy(out)
        render_sba_to_cba(sba, out_sba)

        # combine results
@@ -85,11 +86,11 @@ def convert_osba(
    elif isinstance(out, audio.SceneBasedAudio):
        # render SBA and ISM part separately
        # ISM
        out_ism = copy.deepcopy(out)
        out_ism = deepcopy(out)
        render_oba_to_sba(oba, out_ism)

        # SBA
        out_sba = copy.deepcopy(out)
        out_sba = deepcopy(out)
        render_sba_to_sba(sba, out_sba)

        # combine results
@@ -97,17 +98,18 @@ def convert_osba(

    # OSBA -> OMASA
    elif isinstance(out, audio.OMASAAudio):
        # TODO (treffehn)
        # check if ism object number is the same
        if out.num_ism_channels != osba.num_ism_channels:
            raise ValueError("OSBA to OMASA conversion only possible if number of ISM objects matches")

        # only render SBA part
        out_sba = audio.fromtype(out.name[4:])
        out_sba.metadata_file = out.metadata_files[-1]
        render_sba_to_masa(sba, out_sba)

        out.audio[:, :osba.num_ism_channels] = osba.audio[:, :osba.num_ism_channels]
        out.audio[:, osba.num_ism_channels:] = out_sba.audio
        # out.audio[:, :osba.num_ism_channels] = osba.audio[:, :osba.num_ism_channels]
        # out.audio[:, osba.num_ism_channels:] = out_sba.audio
        out.audio = np.concatenate((osba.audio[:, :osba.num_ism_channels], out_sba.audio), axis=1)

    else:
        raise NotImplementedError(
+1 −1
Original line number Diff line number Diff line
@@ -198,7 +198,7 @@ def render_sba_to_masa(
) -> None:

    num_tcs = masa_out.num_channels
    md_out_path = masa_out.metadata_files  # TODO: get metadata files
    md_out_path = masa_out.metadata_file

    masa = masaAnalyzer(sba_in, num_tcs, masa_out.dirs, md_out_path)
    masa_out.audio = masa.audio
+1 −1
Original line number Diff line number Diff line
@@ -106,6 +106,6 @@ def masaAnalyzer(
        run(cmd, cwd=binary.resolve().parent)

        fmt = f"MASA{num_tcs}DIR{num_dirs}"
        masa = audio.fromfile(fmt, tmp_out_pcm, 48000, [metadata_out_path])
        masa = audio.fromfile(fmt, tmp_out_pcm, 48000, metadata_out_path)

        return masa