Commit 1b61b072 authored by Anika Treffehn
Browse files

added omasa and osba to audio formats

parent 3d27e237
Loading
Loading
Loading
Loading
+208 −1
Original line number Diff line number Diff line
@@ -48,6 +48,8 @@ from ivas_processing_scripts.audiotools.constants import (
    NUMBER_COLUMNS_ISM_METADATA,
    OBJECT_BASED_AUDIO_FORMATS,
    SCENE_BASED_AUDIO_FORMATS,
    OMASA_AUDIO_FORMATS,
    OSBA_AUDIO_FORMATS,
)

from .EFAP import wrap_angles
@@ -376,6 +378,207 @@ class SceneBasedAudio(Audio):
        return super()._from_filelist(name, filename, fs)


class OMASAAudio(Audio):
    """Audio container for the combined object-based (ISM) + MASA format.

    The properties stored for the format in ``OMASA_AUDIO_FORMATS`` are copied
    onto the instance. ``object_pos`` and ``metadata_files`` start out empty.
    """

    # TODO treffehn: write class -- metadata loading (the counterparts of
    # OSBAAudio._from_file, OSBAAudio._from_filelist and
    # OSBAAudio.init_metadata) is not implemented for OMASA yet.

    def __init__(self, name: str):
        super().__init__(name)

        format_spec = OMASA_AUDIO_FORMATS.get(name.upper())
        if format_spec is None:
            raise ValueError(f"Unsupported OMASA audio format {name}")
        self.__dict__.update(format_spec)

        self.object_pos = []
        # ISM metadata files come first, followed by the MASA metadata
        self.metadata_files = []


class OSBAAudio(Audio):
    """Sub-class for OSBA audio (ISM objects combined with ambisonics).

    The properties stored for the format in ``OSBA_AUDIO_FORMATS``
    (``num_channels``, ``num_ism_channels``) are copied onto the instance.
    One metadata file is expected per ISM channel; the remaining channel
    count determines ``ambi_order``.
    """

    def __init__(self, name: str):
        super().__init__(name)
        try:
            self.__dict__.update(OSBA_AUDIO_FORMATS[name.upper()])
        except KeyError:
            raise ValueError(f"Unsupported OSBA audio format {name}") from None
        self.object_pos = []
        self.metadata_files = []
        # the channels left after the ISM channels number (order + 1)^2
        self.ambi_order = int(
            np.sqrt(self.num_channels - self.num_ism_channels) - 1
        )

    @classmethod
    def _from_file(
        cls,
        name: str,
        filename: Union[str, Path],
        metadata_files: Optional[list[Union[str, Path]]],
        fs: Optional[int] = None,
    ) -> "OSBAAudio":
        """Load the audio file and attach the ISM metadata.

        If ``metadata_files`` is None, one file per ISM channel is searched
        next to the audio file and a warning lists the files that were used.

        Raises
        ------
        ValueError
            If a metadata file expected by the naming scheme does not exist
            (or for any error raised by ``init_metadata``).
        """
        obj = super()._from_file(name, filename, fs)
        if metadata_files is not None:
            obj.metadata_files = [Path(f) for f in metadata_files]
        else:
            # filename may be passed as a plain string -> make sure the
            # pathlib API is available for building the metadata names
            audio_path = Path(filename)
            # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv
            for obj_idx in range(obj.num_ism_channels):
                file_name_meta = audio_path.with_suffix(
                    f"{audio_path.suffix}.{obj_idx}.csv"
                )
                if not file_name_meta.is_file():
                    raise ValueError(f"Metadata file {file_name_meta} not found.")
                obj.metadata_files.append(file_name_meta)
            warn(
                f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}"
            )

        obj.init_metadata()
        return obj

    @classmethod
    def _from_filelist(
        cls,
        name: str,
        filename: Path,
        metadata_files: list[Union[str, Path]],
        fs: Optional[int] = None,
    ) -> "OSBAAudio":
        """Build the object via the parent ``_from_filelist`` and attach the given ISM metadata."""
        obj = super()._from_filelist(name, filename, fs)
        obj.metadata_files = [Path(f) for f in metadata_files]
        obj.init_metadata()
        return obj

    def init_metadata(self):
        """Read all metadata files into ``self.object_pos``.

        Each file is padded to ``NUMBER_COLUMNS_ISM_METADATA`` columns with the
        defaults from ``DEFAULT_ISM_METADATA``, cut or looped to the number of
        frames of the audio signal, and its first two columns are passed
        through ``wrap_angles``.

        Raises
        ------
        ValueError
            If the number of metadata files differs from ``num_ism_channels``
            or a file has fewer than 2 or too many columns.
        """
        # check if number of metadata files matches format
        if self.num_ism_channels != len(self.metadata_files):
            raise ValueError(
                f"Mismatch between number of ISM channels [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]"
            )

        # number of frames covered by the audio signal (same for every object)
        num_frames = int(
            np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000))
        )

        self.object_pos = []
        for meta_file in self.metadata_files:
            # NOTE(review): np.genfromtxt returns a 1-D array for a file with a
            # single row, in which case pos.shape[1] below raises IndexError
            pos = np.genfromtxt(meta_file, delimiter=",")

            # check if metadata has right number of columns
            num_columns = pos.shape[1]
            if num_columns < 2:
                raise ValueError(
                    "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory."
                )
            elif num_columns > NUMBER_COLUMNS_ISM_METADATA:
                raise ValueError("Too many columns in metadata")

            # pad metadata to max number of columns
            if num_columns < NUMBER_COLUMNS_ISM_METADATA:
                pos = np.hstack(
                    [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])]
                )

            # check if metadata is longer than file -> cut off
            if num_frames < pos.shape[0]:
                pos = pos[:num_frames]
            # check if metadata is shorter than file -> loop
            elif num_frames > pos.shape[0]:
                pos_loop = np.zeros((num_frames, pos.shape[1]))
                pos_loop[: pos.shape[0]] = pos
                # only the first two columns are repeated, the others stay zero
                for idx in range(pos.shape[0], num_frames):
                    pos_loop[idx, :2] = pos[idx % pos.shape[0], :2]
                pos = pos_loop

            # wrap metadata to target value range
            for j in range(num_frames):
                pos[j, 0], pos[j, 1] = wrap_angles(pos[j, 0], pos[j, 1], clip_ele=True)

            self.object_pos.append(pos)


def _get_audio_class(fmt) -> Audio:
    """Return a child audio class corresponding to the specifed format"""
    if fmt in BINAURAL_AUDIO_FORMATS.keys():
@@ -387,9 +590,13 @@ def _get_audio_class(fmt) -> Audio:
    elif fmt in SCENE_BASED_AUDIO_FORMATS.keys():
        return SceneBasedAudio
    elif (
        fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() or CHANNEL_BASED_AUDIO_ALTNAMES.keys()
        fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() or fmt in CHANNEL_BASED_AUDIO_ALTNAMES.keys()
    ):
        return ChannelBasedAudio
    elif fmt in OSBA_AUDIO_FORMATS.keys():
        return OSBAAudio
    elif fmt in OMASA_AUDIO_FORMATS.keys():
        return OMASAAudio
    elif Path(fmt).suffix == ".txt":
        return ChannelBasedAudio
    else:
+80 −0
Original line number Diff line number Diff line
@@ -303,6 +303,85 @@ SCENE_BASED_AUDIO_FORMATS = {
    },
}

# Combined ISM + MASA formats, keyed "ISM<n>MASA<m>".
# num_channels is n + m; num_ism_channels (n) is required by convert_omasa to
# split the signal into its ISM and MASA parts.
OMASA_AUDIO_FORMATS = {
    "ISM1MASA1": {
        "num_channels": 2,
        "num_ism_channels": 1,
    },
    "ISM1MASA2": {
        "num_channels": 3,
        "num_ism_channels": 1,
    },
    "ISM2MASA1": {
        "num_channels": 3,
        "num_ism_channels": 2,
    },
    "ISM2MASA2": {
        "num_channels": 4,
        "num_ism_channels": 2,
    },
    "ISM3MASA1": {
        "num_channels": 4,
        "num_ism_channels": 3,
    },
    "ISM3MASA2": {
        "num_channels": 5,
        "num_ism_channels": 3,
    },
    "ISM4MASA1": {
        "num_channels": 5,
        "num_ism_channels": 4,
    },
    "ISM4MASA2": {
        "num_channels": 6,
        "num_ism_channels": 4,
    },
}


# Combined ISM + SBA formats, keyed "ISM<n>SBA<o>" for n = 1..4 and o = 1..3.
# Each entry has n ISM channels plus (o + 1) ** 2 further channels.
OSBA_AUDIO_FORMATS = {
    f"ISM{num_ism}SBA{order}": {
        "num_channels": num_ism + (order + 1) ** 2,
        "num_ism_channels": num_ism,
    }
    for num_ism in range(1, 5)
    for order in range(1, 4)
}

# Set of scene metadata format names; currently the single entry "META".
SCENE_METADATA_FORMATS = {"META"}

AUDIO_FORMATS = [
@@ -311,6 +390,7 @@ AUDIO_FORMATS = [
    METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS,
    OBJECT_BASED_AUDIO_FORMATS,
    SCENE_BASED_AUDIO_FORMATS,
    OMASA_AUDIO_FORMATS,
]


+19 −3
Original line number Diff line number Diff line
@@ -43,6 +43,8 @@ from ivas_processing_scripts.audiotools.convert.channelbased import convert_chan
from ivas_processing_scripts.audiotools.convert.masa import convert_masa
from ivas_processing_scripts.audiotools.convert.objectbased import convert_objectbased
from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased
from ivas_processing_scripts.audiotools.convert.osba import convert_osba
from ivas_processing_scripts.audiotools.convert.omasa import convert_omasa
from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm
from ivas_processing_scripts.audiotools.wrappers.esdru import esdru
from ivas_processing_scripts.audiotools.wrappers.filter import (
@@ -311,28 +313,38 @@ def format_conversion(
    """Convert one audio format to another"""

    # validation
    # check for MASA/OMASA as output
    if isinstance(output, audio.MetadataAssistedSpatialAudio) and not (
        isinstance(input, audio.SceneBasedAudio)
        or isinstance(input, audio.MetadataAssistedSpatialAudio)
    ):
        raise NotImplementedError("Can only convert to MASA from SBA")
    if isinstance(output, audio.OMASAAudio) and not (
        isinstance(input, audio.OSBAAudio)
        or isinstance(input, audio.OMASAAudio)
    ):
        raise NotImplementedError("Can only convert to OMASA from OSBA")

    if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name:
    # check for ISM (also OMASA and OSBA) as output
    if (isinstance(output, audio.ObjectBasedAudio) or isinstance(output, audio.OMASAAudio) or isinstance(output, audio.OSBAAudio)) and input.name != output.name:
        raise NotImplementedError(
            "ISM is not supported as an output for rendering! Only usable as pass-through"
            "ISM (also in combined formats) is not supported as an output for rendering! Only usable as pass-through"
        )

    if logger:
        logger.debug(f"Format conversion: {input.name} -> {output.name}")

    # format conversion
    # check if input and output format are the same
    if (fmt := input.name) == output.name or (
        input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL")
    ):
        output.audio = input.audio
        if fmt.startswith("MASA"):
            output.metadata_file = input.metadata_file
        elif fmt.startswith("ISM"):
        elif fmt.startswith("ISM"):  # also includes combined formats
            output.metadata_files = list(output.metadata_files)

    else:
        if isinstance(input, audio.BinauralAudio):
            raise NotImplementedError(
@@ -346,6 +358,10 @@ def format_conversion(
            convert_objectbased(input, output, **kwargs)
        elif isinstance(input, audio.SceneBasedAudio):
            convert_scenebased(input, output, **kwargs)
        elif isinstance(input, audio.OSBAAudio):
            convert_osba(input, output, **kwargs)
        elif isinstance(input, audio.OMASAAudio):
            convert_omasa(input, output, **kwargs)
        else:
            raise NotImplementedError(
                f"Unknown or unsupported audio format {input.name}"
+112 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
#
#  (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
#  Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
#  Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
#  Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
#  contributors to this repository. All Rights Reserved.
#
#  This software is protected by copyright law and by international treaties.
#  The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
#  Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
#  Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
#  Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
#  contributors to this repository retain full ownership rights in their respective contributions in
#  the software. This notice grants no license of any kind, including but not limited to patent
#  license, nor is any license granted by implication, estoppel or otherwise.
#
#  Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
#  contributions.
#
#  This software is provided "AS IS", without any express or implied warranties. The software is in the
#  development stage. It is intended exclusively for experts who have experience with such software and
#  solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
#  and fitness for a particular purpose are hereby disclaimed and excluded.
#
#  Any dispute, controversy or claim arising under or in relation to providing this software shall be
#  submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
#  accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
#  the United Nations Convention on Contracts on the International Sales of Goods.
#

import copy

from ivas_processing_scripts.audiotools import audio
from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \
    render_oba_to_sba
from ivas_processing_scripts.audiotools.convert.masa import render_masa_to_binaural, render_masa_to_cba, render_masa_to_sba

""" OMASAAudio functions """


def convert_omasa(
    omasa: audio.OMASAAudio,
    out: audio.Audio,
    **kwargs,
) -> audio.Audio:
    """Convert an OMASA signal to the requested output format.

    The first ``omasa.num_ism_channels`` channels are wrapped in an ISM object
    (sharing the metadata files and object positions of ``omasa``), the
    remaining channels in a MASA object. For binaural, channel-based and
    scene-based outputs both parts are rendered into separate deep copies of
    ``out`` and the two results are summed into ``out.audio``.

    Parameters
    ----------
    omasa: audio.OMASAAudio
        Input signal
    out: audio.Audio
        Output object; its ``audio`` attribute is written
    **kwargs
        Forwarded to the binaural renderers only

    Returns
    -------
    audio.Audio
        The ``out`` object that was passed in

    Raises
    ------
    NotImplementedError
        If the type of ``out`` is not handled
    """
    num_ism = omasa.num_ism_channels

    # split OMASA object in ISM and MASA object
    ism_part = audio.fromarray(f"ISM{num_ism}", omasa.audio[:, :num_ism], omasa.fs)
    ism_part.metadata_files = omasa.metadata_files
    ism_part.object_pos = omasa.object_pos
    masa_part = audio.fromarray(
        f"MASA{omasa.num_channels - num_ism}", omasa.audio[:, num_ism:], omasa.fs
    )

    # pick the renderer pair for the requested output format
    if isinstance(out, audio.BinauralAudio):
        # OMASA -> Binaural (the only case that receives the keyword arguments)
        render_ism, render_masa, extra = (
            render_oba_to_binaural,
            render_masa_to_binaural,
            kwargs,
        )
    elif isinstance(out, audio.ChannelBasedAudio):
        # OMASA -> CBA
        render_ism, render_masa, extra = render_oba_to_cba, render_masa_to_cba, {}
    elif isinstance(out, audio.SceneBasedAudio):
        # OMASA -> SBA
        render_ism, render_masa, extra = render_oba_to_sba, render_masa_to_sba, {}
    elif isinstance(out, audio.OSBAAudio):
        # OMASA -> OSBA
        # TODO (treffehn)
        # only the MASA part is rendered, the ISM channels are copied over
        # NOTE(review): the render target is created with fromtype("MASA")
        # although render_masa_to_sba is called on it, and out.audio has to be
        # allocated already for the slice assignments -- confirm both.
        masa_rendered = audio.fromtype("MASA")
        render_masa_to_sba(masa_part, masa_rendered)

        out.audio[:, :num_ism] = omasa.audio[:, :num_ism]
        out.audio[:, num_ism:] = masa_rendered.audio
        return out
    else:
        raise NotImplementedError(
            f"Conversion from {omasa.name} to {out.name} is unsupported!"
        )

    # render ISM and MASA part separately into copies of the output object
    ism_rendered = copy.deepcopy(out)
    render_ism(ism_part, ism_rendered, **extra)

    masa_rendered = copy.deepcopy(out)
    render_masa(masa_part, masa_rendered, **extra)

    # combine results
    out.audio = ism_rendered.audio + masa_rendered.audio
    return out
+113 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
#
#  (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
#  Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
#  Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
#  Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
#  contributors to this repository. All Rights Reserved.
#
#  This software is protected by copyright law and by international treaties.
#  The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
#  Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
#  Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
#  Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
#  contributors to this repository retain full ownership rights in their respective contributions in
#  the software. This notice grants no license of any kind, including but not limited to patent
#  license, nor is any license granted by implication, estoppel or otherwise.
#
#  Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
#  contributions.
#
#  This software is provided "AS IS", without any express or implied warranties. The software is in the
#  development stage. It is intended exclusively for experts who have experience with such software and
#  solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
#  and fitness for a particular purpose are hereby disclaimed and excluded.
#
#  Any dispute, controversy or claim arising under or in relation to providing this software shall be
#  submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
#  accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
#  the United Nations Convention on Contracts on the International Sales of Goods.
#

import copy

from ivas_processing_scripts.audiotools import audio
from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \
    render_oba_to_sba
from ivas_processing_scripts.audiotools.convert.scenebased import render_sba_to_binaural, render_sba_to_cba, \
    render_sba_to_sba, render_sba_to_masa

""" OSBAAudio functions """


def convert_osba(
    osba: audio.OSBAAudio,
    out: audio.Audio,
    **kwargs,
) -> audio.Audio:
    """Convert an OSBA signal to the requested output format.

    The first ``osba.num_ism_channels`` channels are wrapped in an ISM object
    (sharing the metadata files and object positions of ``osba``), the
    remaining channels in an SBA object of order ``osba.ambi_order``. For
    binaural, channel-based and scene-based outputs both parts are rendered
    into separate deep copies of ``out`` and the two results are summed into
    ``out.audio``.

    Parameters
    ----------
    osba: audio.OSBAAudio
        Input signal
    out: audio.Audio
        Output object; its ``audio`` attribute is written
    **kwargs
        Forwarded to the binaural renderers only

    Returns
    -------
    audio.Audio
        The ``out`` object that was passed in

    Raises
    ------
    NotImplementedError
        If the type of ``out`` is not handled
    """
    num_ism = osba.num_ism_channels

    # split OSBA object in ISM and SBA object
    ism_part = audio.fromarray(f"ISM{num_ism}", osba.audio[:, :num_ism], osba.fs)
    ism_part.metadata_files = osba.metadata_files
    ism_part.object_pos = osba.object_pos
    sba_part = audio.fromarray(
        f"SBA{osba.ambi_order}", osba.audio[:, num_ism:], osba.fs
    )

    # pick the renderer pair for the requested output format
    if isinstance(out, audio.BinauralAudio):
        # OSBA -> Binaural (the only case that receives the keyword arguments)
        render_ism, render_sba, extra = (
            render_oba_to_binaural,
            render_sba_to_binaural,
            kwargs,
        )
    elif isinstance(out, audio.ChannelBasedAudio):
        # OSBA -> CBA
        render_ism, render_sba, extra = render_oba_to_cba, render_sba_to_cba, {}
    elif isinstance(out, audio.SceneBasedAudio):
        # OSBA -> SBA
        render_ism, render_sba, extra = render_oba_to_sba, render_sba_to_sba, {}
    elif isinstance(out, audio.OMASAAudio):
        # OSBA -> OMASA
        # TODO (treffehn)
        # only the SBA part is rendered, the ISM channels are copied over
        # NOTE(review): the render target comes from fromtype("MASA") without a
        # channel count, and out.audio has to be allocated already for the
        # slice assignments -- confirm both.
        sba_rendered = audio.fromtype("MASA")
        render_sba_to_masa(sba_part, sba_rendered)

        out.audio[:, :num_ism] = osba.audio[:, :num_ism]
        out.audio[:, num_ism:] = sba_rendered.audio
        return out
    else:
        raise NotImplementedError(
            f"Conversion from {osba.name} to {out.name} is unsupported!"
        )

    # render ISM and SBA part separately into copies of the output object
    ism_rendered = copy.deepcopy(out)
    render_ism(ism_part, ism_rendered, **extra)

    sba_rendered = copy.deepcopy(out)
    render_sba(sba_part, sba_rendered, **extra)

    # combine results
    out.audio = ism_rendered.audio + sba_rendered.audio
    return out