From 1b61b07241ad29f88e611506f13c9677c54c70fc Mon Sep 17 00:00:00 2001 From: Treffehn Date: Thu, 7 Sep 2023 17:01:26 +0200 Subject: [PATCH 01/22] added omasa and osba to audio formats --- ivas_processing_scripts/audiotools/audio.py | 209 +++++++++++++++++- .../audiotools/constants.py | 80 +++++++ .../audiotools/convert/__init__.py | 22 +- .../audiotools/convert/omasa.py | 112 ++++++++++ .../audiotools/convert/osba.py | 113 ++++++++++ 5 files changed, 532 insertions(+), 4 deletions(-) create mode 100644 ivas_processing_scripts/audiotools/convert/omasa.py create mode 100644 ivas_processing_scripts/audiotools/convert/osba.py diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index 5e62b3fb..8db29cd5 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -48,6 +48,8 @@ from ivas_processing_scripts.audiotools.constants import ( NUMBER_COLUMNS_ISM_METADATA, OBJECT_BASED_AUDIO_FORMATS, SCENE_BASED_AUDIO_FORMATS, + OMASA_AUDIO_FORMATS, + OSBA_AUDIO_FORMATS, ) from .EFAP import wrap_angles @@ -376,6 +378,207 @@ class SceneBasedAudio(Audio): return super()._from_filelist(name, filename, fs) +class OMASAAudio(Audio): + """Sub-class for combined OMASA format""" + # TODO treffehn: write class + def __init__(self, name: str): + super().__init__(name) + try: + self.__dict__.update(OMASA_AUDIO_FORMATS[name.upper()]) + except KeyError: + raise ValueError(f"Unsupported OMASA audio format {name}") + self.object_pos = [] + self.metadata_files = [] # first ISM metadata followed by masa metadata + + # @classmethod + # def _from_file( + # cls, + # name: str, + # filename: Union[str, Path], + # metadata_files_ism: list[Union[str, Path]], + # fs: Optional[int] = None, + # ) -> "ObjectBasedAudio": + # obj = super()._from_file(name, filename, fs) + # if metadata_files is not None: + # obj.metadata_files = [Path(f) for f in metadata_files] + # else: + # # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv + # for obj_idx in range(obj.num_channels): + # file_name_meta = filename.with_suffix( + # f"{filename.suffix}.{obj_idx}.csv" + # ) + # if file_name_meta.is_file(): + # obj.metadata_files.append(file_name_meta) + # else: + # raise ValueError(f"Metadata file {file_name_meta} not found.") + # warn( + # f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}" + # ) + # + # obj.init_metadata() + # return obj + # + # @classmethod + # def _from_filelist( + # cls, + # name: str, + # filename: Path, + # metadata_files: list[Union[str, Path]], + # fs: Optional[int] = None, + # ) -> "ObjectBasedAudio": + # obj = super()._from_filelist(name, filename, fs) + # obj.metadata_files = [Path(f) for f in metadata_files] + # obj.init_metadata() + # return obj + # + # def init_metadata(self): + # # check if number of metadata files matches format + # if self.audio.shape[1] != len(self.metadata_files): + # raise ValueError( + # f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]" + # ) + # + # self.object_pos = [] + # for i, f in enumerate(self.metadata_files): + # pos = np.genfromtxt(f, delimiter=",") + # + # # check if metadata has right number of columns + # num_columns = pos.shape[1] + # if num_columns < 2: + # raise ValueError( + # "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory." + # ) + # elif num_columns > NUMBER_COLUMNS_ISM_METADATA: + # raise ValueError("Too many columns in metadata") + # + # # pad metadata to max number of columns + # if num_columns < NUMBER_COLUMNS_ISM_METADATA: + # pos = np.hstack( + # [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])] + # ) + # + # # check if metadata is longer than file -> cut off + # num_frames = int( + # np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000)) + # ) + # if num_frames < pos.shape[0]: + # pos = pos[:num_frames] + # # check if metadata is shorter than file -> loop + # elif num_frames > pos.shape[0]: + # pos_loop = np.zeros((num_frames, pos.shape[1])) + # pos_loop[: pos.shape[0]] = pos + # for idx in range(pos.shape[0], num_frames): + # pos_loop[idx, :2] = pos[idx % pos.shape[0], :2] + # pos = pos_loop + # + # # wrap metadata to target value range + # for j in range(num_frames): + # pos[j, 0], pos[j, 1] = wrap_angles(pos[j, 0], pos[j, 1], clip_ele=True) + # + # self.object_pos.append(pos) + + +class OSBAAudio(Audio): + """Sub-class for OSBA audio""" + + def __init__(self, name: str): + super().__init__(name) + try: + self.__dict__.update(OSBA_AUDIO_FORMATS[name.upper()]) + except KeyError: + raise ValueError(f"Unsupported OSBA audio format {name}") + self.object_pos = [] + self.metadata_files = [] + self.ambi_order = int(np.sqrt(self.num_channels-self.num_ism_channels) - 1) + + @classmethod + def _from_file( + cls, + name: str, + filename: Union[str, Path], + metadata_files: list[Union[str, Path]], + fs: Optional[int] = None, + ) -> "OSBAAudio": + obj = super()._from_file(name, filename, fs) + if metadata_files is not None: + obj.metadata_files = [Path(f) for f in metadata_files] + else: + # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv + for obj_idx in range(obj.num_ism_channels): + file_name_meta = filename.with_suffix( + f"{filename.suffix}.{obj_idx}.csv" + ) + if file_name_meta.is_file(): + obj.metadata_files.append(file_name_meta) + else: + raise ValueError(f"Metadata file {file_name_meta} not found.") + warn( + f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}" + ) + + obj.init_metadata() + return obj + + @classmethod + def _from_filelist( + cls, + name: str, + filename: Path, + metadata_files: list[Union[str, Path]], + fs: Optional[int] = None, + ) -> "OSBAAudio": + obj = super()._from_filelist(name, filename, fs) + obj.metadata_files = [Path(f) for f in metadata_files] + obj.init_metadata() + return obj + + def init_metadata(self): + # check if number of metadata files matches format + if self.num_ism_channels != len(self.metadata_files): + raise ValueError( + f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]" + ) + + self.object_pos = [] + for i, f in enumerate(self.metadata_files): + pos = np.genfromtxt(f, delimiter=",") + + # check if metadata has right number of columns + num_columns = pos.shape[1] + if num_columns < 2: + raise ValueError( + "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory." + ) + elif num_columns > NUMBER_COLUMNS_ISM_METADATA: + raise ValueError("Too many columns in metadata") + + # pad metadata to max number of columns + if num_columns < NUMBER_COLUMNS_ISM_METADATA: + pos = np.hstack( + [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])] + ) + + # check if metadata is longer than file -> cut off + num_frames = int( + np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000)) + ) + if num_frames < pos.shape[0]: + pos = pos[:num_frames] + # check if metadata is shorter than file -> loop + elif num_frames > pos.shape[0]: + pos_loop = np.zeros((num_frames, pos.shape[1])) + pos_loop[: pos.shape[0]] = pos + for idx in range(pos.shape[0], num_frames): + pos_loop[idx, :2] = pos[idx % pos.shape[0], :2] + pos = pos_loop + + # wrap metadata to target value range + for j in range(num_frames): + pos[j, 0], pos[j, 1] = wrap_angles(pos[j, 0], pos[j, 1], clip_ele=True) + + self.object_pos.append(pos) + + def _get_audio_class(fmt) -> Audio: """Return a child audio class corresponding to the specifed format""" if fmt in BINAURAL_AUDIO_FORMATS.keys(): @@ -387,9 +590,13 @@ def _get_audio_class(fmt) -> Audio: elif fmt in SCENE_BASED_AUDIO_FORMATS.keys(): return SceneBasedAudio elif ( - fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() or CHANNEL_BASED_AUDIO_ALTNAMES.keys() + fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() or fmt in CHANNEL_BASED_AUDIO_ALTNAMES.keys() ): return ChannelBasedAudio + elif fmt in OSBA_AUDIO_FORMATS.keys(): + return OSBAAudio + elif fmt in OMASA_AUDIO_FORMATS.keys(): + return OMASAAudio elif Path(fmt).suffix == ".txt": return ChannelBasedAudio else: diff --git a/ivas_processing_scripts/audiotools/constants.py b/ivas_processing_scripts/audiotools/constants.py index a77dcce7..fa2b06a6 100755 --- a/ivas_processing_scripts/audiotools/constants.py +++ b/ivas_processing_scripts/audiotools/constants.py @@ -303,6 +303,85 @@ SCENE_BASED_AUDIO_FORMATS = { }, } +OMASA_AUDIO_FORMATS = { + "ISM1MASA1": { + "num_channels": 2, + }, + "ISM1MASA2": { + "num_channels": 3, + }, + "ISM2MASA1": { + "num_channels": 3, + }, + "ISM2MASA2": { + "num_channels": 4, + }, + "ISM3MASA1": { + "num_channels": 4, + }, + "ISM3MASA2": { + "num_channels": 5, + }, + "ISM4MASA1": { + "num_channels": 5, + }, + "ISM4MASA2": { + "num_channels": 6, + }, +} + + +OSBA_AUDIO_FORMATS = { + "ISM1SBA1": { + "num_channels": 5, + "num_ism_channels": 1, + }, + "ISM1SBA2": { + "num_channels": 10, + "num_ism_channels": 1, + }, + "ISM1SBA3": { + "num_channels": 17, + "num_ism_channels": 1, + }, + "ISM2SBA1": { + "num_channels": 6, + "num_ism_channels": 2, + }, + "ISM2SBA2": { + "num_channels": 11, + "num_ism_channels": 2, + }, + "ISM2SBA3": { + "num_channels": 18, + "num_ism_channels": 2, + }, + "ISM3SBA1": { + "num_channels": 7, + "num_ism_channels": 3, + }, + "ISM3SBA2": { + "num_channels": 12, + "num_ism_channels": 3, + }, + "ISM3SBA3": { + "num_channels": 19, + "num_ism_channels": 3, + }, + "ISM4SBA1": { + "num_channels": 8, + "num_ism_channels": 4, + }, + "ISM4SBA2": { + "num_channels": 13, + "num_ism_channels": 4, + }, + "ISM4SBA3": { + "num_channels": 20, + "num_ism_channels": 4, + }, +} + SCENE_METADATA_FORMATS = {"META"} AUDIO_FORMATS = [ @@ -311,6 +390,7 @@ AUDIO_FORMATS = [ METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, OBJECT_BASED_AUDIO_FORMATS, SCENE_BASED_AUDIO_FORMATS, + OMASA_AUDIO_FORMATS, ] diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index e26c8c6f..607a186d 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -43,6 +43,8 @@ from ivas_processing_scripts.audiotools.convert.channelbased import convert_chan from ivas_processing_scripts.audiotools.convert.masa import convert_masa from ivas_processing_scripts.audiotools.convert.objectbased import convert_objectbased from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased +from ivas_processing_scripts.audiotools.convert.osba import convert_osba +from ivas_processing_scripts.audiotools.convert.omasa import convert_omasa from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm from ivas_processing_scripts.audiotools.wrappers.esdru import esdru from ivas_processing_scripts.audiotools.wrappers.filter import ( @@ -311,28 +313,38 @@ def format_conversion( """Convert one audio format to another""" # validation + # check for MASA/OMASA as output if isinstance(output, audio.MetadataAssistedSpatialAudio) and not ( isinstance(input, audio.SceneBasedAudio) or isinstance(input, audio.MetadataAssistedSpatialAudio) ): raise NotImplementedError("Can only convert to MASA from SBA") + if isinstance(output, audio.OMASAAudio) and not ( + isinstance(input, audio.OSBAAudio) + or isinstance(input, audio.OMASAAudio) + ): + raise NotImplementedError("Can only convert to OMASA from OSBA") - if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name: + # check for ISM (also OMASA and OSBA) as output + if (isinstance(output, audio.ObjectBasedAudio) or isinstance(output, audio.OMASAAudio) or isinstance(output, audio.OSBAAudio)) and input.name != output.name: raise NotImplementedError( - "ISM is not supported as an output for rendering! Only usable as pass-through" + "ISM (also in combined formats) is not supported as an output for rendering! Only usable as pass-through" ) if logger: logger.debug(f"Format conversion: {input.name} -> {output.name}") + # format conversion + # check if input and output format are the same if (fmt := input.name) == output.name or ( input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL") ): output.audio = input.audio if fmt.startswith("MASA"): output.metadata_file = input.metadata_file - elif fmt.startswith("ISM"): + elif fmt.startswith("ISM"): # also includes combined formats output.metadata_files = list(output.metadata_files) + else: if isinstance(input, audio.BinauralAudio): raise NotImplementedError( @@ -346,6 +358,10 @@ def format_conversion( convert_objectbased(input, output, **kwargs) elif isinstance(input, audio.SceneBasedAudio): convert_scenebased(input, output, **kwargs) + elif isinstance(input, audio.OSBAAudio): + convert_osba(input, output, **kwargs) + elif isinstance(input, audio.OMASAAudio): + convert_omasa(input, output, **kwargs) else: raise NotImplementedError( f"Unknown or unsupported audio format {input.name}" diff --git a/ivas_processing_scripts/audiotools/convert/omasa.py b/ivas_processing_scripts/audiotools/convert/omasa.py new file mode 100644 index 00000000..3edd0654 --- /dev/null +++ b/ivas_processing_scripts/audiotools/convert/omasa.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +import copy + +from ivas_processing_scripts.audiotools import audio +from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \ + render_oba_to_sba +from ivas_processing_scripts.audiotools.convert.masa import render_masa_to_binaural, render_masa_to_cba, render_masa_to_sba + +""" OMASAAudio functions """ + + +def convert_omasa( + omasa: audio.OMASAAudio, + out: audio.Audio, + **kwargs, +) -> audio.Audio: + """Convert an OMASA signal to the requested output format""" + + # split OMASA object in ISM and MASA object + oba = audio.fromarray("ISM" + str(omasa.num_ism_channels), omasa.audio[:, :omasa.num_ism_channels], omasa.fs) + oba.metadata_files = omasa.metadata_files + oba.object_pos = omasa.object_pos + masa = audio.fromarray("MASA" + str(omasa.num_channels-omasa.num_ism_channels), omasa.audio[:, omasa.num_ism_channels:], omasa.fs) + + # OMASA -> Binaural + if isinstance(out, audio.BinauralAudio): + # render MASA and ISM part separately + # ISM + out_ism = copy.deepcopy(out) + render_oba_to_binaural(oba, out_ism, **kwargs) + + # MASA + out_masa = copy.deepcopy(out) + render_masa_to_binaural(masa, out_masa, **kwargs) + + # combine results + out.audio = out_ism.audio + out_masa.audio + + # OMASA -> CBA + elif isinstance(out, audio.ChannelBasedAudio): + # render MASA and ISM part separately + # ISM + out_ism = copy.deepcopy(out) + render_oba_to_cba(oba, out_ism) + + # MASA + out_masa = copy.deepcopy(out) + render_masa_to_cba(masa, out_masa) + + # combine results + out.audio = out_ism.audio + out_masa.audio + + # OMASA -> SBA + elif isinstance(out, audio.SceneBasedAudio): + # render MASA and ISM part separately + # ISM + out_ism = copy.deepcopy(out) + render_oba_to_sba(oba, out_ism) + + # MASA + out_masa = copy.deepcopy(out) + render_masa_to_sba(masa, out_masa) + + # combine results + out.audio = out_ism.audio + out_masa.audio + + # OMASA -> OSBA + elif isinstance(out, audio.OSBAAudio): + # TODO (treffehn) + # only render MASA part + out_sba = audio.fromtype("MASA") + render_masa_to_sba(masa, out_sba) + + out.audio[:, :omasa.num_ism_channels] = omasa.audio[:, :omasa.num_ism_channels] + out.audio[:, omasa.num_ism_channels:] = out_sba.audio + + else: + raise NotImplementedError( + f"Conversion from {omasa.name} to {out.name} is unsupported!" + ) + + return out diff --git a/ivas_processing_scripts/audiotools/convert/osba.py b/ivas_processing_scripts/audiotools/convert/osba.py new file mode 100644 index 00000000..2a24f520 --- /dev/null +++ b/ivas_processing_scripts/audiotools/convert/osba.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +import copy + +from ivas_processing_scripts.audiotools import audio +from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \ + render_oba_to_sba +from ivas_processing_scripts.audiotools.convert.scenebased import render_sba_to_binaural, render_sba_to_cba, \ + render_sba_to_sba, render_sba_to_masa + +""" OSBAAudio functions """ + + +def convert_osba( + osba: audio.OSBAAudio, + out: audio.Audio, + **kwargs, +) -> audio.Audio: + """Convert an OSBA signal to the requested output format""" + + # split OSBA object in ISM and SBA object + oba = audio.fromarray("ISM" + str(osba.num_ism_channels), osba.audio[:, :osba.num_ism_channels], osba.fs) + oba.metadata_files = osba.metadata_files + oba.object_pos = osba.object_pos + sba = audio.fromarray("SBA" + str(osba.ambi_order), osba.audio[:, osba.num_ism_channels:], osba.fs) + + # OSBA -> Binaural + if isinstance(out, audio.BinauralAudio): + # render SBA and ISM part separately + # ISM + out_ism = copy.deepcopy(out) + render_oba_to_binaural(oba, out_ism, **kwargs) + + # SBA + out_sba = copy.deepcopy(out) + render_sba_to_binaural(sba, out_sba, **kwargs) + + # combine results + out.audio = out_ism.audio + out_sba.audio + + # OSBA -> CBA + elif isinstance(out, audio.ChannelBasedAudio): + # render SBA and ISM part separately + # ISM + out_ism = copy.deepcopy(out) + render_oba_to_cba(oba, out_ism) + + # SBA + out_sba = copy.deepcopy(out) + render_sba_to_cba(sba, out_sba) + + # combine results + out.audio = out_ism.audio + out_sba.audio + + # OSBA -> SBA + elif isinstance(out, audio.SceneBasedAudio): + # render SBA and ISM part separately + # ISM + out_ism = copy.deepcopy(out) + render_oba_to_sba(oba, out_ism) + + # SBA + out_sba = copy.deepcopy(out) + render_sba_to_sba(sba, out_sba) + + # combine results + out.audio = out_ism.audio + out_sba.audio + + # OSBA -> OMASA + elif isinstance(out, audio.OMASAAudio): + # TODO (treffehn) + # only render SBA part + out_sba = audio.fromtype("MASA") + render_sba_to_masa(sba, out_sba) + + out.audio[:, :osba.num_ism_channels] = osba.audio[:, :osba.num_ism_channels] + out.audio[:, osba.num_ism_channels:] = out_sba.audio + + else: + raise NotImplementedError( + f"Conversion from {osba.name} to {out.name} is unsupported!" + ) + + return out -- GitLab From 1835ea328b67b761a7776a3a9f25e8a66bbbfca8 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Fri, 8 Sep 2023 11:09:59 +0200 Subject: [PATCH 02/22] added from file functions to OMASA class --- ivas_processing_scripts/audiotools/audio.py | 175 ++++++++++---------- 1 file changed, 87 insertions(+), 88 deletions(-) diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index 8db29cd5..1bf996be 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -380,7 +380,6 @@ class SceneBasedAudio(Audio): class OMASAAudio(Audio): """Sub-class for combined OMASA format""" - # TODO treffehn: write class def __init__(self, name: str): super().__init__(name) try: @@ -390,92 +389,92 @@ class OMASAAudio(Audio): self.object_pos = [] self.metadata_files = [] # first ISM metadata followed by masa metadata - # @classmethod - # def _from_file( - # cls, - # name: str, - # filename: Union[str, Path], - # metadata_files_ism: list[Union[str, Path]], - # fs: Optional[int] = None, - # ) -> "ObjectBasedAudio": - # obj = super()._from_file(name, filename, fs) - # if metadata_files is not None: - # obj.metadata_files = [Path(f) for f in metadata_files] - # else: - # # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv - # for obj_idx in range(obj.num_channels): - # file_name_meta = filename.with_suffix( - # f"{filename.suffix}.{obj_idx}.csv" - # ) - # if file_name_meta.is_file(): - # obj.metadata_files.append(file_name_meta) - # else: - # raise ValueError(f"Metadata file {file_name_meta} not found.") - # warn( - # f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}" - # ) - # - # obj.init_metadata() - # return obj - # - # @classmethod - # def _from_filelist( - # cls, - # name: str, - # filename: Path, - # metadata_files: list[Union[str, Path]], - # fs: Optional[int] = None, - # ) -> "ObjectBasedAudio": - # obj = super()._from_filelist(name, filename, fs) - # obj.metadata_files = [Path(f) for f in metadata_files] - # obj.init_metadata() - # return obj - # - # def init_metadata(self): - # # check if number of metadata files matches format - # if self.audio.shape[1] != len(self.metadata_files): - # raise ValueError( - # f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]" - # ) - # - # self.object_pos = [] - # for i, f in enumerate(self.metadata_files): - # pos = np.genfromtxt(f, delimiter=",") - # - # # check if metadata has right number of columns - # num_columns = pos.shape[1] - # if num_columns < 2: - # raise ValueError( - # "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory." - # ) - # elif num_columns > NUMBER_COLUMNS_ISM_METADATA: - # raise ValueError("Too many columns in metadata") - # - # # pad metadata to max number of columns - # if num_columns < NUMBER_COLUMNS_ISM_METADATA: - # pos = np.hstack( - # [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])] - # ) - # - # # check if metadata is longer than file -> cut off - # num_frames = int( - # np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000)) - # ) - # if num_frames < pos.shape[0]: - # pos = pos[:num_frames] - # # check if metadata is shorter than file -> loop - # elif num_frames > pos.shape[0]: - # pos_loop = np.zeros((num_frames, pos.shape[1])) - # pos_loop[: pos.shape[0]] = pos - # for idx in range(pos.shape[0], num_frames): - # pos_loop[idx, :2] = pos[idx % pos.shape[0], :2] - # pos = pos_loop - # - # # wrap metadata to target value range - # for j in range(num_frames): - # pos[j, 0], pos[j, 1] = wrap_angles(pos[j, 0], pos[j, 1], clip_ele=True) - # - # self.object_pos.append(pos) + @classmethod + def _from_file( + cls, + name: str, + filename: Union[str, Path], + metadata_files: list[Union[str, Path]], + fs: Optional[int] = None, + ) -> "OMASAAudio": + obj = super()._from_file(name, filename, fs) + if metadata_files is not None: + obj.metadata_files = [Path(f) for f in metadata_files] + else: + # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv + for obj_idx in range(obj.num_ism_channels): + file_name_meta = filename.with_suffix( + f"{filename.suffix}.{obj_idx}.csv" + ) + if file_name_meta.is_file(): + obj.metadata_files.append(file_name_meta) + else: + raise ValueError(f"Metadata file {file_name_meta} not found.") + warn( + f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}" + ) + + obj.init_metadata() + return obj + + @classmethod + def _from_filelist( + cls, + name: str, + filename: Path, + metadata_files: list[Union[str, Path]], + fs: Optional[int] = None, + ) -> "OMASAAudio": + obj = super()._from_filelist(name, filename, fs) + obj.metadata_files = [Path(f) for f in metadata_files] + obj.init_metadata() + return obj + + def init_metadata(self): + # check if number of metadata files matches format + if self.num_ism_channels != len(self.metadata_files): + raise ValueError( + f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]" + ) + + self.object_pos = [] + for i, f in enumerate(self.metadata_files): + pos = np.genfromtxt(f, delimiter=",") + + # check if metadata has right number of columns + num_columns = pos.shape[1] + if num_columns < 2: + raise ValueError( + "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory." + ) + elif num_columns > NUMBER_COLUMNS_ISM_METADATA: + raise ValueError("Too many columns in metadata") + + # pad metadata to max number of columns + if num_columns < NUMBER_COLUMNS_ISM_METADATA: + pos = np.hstack( + [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])] + ) + + # check if metadata is longer than file -> cut off + num_frames = int( + np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000)) + ) + if num_frames < pos.shape[0]: + pos = pos[:num_frames] + # check if metadata is shorter than file -> loop + elif num_frames > pos.shape[0]: + pos_loop = np.zeros((num_frames, pos.shape[1])) + pos_loop[: pos.shape[0]] = pos + for idx in range(pos.shape[0], num_frames): + pos_loop[idx, :2] = pos[idx % pos.shape[0], :2] + pos = pos_loop + + # wrap metadata to target value range + for j in range(num_frames): + pos[j, 0], pos[j, 1] = wrap_angles(pos[j, 0], pos[j, 1], clip_ele=True) + + self.object_pos.append(pos) class OSBAAudio(Audio): @@ -629,7 +628,7 @@ def fromfile( """Create an Audio object of the specified format from the given file""" filename = Path(filename) fmt_cls = _get_audio_class(fmt) - if fmt_cls is ObjectBasedAudio or fmt_cls is MetadataAssistedSpatialAudio: + if fmt_cls is ObjectBasedAudio or fmt_cls is MetadataAssistedSpatialAudio or fmt_cls is OMASAAudio or fmt_cls is OSBAAudio: return fmt_cls._from_file(fmt, filename, in_meta, fs) else: return fmt_cls._from_file(fmt, filename, fs) -- GitLab From da2af526571a90122fa0bd201e45a6cfd522fd4b Mon Sep 17 00:00:00 2001 From: Treffehn Date: Fri, 8 Sep 2023 14:58:42 +0200 Subject: [PATCH 03/22] some small additions to the omasa osba support --- ivas_processing_scripts/audiotools/audio.py | 4 ++-- ivas_processing_scripts/audiotools/constants.py | 8 ++++++++ ivas_processing_scripts/audiotools/convert/__init__.py | 8 ++++++-- ivas_processing_scripts/audiotools/convert/omasa.py | 4 ++++ ivas_processing_scripts/audiotools/convert/osba.py | 6 +++++- ivas_processing_scripts/audiotools/convert/scenebased.py | 8 ++++---- ivas_processing_scripts/processing/ivas.py | 4 ++++ 7 files changed, 33 insertions(+), 9 deletions(-) diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index 1bf996be..647a8bd7 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -434,7 +434,7 @@ class OMASAAudio(Audio): # check if number of metadata files matches format if self.num_ism_channels != len(self.metadata_files): raise ValueError( - f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]" + f"Mismatch between number of ism channels [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]" ) self.object_pos = [] @@ -535,7 +535,7 @@ class OSBAAudio(Audio): # check if number of metadata files matches format if self.num_ism_channels != len(self.metadata_files): raise ValueError( - f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]" + f"Mismatch between number of ism channels [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]" ) self.object_pos = [] diff --git a/ivas_processing_scripts/audiotools/constants.py b/ivas_processing_scripts/audiotools/constants.py index fa2b06a6..be50ad37 100755 --- a/ivas_processing_scripts/audiotools/constants.py +++ b/ivas_processing_scripts/audiotools/constants.py @@ -306,27 +306,35 @@ SCENE_BASED_AUDIO_FORMATS = { OMASA_AUDIO_FORMATS = { "ISM1MASA1": { "num_channels": 2, + "num_ism_channels": 1, }, "ISM1MASA2": { "num_channels": 3, + "num_ism_channels": 1, }, "ISM2MASA1": { "num_channels": 3, + "num_ism_channels": 2, }, "ISM2MASA2": { "num_channels": 4, + "num_ism_channels": 2, }, "ISM3MASA1": { "num_channels": 4, + "num_ism_channels": 3, }, "ISM3MASA2": { "num_channels": 5, + "num_ism_channels": 3, }, "ISM4MASA1": { "num_channels": 5, + "num_ism_channels": 4, }, "ISM4MASA2": { "num_channels": 6, + "num_ism_channels": 4, }, } diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 607a186d..7660b32f 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -326,9 +326,13 @@ def format_conversion( raise NotImplementedError("Can only convert to OMASA from OSBA") # check for ISM (also OMASA and OSBA) as output - if (isinstance(output, audio.ObjectBasedAudio) or isinstance(output, audio.OMASAAudio) or isinstance(output, audio.OSBAAudio)) and input.name != output.name: + if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name: raise NotImplementedError( - "ISM (also in combined formats) is not supported as an output for rendering! Only usable as pass-through" + "ISM is not supported as an output for rendering! Only usable as pass-through" + ) + if (isinstance(output, audio.OMASAAudio) and not isinstance(input, audio.OSBAAudio)) or (isinstance(output, audio.OSBAAudio) and not isinstance(input, audio.OMASAAudio)): + raise NotImplementedError( + "OMASA and OSBA only possible as output if input is OMASA or OSBA" ) if logger: diff --git a/ivas_processing_scripts/audiotools/convert/omasa.py b/ivas_processing_scripts/audiotools/convert/omasa.py index 3edd0654..3c662e40 100644 --- a/ivas_processing_scripts/audiotools/convert/omasa.py +++ b/ivas_processing_scripts/audiotools/convert/omasa.py @@ -97,6 +97,10 @@ def convert_omasa( # OMASA -> OSBA elif isinstance(out, audio.OSBAAudio): # TODO (treffehn) + # check if ism object number is the same + if out.num_ism_channels != masa.num_ism_channels: + raise ValueError("OMASA to OSBA conversion only possible if number of ISM objects matches") + # only render MASA part out_sba = audio.fromtype("MASA") render_masa_to_sba(masa, out_sba) diff --git a/ivas_processing_scripts/audiotools/convert/osba.py b/ivas_processing_scripts/audiotools/convert/osba.py index 2a24f520..3264a459 100644 --- a/ivas_processing_scripts/audiotools/convert/osba.py +++ b/ivas_processing_scripts/audiotools/convert/osba.py @@ -98,8 +98,12 @@ def convert_osba( # OSBA -> OMASA elif isinstance(out, audio.OMASAAudio): # TODO (treffehn) + # check if ism object number is the same + if out.num_ism_channels != osba.num_ism_channels: + raise ValueError("OSBA to OMASA conversion only possible if number of ISM objects matches") + # only render SBA part - out_sba = audio.fromtype("MASA") + out_sba = audio.fromtype("MASA"+out.name[-1]) render_sba_to_masa(sba, out_sba) out.audio[:, :osba.num_ism_channels] = osba.audio[:, :osba.num_ism_channels] diff --git a/ivas_processing_scripts/audiotools/convert/scenebased.py b/ivas_processing_scripts/audiotools/convert/scenebased.py index 9a5f2729..b785c11f 100755 --- a/ivas_processing_scripts/audiotools/convert/scenebased.py +++ b/ivas_processing_scripts/audiotools/convert/scenebased.py @@ -78,9 +78,9 @@ def convert_scenebased( render_sba_to_sba(sba, out) # SBA -> MASA - # NOTE: only allowed for 1st order ambisonics ("FOA" + "PLANARFOA") + # NOTE: only allowed for 1st and 2nd order ambisonics elif isinstance(out, audio.MetadataAssistedSpatialAudio) and ( - sba.name.endswith("FOA") or sba.name == "HOA2" + sba.name.endswith("FOA") or sba.name.endswith("HOA2") ): render_sba_to_masa(sba, out) @@ -200,8 +200,8 @@ def render_sba_to_masa( if sba_in.name == "HOA2": num_dirs = 2 - num_tcs = masa_out.audio.shape[1] - md_out_path = masa_out.metadata_file + num_tcs = masa_out.name[-1] + md_out_path = masa_out.metadata_files masa = masaAnalyzer(sba_in, num_tcs, num_dirs, md_out_path) masa_out.audio = masa.audio diff --git a/ivas_processing_scripts/processing/ivas.py b/ivas_processing_scripts/processing/ivas.py index 217f47ad..857fea16 100755 --- a/ivas_processing_scripts/processing/ivas.py +++ b/ivas_processing_scripts/processing/ivas.py @@ -355,6 +355,10 @@ class IVAS(Processing): "CUSTOM_LS", ]: return ["-mc", fmt.name] + elif isinstance(fmt, audio.OSBAAudio): + return ["-ism_sba", str(fmt.num_ism_channels), str(fmt.ambi_order)] + metadata_files + elif isinstance(fmt, audio.OMASAAudio): + return ["-ism_masa", str(fmt.num_ism_channels), str(fmt.num_channels-fmt.num_ism_channels)] + metadata_files raise ValueError(f"IVAS: Invalid input config: {fmt.name}.") -- GitLab From 1722f8b0421063bef81ece26c084685801f6ff6e Mon Sep 17 00:00:00 2001 From: Treffehn Date: Fri, 8 Sep 2023 16:07:39 +0200 Subject: [PATCH 04/22] added osba and omasa ivas support --- .../audiotools/constants.py | 77 ++++++++++++++++++- ivas_processing_scripts/processing/ivas.py | 19 +++-- 2 files changed, 85 insertions(+), 11 deletions(-) diff --git a/ivas_processing_scripts/audiotools/constants.py b/ivas_processing_scripts/audiotools/constants.py index be50ad37..95ac253c 100755 --- a/ivas_processing_scripts/audiotools/constants.py +++ b/ivas_processing_scripts/audiotools/constants.py @@ -248,6 +248,7 @@ METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS = { "num_channels": 2, }, } + OBJECT_BASED_AUDIO_FORMATS = { "ISM1": { "num_channels": 1, @@ -263,7 +264,6 @@ OBJECT_BASED_AUDIO_FORMATS = { }, } - SCENE_BASED_AUDIO_FORMATS = { "FOA": { "num_channels": 4, @@ -338,55 +338,127 @@ OMASA_AUDIO_FORMATS = { }, } - OSBA_AUDIO_FORMATS = { "ISM1SBA1": { "num_channels": 5, "num_ism_channels": 1, + "is_planar": False, }, "ISM1SBA2": { "num_channels": 10, "num_ism_channels": 1, + "is_planar": False, }, "ISM1SBA3": { "num_channels": 17, "num_ism_channels": 1, + "is_planar": False, }, "ISM2SBA1": { "num_channels": 6, "num_ism_channels": 2, + "is_planar": False, }, "ISM2SBA2": { "num_channels": 11, "num_ism_channels": 2, + "is_planar": False, }, "ISM2SBA3": { "num_channels": 18, "num_ism_channels": 2, + "is_planar": False, }, "ISM3SBA1": { "num_channels": 7, "num_ism_channels": 3, + "is_planar": False, }, "ISM3SBA2": { "num_channels": 12, "num_ism_channels": 3, + "is_planar": False, }, "ISM3SBA3": { "num_channels": 19, "num_ism_channels": 3, + "is_planar": False, }, "ISM4SBA1": { "num_channels": 8, "num_ism_channels": 4, + "is_planar": False, }, "ISM4SBA2": { "num_channels": 13, "num_ism_channels": 4, + "is_planar": False, }, "ISM4SBA3": { "num_channels": 20, "num_ism_channels": 4, + "is_planar": False, + }, + # planar sba + "ISM1PLANARSBA1": { + "num_channels": 5, + "num_ism_channels": 1, + "is_planar": True, + }, + "ISM1PLANARSBA2": { + "num_channels": 10, + "num_ism_channels": 1, + "is_planar": True, + }, + "ISM1PLANARSBA3": { + "num_channels": 17, + "num_ism_channels": 1, + "is_planar": True, + }, + "ISM2PLANARSBA1": { + "num_channels": 6, + "num_ism_channels": 2, + "is_planar": True, + }, + "ISM2PLANARSBA2": { + "num_channels": 11, + "num_ism_channels": 2, + "is_planar": True, + }, + "ISM2PLANARSBA3": { + "num_channels": 18, + "num_ism_channels": 2, + "is_planar": True, + }, + "ISM3PLANARSBA1": { + "num_channels": 7, + "num_ism_channels": 3, + "is_planar": True, + }, + "ISM3PLANARSBA2": { + "num_channels": 12, + "num_ism_channels": 3, + "is_planar": True, + }, + "ISM3PLANARSBA3": { + "num_channels": 19, + "num_ism_channels": 3, + "is_planar": True, + }, + "ISM4PLANARSBA1": { + "num_channels": 8, + "num_ism_channels": 4, + "is_planar": True, + }, + "ISM4PLANARSBA2": { + "num_channels": 13, + "num_ism_channels": 4, + "is_planar": True, + }, + "ISM4PLANARSBA3": { + "num_channels": 20, + "num_ism_channels": 4, + "is_planar": True, }, } @@ -401,7 +473,6 @@ AUDIO_FORMATS = [ OMASA_AUDIO_FORMATS, ] - IVAS_FRAME_LEN_MS = 20 IVAS_CICPX_TO_MONO = np.array( diff --git a/ivas_processing_scripts/processing/ivas.py b/ivas_processing_scripts/processing/ivas.py index 857fea16..0cd76dc0 100755 --- a/ivas_processing_scripts/processing/ivas.py +++ b/ivas_processing_scripts/processing/ivas.py @@ -153,13 +153,13 @@ class IVAS(Processing): if isinstance(self.in_fmt, audio.MetadataAssistedSpatialAudio): md_file = in_file.parent / (in_file.name + ".met") metadata_files.append(md_file) - - if isinstance(self.in_fmt, audio.ObjectBasedAudio): - if in_meta is None: - # TODO treffehn: search in folder of in_file - pass - else: - metadata_files = in_meta + elif isinstance(self.in_fmt, audio.ObjectBasedAudio) or isinstance(self.in_fmt, audio.OSBAAudio): + metadata_files = in_meta + elif isinstance(self.in_fmt, audio.OMASAAudio): + # TODO treffehn: check and maybe change here and for masa + metadata_files = in_meta + md_file = in_file.parent / (in_file.name + ".met") + metadata_files.append(md_file) # Support input file wav, pcm and txt (metadata iis) if in_file.suffix == ".wav": @@ -356,7 +356,10 @@ class IVAS(Processing): ]: return ["-mc", fmt.name] elif isinstance(fmt, audio.OSBAAudio): - return ["-ism_sba", str(fmt.num_ism_channels), str(fmt.ambi_order)] + metadata_files + if fmt.is_planar: + return ["-ism_sba", f"-{str(fmt.num_ism_channels)}", str(fmt.ambi_order)] + metadata_files + else: + return ["-ism_sba", f"+{str(fmt.num_ism_channels)}", str(fmt.ambi_order)] + metadata_files elif isinstance(fmt, audio.OMASAAudio): return ["-ism_masa", str(fmt.num_ism_channels), str(fmt.num_channels-fmt.num_ism_channels)] + metadata_files -- GitLab From a24171d20b7da4bce301ebe35f6920a212576f26 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Fri, 8 Sep 2023 17:09:26 +0200 Subject: [PATCH 05/22] support evs for osba --- ivas_processing_scripts/audiotools/convert/__init__.py | 1 + ivas_processing_scripts/processing/evs.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 7660b32f..0b843fc4 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -79,6 +79,7 @@ def convert_file( else: # first check prevents crash on custom_ls setup formats if isinstance(in_fmt, str) and in_fmt.startswith("MASA") and in_meta is None: + # TODO treffehn: also consider OMASA # if no MD fileis provided, default to name (including .wav or .pcm!!!) + ".met" in_meta = [in_file.parent / (in_file.name + ".met")] input = audio.fromfile(in_fmt, in_file, in_fs, in_meta) diff --git a/ivas_processing_scripts/processing/evs.py b/ivas_processing_scripts/processing/evs.py index 80b9fe14..bf64c732 100755 --- a/ivas_processing_scripts/processing/evs.py +++ b/ivas_processing_scripts/processing/evs.py @@ -265,8 +265,13 @@ class EVS(Processing): is_planar=is_planar, ) # copy ISM metadata for ISM pass-through - if isinstance(self.in_fmt, audio.ObjectBasedAudio): - for idx in range(len(in_meta)): + if isinstance(self.in_fmt, audio.ObjectBasedAudio) or isinstance(self.in_fmt, audio.OMASAAudio) or isinstance(self.in_fmt, audio.OSBAAudio): + if isinstance(self.in_fmt, audio.ObjectBasedAudio): + num_ism_obj = self.in_fmt.num_channels + else: + num_ism_obj = self.in_fmt.num_ism_channels + + for idx in range(num_ism_obj): out_file_meta = ( out_file.parent / f"{out_file.stem.split('.')[0]}.evs{out_file.suffix}.{idx}.csv" @@ -281,6 +286,7 @@ class EVS(Processing): # copy MASA metadata for MASA pass-through if isinstance(self.in_fmt, audio.MetadataAssistedSpatialAudio): + # TODO: add OMASA md_file_in = in_file.parent / (in_file.name + ".met") md_file_out = out_file.parent / (out_file.name + ".met") copyfile(md_file_in, md_file_out) -- GitLab From b18034abc26c64ee47bbe28423410bc23b0a36af Mon Sep 17 00:00:00 2001 From: Treffehn Date: Fri, 8 Sep 2023 17:27:14 +0200 Subject: [PATCH 06/22] extended naming of masa modes by num of dirs --- .../audiotools/constants.py | 80 ++++++++++++++++--- .../audiotools/convert/osba.py | 2 +- .../audiotools/convert/scenebased.py | 9 +-- .../audiotools/wrappers/masaAnalyzer.py | 4 +- 4 files changed, 76 insertions(+), 19 deletions(-) diff --git a/ivas_processing_scripts/audiotools/constants.py b/ivas_processing_scripts/audiotools/constants.py index 95ac253c..cde12b48 100755 --- a/ivas_processing_scripts/audiotools/constants.py +++ b/ivas_processing_scripts/audiotools/constants.py @@ -241,11 +241,21 @@ CHANNEL_BASED_AUDIO_ALTNAMES = { } METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS = { - "MASA1": { + "MASA1DIR1": { "num_channels": 1, + "dirs": 1, }, - "MASA2": { + "MASA1DIR2": { + "num_channels": 1, + "dirs": 2, + }, + "MASA2DIR1": { + "num_channels": 2, + "dirs": 1, + }, + "MASA2DIR2": { "num_channels": 2, + "dirs": 2, }, } @@ -304,37 +314,87 @@ SCENE_BASED_AUDIO_FORMATS = { } OMASA_AUDIO_FORMATS = { - "ISM1MASA1": { + # 1 dir + "ISM1MASA1DIR1": { + "num_channels": 2, + "num_ism_channels": 1, + "dirs": 1, + }, + "ISM1MASA2DIR1": { + "num_channels": 3, + "num_ism_channels": 1, + "dirs": 1, + }, + "ISM2MASA1DIR1": { + "num_channels": 3, + "num_ism_channels": 2, + "dirs": 1, + }, + "ISM2MASA2DIR1": { + "num_channels": 4, + "num_ism_channels": 2, + "dirs": 1, + }, + "ISM3MASA1DIR1": { + "num_channels": 4, + "num_ism_channels": 3, + "dirs": 1, + }, + "ISM3MASA2DIR1": { + "num_channels": 5, + "num_ism_channels": 3, + "dirs": 1, + }, + "ISM4MASA1DIR1": { + "num_channels": 5, + "num_ism_channels": 4, + "dirs": 1, + }, + "ISM4MASA2DIR1": { + "num_channels": 6, + "num_ism_channels": 4, + "dirs": 1, + }, + # 2 sdirs + "ISM1MASA1DIR2": { "num_channels": 2, "num_ism_channels": 1, + "dirs": 2, }, - "ISM1MASA2": { + "ISM1MASA2DIR2": { "num_channels": 3, "num_ism_channels": 1, + "dirs": 2, }, - "ISM2MASA1": { + "ISM2MASA1DIR2": { "num_channels": 3, "num_ism_channels": 2, + "dirs": 2, }, - "ISM2MASA2": { + "ISM2MASA2DIR2": { "num_channels": 4, "num_ism_channels": 2, + "dirs": 2, }, - "ISM3MASA1": { + "ISM3MASA1DIR2": { "num_channels": 4, "num_ism_channels": 3, + "dirs": 2, }, - "ISM3MASA2": { + "ISM3MASA2DIR2": { "num_channels": 5, "num_ism_channels": 3, + "dirs": 2, }, - "ISM4MASA1": { + "ISM4MASA1DIR2": { "num_channels": 5, "num_ism_channels": 4, + "dirs": 2, }, - "ISM4MASA2": { + "ISM4MASA2DIR2": { "num_channels": 6, "num_ism_channels": 4, + "dirs": 2, }, } diff --git a/ivas_processing_scripts/audiotools/convert/osba.py b/ivas_processing_scripts/audiotools/convert/osba.py index 3264a459..ce411388 100644 --- a/ivas_processing_scripts/audiotools/convert/osba.py +++ b/ivas_processing_scripts/audiotools/convert/osba.py @@ -103,7 +103,7 @@ def convert_osba( raise ValueError("OSBA to OMASA conversion only possible if number of ISM objects matches") # only render SBA part - out_sba = audio.fromtype("MASA"+out.name[-1]) + out_sba = audio.fromtype(out.name[4:]) render_sba_to_masa(sba, out_sba) out.audio[:, :osba.num_ism_channels] = osba.audio[:, :osba.num_ism_channels] diff --git a/ivas_processing_scripts/audiotools/convert/scenebased.py b/ivas_processing_scripts/audiotools/convert/scenebased.py index b785c11f..f6d0dc9b 100755 --- a/ivas_processing_scripts/audiotools/convert/scenebased.py +++ b/ivas_processing_scripts/audiotools/convert/scenebased.py @@ -196,14 +196,11 @@ def render_sba_to_masa( sba_in: audio.SceneBasedAudio, masa_out: audio.MetadataAssistedSpatialAudio, ) -> None: - num_dirs = 1 - if sba_in.name == "HOA2": - num_dirs = 2 - num_tcs = masa_out.name[-1] - md_out_path = masa_out.metadata_files + num_tcs = masa_out.num_channels + md_out_path = masa_out.metadata_files # TODO: get metadata files - masa = masaAnalyzer(sba_in, num_tcs, num_dirs, md_out_path) + masa = masaAnalyzer(sba_in, num_tcs, masa_out.dirs, md_out_path) masa_out.audio = masa.audio diff --git a/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py b/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py index 0ded643d..e8ce96ba 100644 --- a/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py +++ b/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py @@ -74,7 +74,7 @@ def masaAnalyzer( if num_dirs not in [1, 2]: raise ValueError(f"Only 1 or 2 directions supported, but {num_dirs} was given.") - if sba.name not in ["PLANARFOA", "FOA", "HOA2"]: + if sba.name not in ["PLANARFOA", "FOA", "HOA2"]: # TODO treffehn: add planarhoa2? raise ValueError(f"Only FOA or HOA2 suported, but {sba.name} was given.") if num_dirs == 2 and sba.name != "HOA2": @@ -105,7 +105,7 @@ def masaAnalyzer( # we need to run in the masaAnalyzer directory to use the .bin files it requires run(cmd, cwd=binary.resolve().parent) - fmt = f"MASA{num_tcs}" + fmt = f"MASA{num_tcs}DIR{num_dirs}" masa = audio.fromfile(fmt, tmp_out_pcm, 48000, [metadata_out_path]) return masa -- GitLab From a5d163a174b144037cb386abbdad6b90d46b96e1 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Fri, 15 Sep 2023 13:39:53 +0200 Subject: [PATCH 07/22] add planar hoa2 to masa conversion formats --- ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py b/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py index e8ce96ba..71ea709f 100644 --- a/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py +++ b/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py @@ -74,7 +74,7 @@ def masaAnalyzer( if num_dirs not in [1, 2]: raise ValueError(f"Only 1 or 2 directions supported, but {num_dirs} was given.") - if sba.name not in ["PLANARFOA", "FOA", "HOA2"]: # TODO treffehn: add planarhoa2? + if sba.name not in ["PLANARFOA", "FOA", "HOA2", "PLANARHOA2"]: raise ValueError(f"Only FOA or HOA2 suported, but {sba.name} was given.") if num_dirs == 2 and sba.name != "HOA2": -- GitLab From b8de52cb2bf981bc394a8dfd83619ca729b4145b Mon Sep 17 00:00:00 2001 From: Treffehn Date: Tue, 31 Oct 2023 16:15:39 +0100 Subject: [PATCH 08/22] enabled osba to omasa conversion --- ivas_processing_scripts/audiotools/audio.py | 16 ++++-- .../audiotools/convert/__init__.py | 55 ++++++++++++++----- .../audiotools/convert/osba.py | 26 +++++---- .../audiotools/convert/scenebased.py | 2 +- .../audiotools/wrappers/masaAnalyzer.py | 2 +- 5 files changed, 68 insertions(+), 33 deletions(-) diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index 647a8bd7..b9222354 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -218,18 +218,18 @@ class MetadataAssistedSpatialAudio(Audio): raise ValueError( f"Unsupported metadata assisted spatial audio format {name}" ) - self.metadata_files = [] + self.metadata_file = None @classmethod def _from_file( cls, name: str, filename: Path, - metadata_files: list[str], + metadata_file: str, fs: Optional[int] = None, ) -> "MetadataAssistedSpatialAudio": obj = super()._from_file(name, filename, fs) - obj.metadata_file = Path(metadata_files[0]) + obj.metadata_file = Path(metadata_file) return obj @classmethod @@ -237,11 +237,11 @@ class MetadataAssistedSpatialAudio(Audio): cls, name: str, filename: Path, - metadata_files: list[str], + metadata_file: str, fs: Optional[int] = None, ) -> "MetadataAssistedSpatialAudio": obj = super()._from_file(name, filename, fs) - obj.metadata_file = Path(metadata_files[0]) + obj.metadata_file = Path(metadata_file) return obj @@ -432,13 +432,17 @@ class OMASAAudio(Audio): def init_metadata(self): # check if number of metadata files matches format - if self.num_ism_channels != len(self.metadata_files): + if self.num_ism_channels != len(self.metadata_files)-1: raise ValueError( f"Mismatch between number of ism channels [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]" ) self.object_pos = [] for i, f in enumerate(self.metadata_files): + if i >= self.num_ism_channels: + # only read ISM metadata, not MASA metadata + break + pos = np.genfromtxt(f, delimiter=",") # check if metadata has right number of columns diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 0b843fc4..51501ee9 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -34,6 +34,7 @@ import logging from pathlib import Path, PurePath from shutil import copyfile from typing import Optional, Union +from copy import copy from numpy import empty @@ -79,9 +80,13 @@ def convert_file( else: # first check prevents crash on custom_ls setup formats if isinstance(in_fmt, str) and in_fmt.startswith("MASA") and in_meta is None: - # TODO treffehn: also consider OMASA - # if no MD fileis provided, default to name (including .wav or .pcm!!!) + ".met" + # if no MD file is provided, default to name (including .wav or .pcm!!!) + ".met" in_meta = [in_file.parent / (in_file.name + ".met")] + elif isinstance(in_fmt, str) and "MASA" in in_fmt and "ISM" in in_fmt: + if isinstance(in_meta, list) and len(in_meta) < (int(in_fmt[3])+int(in_fmt[8])): + in_meta_masa = in_file.parent / (in_file.name + ".met") + in_meta.append(in_meta_masa) + input = audio.fromfile(in_fmt, in_file, in_fs, in_meta) # try to set reasonable defaults if missing @@ -100,22 +105,40 @@ def convert_file( output = audio.fromtype(out_fmt) + # handle metadata for outputs with metadata (MASA, ISM, OMASA, OSBA) if isinstance(output, audio.MetadataAssistedSpatialAudio): - # create dummy audio array to allow inference of MASA mode - num_tcs = int(output.name[-1]) - output.audio = empty((1, num_tcs)) + if isinstance(input, audio.MetadataAssistedSpatialAudio): + # use existing metadata file + output.metadata_file = input.metadata_file + else: + # fabricate metadata file name + masa_meta_file_name = Path(out_file).parent / (Path(out_file).name + ".met") + output.metadata_file = masa_meta_file_name - # fabricate metadata file name - output.metadata_file = Path(out_file).parent / (Path(out_file).name + ".met") - if isinstance(output, audio.ObjectBasedAudio): + elif isinstance(output, audio.ObjectBasedAudio): try: - output.object_pos = input.object_pos - output.metadata_files = input.metadata_files + output.object_pos = copy(input.object_pos) + output.metadata_files = copy(input.metadata_files) except Exception: raise ValueError( "ISM is not supported as an output for rendering! Only usable as pass-through" ) + elif isinstance(output, audio.OMASAAudio): + if isinstance(input, audio.OMASAAudio): + # use existing metadata files + output.metadata_files = copy(input.metadata_files) + else: + # fabricate metadata file name + masa_meta_file_name = Path(out_file).parent / (Path(out_file).name + ".met") + output.metadata_files = copy(input.metadata_files) + output.metadata_files.append(masa_meta_file_name) + output.object_pos = copy(input.object_pos) + elif isinstance(output, audio.OSBAAudio): + output.object_pos = copy(input.object_pos) + output.metadata_files = copy(input.metadata_files) + + # apply actual conversion if isinstance(input, metadata.Metadata): if logger: logger.debug(f"Converting metadata to {out_fmt} : {in_file} -> {out_file}") @@ -137,16 +160,22 @@ def convert_file( output.fs = in_fs # resampling not yet applied convert(input, output, in_fs=in_fs, out_fs=out_fs, logger=logger, **kwargs) - # write output + # write output audio write(out_file, output.audio, output.fs) - if isinstance(output, audio.ObjectBasedAudio): + # write metadata + if isinstance(output, audio.ObjectBasedAudio) or isinstance(output, audio.OSBAAudio): write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True) elif isinstance(output, audio.MetadataAssistedSpatialAudio) and in_fmt == out_fmt: # audio objects point to same MD file, create new one with default naming for output out_md_name = out_file.parent / (out_file.name + ".met") copyfile(output.metadata_file, out_md_name) output.metadata_file = out_md_name - + elif isinstance(output, audio.OMASAAudio): + write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True) + if in_fmt == out_fmt: + # audio objects point to same MD file, create new one with default naming for output + out_md_name = out_file.parent / (out_file.name + ".met") + copyfile(output.metadata_files[-1], out_md_name) def convert( input: audio.Audio, diff --git a/ivas_processing_scripts/audiotools/convert/osba.py b/ivas_processing_scripts/audiotools/convert/osba.py index ce411388..8597ad8b 100644 --- a/ivas_processing_scripts/audiotools/convert/osba.py +++ b/ivas_processing_scripts/audiotools/convert/osba.py @@ -29,7 +29,8 @@ # the United Nations Convention on Contracts on the International Sales of Goods. # -import copy +from copy import copy, deepcopy +import numpy as np from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \ @@ -49,19 +50,19 @@ def convert_osba( # split OSBA object in ISM and SBA object oba = audio.fromarray("ISM" + str(osba.num_ism_channels), osba.audio[:, :osba.num_ism_channels], osba.fs) - oba.metadata_files = osba.metadata_files - oba.object_pos = osba.object_pos + oba.metadata_files = copy(osba.metadata_files) + oba.object_pos = copy(osba.object_pos) sba = audio.fromarray("SBA" + str(osba.ambi_order), osba.audio[:, osba.num_ism_channels:], osba.fs) # OSBA -> Binaural if isinstance(out, audio.BinauralAudio): # render SBA and ISM part separately # ISM - out_ism = copy.deepcopy(out) + out_ism = deepcopy(out) render_oba_to_binaural(oba, out_ism, **kwargs) # SBA - out_sba = copy.deepcopy(out) + out_sba = deepcopy(out) render_sba_to_binaural(sba, out_sba, **kwargs) # combine results @@ -71,11 +72,11 @@ def convert_osba( elif isinstance(out, audio.ChannelBasedAudio): # render SBA and ISM part separately # ISM - out_ism = copy.deepcopy(out) + out_ism = deepcopy(out) render_oba_to_cba(oba, out_ism) # SBA - out_sba = copy.deepcopy(out) + out_sba = deepcopy(out) render_sba_to_cba(sba, out_sba) # combine results @@ -85,11 +86,11 @@ def convert_osba( elif isinstance(out, audio.SceneBasedAudio): # render SBA and ISM part separately # ISM - out_ism = copy.deepcopy(out) + out_ism = deepcopy(out) render_oba_to_sba(oba, out_ism) # SBA - out_sba = copy.deepcopy(out) + out_sba = deepcopy(out) render_sba_to_sba(sba, out_sba) # combine results @@ -97,17 +98,18 @@ def convert_osba( # OSBA -> OMASA elif isinstance(out, audio.OMASAAudio): - # TODO (treffehn) # check if ism object number is the same if out.num_ism_channels != osba.num_ism_channels: raise ValueError("OSBA to OMASA conversion only possible if number of ISM objects matches") # only render SBA part out_sba = audio.fromtype(out.name[4:]) + out_sba.metadata_file = out.metadata_files[-1] render_sba_to_masa(sba, out_sba) - out.audio[:, :osba.num_ism_channels] = osba.audio[:, :osba.num_ism_channels] - out.audio[:, osba.num_ism_channels:] = out_sba.audio + # out.audio[:, :osba.num_ism_channels] = osba.audio[:, :osba.num_ism_channels] + # out.audio[:, osba.num_ism_channels:] = out_sba.audio + out.audio = np.concatenate((osba.audio[:, :osba.num_ism_channels], out_sba.audio), axis=1) else: raise NotImplementedError( diff --git a/ivas_processing_scripts/audiotools/convert/scenebased.py b/ivas_processing_scripts/audiotools/convert/scenebased.py index f6d0dc9b..286724b6 100755 --- a/ivas_processing_scripts/audiotools/convert/scenebased.py +++ b/ivas_processing_scripts/audiotools/convert/scenebased.py @@ -198,7 +198,7 @@ def render_sba_to_masa( ) -> None: num_tcs = masa_out.num_channels - md_out_path = masa_out.metadata_files # TODO: get metadata files + md_out_path = masa_out.metadata_file masa = masaAnalyzer(sba_in, num_tcs, masa_out.dirs, md_out_path) masa_out.audio = masa.audio diff --git a/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py b/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py index 71ea709f..432abd69 100644 --- a/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py +++ b/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py @@ -106,6 +106,6 @@ def masaAnalyzer( run(cmd, cwd=binary.resolve().parent) fmt = f"MASA{num_tcs}DIR{num_dirs}" - masa = audio.fromfile(fmt, tmp_out_pcm, 48000, [metadata_out_path]) + masa = audio.fromfile(fmt, tmp_out_pcm, 48000, metadata_out_path) return masa -- GitLab From 97331089bbc4abfe3706933c8b53fae293d3d0ff Mon Sep 17 00:00:00 2001 From: Treffehn Date: Thu, 2 Nov 2023 17:27:46 +0100 Subject: [PATCH 09/22] fixed some omasa metadata problems --- .../audiotools/convert/omasa.py | 32 +++++++++---------- .../audiotools/convert/osba.py | 2 -- ivas_processing_scripts/processing/evs.py | 2 +- .../processing/processing.py | 5 +++ .../processing_splitting_scaling.py | 2 +- 5 files changed, 23 insertions(+), 20 deletions(-) diff --git a/ivas_processing_scripts/audiotools/convert/omasa.py b/ivas_processing_scripts/audiotools/convert/omasa.py index 3c662e40..c222903a 100644 --- a/ivas_processing_scripts/audiotools/convert/omasa.py +++ b/ivas_processing_scripts/audiotools/convert/omasa.py @@ -29,7 +29,8 @@ # the United Nations Convention on Contracts on the International Sales of Goods. # -import copy +from copy import copy, deepcopy +import numpy as np from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \ @@ -48,19 +49,20 @@ def convert_omasa( # split OMASA object in ISM and MASA object oba = audio.fromarray("ISM" + str(omasa.num_ism_channels), omasa.audio[:, :omasa.num_ism_channels], omasa.fs) - oba.metadata_files = omasa.metadata_files - oba.object_pos = omasa.object_pos - masa = audio.fromarray("MASA" + str(omasa.num_channels-omasa.num_ism_channels), omasa.audio[:, omasa.num_ism_channels:], omasa.fs) + oba.metadata_files = copy(omasa.metadata_files) + oba.object_pos = copy(omasa.object_pos) + masa = audio.fromarray("MASA" + str(omasa.num_channels-omasa.num_ism_channels) + "DIR" + str(omasa.dirs), omasa.audio[:, omasa.num_ism_channels:], omasa.fs) + masa.metadata_file = omasa.metadata_files[-1] # OMASA -> Binaural if isinstance(out, audio.BinauralAudio): # render MASA and ISM part separately # ISM - out_ism = copy.deepcopy(out) + out_ism = deepcopy(out) render_oba_to_binaural(oba, out_ism, **kwargs) # MASA - out_masa = copy.deepcopy(out) + out_masa = deepcopy(out) render_masa_to_binaural(masa, out_masa, **kwargs) # combine results @@ -70,11 +72,11 @@ def convert_omasa( elif isinstance(out, audio.ChannelBasedAudio): # render MASA and ISM part separately # ISM - out_ism = copy.deepcopy(out) + out_ism = deepcopy(out) render_oba_to_cba(oba, out_ism) # MASA - out_masa = copy.deepcopy(out) + out_masa = deepcopy(out) render_masa_to_cba(masa, out_masa) # combine results @@ -84,11 +86,11 @@ def convert_omasa( elif isinstance(out, audio.SceneBasedAudio): # render MASA and ISM part separately # ISM - out_ism = copy.deepcopy(out) + out_ism = deepcopy(out) render_oba_to_sba(oba, out_ism) # MASA - out_masa = copy.deepcopy(out) + out_masa = deepcopy(out) render_masa_to_sba(masa, out_masa) # combine results @@ -96,17 +98,15 @@ def convert_omasa( # OMASA -> OSBA elif isinstance(out, audio.OSBAAudio): - # TODO (treffehn) # check if ism object number is the same - if out.num_ism_channels != masa.num_ism_channels: + if out.num_ism_channels != omasa.num_ism_channels: raise ValueError("OMASA to OSBA conversion only possible if number of ISM objects matches") # only render MASA part - out_sba = audio.fromtype("MASA") - render_masa_to_sba(masa, out_sba) + out_masa = deepcopy(out) + render_masa_to_sba(masa, out_masa) - out.audio[:, :omasa.num_ism_channels] = omasa.audio[:, :omasa.num_ism_channels] - out.audio[:, omasa.num_ism_channels:] = out_sba.audio + out.audio = np.concatenate((omasa.audio[:, :omasa.num_ism_channels], out_masa.audio), axis=1) else: raise NotImplementedError( diff --git a/ivas_processing_scripts/audiotools/convert/osba.py b/ivas_processing_scripts/audiotools/convert/osba.py index 8597ad8b..c1652b98 100644 --- a/ivas_processing_scripts/audiotools/convert/osba.py +++ b/ivas_processing_scripts/audiotools/convert/osba.py @@ -107,8 +107,6 @@ def convert_osba( out_sba.metadata_file = out.metadata_files[-1] render_sba_to_masa(sba, out_sba) - # out.audio[:, :osba.num_ism_channels] = osba.audio[:, :osba.num_ism_channels] - # out.audio[:, osba.num_ism_channels:] = out_sba.audio out.audio = np.concatenate((osba.audio[:, :osba.num_ism_channels], out_sba.audio), axis=1) else: diff --git a/ivas_processing_scripts/processing/evs.py b/ivas_processing_scripts/processing/evs.py index dfb1ca90..5681efd4 100755 --- a/ivas_processing_scripts/processing/evs.py +++ b/ivas_processing_scripts/processing/evs.py @@ -284,7 +284,7 @@ class EVS(Processing): # copy MASA metadata for MASA pass-through if isinstance(self.in_fmt, audio.MetadataAssistedSpatialAudio): - # TODO: add OMASA + # TODO treffehn: add OMASA md_file_in = in_file.parent / (in_file.name + ".met") md_file_out = out_file.parent / (out_file.name + ".met") copyfile(md_file_in, md_file_out) diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index b8fda56b..8d3d0026 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -36,6 +36,7 @@ from abc import ABC, abstractmethod from itertools import repeat from multiprocessing import Pool from pathlib import Path +import shutil from shutil import copyfile from time import sleep from typing import Iterable, Union @@ -502,6 +503,10 @@ def process_item( if processing_paths_meta[-1]: for idx, ppm in enumerate(processing_paths_meta[-1]): copyfile(ppm, out_meta[idx]) + if "MASA" in chain[-1].out_fmt: + masa_meta_file_tmp = Path(processing_paths[-1]).parent / (Path(processing_paths[-1]).name + ".met") + masa_meta_file = Path(out_file).parent / (Path(out_file).name + ".met") + copyfile(masa_meta_file_tmp, masa_meta_file) def remove_preamble(x, out_fmt, fs, repeat_signal, preamble_len_ms, meta, logger): diff --git a/ivas_processing_scripts/processing/processing_splitting_scaling.py b/ivas_processing_scripts/processing/processing_splitting_scaling.py index 83a4ac16..df3b8df5 100644 --- a/ivas_processing_scripts/processing/processing_splitting_scaling.py +++ b/ivas_processing_scripts/processing/processing_splitting_scaling.py @@ -344,7 +344,7 @@ def validate_tracefile(tracefile): if entry[1] >= 0: if entry[1] == prevRtpTs: raise ValueError(f"Error in JBM trace file at line {j}: duplicated rtpTs found") - # TODO: (treffehn) include RTP time stamp overflow handling? + # TODO: include RTP time stamp overflow handling? # else if (entry.rtpTs + rtpTsExtension < prevRtpTs) { # if (entry.rtpTs + rtpTsExtension + (1LL << 32) - prevRtpTs < # prevRtpTs - (entry.rtpTs + rtpTsExtension)) { -- GitLab From 40192caa243d48312edd23c8b29d356e2f304b3c Mon Sep 17 00:00:00 2001 From: Treffehn Date: Fri, 3 Nov 2023 18:05:08 +0100 Subject: [PATCH 10/22] restructured masa metadata processing --- .../audiotools/convert/__init__.py | 2 +- .../audiotools/wrappers/bs1770.py | 2 +- .../audiotools/wrappers/p50fbmnru.py | 4 +- ivas_processing_scripts/processing/evs.py | 3 +- ivas_processing_scripts/processing/ivas.py | 8 ++- .../processing/processing.py | 67 +++++++++++++------ 6 files changed, 57 insertions(+), 29 deletions(-) diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 51501ee9..38336828 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -360,7 +360,7 @@ def format_conversion( raise NotImplementedError( "ISM is not supported as an output for rendering! Only usable as pass-through" ) - if (isinstance(output, audio.OMASAAudio) and not isinstance(input, audio.OSBAAudio)) or (isinstance(output, audio.OSBAAudio) and not isinstance(input, audio.OMASAAudio)): + if (isinstance(output, audio.OMASAAudio) and not isinstance(input, audio.OSBAAudio)) or (isinstance(output, audio.OSBAAudio) and not isinstance(input, audio.OMASAAudio)) and not input.name == output.name: raise NotImplementedError( "OMASA and OSBA only possible as output if input is OMASA or OSBA" ) diff --git a/ivas_processing_scripts/audiotools/wrappers/bs1770.py b/ivas_processing_scripts/audiotools/wrappers/bs1770.py index a72398ed..a834bfe2 100755 --- a/ivas_processing_scripts/audiotools/wrappers/bs1770.py +++ b/ivas_processing_scripts/audiotools/wrappers/bs1770.py @@ -196,7 +196,7 @@ def get_loudness( input, audio.MetadataAssistedSpatialAudio ): loudness_format = "7_1_4" - elif isinstance(input, audio.ObjectBasedAudio): + elif isinstance(input, audio.ObjectBasedAudio) or isinstance(audio.OMASAAudio) or isinstance(audio.OSBAAudio): loudness_format = "BINAURAL" elif hasattr(input, "layout_file"): loudness_format = input.layout_file diff --git a/ivas_processing_scripts/audiotools/wrappers/p50fbmnru.py b/ivas_processing_scripts/audiotools/wrappers/p50fbmnru.py index c31d0aae..68148597 100755 --- a/ivas_processing_scripts/audiotools/wrappers/p50fbmnru.py +++ b/ivas_processing_scripts/audiotools/wrappers/p50fbmnru.py @@ -30,7 +30,7 @@ # the United Nations Convention on Contracts on the International Sales of Goods. # -from copy import copy +from copy import deepcopy from pathlib import Path from tempfile import TemporaryDirectory from warnings import warn @@ -73,7 +73,7 @@ def p50fbmnru( else: binary = find_binary("p50fbmnru") - tmp_audio_obj = copy(input) + tmp_audio_obj = deepcopy(input) # resample signal to 48kHz if input.fs != 48000: diff --git a/ivas_processing_scripts/processing/evs.py b/ivas_processing_scripts/processing/evs.py index 5681efd4..a487ade6 100755 --- a/ivas_processing_scripts/processing/evs.py +++ b/ivas_processing_scripts/processing/evs.py @@ -283,8 +283,7 @@ class EVS(Processing): copyfile(in_meta[idx], out_file_meta_unprocessed) # copy MASA metadata for MASA pass-through - if isinstance(self.in_fmt, audio.MetadataAssistedSpatialAudio): - # TODO treffehn: add OMASA + if isinstance(self.in_fmt, audio.MetadataAssistedSpatialAudio) or isinstance(self.in_fmt, audio.OMASAAudio): md_file_in = in_file.parent / (in_file.name + ".met") md_file_out = out_file.parent / (out_file.name + ".met") copyfile(md_file_in, md_file_out) diff --git a/ivas_processing_scripts/processing/ivas.py b/ivas_processing_scripts/processing/ivas.py index c83c6d54..3caaf780 100755 --- a/ivas_processing_scripts/processing/ivas.py +++ b/ivas_processing_scripts/processing/ivas.py @@ -155,10 +155,12 @@ class IVAS(Processing): elif isinstance(self.in_fmt, audio.ObjectBasedAudio) or isinstance(self.in_fmt, audio.OSBAAudio): metadata_files = in_meta elif isinstance(self.in_fmt, audio.OMASAAudio): - # TODO treffehn: check and maybe change here and for masa metadata_files = in_meta - md_file = in_file.parent / (in_file.name + ".met") - metadata_files.append(md_file) + # TODO treffehn: check and maybe change here and for masa + # if len(metadata_files) != number of ism channels plus one + # md_file = in_file.parent / (in_file.name + ".met") + # metadata_files.append(md_file) + pass # Support input file wav, pcm and txt (metadata iis) if in_file.suffix == ".wav": diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 8d3d0026..2cb7bf11 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -431,6 +431,7 @@ def process_item( logger: logging.Logger, in_meta, ) -> None: + # derive tmp file names tmp_file = tmp_dir.joinpath(in_file.name) tmp_file_meta = [] if in_meta: @@ -441,46 +442,76 @@ def process_item( out_dir_wav = False processing_paths = [in_file] processing_paths_meta = [in_meta] + bool_ism = False + bool_masa = False + num_ism_meta = None for p in chain: if Path(in_file.name).suffix == ".txt" and p.out_fmt is not None: processing_paths.append(tmp_file.with_suffix(f".{p.name}.wav")) out_dir_wav = True else: + # append file name processing_paths.append(tmp_file.with_suffix(f".{p.name}{tmp_file.suffix}")) - try: # TODO: clean up try except blocks + + # determine output format + try: out_format = p.out_fmt except AttributeError: # EVS has no attribute out_fmt out_format = p.in_fmt - try: - if p.name == "pre_2": - bool_ism = p.in_fmt.startswith("ISM") - else: - bool_ism = out_format.startswith("ISM") - except Exception: - bool_ism = out_format.name.startswith("ISM") + # check for ism and masa metadata + if p.name == "pre_2": + # no conversion in preprocessing 2 + bool_ism = "ISM" in p.in_fmt + bool_masa = "MASA" in p.in_fmt + elif isinstance(out_format, str): + # if out format is string + bool_ism = "ISM" in out_format + bool_masa = "MASA" in out_format + elif isinstance(out_format, audio.Audio): + out_format = out_format.name + bool_ism = "ISM" in out_format + bool_masa = "MASA" in out_format + else: + raise ValueError("wrong output format in processing setup") + + list_meta_step = [] + # append ism metadata if bool_ism: - list_meta_step = [] - for idx, tfm in enumerate(tmp_file_meta): + # ISM, OMASA and OSBA + # "ISMX..." + num_ism_meta = int(out_format[3]) + for idx in range(num_ism_meta): list_meta_step.append( - tfm.parent - / f"{in_file.stem.split('.')[0]}.{p.name}.wav.{idx}.csv" + tmp_dir / f"{in_file.stem.split('.')[0]}.{p.name}.wav.{idx}.csv" ) processing_paths_meta.append(list_meta_step) + + # append masa metadata + if bool_masa: + # MASA and OMASA + list_meta_step.append( + tmp_dir / f"{in_file.stem.split('.')[0]}.{p.name}.wav.met" + ) + + if bool_ism or bool_masa: + processing_paths_meta.append(list_meta_step) else: processing_paths_meta.append(None) - # TODO: support txt file writing for META pass-through if out_dir_wav: out_file = out_dir.joinpath(in_file.name).with_suffix(".wav") else: out_file = out_dir.joinpath(in_file.name) + # metadata from last process in chain out_meta = [] - if in_meta: - for im in range(len(in_meta)): - out_meta.append(out_dir.joinpath(f"{Path(out_file).stem}.wav.{im}.csv")) + if bool_ism: + for met in range(num_ism_meta): + out_meta.append(out_dir.joinpath(f"{Path(out_file).stem}.wav.{met}.csv")) + if bool_masa: + out_meta.append(out_dir.joinpath(f"{Path(out_file).stem}.wav.met")) # execute each process sequentially, feed output into input of next process for p, (input, output), input_meta in zip( @@ -503,10 +534,6 @@ def process_item( if processing_paths_meta[-1]: for idx, ppm in enumerate(processing_paths_meta[-1]): copyfile(ppm, out_meta[idx]) - if "MASA" in chain[-1].out_fmt: - masa_meta_file_tmp = Path(processing_paths[-1]).parent / (Path(processing_paths[-1]).name + ".met") - masa_meta_file = Path(out_file).parent / (Path(out_file).name + ".met") - copyfile(masa_meta_file_tmp, masa_meta_file) def remove_preamble(x, out_fmt, fs, repeat_signal, preamble_len_ms, meta, logger): -- GitLab From e342db421f1b978bb80d75598c8e031eaca8c5f7 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Mon, 6 Nov 2023 10:45:42 +0100 Subject: [PATCH 11/22] adjustments in preprocessing steps --- .../processing/preprocessing_2.py | 4 ++++ ivas_processing_scripts/processing/processing.py | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/ivas_processing_scripts/processing/preprocessing_2.py b/ivas_processing_scripts/processing/preprocessing_2.py index 912ef6d1..4eab213c 100644 --- a/ivas_processing_scripts/processing/preprocessing_2.py +++ b/ivas_processing_scripts/processing/preprocessing_2.py @@ -64,6 +64,10 @@ class Preprocessing2(Processing): self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta ) + if isinstance(audio_object, audio.MetadataAssistedSpatialAudio) or isinstance(audio_object, audio.OMASAAudio): + if self.preamble > 0 or self.background_noise or self.repeat_signal: + raise ValueError("No preprocessing 2 possible for formats including MASA metadata") + # modify ISM metadata if self.in_fmt.startswith("ISM"): if not self.preamble: diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 2cb7bf11..4610dbea 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -123,8 +123,18 @@ def concat_setup(cfg: TestConfig, chain, logger: logging.Logger): logger.info(f"Concatenating input files in directory {cfg.input_path}") + # derive input format to preprocessing 2 (either input or preprocessing format) + try: + input_format = cfg.preprocessing.get("fmt", cfg.input["fmt"]) + except AttributeError: + input_format = cfg.input["fmt"] + + # concatenation of met files not possible -> do not concatenate MASA and OMASA + if "MASA" in input_format: + raise ValueError("Concatenation of formats including MASA metadata not possible") + # concatenate ISM metadata - if cfg.input["fmt"].startswith("ISM"): + if input_format.startswith("ISM"): cfg.concat_meta = [] for obj_idx in range(len(cfg.metadata_path[0])): cfg.concat_meta.append( @@ -136,7 +146,7 @@ def concat_setup(cfg: TestConfig, chain, logger: logging.Logger): cfg.items_list, cfg.metadata_path, cfg.concat_meta, - cfg.input["fmt"], + input_format, ) # set input to the concatenated file we have just written to the output dir -- GitLab From 7b170ac9507282598d7202bcd7ac70e17c605f59 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Mon, 6 Nov 2023 16:00:30 +0100 Subject: [PATCH 12/22] modified preprocessing metadata handling --- .../processing/processing.py | 35 ++++++++++++++----- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 4610dbea..e85cda73 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -36,7 +36,6 @@ from abc import ABC, abstractmethod from itertools import repeat from multiprocessing import Pool from pathlib import Path -import shutil from shutil import copyfile from time import sleep from typing import Iterable, Union @@ -356,13 +355,32 @@ def preprocess(cfg, logger): cfg.items_list, cfg.preprocessing_2["concatenation_order"] ) - if cfg.metadata_path[0] is not None: - for item_idx in range(len(cfg.metadata_path)): - for obj_idx in range(len(cfg.metadata_path[item_idx])): - if cfg.metadata_path[item_idx][obj_idx]: - cfg.metadata_path[item_idx][obj_idx] = cfg.out_dirs[0] / Path( - f"{cfg.items_list[item_idx].stem}.wav.{obj_idx}.csv" - ) + # set new metadata files + try: + preproc_output_fmt = chain[0].out_fmt + except AttributeError: + preproc_output_fmt = chain[0].in_fmt + + cfg.metadata_path = [] + + for item_idx in range(len(cfg.items_list)): + list_item = [] + # ISM metadata + if "ISM" in preproc_output_fmt: + num_obj = int(preproc_output_fmt[3]) + for obj_idx in range(num_obj): + list_item.append(cfg.out_dirs[0] / Path(f"{cfg.items_list[item_idx].stem}.wav.{obj_idx}.csv")) + + # MASA metadata + if "MASA" in preproc_output_fmt: + list_item.append(cfg.out_dirs[0] / Path(f"{cfg.items_list[item_idx].stem}.wav.met")) + + # no metadata + if not "ISM" in preproc_output_fmt and not "MASA" in preproc_output_fmt: + list_item.append(None) + + cfg.metadata_path.append(list_item) + # remove already applied processing stage cfg.proc_chains = cfg.proc_chains[1:] cfg.tmp_dirs = cfg.tmp_dirs[1:] @@ -475,6 +493,7 @@ def process_item( # no conversion in preprocessing 2 bool_ism = "ISM" in p.in_fmt bool_masa = "MASA" in p.in_fmt + out_format = p.in_fmt elif isinstance(out_format, str): # if out format is string bool_ism = "ISM" in out_format -- GitLab From a45c817c8c52e0cd17af8523d5dbe3d70944cda5 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Fri, 17 Nov 2023 16:45:55 +0100 Subject: [PATCH 13/22] added osba to osba conversion --- ivas_processing_scripts/audiotools/audio.py | 6 ++++++ ivas_processing_scripts/audiotools/constants.py | 12 ++++++++++++ .../audiotools/convert/__init__.py | 9 +++++---- ivas_processing_scripts/audiotools/convert/osba.py | 12 ++++++++++++ ivas_processing_scripts/processing/ivas.py | 4 ++-- 5 files changed, 37 insertions(+), 6 deletions(-) diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index b9222354..544d977f 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -355,6 +355,12 @@ class SceneBasedAudio(Audio): name = "HOA2" elif name == "SBA3": name = "HOA3" + elif name == "PLANARSBA1": + name = "PLANARFOA" + elif name == "PLANARSBA2": + name = "PLANARHOA2" + elif name == "PLANARSBA3": + name = "PLANARHOA3" super().__init__(name) try: diff --git a/ivas_processing_scripts/audiotools/constants.py b/ivas_processing_scripts/audiotools/constants.py index ef7482ac..b2d07f1c 100755 --- a/ivas_processing_scripts/audiotools/constants.py +++ b/ivas_processing_scripts/audiotools/constants.py @@ -317,6 +317,18 @@ SCENE_BASED_AUDIO_FORMATS = { "num_channels": 16, "is_planar": False, }, + "PLANARSBA1": { + "num_channels": 4, + "is_planar": True, + }, + "PLANARSBA2": { + "num_channels": 9, + "is_planar": True, + }, + "PLANARSBA3": { + "num_channels": 16, + "is_planar": True, + }, } OMASA_AUDIO_FORMATS = { diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 38336828..e02f0b5a 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -360,10 +360,11 @@ def format_conversion( raise NotImplementedError( "ISM is not supported as an output for rendering! Only usable as pass-through" ) - if (isinstance(output, audio.OMASAAudio) and not isinstance(input, audio.OSBAAudio)) or (isinstance(output, audio.OSBAAudio) and not isinstance(input, audio.OMASAAudio)) and not input.name == output.name: - raise NotImplementedError( - "OMASA and OSBA only possible as output if input is OMASA or OSBA" - ) + if isinstance(output, audio.OMASAAudio) or isinstance(output, audio.OSBAAudio): + if not (isinstance(input, audio.OMASAAudio) or isinstance(input, audio.OSBAAudio)): + raise NotImplementedError( + "OMASA and OSBA only possible as output if input is OMASA or OSBA" + ) if logger: logger.debug(f"Format conversion: {input.name} -> {output.name}") diff --git a/ivas_processing_scripts/audiotools/convert/osba.py b/ivas_processing_scripts/audiotools/convert/osba.py index c1652b98..baa90420 100644 --- a/ivas_processing_scripts/audiotools/convert/osba.py +++ b/ivas_processing_scripts/audiotools/convert/osba.py @@ -109,6 +109,18 @@ def convert_osba( out.audio = np.concatenate((osba.audio[:, :osba.num_ism_channels], out_sba.audio), axis=1) + # OSBA -> OSBA + elif isinstance(out, audio.OSBAAudio): + # check if ism object number is the same + if out.num_ism_channels != osba.num_ism_channels: + raise ValueError("OSBA to OSBA conversion only possible if number of ISM objects matches") + + # only render SBA part + out_sba = audio.fromtype(out.name[4:]) + render_sba_to_sba(sba, out_sba) + + out.audio = np.concatenate((osba.audio[:, :osba.num_ism_channels], out_sba.audio), axis=1) + else: raise NotImplementedError( f"Conversion from {osba.name} to {out.name} is unsupported!" diff --git a/ivas_processing_scripts/processing/ivas.py b/ivas_processing_scripts/processing/ivas.py index 3caaf780..e270313e 100755 --- a/ivas_processing_scripts/processing/ivas.py +++ b/ivas_processing_scripts/processing/ivas.py @@ -360,9 +360,9 @@ class IVAS(Processing): return ["-mc", fmt.name] elif isinstance(fmt, audio.OSBAAudio): if fmt.is_planar: - return ["-ism_sba", f"-{str(fmt.num_ism_channels)}", str(fmt.ambi_order)] + metadata_files + return ["-ism_sba", str(fmt.num_ism_channels), f"-{str(fmt.ambi_order)}"] + metadata_files else: - return ["-ism_sba", f"+{str(fmt.num_ism_channels)}", str(fmt.ambi_order)] + metadata_files + return ["-ism_sba", str(fmt.num_ism_channels), f"+{str(fmt.ambi_order)}"] + metadata_files elif isinstance(fmt, audio.OMASAAudio): return ["-ism_masa", str(fmt.num_ism_channels), str(fmt.num_channels-fmt.num_ism_channels)] + metadata_files -- GitLab From cb06342aed1a93e0c2529e5a1906774003220c62 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Fri, 17 Nov 2023 16:48:30 +0100 Subject: [PATCH 14/22] removed traces of sba_fmt --- README.md | 7 ------- examples/TEMPLATE.yml | 6 ------ 2 files changed, 13 deletions(-) diff --git a/README.md b/README.md index 3178d8fc..a151695a 100755 --- a/README.md +++ b/README.md @@ -419,8 +419,6 @@ conditions_to_generate: # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] - ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats - # sba_fmt: "PLANARFOA" ### IVAS condition ############################### c07: @@ -448,8 +446,6 @@ conditions_to_generate: # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] - ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats - # sba_fmt: "PLANARFOA" ### EVS condition ################################ c08: @@ -473,8 +469,6 @@ conditions_to_generate: bin: ~/git/ivas-codec/EVS_dec ### Decoder output sampling rate; default = null (same as input) # fs: 48000 - ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats - # sba_fmt: "PLANARFOA" ``` @@ -547,7 +541,6 @@ This configuration has to match the channel configuration. If the provided list For the encoding stage `cod` and the decoding stage `dec`, the path to the IVAS_cod and IVAS_dec binaries can be specified under the key `bin`. Additionally some resampling can be applied by using the key `fs` followed by the desired sampling rate. The general bitstream processing configuration can be locally overwritten for each EVS and IVAS condition with the key `tx`. -For IVAS and EVS conditions the `sba_fmt` key is available to specify a SBA format of lower or same order compared to the input for SBA input formats. The additional key `evs_lfe_9k6bps_nb` is only available for EVS conditions and ensures a bitrate of 9.6kbps and narrow band processing of the LFE channel(s). #### IVAS The configuration of the IVAS condition is similar to the EVS condition. However, only one bitrate for all channels (and metadata) can be specified. diff --git a/examples/TEMPLATE.yml b/examples/TEMPLATE.yml index a906bd59..b09e327d 100755 --- a/examples/TEMPLATE.yml +++ b/examples/TEMPLATE.yml @@ -243,8 +243,6 @@ conditions_to_generate: ### Bitstream options # tx: ### For possible arguments see overall bitstream modification - ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats - # sba_fmt: "PLANARFOA" ### IVAS condition ############################### c07: @@ -275,8 +273,6 @@ conditions_to_generate: ### Bitstream options # tx: ### For possible arguments see overall bitstream modification - ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats - # sba_fmt: "PLANARFOA" ### EVS condition ################################ c08: @@ -305,8 +301,6 @@ conditions_to_generate: ### Bitstream options # tx: ### For possible arguments see overall bitstream modification - ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats - # sba_fmt: "PLANARFOA" ################################################ ### Post-processing -- GitLab From 9d5cad35af7c2fb8e9de5df3076ed769a4dc30a1 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Mon, 20 Nov 2023 13:26:16 +0100 Subject: [PATCH 15/22] adapted evs for planar osba --- ivas_processing_scripts/audiotools/audiofile.py | 5 ++++- ivas_processing_scripts/processing/evs.py | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ivas_processing_scripts/audiotools/audiofile.py b/ivas_processing_scripts/audiotools/audiofile.py index b29ba670..7d7f332a 100755 --- a/ivas_processing_scripts/audiotools/audiofile.py +++ b/ivas_processing_scripts/audiotools/audiofile.py @@ -349,6 +349,7 @@ def split_channels( in_nchans: int, out_nchans: int, is_planar: Optional[bool] = False, + is_planar_offset: Optional[int] = 0, in_fs: Optional[int] = 48000, ) -> None: """ @@ -366,6 +367,8 @@ def split_channels( Number of channels to be split is_planar: Optional[bool] If true vertical SBA channels are set to zero + is_planar_offset: Optional[int] + Offset of SBA due to OSBA (corresponds to num of ISM channels) in_fs: Optional[int] = 48000 Input sampling rate, default 48000 Hz @@ -386,7 +389,7 @@ def split_channels( x, in_fs = read(in_file, nchannels=in_nchans, fs=in_fs) if is_planar: - x[:, VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < in_nchans]] = 0 + x[:, VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < (in_nchans - is_planar_offset)] + is_planar_offset] = 0 # Write output files for idx, out_file in enumerate(out_filenames): diff --git a/ivas_processing_scripts/processing/evs.py b/ivas_processing_scripts/processing/evs.py index a487ade6..18becac0 100755 --- a/ivas_processing_scripts/processing/evs.py +++ b/ivas_processing_scripts/processing/evs.py @@ -177,7 +177,13 @@ class EVS(Processing): # flag for zeroing of channels for planar SBA formats is_planar = ( isinstance(self.in_fmt, audio.SceneBasedAudio) and self.in_fmt.is_planar + or + isinstance(self.in_fmt, audio.OSBAAudio) and self.in_fmt.is_planar ) + if isinstance(self.in_fmt, audio.OSBAAudio) and self.in_fmt.is_planar: + is_planar_offset = self.in_fmt.num_ism_channels + else: + is_planar_offset = 0 # Split the channels to prepare for multi-mono coding split_chan_files = [ @@ -191,6 +197,7 @@ class EVS(Processing): out_nchans=self.in_fmt.num_channels, in_fs=self.in_fs, is_planar=is_planar, + is_planar_offset=is_planar_offset, ) # run processing -- GitLab From c233c98bd9374484ac25b78f68e5aa4415a137df Mon Sep 17 00:00:00 2001 From: Treffehn Date: Mon, 20 Nov 2023 13:45:17 +0100 Subject: [PATCH 16/22] additional fix for osba evs --- ivas_processing_scripts/audiotools/audiofile.py | 7 ++++++- ivas_processing_scripts/processing/evs.py | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ivas_processing_scripts/audiotools/audiofile.py b/ivas_processing_scripts/audiotools/audiofile.py index 7d7f332a..632b2887 100755 --- a/ivas_processing_scripts/audiotools/audiofile.py +++ b/ivas_processing_scripts/audiotools/audiofile.py @@ -298,6 +298,7 @@ def combine( out_file: str, in_fs: Optional[int] = 48000, is_planar: Optional[bool] = False, + is_planar_offset: Optional[int] = 0, ) -> None: """ Combines audio files into one multi-channel file @@ -310,6 +311,10 @@ def combine( Output multi-channel audio file name (.pcm, .raw or .wav) in_fs: Optional[int] Input sampling rate, required for .pcm and .raw input file, default 48000 Hz + is_planar: Optional[bool] + If true vertical SBA channels are set to zero + is_planar_offset: Optional[int] + Offset of SBA due to OSBA (corresponds to num of ISM channels) Returns ------- @@ -338,7 +343,7 @@ def combine( # set vertical channels to zero if is_planar: - y[:, VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < len(in_filenames)]] = 0 + y[:, VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < (len(in_filenames) - is_planar_offset)] + is_planar_offset] = 0 write(out_file, y, fs=in_fs) diff --git a/ivas_processing_scripts/processing/evs.py b/ivas_processing_scripts/processing/evs.py index 18becac0..1094624e 100755 --- a/ivas_processing_scripts/processing/evs.py +++ b/ivas_processing_scripts/processing/evs.py @@ -260,7 +260,7 @@ class EVS(Processing): # combine the decoded channels into the output file if out_file.suffix in [".wav", ".pcm"]: - combine(split_chan_out, out_file, in_fs=self.out_fs, is_planar=is_planar) + combine(split_chan_out, out_file, in_fs=self.out_fs, is_planar=is_planar,is_planar_offset=is_planar_offset) if split_chan_bs_unprocessed != split_chan_bs and self.tx_condition: out_file_unprocessed = f"{Path(out_file.parent).joinpath(Path(out_file.name).with_suffix(''))}.noerror{out_file.suffix}" combine( @@ -268,6 +268,7 @@ class EVS(Processing): out_file_unprocessed, in_fs=self.out_fs, is_planar=is_planar, + is_planar_offset=is_planar_offset, ) # copy ISM metadata for ISM pass-through if isinstance(self.in_fmt, audio.ObjectBasedAudio) or isinstance(self.in_fmt, audio.OMASAAudio) or isinstance(self.in_fmt, audio.OSBAAudio): -- GitLab From 908cb84952f900dd84f3f6693df7ba93caf6a16d Mon Sep 17 00:00:00 2001 From: Treffehn Date: Mon, 20 Nov 2023 17:27:12 +0100 Subject: [PATCH 17/22] fixed metadate handling bugs --- ivas_processing_scripts/audiotools/convert/__init__.py | 4 ---- ivas_processing_scripts/processing/processing.py | 1 - 2 files changed, 5 deletions(-) diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index e02f0b5a..7ee3bfa8 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -82,10 +82,6 @@ def convert_file( if isinstance(in_fmt, str) and in_fmt.startswith("MASA") and in_meta is None: # if no MD file is provided, default to name (including .wav or .pcm!!!) + ".met" in_meta = [in_file.parent / (in_file.name + ".met")] - elif isinstance(in_fmt, str) and "MASA" in in_fmt and "ISM" in in_fmt: - if isinstance(in_meta, list) and len(in_meta) < (int(in_fmt[3])+int(in_fmt[8])): - in_meta_masa = in_file.parent / (in_file.name + ".met") - in_meta.append(in_meta_masa) input = audio.fromfile(in_fmt, in_file, in_fs, in_meta) diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index e85cda73..6530fcf5 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -515,7 +515,6 @@ def process_item( list_meta_step.append( tmp_dir / f"{in_file.stem.split('.')[0]}.{p.name}.wav.{idx}.csv" ) - processing_paths_meta.append(list_meta_step) # append masa metadata if bool_masa: -- GitLab From bfc48ce1cfb7f2becd883f32ef08f68abf9941d8 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Mon, 20 Nov 2023 19:20:08 +0100 Subject: [PATCH 18/22] small fix in metadata concatenation --- ivas_processing_scripts/audiotools/audio.py | 2 +- ivas_processing_scripts/audiotools/metadata.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index 544d977f..9aaf0124 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -619,7 +619,7 @@ def fromtype(fmt: str) -> Audio: def fromarray(fmt: str, x: np.ndarray, fs: int) -> Audio: """Wrap the given array into an audio format""" if x is None or not fs: - return ValueError("Both array and sampling rate must be specified!") + raise ValueError("Both array and sampling rate must be specified!") output = _get_audio_class(fmt)(fmt) diff --git a/ivas_processing_scripts/audiotools/metadata.py b/ivas_processing_scripts/audiotools/metadata.py index 6c878097..f38972ea 100755 --- a/ivas_processing_scripts/audiotools/metadata.py +++ b/ivas_processing_scripts/audiotools/metadata.py @@ -344,7 +344,7 @@ def concat_meta_from_file( frame_length = int(IVAS_FRAME_LEN_MS * audio_objects[0].fs // 1000) # pad and concatenate - concat_meta_all_obj = [None] * audio_objects[0].num_channels + concat_meta_all_obj = [None] * len(meta_files[0]) for audio_item in audio_objects: # check if audio is multiple of frame length -- GitLab From 981a2034e043c8a1f6ad2cd458ecbb498610b71f Mon Sep 17 00:00:00 2001 From: Treffehn Date: Tue, 5 Dec 2023 19:34:40 +0100 Subject: [PATCH 19/22] fixed small bug and masa naming in test --- .../audiotools/wrappers/bs1770.py | 2 +- ivas_processing_scripts/processing/processing.py | 3 ++- tests/data/test_MASA.yml | 12 ++++++------ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ivas_processing_scripts/audiotools/wrappers/bs1770.py b/ivas_processing_scripts/audiotools/wrappers/bs1770.py index a834bfe2..8e1f94ba 100755 --- a/ivas_processing_scripts/audiotools/wrappers/bs1770.py +++ b/ivas_processing_scripts/audiotools/wrappers/bs1770.py @@ -196,7 +196,7 @@ def get_loudness( input, audio.MetadataAssistedSpatialAudio ): loudness_format = "7_1_4" - elif isinstance(input, audio.ObjectBasedAudio) or isinstance(audio.OMASAAudio) or isinstance(audio.OSBAAudio): + elif isinstance(input, audio.ObjectBasedAudio) or isinstance(input, audio.OMASAAudio) or isinstance(input, audio.OSBAAudio): loudness_format = "BINAURAL" elif hasattr(input, "layout_file"): loudness_format = input.layout_file diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 6530fcf5..17ee6dcd 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -464,7 +464,8 @@ def process_item( tmp_file_meta = [] if in_meta: for im in in_meta: - tmp_file_meta.append(tmp_dir.joinpath(Path(im).name)) + if im is not None: + tmp_file_meta.append(tmp_dir.joinpath(Path(im).name)) # assemble a list of files to be used during the processing chain out_dir_wav = False diff --git a/tests/data/test_MASA.yml b/tests/data/test_MASA.yml index b4a3eebd..179dcc9d 100644 --- a/tests/data/test_MASA.yml +++ b/tests/data/test_MASA.yml @@ -184,7 +184,7 @@ conditions_to_generate: # - 32000 ### Encoder options cod: - fmt: "MASA2" + fmt: "MASA2DIR1" ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary) #bin: ~/git/ivas-codec/IVAS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) @@ -196,7 +196,7 @@ conditions_to_generate: ### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary) #bin: ~/git/ivas-codec/IVAS_dec ### Decoder output format; default = postprocessing fmt - fmt: "MASA2" + fmt: "MASA2DIR1" ### Decoder output sampling rate; default = null (same as input) # fs: 48000 ### Additional commandline options; default = null @@ -215,7 +215,7 @@ conditions_to_generate: # - 32000 ### Encoder options cod: - fmt: "MASA2" + fmt: "MASA2DIR1" ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary) #bin: ~/git/ivas-codec/IVAS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) @@ -250,13 +250,13 @@ conditions_to_generate: # - 9600 - [13200, 13200, 8000, 13200, 9600] cod: - fmt: "MASA2" + fmt: "MASA2DIR1" ### Path to encoder binary; default search for EVS_cod in bin folder (primary) and PATH (secondary) #bin: EVS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) # fs: 32000 dec: - fmt: "MASA2" + fmt: "MASA2DIR1" ### Path to encoder binary; default search for EVS_dec in bin folder (primary) and PATH (secondary) #bin: EVS_dec ### Decoder output sampling rate; default = null (same as input) @@ -274,7 +274,7 @@ conditions_to_generate: ### Post-processing is required and can not be omitted postprocessing: ### REQUIRED: Target format for output - fmt: ["MASA2", "BINAURAL"] + fmt: ["MASA2DIR1", "BINAURAL"] ### REQUIRED: Target sampling rate in Hz for resampling fs: 48000 ### Low-pass cut-off frequency in Hz; default = null (no filtering) -- GitLab From 1bf93021474da467c212fca336b14485c1aaee1a Mon Sep 17 00:00:00 2001 From: Treffehn Date: Tue, 12 Dec 2023 18:06:52 +0100 Subject: [PATCH 20/22] small bug fix --- ivas_processing_scripts/audiotools/audio.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index 9aaf0124..12c01058 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -225,11 +225,16 @@ class MetadataAssistedSpatialAudio(Audio): cls, name: str, filename: Path, - metadata_file: str, + metadata_file: Union[str, list], fs: Optional[int] = None, ) -> "MetadataAssistedSpatialAudio": obj = super()._from_file(name, filename, fs) - obj.metadata_file = Path(metadata_file) + if isinstance(metadata_file, list): + if len(metadata_file) > 1: + warn("Only first metadata file used. Additional metadata ignored for MASA") + obj.metadata_file = Path(metadata_file[0]) + else: + obj.metadata_file = Path(metadata_file) return obj @classmethod -- GitLab From 646e9b7ffcca71e553fca3e520c642f677e054c7 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Thu, 14 Dec 2023 17:51:57 +0100 Subject: [PATCH 21/22] fixed pipeline --- ivas_processing_scripts/processing/postprocessing.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ivas_processing_scripts/processing/postprocessing.py b/ivas_processing_scripts/processing/postprocessing.py index b96d6b5c..e92667a7 100755 --- a/ivas_processing_scripts/processing/postprocessing.py +++ b/ivas_processing_scripts/processing/postprocessing.py @@ -60,9 +60,15 @@ class Postprocessing(Processing): if in_meta: in_meta_noerror = [] for meta in in_meta: + if str(meta).endswith(".met"): + # MASA + num_suffix = 2 + else: + # ISM + num_suffix = 3 path_parts = str(meta).split(".") - suffix = ".".join(path_parts[-3:]) - name = ".".join(path_parts[:-3]) + suffix = ".".join(path_parts[-num_suffix:]) + name = ".".join(path_parts[:-num_suffix]) in_meta_noerror.append(Path(f"{name}.noerror.{suffix}")) else: in_meta_noerror = None -- GitLab From 83e6f94a4d55b5c98fb3519d0b25f975abb20e7d Mon Sep 17 00:00:00 2001 From: Treffehn Date: Fri, 16 Feb 2024 17:52:31 +0100 Subject: [PATCH 22/22] added masa and omasa as input format --- ivas_processing_scripts/__init__.py | 32 +++++-- ivas_processing_scripts/audiotools/audio.py | 2 +- .../audiotools/convert/__init__.py | 4 +- .../audiotools/metadata.py | 84 ++++++++++++++++++- ivas_processing_scripts/processing/ivas.py | 11 ++- 5 files changed, 120 insertions(+), 13 deletions(-) diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index e3858063..c8c3a65d 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -36,7 +36,7 @@ from itertools import product from multiprocessing import Pool from time import sleep -from ivas_processing_scripts.audiotools.metadata import check_ISM_metadata +from ivas_processing_scripts.audiotools.metadata import check_ISM_metadata, check_MASA_metadata from ivas_processing_scripts.constants import ( LOGGER_DATEFMT, LOGGER_FORMAT, @@ -112,24 +112,44 @@ def main(args): cfg.items_list, cfg.preprocessing_2["concatenation_order"] ) + metadata = [[]] * len(cfg.items_list) # check for ISM metadata if cfg.input["fmt"].startswith("ISM"): - metadata = check_ISM_metadata( + metadata_ISM = check_ISM_metadata( cfg.metadata_path, num_objects=int(cfg.input["fmt"][3]), num_items=len(cfg.items_list), item_names=cfg.items_list, ) # print info about found and used metadata files - for i in range(len(metadata)): + for i in range(len(metadata_ISM)): metadata_str = [] - for o in range(len(metadata[i])): - metadata_str.append(str(metadata[i][o])) + for o in range(len(metadata_ISM[i])): + metadata_str.append(str(metadata_ISM[i][o])) logger.debug( f" ISM metadata files item {cfg.items_list[i]}: {', '.join(metadata_str)}" ) + metadata = metadata_ISM - else: + # check for MASA metadata + if "MASA" in cfg.input["fmt"]: + metadata_MASA = check_MASA_metadata( + cfg.metadata_path, + num_items=len(cfg.items_list), + item_names=cfg.items_list, + ) + # print info about found and used metadata files + for i in range(len(metadata_MASA)): + metadata_str = [] + for o in range(len(metadata_MASA[i])): + metadata_str.append(str(metadata_MASA[i][o])) + logger.debug( + f" MASA metadata file item {cfg.items_list[i]}: {', '.join(metadata_str)}" + ) + for i, meta in enumerate(metadata): + meta.extend(metadata_MASA[i]) + + if not cfg.input["fmt"].startswith("ISM") and not "MASA" in cfg.input["fmt"]: metadata = [None] * len(cfg.items_list) cfg.metadata_path = metadata diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index 12c01058..1b131260 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -445,7 +445,7 @@ class OMASAAudio(Audio): # check if number of metadata files matches format if self.num_ism_channels != len(self.metadata_files)-1: raise ValueError( - f"Mismatch between number of ism channels [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]" + f"Mismatch between number of ism channels [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]. Note: metadata should also include masa metadata file" ) self.object_pos = [] diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 9c25b0bb..0022e0d9 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -123,11 +123,13 @@ def convert_file( if isinstance(input, audio.OMASAAudio): # use existing metadata files output.metadata_files = copy(input.metadata_files) - else: + elif isinstance(input, audio.OSBAAudio): # fabricate metadata file name masa_meta_file_name = Path(out_file).parent / (Path(out_file).name + ".met") output.metadata_files = copy(input.metadata_files) output.metadata_files.append(masa_meta_file_name) + else: + raise NotImplementedError("Can only convert to OMASA from OSBA") output.object_pos = copy(input.object_pos) elif isinstance(output, audio.OSBAAudio): diff --git a/ivas_processing_scripts/audiotools/metadata.py b/ivas_processing_scripts/audiotools/metadata.py index f38972ea..8ba46923 100755 --- a/ivas_processing_scripts/audiotools/metadata.py +++ b/ivas_processing_scripts/audiotools/metadata.py @@ -467,7 +467,7 @@ def check_ISM_metadata( list_meta = [] if in_meta is None: for item in item_names: - list_item = metadata_search(Path(item).parent, [item], num_objects) + list_item = metadata_search_ISM(Path(item).parent, [item], num_objects) list_meta.append(list_item) else: if len(in_meta) == 1 and num_items != 1: @@ -479,7 +479,7 @@ def check_ISM_metadata( 'Only one metadata path is given but not with key "all_items".' ) - list_meta = metadata_search(path_meta, item_names, num_objects) + list_meta = metadata_search_ISM(path_meta, item_names, num_objects) elif num_items == len(in_meta): # search for every item individually @@ -495,7 +495,7 @@ def check_ISM_metadata( if not isinstance(current_item, list): # automatic search in folder - list_item = metadata_search( + list_item = metadata_search_ISM( current_item, [item_names[item_idx]], num_objects ) @@ -514,7 +514,55 @@ def check_ISM_metadata( return list_meta -def metadata_search( +def check_MASA_metadata( + in_meta: dict, + num_items: int, + item_names: Optional[list] = None, +) -> list: + """Find MASA metadata""" + + list_meta = [] + if in_meta is None: + for item in item_names: + list_item = metadata_search_MASA(Path(item).parent, [item]) + list_meta.append(list_item) + else: + if len(in_meta) == 1 and num_items != 1: + # automatic search for metadata files in folder for all items and objects + try: + path_meta = in_meta["all_items"] + except KeyError: + raise ValueError( + 'Only one metadata path is given but not with key "all_items".' + ) + + list_meta = metadata_search_MASA(path_meta, item_names) + + elif num_items == len(in_meta): + # search for every item individually + for item_idx in range(num_items): + # try to use item_names as keys + try: + if item_names: + current_item = in_meta[item_names[item_idx].name] + else: + raise KeyError + except KeyError: + current_item = in_meta[f"item{item_idx + 1}"] + + if not isinstance(current_item, list): + # automatic search in folder + list_item = metadata_search_MASA(current_item, [item_names[item_idx]]) + + list_meta.append(list_item) + else: + raise ValueError("Number of metadata inputs does not match number of items") + + # return list of lists of metadata files + return list_meta + + +def metadata_search_ISM( in_meta_path: Union[str, Path], item_names: list[Union[str, Path]], num_objects: int, @@ -544,6 +592,34 @@ def metadata_search( return list_meta +def metadata_search_MASA( + in_meta_path: Union[str, Path], + item_names: list[Union[str, Path]], +) -> list[list[Union[Path, str]]]: + """Search for MASA metadata with structure item_name.met in in_meta folder""" + + if not item_names: + raise ValueError("Item names not provided, can't search for metadata") + + list_meta = [] + for item in item_names: + list_item = [] + file_name_meta = in_meta_path / Path(item.stem).with_suffix( + f"{item.suffix}.met" + ) + # check if file exists and add to list + if file_name_meta.is_file(): + list_item.append(Path(file_name_meta).resolve()) + else: + raise ValueError(f"Metadata file {file_name_meta} not found.") + if len(item_names) == 1: + list_meta = list_item + else: + list_meta.append(list_item) + + return list_meta + + def add_remove_preamble( metadata, preamble, diff --git a/ivas_processing_scripts/processing/ivas.py b/ivas_processing_scripts/processing/ivas.py index 26450b5c..981fca1f 100755 --- a/ivas_processing_scripts/processing/ivas.py +++ b/ivas_processing_scripts/processing/ivas.py @@ -305,7 +305,16 @@ class IVAS(Processing): cmd.extend(["-q"]) if self.out_fmt.name.startswith("ISM") or self.out_fmt.name.startswith("MASA"): - output_format = "EXT" + # the SBA part of OSBA is always rendered to HOA3 for EXT by IVAS + if isinstance(self.in_fmt, audio.OSBAAudio) and self.in_fmt.name[:]: + if self.out_fmt.num_channels != (16 + self.in_fmt.num_ism_channels): + raise ValueError("When using EXT output for IVAS for OSBA make sure the specified decoder format is ISMxSBA3") + else: + output_format = "EXT" + else: + if self.in_fmt.name != self.out_fmt.name: + raise ValueError("ISM and MASA output format for IVAS only possible if input and output format match") + output_format = "EXT" elif self.in_fmt.name == "MONO": if self.out_fmt.name == "MONO": output_format = "" # EVS -- GitLab