class OMASAAudio(Audio):
    """Sub-class for the combined OMASA format (ISM objects + MASA).

    The class is still a stub: it only sets up the format attributes and the
    (empty) metadata containers. Loading audio/metadata from files is not
    implemented yet.

    Attributes set by ``__init__``:
        num_channels:     taken from ``OMASA_AUDIO_FORMATS``
        num_ism_channels: taken from ``OMASA_AUDIO_FORMATS`` when the table
                          provides it, otherwise derived from the format name
        object_pos:       list of per-object position arrays (initially empty)
        metadata_files:   ISM metadata files first, followed by MASA metadata
    """

    # TODO treffehn: write class
    #   Still missing: _from_file / _from_filelist / init_metadata.
    #   OSBAAudio implements the same three methods for the ISM part and can
    #   serve as a template; the MASA metadata handling has to be added.

    def __init__(self, name: str):
        """Initialise the object for the OMASA format called *name*.

        Raises:
            ValueError: if *name* is not a key of ``OMASA_AUDIO_FORMATS``.
        """
        super().__init__(name)
        key = name.upper()
        try:
            format_attrs = OMASA_AUDIO_FORMATS[key]
        except KeyError:
            # the KeyError carries no extra information for the caller
            raise ValueError(f"Unsupported OMASA audio format {name}") from None
        self.__dict__.update(format_attrs)

        if "num_ism_channels" not in format_attrs:
            # convert_omasa() splits the signal at num_ism_channels. Format
            # names follow the scheme "ISM<n>MASA<m>", so <n> is the number
            # of object channels.
            self.num_ism_channels = int(key[len("ISM") : key.index("MASA")])

        self.object_pos = []
        self.metadata_files = []  # first ISM metadata followed by masa metadata
list[Union[str, Path]], fs: Optional[int] = None, ) -> "OSBAAudio": obj = super()._from_file(name, filename, fs) if metadata_files is not None: obj.metadata_files = [Path(f) for f in metadata_files] else: # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv for obj_idx in range(obj.num_ism_channels): file_name_meta = filename.with_suffix( f"{filename.suffix}.{obj_idx}.csv" ) if file_name_meta.is_file(): obj.metadata_files.append(file_name_meta) else: raise ValueError(f"Metadata file {file_name_meta} not found.") warn( f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}" ) obj.init_metadata() return obj @classmethod def _from_filelist( cls, name: str, filename: Path, metadata_files: list[Union[str, Path]], fs: Optional[int] = None, ) -> "OSBAAudio": obj = super()._from_filelist(name, filename, fs) obj.metadata_files = [Path(f) for f in metadata_files] obj.init_metadata() return obj def init_metadata(self): # check if number of metadata files matches format if self.num_ism_channels != len(self.metadata_files): raise ValueError( f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]" ) self.object_pos = [] for i, f in enumerate(self.metadata_files): pos = np.genfromtxt(f, delimiter=",") # check if metadata has right number of columns num_columns = pos.shape[1] if num_columns < 2: raise ValueError( "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory." 
) elif num_columns > NUMBER_COLUMNS_ISM_METADATA: raise ValueError("Too many columns in metadata") # pad metadata to max number of columns if num_columns < NUMBER_COLUMNS_ISM_METADATA: pos = np.hstack( [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])] ) # check if metadata is longer than file -> cut off num_frames = int( np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000)) ) if num_frames < pos.shape[0]: pos = pos[:num_frames] # check if metadata is shorter than file -> loop elif num_frames > pos.shape[0]: pos_loop = np.zeros((num_frames, pos.shape[1])) pos_loop[: pos.shape[0]] = pos for idx in range(pos.shape[0], num_frames): pos_loop[idx, :2] = pos[idx % pos.shape[0], :2] pos = pos_loop # wrap metadata to target value range for j in range(num_frames): pos[j, 0], pos[j, 1] = wrap_angles(pos[j, 0], pos[j, 1], clip_ele=True) self.object_pos.append(pos) def _get_audio_class(fmt) -> Audio: """Return a child audio class corresponding to the specifed format""" if fmt in BINAURAL_AUDIO_FORMATS.keys(): Loading @@ -387,9 +590,13 @@ def _get_audio_class(fmt) -> Audio: elif fmt in SCENE_BASED_AUDIO_FORMATS.keys(): return SceneBasedAudio elif ( fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() or CHANNEL_BASED_AUDIO_ALTNAMES.keys() fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() or fmt in CHANNEL_BASED_AUDIO_ALTNAMES.keys() ): return ChannelBasedAudio elif fmt in OSBA_AUDIO_FORMATS.keys(): return OSBAAudio elif fmt in OMASA_AUDIO_FORMATS.keys(): return OMASAAudio elif Path(fmt).suffix == ".txt": return ChannelBasedAudio else: Loading ivas_processing_scripts/audiotools/constants.py +80 −0 Original line number Diff line number Diff line Loading @@ -303,6 +303,85 @@ SCENE_BASED_AUDIO_FORMATS = { }, } OMASA_AUDIO_FORMATS = { "ISM1MASA1": { "num_channels": 2, }, "ISM1MASA2": { "num_channels": 3, }, "ISM2MASA1": { "num_channels": 3, }, "ISM2MASA2": { "num_channels": 4, }, "ISM3MASA1": { "num_channels": 4, }, "ISM3MASA2": { "num_channels": 5, }, "ISM4MASA1": { 
"num_channels": 5, }, "ISM4MASA2": { "num_channels": 6, }, } OSBA_AUDIO_FORMATS = { "ISM1SBA1": { "num_channels": 5, "num_ism_channels": 1, }, "ISM1SBA2": { "num_channels": 10, "num_ism_channels": 1, }, "ISM1SBA3": { "num_channels": 17, "num_ism_channels": 1, }, "ISM2SBA1": { "num_channels": 6, "num_ism_channels": 2, }, "ISM2SBA2": { "num_channels": 11, "num_ism_channels": 2, }, "ISM2SBA3": { "num_channels": 18, "num_ism_channels": 2, }, "ISM3SBA1": { "num_channels": 7, "num_ism_channels": 3, }, "ISM3SBA2": { "num_channels": 12, "num_ism_channels": 3, }, "ISM3SBA3": { "num_channels": 19, "num_ism_channels": 3, }, "ISM4SBA1": { "num_channels": 8, "num_ism_channels": 4, }, "ISM4SBA2": { "num_channels": 13, "num_ism_channels": 4, }, "ISM4SBA3": { "num_channels": 20, "num_ism_channels": 4, }, } SCENE_METADATA_FORMATS = {"META"} AUDIO_FORMATS = [ Loading @@ -311,6 +390,7 @@ AUDIO_FORMATS = [ METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, OBJECT_BASED_AUDIO_FORMATS, SCENE_BASED_AUDIO_FORMATS, OMASA_AUDIO_FORMATS, ] Loading ivas_processing_scripts/audiotools/convert/__init__.py +19 −3 Original line number Diff line number Diff line Loading @@ -43,6 +43,8 @@ from ivas_processing_scripts.audiotools.convert.channelbased import convert_chan from ivas_processing_scripts.audiotools.convert.masa import convert_masa from ivas_processing_scripts.audiotools.convert.objectbased import convert_objectbased from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased from ivas_processing_scripts.audiotools.convert.osba import convert_osba from ivas_processing_scripts.audiotools.convert.omasa import convert_omasa from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm from ivas_processing_scripts.audiotools.wrappers.esdru import esdru from ivas_processing_scripts.audiotools.wrappers.filter import ( Loading Loading @@ -311,28 +313,38 @@ def format_conversion( """Convert one audio format to another""" # validation # check for MASA/OMASA as output if 
isinstance(output, audio.MetadataAssistedSpatialAudio) and not ( isinstance(input, audio.SceneBasedAudio) or isinstance(input, audio.MetadataAssistedSpatialAudio) ): raise NotImplementedError("Can only convert to MASA from SBA") if isinstance(output, audio.OMASAAudio) and not ( isinstance(input, audio.OSBAAudio) or isinstance(input, audio.OMASAAudio) ): raise NotImplementedError("Can only convert to OMASA from OSBA") if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name: # check for ISM (also OMASA and OSBA) as output if (isinstance(output, audio.ObjectBasedAudio) or isinstance(output, audio.OMASAAudio) or isinstance(output, audio.OSBAAudio)) and input.name != output.name: raise NotImplementedError( "ISM is not supported as an output for rendering! Only usable as pass-through" "ISM (also in combined formats) is not supported as an output for rendering! Only usable as pass-through" ) if logger: logger.debug(f"Format conversion: {input.name} -> {output.name}") # format conversion # check if input and output format are the same if (fmt := input.name) == output.name or ( input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL") ): output.audio = input.audio if fmt.startswith("MASA"): output.metadata_file = input.metadata_file elif fmt.startswith("ISM"): elif fmt.startswith("ISM"): # also includes combined formats output.metadata_files = list(output.metadata_files) else: if isinstance(input, audio.BinauralAudio): raise NotImplementedError( Loading @@ -346,6 +358,10 @@ def format_conversion( convert_objectbased(input, output, **kwargs) elif isinstance(input, audio.SceneBasedAudio): convert_scenebased(input, output, **kwargs) elif isinstance(input, audio.OSBAAudio): convert_osba(input, output, **kwargs) elif isinstance(input, audio.OMASAAudio): convert_omasa(input, output, **kwargs) else: raise NotImplementedError( f"Unknown or unsupported audio format {input.name}" Loading ivas_processing_scripts/audiotools/convert/omasa.py 0 → 100644 
+112 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 # # (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, # Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., # Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, # Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other # contributors to this repository. All Rights Reserved. # # This software is protected by copyright law and by international treaties. # The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, # Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., # Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, # Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other # contributors to this repository retain full ownership rights in their respective contributions in # the software. This notice grants no license of any kind, including but not limited to patent # license, nor is any license granted by implication, estoppel or otherwise. # # Contributors are required to enter into the IVAS codec Public Collaboration agreement before making # contributions. # # This software is provided "AS IS", without any express or implied warranties. The software is in the # development stage. It is intended exclusively for experts who have experience with such software and # solely for the purpose of inspection. All implied warranties of non-infringement, merchantability # and fitness for a particular purpose are hereby disclaimed and excluded. 
# # Any dispute, controversy or claim arising under or in relation to providing this software shall be # submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in # accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and # the United Nations Convention on Contracts on the International Sales of Goods. # import copy from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \ render_oba_to_sba from ivas_processing_scripts.audiotools.convert.masa import render_masa_to_binaural, render_masa_to_cba, render_masa_to_sba """ OMASAAudio functions """ def convert_omasa( omasa: audio.OMASAAudio, out: audio.Audio, **kwargs, ) -> audio.Audio: """Convert an OMASA signal to the requested output format""" # split OMASA object in ISM and MASA object oba = audio.fromarray("ISM" + str(omasa.num_ism_channels), omasa.audio[:, :omasa.num_ism_channels], omasa.fs) oba.metadata_files = omasa.metadata_files oba.object_pos = omasa.object_pos masa = audio.fromarray("MASA" + str(omasa.num_channels-omasa.num_ism_channels), omasa.audio[:, omasa.num_ism_channels:], omasa.fs) # OMASA -> Binaural if isinstance(out, audio.BinauralAudio): # render MASA and ISM part separately # ISM out_ism = copy.deepcopy(out) render_oba_to_binaural(oba, out_ism, **kwargs) # MASA out_masa = copy.deepcopy(out) render_masa_to_binaural(masa, out_masa, **kwargs) # combine results out.audio = out_ism.audio + out_masa.audio # OMASA -> CBA elif isinstance(out, audio.ChannelBasedAudio): # render MASA and ISM part separately # ISM out_ism = copy.deepcopy(out) render_oba_to_cba(oba, out_ism) # MASA out_masa = copy.deepcopy(out) render_masa_to_cba(masa, out_masa) # combine results out.audio = out_ism.audio + out_masa.audio # OMASA -> SBA elif isinstance(out, audio.SceneBasedAudio): # render MASA and ISM part separately # ISM out_ism = 
def _render_ism_plus_sba(oba, sba, out, render_ism, render_sba, **kwargs):
    """Render the ISM and the SBA part separately and sum them into ``out.audio``.

    Both renderers write into their own deep copy of *out*, so neither of them
    sees the result of the other one.
    """
    out_ism = copy.deepcopy(out)
    render_ism(oba, out_ism, **kwargs)

    out_sba = copy.deepcopy(out)
    render_sba(sba, out_sba, **kwargs)

    out.audio = out_ism.audio + out_sba.audio


def convert_osba(
    osba: audio.OSBAAudio,
    out: audio.Audio,
    **kwargs,
) -> audio.Audio:
    """Convert an OSBA signal to the requested output format.

    The OSBA signal is split into its ISM part (the first
    ``osba.num_ism_channels`` channels) and its SBA part (the remaining
    channels). For binaural, channel-based and scene-based output both parts
    are rendered separately and summed. For OMASA output the ISM channels are
    passed through and only the SBA part is converted to MASA.

    Args:
        osba: input signal
        out: output object; its ``audio`` attribute is overwritten
        **kwargs: forwarded to the binaural renderers only

    Returns:
        *out*

    Raises:
        NotImplementedError: for unsupported output formats, or if an OMASA
            output does not carry the same number of objects as the input.
    """
    num_ism = osba.num_ism_channels

    # split OSBA object in ISM and SBA object
    oba = audio.fromarray("ISM" + str(num_ism), osba.audio[:, :num_ism], osba.fs)
    oba.metadata_files = osba.metadata_files
    oba.object_pos = osba.object_pos
    sba = audio.fromarray(
        "SBA" + str(osba.ambi_order), osba.audio[:, num_ism:], osba.fs
    )

    # OSBA -> Binaural
    if isinstance(out, audio.BinauralAudio):
        _render_ism_plus_sba(
            oba, sba, out, render_oba_to_binaural, render_sba_to_binaural, **kwargs
        )

    # OSBA -> CBA
    elif isinstance(out, audio.ChannelBasedAudio):
        _render_ism_plus_sba(oba, sba, out, render_oba_to_cba, render_sba_to_cba)

    # OSBA -> SBA
    elif isinstance(out, audio.SceneBasedAudio):
        _render_ism_plus_sba(oba, sba, out, render_oba_to_sba, render_sba_to_sba)

    # OSBA -> OMASA
    elif isinstance(out, audio.OMASAAudio):
        # TODO (treffehn): MASA metadata of the rendered part is not attached
        # to `out` yet
        import numpy as np  # only needed in this branch

        # the objects are passed through untouched, so the object count of
        # input and output has to be the same ("ISM<n>..." naming scheme)
        if not out.name.upper().startswith("ISM" + str(num_ism)):
            raise NotImplementedError(
                f"Conversion from {osba.name} to {out.name} is unsupported!"
            )

        # only render SBA part; the intermediate object needs the number of
        # MASA transport channels in its name (e.g. "MASA1"/"MASA2")
        out_masa = audio.fromtype("MASA" + str(out.num_channels - num_ism))
        render_sba_to_masa(sba, out_masa)

        # build the output array instead of writing into out.audio, which is
        # not allocated at this point
        out.audio = np.hstack((osba.audio[:, :num_ism], out_masa.audio))
        out.metadata_files = list(osba.metadata_files)
        out.object_pos = osba.object_pos

    else:
        raise NotImplementedError(
            f"Conversion from {osba.name} to {out.name} is unsupported!"
        )

    return out
ivas_processing_scripts/audiotools/audio.py +208 −1 Original line number Diff line number Diff line Loading @@ -48,6 +48,8 @@ from ivas_processing_scripts.audiotools.constants import ( NUMBER_COLUMNS_ISM_METADATA, OBJECT_BASED_AUDIO_FORMATS, SCENE_BASED_AUDIO_FORMATS, OMASA_AUDIO_FORMATS, OSBA_AUDIO_FORMATS, ) from .EFAP import wrap_angles Loading Loading @@ -376,6 +378,207 @@ class SceneBasedAudio(Audio): return super()._from_filelist(name, filename, fs) class OMASAAudio(Audio): """Sub-class for combined OMASA format""" # TODO treffehn: write class def __init__(self, name: str): super().__init__(name) try: self.__dict__.update(OMASA_AUDIO_FORMATS[name.upper()]) except KeyError: raise ValueError(f"Unsupported OMASA audio format {name}") self.object_pos = [] self.metadata_files = [] # first ISM metadata followed by masa metadata # @classmethod # def _from_file( # cls, # name: str, # filename: Union[str, Path], # metadata_files_ism: list[Union[str, Path]], # fs: Optional[int] = None, # ) -> "ObjectBasedAudio": # obj = super()._from_file(name, filename, fs) # if metadata_files is not None: # obj.metadata_files = [Path(f) for f in metadata_files] # else: # # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv # for obj_idx in range(obj.num_channels): # file_name_meta = filename.with_suffix( # f"{filename.suffix}.{obj_idx}.csv" # ) # if file_name_meta.is_file(): # obj.metadata_files.append(file_name_meta) # else: # raise ValueError(f"Metadata file {file_name_meta} not found.") # warn( # f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}" # ) # # obj.init_metadata() # return obj # # @classmethod # def _from_filelist( # cls, # name: str, # filename: Path, # metadata_files: list[Union[str, Path]], # fs: Optional[int] = None, # ) -> "ObjectBasedAudio": # obj = super()._from_filelist(name, filename, fs) # obj.metadata_files = [Path(f) for f in metadata_files] # obj.init_metadata() # return obj # # def 
class OSBAAudio(Audio):
    """Sub-class for OSBA audio (ISM object channels followed by SBA channels).

    Attributes set by ``__init__``:
        num_channels / num_ism_channels: taken from ``OSBA_AUDIO_FORMATS``
        ambi_order:     derived from the number of non-ISM channels
        metadata_files: one ISM metadata CSV file per object
        object_pos:     one position array per object, see ``init_metadata``
    """

    def __init__(self, name: str):
        """Initialise the object for the OSBA format called *name*.

        Raises:
            ValueError: if *name* is not a key of ``OSBA_AUDIO_FORMATS``.
        """
        super().__init__(name)
        try:
            self.__dict__.update(OSBA_AUDIO_FORMATS[name.upper()])
        except KeyError:
            raise ValueError(f"Unsupported OSBA audio format {name}") from None
        self.object_pos = []
        self.metadata_files = []
        # the SBA part has (order + 1)^2 channels
        self.ambi_order = int(
            np.sqrt(self.num_channels - self.num_ism_channels) - 1
        )

    @classmethod
    def _from_file(
        cls,
        name: str,
        filename: Union[str, Path],
        metadata_files: Optional[list[Union[str, Path]]],
        fs: Optional[int] = None,
    ) -> "OSBAAudio":
        """Create the object from an audio file plus ISM metadata files.

        If *metadata_files* is None, one file per object is searched next to
        the audio file using the scheme ``<audio file name>.<object idx>.csv``.

        Raises:
            ValueError: if a metadata file following that scheme is missing,
                or if the metadata does not fit the format (``init_metadata``).
        """
        obj = super()._from_file(name, filename, fs)
        if metadata_files is not None:
            obj.metadata_files = [Path(f) for f in metadata_files]
        else:
            # filename may be a plain string, with_suffix() needs a Path
            filename = Path(filename)
            # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv
            for obj_idx in range(obj.num_ism_channels):
                file_name_meta = filename.with_suffix(
                    f"{filename.suffix}.{obj_idx}.csv"
                )
                if file_name_meta.is_file():
                    obj.metadata_files.append(file_name_meta)
                else:
                    raise ValueError(f"Metadata file {file_name_meta} not found.")
            warn(
                f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}"
            )

        obj.init_metadata()
        return obj

    @classmethod
    def _from_filelist(
        cls,
        name: str,
        filename: Path,
        metadata_files: list[Union[str, Path]],
        fs: Optional[int] = None,
    ) -> "OSBAAudio":
        """Create the object from a file list plus ISM metadata files."""
        obj = super()._from_filelist(name, filename, fs)
        obj.metadata_files = [Path(f) for f in metadata_files]
        obj.init_metadata()
        return obj

    def init_metadata(self):
        """Read all ISM metadata files into ``self.object_pos``.

        Every file is padded to ``NUMBER_COLUMNS_ISM_METADATA`` columns with
        the values from ``DEFAULT_ISM_METADATA``, cut or extended to the
        number of frames of the audio signal, and its first two columns
        (azimuth, elevation) are passed through ``wrap_angles``.

        Raises:
            ValueError: if the number of metadata files does not match the
                number of ISM channels or a file has an invalid column count.
        """
        # check if number of metadata files matches format
        if self.num_ism_channels != len(self.metadata_files):
            raise ValueError(
                f"Mismatch between number of ISM channels [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]"
            )

        # number of frames covered by the audio signal
        num_frames = int(
            np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000))
        )

        self.object_pos = []
        for meta_file in self.metadata_files:
            # a file with a single row is returned as 1-D array by genfromtxt;
            # treat it as one frame so that the column checks below work
            pos = np.atleast_2d(np.genfromtxt(meta_file, delimiter=","))

            # check if metadata has right number of columns
            num_columns = pos.shape[1]
            if num_columns < 2:
                raise ValueError(
                    "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory."
                )
            elif num_columns > NUMBER_COLUMNS_ISM_METADATA:
                raise ValueError("Too many columns in metadata")

            # pad metadata to max number of columns
            if num_columns < NUMBER_COLUMNS_ISM_METADATA:
                pos = np.hstack(
                    [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])]
                )

            # check if metadata is longer than file -> cut off
            if num_frames < pos.shape[0]:
                pos = pos[:num_frames]
            # check if metadata is shorter than file -> loop
            elif num_frames > pos.shape[0]:
                pos_loop = np.zeros((num_frames, pos.shape[1]))
                pos_loop[: pos.shape[0]] = pos
                # NOTE(review): only azimuth and elevation are repeated, the
                # remaining columns of the added frames stay 0 - confirm that
                # this is intended
                for idx in range(pos.shape[0], num_frames):
                    pos_loop[idx, :2] = pos[idx % pos.shape[0], :2]
                pos = pos_loop

            # wrap metadata to target value range
            for j in range(num_frames):
                pos[j, 0], pos[j, 1] = wrap_angles(pos[j, 0], pos[j, 1], clip_ele=True)

            self.object_pos.append(pos)
# Combined ISM + MASA formats, named "ISM<n>MASA<m>".
# num_channels is the total channel count, num_ism_channels the number of
# leading object channels (same layout as OSBA_AUDIO_FORMATS below);
# convert_omasa() splits the signal at num_ism_channels.
OMASA_AUDIO_FORMATS = {
    "ISM1MASA1": {
        "num_channels": 2,
        "num_ism_channels": 1,
    },
    "ISM1MASA2": {
        "num_channels": 3,
        "num_ism_channels": 1,
    },
    "ISM2MASA1": {
        "num_channels": 3,
        "num_ism_channels": 2,
    },
    "ISM2MASA2": {
        "num_channels": 4,
        "num_ism_channels": 2,
    },
    "ISM3MASA1": {
        "num_channels": 4,
        "num_ism_channels": 3,
    },
    "ISM3MASA2": {
        "num_channels": 5,
        "num_ism_channels": 3,
    },
    "ISM4MASA1": {
        "num_channels": 5,
        "num_ism_channels": 4,
    },
    "ISM4MASA2": {
        "num_channels": 6,
        "num_ism_channels": 4,
    },
}

# Combined ISM + SBA formats, named "ISM<n>SBA<order>".
# num_channels = num_ism_channels + (order + 1)^2
OSBA_AUDIO_FORMATS = {
    "ISM1SBA1": {
        "num_channels": 5,
        "num_ism_channels": 1,
    },
    "ISM1SBA2": {
        "num_channels": 10,
        "num_ism_channels": 1,
    },
    "ISM1SBA3": {
        "num_channels": 17,
        "num_ism_channels": 1,
    },
    "ISM2SBA1": {
        "num_channels": 6,
        "num_ism_channels": 2,
    },
    "ISM2SBA2": {
        "num_channels": 11,
        "num_ism_channels": 2,
    },
    "ISM2SBA3": {
        "num_channels": 18,
        "num_ism_channels": 2,
    },
    "ISM3SBA1": {
        "num_channels": 7,
        "num_ism_channels": 3,
    },
    "ISM3SBA2": {
        "num_channels": 12,
        "num_ism_channels": 3,
    },
    "ISM3SBA3": {
        "num_channels": 19,
        "num_ism_channels": 3,
    },
    "ISM4SBA1": {
        "num_channels": 8,
        "num_ism_channels": 4,
    },
    "ISM4SBA2": {
        "num_channels": 13,
        "num_ism_channels": 4,
    },
    "ISM4SBA3": {
        "num_channels": 20,
        "num_ism_channels": 4,
    },
}

SCENE_METADATA_FORMATS = {"META"}
ivas_processing_scripts/audiotools/convert/__init__.py +19 −3 Original line number Diff line number Diff line Loading @@ -43,6 +43,8 @@ from ivas_processing_scripts.audiotools.convert.channelbased import convert_chan from ivas_processing_scripts.audiotools.convert.masa import convert_masa from ivas_processing_scripts.audiotools.convert.objectbased import convert_objectbased from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased from ivas_processing_scripts.audiotools.convert.osba import convert_osba from ivas_processing_scripts.audiotools.convert.omasa import convert_omasa from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm from ivas_processing_scripts.audiotools.wrappers.esdru import esdru from ivas_processing_scripts.audiotools.wrappers.filter import ( Loading Loading @@ -311,28 +313,38 @@ def format_conversion( """Convert one audio format to another""" # validation # check for MASA/OMASA as output if isinstance(output, audio.MetadataAssistedSpatialAudio) and not ( isinstance(input, audio.SceneBasedAudio) or isinstance(input, audio.MetadataAssistedSpatialAudio) ): raise NotImplementedError("Can only convert to MASA from SBA") if isinstance(output, audio.OMASAAudio) and not ( isinstance(input, audio.OSBAAudio) or isinstance(input, audio.OMASAAudio) ): raise NotImplementedError("Can only convert to OMASA from OSBA") if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name: # check for ISM (also OMASA and OSBA) as output if (isinstance(output, audio.ObjectBasedAudio) or isinstance(output, audio.OMASAAudio) or isinstance(output, audio.OSBAAudio)) and input.name != output.name: raise NotImplementedError( "ISM is not supported as an output for rendering! Only usable as pass-through" "ISM (also in combined formats) is not supported as an output for rendering! 
Only usable as pass-through" ) if logger: logger.debug(f"Format conversion: {input.name} -> {output.name}") # format conversion # check if input and output format are the same if (fmt := input.name) == output.name or ( input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL") ): output.audio = input.audio if fmt.startswith("MASA"): output.metadata_file = input.metadata_file elif fmt.startswith("ISM"): elif fmt.startswith("ISM"): # also includes combined formats output.metadata_files = list(output.metadata_files) else: if isinstance(input, audio.BinauralAudio): raise NotImplementedError( Loading @@ -346,6 +358,10 @@ def format_conversion( convert_objectbased(input, output, **kwargs) elif isinstance(input, audio.SceneBasedAudio): convert_scenebased(input, output, **kwargs) elif isinstance(input, audio.OSBAAudio): convert_osba(input, output, **kwargs) elif isinstance(input, audio.OMASAAudio): convert_omasa(input, output, **kwargs) else: raise NotImplementedError( f"Unknown or unsupported audio format {input.name}" Loading
ivas_processing_scripts/audiotools/convert/omasa.py 0 → 100644 +112 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 # # (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, # Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., # Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, # Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other # contributors to this repository. All Rights Reserved. # # This software is protected by copyright law and by international treaties. # The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, # Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., # Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, # Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other # contributors to this repository retain full ownership rights in their respective contributions in # the software. This notice grants no license of any kind, including but not limited to patent # license, nor is any license granted by implication, estoppel or otherwise. # # Contributors are required to enter into the IVAS codec Public Collaboration agreement before making # contributions. # # This software is provided "AS IS", without any express or implied warranties. The software is in the # development stage. It is intended exclusively for experts who have experience with such software and # solely for the purpose of inspection. All implied warranties of non-infringement, merchantability # and fitness for a particular purpose are hereby disclaimed and excluded. 
#
# Any dispute, controversy or claim arising under or in relation to providing this software shall be
# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
# the United Nations Convention on Contracts on the International Sales of Goods.
#

import copy

import numpy as np

from ivas_processing_scripts.audiotools import audio
from ivas_processing_scripts.audiotools.convert.masa import (
    render_masa_to_binaural,
    render_masa_to_cba,
    render_masa_to_sba,
)
from ivas_processing_scripts.audiotools.convert.objectbased import (
    render_oba_to_binaural,
    render_oba_to_cba,
    render_oba_to_sba,
)

""" OMASAAudio functions """


def _render_parts_and_mix(
    out, ism_part, render_ism, masa_part, render_masa, render_kwargs=None
):
    """Render the ISM and MASA parts into separate copies of `out` and sum them into `out.audio`."""
    render_kwargs = render_kwargs or {}

    # each part is rendered into its own copy so that neither renderer sees the other's result
    out_ism = copy.deepcopy(out)
    render_ism(ism_part, out_ism, **render_kwargs)

    out_masa = copy.deepcopy(out)
    render_masa(masa_part, out_masa, **render_kwargs)

    # combine results
    out.audio = out_ism.audio + out_masa.audio


def convert_omasa(
    omasa: audio.OMASAAudio,
    out: audio.Audio,
    **kwargs,
) -> audio.Audio:
    """
    Convert an OMASA signal to the requested output format

    The input is split into its ISM channels (the first `num_ism_channels`
    columns of `omasa.audio`) and its MASA transport channels (the remaining
    columns). For binaural, channel-based and scene-based outputs both parts
    are rendered separately and summed. For OSBA output the ISM channels are
    copied unchanged and only the MASA part is rendered to SBA.

    Parameters
    ----------
    omasa: audio.OMASAAudio
        Combined ISM + MASA input audio
    out: audio.Audio
        Output audio object; its `audio` attribute is overwritten
    kwargs
        Forwarded to the binaural renderers only

    Returns
    -------
    out: audio.Audio
        The same object that was passed in as `out`

    Raises
    ------
    ValueError
        If the output is OSBA and its number of ISM channels differs from the input
    NotImplementedError
        If the output format is not supported
    """
    num_ism = omasa.num_ism_channels

    # split OMASA object in ISM and MASA object
    # OMASA metadata files are ordered "ISM files first, then the MASA file",
    # so only the first num_ism entries belong to the ISM part
    metadata_files = omasa.metadata_files or []

    oba = audio.fromarray("ISM" + str(num_ism), omasa.audio[:, :num_ism], omasa.fs)
    oba.metadata_files = list(metadata_files[:num_ism])
    oba.object_pos = omasa.object_pos

    masa = audio.fromarray(
        "MASA" + str(omasa.num_channels - num_ism),
        omasa.audio[:, num_ism:],
        omasa.fs,
    )
    if len(metadata_files) > num_ism:
        # hand the MASA metadata over to the MASA part
        masa.metadata_file = metadata_files[num_ism]

    # OMASA -> Binaural
    if isinstance(out, audio.BinauralAudio):
        _render_parts_and_mix(
            out, oba, render_oba_to_binaural, masa, render_masa_to_binaural, kwargs
        )

    # OMASA -> CBA
    elif isinstance(out, audio.ChannelBasedAudio):
        _render_parts_and_mix(out, oba, render_oba_to_cba, masa, render_masa_to_cba)

    # OMASA -> SBA
    elif isinstance(out, audio.SceneBasedAudio):
        _render_parts_and_mix(out, oba, render_oba_to_sba, masa, render_masa_to_sba)

    # OMASA -> OSBA
    elif isinstance(out, audio.OSBAAudio):
        if out.num_ism_channels != num_ism:
            raise ValueError(
                f"Number of ISM channels differs between {omasa.name} ({num_ism}) "
                f"and {out.name} ({out.num_ism_channels})"
            )

        # only render MASA part; the render target has to be a scene-based
        # object of the ambisonics order of the output, not a MASA object
        out_sba = audio.fromtype("SBA" + str(out.ambi_order))
        render_masa_to_sba(masa, out_sba)

        # ISM channels are passed through, followed by the rendered SBA channels
        # NOTE(review): ISM metadata (metadata_files / object_pos) is not copied
        # to out here - confirm whether the caller takes care of that
        out.audio = np.concatenate(
            (omasa.audio[:, :num_ism], out_sba.audio), axis=1
        )

    else:
        raise NotImplementedError(
            f"Conversion from {omasa.name} to {out.name} is unsupported!"
        )

    return out
ivas_processing_scripts/audiotools/convert/osba.py 0 → 100644 +113 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 # # (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, # Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., # Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, # Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other # contributors to this repository. All Rights Reserved. # # This software is protected by copyright law and by international treaties. # The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, # Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., # Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, # Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other # contributors to this repository retain full ownership rights in their respective contributions in # the software. This notice grants no license of any kind, including but not limited to patent # license, nor is any license granted by implication, estoppel or otherwise. # # Contributors are required to enter into the IVAS codec Public Collaboration agreement before making # contributions. # # This software is provided "AS IS", without any express or implied warranties. The software is in the # development stage. It is intended exclusively for experts who have experience with such software and # solely for the purpose of inspection. All implied warranties of non-infringement, merchantability # and fitness for a particular purpose are hereby disclaimed and excluded. 
#
# Any dispute, controversy or claim arising under or in relation to providing this software shall be
# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
# the United Nations Convention on Contracts on the International Sales of Goods.
#

import copy

import numpy as np

from ivas_processing_scripts.audiotools import audio
from ivas_processing_scripts.audiotools.convert.objectbased import (
    render_oba_to_binaural,
    render_oba_to_cba,
    render_oba_to_sba,
)
from ivas_processing_scripts.audiotools.convert.scenebased import (
    render_sba_to_binaural,
    render_sba_to_cba,
    render_sba_to_masa,
    render_sba_to_sba,
)

""" OSBAAudio functions """


def _render_parts_and_mix(
    out, ism_part, render_ism, sba_part, render_sba, render_kwargs=None
):
    """Render the ISM and SBA parts into separate copies of `out` and sum them into `out.audio`."""
    render_kwargs = render_kwargs or {}

    # each part is rendered into its own copy so that neither renderer sees the other's result
    out_ism = copy.deepcopy(out)
    render_ism(ism_part, out_ism, **render_kwargs)

    out_sba = copy.deepcopy(out)
    render_sba(sba_part, out_sba, **render_kwargs)

    # combine results
    out.audio = out_ism.audio + out_sba.audio


def convert_osba(
    osba: audio.OSBAAudio,
    out: audio.Audio,
    **kwargs,
) -> audio.Audio:
    """
    Convert an OSBA signal to the requested output format

    The input is split into its ISM channels (the first `num_ism_channels`
    columns of `osba.audio`) and its SBA channels (the remaining columns).
    For binaural, channel-based and scene-based outputs both parts are
    rendered separately and summed. For OMASA output the ISM channels are
    copied unchanged and only the SBA part is converted to MASA.

    Parameters
    ----------
    osba: audio.OSBAAudio
        Combined ISM + SBA input audio
    out: audio.Audio
        Output audio object; its `audio` attribute is overwritten
    kwargs
        Forwarded to the binaural renderers only

    Returns
    -------
    out: audio.Audio
        The same object that was passed in as `out`

    Raises
    ------
    ValueError
        If the output is OMASA and its number of ISM channels differs from the input
    NotImplementedError
        If the output format is not supported
    """
    num_ism = osba.num_ism_channels

    # split OSBA object in ISM and SBA object
    oba = audio.fromarray("ISM" + str(num_ism), osba.audio[:, :num_ism], osba.fs)
    oba.metadata_files = osba.metadata_files
    oba.object_pos = osba.object_pos

    sba = audio.fromarray(
        "SBA" + str(osba.ambi_order), osba.audio[:, num_ism:], osba.fs
    )

    # OSBA -> Binaural
    if isinstance(out, audio.BinauralAudio):
        _render_parts_and_mix(
            out, oba, render_oba_to_binaural, sba, render_sba_to_binaural, kwargs
        )

    # OSBA -> CBA
    elif isinstance(out, audio.ChannelBasedAudio):
        _render_parts_and_mix(out, oba, render_oba_to_cba, sba, render_sba_to_cba)

    # OSBA -> SBA
    elif isinstance(out, audio.SceneBasedAudio):
        _render_parts_and_mix(out, oba, render_oba_to_sba, sba, render_sba_to_sba)

    # OSBA -> OMASA
    elif isinstance(out, audio.OMASAAudio):
        if out.num_ism_channels != num_ism:
            raise ValueError(
                f"Number of ISM channels differs between {osba.name} ({num_ism}) "
                f"and {out.name} ({out.num_ism_channels})"
            )

        # only render SBA part; the MASA format name carries the number of
        # transport channels, which is what remains of the output after its ISM channels
        out_masa = audio.fromtype(
            "MASA" + str(out.num_channels - out.num_ism_channels)
        )
        # NOTE(review): the MASA target is created without a metadata file and
        # nothing is propagated to out.metadata_files - confirm what
        # render_sba_to_masa expects and how OMASA output metadata is handled
        render_sba_to_masa(sba, out_masa)

        # ISM channels are passed through, followed by the MASA transport channels
        out.audio = np.concatenate(
            (osba.audio[:, :num_ism], out_masa.audio), axis=1
        )

    else:
        raise NotImplementedError(
            f"Conversion from {osba.name} to {out.name} is unsupported!"
        )

    return out