added omasa and osba to audio formats (1b61b072) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

ivas_processing_scripts/audiotools/audio.py

+208 −1

Original line number	Diff line number	Diff line
		@@ -48,6 +48,8 @@ from ivas_processing_scripts.audiotools.constants import (
		NUMBER_COLUMNS_ISM_METADATA,
		OBJECT_BASED_AUDIO_FORMATS,
		SCENE_BASED_AUDIO_FORMATS,
		OMASA_AUDIO_FORMATS,
		OSBA_AUDIO_FORMATS,
		)

		from .EFAP import wrap_angles
		@@ -376,6 +378,207 @@ class SceneBasedAudio(Audio):
		return super()._from_filelist(name, filename, fs)


		class OMASAAudio(Audio):
		    """Audio sub-class for the combined OMASA formats.

		    The format name is looked up (upper-cased) in OMASA_AUDIO_FORMATS and the
		    entries of the matching table row become instance attributes.
		    """

		    # TODO treffehn: write class
		    def __init__(self, name: str):
		        """Create an empty OMASA audio object.

		        Parameters
		        ----------
		        name: str
		            OMASA format name; matched case-insensitively against the keys
		            of OMASA_AUDIO_FORMATS

		        Raises
		        ------
		        ValueError
		            If the name is not a key of OMASA_AUDIO_FORMATS
		        """
		        super().__init__(name)

		        # resolve the format description first, then copy it onto the instance
		        try:
		            format_spec = OMASA_AUDIO_FORMATS[name.upper()]
		        except KeyError:
		            raise ValueError(f"Unsupported OMASA audio format {name}")
		        vars(self).update(format_spec)

		        # per-object position data; nothing in this class fills it yet
		        self.object_pos = []
		        # first ISM metadata followed by masa metadata
		        self.metadata_files = []

		# @classmethod
		# def _from_file(
		# cls,
		# name: str,
		# filename: Union[str, Path],
		# metadata_files_ism: list[Union[str, Path]],
		# fs: Optional[int] = None,
		# ) -> "ObjectBasedAudio":
		# obj = super()._from_file(name, filename, fs)
		# if metadata_files is not None:
		# obj.metadata_files = [Path(f) for f in metadata_files]
		# else:
		# # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv
		# for obj_idx in range(obj.num_channels):
		# file_name_meta = filename.with_suffix(
		# f"{filename.suffix}.{obj_idx}.csv"
		# )
		# if file_name_meta.is_file():
		# obj.metadata_files.append(file_name_meta)
		# else:
		# raise ValueError(f"Metadata file {file_name_meta} not found.")
		# warn(
		# f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}"
		# )
		#
		# obj.init_metadata()
		# return obj
		#
		# @classmethod
		# def _from_filelist(
		# cls,
		# name: str,
		# filename: Path,
		# metadata_files: list[Union[str, Path]],
		# fs: Optional[int] = None,
		# ) -> "ObjectBasedAudio":
		# obj = super()._from_filelist(name, filename, fs)
		# obj.metadata_files = [Path(f) for f in metadata_files]
		# obj.init_metadata()
		# return obj
		#
		# def init_metadata(self):
		# # check if number of metadata files matches format
		# if self.audio.shape[1] != len(self.metadata_files):
		# raise ValueError(
		# f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]"
		# )
		#
		# self.object_pos = []
		# for i, f in enumerate(self.metadata_files):
		# pos = np.genfromtxt(f, delimiter=",")
		#
		# # check if metadata has right number of columns
		# num_columns = pos.shape[1]
		# if num_columns < 2:
		# raise ValueError(
		# "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory."
		# )
		# elif num_columns > NUMBER_COLUMNS_ISM_METADATA:
		# raise ValueError("Too many columns in metadata")
		#
		# # pad metadata to max number of columns
		# if num_columns < NUMBER_COLUMNS_ISM_METADATA:
		# pos = np.hstack(
		# [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])]
		# )
		#
		# # check if metadata is longer than file -> cut off
		# num_frames = int(
		# np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000))
		# )
		# if num_frames < pos.shape[0]:
		# pos = pos[:num_frames]
		# # check if metadata is shorter than file -> loop
		# elif num_frames > pos.shape[0]:
		# pos_loop = np.zeros((num_frames, pos.shape[1]))
		# pos_loop[: pos.shape[0]] = pos
		# for idx in range(pos.shape[0], num_frames):
		# pos_loop[idx, :2] = pos[idx % pos.shape[0], :2]
		# pos = pos_loop
		#
		# # wrap metadata to target value range
		# for j in range(num_frames):
		# pos[j, 0], pos[j, 1] = wrap_angles(pos[j, 0], pos[j, 1], clip_ele=True)
		#
		# self.object_pos.append(pos)


		class OSBAAudio(Audio):
		    """Sub-class for combined OSBA audio (object channels plus ambisonics).

		    The format name is looked up (upper-cased) in OSBA_AUDIO_FORMATS; the
		    entries of the matching row (num_channels, num_ism_channels) become
		    instance attributes. One metadata file is expected per ISM object.
		    """

		    def __init__(self, name: str):
		        """Create an empty OSBA audio object.

		        Parameters
		        ----------
		        name: str
		            OSBA format name; matched case-insensitively against the keys
		            of OSBA_AUDIO_FORMATS

		        Raises
		        ------
		        ValueError
		            If the name is not a key of OSBA_AUDIO_FORMATS
		        """
		        super().__init__(name)
		        try:
		            self.__dict__.update(OSBA_AUDIO_FORMATS[name.upper()])
		        except KeyError:
		            raise ValueError(f"Unsupported OSBA audio format {name}")
		        self.object_pos = []
		        self.metadata_files = []
		        # the non-ISM channels number (order + 1)^2, so invert that relation
		        self.ambi_order = int(
		            np.sqrt(self.num_channels - self.num_ism_channels) - 1
		        )

		    @classmethod
		    def _from_file(
		        cls,
		        name: str,
		        filename: Union[str, Path],
		        metadata_files: Optional[list[Union[str, Path]]],
		        fs: Optional[int] = None,
		    ) -> "OSBAAudio":
		        """Load an OSBA signal from a file together with its ISM metadata.

		        Parameters
		        ----------
		        name: str
		            OSBA format name
		        filename: Union[str, Path]
		            Audio file to load
		        metadata_files: Optional[list[Union[str, Path]]]
		            One metadata file per ISM object. If None, files named
		            <filename>.<object index>.csv are searched next to the audio file
		        fs: Optional[int]
		            Sampling rate forwarded to the parent loader

		        Returns
		        -------
		        OSBAAudio
		            Object with audio loaded and object_pos initialised

		        Raises
		        ------
		        ValueError
		            If metadata files have to be searched and one of them is missing
		        """
		        obj = super()._from_file(name, filename, fs)
		        if metadata_files is not None:
		            obj.metadata_files = [Path(f) for f in metadata_files]
		        else:
		            # filename may be a plain string; with_suffix() needs a Path
		            filename = Path(filename)
		            # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv
		            for obj_idx in range(obj.num_ism_channels):
		                file_name_meta = filename.with_suffix(
		                    f"{filename.suffix}.{obj_idx}.csv"
		                )
		                if file_name_meta.is_file():
		                    obj.metadata_files.append(file_name_meta)
		                else:
		                    raise ValueError(f"Metadata file {file_name_meta} not found.")
		            warn(
		                f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}"
		            )

		        obj.init_metadata()
		        return obj

		    @classmethod
		    def _from_filelist(
		        cls,
		        name: str,
		        filename: Path,
		        metadata_files: list[Union[str, Path]],
		        fs: Optional[int] = None,
		    ) -> "OSBAAudio":
		        """Load an OSBA signal via the parent's file-list loader.

		        The given metadata files are stored as Path objects and parsed by
		        init_metadata() before the object is returned.
		        """
		        obj = super()._from_filelist(name, filename, fs)
		        obj.metadata_files = [Path(f) for f in metadata_files]
		        obj.init_metadata()
		        return obj

		    def init_metadata(self):
		        """Read all metadata files into self.object_pos (one array per object).

		        Each file is parsed as comma-separated values, padded with the
		        trailing entries of DEFAULT_ISM_METADATA up to
		        NUMBER_COLUMNS_ISM_METADATA columns, cut or looped to the number of
		        frames of the audio, and its first two columns are passed through
		        wrap_angles().

		        Raises
		        ------
		        ValueError
		            If the number of metadata files differs from num_ism_channels,
		            or a file has fewer than 2 or too many columns
		        """
		        # check if number of metadata files matches format
		        if self.num_ism_channels != len(self.metadata_files):
		            # report the ISM object count, which is the value actually compared
		            raise ValueError(
		                f"Mismatch between number of ISM channels in format [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]"
		            )

		        self.object_pos = []
		        for f in self.metadata_files:
		            # NOTE(review): a file with a single row probably comes back
		            # 1-D from genfromtxt and would fail on shape[1] - confirm
		            pos = np.genfromtxt(f, delimiter=",")

		            # check if metadata has right number of columns
		            num_columns = pos.shape[1]
		            if num_columns < 2:
		                raise ValueError(
		                    "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory."
		                )
		            elif num_columns > NUMBER_COLUMNS_ISM_METADATA:
		                raise ValueError("Too many columns in metadata")

		            # pad metadata to max number of columns
		            if num_columns < NUMBER_COLUMNS_ISM_METADATA:
		                pos = np.hstack(
		                    [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])]
		                )

		            # number of frames covered by the audio (rounded up)
		            num_frames = int(
		                np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000))
		            )
		            # check if metadata is longer than file -> cut off
		            if num_frames < pos.shape[0]:
		                pos = pos[:num_frames]
		            # check if metadata is shorter than file -> loop
		            elif num_frames > pos.shape[0]:
		                pos_loop = np.zeros((num_frames, pos.shape[1]))
		                pos_loop[: pos.shape[0]] = pos
		                # only the first two columns are repeated; the remaining
		                # columns of the looped rows stay zero
		                for idx in range(pos.shape[0], num_frames):
		                    pos_loop[idx, :2] = pos[idx % pos.shape[0], :2]
		                pos = pos_loop

		            # wrap metadata to target value range
		            for j in range(num_frames):
		                pos[j, 0], pos[j, 1] = wrap_angles(pos[j, 0], pos[j, 1], clip_ele=True)

		            self.object_pos.append(pos)


		def _get_audio_class(fmt) -> Audio:
		"""Return a child audio class corresponding to the specifed format"""
		if fmt in BINAURAL_AUDIO_FORMATS.keys():
		@@ -387,9 +590,13 @@ def _get_audio_class(fmt) -> Audio:
		elif fmt in SCENE_BASED_AUDIO_FORMATS.keys():
		return SceneBasedAudio
		elif (
		fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() or CHANNEL_BASED_AUDIO_ALTNAMES.keys()
		fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() or fmt in CHANNEL_BASED_AUDIO_ALTNAMES.keys()
		):
		return ChannelBasedAudio
		elif fmt in OSBA_AUDIO_FORMATS.keys():
		return OSBAAudio
		elif fmt in OMASA_AUDIO_FORMATS.keys():
		return OMASAAudio
		elif Path(fmt).suffix == ".txt":
		return ChannelBasedAudio
		else:

ivas_processing_scripts/audiotools/constants.py

+80 −0

Original line number	Diff line number	Diff line
		@@ -303,6 +303,85 @@ SCENE_BASED_AUDIO_FORMATS = {
		},
		}

		# Combined object + MASA formats, keyed as ISM<n>MASA<m>.
		# num_channels is the total channel count (n + m); num_ism_channels is n,
		# the number of leading object channels. The OMASA conversion code reads
		# num_ism_channels to split the signal, so every entry must provide it.
		OMASA_AUDIO_FORMATS = {
		    "ISM1MASA1": {
		        "num_channels": 2,
		        "num_ism_channels": 1,
		    },
		    "ISM1MASA2": {
		        "num_channels": 3,
		        "num_ism_channels": 1,
		    },
		    "ISM2MASA1": {
		        "num_channels": 3,
		        "num_ism_channels": 2,
		    },
		    "ISM2MASA2": {
		        "num_channels": 4,
		        "num_ism_channels": 2,
		    },
		    "ISM3MASA1": {
		        "num_channels": 4,
		        "num_ism_channels": 3,
		    },
		    "ISM3MASA2": {
		        "num_channels": 5,
		        "num_ism_channels": 3,
		    },
		    "ISM4MASA1": {
		        "num_channels": 5,
		        "num_ism_channels": 4,
		    },
		    "ISM4MASA2": {
		        "num_channels": 6,
		        "num_ism_channels": 4,
		    },
		}


		# Combined object + ambisonics formats, keyed as ISM<n>SBA<order> for
		# n = 1..4 objects and ambisonics order 1..3.
		# num_channels is n plus the (order + 1)^2 ambisonics channels;
		# num_ism_channels is n.
		OSBA_AUDIO_FORMATS = {
		    f"ISM{num_ism}SBA{order}": {
		        "num_channels": num_ism + (order + 1) ** 2,
		        "num_ism_channels": num_ism,
		    }
		    for num_ism in range(1, 5)
		    for order in range(1, 4)
		}

		SCENE_METADATA_FORMATS = {"META"}

		AUDIO_FORMATS = [
		@@ -311,6 +390,7 @@ AUDIO_FORMATS = [
		METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS,
		OBJECT_BASED_AUDIO_FORMATS,
		SCENE_BASED_AUDIO_FORMATS,
		OMASA_AUDIO_FORMATS,
		]

ivas_processing_scripts/audiotools/convert/__init__.py

+19 −3

Original line number	Diff line number	Diff line
		@@ -43,6 +43,8 @@ from ivas_processing_scripts.audiotools.convert.channelbased import convert_chan
		from ivas_processing_scripts.audiotools.convert.masa import convert_masa
		from ivas_processing_scripts.audiotools.convert.objectbased import convert_objectbased
		from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased
		from ivas_processing_scripts.audiotools.convert.osba import convert_osba
		from ivas_processing_scripts.audiotools.convert.omasa import convert_omasa
		from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm
		from ivas_processing_scripts.audiotools.wrappers.esdru import esdru
		from ivas_processing_scripts.audiotools.wrappers.filter import (
		@@ -311,28 +313,38 @@ def format_conversion(
		"""Convert one audio format to another"""

		# validation
		# check for MASA/OMASA as output
		if isinstance(output, audio.MetadataAssistedSpatialAudio) and not (
		isinstance(input, audio.SceneBasedAudio)
		or isinstance(input, audio.MetadataAssistedSpatialAudio)
		):
		raise NotImplementedError("Can only convert to MASA from SBA")
		if isinstance(output, audio.OMASAAudio) and not (
		isinstance(input, audio.OSBAAudio)
		or isinstance(input, audio.OMASAAudio)
		):
		raise NotImplementedError("Can only convert to OMASA from OSBA")

		if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name:
		# check for ISM (also OMASA and OSBA) as output
		if (isinstance(output, audio.ObjectBasedAudio) or isinstance(output, audio.OMASAAudio) or isinstance(output, audio.OSBAAudio)) and input.name != output.name:
		raise NotImplementedError(
		"ISM is not supported as an output for rendering! Only usable as pass-through"
		"ISM (also in combined formats) is not supported as an output for rendering! Only usable as pass-through"
		)

		if logger:
		logger.debug(f"Format conversion: {input.name} -> {output.name}")

		# format conversion
		# check if input and output format are the same
		if (fmt := input.name) == output.name or (
		input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL")
		):
		output.audio = input.audio
		if fmt.startswith("MASA"):
		output.metadata_file = input.metadata_file
		elif fmt.startswith("ISM"):
		elif fmt.startswith("ISM"): # also includes combined formats
		output.metadata_files = list(output.metadata_files)

		else:
		if isinstance(input, audio.BinauralAudio):
		raise NotImplementedError(
		@@ -346,6 +358,10 @@ def format_conversion(
		convert_objectbased(input, output, **kwargs)
		elif isinstance(input, audio.SceneBasedAudio):
		convert_scenebased(input, output, **kwargs)
		elif isinstance(input, audio.OSBAAudio):
		convert_osba(input, output, **kwargs)
		elif isinstance(input, audio.OMASAAudio):
		convert_omasa(input, output, **kwargs)
		else:
		raise NotImplementedError(
		f"Unknown or unsupported audio format {input.name}"

ivas_processing_scripts/audiotools/convert/omasa.py

0 → 100644

+112 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3
		#
		# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
		# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
		# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
		# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
		# contributors to this repository. All Rights Reserved.
		#
		# This software is protected by copyright law and by international treaties.
		# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
		# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
		# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
		# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
		# contributors to this repository retain full ownership rights in their respective contributions in
		# the software. This notice grants no license of any kind, including but not limited to patent
		# license, nor is any license granted by implication, estoppel or otherwise.
		#
		# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
		# contributions.
		#
		# This software is provided "AS IS", without any express or implied warranties. The software is in the
		# development stage. It is intended exclusively for experts who have experience with such software and
		# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
		# and fitness for a particular purpose are hereby disclaimed and excluded.
		#
		# Any dispute, controversy or claim arising under or in relation to providing this software shall be
		# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
		# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
		# the United Nations Convention on Contracts on the International Sales of Goods.
		#

		import copy

		from ivas_processing_scripts.audiotools import audio
		from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \
		render_oba_to_sba
		from ivas_processing_scripts.audiotools.convert.masa import render_masa_to_binaural, render_masa_to_cba, render_masa_to_sba

		""" OMASAAudio functions """


		def convert_omasa(
		    omasa: audio.OMASAAudio,
		    out: audio.Audio,
		    **kwargs,
		) -> audio.Audio:
		    """Render an OMASA signal into the format of ``out``.

		    The input is split into an ISM object (the first num_ism_channels
		    channels) and a MASA object (the remaining channels). For binaural,
		    channel-based and scene-based outputs both parts are rendered into
		    separate deep copies of ``out`` and the two results are summed.

		    Parameters
		    ----------
		    omasa: audio.OMASAAudio
		        Input signal
		    out: audio.Audio
		        Output object; its audio attribute is written
		    kwargs
		        Forwarded to the binaural renderers only

		    Returns
		    -------
		    audio.Audio
		        The ``out`` object that was passed in

		    Raises
		    ------
		    NotImplementedError
		        If ``out`` is none of the supported output types
		    """
		    ism_count = omasa.num_ism_channels

		    # split OMASA object in ISM and MASA object
		    ism_part = audio.fromarray(f"ISM{ism_count}", omasa.audio[:, :ism_count], omasa.fs)
		    # NOTE(review): the whole metadata list is handed to the ISM part, while
		    # OMASAAudio describes it as ISM files followed by MASA metadata - confirm
		    ism_part.metadata_files = omasa.metadata_files
		    ism_part.object_pos = omasa.object_pos
		    masa_part = audio.fromarray(
		        f"MASA{omasa.num_channels - ism_count}", omasa.audio[:, ism_count:], omasa.fs
		    )

		    # (output type, ISM renderer, MASA renderer, forward kwargs?) - checked in order
		    render_table = (
		        (audio.BinauralAudio, render_oba_to_binaural, render_masa_to_binaural, True),
		        (audio.ChannelBasedAudio, render_oba_to_cba, render_masa_to_cba, False),
		        (audio.SceneBasedAudio, render_oba_to_sba, render_masa_to_sba, False),
		    )
		    for out_type, render_ism, render_masa, forward_kwargs in render_table:
		        if not isinstance(out, out_type):
		            continue
		        extra = kwargs if forward_kwargs else {}

		        # render MASA and ISM part separately, each into its own copy of out
		        ism_render = copy.deepcopy(out)
		        render_ism(ism_part, ism_render, **extra)
		        masa_render = copy.deepcopy(out)
		        render_masa(masa_part, masa_render, **extra)

		        # combine results
		        out.audio = ism_render.audio + masa_render.audio
		        return out

		    # OMASA -> OSBA
		    if isinstance(out, audio.OSBAAudio):
		        # TODO (treffehn)
		        # only render MASA part; the ISM channels are copied through unchanged
		        # NOTE(review): a "MASA" object is used as the SBA render target and
		        # out.audio is assumed to be allocated already - looks unfinished, confirm
		        sba_render = audio.fromtype("MASA")
		        render_masa_to_sba(masa_part, sba_render)

		        out.audio[:, :ism_count] = omasa.audio[:, :ism_count]
		        out.audio[:, ism_count:] = sba_render.audio
		        return out

		    raise NotImplementedError(
		        f"Conversion from {omasa.name} to {out.name} is unsupported!"
		    )

ivas_processing_scripts/audiotools/convert/osba.py

0 → 100644

+113 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3
		#
		# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
		# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
		# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
		# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
		# contributors to this repository. All Rights Reserved.
		#
		# This software is protected by copyright law and by international treaties.
		# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
		# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
		# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
		# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
		# contributors to this repository retain full ownership rights in their respective contributions in
		# the software. This notice grants no license of any kind, including but not limited to patent
		# license, nor is any license granted by implication, estoppel or otherwise.
		#
		# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
		# contributions.
		#
		# This software is provided "AS IS", without any express or implied warranties. The software is in the
		# development stage. It is intended exclusively for experts who have experience with such software and
		# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
		# and fitness for a particular purpose are hereby disclaimed and excluded.
		#
		# Any dispute, controversy or claim arising under or in relation to providing this software shall be
		# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
		# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
		# the United Nations Convention on Contracts on the International Sales of Goods.
		#

		import copy

		from ivas_processing_scripts.audiotools import audio
		from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \
		render_oba_to_sba
		from ivas_processing_scripts.audiotools.convert.scenebased import render_sba_to_binaural, render_sba_to_cba, \
		render_sba_to_sba, render_sba_to_masa

		""" OSBAAudio functions """


		def convert_osba(
		    osba: audio.OSBAAudio,
		    out: audio.Audio,
		    **kwargs,
		) -> audio.Audio:
		    """Render an OSBA signal into the format of ``out``.

		    The input is split into an ISM object (the first num_ism_channels
		    channels) and an SBA object of order ambi_order (the remaining
		    channels). For binaural, channel-based and scene-based outputs both
		    parts are rendered into separate deep copies of ``out`` and the two
		    results are summed.

		    Parameters
		    ----------
		    osba: audio.OSBAAudio
		        Input signal
		    out: audio.Audio
		        Output object; its audio attribute is written
		    kwargs
		        Forwarded to the binaural renderers only

		    Returns
		    -------
		    audio.Audio
		        The ``out`` object that was passed in

		    Raises
		    ------
		    NotImplementedError
		        If ``out`` is none of the supported output types
		    """
		    ism_count = osba.num_ism_channels

		    # split OSBA object in ISM and SBA object
		    ism_part = audio.fromarray(f"ISM{ism_count}", osba.audio[:, :ism_count], osba.fs)
		    ism_part.metadata_files = osba.metadata_files
		    ism_part.object_pos = osba.object_pos
		    sba_part = audio.fromarray(
		        f"SBA{osba.ambi_order}", osba.audio[:, ism_count:], osba.fs
		    )

		    # (output type, ISM renderer, SBA renderer, forward kwargs?) - checked in order
		    render_table = (
		        (audio.BinauralAudio, render_oba_to_binaural, render_sba_to_binaural, True),
		        (audio.ChannelBasedAudio, render_oba_to_cba, render_sba_to_cba, False),
		        (audio.SceneBasedAudio, render_oba_to_sba, render_sba_to_sba, False),
		    )
		    for out_type, render_ism, render_sba, forward_kwargs in render_table:
		        if not isinstance(out, out_type):
		            continue
		        extra = kwargs if forward_kwargs else {}

		        # render SBA and ISM part separately, each into its own copy of out
		        ism_render = copy.deepcopy(out)
		        render_ism(ism_part, ism_render, **extra)
		        sba_render = copy.deepcopy(out)
		        render_sba(sba_part, sba_render, **extra)

		        # combine results
		        out.audio = ism_render.audio + sba_render.audio
		        return out

		    # OSBA -> OMASA
		    if isinstance(out, audio.OMASAAudio):
		        # TODO (treffehn)
		        # only render SBA part; the ISM channels are copied through unchanged
		        # NOTE(review): out.audio is assumed to be allocated already and the
		        # generic "MASA" type is used as render target - looks unfinished, confirm
		        masa_render = audio.fromtype("MASA")
		        render_sba_to_masa(sba_part, masa_render)

		        out.audio[:, :ism_count] = osba.audio[:, :ism_count]
		        out.audio[:, ism_count:] = masa_render.audio
		        return out

		    raise NotImplementedError(
		        f"Conversion from {osba.name} to {out.name} is unsupported!"
		    )