diff --git a/README.md b/README.md index 3178d8fc4ad008af141ee924d131a7a49803f80d..a151695a6529d894d0d1ecfe0ac99770da410f2d 100755 --- a/README.md +++ b/README.md @@ -419,8 +419,6 @@ conditions_to_generate: # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] - ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats - # sba_fmt: "PLANARFOA" ### IVAS condition ############################### c07: @@ -448,8 +446,6 @@ conditions_to_generate: # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] - ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats - # sba_fmt: "PLANARFOA" ### EVS condition ################################ c08: @@ -473,8 +469,6 @@ conditions_to_generate: bin: ~/git/ivas-codec/EVS_dec ### Decoder output sampling rate; default = null (same as input) # fs: 48000 - ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats - # sba_fmt: "PLANARFOA" ``` @@ -547,7 +541,6 @@ This configuration has to match the channel configuration. If the provided list For the encoding stage `cod` and the decoding stage `dec`, the path to the IVAS_cod and IVAS_dec binaries can be specified under the key `bin`. Additionally some resampling can be applied by using the key `fs` followed by the desired sampling rate. The general bitstream processing configuration can be locally overwritten for each EVS and IVAS condition with the key `tx`. -For IVAS and EVS conditions the `sba_fmt` key is available to specify a SBA format of lower or same order compared to the input for SBA input formats. The additional key `evs_lfe_9k6bps_nb` is only available for EVS conditions and ensures a bitrate of 9.6kbps and narrow band processing of the LFE channel(s). #### IVAS The configuration of the IVAS condition is similar to the EVS condition. 
However, only one bitrate for all channels (and metadata) can be specified. diff --git a/examples/TEMPLATE.yml b/examples/TEMPLATE.yml index a906bd591127697a83b5199fb724e4dac5f05c77..b09e327d455c9a74af4a976e0dbe7e301eb60397 100755 --- a/examples/TEMPLATE.yml +++ b/examples/TEMPLATE.yml @@ -243,8 +243,6 @@ conditions_to_generate: ### Bitstream options # tx: ### For possible arguments see overall bitstream modification - ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats - # sba_fmt: "PLANARFOA" ### IVAS condition ############################### c07: @@ -275,8 +273,6 @@ conditions_to_generate: ### Bitstream options # tx: ### For possible arguments see overall bitstream modification - ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats - # sba_fmt: "PLANARFOA" ### EVS condition ################################ c08: @@ -305,8 +301,6 @@ conditions_to_generate: ### Bitstream options # tx: ### For possible arguments see overall bitstream modification - ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats - # sba_fmt: "PLANARFOA" ################################################ ### Post-processing diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index e38580636685fd49c213d890c320077b9c1a8844..c8c3a65d2e5f4e44b1e73df3c03a980264d76ebc 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -36,7 +36,7 @@ from itertools import product from multiprocessing import Pool from time import sleep -from ivas_processing_scripts.audiotools.metadata import check_ISM_metadata +from ivas_processing_scripts.audiotools.metadata import check_ISM_metadata, check_MASA_metadata from ivas_processing_scripts.constants import ( LOGGER_DATEFMT, LOGGER_FORMAT, @@ -112,24 +112,44 @@ def main(args): cfg.items_list, cfg.preprocessing_2["concatenation_order"] ) + metadata = [[]] * 
len(cfg.items_list) # check for ISM metadata if cfg.input["fmt"].startswith("ISM"): - metadata = check_ISM_metadata( + metadata_ISM = check_ISM_metadata( cfg.metadata_path, num_objects=int(cfg.input["fmt"][3]), num_items=len(cfg.items_list), item_names=cfg.items_list, ) # print info about found and used metadata files - for i in range(len(metadata)): + for i in range(len(metadata_ISM)): metadata_str = [] - for o in range(len(metadata[i])): - metadata_str.append(str(metadata[i][o])) + for o in range(len(metadata_ISM[i])): + metadata_str.append(str(metadata_ISM[i][o])) logger.debug( f" ISM metadata files item {cfg.items_list[i]}: {', '.join(metadata_str)}" ) + metadata = metadata_ISM - else: + # check for MASA metadata + if "MASA" in cfg.input["fmt"]: + metadata_MASA = check_MASA_metadata( + cfg.metadata_path, + num_items=len(cfg.items_list), + item_names=cfg.items_list, + ) + # print info about found and used metadata files + for i in range(len(metadata_MASA)): + metadata_str = [] + for o in range(len(metadata_MASA[i])): + metadata_str.append(str(metadata_MASA[i][o])) + logger.debug( + f" MASA metadata file item {cfg.items_list[i]}: {', '.join(metadata_str)}" + ) + for i in range(len(metadata)): + metadata[i] = metadata[i] + metadata_MASA[i] + + if not cfg.input["fmt"].startswith("ISM") and "MASA" not in cfg.input["fmt"]: metadata = [None] * len(cfg.items_list) cfg.metadata_path = metadata diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index 5e62b3fbdcce47ca5225af94d2bf367292decd43..1b131260b305fc9d210a845ceb2d8bdf7afc656b 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -48,6 +48,8 @@ from ivas_processing_scripts.audiotools.constants import ( NUMBER_COLUMNS_ISM_METADATA, OBJECT_BASED_AUDIO_FORMATS, SCENE_BASED_AUDIO_FORMATS, + OMASA_AUDIO_FORMATS, + OSBA_AUDIO_FORMATS, ) from .EFAP import wrap_angles @@ -216,18 +218,23 @@ class
MetadataAssistedSpatialAudio(Audio): raise ValueError( f"Unsupported metadata assisted spatial audio format {name}" ) - self.metadata_files = [] + self.metadata_file = None @classmethod def _from_file( cls, name: str, filename: Path, - metadata_files: list[str], + metadata_file: Union[str, list], fs: Optional[int] = None, ) -> "MetadataAssistedSpatialAudio": obj = super()._from_file(name, filename, fs) - obj.metadata_file = Path(metadata_files[0]) + if isinstance(metadata_file, list): + if len(metadata_file) > 1: + warn("Only first metadata file used. Additional metadata ignored for MASA") + obj.metadata_file = Path(metadata_file[0]) + else: + obj.metadata_file = Path(metadata_file) return obj @classmethod @@ -235,11 +242,11 @@ class MetadataAssistedSpatialAudio(Audio): cls, name: str, filename: Path, - metadata_files: list[str], + metadata_file: str, fs: Optional[int] = None, ) -> "MetadataAssistedSpatialAudio": obj = super()._from_file(name, filename, fs) - obj.metadata_file = Path(metadata_files[0]) + obj.metadata_file = Path(metadata_file) return obj @@ -353,6 +360,12 @@ class SceneBasedAudio(Audio): name = "HOA2" elif name == "SBA3": name = "HOA3" + elif name == "PLANARSBA1": + name = "PLANARFOA" + elif name == "PLANARSBA2": + name = "PLANARHOA2" + elif name == "PLANARSBA3": + name = "PLANARHOA3" super().__init__(name) try: @@ -376,6 +389,210 @@ class SceneBasedAudio(Audio): return super()._from_filelist(name, filename, fs) +class OMASAAudio(Audio): + """Sub-class for combined OMASA format""" + def __init__(self, name: str): + super().__init__(name) + try: + self.__dict__.update(OMASA_AUDIO_FORMATS[name.upper()]) + except KeyError: + raise ValueError(f"Unsupported OMASA audio format {name}") + self.object_pos = [] + self.metadata_files = [] # first ISM metadata followed by masa metadata + + @classmethod + def _from_file( + cls, + name: str, + filename: Union[str, Path], + metadata_files: list[Union[str, Path]], + fs: Optional[int] = None, + ) -> "OMASAAudio": 
+ obj = super()._from_file(name, filename, fs) + if metadata_files is not None: + obj.metadata_files = [Path(f) for f in metadata_files] + else: + # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv + for obj_idx in range(obj.num_ism_channels): + file_name_meta = filename.with_suffix( + f"{filename.suffix}.{obj_idx}.csv" + ) + if file_name_meta.is_file(): + obj.metadata_files.append(file_name_meta) + else: + raise ValueError(f"Metadata file {file_name_meta} not found.") + warn( + f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}" + ) + + obj.init_metadata() + return obj + + @classmethod + def _from_filelist( + cls, + name: str, + filename: Path, + metadata_files: list[Union[str, Path]], + fs: Optional[int] = None, + ) -> "OMASAAudio": + obj = super()._from_filelist(name, filename, fs) + obj.metadata_files = [Path(f) for f in metadata_files] + obj.init_metadata() + return obj + + def init_metadata(self): + # check if number of metadata files matches format + if self.num_ism_channels != len(self.metadata_files)-1: + raise ValueError( + f"Mismatch between number of ism channels [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]. Note: metadata should also include masa metadata file" + ) + + self.object_pos = [] + for i, f in enumerate(self.metadata_files): + if i >= self.num_ism_channels: + # only read ISM metadata, not MASA metadata + break + + pos = np.genfromtxt(f, delimiter=",") + + # check if metadata has right number of columns + num_columns = pos.shape[1] + if num_columns < 2: + raise ValueError( + "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory." 
+ ) + elif num_columns > NUMBER_COLUMNS_ISM_METADATA: + raise ValueError("Too many columns in metadata") + + # pad metadata to max number of columns + if num_columns < NUMBER_COLUMNS_ISM_METADATA: + pos = np.hstack( + [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])] + ) + + # check if metadata is longer than file -> cut off + num_frames = int( + np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000)) + ) + if num_frames < pos.shape[0]: + pos = pos[:num_frames] + # check if metadata is shorter than file -> loop + elif num_frames > pos.shape[0]: + pos_loop = np.zeros((num_frames, pos.shape[1])) + pos_loop[: pos.shape[0]] = pos + for idx in range(pos.shape[0], num_frames): + pos_loop[idx, :2] = pos[idx % pos.shape[0], :2] + pos = pos_loop + + # wrap metadata to target value range + for j in range(num_frames): + pos[j, 0], pos[j, 1] = wrap_angles(pos[j, 0], pos[j, 1], clip_ele=True) + + self.object_pos.append(pos) + + +class OSBAAudio(Audio): + """Sub-class for OSBA audio""" + + def __init__(self, name: str): + super().__init__(name) + try: + self.__dict__.update(OSBA_AUDIO_FORMATS[name.upper()]) + except KeyError: + raise ValueError(f"Unsupported OSBA audio format {name}") + self.object_pos = [] + self.metadata_files = [] + self.ambi_order = int(np.sqrt(self.num_channels-self.num_ism_channels) - 1) + + @classmethod + def _from_file( + cls, + name: str, + filename: Union[str, Path], + metadata_files: list[Union[str, Path]], + fs: Optional[int] = None, + ) -> "OSBAAudio": + obj = super()._from_file(name, filename, fs) + if metadata_files is not None: + obj.metadata_files = [Path(f) for f in metadata_files] + else: + # search for metadata with naming scheme: name.(wav, pcm).(0-3).csv + for obj_idx in range(obj.num_ism_channels): + file_name_meta = filename.with_suffix( + f"{filename.suffix}.{obj_idx}.csv" + ) + if file_name_meta.is_file(): + obj.metadata_files.append(file_name_meta) + else: + raise ValueError(f"Metadata file 
{file_name_meta} not found.") + warn( + f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}" + ) + + obj.init_metadata() + return obj + + @classmethod + def _from_filelist( + cls, + name: str, + filename: Path, + metadata_files: list[Union[str, Path]], + fs: Optional[int] = None, + ) -> "OSBAAudio": + obj = super()._from_filelist(name, filename, fs) + obj.metadata_files = [Path(f) for f in metadata_files] + obj.init_metadata() + return obj + + def init_metadata(self): + # check if number of metadata files matches format + if self.num_ism_channels != len(self.metadata_files): + raise ValueError( + f"Mismatch between number of ism channels [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]" + ) + + self.object_pos = [] + for i, f in enumerate(self.metadata_files): + pos = np.genfromtxt(f, delimiter=",") + + # check if metadata has right number of columns + num_columns = pos.shape[1] + if num_columns < 2: + raise ValueError( + "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory." 
+ ) + elif num_columns > NUMBER_COLUMNS_ISM_METADATA: + raise ValueError("Too many columns in metadata") + + # pad metadata to max number of columns + if num_columns < NUMBER_COLUMNS_ISM_METADATA: + pos = np.hstack( + [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])] + ) + + # check if metadata is longer than file -> cut off + num_frames = int( + np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000)) + ) + if num_frames < pos.shape[0]: + pos = pos[:num_frames] + # check if metadata is shorter than file -> loop + elif num_frames > pos.shape[0]: + pos_loop = np.zeros((num_frames, pos.shape[1])) + pos_loop[: pos.shape[0]] = pos + for idx in range(pos.shape[0], num_frames): + pos_loop[idx, :2] = pos[idx % pos.shape[0], :2] + pos = pos_loop + + # wrap metadata to target value range + for j in range(num_frames): + pos[j, 0], pos[j, 1] = wrap_angles(pos[j, 0], pos[j, 1], clip_ele=True) + + self.object_pos.append(pos) + + def _get_audio_class(fmt) -> Audio: """Return a child audio class corresponding to the specifed format""" if fmt in BINAURAL_AUDIO_FORMATS.keys(): @@ -387,9 +604,13 @@ def _get_audio_class(fmt) -> Audio: elif fmt in SCENE_BASED_AUDIO_FORMATS.keys(): return SceneBasedAudio elif ( - fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() or CHANNEL_BASED_AUDIO_ALTNAMES.keys() + fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() or fmt in CHANNEL_BASED_AUDIO_ALTNAMES.keys() ): return ChannelBasedAudio + elif fmt in OSBA_AUDIO_FORMATS.keys(): + return OSBAAudio + elif fmt in OMASA_AUDIO_FORMATS.keys(): + return OMASAAudio elif Path(fmt).suffix == ".txt": return ChannelBasedAudio else: @@ -403,7 +624,7 @@ def fromtype(fmt: str) -> Audio: def fromarray(fmt: str, x: np.ndarray, fs: int) -> Audio: """Wrap the given array into an audio format""" if x is None or not fs: - return ValueError("Both array and sampling rate must be specified!") + raise ValueError("Both array and sampling rate must be specified!") output = _get_audio_class(fmt)(fmt) @@ -422,7 
+643,7 @@ def fromfile( """Create an Audio object of the specified format from the given file""" filename = Path(filename) fmt_cls = _get_audio_class(fmt) - if fmt_cls is ObjectBasedAudio or fmt_cls is MetadataAssistedSpatialAudio: + if fmt_cls is ObjectBasedAudio or fmt_cls is MetadataAssistedSpatialAudio or fmt_cls is OMASAAudio or fmt_cls is OSBAAudio: return fmt_cls._from_file(fmt, filename, in_meta, fs) else: return fmt_cls._from_file(fmt, filename, fs) diff --git a/ivas_processing_scripts/audiotools/audiofile.py b/ivas_processing_scripts/audiotools/audiofile.py index b29ba6706ed53751bb95c83097033e1feb860665..632b288749c57da152d10eb9fcb67b6aedd15cbe 100755 --- a/ivas_processing_scripts/audiotools/audiofile.py +++ b/ivas_processing_scripts/audiotools/audiofile.py @@ -298,6 +298,7 @@ def combine( out_file: str, in_fs: Optional[int] = 48000, is_planar: Optional[bool] = False, + is_planar_offset: Optional[int] = 0, ) -> None: """ Combines audio files into one multi-channel file @@ -310,6 +311,10 @@ def combine( Output multi-channel audio file name (.pcm, .raw or .wav) in_fs: Optional[int] Input sampling rate, required for .pcm and .raw input file, default 48000 Hz + is_planar: Optional[bool] + If true vertical SBA channels are set to zero + is_planar_offset: Optional[int] + Offset of SBA due to OSBA (corresponds to num of ISM channels) Returns ------- @@ -338,7 +343,7 @@ def combine( # set vertical channels to zero if is_planar: - y[:, VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < len(in_filenames)]] = 0 + y[:, VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < (len(in_filenames) - is_planar_offset)] + is_planar_offset] = 0 write(out_file, y, fs=in_fs) @@ -349,6 +354,7 @@ def split_channels( in_nchans: int, out_nchans: int, is_planar: Optional[bool] = False, + is_planar_offset: Optional[int] = 0, in_fs: Optional[int] = 48000, ) -> None: """ @@ -366,6 +372,8 @@ def split_channels( Number of channels to be split is_planar: Optional[bool] If true vertical SBA channels are 
set to zero + is_planar_offset: Optional[int] + Offset of SBA due to OSBA (corresponds to num of ISM channels) in_fs: Optional[int] = 48000 Input sampling rate, default 48000 Hz @@ -386,7 +394,7 @@ def split_channels( x, in_fs = read(in_file, nchannels=in_nchans, fs=in_fs) if is_planar: - x[:, VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < in_nchans]] = 0 + x[:, VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < (in_nchans - is_planar_offset)] + is_planar_offset] = 0 # Write output files for idx, out_file in enumerate(out_filenames): diff --git a/ivas_processing_scripts/audiotools/constants.py b/ivas_processing_scripts/audiotools/constants.py index 1157a8fbb6d75951992a8c8475891ffbe71a4e99..b2d07f1c94422ffb0274970e728d9eb3f4e9351b 100755 --- a/ivas_processing_scripts/audiotools/constants.py +++ b/ivas_processing_scripts/audiotools/constants.py @@ -247,13 +247,24 @@ CHANNEL_BASED_AUDIO_ALTNAMES = { } METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS = { - "MASA1": { + "MASA1DIR1": { "num_channels": 1, + "dirs": 1, }, - "MASA2": { + "MASA1DIR2": { + "num_channels": 1, + "dirs": 2, + }, + "MASA2DIR1": { + "num_channels": 2, + "dirs": 1, + }, + "MASA2DIR2": { "num_channels": 2, + "dirs": 2, }, } + OBJECT_BASED_AUDIO_FORMATS = { "ISM1": { "num_channels": 1, @@ -269,7 +280,6 @@ OBJECT_BASED_AUDIO_FORMATS = { }, } - SCENE_BASED_AUDIO_FORMATS = { "FOA": { "num_channels": 4, @@ -307,6 +317,227 @@ SCENE_BASED_AUDIO_FORMATS = { "num_channels": 16, "is_planar": False, }, + "PLANARSBA1": { + "num_channels": 4, + "is_planar": True, + }, + "PLANARSBA2": { + "num_channels": 9, + "is_planar": True, + }, + "PLANARSBA3": { + "num_channels": 16, + "is_planar": True, + }, +} + +OMASA_AUDIO_FORMATS = { + # 1 dir + "ISM1MASA1DIR1": { + "num_channels": 2, + "num_ism_channels": 1, + "dirs": 1, + }, + "ISM1MASA2DIR1": { + "num_channels": 3, + "num_ism_channels": 1, + "dirs": 1, + }, + "ISM2MASA1DIR1": { + "num_channels": 3, + "num_ism_channels": 2, + "dirs": 1, + }, + "ISM2MASA2DIR1": { + "num_channels": 4, 
+ "num_ism_channels": 2, + "dirs": 1, + }, + "ISM3MASA1DIR1": { + "num_channels": 4, + "num_ism_channels": 3, + "dirs": 1, + }, + "ISM3MASA2DIR1": { + "num_channels": 5, + "num_ism_channels": 3, + "dirs": 1, + }, + "ISM4MASA1DIR1": { + "num_channels": 5, + "num_ism_channels": 4, + "dirs": 1, + }, + "ISM4MASA2DIR1": { + "num_channels": 6, + "num_ism_channels": 4, + "dirs": 1, + }, + # 2 dirs + "ISM1MASA1DIR2": { + "num_channels": 2, + "num_ism_channels": 1, + "dirs": 2, + }, + "ISM1MASA2DIR2": { + "num_channels": 3, + "num_ism_channels": 1, + "dirs": 2, + }, + "ISM2MASA1DIR2": { + "num_channels": 3, + "num_ism_channels": 2, + "dirs": 2, + }, + "ISM2MASA2DIR2": { + "num_channels": 4, + "num_ism_channels": 2, + "dirs": 2, + }, + "ISM3MASA1DIR2": { + "num_channels": 4, + "num_ism_channels": 3, + "dirs": 2, + }, + "ISM3MASA2DIR2": { + "num_channels": 5, + "num_ism_channels": 3, + "dirs": 2, + }, + "ISM4MASA1DIR2": { + "num_channels": 5, + "num_ism_channels": 4, + "dirs": 2, + }, + "ISM4MASA2DIR2": { + "num_channels": 6, + "num_ism_channels": 4, + "dirs": 2, + }, +} + +OSBA_AUDIO_FORMATS = { + "ISM1SBA1": { + "num_channels": 5, + "num_ism_channels": 1, + "is_planar": False, + }, + "ISM1SBA2": { + "num_channels": 10, + "num_ism_channels": 1, + "is_planar": False, + }, + "ISM1SBA3": { + "num_channels": 17, + "num_ism_channels": 1, + "is_planar": False, + }, + "ISM2SBA1": { + "num_channels": 6, + "num_ism_channels": 2, + "is_planar": False, + }, + "ISM2SBA2": { + "num_channels": 11, + "num_ism_channels": 2, + "is_planar": False, + }, + "ISM2SBA3": { + "num_channels": 18, + "num_ism_channels": 2, + "is_planar": False, + }, + "ISM3SBA1": { + "num_channels": 7, + "num_ism_channels": 3, + "is_planar": False, + }, + "ISM3SBA2": { + "num_channels": 12, + "num_ism_channels": 3, + "is_planar": False, + }, + "ISM3SBA3": { + "num_channels": 19, + "num_ism_channels": 3, + "is_planar": False, + }, + "ISM4SBA1": { + "num_channels": 8, + "num_ism_channels": 4, + "is_planar": False, + },
+ "ISM4SBA2": { + "num_channels": 13, + "num_ism_channels": 4, + "is_planar": False, + }, + "ISM4SBA3": { + "num_channels": 20, + "num_ism_channels": 4, + "is_planar": False, + }, + # planar sba + "ISM1PLANARSBA1": { + "num_channels": 5, + "num_ism_channels": 1, + "is_planar": True, + }, + "ISM1PLANARSBA2": { + "num_channels": 10, + "num_ism_channels": 1, + "is_planar": True, + }, + "ISM1PLANARSBA3": { + "num_channels": 17, + "num_ism_channels": 1, + "is_planar": True, + }, + "ISM2PLANARSBA1": { + "num_channels": 6, + "num_ism_channels": 2, + "is_planar": True, + }, + "ISM2PLANARSBA2": { + "num_channels": 11, + "num_ism_channels": 2, + "is_planar": True, + }, + "ISM2PLANARSBA3": { + "num_channels": 18, + "num_ism_channels": 2, + "is_planar": True, + }, + "ISM3PLANARSBA1": { + "num_channels": 7, + "num_ism_channels": 3, + "is_planar": True, + }, + "ISM3PLANARSBA2": { + "num_channels": 12, + "num_ism_channels": 3, + "is_planar": True, + }, + "ISM3PLANARSBA3": { + "num_channels": 19, + "num_ism_channels": 3, + "is_planar": True, + }, + "ISM4PLANARSBA1": { + "num_channels": 8, + "num_ism_channels": 4, + "is_planar": True, + }, + "ISM4PLANARSBA2": { + "num_channels": 13, + "num_ism_channels": 4, + "is_planar": True, + }, + "ISM4PLANARSBA3": { + "num_channels": 20, + "num_ism_channels": 4, + "is_planar": True, + }, } SCENE_METADATA_FORMATS = {"META"} @@ -317,9 +548,10 @@ AUDIO_FORMATS = [ METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, OBJECT_BASED_AUDIO_FORMATS, SCENE_BASED_AUDIO_FORMATS, + OMASA_AUDIO_FORMATS, + OSBA_AUDIO_FORMATS, ] - IVAS_FRAME_LEN_MS = 20 IVAS_CICPX_TO_MONO = np.array( diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 845f00c69041d8766901c64e9d7041ee61f109fa..0022e0d96013f1192bf27b993dd9907711fc92ae 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -34,6 +34,7 @@ import logging from pathlib import Path, PurePath from
shutil import copyfile from typing import Optional, Union +from copy import copy from numpy import empty @@ -43,6 +44,8 @@ from ivas_processing_scripts.audiotools.convert.channelbased import convert_chan from ivas_processing_scripts.audiotools.convert.masa import convert_masa from ivas_processing_scripts.audiotools.convert.objectbased import convert_objectbased from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased +from ivas_processing_scripts.audiotools.convert.osba import convert_osba +from ivas_processing_scripts.audiotools.convert.omasa import convert_omasa from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm from ivas_processing_scripts.audiotools.wrappers.esdru import esdru, spatial_distortion from ivas_processing_scripts.audiotools.wrappers.filter import ( @@ -77,8 +80,9 @@ def convert_file( else: # first check prevents crash on custom_ls setup formats if isinstance(in_fmt, str) and in_fmt.startswith("MASA") and in_meta is None: - # if no MD fileis provided, default to name (including .wav or .pcm!!!) + ".met" + # if no MD file is provided, default to name (including .wav or .pcm!!!) 
+ ".met" in_meta = [in_file.parent / (in_file.name + ".met")] + input = audio.fromfile(in_fmt, in_file, in_fs, in_meta) # try to set reasonable defaults if missing @@ -97,22 +101,42 @@ def convert_file( output = audio.fromtype(out_fmt) + # handle metadata for outputs with metadata (MASA, ISM, OMASA, OSBA) if isinstance(output, audio.MetadataAssistedSpatialAudio): - # create dummy audio array to allow inference of MASA mode - num_tcs = int(output.name[-1]) - output.audio = empty((1, num_tcs)) + if isinstance(input, audio.MetadataAssistedSpatialAudio): + # use existing metadata file + output.metadata_file = input.metadata_file + else: + # fabricate metadata file name + masa_meta_file_name = Path(out_file).parent / (Path(out_file).name + ".met") + output.metadata_file = masa_meta_file_name - # fabricate metadata file name - output.metadata_file = Path(out_file).parent / (Path(out_file).name + ".met") - if isinstance(output, audio.ObjectBasedAudio): + elif isinstance(output, audio.ObjectBasedAudio): try: - output.object_pos = input.object_pos - output.metadata_files = input.metadata_files + output.object_pos = copy(input.object_pos) + output.metadata_files = copy(input.metadata_files) except Exception: raise ValueError( "ISM is not supported as an output for rendering! 
Only usable as pass-through" ) + elif isinstance(output, audio.OMASAAudio): + if isinstance(input, audio.OMASAAudio): + # use existing metadata files + output.metadata_files = copy(input.metadata_files) + elif isinstance(input, audio.OSBAAudio): + # fabricate metadata file name + masa_meta_file_name = Path(out_file).parent / (Path(out_file).name + ".met") + output.metadata_files = copy(input.metadata_files) + output.metadata_files.append(masa_meta_file_name) + else: + raise NotImplementedError("Can only convert to OMASA from OSBA") + output.object_pos = copy(input.object_pos) + + elif isinstance(output, audio.OSBAAudio): + output.object_pos = copy(input.object_pos) + output.metadata_files = copy(input.metadata_files) + # apply actual conversion if isinstance(input, metadata.Metadata): if logger: logger.debug(f"Converting metadata to {out_fmt} : {in_file} -> {out_file}") @@ -134,16 +158,22 @@ def convert_file( output.fs = in_fs # resampling not yet applied convert(input, output, in_fs=in_fs, out_fs=out_fs, logger=logger, **kwargs) - # write output + # write output audio write(out_file, output.audio, output.fs) - if isinstance(output, audio.ObjectBasedAudio): + # write metadata + if isinstance(output, audio.ObjectBasedAudio) or isinstance(output, audio.OSBAAudio): write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True) elif isinstance(output, audio.MetadataAssistedSpatialAudio) and in_fmt == out_fmt: # audio objects point to same MD file, create new one with default naming for output out_md_name = out_file.parent / (out_file.name + ".met") copyfile(output.metadata_file, out_md_name) output.metadata_file = out_md_name - + elif isinstance(output, audio.OMASAAudio): + write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True) + if in_fmt == out_fmt: + # audio objects point to same MD file, create new one with default naming for output + out_md_name = out_file.parent / (out_file.name + ".met") + 
copyfile(output.metadata_files[-1], out_md_name) def convert( input: audio.Audio, @@ -328,28 +358,43 @@ def format_conversion( """Convert one audio format to another""" # validation + # check for MASA/OMASA as output if isinstance(output, audio.MetadataAssistedSpatialAudio) and not ( isinstance(input, audio.SceneBasedAudio) or isinstance(input, audio.MetadataAssistedSpatialAudio) ): raise NotImplementedError("Can only convert to MASA from SBA") + if isinstance(output, audio.OMASAAudio) and not ( + isinstance(input, audio.OSBAAudio) + or isinstance(input, audio.OMASAAudio) + ): + raise NotImplementedError("Can only convert to OMASA from OSBA") + # check for ISM (also OMASA and OSBA) as output if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name: raise NotImplementedError( "ISM is not supported as an output for rendering! Only usable as pass-through" ) + if isinstance(output, audio.OMASAAudio) or isinstance(output, audio.OSBAAudio): + if not (isinstance(input, audio.OMASAAudio) or isinstance(input, audio.OSBAAudio)): + raise NotImplementedError( + "OMASA and OSBA only possible as output if input is OMASA or OSBA" + ) if logger: logger.debug(f"Format conversion: {input.name} -> {output.name}") + # format conversion + # check if input and output format are the same if (fmt := input.name) == output.name or ( input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL") ): output.audio = input.audio if fmt.startswith("MASA"): output.metadata_file = input.metadata_file - elif fmt.startswith("ISM"): + elif fmt.startswith("ISM"): # also includes combined formats output.metadata_files = list(output.metadata_files) + else: if isinstance(input, audio.BinauralAudio): raise NotImplementedError( @@ -363,6 +408,10 @@ def format_conversion( convert_objectbased(input, output, **kwargs) elif isinstance(input, audio.SceneBasedAudio): convert_scenebased(input, output, **kwargs) + elif isinstance(input, audio.OSBAAudio): + convert_osba(input, output, 
**kwargs) + elif isinstance(input, audio.OMASAAudio): + convert_omasa(input, output, **kwargs) else: raise NotImplementedError( f"Unknown or unsupported audio format {input.name}" diff --git a/ivas_processing_scripts/audiotools/convert/omasa.py b/ivas_processing_scripts/audiotools/convert/omasa.py new file mode 100644 index 0000000000000000000000000000000000000000..c222903ae84f8737bc59b54ea755a371ff4a8247 --- /dev/null +++ b/ivas_processing_scripts/audiotools/convert/omasa.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. 
It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +from copy import copy, deepcopy +import numpy as np + +from ivas_processing_scripts.audiotools import audio +from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \ + render_oba_to_sba +from ivas_processing_scripts.audiotools.convert.masa import render_masa_to_binaural, render_masa_to_cba, render_masa_to_sba + +""" OMASAAudio functions """ + + +def convert_omasa( + omasa: audio.OMASAAudio, + out: audio.Audio, + **kwargs, +) -> audio.Audio: + """Convert an OMASA signal to the requested output format""" + + # split OMASA object in ISM and MASA object + oba = audio.fromarray("ISM" + str(omasa.num_ism_channels), omasa.audio[:, :omasa.num_ism_channels], omasa.fs) + oba.metadata_files = copy(omasa.metadata_files) + oba.object_pos = copy(omasa.object_pos) + masa = audio.fromarray("MASA" + str(omasa.num_channels-omasa.num_ism_channels) + "DIR" + str(omasa.dirs), omasa.audio[:, omasa.num_ism_channels:], omasa.fs) + masa.metadata_file = omasa.metadata_files[-1] + + # OMASA -> Binaural + if isinstance(out, audio.BinauralAudio): + # render MASA and ISM part separately + # ISM + out_ism = deepcopy(out) + render_oba_to_binaural(oba, out_ism, **kwargs) + + # MASA + out_masa = deepcopy(out) + render_masa_to_binaural(masa, out_masa, **kwargs) + + # combine results + out.audio = 
out_ism.audio + out_masa.audio + + # OMASA -> CBA + elif isinstance(out, audio.ChannelBasedAudio): + # render MASA and ISM part separately + # ISM + out_ism = deepcopy(out) + render_oba_to_cba(oba, out_ism) + + # MASA + out_masa = deepcopy(out) + render_masa_to_cba(masa, out_masa) + + # combine results + out.audio = out_ism.audio + out_masa.audio + + # OMASA -> SBA + elif isinstance(out, audio.SceneBasedAudio): + # render MASA and ISM part separately + # ISM + out_ism = deepcopy(out) + render_oba_to_sba(oba, out_ism) + + # MASA + out_masa = deepcopy(out) + render_masa_to_sba(masa, out_masa) + + # combine results + out.audio = out_ism.audio + out_masa.audio + + # OMASA -> OSBA + elif isinstance(out, audio.OSBAAudio): + # check if ism object number is the same + if out.num_ism_channels != omasa.num_ism_channels: + raise ValueError("OMASA to OSBA conversion only possible if number of ISM objects matches") + + # only render MASA part + out_masa = deepcopy(out) + render_masa_to_sba(masa, out_masa) + + out.audio = np.concatenate((omasa.audio[:, :omasa.num_ism_channels], out_masa.audio), axis=1) + + else: + raise NotImplementedError( + f"Conversion from {omasa.name} to {out.name} is unsupported!" + ) + + return out diff --git a/ivas_processing_scripts/audiotools/convert/osba.py b/ivas_processing_scripts/audiotools/convert/osba.py new file mode 100644 index 0000000000000000000000000000000000000000..baa90420b90566d17177779534f3bf5360ba4142 --- /dev/null +++ b/ivas_processing_scripts/audiotools/convert/osba.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+# + +from copy import copy, deepcopy +import numpy as np + +from ivas_processing_scripts.audiotools import audio +from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \ + render_oba_to_sba +from ivas_processing_scripts.audiotools.convert.scenebased import render_sba_to_binaural, render_sba_to_cba, \ + render_sba_to_sba, render_sba_to_masa + +""" OSBAAudio functions """ + + +def convert_osba( + osba: audio.OSBAAudio, + out: audio.Audio, + **kwargs, +) -> audio.Audio: + """Convert an OSBA signal to the requested output format""" + + # split OSBA object in ISM and SBA object + oba = audio.fromarray("ISM" + str(osba.num_ism_channels), osba.audio[:, :osba.num_ism_channels], osba.fs) + oba.metadata_files = copy(osba.metadata_files) + oba.object_pos = copy(osba.object_pos) + sba = audio.fromarray("SBA" + str(osba.ambi_order), osba.audio[:, osba.num_ism_channels:], osba.fs) + + # OSBA -> Binaural + if isinstance(out, audio.BinauralAudio): + # render SBA and ISM part separately + # ISM + out_ism = deepcopy(out) + render_oba_to_binaural(oba, out_ism, **kwargs) + + # SBA + out_sba = deepcopy(out) + render_sba_to_binaural(sba, out_sba, **kwargs) + + # combine results + out.audio = out_ism.audio + out_sba.audio + + # OSBA -> CBA + elif isinstance(out, audio.ChannelBasedAudio): + # render SBA and ISM part separately + # ISM + out_ism = deepcopy(out) + render_oba_to_cba(oba, out_ism) + + # SBA + out_sba = deepcopy(out) + render_sba_to_cba(sba, out_sba) + + # combine results + out.audio = out_ism.audio + out_sba.audio + + # OSBA -> SBA + elif isinstance(out, audio.SceneBasedAudio): + # render SBA and ISM part separately + # ISM + out_ism = deepcopy(out) + render_oba_to_sba(oba, out_ism) + + # SBA + out_sba = deepcopy(out) + render_sba_to_sba(sba, out_sba) + + # combine results + out.audio = out_ism.audio + out_sba.audio + + # OSBA -> OMASA + elif isinstance(out, audio.OMASAAudio): + # check if ism object number is the same 
+ if out.num_ism_channels != osba.num_ism_channels: + raise ValueError("OSBA to OMASA conversion only possible if number of ISM objects matches") + + # only render SBA part + out_sba = audio.fromtype(out.name[4:]) + out_sba.metadata_file = out.metadata_files[-1] + render_sba_to_masa(sba, out_sba) + + out.audio = np.concatenate((osba.audio[:, :osba.num_ism_channels], out_sba.audio), axis=1) + + # OSBA -> OSBA + elif isinstance(out, audio.OSBAAudio): + # check if ism object number is the same + if out.num_ism_channels != osba.num_ism_channels: + raise ValueError("OSBA to OSBA conversion only possible if number of ISM objects matches") + + # only render SBA part + out_sba = audio.fromtype(out.name[4:]) + render_sba_to_sba(sba, out_sba) + + out.audio = np.concatenate((osba.audio[:, :osba.num_ism_channels], out_sba.audio), axis=1) + + else: + raise NotImplementedError( + f"Conversion from {osba.name} to {out.name} is unsupported!" + ) + + return out diff --git a/ivas_processing_scripts/audiotools/convert/scenebased.py b/ivas_processing_scripts/audiotools/convert/scenebased.py index 9a5f2729734a204d71dde582ada3b2bc4206a902..286724b6df23e571944e976a6795bcff15289c8e 100755 --- a/ivas_processing_scripts/audiotools/convert/scenebased.py +++ b/ivas_processing_scripts/audiotools/convert/scenebased.py @@ -78,9 +78,9 @@ def convert_scenebased( render_sba_to_sba(sba, out) # SBA -> MASA - # NOTE: only allowed for 1st order ambisonics ("FOA" + "PLANARFOA") + # NOTE: only allowed for 1st and 2nd order ambisonics elif isinstance(out, audio.MetadataAssistedSpatialAudio) and ( - sba.name.endswith("FOA") or sba.name == "HOA2" + sba.name.endswith("FOA") or sba.name.endswith("HOA2") ): render_sba_to_masa(sba, out) @@ -196,14 +196,11 @@ def render_sba_to_masa( sba_in: audio.SceneBasedAudio, masa_out: audio.MetadataAssistedSpatialAudio, ) -> None: - num_dirs = 1 - if sba_in.name == "HOA2": - num_dirs = 2 - num_tcs = masa_out.audio.shape[1] + num_tcs = masa_out.num_channels md_out_path = 
masa_out.metadata_file - masa = masaAnalyzer(sba_in, num_tcs, num_dirs, md_out_path) + masa = masaAnalyzer(sba_in, num_tcs, masa_out.dirs, md_out_path) masa_out.audio = masa.audio diff --git a/ivas_processing_scripts/audiotools/metadata.py b/ivas_processing_scripts/audiotools/metadata.py index 6c87809782ef0b78961c1b189a5b1848fda1ce7f..8ba469235d5563d7af216533ea2b98aba8edc259 100755 --- a/ivas_processing_scripts/audiotools/metadata.py +++ b/ivas_processing_scripts/audiotools/metadata.py @@ -344,7 +344,7 @@ def concat_meta_from_file( frame_length = int(IVAS_FRAME_LEN_MS * audio_objects[0].fs // 1000) # pad and concatenate - concat_meta_all_obj = [None] * audio_objects[0].num_channels + concat_meta_all_obj = [None] * len(meta_files[0]) for audio_item in audio_objects: # check if audio is multiple of frame length @@ -467,7 +467,7 @@ def check_ISM_metadata( list_meta = [] if in_meta is None: for item in item_names: - list_item = metadata_search(Path(item).parent, [item], num_objects) + list_item = metadata_search_ISM(Path(item).parent, [item], num_objects) list_meta.append(list_item) else: if len(in_meta) == 1 and num_items != 1: @@ -479,7 +479,7 @@ def check_ISM_metadata( 'Only one metadata path is given but not with key "all_items".' 
) - list_meta = metadata_search(path_meta, item_names, num_objects) + list_meta = metadata_search_ISM(path_meta, item_names, num_objects) elif num_items == len(in_meta): # search for every item individually @@ -495,7 +495,7 @@ def check_ISM_metadata( if not isinstance(current_item, list): # automatic search in folder - list_item = metadata_search( + list_item = metadata_search_ISM( current_item, [item_names[item_idx]], num_objects ) @@ -514,7 +514,55 @@ def check_ISM_metadata( return list_meta -def metadata_search( +def check_MASA_metadata( + in_meta: dict, + num_items: int, + item_names: Optional[list] = None, +) -> list: + """Find MASA metadata""" + + list_meta = [] + if in_meta is None: + for item in item_names: + list_item = metadata_search_MASA(Path(item).parent, [item]) + list_meta.append(list_item) + else: + if len(in_meta) == 1 and num_items != 1: + # automatic search for metadata files in folder for all items and objects + try: + path_meta = in_meta["all_items"] + except KeyError: + raise ValueError( + 'Only one metadata path is given but not with key "all_items".' 
+ ) + + list_meta = metadata_search_MASA(path_meta, item_names) + + elif num_items == len(in_meta): + # search for every item individually + for item_idx in range(num_items): + # try to use item_names as keys + try: + if item_names: + current_item = in_meta[item_names[item_idx].name] + else: + raise KeyError + except KeyError: + current_item = in_meta[f"item{item_idx + 1}"] + + if not isinstance(current_item, list): + # automatic search in folder + list_item = metadata_search_MASA(current_item, [item_names[item_idx]]) + + list_meta.append(list_item) + else: + raise ValueError("Number of metadata inputs does not match number of items") + + # return list of lists of metadata files + return list_meta + + +def metadata_search_ISM( in_meta_path: Union[str, Path], item_names: list[Union[str, Path]], num_objects: int, @@ -544,6 +592,34 @@ def metadata_search( return list_meta +def metadata_search_MASA( + in_meta_path: Union[str, Path], + item_names: list[Union[str, Path]], +) -> list[list[Union[Path, str]]]: + """Search for MASA metadata with structure item_name.met in in_meta folder""" + + if not item_names: + raise ValueError("Item names not provided, can't search for metadata") + + list_meta = [] + for item in item_names: + list_item = [] + file_name_meta = in_meta_path / Path(item.stem).with_suffix( + f"{item.suffix}.met" + ) + # check if file exists and add to list + if file_name_meta.is_file(): + list_item.append(Path(file_name_meta).resolve()) + else: + raise ValueError(f"Metadata file {file_name_meta} not found.") + if len(item_names) == 1: + list_meta = list_item + else: + list_meta.append(list_item) + + return list_meta + + def add_remove_preamble( metadata, preamble, diff --git a/ivas_processing_scripts/audiotools/wrappers/bs1770.py b/ivas_processing_scripts/audiotools/wrappers/bs1770.py index a72398ed11c1ae7c15a06be607a9a00a8e8e9848..8e1f94bafd802ed212260202d00ee0f180ec3dbd 100755 --- a/ivas_processing_scripts/audiotools/wrappers/bs1770.py +++ 
b/ivas_processing_scripts/audiotools/wrappers/bs1770.py @@ -196,7 +196,7 @@ def get_loudness( input, audio.MetadataAssistedSpatialAudio ): loudness_format = "7_1_4" - elif isinstance(input, audio.ObjectBasedAudio): + elif isinstance(input, audio.ObjectBasedAudio) or isinstance(input, audio.OMASAAudio) or isinstance(input, audio.OSBAAudio): loudness_format = "BINAURAL" elif hasattr(input, "layout_file"): loudness_format = input.layout_file diff --git a/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py b/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py index 0ded643deb736542edb85ba51e43e140fec8bb8e..432abd694eca44446df3f8c38973d4184c0fc58b 100644 --- a/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py +++ b/ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py @@ -74,7 +74,7 @@ def masaAnalyzer( if num_dirs not in [1, 2]: raise ValueError(f"Only 1 or 2 directions supported, but {num_dirs} was given.") - if sba.name not in ["PLANARFOA", "FOA", "HOA2"]: + if sba.name not in ["PLANARFOA", "FOA", "HOA2", "PLANARHOA2"]: raise ValueError(f"Only FOA or HOA2 suported, but {sba.name} was given.") if num_dirs == 2 and sba.name != "HOA2": @@ -105,7 +105,7 @@ def masaAnalyzer( # we need to run in the masaAnalyzer directory to use the .bin files it requires run(cmd, cwd=binary.resolve().parent) - fmt = f"MASA{num_tcs}" - masa = audio.fromfile(fmt, tmp_out_pcm, 48000, [metadata_out_path]) + fmt = f"MASA{num_tcs}DIR{num_dirs}" + masa = audio.fromfile(fmt, tmp_out_pcm, 48000, metadata_out_path) return masa diff --git a/ivas_processing_scripts/audiotools/wrappers/p50fbmnru.py b/ivas_processing_scripts/audiotools/wrappers/p50fbmnru.py index c31d0aae1219d654663b00659b9e8658e462eab4..68148597098b3840f1e0b7c0245f8885df89d1fa 100755 --- a/ivas_processing_scripts/audiotools/wrappers/p50fbmnru.py +++ b/ivas_processing_scripts/audiotools/wrappers/p50fbmnru.py @@ -30,7 +30,7 @@ # the United Nations Convention on Contracts on the International Sales of 
Goods. # -from copy import copy +from copy import deepcopy from pathlib import Path from tempfile import TemporaryDirectory from warnings import warn @@ -73,7 +73,7 @@ def p50fbmnru( else: binary = find_binary("p50fbmnru") - tmp_audio_obj = copy(input) + tmp_audio_obj = deepcopy(input) # resample signal to 48kHz if input.fs != 48000: diff --git a/ivas_processing_scripts/processing/evs.py b/ivas_processing_scripts/processing/evs.py index 2440eadb992cfd3aa8894e556edc6ebc6c3e8fd4..9d22dc441407efeb2c2a2e04cff18af92f7cddf9 100755 --- a/ivas_processing_scripts/processing/evs.py +++ b/ivas_processing_scripts/processing/evs.py @@ -177,7 +177,13 @@ class EVS(Processing): # flag for zeroing of channels for planar SBA formats is_planar = ( isinstance(self.in_fmt, audio.SceneBasedAudio) and self.in_fmt.is_planar + or + isinstance(self.in_fmt, audio.OSBAAudio) and self.in_fmt.is_planar ) + if isinstance(self.in_fmt, audio.OSBAAudio) and self.in_fmt.is_planar: + is_planar_offset = self.in_fmt.num_ism_channels + else: + is_planar_offset = 0 # Split the channels to prepare for multi-mono coding split_chan_files = [ @@ -191,6 +197,7 @@ class EVS(Processing): out_nchans=self.in_fmt.num_channels, in_fs=self.in_fs, is_planar=is_planar, + is_planar_offset=is_planar_offset, ) # run processing @@ -253,7 +260,7 @@ class EVS(Processing): # combine the decoded channels into the output file if out_file.suffix in [".wav", ".pcm"]: - combine(split_chan_out, out_file, in_fs=self.out_fs, is_planar=is_planar) + combine(split_chan_out, out_file, in_fs=self.out_fs, is_planar=is_planar,is_planar_offset=is_planar_offset) if split_chan_bs_unprocessed != split_chan_bs and self.tx_condition: out_file_unprocessed = f"{Path(out_file.parent).joinpath(Path(out_file.name).with_suffix(''))}.noerror{out_file.suffix}" combine( @@ -261,10 +268,16 @@ class EVS(Processing): out_file_unprocessed, in_fs=self.out_fs, is_planar=is_planar, + is_planar_offset=is_planar_offset, ) # copy ISM metadata for ISM pass-through 
- if isinstance(self.in_fmt, audio.ObjectBasedAudio): - for idx in range(len(in_meta)): + if isinstance(self.in_fmt, audio.ObjectBasedAudio) or isinstance(self.in_fmt, audio.OMASAAudio) or isinstance(self.in_fmt, audio.OSBAAudio): + if isinstance(self.in_fmt, audio.ObjectBasedAudio): + num_ism_obj = self.in_fmt.num_channels + else: + num_ism_obj = self.in_fmt.num_ism_channels + + for idx in range(num_ism_obj): out_file_meta = ( out_file.parent / f"{out_file.stem.split('.')[0]}.evs{out_file.suffix}.{idx}.csv" @@ -278,7 +291,7 @@ class EVS(Processing): copyfile(in_meta[idx], out_file_meta_unprocessed) # copy MASA metadata for MASA pass-through - if isinstance(self.in_fmt, audio.MetadataAssistedSpatialAudio): + if isinstance(self.in_fmt, audio.MetadataAssistedSpatialAudio) or isinstance(self.in_fmt, audio.OMASAAudio): md_file_in = in_file.parent / (in_file.name + ".met") md_file_out = out_file.parent / (out_file.name + ".met") copyfile(md_file_in, md_file_out) diff --git a/ivas_processing_scripts/processing/ivas.py b/ivas_processing_scripts/processing/ivas.py index b9d5b339c5dd5ac02ddf48ee45e9ce5d7cb9bcd8..981fca1fa747beca24b35c22a6a3dc05a686a26f 100755 --- a/ivas_processing_scripts/processing/ivas.py +++ b/ivas_processing_scripts/processing/ivas.py @@ -152,13 +152,15 @@ class IVAS(Processing): if isinstance(self.in_fmt, audio.MetadataAssistedSpatialAudio): md_file = in_file.parent / (in_file.name + ".met") metadata_files.append(md_file) - - if isinstance(self.in_fmt, audio.ObjectBasedAudio): - if in_meta is None: - # TODO treffehn: search in folder of in_file - pass - else: - metadata_files = in_meta + elif isinstance(self.in_fmt, audio.ObjectBasedAudio) or isinstance(self.in_fmt, audio.OSBAAudio): + metadata_files = in_meta + elif isinstance(self.in_fmt, audio.OMASAAudio): + metadata_files = in_meta + # TODO treffehn: check and maybe change here and for masa + # if len(metadata_files) != number of ism channels plus one + # md_file = in_file.parent / (in_file.name + 
".met") + # metadata_files.append(md_file) + pass # Support input file wav, pcm and txt (metadata iis) if in_file.suffix == ".wav": @@ -303,7 +305,16 @@ class IVAS(Processing): cmd.extend(["-q"]) if self.out_fmt.name.startswith("ISM") or self.out_fmt.name.startswith("MASA"): - output_format = "EXT" + # the SBA part of OSBA is always rendered to HOA3 for EXT by IVAS + if isinstance(self.in_fmt, audio.OSBAAudio) and self.in_fmt.name[:]: + if self.out_fmt.num_channels != (16 + self.in_fmt.num_ism_channels): + raise ValueError("When using EXT output for IVAS for OSBA make sure the specified decoder format is ISMxSBA3") + else: + output_format = "EXT" + else: + if self.in_fmt.name != self.out_fmt.name: + raise ValueError("ISM and MASA output format for IVAS only possible if input and output format match") + output_format = "EXT" elif self.in_fmt.name == "MONO": if self.out_fmt.name == "MONO": output_format = "" # EVS @@ -358,6 +369,13 @@ class IVAS(Processing): "CUSTOM_LS", ]: return ["-mc", fmt.name] + elif isinstance(fmt, audio.OSBAAudio): + if fmt.is_planar: + return ["-ism_sba", str(fmt.num_ism_channels), f"-{str(fmt.ambi_order)}"] + metadata_files + else: + return ["-ism_sba", str(fmt.num_ism_channels), f"+{str(fmt.ambi_order)}"] + metadata_files + elif isinstance(fmt, audio.OMASAAudio): + return ["-ism_masa", str(fmt.num_ism_channels), str(fmt.num_channels-fmt.num_ism_channels)] + metadata_files raise ValueError(f"IVAS: Invalid input config: {fmt.name}.") diff --git a/ivas_processing_scripts/processing/postprocessing.py b/ivas_processing_scripts/processing/postprocessing.py index b96d6b5c6e81342b35b6d90c384c3b6df035cfc4..e92667a774043e29b84a9dd6efbc88ceeb4b98c6 100755 --- a/ivas_processing_scripts/processing/postprocessing.py +++ b/ivas_processing_scripts/processing/postprocessing.py @@ -60,9 +60,15 @@ class Postprocessing(Processing): if in_meta: in_meta_noerror = [] for meta in in_meta: + if str(meta).endswith(".met"): + # MASA + num_suffix = 2 + else: + # ISM + 
num_suffix = 3 path_parts = str(meta).split(".") - suffix = ".".join(path_parts[-3:]) - name = ".".join(path_parts[:-3]) + suffix = ".".join(path_parts[-num_suffix:]) + name = ".".join(path_parts[:-num_suffix]) in_meta_noerror.append(Path(f"{name}.noerror.{suffix}")) else: in_meta_noerror = None diff --git a/ivas_processing_scripts/processing/preprocessing_2.py b/ivas_processing_scripts/processing/preprocessing_2.py index 912ef6d13fa16f233dfc12a3843bf21db163b53d..4eab213ce17757c41389b70e014b92a2466000a5 100644 --- a/ivas_processing_scripts/processing/preprocessing_2.py +++ b/ivas_processing_scripts/processing/preprocessing_2.py @@ -64,6 +64,10 @@ class Preprocessing2(Processing): self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta ) + if isinstance(audio_object, audio.MetadataAssistedSpatialAudio) or isinstance(audio_object, audio.OMASAAudio): + if self.preamble > 0 or self.background_noise or self.repeat_signal: + raise ValueError("No preprocessing 2 possible for formats including MASA metadata") + # modify ISM metadata if self.in_fmt.startswith("ISM"): if not self.preamble: diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index db63c8fd787a329b64dc4819c4a5fa3c51a3de56..71c895faa70b732a9f5d1d3dcfa4ea9eb1984f8c 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -123,8 +123,18 @@ def concat_setup(cfg: TestConfig, chain, logger: logging.Logger): logger.info(f"Concatenating input files in directory {cfg.input_path}") + # derive input format to preprocessing 2 (either input or preprocessing format) + try: + input_format = cfg.preprocessing.get("fmt", cfg.input["fmt"]) + except AttributeError: + input_format = cfg.input["fmt"] + + # concatenation of met files not possible -> do not concatenate MASA and OMASA + if "MASA" in input_format: + raise ValueError("Concatenation of formats including MASA metadata not possible") + # concatenate ISM 
metadata - if cfg.input["fmt"].startswith("ISM"): + if input_format.startswith("ISM"): cfg.concat_meta = [] for obj_idx in range(len(cfg.metadata_path[0])): cfg.concat_meta.append( @@ -136,7 +146,7 @@ def concat_setup(cfg: TestConfig, chain, logger: logging.Logger): cfg.items_list, cfg.metadata_path, cfg.concat_meta, - cfg.input["fmt"], + input_format, ) # set input to the concatenated file we have just written to the output dir @@ -354,13 +364,32 @@ def preprocess(cfg, logger): cfg.items_list, cfg.preprocessing_2["concatenation_order"] ) - if cfg.metadata_path[0] is not None: - for item_idx in range(len(cfg.metadata_path)): - for obj_idx in range(len(cfg.metadata_path[item_idx])): - if cfg.metadata_path[item_idx][obj_idx]: - cfg.metadata_path[item_idx][obj_idx] = cfg.out_dirs[0] / Path( - f"{cfg.items_list[item_idx].stem}.wav.{obj_idx}.csv" - ) + # set new metadata files + try: + preproc_output_fmt = chain[0].out_fmt + except AttributeError: + preproc_output_fmt = chain[0].in_fmt + + cfg.metadata_path = [] + + for item_idx in range(len(cfg.items_list)): + list_item = [] + # ISM metadata + if "ISM" in preproc_output_fmt: + num_obj = int(preproc_output_fmt[3]) + for obj_idx in range(num_obj): + list_item.append(cfg.out_dirs[0] / Path(f"{cfg.items_list[item_idx].stem}.wav.{obj_idx}.csv")) + + # MASA metadata + if "MASA" in preproc_output_fmt: + list_item.append(cfg.out_dirs[0] / Path(f"{cfg.items_list[item_idx].stem}.wav.met")) + + # no metadata + if not "ISM" in preproc_output_fmt and not "MASA" in preproc_output_fmt: + list_item.append(None) + + cfg.metadata_path.append(list_item) + # remove already applied processing stage cfg.proc_chains = cfg.proc_chains[1:] cfg.tmp_dirs = cfg.tmp_dirs[1:] @@ -439,56 +468,88 @@ def process_item( logger: logging.Logger, in_meta, ) -> None: + # derive tmp file names tmp_file = tmp_dir.joinpath(in_file.name) tmp_file_meta = [] if in_meta: for im in in_meta: - tmp_file_meta.append(tmp_dir.joinpath(Path(im).name)) + if im is not 
None: + tmp_file_meta.append(tmp_dir.joinpath(Path(im).name)) # assemble a list of files to be used during the processing chain out_dir_wav = False processing_paths = [in_file] processing_paths_meta = [in_meta] + bool_ism = False + bool_masa = False + num_ism_meta = None for p in chain: if Path(in_file.name).suffix == ".txt" and p.out_fmt is not None: processing_paths.append(tmp_file.with_suffix(f".{p.name}.wav")) out_dir_wav = True else: + # append file name processing_paths.append(tmp_file.with_suffix(f".{p.name}{tmp_file.suffix}")) - try: # TODO: clean up try except blocks + + # determine output format + try: out_format = p.out_fmt except AttributeError: # EVS has no attribute out_fmt out_format = p.in_fmt - try: - if p.name == "pre_2": - bool_ism = p.in_fmt.startswith("ISM") - else: - bool_ism = out_format.startswith("ISM") - except Exception: - bool_ism = out_format.name.startswith("ISM") + # check for ism and masa metadata + if p.name == "pre_2": + # no conversion in preprocessing 2 + bool_ism = "ISM" in p.in_fmt + bool_masa = "MASA" in p.in_fmt + out_format = p.in_fmt + elif isinstance(out_format, str): + # if out format is string + bool_ism = "ISM" in out_format + bool_masa = "MASA" in out_format + elif isinstance(out_format, audio.Audio): + out_format = out_format.name + bool_ism = "ISM" in out_format + bool_masa = "MASA" in out_format + else: + raise ValueError("wrong output format in processing setup") + + list_meta_step = [] + # append ism metadata if bool_ism: - list_meta_step = [] - for idx, tfm in enumerate(tmp_file_meta): + # ISM, OMASA and OSBA + # "ISMX..." 
+ num_ism_meta = int(out_format[3]) + for idx in range(num_ism_meta): list_meta_step.append( - tfm.parent - / f"{in_file.stem.split('.')[0]}.{p.name}.wav.{idx}.csv" + tmp_dir / f"{in_file.stem.split('.')[0]}.{p.name}.wav.{idx}.csv" ) + + # append masa metadata + if bool_masa: + # MASA and OMASA + list_meta_step.append( + tmp_dir / f"{in_file.stem.split('.')[0]}.{p.name}.wav.met" + ) + + if bool_ism or bool_masa: processing_paths_meta.append(list_meta_step) else: processing_paths_meta.append(None) - # TODO: support txt file writing for META pass-through if out_dir_wav: out_file = out_dir.joinpath(in_file.name).with_suffix(".wav") else: out_file = out_dir.joinpath(in_file.name) + # metadata from last process in chain out_meta = [] - if in_meta: - for im in range(len(in_meta)): - out_meta.append(out_dir.joinpath(f"{Path(out_file).stem}.wav.{im}.csv")) + if bool_ism: + for met in range(num_ism_meta): + out_meta.append(out_dir.joinpath(f"{Path(out_file).stem}.wav.{met}.csv")) + if bool_masa: + out_meta.append(out_dir.joinpath(f"{Path(out_file).stem}.wav.met")) # execute each process sequentially, feed output into input of next process for p, (input, output), input_meta in zip( diff --git a/tests/data/test_MASA.yml b/tests/data/test_MASA.yml index b4a3eebd6c929d106412890c16f95d9e9b2070e1..179dcc9dc9eaede886897c3e774c9b540a41e94b 100644 --- a/tests/data/test_MASA.yml +++ b/tests/data/test_MASA.yml @@ -184,7 +184,7 @@ conditions_to_generate: # - 32000 ### Encoder options cod: - fmt: "MASA2" + fmt: "MASA2DIR1" ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary) #bin: ~/git/ivas-codec/IVAS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) @@ -196,7 +196,7 @@ conditions_to_generate: ### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary) #bin: ~/git/ivas-codec/IVAS_dec ### Decoder output format; default = 
postprocessing fmt - fmt: "MASA2" + fmt: "MASA2DIR1" ### Decoder output sampling rate; default = null (same as input) # fs: 48000 ### Additional commandline options; default = null @@ -215,7 +215,7 @@ conditions_to_generate: # - 32000 ### Encoder options cod: - fmt: "MASA2" + fmt: "MASA2DIR1" ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary) #bin: ~/git/ivas-codec/IVAS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) @@ -250,13 +250,13 @@ conditions_to_generate: # - 9600 - [13200, 13200, 8000, 13200, 9600] cod: - fmt: "MASA2" + fmt: "MASA2DIR1" ### Path to encoder binary; default search for EVS_cod in bin folder (primary) and PATH (secondary) #bin: EVS_cod ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling) # fs: 32000 dec: - fmt: "MASA2" + fmt: "MASA2DIR1" ### Path to encoder binary; default search for EVS_dec in bin folder (primary) and PATH (secondary) #bin: EVS_dec ### Decoder output sampling rate; default = null (same as input) @@ -274,7 +274,7 @@ conditions_to_generate: ### Post-processing is required and can not be omitted postprocessing: ### REQUIRED: Target format for output - fmt: ["MASA2", "BINAURAL"] + fmt: ["MASA2DIR1", "BINAURAL"] ### REQUIRED: Target sampling rate in Hz for resampling fs: 48000 ### Low-pass cut-off frequency in Hz; default = null (no filtering)