Loading ivas_processing_scripts/audiotools/audio.py +10 −6 Original line number Diff line number Diff line Loading @@ -218,18 +218,18 @@ class MetadataAssistedSpatialAudio(Audio): raise ValueError( f"Unsupported metadata assisted spatial audio format {name}" ) self.metadata_files = [] self.metadata_file = None @classmethod def _from_file( cls, name: str, filename: Path, metadata_files: list[str], metadata_file: str, fs: Optional[int] = None, ) -> "MetadataAssistedSpatialAudio": obj = super()._from_file(name, filename, fs) obj.metadata_file = Path(metadata_files[0]) obj.metadata_file = Path(metadata_file) return obj @classmethod Loading @@ -237,11 +237,11 @@ class MetadataAssistedSpatialAudio(Audio): cls, name: str, filename: Path, metadata_files: list[str], metadata_file: str, fs: Optional[int] = None, ) -> "MetadataAssistedSpatialAudio": obj = super()._from_file(name, filename, fs) obj.metadata_file = Path(metadata_files[0]) obj.metadata_file = Path(metadata_file) return obj Loading Loading @@ -432,13 +432,17 @@ class OMASAAudio(Audio): def init_metadata(self): # check if number of metadata files matches format if self.num_ism_channels != len(self.metadata_files): if self.num_ism_channels != len(self.metadata_files)-1: raise ValueError( f"Mismatch between number of ism channels [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]" ) self.object_pos = [] for i, f in enumerate(self.metadata_files): if i >= self.num_ism_channels: # only read ISM metadata, not MASA metadata break pos = np.genfromtxt(f, delimiter=",") # check if metadata has right number of columns Loading ivas_processing_scripts/audiotools/convert/__init__.py +42 −13 Original line number Diff line number Diff line Loading @@ -34,6 +34,7 @@ import logging from pathlib import Path, PurePath from shutil import copyfile from typing import Optional, Union from copy import copy from numpy import empty Loading Loading @@ -79,9 +80,13 @@ def convert_file( else: # first check prevents crash on custom_ls setup formats if isinstance(in_fmt, str) and in_fmt.startswith("MASA") and in_meta is None: # TODO treffehn: also consider OMASA # if no MD file is provided, default to name (including .wav or .pcm!!!) + ".met" in_meta = [in_file.parent / (in_file.name + ".met")] elif isinstance(in_fmt, str) and "MASA" in in_fmt and "ISM" in in_fmt: if isinstance(in_meta, list) and len(in_meta) < (int(in_fmt[3])+int(in_fmt[8])): in_meta_masa = in_file.parent / (in_file.name + ".met") in_meta.append(in_meta_masa) input = audio.fromfile(in_fmt, in_file, in_fs, in_meta) # try to set reasonable defaults if missing Loading @@ -100,22 +105,40 @@ def convert_file( output = audio.fromtype(out_fmt) # handle metadata for outputs with metadata (MASA, ISM, OMASA, OSBA) if isinstance(output, audio.MetadataAssistedSpatialAudio): # create dummy audio array to allow inference of MASA mode num_tcs = int(output.name[-1]) output.audio = empty((1, num_tcs)) if isinstance(input, audio.MetadataAssistedSpatialAudio): # use existing metadata file output.metadata_file = input.metadata_file else: # fabricate metadata file name output.metadata_file = Path(out_file).parent / (Path(out_file).name + ".met") if isinstance(output, audio.ObjectBasedAudio): masa_meta_file_name = Path(out_file).parent / (Path(out_file).name + ".met") output.metadata_file = masa_meta_file_name elif isinstance(output, audio.ObjectBasedAudio): try: output.object_pos = input.object_pos output.metadata_files = input.metadata_files output.object_pos = copy(input.object_pos) output.metadata_files = copy(input.metadata_files) except Exception: raise ValueError( "ISM is not supported as an output for rendering! Only usable as pass-through" ) elif isinstance(output, audio.OMASAAudio): if isinstance(input, audio.OMASAAudio): # use existing metadata files output.metadata_files = copy(input.metadata_files) else: # fabricate metadata file name masa_meta_file_name = Path(out_file).parent / (Path(out_file).name + ".met") output.metadata_files = copy(input.metadata_files) output.metadata_files.append(masa_meta_file_name) output.object_pos = copy(input.object_pos) elif isinstance(output, audio.OSBAAudio): output.object_pos = copy(input.object_pos) output.metadata_files = copy(input.metadata_files) # apply actual conversion if isinstance(input, metadata.Metadata): if logger: logger.debug(f"Converting metadata to {out_fmt} : {in_file} -> {out_file}") Loading @@ -137,16 +160,22 @@ def convert_file( output.fs = in_fs # resampling not yet applied convert(input, output, in_fs=in_fs, out_fs=out_fs, logger=logger, **kwargs) # write output # write output audio write(out_file, output.audio, output.fs) if isinstance(output, audio.ObjectBasedAudio): # write metadata if isinstance(output, audio.ObjectBasedAudio) or isinstance(output, audio.OSBAAudio): write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True) elif isinstance(output, audio.MetadataAssistedSpatialAudio) and in_fmt == out_fmt: # audio objects point to same MD file, create new one with default naming for output out_md_name = out_file.parent / (out_file.name + ".met") copyfile(output.metadata_file, out_md_name) output.metadata_file = out_md_name elif isinstance(output, audio.OMASAAudio): write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True) if in_fmt == out_fmt: # audio objects point to same MD file, create new one with default naming for output out_md_name = out_file.parent / (out_file.name + ".met") copyfile(output.metadata_files[-1], out_md_name) def convert( input: audio.Audio, Loading ivas_processing_scripts/audiotools/convert/osba.py +14 −12 Original line number Diff line number Diff line Loading @@ -29,7 +29,8 @@ # the United Nations Convention on Contracts on the International Sales of Goods. # import copy from copy import copy, deepcopy import numpy as np from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \ Loading @@ -49,19 +50,19 @@ def convert_osba( # split OSBA object in ISM and SBA object oba = audio.fromarray("ISM" + str(osba.num_ism_channels), osba.audio[:, :osba.num_ism_channels], osba.fs) oba.metadata_files = osba.metadata_files oba.object_pos = osba.object_pos oba.metadata_files = copy(osba.metadata_files) oba.object_pos = copy(osba.object_pos) sba = audio.fromarray("SBA" + str(osba.ambi_order), osba.audio[:, osba.num_ism_channels:], osba.fs) # OSBA -> Binaural if isinstance(out, audio.BinauralAudio): # render SBA and ISM part separately # ISM out_ism = copy.deepcopy(out) out_ism = deepcopy(out) render_oba_to_binaural(oba, out_ism, **kwargs) # SBA out_sba = copy.deepcopy(out) out_sba = deepcopy(out) render_sba_to_binaural(sba, out_sba, **kwargs) # combine results Loading @@ -71,11 +72,11 @@ def convert_osba( elif isinstance(out, audio.ChannelBasedAudio): # render SBA and ISM part separately # ISM out_ism = copy.deepcopy(out) out_ism = deepcopy(out) render_oba_to_cba(oba, out_ism) # SBA out_sba = copy.deepcopy(out) out_sba = deepcopy(out) render_sba_to_cba(sba, out_sba) # combine results Loading @@ -85,11 +86,11 @@ def convert_osba( elif isinstance(out, audio.SceneBasedAudio): # render SBA and ISM part separately # ISM out_ism = copy.deepcopy(out) out_ism = deepcopy(out) render_oba_to_sba(oba, out_ism) # SBA out_sba = copy.deepcopy(out) out_sba = deepcopy(out) render_sba_to_sba(sba, out_sba) # combine results Loading @@ -97,17 +98,18 @@ def convert_osba( # OSBA -> OMASA elif isinstance(out, audio.OMASAAudio): # TODO (treffehn) # check if ism object number is the same if out.num_ism_channels != osba.num_ism_channels: raise ValueError("OSBA to OMASA conversion only possible if number of ISM objects matches") # only render SBA part out_sba = audio.fromtype(out.name[4:]) out_sba.metadata_file = out.metadata_files[-1] render_sba_to_masa(sba, out_sba) out.audio[:, :osba.num_ism_channels] = osba.audio[:, :osba.num_ism_channels] out.audio[:, osba.num_ism_channels:] = out_sba.audio # out.audio[:, :osba.num_ism_channels] = osba.audio[:, :osba.num_ism_channels] # out.audio[:, osba.num_ism_channels:] = out_sba.audio out.audio = np.concatenate((osba.audio[:, :osba.num_ism_channels], out_sba.audio), axis=1) else: raise NotImplementedError( Loading ivas_processing_scripts/audiotools/convert/scenebased.py +1 −1 Original line number Diff line number Diff line Loading @@ -198,7 +198,7 @@ def render_sba_to_masa( ) -> None: num_tcs = masa_out.num_channels md_out_path = masa_out.metadata_files # TODO: get metadata files md_out_path = masa_out.metadata_file masa = masaAnalyzer(sba_in, num_tcs, masa_out.dirs, md_out_path) masa_out.audio = masa.audio Loading ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py +1 −1 Original line number Diff line number Diff line Loading @@ -106,6 +106,6 @@ def masaAnalyzer( run(cmd, cwd=binary.resolve().parent) fmt = f"MASA{num_tcs}DIR{num_dirs}" masa = audio.fromfile(fmt, tmp_out_pcm, 48000, [metadata_out_path]) masa = audio.fromfile(fmt, tmp_out_pcm, 48000, metadata_out_path) return masa Loading
ivas_processing_scripts/audiotools/audio.py +10 −6 Original line number Diff line number Diff line Loading @@ -218,18 +218,18 @@ class MetadataAssistedSpatialAudio(Audio): raise ValueError( f"Unsupported metadata assisted spatial audio format {name}" ) self.metadata_files = [] self.metadata_file = None @classmethod def _from_file( cls, name: str, filename: Path, metadata_files: list[str], metadata_file: str, fs: Optional[int] = None, ) -> "MetadataAssistedSpatialAudio": obj = super()._from_file(name, filename, fs) obj.metadata_file = Path(metadata_files[0]) obj.metadata_file = Path(metadata_file) return obj @classmethod Loading @@ -237,11 +237,11 @@ class MetadataAssistedSpatialAudio(Audio): cls, name: str, filename: Path, metadata_files: list[str], metadata_file: str, fs: Optional[int] = None, ) -> "MetadataAssistedSpatialAudio": obj = super()._from_file(name, filename, fs) obj.metadata_file = Path(metadata_files[0]) obj.metadata_file = Path(metadata_file) return obj Loading Loading @@ -432,13 +432,17 @@ class OMASAAudio(Audio): def init_metadata(self): # check if number of metadata files matches format if self.num_ism_channels != len(self.metadata_files): if self.num_ism_channels != len(self.metadata_files)-1: raise ValueError( f"Mismatch between number of ism channels [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]" ) self.object_pos = [] for i, f in enumerate(self.metadata_files): if i >= self.num_ism_channels: # only read ISM metadata, not MASA metadata break pos = np.genfromtxt(f, delimiter=",") # check if metadata has right number of columns Loading
ivas_processing_scripts/audiotools/convert/__init__.py +42 −13 Original line number Diff line number Diff line Loading @@ -34,6 +34,7 @@ import logging from pathlib import Path, PurePath from shutil import copyfile from typing import Optional, Union from copy import copy from numpy import empty Loading Loading @@ -79,9 +80,13 @@ def convert_file( else: # first check prevents crash on custom_ls setup formats if isinstance(in_fmt, str) and in_fmt.startswith("MASA") and in_meta is None: # TODO treffehn: also consider OMASA # if no MD file is provided, default to name (including .wav or .pcm!!!) + ".met" in_meta = [in_file.parent / (in_file.name + ".met")] elif isinstance(in_fmt, str) and "MASA" in in_fmt and "ISM" in in_fmt: if isinstance(in_meta, list) and len(in_meta) < (int(in_fmt[3])+int(in_fmt[8])): in_meta_masa = in_file.parent / (in_file.name + ".met") in_meta.append(in_meta_masa) input = audio.fromfile(in_fmt, in_file, in_fs, in_meta) # try to set reasonable defaults if missing Loading @@ -100,22 +105,40 @@ def convert_file( output = audio.fromtype(out_fmt) # handle metadata for outputs with metadata (MASA, ISM, OMASA, OSBA) if isinstance(output, audio.MetadataAssistedSpatialAudio): # create dummy audio array to allow inference of MASA mode num_tcs = int(output.name[-1]) output.audio = empty((1, num_tcs)) if isinstance(input, audio.MetadataAssistedSpatialAudio): # use existing metadata file output.metadata_file = input.metadata_file else: # fabricate metadata file name output.metadata_file = Path(out_file).parent / (Path(out_file).name + ".met") if isinstance(output, audio.ObjectBasedAudio): masa_meta_file_name = Path(out_file).parent / (Path(out_file).name + ".met") output.metadata_file = masa_meta_file_name elif isinstance(output, audio.ObjectBasedAudio): try: output.object_pos = input.object_pos output.metadata_files = input.metadata_files output.object_pos = copy(input.object_pos) output.metadata_files = copy(input.metadata_files) except Exception: raise ValueError( "ISM is not supported as an output for rendering! Only usable as pass-through" ) elif isinstance(output, audio.OMASAAudio): if isinstance(input, audio.OMASAAudio): # use existing metadata files output.metadata_files = copy(input.metadata_files) else: # fabricate metadata file name masa_meta_file_name = Path(out_file).parent / (Path(out_file).name + ".met") output.metadata_files = copy(input.metadata_files) output.metadata_files.append(masa_meta_file_name) output.object_pos = copy(input.object_pos) elif isinstance(output, audio.OSBAAudio): output.object_pos = copy(input.object_pos) output.metadata_files = copy(input.metadata_files) # apply actual conversion if isinstance(input, metadata.Metadata): if logger: logger.debug(f"Converting metadata to {out_fmt} : {in_file} -> {out_file}") Loading @@ -137,16 +160,22 @@ def convert_file( output.fs = in_fs # resampling not yet applied convert(input, output, in_fs=in_fs, out_fs=out_fs, logger=logger, **kwargs) # write output # write output audio write(out_file, output.audio, output.fs) if isinstance(output, audio.ObjectBasedAudio): # write metadata if isinstance(output, audio.ObjectBasedAudio) or isinstance(output, audio.OSBAAudio): write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True) elif isinstance(output, audio.MetadataAssistedSpatialAudio) and in_fmt == out_fmt: # audio objects point to same MD file, create new one with default naming for output out_md_name = out_file.parent / (out_file.name + ".met") copyfile(output.metadata_file, out_md_name) output.metadata_file = out_md_name elif isinstance(output, audio.OMASAAudio): write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True) if in_fmt == out_fmt: # audio objects point to same MD file, create new one with default naming for output out_md_name = out_file.parent / (out_file.name + ".met") copyfile(output.metadata_files[-1], out_md_name) def convert( input: audio.Audio, Loading
ivas_processing_scripts/audiotools/convert/osba.py +14 −12 Original line number Diff line number Diff line Loading @@ -29,7 +29,8 @@ # the United Nations Convention on Contracts on the International Sales of Goods. # import copy from copy import copy, deepcopy import numpy as np from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \ Loading @@ -49,19 +50,19 @@ def convert_osba( # split OSBA object in ISM and SBA object oba = audio.fromarray("ISM" + str(osba.num_ism_channels), osba.audio[:, :osba.num_ism_channels], osba.fs) oba.metadata_files = osba.metadata_files oba.object_pos = osba.object_pos oba.metadata_files = copy(osba.metadata_files) oba.object_pos = copy(osba.object_pos) sba = audio.fromarray("SBA" + str(osba.ambi_order), osba.audio[:, osba.num_ism_channels:], osba.fs) # OSBA -> Binaural if isinstance(out, audio.BinauralAudio): # render SBA and ISM part separately # ISM out_ism = copy.deepcopy(out) out_ism = deepcopy(out) render_oba_to_binaural(oba, out_ism, **kwargs) # SBA out_sba = copy.deepcopy(out) out_sba = deepcopy(out) render_sba_to_binaural(sba, out_sba, **kwargs) # combine results Loading @@ -71,11 +72,11 @@ def convert_osba( elif isinstance(out, audio.ChannelBasedAudio): # render SBA and ISM part separately # ISM out_ism = copy.deepcopy(out) out_ism = deepcopy(out) render_oba_to_cba(oba, out_ism) # SBA out_sba = copy.deepcopy(out) out_sba = deepcopy(out) render_sba_to_cba(sba, out_sba) # combine results Loading @@ -85,11 +86,11 @@ def convert_osba( elif isinstance(out, audio.SceneBasedAudio): # render SBA and ISM part separately # ISM out_ism = copy.deepcopy(out) out_ism = deepcopy(out) render_oba_to_sba(oba, out_ism) # SBA out_sba = copy.deepcopy(out) out_sba = deepcopy(out) render_sba_to_sba(sba, out_sba) # combine results Loading @@ -97,17 +98,18 @@ def convert_osba( # OSBA -> OMASA elif isinstance(out, audio.OMASAAudio): # TODO (treffehn) # check if ism object number is the same if out.num_ism_channels != osba.num_ism_channels: raise ValueError("OSBA to OMASA conversion only possible if number of ISM objects matches") # only render SBA part out_sba = audio.fromtype(out.name[4:]) out_sba.metadata_file = out.metadata_files[-1] render_sba_to_masa(sba, out_sba) out.audio[:, :osba.num_ism_channels] = osba.audio[:, :osba.num_ism_channels] out.audio[:, osba.num_ism_channels:] = out_sba.audio # out.audio[:, :osba.num_ism_channels] = osba.audio[:, :osba.num_ism_channels] # out.audio[:, osba.num_ism_channels:] = out_sba.audio out.audio = np.concatenate((osba.audio[:, :osba.num_ism_channels], out_sba.audio), axis=1) else: raise NotImplementedError( Loading
ivas_processing_scripts/audiotools/convert/scenebased.py +1 −1 Original line number Diff line number Diff line Loading @@ -198,7 +198,7 @@ def render_sba_to_masa( ) -> None: num_tcs = masa_out.num_channels md_out_path = masa_out.metadata_files # TODO: get metadata files md_out_path = masa_out.metadata_file masa = masaAnalyzer(sba_in, num_tcs, masa_out.dirs, md_out_path) masa_out.audio = masa.audio Loading
ivas_processing_scripts/audiotools/wrappers/masaAnalyzer.py +1 −1 Original line number Diff line number Diff line Loading @@ -106,6 +106,6 @@ def masaAnalyzer( run(cmd, cwd=binary.resolve().parent) fmt = f"MASA{num_tcs}DIR{num_dirs}" masa = audio.fromfile(fmt, tmp_out_pcm, 48000, [metadata_out_path]) masa = audio.fromfile(fmt, tmp_out_pcm, 48000, metadata_out_path) return masa