Loading experiments/selection/P800-8/config/P800-8.yml +4 −4 Original line number Diff line number Diff line Loading @@ -302,6 +302,6 @@ conditions_to_generate: ### Post-processing ################################################ postprocessing: fmt: "BINAURAL" fmt: ["MASA2", "BINAURAL"] fs: 48000 loudness: -26 ivas_processing_scripts/audiotools/convert/__init__.py +13 −3 Original line number Diff line number Diff line Loading @@ -34,6 +34,7 @@ import logging from pathlib import Path, PurePath from typing import Optional, Union from numpy import empty from shutil import copyfile from ivas_processing_scripts.audiotools import audio, audioarray, metadata from ivas_processing_scripts.audiotools.audiofile import write Loading Loading @@ -100,7 +101,7 @@ def convert_file( output.audio = empty((1, num_tcs)) # fabricate metadata file name output.metadata_files = [Path(out_file).with_suffix(".met")] output.metadata_file = Path(out_file).with_suffix(".met") if isinstance(output, audio.ObjectBasedAudio): try: output.object_pos = input.object_pos Loading Loading @@ -135,6 +136,11 @@ def convert_file( write(out_file, output.audio, output.fs) if isinstance(output, audio.ObjectBasedAudio): write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True) elif isinstance(output, audio.MetadataAssistedSpatialAudio) and in_fmt == out_fmt: # audio objects point to same MD file, create new one with default naming for output out_md_name = out_file.parent / (out_file.name + ".met") copyfile(output.metadata_file, out_md_name) output.metadata_file = out_md_name def convert( Loading Loading @@ -303,7 +309,7 @@ def format_conversion( """Convert one audio format to another""" # validation if isinstance(output, audio.MetadataAssistedSpatialAudio) and not isinstance(input, audio.SceneBasedAudio): if isinstance(output, audio.MetadataAssistedSpatialAudio) and not (isinstance(input, audio.SceneBasedAudio) or isinstance(input, audio.MetadataAssistedSpatialAudio )): raise NotImplementedError("Can only convert to MASA from SBA") if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name: Loading @@ -314,10 +320,14 @@ def format_conversion( if logger: logger.debug(f"Format conversion: {input.name} -> {output.name}") if input.name == output.name or ( if ( fmt := input.name ) == output.name or ( input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL") ): output.audio = input.audio if fmt.startswith("MASA"): output.metadata_file = input.metadata_file elif fmt.startswith("ISM"): output.metadata_files = list(output.metadata_files) else: if isinstance(input, audio.BinauralAudio): raise NotImplementedError( Loading ivas_processing_scripts/audiotools/convert/scenebased.py +1 −1 Original line number Diff line number Diff line Loading @@ -192,7 +192,7 @@ def render_sba_to_masa( # two dir only possible from HOA2 num_dirs = 1 num_tcs = masa_out.audio.shape[1] md_out_path = masa_out.metadata_files[0] md_out_path = masa_out.metadata_file masa = masaAnalyzer(sba_in, num_tcs, num_dirs, md_out_path) masa_out.audio = masa.audio Loading ivas_processing_scripts/processing/chains.py +20 −1 Original line number Diff line number Diff line Loading @@ -409,13 +409,32 @@ def get_processing_chain( loudness_postprocessing = post_cfg.get("loudness") loudness_fmt_postprocessing = post_cfg.get("loudness_fmt") post_fmt = post_cfg.get("fmt") if isinstance(post_fmt, list): pre_fmts = post_fmt[:-1] post_fmt = post_fmt[-1] # add Postprocessing with only format conversion for each format except the last fmts = [tmp_in_fmt] + pre_fmts for fmt_in, fmt_out in zip(fmts[:-1], fmts[1:]): chain["processes"].append( Postprocessing( { "in_fs": tmp_in_fs, "in_fmt": fmt_in, "out_fs": tmp_in_fs, "out_fmt": fmt_out } ) ) chain["processes"].append( Postprocessing( { "in_fs": tmp_in_fs, "in_fmt": tmp_in_fmt, "out_fs": post_cfg.get("fs"), "out_fmt": post_cfg.get("fmt"), "out_fmt": post_fmt, "out_cutoff": tmp_lp_cutoff, "out_loudness": loudness_postprocessing, "out_loudness_fmt": loudness_fmt_postprocessing, Loading ivas_processing_scripts/processing/processing.py +15 −5 Original line number Diff line number Diff line Loading @@ -176,6 +176,8 @@ def concat_teardown(cfg: TestConfig, logger: logging.Logger): raise ValueError("Splitting not possible without split marker") output_format = cfg.postprocessing["fmt"] if isinstance(output_format, list): output_format = output_format[-1] out_files = [] out_meta = [] Loading Loading @@ -327,6 +329,10 @@ def reverse_process_2(cfg, logger): logger.info("Remove preamble") remove_preamble(cfg) fmt = cfg.postprocessing["fmt"] if isinstance(fmt, list): fmt = fmt[-1] # reverse concatenation if cfg.pre2.concatenate_input: # write out the splits, optionally remove file Loading @@ -337,13 +343,13 @@ def reverse_process_2(cfg, logger): for out_dir in cfg.out_dirs: list_audio_dir = list_audio(out_dir) out_paths_splits.append(list_audio_dir) if cfg.postprocessing["fmt"].startswith("ISM"): if fmt.startswith("ISM"): out_meta_splits = [] for i, condition in enumerate(out_paths_splits): meta_condition = metadata_search( cfg.out_dirs[i], condition, num_objects=int(cfg.postprocessing["fmt"][-1]), num_objects=int(fmt[-1]), ) out_meta_splits.append(meta_condition) else: Loading @@ -353,7 +359,7 @@ def reverse_process_2(cfg, logger): if cfg.postprocessing.get("loudness", False): scale_files( out_paths_splits, cfg.postprocessing["fmt"], fmt, cfg.postprocessing["loudness"], cfg.postprocessing.get("loudness_fmt", None), cfg.postprocessing["fs"], Loading Loading @@ -444,13 +450,17 @@ def process_item( def remove_preamble(cfg): # get number of channels from output format num_channels = audio.fromtype(cfg.postprocessing["fmt"]).num_channels fmt = cfg.postprocessing["fmt"] if isinstance(cfg.postprocessing["fmt"], list): fmt = fmt[-1] num_channels = audio.fromtype(fmt).num_channels for odir in cfg.out_dirs: for item in cfg.items_list: path_input = odir / item.name # remove preamble for ISM metadata if cfg.postprocessing["fmt"].startswith("ISM"): if fmt.startswith("ISM"): # search for metadata meta_item = metadata_search( odir, [Path(item.name)], num_objects=num_channels Loading Loading
experiments/selection/P800-8/config/P800-8.yml +4 −4 Original line number Diff line number Diff line Loading @@ -302,6 +302,6 @@ conditions_to_generate: ### Post-processing ################################################ postprocessing: fmt: "BINAURAL" fmt: ["MASA2", "BINAURAL"] fs: 48000 loudness: -26
ivas_processing_scripts/audiotools/convert/__init__.py +13 −3 Original line number Diff line number Diff line Loading @@ -34,6 +34,7 @@ import logging from pathlib import Path, PurePath from typing import Optional, Union from numpy import empty from shutil import copyfile from ivas_processing_scripts.audiotools import audio, audioarray, metadata from ivas_processing_scripts.audiotools.audiofile import write Loading Loading @@ -100,7 +101,7 @@ def convert_file( output.audio = empty((1, num_tcs)) # fabricate metadata file name output.metadata_files = [Path(out_file).with_suffix(".met")] output.metadata_file = Path(out_file).with_suffix(".met") if isinstance(output, audio.ObjectBasedAudio): try: output.object_pos = input.object_pos Loading Loading @@ -135,6 +136,11 @@ def convert_file( write(out_file, output.audio, output.fs) if isinstance(output, audio.ObjectBasedAudio): write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True) elif isinstance(output, audio.MetadataAssistedSpatialAudio) and in_fmt == out_fmt: # audio objects point to same MD file, create new one with default naming for output out_md_name = out_file.parent / (out_file.name + ".met") copyfile(output.metadata_file, out_md_name) output.metadata_file = out_md_name def convert( Loading Loading @@ -303,7 +309,7 @@ def format_conversion( """Convert one audio format to another""" # validation if isinstance(output, audio.MetadataAssistedSpatialAudio) and not isinstance(input, audio.SceneBasedAudio): if isinstance(output, audio.MetadataAssistedSpatialAudio) and not (isinstance(input, audio.SceneBasedAudio) or isinstance(input, audio.MetadataAssistedSpatialAudio )): raise NotImplementedError("Can only convert to MASA from SBA") if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name: Loading @@ -314,10 +320,14 @@ def format_conversion( if logger: logger.debug(f"Format conversion: {input.name} -> {output.name}") if input.name == output.name or ( if ( fmt := input.name ) == output.name or ( input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL") ): output.audio = input.audio if fmt.startswith("MASA"): output.metadata_file = input.metadata_file elif fmt.startswith("ISM"): output.metadata_files = list(output.metadata_files) else: if isinstance(input, audio.BinauralAudio): raise NotImplementedError( Loading
ivas_processing_scripts/audiotools/convert/scenebased.py +1 −1 Original line number Diff line number Diff line Loading @@ -192,7 +192,7 @@ def render_sba_to_masa( # two dir only possible from HOA2 num_dirs = 1 num_tcs = masa_out.audio.shape[1] md_out_path = masa_out.metadata_files[0] md_out_path = masa_out.metadata_file masa = masaAnalyzer(sba_in, num_tcs, num_dirs, md_out_path) masa_out.audio = masa.audio Loading
ivas_processing_scripts/processing/chains.py +20 −1 Original line number Diff line number Diff line Loading @@ -409,13 +409,32 @@ def get_processing_chain( loudness_postprocessing = post_cfg.get("loudness") loudness_fmt_postprocessing = post_cfg.get("loudness_fmt") post_fmt = post_cfg.get("fmt") if isinstance(post_fmt, list): pre_fmts = post_fmt[:-1] post_fmt = post_fmt[-1] # add Postprocessing with only format conversion for each format except the last fmts = [tmp_in_fmt] + pre_fmts for fmt_in, fmt_out in zip(fmts[:-1], fmts[1:]): chain["processes"].append( Postprocessing( { "in_fs": tmp_in_fs, "in_fmt": fmt_in, "out_fs": tmp_in_fs, "out_fmt": fmt_out } ) ) chain["processes"].append( Postprocessing( { "in_fs": tmp_in_fs, "in_fmt": tmp_in_fmt, "out_fs": post_cfg.get("fs"), "out_fmt": post_cfg.get("fmt"), "out_fmt": post_fmt, "out_cutoff": tmp_lp_cutoff, "out_loudness": loudness_postprocessing, "out_loudness_fmt": loudness_fmt_postprocessing, Loading
ivas_processing_scripts/processing/processing.py +15 −5 Original line number Diff line number Diff line Loading @@ -176,6 +176,8 @@ def concat_teardown(cfg: TestConfig, logger: logging.Logger): raise ValueError("Splitting not possible without split marker") output_format = cfg.postprocessing["fmt"] if isinstance(output_format, list): output_format = output_format[-1] out_files = [] out_meta = [] Loading Loading @@ -327,6 +329,10 @@ def reverse_process_2(cfg, logger): logger.info("Remove preamble") remove_preamble(cfg) fmt = cfg.postprocessing["fmt"] if isinstance(fmt, list): fmt = fmt[-1] # reverse concatenation if cfg.pre2.concatenate_input: # write out the splits, optionally remove file Loading @@ -337,13 +343,13 @@ def reverse_process_2(cfg, logger): for out_dir in cfg.out_dirs: list_audio_dir = list_audio(out_dir) out_paths_splits.append(list_audio_dir) if cfg.postprocessing["fmt"].startswith("ISM"): if fmt.startswith("ISM"): out_meta_splits = [] for i, condition in enumerate(out_paths_splits): meta_condition = metadata_search( cfg.out_dirs[i], condition, num_objects=int(cfg.postprocessing["fmt"][-1]), num_objects=int(fmt[-1]), ) out_meta_splits.append(meta_condition) else: Loading @@ -353,7 +359,7 @@ def reverse_process_2(cfg, logger): if cfg.postprocessing.get("loudness", False): scale_files( out_paths_splits, cfg.postprocessing["fmt"], fmt, cfg.postprocessing["loudness"], cfg.postprocessing.get("loudness_fmt", None), cfg.postprocessing["fs"], Loading Loading @@ -444,13 +450,17 @@ def process_item( def remove_preamble(cfg): # get number of channels from output format num_channels = audio.fromtype(cfg.postprocessing["fmt"]).num_channels fmt = cfg.postprocessing["fmt"] if isinstance(cfg.postprocessing["fmt"], list): fmt = fmt[-1] num_channels = audio.fromtype(fmt).num_channels for odir in cfg.out_dirs: for item in cfg.items_list: path_input = odir / item.name # remove preamble for ISM metadata if cfg.postprocessing["fmt"].startswith("ISM"): if fmt.startswith("ISM"): # search for metadata meta_item = metadata_search( odir, [Path(item.name)], num_objects=num_channels Loading