Loading experiments/selection/P800-8/config/P800-8.yml 0 → 100644 +307 −0 Original line number Diff line number Diff line --- ################################################ # General configuration ################################################ name: P800-8 master_seed: 5 prerun_seed: 2 multiprocessing: false input_path: "experiments/selection/P800-8/proc_input" output_path: "experiments/selection/P800-8/proc_output" ################################################ ### Input configuration ################################################ input: fmt: "FOA" fs: 48000 ################################################ ### Pre-processing on individual items ################################################ preprocessing: mask: "HP50" loudness: -26 window: 100 ################################################ ### Pre-processing on whole signal(s) ################################################ preprocessing_2: concatenate_input: true # concatenation_order: [] preamble: 10000 preamble_noise: true ################################################# ### Bitstream processing ################################################# ################################################ ### Configuration for conditions under test ################################################ conditions_to_generate: ### Reference and anchor conditions ########################## # c01: # type: ref # c02: # type: mnru # q: 28 # c03: # type: mnru # q: 24 # c04: # type: mnru # q: 20 # c05: # type: mnru # q: 16 # c06: # type: esdru # alpha: 0.7 # c07: # type: esdru # alpha: 0.4 # c08: # type: esdru # alpha: 0.1 # ### EVS condition ################################ # c09: # type: evs # bitrates: # - 7200 # cod: # opts: ["-max_band", "FB"] # dec: # c10: # type: evs # bitrates: # - 8000 # cod: # opts: ["-max_band", "FB"] # dec: # c11: # type: evs # bitrates: # - 9600 # cod: # opts: ["-max_band", "FB"] # dec: # c12: # type: evs # bitrates: # - 13200 # cod: # opts: ["-max_band", "FB"] # dec: # c13: # type: evs # 
bitrates: # - 16400 # cod: # opts: ["-max_band", "FB"] # dec: # c14: # type: evs # bitrates: # - 24400 # cod: # opts: ["-max_band", "FB"] # dec: # c15: # type: evs # bitrates: # - 32000 # cod: # opts: ["-max_band", "FB"] # dec: # c16: # type: evs # bitrates: # - 7200 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # c17: # type: evs # bitrates: # - 8000 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # c18: # type: evs # bitrates: # - 9600 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # c19: # type: evs # bitrates: # - 13200 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # c20: # type: evs # bitrates: # - 16400 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # c21: # type: evs # bitrates: # - 24400 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # c22: # type: evs # bitrates: # - 32000 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # ### IVAS condition ############################### # c23: # type: ivas # bitrates: # - 13200 # cod: # dec: # fmt: "STEREO" # c24: # type: ivas # bitrates: # - 16400 # cod: # dec: # fmt: "STEREO" # c25: # type: ivas # bitrates: # - 24400 # cod: # dec: # fmt: "STEREO" # c26: # type: ivas # bitrates: # - 32000 # cod: # dec: # fmt: "STEREO" # c27: # type: ivas # bitrates: # - 48000 # cod: # dec: # fmt: "STEREO" # c28: # type: ivas # bitrates: # - 13200 # cod: # dec: # fmt: "STEREO" # tx: # type: "FER" # error_rate: 5 # c29: # type: ivas # bitrates: # - 16400 # cod: # dec: # fmt: "STEREO" # tx: # type: "FER" # error_rate: 5 # c30: # type: ivas # bitrates: # - 24400 # cod: # dec: # fmt: "STEREO" # tx: # type: "FER" # error_rate: 5 # c31: # type: ivas # bitrates: # - 32000 # cod: # dec: # fmt: "STEREO" # tx: # type: "FER" # error_rate: 5 # c32: # type: ivas # bitrates: # - 48000 # cod: # dec: # fmt: "STEREO" # tx: # type: "FER" # 
error_rate: 5 # c33: # type: ivas # bitrates: # - 24400 # cod: # opts: ["-dtx"] # dec: # fmt: "STEREO" c34: type: ivas bitrates: - 13200 cod: fmt: "MASA2" opts: ["-dtx"] dec: fmt: "MASA2" tx: type: "FER" error_rate: 5 ################################################ ### Post-processing ################################################ postprocessing: fmt: "BINAURAL" fs: 48000 loudness: -26 ivas_processing_scripts/audiotools/convert/__init__.py +14 −2 Original line number Diff line number Diff line Loading @@ -33,6 +33,7 @@ import logging from pathlib import Path, PurePath from typing import Optional, Union from numpy import empty from ivas_processing_scripts.audiotools import audio, audioarray, metadata from ivas_processing_scripts.audiotools.audiofile import write Loading Loading @@ -72,6 +73,9 @@ def convert_file( if not isinstance(in_fmt, PurePath) and in_fmt.startswith("META"): input = metadata.Metadata(in_file) else: if in_fmt.startswith("MASA") and in_meta is None: # if no MD file is provided, default to name (including .wav or .pcm!!!) 
+ ".met" in_meta = [in_file.parent / (in_file.name + ".met")] input = audio.fromfile(in_fmt, in_file, in_fs, in_meta) # try to set reasonable defaults if missing Loading @@ -89,6 +93,14 @@ def convert_file( out_fmt = input.name output = audio.fromtype(out_fmt) if isinstance(output, audio.MetadataAssistedSpatialAudio): # create dummy audio array to allow inference of MASA mode num_tcs = int(output.name[-1]) output.audio = empty((1, num_tcs)) # fabricate metadata file name output.metadata_files = [Path(out_file).with_suffix(".met")] if isinstance(output, audio.ObjectBasedAudio): try: output.object_pos = input.object_pos Loading Loading @@ -291,8 +303,8 @@ def format_conversion( """Convert one audio format to another""" # validation if isinstance(output, audio.MetadataAssistedSpatialAudio): raise NotImplementedError("MASA is not supported as an output for rendering!") if isinstance(output, audio.MetadataAssistedSpatialAudio) and not isinstance(input, audio.SceneBasedAudio): raise NotImplementedError("Can only convert to MASA from SBA") if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name: raise NotImplementedError( Loading ivas_processing_scripts/audiotools/convert/scenebased.py +21 −0 Original line number Diff line number Diff line Loading @@ -53,6 +53,7 @@ from ivas_processing_scripts.audiotools.convert.binaural import binaural_fftconv from ivas_processing_scripts.audiotools.EFAP import EFAP from ivas_processing_scripts.audiotools.rotation import Quat2RotMat, SHrotmatgen from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu from ivas_processing_scripts.audiotools.wrappers.masaAnalyzer import masaAnalyzer """ SceneBasedAudio functions """ Loading @@ -75,6 +76,11 @@ def convert_scenebased( # SBA -> SBA elif isinstance(out, audio.SceneBasedAudio): render_sba_to_sba(sba, out) # SBA -> MASA elif isinstance(out, audio.MetadataAssistedSpatialAudio) and sba.name == "FOA": render_sba_to_masa(sba, out) else: raise 
NotImplementedError( f"Conversion from {sba.name} to {out.name} is unsupported!" Loading Loading @@ -177,6 +183,21 @@ def render_sba_to_sba( zero_vert_channels(sba_out) def render_sba_to_masa( sba_in: audio.SceneBasedAudio, masa_out: audio.MetadataAssistedSpatialAudio, ) -> None: assert sba_in.name == "FOA" # two dir only possible from HOA2 num_dirs = 1 num_tcs = masa_out.audio.shape[1] md_out_path = masa_out.metadata_files[0] masa = masaAnalyzer(sba_in, num_tcs, num_dirs, md_out_path) masa_out.audio = masa.audio def rotate_sba( sba: audio.SceneBasedAudio, trajectory: str, Loading ivas_processing_scripts/audiotools/wrappers/masaRenderer.py +1 −1 Original line number Diff line number Diff line Loading @@ -87,7 +87,7 @@ def masaRenderer( str(binary), output_mode, "", # 2 -> inputPcm str(masa.metadata_files.resolve()), str(masa.metadata_file.resolve()), "", # 4 -> outputPcm ] Loading ivas_processing_scripts/processing/chains.py +15 −0 Original line number Diff line number Diff line Loading @@ -360,6 +360,21 @@ def get_processing_chain( else: preamble = 0 # if the encoding format differs from the format after the preprocessing, add format conversion stuff if tmp_in_fmt != cod_cfg["fmt"]: chain["processes"].append( Preprocessing( { "in_fs": tmp_in_fs, "in_fmt": tmp_in_fmt, "out_fs": tmp_in_fs, "out_fmt": cod_cfg["fmt"], "multiprocessing": cfg.multiprocessing, } ) ) tmp_in_fmt = cod_cfg["fmt"] chain["processes"].append( IVAS( { Loading Loading
experiments/selection/P800-8/config/P800-8.yml 0 → 100644 +307 −0 Original line number Diff line number Diff line --- ################################################ # General configuration ################################################ name: P800-8 master_seed: 5 prerun_seed: 2 multiprocessing: false input_path: "experiments/selection/P800-8/proc_input" output_path: "experiments/selection/P800-8/proc_output" ################################################ ### Input configuration ################################################ input: fmt: "FOA" fs: 48000 ################################################ ### Pre-processing on individual items ################################################ preprocessing: mask: "HP50" loudness: -26 window: 100 ################################################ ### Pre-processing on whole signal(s) ################################################ preprocessing_2: concatenate_input: true # concatenation_order: [] preamble: 10000 preamble_noise: true ################################################# ### Bitstream processing ################################################# ################################################ ### Configuration for conditions under test ################################################ conditions_to_generate: ### Reference and anchor conditions ########################## # c01: # type: ref # c02: # type: mnru # q: 28 # c03: # type: mnru # q: 24 # c04: # type: mnru # q: 20 # c05: # type: mnru # q: 16 # c06: # type: esdru # alpha: 0.7 # c07: # type: esdru # alpha: 0.4 # c08: # type: esdru # alpha: 0.1 # ### EVS condition ################################ # c09: # type: evs # bitrates: # - 7200 # cod: # opts: ["-max_band", "FB"] # dec: # c10: # type: evs # bitrates: # - 8000 # cod: # opts: ["-max_band", "FB"] # dec: # c11: # type: evs # bitrates: # - 9600 # cod: # opts: ["-max_band", "FB"] # dec: # c12: # type: evs # bitrates: # - 13200 # cod: # opts: ["-max_band", "FB"] # dec: # c13: # type: evs # bitrates: # 
- 16400 # cod: # opts: ["-max_band", "FB"] # dec: # c14: # type: evs # bitrates: # - 24400 # cod: # opts: ["-max_band", "FB"] # dec: # c15: # type: evs # bitrates: # - 32000 # cod: # opts: ["-max_band", "FB"] # dec: # c16: # type: evs # bitrates: # - 7200 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # c17: # type: evs # bitrates: # - 8000 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # c18: # type: evs # bitrates: # - 9600 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # c19: # type: evs # bitrates: # - 13200 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # c20: # type: evs # bitrates: # - 16400 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # c21: # type: evs # bitrates: # - 24400 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # c22: # type: evs # bitrates: # - 32000 # cod: # opts: ["-max_band", "FB"] # dec: # tx: # type: "FER" # error_rate: 5 # ### IVAS condition ############################### # c23: # type: ivas # bitrates: # - 13200 # cod: # dec: # fmt: "STEREO" # c24: # type: ivas # bitrates: # - 16400 # cod: # dec: # fmt: "STEREO" # c25: # type: ivas # bitrates: # - 24400 # cod: # dec: # fmt: "STEREO" # c26: # type: ivas # bitrates: # - 32000 # cod: # dec: # fmt: "STEREO" # c27: # type: ivas # bitrates: # - 48000 # cod: # dec: # fmt: "STEREO" # c28: # type: ivas # bitrates: # - 13200 # cod: # dec: # fmt: "STEREO" # tx: # type: "FER" # error_rate: 5 # c29: # type: ivas # bitrates: # - 16400 # cod: # dec: # fmt: "STEREO" # tx: # type: "FER" # error_rate: 5 # c30: # type: ivas # bitrates: # - 24400 # cod: # dec: # fmt: "STEREO" # tx: # type: "FER" # error_rate: 5 # c31: # type: ivas # bitrates: # - 32000 # cod: # dec: # fmt: "STEREO" # tx: # type: "FER" # error_rate: 5 # c32: # type: ivas # bitrates: # - 48000 # cod: # dec: # fmt: "STEREO" # tx: # type: "FER" # error_rate: 5 
# c33: # type: ivas # bitrates: # - 24400 # cod: # opts: ["-dtx"] # dec: # fmt: "STEREO" c34: type: ivas bitrates: - 13200 cod: fmt: "MASA2" opts: ["-dtx"] dec: fmt: "MASA2" tx: type: "FER" error_rate: 5 ################################################ ### Post-processing ################################################ postprocessing: fmt: "BINAURAL" fs: 48000 loudness: -26
ivas_processing_scripts/audiotools/convert/__init__.py +14 −2 Original line number Diff line number Diff line Loading @@ -33,6 +33,7 @@ import logging from pathlib import Path, PurePath from typing import Optional, Union from numpy import empty from ivas_processing_scripts.audiotools import audio, audioarray, metadata from ivas_processing_scripts.audiotools.audiofile import write Loading Loading @@ -72,6 +73,9 @@ def convert_file( if not isinstance(in_fmt, PurePath) and in_fmt.startswith("META"): input = metadata.Metadata(in_file) else: if in_fmt.startswith("MASA") and in_meta is None: # if no MD file is provided, default to name (including .wav or .pcm!!!) + ".met" in_meta = [in_file.parent / (in_file.name + ".met")] input = audio.fromfile(in_fmt, in_file, in_fs, in_meta) # try to set reasonable defaults if missing Loading @@ -89,6 +93,14 @@ def convert_file( out_fmt = input.name output = audio.fromtype(out_fmt) if isinstance(output, audio.MetadataAssistedSpatialAudio): # create dummy audio array to allow inference of MASA mode num_tcs = int(output.name[-1]) output.audio = empty((1, num_tcs)) # fabricate metadata file name output.metadata_files = [Path(out_file).with_suffix(".met")] if isinstance(output, audio.ObjectBasedAudio): try: output.object_pos = input.object_pos Loading Loading @@ -291,8 +303,8 @@ def format_conversion( """Convert one audio format to another""" # validation if isinstance(output, audio.MetadataAssistedSpatialAudio): raise NotImplementedError("MASA is not supported as an output for rendering!") if isinstance(output, audio.MetadataAssistedSpatialAudio) and not isinstance(input, audio.SceneBasedAudio): raise NotImplementedError("Can only convert to MASA from SBA") if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name: raise NotImplementedError( Loading
ivas_processing_scripts/audiotools/convert/scenebased.py +21 −0 Original line number Diff line number Diff line Loading @@ -53,6 +53,7 @@ from ivas_processing_scripts.audiotools.convert.binaural import binaural_fftconv from ivas_processing_scripts.audiotools.EFAP import EFAP from ivas_processing_scripts.audiotools.rotation import Quat2RotMat, SHrotmatgen from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu from ivas_processing_scripts.audiotools.wrappers.masaAnalyzer import masaAnalyzer """ SceneBasedAudio functions """ Loading @@ -75,6 +76,11 @@ def convert_scenebased( # SBA -> SBA elif isinstance(out, audio.SceneBasedAudio): render_sba_to_sba(sba, out) # SBA -> MASA elif isinstance(out, audio.MetadataAssistedSpatialAudio) and sba.name == "FOA": render_sba_to_masa(sba, out) else: raise NotImplementedError( f"Conversion from {sba.name} to {out.name} is unsupported!" Loading Loading @@ -177,6 +183,21 @@ def render_sba_to_sba( zero_vert_channels(sba_out) def render_sba_to_masa( sba_in: audio.SceneBasedAudio, masa_out: audio.MetadataAssistedSpatialAudio, ) -> None: assert sba_in.name == "FOA" # two dir only possible from HOA2 num_dirs = 1 num_tcs = masa_out.audio.shape[1] md_out_path = masa_out.metadata_files[0] masa = masaAnalyzer(sba_in, num_tcs, num_dirs, md_out_path) masa_out.audio = masa.audio def rotate_sba( sba: audio.SceneBasedAudio, trajectory: str, Loading
ivas_processing_scripts/audiotools/wrappers/masaRenderer.py +1 −1 Original line number Diff line number Diff line Loading @@ -87,7 +87,7 @@ def masaRenderer( str(binary), output_mode, "", # 2 -> inputPcm str(masa.metadata_files.resolve()), str(masa.metadata_file.resolve()), "", # 4 -> outputPcm ] Loading
ivas_processing_scripts/processing/chains.py +15 −0 Original line number Diff line number Diff line Loading @@ -360,6 +360,21 @@ def get_processing_chain( else: preamble = 0 # if the encoding format differs from the format after the preprocessing, add a format conversion step if tmp_in_fmt != cod_cfg["fmt"]: chain["processes"].append( Preprocessing( { "in_fs": tmp_in_fs, "in_fmt": tmp_in_fmt, "out_fs": tmp_in_fs, "out_fmt": cod_cfg["fmt"], "multiprocessing": cfg.multiprocessing, } ) ) tmp_in_fmt = cod_cfg["fmt"] chain["processes"].append( IVAS( { Loading