From fc5299ff77d9ed8ede4075f3b39bb5bc33a2c76f Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Mon, 28 Jul 2025 15:33:36 +0200 Subject: [PATCH 1/3] add per-item trajectory and render config search --- README.md | 11 +++++- examples/TEMPLATE.yml | 39 ++++++++++++------- ivas_processing_scripts/audiotools/audio.py | 1 - .../audiotools/convert/__init__.py | 2 +- ivas_processing_scripts/audiotools/utils.py | 29 ++++++++++++++ ivas_processing_scripts/processing/chains.py | 24 +++++++++++- ivas_processing_scripts/processing/ivas.py | 24 +++++++++++- .../processing/postprocessing.py | 16 ++++++++ 8 files changed, 126 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index e508ad3a..02f00dde 100755 --- a/README.md +++ b/README.md @@ -435,6 +435,13 @@ conditions_to_generate: # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] + ### Per-item renderer configuration. Set to true to search for a file with suffix .cfg; default = false + # render_config: true + ### Head-tracking trajectory file for binaural output OR 'true' which will search for a file with the suffix .ht.csv next to the input; default = null + ### NOTE: this automatically configures the '-T' argument to the decoder, so may conflict if also specified in `opts` + # trajectory: "path/to/file" + ### Limit the trajectory to 3DoF via truncation; default = false + # only_3dof: false ### IVAS condition ############################### c07: @@ -520,8 +527,10 @@ postprocessing: # bin_lfe_gain: 1 ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = true # limit: false - ### Head-tracking trajectory file for binaural output; default = null + ### Head-tracking trajectory file for binaural output OR 'true' which will search for a file with the suffix .ht.csv in the input dir; default = null # trajectory: "path/to/file" + ### Limit the trajectory to 3DoF via truncation; default = false + # only_3dof: false ``` diff --git 
a/examples/TEMPLATE.yml b/examples/TEMPLATE.yml index 72fcd82c..6ad83e5a 100755 --- a/examples/TEMPLATE.yml +++ b/examples/TEMPLATE.yml @@ -290,6 +290,13 @@ conditions_to_generate: # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] + ### Per-item renderer configuration. Set to true to search for a file with suffix .cfg in the input dir; default = false + # render_config: true + ### Head-tracking trajectory file for binaural output OR 'true' which will search for a file with the suffix .ht.csv in the input dir; default = null + ### NOTE: this automatically configures the '-T' argument to the decoder, so may conflict if also specified in `opts` + # trajectory: "path/to/file" + ### Limit the trajectory to 3DoF via truncation; default = false + # only_3dof: false ### Bitstream options # tx: ### For possible arguments see overall bitstream modification @@ -339,18 +346,20 @@ postprocessing: ### Gain factor to be applied AFTER any other processing (linear, or add dB suffix) # gain_post: 3.1622776602 ### Low-pass cut-off frequency in Hz; default = null (no filtering) - # lp_cutoff: 24000 - ### Target loudness in LKFS; default = null (no loudness change applied) - # loudness: -26 - ### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null); - ### default = null (uses postprocessing fmt if possible) - # loudness_fmt: null - ### Name of custom binaural dataset (without prefix or suffix); - ### default = null (ORANGE53(_Dolby) for BINAURAL, IISofficialMPEG222UC for BINAURAL_ROOM) - # bin_dataset: SADIE - ### Render LFE to binaural output with the specified gain (only valid for channel-based input); default = null - # bin_lfe_gain: 1 - ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false - # limit: true - ### Head-tracking trajectory file for binaural output; default = null - # trajectory: "path/to/file" +# lp_cutoff: 24000 +### Target 
loudness in LKFS; default = null (no loudness change applied) +# loudness: -26 +### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null); +### default = null (uses postprocessing fmt if possible) +# loudness_fmt: null +### Name of custom binaural dataset (without prefix or suffix); +### default = null (ORANGE53(_Dolby) for BINAURAL, IISofficialMPEG222UC for BINAURAL_ROOM) +# bin_dataset: SADIE +### Render LFE to binaural output with the specified gain (only valid for channel-based input); default = null +# bin_lfe_gain: 1 +### Flag whether output should be limited to avoid clipping (can alter target loudness); default = true +# limit: false +### Head-tracking trajectory file for binaural output OR 'true' which will search for a file with the suffix .ht.csv in the input dir; default = null +# trajectory: "path/to/file" +### Limit the trajectory to 3DoF via truncation; default = false +# only_3dof: false diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index a38084d4..b7274546 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -63,7 +63,6 @@ class Audio(ABC): self.audio = None self.fs = None self.num_channels = None - # self.logger = None # TODO needed? 
def __repr__(self): return f"{self.__class__} : {self.__dict__}" diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 691ca1da..d279c561 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -368,7 +368,7 @@ def process_audio( if loudness is not None: if logger: logger.debug( - f"Applying loudness adjustment to {loudness} LKFS for format {loudness_fmt} using ITU STL bs1770demo" + f"Applying loudness adjustment to {loudness} LKFS for format {x.name} (measured using {loudness_fmt}) using ITU STL bs1770demo" ) x.audio, _ = loudness_norm(x, loudness, loudness_fmt, logger=logger) diff --git a/ivas_processing_scripts/audiotools/utils.py b/ivas_processing_scripts/audiotools/utils.py index 1233f949..610d5d9f 100755 --- a/ivas_processing_scripts/audiotools/utils.py +++ b/ivas_processing_scripts/audiotools/utils.py @@ -31,10 +31,12 @@ # from pathlib import Path +from typing import Union import numpy as np from ivas_processing_scripts.audiotools.rotation import Euler2Quat, Quat2Euler +from ivas_processing_scripts.utils import get_abs_path def read_trajectory(trj_file: Path, return_quat=True): @@ -69,3 +71,30 @@ def write_trajectory(trj, out_file, write_quat=True): for pos in trj: f.write(", ".join([f"{q:.6f}" for q in pos])) f.write("\n") + + +def get_trajectory_or_dir( + trj: Union[str, Path, bool], input_path: Path +) -> tuple[Path, Path]: + trajectory = None + trajectory_dir = None + if trj is True: + trajectory_dir = input_path + elif trj is not False: + trajectory = get_abs_path(trj) + + return trajectory, trajectory_dir + + +def truncate_trajectory_3dof( + in_file: Path, + out_file: Path, +) -> None: + + data = np.genfromtxt(in_file, delimiter=",") + data = data[:, :4] + + with open(out_file, "w") as f: + for pos in data: + f.write(", ".join([f"{q:.6f}" for q in pos])) + f.write("\n") diff --git 
a/ivas_processing_scripts/processing/chains.py b/ivas_processing_scripts/processing/chains.py index 6ee222dc..1d08a7cd 100755 --- a/ivas_processing_scripts/processing/chains.py +++ b/ivas_processing_scripts/processing/chains.py @@ -38,6 +38,7 @@ from warnings import warn from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audioarray import trim from ivas_processing_scripts.audiotools.audiofile import read, write +from ivas_processing_scripts.audiotools.utils import get_trajectory_or_dir from ivas_processing_scripts.processing.config import TestConfig from ivas_processing_scripts.processing.evs import EVS from ivas_processing_scripts.processing.ivas import IVAS, IVAS_rend @@ -431,6 +432,15 @@ def get_processing_chain( cond_fmt.extend(tmp_out_fmt) tmp_out_fmt = tmp_out_fmt[0] + # check if trajectory is a path or boolean, boolean will trigger a search in the input dir + trajectory, trajectory_dir = get_trajectory_or_dir( + dec_cfg.get("trajectory"), cfg.input_path + ) + + # check if render config search is enabled + render_config = dec_cfg.get("render_config") + render_config_dir = cfg.output_path if render_config else None + ivas_cls = IVAS if cond_cfg["type"] == "ivas_combined": ivas_cls = IVASCombined @@ -447,6 +457,11 @@ def get_processing_chain( "dec_bin": get_abs_path(dec_cfg.get("bin", None)), "dec_opts": dec_cfg.get("opts"), "extended_metadata": cod_cfg.get("extended_metadata", False), + "trajectory": trajectory, + "trajectory_dir": trajectory_dir, + "only_3dof": dec_cfg.get("only_3dof"), + "render_config": render_config, + "render_config_dir": render_config_dir, "multiprocessing": cfg.multiprocessing, "tx": tx_cfg, "preamble": preamble, @@ -515,6 +530,11 @@ def get_processing_chain( if cond_cfg.get("out_fc") is not None: tmp_lp_cutoff = cond_cfg.get("out_fc") + # check if trajectory is a path or boolean, boolean will trigger a search in the input dir + trajectory, trajectory_dir = get_trajectory_or_dir( + 
post_cfg.get("trajectory"), cfg.input_path + ) + chain["processes"].append( Postprocessing( { @@ -528,7 +548,9 @@ def get_processing_chain( "bin_dataset": post_cfg.get("bin_dataset"), "bin_lfe_gain": parse_gain(post_cfg.get("bin_lfe_gain")), "limit": post_cfg.get("limit", True), - "trajectory": get_abs_path(post_cfg.get("trajectory", None)), + "trajectory": trajectory, + "trajectory_dir": trajectory_dir, + "only_3dof": post_cfg.get("only_3dof"), "multiprocessing": cfg.multiprocessing, "mnru_q": tmp_mnru_q, "esdru_alpha": tmp_esdru_alpha, diff --git a/ivas_processing_scripts/processing/ivas.py b/ivas_processing_scripts/processing/ivas.py index 22f285cb..8bf668bb 100755 --- a/ivas_processing_scripts/processing/ivas.py +++ b/ivas_processing_scripts/processing/ivas.py @@ -39,6 +39,7 @@ from typing import Optional, Tuple, Union from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audiofile import parse_wave_header, read from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS +from ivas_processing_scripts.audiotools.utils import truncate_trajectory_3dof from ivas_processing_scripts.audiotools.wrappers.eid_xor import ( create_and_apply_error_pattern, validate_error_pattern_application, @@ -290,9 +291,30 @@ class IVAS(Processing): if self._use_wine: cmd.insert(0, "wine") - if hasattr(self, "trajectory"): + # search for a trajectory file if the trajectory dir is specified + if getattr(self, "trajectory_dir", None): + trj_name = out_file.name.split(".")[0] + self.trajectory = self.trajectory_dir.joinpath(f"{trj_name}.wav.ht.csv") + if not self.trajectory.exists(): + raise FileNotFoundError( + f"Trajectory file {self.trajectory} not found! 
Please check the configuration" + ) + + # truncate to 3DoF if needed + if getattr(self, "only_3dof", None): + trj_3dof = out_file.with_suffix(".3dof.ht.csv") + truncate_trajectory_3dof(self.trajectory, trj_3dof) + self.trajectory = trj_3dof + + if getattr(self, "trajectory", None): cmd.extend(["-T", self.trajectory]) + # add renderer config if specified + if getattr(self, "render_config", None): + rend_cfg_name = out_file.name.split(".")[0] + rend_cfg_file = self.render_config_dir.joinpath(f"{rend_cfg_name}.wav.cfg") + cmd.extend(["-render_config", rend_cfg_file]) + # add -voip cmdline option to the decoder if voip: cmd.extend( diff --git a/ivas_processing_scripts/processing/postprocessing.py b/ivas_processing_scripts/processing/postprocessing.py index a440518a..b7a47002 100755 --- a/ivas_processing_scripts/processing/postprocessing.py +++ b/ivas_processing_scripts/processing/postprocessing.py @@ -34,6 +34,7 @@ import logging from pathlib import Path from ivas_processing_scripts.audiotools import convert +from ivas_processing_scripts.audiotools.utils import truncate_trajectory_3dof from ivas_processing_scripts.processing.processing import Processing @@ -49,6 +50,21 @@ class Postprocessing(Processing): logger.debug(f"Postprocessing configuration : {self.__dict__}") logger.debug(f"Postprocessing {in_file.absolute()} -> {out_file.absolute()}") + # search for a trajectory file if the trajectory is not given, but the dir is specified + if getattr(self, "trajectory_dir", None): + trj_name = out_file.name.split(".")[0] + self.trajectory = self.trajectory_dir.joinpath(f"{trj_name}.wav.ht.csv") + if not self.trajectory.exists(): + raise FileNotFoundError( + f"Trajectory file {self.trajectory} not found! 
Please check the configuration" + ) + + # truncate trajectory to 3DoF if needed + if getattr(self, "only_3dof", None): + trj_3dof = out_file.with_suffix(".3dof.ht.csv") + truncate_trajectory_3dof(self.trajectory, trj_3dof) + self.trajectory = trj_3dof + convert.convert_file( in_file, out_file, logger=logger, in_meta=in_meta, **self.__dict__ ) -- GitLab From 7c5744d9968c9be37930126709350330d45fc6ae Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Mon, 28 Jul 2025 15:35:01 +0200 Subject: [PATCH 2/3] formatting --- ivas_processing_scripts/audiotools/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ivas_processing_scripts/audiotools/utils.py b/ivas_processing_scripts/audiotools/utils.py index 610d5d9f..c8c5e94f 100755 --- a/ivas_processing_scripts/audiotools/utils.py +++ b/ivas_processing_scripts/audiotools/utils.py @@ -90,7 +90,6 @@ def truncate_trajectory_3dof( in_file: Path, out_file: Path, ) -> None: - data = np.genfromtxt(in_file, delimiter=",") data = data[:, :4] -- GitLab From 5390e1d03f7a47fe4a1ba0f79b8f1c504826a46a Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Thu, 7 Aug 2025 13:10:10 +0200 Subject: [PATCH 3/3] [fix] wrong path for render config search --- ivas_processing_scripts/processing/chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ivas_processing_scripts/processing/chains.py b/ivas_processing_scripts/processing/chains.py index 1d08a7cd..aeaaa11f 100755 --- a/ivas_processing_scripts/processing/chains.py +++ b/ivas_processing_scripts/processing/chains.py @@ -439,7 +439,7 @@ def get_processing_chain( # check if render config search is enabled render_config = dec_cfg.get("render_config") - render_config_dir = cfg.output_path if render_config else None + render_config_dir = cfg.input_path if render_config else None ivas_cls = IVAS if cond_cfg["type"] == "ivas_combined": -- GitLab