From de7bbee96b6b5368cee6db6fee486ebc9555dbf7 Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Wed, 6 Nov 2024 15:33:01 +0100 Subject: [PATCH 1/3] merge main with isar_selection --- .gitignore | 33 +-- README.md | 2 +- examples/TEMPLATE.yml | 6 +- examples/audiotools.ipynb | 89 ++++--- experiments/selection_isar/.gitkeep | 0 experiments/selection_isar/BS1534-1a/.gitkeep | 0 .../selection_isar/BS1534-1a/config/.gitkeep | 0 .../BS1534-1a/config/BS1534-1a.yml | 91 ++++++++ .../BS1534-1a/proc_input_a/.gitkeep | 0 .../BS1534-1a/proc_output_a/.gitkeep | 0 experiments/selection_isar/BS1534-2a/.gitkeep | 0 .../selection_isar/BS1534-2a/config/.gitkeep | 0 .../BS1534-2a/config/BS1534-2a.yml | 91 ++++++++ .../BS1534-2a/proc_input_a/.gitkeep | 0 .../BS1534-2a/proc_output_a/.gitkeep | 0 experiments/selection_isar/BS1534-3a/.gitkeep | 0 .../selection_isar/BS1534-3a/config/.gitkeep | 0 .../BS1534-3a/config/BS1534-3a.yml | 91 ++++++++ .../BS1534-3a/proc_input_a/.gitkeep | 0 .../BS1534-3a/proc_output_a/.gitkeep | 0 experiments/selection_isar/BS1534-4a/.gitkeep | 0 .../selection_isar/BS1534-4a/config/.gitkeep | 0 .../BS1534-4a/config/BS1534-4a.yml | 95 ++++++++ .../BS1534-4a/proc_input_a/.gitkeep | 0 .../BS1534-4a/proc_output_a/.gitkeep | 0 experiments/selection_isar/README.md | 86 +++++++ generate_test.py | 2 +- generate_test_isar.py | 161 +++++++++++++ ivas_processing_scripts/__init__.py | 27 ++- .../audiotools/__init__.py | 16 +- ivas_processing_scripts/audiotools/audio.py | 1 - .../audiotools/constants.py | 3 + .../audiotools/convert/__init__.py | 8 +- .../audiotools/convert/channelbased.py | 20 +- .../audiotools/convert/masa.py | 13 +- .../audiotools/convert/objectbased.py | 24 +- .../audiotools/convert/scenebased.py | 16 +- .../audiotools/quaternions.py | 143 ++++++++++++ ivas_processing_scripts/audiotools/utils.py | 2 +- ivas_processing_scripts/constants.py | 11 + ivas_processing_scripts/processing/chains.py | 219 ++++++++++++++++-- ivas_processing_scripts/processing/config.py | 77 ++++++ ivas_processing_scripts/processing/evs.py | 9 +- ivas_processing_scripts/processing/ivas.py | 134 +++++++++-- .../processing/postprocessing.py | 17 +- .../processing/preprocessing.py | 10 +- .../processing/preprocessing_2.py | 10 +- .../processing/processing.py | 15 +- .../processing_splitting_scaling.py | 10 +- .../trajectories/README.md | 62 +++++ .../trajectories/__init__.py | 124 ++++++++++ .../trajectories/__main__.py | 36 +++ .../trajectories/trajectories.py | 64 +++++ tests/data/test_ISM.yml | 2 +- tests/data/test_MASA.yml | 2 +- tests/data/test_MC.yml | 2 +- tests/data/test_SBA.yml | 2 +- tests/test_audiotools_convert.py | 2 +- 58 files changed, 1682 insertions(+), 146 deletions(-) create mode 100644 experiments/selection_isar/.gitkeep create mode 100644 experiments/selection_isar/BS1534-1a/.gitkeep create mode 100644 experiments/selection_isar/BS1534-1a/config/.gitkeep create mode 100644 experiments/selection_isar/BS1534-1a/config/BS1534-1a.yml create mode 100644 experiments/selection_isar/BS1534-1a/proc_input_a/.gitkeep create mode 100644 experiments/selection_isar/BS1534-1a/proc_output_a/.gitkeep create mode 100644 experiments/selection_isar/BS1534-2a/.gitkeep create mode 100644 experiments/selection_isar/BS1534-2a/config/.gitkeep create mode 100644 experiments/selection_isar/BS1534-2a/config/BS1534-2a.yml create mode 100644 experiments/selection_isar/BS1534-2a/proc_input_a/.gitkeep create mode 100644 experiments/selection_isar/BS1534-2a/proc_output_a/.gitkeep create mode 100644 
experiments/selection_isar/BS1534-3a/.gitkeep create mode 100644 experiments/selection_isar/BS1534-3a/config/.gitkeep create mode 100644 experiments/selection_isar/BS1534-3a/config/BS1534-3a.yml create mode 100644 experiments/selection_isar/BS1534-3a/proc_input_a/.gitkeep create mode 100644 experiments/selection_isar/BS1534-3a/proc_output_a/.gitkeep create mode 100644 experiments/selection_isar/BS1534-4a/.gitkeep create mode 100644 experiments/selection_isar/BS1534-4a/config/.gitkeep create mode 100644 experiments/selection_isar/BS1534-4a/config/BS1534-4a.yml create mode 100644 experiments/selection_isar/BS1534-4a/proc_input_a/.gitkeep create mode 100644 experiments/selection_isar/BS1534-4a/proc_output_a/.gitkeep create mode 100644 experiments/selection_isar/README.md create mode 100755 generate_test_isar.py create mode 100755 ivas_processing_scripts/audiotools/quaternions.py create mode 100644 ivas_processing_scripts/trajectories/README.md create mode 100755 ivas_processing_scripts/trajectories/__init__.py create mode 100755 ivas_processing_scripts/trajectories/__main__.py create mode 100755 ivas_processing_scripts/trajectories/trajectories.py diff --git a/.gitignore b/.gitignore index 5d3d341c..2ac3217d 100644 --- a/.gitignore +++ b/.gitignore @@ -12,22 +12,23 @@ venv/ *.bs *.192 mc.double -experiments/selection/*/proc_input*/cat*/*.wav -experiments/selection/*/proc_input*/cat*/*.csv -experiments/selection/*/proc_input*/cat*/*.pcm -experiments/selection/*/proc_input*/FOA*/*.wav -experiments/selection/*/proc_input*/HOA2*/*.wav -experiments/selection/*/background_noise/*.wav -experiments/selection/*/background_noise/*.txt -experiments/selection/*/proc_input*/*.wav -experiments/selection/*/proc_input*/*.pcm -experiments/selection/*/proc_input*/*.csv -experiments/selection/*/proc_input*/*.log -experiments/selection/*/proc_input*/*.yml -experiments/selection/*/proc_output*/ -experiments/selection/*/config/*-lab_*.yml -experiments/selection/*/gen_input/IRs/*.wav -experiments/selection/*/gen_input/items_mono/*.wav +experiments/selection*/*/proc_input*/cat*/*.wav +experiments/selection*/*/proc_input*/cat*/*.csv +experiments/selection*/*/proc_input*/cat*/*.pcm +experiments/selection*/*/proc_input*/FOA*/*.wav +experiments/selection*/*/proc_input*/HOA2*/*.wav +experiments/selection*/*/background_noise/*.wav +experiments/selection*/*/background_noise/*.txt +experiments/selection*/*/proc_input*/*.wav +experiments/selection*/*/proc_input*/*.pcm +experiments/selection*/*/proc_input*/*.csv +experiments/selection*/*/proc_input*/*.log +experiments/selection*/*/proc_input*/*.yml +experiments/selection*/*/proc_output*/ +experiments/selection*/*/config/*-lab_*.yml +experiments/selection*/*/config/*.csv +experiments/selection*/*/gen_input/IRs/*.wav +experiments/selection*/*/gen_input/items_mono/*.wav *~ tests/temp_output_* tests/cut diff --git a/README.md b/README.md index 4ee16860..3fe7971d 100755 --- a/README.md +++ b/README.md @@ -501,7 +501,7 @@ postprocessing: ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = true # limit: false ### Head-tracking trajectory file for binaural output; default = null - # trajectory: "path/to/file" + # out_trajectory: "path/to/file" ``` diff --git a/examples/TEMPLATE.yml b/examples/TEMPLATE.yml index b09e327d..89d03ad3 100755 --- a/examples/TEMPLATE.yml +++ b/examples/TEMPLATE.yml @@ -181,7 +181,7 @@ conditions_to_generate: type: ref ### optional low-pass cut-off frequency in Hz; default = null # out_fc: 22500 - ### optional 
use of IVAS_rend (can be used in all conditions) + ### optional use of IVAS_rend for post rendering (can be used in all conditions) # ivas_rend: ### Path to renderer binary; default search for IVAS_rend in bin folder (primary) and PATH (secondary) # bin: ~/git/ivas-codec/IVAS_rend @@ -232,7 +232,7 @@ conditions_to_generate: # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] - ### optional use of IVAS_rend (can be used in all conditions) + ### optional use of IVAS_rend for post rendering (can be used in all conditions) # ivas_rend: ### Path to renderer binary; default search for IVAS_rend in bin folder (primary) and PATH (secondary) # bin: ~/git/ivas-codec/IVAS_rend @@ -328,5 +328,3 @@ postprocessing: # bin_lfe_gain: 1 ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false # limit: true - ### Head-tracking trajectory file for binaural output; default = null - # trajectory: "path/to/file" diff --git a/examples/audiotools.ipynb b/examples/audiotools.ipynb index d6ac38ad..10c8be57 100755 --- a/examples/audiotools.ipynb +++ b/examples/audiotools.ipynb @@ -9,7 +9,7 @@ "\n", "The audiotools module can be used via the CLI for performing rendering of audio files or used as a library by importing the functions in a python script.\n", "\n", - "This notebook contains a few commandline examples and a brief example of how to use the functions in an interactive python session (like this notebook) which can be also similarly used in a standalone python script." + "This notebook contains a few commandline examples and a brief example of how to use the functions in an interactive python session (like this notebook) which can be also similarly used in a standalone python script.\n" ] }, { @@ -17,22 +17,22 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "\n", "# Command-line interface / renderer\n", "\n", "The CLI can be used by running the python module: `python -m ivas_processing_scripts.audiotools --help`.\n", + "\n", "
\n", "Click to expand...\n", "\n", "```bash\n", "❯ python -m ivas_processing_scripts.audiotools --help\n", - "usage: __main__.py [-h] -i INPUT -if IN_FMT [-is IN_FS] [-ifc IN_CUTOFF] [-ihp] [-iw IN_WINDOW] [-it PRE_TRIM POST_TRIM] [-ipn] [-id IN_DELAY] [-il IN_LOUDNESS] [-inf IN_LOUDNESS_FMT]\n", - " [-im IN_META [IN_META ...]] -o OUTPUT [-of OUT_FMT] [-os OUT_FS] [-ofc OUT_CUTOFF] [-ohp] [-ow OUT_WINDOW] [-ot PRE_TRIM POST_TRIM] [-opn] [-od OUT_DELAY] [-ol OUT_LOUDNESS]\n", - " [-onf OUT_LOUDNESS_FMT] [-lm] [-t TRAJECTORY] [-bd BIN_DATASET] [-bl BIN_LFE_GAIN] [-l] [-L] [-mp]\n", + "usage: __main__.py [-h] -i INPUT -if IN_FMT [-is IN_FS] [-ifc IN_CUTOFF] [-imk IN_MASK] [-iw IN_WINDOW] [-ix PRE_TRIM POST_TRIM] [-it IN_TRAJECTORY] [-ipn] [-id IN_DELAY] [-il IN_LOUDNESS] [-inf IN_LOUDNESS_FMT] [-im IN_META [IN_META ...]] -o OUTPUT\n", + " [-of OUT_FMT] [-os OUT_FS] [-ofc OUT_CUTOFF] [-omk OUT_MASK] [-ow OUT_WINDOW] [-ox PRE_TRIM POST_TRIM] [-ot OUT_TRAJECTORY] [-opn] [-od OUT_DELAY] [-ol OUT_LOUDNESS] [-onf OUT_LOUDNESS_FMT] [-lm] [-bd BIN_DATASET] [-bl BIN_LFE_GAIN]\n", + " [-mnru MNRU_Q] [-esdru ESDRU_ALPHA] [-l] [-L] [-mp]\n", "\n", "Audiotools: Convert/Manipulate spatial audio files.\n", "\n", - "options:\n", + "optional arguments:\n", " -h, --help show this help message and exit\n", "\n", "Input (pre-) processing options:\n", @@ -43,11 +43,14 @@ " Sampling rate (Hz) (deduced for .wav input, same as input if output not specified, default = 48000)\n", " -ifc IN_CUTOFF, --in_cutoff IN_CUTOFF\n", " Cut-off frequency for low-pass filtering (default = None)\n", - " -ihp, --in_hp50 Apply 50 Hz high-pass filtering (default = False)\n", + " -imk IN_MASK, --in_mask IN_MASK\n", + " Apply filtering with mask (HP50, 20KBP or None; default = None)\n", " -iw IN_WINDOW, --in_window IN_WINDOW\n", " Window the start/end of the signal by this amount in milliseconds (default = None)\n", - " -it PRE_TRIM POST_TRIM, --in_trim PRE_TRIM POST_TRIM\n", + " -ix PRE_TRIM POST_TRIM, --in_trim PRE_TRIM POST_TRIM\n", " Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence), (default = None)\n", + " -it IN_TRAJECTORY, --in_trajectory IN_TRAJECTORY\n", + " Head-tracking trajectory file for input pre-rotation or binaural output (default = None)\n", " -ipn, --in_pad_noise Flag for padding with noise instead of zeros\n", " -id IN_DELAY, --in_delay IN_DELAY\n", " Delay the signal by this amount in milliseconds (negative values advance, default = None)\n", @@ -67,11 +70,14 @@ " Sampling rate (Hz) (deduced for .wav input, same as input if output not specified, default = 48000)\n", " -ofc OUT_CUTOFF, --out_cutoff OUT_CUTOFF\n", " Cut-off frequency for low-pass filtering (default = None)\n", - " -ohp, --out_hp50 Apply 50 Hz high-pass filtering (default = False)\n", + " -omk OUT_MASK, --out_mask OUT_MASK\n", + " Apply filtering with mask (HP50, 20KBP or None; default = None)\n", " -ow OUT_WINDOW, --out_window OUT_WINDOW\n", " Window the start/end of the signal by this amount in milliseconds (default = None)\n", - " -ot PRE_TRIM POST_TRIM, --out_trim PRE_TRIM POST_TRIM\n", + " -ox PRE_TRIM POST_TRIM, --out_trim PRE_TRIM POST_TRIM\n", " Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence), (default = None)\n", + " -ot OUT_TRAJECTORY, --out_trajectory OUT_TRAJECTORY\n", + " Head-tracking trajectory file for input pre-rotation or binaural output (default = None)\n", " -opn, --out_pad_noise\n", " Flag for padding with noise instead of zeros\n", " -od OUT_DELAY, 
--out_delay OUT_DELAY\n", @@ -81,25 +87,26 @@ " -onf OUT_LOUDNESS_FMT, --out_loudness_fmt OUT_LOUDNESS_FMT\n", " Format used for loudness computation (only valid with with -ol/--out_loudness, default = OUT_FMT)\n", " -lm, --limit Apply limiting to output (default = False)\n", - " -t TRAJECTORY, --trajectory TRAJECTORY\n", - " Head-tracking trajectory file for binaural output (default = None)\n", " -bd BIN_DATASET, --bin_dataset BIN_DATASET\n", " Use a custom binaural dataset (see README.md and audiotools/binaural_datasets/README.txt for further information)\n", " -bl BIN_LFE_GAIN, --bin_lfe_gain BIN_LFE_GAIN\n", - " Render LFE to binaural output with the specified gain (only valid for channel-based input, default = None)\n", + " Render LFE to binaural output with the specified gain (only valid for channel-based input, default = 1.8836490894898006)\n", + " -mnru MNRU_Q, --mnru_q MNRU_Q\n", + " Flag for MNRU processing\n", + " -esdru ESDRU_ALPHA, --esdru_alpha ESDRU_ALPHA\n", + " Flag for ESDRU processing\n", "\n", "General options:\n", " -l, --list list all supported audio formats and exit\n", " -L, --long list all supported audio formats with long description and exit\n", " -mp, --multiprocessing\n", - " Enable multiprocessing (default = False)\n", + " Enable multiprocessing (default = False))\n", "```\n", + "\n", "
\n", "\n", "Please refer to the README.md and `--help` for a more detailed description of possible arguments and their usage.\n", "\n", - "\n", - "\n", "## Example command line usage\n", "\n", "```bash\n", @@ -111,7 +118,7 @@ "\n", "# Rendering a directory of 5_1 files to binaural with output loudness normalization and parallel processing enabled:\n", "python -m ivas_processing_scripts.audiotools -i stereo_input/ -if 5_1 -of BINAURAL -o output_binaural_norm/ -ol -26 -mp\n", - "```" + "```\n" ] }, { @@ -122,17 +129,19 @@ "# Usage in an interactive python session\n", "\n", "## Importing the module\n", - "The module, its submodules and functions may be imported just like any other python package. To make the module available in *any* directory, the `PYTHONPATH` must be modified.\n", + "\n", + "The module, its submodules and functions may be imported just like any other python package. To make the module available in _any_ directory, the `PYTHONPATH` must be modified.\n", "\n", "The recommended way to do this is to add the following lines at the top of a script which requires this module:\n", "(refer https://docs.python.org/3/library/sys.html#sys.path)\n", + "\n", "```python\n", "import sys\n", "sys.path.append(\"/path/to/this/repository\")\n", "import ivas_processing_scripts.audiotools # import can now be resolved\n", "```\n", "\n", - "An alternative is to modify the shell environment before calling the python interpreter, but this is left to the reader to try. The above solution is cross-platform." + "An alternative is to modify the shell environment before calling the python interpreter, but this is left to the reader to try. The above solution is cross-platform.\n" ] }, { @@ -159,7 +168,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Reading audio and applying basic functions:" + "Reading audio and applying basic functions:\n" ] }, { @@ -201,7 +210,7 @@ "source": [ "For more convenient manipulation of audio, the `audio` python file offers the base class `Audio` upon which the derived classes `BinauralAudio`, `ChannelBasedAudio`, `MetadataAssistedSpatialAudio`, `ObjectBasedAudio`, `SceneBasedAudio` are implemented.\n", "\n", - "To instantiate a class object, the convenience functions (\"factory\" methods) `fromtype()`, `fromarray()` and `fromfile()` are available:" + "To instantiate a class object, the convenience functions (\"factory\" methods) `fromtype()`, `fromarray()` and `fromfile()` are available:\n" ] }, { @@ -297,7 +306,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The audio object allows usage of further functions, which accept an instance of `Audio` (i.e. derived classes), such as the ITU filter wrapper (filter executable must be in \"../bin\" or PATH!):" + "The audio object allows usage of further functions, which accept an instance of `Audio` (i.e. derived classes), such as the ITU filter wrapper (filter executable must be in \"../bin\" or PATH!):\n" ] }, { @@ -392,7 +401,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The object-based approach allows easier manipulation of audio since the necessary values for manipulation are available as attributes. A non-concrete object may also be created to be filled in with data later:" + "The object-based approach allows easier manipulation of audio since the necessary values for manipulation are available as attributes. 
A non-concrete object may also be created to be filled in with data later:\n" ] }, { @@ -431,7 +440,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The conversion routines are implemented in the `convert` submodule of `audiotools`. These accept two audio objects - one as an input (a \"concrete\" object) and another as output to be filled-in (\"hollow\"). Using the concrete HOA3 audio object we instantiated from an array, and the hollow 7_1_4 object, we can use the `convert_channelbased()` function to perform a conversion:" + "The conversion routines are implemented in the `convert` submodule of `audiotools`. These accept two audio objects - one as an input (a \"concrete\" object) and another as output to be filled-in (\"hollow\"). Using the concrete HOA3 audio object we instantiated from an array, and the hollow 7_1_4 object, we can use the `convert_channelbased()` function to perform a conversion:\n" ] }, { @@ -496,7 +505,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `convert_scenebased()` function was already provided with all of the information that was required to perform a conversion from the input format to the output format since they were all class attributes. Under the hood the function checked the type of output audio, computed the necessary rendering matrix using the loudspeaker positions, applied the transformation and set the audio array of the output object." + "The `convert_scenebased()` function was already provided with all of the information that was required to perform a conversion from the input format to the output format since they were all class attributes. Under the hood the function checked the type of output audio, computed the necessary rendering matrix using the loudspeaker positions, applied the transformation and set the audio array of the output object.\n" ] }, { @@ -504,7 +513,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "A more advanced example with a generator for performing operations on a framewise basis:" + "A more advanced example with a generator for performing operations on a framewise basis:\n" ] }, { @@ -536,7 +545,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "\n", "This concludes the overview of the audiotools module. 
For readers interested in implementing scripts based on this module, it is recommended to run a debugging session for an example commandline above (either using an IDE or `python -m pdb -m ivas_processing_scripts.audiotools ...`) and examine the functions used, along with a read through of the source code.\n", "\n", "A listing of each file in the module with a description is below for reference:\n", @@ -558,23 +566,36 @@ "├── binauralobjectrenderer.py # reference binaural rendering algorithm for object based audio\n", "├── constants.py # submodule shared constants\n", "├── convert\n", - "│ ├── __init__.py # TODO rename: conversion module \n", + "│ ├── __init__.py # conversion module\n", "│ ├── binaural.py # binaural audio related conversions\n", "│ ├── channelbased.py # channel based audio related conversions\n", "│ ├── masa.py # MASA related conversions (relies on wrappers.masaRenderer)\n", "│ ├── objectbased.py # object based audio related conversions\n", + "│ ├── omasa.py\n", + "│ ├── osba.py\n", "│ └── scenebased.py # scene based audio related conversions\n", "├── EFAP.py # edge-fading amplitude panning implementation\n", - "├── metadata.py # TODO rename: scene description / composite audio format\n", - "├── rotation.py # rotation related functions \n", - "├── utils.py # TODO remove? module convenience functions\n", - "└── wrappers \n", + "├── metadata.py # scene description files and metadata handling\n", + "├── quaternions.py\n", + "├── rotation.py # rotation related functions\n", + "├── utils.py # module convenience functions\n", + "└── wrappers\n", " ├── __init__.py\n", " ├── bs1770.py # wrapper for ITU STL bs1770demo\n", + " ├── dlyerr_2_errpat.py\n", + " ├── eid_xor.py\n", + " ├── esdru.py\n", " ├── filter.py # wrapper for ITU STL filter\n", - " └── masaRenderer.py # wrapper for MASA reference software masaRenderer\n", + " ├── gen_patt.py\n", + " ├── masaAnalyzer.py # wrapper for MASA reference software masaAnalyzer\n", + " ├── masaRenderer.py # wrapper for MASA reference software masaRenderer\n", + " ├── networkSimulator.py\n", + " ├── p50fbmnru.py\n", + " ├── random_seed.py\n", + " └── reverb.py\n", "```\n", - "" + "\n", + "\n" ] } ], diff --git a/experiments/selection_isar/.gitkeep b/experiments/selection_isar/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-1a/.gitkeep b/experiments/selection_isar/BS1534-1a/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-1a/config/.gitkeep b/experiments/selection_isar/BS1534-1a/config/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-1a/config/BS1534-1a.yml b/experiments/selection_isar/BS1534-1a/config/BS1534-1a.yml new file mode 100644 index 00000000..8491807f --- /dev/null +++ b/experiments/selection_isar/BS1534-1a/config/BS1534-1a.yml @@ -0,0 +1,91 @@ +--- +################################################ +# General configuration +################################################ + +name: BS1534-1a +master_seed: 25 +prerun_seed: 2 + +input_path: "experiments/selection/BS1534-1a/proc_input" +output_path: "experiments/selection/BS1534-1a/proc_output" +use_windows_codec_binaries: true +condition_in_output_filename: true + +################################################ +### Input configuration +################################################ +input: + fmt: "HOA3" + fs: 48000 + aligned_to: + len: 20 + +################################################ +### Pre-processing on individual 
items +################################################ +preprocessing: + mask: "20KBP" + loudness: -26 + window: 100 + +################################################ +### Pre-processing on whole signal(s) +################################################ +preprocessing_2: + concatenate_input: false + preamble_noise: false + repeat_signal: true + +################################################ +### Configuration for conditions under test +################################################ +conditions_to_generate: + ### IVAS conditions ############################### + c01: + type: ivas + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + c02: + type: ivas + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + out_fc: 7000 + c03: + type: ivas_transcoding + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + trans_bitrate: 256000 + trans_cod: + fmt: "STEREO" + trans_dec: + fmt: "STEREO" + c04: + type: ivas_split_rend_full + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL_SPLIT_CODED" + split_rend: + fmt: "BINAURAL" + bitrate: 256000 + dof: 0 + framesize: 10 + +################################################ +### Post-processing +################################################ +postprocessing: + fmt: "BINAURAL" + fs: 48000 + loudness: -26 diff --git a/experiments/selection_isar/BS1534-1a/proc_input_a/.gitkeep b/experiments/selection_isar/BS1534-1a/proc_input_a/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-1a/proc_output_a/.gitkeep b/experiments/selection_isar/BS1534-1a/proc_output_a/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-2a/.gitkeep b/experiments/selection_isar/BS1534-2a/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-2a/config/.gitkeep b/experiments/selection_isar/BS1534-2a/config/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-2a/config/BS1534-2a.yml b/experiments/selection_isar/BS1534-2a/config/BS1534-2a.yml new file mode 100644 index 00000000..00734958 --- /dev/null +++ b/experiments/selection_isar/BS1534-2a/config/BS1534-2a.yml @@ -0,0 +1,91 @@ +--- +################################################ +# General configuration +################################################ + +name: BS1534-2a +master_seed: 25 +prerun_seed: 2 + +input_path: "experiments/selection/BS1534-2a/proc_input" +output_path: "experiments/selection/BS1534-2a/proc_output" +use_windows_codec_binaries: true +condition_in_output_filename: true + +################################################ +### Input configuration +################################################ +input: + fmt: "7_1_4" + fs: 48000 + aligned_to: + len: 20 + +################################################ +### Pre-processing on individual items +################################################ +preprocessing: + mask: "20KBP" + loudness: -26 + window: 100 + +################################################ +### Pre-processing on whole signal(s) +################################################ +preprocessing_2: + concatenate_input: false + preamble_noise: false + repeat_signal: true + +################################################ +### Configuration for conditions under test +################################################ +conditions_to_generate: + ### IVAS conditions ############################### + c01: + type: ivas + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + c02: + type: ivas + bitrates: + 
- 512000 + cod: + dec: + fmt: "BINAURAL" + out_fc: 7000 + c03: + type: ivas_transcoding + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + trans_bitrate: 256000 + trans_cod: + fmt: "STEREO" + trans_dec: + fmt: "STEREO" + c04: + type: ivas_split_rend_full + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL_SPLIT_CODED" + split_rend: + fmt: "BINAURAL" + bitrate: 256000 + dof: 0 + framesize: 10 + +################################################ +### Post-processing +################################################ +postprocessing: + fmt: "BINAURAL" + fs: 48000 + loudness: -26 diff --git a/experiments/selection_isar/BS1534-2a/proc_input_a/.gitkeep b/experiments/selection_isar/BS1534-2a/proc_input_a/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-2a/proc_output_a/.gitkeep b/experiments/selection_isar/BS1534-2a/proc_output_a/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-3a/.gitkeep b/experiments/selection_isar/BS1534-3a/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-3a/config/.gitkeep b/experiments/selection_isar/BS1534-3a/config/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-3a/config/BS1534-3a.yml b/experiments/selection_isar/BS1534-3a/config/BS1534-3a.yml new file mode 100644 index 00000000..bbb1ceab --- /dev/null +++ b/experiments/selection_isar/BS1534-3a/config/BS1534-3a.yml @@ -0,0 +1,91 @@ +--- +################################################ +# General configuration +################################################ + +name: BS1534-3a +master_seed: 25 +prerun_seed: 2 + +input_path: "experiments/selection/BS1534-3a/proc_input" +output_path: "experiments/selection/BS1534-3a/proc_output" +use_windows_codec_binaries: true +condition_in_output_filename: true + +################################################ +### Input configuration +################################################ +input: + fmt: "ISM4" + fs: 48000 + aligned_to: + len: 20 + +################################################ +### Pre-processing on individual items +################################################ +preprocessing: + mask: "20KBP" + loudness: -26 + window: 100 + +################################################ +### Pre-processing on whole signal(s) +################################################ +preprocessing_2: + concatenate_input: false + preamble_noise: false + repeat_signal: true + +################################################ +### Configuration for conditions under test +################################################ +conditions_to_generate: + ### IVAS conditions ############################### + c01: + type: ivas + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + c02: + type: ivas + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + out_fc: 7000 + c03: + type: ivas_transcoding + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + trans_bitrate: 256000 + trans_cod: + fmt: "STEREO" + trans_dec: + fmt: "STEREO" + c04: + type: ivas_split_rend_full + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL_SPLIT_CODED" + split_rend: + fmt: "BINAURAL" + bitrate: 256000 + dof: 0 + framesize: 10 + +################################################ +### Post-processing +################################################ +postprocessing: + fmt: "BINAURAL" + fs: 48000 + loudness: -26 diff --git a/experiments/selection_isar/BS1534-3a/proc_input_a/.gitkeep 
b/experiments/selection_isar/BS1534-3a/proc_input_a/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-3a/proc_output_a/.gitkeep b/experiments/selection_isar/BS1534-3a/proc_output_a/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-4a/.gitkeep b/experiments/selection_isar/BS1534-4a/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-4a/config/.gitkeep b/experiments/selection_isar/BS1534-4a/config/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-4a/config/BS1534-4a.yml b/experiments/selection_isar/BS1534-4a/config/BS1534-4a.yml new file mode 100644 index 00000000..e408cb57 --- /dev/null +++ b/experiments/selection_isar/BS1534-4a/config/BS1534-4a.yml @@ -0,0 +1,95 @@ +--- +################################################ +# General configuration +################################################ + +name: BS1534-4a +master_seed: 25 +prerun_seed: 2 + +input_path: "experiments/selection/BS1534-4a/proc_input" +output_path: "experiments/selection/BS1534-4a/proc_output" +use_windows_codec_binaries: true +condition_in_output_filename: true + +################################################ +### Input configuration +################################################ +input: + fmt: "HOA2" + fs: 48000 + aligned_to: + len: 20 + +################################################ +### Pre-processing on individual items +################################################ +preprocessing: + mask: "20KBP" + loudness: -26 + window: 100 + +################################################ +### Pre-processing on whole signal(s) +################################################ +preprocessing_2: + concatenate_input: false + preamble_noise: false + repeat_signal: true + +################################################ +### Configuration for conditions under test +################################################ +conditions_to_generate: + ### IVAS conditions ############################### + c01: + type: ivas + bitrates: + - 512000 + cod: + fmt: "MASA2DIR2" + dec: + fmt: "BINAURAL" + c02: + type: ivas + bitrates: + - 512000 + cod: + fmt: "MASA2DIR2" + dec: + fmt: "BINAURAL" + out_fc: 7000 + c03: + type: ivas_transcoding + bitrates: + - 512000 + cod: + fmt: "MASA2DIR2" + dec: + fmt: "BINAURAL" + trans_bitrate: 256000 + trans_cod: + fmt: "STEREO" + trans_dec: + fmt: "STEREO" + c04: + type: ivas_split_rend_full + bitrates: + - 512000 + cod: + fmt: "MASA2DIR2" + dec: + fmt: "BINAURAL_SPLIT_CODED" + split_rend: + fmt: "BINAURAL" + bitrate: 256000 + dof: 0 + framesize: 10 + +################################################ +### Post-processing +################################################ +postprocessing: + fmt: "BINAURAL" + fs: 48000 + loudness: -26 diff --git a/experiments/selection_isar/BS1534-4a/proc_input_a/.gitkeep b/experiments/selection_isar/BS1534-4a/proc_input_a/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/BS1534-4a/proc_output_a/.gitkeep b/experiments/selection_isar/BS1534-4a/proc_output_a/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/experiments/selection_isar/README.md b/experiments/selection_isar/README.md new file mode 100644 index 00000000..6224ade3 --- /dev/null +++ b/experiments/selection_isar/README.md @@ -0,0 +1,86 @@ + + +# IVAS ISAR Selection Experiments + +This directory contains input and configuration files for the ISAR 
selection tests based on [S4-240396](https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/TSGS4_127_Sophia-Antipolis/Docs/S4-240396.zip). + +All tests use the BS.1534 (MUSHRA) test methodology. + +## Experiments + +| Experiment | Input format | +| :--------: | :----------: | +| BS1534-1a | HOA3 | +| BS1534-2a | MASA | +| BS1534-3a | 7_1_4 | +| BS1534-4a | ISM4 | + +Note: BS1534-2a requires FOA input files to generate MASA2DIR1. To use MASA2DIR2 (two directions) HOA2 must be used. + +## Setup instructions + +1. All required binaries should be available in $PATH or in the [bin directory](../../ivas_processing_scripts/bin/). + - Ensure split rendering is enabled for the relevant binaries. + - To use a specific binary for an IVAS condition, the key `bin` should be added under either the parent keys `cod`, `dec` or `split_rend` to point to `IVAS_cod`, `IVAS_dec` or `IVAS_rend` respectively. +2. Input files should be placed in the respective directories according to format: + - For head-tracking trajectories, the files must be named `item_name.wav.pre.csv` and `item_name.wav.post.csv` for split-prerendering and post-rendering trajectories respectively. The prerendering trajectory is only used for split rendering. The post rendering trajectory is used as the trajectory for the final stage, regardless of split rendering. + - For ISM metadata, files must be named `item_name.wav.0.csv` - `item_name.wav.3.csv` as for the regular IVAS selection tests. +3. Execute `python generate_test_isar.py BS1534-1a,a` where the input argument is the relevant experiment-lab pair. Currently only one lab per experiment is configured. + +## Preprocessing for trajectory nullification + +For setting up a test with trajectory nullification, the input items must be preprocessed according to the specific trajectory. + +This preprocessing consists of two main steps. First the trajectory must be inverted to apply the inverse rotation to the source audio. + +This can be done with the help of the [trajectories submodule](../../ivas_processing_scripts/trajectories/README.md) by running the command: + +```sh +python -m ivas_processing_scripts.trajectories --invert in.csv out_inverted.csv +``` + +Then the input item needs to be rotated in place using the inverted trajectory, which can be performed using the `--in_trajectory` argument of the [audiotools module](../../examples/audiotools.ipynb): + +```sh +python -m ivas_processing_scripts.audiotools -i input.wav -if IN_FORMAT -of IN_FORMAT -it out_inverted.csv -o out.wav +``` + +Inversion may be performed directly for SBA and MC formats. + +For ISM format it is only possible to perform this on a 20ms basis due to metadata granularity. + +For MASA format, the masaAnalyzer must be run on an inverted SBA input to generate the corresponding MASA transport channels and metadata. + +Refer to [_Tdoc S4-240254: Trajectory Nullification for Binaural Renderer Evaluation, Fraunhofer IIS_](https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/TSGS4_127_Sophia-Antipolis/Docs/S4-240254.zip) for further details. + +After generation of the "inverted" item, this may be directly used as input for the processing scripts. 
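For illustration only, the following sketch shows what the `--invert` step above amounts to for a purely quaternion-based trajectory; the `trajectories` submodule remains the supported way to generate the inverted file. The sketch assumes the CSV holds one quaternion per row with the scalar part first (which is what the `q_conj` helper from `ivas_processing_scripts.audiotools.quaternions`, added in this merge, expects), that rows whose first column is `-3` encode Euler angles (as in `read_trajectory`) and are simply rejected here, and that the file names are the placeholders from the commands above.

```python
import numpy as np

from ivas_processing_scripts.audiotools.quaternions import q_conj

# read the trajectory: one quaternion (scalar part first) per row
trj = np.genfromtxt("in.csv", delimiter=",")

# Euler-angle rows (first column == -3) are not handled in this sketch
if np.any(trj[:, 0] == -3):
    raise NotImplementedError("Euler trajectories are not covered by this sketch")

# for a unit quaternion the inverse rotation is simply its conjugate
trj_inv = np.stack([q_conj(q) for q in trj])

np.savetxt("out_inverted.csv", trj_inv, delimiter=",")
```

The resulting file is then passed to `-it`/`--in_trajectory` in the audiotools call shown above to rotate the item in place.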
diff --git a/generate_test.py b/generate_test.py index a3b9b4a6..c5dff6df 100755 --- a/generate_test.py +++ b/generate_test.py @@ -162,7 +162,7 @@ def create_experiment_setup(experiment, lab) -> list[Path]: # bg noise SNR only differs from default config for some experiments if experiment in ["P800-5", "P800-9"] and cat_num >= 3: bg_noise_pre_proc_2["snr"] = 15 - if cfg.preprocessing_2.get("concatenate_input", None) is not None: + if cfg.preprocessing_2.get("concatenate_input", None): cfg.preprocessing_2["concatenation_order"] = concatenation_order( lab, experiment, cat_num ) diff --git a/generate_test_isar.py b/generate_test_isar.py new file mode 100755 index 00000000..d0a90c3b --- /dev/null +++ b/generate_test_isar.py @@ -0,0 +1,161 @@ +#! /usr/bin/env python3 + +# +# (C) 2022-2024 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+# + +import argparse +from pathlib import Path + +from ivas_processing_scripts import config +from ivas_processing_scripts import main as generate_test + +HERE = Path(__file__).parent.absolute() +LAB_IDS = ["a", "b"] +EXPERIMENTS_BS1534 = [f"BS1534-{i}{x}" for i in range(1, 5) for x in LAB_IDS] + + +def generate_tests(exp_lab_pairs, create_cfg_only=False): + """ + Create configs and run them for all given experiment/lab pairs + """ + # get config paths for all given experiment/lab combis + cfgs = [create_experiment_setup(exp, lab) for exp, lab in exp_lab_pairs] + # flatten into single list + cfgs = [c for cl in cfgs for c in cl] + + if create_cfg_only: + print("Configs generated:") + print("\n".join(["- " + cfg.name for cfg in cfgs])) + return + + for cfg in cfgs: + generate_test(Arguments(str(cfg))) + + +class Arguments: + def __init__(self, config): + self.config = config + self.debug = False + + +def create_experiment_setup(experiment, lab) -> list[Path]: + """ + Create the config files for all categories for the given experiment and lab id. + """ + default_cfg_path = HERE.joinpath( + f"experiments/selection_isar/{experiment}/config/{experiment}.yml" + ) + + num_categories = 1 + categories = [f"cat{i}" for i in range(1, num_categories + 1)] + + # calculate the seed value according to processing plan + experiments = EXPERIMENTS_BS1534 + seed = 101 + experiments.index(experiment) * 4 + LAB_IDS.index(lab) + + base_path = HERE.joinpath(f"experiments/selection_isar/{experiment}") + + cfgs = list() + for cat in categories: + suffix = cat + f"-lab_{lab}" + + input_path = base_path.joinpath(f"proc_input_{lab}") + output_path = base_path.joinpath(f"proc_output_{lab}") + + cfg_path = default_cfg_path.parent.joinpath(f"{experiment}-lab_{lab}.yml") + + cfgs.append(cfg_path) + + # set new lab- and category-dependent values + cfg = config.TestConfig(default_cfg_path) + cfg.name = f"{experiment}{suffix}" + cfg.prerun_seed = seed + cfg.input_path = str(input_path) + cfg.output_path = str(output_path) + + cat_num = int(cat[-1]) + + if cfg.preprocessing_2.get("concatenate_input", None): + cfg.preprocessing_2["concatenation_order"] = concatenation_order( + lab, experiment, cat_num + ) + + # ensure that necessary directories are there + input_path.mkdir(parents=True, exist_ok=True) + output_path.mkdir(parents=True, exist_ok=True) + + # write out config + cfg.to_file(cfg_path) + + # Return the list of configs that were generated. Not strictly necessary, but makes testing easier. + return cfgs + + +def exp_lab_pair(arg): + """ + Validation function for command line input + """ + exp, lab = arg.split(",") + + msg = "'{}' is not a valid {}. Possible values are: {}" + if exp not in EXPERIMENTS_BS1534: + experiments_msg = ",".join(EXPERIMENTS_BS1534) + err_msg = msg.format(exp, "experiment name", f"{{{experiments_msg}}}.") + raise ValueError(err_msg) + if lab not in LAB_IDS: + labs_msg = ",".join(LAB_IDS) + err_msg = msg.format(lab, "lab identifier", labs_msg) + raise ValueError(err_msg) + + return exp, lab + + +def concatenation_order(lab_id, experiment, category): + exp_id = f"p0{experiment[-1]}" + return [f"{lab_id}{exp_id}a{category}s0{i}.wav" for i in range(1, 8)] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Generate config files and process files for selecton experiments. Experiment names and lab ids must be given as comma-separated pairs (e.g. 
'P800-5,b BS1534-4a,d ...')" + ) + parser.add_argument( + "exp_lab_pairs", + type=exp_lab_pair, + nargs="+", + help="The combinations of experiment/lab-id that you want to generate, separated by whitespace. Experiment and lab id need to be separated by a comma.", + ) + parser.add_argument( + "--create_cfg_only", + action="store_true", + help="If given, only create the configs and folder structure without processing items", + ) + args = parser.parse_args() + generate_tests(args.exp_lab_pairs, args.create_cfg_only) diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index 7f6598bf..3d82c730 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -53,6 +53,7 @@ from ivas_processing_scripts.processing.processing import ( process_item, reorder_items_list, ) +from ivas_processing_scripts.trajectories.trajectories import check_trajectories from ivas_processing_scripts.utils import ( DirManager, apply_func_parallel, @@ -95,6 +96,8 @@ def main(args): if hasattr(args, "multiprocessing"): cfg.multiprocessing = args.multiprocessing + traj_input_path = cfg.input_path + # set up processing chains chains.init_processing_chains(cfg) @@ -152,11 +155,19 @@ def main(args): for i, meta in enumerate(metadata): meta.extend(metadata_MASA[i]) - if not cfg.input["fmt"].startswith("ISM") and "MASA" not in cfg.input["fmt"]: + if not (cfg.input["fmt"].startswith("ISM") or "MASA" in cfg.input["fmt"]): metadata = [None] * len(cfg.items_list) cfg.metadata_path = metadata + # check for head tracking trajectories in input path (items might be copied to 20ms aligned folder) + trajectories = check_trajectories(cfg.items_list, traj_input_path) + if trajectories: + cfg.trajectories = trajectories + # print info about found and used trajectories + for i, t in zip(cfg.trajectories, cfg.items_list): + logger.debug(f" Head tracking trajectory pair {i}: {t}") + # run preprocessing only once if hasattr(cfg, "preprocessing"): # save process info for background noise @@ -182,12 +193,20 @@ def main(args): logger.info(" Generating remaining conditions with postprocessing") item_args = list() - for (chain, tmp_dir, out_dir), (item, metadata) in product( + for (chain, tmp_dir, out_dir), (item, metadata, trajectories) in product( zip(cfg.proc_chains, cfg.tmp_dirs, cfg.out_dirs), - zip(cfg.items_list, cfg.metadata_path), + zip(cfg.items_list, cfg.metadata_path, cfg.trajectories), ): item_args.append( - (item, tmp_dir, out_dir, chain["processes"], logger, metadata) + ( + item, + tmp_dir, + out_dir, + chain["processes"], + logger, + metadata, + trajectories, + ) ) if cfg.multiprocessing: diff --git a/ivas_processing_scripts/audiotools/__init__.py b/ivas_processing_scripts/audiotools/__init__.py index 7cb4f06c..a3d1ed1d 100755 --- a/ivas_processing_scripts/audiotools/__init__.py +++ b/ivas_processing_scripts/audiotools/__init__.py @@ -96,13 +96,20 @@ def add_processing_args(group, input=True): default=None, ) group.add_argument( - f"-{ps}t", + f"-{ps}x", f"--{p}_trim", type=float, nargs=2, metavar=("PRE_TRIM", "POST_TRIM"), help="Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence), (default = %(default)s)", ) + group.add_argument( + f"-{ps}t", + f"--{p}_trajectory", + type=str, + help="Head-tracking trajectory file for input pre-rotation or binaural output (default = %(default)s)", + default=None, + ) group.add_argument( f"-{ps}pn", f"--{p}_pad_noise", @@ -166,13 +173,6 @@ def get_args(): help="Apply limiting to output (default = 
%(default)s)", action="store_true", ) - output_parser.add_argument( - "-t", - "--trajectory", - type=str, - help="Head-tracking trajectory file for binaural output (default = %(default)s)", - default=None, - ) output_parser.add_argument( "-bd", "--bin_dataset", diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index e3795dbd..47331366 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -63,7 +63,6 @@ class Audio(ABC): self.audio = None self.fs = None self.num_channels = None - # self.logger = None # TODO needed? def __repr__(self): return f"{self.__class__} : {self.__dict__}" diff --git a/ivas_processing_scripts/audiotools/constants.py b/ivas_processing_scripts/audiotools/constants.py index c5dbe6ca..7b9daf8a 100755 --- a/ivas_processing_scripts/audiotools/constants.py +++ b/ivas_processing_scripts/audiotools/constants.py @@ -45,6 +45,9 @@ BINAURAL_AUDIO_FORMATS = { "BINAURAL_ROOM_REVERB": { # for IVAS_dec and IVAS_rend "num_channels": 2, }, + "BINAURAL_SPLIT_CODED": { # for IVAS_dec and IVAS_rend + "num_channels": 2, + }, } BINAURAL_LFE_GAIN = 10 ** (5.5 / 20) diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 9d53f054..cb63cbeb 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -264,7 +264,7 @@ def process_audio( spatial_distortion_frequency: Optional[float] = None, logger: Optional[logging.Logger] = None, ) -> None: - """Perform (pre-/pos-) processing of audio""" + """Perform (pre-/post-) processing of audio""" if fs is None: fs = x.fs @@ -388,9 +388,9 @@ def format_conversion( # format conversion # check if input and output format are the same - if (fmt := input.name) == output.name or ( - input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL") - ): + if ( + ((fmt := input.name) == output.name) and kwargs.get("in_trajectory") is None + ) or (input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL")): output.audio = input.audio if fmt.startswith("MASA"): output.metadata_file = input.metadata_file diff --git a/ivas_processing_scripts/audiotools/convert/channelbased.py b/ivas_processing_scripts/audiotools/convert/channelbased.py index dbb8b160..05c60abb 100755 --- a/ivas_processing_scripts/audiotools/convert/channelbased.py +++ b/ivas_processing_scripts/audiotools/convert/channelbased.py @@ -57,9 +57,15 @@ from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu def convert_channelbased( cba: audio.ChannelBasedAudio, out: audio.Audio, + in_trajectory: Optional[Union[str, Path]] = None, **kwargs, ) -> audio.Audio: """Convert channel-based audio to the requested output format""" + + # pre-rotation if specified + if in_trajectory is not None: + cba.audio = rotate_cba(cba, in_trajectory) + # CBA -> Binaural if isinstance(out, audio.BinauralAudio): render_cba_to_binaural(cba, out, **kwargs) @@ -83,7 +89,7 @@ def convert_channelbased( def render_cba_to_binaural( cba: audio.ChannelBasedAudio, bin: audio.BinauralAudio, - trajectory: Optional[Union[str, Path]] = None, + out_trajectory: Optional[Union[str, Path]] = None, bin_dataset: Optional[str] = None, bin_lfe_gain: Optional[float] = None, **kwargs, @@ -97,7 +103,7 @@ def render_cba_to_binaural( Channel-based input audio bin: audio.BinauralAudio Binaural output audio - trajectory: Optional[Union[str, Path]] + out_trajectory: 
Optional[Union[str, Path]] Head rotation trajectory path bin_dataset: Optional[str] Name of binaural dataset wihtout prefix or suffix @@ -122,8 +128,8 @@ def render_cba_to_binaural( cba.fs = 48000 bin.fs = 48000 - if trajectory is not None: - cba.audio = rotate_cba(cba, trajectory) + if out_trajectory is not None: + cba.audio = rotate_cba(cba, out_trajectory) IR, _, latency_smp = load_ir(cba.name, bin.name, bin_dataset) @@ -151,7 +157,7 @@ def render_custom_ls_binaural( output: audio.BinauralAudio, IR: np.ndarray, SourcePosition: np.ndarray, - trajectory: str, + out_trajectory: str, ): # TODO rework impl. (with EFAP) # logger.info(" Processing channels on custom LS layout") @@ -192,7 +198,7 @@ def render_custom_ls_binaural( # ls_azi = np.repeat(ls_azi_all[i_ls], N_frames) # ls_ele = np.repeat(ls_ele_all[i_ls], N_frames) # - # azi, ele = rotateISM(ls_azi, ls_ele, trajectory=trajectory) + # azi, ele = rotateISM(ls_azi, ls_ele, trajectory=out_trajectory) # # y += binaural_fftconv_framewise( # custom_ls.audio[:, i_chan], @@ -205,7 +211,7 @@ def render_custom_ls_binaural( # i_ls += 1 # # return y - return + raise NotImplementedError("Custom LS rendering currently not implemented!") def render_cba_to_cba( diff --git a/ivas_processing_scripts/audiotools/convert/masa.py b/ivas_processing_scripts/audiotools/convert/masa.py index c33e49f9..d3cb80bc 100755 --- a/ivas_processing_scripts/audiotools/convert/masa.py +++ b/ivas_processing_scripts/audiotools/convert/masa.py @@ -71,7 +71,8 @@ def convert_masa( def render_masa_to_binaural( masa: audio.MetadataAssistedSpatialAudio, bin: audio.BinauralAudio, - trajectory: Optional[Union[str, Path]] = None, + in_trajectory: Optional[Union[str, Path]] = None, + out_trajectory: Optional[Union[str, Path]] = None, bin_dataset: Optional[str] = None, **kwargs, ) -> None: @@ -84,7 +85,9 @@ def render_masa_to_binaural( MASA input audio bin: audio.BinauralAudio Output binaural audio - trajectory: Optional[Union[str, Path]] + in_trajectory: Optional[Union[str, Path]] + Head rotation trajectory path + out_trajectory: Optional[Union[str, Path]] Head rotation trajectory path bin_dataset: Optional[str] Name of binaural dataset without prefix or suffix @@ -96,11 +99,11 @@ def render_masa_to_binaural( render_masa_to_cba(masa, cba_tmp) - channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory) + channelbased.render_cba_to_binaural(cba_tmp, bin, out_trajectory) else: - if trajectory is not None: + if in_trajectory is not None or out_trajectory is not None: warn( - f"Head-rotation not supported by MasaRenderer! Trajectory {trajectory} will be ignored!" + f"Head-rotation not supported by MasaRenderer! Trajectory {in_trajectory or out_trajectory} will be ignored!" ) if bin_dataset is not None: warn( diff --git a/ivas_processing_scripts/audiotools/convert/objectbased.py b/ivas_processing_scripts/audiotools/convert/objectbased.py index 94c3f792..5ad911b5 100755 --- a/ivas_processing_scripts/audiotools/convert/objectbased.py +++ b/ivas_processing_scripts/audiotools/convert/objectbased.py @@ -60,10 +60,24 @@ from ivas_processing_scripts.utils import apply_func_parallel def convert_objectbased( oba: audio.ObjectBasedAudio, out: audio.Audio, + in_trajectory: Optional[Union[str, Path]] = None, **kwargs, ) -> audio.Audio: """Convert an ISM signal to the requested output format""" + # pre-rotation if specified - 20ms only! 
+    if in_trajectory is not None:
+        # repeat each value four times since head rotation data is on sub-frame basis
+        azi = np.repeat(oba.obj_pos[:, 0], 4)
+        ele = np.repeat(oba.obj_pos[:, 1], 4)
+
+        # apply head-rotation trajectory
+        azi, ele = rotate_oba(azi, ele, in_trajectory)
+
+        # update object metadata
+        oba.obj_pos[:, 0] = azi[::4]
+        oba.obj_pos[:, 1] = ele[::4]
+
     # OBA -> Binaural
     if isinstance(out, audio.BinauralAudio):
         render_oba_to_binaural(oba, out, **kwargs)
@@ -86,7 +100,7 @@ def convert_objectbased(
 def render_oba_to_binaural(
     oba: audio.ObjectBasedAudio,
     bin: audio.BinauralAudio,
-    trajectory: Optional[Union[str, Path]] = None,
+    out_trajectory: Optional[Union[str, Path]] = None,
     bin_dataset: Optional[str] = None,
     **kwargs,
 ) -> None:
@@ -99,21 +113,19 @@ def render_oba_to_binaural(
         Object based input audio
     bin: audio.BinauralAudio
         Binaural output audio
-    trajectory: Optional[Union[str, Path]]
+    out_trajectory: Optional[Union[str, Path]]
         Head rotation trajectory
     bin_dataset: Optional[str]
         Name of binaural dataset, if None default dataset is used
     """
 
-    # bin.audio = np.zeros([oba.audio.shape[0], bin.num_channels])
-
     if "ROOM" in bin.name:
         cba_tmp = audio.fromtype("7_1_4")
         cba_tmp.fs = oba.fs
 
         render_oba_to_cba(oba, cba_tmp)
 
-        render_cba_to_binaural(cba_tmp, bin, trajectory)
+        render_cba_to_binaural(cba_tmp, bin, out_trajectory)
     else:
         IR, SourcePosition, latency_smp = load_ir(oba.name, bin.name, bin_dataset)
 
@@ -130,7 +142,7 @@ def render_oba_to_binaural(
                 obj_idx,
                 obj_pos,
                 repeat(oba),
-                repeat(trajectory),
+                repeat(out_trajectory),
                 repeat(IR),
                 repeat(SourcePosition),
             ),
         ),
diff --git a/ivas_processing_scripts/audiotools/convert/scenebased.py b/ivas_processing_scripts/audiotools/convert/scenebased.py
index 1239d674..5a7e4b48 100755
--- a/ivas_processing_scripts/audiotools/convert/scenebased.py
+++ b/ivas_processing_scripts/audiotools/convert/scenebased.py
@@ -61,10 +61,15 @@ from ivas_processing_scripts.audiotools.wrappers.masaAnalyzer import masaAnalyze
 def convert_scenebased(
     sba: audio.SceneBasedAudio,
     out: audio.Audio,
+    in_trajectory: Optional[Union[str, Path]] = None,
     **kwargs,
 ) -> audio.Audio:
     """Convert scene-based audio to the requested output format"""
 
+    # pre-rotation if specified
+    if in_trajectory is not None:
+        sba.audio = rotate_sba(sba, in_trajectory)
+
     # SBA -> Binaural
     if isinstance(out, audio.BinauralAudio):
         render_sba_to_binaural(sba, out, **kwargs)
@@ -95,7 +100,7 @@ def convert_scenebased(
 def render_sba_to_binaural(
     sba: audio.SceneBasedAudio,
     bin: audio.BinauralAudio,
-    trajectory: Optional[Union[str, Path]] = None,
+    out_trajectory: Optional[Union[str, Path]] = None,
     bin_dataset: Optional[str] = None,
     **kwargs,
 ) -> None:
@@ -108,14 +113,14 @@ def render_sba_to_binaural(
         Input SBA audio
     bin: audio.BinauralAudio
         Output binaural audio
-    trajectory: Optional[Union[str, Path]]
+    out_trajectory: Optional[Union[str, Path]]
         Head rotation trajectory path
     bin_dataset: Optional[str]
         Name of binaural dataset without prefix or suffix
     """
 
-    if trajectory is not None:
-        sba.audio = rotate_sba(sba, trajectory)
+    if out_trajectory is not None:
+        sba.audio = rotate_sba(sba, out_trajectory)
 
     if "ROOM" in bin.name:
         cba_tmp = audio.fromtype("7_1_4")
 
         render_sba_to_cba(sba, cba_tmp)
 
-        channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory)
+        channelbased.render_cba_to_binaural(cba_tmp, bin, out_trajectory)
     else:
         IR, _, latency_smp = load_ir(sba.name, bin.name, bin_dataset)
 
@@ -196,7 +201,6 @@ def render_sba_to_masa(
sba_in: audio.SceneBasedAudio, masa_out: audio.MetadataAssistedSpatialAudio, ) -> None: - num_tcs = masa_out.num_channels md_out_path = masa_out.metadata_file diff --git a/ivas_processing_scripts/audiotools/quaternions.py b/ivas_processing_scripts/audiotools/quaternions.py new file mode 100755 index 00000000..802d32e5 --- /dev/null +++ b/ivas_processing_scripts/audiotools/quaternions.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2024 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + + +import numpy as np + +""" +Based on Jia, Y. B. (2008). Quaternions and rotations. Com S, 477(577), 15. 
+""" + + +################################### +# Operations on only one quaternion +################################### + + +def is_unitquat(q): + # the norm must be 1 for a unit quaternion + if q_norm2(q) == 1: + return True + else: + return False + + +def q_conj(q): + # quaternion conjugate + p = q.copy() + p[1:] = -p[1:] + return p + + +def q_norm2(q): + # quaternion norm + return np.sqrt(np.sum(q**2)) + + +def q_inv(q): + # quaternion inverse + return q_conj(q) / q_norm2(q) + + +def q_exp(q, e): + # quaternion exponentiation + p = np.zeros_like(q) + eps = np.zeros_like(q) + eps[0] = 0 + eps[1:] = q[1:] + norm_e = np.sqrt(q_norm2((e))) + + if norm_e == 0: + # real quaternion, reaise the real part + p[0] = q[0] ** e + p[1:] = 0 + else: + # go to the polar form + norm = np.sqrt(q_norm2(q)) + phi = np.arccos(q[0] / norm) + + eps = eps * (1 / norm_e) + + p[0] = norm * np.cos(e * phi) + p[1] = norm * eps[1] * np.sin(e * phi) + p[2] = norm * eps[2] * np.sin(e * phi) + p[3] = norm * eps[3] * np.sin(e * phi) + + return p + + +def q_log(q): + # quaternion logarithm + if not is_unitquat(q): + raise ValueError("Input must be a unit quaternion!") + + p = np.zeros_like(q) + + vec = q[1:] + vec_norm = np.sqrt(np.sum(vec**2)) + vec /= vec_norm + vec *= np.arccos(q[0]) + + p[1:] = vec + + return p + + +############################### +# Operations on two quaternions +############################### + + +def q_add(p, q): + # quaternion addition + return np.add(p, q) + + +def q_sub(p, q): + # quaternion subtraction + return np.sub(p, q) + + +def q_mul(p, q): + # quaternion multiplication + pq = np.zeros_like(q) + pq[0] = p[0] * q[0] - p[1] * q[1] - p[2] * q[2] - p[3] * q[3] + pq[1] = p[0] * q[1] + p[1] * q[0] + p[2] * q[3] - p[3] * q[2] + pq[2] = p[0] * q[2] + p[1] * q[3] + p[2] * q[0] - p[3] * q[1] + pq[3] = p[0] * q[3] - p[1] * q[2] + p[2] * q[1] + p[3] * q[0] + + return pq + + +def q_div(p, q): + # quaternion division + return q_mul(p, q_inv(q)) diff --git a/ivas_processing_scripts/audiotools/utils.py b/ivas_processing_scripts/audiotools/utils.py index b0b2824a..842d9555 100755 --- a/ivas_processing_scripts/audiotools/utils.py +++ b/ivas_processing_scripts/audiotools/utils.py @@ -38,7 +38,7 @@ from ivas_processing_scripts.audiotools.rotation import Euler2Quat, Quat2Euler def read_trajectory(trj_file: Path, return_quat=True): - trj = np.genfromtext(trj_file, delimiter=",") + trj = np.genfromtxt(trj_file, delimiter=",") if np.all(trj[:, 0] == -3): # Euler diff --git a/ivas_processing_scripts/constants.py b/ivas_processing_scripts/constants.py index 2119304b..02a8d2c3 100755 --- a/ivas_processing_scripts/constants.py +++ b/ivas_processing_scripts/constants.py @@ -49,6 +49,9 @@ SUPPORTED_CONDITIONS = { "esdru", "evs", "ivas", + "ivas_transcoding", + "ivas_split_rend_full", + "ivas_split_rend_ext", "mono_dmx", "spatial_distortion", } @@ -93,3 +96,11 @@ REQUIRED_KEYS_EVS = {"bitrates"} REQUIRED_KEYS_IVAS = {"bitrates"} REQUIRED_KEYS_MNRU = {"q"} REQUIRED_KEYS_ESDRU = {"alpha"} +REQUIRED_KEYS_IVAS_TRANSCODING = { + *REQUIRED_KEYS_IVAS, + "trans_bitrate", + "trans_cod", + "trans_dec", +} +REQUIRED_KEYS_IVAS_SPLIT_REND = {*REQUIRED_KEYS_IVAS, "split_rend"} +REQUIRED_KEYS_IVAS_SPLIT_REND_CFG = {"fmt", "bitrate", "dof"} diff --git a/ivas_processing_scripts/processing/chains.py b/ivas_processing_scripts/processing/chains.py index 3c0bbaf4..39a8d4bb 100755 --- a/ivas_processing_scripts/processing/chains.py +++ b/ivas_processing_scripts/processing/chains.py @@ -484,6 +484,7 @@ def get_processing_chain( "preamble": 
preamble, "use_windows_codec_binaries": cfg.use_windows_codec_binaries, "tx_condition": tx_condition, + "trajectory": dec_cfg.get("trajectory", None), } ) ) @@ -493,27 +494,202 @@ def get_processing_chain( tmp_in_fmt = dec_cfg.get("fmt", tmp_out_fmt)[0] else: tmp_in_fmt = dec_cfg.get("fmt", tmp_out_fmt) - else: - raise SystemExit(f"Unknown condition {condition}!") + elif cond_cfg["type"] == "ivas_transcoding": + cod_cfg = cond_cfg["cod"] + dec_cfg = cond_cfg["dec"] - # add optional IVAS_rend rendering step after each condition - if cond_cfg.get("ivas_rend", -1) != -1: - rend_cfg = cond_cfg["ivas_rend"] + trans_bitrate = cond_cfg["trans_bitrate"] + trans_cod_cfg = cond_cfg["trans_cod"] + trans_dec_cfg = cond_cfg["trans_dec"] + + # force this to be a single value for now + if isinstance(trans_bitrate, list): + trans_bitrate = trans_bitrate[0] + + if hasattr(cfg, "preprocessing_2"): + preamble = cfg.preprocessing_2.get("preamble", 0) + else: + preamble = 0 + + # if the encoding format differs from the format after the preprocessing, add format conversion stuff + if (cod_fmt := cod_cfg.get("fmt", tmp_in_fmt)) != tmp_in_fmt: + chain["processes"].append( + Postprocessing( + { + "in_fs": tmp_in_fs, + "in_fmt": tmp_in_fmt, + "out_fs": tmp_in_fs, + "out_fmt": cod_fmt, + "multiprocessing": cfg.multiprocessing, + "tx_condition": False, + }, + name="cod_fmt", + ) + ) + tmp_in_fmt = cod_fmt + + # allow list of output values for IVAS + tmp_out_fmt = dec_cfg.get("fmt", tmp_out_fmt) + if isinstance(tmp_out_fmt, list): + cond_fmt.extend(tmp_out_fmt) + tmp_out_fmt = tmp_out_fmt[0] + + # pre IVAS coding chain["processes"].append( - IVAS_rend( + IVAS( { "in_fmt": tmp_in_fmt, "in_fs": tmp_in_fs, - "out_fmt": rend_cfg.get("fmt", tmp_out_fmt), - "bin": get_abs_path(rend_cfg.get("bin", None)), - "opts": rend_cfg.get("opts"), + "out_fmt": tmp_out_fmt, + "out_fs": dec_cfg.get("fs", tmp_in_fs), + "bitrate": bitrate, + "cod_bin": get_abs_path(cod_cfg.get("bin", None)), + "cod_opts": cod_cfg.get("opts"), + "dec_bin": get_abs_path(dec_cfg.get("bin", None)), + "dec_opts": dec_cfg.get("opts"), + "multiprocessing": cfg.multiprocessing, + "tx": None, + "preamble": preamble, "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + "tx_condition": tx_condition, + "trajectory": dec_cfg.get("trajectory", None), } ) ) - # update values to reflect renderer output - tmp_in_fs = rend_cfg.get("fs", tmp_in_fs) - tmp_in_fmt = rend_cfg.get("fmt", tmp_out_fmt) + # update name to use correct trajectory + chain["processes"][-1].name = "ivas_trans_pre" + + tmp_out_fmt = trans_dec_cfg.get("fmt", tmp_out_fmt) + + # native transcoding + chain["processes"].append( + IVAS( + { + "in_fmt": tmp_out_fmt, + "in_fs": trans_dec_cfg.get("fs", tmp_in_fs), + "out_fmt": tmp_out_fmt, + "out_fs": trans_dec_cfg.get("fs", tmp_in_fs), + "bitrate": trans_bitrate, + "cod_bin": get_abs_path(trans_cod_cfg.get("bin", None)), + "cod_opts": trans_cod_cfg.get("opts"), + "dec_bin": get_abs_path(trans_dec_cfg.get("bin", None)), + "dec_opts": trans_dec_cfg.get("opts"), + "multiprocessing": cfg.multiprocessing, + "tx": None, + "preamble": preamble, + "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + "tx_condition": tx_condition, + } + ) + ) + # update name to avoid using the same tmp filenames + chain["processes"][-1].name = "ivas_trans_post" + + # update values to reflect transcoding decoder output + tmp_in_fs = trans_dec_cfg.get("fs", tmp_in_fs) + if isinstance(trans_dec_cfg.get("fmt", tmp_out_fmt), list): + tmp_in_fmt = trans_dec_cfg.get("fmt", 
tmp_out_fmt)[0] + else: + tmp_in_fmt = trans_dec_cfg.get("fmt", tmp_out_fmt) + elif cond_cfg["type"].startswith("ivas_split_rend"): + cod_cfg = cond_cfg["cod"] + dec_cfg = cond_cfg["dec"] + + split_rend_cfg = cond_cfg["split_rend"] + is_full_chain = True if cond_cfg["type"].endswith("full") else False + + if hasattr(cfg, "preprocessing_2"): + preamble = cfg.preprocessing_2.get("preamble", 0) + else: + preamble = 0 + + # if the encoding format differs from the format after the preprocessing, add format conversion stuff + if (cod_fmt := cod_cfg.get("fmt", tmp_in_fmt)) != tmp_in_fmt: + chain["processes"].append( + Postprocessing( + { + "in_fs": tmp_in_fs, + "in_fmt": tmp_in_fmt, + "out_fs": tmp_in_fs, + "out_fmt": cod_fmt, + "multiprocessing": cfg.multiprocessing, + "tx_condition": False, + }, + name="cod_fmt", + ) + ) + tmp_in_fmt = cod_fmt + + # allow list of output values for IVAS + tmp_out_fmt = dec_cfg.get("fmt", tmp_out_fmt) + if isinstance(tmp_out_fmt, list): + cond_fmt.extend(tmp_out_fmt) + tmp_out_fmt = tmp_out_fmt[0] + + # IVAS split pre rendering + if is_full_chain: + chain["processes"].append( + IVAS( + { + "in_fmt": tmp_in_fmt, + "in_fs": tmp_in_fs, + "out_fmt": tmp_out_fmt, + "out_fs": dec_cfg.get("fs", tmp_in_fs), + "bitrate": bitrate, + "cod_bin": get_abs_path(cod_cfg.get("bin", None)), + "cod_opts": cod_cfg.get("opts"), + "dec_bin": get_abs_path(dec_cfg.get("bin", None)), + "dec_opts": dec_cfg.get("opts"), + "multiprocessing": cfg.multiprocessing, + "tx": None, + "preamble": preamble, + "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + "tx_condition": tx_condition, + "split_rend_cfg": split_rend_cfg, + } + ) + ) + else: + raise NotImplementedError( + "IVAS rend currently unsupported for split-pre rendering" + ) + # TODO IVAS_rend as split-pre renderer + chain["processes"].append( + IVAS_rend( + { + "in_fmt": tmp_in_fmt, + "in_fs": tmp_in_fs, + "out_fmt": tmp_out_fmt, + "bin": get_abs_path(split_rend_cfg.get("bin", None)), + "opts": split_rend_cfg.get("opts"), + "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + "split_rend_cfg": split_rend_cfg, + } + ) + ) + + # split post rendering + chain["processes"].append( + IVAS_rend( + { + "in_fmt": tmp_out_fmt, + "in_fs": tmp_in_fs, + "out_fmt": split_rend_cfg["fmt"], + "bin": get_abs_path(split_rend_cfg.get("bin", None)), + "opts": split_rend_cfg.get("opts"), + "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + } + ) + ) + + # update values to reflect post renderer output + tmp_in_fs = dec_cfg.get("fs", tmp_in_fs) + if isinstance(split_rend_cfg.get("fmt", tmp_out_fmt), list): + tmp_in_fmt = split_rend_cfg.get("fmt", tmp_out_fmt)[0] + else: + tmp_in_fmt = split_rend_cfg.get("fmt", tmp_out_fmt) + else: + raise SystemExit(f"Unknown condition {condition}!") # add postprocessing step based on condition post_fmt = post_cfg.get("fmt") @@ -547,6 +723,24 @@ def get_processing_chain( if cond_cfg.get("out_fc") is not None: tmp_lp_cutoff = cond_cfg.get("out_fc") + # add optional IVAS_rend rendering step after each condition + if rend_cfg := cond_cfg.get("ivas_rend"): + chain["processes"].append( + IVAS_rend( + { + "in_fmt": tmp_in_fmt, + "in_fs": tmp_in_fs, + "out_fmt": rend_cfg.get("fmt", tmp_out_fmt), + "bin": get_abs_path(rend_cfg.get("bin")), + "opts": rend_cfg.get("opts"), + "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + } + ) + ) + # update values to reflect renderer output + tmp_in_fs = rend_cfg.get("fs", tmp_in_fs) + tmp_in_fmt = rend_cfg.get("fmt", tmp_out_fmt) + 
chain["processes"].append( Postprocessing( { @@ -558,7 +752,6 @@ def get_processing_chain( "bin_dataset": post_cfg.get("bin_dataset"), "bin_lfe_gain": post_cfg.get("bin_lfe_gain"), "limit": post_cfg.get("limit", True), - "trajectory": get_abs_path(post_cfg.get("trajectory", None)), "multiprocessing": cfg.multiprocessing, "mnru_q": tmp_mnru_q, "esdru_alpha": tmp_esdru_alpha, diff --git a/ivas_processing_scripts/processing/config.py b/ivas_processing_scripts/processing/config.py index 315ddae9..fdcc787d 100755 --- a/ivas_processing_scripts/processing/config.py +++ b/ivas_processing_scripts/processing/config.py @@ -43,6 +43,9 @@ from ivas_processing_scripts.constants import ( REQUIRED_KEYS_ESDRU, REQUIRED_KEYS_EVS, REQUIRED_KEYS_IVAS, + REQUIRED_KEYS_IVAS_SPLIT_REND, + REQUIRED_KEYS_IVAS_SPLIT_REND_CFG, + REQUIRED_KEYS_IVAS_TRANSCODING, REQUIRED_KEYS_MNRU, SUPPORTED_CONDITIONS, ) @@ -65,8 +68,18 @@ def merge_dicts(base: dict, other: dict) -> None: def get_default_config_for_codecs(codec_name: str, ext_with_dot: str = "") -> dict: + is_transcoding = "transcoding" in codec_name + is_splitrend = "ISAR" in codec_name + + if is_splitrend: + post_rend_name = codec_name.split(" ")[1] + post_rend_bin = f"{post_rend_name}_post_rend{ext_with_dot}" + + codec_name = codec_name.split(" ")[0] cod_bin = f"{codec_name}_cod{ext_with_dot}" dec_bin = f"{codec_name}_dec{ext_with_dot}" + rend_bin = f"{codec_name}_rend{ext_with_dot}" + cfg = { "cod": { @@ -76,6 +89,17 @@ def get_default_config_for_codecs(codec_name: str, ext_with_dot: str = "") -> di "bin": find_binary(dec_bin, raise_error=False), }, } + if is_transcoding: + cfg["trans_cod"] = { + "bin": find_binary(cod_bin, raise_error=False), + } + cfg["trans_dec"] = { + "bin": find_binary(dec_bin, raise_error=False), + } + if is_splitrend: + cfg["split_rend"] = { + "bin": find_binary(post_rend_bin, raise_error=False), + } return cfg @@ -240,6 +264,59 @@ class TestConfig: raise KeyError( f"The following key(s) must be specified for IVAS: {REQUIRED_KEYS_IVAS}" ) + elif type == "ivas_transcoding": + merged_cfg = get_default_config_for_codecs( + "IVAS transcoding", codec_bin_extension + ) + merge_dicts(merged_cfg, cond_cfg) + cfg["conditions_to_generate"][cond_name] = merged_cfg + if REQUIRED_KEYS_IVAS_TRANSCODING.difference( + cfg["conditions_to_generate"][cond_name].keys() + ): + raise KeyError( + f"The following key(s) must be specified for IVAS: {REQUIRED_KEYS_IVAS_TRANSCODING}" + ) + elif type.startswith("ivas_split_rend"): + merged_cfg = get_default_config_for_codecs( + "IVAS ISAR", codec_bin_extension + ) + merge_dicts(merged_cfg, cond_cfg) + cfg["conditions_to_generate"][cond_name] = merged_cfg + if REQUIRED_KEYS_IVAS_SPLIT_REND.difference( + cfg["conditions_to_generate"][cond_name].keys() + ): + raise KeyError( + f"The following key(s) must be specified for IVAS: {REQUIRED_KEYS_IVAS_SPLIT_REND}" + ) + split_rend_cfg = cfg["conditions_to_generate"][cond_name]["split_rend"] + MISSING_KEYS_SPLIT_CFG = [] + for r in REQUIRED_KEYS_IVAS_SPLIT_REND_CFG: + if not split_rend_cfg.get(r): + MISSING_KEYS.append(r) + + if MISSING_KEYS_SPLIT_CFG: + raise KeyError( + f"The following key(s) must be specified : {MISSING_KEYS_SPLIT_CFG}" + ) + + if fmt := split_rend_cfg.get("fmt") != "BINAURAL": + raise ValueError(f"Format {fmt} unsupported by split rendering!") + if (dof := split_rend_cfg.get("dof") < 0) or dof > 3: + raise ValueError( + f"DOF {dof} unsupported! Valid values are 0 <= dof <= 3." 
+ ) + + if codec := split_rend_cfg.get("codec", None): + if not ( + codec.upper() == "LC3PLUS" + or codec.upper() == "LCLD" + or codec.upper() == "DEFAULT" + ): + raise ValueError(f"Unsupported split rendering codec {codec}!") + if codec.upper() == "DEFAULT": + # avoid explicitly writing this value to the config file if set to default + split_rend_cfg.pop("codec", None) + elif type == "mnru": if REQUIRED_KEYS_MNRU.difference( cfg["conditions_to_generate"][cond_name].keys() diff --git a/ivas_processing_scripts/processing/evs.py b/ivas_processing_scripts/processing/evs.py index db44a369..c49c6a45 100755 --- a/ivas_processing_scripts/processing/evs.py +++ b/ivas_processing_scripts/processing/evs.py @@ -36,7 +36,7 @@ import platform from itertools import repeat from pathlib import Path from shutil import copyfile -from typing import Optional, Union +from typing import Optional, Tuple, Union from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audiofile import ( @@ -133,7 +133,12 @@ class EVS(Processing): ) def process( - self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger + self, + in_file: Path, + out_file: Path, + in_meta, + trajectories: Optional[Tuple[Path]] = None, + logger: Optional[logging.Logger] = None, ) -> None: logger.debug(f"EVS configuration : {self.__dict__}") logger.debug(f"EVS {in_file.absolute()} -> {out_file.absolute()}") diff --git a/ivas_processing_scripts/processing/ivas.py b/ivas_processing_scripts/processing/ivas.py index c13d5e11..7489919e 100755 --- a/ivas_processing_scripts/processing/ivas.py +++ b/ivas_processing_scripts/processing/ivas.py @@ -60,6 +60,8 @@ class IVAS(Processing): self.out_fmt = audio.fromtype(self.out_fmt) if not hasattr(self, "dec_opts"): self.dec_opts = None + if not hasattr(self, "split_rend_cfg"): + self.split_rend_cfg = None self._use_wine = use_wine(self.use_windows_codec_binaries) def _validate(self): @@ -100,7 +102,7 @@ class IVAS(Processing): ) def process( - self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger + self, in_file: Path, out_file: Path, in_meta, trj_pair, logger: logging.Logger ) -> None: logger.debug(f"IVAS configuration : {self.__dict__}") logger.debug(f"IVAS {in_file.absolute()} -> {out_file.absolute()}") @@ -128,13 +130,35 @@ class IVAS(Processing): bitstream_noerror = bitstream bitstream, voip = self.simulate_tx(in_file, bitstream, logger) + # generate and use renderer config if split rendering + rend_cfg_file = IVAS.generate_split_rend_cfg( + out_file, self.split_rend_cfg, logger + ) + + # trajectory file for binaural/split rendering + trajectories = trj_pair if trj_pair else None + # decode twice with and without bitstream errors - self.dec(bitstream, out_file, voip=voip, logger=logger) + self.dec( + bitstream, + out_file, + voip=voip, + rend_cfg_file=rend_cfg_file, + trajectories=trajectories, + logger=logger, + ) if bitstream_noerror != bitstream and self.tx_condition: out_file_unprocessed = Path( f"{out_file.parent.joinpath(out_file.stem)}.noerror{out_file.suffix}" ) - self.dec(bitstream_noerror, out_file_unprocessed, voip=False, logger=logger) + self.dec( + bitstream_noerror, + out_file_unprocessed, + voip=False, + rend_cfg_file=rend_cfg_file, + trajectories=trajectories, + logger=logger, + ) def enc( self, @@ -284,6 +308,8 @@ class IVAS(Processing): bitstream: Path, out_file: Path, voip: bool = False, + rend_cfg_file: Optional[Path] = None, + trajectories: Optional[Tuple[Path]] = None, logger: Optional[logging.Logger] = None, ) -> None: 
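For reference, the two new condition types validated above could be configured along these lines in a test YAML. The condition names and numeric values are hypothetical; only the keys required by `REQUIRED_KEYS_IVAS_TRANSCODING` and `REQUIRED_KEYS_IVAS_SPLIT_REND`/`REQUIRED_KEYS_IVAS_SPLIT_REND_CFG` are sketched:

```yaml
conditions_to_generate:
  # hypothetical transcoding condition
  c_trans:
    type: ivas_transcoding
    bitrates: [160000]
    trans_bitrate: 64000
    # trans_cod / trans_dec binaries are filled in from the codec defaults if omitted
  # hypothetical split-rendering condition (full chain)
  c_isar:
    type: ivas_split_rend_full
    bitrates: [256000]
    split_rend:
      fmt: "BINAURAL"    # only BINAURAL passes the validation above
      bitrate: 512000
      dof: 3             # valid range 0 <= dof <= 3
      # codec: "LC3PLUS" # optional: LC3PLUS, LCLD or DEFAULT
```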
logger.debug(f"IVAS decoder {bitstream} -> {out_file}") @@ -292,8 +318,13 @@ class IVAS(Processing): if self._use_wine: cmd.insert(0, "wine") - if hasattr(self, "trajectory"): - cmd.extend(["-T", self.trajectory]) + # head tracking trajectory + if trajectories: + pre_trj, post_trj = trajectories + if (self.out_fmt.name == "BINAURAL_SPLIT_CODED" or self.name == "ivas_trans_pre") and pre_trj: + cmd.extend(["-T", str(pre_trj)]) + elif self.out_fmt.name == "BINAURAL" and post_trj: + cmd.extend(["-T", str(post_trj)]) # add -voip cmdline option to the decoder if voip: @@ -303,6 +334,9 @@ class IVAS(Processing): if self.dec_opts: cmd.extend(self.dec_opts) + if rend_cfg_file: + cmd.extend(["-render_config", str(rend_cfg_file)]) + # use quiet mode cmd.extend(["-q"]) @@ -397,6 +431,33 @@ class IVAS(Processing): raise ValueError(f"IVAS: Invalid input config: {fmt.name}.") + @staticmethod + def generate_split_rend_cfg( + in_file: Path, split_rend_cfg: dict, logger: Optional[logging.Logger] = None + ) -> Path: + if split_rend_cfg: + cfg_file = in_file.with_stem(f"{in_file.stem}.render_config").with_suffix( + ".txt" + ) + else: + return None + + # write config file + with open(cfg_file, "wt") as f: + print("[SPLITREND]", file=f) + print(f"BITRATE = {split_rend_cfg.get('bitrate')};", file=f) + print(f"DOF = {split_rend_cfg.get('dof')};", file=f) + if hqmode := split_rend_cfg.get("hqmode"): + print(f"HQMODE = {int( hqmode )};", file=f) + if codec := split_rend_cfg.get("codec"): + print(f"CODEC = {codec.upper()};", file=f) + if framesize := split_rend_cfg.get("framesize"): + print(f"FRAMESIZE = {framesize};", file=f) + + logger.debug(f"Wrote IVAS renderer config: {cfg_file}") + + return cfg_file + class IVAS_rend(Processing): def __init__(self, attrs): @@ -407,6 +468,8 @@ class IVAS_rend(Processing): self.out_fmt = audio.fromtype(self.out_fmt) if not hasattr(self, "opts"): self.dec_opts = None + if not hasattr(self, "split_rend_cfg"): + self.split_rend_cfg = None self._use_wine = ( platform.system() == "Linux" and self.use_windows_codec_binaries ) @@ -430,6 +493,7 @@ class IVAS_rend(Processing): in_file: Path, out_file: Path, in_meta, + trajectories: Optional[Tuple[Path]] = None, logger: Optional[logging.Logger] = None, ) -> None: logger.debug(f"IVAS rend configuration : {self.__dict__}") @@ -446,25 +510,57 @@ class IVAS_rend(Processing): else: self.in_fs = parse_wave_header(str(in_file))["fs"] + # generate and use renderer config if split rendering + rend_cfg_file = IVAS.generate_split_rend_cfg( + out_file, self.split_rend_cfg, logger + ) + cmd = [self.bin] if self._use_wine: cmd.insert(0, "wine") - cmd.extend( - [ - "-fs", - str(self.in_fs // 1000), - "-i", - str(in_file), - "-if", - self.in_fmt.name, - "-o", - str(out_file), - "-of", - self.out_fmt.name, - ] - ) + if rend_cfg_file: + cmd.extend(["-render_config", str(rend_cfg_file)]) + + # head tracking trajectory + if trajectories: + pre_trj, post_trj = trajectories + if self.out_fmt.name == "BINAURAL_SPLIT_CODED" and pre_trj: + cmd.extend(["-T", str(pre_trj)]) + elif self.out_fmt.name == "BINAURAL" and post_trj: + cmd.extend(["-T", str(post_trj)]) + + if self.in_fmt.name == "BINAURAL_SPLIT_CODED": + cmd.extend( + [ + "-q", + "-fs", + str(self.in_fs // 1000), + "-i", + str(in_file), + "-if", + self.in_fmt.name, + "-o", + str(out_file), + ] + ) + else: + cmd.extend( + [ + "-q", + "-fs", + str(self.in_fs // 1000), + "-i", + str(in_file), + "-if", + self.in_fmt.name, + "-o", + str(out_file), + "-of", + self.out_fmt.name, + ] + ) if in_meta: 
cmd.append("-im") diff --git a/ivas_processing_scripts/processing/postprocessing.py b/ivas_processing_scripts/processing/postprocessing.py index 1a9a5ffb..e82f0a58 100755 --- a/ivas_processing_scripts/processing/postprocessing.py +++ b/ivas_processing_scripts/processing/postprocessing.py @@ -32,6 +32,7 @@ import logging from pathlib import Path +from typing import Optional, Tuple from ivas_processing_scripts.audiotools import convert from ivas_processing_scripts.processing.processing import Processing @@ -45,12 +46,24 @@ class Postprocessing(Processing): else: self.name = "post" - def process(self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger): + def process( + self, + in_file: Path, + out_file: Path, + in_meta, + trajectories: Optional[Tuple[Path]] = None, + logger: Optional[logging.Logger] = None, + ): logger.debug(f"Postprocessing configuration : {self.__dict__}") logger.debug(f"Postprocessing {in_file.absolute()} -> {out_file.absolute()}") convert.convert_file( - in_file, out_file, logger=logger, in_meta=in_meta, **self.__dict__ + in_file, + out_file, + logger=logger, + in_meta=in_meta, + out_trajectory=trajectories[1] if trajectories else None, + **self.__dict__, ) # additional postprocessing of signal without error modification if self.tx_condition: diff --git a/ivas_processing_scripts/processing/preprocessing.py b/ivas_processing_scripts/processing/preprocessing.py index 9ad1d628..eda9ccc5 100755 --- a/ivas_processing_scripts/processing/preprocessing.py +++ b/ivas_processing_scripts/processing/preprocessing.py @@ -32,6 +32,7 @@ import logging from pathlib import Path +from typing import Optional, Tuple from ivas_processing_scripts.audiotools import convert from ivas_processing_scripts.processing.processing import Processing @@ -42,7 +43,14 @@ class Preprocessing(Processing): super().__init__(attrs) self.name = "pre" - def process(self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger): + def process( + self, + in_file: Path, + out_file: Path, + in_meta, + trajectories: Optional[Tuple[Path]] = None, + logger: Optional[logging.Logger] = None, + ): logger.debug(f"Preprocessing configuration : {self.__dict__}") logger.debug(f"Preprocessing {in_file.absolute()} -> {out_file.absolute()}") diff --git a/ivas_processing_scripts/processing/preprocessing_2.py b/ivas_processing_scripts/processing/preprocessing_2.py index dbfbe831..df449044 100644 --- a/ivas_processing_scripts/processing/preprocessing_2.py +++ b/ivas_processing_scripts/processing/preprocessing_2.py @@ -32,6 +32,7 @@ import logging from pathlib import Path +from typing import Optional, Tuple from warnings import warn import numpy as np @@ -55,7 +56,14 @@ class Preprocessing2(Processing): super().__init__(attrs) self.name = "pre_2" - def process(self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger): + def process( + self, + in_file: Path, + out_file: Path, + in_meta, + trajectories: Optional[Tuple[Path]] = None, + logger: Optional[logging.Logger] = None, + ): logger.debug(f"Preprocessing2 configuration : {self.__dict__}") logger.debug(f"Preprocessing2 {in_file.absolute()} -> {out_file.absolute()}") diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index a2e33160..595b1c44 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -330,6 +330,7 @@ def preprocess(cfg, logger): repeat(chain), repeat(logger), cfg.metadata_path, + repeat(None), ) ) p = Pool() @@ 
-392,7 +393,7 @@ def preprocess(cfg, logger): ) # no metadata - if "ISM" not in preproc_output_fmt and "MASA" not in preproc_output_fmt: + if not ("ISM" in preproc_output_fmt or "MASA" in preproc_output_fmt): list_item.append(None) cfg.metadata_path.append(list_item) @@ -426,6 +427,7 @@ def preprocess_2(cfg, logger): repeat(chain), repeat(logger), cfg.metadata_path, + repeat(None), ) ) p = Pool() @@ -474,6 +476,7 @@ def process_item( chain: Iterable, logger: logging.Logger, in_meta, + trajectories, ) -> None: # derive tmp file names tmp_file = tmp_dir.joinpath(in_file.name) @@ -487,6 +490,7 @@ def process_item( out_dir_wav = False processing_paths = [in_file] processing_paths_meta = [in_meta] + processing_paths_trj = trajectories bool_ism = False bool_masa = False num_ism_meta = None @@ -559,8 +563,11 @@ def process_item( out_meta.append(out_dir.joinpath(f"{Path(out_file).stem}.wav.met")) # execute each process sequentially, feed output into input of next process - for p, (input, output), input_meta in zip( - chain, pairwise(processing_paths), processing_paths_meta[:-1] + for p, (input, output), input_meta, trj_pair in zip( + chain, + pairwise(processing_paths), + processing_paths_meta[:-1], + repeat(processing_paths_trj), ): # setup logging for the output item_logger = logger.getChild(output.stem) @@ -569,7 +576,7 @@ def process_item( fh.setFormatter(logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT)) item_logger.addHandler(fh) - p.process(input, output, input_meta, item_logger) + p.process(input, output, input_meta, trj_pair, item_logger) item_logger.handlers.clear() diff --git a/ivas_processing_scripts/processing/processing_splitting_scaling.py b/ivas_processing_scripts/processing/processing_splitting_scaling.py index bb1dd604..3c415b62 100644 --- a/ivas_processing_scripts/processing/processing_splitting_scaling.py +++ b/ivas_processing_scripts/processing/processing_splitting_scaling.py @@ -2,6 +2,7 @@ import logging from itertools import repeat from pathlib import Path +from typing import Optional, Tuple import numpy as np @@ -51,7 +52,14 @@ class Processing_splitting_scaling(Processing): super().__init__(attrs) self.name = "processing_splitting_scaling" - def process(self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger): + def process( + self, + in_file: Path, + out_file: Path, + in_meta, + trajectories: Optional[Tuple[Path]] = None, + logger: Optional[logging.Logger] = None, + ): logger.debug(f"Processing splitting scaling configuration : {self.__dict__}") logger.debug(f"Processing splitting scaling {in_file.absolute()}") diff --git a/ivas_processing_scripts/trajectories/README.md b/ivas_processing_scripts/trajectories/README.md new file mode 100644 index 00000000..d5e59983 --- /dev/null +++ b/ivas_processing_scripts/trajectories/README.md @@ -0,0 +1,62 @@ + + +# Trajectory file processing module + +This module can be executed using `python -m ivas_processing_scripts.trajectories`. + +IVAS head tracking trajectory files can be manipulated using this helper script. See the usage help below for details. + +```text +usage: Process IVAS .csv head tracking trajectories in either Euler (YPR) or Quaternion format. 
+ Order of supported operations: zero => offset => invert => delay + Head tracking data is equivalent to scene rotation data (NOT listener orientation, use --invert if needed) + + [-h] [-i] [-d DELAY] [-o OFFSET OFFSET OFFSET] [-zy] [-zp] [-zr] [-of {q,e}] in_trj out_trj + +positional arguments: + in_trj Input Trajectory + out_trj Output Trajectory + +options: + -h, --help show this help message and exit + -i, --invert Flag to invert trajectory, default = False + -d DELAY, --delay DELAY + Delay trajectory by this amount in milliseconds + -o OFFSET OFFSET OFFSET, --offset OFFSET OFFSET OFFSET + Offset trajectory by this rotation [yaw, pitch, roll] + -zy, --zero_yaw Zero yaw axis + -zp, --zero_pitch Zero pitch axis + -zr, --zero_roll Zero roll axis + -of {q,e}, --output_format {q,e} + Output format: 'e' for Euler (YPR) and 'q' for Quaternions, default = q +``` diff --git a/ivas_processing_scripts/trajectories/__init__.py b/ivas_processing_scripts/trajectories/__init__.py new file mode 100755 index 00000000..08e8055d --- /dev/null +++ b/ivas_processing_scripts/trajectories/__init__.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2024 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
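A possible invocation of the trajectory helper documented in the README above, with placeholder file names: invert a listener-orientation trajectory, delay it by 100 ms, zero the roll axis and write the result in Euler format:

```text
python -m ivas_processing_scripts.trajectories -i -d 100 -zr -of e listener.csv scene_rotation.csv
```

Given the 5 ms sample spacing assumed by the delay handling, the 100 ms delay prepends 20 unit quaternions to the trajectory before it is written out.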
+# + +import argparse +from functools import partial + +import numpy as np + +from ivas_processing_scripts.audiotools.quaternions import q_inv, q_mul +from ivas_processing_scripts.audiotools.rotation import Euler2Quat, Quat2Euler +from ivas_processing_scripts.audiotools.utils import read_trajectory, write_trajectory + + +def get_args(): + parser = argparse.ArgumentParser( + """Process IVAS .csv head tracking trajectories in either Euler (YPR) or Quaternion format. + Order of supported operations: zero => offset => invert => delay + Head tracking data is equivalent to scene rotation data (NOT listener orientation, use --invert if needed) + """ + ) + parser.add_argument("in_trj", help="Input Trajectory") + parser.add_argument("out_trj", help="Output Trajectory") + parser.add_argument( + "-i", + "--invert", + action="store_true", + default=False, + help="Flag to invert trajectory, default = %(default)s", + ) + parser.add_argument( + "-d", + "--delay", + default=None, + help="Delay trajectory by this amount in milliseconds", + type=int, + ) + parser.add_argument( + "-o", + "--offset", + help="Offset trajectory by this rotation [yaw, pitch, roll]", + nargs=3, + type=int, + ) + for a, ax in zip("ypr", ["yaw", "pitch", "roll"]): + parser.add_argument( + f"-z{a}", + f"--zero_{ax}", + action="store_true", + default=[], + help=f"Zero {ax} axis", + ) + parser.add_argument( + "-of", + "--output_format", + choices=["q", "e"], + default="q", + help="Output format: 'e' for Euler (YPR) and 'q' for Quaternions, default = %(default)s", + type=str, + ) + + return parser.parse_args() + + +def main(): + args = get_args() + + trj = read_trajectory(args.in_trj) + + if args.zero_yaw or args.zero_pitch or args.zero_roll: + trj_euler = Quat2Euler(trj) + + if args.zero_yaw: + trj_euler[:, 0] = 0 + if args.zero_pitch: + trj_euler[:, 1] = 0 + if args.zero_roll: + trj_euler[:, 2] = 0 + + trj = Euler2Quat(trj_euler) + + if args.offset: + args.offset = np.array(args.offset) + args.offset = Euler2Quat(args.offset) + + # left multiply by offset to chain the rotations + trj = np.apply_along_axis(partial(q_mul, args.offset), 1, trj) + + if args.invert: + trj = np.apply_along_axis(q_inv, 1, trj) + + if args.delay: + pad = np.array([[1, 0, 0, 0]]) + trj = np.vstack([np.repeat(pad, int(args.delay / 5), axis=0), trj]) + + write_trajectory(trj, args.out_trj, write_quat=(args.output_format == "q")) diff --git a/ivas_processing_scripts/trajectories/__main__.py b/ivas_processing_scripts/trajectories/__main__.py new file mode 100755 index 00000000..4b972edd --- /dev/null +++ b/ivas_processing_scripts/trajectories/__main__.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2024 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +from ivas_processing_scripts.trajectories import main + +if __name__ == "__main__": + main() diff --git a/ivas_processing_scripts/trajectories/trajectories.py b/ivas_processing_scripts/trajectories/trajectories.py new file mode 100755 index 00000000..78cad50c --- /dev/null +++ b/ivas_processing_scripts/trajectories/trajectories.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2024 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. 
+# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + + +from pathlib import Path +from typing import Tuple, Union + + +def trajectory_search( + item_name: Path, + search_folder: Path, +) -> Tuple: + """Search for head tracking trajectories with item_name.wav.{pre,post}.csv""" + + if not item_name: + raise ValueError("No item name provided, can't search for trajectories") + + if search_folder is not None: + item_name = search_folder.joinpath(item_name.name) + + trj_file_pre = item_name.with_stem(f"{item_name.name}.pre").with_suffix(".csv") + trj_file_post = item_name.with_stem(f"{item_name.name}.post").with_suffix(".csv") + + pre_trj = trj_file_pre if trj_file_pre.is_file() else None + post_trj = trj_file_post if trj_file_post.is_file() else None + + return pre_trj, post_trj + + +def check_trajectories(item_names: list, search_folder: Path) -> list[Tuple]: + """Find head tracking trajectories""" + + list_trj = [trajectory_search(Path(i), search_folder) for i in item_names] + + return list_trj diff --git a/tests/data/test_ISM.yml b/tests/data/test_ISM.yml index 9f1fe799..f5ec8327 100644 --- a/tests/data/test_ISM.yml +++ b/tests/data/test_ISM.yml @@ -294,4 +294,4 @@ postprocessing: ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false # limit: true ### Head-tracking trajectory file for binaural output; default = null - # trajectory: "path/to/file" + # out_trajectory: "path/to/file" diff --git a/tests/data/test_MASA.yml b/tests/data/test_MASA.yml index 179dcc9d..0f87e050 100644 --- a/tests/data/test_MASA.yml +++ b/tests/data/test_MASA.yml @@ -292,4 +292,4 @@ postprocessing: ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false limit: true ### Head-tracking trajectory file for binaural output; default = null - # trajectory: "path/to/file" + # out_trajectory: "path/to/file" diff --git a/tests/data/test_MC.yml b/tests/data/test_MC.yml index 4e1ea6f1..451a1a99 100644 --- a/tests/data/test_MC.yml +++ b/tests/data/test_MC.yml @@ -289,4 +289,4 @@ postprocessing: ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false # limit: true ### Head-tracking trajectory file for binaural output; default = null - # trajectory: "path/to/file" + # out_trajectory: "path/to/file" diff --git a/tests/data/test_SBA.yml b/tests/data/test_SBA.yml index e5fedc5d..00dfe72e 100644 --- a/tests/data/test_SBA.yml +++ b/tests/data/test_SBA.yml @@ -287,4 +287,4 @@ postprocessing: ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false limit: true ### Head-tracking trajectory file for binaural output; default = null - # trajectory: "path/to/file" + # out_trajectory: "path/to/file" diff --git a/tests/test_audiotools_convert.py b/tests/test_audiotools_convert.py index e933da0f..a39d915c 100644 --- a/tests/test_audiotools_convert.py +++ b/tests/test_audiotools_convert.py @@ -95,7 +95,7 @@ def convert( in_fmt=in_fmt, out_fmt=out_fmt, in_meta=in_meta, - trajectory=trj_file, + out_trajectory=trj_file, limit=True, # out_loudness=-26, **kwargs, -- GitLab From 64070d243405f21dbd24bb8da7c19dd82576b81d Mon 
Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Mon, 23 Jun 2025 15:12:37 +0200 Subject: [PATCH 2/3] update copyright header to 2025 for ISAR files --- experiments/selection_isar/README.md | 2 +- generate_test_isar.py | 2 +- ivas_processing_scripts/audiotools/quaternions.py | 2 +- ivas_processing_scripts/trajectories/README.md | 2 +- ivas_processing_scripts/trajectories/__init__.py | 2 +- ivas_processing_scripts/trajectories/__main__.py | 2 +- ivas_processing_scripts/trajectories/trajectories.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/experiments/selection_isar/README.md b/experiments/selection_isar/README.md index 6224ade3..5e120bf0 100644 --- a/experiments/selection_isar/README.md +++ b/experiments/selection_isar/README.md @@ -1,6 +1,6 @@
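Looking back at `trajectories.py` from the first patch, `check_trajectories()` resolves a (pre, post) head-tracking trajectory pair per item purely by file-name convention. A minimal sketch; the item names and search folder are illustrative only:

```python
from pathlib import Path

from ivas_processing_scripts.trajectories.trajectories import check_trajectories

items = ["item01.wav", "item02.wav"]
search_folder = Path("path/to/trajectories")

# For item01.wav this looks for path/to/trajectories/item01.wav.pre.csv and
# path/to/trajectories/item01.wav.post.csv; a missing file gives None in that slot.
for item, (pre_trj, post_trj) in zip(items, check_trajectories(items, search_folder)):
    print(item, pre_trj, post_trj)
```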