diff --git a/.gitignore b/.gitignore index 5d3d341c0221e34ae100c9d1a05104578a720e26..2ac3217d558ec5c077aef3023cb031bc751b8524 100644 --- a/.gitignore +++ b/.gitignore @@ -12,22 +12,23 @@ venv/ *.bs *.192 mc.double -experiments/selection/*/proc_input*/cat*/*.wav -experiments/selection/*/proc_input*/cat*/*.csv -experiments/selection/*/proc_input*/cat*/*.pcm -experiments/selection/*/proc_input*/FOA*/*.wav -experiments/selection/*/proc_input*/HOA2*/*.wav -experiments/selection/*/background_noise/*.wav -experiments/selection/*/background_noise/*.txt -experiments/selection/*/proc_input*/*.wav -experiments/selection/*/proc_input*/*.pcm -experiments/selection/*/proc_input*/*.csv -experiments/selection/*/proc_input*/*.log -experiments/selection/*/proc_input*/*.yml -experiments/selection/*/proc_output*/ -experiments/selection/*/config/*-lab_*.yml -experiments/selection/*/gen_input/IRs/*.wav -experiments/selection/*/gen_input/items_mono/*.wav +experiments/selection*/*/proc_input*/cat*/*.wav +experiments/selection*/*/proc_input*/cat*/*.csv +experiments/selection*/*/proc_input*/cat*/*.pcm +experiments/selection*/*/proc_input*/FOA*/*.wav +experiments/selection*/*/proc_input*/HOA2*/*.wav +experiments/selection*/*/background_noise/*.wav +experiments/selection*/*/background_noise/*.txt +experiments/selection*/*/proc_input*/*.wav +experiments/selection*/*/proc_input*/*.pcm +experiments/selection*/*/proc_input*/*.csv +experiments/selection*/*/proc_input*/*.log +experiments/selection*/*/proc_input*/*.yml +experiments/selection*/*/proc_output*/ +experiments/selection*/*/config/*-lab_*.yml +experiments/selection*/*/config/*.csv +experiments/selection*/*/gen_input/IRs/*.wav +experiments/selection*/*/gen_input/items_mono/*.wav *~ tests/temp_output_* tests/cut diff --git a/README.md b/README.md index 4ee1686015acf91454571afdbd32f889b01e0947..3fe7971d160b1bd71fdd3de68315690c7b2cf82f 100755 --- a/README.md +++ b/README.md @@ -501,7 +501,7 @@ postprocessing: ### Flag whether output should be 
limited to avoid clipping (can alter target loudness); default = true # limit: false ### Head-tracking trajectory file for binaural output; default = null - # trajectory: "path/to/file" + # out_trajectory: "path/to/file" ``` diff --git a/examples/TEMPLATE.yml b/examples/TEMPLATE.yml index b09e327d455c9a74af4a976e0dbe7e301eb60397..89d03ad3a63fb0dbacf160fb0ceae62f5929a583 100755 --- a/examples/TEMPLATE.yml +++ b/examples/TEMPLATE.yml @@ -181,7 +181,7 @@ conditions_to_generate: type: ref ### optional low-pass cut-off frequency in Hz; default = null # out_fc: 22500 - ### optional use of IVAS_rend (can be used in all conditions) + ### optional use of IVAS_rend for post rendering (can be used in all conditions) # ivas_rend: ### Path to renderer binary; default search for IVAS_rend in bin folder (primary) and PATH (secondary) # bin: ~/git/ivas-codec/IVAS_rend @@ -232,7 +232,7 @@ conditions_to_generate: # fs: 48000 ### Additional commandline options; default = null # opts: ["-q", "-no_delay_cmp"] - ### optional use of IVAS_rend (can be used in all conditions) + ### optional use of IVAS_rend for post rendering (can be used in all conditions) # ivas_rend: ### Path to renderer binary; default search for IVAS_rend in bin folder (primary) and PATH (secondary) # bin: ~/git/ivas-codec/IVAS_rend @@ -328,5 +328,3 @@ postprocessing: # bin_lfe_gain: 1 ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false # limit: true - ### Head-tracking trajectory file for binaural output; default = null - # trajectory: "path/to/file" diff --git a/examples/audiotools.ipynb b/examples/audiotools.ipynb index d6ac38ad04efa01844378b1fc9ba5b42696af6f5..10c8be575560fb17ee3ef50cb6b046f0a75acacf 100755 --- a/examples/audiotools.ipynb +++ b/examples/audiotools.ipynb @@ -9,7 +9,7 @@ "\n", "The audiotools module can be used via the CLI for performing rendering of audio files or used as a library by importing the functions in a python script.\n", "\n", - "This 
notebook contains a few commandline examples and a brief example of how to use the functions in an interactive python session (like this notebook) which can be also similarly used in a standalone python script." + "This notebook contains a few commandline examples and a brief example of how to use the functions in an interactive python session (like this notebook) which can be also similarly used in a standalone python script.\n" ] }, { @@ -17,22 +17,22 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "\n", "# Command-line interface / renderer\n", "\n", "The CLI can be used by running the python module: `python -m ivas_processing_scripts.audiotools --help`.\n", + "\n", "
\n", "Click to expand...\n", "\n", "```bash\n", "❯ python -m ivas_processing_scripts.audiotools --help\n", - "usage: __main__.py [-h] -i INPUT -if IN_FMT [-is IN_FS] [-ifc IN_CUTOFF] [-ihp] [-iw IN_WINDOW] [-it PRE_TRIM POST_TRIM] [-ipn] [-id IN_DELAY] [-il IN_LOUDNESS] [-inf IN_LOUDNESS_FMT]\n", - " [-im IN_META [IN_META ...]] -o OUTPUT [-of OUT_FMT] [-os OUT_FS] [-ofc OUT_CUTOFF] [-ohp] [-ow OUT_WINDOW] [-ot PRE_TRIM POST_TRIM] [-opn] [-od OUT_DELAY] [-ol OUT_LOUDNESS]\n", - " [-onf OUT_LOUDNESS_FMT] [-lm] [-t TRAJECTORY] [-bd BIN_DATASET] [-bl BIN_LFE_GAIN] [-l] [-L] [-mp]\n", + "usage: __main__.py [-h] -i INPUT -if IN_FMT [-is IN_FS] [-ifc IN_CUTOFF] [-imk IN_MASK] [-iw IN_WINDOW] [-ix PRE_TRIM POST_TRIM] [-it IN_TRAJECTORY] [-ipn] [-id IN_DELAY] [-il IN_LOUDNESS] [-inf IN_LOUDNESS_FMT] [-im IN_META [IN_META ...]] -o OUTPUT\n", + " [-of OUT_FMT] [-os OUT_FS] [-ofc OUT_CUTOFF] [-omk OUT_MASK] [-ow OUT_WINDOW] [-ox PRE_TRIM POST_TRIM] [-ot OUT_TRAJECTORY] [-opn] [-od OUT_DELAY] [-ol OUT_LOUDNESS] [-onf OUT_LOUDNESS_FMT] [-lm] [-bd BIN_DATASET] [-bl BIN_LFE_GAIN]\n", + " [-mnru MNRU_Q] [-esdru ESDRU_ALPHA] [-l] [-L] [-mp]\n", "\n", "Audiotools: Convert/Manipulate spatial audio files.\n", "\n", - "options:\n", + "optional arguments:\n", " -h, --help show this help message and exit\n", "\n", "Input (pre-) processing options:\n", @@ -43,11 +43,14 @@ " Sampling rate (Hz) (deduced for .wav input, same as input if output not specified, default = 48000)\n", " -ifc IN_CUTOFF, --in_cutoff IN_CUTOFF\n", " Cut-off frequency for low-pass filtering (default = None)\n", - " -ihp, --in_hp50 Apply 50 Hz high-pass filtering (default = False)\n", + " -imk IN_MASK, --in_mask IN_MASK\n", + " Apply filtering with mask (HP50, 20KBP or None; default = None)\n", " -iw IN_WINDOW, --in_window IN_WINDOW\n", " Window the start/end of the signal by this amount in milliseconds (default = None)\n", - " -it PRE_TRIM POST_TRIM, --in_trim PRE_TRIM POST_TRIM\n", + " -ix PRE_TRIM POST_TRIM, 
--in_trim PRE_TRIM POST_TRIM\n", " Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence), (default = None)\n", + " -it IN_TRAJECTORY, --in_trajectory IN_TRAJECTORY\n", + " Head-tracking trajectory file for input pre-rotation or binaural output (default = None)\n", " -ipn, --in_pad_noise Flag for padding with noise instead of zeros\n", " -id IN_DELAY, --in_delay IN_DELAY\n", " Delay the signal by this amount in milliseconds (negative values advance, default = None)\n", @@ -67,11 +70,14 @@ " Sampling rate (Hz) (deduced for .wav input, same as input if output not specified, default = 48000)\n", " -ofc OUT_CUTOFF, --out_cutoff OUT_CUTOFF\n", " Cut-off frequency for low-pass filtering (default = None)\n", - " -ohp, --out_hp50 Apply 50 Hz high-pass filtering (default = False)\n", + " -omk OUT_MASK, --out_mask OUT_MASK\n", + " Apply filtering with mask (HP50, 20KBP or None; default = None)\n", " -ow OUT_WINDOW, --out_window OUT_WINDOW\n", " Window the start/end of the signal by this amount in milliseconds (default = None)\n", - " -ot PRE_TRIM POST_TRIM, --out_trim PRE_TRIM POST_TRIM\n", + " -ox PRE_TRIM POST_TRIM, --out_trim PRE_TRIM POST_TRIM\n", " Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence), (default = None)\n", + " -ot OUT_TRAJECTORY, --out_trajectory OUT_TRAJECTORY\n", + " Head-tracking trajectory file for input pre-rotation or binaural output (default = None)\n", " -opn, --out_pad_noise\n", " Flag for padding with noise instead of zeros\n", " -od OUT_DELAY, --out_delay OUT_DELAY\n", @@ -81,25 +87,26 @@ " -onf OUT_LOUDNESS_FMT, --out_loudness_fmt OUT_LOUDNESS_FMT\n", " Format used for loudness computation (only valid with with -ol/--out_loudness, default = OUT_FMT)\n", " -lm, --limit Apply limiting to output (default = False)\n", - " -t TRAJECTORY, --trajectory TRAJECTORY\n", - " Head-tracking trajectory file for binaural output (default = None)\n", " -bd BIN_DATASET, --bin_dataset 
BIN_DATASET\n", " Use a custom binaural dataset (see README.md and audiotools/binaural_datasets/README.txt for further information)\n", " -bl BIN_LFE_GAIN, --bin_lfe_gain BIN_LFE_GAIN\n", - " Render LFE to binaural output with the specified gain (only valid for channel-based input, default = None)\n", + " Render LFE to binaural output with the specified gain (only valid for channel-based input, default = 1.8836490894898006)\n", + " -mnru MNRU_Q, --mnru_q MNRU_Q\n", + " Flag for MNRU processing\n", + " -esdru ESDRU_ALPHA, --esdru_alpha ESDRU_ALPHA\n", + " Flag for ESDRU processing\n", "\n", "General options:\n", " -l, --list list all supported audio formats and exit\n", " -L, --long list all supported audio formats with long description and exit\n", " -mp, --multiprocessing\n", - " Enable multiprocessing (default = False)\n", + " Enable multiprocessing (default = False))\n", "```\n", + "\n", "
\n", "\n", "Please refer to the README.md and `--help` for a more detailed description of possible arguments and their usage.\n", "\n", - "\n", - "\n", "## Example command line usage\n", "\n", "```bash\n", @@ -111,7 +118,7 @@ "\n", "# Rendering a directory of 5_1 files to binaural with output loudness normalization and parallel processing enabled:\n", "python -m ivas_processing_scripts.audiotools -i stereo_input/ -if 5_1 -of BINAURAL -o output_binaural_norm/ -ol -26 -mp\n", - "```" + "```\n" ] }, { @@ -122,17 +129,19 @@ "# Usage in an interactive python session\n", "\n", "## Importing the module\n", - "The module, its submodules and functions may be imported just like any other python package. To make the module available in *any* directory, the `PYTHONPATH` must be modified.\n", + "\n", + "The module, its submodules and functions may be imported just like any other python package. To make the module available in _any_ directory, the `PYTHONPATH` must be modified.\n", "\n", "The recommended way to do this is to add the following lines at the top of a script which requires this module:\n", "(refer https://docs.python.org/3/library/sys.html#sys.path)\n", + "\n", "```python\n", "import sys\n", "sys.path.append(\"/path/to/this/repository\")\n", "import ivas_processing_scripts.audiotools # import can now be resolved\n", "```\n", "\n", - "An alternative is to modify the shell environment before calling the python interpreter, but this is left to the reader to try. The above solution is cross-platform." + "An alternative is to modify the shell environment before calling the python interpreter, but this is left to the reader to try. 
The above solution is cross-platform.\n" ] }, { @@ -159,7 +168,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Reading audio and applying basic functions:" + "Reading audio and applying basic functions:\n" ] }, { @@ -201,7 +210,7 @@ "source": [ "For more convenient manipulation of audio, the `audio` python file offers the base class `Audio` upon which the derived classes `BinauralAudio`, `ChannelBasedAudio`, `MetadataAssistedSpatialAudio`, `ObjectBasedAudio`, `SceneBasedAudio` are implemented.\n", "\n", - "To instantiate a class object, the convenience functions (\"factory\" methods) `fromtype()`, `fromarray()` and `fromfile()` are available:" + "To instantiate a class object, the convenience functions (\"factory\" methods) `fromtype()`, `fromarray()` and `fromfile()` are available:\n" ] }, { @@ -297,7 +306,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The audio object allows usage of further functions, which accept an instance of `Audio` (i.e. derived classes), such as the ITU filter wrapper (filter executable must be in \"../bin\" or PATH!):" + "The audio object allows usage of further functions, which accept an instance of `Audio` (i.e. derived classes), such as the ITU filter wrapper (filter executable must be in \"../bin\" or PATH!):\n" ] }, { @@ -392,7 +401,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The object-based approach allows easier manipulation of audio since the necessary values for manipulation are available as attributes. A non-concrete object may also be created to be filled in with data later:" + "The object-based approach allows easier manipulation of audio since the necessary values for manipulation are available as attributes. A non-concrete object may also be created to be filled in with data later:\n" ] }, { @@ -431,7 +440,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The conversion routines are implemented in the `convert` submodule of `audiotools`. 
These accept two audio objects - one as an input (a \"concrete\" object) and another as output to be filled-in (\"hollow\"). Using the concrete HOA3 audio object we instantiated from an array, and the hollow 7_1_4 object, we can use the `convert_channelbased()` function to perform a conversion:" + "The conversion routines are implemented in the `convert` submodule of `audiotools`. These accept two audio objects - one as an input (a \"concrete\" object) and another as output to be filled-in (\"hollow\"). Using the concrete HOA3 audio object we instantiated from an array, and the hollow 7_1_4 object, we can use the `convert_channelbased()` function to perform a conversion:\n" ] }, { @@ -496,7 +505,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `convert_scenebased()` function was already provided with all of the information that was required to perform a conversion from the input format to the output format since they were all class attributes. Under the hood the function checked the type of output audio, computed the necessary rendering matrix using the loudspeaker positions, applied the transformation and set the audio array of the output object." + "The `convert_scenebased()` function was already provided with all of the information that was required to perform a conversion from the input format to the output format since they were all class attributes. 
Under the hood the function checked the type of output audio, computed the necessary rendering matrix using the loudspeaker positions, applied the transformation and set the audio array of the output object.\n" ] }, { @@ -504,7 +513,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "A more advanced example with a generator for performing operations on a framewise basis:" + "A more advanced example with a generator for performing operations on a framewise basis:\n" ] }, { @@ -536,7 +545,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "\n", "This concludes the overview of the audiotools module. For readers interested in implementing scripts based on this module, it is recommended to run a debugging session for an example commandline above (either using an IDE or `python -m pdb -m ivas_processing_scripts.audiotools ...`) and examine the functions used, along with a read through of the source code.\n", "\n", "A listing of each file in the module with a description is below for reference:\n", @@ -558,23 +566,36 @@ "├── binauralobjectrenderer.py # reference binaural rendering algorithm for object based audio\n", "├── constants.py # submodule shared constants\n", "├── convert\n", - "│ ├── __init__.py # TODO rename: conversion module \n", + "│ ├── __init__.py # conversion module\n", "│ ├── binaural.py # binaural audio related conversions\n", "│ ├── channelbased.py # channel based audio related conversions\n", "│ ├── masa.py # MASA related conversions (relies on wrappers.masaRenderer)\n", "│ ├── objectbased.py # object based audio related conversions\n", + "│ ├── omasa.py\n", + "│ ├── osba.py\n", "│ └── scenebased.py # scene based audio related conversions\n", "├── EFAP.py # edge-fading amplitude panning implementation\n", - "├── metadata.py # TODO rename: scene description / composite audio format\n", - "├── rotation.py # rotation related functions \n", - "├── utils.py # TODO remove? 
module convenience functions\n", - "└── wrappers \n", + "├── metadata.py # scene description files and metadata handling\n", + "├── quaternions.py\n", + "├── rotation.py # rotation related functions\n", + "├── utils.py # module convenience functions\n", + "└── wrappers\n", " ├── __init__.py\n", " ├── bs1770.py # wrapper for ITU STL bs1770demo\n", + " ├── dlyerr_2_errpat.py\n", + " ├── eid_xor.py\n", + " ├── esdru.py\n", " ├── filter.py # wrapper for ITU STL filter\n", - " └── masaRenderer.py # wrapper for MASA reference software masaRenderer\n", + " ├── gen_patt.py\n", + " ├── masaAnalyzer.py # wrapper for MASA reference software masaAnalyzer\n", + " ├── masaRenderer.py # wrapper for MASA reference software masaRenderer\n", + " ├── networkSimulator.py\n", + " ├── p50fbmnru.py\n", + " ├── random_seed.py\n", + " └── reverb.py\n", "```\n", - "" + "\n", + "\n" ] } ], diff --git a/experiments/selection_isar/.gitkeep b/experiments/selection_isar/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-1a/.gitkeep b/experiments/selection_isar/BS1534-1a/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-1a/config/.gitkeep b/experiments/selection_isar/BS1534-1a/config/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-1a/config/BS1534-1a.yml b/experiments/selection_isar/BS1534-1a/config/BS1534-1a.yml new file mode 100644 index 0000000000000000000000000000000000000000..8491807fcc881f176b0f4f5a215fabff9d4ee115 --- /dev/null +++ b/experiments/selection_isar/BS1534-1a/config/BS1534-1a.yml @@ -0,0 +1,91 @@ +--- +################################################ +# General configuration +################################################ + +name: 
BS1534-1a +master_seed: 25 +prerun_seed: 2 + +input_path: "experiments/selection/BS1534-1a/proc_input" +output_path: "experiments/selection/BS1534-1a/proc_output" +use_windows_codec_binaries: true +condition_in_output_filename: true + +################################################ +### Input configuration +################################################ +input: + fmt: "HOA3" + fs: 48000 + aligned_to: + len: 20 + +################################################ +### Pre-processing on individual items +################################################ +preprocessing: + mask: "20KBP" + loudness: -26 + window: 100 + +################################################ +### Pre-processing on whole signal(s) +################################################ +preprocessing_2: + concatenate_input: false + preamble_noise: false + repeat_signal: true + +################################################ +### Configuration for conditions under test +################################################ +conditions_to_generate: + ### IVAS conditions ############################### + c01: + type: ivas + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + c02: + type: ivas + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + out_fc: 7000 + c03: + type: ivas_transcoding + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + trans_bitrate: 256000 + trans_cod: + fmt: "STEREO" + trans_dec: + fmt: "STEREO" + c04: + type: ivas_split_rend_full + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL_SPLIT_CODED" + split_rend: + fmt: "BINAURAL" + bitrate: 256000 + dof: 0 + framesize: 10 + +################################################ +### Post-processing +################################################ +postprocessing: + fmt: "BINAURAL" + fs: 48000 + loudness: -26 diff --git a/experiments/selection_isar/BS1534-1a/proc_input_a/.gitkeep b/experiments/selection_isar/BS1534-1a/proc_input_a/.gitkeep new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-1a/proc_output_a/.gitkeep b/experiments/selection_isar/BS1534-1a/proc_output_a/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-2a/.gitkeep b/experiments/selection_isar/BS1534-2a/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-2a/config/.gitkeep b/experiments/selection_isar/BS1534-2a/config/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-2a/config/BS1534-2a.yml b/experiments/selection_isar/BS1534-2a/config/BS1534-2a.yml new file mode 100644 index 0000000000000000000000000000000000000000..0073495830bde34ed5fa428023c6f02bb6e6b36c --- /dev/null +++ b/experiments/selection_isar/BS1534-2a/config/BS1534-2a.yml @@ -0,0 +1,91 @@ +--- +################################################ +# General configuration +################################################ + +name: BS1534-2a +master_seed: 25 +prerun_seed: 2 + +input_path: "experiments/selection/BS1534-2a/proc_input" +output_path: "experiments/selection/BS1534-2a/proc_output" +use_windows_codec_binaries: true +condition_in_output_filename: true + +################################################ +### Input configuration +################################################ +input: + fmt: "7_1_4" + fs: 48000 + aligned_to: + len: 20 + +################################################ +### Pre-processing on individual items +################################################ +preprocessing: + mask: "20KBP" + loudness: -26 + window: 100 + +################################################ +### Pre-processing on whole signal(s) 
+################################################ +preprocessing_2: + concatenate_input: false + preamble_noise: false + repeat_signal: true + +################################################ +### Configuration for conditions under test +################################################ +conditions_to_generate: + ### IVAS conditions ############################### + c01: + type: ivas + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + c02: + type: ivas + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + out_fc: 7000 + c03: + type: ivas_transcoding + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + trans_bitrate: 256000 + trans_cod: + fmt: "STEREO" + trans_dec: + fmt: "STEREO" + c04: + type: ivas_split_rend_full + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL_SPLIT_CODED" + split_rend: + fmt: "BINAURAL" + bitrate: 256000 + dof: 0 + framesize: 10 + +################################################ +### Post-processing +################################################ +postprocessing: + fmt: "BINAURAL" + fs: 48000 + loudness: -26 diff --git a/experiments/selection_isar/BS1534-2a/proc_input_a/.gitkeep b/experiments/selection_isar/BS1534-2a/proc_input_a/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-2a/proc_output_a/.gitkeep b/experiments/selection_isar/BS1534-2a/proc_output_a/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-3a/.gitkeep b/experiments/selection_isar/BS1534-3a/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-3a/config/.gitkeep b/experiments/selection_isar/BS1534-3a/config/.gitkeep new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-3a/config/BS1534-3a.yml b/experiments/selection_isar/BS1534-3a/config/BS1534-3a.yml new file mode 100644 index 0000000000000000000000000000000000000000..bbb1ceabd53523ba72ffe1c0149c278c1a608952 --- /dev/null +++ b/experiments/selection_isar/BS1534-3a/config/BS1534-3a.yml @@ -0,0 +1,91 @@ +--- +################################################ +# General configuration +################################################ + +name: BS1534-3a +master_seed: 25 +prerun_seed: 2 + +input_path: "experiments/selection/BS1534-3a/proc_input" +output_path: "experiments/selection/BS1534-3a/proc_output" +use_windows_codec_binaries: true +condition_in_output_filename: true + +################################################ +### Input configuration +################################################ +input: + fmt: "ISM4" + fs: 48000 + aligned_to: + len: 20 + +################################################ +### Pre-processing on individual items +################################################ +preprocessing: + mask: "20KBP" + loudness: -26 + window: 100 + +################################################ +### Pre-processing on whole signal(s) +################################################ +preprocessing_2: + concatenate_input: false + preamble_noise: false + repeat_signal: true + +################################################ +### Configuration for conditions under test +################################################ +conditions_to_generate: + ### IVAS conditions ############################### + c01: + type: ivas + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + c02: + type: ivas + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + out_fc: 7000 + c03: + type: ivas_transcoding + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL" + trans_bitrate: 256000 + trans_cod: + fmt: "STEREO" + trans_dec: + fmt: "STEREO" + c04: + type: 
ivas_split_rend_full + bitrates: + - 512000 + cod: + dec: + fmt: "BINAURAL_SPLIT_CODED" + split_rend: + fmt: "BINAURAL" + bitrate: 256000 + dof: 0 + framesize: 10 + +################################################ +### Post-processing +################################################ +postprocessing: + fmt: "BINAURAL" + fs: 48000 + loudness: -26 diff --git a/experiments/selection_isar/BS1534-3a/proc_input_a/.gitkeep b/experiments/selection_isar/BS1534-3a/proc_input_a/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-3a/proc_output_a/.gitkeep b/experiments/selection_isar/BS1534-3a/proc_output_a/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-4a/.gitkeep b/experiments/selection_isar/BS1534-4a/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-4a/config/.gitkeep b/experiments/selection_isar/BS1534-4a/config/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-4a/config/BS1534-4a.yml b/experiments/selection_isar/BS1534-4a/config/BS1534-4a.yml new file mode 100644 index 0000000000000000000000000000000000000000..e408cb57d4c20f6efa0f127b7f430fb2c1b62e13 --- /dev/null +++ b/experiments/selection_isar/BS1534-4a/config/BS1534-4a.yml @@ -0,0 +1,95 @@ +--- +################################################ +# General configuration +################################################ + +name: BS1534-4a +master_seed: 25 +prerun_seed: 2 + +input_path: "experiments/selection/BS1534-4a/proc_input" +output_path: "experiments/selection/BS1534-4a/proc_output" +use_windows_codec_binaries: true +condition_in_output_filename: true + 
+################################################ +### Input configuration +################################################ +input: + fmt: "HOA2" + fs: 48000 + aligned_to: + len: 20 + +################################################ +### Pre-processing on individual items +################################################ +preprocessing: + mask: "20KBP" + loudness: -26 + window: 100 + +################################################ +### Pre-processing on whole signal(s) +################################################ +preprocessing_2: + concatenate_input: false + preamble_noise: false + repeat_signal: true + +################################################ +### Configuration for conditions under test +################################################ +conditions_to_generate: + ### IVAS conditions ############################### + c01: + type: ivas + bitrates: + - 512000 + cod: + fmt: "MASA2DIR2" + dec: + fmt: "BINAURAL" + c02: + type: ivas + bitrates: + - 512000 + cod: + fmt: "MASA2DIR2" + dec: + fmt: "BINAURAL" + out_fc: 7000 + c03: + type: ivas_transcoding + bitrates: + - 512000 + cod: + fmt: "MASA2DIR2" + dec: + fmt: "BINAURAL" + trans_bitrate: 256000 + trans_cod: + fmt: "STEREO" + trans_dec: + fmt: "STEREO" + c04: + type: ivas_split_rend_full + bitrates: + - 512000 + cod: + fmt: "MASA2DIR2" + dec: + fmt: "BINAURAL_SPLIT_CODED" + split_rend: + fmt: "BINAURAL" + bitrate: 256000 + dof: 0 + framesize: 10 + +################################################ +### Post-processing +################################################ +postprocessing: + fmt: "BINAURAL" + fs: 48000 + loudness: -26 diff --git a/experiments/selection_isar/BS1534-4a/proc_input_a/.gitkeep b/experiments/selection_isar/BS1534-4a/proc_input_a/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/BS1534-4a/proc_output_a/.gitkeep b/experiments/selection_isar/BS1534-4a/proc_output_a/.gitkeep 
new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/experiments/selection_isar/README.md b/experiments/selection_isar/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6224ade3e8b004cc6d3f0fa64602ca8c02221618 --- /dev/null +++ b/experiments/selection_isar/README.md @@ -0,0 +1,86 @@ + + +# IVAS ISAR Selection Experiments + +This directory contains input and configuration files for the ISAR selection tests based on [S4-240396](https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/TSGS4_127_Sophia-Antipolis/Docs/S4-240396.zip). + +All tests use the BS.1534 (MUSHRA) test methodology. + +## Experiments + +| Experiment | Input format | +| :--------: | :----------: | +| BS1534-1a | HOA3 | +| BS1534-2a | MASA | +| BS1534-3a | 7_1_4 | +| BS1534-4a | ISM4 | + +Note: BS1534-2a requires FOA input files to generate MASA2DIR1. To use MASA2DIR2 (two directions) HOA2 must be used. + +## Setup instructions + +1. All required binaries should be available in $PATH or in the [bin directory](../../ivas_processing_scripts/bin/). + - Ensure split rendering is enabled for the relevant binaries. + - To use a specific binary for an IVAS condition, the key `bin` should be added under either the parent keys `cod`, `dec` or `split_rend` to point to `IVAS_cod`, `IVAS_dec` or `IVAS_rend` respectively. +2. Input files should be placed in the respective directories according to format: + - For head-tracking trajectories, the files must be named `item_name.wav.pre.csv` and `item_name.wav.post.csv` for split-prerendering and post-rendering trajectories respectively. The prerendering trajectory is only used for split rendering. The post rendering trajectory is used as the trajectory for the final stage, regardless of split rendering. + - For ISM metadata, files must be named `item_name.wav.0.csv` - `item_name.wav.3.csv` as for the regular IVAS selection tests. +3. 
Execute `python generate_test_isar.py BS1534-1a,a` where the input argument is the relevant experiment-lab pair. Currently only one lab per experiment is configured. + +## Preprocessing for trajectory nullification + +For setting up a test with trajectory nullification, the input items must be preprocessed according to the specific trajectory. + +This preprocessing consists of two main steps. First the trajectory must be inverted to apply the inverse rotation to the source audio. + +This can be done with the help of the [trajectories submodule](../../ivas_processing_scripts/trajectories/README.md) by running the command: + +```sh +python -m ivas_processing_scripts.trajectories --invert in.csv out_inverted.csv +``` + +Then the input item needs to be rotated in place using the inverted trajectory, which can be performed using the `--in_trajectory` argument of the [audiotools module](../../examples/audiotools.ipynb): + +```sh +python -m ivas_processing_scripts.audiotools -i input.wav -if IN_FORMAT -of IN_FORMAT -it out_inverted.csv -o out.wav +``` + +Inversion may be performed directly for SBA and MC formats. + +For ISM format it is only possible to perform this on a 20ms basis due to metadata granularity. + +For MASA format, the masaAnalyzer must be run on an inverted SBA input to generate the corresponding MASA transport channels and metadata. + +Refer to [_Tdoc S4-240254: Trajectory Nullification for Binaural Renderer Evaluation, Fraunhofer IIS_](https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/TSGS4_127_Sophia-Antipolis/Docs/S4-240254.zip) for further details. + +After generation of the "inverted" item, this may be directly used as input for the processing scripts. 
diff --git a/generate_test.py b/generate_test.py index a3b9b4a6aeb11148ec2b6b75b3465e97245f84f0..c5dff6df3d625744f47be7cf7435026ad07ce0d3 100755 --- a/generate_test.py +++ b/generate_test.py @@ -162,7 +162,7 @@ def create_experiment_setup(experiment, lab) -> list[Path]: # bg noise SNR only differs from default config for some experiments if experiment in ["P800-5", "P800-9"] and cat_num >= 3: bg_noise_pre_proc_2["snr"] = 15 - if cfg.preprocessing_2.get("concatenate_input", None) is not None: + if cfg.preprocessing_2.get("concatenate_input", None): cfg.preprocessing_2["concatenation_order"] = concatenation_order( lab, experiment, cat_num ) diff --git a/generate_test_isar.py b/generate_test_isar.py new file mode 100755 index 0000000000000000000000000000000000000000..d0a90c3bac7ba73604aebb3239e4843802fd588c --- /dev/null +++ b/generate_test_isar.py @@ -0,0 +1,161 @@ +#! /usr/bin/env python3 + +# +# (C) 2022-2024 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. 
This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+# + +import argparse +from pathlib import Path + +from ivas_processing_scripts import config +from ivas_processing_scripts import main as generate_test + +HERE = Path(__file__).parent.absolute() +LAB_IDS = ["a", "b"] +EXPERIMENTS_BS1534 = [f"BS1534-{i}{x}" for i in range(1, 5) for x in LAB_IDS] + + +def generate_tests(exp_lab_pairs, create_cfg_only=False): + """ + Create configs and run them for all given experiment/lab pairs + """ + # get config paths for all given experiment/lab combis + cfgs = [create_experiment_setup(exp, lab) for exp, lab in exp_lab_pairs] + # flatten into single list + cfgs = [c for cl in cfgs for c in cl] + + if create_cfg_only: + print("Configs generated:") + print("\n".join(["- " + cfg.name for cfg in cfgs])) + return + + for cfg in cfgs: + generate_test(Arguments(str(cfg))) + + +class Arguments: + def __init__(self, config): + self.config = config + self.debug = False + + +def create_experiment_setup(experiment, lab) -> list[Path]: + """ + Create the config files for all categories for the given experiment and lab id. 
+ """ + default_cfg_path = HERE.joinpath( + f"experiments/selection_isar/{experiment}/config/{experiment}.yml" + ) + + num_categories = 1 + categories = [f"cat{i}" for i in range(1, num_categories + 1)] + + # calculate the seed value according to processing plan + experiments = EXPERIMENTS_BS1534 + seed = 101 + experiments.index(experiment) * 4 + LAB_IDS.index(lab) + + base_path = HERE.joinpath(f"experiments/selection_isar/{experiment}") + + cfgs = list() + for cat in categories: + suffix = cat + f"-lab_{lab}" + + input_path = base_path.joinpath(f"proc_input_{lab}") + output_path = base_path.joinpath(f"proc_output_{lab}") + + cfg_path = default_cfg_path.parent.joinpath(f"{experiment}-lab_{lab}.yml") + + cfgs.append(cfg_path) + + # set new lab- and category-dependent values + cfg = config.TestConfig(default_cfg_path) + cfg.name = f"{experiment}{suffix}" + cfg.prerun_seed = seed + cfg.input_path = str(input_path) + cfg.output_path = str(output_path) + + cat_num = int(cat[-1]) + + if cfg.preprocessing_2.get("concatenate_input", None): + cfg.preprocessing_2["concatenation_order"] = concatenation_order( + lab, experiment, cat_num + ) + + # ensure that necessary directories are there + input_path.mkdir(parents=True, exist_ok=True) + output_path.mkdir(parents=True, exist_ok=True) + + # write out config + cfg.to_file(cfg_path) + + # Return the list of configs that were generated. Not strictly necessary, but makes testing easier. + return cfgs + + +def exp_lab_pair(arg): + """ + Validation function for command line input + """ + exp, lab = arg.split(",") + + msg = "'{}' is not a valid {}. 
Possible values are: {}" + if exp not in EXPERIMENTS_BS1534: + experiments_msg = ",".join(EXPERIMENTS_BS1534) + err_msg = msg.format(exp, "experiment name", f"{{{experiments_msg}}}.") + raise ValueError(err_msg) + if lab not in LAB_IDS: + labs_msg = ",".join(LAB_IDS) + err_msg = msg.format(lab, "lab identifier", labs_msg) + raise ValueError(err_msg) + + return exp, lab + + +def concatenation_order(lab_id, experiment, category): + exp_id = f"p0{experiment[-1]}" + return [f"{lab_id}{exp_id}a{category}s0{i}.wav" for i in range(1, 8)] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Generate config files and process files for selecton experiments. Experiment names and lab ids must be given as comma-separated pairs (e.g. 'P800-5,b BS1534-4a,d ...')" + ) + parser.add_argument( + "exp_lab_pairs", + type=exp_lab_pair, + nargs="+", + help="The combinations of experiment/lab-id that you want to generate, separated by whitespace. Experiment and lab id need to be separated by a comma.", + ) + parser.add_argument( + "--create_cfg_only", + action="store_true", + help="If given, only create the configs and folder structure without processing items", + ) + args = parser.parse_args() + generate_tests(args.exp_lab_pairs, args.create_cfg_only) diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index 093504f54a8d8bfd0c2c4dc079618c9a7dc727e7..3d82c7307004f99089f81c2901c532702661f71d 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -36,7 +36,10 @@ from itertools import product from multiprocessing import Pool from time import sleep -from ivas_processing_scripts.audiotools.metadata import check_ISM_metadata, check_MASA_metadata +from ivas_processing_scripts.audiotools.metadata import ( + check_ISM_metadata, + check_MASA_metadata, +) from ivas_processing_scripts.constants import ( LOGGER_DATEFMT, LOGGER_FORMAT, @@ -50,6 +53,7 @@ from 
ivas_processing_scripts.processing.processing import ( process_item, reorder_items_list, ) +from ivas_processing_scripts.trajectories.trajectories import check_trajectories from ivas_processing_scripts.utils import ( DirManager, apply_func_parallel, @@ -92,6 +96,8 @@ def main(args): if hasattr(args, "multiprocessing"): cfg.multiprocessing = args.multiprocessing + traj_input_path = cfg.input_path + # set up processing chains chains.init_processing_chains(cfg) @@ -149,11 +155,19 @@ def main(args): for i, meta in enumerate(metadata): meta.extend(metadata_MASA[i]) - if not cfg.input["fmt"].startswith("ISM") and not "MASA" in cfg.input["fmt"]: + if not (cfg.input["fmt"].startswith("ISM") or "MASA" in cfg.input["fmt"]): metadata = [None] * len(cfg.items_list) cfg.metadata_path = metadata + # check for head tracking trajectories in input path (items might be copied to 20ms aligned folder) + trajectories = check_trajectories(cfg.items_list, traj_input_path) + if trajectories: + cfg.trajectories = trajectories + # print info about found and used trajectories + for i, t in zip(cfg.trajectories, cfg.items_list): + logger.debug(f" Head tracking trajectory pair {i}: {t}") + # run preprocessing only once if hasattr(cfg, "preprocessing"): # save process info for background noise @@ -179,12 +193,20 @@ def main(args): logger.info(" Generating remaining conditions with postprocessing") item_args = list() - for (chain, tmp_dir, out_dir), (item, metadata) in product( + for (chain, tmp_dir, out_dir), (item, metadata, trajectories) in product( zip(cfg.proc_chains, cfg.tmp_dirs, cfg.out_dirs), - zip(cfg.items_list, cfg.metadata_path), + zip(cfg.items_list, cfg.metadata_path, cfg.trajectories), ): item_args.append( - (item, tmp_dir, out_dir, chain["processes"], logger, metadata) + ( + item, + tmp_dir, + out_dir, + chain["processes"], + logger, + metadata, + trajectories, + ) ) if cfg.multiprocessing: diff --git a/ivas_processing_scripts/audiotools/__init__.py 
b/ivas_processing_scripts/audiotools/__init__.py index 7cb4f06c59a106e20cde38d256cd6af8906b96a9..a3d1ed1d9d87831d302cf69dc617811f1b79445d 100755 --- a/ivas_processing_scripts/audiotools/__init__.py +++ b/ivas_processing_scripts/audiotools/__init__.py @@ -96,13 +96,20 @@ def add_processing_args(group, input=True): default=None, ) group.add_argument( - f"-{ps}t", + f"-{ps}x", f"--{p}_trim", type=float, nargs=2, metavar=("PRE_TRIM", "POST_TRIM"), help="Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence), (default = %(default)s)", ) + group.add_argument( + f"-{ps}t", + f"--{p}_trajectory", + type=str, + help="Head-tracking trajectory file for input pre-rotation or binaural output (default = %(default)s)", + default=None, + ) group.add_argument( f"-{ps}pn", f"--{p}_pad_noise", @@ -166,13 +173,6 @@ def get_args(): help="Apply limiting to output (default = %(default)s)", action="store_true", ) - output_parser.add_argument( - "-t", - "--trajectory", - type=str, - help="Head-tracking trajectory file for binaural output (default = %(default)s)", - default=None, - ) output_parser.add_argument( "-bd", "--bin_dataset", diff --git a/ivas_processing_scripts/audiotools/audio.py b/ivas_processing_scripts/audiotools/audio.py index 213f5c2b67b309f30e32645f2bddc8bce1937ffe..4733136694cf1344d4e199ee6e3fcd27387c588b 100755 --- a/ivas_processing_scripts/audiotools/audio.py +++ b/ivas_processing_scripts/audiotools/audio.py @@ -47,9 +47,9 @@ from ivas_processing_scripts.audiotools.constants import ( METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, NUMBER_COLUMNS_ISM_METADATA, OBJECT_BASED_AUDIO_FORMATS, - SCENE_BASED_AUDIO_FORMATS, OMASA_AUDIO_FORMATS, OSBA_AUDIO_FORMATS, + SCENE_BASED_AUDIO_FORMATS, ) from .EFAP import wrap_angles @@ -63,7 +63,6 @@ class Audio(ABC): self.audio = None self.fs = None self.num_channels = None - # self.logger = None # TODO needed? 
def __repr__(self): return f"{self.__class__} : {self.__dict__}" @@ -231,7 +230,9 @@ class MetadataAssistedSpatialAudio(Audio): obj = super()._from_file(name, filename, fs) if isinstance(metadata_file, list): if len(metadata_file) > 1: - warn("Only first metadata file used. Additional metadata ignored for MASA") + warn( + "Only first metadata file used. Additional metadata ignored for MASA" + ) obj.metadata_file = Path(metadata_file[0]) else: obj.metadata_file = Path(metadata_file) @@ -391,6 +392,7 @@ class SceneBasedAudio(Audio): class OMASAAudio(Audio): """Sub-class for combined OMASA format""" + def __init__(self, name: str): super().__init__(name) try: @@ -443,7 +445,7 @@ class OMASAAudio(Audio): def init_metadata(self): # check if number of metadata files matches format - if self.num_ism_channels != len(self.metadata_files)-1: + if self.num_ism_channels != len(self.metadata_files) - 1: raise ValueError( f"Mismatch between number of ism channels [{self.num_ism_channels}], and metadata [{len(self.metadata_files)}]. 
Note: metadata should also include masa metadata file" ) @@ -503,7 +505,7 @@ class OSBAAudio(Audio): raise ValueError(f"Unsupported OSBA audio format {name}") self.object_pos = [] self.metadata_files = [] - self.ambi_order = int(np.sqrt(self.num_channels-self.num_ism_channels) - 1) + self.ambi_order = int(np.sqrt(self.num_channels - self.num_ism_channels) - 1) @classmethod def _from_file( @@ -604,7 +606,8 @@ def _get_audio_class(fmt) -> Audio: elif fmt in SCENE_BASED_AUDIO_FORMATS.keys(): return SceneBasedAudio elif ( - fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() or fmt in CHANNEL_BASED_AUDIO_ALTNAMES.keys() + fmt in CHANNEL_BASED_AUDIO_FORMATS.keys() + or fmt in CHANNEL_BASED_AUDIO_ALTNAMES.keys() ): return ChannelBasedAudio elif fmt in OSBA_AUDIO_FORMATS.keys(): @@ -643,7 +646,12 @@ def fromfile( """Create an Audio object of the specified format from the given file""" filename = Path(filename) fmt_cls = _get_audio_class(fmt) - if fmt_cls is ObjectBasedAudio or fmt_cls is MetadataAssistedSpatialAudio or fmt_cls is OMASAAudio or fmt_cls is OSBAAudio: + if ( + fmt_cls is ObjectBasedAudio + or fmt_cls is MetadataAssistedSpatialAudio + or fmt_cls is OMASAAudio + or fmt_cls is OSBAAudio + ): return fmt_cls._from_file(fmt, filename, in_meta, fs) else: return fmt_cls._from_file(fmt, filename, fs) diff --git a/ivas_processing_scripts/audiotools/audiofile.py b/ivas_processing_scripts/audiotools/audiofile.py index a587187557c79774660a4630e8e0c1710323b757..fa3ef05d28b6dbd34a28f9a6cfe28f8d68805990 100755 --- a/ivas_processing_scripts/audiotools/audiofile.py +++ b/ivas_processing_scripts/audiotools/audiofile.py @@ -343,7 +343,13 @@ def combine( # set vertical channels to zero if is_planar: - y[:, VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < (len(in_filenames) - is_planar_offset)] + is_planar_offset] = 0 + y[ + :, + VERT_HOA_CHANNELS_ACN[ + VERT_HOA_CHANNELS_ACN < (len(in_filenames) - is_planar_offset) + ] + + is_planar_offset, + ] = 0 write(out_file, y, fs=in_fs) @@ -394,7 
+400,13 @@ def split_channels( x, in_fs = read(in_file, nchannels=in_nchans, fs=in_fs) if is_planar: - x[:, VERT_HOA_CHANNELS_ACN[VERT_HOA_CHANNELS_ACN < (in_nchans - is_planar_offset)] + is_planar_offset] = 0 + x[ + :, + VERT_HOA_CHANNELS_ACN[ + VERT_HOA_CHANNELS_ACN < (in_nchans - is_planar_offset) + ] + + is_planar_offset, + ] = 0 # Write output files for idx, out_file in enumerate(out_filenames): diff --git a/ivas_processing_scripts/audiotools/constants.py b/ivas_processing_scripts/audiotools/constants.py index 14fe6abd34a3a71fa3ee1f0d19f644adbcaeabc7..e1288da1a90ac2ac67b1c619ca0b474dd24b1bc0 100755 --- a/ivas_processing_scripts/audiotools/constants.py +++ b/ivas_processing_scripts/audiotools/constants.py @@ -45,6 +45,9 @@ BINAURAL_AUDIO_FORMATS = { "BINAURAL_ROOM_REVERB": { # for IVAS_dec and IVAS_rend "num_channels": 2, }, + "BINAURAL_SPLIT_CODED": { # for IVAS_dec and IVAS_rend + "num_channels": 2, + }, } BINAURAL_LFE_GAIN = 10 ** (5.5 / 20) diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 46834e1c75d3c8e6eab42851480aeaa0c463e018..cb63cbeb6ac3f544240e0c34499a45c7d62b6ff2 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -31,21 +31,19 @@ # import logging +from copy import copy from pathlib import Path, PurePath from shutil import copyfile from typing import Optional, Union -from copy import copy - -from numpy import empty from ivas_processing_scripts.audiotools import audio, audioarray, metadata from ivas_processing_scripts.audiotools.audiofile import write from ivas_processing_scripts.audiotools.convert.channelbased import convert_channelbased from ivas_processing_scripts.audiotools.convert.masa import convert_masa from ivas_processing_scripts.audiotools.convert.objectbased import convert_objectbased -from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased -from 
ivas_processing_scripts.audiotools.convert.osba import convert_osba from ivas_processing_scripts.audiotools.convert.omasa import convert_omasa +from ivas_processing_scripts.audiotools.convert.osba import convert_osba +from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm from ivas_processing_scripts.audiotools.wrappers.esdru import esdru, spatial_distortion from ivas_processing_scripts.audiotools.wrappers.filter import ( @@ -161,7 +159,9 @@ def convert_file( # write output audio write(out_file, output.audio, output.fs) # write metadata - if isinstance(output, audio.ObjectBasedAudio) or isinstance(output, audio.OSBAAudio): + if isinstance(output, audio.ObjectBasedAudio) or isinstance( + output, audio.OSBAAudio + ): write_ISM_metadata_in_file(output.object_pos, [out_file], automatic_naming=True) elif isinstance(output, audio.MetadataAssistedSpatialAudio) and in_fmt == out_fmt: # audio objects point to same MD file, create new one with default naming for output @@ -175,6 +175,7 @@ def convert_file( out_md_name = out_file.parent / (out_file.name + ".met") copyfile(output.metadata_files[-1], out_md_name) + def convert( input: audio.Audio, output: audio.Audio, @@ -263,7 +264,7 @@ def process_audio( spatial_distortion_frequency: Optional[float] = None, logger: Optional[logging.Logger] = None, ) -> None: - """Perform (pre-/pos-) processing of audio""" + """Perform (pre-/post-) processing of audio""" if fs is None: fs = x.fs @@ -365,8 +366,7 @@ def format_conversion( ): raise NotImplementedError("Can only convert to MASA from SBA") if isinstance(output, audio.OMASAAudio) and not ( - isinstance(input, audio.OSBAAudio) - or isinstance(input, audio.OMASAAudio) + isinstance(input, audio.OSBAAudio) or isinstance(input, audio.OMASAAudio) ): raise NotImplementedError("Can only convert to OMASA from OSBA") @@ -376,7 +376,9 @@ def format_conversion( "ISM is not supported as an 
output for rendering! Only usable as pass-through" ) if isinstance(output, audio.OMASAAudio) or isinstance(output, audio.OSBAAudio): - if not (isinstance(input, audio.OMASAAudio) or isinstance(input, audio.OSBAAudio)): + if not ( + isinstance(input, audio.OMASAAudio) or isinstance(input, audio.OSBAAudio) + ): raise NotImplementedError( "OMASA and OSBA only possible as output if input is OMASA or OSBA" ) @@ -386,9 +388,9 @@ def format_conversion( # format conversion # check if input and output format are the same - if (fmt := input.name) == output.name or ( - input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL") - ): + if ( + ((fmt := input.name) == output.name) and kwargs.get("in_trajectory") is None + ) or (input.name.startswith("BINAURAL") and output.name.startswith("BINAURAL")): output.audio = input.audio if fmt.startswith("MASA"): output.metadata_file = input.metadata_file diff --git a/ivas_processing_scripts/audiotools/convert/channelbased.py b/ivas_processing_scripts/audiotools/convert/channelbased.py index dbb8b160ed46b26f3800d6c411402323f718e012..05c60abb0e0ec0d185da2b0fe2ccb66ca8283714 100755 --- a/ivas_processing_scripts/audiotools/convert/channelbased.py +++ b/ivas_processing_scripts/audiotools/convert/channelbased.py @@ -57,9 +57,15 @@ from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu def convert_channelbased( cba: audio.ChannelBasedAudio, out: audio.Audio, + in_trajectory: Optional[Union[str, Path]] = None, **kwargs, ) -> audio.Audio: """Convert channel-based audio to the requested output format""" + + # pre-rotation if specified + if in_trajectory is not None: + cba.audio = rotate_cba(cba, in_trajectory) + # CBA -> Binaural if isinstance(out, audio.BinauralAudio): render_cba_to_binaural(cba, out, **kwargs) @@ -83,7 +89,7 @@ def convert_channelbased( def render_cba_to_binaural( cba: audio.ChannelBasedAudio, bin: audio.BinauralAudio, - trajectory: Optional[Union[str, Path]] = None, + out_trajectory: 
Optional[Union[str, Path]] = None, bin_dataset: Optional[str] = None, bin_lfe_gain: Optional[float] = None, **kwargs, @@ -97,7 +103,7 @@ def render_cba_to_binaural( Channel-based input audio bin: audio.BinauralAudio Binaural output audio - trajectory: Optional[Union[str, Path]] + out_trajectory: Optional[Union[str, Path]] Head rotation trajectory path bin_dataset: Optional[str] Name of binaural dataset wihtout prefix or suffix @@ -122,8 +128,8 @@ def render_cba_to_binaural( cba.fs = 48000 bin.fs = 48000 - if trajectory is not None: - cba.audio = rotate_cba(cba, trajectory) + if out_trajectory is not None: + cba.audio = rotate_cba(cba, out_trajectory) IR, _, latency_smp = load_ir(cba.name, bin.name, bin_dataset) @@ -151,7 +157,7 @@ def render_custom_ls_binaural( output: audio.BinauralAudio, IR: np.ndarray, SourcePosition: np.ndarray, - trajectory: str, + out_trajectory: str, ): # TODO rework impl. (with EFAP) # logger.info(" Processing channels on custom LS layout") @@ -192,7 +198,7 @@ def render_custom_ls_binaural( # ls_azi = np.repeat(ls_azi_all[i_ls], N_frames) # ls_ele = np.repeat(ls_ele_all[i_ls], N_frames) # - # azi, ele = rotateISM(ls_azi, ls_ele, trajectory=trajectory) + # azi, ele = rotateISM(ls_azi, ls_ele, trajectory=out_trajectory) # # y += binaural_fftconv_framewise( # custom_ls.audio[:, i_chan], @@ -205,7 +211,7 @@ def render_custom_ls_binaural( # i_ls += 1 # # return y - return + raise NotImplementedError("Custom LS rendering currently not implemented!") def render_cba_to_cba( diff --git a/ivas_processing_scripts/audiotools/convert/masa.py b/ivas_processing_scripts/audiotools/convert/masa.py index c33e49f9dee4b955e23dd05298129ff9223c955e..d3cb80bcbaa111f80167a8b47c97a2443bde24b5 100755 --- a/ivas_processing_scripts/audiotools/convert/masa.py +++ b/ivas_processing_scripts/audiotools/convert/masa.py @@ -71,7 +71,8 @@ def convert_masa( def render_masa_to_binaural( masa: audio.MetadataAssistedSpatialAudio, bin: audio.BinauralAudio, - trajectory: 
Optional[Union[str, Path]] = None, + in_trajectory: Optional[Union[str, Path]] = None, + out_trajectory: Optional[Union[str, Path]] = None, bin_dataset: Optional[str] = None, **kwargs, ) -> None: @@ -84,7 +85,9 @@ def render_masa_to_binaural( MASA input audio bin: audio.BinauralAudio Output binaural audio - trajectory: Optional[Union[str, Path]] + in_trajectory: Optional[Union[str, Path]] + Head rotation trajectory path + out_trajectory: Optional[Union[str, Path]] Head rotation trajectory path bin_dataset: Optional[str] Name of binaural dataset without prefix or suffix @@ -96,11 +99,11 @@ def render_masa_to_binaural( render_masa_to_cba(masa, cba_tmp) - channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory) + channelbased.render_cba_to_binaural(cba_tmp, bin, out_trajectory) else: - if trajectory is not None: + if in_trajectory is not None or out_trajectory is not None: warn( - f"Head-rotation not supported by MasaRenderer! Trajectory {trajectory} will be ignored!" + f"Head-rotation not supported by MasaRenderer! Trajectory {in_trajectory or out_trajectory} will be ignored!" ) if bin_dataset is not None: warn( diff --git a/ivas_processing_scripts/audiotools/convert/objectbased.py b/ivas_processing_scripts/audiotools/convert/objectbased.py index 94c3f79285b2b8eac73d3cadce4643d79e2d4843..5ad911b5864a5c39d2c377e0bd32e409effa2442 100755 --- a/ivas_processing_scripts/audiotools/convert/objectbased.py +++ b/ivas_processing_scripts/audiotools/convert/objectbased.py @@ -60,10 +60,24 @@ from ivas_processing_scripts.utils import apply_func_parallel def convert_objectbased( oba: audio.ObjectBasedAudio, out: audio.Audio, + in_trajectory: Optional[Union[str, Path]] = None, **kwargs, ) -> audio.Audio: """Convert an ISM signal to the requested output format""" + # pre-rotation if specified - 20ms only! 
+ if in_trajectory is not None: + # repeat each value four times since head rotation data is on sub-frame basis + azi = np.repeat(oba.obj_pos[:, 0], 4) + ele = np.repeat(oba.obj_pos[:, 1], 4) + + # apply head-rotation trajectory + azi, ele = rotate_oba(azi, ele, in_trajectory) + + # update object metadata + oba.obj_pos[:, 0] = azi[:, ::4] + oba.obj_pos[:, 1] = ele[:, ::4] + # OBA -> Binaural if isinstance(out, audio.BinauralAudio): render_oba_to_binaural(oba, out, **kwargs) @@ -86,7 +100,7 @@ def convert_objectbased( def render_oba_to_binaural( oba: audio.ObjectBasedAudio, bin: audio.BinauralAudio, - trajectory: Optional[Union[str, Path]] = None, + out_trajectory: Optional[Union[str, Path]] = None, bin_dataset: Optional[str] = None, **kwargs, ) -> None: @@ -99,21 +113,19 @@ def render_oba_to_binaural( Object based input audio bin: audio.BinauralAudio Binaural output audio - trajectory: Optional[Union[str, Path]] + out_trajectory: Optional[Union[str, Path]] Head rotation trajectory bin_dataset: Optional[str] Name of binaural dataset, if None default dataset is used """ - # bin.audio = np.zeros([oba.audio.shape[0], bin.num_channels]) - if "ROOM" in bin.name: cba_tmp = audio.fromtype("7_1_4") cba_tmp.fs = oba.fs render_oba_to_cba(oba, cba_tmp) - render_cba_to_binaural(cba_tmp, bin, trajectory) + render_cba_to_binaural(cba_tmp, bin, out_trajectory) else: IR, SourcePosition, latency_smp = load_ir(oba.name, bin.name, bin_dataset) @@ -130,7 +142,7 @@ def render_oba_to_binaural( obj_idx, obj_pos, repeat(oba), - repeat(trajectory), + repeat(out_trajectory), repeat(IR), repeat(SourcePosition), ), diff --git a/ivas_processing_scripts/audiotools/convert/omasa.py b/ivas_processing_scripts/audiotools/convert/omasa.py index 27dcdd50805b8310cf8e4dacf9ec5655cc367800..68c66cbad16d4b8867f68f4dbfa46b3f1bfc535b 100644 --- a/ivas_processing_scripts/audiotools/convert/omasa.py +++ b/ivas_processing_scripts/audiotools/convert/omasa.py @@ -30,12 +30,20 @@ # from copy import copy, deepcopy 
+ import numpy as np from ivas_processing_scripts.audiotools import audio -from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \ - render_oba_to_sba -from ivas_processing_scripts.audiotools.convert.masa import render_masa_to_binaural, render_masa_to_cba, render_masa_to_sba +from ivas_processing_scripts.audiotools.convert.masa import ( + render_masa_to_binaural, + render_masa_to_cba, + render_masa_to_sba, +) +from ivas_processing_scripts.audiotools.convert.objectbased import ( + render_oba_to_binaural, + render_oba_to_cba, + render_oba_to_sba, +) """ OMASAAudio functions """ @@ -48,10 +56,21 @@ def convert_omasa( """Convert an OMASA signal to the requested output format""" # split OMASA object in ISM and MASA object - oba = audio.fromarray("ISM" + str(omasa.num_ism_channels), omasa.audio[:, :omasa.num_ism_channels], omasa.fs) + oba = audio.fromarray( + "ISM" + str(omasa.num_ism_channels), + omasa.audio[:, : omasa.num_ism_channels], + omasa.fs, + ) oba.metadata_files = copy(omasa.metadata_files) oba.object_pos = copy(omasa.object_pos) - masa = audio.fromarray("MASA" + str(omasa.num_channels-omasa.num_ism_channels) + "DIR" + str(omasa.dirs), omasa.audio[:, omasa.num_ism_channels:], omasa.fs) + masa = audio.fromarray( + "MASA" + + str(omasa.num_channels - omasa.num_ism_channels) + + "DIR" + + str(omasa.dirs), + omasa.audio[:, omasa.num_ism_channels :], + omasa.fs, + ) masa.metadata_file = omasa.metadata_files[-1] # OMASA -> Binaural @@ -100,13 +119,17 @@ def convert_omasa( elif isinstance(out, audio.OSBAAudio): # check if ism object number is the same if out.num_ism_channels != omasa.num_ism_channels: - raise ValueError("OMASA to OSBA conversion only possible if number of ISM objects matches") + raise ValueError( + "OMASA to OSBA conversion only possible if number of ISM objects matches" + ) # only render MASA part out_masa = deepcopy(out) render_masa_to_sba(masa, out_masa) - out.audio = 
np.concatenate((omasa.audio[:, :omasa.num_ism_channels], out_masa.audio), axis=1) + out.audio = np.concatenate( + (omasa.audio[:, : omasa.num_ism_channels], out_masa.audio), axis=1 + ) else: raise NotImplementedError( diff --git a/ivas_processing_scripts/audiotools/convert/osba.py b/ivas_processing_scripts/audiotools/convert/osba.py index 1e91fdf09ea158d2853d7e6d39ae003583ba3878..5301bf0f0f74e62d064e9d6a5d00fd15b427ce86 100644 --- a/ivas_processing_scripts/audiotools/convert/osba.py +++ b/ivas_processing_scripts/audiotools/convert/osba.py @@ -30,13 +30,21 @@ # from copy import copy, deepcopy + import numpy as np from ivas_processing_scripts.audiotools import audio -from ivas_processing_scripts.audiotools.convert.objectbased import render_oba_to_binaural, render_oba_to_cba, \ - render_oba_to_sba -from ivas_processing_scripts.audiotools.convert.scenebased import render_sba_to_binaural, render_sba_to_cba, \ - render_sba_to_sba, render_sba_to_masa +from ivas_processing_scripts.audiotools.convert.objectbased import ( + render_oba_to_binaural, + render_oba_to_cba, + render_oba_to_sba, +) +from ivas_processing_scripts.audiotools.convert.scenebased import ( + render_sba_to_binaural, + render_sba_to_cba, + render_sba_to_masa, + render_sba_to_sba, +) """ OSBAAudio functions """ @@ -49,10 +57,16 @@ def convert_osba( """Convert an OSBA signal to the requested output format""" # split OSBA object in ISM and SBA object - oba = audio.fromarray("ISM" + str(osba.num_ism_channels), osba.audio[:, :osba.num_ism_channels], osba.fs) + oba = audio.fromarray( + "ISM" + str(osba.num_ism_channels), + osba.audio[:, : osba.num_ism_channels], + osba.fs, + ) oba.metadata_files = copy(osba.metadata_files) oba.object_pos = copy(osba.object_pos) - sba = audio.fromarray("SBA" + str(osba.ambi_order), osba.audio[:, osba.num_ism_channels:], osba.fs) + sba = audio.fromarray( + "SBA" + str(osba.ambi_order), osba.audio[:, osba.num_ism_channels :], osba.fs + ) # OSBA -> Binaural if isinstance(out, 
audio.BinauralAudio): @@ -100,26 +114,34 @@ def convert_osba( elif isinstance(out, audio.OMASAAudio): # check if ism object number is the same if out.num_ism_channels != osba.num_ism_channels: - raise ValueError("OSBA to OMASA conversion only possible if number of ISM objects matches") + raise ValueError( + "OSBA to OMASA conversion only possible if number of ISM objects matches" + ) # only render SBA part out_sba = audio.fromtype(out.name[4:]) out_sba.metadata_file = out.metadata_files[-1] render_sba_to_masa(sba, out_sba) - out.audio = np.concatenate((osba.audio[:, :osba.num_ism_channels], out_sba.audio), axis=1) + out.audio = np.concatenate( + (osba.audio[:, : osba.num_ism_channels], out_sba.audio), axis=1 + ) # OSBA -> OSBA elif isinstance(out, audio.OSBAAudio): # check if ism object number is the same if out.num_ism_channels != osba.num_ism_channels: - raise ValueError("OSBA to OSBA conversion only possible if number of ISM objects matches") + raise ValueError( + "OSBA to OSBA conversion only possible if number of ISM objects matches" + ) # only render SBA part out_sba = audio.fromtype(out.name[4:]) render_sba_to_sba(sba, out_sba) - out.audio = np.concatenate((osba.audio[:, :osba.num_ism_channels], out_sba.audio), axis=1) + out.audio = np.concatenate( + (osba.audio[:, : osba.num_ism_channels], out_sba.audio), axis=1 + ) else: raise NotImplementedError( diff --git a/ivas_processing_scripts/audiotools/convert/scenebased.py b/ivas_processing_scripts/audiotools/convert/scenebased.py index 1239d674cb0bb71c06450c173a7af4415df8ebc1..5a7e4b48c44c6b9148a70c2a7e202747071bfa01 100755 --- a/ivas_processing_scripts/audiotools/convert/scenebased.py +++ b/ivas_processing_scripts/audiotools/convert/scenebased.py @@ -61,10 +61,15 @@ from ivas_processing_scripts.audiotools.wrappers.masaAnalyzer import masaAnalyze def convert_scenebased( sba: audio.SceneBasedAudio, out: audio.Audio, + in_trajectory: Optional[Union[str, Path]] = None, **kwargs, ) -> audio.Audio: """Convert 
scene-based audio to the requested output format""" + # pre-rotation if specified + if in_trajectory is not None: + sba.audio = rotate_sba(sba, in_trajectory) + # SBA -> Binaural if isinstance(out, audio.BinauralAudio): render_sba_to_binaural(sba, out, **kwargs) @@ -95,7 +100,7 @@ def convert_scenebased( def render_sba_to_binaural( sba: audio.SceneBasedAudio, bin: audio.BinauralAudio, - trajectory: Optional[Union[str, Path]] = None, + out_trajectory: Optional[Union[str, Path]] = None, bin_dataset: Optional[str] = None, **kwargs, ) -> None: @@ -108,14 +113,14 @@ def render_sba_to_binaural( Input SBA audio bin: audio.BinauralAudio Output binaural audio - trajectory: Optional[Union[str, Path]] + out_trajectory: Optional[Union[str, Path]] Head rotation trajectory path bin_dataset: Optional[str] Name of binaural dataset without prefix or suffix """ - if trajectory is not None: - sba.audio = rotate_sba(sba, trajectory) + if out_trajectory is not None: + sba.audio = rotate_sba(sba, out_trajectory) if "ROOM" in bin.name: cba_tmp = audio.fromtype("7_1_4") @@ -123,7 +128,7 @@ def render_sba_to_binaural( render_sba_to_cba(sba, cba_tmp) - channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory) + channelbased.render_cba_to_binaural(cba_tmp, bin, out_trajectory) else: IR, _, latency_smp = load_ir(sba.name, bin.name, bin_dataset) @@ -196,7 +201,6 @@ def render_sba_to_masa( sba_in: audio.SceneBasedAudio, masa_out: audio.MetadataAssistedSpatialAudio, ) -> None: - num_tcs = masa_out.num_channels md_out_path = masa_out.metadata_file diff --git a/ivas_processing_scripts/audiotools/metadata.py b/ivas_processing_scripts/audiotools/metadata.py index c5a270e4231d364c512a69d85314e4616684cf52..05c6ceda2b1b1421b4e7b385285ccf72d18cbc9c 100755 --- a/ivas_processing_scripts/audiotools/metadata.py +++ b/ivas_processing_scripts/audiotools/metadata.py @@ -552,7 +552,9 @@ def check_MASA_metadata( if not isinstance(current_item, list): # automatic search in folder - list_item = 
metadata_search_MASA(current_item, [item_names[item_idx]]) + list_item = metadata_search_MASA( + current_item, [item_names[item_idx]] + ) list_meta.append(list_item) else: diff --git a/ivas_processing_scripts/audiotools/quaternions.py b/ivas_processing_scripts/audiotools/quaternions.py new file mode 100755 index 0000000000000000000000000000000000000000..802d32e5ba275c2db4e46f65f5cc09ed86e99835 --- /dev/null +++ b/ivas_processing_scripts/audiotools/quaternions.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2024 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. 
It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + + +import numpy as np + +""" +Based on Jia, Y. B. (2008). Quaternions and rotations. Com S, 477(577), 15. +""" + + +################################### +# Operations on only one quaternion +################################### + + +def is_unitquat(q): + # the norm must be 1 for a unit quaternion + if q_norm2(q) == 1: + return True + else: + return False + + +def q_conj(q): + # quaternion conjugate + p = q.copy() + p[1:] = -p[1:] + return p + + +def q_norm2(q): + # quaternion norm + return np.sqrt(np.sum(q**2)) + + +def q_inv(q): + # quaternion inverse + return q_conj(q) / q_norm2(q) + + +def q_exp(q, e): + # quaternion exponentiation + p = np.zeros_like(q) + eps = np.zeros_like(q) + eps[0] = 0 + eps[1:] = q[1:] + norm_e = np.sqrt(q_norm2((e))) + + if norm_e == 0: + # real quaternion, reaise the real part + p[0] = q[0] ** e + p[1:] = 0 + else: + # go to the polar form + norm = np.sqrt(q_norm2(q)) + phi = np.arccos(q[0] / norm) + + eps = eps * (1 / norm_e) + + p[0] = norm * np.cos(e * phi) + p[1] = norm * eps[1] * np.sin(e * phi) + p[2] = norm * eps[2] * np.sin(e * phi) + p[3] = norm * eps[3] * np.sin(e * phi) + + return p + + +def q_log(q): + # quaternion logarithm + if not is_unitquat(q): + raise ValueError("Input must be a unit quaternion!") + + p = np.zeros_like(q) + + vec = q[1:] + vec_norm = np.sqrt(np.sum(vec**2)) + vec 
/= vec_norm + vec *= np.arccos(q[0]) + + p[1:] = vec + + return p + + +############################### +# Operations on two quaternions +############################### + + +def q_add(p, q): + # quaternion addition + return np.add(p, q) + + +def q_sub(p, q): + # quaternion subtraction + return np.subtract(p, q) + + +def q_mul(p, q): + # quaternion multiplication + pq = np.zeros_like(q) + pq[0] = p[0] * q[0] - p[1] * q[1] - p[2] * q[2] - p[3] * q[3] + pq[1] = p[0] * q[1] + p[1] * q[0] + p[2] * q[3] - p[3] * q[2] + pq[2] = p[0] * q[2] + p[1] * q[3] + p[2] * q[0] - p[3] * q[1] + pq[3] = p[0] * q[3] - p[1] * q[2] + p[2] * q[1] + p[3] * q[0] + + return pq + + +def q_div(p, q): + # quaternion division + return q_mul(p, q_inv(q)) diff --git a/ivas_processing_scripts/audiotools/utils.py b/ivas_processing_scripts/audiotools/utils.py index b0b2824a7317bcdfe5ad39495faa0eb7c934ec97..842d9555a5384743d2c00e17b53148923e2a95d6 100755 --- a/ivas_processing_scripts/audiotools/utils.py +++ b/ivas_processing_scripts/audiotools/utils.py @@ -38,7 +38,7 @@ from ivas_processing_scripts.audiotools.rotation import Euler2Quat, Quat2Euler def read_trajectory(trj_file: Path, return_quat=True): - trj = np.genfromtext(trj_file, delimiter=",") + trj = np.genfromtxt(trj_file, delimiter=",") if np.all(trj[:, 0] == -3): # Euler diff --git a/ivas_processing_scripts/audiotools/wrappers/bs1770.py b/ivas_processing_scripts/audiotools/wrappers/bs1770.py index 37655bd20d9c8e58516908646f4493417731b82b..bd890c1cf55fae528229fa3e1de188f3f644644d 100755 --- a/ivas_processing_scripts/audiotools/wrappers/bs1770.py +++ b/ivas_processing_scripts/audiotools/wrappers/bs1770.py @@ -196,7 +196,11 @@ def get_loudness( input, audio.MetadataAssistedSpatialAudio ): loudness_format = "7_1_4" - elif isinstance(input, audio.ObjectBasedAudio) or isinstance(input, audio.OMASAAudio) or isinstance(input, audio.OSBAAudio): + elif ( + isinstance(input, audio.ObjectBasedAudio) + or isinstance(input, audio.OMASAAudio) + or 
isinstance(input, audio.OSBAAudio) + ): loudness_format = "BINAURAL" elif hasattr(input, "layout_file"): loudness_format = input.layout_file diff --git a/ivas_processing_scripts/constants.py b/ivas_processing_scripts/constants.py index 2119304bdbe0b09b34467f85fda7410d0bdc1236..02a8d2c376172a10ee836c0cde862e079ec8bdaf 100755 --- a/ivas_processing_scripts/constants.py +++ b/ivas_processing_scripts/constants.py @@ -49,6 +49,9 @@ SUPPORTED_CONDITIONS = { "esdru", "evs", "ivas", + "ivas_transcoding", + "ivas_split_rend_full", + "ivas_split_rend_ext", "mono_dmx", "spatial_distortion", } @@ -93,3 +96,11 @@ REQUIRED_KEYS_EVS = {"bitrates"} REQUIRED_KEYS_IVAS = {"bitrates"} REQUIRED_KEYS_MNRU = {"q"} REQUIRED_KEYS_ESDRU = {"alpha"} +REQUIRED_KEYS_IVAS_TRANSCODING = { + *REQUIRED_KEYS_IVAS, + "trans_bitrate", + "trans_cod", + "trans_dec", +} +REQUIRED_KEYS_IVAS_SPLIT_REND = {*REQUIRED_KEYS_IVAS, "split_rend"} +REQUIRED_KEYS_IVAS_SPLIT_REND_CFG = {"fmt", "bitrate", "dof"} diff --git a/ivas_processing_scripts/processing/chains.py b/ivas_processing_scripts/processing/chains.py index 06080320fd21f620cd20eea66f5676d55cf432e2..cc7484ff1636cf8fbe9258ab996971089c8259e0 100755 --- a/ivas_processing_scripts/processing/chains.py +++ b/ivas_processing_scripts/processing/chains.py @@ -484,6 +484,7 @@ def get_processing_chain( "preamble": preamble, "use_windows_codec_binaries": cfg.use_windows_codec_binaries, "tx_condition": tx_condition, + "trajectory": dec_cfg.get("trajectory", None), } ) ) @@ -493,27 +494,202 @@ def get_processing_chain( tmp_in_fmt = dec_cfg.get("fmt", tmp_out_fmt)[0] else: tmp_in_fmt = dec_cfg.get("fmt", tmp_out_fmt) - else: - raise SystemExit(f"Unknown condition {condition}!") + elif cond_cfg["type"] == "ivas_transcoding": + cod_cfg = cond_cfg["cod"] + dec_cfg = cond_cfg["dec"] - # add optional IVAS_rend rendering step after each condition - if cond_cfg.get("ivas_rend", -1) != -1: - rend_cfg = cond_cfg["ivas_rend"] + trans_bitrate = cond_cfg["trans_bitrate"] + 
trans_cod_cfg = cond_cfg["trans_cod"] + trans_dec_cfg = cond_cfg["trans_dec"] + + # force this to be a single value for now + if isinstance(trans_bitrate, list): + trans_bitrate = trans_bitrate[0] + + if hasattr(cfg, "preprocessing_2"): + preamble = cfg.preprocessing_2.get("preamble", 0) + else: + preamble = 0 + + # if the encoding format differs from the format after the preprocessing, add format conversion stuff + if (cod_fmt := cod_cfg.get("fmt", tmp_in_fmt)) != tmp_in_fmt: + chain["processes"].append( + Postprocessing( + { + "in_fs": tmp_in_fs, + "in_fmt": tmp_in_fmt, + "out_fs": tmp_in_fs, + "out_fmt": cod_fmt, + "multiprocessing": cfg.multiprocessing, + "tx_condition": False, + }, + name="cod_fmt", + ) + ) + tmp_in_fmt = cod_fmt + + # allow list of output values for IVAS + tmp_out_fmt = dec_cfg.get("fmt", tmp_out_fmt) + if isinstance(tmp_out_fmt, list): + cond_fmt.extend(tmp_out_fmt) + tmp_out_fmt = tmp_out_fmt[0] + + # pre IVAS coding chain["processes"].append( - IVAS_rend( + IVAS( { "in_fmt": tmp_in_fmt, "in_fs": tmp_in_fs, - "out_fmt": rend_cfg.get("fmt", tmp_out_fmt), - "bin": get_abs_path(rend_cfg.get("bin", None)), - "opts": rend_cfg.get("opts"), + "out_fmt": tmp_out_fmt, + "out_fs": dec_cfg.get("fs", tmp_in_fs), + "bitrate": bitrate, + "cod_bin": get_abs_path(cod_cfg.get("bin", None)), + "cod_opts": cod_cfg.get("opts"), + "dec_bin": get_abs_path(dec_cfg.get("bin", None)), + "dec_opts": dec_cfg.get("opts"), + "multiprocessing": cfg.multiprocessing, + "tx": None, + "preamble": preamble, "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + "tx_condition": tx_condition, + "trajectory": dec_cfg.get("trajectory", None), } ) ) - # update values to reflect renderer output - tmp_in_fs = rend_cfg.get("fs", tmp_in_fs) - tmp_in_fmt = rend_cfg.get("fmt", tmp_out_fmt) + # update name to use correct trajectory + chain["processes"][-1].name = "ivas_trans_pre" + + tmp_out_fmt = trans_dec_cfg.get("fmt", tmp_out_fmt) + + # native transcoding + 
chain["processes"].append( + IVAS( + { + "in_fmt": tmp_out_fmt, + "in_fs": trans_dec_cfg.get("fs", tmp_in_fs), + "out_fmt": tmp_out_fmt, + "out_fs": trans_dec_cfg.get("fs", tmp_in_fs), + "bitrate": trans_bitrate, + "cod_bin": get_abs_path(trans_cod_cfg.get("bin", None)), + "cod_opts": trans_cod_cfg.get("opts"), + "dec_bin": get_abs_path(trans_dec_cfg.get("bin", None)), + "dec_opts": trans_dec_cfg.get("opts"), + "multiprocessing": cfg.multiprocessing, + "tx": None, + "preamble": preamble, + "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + "tx_condition": tx_condition, + } + ) + ) + # update name to avoid using the same tmp filenames + chain["processes"][-1].name = "ivas_trans_post" + + # update values to reflect transcoding decoder output + tmp_in_fs = trans_dec_cfg.get("fs", tmp_in_fs) + if isinstance(trans_dec_cfg.get("fmt", tmp_out_fmt), list): + tmp_in_fmt = trans_dec_cfg.get("fmt", tmp_out_fmt)[0] + else: + tmp_in_fmt = trans_dec_cfg.get("fmt", tmp_out_fmt) + elif cond_cfg["type"].startswith("ivas_split_rend"): + cod_cfg = cond_cfg["cod"] + dec_cfg = cond_cfg["dec"] + + split_rend_cfg = cond_cfg["split_rend"] + is_full_chain = True if cond_cfg["type"].endswith("full") else False + + if hasattr(cfg, "preprocessing_2"): + preamble = cfg.preprocessing_2.get("preamble", 0) + else: + preamble = 0 + + # if the encoding format differs from the format after the preprocessing, add format conversion stuff + if (cod_fmt := cod_cfg.get("fmt", tmp_in_fmt)) != tmp_in_fmt: + chain["processes"].append( + Postprocessing( + { + "in_fs": tmp_in_fs, + "in_fmt": tmp_in_fmt, + "out_fs": tmp_in_fs, + "out_fmt": cod_fmt, + "multiprocessing": cfg.multiprocessing, + "tx_condition": False, + }, + name="cod_fmt", + ) + ) + tmp_in_fmt = cod_fmt + + # allow list of output values for IVAS + tmp_out_fmt = dec_cfg.get("fmt", tmp_out_fmt) + if isinstance(tmp_out_fmt, list): + cond_fmt.extend(tmp_out_fmt) + tmp_out_fmt = tmp_out_fmt[0] + + # IVAS split pre rendering + if 
is_full_chain: + chain["processes"].append( + IVAS( + { + "in_fmt": tmp_in_fmt, + "in_fs": tmp_in_fs, + "out_fmt": tmp_out_fmt, + "out_fs": dec_cfg.get("fs", tmp_in_fs), + "bitrate": bitrate, + "cod_bin": get_abs_path(cod_cfg.get("bin", None)), + "cod_opts": cod_cfg.get("opts"), + "dec_bin": get_abs_path(dec_cfg.get("bin", None)), + "dec_opts": dec_cfg.get("opts"), + "multiprocessing": cfg.multiprocessing, + "tx": None, + "preamble": preamble, + "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + "tx_condition": tx_condition, + "split_rend_cfg": split_rend_cfg, + } + ) + ) + else: + raise NotImplementedError( + "IVAS rend currently unsupported for split-pre rendering" + ) + # TODO IVAS_rend as split-pre renderer + chain["processes"].append( + IVAS_rend( + { + "in_fmt": tmp_in_fmt, + "in_fs": tmp_in_fs, + "out_fmt": tmp_out_fmt, + "bin": get_abs_path(split_rend_cfg.get("bin", None)), + "opts": split_rend_cfg.get("opts"), + "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + "split_rend_cfg": split_rend_cfg, + } + ) + ) + + # split post rendering + chain["processes"].append( + IVAS_rend( + { + "in_fmt": tmp_out_fmt, + "in_fs": tmp_in_fs, + "out_fmt": split_rend_cfg["fmt"], + "bin": get_abs_path(split_rend_cfg.get("bin", None)), + "opts": split_rend_cfg.get("opts"), + "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + } + ) + ) + + # update values to reflect post renderer output + tmp_in_fs = dec_cfg.get("fs", tmp_in_fs) + if isinstance(split_rend_cfg.get("fmt", tmp_out_fmt), list): + tmp_in_fmt = split_rend_cfg.get("fmt", tmp_out_fmt)[0] + else: + tmp_in_fmt = split_rend_cfg.get("fmt", tmp_out_fmt) + else: + raise SystemExit(f"Unknown condition {condition}!") # add postprocessing step based on condition post_fmt = post_cfg.get("fmt") @@ -547,6 +723,24 @@ def get_processing_chain( if cond_cfg.get("out_fc") is not None: tmp_lp_cutoff = cond_cfg.get("out_fc") + # add optional IVAS_rend rendering step after each condition + if 
rend_cfg := cond_cfg.get("ivas_rend"): + chain["processes"].append( + IVAS_rend( + { + "in_fmt": tmp_in_fmt, + "in_fs": tmp_in_fs, + "out_fmt": rend_cfg.get("fmt", tmp_out_fmt), + "bin": get_abs_path(rend_cfg.get("bin")), + "opts": rend_cfg.get("opts"), + "use_windows_codec_binaries": cfg.use_windows_codec_binaries, + } + ) + ) + # update values to reflect renderer output + tmp_in_fs = rend_cfg.get("fs", tmp_in_fs) + tmp_in_fmt = rend_cfg.get("fmt", tmp_out_fmt) + chain["processes"].append( Postprocessing( { @@ -558,7 +752,6 @@ def get_processing_chain( "bin_dataset": post_cfg.get("bin_dataset"), "bin_lfe_gain": post_cfg.get("bin_lfe_gain"), "limit": post_cfg.get("limit", True), - "trajectory": get_abs_path(post_cfg.get("trajectory", None)), "multiprocessing": cfg.multiprocessing, "mnru_q": tmp_mnru_q, "esdru_alpha": tmp_esdru_alpha, diff --git a/ivas_processing_scripts/processing/config.py b/ivas_processing_scripts/processing/config.py index 315ddae95b9b470d03f6d36eedf61b7b3253e19d..fdcc787d960e795edd686db96d86422eb0de8b01 100755 --- a/ivas_processing_scripts/processing/config.py +++ b/ivas_processing_scripts/processing/config.py @@ -43,6 +43,9 @@ from ivas_processing_scripts.constants import ( REQUIRED_KEYS_ESDRU, REQUIRED_KEYS_EVS, REQUIRED_KEYS_IVAS, + REQUIRED_KEYS_IVAS_SPLIT_REND, + REQUIRED_KEYS_IVAS_SPLIT_REND_CFG, + REQUIRED_KEYS_IVAS_TRANSCODING, REQUIRED_KEYS_MNRU, SUPPORTED_CONDITIONS, ) @@ -65,8 +68,18 @@ def merge_dicts(base: dict, other: dict) -> None: def get_default_config_for_codecs(codec_name: str, ext_with_dot: str = "") -> dict: + is_transcoding = "transcoding" in codec_name + is_splitrend = "ISAR" in codec_name + + if is_splitrend: + post_rend_name = codec_name.split(" ")[1] + post_rend_bin = f"{post_rend_name}_post_rend{ext_with_dot}" + + codec_name = codec_name.split(" ")[0] cod_bin = f"{codec_name}_cod{ext_with_dot}" dec_bin = f"{codec_name}_dec{ext_with_dot}" + rend_bin = f"{codec_name}_rend{ext_with_dot}" + cfg = { "cod": { @@ -76,6 
+89,17 @@ def get_default_config_for_codecs(codec_name: str, ext_with_dot: str = "") -> di "bin": find_binary(dec_bin, raise_error=False), }, } + if is_transcoding: + cfg["trans_cod"] = { + "bin": find_binary(cod_bin, raise_error=False), + } + cfg["trans_dec"] = { + "bin": find_binary(dec_bin, raise_error=False), + } + if is_splitrend: + cfg["split_rend"] = { + "bin": find_binary(post_rend_bin, raise_error=False), + } return cfg @@ -240,6 +264,59 @@ class TestConfig: raise KeyError( f"The following key(s) must be specified for IVAS: {REQUIRED_KEYS_IVAS}" ) + elif type == "ivas_transcoding": + merged_cfg = get_default_config_for_codecs( + "IVAS transcoding", codec_bin_extension + ) + merge_dicts(merged_cfg, cond_cfg) + cfg["conditions_to_generate"][cond_name] = merged_cfg + if REQUIRED_KEYS_IVAS_TRANSCODING.difference( + cfg["conditions_to_generate"][cond_name].keys() + ): + raise KeyError( + f"The following key(s) must be specified for IVAS: {REQUIRED_KEYS_IVAS_TRANSCODING}" + ) + elif type.startswith("ivas_split_rend"): + merged_cfg = get_default_config_for_codecs( + "IVAS ISAR", codec_bin_extension + ) + merge_dicts(merged_cfg, cond_cfg) + cfg["conditions_to_generate"][cond_name] = merged_cfg + if REQUIRED_KEYS_IVAS_SPLIT_REND.difference( + cfg["conditions_to_generate"][cond_name].keys() + ): + raise KeyError( + f"The following key(s) must be specified for IVAS: {REQUIRED_KEYS_IVAS_SPLIT_REND}" + ) + split_rend_cfg = cfg["conditions_to_generate"][cond_name]["split_rend"] + MISSING_KEYS_SPLIT_CFG = [] + for r in REQUIRED_KEYS_IVAS_SPLIT_REND_CFG: + if not split_rend_cfg.get(r): + MISSING_KEYS_SPLIT_CFG.append(r) + + if MISSING_KEYS_SPLIT_CFG: + raise KeyError( + f"The following key(s) must be specified : {MISSING_KEYS_SPLIT_CFG}" + ) + + if (fmt := split_rend_cfg.get("fmt")) != "BINAURAL": + raise ValueError(f"Format {fmt} unsupported by split rendering!") + if (dof := split_rend_cfg.get("dof")) < 0 or dof > 3: + raise ValueError( + f"DOF {dof} unsupported! 
Valid values are 0 <= dof <= 3." + ) + + if codec := split_rend_cfg.get("codec", None): + if not ( + codec.upper() == "LC3PLUS" + or codec.upper() == "LCLD" + or codec.upper() == "DEFAULT" + ): + raise ValueError(f"Unsupported split rendering codec {codec}!") + if codec.upper() == "DEFAULT": + # avoid explicitly writing this value to the config file if set to default + split_rend_cfg.pop("codec", None) + elif type == "mnru": if REQUIRED_KEYS_MNRU.difference( cfg["conditions_to_generate"][cond_name].keys() diff --git a/ivas_processing_scripts/processing/evs.py b/ivas_processing_scripts/processing/evs.py index 98ee3d7a7a956efe8294109ef82a016ee354caca..c49c6a452dcd9e0dea5a30273f2f0ad9a38fb73d 100755 --- a/ivas_processing_scripts/processing/evs.py +++ b/ivas_processing_scripts/processing/evs.py @@ -36,7 +36,7 @@ import platform from itertools import repeat from pathlib import Path from shutil import copyfile -from typing import Optional, Union +from typing import Optional, Tuple, Union from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audiofile import ( @@ -133,7 +133,12 @@ class EVS(Processing): ) def process( - self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger + self, + in_file: Path, + out_file: Path, + in_meta, + trajectories: Optional[Tuple[Path]] = None, + logger: Optional[logging.Logger] = None, ) -> None: logger.debug(f"EVS configuration : {self.__dict__}") logger.debug(f"EVS {in_file.absolute()} -> {out_file.absolute()}") @@ -176,9 +181,10 @@ class EVS(Processing): # flag for zeroing of channels for planar SBA formats is_planar = ( - isinstance(self.in_fmt, audio.SceneBasedAudio) and self.in_fmt.is_planar - or - isinstance(self.in_fmt, audio.OSBAAudio) and self.in_fmt.is_planar + isinstance(self.in_fmt, audio.SceneBasedAudio) + and self.in_fmt.is_planar + or isinstance(self.in_fmt, audio.OSBAAudio) + and self.in_fmt.is_planar ) if isinstance(self.in_fmt, audio.OSBAAudio) and self.in_fmt.is_planar: 
is_planar_offset = self.in_fmt.num_ism_channels @@ -260,7 +266,13 @@ class EVS(Processing): # combine the decoded channels into the output file if out_file.suffix in [".wav", ".pcm"]: - combine(split_chan_out, out_file, in_fs=self.out_fs, is_planar=is_planar,is_planar_offset=is_planar_offset) + combine( + split_chan_out, + out_file, + in_fs=self.out_fs, + is_planar=is_planar, + is_planar_offset=is_planar_offset, + ) if split_chan_bs_unprocessed != split_chan_bs and self.tx_condition: out_file_unprocessed = f"{Path(out_file.parent).joinpath(Path(out_file.name).with_suffix(''))}.noerror{out_file.suffix}" combine( @@ -271,7 +283,11 @@ class EVS(Processing): is_planar_offset=is_planar_offset, ) # copy ISM metadata for ISM pass-through - if isinstance(self.in_fmt, audio.ObjectBasedAudio) or isinstance(self.in_fmt, audio.OMASAAudio) or isinstance(self.in_fmt, audio.OSBAAudio): + if ( + isinstance(self.in_fmt, audio.ObjectBasedAudio) + or isinstance(self.in_fmt, audio.OMASAAudio) + or isinstance(self.in_fmt, audio.OSBAAudio) + ): if isinstance(self.in_fmt, audio.ObjectBasedAudio): num_ism_obj = self.in_fmt.num_channels else: @@ -291,7 +307,9 @@ class EVS(Processing): copyfile(in_meta[idx], out_file_meta_unprocessed) # copy MASA metadata for MASA pass-through - if isinstance(self.in_fmt, audio.MetadataAssistedSpatialAudio) or isinstance(self.in_fmt, audio.OMASAAudio): + if isinstance( + self.in_fmt, audio.MetadataAssistedSpatialAudio + ) or isinstance(self.in_fmt, audio.OMASAAudio): md_file_in = in_file.parent / (in_file.name + ".met") md_file_out = out_file.parent / (out_file.name + ".met") copyfile(md_file_in, md_file_out) diff --git a/ivas_processing_scripts/processing/ivas.py b/ivas_processing_scripts/processing/ivas.py index f6c86d99c5770bc2528be3a236f0fecd7dece54e..7489919ee011fe45efaccbe670f9e3d62d97b121 100755 --- a/ivas_processing_scripts/processing/ivas.py +++ b/ivas_processing_scripts/processing/ivas.py @@ -60,6 +60,8 @@ class IVAS(Processing): self.out_fmt = 
audio.fromtype(self.out_fmt) if not hasattr(self, "dec_opts"): self.dec_opts = None + if not hasattr(self, "split_rend_cfg"): + self.split_rend_cfg = None self._use_wine = use_wine(self.use_windows_codec_binaries) def _validate(self): @@ -100,7 +102,7 @@ class IVAS(Processing): ) def process( - self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger + self, in_file: Path, out_file: Path, in_meta, trj_pair, logger: logging.Logger ) -> None: logger.debug(f"IVAS configuration : {self.__dict__}") logger.debug(f"IVAS {in_file.absolute()} -> {out_file.absolute()}") @@ -128,13 +130,35 @@ class IVAS(Processing): bitstream_noerror = bitstream bitstream, voip = self.simulate_tx(in_file, bitstream, logger) + # generate and use renderer config if split rendering + rend_cfg_file = IVAS.generate_split_rend_cfg( + out_file, self.split_rend_cfg, logger + ) + + # trajectory file for binaural/split rendering + trajectories = trj_pair if trj_pair else None + # decode twice with and without bitstream errors - self.dec(bitstream, out_file, voip=voip, logger=logger) + self.dec( + bitstream, + out_file, + voip=voip, + rend_cfg_file=rend_cfg_file, + trajectories=trajectories, + logger=logger, + ) if bitstream_noerror != bitstream and self.tx_condition: out_file_unprocessed = Path( f"{out_file.parent.joinpath(out_file.stem)}.noerror{out_file.suffix}" ) - self.dec(bitstream_noerror, out_file_unprocessed, voip=False, logger=logger) + self.dec( + bitstream_noerror, + out_file_unprocessed, + voip=False, + rend_cfg_file=rend_cfg_file, + trajectories=trajectories, + logger=logger, + ) def enc( self, @@ -152,7 +176,9 @@ class IVAS(Processing): if isinstance(self.in_fmt, audio.MetadataAssistedSpatialAudio): md_file = in_file.parent / (in_file.name + ".met") metadata_files.append(md_file) - elif isinstance(self.in_fmt, audio.ObjectBasedAudio) or isinstance(self.in_fmt, audio.OSBAAudio): + elif isinstance(self.in_fmt, audio.ObjectBasedAudio) or isinstance( + self.in_fmt, audio.OSBAAudio 
+ ): metadata_files = in_meta elif isinstance(self.in_fmt, audio.OMASAAudio): metadata_files = in_meta @@ -282,6 +308,8 @@ class IVAS(Processing): bitstream: Path, out_file: Path, voip: bool = False, + rend_cfg_file: Optional[Path] = None, + trajectories: Optional[Tuple[Path]] = None, logger: Optional[logging.Logger] = None, ) -> None: logger.debug(f"IVAS decoder {bitstream} -> {out_file}") @@ -290,8 +318,13 @@ class IVAS(Processing): if self._use_wine: cmd.insert(0, "wine") - if hasattr(self, "trajectory"): - cmd.extend(["-T", self.trajectory]) + # head tracking trajectory + if trajectories: + pre_trj, post_trj = trajectories + if (self.out_fmt.name == "BINAURAL_SPLIT_CODED" or self.name == "ivas_trans_pre") and pre_trj: + cmd.extend(["-T", str(pre_trj)]) + elif self.out_fmt.name == "BINAURAL" and post_trj: + cmd.extend(["-T", str(post_trj)]) # add -voip cmdline option to the decoder if voip: @@ -301,6 +334,9 @@ class IVAS(Processing): if self.dec_opts: cmd.extend(self.dec_opts) + if rend_cfg_file: + cmd.extend(["-render_config", str(rend_cfg_file)]) + # use quiet mode cmd.extend(["-q"]) @@ -308,12 +344,16 @@ class IVAS(Processing): # the SBA part of OSBA is always rendered to HOA3 for EXT by IVAS if isinstance(self.in_fmt, audio.OSBAAudio) and self.in_fmt.name[:]: if self.out_fmt.num_channels != (16 + self.in_fmt.num_ism_channels): - raise ValueError("When using EXT output for IVAS for OSBA make sure the specified decoder format is ISMxSBA3") + raise ValueError( + "When using EXT output for IVAS for OSBA make sure the specified decoder format is ISMxSBA3" + ) else: output_format = "EXT" else: if self.in_fmt.name != self.out_fmt.name: - raise ValueError("ISM and MASA output format for IVAS only possible if input and output format match") + raise ValueError( + "ISM and MASA output format for IVAS only possible if input and output format match" + ) output_format = "EXT" elif self.in_fmt.name == "MONO": if self.out_fmt.name == "MONO": @@ -371,14 +411,53 @@ class 
IVAS(Processing): return ["-mc", fmt.name] elif isinstance(fmt, audio.OSBAAudio): if fmt.is_planar: - return ["-ism_sba", str(fmt.num_ism_channels), f"-{str(fmt.ambi_order)}"] + metadata_files + return [ + "-ism_sba", + str(fmt.num_ism_channels), + f"-{str(fmt.ambi_order)}", + ] + metadata_files else: - return ["-ism_sba", str(fmt.num_ism_channels), f"+{str(fmt.ambi_order)}"] + metadata_files + return [ + "-ism_sba", + str(fmt.num_ism_channels), + f"+{str(fmt.ambi_order)}", + ] + metadata_files elif isinstance(fmt, audio.OMASAAudio): - return ["-ism_masa", str(fmt.num_ism_channels), str(fmt.num_channels-fmt.num_ism_channels)] + metadata_files + return [ + "-ism_masa", + str(fmt.num_ism_channels), + str(fmt.num_channels - fmt.num_ism_channels), + ] + metadata_files raise ValueError(f"IVAS: Invalid input config: {fmt.name}.") + @staticmethod + def generate_split_rend_cfg( + in_file: Path, split_rend_cfg: dict, logger: Optional[logging.Logger] = None + ) -> Path: + if split_rend_cfg: + cfg_file = in_file.with_stem(f"{in_file.stem}.render_config").with_suffix( + ".txt" + ) + else: + return None + + # write config file + with open(cfg_file, "wt") as f: + print("[SPLITREND]", file=f) + print(f"BITRATE = {split_rend_cfg.get('bitrate')};", file=f) + print(f"DOF = {split_rend_cfg.get('dof')};", file=f) + if hqmode := split_rend_cfg.get("hqmode"): + print(f"HQMODE = {int( hqmode )};", file=f) + if codec := split_rend_cfg.get("codec"): + print(f"CODEC = {codec.upper()};", file=f) + if framesize := split_rend_cfg.get("framesize"): + print(f"FRAMESIZE = {framesize};", file=f) + + logger.debug(f"Wrote IVAS renderer config: {cfg_file}") + + return cfg_file + class IVAS_rend(Processing): def __init__(self, attrs): @@ -389,6 +468,8 @@ class IVAS_rend(Processing): self.out_fmt = audio.fromtype(self.out_fmt) if not hasattr(self, "opts"): self.dec_opts = None + if not hasattr(self, "split_rend_cfg"): + self.split_rend_cfg = None self._use_wine = ( platform.system() == "Linux" and 
self.use_windows_codec_binaries ) @@ -412,6 +493,7 @@ class IVAS_rend(Processing): in_file: Path, out_file: Path, in_meta, + trajectories: Optional[Tuple[Path]] = None, logger: Optional[logging.Logger] = None, ) -> None: logger.debug(f"IVAS rend configuration : {self.__dict__}") @@ -428,25 +510,57 @@ class IVAS_rend(Processing): else: self.in_fs = parse_wave_header(str(in_file))["fs"] + # generate and use renderer config if split rendering + rend_cfg_file = IVAS.generate_split_rend_cfg( + out_file, self.split_rend_cfg, logger + ) + cmd = [self.bin] if self._use_wine: cmd.insert(0, "wine") - cmd.extend( - [ - "-fs", - str(self.in_fs // 1000), - "-i", - str(in_file), - "-if", - self.in_fmt.name, - "-o", - str(out_file), - "-of", - self.out_fmt.name, - ] - ) + if rend_cfg_file: + cmd.extend(["-render_config", str(rend_cfg_file)]) + + # head tracking trajectory + if trajectories: + pre_trj, post_trj = trajectories + if self.out_fmt.name == "BINAURAL_SPLIT_CODED" and pre_trj: + cmd.extend(["-T", str(pre_trj)]) + elif self.out_fmt.name == "BINAURAL" and post_trj: + cmd.extend(["-T", str(post_trj)]) + + if self.in_fmt.name == "BINAURAL_SPLIT_CODED": + cmd.extend( + [ + "-q", + "-fs", + str(self.in_fs // 1000), + "-i", + str(in_file), + "-if", + self.in_fmt.name, + "-o", + str(out_file), + ] + ) + else: + cmd.extend( + [ + "-q", + "-fs", + str(self.in_fs // 1000), + "-i", + str(in_file), + "-if", + self.in_fmt.name, + "-o", + str(out_file), + "-of", + self.out_fmt.name, + ] + ) if in_meta: cmd.append("-im") diff --git a/ivas_processing_scripts/processing/postprocessing.py b/ivas_processing_scripts/processing/postprocessing.py index 1a9a5ffb0474158de24e07915af3decca25a1892..e82f0a58c078f8e30fe2d92d980c58534192b33d 100755 --- a/ivas_processing_scripts/processing/postprocessing.py +++ b/ivas_processing_scripts/processing/postprocessing.py @@ -32,6 +32,7 @@ import logging from pathlib import Path +from typing import Optional, Tuple from ivas_processing_scripts.audiotools 
import convert from ivas_processing_scripts.processing.processing import Processing @@ -45,12 +46,24 @@ class Postprocessing(Processing): else: self.name = "post" - def process(self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger): + def process( + self, + in_file: Path, + out_file: Path, + in_meta, + trajectories: Optional[Tuple[Path]] = None, + logger: Optional[logging.Logger] = None, + ): logger.debug(f"Postprocessing configuration : {self.__dict__}") logger.debug(f"Postprocessing {in_file.absolute()} -> {out_file.absolute()}") convert.convert_file( - in_file, out_file, logger=logger, in_meta=in_meta, **self.__dict__ + in_file, + out_file, + logger=logger, + in_meta=in_meta, + out_trajectory=trajectories[1] if trajectories else None, + **self.__dict__, ) # additional postprocessing of signal without error modification if self.tx_condition: diff --git a/ivas_processing_scripts/processing/preprocessing.py b/ivas_processing_scripts/processing/preprocessing.py index 9ad1d6283aa2d414b495268111f9f2ce6f3bb298..eda9ccc5c940f34da17b3d9f63036a02457d2171 100755 --- a/ivas_processing_scripts/processing/preprocessing.py +++ b/ivas_processing_scripts/processing/preprocessing.py @@ -32,6 +32,7 @@ import logging from pathlib import Path +from typing import Optional, Tuple from ivas_processing_scripts.audiotools import convert from ivas_processing_scripts.processing.processing import Processing @@ -42,7 +43,14 @@ class Preprocessing(Processing): super().__init__(attrs) self.name = "pre" - def process(self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger): + def process( + self, + in_file: Path, + out_file: Path, + in_meta, + trajectories: Optional[Tuple[Path]] = None, + logger: Optional[logging.Logger] = None, + ): logger.debug(f"Preprocessing configuration : {self.__dict__}") logger.debug(f"Preprocessing {in_file.absolute()} -> {out_file.absolute()}") diff --git a/ivas_processing_scripts/processing/preprocessing_2.py 
b/ivas_processing_scripts/processing/preprocessing_2.py index fc0a874e0ca47001b722911d2d57d0fb47035f66..df449044e3aa8a8d305342f452ff717341ec4a54 100644 --- a/ivas_processing_scripts/processing/preprocessing_2.py +++ b/ivas_processing_scripts/processing/preprocessing_2.py @@ -32,6 +32,7 @@ import logging from pathlib import Path +from typing import Optional, Tuple from warnings import warn import numpy as np @@ -55,7 +56,14 @@ class Preprocessing2(Processing): super().__init__(attrs) self.name = "pre_2" - def process(self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger): + def process( + self, + in_file: Path, + out_file: Path, + in_meta, + trajectories: Optional[Tuple[Path]] = None, + logger: Optional[logging.Logger] = None, + ): logger.debug(f"Preprocessing2 configuration : {self.__dict__}") logger.debug(f"Preprocessing2 {in_file.absolute()} -> {out_file.absolute()}") @@ -64,9 +72,13 @@ class Preprocessing2(Processing): self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta ) - if isinstance(audio_object, audio.MetadataAssistedSpatialAudio) or isinstance(audio_object, audio.OMASAAudio): + if isinstance(audio_object, audio.MetadataAssistedSpatialAudio) or isinstance( + audio_object, audio.OMASAAudio + ): if self.preamble > 0 or self.background_noise or self.repeat_signal: - raise ValueError("No preprocessing 2 possible for formats including MASA metadata") + raise ValueError( + "No preprocessing 2 possible for formats including MASA metadata" + ) # modify ISM metadata if self.in_fmt.startswith("ISM"): diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 9b3f36b9cfc01dcc02de1817cbce150686a4144b..595b1c44052e3a5f3cbf770e11520554c388145f 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -131,7 +131,9 @@ def concat_setup(cfg: TestConfig, chain, logger: logging.Logger): # concatenation of met files not possible -> do not 
concatenate MASA and OMASA if "MASA" in input_format: - raise ValueError("Concatenation of formats including MASA metadata not possible") + raise ValueError( + "Concatenation of formats including MASA metadata not possible" + ) # concatenate ISM metadata if input_format.startswith("ISM"): @@ -328,6 +330,7 @@ def preprocess(cfg, logger): repeat(chain), repeat(logger), cfg.metadata_path, + repeat(None), ) ) p = Pool() @@ -378,14 +381,19 @@ def preprocess(cfg, logger): if "ISM" in preproc_output_fmt: num_obj = int(preproc_output_fmt[3]) for obj_idx in range(num_obj): - list_item.append(cfg.out_dirs[0] / Path(f"{cfg.items_list[item_idx].stem}.wav.{obj_idx}.csv")) + list_item.append( + cfg.out_dirs[0] + / Path(f"{cfg.items_list[item_idx].stem}.wav.{obj_idx}.csv") + ) # MASA metadata if "MASA" in preproc_output_fmt: - list_item.append(cfg.out_dirs[0] / Path(f"{cfg.items_list[item_idx].stem}.wav.met")) + list_item.append( + cfg.out_dirs[0] / Path(f"{cfg.items_list[item_idx].stem}.wav.met") + ) # no metadata - if not "ISM" in preproc_output_fmt and not "MASA" in preproc_output_fmt: + if not ("ISM" in preproc_output_fmt or "MASA" in preproc_output_fmt): list_item.append(None) cfg.metadata_path.append(list_item) @@ -419,6 +427,7 @@ def preprocess_2(cfg, logger): repeat(chain), repeat(logger), cfg.metadata_path, + repeat(None), ) ) p = Pool() @@ -467,6 +476,7 @@ def process_item( chain: Iterable, logger: logging.Logger, in_meta, + trajectories, ) -> None: # derive tmp file names tmp_file = tmp_dir.joinpath(in_file.name) @@ -480,6 +490,7 @@ def process_item( out_dir_wav = False processing_paths = [in_file] processing_paths_meta = [in_meta] + processing_paths_trj = trajectories bool_ism = False bool_masa = False num_ism_meta = None @@ -552,8 +563,11 @@ def process_item( out_meta.append(out_dir.joinpath(f"{Path(out_file).stem}.wav.met")) # execute each process sequentially, feed output into input of next process - for p, (input, output), input_meta in zip( - chain, 
pairwise(processing_paths), processing_paths_meta[:-1] + for p, (input, output), input_meta, trj_pair in zip( + chain, + pairwise(processing_paths), + processing_paths_meta[:-1], + repeat(processing_paths_trj), ): # setup logging for the output item_logger = logger.getChild(output.stem) @@ -562,7 +576,7 @@ def process_item( fh.setFormatter(logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT)) item_logger.addHandler(fh) - p.process(input, output, input_meta, item_logger) + p.process(input, output, input_meta, trj_pair, item_logger) item_logger.handlers.clear() diff --git a/ivas_processing_scripts/processing/processing_splitting_scaling.py b/ivas_processing_scripts/processing/processing_splitting_scaling.py index 771c6f0331af20d648106176adcce9f706221bbe..424bb426bf7814f30b3640881d51c9ba20bb7a7e 100644 --- a/ivas_processing_scripts/processing/processing_splitting_scaling.py +++ b/ivas_processing_scripts/processing/processing_splitting_scaling.py @@ -3,6 +3,7 @@ import logging import re from itertools import repeat from pathlib import Path +from typing import Optional, Tuple import numpy as np @@ -52,7 +53,14 @@ class Processing_splitting_scaling(Processing): super().__init__(attrs) self.name = "processing_splitting_scaling" - def process(self, in_file: Path, out_file: Path, in_meta, logger: logging.Logger): + def process( + self, + in_file: Path, + out_file: Path, + in_meta, + trajectories: Optional[Tuple[Path]] = None, + logger: Optional[logging.Logger] = None, + ): logger.debug(f"Processing splitting scaling configuration : {self.__dict__}") logger.debug(f"Processing splitting scaling {in_file.absolute()}") diff --git a/ivas_processing_scripts/trajectories/README.md b/ivas_processing_scripts/trajectories/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d5e5998306eee39699a71686daee3b4984be8b90 --- /dev/null +++ b/ivas_processing_scripts/trajectories/README.md @@ -0,0 +1,62 @@ + + +# Trajectory file processing module + +This module 
can be executed using `python -m ivas_processing_scripts.trajectories`. + +IVAS head tracking trajectory files can be manipulated using this helper script. See the usage help below for details. + +```text +usage: Process IVAS .csv head tracking trajectories in either Euler (YPR) or Quaternion format. + Order of supported operations: zero => offset => invert => delay + Head tracking data is equivalent to scene rotation data (NOT listener orientation, use --invert if needed) + + [-h] [-i] [-d DELAY] [-o OFFSET OFFSET OFFSET] [-zy] [-zp] [-zr] [-of {q,e}] in_trj out_trj + +positional arguments: + in_trj Input Trajectory + out_trj Output Trajectory + +options: + -h, --help show this help message and exit + -i, --invert Flag to invert trajectory, default = False + -d DELAY, --delay DELAY + Delay trajectory by this amount in milliseconds + -o OFFSET OFFSET OFFSET, --offset OFFSET OFFSET OFFSET + Offset trajectory by this rotation [yaw, pitch, roll] + -zy, --zero_yaw Zero yaw axis + -zp, --zero_pitch Zero pitch axis + -zr, --zero_roll Zero roll axis + -of {q,e}, --output_format {q,e} + Output format: 'e' for Euler (YPR) and 'q' for Quaternions, default = q +``` diff --git a/ivas_processing_scripts/trajectories/__init__.py b/ivas_processing_scripts/trajectories/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..08e8055d95b5c2be89a0301717a52e3e7357adf7 --- /dev/null +++ b/ivas_processing_scripts/trajectories/__init__.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2024 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. 
+# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+# + +import argparse +from functools import partial + +import numpy as np + +from ivas_processing_scripts.audiotools.quaternions import q_inv, q_mul +from ivas_processing_scripts.audiotools.rotation import Euler2Quat, Quat2Euler +from ivas_processing_scripts.audiotools.utils import read_trajectory, write_trajectory + + +def get_args(): + parser = argparse.ArgumentParser( + """Process IVAS .csv head tracking trajectories in either Euler (YPR) or Quaternion format. + Order of supported operations: zero => offset => invert => delay + Head tracking data is equivalent to scene rotation data (NOT listener orientation, use --invert if needed) + """ + ) + parser.add_argument("in_trj", help="Input Trajectory") + parser.add_argument("out_trj", help="Output Trajectory") + parser.add_argument( + "-i", + "--invert", + action="store_true", + default=False, + help="Flag to invert trajectory, default = %(default)s", + ) + parser.add_argument( + "-d", + "--delay", + default=None, + help="Delay trajectory by this amount in milliseconds", + type=int, + ) + parser.add_argument( + "-o", + "--offset", + help="Offset trajectory by this rotation [yaw, pitch, roll]", + nargs=3, + type=int, + ) + for a, ax in zip("ypr", ["yaw", "pitch", "roll"]): + parser.add_argument( + f"-z{a}", + f"--zero_{ax}", + action="store_true", + default=[], + help=f"Zero {ax} axis", + ) + parser.add_argument( + "-of", + "--output_format", + choices=["q", "e"], + default="q", + help="Output format: 'e' for Euler (YPR) and 'q' for Quaternions, default = %(default)s", + type=str, + ) + + return parser.parse_args() + + +def main(): + args = get_args() + + trj = read_trajectory(args.in_trj) + + if args.zero_yaw or args.zero_pitch or args.zero_roll: + trj_euler = Quat2Euler(trj) + + if args.zero_yaw: + trj_euler[:, 0] = 0 + if args.zero_pitch: + trj_euler[:, 1] = 0 + if args.zero_roll: + trj_euler[:, 2] = 0 + + trj = Euler2Quat(trj_euler) + + if args.offset: + args.offset = np.array(args.offset) + args.offset = 
Euler2Quat(args.offset) + + # left multiply by offset to chain the rotations + trj = np.apply_along_axis(partial(q_mul, args.offset), 1, trj) + + if args.invert: + trj = np.apply_along_axis(q_inv, 1, trj) + + if args.delay: + pad = np.array([[1, 0, 0, 0]]) + trj = np.vstack([np.repeat(pad, int(args.delay / 5), axis=0), trj]) + + write_trajectory(trj, args.out_trj, write_quat=(args.output_format == "q")) diff --git a/ivas_processing_scripts/trajectories/__main__.py b/ivas_processing_scripts/trajectories/__main__.py new file mode 100755 index 0000000000000000000000000000000000000000..4b972eddc6ac78273a82bf1de3a8279f45afbbd5 --- /dev/null +++ b/ivas_processing_scripts/trajectories/__main__.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2024 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. 
+# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +from ivas_processing_scripts.trajectories import main + +if __name__ == "__main__": + main() diff --git a/ivas_processing_scripts/trajectories/trajectories.py b/ivas_processing_scripts/trajectories/trajectories.py new file mode 100755 index 0000000000000000000000000000000000000000..78cad50c228ce569ba9fa86eec904b07f1623c4b --- /dev/null +++ b/ivas_processing_scripts/trajectories/trajectories.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2024 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. 
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+# + + +from pathlib import Path +from typing import Tuple, Union + + +def trajectory_search( + item_name: Path, + search_folder: Path, +) -> Tuple: + """Search for head tracking trajectories with item_name.wav.{pre,post}.csv""" + + if not item_name: + raise ValueError("No item name provided, can't search for trajectories") + + if search_folder is not None: + item_name = search_folder.joinpath(item_name.name) + + trj_file_pre = item_name.with_stem(f"{item_name.name}.pre").with_suffix(".csv") + trj_file_post = item_name.with_stem(f"{item_name.name}.post").with_suffix(".csv") + + pre_trj = trj_file_pre if trj_file_pre.is_file() else None + post_trj = trj_file_post if trj_file_post.is_file() else None + + return pre_trj, post_trj + + +def check_trajectories(item_names: list, search_folder: Path) -> list[Tuple]: + """Find head tracking trajectories""" + + list_trj = [trajectory_search(Path(i), search_folder) for i in item_names] + + return list_trj diff --git a/tests/data/test_ISM.yml b/tests/data/test_ISM.yml index 9f1fe7992bff4f2daad21780055c6a6c17c4714b..f5ec8327a9d536d70d376c9f008cb0c9c053122d 100644 --- a/tests/data/test_ISM.yml +++ b/tests/data/test_ISM.yml @@ -294,4 +294,4 @@ postprocessing: ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false # limit: true ### Head-tracking trajectory file for binaural output; default = null - # trajectory: "path/to/file" + # out_trajectory: "path/to/file" diff --git a/tests/data/test_MASA.yml b/tests/data/test_MASA.yml index 179dcc9dc9eaede886897c3e774c9b540a41e94b..0f87e050623026b1aac8f7c8fbd7bead361f46d4 100644 --- a/tests/data/test_MASA.yml +++ b/tests/data/test_MASA.yml @@ -292,4 +292,4 @@ postprocessing: ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false limit: true ### Head-tracking trajectory file for binaural output; default = null - # trajectory: "path/to/file" + # out_trajectory: "path/to/file" diff --git 
a/tests/data/test_MC.yml b/tests/data/test_MC.yml index 4e1ea6f1ff62f31b83ab502f8d332c7279627635..451a1a99d97c598a4b98f3cc38ed6967665731a7 100644 --- a/tests/data/test_MC.yml +++ b/tests/data/test_MC.yml @@ -289,4 +289,4 @@ postprocessing: ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false # limit: true ### Head-tracking trajectory file for binaural output; default = null - # trajectory: "path/to/file" + # out_trajectory: "path/to/file" diff --git a/tests/data/test_SBA.yml b/tests/data/test_SBA.yml index e5fedc5d2642e58f8c4bf1f12488a7b2a7b5df94..00dfe72e2abd8fe5a31332eddd6b7fe480274ffc 100644 --- a/tests/data/test_SBA.yml +++ b/tests/data/test_SBA.yml @@ -287,4 +287,4 @@ postprocessing: ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false limit: true ### Head-tracking trajectory file for binaural output; default = null - # trajectory: "path/to/file" + # out_trajectory: "path/to/file" diff --git a/tests/test_audiotools_convert.py b/tests/test_audiotools_convert.py index e933da0f73f1f95b1964d53edee98e315854c9ea..a39d915c722899b3e9e6aee366f190b807ff1e1c 100644 --- a/tests/test_audiotools_convert.py +++ b/tests/test_audiotools_convert.py @@ -95,7 +95,7 @@ def convert( in_fmt=in_fmt, out_fmt=out_fmt, in_meta=in_meta, - trajectory=trj_file, + out_trajectory=trj_file, limit=True, # out_loudness=-26, **kwargs,