enable per-item trajectory files and update configurations (e5891457) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

examples/TEMPLATE.yml

+0 −2

Original line number	Diff line number	Diff line
		@@ -328,5 +328,3 @@ postprocessing:
		# bin_lfe_gain: 1
		### Flag whether output should be limited to avoid clipping (can alter target loudness); default = false
		# limit: true
		### Head-tracking trajectory file for binaural output; default = null
		# out_trajectory: "path/to/file"

examples/audiotools.ipynb

+42 −21

Original line number	Diff line number	Diff line
		%% Cell type:markdown id: tags:

		# audiotools module

		The audiotools module can be used via the CLI for performing rendering of audio files or used as a library by importing the functions in a python script.

		This notebook contains a few command-line examples and a brief example of how to use the functions in an interactive python session (like this notebook); the same approach can be used in a standalone python script.

		%% Cell type:markdown id: tags:


		# Command-line interface / renderer

		The CLI can be used by running the python module: `python -m ivas_processing_scripts.audiotools --help`.

		<details>
		<summary>Click to expand...</summary>

		```bash
		❯ python -m ivas_processing_scripts.audiotools --help
		usage: __main__.py [-h] -i INPUT -if IN_FMT [-is IN_FS] [-ifc IN_CUTOFF] [-ihp] [-iw IN_WINDOW] [-it PRE_TRIM POST_TRIM] [-ipn] [-id IN_DELAY] [-il IN_LOUDNESS] [-inf IN_LOUDNESS_FMT]
		[-im IN_META [IN_META ...]] -o OUTPUT [-of OUT_FMT] [-os OUT_FS] [-ofc OUT_CUTOFF] [-ohp] [-ow OUT_WINDOW] [-ot PRE_TRIM POST_TRIM] [-opn] [-od OUT_DELAY] [-ol OUT_LOUDNESS]
		[-onf OUT_LOUDNESS_FMT] [-lm] [-t TRAJECTORY] [-bd BIN_DATASET] [-bl BIN_LFE_GAIN] [-l] [-L] [-mp]
		usage: __main__.py [-h] -i INPUT -if IN_FMT [-is IN_FS] [-ifc IN_CUTOFF] [-imk IN_MASK] [-iw IN_WINDOW] [-ix PRE_TRIM POST_TRIM] [-it IN_TRAJECTORY] [-ipn] [-id IN_DELAY] [-il IN_LOUDNESS] [-inf IN_LOUDNESS_FMT] [-im IN_META [IN_META ...]] -o OUTPUT
		[-of OUT_FMT] [-os OUT_FS] [-ofc OUT_CUTOFF] [-omk OUT_MASK] [-ow OUT_WINDOW] [-ox PRE_TRIM POST_TRIM] [-ot OUT_TRAJECTORY] [-opn] [-od OUT_DELAY] [-ol OUT_LOUDNESS] [-onf OUT_LOUDNESS_FMT] [-lm] [-bd BIN_DATASET] [-bl BIN_LFE_GAIN]
		[-mnru MNRU_Q] [-esdru ESDRU_ALPHA] [-l] [-L] [-mp]

		Audiotools: Convert/Manipulate spatial audio files.

		options:
		optional arguments:
		-h, --help show this help message and exit

		Input (pre-) processing options:
		-i INPUT, --in INPUT Path to *.{wav, pcm, raw} file or directory
		-if IN_FMT, --in_fmt IN_FMT
		Audio format (use -l, --list for a list / -L, --long for a detailed list)
		-is IN_FS, --in_fs IN_FS
		Sampling rate (Hz) (deduced for .wav input, same as input if output not specified, default = 48000)
		-ifc IN_CUTOFF, --in_cutoff IN_CUTOFF
		Cut-off frequency for low-pass filtering (default = None)
		-ihp, --in_hp50 Apply 50 Hz high-pass filtering (default = False)
		-imk IN_MASK, --in_mask IN_MASK
		Apply filtering with mask (HP50, 20KBP or None; default = None)
		-iw IN_WINDOW, --in_window IN_WINDOW
		Window the start/end of the signal by this amount in milliseconds (default = None)
		-it PRE_TRIM POST_TRIM, --in_trim PRE_TRIM POST_TRIM
		-ix PRE_TRIM POST_TRIM, --in_trim PRE_TRIM POST_TRIM
		Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence), (default = None)
		-it IN_TRAJECTORY, --in_trajectory IN_TRAJECTORY
		Head-tracking trajectory file for input pre-rotation or binaural output (default = None)
		-ipn, --in_pad_noise Flag for padding with noise instead of zeros
		-id IN_DELAY, --in_delay IN_DELAY
		Delay the signal by this amount in milliseconds (negative values advance, default = None)
		-il IN_LOUDNESS, --in_loudness IN_LOUDNESS
		Normalize to given loudness with BS 1770-4 (default = None)
		-inf IN_LOUDNESS_FMT, --in_loudness_fmt IN_LOUDNESS_FMT
		Format used for loudness computation (only valid with -il/--in_loudness, default = IN_FMT)
		-im IN_META [IN_META ...], --in_meta IN_META [IN_META ...]
		list of input metadata files (only relevant for ISM and MASA input)

		Output (post-) processing options:
		-o OUTPUT, --out OUTPUT
		Path to *.{wav, pcm, raw} file or directory
		-of OUT_FMT, --out_fmt OUT_FMT
		Audio format (use -l, --list for a list / -L, --long for a detailed list)
		-os OUT_FS, --out_fs OUT_FS
		Sampling rate (Hz) (deduced for .wav input, same as input if output not specified, default = 48000)
		-ofc OUT_CUTOFF, --out_cutoff OUT_CUTOFF
		Cut-off frequency for low-pass filtering (default = None)
		-ohp, --out_hp50 Apply 50 Hz high-pass filtering (default = False)
		-omk OUT_MASK, --out_mask OUT_MASK
		Apply filtering with mask (HP50, 20KBP or None; default = None)
		-ow OUT_WINDOW, --out_window OUT_WINDOW
		Window the start/end of the signal by this amount in milliseconds (default = None)
		-ot PRE_TRIM POST_TRIM, --out_trim PRE_TRIM POST_TRIM
		-ox PRE_TRIM POST_TRIM, --out_trim PRE_TRIM POST_TRIM
		Pre-/post-trim the signal by this amount in milliseconds (negative values pad silence), (default = None)
		-ot OUT_TRAJECTORY, --out_trajectory OUT_TRAJECTORY
		Head-tracking trajectory file for input pre-rotation or binaural output (default = None)
		-opn, --out_pad_noise
		Flag for padding with noise instead of zeros
		-od OUT_DELAY, --out_delay OUT_DELAY
		Delay the signal by this amount in milliseconds (negative values advance, default = None)
		-ol OUT_LOUDNESS, --out_loudness OUT_LOUDNESS
		Normalize to given loudness with BS 1770-4 (default = None)
		-onf OUT_LOUDNESS_FMT, --out_loudness_fmt OUT_LOUDNESS_FMT
		Format used for loudness computation (only valid with -ol/--out_loudness, default = OUT_FMT)
		-lm, --limit Apply limiting to output (default = False)
		-t TRAJECTORY, --trajectory TRAJECTORY
		Head-tracking trajectory file for binaural output (default = None)
		-bd BIN_DATASET, --bin_dataset BIN_DATASET
		Use a custom binaural dataset (see README.md and audiotools/binaural_datasets/README.txt for further information)
		-bl BIN_LFE_GAIN, --bin_lfe_gain BIN_LFE_GAIN
		Render LFE to binaural output with the specified gain (only valid for channel-based input, default = None)
		Render LFE to binaural output with the specified gain (only valid for channel-based input, default = 1.8836490894898006)
		-mnru MNRU_Q, --mnru_q MNRU_Q
		Flag for MNRU processing
		-esdru ESDRU_ALPHA, --esdru_alpha ESDRU_ALPHA
		Flag for ESDRU processing

		General options:
		-l, --list list all supported audio formats and exit
		-L, --long list all supported audio formats with long description and exit
		-mp, --multiprocessing
		Enable multiprocessing (default = False)
		Enable multiprocessing (default = False)
		```

		</details>

		Please refer to the README.md and `--help` for a more detailed description of possible arguments and their usage.



		## Example command line usage

		```bash
		# Rendering a HOA3 scene to loudspeakers:
		python -m ivas_processing_scripts.audiotools -i input_hoa3.wav -if HOA3 -of 7_1_4 -o output_7_1_4.wav

		# Rendering 4 ISM objects to binaural with a head rotation trajectory:
		python -m ivas_processing_scripts.audiotools -i input_ism4.wav -if ISM4 -im obj1.csv obj2.csv obj3.csv obj4.csv -of BINAURAL -ot trajectory.csv -o output_binaural_headtracked.wav

		# Rendering a directory of 5_1 files to binaural with output loudness normalization and parallel processing enabled:
		python -m ivas_processing_scripts.audiotools -i 5_1_input/ -if 5_1 -of BINAURAL -o output_binaural_norm/ -ol -26 -mp
		```

		%% Cell type:markdown id: tags:

		# Usage in an interactive python session

		## Importing the module
		The module, its submodules and functions may be imported just like any other python package. To make the module available in any directory, the `PYTHONPATH` must be modified.

		The module, its submodules and functions may be imported just like any other python package. To make the module available in _any_ directory, the `PYTHONPATH` must be modified.

		The recommended way to do this is to add the following lines at the top of a script which requires this module:
		(refer to https://docs.python.org/3/library/sys.html#sys.path)

		```python
		import sys
		sys.path.append("/path/to/this/repository")
		import ivas_processing_scripts.audiotools # import can now be resolved
		```

		An alternative is to modify the shell environment before calling the python interpreter, but this is left to the reader to try. The above solution is cross-platform.

		%% Cell type:markdown id: tags:

		## Example usage

		%% Cell type:code id: tags:

		``` python
		import sys

		sys.path.append("..") # in a real script, this would ideally be an absolute path
		```

		%% Cell type:markdown id: tags:

		Reading audio and applying basic functions:

		%% Cell type:code id: tags:

		``` python
		from ivas_processing_scripts.audiotools.audiofile import read
		from ivas_processing_scripts.audiotools.audioarray import delay, getdelay

		audio_hoa3, fs = read("../tests/data/spectral/spectral_test_16ch_48kHz.wav")

		# delay by 20ms
		audio_hoa3_delayed = delay(audio_hoa3, fs, 20)

		# compute the delay between the two audio arrays
		print(f"Delay is {getdelay(audio_hoa3, audio_hoa3_delayed)} samples")
		```

		%% Output


		%% Cell type:markdown id: tags:

		For more convenient manipulation of audio, the `audio` python file offers the base class `Audio` upon which the derived classes `BinauralAudio`, `ChannelBasedAudio`, `MetadataAssistedSpatialAudio`, `ObjectBasedAudio`, `SceneBasedAudio` are implemented.

		To instantiate a class object, the convenience functions ("factory" methods) `fromtype()`, `fromarray()` and `fromfile()` are available:

		%% Cell type:code id: tags:

		``` python
		from ivas_processing_scripts.audiotools.audio import (
		Audio,
		ChannelBasedAudio,
		SceneBasedAudio,
		)
		from ivas_processing_scripts.audiotools.audio import fromarray

		hoa3 = fromarray("HOA3", audio_hoa3, fs)

		print(hoa3)
		print(f"Is Audio instance? {isinstance(hoa3, Audio)}")
		print(f"Is SceneBasedAudio instance? {isinstance(hoa3, SceneBasedAudio)}")
		print(f"Is ChannelBasedAudio instance? {isinstance(hoa3, ChannelBasedAudio)}")
		```

		%% Output





		%% Cell type:markdown id: tags:

		The audio object allows usage of further functions, which accept an instance of `Audio` (i.e. derived classes), such as the ITU filter wrapper (filter executable must be in "../bin" or PATH!):

		%% Cell type:code id: tags:

		``` python
		from ivas_processing_scripts.audiotools.wrappers.filter import lpfilter_itu

		# the lpfilter_itu function accepts an audio object
		# and a cutoff frequency as arguments
		try:
		hoa3_lp4k = lpfilter_itu(hoa3, 4000)
		except ValueError as e:
		print(f"ValueError encountered : {e}")

		# try with a supported cut-off
		hoa3_lp3k5 = lpfilter_itu(hoa3, 3500)
		print(f"Input audio: {hoa3.audio}")
		print(
		f"Filtered audio: {hoa3_lp3k5}"
		) # the function returns the filtered array, not an object
		```

		%% Output




		%% Cell type:markdown id: tags:

		The object-based approach allows easier manipulation of audio since the necessary values for manipulation are available as attributes. A non-concrete object may also be created to be filled in with data later:

		%% Cell type:code id: tags:

		``` python
		from ivas_processing_scripts.audiotools.audio import fromtype

		mc_714 = fromtype("7_1_4")

		print(mc_714)
		```

		%% Output


		%% Cell type:markdown id: tags:

		The conversion routines are implemented in the `convert` submodule of `audiotools`. These accept two audio objects - one as an input (a "concrete" object) and another as output to be filled-in ("hollow"). Using the concrete HOA3 audio object we instantiated from an array, and the hollow 7_1_4 object, we can use the `convert_scenebased()` function to perform a conversion:

		%% Cell type:code id: tags:

		``` python
		from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased

		convert_scenebased(hoa3, mc_714)

		print(mc_714)
		```

		%% Output


		%% Cell type:markdown id: tags:

		The `convert_scenebased()` function was already provided with all of the information that was required to perform a conversion from the input format to the output format since they were all class attributes. Under the hood the function checked the type of output audio, computed the necessary rendering matrix using the loudspeaker positions, applied the transformation and set the audio array of the output object.

		%% Cell type:markdown id: tags:

		A more advanced example with a generator for performing operations on a framewise basis:

		%% Cell type:code id: tags:

		``` python
		import numpy as np
		from ivas_processing_scripts.audiotools.audioarray import framewise_io

		frame_len = 960 # 20ms at 48 kHz
		input = audio_hoa3
		output = np.zeros_like(input)

		for frame_idx, (frame_in, frame_out) in framewise_io(
		input, output, frame_len, zero_pad=True
		):
		# example of an operation involving the input frame
		frame_out[:] += (
		frame_in[:]
		* 0.5
		* np.sin(2 * np.pi * frame_idx)
		* np.random.rand(*frame_in.shape)
		)
		```

		%% Cell type:markdown id: tags:


		This concludes the overview of the audiotools module. For readers interested in implementing scripts based on this module, it is recommended to run a debugging session for an example commandline above (either using an IDE or `python -m pdb -m ivas_processing_scripts.audiotools ...`) and examine the functions used, along with a read through of the source code.

		A listing of each file in the module with a description is below for reference:

		<details>
		<summary>Click to expand...</summary>

		```bash
		.
		├── __init__.py
		├── __main__.py # entry point for CLI
		├── audio.py # implementation of Audio base class and derived classes
		├── audioarray.py # functions to manipulate numpy audio arrays
		├── audiofile.py # functions to manipulate audio files
		├── binaural_datasets
		│ ├── __init__.py
		│ ├── binaural_dataset.py # reading and parsing of binaural datasets
		│ └── README.txt
		├── binauralobjectrenderer.py # reference binaural rendering algorithm for object based audio
		├── constants.py # submodule shared constants
		├── convert
		│ ├── __init__.py # TODO rename: conversion module
		│ ├── __init__.py # conversion module
		│ ├── binaural.py # binaural audio related conversions
		│ ├── channelbased.py # channel based audio related conversions
		│ ├── masa.py # MASA related conversions (relies on wrappers.masaRenderer)
		│ ├── objectbased.py # object based audio related conversions
		│ ├── omasa.py
		│ ├── osba.py
		│ └── scenebased.py # scene based audio related conversions
		├── EFAP.py # edge-fading amplitude panning implementation
		├── metadata.py # TODO rename: scene description / composite audio format
		├── metadata.py # scene description files and metadata handling
		├── quaternions.py
		├── rotation.py # rotation related functions
		├── utils.py # TODO remove? module convenience functions
		├── utils.py # module convenience functions
		└── wrappers
		├── __init__.py
		├── bs1770.py # wrapper for ITU STL bs1770demo
		├── dlyerr_2_errpat.py
		├── eid_xor.py
		├── esdru.py
		├── filter.py # wrapper for ITU STL filter
		└── masaRenderer.py # wrapper for MASA reference software masaRenderer
		├── gen_patt.py
		├── masaAnalyzer.py # wrapper for MASA reference software masaAnalyzer
		├── masaRenderer.py # wrapper for MASA reference software masaRenderer
		├── networkSimulator.py
		├── p50fbmnru.py
		├── random_seed.py
		└── reverb.py
		```

		</details>

experiments/selection_isar/BS1534-1a/config/BS1534-1a.yml

+0 −5

Original line number	Diff line number	Diff line
		@@ -55,7 +55,6 @@ conditions_to_generate:
		cod:
		dec:
		fmt: "BINAURAL"
		trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-1a/config/pre.csv"
		c04:
		type: ivas_transcoding
		bitrates:
		@@ -63,7 +62,6 @@ conditions_to_generate:
		cod:
		dec:
		fmt: "BINAURAL"
		trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-1a/config/pre.csv"
		trans_bitrate: 256000
		trans_cod:
		fmt: "STEREO"
		@@ -76,12 +74,10 @@ conditions_to_generate:
		cod:
		dec:
		fmt: "BINAURAL_SPLIT_CODED"
		trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-1a/config/pre.csv"
		split_rend:
		fmt: "BINAURAL"
		bitrate: 512000
		dof: 3
		trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-1a/config/post.csv"

		################################################
		### Post-processing
		@@ -90,4 +86,3 @@ postprocessing:
		fmt: "BINAURAL"
		fs: 48000
		loudness: -26
		out_trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-1a/config/post.csv"

experiments/selection_isar/BS1534-2a/config/BS1534-2a.yml

+0 −5

Original line number	Diff line number	Diff line
		@@ -62,7 +62,6 @@ conditions_to_generate:
		fmt: "MASA2DIR1"
		dec:
		fmt: "BINAURAL"
		trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-2a/config/pre.csv"
		c04:
		type: ivas_transcoding
		bitrates:
		@@ -71,7 +70,6 @@ conditions_to_generate:
		fmt: "MASA2DIR1"
		dec:
		fmt: "BINAURAL"
		trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-2a/config/pre.csv"
		trans_bitrate: 256000
		trans_cod:
		fmt: "STEREO"
		@@ -85,12 +83,10 @@ conditions_to_generate:
		fmt: "MASA2DIR1"
		dec:
		fmt: "BINAURAL_SPLIT_CODED"
		trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-2a/config/pre.csv"
		split_rend:
		fmt: "BINAURAL"
		bitrate: 512000
		dof: 3
		trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-2a/config/post.csv"

		################################################
		### Post-processing
		@@ -99,4 +95,3 @@ postprocessing:
		fmt: "BINAURAL"
		fs: 48000
		loudness: -26
		out_trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-2a/config/post.csv"

experiments/selection_isar/BS1534-3a/config/BS1534-3a.yml

+0 −5

Original line number	Diff line number	Diff line
		@@ -55,7 +55,6 @@ conditions_to_generate:
		cod:
		dec:
		fmt: "BINAURAL"
		trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-3a/config/pre.csv"
		c04:
		type: ivas_transcoding
		bitrates:
		@@ -63,7 +62,6 @@ conditions_to_generate:
		cod:
		dec:
		fmt: "BINAURAL"
		trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-3a/config/pre.csv"
		trans_bitrate: 256000
		trans_cod:
		fmt: "STEREO"
		@@ -76,12 +74,10 @@ conditions_to_generate:
		cod:
		dec:
		fmt: "BINAURAL_SPLIT_CODED"
		trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-3a/config/pre.csv"
		split_rend:
		fmt: "BINAURAL"
		bitrate: 512000
		dof: 3
		trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-3a/config/post.csv"

		################################################
		### Post-processing
		@@ -90,4 +86,3 @@ postprocessing:
		fmt: "BINAURAL"
		fs: 48000
		loudness: -26
		out_trajectory: "/Users/tmu/git/ivas-processing-scripts/experiments/selection_isar/BS1534-3a/config/post.csv"