Merge branch 'main' of forge.3gpp.org:ivas-codec-pc/ivas-processing-scripts... (5a18ed43) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

.gitignore

+5 −5

Original line number	Diff line number	Diff line
		@@ -6,17 +6,17 @@ venv/
		.vscode/
		.idea/
		.DS_Store
		*.wav
		!tests/data/*/.wav
		*.pcm
		*.bs
		*.192
		mc.double
		proc_input/*.wav
		proc_input/*.pcm
		proc_output/
		experiments/selection//proc_input/.wav
		experiments/selection//proc_input/.pcm
		experiments/selection/*/proc_output/
		experiments/selection//config/cat-lab_.yml
		*~
		tests/tmp_output_*
		tests/temp_output_*
		tests/cut
		tests/ref
		tests/concatenation_folder
		No newline at end of file

.gitlab-ci.yml

+41 −3

Original line number	Diff line number	Diff line
		@@ -6,6 +6,17 @@ default:
		interruptible: true


		workflow:
		rules:
		# see https://docs.gitlab.com/ee/ci/yaml/workflow.html#switch-between-branch-pipelines-and-merge-request-pipelines
		- if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS && $CI_PIPELINE_SOURCE == "push"
		when: never
		- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
		- if: $CI_PIPELINE_SOURCE == 'push' && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH # Pushes to main
		when: never
		- if: $CI_PIPELINE_SOURCE == 'web' # for testing


		stages:
		- check
		- test
		@@ -14,10 +25,10 @@ stages:

		# print some info on the runner setup
		.print-common-info: &print-common-info
		- echo "--------------------------------------------\n"
		- echo "--------------------------------------------"
		- python3 --version
		- pip3 freeze \| grep "numpy\\|scipy\\|YAML\\|pytest\\|black\\|isort\\|flake8"
		- echo "--------------------------------------------\n\n"
		- echo "--------------------------------------------"


		# script anchor for updating the codec repo
		@@ -26,7 +37,12 @@ stages:
		# NOTE: CODEC_DIR has to be in PATH
		- cd $CODEC_DIR
		# make sure that we are at latest main
		- git pull
		# TODO: temporarily use the RC1a tag
		- git restore .
		- git checkout 20230511-RC1a-listening-tests
		- echo "--------------------------------------------"
		- echo "Building codec on commit $(git rev-parse HEAD --short)"
		- echo "--------------------------------------------"
		# only builds if code has actually changed
		- make -j
		- cd $dir
		@@ -57,9 +73,29 @@ test_audiotools_convert:
		- *print-common-info
		- python3 -m pytest -n auto tests/test_audiotools_convert.py

		# run the test configs for the selection experiments
		experiments:
		stage: test
		tags:
		- linux
		script:
		- *print-common-info
		- *get-codec-binaries
		- python3 -m pytest tests/test_experiments.py::test_generate_test_items -n auto \| tee log.txt
		artifacts:
		paths:
		- experiments/selection//proc_output/.log
		- log.txt
		when: on_failure
		expire_in: 1 week

		# run some test configs for item creation
		test_processing:
		stage: test
		rules:
		- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
		- if: $CI_PIPELINE_SOURCE == 'push'
		when: never
		tags:
		- linux
		script:
		@@ -73,6 +109,7 @@ test_processing:

		lint:
		stage: analyze
		needs: []
		tags:
		- linux
		allow_failure: true
		@@ -81,6 +118,7 @@ lint:

		format:
		stage: analyze
		needs: []
		variables:
		ARTIFACT_BASE_NAME: "mr-$CI_MERGE_REQUEST_IID--sha-$CI_COMMIT_SHORT_SHA--formatting-fix"
		ARTIFACT_FOLDER: "formatting-patch"

README.md

+68 −10

Original line number	Diff line number	Diff line
		@@ -47,6 +47,13 @@

		---

		# Item generation

		The `item_generation_scripts` module may be used to generate audio items for the P.800 listening test according to the scene description. All scenes must be fully described in the `SCENE.yml` file. The module takes monophonic audio
		files from the specified input directory as input and stores the audio items in the requested format in the specified output directory. The module also generates the associated metadata files if the target format requires them.

		This module may be executed from the command-line with `python -m ivas_processing_scripts.generation item_gen_configs/SCENE.YML`.

		# Listening test generation

		The `ivas_processing_scripts` module helps to quickly set up listening tests with multiple (pre-)processing and post-processing options.
		@@ -133,6 +140,11 @@ postprocessing:
		# delete_tmp: true
		### Master seed for random processes like bitstream error pattern generation; default = 0
		# master_seed: 5
		### Additional seed to specify number of preruns (used for background noise delay and FER bitstream processing); default = 0
		# prerun_seed: 2
		### flag for linux to use windows-built binaries with wine: default = false
		### this requires the wine binary to be available and will be ignored on windows
		# use_windows_codec_binaries: true

		### Any relative paths will be interpreted relative to the working directory the script is called from!
		### Usage of absolute paths is recommended.
		@@ -163,6 +175,10 @@ output_path: "./tmp_output"
		### searches for the specified substring in found filenames; default = null
		# input_select:
		# - "48kHz"

		### Include the condition number in the item name; default = false
		### for e.g. abcxyz.wav --> abcxyz.cXX.wav
		# condition_in_output_filename: true
		```

		</details>
		@@ -200,7 +216,7 @@ input:
		### Target loudness in LKFS; default = null (no loudness change applied)
		# loudness: -26
		### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null);
		### default = null (uses preprocessing fmt if possible)
		### default = null (uses postprocessing fmt)
		# loudness_fmt: "BINAURAL"
		### Pre-/post-trim individual signal(s) (ms) (negative values pad silence); default = 0
		# trim:
		@@ -237,12 +253,15 @@ input:
		# preamble_noise: true
		### Additive background noise
		# background_noise:
		### REQUIRED: SNR for background noise in dB
		### SNR for background noise in dB; REQUIRED for prerecorded background noise and ignored for low level noise
		# snr: 10
		### REQUIRED: Path to background noise, must have same format and sampling rate as input signal(s)
		### REQUIRED: Either background noise path or low level noise flag
		### Path to background noise, must have same format and sampling rate as input signal(s); default = null
		# background_noise_path: ".../noise.wav"
		### Seed for delay offset; default = 0
		# seed_delay: 10
		### Flag for using low level [-4,+4] background noise; default = false
		# low_level_noise: true
		### Flag for repeating the whole signal once and discarding the first half after processing
		# repeat_signal: true
		```

		</details>
		@@ -275,8 +294,6 @@ input:
		# error_pattern: "path/pattern.192"
		### Error rate in percent
		# error_rate: 5
		### Additional seed to specify number of preruns; default = 0
		# prerun_seed: 2
		```
		</details>

		@@ -348,6 +365,8 @@ conditions_to_generate:
		# fs: 48000
		### Additional commandline options; default = null
		# opts: ["-q", "-no_delay_cmp"]
		### Option to use SBA format of lower or same order (planar also possible) for SBA input formats
		# sba_fmt: "PLANARFOA"

		### IVAS condition ###############################
		c07:
		@@ -375,6 +394,8 @@ conditions_to_generate:
		# fs: 48000
		### Additional commandline options; default = null
		# opts: ["-q", "-no_delay_cmp"]
		### Option to use SBA format of lower or same order (planar also possible) for SBA input formats
		# sba_fmt: "PLANARFOA"

		### EVS condition ################################
		c08:
		@@ -398,6 +419,8 @@ conditions_to_generate:
		bin: ~/git/ivas-codec/EVS_dec
		### Decoder output sampling rate; default = null (same as input)
		# fs: 48000
		### Option to use SBA format of lower or same order (planar also possible) for SBA input formats
		# sba_fmt: "PLANARFOA"
		```

		</details>
		@@ -470,6 +493,7 @@ This configuration has to match the channel configuration. If the provided list
		For the encoding stage `cod` and the decoding stage `dec`, the path to the IVAS_cod and IVAS_dec binaries can be specified under the key `bin`.
		Additionally some resampling can be applied by using the key `fs` followed by the desired sampling rate.
		The general bitstream processing configuration can be locally overwritten for each EVS and IVAS condition with the key `tx`.
		For IVAS and EVS conditions the `sba_fmt` key is available to specify an SBA format of lower or same order compared to the input for SBA input formats.
		The additional key `evs_lfe_9k6bps_nb` is only available for EVS conditions and ensures a bitrate of 9.6kbps and narrow band processing of the LFE channel(s).
		#### IVAS
		The configuration of the IVAS condition is similar to the EVS condition. However, only one bitrate for all channels (and metadata) can be specified.
		@@ -512,15 +536,16 @@ The following additional executables are needed for the different processing ste

		\| Processing step \| Executable \| Where to find \|
		\|-------------------------------------------------\|-----------------------\|-------------------------------------------------------------------------------------------------------------\|
		\| Loudness measurement and adjustment \| bs1770demo \| https://github.com/openitu/STL \|
		\| Loudness measurement and adjustment \| bs1770demo \| https://github.com/ErikNorvell-Ericsson/STL (Note branch) \|
		\| MNRU \| p50fbmnru \| https://github.com/openitu/STL \|
		\| ESDRU \| esdru \| https://github.com/openitu/STL \|
		\| Frame error pattern application \| eid-xor \| https://github.com/openitu/STL \|
		\| Error pattern generation \| gen-patt \| https://www.itu.int/rec/T-REC-G.191-201003-S/en (Note: Version in https://github.com/openitu/STL is buggy!) \|
		\| Reverberation module \| reverb \| https://github.com/openitu/STL \|
		\| Filtering, Resampling \| filter \| https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip \|
		\| Random offset/seed generation (necessary for background noise and FER bitstream processing) \| random \| https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip \|
		\| JBM network simulator \| networkSimulator_g192 \| https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip \|
		\| MASA rendering (also used in loudness measurement of MASA items) \| masaRenderer \| https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/TSGS4_122_Athens/Docs/S4-230221.zip \|
		\| MASA rendering (also used in loudness measurement of MASA items) \| masaRenderer, masaAnalyzer \| https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/TSGS4_122_Athens/Docs/S4-230221.zip \|
		\| EVS reference conditions \| EVS_cod, EVS_dec \| https://www.3gpp.org/ftp/Specs/archive/26_series/26.443/26443-h00.zip \|

		The necessary binaries have to be either placed in the [ivas_processing_scripts/bin](./ivas_processing_scripts/bin) folder or the path has to be specified in
		@@ -661,6 +686,39 @@ ISM

		---

		# audiotools CLI for format conversion and rendering
		# Audiotools CLI for format conversion and rendering

		Please refer to [the notebook](./examples/audiotools.ipynb) for an overview.

		# How to generate the configs and process items for the selection test experiments

		The script `generate_test.py` is used to generate config files and process items for the selection test experiments:
		```
		usage: generate_test.py [-h] [--no_parallel] [--create_cfg_only] exp_lab_pairs [exp_lab_pairs ...]

		Generate config files and process files for selecton experiments. Experiment names and lab ids must be given as comma-separated pairs (e.g. 'P800-5,b BS1534-4a,d ...')

		positional arguments:
		exp_lab_pairs The combinations of experiment/lab-id that you want to generate, separated by whitespace. Experiment and lab id need to be separated by a comma.

		options:
		-h, --help show this help message and exit
		--no_parallel If given, configs will not be run in parallel
		--create_cfg_only If given, only create the configs and folder structure without processing items
		```
		Before running the script, one needs to put the input files in the respective input folder (including the background noise files, see below). If input files are missing, the script will complain and stop. For example, for processing tests P800-3 and BS1534-4a for labs b and d, respectively, the command line would look like this (no whitespace around the commas!):
		```
		python3 generate_test.py P800-3,b BS1534-4a,d
		```

		Tests are processed separately per category and per lab (as some values in the configs are dependent on category and lab). For each experiment, a static base config is stored from which the actual configs are generated (identified by the suffix `catX-lab_Y.yml`). For P800 tests, there are 6 categories each. The BS1534 experiments do not define categories, except for the MASA ones (BS1534-7a/b) - there one might mix FOA and HOA2 input material, so there are 2 categories for those in the scripts (category 1 for FOA, category 2 for HOA2). In `experiments/selection/` there is a folder structure prepared for all selection experiments, in which you have to put the input files for your test. For example, for P800-1:
		```
		experiments/selection/P800-1/
		├── background_noise <--- put your background files in here and name them as background_noisecatX.wav. Not all experiments use background noise
		├── config <--- contains base config, generated configs will be stored here, too
		│ ├── P800-1.yml
		├── proc_input
		│ ├── catX <--- put your input files for cat X in here
		└── proc_output <--- collect your output from here, example subfolder below
		│ ├── catX-lab_Y <--- NOTE: this is only generated by the script and not checked into the repository
		```
		No newline at end of file

examples/TEMPLATE.yml

+41 −11

Original line number	Diff line number	Diff line
		@@ -19,15 +19,21 @@
		# delete_tmp: true
		### Master seed for random processes like bitstream error pattern generation; default = 0
		# master_seed: 5
		### Additional seed to specify number of preruns (used for background noise delay and FER bitstream processing); default = 0
		# prerun_seed: 2
		### flag for linux to use windows-built binaries with wine: default = false
		### this requires the wine binary to be available and will be ignored on windows
		# use_windows_codec_binaries: true

		### Any relative paths will be interpreted relative to the working directory the script is called from!
		### Usage of absolute paths is recommended.
		### Do not use file names with dots "." in them! This is not supported, use "_" instead
		### For Windows user: please use double back slash '\\' in paths and add '.exe' to executable definitions
		### Do not use "tmp_" in file or folder names ("temp_" is fine)
		### For Windows user: please use double back slash '\\' in paths
		### REQUIRED: Input path or file
		input_path: ".../ivas/items/HOA3"
		### REQUIRED: Output path or file
		output_path: ".../tmp_output"
		output_path: ".../temp_output"
		### Metadata path or file(s)
		### If input format is ISM{1-4} a path for the metadata files can be specified;
		### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherwise ignored)
		@@ -50,6 +56,10 @@ output_path: ".../tmp_output"
		# input_select:
		# - "48kHz"

		### Include the condition number in the item name; default = false
		### for e.g. abcxyz.wav --> abcxyz.cXX.wav
		# condition_in_output_filename: true

		################################################
		### Input configuration
		################################################
		@@ -58,6 +68,13 @@ input:
		fmt: "HOA3"
		### Input sampling rate in Hz needed for headerless audio files; default = 48000
		# fs: 32000
		### Enable check for input files being aligned to an integer multiple of a given length in ms.
		### If a file is not aligned, a warning will be issued. If the input format has metadata or force is true, an error is raised instead.
		# aligned_to:
		### alignment length in ms, is needed if aligned_to is used
		# len: 20
		### default: false
		# force: true

		################################################
		### Pre-processing on individual items
		@@ -74,7 +91,7 @@ input:
		### Target loudness in LKFS; default = null (no loudness change applied)
		# loudness: -26
		### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null);
		### default = null (uses preprocessing fmt if possible)
		### default = null (uses postprocessing fmt)
		# loudness_fmt: "BINAURAL"
		### Pre-/post-trim individual signal(s) (ms) (negative values pad silence); default = 0
		# trim:
		@@ -105,14 +122,21 @@ input:
		# preamble: 10000
		### Flag whether to use noise (amplitude +-4) for the preamble or silence; default = false (silence)
		# preamble_noise: true
		### Specify postamble duration in ms. Postamble is added after concatenation and possible signal repetition. default = 0
		# postamble: 20
		### Flag whether to use noise (amplitude +-4) for the postamble or silence; default = false (silence)
		# postamble_noise: true
		### Additive background noise
		# background_noise:
		### REQUIRED: SNR for background noise in dB
		### SNR for background noise in dB; REQUIRED for prerecorded background noise and ignored for low level noise
		# snr: 10
		### REQUIRED: Path to background noise, must have same format and sampling rate as input signal(s)
		### REQUIRED: Either background noise path or low level noise flag
		### Path to background noise, must have same format and sampling rate as input signal(s); default = null
		# background_noise_path: ".../noise.wav"
		### Seed for delay offset; default = 0
		# seed_delay: 10
		### Flag for using low level [-4,+4] background noise; default = false
		# low_level_noise: true
		### Flag for repeating the whole signal once and discarding the first half after processing
		# repeat_signal: true

		#################################################
		### Bitstream processing
		@@ -139,8 +163,6 @@ input:
		# error_pattern: "path/pattern.192"
		### Error rate in percent
		# error_rate: 5
		### Additional seed to specify number of preruns; default = 0
		# prerun_seed: 2

		################################################
		### Configuration for conditions under test
		@@ -209,6 +231,8 @@ conditions_to_generate:
		### Bitstream options
		# tx:
		### For possible arguments see overall bitstream modification
		### Option to use SBA format of lower or same order (planar also possible) for SBA input formats
		# sba_fmt: "PLANARFOA"

		### IVAS condition ###############################
		c07:
		@@ -239,6 +263,8 @@ conditions_to_generate:
		### Bitstream options
		# tx:
		### For possible arguments see overall bitstream modification
		### Option to use SBA format of lower or same order (planar also possible) for SBA input formats
		# sba_fmt: "PLANARFOA"

		### EVS condition ################################
		c08:
		@@ -251,7 +277,7 @@ conditions_to_generate:
		# - 9600
		- [13200, 13200, 8000, 13200, 9600]
		### for multi-channel configs, code LFE with 9.6 kbps NB (as mandated by IVAS-3)
		evs_lfe_9k6bps_nb: true
		# evs_lfe_9k6bps_nb: true
		### Encoder options
		cod:
		### Path to encoder binary; default search for EVS_cod in bin folder (primary) and PATH (secondary)
		@@ -267,6 +293,8 @@ conditions_to_generate:
		### Bitstream options
		# tx:
		### For possible arguments see overall bitstream modification
		### Option to use SBA format of lower or same order (planar also possible) for SBA input formats
		# sba_fmt: "PLANARFOA"

		################################################
		### Post-processing
		@@ -274,7 +302,9 @@ conditions_to_generate:
		### Post-processing step performed after core processing for all conditions
		### Post-processing is required and can not be omitted
		postprocessing:
		### REQUIRED: Target format for output
		### REQUIRED: Target format for output, this can be a string as below, or a list, e.g. ["FOA", "BINAURAL"].
		### Conversion will be applied in order and the last format is the final output format. This was introduced to
		### accommodate the MASA tests where masaRenderer is used as binaural renderer for all conditions.
		fmt: "BINAURAL"
		### REQUIRED: Target sampling rate in Hz for resampling
		fs: 48000

experiments/selection/BS1534-1a/config/BS1534-1a.yml

0 → 100644

+101 −0

Original line number	Diff line number	Diff line
		---
		################################################
		# General configuration
		################################################

		name: BS1534-1a
		master_seed: 5
		prerun_seed: 2

		input_path: "experiments/selection/BS1534-1a/proc_input"
		output_path: "experiments/selection/BS1534-1a/proc_output"
		use_windows_codec_binaries: true
		condition_in_output_filename: true

		################################################
		### Input configuration
		################################################
		input:
		fmt: "STEREO"
		fs: 48000
		aligned_to:
		len: 20

		################################################
		### Pre-processing on individual items
		################################################
		preprocessing:
		mask: "20KBP"
		loudness: -26
		window: 100

		################################################
		### Pre-processing on whole signal(s)
		################################################
		preprocessing_2:
		concatenate_input: false
		preamble_noise: false
		postamble: 20
		postamble_noise: true
		repeat_signal: true

		#################################################
		### Bitstream processing
		#################################################

		################################################
		### Configuration for conditions under test
		################################################
		conditions_to_generate:
		### Reference and anchor conditions ##########################
		c01:
		type: ref
		c02:
		type: lp7k

		### EVS condition ################################
		c03:
		type: evs
		bitrates:
		- 24400
		cod:
		opts: ["-max_band", "FB"]
		dec:
		c04:
		type: evs
		bitrates:
		- 32000
		cod:
		opts: ["-max_band", "FB"]
		dec:
		c05:
		type: evs
		bitrates:
		- 48000
		cod:
		opts: ["-max_band", "FB"]
		dec:

		### IVAS condition ###############################
		c06:
		type: ivas
		bitrates:
		- 48000
		cod:
		dec:
		fmt: "STEREO"
		c07:
		type: ivas
		bitrates:
		- 64000
		cod:
		dec:
		fmt: "STEREO"

		################################################
		### Post-processing
		################################################
		postprocessing:
		fmt: "STEREO"
		fs: 48000
		loudness: -26