Merge branch '96-missing-support-for-mc-format-in-item-generation-scripts' into 'main' (f3c848c2) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

examples/ITEM_GENERATION_5_1_4.yml

0 → 100644

+177 −0

Original line number	Diff line number	Diff line
		---
		################################################
		# Item generation - General configuration
		################################################

		### Any relative paths will be interpreted relative to the working directory the script is called from!
		### Usage of absolute paths is recommended.
		### Do not use file names with dots "." in them! This is not supported, use "_" instead
		### For Windows users: please use a double backslash '\\' in paths and add '.exe' to executable definitions

		### Output format
		format: "5_1_4"
		# masa_tc: 1 # applicable only to MASA/OMASA format
		# masa_dirs: 1 # applicable only to MASA/OMASA format
		# sba_order: 2 # applicable only to OSBA format

		### Output sampling rate in Hz
		fs: 48000

		### Generate BINAURAL output (_BINAURAL will be appended to the output filename)
		binaural_output: true

		### Normalize target loudness to X LKFS
		loudness: -26

		### Apply pre-amble and post-amble in X seconds
		preamble: 0.0
		postamble: 0.0

		### Apply fade-in and fade-out of X seconds
		fade_in_out: 0.5

		### Trim the output such that the total duration is X seconds
		duration: 8

		### Add low-level random background noise (amplitude +-4) instead of silence; default = false (silence)
		add_low_level_random_noise: false

		### Process with parallel streams
		multiprocessing: false

		################################################
		### Item generation - Filename conventions
		################################################

		### Naming convention for the input mono files
		### The input filenames are represented by:
		### lLLeeettszz.wav
		### where:
		### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com)
		### LL stands for the language: JP, FR, GE, MA, DA, EN
		### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09
		### tt stands for the talker ID: f1, f2, f3, m1, m2, m3
		### s stands for 'sample' and zz is the sample number; 01, ..., 14

		### Naming convention for the generated output files
		### The output filenames are represented by:
		### leeeayszz.wav
		### The filenames of the accompanying output metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by:
		### leeeayszz.met for metadata-assisted spatial audio
		### leeeayszz.wav.o.csv for object-based audio
		### where:
		### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com)
		### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09
		### a stands for 'audio'
		### y is the per-experiment category according to IVAS-8a: 01, 02, 03, 04, 05, 06
		### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07 (07 is the preliminary sample)
		### o stands for the object number; 0, 1, 2, 3

		### File designators, default is "l" for listening lab, "EN" for language, "p07" for experiment and "g" for company
		listening_lab: "l"
		language: "EN"
		exp: "p01"
		provider: "va"

		### Insert prefix for all input filenames (default: "")
		### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'experiment'
		### the number of consecutive letters defines the length of each field
		# use_input_prefix: "lLLeee"

		### Insert prefix for all output filenames (default: "")
		### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'experiment'
		### the number of consecutive letters defines the length of each field
		# use_output_prefix: "leee"

		################################################
		### Item generation - Scene description
		################################################

		### Each scene shall be described using the following parameters/properties:
		### output: output filename
		### description: textual description of the scene
		### input: input filename(s)
		### IR: filename(s) of the input IRs
		### azimuth: azimuth in the range [-180,180]; positive values point to the left
		### elevation: elevation in the range [-90,90]; positive values indicate up
		### shift: time adjustment of the input signal (negative value delays the signal)
		### background: background noise filename (if used, the 'add_low_level_random_noise' parameter is ignored)
		### background_level: normalize the background noise loudness to X dB LKFS
		###
		### Note 0: you can use relative paths in filenames (the program assumes that the root directory is the parent directory of the ivas_processing_scripts subfolder)
		### Note 1: use brackets [val1, val2, ...] when specifying multiple values
		### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames
		### Note 3: we're using a right-handed coordinate system with azimuth = 0 pointing from the nose to the screen

		scenes:
		"01":
		output: "out/s01.wav"
		description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		input: ["items_mono/untrimmed/f1s4b_Talker2.wav", "items_mono/untrimmed/f2s1a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_01_01_FOA.wav", "IRs/IR_do_p04_e_02_01_FOA.wav"]
		shift: [0.0, -1.0]
		background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		background_level: -46

		"02":
		output: "out/s02.wav"
		description: "Car with AB microphone pickup, overlap between the talkers, car noise."
		input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
		shift: [0.0, +1.0]
		background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		background_level: -46

		"03":
		output: "out/s03.wav"
		description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"]
		IR: ["IRs/IR_do_p04_e_05_01_FOA.wav", "IRs/IR_do_p04_e_06_01_FOA.wav"]
		shift: [0.0, -1.0]
		background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		background_level: -46

		"04":
		output: "out/s04.wav"
		description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_08_01_FOA.wav"]
		shift: [0.0, -1.0]
		background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		background_level: -46

		"05":
		output: "out/s05.wav"
		description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
		shift: [0.0, -1.0]
		background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		background_level: -46

		"06":
		output: "out/s06.wav"
		description: "Car with AB microphone pickup, no overlap between the talkers."
		input: ["items_mono/untrimmed/m3s8a_Talker2.wav", "items_mono/untrimmed/m4s13a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
		shift: [0.0, -1.0]
		background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		background_level: -46

		"07":
		output: "out/s07.wav"
		description: "Preliminary: Car with AB microphone pickup, no overlap between the talkers."
		input: ["items_mono/untrimmed/f1s20a_Talker2.wav", "items_mono/untrimmed/f5s15b_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_02_01_FOA.wav", "IRs/IR_do_p04_e_07_01_FOA.wav"]
		shift: [0.0, -1.0]
		background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		background_level: -46

		"08":
		output: "out/s08.wav"
		description: "Car with AB microphone pickup, overlap between the talkers."
		input: ["items_mono/untrimmed/m2s6b_Talker2.wav", "items_mono/untrimmed/f5s14a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_08_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
		shift: [0.0, +1.0]
		background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		background_level: -46

ivas_processing_scripts/generation/init.py

+11 −0

Original line number	Diff line number	Diff line
		@@ -32,6 +32,10 @@

		import logging

		from ivas_processing_scripts.audiotools.constants import (
		CHANNEL_BASED_AUDIO_ALTNAMES,
		CHANNEL_BASED_AUDIO_FORMATS,
		)
		from ivas_processing_scripts.constants import (
		LOGGER_DATEFMT,
		LOGGER_FORMAT,
		@@ -41,6 +45,7 @@ from ivas_processing_scripts.generation import (
		config,
		generate_ismN_items,
		generate_masa_items,
		generate_mc_items,
		generate_omasa_items,
		generate_osba_items,
		generate_sba_items,
		@@ -100,5 +105,11 @@ def main(args):
		elif "OSBA" in cfg.format:
		# generate OSBA items from FOA/HOA2/HOA3 and MONO items according to scene description
		generate_osba_items.generate_osba_items(cfg, logger)
		elif (
		cfg.format in CHANNEL_BASED_AUDIO_FORMATS.keys()
		or cfg.format in CHANNEL_BASED_AUDIO_ALTNAMES.keys()
		):
		# generate MC items from MONO items according to scene description
		generate_mc_items.generate_mc_items(cfg, logger)

		logger.handlers.clear()

ivas_processing_scripts/generation/generate_mc_items.py

0 → 100644

+485 −0

File added.

Preview size limit exceeded, changes collapsed.