Merge branch 'ericsson/review-item-creation-stereo' into 'main' (90de7c0b) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

item_gen_configs/P800-1.yml

+92 −73

Original line number	Diff line number	Diff line
		@@ -30,12 +30,31 @@ output_path: "experiments/selection/P800-1/proc_input"
		loudness: -26

		### Pre-amble and Post-amble length in seconds (default = 0.0)
		preamble: 1.0
		preamble: 0.5
		postamble: 1.0

		### Flag for adding low-level random background noise (amplitude ±4) instead of silence; default = false (silence)
		add_low_level_random_noise: true

		### File designators
		listening_lab: "a"
		language: "JP"
		exp: "p01"

		################################################
		### Input files
		################################################

		### <LL><Lang><Exp><TalkerID>s<Sample>.wav
		###
		### With
		###
		### <LL> = a (Force Technology), b (HEAD acoustics),
		### c (MQ University), d (Mesaqin.com)
		### <Lang> = JP, FR, GE, MA, DA, EN
		### <Exp> = p01, p02, p04, p05, p06, p07, p08, p09
		### <TalkerID> = f1, f2, f3, m1, m2, m3
		### <Sample> = 01, …, 14

		################################################
		### Scene description
		@@ -64,254 +83,254 @@ add_low_level_random_noise: true

		scenes:
		cat1_1:
		name: "lp01a1s01"
		name: "a1s01"
		description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["f1_s1.wav", "m1_s1.wav"]
		source: ["f1s01.wav", "m1s01.wav"]
		IR: ["SAABP01.wav", "SAABP07.wav"]
		overlap: 1.0

		cat1_2:
		name: "lp01a1s02"
		name: "a1s02"
		description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["m2_s1.wav", "f2_s1.wav"]
		source: ["m2s01.wav", "f2s01.wav"]
		IR: ["SAABP05.wav", "SAABP03.wav"]
		overlap: 1.0

		cat1_3:
		name: "lp01a1s03"
		name: "a1s03"
		description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["f3_s1.wav", "m3_s1.wav"]
		source: ["f3s01.wav", "m3s01.wav"]
		IR: ["SAABP02.wav", "SAABP06.wav"]
		overlap: 1.0

		cat1_4:
		name: "lp01a1s04"
		name: "a1s04"
		description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["m1_s2.wav", "f1_s2.wav"]
		source: ["m1s02.wav", "f1s02.wav"]
		IR: ["SAABP04.wav", "SAABP01.wav"]
		overlap: 1.0

		cat1_5:
		name: "lp01a1s05"
		name: "a1s05"
		description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["f2_s2.wav", "m2_s2.wav"]
		source: ["f2s02.wav", "m2s02.wav"]
		IR: ["SAABP03.wav", "SAABP04.wav"]
		overlap: 1.0

		cat1_6:
		name: "lp01a1s06"
		name: "a1s06"
		description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["m3_s2.wav", "f3_s2.wav"]
		source: ["m3s02.wav", "f3s02.wav"]
		IR: ["SAABP07.wav", "SAABP02.wav"]
		overlap: 1.0

		cat2_1:
		name: "lp01a2s01"
		name: "a2s01"
		description: "Large anechoic room with AB microphone pickup, no overlap between the talkers."
		source: ["m3_s3.wav", "f3_s3.wav"]
		source: ["m3s03.wav", "f3s03.wav"]
		IR: ["LAABP05.wav", "LAABP11.wav"]
		overlap: -1.0

		cat2_2:
		name: "lp01a2s02"
		name: "a2s02"
		description: "Large anechoic room with AB microphone pickup, no overlap between the talkers."
		source: ["f1_s3.wav", "m1.wav"]
		source: ["f1s03.wav", "m1s03.wav"]
		IR: ["LAABP01.wav", "LAABP06.wav"]
		overlap: -1.0

		cat2_3:
		name: "lp01a2s03"
		name: "a2s03"
		description: "Large anechoic room with AB microphone pickup, no overlap between the talkers."
		source: ["m2_s3.wav", "f2_s3.wav"]
		source: ["m2s03.wav", "f2s03.wav"]
		IR: ["LAABP03.wav", "LAABP07.wav"]
		overlap: -1.0

		cat2_4:
		name: "lp01a2s04"
		name: "a2s04"
		description: "Large anechoic room with AB microphone pickup, no overlap between the talkers."
		source: ["f3_s4.wav", "m3_s4.wav"]
		source: ["f3s04.wav", "m3s04.wav"]
		IR: ["LAABP05.wav", "LAABP08.wav"]
		overlap: -1.0

		cat2_5:
		name: "lp01a2s05"
		name: "a2s05"
		description: "Large anechoic room with AB microphone pickup, no overlap between the talkers."
		source: ["m1_s4.wav", "f1_s4.wav"]
		source: ["m1s04.wav", "f1s04.wav"]
		IR: ["LAABP09.wav", "LAABP07.wav"]
		overlap: -1.0

		cat2_6:
		name: "lp01a2s06"
		name: "a2s06"
		description: "Large anechoic room with AB microphone pickup, no overlap between the talkers."
		source: ["f2_s4.wav", "m2_s4.wav"]
		source: ["f2s04.wav", "m2s04.wav"]
		IR: ["LAABP10.wav", "LAABP09.wav"]
		overlap: -1.0

		cat3_1:
		name: "lp01a3s01"
		name: "a3s01"
		description: "Small anechoic room with MS microphone pickup, no overlap between the talkers."
		source: ["f2_s5.wav", "m2_s5.wav"]
		source: ["f2s05.wav", "m2s05.wav"]
		IR: ["SAMSP01.wav", "SAMSP07.wav"]
		overlap: -1.0

		cat3_2:
		name: "lp01a3s02"
		name: "a3s02"
		description: "Small anechoic room with MS microphone pickup, no overlap between the talkers."
		source: ["m3_s5.wav", "f3_s5.wav"]
		source: ["m3s05.wav", "f3s05.wav"]
		IR: ["SAMSP05.wav", "SAMSP03.wav"]
		overlap: -1.0

		cat3_3:
		name: "lp01a3s03"
		name: "a3s03"
		description: "Small anechoic room with MS microphone pickup, no overlap between the talkers."
		source: ["f1_s5.wav", "m1_s5.wav"]
		source: ["f1s05.wav", "m1s05.wav"]
		IR: ["SAMSP02.wav", "SAMSP06.wav"]
		overlap: -1.0

		cat3_4:
		name: "lp01a3s04"
		name: "a3s04"
		description: "Small anechoic room with MS microphone pickup, no overlap between the talkers."
		source: ["m2_s6.wav", "f2_s6.wav"]
		source: ["m2s06.wav", "f2s06.wav"]
		IR: ["SAMSP04.wav", "SAMSP01.wav"]
		overlap: -1.0

		cat3_5:
		name: "lp01a3s05"
		name: "a3s05"
		description: "Small anechoic room with MS microphone pickup, no overlap between the talkers."
		source: ["f3_s6.wav", "m3_s6.wav"]
		source: ["f3s06.wav", "m3s06.wav"]
		IR: ["SAMSP03.wav", "SAMSP04.wav"]
		overlap: -1.0

		cat3_6:
		name: "lp01a3s06"
		name: "a3s06"
		description: "Small anechoic room with MS microphone pickup, no overlap between the talkers."
		source: ["m1_s6.wav", "f1_s6.wav"]
		source: ["m1s06.wav", "f1s06.wav"]
		IR: ["SAMSP07.wav", "SAMSP02.wav"]
		overlap: -1.0

		cat4_1:
		name: "lp01a4s01"
		name: "a4s01"
		description: "Small echoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["m1_s7.wav", "f1_s7.wav"]
		source: ["m1s07.wav", "f1s07.wav"]
		IR: ["SEABP01.wav", "SEABP07.wav"]
		overlap: 1.0

		cat4_2:
		name: "lp01a4s02"
		name: "a4s02"
		description: "Small echoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["f2_s7.wav", "m2_s7.wav"]
		source: ["f2s07.wav", "m2s07.wav"]
		IR: ["SEABP05.wav", "SEABP03.wav"]
		overlap: 1.0

		cat4_3:
		name: "lp01a4s03"
		name: "a4s03"
		description: "Small echoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["m3_s7.wav", "f3_s7.wav"]
		source: ["m3s07.wav", "f3s07.wav"]
		IR: ["SEABP02.wav", "SEABP06.wav"]
		overlap: 1.0

		cat4_4:
		name: "lp01a4s04"
		name: "a4s04"
		description: "Small echoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["f1_s8.wav", "m1_s8.wav"]
		source: ["f1s08.wav", "m1s08.wav"]
		IR: ["SEABP04.wav", "SEABP01.wav"]
		overlap: 1.0

		cat4_5:
		name: "lp01a4s05"
		name: "a4s05"
		description: "Small echoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["m2_s8.wav", "f2_s8.wav"]
		source: ["m2s08.wav", "f2s08.wav"]
		IR: ["SEABP03.wav", "SEABP04.wav"]
		overlap: 1.0

		cat4_6:
		name: "lp01a4s06"
		name: "a4s06"
		description: "Small echoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["f3_s8.wav", "m3_s8.wav"]
		source: ["f3s08.wav", "m3s08.wav"]
		IR: ["SEABP07.wav", "SEABP02.wav"]
		overlap: 1.0

		cat5_1:
		name: "lp01a5s01"
		name: "a5s01"
		description: "Large echoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["f3_s9.wav", "m3_s9.wav"]
		source: ["f3s09.wav", "m3s09.wav"]
		IR: ["LEABP02.wav", "LEABP08.wav"]
		overlap: 1.0

		cat5_2:
		name: "lp01a5s02"
		name: "a5s02"
		description: "Large echoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["m1_s9.wav", "f1_s9.wav"]
		source: ["m1s09.wav", "f1s09.wav"]
		IR: ["LEABP09.wav", "LEABP04.wav"]
		overlap: 1.0

		cat5_3:
		name: "lp01a5s03"
		name: "a5s03"
		description: "Large echoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["f2_s9.wav", "m2_s9.wav"]
		source: ["f2s09.wav", "m2s09.wav"]
		IR: ["LEABP06.wav", "LEABP10.wav"]
		overlap: 1.0

		cat5_4:
		name: "lp01a5s04"
		name: "a5s04"
		description: "Large echoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["m3_s10.wav", "f3_s10.wav"]
		source: ["m3s10.wav", "f3s10.wav"]
		IR: ["LEABP11.wav", "LEABP08.wav"]
		overlap: 1.0

		cat5_5:
		name: "lp01a5s05"
		name: "a5s05"
		description: "Large echoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["f1_s10.wav", "m1_s10.wav"]
		source: ["f1s10.wav", "m1s10.wav"]
		IR: ["LEABP10.wav", "LEABP12.wav"]
		overlap: 1.0

		cat5_6:
		name: "lp01a5s06"
		name: "a5s06"
		description: "Large echoic room with AB microphone pickup, partial overlap between the talkers."
		source: ["m2_s10.wav", "f2_s10.wav"]
		source: ["m2s10.wav", "f2s10.wav"]
		IR: ["LEABP12.wav", "LEABP01.wav"]
		overlap: 1.0

		cat6_1:
		name: "lp01a6s01"
		name: "a6s01"
		description: "Small echoic room with binaural microphone pickup, no overlap between the talkers."
		source: ["m2_s11.wav", "f2_s11.wav"]
		source: ["m2s11.wav", "f2s11.wav"]
		IR: ["SEABP01.wav", "SEABP07.wav"]
		overlap: -1.0

		cat6_2:
		name: "lp01a6s02"
		name: "a6s02"
		description: "Small echoic room with binaural microphone pickup, no overlap between the talkers."
		source: ["f3_s11.wav", "m3_s11.wav"]
		source: ["f3s11.wav", "m3s11.wav"]
		IR: ["SEABP05.wav", "SEABP03.wav"]
		overlap: -1.0

		cat6_3:
		name: "lp01a6s03"
		name: "a6s03"
		description: "Small echoic room with binaural microphone pickup, no overlap between the talkers."
		source: ["m1_s11.wav", "f1_s11.wav"]
		source: ["m1s11.wav", "f1s11.wav"]
		IR: ["SEABP02.wav", "SEABP06.wav"]
		overlap: -1.0

		cat6_4:
		name: "lp01a6s04"
		name: "a6s04"
		description: "Small echoic room with binaural microphone pickup, no overlap between the talkers."
		source: ["f2_s12.wav", "m2_s12.wav"]
		source: ["f2s12.wav", "m2s12.wav"]
		IR: ["SEABP04.wav", "SEABP01.wav"]
		overlap: -1.0

		cat6_5:
		name: "lp01a6s05"
		name: "a6s05"
		description: "Small echoic room with binaural microphone pickup, no overlap between the talkers."
		source: ["m3_s12.wav", "f3_s12.wav"]
		source: ["m3s12.wav", "f3s12.wav"]
		IR: ["SEABP03.wav", "SEABP04.wav"]
		overlap: -1.0

		cat6_6:
		name: "lp01a6s06"
		name: "a6s06"
		description: "Small echoic room with binaural microphone pickup, no overlap between the talkers."
		source: ["f1_s12.wav", "m1_s12.wav"]
		source: ["f1s12.wav", "m1s12.wav"]
		IR: ["SEABP07.wav", "SEABP02.wav"]
		overlap: -1.0

item_gen_configs/P800-2.yml

+94 −75

File changed.

Preview size limit exceeded, changes collapsed.

ivas_processing_scripts/generation/process_stereo_items.py

+132 −124

Original line number	Diff line number	Diff line
		@@ -32,6 +32,7 @@

		import logging
		import os
		from itertools import repeat
		from math import floor

		import numpy as np
		@@ -40,6 +41,7 @@ from ivas_processing_scripts.audiotools import audio, audiofile
		from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness
		from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_stereo
		from ivas_processing_scripts.generation import config
		from ivas_processing_scripts.utils import apply_func_parallel

		SEED_RANDOM_NOISE = 0

		@@ -56,9 +58,6 @@ def generate_stereo_items(
		):
		"""Generate STEREO items from mono items based on scene description"""

		# get the number of scenes
		N_scenes = len(cfg.scenes)

		# set the target level
		if "loudness" not in cfg.__dict__:
		cfg.loudness = -26
		@@ -86,10 +85,24 @@ def generate_stereo_items(
		if "add_low_level_random_noise" not in cfg.__dict__:
		cfg.add_low_level_random_noise = False

		# repeat for all source files
		for scene_name, scene in cfg.scenes.items():
		# set multiprocessing
		if "multiprocessing" not in cfg.__dict__:
		cfg.multiprocessing = True

		apply_func_parallel(
		generate_stereo_scene,
		zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)),
		None,
		"mp" if cfg.multiprocessing else None,
		)
		return


		def generate_stereo_scene(
		scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger
		):
		logger.info(
		f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene['name']}"
		f"Processing scene: {scene_name} out of {len(cfg.scenes)} scenes, name: {scene['name']}"
		)

		# extract the number of audio sources
		@@ -106,21 +119,18 @@ def generate_stereo_items(

		y = audio.ChannelBasedAudio("STEREO")
		for i in range(N_sources):
		source_prefix = cfg.listening_lab + cfg.language + cfg.exp
		# parse parameters from the scene description
		source_file = np.atleast_1d(scene["source"])[i]
		source_file = source_prefix + np.atleast_1d(scene["source"])[i]
		IR_file = np.atleast_1d(scene["IR"])[i]

		logger.info(f"Convolving {source_file} with {source_IR}")

		# read source file
		x = audio.fromfile(
		"MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs
		)
		x = audio.fromfile("MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs)

		# read the IR file
		IR = audio.fromfile(
		"STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs
		)
		IR = audio.fromfile("STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs)

		# convolve with stereo IR
		x = reverb_stereo(x, IR)
		@@ -141,15 +151,6 @@ def generate_stereo_items(
		pre = np.zeros((N_delay, x.audio.shape[1]))
		x.audio = np.concatenate([pre, x.audio])

		# pad with zeros to ensure that the signal length is a multiple of 20ms
		N_frame = x.fs / 50
		if len(x.audio) % N_frame != 0:
		N_pad = int(N_frame - len(x.audio) % N_frame)

		# insert all-zero preamble
		pre = np.zeros((N_pad, x.audio.shape[1]))
		x.audio = np.concatenate([pre, x.audio])

		# add source signal to the array of source signals
		y.fs = x.fs
		if y.audio is None:
		@@ -201,21 +202,28 @@ def generate_stereo_items(
		post = np.zeros((N_post, y.audio.shape[1]))
		y.audio = np.concatenate([y.audio, post])

		# pad with zeros to ensure that the signal length is a multiple of 20ms
		N_frame = y.fs / 50
		if y.audio.shape[0] % N_frame != 0:
		N_pad = int(N_frame - y.audio.shape[0] % N_frame)

		# insert all-zero postamble
		post = np.zeros((N_pad, y.audio.shape[1]))
		y.audio = np.concatenate([y.audio, post])

		# add random noise
		if cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype(
		"float"
		)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")

		# superimpose
		y.audio += noise

		# write the reverberated audio into output file
		output_filename = scene["name"]
		output_filename = cfg.listening_lab + cfg.exp + scene["name"] + ".wav"
		audiofile.write(
		os.path.join(cfg.output_path, output_filename), y.audio, y.fs
		os.path.join(cfg.output_path, scene_name.split("_")[0], output_filename),
		y.audio,
		y.fs,
		) # !!!! TBD: replace all os.path.xxx operations with the Path object

		return