Align with test plan (c9fbdb5d) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

experiments/selection/P800-6/config/item_gen_P800-6.yml

+128 −128

Original line number	Diff line number	Diff line
		@@ -31,8 +31,8 @@ postamble: 1.0
		add_low_level_random_noise: true

		### File designators, default is "l" for listening lab, "EN" for language, "p06" for exp and "g" for provider
		listening_lab: "l"
		language: "EN"
		listening_lab: "a"
		language: "JP"
		exp: "p06"
		provider: "g"

		@@ -86,339 +86,339 @@ use_output_prefix: "leee"


		scenes:
		cat1_1:
		name: "cat1/a1s01.wav"
		a1:
		name: "a1s01"
		description: "Talker sitting at a table"
		source: ["m1s01.wav", "m1s07.wav"]
		azimuth: 0
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat1_2:
		name: "cat1/a1s02.wav"
		a2:
		name: "a6s02"
		description: "Talker sitting at a table"
		source: ["f3s02.wav", "f3s08.wav"]
		azimuth: 60
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat1_3:
		name: "cat1/a1s03.wav"
		a3:
		name: "a5s03"
		description: "Talker sitting at a table"
		source: ["m3s03.wav", "m3s09.wav"]
		azimuth: 120
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat1_4:
		name: "cat1/a1s04.wav"
		a4:
		name: "a4s04"
		description: "Talker sitting at a table"
		source: ["f2s04.wav", "f2s10.wav"]
		azimuth: 180
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat1_5:
		name: "cat1/a1s05.wav"
		a5:
		name: "a3s05"
		description: "Talker sitting at a table"
		source: ["m2s05.wav", "m2s11.wav"]
		azimuth: 240
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat1_6:
		name: "cat1/a1s06.wav"
		a6:
		name: "a2s06"
		description: "Talker sitting at a table"
		source: ["f1s06.wav", "f1s12.wav"]
		azimuth: 300
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat1_7:
		name: "cat1/a1s07.wav"
		a7:
		name: "a2s07"
		description: "Preliminary: Talker sitting at a table"
		source: ["f1s13.wav", "f1s14.wav"]
		azimuth: 0
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat2_1:
		name: "cat2/a2s01.wav"
		b1:
		name: "a2s01"
		description: "Standing talker."
		source: ["f1s01.wav", "f1s07.wav"]
		azimuth: 120
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat2_2:
		name: "cat2/a2s02.wav"
		b2:
		name: "a1s02"
		description: "Standing talker."
		source: ["m1s02.wav", "m1s08.wav"]
		azimuth: 180
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat2_3:
		name: "cat2/a2s03.wav"
		b3:
		name: "a6s03"
		description: "Standing talker."
		source: ["f3s03.wav", "f3s09.wav"]
		azimuth: 240
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat2_4:
		name: "cat2/a2s04.wav"
		b4:
		name: "a5s04"
		description: "Standing talker."
		source: ["m3s04.wav", "m3s10.wav"]
		azimuth: 300
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat2_5:
		name: "cat2/a2s05.wav"
		b5:
		name: "a4s05"
		description: "Standing talker."
		source: ["f2s05.wav", "f2s11.wav"]
		azimuth: 0
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat2_6:
		name: "cat2/a2s06.wav"
		b6:
		name: "a3s06"
		description: "Standing talker."
		source: ["m2s06.wav", "m2s12.wav"]
		azimuth: 60
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat2_7:
		name: "cat2/a2s07.wav"
		b7:
		name: "a1s07"
		description: "Preliminary: Standing talker."
		source: ["m1s13.wav", "m1s14.wav"]
		azimuth: 180
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat3_1:
		name: "cat3/a3s01.wav"
		c1:
		name: "a3s01"
		description: "Smaller talker (child) walking around a table."
		source: ["m2s01.wav", "m2s07.wav"]
		azimuth: "0:1:360"
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat3_2:
		name: "cat3/a3s02.wav"
		c2:
		name: "a2s02"
		description: "Smaller talker (child) walking around a table."
		source: ["f1s02.wav", "f1s08.wav"]
		azimuth: "60:1:60+360"
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat3_3:
		name: "cat3/a3s03.wav"
		c3:
		name: "a1s03"
		description: "Smaller talker (child) walking around a table."
		source: ["m1s03.wav", "m1s09.wav"]
		azimuth: "120:1:120+360"
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat3_4:
		name: "cat3/a3s04.wav"
		c4:
		name: "a6s04"
		description: "Smaller talker (child) walking around a table."
		source: ["f3s04.wav", "f3s10.wav"]
		azimuth: "180:1:180+360"
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat3_5:
		name: "cat3/a3s05.wav"
		c5:
		name: "a5s05"
		description: "Smaller talker (child) walking around a table."
		source: ["m3s05.wav", "m3s11.wav"]
		azimuth: "240:1:240+360"
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat3_6:
		name: "cat3/a3s06.wav"
		c6:
		name: "a4s06"
		description: "Smaller talker (child) walking around a table."
		source: ["f2s06.wav", "f2s12.wav"]
		azimuth: "300:1:300+360"
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat3_7:
		name: "cat3/a3s07.wav"
		c7:
		name: "a4s07"
		description: "Preliminary: Smaller talker (child) walking around a table."
		source: ["f2s13.wav", "f2s14.wav"]
		azimuth: "120:1:120+360"
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat4_1:
		name: "cat4/a4s01.wav"
		d1:
		name: "a4s01"
		description: "Talker walking around the table."
		source: ["f2s01.wav", "f2s07.wav"]
		azimuth: "0:-1:-360"
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat4_2:
		name: "cat4/a4s02.wav"
		d2:
		name: "a3s02"
		description: "Talker walking around the table."
		source: ["m2s02.wav", "m2s08.wav"]
		azimuth: "60:-1:60-360"
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat4_3:
		name: "cat4/a4s03.wav"
		d3:
		name: "a2s03"
		description: "Talker walking around the table."
		source: ["f1s03.wav", "f1s09.wav"]
		azimuth: "120:-1:120-360"
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat4_4:
		name: "cat4/a4s04.wav"
		d4:
		name: "a1s04"
		description: "Talker walking around the table."
		source: ["m1s04.wav", "m1s10.wav"]
		azimuth: "180:-1:180-360"
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat4_5:
		name: "cat4/a4s05.wav"
		d5:
		name: "a6s05"
		description: "Talker walking around the table."
		source: ["f3s05.wav", "f3s11.wav"]
		azimuth: "240:-1:240-360"
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat4_6:
		name: "cat4/a4s06.wav"
		d6:
		name: "a5s06"
		description: "Talker walking around the table."
		source: ["m3s06.wav", "m3s12.wav"]
		azimuth: "300:-1:300-360"
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat4_7:
		name: "cat4/a4s07.wav"
		d7:
		name: "a3s07"
		description: "Preliminary: Talker walking around the table."
		source: ["m2s13.wav", "m2s14.wav"]
		azimuth: "180:-1:180-360"
		elevation: 35
		overlap: -1.0
		overlap: -0.5

		cat5_1:
		name: "cat5/a5s01.wav"
		e1:
		name: "a5s01"
		description: "Elevation displacement."
		source: ["m3s01.wav", "m3s07.wav"]
		azimuth: 240
		elevation: "-90:0.5:90"
		overlap: -1.0
		overlap: -0.5

		cat5_2:
		name: "cat5/a5s02.wav"
		e2:
		name: "a4s02"
		description: "Elevation displacement."
		source: ["f2s02.wav", "f2s08.wav"]
		azimuth: 300
		elevation: 0
		overlap: -1.0
		overlap: -0.5

		cat5_3:
		name: "cat5/a5s03.wav"
		e3:
		name: "a3s03"
		description: "Elevation displacement."
		source: ["m2s03.wav", "m2s09.wav"]
		azimuth: 0
		elevation: "-90:0.5:90"
		overlap: -1.0
		overlap: -0.5

		cat5_4:
		name: "cat5/a5s04.wav"
		e4:
		name: "a2s04"
		description: "Elevation displacement."
		source: ["f1s04.wav", "f1s10.wav"]
		azimuth: 60
		elevation: "-90:0.5:90"
		overlap: -1.0
		overlap: -0.5

		cat5_5:
		name: "cat5/a5s05.wav"
		e5:
		name: "a1s05"
		description: "Elevation displacement."
		source: ["m1s05.wav", "m1s11.wav"]
		azimuth: 120
		elevation: "-90:0.5:90"
		overlap: -1.0
		overlap: -0.5

		cat5_6:
		name: "cat5/a5s06.wav"
		e6:
		name: "a6s06"
		description: "Elevation displacement."
		source: ["f3s06.wav", "f3s12.wav"]
		azimuth: 180
		elevation: "-90:0.5:90"
		overlap: -1.0
		overlap: -0.5

		cat5_7:
		name: "cat5/a5s07.wav"
		e7:
		name: "a6s07"
		description: "Preliminary: Elevation displacement."
		source: ["f3s13.wav", "f3s14.wav"]
		azimuth: 120
		elevation: "-90:0.5:90"
		overlap: -1.0
		overlap: -0.5

		cat6_1:
		name: "cat6/a6s01.wav"
		f1:
		name: "a6s01"
		description: "Azimuth and elevation displacement."
		source: ["f3s01.wav", "f3s07.wav"]
		azimuth: "60:0.5:60+180"
		elevation: "35:-0.2:-35"
		overlap: -1.0
		overlap: -0.5

		cat6_2:
		name: "cat6/a6s02.wav"
		f2:
		name: "a5s02"
		description: "Azimuth and elevation displacement."
		source: ["m3s02.wav", "m3s08.wav"]
		azimuth: "120:0.5:120+180"
		elevation: "35:-0.2:-35"
		overlap: -1.0
		overlap: -0.5

		cat6_3:
		name: "cat6/a6s03.wav"
		f3:
		name: "a4s03"
		description: "Azimuth and elevation displacement."
		source: ["f2s03.wav", "f2s09.wav"]
		azimuth: "180:0.5:180+180"
		elevation: "35:-0.2:-35"
		overlap: -1.0
		overlap: -0.5

		cat6_4:
		name: "cat6/a6s04.wav"
		f4:
		name: "a3s04"
		description: "Azimuth and elevation displacement."
		source: ["m2s04.wav", "m2s10.wav"]
		azimuth: "240:0.5:240+180"
		elevation: "35:-0.2:-35"
		overlap: -1.0
		overlap: -0.5

		cat6_5:
		name: "cat6/a6s05.wav"
		f5:
		name: "a2s05"
		description: "Azimuth and elevation displacement."
		source: ["f1s05.wav", "f1s11.wav"]
		azimuth: "300:0.5:300+180"
		elevation: "35:-0.2:-35"
		overlap: -1.0
		overlap: -0.5

		cat6_6:
		name: "cat6/a6s06.wav"
		f6:
		name: "a1s06"
		description: "Azimuth and elevation displacement."
		source: ["m1s06.wav", "m1s12.wav"]
		azimuth: "0:0.5:0+180"
		elevation: "35:-0.2:-35"
		overlap: -1.0
		overlap: -0.5

		cat6_7:
		name: "cat6/a6s07.wav"
		f7:
		name: "a5s07"
		description: "Preliminary: Azimuth and elevation displacement."
		source: ["m3s13.wav", "m3s14.wav"]
		azimuth: "0:0.5:0+180"
		elevation: "35:-0.2:-35"
		overlap: -1.0
		overlap: -0.5

		No newline at end of file

experiments/selection/P800-7/config/item_gen_P800-7.yml

+128 −128

File changed.

Preview size limit exceeded, changes collapsed.

ivas_processing_scripts/generation/process_ism1_items.py

+43 −66

Original line number	Diff line number	Diff line
		@@ -234,6 +234,33 @@ def generate_ism1_scene(
		y.audio.resize(x.audio.shape, refcheck=False)
		y.audio += x.audio

		# append pre-amble and post-amble to all sources
		if cfg.preamble != 0.0:
		# ensure that pre-amble is a multiple of 20ms
		N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)

		# insert all-zero preamble to all sources
		pre = np.zeros((N_pre, y.audio.shape[1]))
		y.audio = np.concatenate([pre, y.audio])

		if cfg.postamble != 0.0:
		# ensure that post-amble is a multiple of 20ms
		N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)

		# append all-zero postamble to all sources
		post = np.zeros((N_post, y.audio.shape[1]))
		y.audio = np.concatenate([y.audio, post])

		# add random noise
		if cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")

		# superimpose
		y.audio += noise


		# process azimuth and elevation
		source_azi = scene["azimuth"]
		source_ele = scene["elevation"]
		@@ -244,22 +271,18 @@ def generate_ism1_scene(
		if isinstance(source_azi, str):
		if ":" in source_azi:
		source_azi = source_azi.split(":")
		azi = np.arange(
		azi = np.linspace(
		float(eval(source_azi[0])),
		float(eval(source_azi[2])),
		float(eval(source_azi[1])),
		N_frames
		)
		else:
		azi = np.array(float(eval(source_azi)), ndmin=1)[:N_frames]
		azi = np.array(float(eval(source_azi)), ndmin=1)
		else:
		azi = np.array(source_azi, ndmin=1)[:N_frames]

		# ensure that azimuth array has N_frames values
		if len(azi) > N_frames:
		# cut the array of azimuth values
		azi = azi[:N_frames]
		elif len(azi) < N_frames:
		# replicate the last azimuth value
		if len(azi) < N_frames:
		# replicate the last azimuth value
		azi = np.append(azi, np.full(N_frames - len(azi), azi[-1]))

		# convert azimuth from 0 .. 360 to -180 .. +180
		@@ -275,21 +298,17 @@ def generate_ism1_scene(
		if isinstance(source_ele, str):
		if ":" in source_ele:
		source_ele = source_ele.split(":")
		ele = np.arange(
		ele = np.linspace(
		float(eval(source_ele[0])),
		float(eval(source_ele[2])),
		float(eval(source_ele[1])),
		N_frames
		)
		else:
		ele = np.array(float(eval(source_ele)), ndmin=1)[:N_frames]
		ele = np.array(float(eval(source_ele)), ndmin=1)
		else:
		ele = np.array(source_ele, ndmin=1)[:N_frames]

		# ensure that elevation array has N_frames values
		if len(ele) > N_frames:
		# cut the array of elevation values
		ele = ele[:N_frames]
		elif len(ele) < N_frames:
		if len(ele) < N_frames:
		# replicate the last elevation
		ele = np.append(ele, np.full(N_frames - len(ele), ele[-1]))

		@@ -298,57 +317,15 @@ def generate_ism1_scene(
		logger.error(
		f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}"
		)

		# additional metadata (default values)
		radius = np.ones(N_frames)
		spread = np.zeros(N_frames)
		gain = np.ones(N_frames)

		# arrange all metadata fields column-wise into a matrix
		y_meta = np.column_stack((azi, ele, radius, spread, gain))

		# append pre-amble and post-amble to all sources
		if cfg.preamble != 0.0:
		# ensure that pre-amble is a multiple of 20ms
		N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)

		# insert all-zero preamble to all sources
		pre = np.zeros((N_pre, y.audio.shape[1]))
		y.audio = np.concatenate([pre, y.audio])

		# insert neutral position as a pre-amble to all sources
		N_pre = int(N_pre / frame_len)
		pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pre, 1))
		y_meta = np.concatenate([pre, y_meta], axis=0)

		if cfg.postamble != 0.0:
		# ensure that post-amble is a multiple of 20ms
		N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)

		# append all-zero postamble to all sources
		post = np.zeros((N_post, y.audio.shape[1]))
		y.audio = np.concatenate([y.audio, post])

		# append neutral position as a post-amble to all sources
		N_post = int(N_post / frame_len)
		post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_post, 1))
		y_meta = np.concatenate([y_meta, post], axis=0)

		# add random noise
		if cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")

		# superimpose
		y.audio += noise
		y_meta = np.column_stack((azi, ele))

		# write ISM audio stream to the output file
		audiofile.write(
		os.path.join(
		cfg.output_path,
		os.path.dirname(scene["name"]),
		cfg.use_output_prefix + os.path.basename(scene["name"]),
		"cat"+scene["name"][1],
		cfg.use_output_prefix + os.path.basename(scene["name"]+".wav"),
		),
		y.audio,
		y.fs,
		@@ -357,8 +334,8 @@ def generate_ism1_scene(
		# write ISM metadata to the output file in .0.csv format
		csv_filename = os.path.join(
		cfg.output_path,
		os.path.dirname(scene["name"]),
		cfg.use_output_prefix + os.path.basename(scene["name"]) + ".0.csv",
		"cat"+scene["name"][1],
		cfg.use_output_prefix + os.path.basename(scene["name"]) + ".wav.0.csv",
		)

		with open(

ivas_processing_scripts/generation/process_ism2_items.py

+80 −123

File changed.

Preview size limit exceeded, changes collapsed.