Merge branch... (5e2900c2) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

README.md

+4 −0

Original line number	Diff line number	Diff line
		@@ -77,6 +77,8 @@ Each entry under `scenes:` describes one test item, specifying:
		- `azimuth` / `elevation`: spatial placement (°)
		- `level`: loudness in dB
		- `shift`: timing offsets in seconds
		- `background`: background noise file (applicable to STEREO and SBA only)
		- `background_level`: level of the background noise (applicable to STEREO and SBA only)

		Dynamic positioning (e.g., `"-20:1.0:360"`) means the source will move over time, stepping every 20 ms.

		@@ -84,6 +86,8 @@ The total duration of the output signal can be controlled using the `duration` f

		Start by running a single scene to verify settings. Output includes both audio and optional metadata files. You can enable multiprocessing by setting `multiprocessing: true`.

		The addition of custom background noise at a specific level is supported for the STEREO and SBA formats only. It is not applicable to ISM formats. For the OMASA and OSBA formats, the background noise is expected to be provided in the FOA/HOA2/HOA3 format as the first item in the `input` list.

		### Item processing

		The input has to be in the folder `experiments/selection/P800-{X}/proc_input_{l}`. If item generation is performed prior to this step, the corresponding files are already in the right folder.

examples/ITEM_GENERATION_FOA.yml

+53 −43

Original line number	Diff line number	Diff line
		@@ -95,6 +95,8 @@ use_output_prefix: "leee"
		### azimuth: azimuth in the range [-180,180]; positive values point to the left
		### elevation: elevation in the range [-90,90]; positive values indicate up
		### shift: time adjustment of the input signal (negative value delays the signal)
		### background: background noise filename (if used, the 'add_low_level_random_noise' parameter is ignored)
		### background_level: the background noise loudness is normalized to X dB LKFS
		###
		### Note 0: you can use relative paths in filenames (the program assumes that the root directory is the parent directory of the ivas_processing_scripts subfolder)
		### Note 1: use brackets [val1, val2, ...] when specifying multiple values
		@@ -109,52 +111,60 @@ scenes:
		input: ["items_mono/untrimmed/f1s4b_Talker2.wav", "items_mono/untrimmed/f2s1a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_01_01_FOA.wav", "IRs/IR_do_p04_e_02_01_FOA.wav"]
		shift: [0.0, -1.0]

		"02":
		output: "out/s02.wav"
		description: "Car with AB microphone pickup, overlap between the talkers, car noise."
		input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
		shift: [0.0, +1.0]

		"03":
		output: "out/s03.wav"
		description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"]
		IR: ["IRs/IR_do_p04_e_05_01_FOA.wav", "IRs/IR_do_p04_e_06_01_FOA.wav"]
		shift: [0.0, -1.0]

		"04":
		output: "out/s04.wav"
		description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_08_01_FOA.wav"]
		shift: [0.0, -1.0]

		"05":
		output: "out/s05.wav"
		description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
		shift: [0.0, -1.0]

		"06":
		output: "out/s06.wav"
		description: "Car with AB microphone pickup, no overlap between the talkers."
		input: ["items_mono/untrimmed/m3s8a_Talker2.wav", "items_mono/untrimmed/m4s13a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
		shift: [0.0, -1.0]

		"07":
		output: "out/s07.wav"
		description: "Preliminary: Car with AB microphone pickup, no overlap between the talkers."
		input: ["items_mono/untrimmed/f1s20a_Talker2.wav", "items_mono/untrimmed/f5s15b_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_02_01_FOA.wav", "IRs/IR_do_p04_e_07_01_FOA.wav"]
		shift: [0.0, -1.0]

		"08":
		output: "out/s08.wav"
		description: "Car with AB microphone pickup, overlap between the talkers."
		input: ["items_mono/untrimmed/m2s6b_Talker2.wav", "items_mono/untrimmed/f5s14a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_08_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
		shift: [0.0, +1.0]
		background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		background_level: -46

		# "02":
		# output: "out/s02.wav"
		# description: "Car with AB microphone pickup, overlap between the talkers, car noise."
		# input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"]
		# IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
		# shift: [0.0, +1.0]
		# background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		# background_level: -46

		# "03":
		# output: "out/s03.wav"
		# description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		# input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"]
		# IR: ["IRs/IR_do_p04_e_05_01_FOA.wav", "IRs/IR_do_p04_e_06_01_FOA.wav"]
		# shift: [0.0, -1.0]

		# "04":
		# output: "out/s04.wav"
		# description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		# input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"]
		# IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_08_01_FOA.wav"]
		# shift: [0.0, -1.0]
		# background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		# background_level: -46

		# "05":
		# output: "out/s05.wav"
		# description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		# input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"]
		# IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
		# shift: [0.0, -1.0]
		# background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		# background_level: -46

		# "06":
		# output: "out/s06.wav"
		# description: "Car with AB microphone pickup, no overlap between the talkers."
		# input: ["items_mono/untrimmed/m3s8a_Talker2.wav", "items_mono/untrimmed/m4s13a_Talker1.wav"]
		# IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
		# shift: [0.0, -1.0]

		# "07":
		# output: "out/s07.wav"
		# description: "Preliminary: Car with AB microphone pickup, no overlap between the talkers."
		# input: ["items_mono/untrimmed/f1s20a_Talker2.wav", "items_mono/untrimmed/f5s15b_Talker1.wav"]
		# IR: ["IRs/IR_do_p04_e_02_01_FOA.wav", "IRs/IR_do_p04_e_07_01_FOA.wav"]
		# shift: [0.0, -1.0]

		# "08":
		# output: "out/s08.wav"
		# description: "Car with AB microphone pickup, overlap between the talkers."
		# input: ["items_mono/untrimmed/m2s6b_Talker2.wav", "items_mono/untrimmed/f5s14a_Talker1.wav"]
		# IR: ["IRs/IR_do_p04_e_08_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
		# shift: [0.0, +1.0]

examples/ITEM_GENERATION_STEREO.yml

+13 −1

Original line number	Diff line number	Diff line
		@@ -95,6 +95,8 @@ provider: "g"
		### azimuth: azimuth in the range [-180,180]; positive values point to the left
		### elevation: elevation in the range [-90,90]; positive values indicate up
		### shift: time adjustment of the input signal (negative value delays the signal)
		### background: background noise filename (if used, the 'add_low_level_random_noise' parameter is ignored)
		### background_level: the background noise loudness is normalized to X dB LKFS
		###
		### Note 0: you can use relative paths in filenames (the program assumes that the root directory is the parent directory of the ivas_processing_scripts subfolder)
		### Note 1: use brackets [val1, val2, ...] when specifying multiple values
		@@ -109,6 +111,8 @@ scenes:
		input: ["items_mono/untrimmed/f1s4b_Talker2.wav", "items_mono/untrimmed/f2s1a_Talker1.wav"]
		IR: ["IRs/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav", "IRs/Car_TalkPos2_Stereo_M5_SinSweep_2chn.wav"]
		shift: [0.0, -1.0]
		background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
		background_level: -66

		"02":
		output: "out/a1s02.wav"
		@@ -116,6 +120,8 @@ scenes:
		input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"]
		IR: ["IRs/Car_TalkPos3_Stereo_M5_SinSweep_2chn.wav", "IRs/Car_TalkPos4_Stereo_M5_SinSweep_2chn.wav"]
		shift: [0.0, +1.0]
		background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
		background_level: -66

		"03":
		output: "out/a1s03.wav"
		@@ -123,6 +129,8 @@ scenes:
		input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"]
		IR: ["IRs/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav", "IRs/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav"]
		shift: [0.0, -1.0]
		background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
		background_level: -66

		"04":
		output: "out/a1s04.wav"
		@@ -130,6 +138,8 @@ scenes:
		input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"]
		IR: ["IRs/FreeField_IR_Python_AB_20cm_Pos1.wav", "IRs/FreeField_IR_Python_AB_20cm_Pos2.wav"]
		shift: [0.0, -1.0]
		background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
		background_level: -66

		"05":
		output: "out/a1s05.wav"
		@@ -137,6 +147,8 @@ scenes:
		input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"]
		IR: ["IRs/FreeField_IR_Python_AB_20cm_Pos3.wav", "IRs/FreeField_IR_Python_AB_20cm_Pos4.wav"]
		shift: [0.0, -1.0]
		background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
		background_level: -66

		"06":
		output: "out/a1s06.wav"

ivas_processing_scripts/generation/generate_ismN_items.py

+2 −2

Original line number	Diff line number	Diff line
		@@ -253,7 +253,7 @@ def generate_ismN_scene(
		level = -26

		logger.info(
		f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LUFS with shift of {source_shift_in_seconds} seconds"
		f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LKFS with shift of {source_shift_in_seconds} seconds"
		)

		# read source file
		@@ -431,7 +431,7 @@ def generate_ismN_scene(

		# adjust the loudness of the output signal
		if "loudness" in cfg.__dict__:
		logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
		logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LKFS")
		y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL")

		# apply fade-in and fade-out

ivas_processing_scripts/generation/generate_omasa_items.py

+2 −2

Original line number	Diff line number	Diff line
		@@ -248,7 +248,7 @@ def generate_OMASA_scene(
		level = -26

		logger.info(
		f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LUFS with shift of {source_shift_in_seconds} seconds"
		f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LKFS with shift of {source_shift_in_seconds} seconds"
		)

		# get the number of channels from the .wav file header
		@@ -471,7 +471,7 @@ def generate_OMASA_scene(

		# adjust the loudness of the output signal
		if "loudness" in cfg.__dict__:
		logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
		logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LKFS")
		y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL")

		# apply fade-in and fade-out