fix incorrect shifting of audio signal when creating overlap between items (SBA) (dc3a06ca) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

examples/ITEM_GENERATION_FOA.yml

+53 −43

Original line number	Diff line number	Diff line
		@@ -95,6 +95,8 @@ use_output_prefix: "leee"
		### azimuth: azimuth in the range [-180,180]; positive values point to the left
		### elevation: elevation in the range [-90,90]; positive values indicate up
		### shift: time adjustment of the input signal (negative value delays the signal)
		### background: background noise filename (if used, the 'add_low_level_random_noise' parameter is ignored)
		### background_level: target loudness of the background noise; the noise is normalized to X dB LKFS (if omitted, -26 is used)
		###
		### Note 0: you can use relative paths in filenames (the program assumes that the root directory is the parent directory of the ivas_processing_scripts subfolder)
		### Note 1: use brackets [val1, val2, ...] when specifying multiple values
		@@ -109,52 +111,60 @@ scenes:
		input: ["items_mono/untrimmed/f1s4b_Talker2.wav", "items_mono/untrimmed/f2s1a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_01_01_FOA.wav", "IRs/IR_do_p04_e_02_01_FOA.wav"]
		shift: [0.0, -1.0]

		"02":
		output: "out/s02.wav"
		description: "Car with AB microphone pickup, overlap between the talkers, car noise."
		input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
		shift: [0.0, +1.0]

		"03":
		output: "out/s03.wav"
		description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"]
		IR: ["IRs/IR_do_p04_e_05_01_FOA.wav", "IRs/IR_do_p04_e_06_01_FOA.wav"]
		shift: [0.0, -1.0]

		"04":
		output: "out/s04.wav"
		description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_08_01_FOA.wav"]
		shift: [0.0, -1.0]

		"05":
		output: "out/s05.wav"
		description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
		shift: [0.0, -1.0]

		"06":
		output: "out/s06.wav"
		description: "Car with AB microphone pickup, no overlap between the talkers."
		input: ["items_mono/untrimmed/m3s8a_Talker2.wav", "items_mono/untrimmed/m4s13a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
		shift: [0.0, -1.0]

		"07":
		output: "out/s07.wav"
		description: "Preliminary: Car with AB microphone pickup, no overlap between the talkers."
		input: ["items_mono/untrimmed/f1s20a_Talker2.wav", "items_mono/untrimmed/f5s15b_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_02_01_FOA.wav", "IRs/IR_do_p04_e_07_01_FOA.wav"]
		shift: [0.0, -1.0]

		"08":
		output: "out/s08.wav"
		description: "Car with AB microphone pickup, overlap between the talkers."
		input: ["items_mono/untrimmed/m2s6b_Talker2.wav", "items_mono/untrimmed/f5s14a_Talker1.wav"]
		IR: ["IRs/IR_do_p04_e_08_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
		shift: [0.0, +1.0]
		background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		background_level: -46

		# "02":
		# output: "out/s02.wav"
		# description: "Car with AB microphone pickup, overlap between the talkers, car noise."
		# input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"]
		# IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
		# shift: [0.0, +1.0]
		# background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		# background_level: -46

		# "03":
		# output: "out/s03.wav"
		# description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		# input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"]
		# IR: ["IRs/IR_do_p04_e_05_01_FOA.wav", "IRs/IR_do_p04_e_06_01_FOA.wav"]
		# shift: [0.0, -1.0]

		# "04":
		# output: "out/s04.wav"
		# description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		# input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"]
		# IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_08_01_FOA.wav"]
		# shift: [0.0, -1.0]
		# background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		# background_level: -46

		# "05":
		# output: "out/s05.wav"
		# description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
		# input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"]
		# IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
		# shift: [0.0, -1.0]
		# background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
		# background_level: -46

		# "06":
		# output: "out/s06.wav"
		# description: "Car with AB microphone pickup, no overlap between the talkers."
		# input: ["items_mono/untrimmed/m3s8a_Talker2.wav", "items_mono/untrimmed/m4s13a_Talker1.wav"]
		# IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
		# shift: [0.0, -1.0]

		# "07":
		# output: "out/s07.wav"
		# description: "Preliminary: Car with AB microphone pickup, no overlap between the talkers."
		# input: ["items_mono/untrimmed/f1s20a_Talker2.wav", "items_mono/untrimmed/f5s15b_Talker1.wav"]
		# IR: ["IRs/IR_do_p04_e_02_01_FOA.wav", "IRs/IR_do_p04_e_07_01_FOA.wav"]
		# shift: [0.0, -1.0]

		# "08":
		# output: "out/s08.wav"
		# description: "Car with AB microphone pickup, overlap between the talkers."
		# input: ["items_mono/untrimmed/m2s6b_Talker2.wav", "items_mono/untrimmed/f5s14a_Talker1.wav"]
		# IR: ["IRs/IR_do_p04_e_08_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
		# shift: [0.0, +1.0]

ivas_processing_scripts/generation/generate_sba_items.py

+60 −7

Original line number	Diff line number	Diff line
		@@ -339,13 +339,6 @@ def generate_sba_scene(
		y.audio, y.fs, limits=[-preamble, -postamble], samples=True
		)

		# add random noise
		if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
		y.audio += noise

		# adjust the length of the output signal
		if "duration" in cfg.__dict__:
		# trim the output signal such that the total duration is X seconds
		@@ -366,6 +359,66 @@ def generate_sba_scene(
		logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
		y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL")

		# add background noise in FOA/HOA2/HOA3 format
		if "background" in scene.keys():
		# check if [] are used in the background noise file name
		if isinstance(scene["background"], list):
		# if so, use the first element
		background_filename = scene["background"][0]
		else:
		background_filename = scene["background"]

		# read the background noise file
		background_filename = Path(scene["background"]).parent / (
		cfg.use_input_prefix + Path(scene["background"]).name
		)
		logger.info(f"-- Adding background noise from {background_filename}")
		background = audio.fromfile(cfg.format, background_filename)

		# resample to the target fs if necessary
		if background.fs != cfg.fs:
		logger.warning(
		f"Warning: Sample rate of the background noise is {background.fs} Hz and needs to be resampled to {cfg.fs}!"
		)
		resampled_audio = audioarray.resample(background.audio, background.fs, cfg.fs)
		background.audio = resampled_audio
		background.fs = cfg.fs

		# adjust the length of the background noise signal
		if len(background.audio) != len(y.audio):
		background.audio = audioarray.trim(
		background.audio, background.fs, limits=[0, len(background.audio) - len(y.audio)], samples=True
		)

		# adjust the loudness of the background noise signal
		if "background_level" in scene.keys():
		logger.info(
		f"-- Rescaling background noise to target loudness: {scene['background_level']} LUFS"
		)

		# check if [] are used in the background level
		if isinstance(scene["background_level"], list):
		# if so, use the first element
		scene["background_level"] = scene["background_level"][0]

		# convert to float if the background level was entered in string format
		if not isinstance(scene["background_level"], (int, float)):
		scene["background_level"] = float(scene["background_level"])
		else:
		logger.warning(
		"-- Warning: No target loudness for background noise specified, using default value of -26 LUFS"
		)
		scene["background_level"] = -26
		background.audio, _ = loudness_norm(background, scene['background_level'], loudness_format="STEREO")

		# add the background noise to the output signal
		y.audio += background.audio
		elif "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
		y.audio += noise

		# apply fade-in and fade-out
		if "fade_in_out" in cfg.__dict__ and cfg.fade_in_out > 0:
		logger.info(f"-- Applying fade-in and fade-out with {cfg.fade_in_out} seconds")

ivas_processing_scripts/generation/generate_stereo_items.py

+1 −1

File changed.

Contains only whitespace changes.