support addition of custom background noise file for each generated item (cd2f9f63) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

examples/ITEM_GENERATION_STEREO.yml

+13 −1

Original line number	Diff line number	Diff line
		@@ -95,6 +95,8 @@ provider: "g"
		### azimuth: azimuth in the range [-180,180]; positive values point to the left
		### elevation: elevation in the range [-90,90]; positive values indicate up
		### shift: time adjustment of the input signal (negative value delays the signal)
		### background: filename of the background noise to be added to the item; the file is read in STEREO format (if used, the 'add_low_level_random_noise' parameter is ignored)
		### background_level: target loudness of the background noise; the noise is normalized to X dB LKFS before it is added (if omitted, -26 is used)
		###
		### Note 0: you can use relative paths in filenames (the program assumes that the root directory is the parent directory of the ivas_processing_scripts subfolder)
		### Note 1: use brackets [val1, val2, ...] when specifying multiple values
		@@ -109,6 +111,8 @@ scenes:
		input: ["items_mono/untrimmed/f1s4b_Talker2.wav", "items_mono/untrimmed/f2s1a_Talker1.wav"]
		IR: ["IRs/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav", "IRs/Car_TalkPos2_Stereo_M5_SinSweep_2chn.wav"]
		shift: [0.0, -1.0]
		background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
		background_level: -66

		"02":
		output: "out/a1s02.wav"
		@@ -116,6 +120,8 @@ scenes:
		input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"]
		IR: ["IRs/Car_TalkPos3_Stereo_M5_SinSweep_2chn.wav", "IRs/Car_TalkPos4_Stereo_M5_SinSweep_2chn.wav"]
		shift: [0.0, +1.0]
		background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
		background_level: -66

		"03":
		output: "out/a1s03.wav"
		@@ -123,6 +129,8 @@ scenes:
		input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"]
		IR: ["IRs/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav", "IRs/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav"]
		shift: [0.0, -1.0]
		background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
		background_level: -66

		"04":
		output: "out/a1s04.wav"
		@@ -130,6 +138,8 @@ scenes:
		input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"]
		IR: ["IRs/FreeField_IR_Python_AB_20cm_Pos1.wav", "IRs/FreeField_IR_Python_AB_20cm_Pos2.wav"]
		shift: [0.0, -1.0]
		background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
		background_level: -66

		"05":
		output: "out/a1s05.wav"
		@@ -137,6 +147,8 @@ scenes:
		input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"]
		IR: ["IRs/FreeField_IR_Python_AB_20cm_Pos3.wav", "IRs/FreeField_IR_Python_AB_20cm_Pos4.wav"]
		shift: [0.0, -1.0]
		background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
		background_level: -66

		"06":
		output: "out/a1s06.wav"

ivas_processing_scripts/generation/generate_stereo_items.py

+60 −7

Original line number	Diff line number	Diff line
		@@ -340,13 +340,6 @@ def generate_stereo_scene(
		y.audio, y.fs, limits=[-preamble, -postamble], samples=True
		)

		# add random noise
		if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
		y.audio += noise

		# adjust the length of the output signal
		if "duration" in cfg.__dict__:
		# trim the output signal such that the total duration is X seconds
		@@ -367,6 +360,66 @@ def generate_stereo_scene(
		logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
		y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="STEREO")

		# add background noise in STEREO format
		if "background" in scene.keys():
		# check if [] are used in the background noise file name
		if isinstance(scene["background"], list):
		# if so, use the first element
		background_filename = scene["background"][0]
		else:
		background_filename = scene["background"]

		# read the background noise file
		background_filename = Path(scene["background"]).parent / (
		cfg.use_input_prefix + Path(scene["background"]).name
		)
		logger.info(f"-- Adding background noise from {background_filename}")
		background = audio.fromfile("STEREO", background_filename)

		# resample to the target fs if necessary
		if background.fs != cfg.fs:
		logger.warning(
		f"Warning: Sample rate of the background noise is {background.fs} Hz and needs to be resampled to {cfg.fs}!"
		)
		resampled_audio = audioarray.resample(background.audio, background.fs, cfg.fs)
		background.audio = resampled_audio
		background.fs = cfg.fs

		# adjust the length of the background noise signal
		if len(background.audio) != len(y.audio):
		background.audio = audioarray.trim(
		background.audio, background.fs, limits=[0, len(background.audio) - len(y.audio)], samples=True
		)

		# adjust the loudness of the background noise signal
		if "background_level" in scene.keys():
		logger.info(
		f"-- Rescaling background noise to target loudness: {scene['background_level']} LUFS"
		)

		# check if [] are used in the background level
		if isinstance(scene["background_level"], list):
		# if so, use the first element
		scene["background_level"] = scene["background_level"][0]

		# convert to float if the background level was entered in string format
		if not isinstance(scene["background_level"], (int, float)):
		scene["background_level"] = float(scene["background_level"])
		else:
		logger.warning(
		"-- Warning: No target loudness for background noise specified, using default value of -26 LUFS"
		)
		scene["background_level"] = -26
		background.audio, _ = loudness_norm(background, scene['background_level'], loudness_format="STEREO")

		# add the background noise to the output signal
		y.audio += background.audio
		elif "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
		y.audio += noise

		# apply fade-in and fade-out
		if "fade_in_out" in cfg.__dict__ and cfg.fade_in_out > 0:
		logger.info(f"-- Applying fade-in and fade-out with {cfg.fade_in_out} seconds")