From cd2f9f636b58bd76506dfa3663ba5483e2f95018 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 24 Jul 2025 18:16:10 +0200 Subject: [PATCH 1/6] support addition of custom background noise file for each generated item --- examples/ITEM_GENERATION_STEREO.yml | 14 +++- .../generation/generate_stereo_items.py | 67 +++++++++++++++++-- 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/examples/ITEM_GENERATION_STEREO.yml b/examples/ITEM_GENERATION_STEREO.yml index 14731b4b..48c6aa61 100644 --- a/examples/ITEM_GENERATION_STEREO.yml +++ b/examples/ITEM_GENERATION_STEREO.yml @@ -95,6 +95,8 @@ provider: "g" ### azimuth: azimuth in the range [-180,180]; positive values point to the left ### elevation: elevation in the range [-90,90]; positive values indicate up ### shift: time adjustment of the input signal (negative value delays the signal) +### background: background noise filename (if used, the 'add_low_level_random_noise' parameter is ignored) +### background_level: normalized background noise loudness to X dB LKFS ### ### Note 0: you can use relative paths in filenames (the program assumes that the root directory is the parent directory of the ivas_processing_scripts subfolder) ### Note 1: use brackets [val1, val2, ...] 
when specifying multiple values @@ -109,6 +111,8 @@ scenes: input: ["items_mono/untrimmed/f1s4b_Talker2.wav", "items_mono/untrimmed/f2s1a_Talker1.wav"] IR: ["IRs/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav", "IRs/Car_TalkPos2_Stereo_M5_SinSweep_2chn.wav"] shift: [0.0, -1.0] + background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav" + background_level: -66 "02": output: "out/a1s02.wav" @@ -116,6 +120,8 @@ scenes: input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"] IR: ["IRs/Car_TalkPos3_Stereo_M5_SinSweep_2chn.wav", "IRs/Car_TalkPos4_Stereo_M5_SinSweep_2chn.wav"] shift: [0.0, +1.0] + background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav" + background_level: -66 "03": output: "out/a1s03.wav" @@ -123,6 +129,8 @@ scenes: input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"] IR: ["IRs/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav", "IRs/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav"] shift: [0.0, -1.0] + background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav" + background_level: -66 "04": output: "out/a1s04.wav" @@ -130,13 +138,17 @@ scenes: input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"] IR: ["IRs/FreeField_IR_Python_AB_20cm_Pos1.wav", "IRs/FreeField_IR_Python_AB_20cm_Pos2.wav"] shift: [0.0, -1.0] - + background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav" + background_level: -66 + "05": output: "out/a1s05.wav" description: "Car with AB microphone pickup, no overlap between the talkers, car noise." 
input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"] IR: ["IRs/FreeField_IR_Python_AB_20cm_Pos3.wav", "IRs/FreeField_IR_Python_AB_20cm_Pos4.wav"] shift: [0.0, -1.0] + background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav" + background_level: -66 "06": output: "out/a1s06.wav" diff --git a/ivas_processing_scripts/generation/generate_stereo_items.py b/ivas_processing_scripts/generation/generate_stereo_items.py index 8dc6d50c..55be6e85 100644 --- a/ivas_processing_scripts/generation/generate_stereo_items.py +++ b/ivas_processing_scripts/generation/generate_stereo_items.py @@ -340,13 +340,6 @@ def generate_stereo_scene( y.audio, y.fs, limits=[-preamble, -postamble], samples=True ) - # add random noise - if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: - # create uniformly distributed noise between -4 and 4 - np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") - y.audio += noise - # adjust the length of the output signal if "duration" in cfg.__dict__: # trim the output signal such that the total duration is X seconds @@ -367,6 +360,66 @@ def generate_stereo_scene( logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="STEREO") + # add background noise in STEREO format + if "background" in scene.keys(): + # check if [] are used in the background noise file name + if isinstance(scene["background"], list): + # if so, use the first element + background_filename = scene["background"][0] + else: + background_filename = scene["background"] + + # read the background noise file + background_filename = Path(scene["background"]).parent / ( + cfg.use_input_prefix + Path(scene["background"]).name + ) + logger.info(f"-- Adding background noise from {background_filename}") + background = audio.fromfile("STEREO", background_filename) + + # resample to the target 
fs if necessary + if background.fs != cfg.fs: + logger.warning( + f"Warning: Sample rate of the background noise is {background.fs} Hz and needs to be resampled to {cfg.fs}!" + ) + resampled_audio = audioarray.resample(background.audio, background.fs, cfg.fs) + background.audio = resampled_audio + background.fs = cfg.fs + + # adjust the length of the background noise signal + if len(background.audio) != len(y.audio): + background.audio = audioarray.trim( + background.audio, background.fs, limits=[0, len(background.audio) - len(y.audio)], samples=True + ) + + # adjust the loudness of the background noise signal + if "background_level" in scene.keys(): + logger.info( + f"-- Rescaling background noise to target loudness: {scene['background_level']} LUFS" + ) + + # check if [] are used in the background level + if isinstance(scene["background_level"], list): + # if so, use the first element + scene["background_level"] = scene["background_level"][0] + + # convert to float if the background level was entered in string format + if not isinstance(scene["background_level"], (int, float)): + scene["background_level"] = float(scene["background_level"]) + else: + logger.warning( + "-- Warning: No target loudness for background noise specified, using default value of -26 LUFS" + ) + scene["background_level"] = -26 + background.audio, _ = loudness_norm(background, scene['background_level'], loudness_format="STEREO") + + # add the background noise to the output signal + y.audio += background.audio + elif "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: + # create uniformly distributed noise between -4 and 4 + np.random.seed(SEED_RANDOM_NOISE) + noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") + y.audio += noise + # apply fade-in and fade-out if "fade_in_out" in cfg.__dict__ and cfg.fade_in_out > 0: logger.info(f"-- Applying fade-in and fade-out with {cfg.fade_in_out} seconds") -- GitLab From 
dc3a06caf779a9789ad55e2c990ce53ee52a9ef5 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 24 Jul 2025 18:34:13 +0200 Subject: [PATCH 2/6] fix incorrect shifting of audio signal when creating overlap between items (SBA) --- examples/ITEM_GENERATION_FOA.yml | 96 ++++++++++--------- .../generation/generate_sba_items.py | 67 +++++++++++-- .../generation/generate_stereo_items.py | 2 +- 3 files changed, 114 insertions(+), 51 deletions(-) diff --git a/examples/ITEM_GENERATION_FOA.yml b/examples/ITEM_GENERATION_FOA.yml index 2287af4c..46cbe845 100644 --- a/examples/ITEM_GENERATION_FOA.yml +++ b/examples/ITEM_GENERATION_FOA.yml @@ -95,6 +95,8 @@ use_output_prefix: "leee" ### azimuth: azimuth in the range [-180,180]; positive values point to the left ### elevation: elevation in the range [-90,90]; positive values indicate up ### shift: time adjustment of the input signal (negative value delays the signal) +### background: background noise filename (if used, the 'add_low_level_random_noise' parameter is ignored) +### background_level: normalized background noise loudness to X dB LKFS ### ### Note 0: you can use relative paths in filenames (the program assumes that the root directory is the parent directory of the ivas_processing_scripts subfolder) ### Note 1: use brackets [val1, val2, ...] when specifying multiple values @@ -109,52 +111,60 @@ scenes: input: ["items_mono/untrimmed/f1s4b_Talker2.wav", "items_mono/untrimmed/f2s1a_Talker1.wav"] IR: ["IRs/IR_do_p04_e_01_01_FOA.wav", "IRs/IR_do_p04_e_02_01_FOA.wav"] shift: [0.0, -1.0] + background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav" + background_level: -46 + + # "02": + # output: "out/s02.wav" + # description: "Car with AB microphone pickup, overlap between the talkers, car noise." 
+ # input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"] + # IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"] + # shift: [0.0, +1.0] + # background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav" + # background_level: -46 - "02": - output: "out/s02.wav" - description: "Car with AB microphone pickup, overlap between the talkers, car noise." - input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"] - IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"] - shift: [0.0, +1.0] + # "03": + # output: "out/s03.wav" + # description: "Car with AB microphone pickup, no overlap between the talkers, car noise." + # input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"] + # IR: ["IRs/IR_do_p04_e_05_01_FOA.wav", "IRs/IR_do_p04_e_06_01_FOA.wav"] + # shift: [0.0, -1.0] - "03": - output: "out/s03.wav" - description: "Car with AB microphone pickup, no overlap between the talkers, car noise." - input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"] - IR: ["IRs/IR_do_p04_e_05_01_FOA.wav", "IRs/IR_do_p04_e_06_01_FOA.wav"] - shift: [0.0, -1.0] + # "04": + # output: "out/s04.wav" + # description: "Car with AB microphone pickup, no overlap between the talkers, car noise." + # input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"] + # IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_08_01_FOA.wav"] + # shift: [0.0, -1.0] + # background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav" + # background_level: -46 - "04": - output: "out/s04.wav" - description: "Car with AB microphone pickup, no overlap between the talkers, car noise." 
- input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"] - IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_08_01_FOA.wav"] - shift: [0.0, -1.0] + # "05": + # output: "out/s05.wav" + # description: "Car with AB microphone pickup, no overlap between the talkers, car noise." + # input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"] + # IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"] + # shift: [0.0, -1.0] + # background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav" + # background_level: -46 - "05": - output: "out/s05.wav" - description: "Car with AB microphone pickup, no overlap between the talkers, car noise." - input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"] - IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"] - shift: [0.0, -1.0] - - "06": - output: "out/s06.wav" - description: "Car with AB microphone pickup, no overlap between the talkers." - input: ["items_mono/untrimmed/m3s8a_Talker2.wav", "items_mono/untrimmed/m4s13a_Talker1.wav"] - IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"] - shift: [0.0, -1.0] + # "06": + # output: "out/s06.wav" + # description: "Car with AB microphone pickup, no overlap between the talkers." + # input: ["items_mono/untrimmed/m3s8a_Talker2.wav", "items_mono/untrimmed/m4s13a_Talker1.wav"] + # IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"] + # shift: [0.0, -1.0] - "07": - output: "out/s07.wav" - description: "Preliminary: Car with AB microphone pickup, no overlap between the talkers." - input: ["items_mono/untrimmed/f1s20a_Talker2.wav", "items_mono/untrimmed/f5s15b_Talker1.wav"] - IR: ["IRs/IR_do_p04_e_02_01_FOA.wav", "IRs/IR_do_p04_e_07_01_FOA.wav"] - shift: [0.0, -1.0] + # "07": + # output: "out/s07.wav" + # description: "Preliminary: Car with AB microphone pickup, no overlap between the talkers." 
+ # input: ["items_mono/untrimmed/f1s20a_Talker2.wav", "items_mono/untrimmed/f5s15b_Talker1.wav"] + # IR: ["IRs/IR_do_p04_e_02_01_FOA.wav", "IRs/IR_do_p04_e_07_01_FOA.wav"] + # shift: [0.0, -1.0] - "08": - output: "out/s08.wav" - description: "Car with AB microphone pickup, overlap between the talkers." - input: ["items_mono/untrimmed/m2s6b_Talker2.wav", "items_mono/untrimmed/f5s14a_Talker1.wav"] - IR: ["IRs/IR_do_p04_e_08_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"] - shift: [0.0, +1.0] + # "08": + # output: "out/s08.wav" + # description: "Car with AB microphone pickup, overlap between the talkers." + # input: ["items_mono/untrimmed/m2s6b_Talker2.wav", "items_mono/untrimmed/f5s14a_Talker1.wav"] + # IR: ["IRs/IR_do_p04_e_08_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"] + # shift: [0.0, +1.0] diff --git a/ivas_processing_scripts/generation/generate_sba_items.py b/ivas_processing_scripts/generation/generate_sba_items.py index 1c5d5ba6..dbda385f 100644 --- a/ivas_processing_scripts/generation/generate_sba_items.py +++ b/ivas_processing_scripts/generation/generate_sba_items.py @@ -339,13 +339,6 @@ def generate_sba_scene( y.audio, y.fs, limits=[-preamble, -postamble], samples=True ) - # add random noise - if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: - # create uniformly distributed noise between -4 and 4 - np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") - y.audio += noise - # adjust the length of the output signal if "duration" in cfg.__dict__: # trim the output signal such that the total duration is X seconds @@ -366,6 +359,66 @@ def generate_sba_scene( logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL") + # add background noise in FOA/HOA2/HOA3 format + if "background" in scene.keys(): + # check if [] are used in the background noise file name + if 
isinstance(scene["background"], list): + # if so, use the first element + background_filename = scene["background"][0] + else: + background_filename = scene["background"] + + # read the background noise file + background_filename = Path(scene["background"]).parent / ( + cfg.use_input_prefix + Path(scene["background"]).name + ) + logger.info(f"-- Adding background noise from {background_filename}") + background = audio.fromfile(cfg.format, background_filename) + + # resample to the target fs if necessary + if background.fs != cfg.fs: + logger.warning( + f"Warning: Sample rate of the background noise is {background.fs} Hz and needs to be resampled to {cfg.fs}!" + ) + resampled_audio = audioarray.resample(background.audio, background.fs, cfg.fs) + background.audio = resampled_audio + background.fs = cfg.fs + + # adjust the length of the background noise signal + if len(background.audio) != len(y.audio): + background.audio = audioarray.trim( + background.audio, background.fs, limits=[0, len(background.audio) - len(y.audio)], samples=True + ) + + # adjust the loudness of the background noise signal + if "background_level" in scene.keys(): + logger.info( + f"-- Rescaling background noise to target loudness: {scene['background_level']} LUFS" + ) + + # check if [] are used in the background level + if isinstance(scene["background_level"], list): + # if so, use the first element + scene["background_level"] = scene["background_level"][0] + + # convert to float if the background level was entered in string format + if not isinstance(scene["background_level"], (int, float)): + scene["background_level"] = float(scene["background_level"]) + else: + logger.warning( + "-- Warning: No target loudness for background noise specified, using default value of -26 LUFS" + ) + scene["background_level"] = -26 + background.audio, _ = loudness_norm(background, scene['background_level'], loudness_format="STEREO") + + # add the background noise to the output signal + y.audio += 
background.audio + elif "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: + # create uniformly distributed noise between -4 and 4 + np.random.seed(SEED_RANDOM_NOISE) + noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") + y.audio += noise + # apply fade-in and fade-out if "fade_in_out" in cfg.__dict__ and cfg.fade_in_out > 0: logger.info(f"-- Applying fade-in and fade-out with {cfg.fade_in_out} seconds") diff --git a/ivas_processing_scripts/generation/generate_stereo_items.py b/ivas_processing_scripts/generation/generate_stereo_items.py index 55be6e85..a825a6c6 100644 --- a/ivas_processing_scripts/generation/generate_stereo_items.py +++ b/ivas_processing_scripts/generation/generate_stereo_items.py @@ -418,7 +418,7 @@ def generate_stereo_scene( # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") - y.audio += noise + y.audio += noise # apply fade-in and fade-out if "fade_in_out" in cfg.__dict__ and cfg.fade_in_out > 0: -- GitLab From 901f6468b301a5cefb995a66e3e854f3e63250c5 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 24 Jul 2025 18:36:34 +0200 Subject: [PATCH 3/6] change LUFS to LKFS --- ivas_processing_scripts/generation/generate_ismN_items.py | 4 ++-- .../generation/generate_omasa_items.py | 4 ++-- ivas_processing_scripts/generation/generate_osba_items.py | 4 ++-- ivas_processing_scripts/generation/generate_sba_items.py | 8 ++++---- .../generation/generate_stereo_items.py | 8 ++++---- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/ivas_processing_scripts/generation/generate_ismN_items.py b/ivas_processing_scripts/generation/generate_ismN_items.py index dcf76cad..bd92367e 100644 --- a/ivas_processing_scripts/generation/generate_ismN_items.py +++ b/ivas_processing_scripts/generation/generate_ismN_items.py @@ -253,7 +253,7 @@ def generate_ismN_scene( level = -26 
logger.info( - f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LUFS with shift of {source_shift_in_seconds} seconds" + f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LKFS with shift of {source_shift_in_seconds} seconds" ) # read source file @@ -431,7 +431,7 @@ def generate_ismN_scene( # adjust the loudness of the output signal if "loudness" in cfg.__dict__: - logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") + logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LKFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL") # apply fade-in and fade-out diff --git a/ivas_processing_scripts/generation/generate_omasa_items.py b/ivas_processing_scripts/generation/generate_omasa_items.py index ed48c37b..603a3593 100644 --- a/ivas_processing_scripts/generation/generate_omasa_items.py +++ b/ivas_processing_scripts/generation/generate_omasa_items.py @@ -248,7 +248,7 @@ def generate_OMASA_scene( level = -26 logger.info( - f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LUFS with shift of {source_shift_in_seconds} seconds" + f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LKFS with shift of {source_shift_in_seconds} seconds" ) # get the number of channels from the .wav file header @@ -471,7 +471,7 @@ def generate_OMASA_scene( # adjust the loudness of the output signal if "loudness" in cfg.__dict__: - logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") + logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LKFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL") # apply fade-in and fade-out diff --git a/ivas_processing_scripts/generation/generate_osba_items.py b/ivas_processing_scripts/generation/generate_osba_items.py index 815be0b5..d2a71777 100644 --- a/ivas_processing_scripts/generation/generate_osba_items.py +++ 
b/ivas_processing_scripts/generation/generate_osba_items.py @@ -239,7 +239,7 @@ def generate_OSBA_scene( level = -26 logger.info( - f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LUFS with shift of {source_shift_in_seconds} seconds" + f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LKFS with shift of {source_shift_in_seconds} seconds" ) # get the number of channels from the .wav file header @@ -449,7 +449,7 @@ def generate_OSBA_scene( # adjust the loudness of the output signal if "loudness" in cfg.__dict__: - logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") + logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LKFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL") # apply fade-in and fade-out diff --git a/ivas_processing_scripts/generation/generate_sba_items.py b/ivas_processing_scripts/generation/generate_sba_items.py index dbda385f..0aab6994 100644 --- a/ivas_processing_scripts/generation/generate_sba_items.py +++ b/ivas_processing_scripts/generation/generate_sba_items.py @@ -242,7 +242,7 @@ def generate_sba_scene( level = -26 logger.info( - f"-- Convolving {source_file} with {IR_file} at {level} LUFS with shift of {source_shift_in_seconds} seconds" + f"-- Convolving {source_file} with {IR_file} at {level} LKFS with shift of {source_shift_in_seconds} seconds" ) # read source file @@ -356,7 +356,7 @@ def generate_sba_scene( # adjust the loudness of the output signal if "loudness" in cfg.__dict__: - logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") + logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LKFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL") # add background noise in FOA/HOA2/HOA3 format @@ -393,7 +393,7 @@ def generate_sba_scene( # adjust the loudness of the background noise signal if "background_level" in scene.keys(): logger.info( - f"-- Rescaling background noise to target 
loudness: {scene['background_level']} LUFS" + f"-- Rescaling background noise to target loudness: {scene['background_level']} LKFS" ) # check if [] are used in the background level @@ -406,7 +406,7 @@ def generate_sba_scene( scene["background_level"] = float(scene["background_level"]) else: logger.warning( - "-- Warning: No target loudness for background noise specified, using default value of -26 LUFS" + "-- Warning: No target loudness for background noise specified, using default value of -26 LKFS" ) scene["background_level"] = -26 background.audio, _ = loudness_norm(background, scene['background_level'], loudness_format="STEREO") diff --git a/ivas_processing_scripts/generation/generate_stereo_items.py b/ivas_processing_scripts/generation/generate_stereo_items.py index a825a6c6..648c02d0 100644 --- a/ivas_processing_scripts/generation/generate_stereo_items.py +++ b/ivas_processing_scripts/generation/generate_stereo_items.py @@ -248,7 +248,7 @@ def generate_stereo_scene( level = -26 logger.info( - f"-- Convolving {source_file} with {IR_file} at {level} LUFS with shift of {source_shift_in_seconds} seconds" + f"-- Convolving {source_file} with {IR_file} at {level} LKFS with shift of {source_shift_in_seconds} seconds" ) # read source file @@ -357,7 +357,7 @@ def generate_stereo_scene( # adjust the loudness of the output signal if "loudness" in cfg.__dict__: - logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") + logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LKFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="STEREO") # add background noise in STEREO format @@ -394,7 +394,7 @@ def generate_stereo_scene( # adjust the loudness of the background noise signal if "background_level" in scene.keys(): logger.info( - f"-- Rescaling background noise to target loudness: {scene['background_level']} LUFS" + f"-- Rescaling background noise to target loudness: {scene['background_level']} LKFS" ) # check if [] are used in the 
background level @@ -407,7 +407,7 @@ def generate_stereo_scene( scene["background_level"] = float(scene["background_level"]) else: logger.warning( - "-- Warning: No target loudness for background noise specified, using default value of -26 LUFS" + "-- Warning: No target loudness for background noise specified, using default value of -26 LKFS" ) scene["background_level"] = -26 background.audio, _ = loudness_norm(background, scene['background_level'], loudness_format="STEREO") -- GitLab From cf5cc8f3c133345de4e2535000453406881ea23c Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 24 Jul 2025 18:52:08 +0200 Subject: [PATCH 4/6] update readme file --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index f5ebd10e..e508ad3a 100755 --- a/README.md +++ b/README.md @@ -77,6 +77,8 @@ Each entry under `scenes:` describes one test item, specifying: - `azimuth` / `elevation`: spatial placement (°) - `level`: loudness in dB - `shift`: timing offsets in seconds +- `background`: background noise file (applicable to STEREO and SBA only) +- `background_level`: level of the background noise (applicable to STEREO and SBA only) Dynamic positioning (e.g., `"-20:1.0:360"`) means the source will move over time, stepping every 20 ms. @@ -84,6 +86,8 @@ The total duration of the output signal can be controlled using the `duration` f Start by running a single scene to verify settings. Output includes both audio and optional metadata files. You can enable multiprocessing by setting `multiprocessing: true`. +The addition of custom background noise at a specific level is supported for the STEREO and SBA formats only. For ISMs it's not applicable. For OMASA and OSBA formats, it is expected that the background noise is provided in the FOA/HOA2/HOA3 format as the first item in the `input` list. + ### Item processing The input has to be in the folder `experiments/selection/P800-{X}/proc_input_{l}`. 
If item generation is performed previous to this step, the corresponding files are already in the right folder. -- GitLab From 08118ad0e75b280d46fb778612697da4ae28cb89 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 24 Jul 2025 18:55:33 +0200 Subject: [PATCH 5/6] fix formatting --- .../generation/generate_sba_items.py | 17 +++++++++++++---- .../generation/generate_stereo_items.py | 17 +++++++++++++---- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/ivas_processing_scripts/generation/generate_sba_items.py b/ivas_processing_scripts/generation/generate_sba_items.py index 0aab6994..5b3ee6f2 100644 --- a/ivas_processing_scripts/generation/generate_sba_items.py +++ b/ivas_processing_scripts/generation/generate_sba_items.py @@ -380,14 +380,19 @@ def generate_sba_scene( logger.warning( f"Warning: Sample rate of the background noise is {background.fs} Hz and needs to be resampled to {cfg.fs}!" ) - resampled_audio = audioarray.resample(background.audio, background.fs, cfg.fs) + resampled_audio = audioarray.resample( + background.audio, background.fs, cfg.fs + ) background.audio = resampled_audio background.fs = cfg.fs # adjust the length of the background noise signal if len(background.audio) != len(y.audio): background.audio = audioarray.trim( - background.audio, background.fs, limits=[0, len(background.audio) - len(y.audio)], samples=True + background.audio, + background.fs, + limits=[0, len(background.audio) - len(y.audio)], + samples=True, ) # adjust the loudness of the background noise signal @@ -409,11 +414,15 @@ def generate_sba_scene( "-- Warning: No target loudness for background noise specified, using default value of -26 LKFS" ) scene["background_level"] = -26 - background.audio, _ = loudness_norm(background, scene['background_level'], loudness_format="STEREO") + background.audio, _ = loudness_norm( + background, scene["background_level"], loudness_format="STEREO" + ) # add the background noise to the output signal y.audio += 
background.audio - elif "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: + elif ( + "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise + ): # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") diff --git a/ivas_processing_scripts/generation/generate_stereo_items.py b/ivas_processing_scripts/generation/generate_stereo_items.py index 648c02d0..2d6fd97f 100644 --- a/ivas_processing_scripts/generation/generate_stereo_items.py +++ b/ivas_processing_scripts/generation/generate_stereo_items.py @@ -381,14 +381,19 @@ def generate_stereo_scene( logger.warning( f"Warning: Sample rate of the background noise is {background.fs} Hz and needs to be resampled to {cfg.fs}!" ) - resampled_audio = audioarray.resample(background.audio, background.fs, cfg.fs) + resampled_audio = audioarray.resample( + background.audio, background.fs, cfg.fs + ) background.audio = resampled_audio background.fs = cfg.fs # adjust the length of the background noise signal if len(background.audio) != len(y.audio): background.audio = audioarray.trim( - background.audio, background.fs, limits=[0, len(background.audio) - len(y.audio)], samples=True + background.audio, + background.fs, + limits=[0, len(background.audio) - len(y.audio)], + samples=True, ) # adjust the loudness of the background noise signal @@ -410,11 +415,15 @@ def generate_stereo_scene( "-- Warning: No target loudness for background noise specified, using default value of -26 LKFS" ) scene["background_level"] = -26 - background.audio, _ = loudness_norm(background, scene['background_level'], loudness_format="STEREO") + background.audio, _ = loudness_norm( + background, scene["background_level"], loudness_format="STEREO" + ) # add the background noise to the output signal y.audio += background.audio - elif "add_low_level_random_noise" in cfg.__dict__ and 
cfg.add_low_level_random_noise: + elif ( + "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise + ): # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") -- GitLab From 927393262cbc82dff8330609cb55c60cf9c561e8 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 24 Jul 2025 19:03:04 +0200 Subject: [PATCH 6/6] use -rms option when adjusting the level of the background noise --- ivas_processing_scripts/generation/generate_sba_items.py | 2 +- ivas_processing_scripts/generation/generate_stereo_items.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ivas_processing_scripts/generation/generate_sba_items.py b/ivas_processing_scripts/generation/generate_sba_items.py index 5b3ee6f2..bdb40b1b 100644 --- a/ivas_processing_scripts/generation/generate_sba_items.py +++ b/ivas_processing_scripts/generation/generate_sba_items.py @@ -415,7 +415,7 @@ def generate_sba_scene( ) scene["background_level"] = -26 background.audio, _ = loudness_norm( - background, scene["background_level"], loudness_format="STEREO" + background, scene["background_level"], loudness_format="STEREO", rms=True ) # add the background noise to the output signal diff --git a/ivas_processing_scripts/generation/generate_stereo_items.py b/ivas_processing_scripts/generation/generate_stereo_items.py index 2d6fd97f..92a68906 100644 --- a/ivas_processing_scripts/generation/generate_stereo_items.py +++ b/ivas_processing_scripts/generation/generate_stereo_items.py @@ -416,7 +416,7 @@ def generate_stereo_scene( ) scene["background_level"] = -26 background.audio, _ = loudness_norm( - background, scene["background_level"], loudness_format="STEREO" + background, scene["background_level"], loudness_format="STEREO", rms=True ) # add the background noise to the output signal -- GitLab