Loading ivas_processing_scripts/generation/generate_ambi_items.py +12 −21 Original line number Diff line number Diff line Loading @@ -76,10 +76,6 @@ def generate_ambi_items( ): """Generate FOA/HOA2/HOA3 items from mono items based on scene description""" # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 # set the fs if "fs" not in cfg.__dict__: cfg.fs = 48000 Loading @@ -88,17 +84,6 @@ def generate_ambi_items( if "IR_fs" not in cfg.__dict__: cfg.IR_fs = 48000 # set the pre-amble and post-amble if "preamble" not in cfg.__dict__: cfg.preamble = 0.0 if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False # set the listening lab designator if "listening_lab" not in cfg.__dict__: cfg.listening_lab = "l" Loading Loading @@ -325,12 +310,17 @@ def generate_ambi_scene( y.audio += x.audio # append pre-amble and post-amble if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__: logger.info( f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds" ) if any([cfg.preamble, cfg.postamble]): preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms y.audio = audioarray.trim(y.audio, y.fs, limits=[-preamble, -postamble], samples=True) # add random noise if cfg.add_low_level_random_noise: if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") Loading @@ -348,6 +338,7 @@ def generate_ambi_scene( y.audio = audioarray.trim(y.audio, y.fs, limits=[0, len(y.audio) - duration], samples=True) # adjust the loudness of the output signal if "loudness" in cfg.__dict__: logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL") Loading ivas_processing_scripts/generation/generate_ismN_items.py +12 −14 Original line number Diff line number Diff line Loading @@ -82,10 +82,6 @@ def generate_ismN_items( ): """Generate ISMN items with metadata from mono items based on scene description""" # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 # set the fs if "fs" not in cfg.__dict__: cfg.fs = 48000 Loading @@ -97,10 +93,6 @@ def generate_ismN_items( if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False # set the listening lab designator if "listening_lab" not in cfg.__dict__: cfg.listening_lab = "l" Loading Loading @@ -392,12 +384,17 @@ def generate_ismN_scene( y.metadata_files.insert(i, str(output_filename.with_suffix(f".{i}.csv"))) # append pre-amble and post-amble if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__: logger.info( f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds" ) if any([cfg.preamble, cfg.postamble]): preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms metadata.trim_meta(y, limits=[-preamble, -postamble], samples=True) # add random noise if cfg.add_low_level_random_noise: if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") Loading @@ -415,6 +412,7 @@ def generate_ismN_scene( metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True) # adjust the loudness of the output signal if "loudness" in cfg.__dict__: logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL") Loading ivas_processing_scripts/generation/generate_omasa_items.py +12 −21 Original line number Diff line number Diff line Loading @@ -80,25 +80,10 @@ def generate_omasa_items( ): """Generate OMASA items with metadata from FOA/HO2 and ISMn items based on scene description""" # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 # set the fs if "fs" not in cfg.__dict__: cfg.fs = 48000 # set the pre-amble and post-amble if "preamble" not in cfg.__dict__: cfg.preamble = 0.0 if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False # set the listening lab designator if "listening_lab" not in cfg.__dict__: cfg.listening_lab = "l" Loading Loading @@ -439,12 +424,17 @@ def generate_OMASA_scene( y.metadata_files.insert(i - 1, str(output_filename.with_suffix(f".{i - 1}.csv"))) # append pre-amble and post-amble if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__: logger.info( f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds" ) if any([cfg.preamble, cfg.postamble]): preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms metadata.trim_meta(y, limits=[-preamble, -postamble], samples=True) # add random noise if cfg.add_low_level_random_noise: if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") Loading @@ -462,6 +452,7 @@ def generate_OMASA_scene( metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True) # adjust the loudness of the output signal if "loudness" in cfg.__dict__: logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL") Loading ivas_processing_scripts/generation/generate_osba_items.py +12 −21 Original line number Diff line number Diff line Loading @@ -78,25 +78,10 @@ def generate_osba_items( ): """Generate OSBA items from FOA/HOA2/HOA3 and ISMn items based on scene description""" # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 # set the fs if "fs" not in cfg.__dict__: cfg.fs = 48000 # set the pre-amble and post-amble if "preamble" not in cfg.__dict__: cfg.preamble = 0.0 if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False # set the listening lab designator if "listening_lab" not in cfg.__dict__: cfg.listening_lab = "l" Loading Loading @@ -421,12 +406,17 @@ def generate_OSBA_scene( y.metadata_files.insert(i - 1, str(output_filename.with_suffix(f".{i - 1}.csv"))) # append pre-amble and post-amble if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__: logger.info( f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds" ) if any([cfg.preamble, cfg.postamble]): preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms metadata.trim_meta(y, limits=[-preamble, -postamble], samples=True) # add random noise if cfg.add_low_level_random_noise: if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") Loading @@ -444,6 +434,7 @@ def generate_OSBA_scene( metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True) # adjust the loudness of the output signal if "loudness" in cfg.__dict__: logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL") Loading ivas_processing_scripts/generation/generate_stereo_items.py +12 −21 Original line number Diff line number Diff line Loading @@ -78,10 +78,6 @@ def generate_stereo_items( ): """Generate STEREO items from mono items based on scene description""" # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 # set the fs if "fs" not in cfg.__dict__: cfg.fs = 48000 Loading @@ -90,21 +86,10 @@ def generate_stereo_items( if "IR_fs" not in cfg.__dict__: cfg.IR_fs = 48000 # set the pre-amble and post-amble if "preamble" not in cfg.__dict__: cfg.preamble = 0.0 if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 # set the IR path if "IR_path" not in cfg.__dict__: cfg.IR_path = os.path.join(os.path.dirname(__file__), "IRs") # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False # set the listening lab designator if "listening_lab" not in cfg.__dict__: cfg.listening_lab = "l" Loading Loading @@ -326,12 +311,17 @@ def generate_stereo_scene( y.audio += x.audio # append pre-amble and post-amble if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__: logger.info( f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds" ) if any([cfg.preamble, cfg.postamble]): preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms y.audio = audioarray.trim(y.audio, y.fs, limits=[-preamble, -postamble], samples=True) # add random noise if cfg.add_low_level_random_noise: if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") Loading @@ -349,6 +339,7 @@ def generate_stereo_scene( y.audio = audioarray.trim(y.audio, y.fs, limits=[0, len(y.audio) - duration], samples=True) # adjust the loudness of the output signal if "loudness" in cfg.__dict__: logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="STEREO") Loading Loading
ivas_processing_scripts/generation/generate_ambi_items.py +12 −21 Original line number Diff line number Diff line Loading @@ -76,10 +76,6 @@ def generate_ambi_items( ): """Generate FOA/HOA2/HOA3 items from mono items based on scene description""" # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 # set the fs if "fs" not in cfg.__dict__: cfg.fs = 48000 Loading @@ -88,17 +84,6 @@ def generate_ambi_items( if "IR_fs" not in cfg.__dict__: cfg.IR_fs = 48000 # set the pre-amble and post-amble if "preamble" not in cfg.__dict__: cfg.preamble = 0.0 if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False # set the listening lab designator if "listening_lab" not in cfg.__dict__: cfg.listening_lab = "l" Loading Loading @@ -325,12 +310,17 @@ def generate_ambi_scene( y.audio += x.audio # append pre-amble and post-amble if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__: logger.info( f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds" ) if any([cfg.preamble, cfg.postamble]): preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms y.audio = audioarray.trim(y.audio, y.fs, limits=[-preamble, -postamble], samples=True) # add random noise if cfg.add_low_level_random_noise: if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") Loading @@ -348,6 +338,7 @@ def generate_ambi_scene( y.audio = audioarray.trim(y.audio, y.fs, limits=[0, len(y.audio) - duration], samples=True) # adjust the loudness of the output signal if "loudness" in cfg.__dict__: logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL") Loading
ivas_processing_scripts/generation/generate_ismN_items.py +12 −14 Original line number Diff line number Diff line Loading @@ -82,10 +82,6 @@ def generate_ismN_items( ): """Generate ISMN items with metadata from mono items based on scene description""" # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 # set the fs if "fs" not in cfg.__dict__: cfg.fs = 48000 Loading @@ -97,10 +93,6 @@ def generate_ismN_items( if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False # set the listening lab designator if "listening_lab" not in cfg.__dict__: cfg.listening_lab = "l" Loading Loading @@ -392,12 +384,17 @@ def generate_ismN_scene( y.metadata_files.insert(i, str(output_filename.with_suffix(f".{i}.csv"))) # append pre-amble and post-amble if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__: logger.info( f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds" ) if any([cfg.preamble, cfg.postamble]): preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms metadata.trim_meta(y, limits=[-preamble, -postamble], samples=True) # add random noise if cfg.add_low_level_random_noise: if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") Loading @@ -415,6 +412,7 @@ def generate_ismN_scene( metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True) # adjust the loudness of the output signal if "loudness" in cfg.__dict__: logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL") Loading
ivas_processing_scripts/generation/generate_omasa_items.py +12 −21 Original line number Diff line number Diff line Loading @@ -80,25 +80,10 @@ def generate_omasa_items( ): """Generate OMASA items with metadata from FOA/HO2 and ISMn items based on scene description""" # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 # set the fs if "fs" not in cfg.__dict__: cfg.fs = 48000 # set the pre-amble and post-amble if "preamble" not in cfg.__dict__: cfg.preamble = 0.0 if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False # set the listening lab designator if "listening_lab" not in cfg.__dict__: cfg.listening_lab = "l" Loading Loading @@ -439,12 +424,17 @@ def generate_OMASA_scene( y.metadata_files.insert(i - 1, str(output_filename.with_suffix(f".{i - 1}.csv"))) # append pre-amble and post-amble if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__: logger.info( f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds" ) if any([cfg.preamble, cfg.postamble]): preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms metadata.trim_meta(y, limits=[-preamble, -postamble], samples=True) # add random noise if cfg.add_low_level_random_noise: if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") Loading @@ -462,6 +452,7 @@ def generate_OMASA_scene( metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True) # adjust the loudness of the output signal if "loudness" in cfg.__dict__: logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL") Loading
ivas_processing_scripts/generation/generate_osba_items.py +12 −21 Original line number Diff line number Diff line Loading @@ -78,25 +78,10 @@ def generate_osba_items( ): """Generate OSBA items from FOA/HOA2/HOA3 and ISMn items based on scene description""" # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 # set the fs if "fs" not in cfg.__dict__: cfg.fs = 48000 # set the pre-amble and post-amble if "preamble" not in cfg.__dict__: cfg.preamble = 0.0 if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False # set the listening lab designator if "listening_lab" not in cfg.__dict__: cfg.listening_lab = "l" Loading Loading @@ -421,12 +406,17 @@ def generate_OSBA_scene( y.metadata_files.insert(i - 1, str(output_filename.with_suffix(f".{i - 1}.csv"))) # append pre-amble and post-amble if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__: logger.info( f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds" ) if any([cfg.preamble, cfg.postamble]): preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms metadata.trim_meta(y, limits=[-preamble, -postamble], samples=True) # add random noise if cfg.add_low_level_random_noise: if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") Loading @@ -444,6 +434,7 @@ def generate_OSBA_scene( metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True) # adjust the loudness of the output signal if "loudness" in cfg.__dict__: logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL") Loading
ivas_processing_scripts/generation/generate_stereo_items.py +12 −21 Original line number Diff line number Diff line Loading @@ -78,10 +78,6 @@ def generate_stereo_items( ): """Generate STEREO items from mono items based on scene description""" # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 # set the fs if "fs" not in cfg.__dict__: cfg.fs = 48000 Loading @@ -90,21 +86,10 @@ def generate_stereo_items( if "IR_fs" not in cfg.__dict__: cfg.IR_fs = 48000 # set the pre-amble and post-amble if "preamble" not in cfg.__dict__: cfg.preamble = 0.0 if "postamble" not in cfg.__dict__: cfg.postamble = 0.0 # set the IR path if "IR_path" not in cfg.__dict__: cfg.IR_path = os.path.join(os.path.dirname(__file__), "IRs") # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False # set the listening lab designator if "listening_lab" not in cfg.__dict__: cfg.listening_lab = "l" Loading Loading @@ -326,12 +311,17 @@ def generate_stereo_scene( y.audio += x.audio # append pre-amble and post-amble if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__: logger.info( f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds" ) if any([cfg.preamble, cfg.postamble]): preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms y.audio = audioarray.trim(y.audio, y.fs, limits=[-preamble, -postamble], samples=True) # add random noise if cfg.add_low_level_random_noise: if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") Loading @@ -349,6 +339,7 @@ def generate_stereo_scene( y.audio = audioarray.trim(y.audio, y.fs, limits=[0, len(y.audio) - duration], samples=True) # adjust the loudness of the output signal if "loudness" in cfg.__dict__: logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS") y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="STEREO") Loading