skip pre-amble and post-amble if not specified in the .yml file (saves processing time) (caced321) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

ivas_processing_scripts/generation/generate_ambi_items.py

+12 −21

Original line number	Diff line number	Diff line
		@@ -76,10 +76,6 @@ def generate_ambi_items(
		):
		"""Generate FOA/HOA2/HOA3 items from mono items based on scene description"""

		# set the target level
		if "loudness" not in cfg.__dict__:
		cfg.loudness = -26

		# set the fs
		if "fs" not in cfg.__dict__:
		cfg.fs = 48000
		@@ -88,17 +84,6 @@ def generate_ambi_items(
		if "IR_fs" not in cfg.__dict__:
		cfg.IR_fs = 48000

		# set the pre-amble and post-amble
		if "preamble" not in cfg.__dict__:
		cfg.preamble = 0.0

		if "postamble" not in cfg.__dict__:
		cfg.postamble = 0.0

		# set the low-level random noise flag
		if "add_low_level_random_noise" not in cfg.__dict__:
		cfg.add_low_level_random_noise = False

		# set the listening lab designator
		if "listening_lab" not in cfg.__dict__:
		cfg.listening_lab = "l"
		@@ -325,12 +310,17 @@ def generate_ambi_scene(
		y.audio += x.audio

		# append pre-amble and post-amble
		if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
		logger.info(
		f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
		)
		if any([cfg.preamble, cfg.postamble]):
		preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms
		postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms
		y.audio = audioarray.trim(y.audio, y.fs, limits=[-preamble, -postamble], samples=True)

		# add random noise
		if cfg.add_low_level_random_noise:
		if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
		@@ -348,6 +338,7 @@ def generate_ambi_scene(
		y.audio = audioarray.trim(y.audio, y.fs, limits=[0, len(y.audio) - duration], samples=True)

		# adjust the loudness of the output signal
		if "loudness" in cfg.__dict__:
		logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
		y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL")

ivas_processing_scripts/generation/generate_ismN_items.py

+12 −14

Original line number	Diff line number	Diff line
		@@ -82,10 +82,6 @@ def generate_ismN_items(
		):
		"""Generate ISMN items with metadata from mono items based on scene description"""

		# set the target level
		if "loudness" not in cfg.__dict__:
		cfg.loudness = -26

		# set the fs
		if "fs" not in cfg.__dict__:
		cfg.fs = 48000
		@@ -97,10 +93,6 @@ def generate_ismN_items(
		if "postamble" not in cfg.__dict__:
		cfg.postamble = 0.0

		# set the low-level random noise flag
		if "add_low_level_random_noise" not in cfg.__dict__:
		cfg.add_low_level_random_noise = False

		# set the listening lab designator
		if "listening_lab" not in cfg.__dict__:
		cfg.listening_lab = "l"
		@@ -392,12 +384,17 @@ def generate_ismN_scene(
		y.metadata_files.insert(i, str(output_filename.with_suffix(f".{i}.csv")))

		# append pre-amble and post-amble
		if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
		logger.info(
		f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
		)
		if any([cfg.preamble, cfg.postamble]):
		preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms
		postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms
		metadata.trim_meta(y, limits=[-preamble, -postamble], samples=True)

		# add random noise
		if cfg.add_low_level_random_noise:
		if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
		@@ -415,6 +412,7 @@ def generate_ismN_scene(
		metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True)

		# adjust the loudness of the output signal
		if "loudness" in cfg.__dict__:
		logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
		y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL")

ivas_processing_scripts/generation/generate_omasa_items.py

+12 −21

Original line number	Diff line number	Diff line
		@@ -80,25 +80,10 @@ def generate_omasa_items(
		):
		"""Generate OMASA items with metadata from FOA/HOA2 and ISMn items based on scene description"""

		# set the target level
		if "loudness" not in cfg.__dict__:
		cfg.loudness = -26

		# set the fs
		if "fs" not in cfg.__dict__:
		cfg.fs = 48000

		# set the pre-amble and post-amble
		if "preamble" not in cfg.__dict__:
		cfg.preamble = 0.0

		if "postamble" not in cfg.__dict__:
		cfg.postamble = 0.0

		# set the low-level random noise flag
		if "add_low_level_random_noise" not in cfg.__dict__:
		cfg.add_low_level_random_noise = False

		# set the listening lab designator
		if "listening_lab" not in cfg.__dict__:
		cfg.listening_lab = "l"
		@@ -439,12 +424,17 @@ def generate_OMASA_scene(
		y.metadata_files.insert(i - 1, str(output_filename.with_suffix(f".{i - 1}.csv")))

		# append pre-amble and post-amble
		if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
		logger.info(
		f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
		)
		if any([cfg.preamble, cfg.postamble]):
		preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms
		postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms
		metadata.trim_meta(y, limits=[-preamble, -postamble], samples=True)

		# add random noise
		if cfg.add_low_level_random_noise:
		if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
		@@ -462,6 +452,7 @@ def generate_OMASA_scene(
		metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True)

		# adjust the loudness of the output signal
		if "loudness" in cfg.__dict__:
		logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
		y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL")

ivas_processing_scripts/generation/generate_osba_items.py

+12 −21

Original line number	Diff line number	Diff line
		@@ -78,25 +78,10 @@ def generate_osba_items(
		):
		"""Generate OSBA items from FOA/HOA2/HOA3 and ISMn items based on scene description"""

		# set the target level
		if "loudness" not in cfg.__dict__:
		cfg.loudness = -26

		# set the fs
		if "fs" not in cfg.__dict__:
		cfg.fs = 48000

		# set the pre-amble and post-amble
		if "preamble" not in cfg.__dict__:
		cfg.preamble = 0.0

		if "postamble" not in cfg.__dict__:
		cfg.postamble = 0.0

		# set the low-level random noise flag
		if "add_low_level_random_noise" not in cfg.__dict__:
		cfg.add_low_level_random_noise = False

		# set the listening lab designator
		if "listening_lab" not in cfg.__dict__:
		cfg.listening_lab = "l"
		@@ -421,12 +406,17 @@ def generate_OSBA_scene(
		y.metadata_files.insert(i - 1, str(output_filename.with_suffix(f".{i - 1}.csv")))

		# append pre-amble and post-amble
		if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
		logger.info(
		f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
		)
		if any([cfg.preamble, cfg.postamble]):
		preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms
		postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms
		metadata.trim_meta(y, limits=[-preamble, -postamble], samples=True)

		# add random noise
		if cfg.add_low_level_random_noise:
		if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
		@@ -444,6 +434,7 @@ def generate_OSBA_scene(
		metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True)

		# adjust the loudness of the output signal
		if "loudness" in cfg.__dict__:
		logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
		y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL")

ivas_processing_scripts/generation/generate_stereo_items.py

+12 −21

Original line number	Diff line number	Diff line
		@@ -78,10 +78,6 @@ def generate_stereo_items(
		):
		"""Generate STEREO items from mono items based on scene description"""

		# set the target level
		if "loudness" not in cfg.__dict__:
		cfg.loudness = -26

		# set the fs
		if "fs" not in cfg.__dict__:
		cfg.fs = 48000
		@@ -90,21 +86,10 @@ def generate_stereo_items(
		if "IR_fs" not in cfg.__dict__:
		cfg.IR_fs = 48000

		# set the pre-amble and post-amble
		if "preamble" not in cfg.__dict__:
		cfg.preamble = 0.0

		if "postamble" not in cfg.__dict__:
		cfg.postamble = 0.0

		# set the IR path
		if "IR_path" not in cfg.__dict__:
		cfg.IR_path = os.path.join(os.path.dirname(__file__), "IRs")

		# set the low-level random noise flag
		if "add_low_level_random_noise" not in cfg.__dict__:
		cfg.add_low_level_random_noise = False

		# set the listening lab designator
		if "listening_lab" not in cfg.__dict__:
		cfg.listening_lab = "l"
		@@ -326,12 +311,17 @@ def generate_stereo_scene(
		y.audio += x.audio

		# append pre-amble and post-amble
		if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
		logger.info(
		f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
		)
		if any([cfg.preamble, cfg.postamble]):
		preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms
		postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len) # convert to samples and ensure multiple of 20ms
		y.audio = audioarray.trim(y.audio, y.fs, limits=[-preamble, -postamble], samples=True)

		# add random noise
		if cfg.add_low_level_random_noise:
		if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
		@@ -349,6 +339,7 @@ def generate_stereo_scene(
		y.audio = audioarray.trim(y.audio, y.fs, limits=[0, len(y.audio) - duration], samples=True)

		# adjust the loudness of the output signal
		if "loudness" in cfg.__dict__:
		logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
		y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="STEREO")