fix the duration parameter - only trim if duration is exceeded (73a29eb9) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

ivas_processing_scripts/generation/generate_ismN_items.py

+7 −11

Original line number	Diff line number	Diff line
		@@ -443,18 +443,14 @@ def generate_ismN_scene(
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
		y.audio += noise

		# adjust the length of the output signal
		# trim the output signal if the total duration exceeds X seconds
		if "duration" in cfg.__dict__:
		# trim the output signal such that the total duration is X seconds
		duration = int(cfg.duration * cfg.fs) # convert to samples
		else:
		# do not change the length of the audio signal
		duration = len(y.audio)
		duration = int(
		np.floor(duration / frame_len) * frame_len
		) # ensure multiple of 20ms
		if len(y.audio) != duration:
		metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True)
		# convert from seconds to samples (ensure multiple of 20ms)
		duration = int(np.floor(int(cfg.duration * cfg.fs) / frame_len) * frame_len)

		# check if the current length of the output signal exceeds the duration
		if len(y.audio) > duration:
		metadata.trim_meta(y, limits=[0, duration], samples=True)

		# adjust the loudness of the output signal
		if "loudness" in cfg.__dict__:

ivas_processing_scripts/generation/generate_masa_items.py

+14 −16

Original line number	Diff line number	Diff line
		@@ -316,12 +316,12 @@ def generate_MASA_scene(
		x = reverb_hoa3(x, IR, mode=None)

		# adjust the level of the FOA/HOA2/HOA3 signal
		if level is None:
		# do not change the level of the audio source signal
		logger.info("-- Level of the audio source signal is not changed")
		elif np.isinf(level):
		if np.isinf(level):
		# set all channels to zero
		x.audio = np.zeros_like(x.audio)
		elif level is None:
		# do not change the level of the audio source signal
		logger.info("-- Level of the audio source signal is not changed")
		else:
		x.audio, _ = loudness_norm(x, level, loudness_format="STEREO")

		@@ -393,17 +393,15 @@ def generate_MASA_scene(
		y_int.audio, y_int.fs, limits=[-preamble, -postamble], samples=True
		)

		# adjust the length of the output signal
		# trim the output signal if the total duration exceeds X seconds
		if "duration" in cfg.__dict__:
		# trim the output signal such that the total duration is X seconds
		duration = int(cfg.duration * cfg.fs) # convert to samples
		else:
		# do not change the length of the audio signal
		duration = len(y_int.audio)
		# convert from seconds to samples (ensure multiple of 20ms)
		duration = int(
		np.floor(duration / frame_len) * frame_len
		) # ensure multiple of 20ms
		if len(y_int.audio) != duration:
		np.floor(int(cfg.duration * cfg.fs) / frame_len) * frame_len
		)

		# check if the current length of the output signal exceeds the duration
		if len(y_int.audio) > duration:
		y_int.audio = audioarray.trim(
		y_int.audio, y_int.fs, limits=[0, len(y_int.audio) - duration], samples=True
		)

ivas_processing_scripts/generation/generate_mc_items.py

+10 −12

Original line number	Diff line number	Diff line
		@@ -393,17 +393,15 @@ def generate_MC_scene(
		y_int.audio, y_int.fs, limits=[-preamble, -postamble], samples=True
		)

		# adjust the length of the output signal
		# trim the output signal if the total duration exceeds X seconds
		if "duration" in cfg.__dict__:
		# trim the output signal such that the total duration is X seconds
		duration = int(cfg.duration * cfg.fs) # convert to samples
		else:
		# do not change the length of the audio signal
		duration = len(y_int.audio)
		# convert from seconds to samples (ensure multiple of 20ms)
		duration = int(
		np.floor(duration / frame_len) * frame_len
		) # ensure multiple of 20ms
		if len(y_int.audio) != duration:
		np.floor(int(cfg.duration * cfg.fs) / frame_len) * frame_len
		)

		# check if the current length of the output signal exceeds the duration
		if len(y_int.audio) > duration:
		y_int.audio = audioarray.trim(
		y_int.audio, y_int.fs, limits=[0, len(y_int.audio) - duration], samples=True
		)

ivas_processing_scripts/generation/generate_omasa_items.py

+12 −1

Original line number	Diff line number	Diff line
		@@ -472,8 +472,9 @@ def generate_OMASA_scene(
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
		y_int.audio += noise

		# adjust the length of the output signal
		# trim the output signal if the total duration exceeds X seconds
		if "duration" in cfg.__dict__:
		<<<<<<< Updated upstream
		# trim the output signal such that the total duration is X seconds
		duration = int(cfg.duration * cfg.fs) # convert to samples
		else:
		@@ -484,6 +485,16 @@ def generate_OMASA_scene(
		) # ensure multiple of 20ms
		if len(y_int.audio) != duration:
		metadata.trim_meta(y_int, limits=[0, len(y_int.audio) - duration], samples=True)
		=======
		# convert from seconds to samples (ensure multiple of 20ms)
		duration = int(
		np.floor(int(cfg.duration * cfg.fs) / frame_len) * frame_len
		)

		# check if the current length of the output signal exceeds the duration
		if len(y.audio) > duration:
		metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True)
		>>>>>>> Stashed changes

		# adjust the loudness of the output signal
		if "loudness" in cfg.__dict__:

ivas_processing_scripts/generation/generate_osba_items.py

+9 −11

Original line number	Diff line number	Diff line
		@@ -458,17 +458,15 @@ def generate_OSBA_scene(
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
		y.audio += noise

		# adjust the length of the output signal
		# trim the output signal if the total duration exceeds X seconds
		if "duration" in cfg.__dict__:
		# trim the output signal such that the total duration is X seconds
		duration = int(cfg.duration * cfg.fs) # convert to samples
		else:
		# do not change the length of the audio signal
		duration = len(y.audio)
		# convert from seconds to samples (ensure multiple of 20ms)
		duration = int(
		np.floor(duration / frame_len) * frame_len
		) # ensure multiple of 20ms
		if len(y.audio) != duration:
		np.floor(int(cfg.duration * cfg.fs) / frame_len) * frame_len
		)

		# check if the current length of the output signal exceeds the duration
		if len(y.audio) > duration:
		metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True)

		# adjust the loudness of the output signal