fix incorrect length of .csv files; improving source code readability (3ea660b9) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

ivas_processing_scripts/generation/process_ism_items.py

+26 −21

Original line number	Diff line number	Diff line
		@@ -115,11 +115,16 @@ def generate_ism_items(

		# get the number of frames (multiple of 20ms)
		N_frames = int(len(x.audio) / x.fs * 50)
		frame_len = int(x.fs / 50)

		# trim the samples from the end to ensure that the signal length is a multiple of 20ms
		x.audio = x.audio[:N_frames * frame_len]

		# adjust the level of the source file
		_, scale_factor = get_loudness(x, cfg.loudness, "MONO")
		x.audio *= scale_factor


		# read azimuth information and create array
		if isinstance(source_azi, str):
		if ":" in source_azi:
		@@ -192,36 +197,34 @@ def generate_ism_items(
		# get the length of the first source file
		N_delay = len(y.audio[:, 0])

		# add the shift
		N_delay += int(-source_overlap * x.fs)

		# ensure delay is a multiple of 20ms
		# N_delay = int(floor(source_shift * 50) / 50 * x.fs)
		# add the shift value (ensure that the shift is a multiple of 20ms)
		N_delay += int(floor(-source_overlap * 50) / 50 * x.fs)

		# insert all-zero preamble
		# insert all-zero signal
		pre = np.zeros((N_delay, x.audio.shape[1]))
		x.audio = np.concatenate([pre, x.audio])

		# insert neutral position as a pre-amble
		N_delay = int(N_delay / frame_len)
		pre = np.tile(
		[0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1)
		) # !!!! TBD - check if we should insert neutral position or the first position of the metadata
		x_meta = np.concatenate([pre, x_meta])

		# pad with zeros to ensure that the signal length is a multiple of 20ms
		N_frame = x.fs / 50
		if len(x.audio) % N_frame != 0:
		N_pad = int(N_frame - len(x.audio) % N_frame)

		# insert all-zero preamble
		pre = np.zeros((N_pad, x.audio.shape[1]))
		x.audio = np.concatenate([pre, x.audio])

		# insert neutral position as a pre-amble
		pre = np.tile(
		if len(x.audio) % frame_len != 0:
		# pad the source signal
		N_pad = int(frame_len - len(x.audio) % frame_len)
		post = np.zeros((N_pad, x.audio.shape[1]))
		x.audio = np.concatenate([x.audio, post])

		# pad the metadata
		N_pad = int(len(x.audio) / frame_len) - len(x_meta)
		if N_pad > 0:
		post = np.tile(
		[0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1)
		) # !!!! TBD - check if we should insert neutral position or the first position of the metadata
		x_meta = np.concatenate([pre, x_meta])
		x_meta = np.concatenate([x_meta, post])

		# add source signal to the array of all source signals
		y.fs = x.fs
		@@ -280,7 +283,7 @@ def generate_ism_items(

		# append pre-amble and post-amble to all sources
		if cfg.preamble != 0.0:
		# ensure that pre-mable is a multiple of 20ms
		# ensure that pre-amble is a multiple of 20ms
		N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)

		# insert all-zero preamble to all sources
		@@ -288,6 +291,7 @@ def generate_ism_items(
		y.audio = np.concatenate([pre, y.audio])

		# insert neutral position as a pre-amble to all sources
		N_pre = int(N_pre / frame_len)
		pre = np.tile(
		[0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1)
		) # !!!! TBD - check if we should insert neutral position or the first position of the metadata
		@@ -302,6 +306,7 @@ def generate_ism_items(
		y.audio = np.concatenate([y.audio, post])

		# append neutral position as a post-amble to all sources
		N_post = int(N_post / frame_len)
		post = np.tile(
		[0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1)
		) # !!!! TBD - check if we should insert neutral position or the last position of the metadata
		@@ -319,7 +324,7 @@ def generate_ism_items(
		y.audio += noise

		# write individual ISM audio streams to the output file in an interleaved format
		output_filename = scene["name"]
		output_filename = scene_name
		audiofile.write(
		os.path.join(cfg.output_path, output_filename), y.audio, y.fs
		) # !!!! TBD: replace all os.path.xxx operations with the Path object

ivas_processing_scripts/generation/process_stereo_items.py

+3 −6

Original line number	Diff line number	Diff line
		@@ -88,7 +88,7 @@ def generate_stereo_items(

		# repeat for all source files
		for scene_name, scene in cfg.scenes.items():
		logger.info(f"Processing scene: {scene_name} out of {N_scenes} scenes")
		logger.info(f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene_name}")

		# extract the number of audio sources
		N_sources = len(np.atleast_1d(scene["source"]))
		@@ -135,9 +135,6 @@ def generate_stereo_items(
		# add the shift
		N_delay += int(-source_overlap * x.fs)

		# ensure delay is a multiple of 20ms
		# N_delay = int(floor(source_shift * 50) / 50 * x.fs)

		# insert all-zero preamble
		pre = np.zeros((N_delay, x.audio.shape[1]))
		x.audio = np.concatenate([pre, x.audio])
		@@ -187,7 +184,7 @@ def generate_stereo_items(

		# append pre-amble and post-amble to all sources
		if cfg.preamble != 0.0:
		# ensure that pre-mable is a multiple of 20ms
		# ensure that pre-amble is a multiple of 20ms
		N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)

		# insert all-zero preamble to all sources
		@@ -214,7 +211,7 @@ def generate_stereo_items(
		y.audio += noise

		# write the reverberated audio into output file
		output_filename = scene["name"]
		output_filename = scene_name
		audiofile.write(
		os.path.join(cfg.output_path, output_filename), y.audio, y.fs
		) # !!!! TBD: replace all os.path.xxx operations with the Path object