fixed small issue and added postamble for ism metadata (6611cf9d) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

ivas_processing_scripts/audiotools/audio.py

+8 −6

Original line number	Diff line number	Diff line
		@@ -42,12 +42,12 @@ from ivas_processing_scripts.audiotools.constants import (
		BINAURAL_AUDIO_FORMATS,
		CHANNEL_BASED_AUDIO_ALTNAMES,
		CHANNEL_BASED_AUDIO_FORMATS,
		DEFAULT_ISM_METADATA,
		IVAS_FRAME_LEN_MS,
		METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS,
		NUMBER_COLUMNS_ISM_METADATA,
		OBJECT_BASED_AUDIO_FORMATS,
		SCENE_BASED_AUDIO_FORMATS,
		NUMBER_COLUMNS_ISM_METADATA,
		DEFAULT_ISM_METADATA
		)

		from .EFAP import wrap_angles
		@@ -310,15 +310,17 @@ class ObjectBasedAudio(Audio):
		# check if metadata has right number of columns
		num_columns = pos.shape[1]
		if num_columns < 2:
		raise ValueError("Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory.")
		elif num_columns > NUMBER_COLUMNS_ISM_METADATA:
		raise ValueError(
		"Too many columns in metadata"
		"Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory."
		)
		elif num_columns > NUMBER_COLUMNS_ISM_METADATA:
		raise ValueError("Too many columns in metadata")

		# pad metadata to max number of columns
		if num_columns < NUMBER_COLUMNS_ISM_METADATA:
		pos = np.hstack([pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])])
		pos = np.hstack(
		[pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])]
		)

		# check if metadata is longer than file -> cut off
		num_frames = int(

ivas_processing_scripts/audiotools/constants.py

+10 −1

Original line number	Diff line number	Diff line
		@@ -705,5 +705,14 @@ DELAY_COMPENSATION_FOR_FILTERING = {
		}

		DEFAULT_ISM_METADATA = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]
		FORMAT_ISM_METADATA_CSV = ["%+07.2f", "%+06.2f", "%05.2f", "%06.2f", "%04.2f", "%+07.2f", "%+06.2f", "%1.0f"]
		FORMAT_ISM_METADATA_CSV = [
		"%+07.2f",
		"%+06.2f",
		"%05.2f",
		"%06.2f",
		"%04.2f",
		"%+07.2f",
		"%+06.2f",
		"%1.0f",
		]
		NUMBER_COLUMNS_ISM_METADATA = len(DEFAULT_ISM_METADATA)

ivas_processing_scripts/audiotools/metadata.py

+50 −5

Original line number	Diff line number	Diff line
		@@ -40,7 +40,12 @@ from ivas_processing_scripts.audiotools import audio
		from ivas_processing_scripts.audiotools.audio import fromtype
		from ivas_processing_scripts.audiotools.audioarray import trim
		from ivas_processing_scripts.audiotools.audiofile import read
		from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS, FORMAT_ISM_METADATA_CSV, NUMBER_COLUMNS_ISM_METADATA, DEFAULT_ISM_METADATA
		from ivas_processing_scripts.audiotools.constants import (
		DEFAULT_ISM_METADATA,
		FORMAT_ISM_METADATA_CSV,
		IVAS_FRAME_LEN_MS,
		NUMBER_COLUMNS_ISM_METADATA,
		)


		class Metadata:
		@@ -201,12 +206,15 @@ def write_ISM_metadata_in_file(

		for i, csv_file in enumerate(file_names):
		number_frames = metadata[i].shape[0]
		number_columns = metadata[i].shape[1]
		with open(csv_file, "w", newline="") as file:
		writer = csv.writer(file)
		for k in range(number_frames):
		row_list = []
		for p in range(NUMBER_COLUMNS_ISM_METADATA):
		row_list.append(FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2))
		for p in range(number_columns):
		row_list.append(
		FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2)
		)
		writer.writerow(row_list)

		return file_names
		@@ -370,7 +378,7 @@ def concat_meta_from_file(

		# add preamble
		if preamble:
		concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble)
		concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble, 0)

		write_ISM_metadata_in_file(concat_meta_all_obj, out_file)

		@@ -540,8 +548,10 @@ def metadata_search(
		def add_remove_preamble(
		metadata,
		preamble,
		postamble,
		add: Optional[bool] = True,
		):
		# preamble
		preamble_frames = preamble / IVAS_FRAME_LEN_MS
		if not preamble_frames.is_integer():
		raise ValueError(
		@@ -553,7 +563,14 @@ def add_remove_preamble(
		if add:
		num_columns = metadata[obj_idx].shape[1]
		metadata[obj_idx] = np.vstack(
		[np.repeat(np.array(DEFAULT_ISM_METADATA)[None, :num_columns], preamble_frames, 0), metadata[obj_idx]]
		[
		np.repeat(
		np.array(DEFAULT_ISM_METADATA)[None, :num_columns],
		preamble_frames,
		0,
		),
		metadata[obj_idx],
		]
		)
		else:
		metadata[obj_idx] = trim(
		@@ -562,4 +579,32 @@ def add_remove_preamble(
		samples=True,
		)

		# postamble
		postamble_frames = postamble / IVAS_FRAME_LEN_MS
		if not postamble_frames.is_integer():
		raise ValueError(
		f"Application of postamble for ISM metadata is only possible if postamble length is multiple of frame length. "
		f"Frame length: {IVAS_FRAME_LEN_MS}ms"
		)
		for obj_idx in range(len(metadata)):
		if metadata is not None and metadata[obj_idx] is not None:
		if add:
		num_columns = metadata[obj_idx].shape[1]
		metadata[obj_idx] = np.vstack(
		[
		metadata[obj_idx],
		np.repeat(
		np.array(DEFAULT_ISM_METADATA)[None, :num_columns],
		postamble_frames,
		0,
		),
		]
		)
		else:
		metadata[obj_idx] = trim(
		metadata[obj_idx],
		limits=(0, int(postamble_frames)),
		samples=True,
		)

		return metadata

ivas_processing_scripts/processing/preprocessing_2.py

+13 −6

Original line number	Diff line number	Diff line
		@@ -64,8 +64,7 @@ class Preprocessing2(Processing):
		self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta
		)

		# add preamble
		# also apply preamble to ISM metadata
		# modify ISM metadata
		if self.in_fmt.startswith("ISM"):
		if not self.preamble:
		preamble = 0
		@@ -75,16 +74,24 @@ class Preprocessing2(Processing):
		# read out old
		metadata = audio_object.object_pos

		# modify metadata
		metadata = add_remove_preamble(metadata, preamble)
		# add preamble
		metadata = add_remove_preamble(metadata, preamble, 0)

		# repeat signal
		if self.repeat_signal:
		metadata = [np.concatenate((m, m), axis=0) for m in metadata]

		# add postamble
		if self.postamble:
		metadata = add_remove_preamble(metadata, 0, self.postamble)

		meta_files = write_ISM_metadata_in_file(metadata, [out_file], True)

		# modify audio object
		audio_object.metadata_files = meta_files
		audio_object.obect_pos = metadata
		audio_object.object_pos = metadata

		# modify audio signal
		# add preamble
		if self.preamble > 0:
		logger.debug(f"Add preamble of length {self.preamble}ms")
		@@ -111,7 +118,7 @@ class Preprocessing2(Processing):
		(audio_object.audio, audio_object.audio), axis=0
		)

		# add postamble - do ater signal repetition as this is just for ensuring equal lengths between in- and output signals
		# add postamble - do after signal repetition as this is just for ensuring equal lengths between in- and output signals
		if self.postamble > 0:
		logger.debug(f"Add postamble of length {self.postamble}ms")
		audio_object.audio = trim(

ivas_processing_scripts/processing/processing.py

+19 −12

Original line number	Diff line number	Diff line
		@@ -407,28 +407,36 @@ def remove_pre_and_postamble(
		):
		# remove postamble, repeated signal half and preamble from ISM metadata
		if out_fmt.startswith("ISM"):
		# remove postamble
		if postamble_len_ms:
		meta = add_remove_preamble(meta, 0, postamble_len_ms, add=False)

		# cut first half of the metadata
		if repeat_signal:
		meta = [m[int(len(m) / 2) :, :] for m in meta]

		# remove preamble
		if preamble_len_ms > 0:
		meta = add_remove_preamble(meta, preamble_len_ms, add=False)
		meta = add_remove_preamble(meta, preamble_len_ms, 0, add=False)

		# get number of samples to cut from start
		trim_len_samples = (preamble_len_ms * fs) // 1000
		# remove postamble
		if postamble_len_ms:
		if logger:
		logger.debug("Remove postamble")
		postamble_len_samples = (postamble_len_ms * fs) // 1000
		x = trim(x, fs, (0, postamble_len_samples), samples=True)

		# cut first half of signal
		if repeat_signal:
		if logger:
		logger.debug("Remove first half of signal")
		x = x[len(x) // 2 :, :]

		# need to subtract the postamble length before getting half of signal length - it was added after concatenation
		trim_len_samples += (len(x) - postamble_len_samples) // 2

		if trim_len_samples > 0 and logger:
		# remove preamble
		if preamble_len_ms:
		if logger:
		logger.debug("Remove preamble")

		x = trim(x, fs, (trim_len_samples, postamble_len_samples), samples=True)
		x = trim(x, fs, ((preamble_len_ms * fs) // 1000, 0), samples=True)

		return x, meta

		@@ -464,4 +472,3 @@ def preprocess_background_noise(cfg):
		] = output_audio

		return