Merge branch 'fix_ISM_different_number_metadata_columns' into 'main' (5abb8af5) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

.gitlab-ci.yml

+3 −2

Original line number	Diff line number	Diff line
		@@ -37,9 +37,10 @@ stages:
		# NOTE: CODEC_DIR has to be in PATH
		- cd $CODEC_DIR
		# make sure that we are at latest main
		# TODO: temporarily use the RC1a tag
		# TODO: temporarily use the RC1b tag
		- git restore .
		- git checkout 20230511-RC1a-listening-tests
		- git fetch
		- git checkout 20230516-RC1b-listening-tests
		- echo "--------------------------------------------"
		- echo "Building codec on commit $(git rev-parse HEAD --short)"
		- echo "--------------------------------------------"

ivas_processing_scripts/audiotools/audio.py

+16 −10

Original line number	Diff line number	Diff line
		@@ -42,8 +42,10 @@ from ivas_processing_scripts.audiotools.constants import (
		BINAURAL_AUDIO_FORMATS,
		CHANNEL_BASED_AUDIO_ALTNAMES,
		CHANNEL_BASED_AUDIO_FORMATS,
		DEFAULT_ISM_METADATA,
		IVAS_FRAME_LEN_MS,
		METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS,
		NUMBER_COLUMNS_ISM_METADATA,
		OBJECT_BASED_AUDIO_FORMATS,
		SCENE_BASED_AUDIO_FORMATS,
		)
		@@ -295,6 +297,7 @@ class ObjectBasedAudio(Audio):
		return obj

		def init_metadata(self):
		# check if number of metadata files matches format
		if self.audio.shape[1] != len(self.metadata_files):
		raise ValueError(
		f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]"
		@@ -305,15 +308,18 @@ class ObjectBasedAudio(Audio):
		pos = np.genfromtxt(f, delimiter=",")

		# check if metadata has right number of columns
		if pos.shape[1] < 5:
		raise ValueError("Metadata incomplete. Columns are missing.")
		elif pos.shape[1] > 5:
		if pos.shape[1] <= 8:
		# TODO: FIXME
		pos = pos[:, :5]
		else:
		num_columns = pos.shape[1]
		if num_columns < 2:
		raise ValueError(
		"Too many columns in metadata (possibly old version with frame index used)"
		"Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory."
		)
		elif num_columns > NUMBER_COLUMNS_ISM_METADATA:
		raise ValueError("Too many columns in metadata")

		# pad metadata to max number of columns
		if num_columns < NUMBER_COLUMNS_ISM_METADATA:
		pos = np.hstack(
		[pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])]
		)

		# check if metadata is longer than file -> cut off

ivas_processing_scripts/audiotools/constants.py

+13 −0

Original line number	Diff line number	Diff line
		@@ -703,3 +703,16 @@ DELAY_COMPENSATION_FOR_FILTERING = {
		"HP50_32KHZ": 559,
		"HP50_48KHZ": 839,
		}

		DEFAULT_ISM_METADATA = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]
		FORMAT_ISM_METADATA_CSV = [
		"%+07.2f",
		"%+06.2f",
		"%05.2f",
		"%06.2f",
		"%04.2f",
		"%+07.2f",
		"%+06.2f",
		"%1.0f",
		]
		NUMBER_COLUMNS_ISM_METADATA = len(DEFAULT_ISM_METADATA)

ivas_processing_scripts/audiotools/metadata.py

+50 −13

Original line number	Diff line number	Diff line
		@@ -40,7 +40,12 @@ from ivas_processing_scripts.audiotools import audio
		from ivas_processing_scripts.audiotools.audio import fromtype
		from ivas_processing_scripts.audiotools.audioarray import trim
		from ivas_processing_scripts.audiotools.audiofile import read
		from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS
		from ivas_processing_scripts.audiotools.constants import (
		DEFAULT_ISM_METADATA,
		FORMAT_ISM_METADATA_CSV,
		IVAS_FRAME_LEN_MS,
		NUMBER_COLUMNS_ISM_METADATA,
		)


		class Metadata:
		@@ -201,16 +206,15 @@ def write_ISM_metadata_in_file(

		for i, csv_file in enumerate(file_names):
		number_frames = metadata[i].shape[0]
		number_columns = metadata[i].shape[1]
		with open(csv_file, "w", newline="") as file:
		writer = csv.writer(file)
		for k in range(number_frames):
		row_list = [
		"%+07.2f" % np.round(metadata[i][k, 0], 2),
		"%+06.2f" % np.round(metadata[i][k, 1], 2),
		"01.00",
		"000.00",
		"1.00",
		]
		row_list = []
		for p in range(number_columns):
		row_list.append(
		FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2)
		)
		writer.writerow(row_list)

		return file_names
		@@ -374,7 +378,7 @@ def concat_meta_from_file(

		# add preamble
		if preamble:
		concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble)
		concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble, 0)

		write_ISM_metadata_in_file(concat_meta_all_obj, out_file)

		@@ -544,8 +548,10 @@ def metadata_search(
		def add_remove_preamble(
		metadata,
		preamble,
		postamble,
		add: Optional[bool] = True,
		):
		# preamble
		preamble_frames = preamble / IVAS_FRAME_LEN_MS
		if not preamble_frames.is_integer():
		raise ValueError(
		@@ -555,18 +561,49 @@ def add_remove_preamble(
		for obj_idx in range(len(metadata)):
		if metadata is not None and metadata[obj_idx] is not None:
		if add:
		num_columns = metadata[obj_idx].shape[1]
		metadata[obj_idx] = np.vstack(
		[
		np.repeat(
		np.array(DEFAULT_ISM_METADATA)[None, :num_columns],
		preamble_frames,
		0,
		),
		metadata[obj_idx],
		]
		)
		else:
		metadata[obj_idx] = trim(
		metadata[obj_idx],
		limits=(-int(preamble_frames), 0),
		limits=(int(preamble_frames), 0),
		samples=True,
		)

		# add radius 1
		metadata[obj_idx][: int(preamble_frames), 2] = 1
		# postamble
		postamble_frames = postamble / IVAS_FRAME_LEN_MS
		if not postamble_frames.is_integer():
		raise ValueError(
		f"Application of postamble for ISM metadata is only possible if postamble length is multiple of frame length. "
		f"Frame length: {IVAS_FRAME_LEN_MS}ms"
		)
		for obj_idx in range(len(metadata)):
		if metadata is not None and metadata[obj_idx] is not None:
		if add:
		num_columns = metadata[obj_idx].shape[1]
		metadata[obj_idx] = np.vstack(
		[
		metadata[obj_idx],
		np.repeat(
		np.array(DEFAULT_ISM_METADATA)[None, :num_columns],
		postamble_frames,
		0,
		),
		]
		)
		else:
		metadata[obj_idx] = trim(
		metadata[obj_idx],
		limits=(int(preamble_frames), 0),
		limits=(0, int(postamble_frames)),
		samples=True,
		)

ivas_processing_scripts/processing/preprocessing_2.py

+13 −6

Original line number	Diff line number	Diff line
		@@ -64,8 +64,7 @@ class Preprocessing2(Processing):
		self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta
		)

		# add preamble
		# also apply preamble to ISM metadata
		# modify ISM metadata
		if self.in_fmt.startswith("ISM"):
		if not self.preamble:
		preamble = 0
		@@ -75,16 +74,24 @@ class Preprocessing2(Processing):
		# read out old
		metadata = audio_object.object_pos

		# modify metadata
		metadata = add_remove_preamble(metadata, preamble)
		# add preamble
		metadata = add_remove_preamble(metadata, preamble, 0)

		# repeat signal
		if self.repeat_signal:
		metadata = [np.concatenate((m, m), axis=0) for m in metadata]

		# add postamble
		if self.postamble:
		metadata = add_remove_preamble(metadata, 0, self.postamble)

		meta_files = write_ISM_metadata_in_file(metadata, [out_file], True)

		# modify audio object
		audio_object.metadata_files = meta_files
		audio_object.obect_pos = metadata
		audio_object.object_pos = metadata

		# modify audio signal
		# add preamble
		if self.preamble > 0:
		logger.debug(f"Add preamble of length {self.preamble}ms")
		@@ -111,7 +118,7 @@ class Preprocessing2(Processing):
		(audio_object.audio, audio_object.audio), axis=0
		)

		# add postamble - do ater signal repetition as this is just for ensuring equal lengths between in- and output signals
		# add postamble - do after signal repetition as this is just for ensuring equal lengths between in- and output signals
		if self.postamble > 0:
		logger.debug(f"Add postamble of length {self.postamble}ms")
		audio_object.audio = trim(