Commit 70e23fce authored by Vinit Veera's avatar Vinit Veera
Browse files

Merge branch 'main' into 'user-configuration-of-binary-names'

# Conflicts:
#   examples/TEMPLATE.yml
#   ivas_processing_scripts/utils.py
parents 1168f452 b3e90d7c
Loading
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -474,9 +474,9 @@ The processing chain is as follows:
   - The postprocessing stage performs a final conversion from the output of the previous stage if necessary and applies the specified processing

---
## ITU Tools
## Additional Executables

The following binaries/executables are needed for the different processing steps:
The following additional executables are needed for the different processing steps:

| Processing step                 | Executable            | Where to find                                                                                               |
|---------------------------------|-----------------------|-------------------------------------------------------------------------------------------------------------|
@@ -487,7 +487,7 @@ The following binaries/executables are needed for the different processing steps
| Error pattern generation        | gen-patt              | https://www.itu.int/rec/T-REC-G.191-201003-S/en (Note: Version in https://github.com/openitu/STL is buggy!) |
| Filtering, Resampling           | filter                | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip                                       |
| Random offset/seed generation   | random                | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip                                       |
| JBM network similulator         | networkSimulator_g192 | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip                                       |
| JBM network simulator           | networkSimulator_g192 | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip                                       |
| MASA rendering                  | masaRenderer          | https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/TSGS4_122_Athens/Docs/S4-230221.zip                               |

The necessary binaries have to be placed in the [ivas_processing_scripts/bin](./ivas_processing_scripts/bin) folder.
+20 −27
Original line number Diff line number Diff line
@@ -36,7 +36,6 @@ from itertools import repeat
import yaml

from ivas_processing_scripts.audiotools.metadata import check_ISM_metadata
from ivas_processing_scripts.audiotools.wrappers.bs1770 import scale_files
from ivas_processing_scripts.constants import (
    LOGGER_DATEFMT,
    LOGGER_FORMAT,
@@ -44,11 +43,11 @@ from ivas_processing_scripts.constants import (
)
from ivas_processing_scripts.processing import chains, config
from ivas_processing_scripts.processing.processing import (
    concat_setup,
    concat_teardown,
    preprocess,
    preprocess_2,
    process_item,
    reorder_items_list,
    reverse_process_2,
)
from ivas_processing_scripts.utils import DirManager, apply_func_parallel

@@ -95,8 +94,15 @@ def main(args):
        logger = logging_init(args, cfg)

        # Re-ordering items based on concatenation order
        if cfg.concatenate_input and cfg.concatenation_order is not None:
            cfg.items_list = reorder_items_list(cfg.items_list, cfg.concatenation_order)
        if hasattr(cfg, "preprocessing_2"):
            if (
                cfg.preprocessing_2.get("concatenate_input")
                and cfg.preprocessing_2.get("concatenation_order", None) is not None
            ):
                cfg.items_list = reorder_items_list(
                    cfg.items_list, cfg.preprocessing_2["concatenation_order"]
                )

        # check for ISM metadata
        if cfg.input["fmt"].startswith("ISM"):
            metadata = check_ISM_metadata(
@@ -121,12 +127,15 @@ def main(args):

        # run preprocessing only once
        if hasattr(cfg, "preprocessing"):
            preprocess(cfg, cfg.metadata_path, logger)
            preprocess(cfg, logger)

        if cfg.concatenate_input:
            # concatenate items if required
            concat_setup(cfg, logger)
        # preprocessing on whole signal(s)
        if hasattr(cfg, "preprocessing_2"):
            # save process info to revert it later
            cfg.pre2 = cfg.proc_chains[0]["processes"][0]
            preprocess_2(cfg, logger)

        # run conditions
        for condition, out_dir, tmp_dir in zip(
            cfg.proc_chains, cfg.out_dirs, cfg.tmp_dirs
        ):
@@ -134,11 +143,6 @@ def main(args):

            logger.info(f"  Generating condition: {condition['name']}")

            # # TODO: what happens when no concatenation or only one file for concatenation?
            # if condition["processes"][0].name == "ivas":  # TODO: check if 0 index sufficient
            #     a = {"number_frames": cfg.num_frames, "number_frames_preamble": cfg.num_frames_preamble}
            #     condition["processes"][0].tx.update(a)

            apply_func_parallel(
                process_item,
                zip(
@@ -153,19 +157,8 @@ def main(args):
                "mp" if cfg.multiprocessing else None,
            )

        if cfg.concatenate_input:
            # write out the splits, optionally remove file
            out_paths_splits, out_meta_splits = concat_teardown(cfg, logger)
            # scale individual files
            if cfg.postprocessing.get("loudness", False):
                # TODO: take care of samplingrate
                scale_files(
                    out_paths_splits,
                    cfg.postprocessing["fmt"],
                    cfg.postprocessing["loudness"],
                    cfg.postprocessing.get("fs", None),
                    out_meta_splits,
                )
        if hasattr(cfg, "preprocessing_2"):
            reverse_process_2(cfg, logger)

    # copy configuration to output directory
    with open(cfg.output_path.joinpath(f"{cfg.name}.yml"), "w") as f:
+6 −6
Original line number Diff line number Diff line
@@ -85,30 +85,30 @@ def trim(
        pre_trim = limits[0]
        post_trim = limits[1]

    if pre_trim <= 0:
    if pre_trim < 0:
        if pad_noise:
            # pad with uniformly distributed noise between -4 and 4
            np.random.seed(SEED_PADDING)
            noise = np.random.randint(
                low=-4, high=5, size=(np.abs(pre_trim), np.shape(x)[1])
            ).astype("float")
            x = np.concatenate((x, noise), axis=0)
            x = np.concatenate((noise, x), axis=0)
        else:
            x = np.pad(x, [[np.abs(pre_trim), 0], [0, 0]])
    else:
    elif pre_trim > 0:
        x = x[pre_trim:, :]

    if post_trim <= 0:
    if post_trim < 0:
        if pad_noise:
            # pad with uniformly distributed noise between -4 and 4
            np.random.seed(SEED_PADDING)
            noise = np.random.randint(
                low=-4, high=5, size=(np.abs(post_trim), np.shape(x)[1])
            ).astype("float")
            x = np.concatenate((noise, x), axis=0)
            x = np.concatenate((x, noise), axis=0)
        else:
            x = np.pad(x, [[0, np.abs(post_trim)], [0, 0]])
    else:
    elif post_trim > 0:
        x = x[:-post_trim, :]

    return x
+4 −3
Original line number Diff line number Diff line
@@ -150,9 +150,10 @@ def write(
def concat(
    in_filenames: list,
    out_file: str,
    silence_pre: int,
    silence_post: int,
    silence_pre: Optional[int] = 0,
    silence_post: Optional[int] = 0,
    in_fs: Optional[int] = 48000,
    num_channels: Optional[int] = None,
    pad_noise: Optional[bool] = False,
    preamble: Optional[int] = None,
    pad_noise_preamble: Optional[bool] = False,
@@ -189,7 +190,7 @@ def concat(

    # Read input files
    for in_file in in_filenames:
        x, fs = read(in_file, fs=in_fs)
        x, fs = read(in_file, fs=in_fs, nchannels=num_channels)
        if fs_compare and fs_compare != fs:
            raise ValueError("Sampling rates of files to concatenate don't match")
        else:
+43 −30
Original line number Diff line number Diff line
@@ -188,7 +188,7 @@ def write_ISM_metadata_in_file(
        List of actually used file names
    """

    if len(metadata) != len(file_name) and not automatic_naming:
    if not automatic_naming and len(metadata) != len(file_name):
        raise ValueError("Number of metadata objects and file names has to match")
    number_objects = len(metadata)

@@ -299,9 +299,9 @@ def concat_meta_from_file(
    audio_files: list[str],
    meta_files: list[list[str]],
    out_file: list[str],
    silence_pre: int,
    silence_post: int,
    input_fmt: str,
    silence_pre: Optional[int] = 0,
    silence_post: Optional[int] = 0,
    preamble: Optional[int] = None,
) -> None:
    """
@@ -315,12 +315,12 @@ def concat_meta_from_file(
        List of corresponding metadata file names
    out_file: list[str]
        Name of concatenated output file
    silence_pre: int
        Silence inserted before each item
    silence_post: int
        Silence inserted after each item
    input_fmt: str
        Input audio format
    silence_pre: Optional[int]
        Silence inserted before each item
    silence_post: Optional[int]
        Silence inserted after each item
    preamble: Optional[int]
        Length of preamble in milliseconds
    """
@@ -362,7 +362,7 @@ def concat_meta_from_file(
        # pad
        trim_meta(
            audio_item, (-silence_pre, -silence_post)
        )  # use negative value since we wante to pad, not trim
        )  # use negative value since we want to pad, not trim

        # concatenate
        for idx, obj_pos in enumerate(audio_item.object_pos):
@@ -374,26 +374,7 @@ def concat_meta_from_file(

    # add preamble
    if preamble:
        preamble_frames = preamble / IVAS_FRAME_LEN_MS
        if not preamble_frames.is_integer():
            raise ValueError(
                f"ISM metadata padding and trimming only possible if pad/trim length is multiple of frame length. "
                f"Frame length: {IVAS_FRAME_LEN_MS}ms"
            )
        for obj_idx in range(len(concat_meta_all_obj)):
            if (
                concat_meta_all_obj is not None
                and concat_meta_all_obj[obj_idx] is not None
            ):
                concat_meta_all_obj[obj_idx] = trim(
                    concat_meta_all_obj[obj_idx],
                    limits=(-int(preamble_frames), 0),
                    samples=True,
                )

                # add radius 1
                concat_meta_all_obj[obj_idx][: int(preamble_frames), 2] = 1
        pass
        concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble)

    write_ISM_metadata_in_file(concat_meta_all_obj, out_file)

@@ -529,7 +510,7 @@ def check_ISM_metadata(


def metadata_search(
    in_meta: Union[str, Path],
    in_meta_path: Union[str, Path],
    item_names: list[Union[str, Path]],
    num_objects: int,
) -> list[list[Union[Path, str]]]:
@@ -542,7 +523,7 @@ def metadata_search(
    for item in item_names:
        list_item = []
        for obj_idx in range(num_objects):
            file_name_meta = in_meta / Path(item.stem).with_suffix(
            file_name_meta = in_meta_path / Path(item.stem).with_suffix(
                f"{item.suffix}.{obj_idx}.csv"
            )
            # check if file exists and add to list
@@ -556,3 +537,35 @@ def metadata_search(
            list_meta.append(list_item)

    return list_meta


def add_remove_preamble(
    metadata,
    preamble,
    add: Optional[bool] = True,
):
    """
    Add or remove a preamble in ISM metadata arrays

    Parameters
    ----------
    metadata: list
        List of per-object metadata arrays; entries may be None and are then skipped
    preamble: int
        Length of the preamble in milliseconds; must be an integer multiple of
        the IVAS frame length
    add: Optional[bool]
        If True, pad the preamble at the start of each array and set the radius
        column to 1 for the padded frames; if False, trim the preamble from the start

    Returns
    -------
    metadata: list
        The list with modified metadata arrays

    Raises
    ------
    ValueError
        If preamble is not a multiple of the frame length
    """

    preamble_frames = preamble / IVAS_FRAME_LEN_MS
    if not preamble_frames.is_integer():
        raise ValueError(
            f"Application of preamble for ISM metadata is only possible if preamble length is multiple of frame length. "
            f"Frame length: {IVAS_FRAME_LEN_MS}ms"
        )
    for obj_idx in range(len(metadata)):
        # skip objects without metadata
        if metadata[obj_idx] is None:
            continue

        if add:
            # negative lower limit pads preamble_frames rows at the start
            metadata[obj_idx] = trim(
                metadata[obj_idx],
                limits=(-int(preamble_frames), 0),
                samples=True,
            )

            # set radius (column 2) to 1 for the padded preamble frames
            metadata[obj_idx][: int(preamble_frames), 2] = 1
        else:
            # positive lower limit trims preamble_frames rows from the start
            metadata[obj_idx] = trim(
                metadata[obj_idx],
                limits=(int(preamble_frames), 0),
                samples=True,
            )

    return metadata
Loading