set up first part of second preprocessing (d655ad7e) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

examples/TEMPLATE.yml

+2 −2

Original line number	Diff line number	Diff line
		@@ -22,9 +22,9 @@
		### Do not use file names with dots "." in them! This is not supported, use "_" instead
		### For Windows users: please use double backslashes '\\' in paths and add '.exe' to executable definitions
		### REQUIRED: Input path or file
		input_path: "~/ivas/items/HOA3"
		input_path: ".../ivas/items/HOA3"
		### REQUIRED: Output path or file
		output_path: "./tmp_output"
		output_path: ".../tmp_output"
		### Metadata path or file(s)
		### If input format is ISM{1-4} a path for the metadata files can be specified;
		### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherwise ignored)

ivas_processing_scripts/audiotools/audiofile.py

+2 −2

Original line number	Diff line number	Diff line
		@@ -150,8 +150,8 @@ def write(
		def concat(
		in_filenames: list,
		out_file: str,
		silence_pre: int,
		silence_post: int,
		silence_pre: Optional[int] = 0,
		silence_post: Optional[int] = 0,
		in_fs: Optional[int] = 48000,
		num_channels: Optional[int] = None,
		pad_noise: Optional[bool] = False,

ivas_processing_scripts/audiotools/metadata.py

+7 −7

Original line number	Diff line number	Diff line
		@@ -299,10 +299,10 @@ def concat_meta_from_file(
		audio_files: list[str],
		meta_files: list[list[str]],
		out_file: list[str],
		silence_pre: int,
		silence_post: int,
		input_fmt: str,
		preamble: Optional[int] = None,
		silence_pre: Optional[int] = 0,
		silence_post: Optional[int] = 0,
		preamble: Optional[int] = None, # TODO: remove preamble here
		) -> None:
		"""
		Concatenate ISM metadata from files
		@@ -315,12 +315,12 @@ def concat_meta_from_file(
		List of corresponding metadata file names
		out_file: list[str]
		Name of concatenated output file
		silence_pre: int
		Silence inserted before each item
		silence_post: int
		Silence inserted after each item
		input_fmt: str
		Input audio format
		silence_pre: Optional[int]
		Silence inserted before each item
		silence_post: Optional[int]
		Silence inserted after each item
		preamble: Optional[int]
		Length of preamble in milliseconds
		"""

ivas_processing_scripts/processing/preprocessing_2.py

+18 −1

Original line number	Diff line number	Diff line
		@@ -32,8 +32,11 @@

		import logging
		from pathlib import Path
		import numpy as np

		from ivas_processing_scripts.processing.processing import Processing
		from ivas_processing_scripts.audiotools.audiofile import read, write
		from ivas_processing_scripts.audiotools.audioarray import trim


		class Preprocessing2(Processing): # TODO
		@@ -45,8 +48,22 @@ class Preprocessing2(Processing): # TODO
		logger.debug(f"Preprocessing2 configuration : {self.__dict__}")
		logger.debug(f"Preprocessing2 {in_file.absolute()} -> {out_file.absolute()}")

		# load in file
		x, fs = read(in_file, self.in_fs, self.in_fmt)

		# add preamble
		# TODO
		if self.preamble:
		x = trim(x, fs, (-self.preamble, 0), self.pad_noise_preamble)

		# add background noise
		if self.background_noise:
		x = self.add_background_noise(x)

		# save file
		write(out_file, x, fs=fs)

		return

		def add_background_noise(self, audio_array: np.ndarray) -> np.ndarray:
		    """
		    Add background noise to an audio array (not implemented yet)

		    Parameters
		    ----------
		    audio_array: np.ndarray
		        Audio samples the background noise should be added to

		    Returns
		    -------
		    np.ndarray
		        Audio samples with background noise added (once implemented)

		    Raises
		    ------
		    NotImplementedError
		        Always, until the background noise mixing is implemented
		    """
		    # TODO: implement the background noise mixing
		    # The earlier placeholder called np.zeros() without the required
		    # shape argument, which fails with an opaque TypeError. Fail with an
		    # explicit message instead, so a configuration requesting background
		    # noise neither crashes confusingly nor silently skips the step.
		    raise NotImplementedError(
		        "Adding background noise is not implemented yet"
		    )

ivas_processing_scripts/processing/processing.py

+19 −21

Original line number	Diff line number	Diff line
		@@ -68,16 +68,24 @@ def reorder_items_list(items_list: list, concatenation_order: list) -> list:
		return ordered_full_files


		def concat_setup(cfg: TestConfig, logger: logging.Logger):
		def concat_setup(cfg: TestConfig, chain, logger: logging.Logger):
		n_items_list = len(cfg.items_list)
		if cfg.concatenation_order is not None:
		n_concatenation_order = len(cfg.concatenation_order)
		cfg_pre2 = chain[0]

		# check for text files
		if any([i for i in cfg.items_list if i.suffix == ".txt"]):
		raise SystemExit("Concatenation for text files is unsupported")

		# apply concatenation order
		if cfg_pre2.concatenation_order is not None:
		n_concatenation_order = len(cfg_pre2.concatenation_order)
		if n_concatenation_order != n_items_list:
		warn(
		f"Warning: Mismatch in specified concatenation order and number of items to process!\nNumber of items specified in concatenation order: {n_concatenation_order}\nNumber of items in the directory: {n_items_list}\nConcatenation will use the following order:\n{cfg.concatenation_order}"
		f"Warning: Mismatch in specified concatenation order and number of items to process!\n"
		f"Number of items specified in concatenation order: {n_concatenation_order}\n"
		f"Number of items in the directory: {n_items_list}\n"
		f"Concatenation will use the following order:\n{cfg_pre2.concatenation_order}"
		)
		if any([i for i in cfg.items_list if i.suffix == ".txt"]):
		raise SystemExit("Concatenation for text files is unsupported")

		logger.info(f"Concatenating input files in directory {cfg.input_path}")

		@@ -90,15 +98,12 @@ def concat_setup(cfg: TestConfig, logger: logging.Logger):
		f"{cfg.input_path.name}_concatenated.wav.{obj_idx}.csv"
		)
		)

		# TODO: check this
		concat_meta_from_file(
		cfg.items_list,
		cfg.metadata_path,
		cfg.concat_meta,
		cfg.concat_silence.get("pre", 0),
		cfg.concat_silence.get("post", 0),
		cfg.input["fmt"],
		preamble=cfg.preamble,
		)

		# set input to the concatenated file we have just written to the output dir
		@@ -110,22 +115,14 @@ def concat_setup(cfg: TestConfig, logger: logging.Logger):
		)

		# determine number of channels for pcm and raw files
		if hasattr(cfg, "preprocessing"):
		tmp_in_fmt = cfg.preprocessing.get("fmt", cfg.input["fmt"])
		else:
		tmp_in_fmt = cfg.input["fmt"]
		tmp_audio = audio.fromtype(tmp_in_fmt)
		tmp_audio = audio.fromtype(cfg_pre2.in_fmt)
		tmp_num_chans = tmp_audio.num_channels

		cfg.splits = concat(
		cfg.items_list,
		cfg.concat_file,
		cfg.concat_silence.get("pre", 0),
		cfg.concat_silence.get("post", 0),
		in_fs=cfg.input.get("fs", 48000),
		num_channels=tmp_num_chans,
		preamble=cfg.preamble,
		pad_noise_preamble=cfg.pad_noise_preamble,
		)

		# save item naming for splits naming in the end
		@@ -232,7 +229,8 @@ def preprocess_2(cfg, logger):
		logger.info(f" Generating condition: {preprocessing_2['name']}")

		# concatenate items if required
		concat_setup(cfg, logger)
		if chain[0].concatenate_input:
		concat_setup(cfg, chain, logger)

		# run preprocessing 2
		apply_func_parallel(
		@@ -250,7 +248,7 @@ def preprocess_2(cfg, logger):
		)

		# update the configuration to use preprocessing 2 outputs as new inputs
		cfg.items_list = list_audio( # TODO: add preprocessing_2 to list of audio
		cfg.items_list = list_audio(
		cfg.out_dirs[0], absolute=False, select_list=getattr(cfg, "input_select", None)
		)
		if cfg.metadata_path[0] is not None: