added optional key for padding the input signals (fcf3171a) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

examples/TEMPLATE.yml

+4 −7

Original line number	Diff line number	Diff line
		@@ -65,13 +65,10 @@ input:
		fmt: "HOA3"
		### Input sampling rate in Hz needed for headerless audio files; default = 48000
		# fs: 32000
		### Enable check for input files being aligned to an integer multiple of a given length in ms.
		### If a file is not aligned, a warning will be issued. If the input format has metadata or force is true, an error is raised instead.
		# aligned_to:
		### alignment length in ms, is needed if aligned_to is used
		# len: 20
		### default: false
		# force: true
		### Treatment of items that are not 20ms aligned
		### options: "padding" (pads with low level noise to frame length), "warning" or "error" (raises a warning or an error when items are not aligned),
		### "ignore" (no special treatment for items that are not aligned); default = "padding" (for ISM: default = "error")
		# frame_alignment: "error"

		################################################
		### Pre-processing on individual items

ivas_processing_scripts/constants.py

+3 −0

Original line number	Diff line number	Diff line
		@@ -63,6 +63,9 @@ DEFAULT_CONFIG = {
		"master_seed": 0,
		"prerun_seed": 0,
		"metadata_path": None,
		"input": {
		"frame_alignment": "padding",
		},
		# postprocessing
		"postprocessing": {
		"mask": None,

ivas_processing_scripts/processing/chains.py

+56 −14

Original line number	Diff line number	Diff line
		@@ -32,9 +32,10 @@

		from typing import Optional
		from warnings import warn
		from shutil import copyfile

		from ivas_processing_scripts.audiotools import audio
		from ivas_processing_scripts.audiotools.audiofile import read
		from ivas_processing_scripts.audiotools.audiofile import read, write
		from ivas_processing_scripts.processing.config import TestConfig
		from ivas_processing_scripts.processing.evs import EVS
		from ivas_processing_scripts.processing.ivas import IVAS
		@@ -44,6 +45,7 @@ from ivas_processing_scripts.processing.preprocessing_2 import Preprocessing2
		from ivas_processing_scripts.processing.processing_splitting_scaling import (
		Processing_splitting_scaling,
		)
		from ivas_processing_scripts.audiotools.audioarray import trim
		from ivas_processing_scripts.utils import get_abs_path, list_audio


		@@ -521,7 +523,23 @@ def validate_input_files(cfg: TestConfig):
		input_format = cfg.input["fmt"]
		num_chan_expected = audio.fromtype(input_format).num_channels

		for item in cfg.items_list:
		frame_alignment = cfg.input["frame_alignment"]

		# always throw an error for ISM (and MASA) input
		if input_format.startswith("ISM") or input_format.startswith("MASA"):
		frame_alignment = "error"

		if cfg.input["frame_alignment"] == "padding":
		# Create new input directory for padded files
		output_dir = cfg.output_path / "20ms_aligned_files"
		try:
		output_dir.mkdir(exist_ok=False)
		except FileExistsError:
		raise ValueError(
		"Folder for 20ms aligned files already exists. Please move or delete folder"
		)

		for i, item in enumerate(cfg.items_list):
		if "fs" in cfg.input:
		sampling_rate = cfg.input["fs"]
		x, fs = read(item, nchannels=num_chan_expected, fs=sampling_rate)
		@@ -544,18 +562,42 @@ def validate_input_files(cfg: TestConfig):
		f"The number of channels in the file ({n_chan_x}) do NOT match with those of format ({num_chan_expected}, {input_format}) specified in the config yaml."
		)

		if (input_aligned_cfg := cfg.input.get("aligned_to", None)) is not None:
		input_fmt_has_metadata = input_format.startswith(
		"ISM"
		) or input_format.startswith("MASA")
		force_alignment = (
		input_aligned_cfg.get("force", False) or input_fmt_has_metadata
		)
		# check frame alignment of items
		if cfg.input["frame_alignment"] != "ignore":

		alignment_len_samples = (20 / 1000) * fs

		alignment_len_samples = (input_aligned_cfg["len"] / 1000) * fs
		if n_samples_x % alignment_len_samples != 0:
		msg = f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of given alignment length ({input_aligned_cfg['len']} ms)."
		if force_alignment:
		raise ValueError(msg)
		if frame_alignment == "warning":
		warn(f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of the frame length.")
		elif frame_alignment == "error":
		raise ValueError(f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of the frame length.")
		elif frame_alignment == "padding":
		warn(f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms). Padding to the nearest integer multiple.")
		# Calculate number of samples needed for padding
		padding_samples = int(alignment_len_samples - (n_samples_x % alignment_len_samples))
		# Pad with low-level noise (pad_noise=True) up to the next frame boundary
		padded_data = trim(
		x,
		fs,
		(0, -padding_samples),
		pad_noise=True,
		samples=True,
		)
		# Write padded data to output directory
		write(output_dir / item.name, padded_data, fs)
		# Update audio file path in list
		cfg.items_list[i] = output_dir / item.name
		else:
		raise ValueError(f"Value of key frame_alignment does not match possible options. Value: {frame_alignment}. Options: 'padding', 'ignore', 'warning', 'error'")
		else:
		warn(msg)
		if frame_alignment == "padding":
		copyfile(item, output_dir / item.name)
		# Update audio file path in list
		cfg.items_list[i] = output_dir / item.name
		else:
		pass

		if frame_alignment == "padding":
		# Use the directory with the aligned files as the new input path
		cfg.input_path = output_dir