Added a function to check if the audio length is a multiple of frame size. (81538a2c) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

ivas_processing_scripts/audiotools/audio.py

+2 −2

Original line number	Diff line number	Diff line
		@@ -30,7 +30,7 @@
		# the United Nations Convention on Contracts on the International Sales of Goods.
		#

		import warnings
		from warnings import warn
		from abc import ABC, abstractmethod
		from pathlib import Path
		from typing import Optional, Union
		@@ -274,7 +274,7 @@ class ObjectBasedAudio(Audio):
		obj.metadata_files.append(file_name_meta)
		else:
		raise ValueError(f"Metadata file {file_name_meta} not found.")
		warnings.warn(
		warn(
		f"No metadata files specified: The following files were found and used: \n {*obj.metadata_files,}"
		)

+42 −2

Original line number	Diff line number	Diff line
		@@ -31,7 +31,7 @@
		#

		import logging
		import warnings
		from warnings import warn
		from typing import Iterator, Optional, Tuple, Union

		import numpy as np
		@@ -342,7 +342,7 @@ def limiter(
		fr_sig[idx_min] = -32768

		if limited:
		warnings.warn("Limiting had to be applied")
		warn("Limiting had to be applied")
		return x


		@@ -688,3 +688,43 @@ def mute_channels(
		"""
		x[:, mute] = 0
		return x


		def multiple_of_frame_size(
		    x: np.ndarray,
		    fs: int = 48000,
		    frame_size_in_ms: Optional[int] = 20,
		) -> np.ndarray:
		    """
		    Warn and pad audio if it isn't a multiple of frame size

		    The first axis of x is taken as the sample axis. Both 1-D arrays
		    (n_samples,) and multi-dimensional arrays (n_samples, n_channels, ...)
		    are accepted; only the end of the first axis is zero-padded.

		    Parameters
		    ----------
		    x: np.ndarray
		        Input array
		    fs: int
		        Input sampling rate in Hz; default = 48000
		    frame_size_in_ms: Optional[int]
		        Frame size in milliseconds; default = 20

		    Returns
		    -------
		    x: np.ndarray
		        Padded array (the input itself is returned unchanged if its length
		        already is a multiple of the frame length)
		    """

		    # shape[0] is the sample count for 1-D and N-D input alike
		    n_samples_x = x.shape[0]

		    # Multiply before dividing so that integer inputs are not exposed to the
		    # rounding error of (frame_size_in_ms / 1000); the result is rounded to
		    # the nearest whole sample.
		    frame_length_samples = int(round(frame_size_in_ms * fs / 1000))

		    remainder = n_samples_x % frame_length_samples
		    if remainder != 0:
		        warn(
		            f"Audio length is not a multiple of frame length ({frame_size_in_ms} ms). Padding with zeros."
		        )
		        pad_length = frame_length_samples - remainder
		        # Pad only at the end of the sample axis; leave every other axis alone
		        pad_width = [(0, pad_length)] + [(0, 0)] * (x.ndim - 1)
		        x = np.pad(x, pad_width, "constant")

		    return x

+4 −4

Original line number	Diff line number	Diff line
		@@ -30,7 +30,7 @@
		# the United Nations Convention on Contracts on the International Sales of Goods.
		#

		import warnings
		from warnings import warn
		from pathlib import Path
		from typing import Optional, Tuple, Union

		@@ -149,7 +149,7 @@ def load_ir(
		)
		).is_file():
		dataset_suffix = "SBA3"
		warnings.warn("No SBA1 dataset found -> use truncated SBA3 dataset")
		warn("No SBA1 dataset found -> use truncated SBA3 dataset")
		elif in_fmt.endswith("2"):
		dataset_suffix = "SBA2"
		# Use truncated SBA3 dataset if no SBA1 or 2 dataset exists
		@@ -159,7 +159,7 @@ def load_ir(
		)
		).is_file():
		dataset_suffix = "SBA3"
		warnings.warn("No SBA2 dataset found -> use truncated SBA3 dataset")
		warn("No SBA2 dataset found -> use truncated SBA3 dataset")
		else:
		dataset_suffix = "SBA3"

		@@ -172,7 +172,7 @@ def load_ir(
		latency_smp = latency_s
		else:
		latency_smp = int(np.min(np.argmax(np.sum(np.abs(IR), axis=1), axis=0)))
		warnings.warn(
		warn(
		f"No latency of HRTF dataset specified in {path_dataset} file -> computed latency: {latency_smp} sample(s)"
		)

+3 −0

Original line number	Diff line number	Diff line
		@@ -212,6 +212,9 @@ def process_audio(
		if fs is None:
		fs = x.fs

		"""making sure length is a multiple of the frame size"""
		x.audio = audioarray.multiple_of_frame_size(x.audio, fs)

		"""delay audio"""
		if delay is not None:
		if logger: