clean up and documentation (776ddd1e) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

ivas_processing_scripts/audiotools/audio.py

+1 −0

Original line number	Diff line number	Diff line
		@@ -214,6 +214,7 @@ class MetadataAssistedSpatialAudio(Audio):
		raise ValueError(
		f"Unsupported metadata assisted spatial audio format {name}"
		)
		self.metadata_files = []

		@classmethod
		def _from_file(

LFS

+2 −2

Original line number	Diff line number	Diff line
		version https://git-lfs.github.com/spec/v1
		oid sha256:1a88a3463513647455bcc38bd7180860edfb97195602a8ff832a6be1421474f8
		size 14335861
		oid sha256:bf86a03f0b13932c5c138af22584f864b75c5733df1b01ac3fdf7750a1bdbe5f
		size 14335913

+6 −3

Original line number	Diff line number	Diff line
		Files in this directory should contain impulse responses for use in rendering in Matlab .mat format
		A sampling rate of 48 kHz is assumed

		Files should adhere to the following naming scheme:

		{HRIR\|BRIR}_{DATASETNAME}_{FULL\|LS\|SBA3}.mat
		{HRIR\|BRIR}_{DATASETNAME}_{FULL\|LS\|SBA(1-3)}.mat

		- HRIR or BRIR
		specifies the type of impulse response which will be used
		@@ -15,7 +16,9 @@ Files should adhere to the following naming scheme:
		FULL: all available measurements on the sphere
		LS: superset of supported loudspeaker layouts
		(see audiotools.constants.CHANNEL_BASED_AUDIO_FORMATS["LS"])
		SBA3: impulse responses transformed to 3rd order ambisonics by external conversion
		SBA(1-3): impulse responses transformed to ambisonics by external conversion
		if available, SBA1 is used for FOA, SBA2 for HOA2 and SBA3 for HOA3
		if not available, SBA3 is used and truncated for all ambisonic formats

		Each Matlab file should contain the following variables:
		- IR
		@@ -24,7 +27,7 @@ Each Matlab file should contain the following variables:
		array of {azimuth, elevation, radius} of dimensions [n_channels x 3]
		required for FULL, optional otherwise
		- latency_s
		latency of the dataset in seconds
		latency of the dataset in samples
		optional, will be estimated if not provided

		LICENSES:

+5 −8

Original line number	Diff line number	Diff line
		@@ -161,18 +161,17 @@ def load_ir(
		else:
		dataset_suffix = "SBA3"

		IR, SourcePosition, latency_s = load_hrtf(
		Path(__file__).parent.joinpath(
		path_dataset = Path(__file__).parent.joinpath(
		f"{dataset_prefix}_{dataset}_{dataset_suffix}.mat"
		)
		)
		IR, SourcePosition, latency_s = load_hrtf(path_dataset)

		if latency_s is not None:
		latency_smp = int(latency_s * 48000)
		latency_smp = latency_s[0, 0]
		else:
		latency_smp = int(np.min(np.argmax(np.sum(np.abs(IR), axis=1), axis=0)))
		warnings.warn(
		f"No latency of HRTF dataset specified in .mat file -> computed latency: {latency_smp}"
		f"No latency of HRTF dataset specified in {path_dataset} file -> computed latency: {latency_smp} sample(s)"
		)

		if in_fmt.startswith("STEREO"):
		@@ -182,7 +181,6 @@ def load_ir(
		and not in_fmt.startswith("CUSTOM_LS")
		and not in_fmt.startswith("MOZART")
		):
		# TODO update, use _get_audio_dict() instead of using fromtype object?
		# extract positions from the loudspeaker file
		in_fmt = fromtype(in_fmt)
		tmp_fmt = fromtype("LS")
		@@ -200,7 +198,6 @@ def load_ir(
		if j != in_fmt.lfe_index[0]:
		IR[:, :, ir_index] = IR_tmp[:, :, i]
		ir_index += 1
		# TODO: add custom ls support

		return IR, SourcePosition, latency_smp

+8 −2

Original line number	Diff line number	Diff line
		@@ -62,7 +62,9 @@ def convert_file(
		in_meta: Optional[list] = None,
		logger: Optional[logging.Logger] = None,
		**kwargs,
		):
		) -> None:
		"""Conversion function for one audio file"""

		if not in_fmt:
		raise ValueError("Input audio format must be specified!")

		@@ -149,7 +151,9 @@ def convert(
		esdru_alpha: Optional[float] = None,
		logger: Optional[logging.Logger] = None,
		**kwargs,
		):
		) -> None:
		"""Perform pre-processing, conversion and post-processing"""

		"""pre-processing"""
		process_audio(
		x=input,
		@@ -203,6 +207,8 @@ def process_audio(
		esdru_alpha: Optional[float] = None,
		logger: Optional[logging.Logger] = None,
		) -> None:
		"""Perform (pre-/pos-) processing of audio"""

		if fs is None:
		fs = x.fs