comments for understanding process flow + h5py support for importing custom brirs (cd5b55a5) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py

+8 −3

Original line number	Diff line number	Diff line
		@@ -34,6 +34,8 @@ from pathlib import Path
		from typing import Optional, Tuple, Union
		from warnings import warn

		import h5py
		import copy
		import numpy as np
		from scipy.io import loadmat

		@@ -73,6 +75,7 @@ def load_hrtf(
		)

		mat_contents = loadmat(filename)
		#mat_contents = h5py.File(filename, 'r')

		try:
		IR = mat_contents["IR"]
		@@ -129,8 +132,8 @@ def load_ir(

		if in_fmt.startswith("MOZART"):
		dataset_suffix = "FULL"
		elif in_fmt in CHANNEL_BASED_AUDIO_FORMATS.keys():
		dataset_suffix = "LS"
		elif in_fmt in CHANNEL_BASED_AUDIO_FORMATS.keys(): ##This is triggered when -if HOA3 is passed from the command line. dataset suffix is being set to LS, and "FULL" mode cannot be used - in_format being used is 7_1_4.
		dataset_suffix = "FULL"

		elif out_fmt.startswith("BINAURAL"):
		dataset_prefix = "HRIR"
		@@ -192,7 +195,9 @@ def load_ir(
		in_fmt = fromtype(in_fmt)
		tmp_fmt = fromtype("LS")

		IR_tmp = IR.copy()
		#IR_tmp = IR.copy() # Bug: deprecated function, no longer works
		IR_tmp = np.array(IR, copy=True)
		#IR_tmp = copy.deepcopy(IR) # -> error - h5py lists cannot be pickled
		IR = np.zeros([IR_tmp.shape[0], IR_tmp.shape[1], in_fmt.num_channels])

		ir_index = 0

ivas_processing_scripts/audiotools/convert/channelbased.py

+2 −1

Original line number	Diff line number	Diff line
		@@ -125,7 +125,7 @@ def render_cba_to_binaural(
		if trajectory is not None:
		cba.audio = rotate_cba(cba, trajectory)

		IR, _, latency_smp = load_ir(cba.name, bin.name, bin_dataset)
		IR, _, latency_smp = load_ir(cba.name, bin.name, bin_dataset) ##cba.name passed to this function is 7_1_4 -> this caused the renderer to attach suffix LS to the binaural dataset name in the next step

		# render LFE
		if bin_lfe_gain is not None:
		@@ -133,6 +133,7 @@ def render_cba_to_binaural(
		cba.audio, cba.fs, cba.lfe_index, bin_lfe_gain
		)

		##The following two operations must be stateful for frame-wise, TCP enabled rendering
		# render rest of the signal
		bin.audio = binaural_fftconv(cba.audio, IR, cba.num_channels, cba.lfe_index)
		# compensate delay from binaural dataset

ivas_processing_scripts/audiotools/convert/scenebased.py

+13 −4

Original line number	Diff line number	Diff line
		@@ -89,6 +89,7 @@ def convert_scenebased(
		f"Conversion from {sba.name} to {out.name} is unsupported!"
		)

		#Rendered Audio is returned here
		return out


		@@ -114,16 +115,23 @@ def render_sba_to_binaural(
		Name of binaural dataset without prefix or suffix
		"""

		if trajectory is not None:
		if trajectory is not None: ##Trajectory option for pre-coded head rotations??
		sba.audio = rotate_sba(sba, trajectory)

		if "ROOM" in bin.name:
		if "ROOM" in bin.name: #this is triggered for BINAURAL_ROOM option
		## Does this have to be changed based on the new angles mode in SOFA, or based on the input format?
		cba_tmp = audio.fromtype("7_1_4")
		##Temporary change to 5_1
		##cba_tmp = audio.fromtype("5_1")
		cba_tmp.fs = sba.fs

		render_sba_to_cba(sba, cba_tmp)

		channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory)
		##connect cba_tmp to ivas python renderer

		# Binaural dataset name is not being passed here
		channelbased.render_cba_to_binaural(cba_tmp, bin, trajectory, bin_dataset)
		#channelbased.render_cba_to_binaural(cba_tmp, bin, kwargs) - does not work if kwargs is passed as an argument
		else:
		IR, _, latency_smp = load_ir(sba.name, bin.name, bin_dataset)

		@@ -156,6 +164,7 @@ def render_sba_to_cba(
		Channel-based output audio
		"""

		##recreate
		render_mtx = get_allrad_mtx(sba.ambi_order, cba)
		cba.audio = sba.audio @ render_mtx.T

		@@ -411,7 +420,7 @@ def get_allrad_mtx(
		ALLRAD matrix
		"""

		n_harm = nchan_from_ambi_order(ambi_order)
		n_harm = nchan_from_ambi_order(ambi_order) ## (ambi_order + 1) ** 2

		if cba.name == "MONO":
		hoa_dec = np.zeros([1, n_harm])