Merge branch '47-p-800-add-hoa2-item-generation-to-scripts' into 'main' (38d553d6) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

item_gen_configs/HOA2_CONFIG.yml

0 → 100644

+61 −0

Original line number	Diff line number	Diff line
		---
		################################################
		# General configuration
		################################################

		### Output format
		format: "HOA2"

		### Output sampling rate in Hz needed for headerless audio files; default = 48000
		fs: 48000

		### IR sampling rate in Hz needed for headerless audio files; default = 48000
		IR_fs: 48000

		### Any relative paths will be interpreted relative to the working directory the script is called from!
		### Usage of absolute paths is recommended.
		### Do not use file names with dots "." in them! This is not supported, use "_" instead
		### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions

		### Input path to mono files
		input_path: "./items_mono"

		### Input path to HOA2 impulse response files, default = './ivas_processing_scripts/generation/IR'
		IR_path: "./IRs"

		### Output path for generated test items and metadata files
		output_path: "./items_HOA2"

		### (Optional) Output path for binauralized versions of the generated HOA2 items
		# binaural_path: "./items_HOA2_bin"

		### Target loudness in LKFS; default = null (no loudness normalization applied)
		loudness: -26

		### Pre-amble and Post-amble length in seconds (default = 0.0)
		preamble: 0.5
		postamble: 1.0

		### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence)
		add_low_level_random_noise: true


		################################################
		### Scene description
		################################################

		### Each scene must start with the sceneN tag
		### Specify the mono source filename (the program will search for it in the input_path folder)
		### Specify the HOA2 IR source filename (the program will search for it in the IR_path folder)
		### Specify the overlap length in seconds for each input source (negative value creates a gap)
		### Note 1: use [val1, val2, ...] for multiple sources in a scene
		### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames

		scenes:
		a1:
		name: "out.wav"
		description: ""
		source: ["fa1.wav", "ma1.wav"]
		IR: ["IR_HOA2_env1/FreefieldFloor_TalkPos1_EigenHoA2_SinSweep_9chn.wav", "IR_HOA2_env1/FreefieldFloor_TalkPos2_EigenHoA2_SinSweep_9chn.wav"]
		overlap: -0.2

item_gen_configs/P800-4.yml

+3 −0

Original line number	Diff line number	Diff line
		@@ -26,6 +26,9 @@ IR_path: "./IRs"
		### Output path for generated test items and metadata files
		output_path: "./items_FOA"

		### (Optional) Output path for binauralized versions of the generated FOA items
		# binaural_path: "./items_FOA_bin"

		### Target loudness in LKFS; default = null (no loudness normalization applied)
		loudness: -26

item_gen_configs/P800-5.yml

+3 −0

Original line number	Diff line number	Diff line
		@@ -26,6 +26,9 @@ IR_path: "./IRs"
		### Output path for generated test items and metadata files
		output_path: "./items_FOA"

		### (Optional) Output path for binauralized versions of the generated FOA items
		# binaural_path: "./items_FOA_bin"

		### Target loudness in LKFS; default = null (no loudness normalization applied)
		loudness: -26

ivas_processing_scripts/audiotools/wrappers/reverb.py

+65 −1

Original line number	Diff line number	Diff line
		@@ -238,7 +238,7 @@ def reverb_foa(
		H = fft(foa_IR.audio, axis=0)
		align = 1.0 / np.max(np.abs(H))

		# convolve mono input with left and right IR
		# convolve mono input with FOA IR
		y_w = reverb(input, IR_w, align=align)
		y_x = reverb(input, IR_x, align=align)
		y_y = reverb(input, IR_y, align=align)
		@@ -251,3 +251,67 @@ def reverb_foa(
		y.audio = np.column_stack([y_w.audio, y_x.audio, y_y.audio, y_z.audio])

		return y


		def reverb_hoa2(
		    input: Audio,
		    hoa2_IR: Audio,
		    align: Optional[float] = None,
		) -> Audio:
		    """
		    Wrapper for the ITU-T reverb binary to convolve mono audio signal with an HOA2 impulse response

		    Each of the 9 HOA2 IR channels is convolved separately with the input via
		    reverb() and the results are stacked column-wise into one 9-channel signal.

		    Parameters
		    ----------
		    input: Audio
		        Input audio signal
		    hoa2_IR: Audio
		        HOA2 impulse response; its audio must be a 2-D array with at least
		        9 columns (only the first 9 are convolved). The object is not modified.
		    align: Optional[float]
		        multiplicative factor to apply to the reverberated sound in order to align its energy level with the second file;
		        if None, it is set to 1 / max(|FFT(IR)|) taken over all IR channels and frequency bins

		    Returns
		    -------
		    output: Audio
		        Convolved audio signal with HOA2 IR

		    Raises
		    ------
		    ValueError
		        If the impulse response audio is not 2-D or has fewer than 9 channels
		    """

		    numchannels = 9  # HOA2 by definition

		    # convert to float32 on a local array so that the caller's IR object is left untouched
		    ir_audio = np.float32(hoa2_IR.audio)

		    # fail early with a clear message instead of an IndexError inside the channel loop
		    if ir_audio.ndim != 2 or ir_audio.shape[1] < numchannels:
		        raise ValueError(
		            f"HOA2 impulse response must have at least {numchannels} channels, "
		            f"got audio of shape {ir_audio.shape}"
		        )

		    # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB
		    if align is None:
		        H = fft(ir_audio, axis=0)
		        align = 1.0 / np.max(np.abs(H))

		    # single-channel IR container reused for every channel; only its audio is swapped
		    IR = copy(hoa2_IR)
		    IR.name = "MONO"
		    IR.num_channels = 1

		    ych = []
		    for i in range(numchannels):
		        # separate IR into each channel
		        IR.audio = np.reshape(ir_audio[:, i], (-1, 1))
		        # convolve mono input with channel IR (same align factor for all channels)
		        ych.append(reverb(input, IR, align=align))

		    # combine into hoa2 output
		    y = copy(input)
		    y.name = "HOA2"
		    y.num_channels = numchannels
		    y.audio = np.column_stack([ch.audio for ch in ych])

		    return y

ivas_processing_scripts/generation/init.py

+4 −0

Original line number	Diff line number	Diff line
		@@ -43,6 +43,7 @@ from ivas_processing_scripts.constants import (
		from ivas_processing_scripts.generation import (
		config,
		process_foa_items,
		process_hoa2_items,
		process_ism_items,
		process_stereo_items,
		)
		@@ -96,6 +97,9 @@ def main(args):
		elif cfg.format == "FOA":
		# generate FOA items according to scene description
		process_foa_items.generate_foa_items(cfg, logger)
		elif cfg.format == "HOA2":
		# generate HOA2 items according to scene description
		process_hoa2_items.generate_hoa2_items(cfg, logger)

		# copy configuration to output directory
		with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f: