Next draft (f141f817) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

item_gen_configs/FOA_CONFIG.yml

+6 −6

Original line number	Diff line number	Diff line
		@@ -56,9 +56,9 @@ scenes:
		IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"]
		overlap: 1.0

		a2:
		name: "G6S2.wav"
		description: "Two speakers sitting in a car."
		source: ["fa1.wav", "ma1.wav"]
		IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"]
		overlap: 1.0
		#a2:
		# name: "G6S2.wav"
		# description: "Two speakers sitting in a car."
		# source: ["fa1.wav", "ma1.wav"]
		# IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"]
		# overlap: 1.0

ivas_processing_scripts/audiotools/wrappers/reverb.py

+0 −66

Original line number	Diff line number	Diff line
		@@ -184,69 +184,3 @@ def reverb_stereo(
		y.audio = np.column_stack([y_left.audio, y_right.audio])

		return y

		def reverb_foa(
		    input: Audio,
		    foa_IR: Audio,
		    align: Optional[float] = None,
		) -> Audio:
		    """
		    Wrapper for the ITU-T reverb binary to convolve mono audio signal with an FOA impulse response

		    Parameters
		    ----------
		    input: Audio
		        Input audio signal
		    foa_IR: Audio
		        FOA impulse response; channels 0..3 of its audio (samples along axis 0) are used
		    align: float
		        multiplicative factor to apply to the reverberated sound in order to align its energy level with the second file;
		        when omitted, it is derived from the IR such that the maximum gain of the IR filter across all frequencies is 0dB

		    Returns
		    -------
		    output: Audio
		        Convolved audio signal with FOA IR (four channels, named "FOA")
		    """

		    # convert to float32 on a local array, so that the caller's IR object is left untouched
		    ir_audio = np.float32(foa_IR.audio)

		    # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB
		    if align is None:
		        H = fft(ir_audio, axis=0)
		        align = 1.0 / np.max(np.abs(H))

		    # convolve the mono input with each of the four IR channels (w, x, y, z), all with the same scaling
		    components = []
		    for ch in range(4):
		        IR_ch = copy(foa_IR)
		        IR_ch.name = "MONO"
		        IR_ch.num_channels = 1
		        IR_ch.audio = np.reshape(ir_audio[:, ch], (-1, 1))
		        components.append(reverb(input, IR_ch, align=align).audio)

		    # combine into foa output
		    y = copy(input)
		    y.name = "FOA"
		    y.num_channels = 4
		    y.audio = np.column_stack(components)

		    return y

ivas_processing_scripts/generation/process_foa_items.py

+112 −5

Original line number	Diff line number	Diff line
		@@ -32,13 +32,18 @@

		import logging
		import os
		from copy import copy

		from math import floor
		from typing import Optional

		import numpy as np
		import scipy.signal as ssg
		from scipy.fft import fft

		from ivas_processing_scripts.audiotools.audio import Audio
		from ivas_processing_scripts.audiotools import audio, audiofile
		from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness
		from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_foa
		from ivas_processing_scripts.generation import config

		SEED_RANDOM_NOISE = 0
		@@ -49,6 +54,108 @@ def csv_formatdata(data):
		for row in data:
		yield ["%0.2f" % v for v in row]

		def filter_one(
		    input: Audio,
		    IR: Audio,
		    align: Optional[float] = None,
		) -> Audio:
		    """
		    Filter the first channel of an audio signal with the first channel of an impulse response

		    Parameters
		    ----------
		    input: Audio
		        Input audio signal (samples along axis 0, channels along axis 1)
		    IR: Audio
		        Impulse response (same layout); resampled to the sampling rate of the input if the rates differ
		    align: float
		        multiplicative factor to apply to the impulse response in order to align the energy level
		        of the filtered sound with a second file; no scaling is applied when omitted

		    Returns
		    -------
		    output: Audio
		        Copy of the input whose first channel holds the signal convolved with the IR
		        (truncated to the length of the input)
		    """

		    # work on a local array, so that the caller's IR object is never modified
		    ir_audio = np.asarray(IR.audio)

		    # resample IR to the sampling rate of the input signal
		    if input.fs != IR.fs:
		        ir_audio = ssg.resample_poly(ir_audio, input.fs, IR.fs)

		    # apply the alignment gain (None means "leave the IR as it is")
		    if align is not None:
		        ir_audio = ir_audio * align

		    # FIR filtering along the time axis: the IR taps are the numerator coefficients
		    filtered = ssg.lfilter(ir_audio[:, 0], 1, input.audio[:, 0])

		    # copy() is shallow, therefore give the output its own buffer instead of
		    # overwriting the samples of the input signal
		    output = copy(input)
		    output.audio = np.array(input.audio, dtype=filtered.dtype)
		    output.audio[:, 0] = filtered

		    return output

		def filter_foa(
		    input: Audio,
		    foa_IR: Audio,
		    align: Optional[float] = None,
		) -> Audio:
		    """
		    Filter a mono audio signal with each of the four channels of an FOA impulse response

		    Parameters
		    ----------
		    input: Audio
		        Input audio signal
		    foa_IR: Audio
		        FOA impulse response; channels 0..3 of its audio (samples along axis 0) are used
		    align: float
		        multiplicative factor to apply to the reverberated sound in order to align its energy level with the second file;
		        when omitted, it is derived from the IR such that the maximum gain of the IR filter across all frequencies is 0dB

		    Returns
		    -------
		    output: Audio
		        Convolved audio signal with FOA IR (named "FOA", num_channels set to 4)
		    """

		    # convert to float32 on a local array, so that the caller's IR object is left untouched
		    ir_audio = np.float32(foa_IR.audio)

		    # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB
		    if align is None:
		        H = fft(ir_audio, axis=0)
		        align = 1.0 / np.max(np.abs(H))

		    # filter the mono input with each of the four IR channels (w, x, y, z), all with the same scaling
		    components = []
		    for ch in range(4):
		        IR_ch = copy(foa_IR)
		        IR_ch.name = "MONO"
		        IR_ch.num_channels = 1
		        IR_ch.audio = np.reshape(ir_audio[:, ch], (-1, 1))
		        components.append(filter_one(input, IR_ch, align=align).audio)

		    # combine into foa output
		    y = copy(input)
		    y.name = "FOA"
		    y.num_channels = 4
		    y.audio = np.column_stack(components)

		    return y


		def generate_foa_items(
		cfg: config.TestConfig,
		@@ -96,7 +203,7 @@ def generate_foa_items(
		N_sources = len(np.atleast_1d(scene["source"]))

		# read the IR (check if foa or two mono files were provided)
		source_IR = np.atleast_1d(scene["IR"])
		# source_IR = np.atleast_1d(scene["IR"])

		# read the overlap length
		if "overlap" in scene.keys():
		@@ -111,7 +218,7 @@ def generate_foa_items(
		print("source file: {}".format(source_file))
		IR_file = np.atleast_1d(scene["IR"])[i]

		logger.info(f"Convolving {source_file} with {source_IR}")
		logger.info(f"Convolving {source_file} with {IR_file}")

		# read source file
		x = audio.fromfile(
		@@ -124,10 +231,10 @@ def generate_foa_items(
		)

		# convolve with FOA IR
		x = reverb_foa(x, IR)
		x = filter_foa(x, IR)

		# adjust the level of the foa signal
		_, scale_factor, _ = get_loudness(x, cfg.loudness, "FOA")
		_, scale_factor, _ = get_loudness(x, cfg.loudness, "BINAURAL")
		x.audio *= scale_factor

		# shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap)