Fix lfilter input dimensions (7074a592) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

item_gen_configs/FOA_CONFIG.yml

+21 −6

Original line number	Diff line number	Diff line
		@@ -56,9 +56,24 @@ scenes:
		IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"]
		overlap: 1.0

		#a2:
		# name: "G6S2.wav"
		# description: "Two speakers sitting in a car."
		# source: ["fa1.wav", "ma1.wav"]
		# IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"]
		# overlap: 1.0
		a2:
		name: "G6S2.wav"
		description: "Two speakers sitting in a car."
		source: ["fa1.wav", "ma1.wav"]
		IR: ["AmbiX_-70x035y_0.01m.wav", "AmbiX_036x033y_0.01m.wav"]
		overlap: 1.0

		a3:
		name: "G2S1.wav"
		description: "Two speakers sitting in a car."
		source: ["fa1.wav", "ma1.wav"]
		IR: ["AmbiX_000x005y_0.1m.wav", "AmbiX_022x-38y_0.01m.wav"]
		overlap: -1.0

		a4:
		name: "G2S2.wav"
		description: "Two speakers sitting in a car."
		source: ["fa1.wav", "ma1.wav"]
		IR: ["AmbiX_022x-38y_0.01m.wav", "AmbiX_000x005y_0.1m.wav"]
		overlap: -1.0

ivas_processing_scripts/generation/process_foa_items.py

+7 −6

Original line number	Diff line number	Diff line
		@@ -88,7 +88,9 @@ def filter_one(

		tmp_IR.audio = tmp_IR.audio * align
		output = copy(input)
		output.audio[0] = ssg.lfilter(tmp_IR.audio[0],1,input.audio[0])
		intranspose = input.audio.transpose()
		outfilt = ssg.lfilter(tmp_IR.audio,[1.0],intranspose)
		output.audio = outfilt.transpose()

		return output

		@@ -120,22 +122,22 @@ def filter_foa(
		IR_w = copy(foa_IR)
		IR_w.name = "MONO"
		IR_w.num_channels = 1
		IR_w.audio = np.reshape(foa_IR.audio[:, 0], (-1, 1))
		IR_w.audio = foa_IR.audio[:, 0] # np.reshape(foa_IR.audio[:, 0], (-1, 1))

		IR_x = copy(foa_IR)
		IR_x.name = "MONO"
		IR_x.num_channels = 1
		IR_x.audio = np.reshape(foa_IR.audio[:, 1], (-1, 1))
		IR_x.audio = foa_IR.audio[:, 1] # np.reshape(foa_IR.audio[:, 1], (-1, 1))

		IR_y = copy(foa_IR)
		IR_y.name = "MONO"
		IR_y.num_channels = 1
		IR_y.audio = np.reshape(foa_IR.audio[:, 2], (-1, 1))
		IR_y.audio = foa_IR.audio[:, 2] # np.reshape(foa_IR.audio[:, 2], (-1, 1))

		IR_z = copy(foa_IR)
		IR_z.name = "MONO"
		IR_z.num_channels = 1
		IR_z.audio = np.reshape(foa_IR.audio[:, 3], (-1, 1))
		IR_z.audio = foa_IR.audio[:, 3] # np.reshape(foa_IR.audio[:, 3], (-1, 1))

		# calculate the multiplicative scaling factor such that the maximum gain of the IR filter across all frequencies is 0 dB
		if align is None:
		@@ -215,7 +217,6 @@ def generate_foa_items(
		for i in range(N_sources):
		# parse parameters from the scene description
		source_file = np.atleast_1d(scene["source"])[i]
		print("source file: {}".format(source_file))
		IR_file = np.atleast_1d(scene["IR"])[i]

		logger.info(f"Convolving {source_file} with {IR_file}")