Loading item_gen_configs/FOA_CONFIG.yml +6 −6 Original line number Diff line number Diff line Loading @@ -56,9 +56,9 @@ scenes: IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"] overlap: 1.0 a2: name: "G6S2.wav" description: "Two speakers sitting in a car." source: ["fa1.wav", "ma1.wav"] IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"] overlap: 1.0 #a2: # name: "G6S2.wav" # description: "Two speakers sitting in a car." # source: ["fa1.wav", "ma1.wav"] # IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"] # overlap: 1.0 ivas_processing_scripts/audiotools/wrappers/reverb.py +0 −66 Original line number Diff line number Diff line Loading @@ -184,69 +184,3 @@ def reverb_stereo( y.audio = np.column_stack([y_left.audio, y_right.audio]) return y def reverb_foa( input: Audio, foa_IR: Audio, align: Optional[float] = None, ) -> Audio: """ Wrapper for the ITU-T reverb binary to convolve mono audio signal with an FOA impulse response Parameters ---------- input: Audio Input audio signal IR: Audio Impulse response align: float multiplicative factor to apply to the reverberated sound in order to align its energy level with the second file Returns ------- output: Audio Convolved audio signal with FOA IR """ # convert to float32 foa_IR.audio = np.float32(foa_IR.audio) # separate into each channel IR_w = copy(foa_IR) IR_w.name = "MONO" IR_w.num_channels = 1 IR_w.audio = np.reshape(foa_IR.audio[:, 0], (-1, 1)) IR_x = copy(foa_IR) IR_x.name = "MONO" IR_x.num_channels = 1 IR_x.audio = np.reshape(foa_IR.audio[:, 1], (-1, 1)) IR_y = copy(foa_IR) IR_y.name = "MONO" IR_y.num_channels = 1 IR_y.audio = np.reshape(foa_IR.audio[:, 2], (-1, 1)) IR_z = copy(foa_IR) IR_z.name = "MONO" IR_z.num_channels = 1 IR_z.audio = np.reshape(foa_IR.audio[:, 3], (-1, 1)) # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB if align is None: H = fft(foa_IR.audio, axis=0) align = 1.0 / np.max(np.abs(H)) # 
convolve mono input with left and right IR y_w = reverb(input, IR_w, align=align) y_x = reverb(input, IR_x, align=align) y_y = reverb(input, IR_y, align=align) y_z = reverb(input, IR_z, align=align) # combine into foa output y = copy(input) y.name = "FOA" y.num_channels = 4 y.audio = np.column_stack([y_w.audio, y_x.audio, y_y.audio, y_z.audio]) return y ivas_processing_scripts/generation/process_foa_items.py +112 −5 Original line number Diff line number Diff line Loading @@ -32,13 +32,18 @@ import logging import os from copy import copy from math import floor from typing import Optional import numpy as np import scipy.signal as ssg from scipy.fft import fft from ivas_processing_scripts.audiotools.audio import Audio from ivas_processing_scripts.audiotools import audio, audiofile from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_foa from ivas_processing_scripts.generation import config SEED_RANDOM_NOISE = 0 Loading @@ -49,6 +54,108 @@ def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] def filter_one( input: Audio, IR: Audio, align: Optional[float] = None, ) -> Audio: """ Parameters ---------- input: Audio Input audio signal IR: Audio Impulse response align: float multiplicative factor to apply to the reverberated sound in order to align its energy level with a second filePath to the output file Returns ------- output: Audio Convolved audio signal with IR """ # resample IR to input signal tmp_IR = copy(IR) if input.fs != IR.fs: tmp_IR.audio = ssg.resample_poly(IR.audio, input.fs, IR.fs) # resample_itu(tmp_input, IR.fs) tmp_IR.fs = input.fs # down-scale IR to prevent saturation # max_value = np.max(np.abs(IR.audio)) # if max_value > 1.0: # IR.audio = IR.audio / max_value tmp_IR.audio = tmp_IR.audio * align output = copy(input) output.audio[0] = ssg.lfilter(tmp_IR.audio[0],1,input.audio[0]) return output def filter_foa( input: Audio, foa_IR: Audio, 
align: Optional[float] = None, ) -> Audio: """ Parameters ---------- input: Audio Input audio signal IR: Audio Impulse response align: float multiplicative factor to apply to the reverberated sound in order to align its energy level with the second file Returns ------- output: Audio Convolved audio signal with FOA IR """ # convert to float32 foa_IR.audio = np.float32(foa_IR.audio) # separate into each channel IR_w = copy(foa_IR) IR_w.name = "MONO" IR_w.num_channels = 1 IR_w.audio = np.reshape(foa_IR.audio[:, 0], (-1, 1)) IR_x = copy(foa_IR) IR_x.name = "MONO" IR_x.num_channels = 1 IR_x.audio = np.reshape(foa_IR.audio[:, 1], (-1, 1)) IR_y = copy(foa_IR) IR_y.name = "MONO" IR_y.num_channels = 1 IR_y.audio = np.reshape(foa_IR.audio[:, 2], (-1, 1)) IR_z = copy(foa_IR) IR_z.name = "MONO" IR_z.num_channels = 1 IR_z.audio = np.reshape(foa_IR.audio[:, 3], (-1, 1)) # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB if align is None: H = fft(foa_IR.audio, axis=0) align = 1.0 / np.max(np.abs(H)) # convolve mono input with left and right IR y_w = filter_one(input, IR_w, align=align) y_x = filter_one(input, IR_x, align=align) y_y = filter_one(input, IR_y, align=align) y_z = filter_one(input, IR_z, align=align) # combine into foa output y = copy(input) y.name = "FOA" y.num_channels = 4 y.audio = np.column_stack([y_w.audio, y_x.audio, y_y.audio, y_z.audio]) return y def generate_foa_items( cfg: config.TestConfig, Loading Loading @@ -96,7 +203,7 @@ def generate_foa_items( N_sources = len(np.atleast_1d(scene["source"])) # read the IR (check if foa or two mono files were provided) source_IR = np.atleast_1d(scene["IR"]) # source_IR = np.atleast_1d(scene["IR"]) # read the overlap length if "overlap" in scene.keys(): Loading @@ -111,7 +218,7 @@ def generate_foa_items( print("source file: {}".format(source_file)) IR_file = np.atleast_1d(scene["IR"])[i] logger.info(f"Convolving {source_file} with {source_IR}") 
logger.info(f"Convolving {source_file} with {IR_file}") # read source file x = audio.fromfile( Loading @@ -124,10 +231,10 @@ def generate_foa_items( ) # convolve with FOA IR x = reverb_foa(x, IR) x = filter_foa(x, IR) # adjust the level of the foa signal _, scale_factor, _ = get_loudness(x, cfg.loudness, "FOA") _, scale_factor, _ = get_loudness(x, cfg.loudness, "BINAURAL") x.audio *= scale_factor # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) Loading Loading
item_gen_configs/FOA_CONFIG.yml +6 −6 Original line number Diff line number Diff line Loading @@ -56,9 +56,9 @@ scenes: IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"] overlap: 1.0 a2: name: "G6S2.wav" description: "Two speakers sitting in a car." source: ["fa1.wav", "ma1.wav"] IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"] overlap: 1.0 #a2: # name: "G6S2.wav" # description: "Two speakers sitting in a car." # source: ["fa1.wav", "ma1.wav"] # IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"] # overlap: 1.0
# From ivas_processing_scripts/audiotools/wrappers/reverb.py (hunk @@ -184,69 +184,3 @@):
# the definition that follows reverb_stereo() in that file.


def reverb_foa(
    input: Audio,
    foa_IR: Audio,
    align: Optional[float] = None,
) -> Audio:
    """
    Convolve an audio signal with the four channels of an FOA impulse response

    Each of the first four columns of the impulse response is passed to reverb()
    as a single-channel IR and the four results are stacked column-wise.

    Parameters
    ----------
    input: Audio
        Input audio signal (described as mono by the original documentation)
    foa_IR: Audio
        FOA impulse response; its audio attribute is replaced by a float32 version
    align: float
        multiplicative factor forwarded to reverb(); when None it is set to the
        reciprocal of the largest magnitude found in the spectrum of the IR

    Returns
    -------
    output: Audio
        Copy of the input named "FOA" that holds the four convolved signals
    """

    # the conversion is stored back on the object that was passed in
    foa_IR.audio = np.float32(foa_IR.audio)

    # one single-channel IR object per column 0..3 of the FOA response
    channel_IRs = []
    for ch in range(4):
        single = copy(foa_IR)
        single.name = "MONO"
        single.num_channels = 1
        single.audio = np.reshape(foa_IR.audio[:, ch], (-1, 1))
        channel_IRs.append(single)

    # default scaling: maximum gain of the IR across all frequencies becomes 0dB
    if align is None:
        spectrum = fft(foa_IR.audio, axis=0)
        align = 1.0 / np.max(np.abs(spectrum))

    # run the input through reverb() once per channel, keeping the channel order
    convolved = [reverb(input, single, align=align).audio for single in channel_IRs]

    # assemble the four-channel result
    result = copy(input)
    result.name = "FOA"
    result.num_channels = 4
    result.audio = np.column_stack(convolved)

    return result
# From ivas_processing_scripts/generation/process_foa_items.py: filtering helpers
# that generate_foa_items() calls in place of reverb_foa().


def filter_one(
    input: Audio,
    IR: Audio,
    align: Optional[float] = None,
) -> Audio:
    """
    Filter an audio signal with a single-channel impulse response

    The impulse response is applied as an FIR filter with scipy.signal.lfilter
    along the sample axis (axis 0), so the output has exactly as many samples as
    the input.

    Parameters
    ----------
    input: Audio
        Input audio signal; it is not modified
    IR: Audio
        Impulse response; only its first column is used and it is not modified
    align: float
        multiplicative factor applied to the impulse response before filtering;
        None leaves the impulse response unscaled

    Returns
    -------
    output: Audio
        Shallow copy of the input whose audio attribute is a new array with the
        filtered samples
    """

    # take the filter taps as a 1-D array: samples run along axis 0, channels along axis 1
    taps = np.asarray(IR.audio)
    if taps.ndim > 1:
        taps = taps[:, 0]

    # resample the IR to the sampling rate of the input signal
    if input.fs != IR.fs:
        taps = ssg.resample_poly(taps, input.fs, IR.fs)

    # None is the declared default, so it must not reach the multiplication
    if align is not None:
        taps = taps * align

    # copy() is shallow: bind a new array instead of writing into input.audio,
    # otherwise the caller's signal would be overwritten
    output = copy(input)
    output.audio = ssg.lfilter(taps, 1.0, np.asarray(input.audio), axis=0)

    return output


def filter_foa(
    input: Audio,
    foa_IR: Audio,
    align: Optional[float] = None,
) -> Audio:
    """
    Filter an audio signal with the four channels of an FOA impulse response

    Each of the first four columns of the impulse response is applied to the
    input with filter_one() and the results are stacked column-wise.

    Parameters
    ----------
    input: Audio
        Input audio signal (presumably mono, giving a four-column output; confirm against callers)
    foa_IR: Audio
        FOA impulse response; it is not modified
    align: float
        multiplicative factor applied to all four impulse responses; when None it
        is chosen such that the maximum gain of the IR across all channels and
        frequencies is 0dB

    Returns
    -------
    output: Audio
        Shallow copy of the input named "FOA" that holds the filtered signals
    """

    # float32 version of the IR samples, kept local so the caller's object stays as it was
    ir_samples = np.float32(foa_IR.audio)

    # calculate the scaling factor from the spectrum of the complete IR
    if align is None:
        H = fft(ir_samples, axis=0)
        align = 1.0 / np.max(np.abs(H))

    # filter the input with each of the four IR channels in turn
    filtered = []
    for ch in range(4):
        ch_IR = copy(foa_IR)
        ch_IR.name = "MONO"
        ch_IR.num_channels = 1
        ch_IR.audio = np.reshape(ir_samples[:, ch], (-1, 1))
        filtered.append(filter_one(input, ch_IR, align=align).audio)

    # combine into foa output
    y = copy(input)
    y.name = "FOA"
    y.num_channels = 4
    y.audio = np.column_stack(filtered)

    return y
"BINAURAL") x.audio *= scale_factor # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) Loading