Commit f141f817 authored by PLAINSI's avatar PLAINSI
Browse files

Next draft

parent 42bbe28d
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -56,9 +56,9 @@ scenes:
        IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"]
        overlap: 1.0
        
    a2: 
        name: "G6S2.wav"
        description: "Two speakers sitting in a car."
        source: ["fa1.wav", "ma1.wav"]
        IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"]
        overlap: 1.0
    #a2: 
    #    name: "G6S2.wav"
    #    description: "Two speakers sitting in a car."
    #    source: ["fa1.wav", "ma1.wav"]
    #    IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"]
    #    overlap: 1.0
+0 −66
Original line number Diff line number Diff line
@@ -184,69 +184,3 @@ def reverb_stereo(
    y.audio = np.column_stack([y_left.audio, y_right.audio])

    return y

def reverb_foa(
    input: Audio,
    foa_IR: Audio,
    align: Optional[float] = None,
) -> Audio:
    """
    Convolve a mono audio signal with a four-channel (FOA) impulse response

    Each of the first four columns of the impulse response is wrapped into a
    single-channel Audio object and passed, together with the input, to the
    reverb() helper. The four results are stacked column-wise into one
    four-channel output.

    Parameters
    ----------
    input: Audio
        Input audio signal
    foa_IR: Audio
        FOA impulse response; columns 0 to 3 of its audio array are used.
        The object is not modified by this function.
    align: float
        multiplicative factor to apply to the reverberated sound in order to align its energy level with the second file;
        if None, it is set to the reciprocal of the largest FFT magnitude found over all IR channels

    Returns
    -------
    output: Audio
        Convolved audio signal with FOA IR (name "FOA", 4 channels)
    """

    # convert to float32 on a local array so that the caller's IR object keeps its original data
    ir_audio = np.asarray(foa_IR.audio, dtype=np.float32)

    # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB
    if align is None:
        H = fft(ir_audio, axis=0)
        align = 1.0 / np.max(np.abs(H))

    # convolve the mono input with each of the four IR channels in turn
    channel_outputs = []
    for ch in range(4):
        IR_ch = copy(foa_IR)
        IR_ch.name = "MONO"
        IR_ch.num_channels = 1
        IR_ch.audio = np.reshape(ir_audio[:, ch], (-1, 1))
        channel_outputs.append(reverb(input, IR_ch, align=align).audio)

    # combine into foa output
    y = copy(input)
    y.name = "FOA"
    y.num_channels = 4
    y.audio = np.column_stack(channel_outputs)

    return y
+112 −5
Original line number Diff line number Diff line
@@ -32,13 +32,18 @@

import logging
import os
from copy import copy

from math import floor
from typing import Optional

import numpy as np
import scipy.signal as ssg
from scipy.fft import fft

from ivas_processing_scripts.audiotools.audio import Audio
from ivas_processing_scripts.audiotools import audio, audiofile
from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness
from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_foa
from ivas_processing_scripts.generation import config

SEED_RANDOM_NOISE = 0
@@ -49,6 +54,108 @@ def csv_formatdata(data):
    for row in data:
        yield ["%0.2f" % v for v in row]

def filter_one(
    input: Audio,
    IR: Audio,
    align: Optional[float] = None,
) -> Audio:
    """
    Filter an audio signal with a single-channel impulse response (FIR filtering)

    The impulse response is resampled to the sampling rate of the input if
    the two differ, optionally scaled by `align`, and applied along the time
    axis (axis 0) of the input audio. Neither `input` nor `IR` is modified.

    Parameters
    ----------
    input: Audio
        Input audio signal
    IR: Audio
        Impulse response; if its audio array has more than one column, only the first column is used
    align: float
        multiplicative factor to apply to the impulse response in order to align the energy level of the filtered sound;
        if None, no scaling is applied

    Returns
    -------
    output: Audio
        Copy of the input whose audio is the input filtered with the IR.
        The output has the same length as the input, i.e. the filter tail
        beyond the last input sample is not kept.
    """

    # resample IR to the sampling rate of the input signal (on a local array, IR itself is left untouched)
    taps = np.asarray(IR.audio)
    if input.fs != IR.fs:
        taps = ssg.resample_poly(taps, input.fs, IR.fs, axis=0)

    # the FIR coefficients must be one-dimensional: take the first channel of the IR
    if taps.ndim > 1:
        taps = taps[:, 0]

    # apply the alignment gain only when one was provided
    if align is not None:
        taps = taps * align

    # filter along the time axis and store the result in a new array,
    # the shallow copy would otherwise share its audio buffer with the input
    output = copy(input)
    output.audio = ssg.lfilter(taps, 1, input.audio, axis=0)

    return output

def filter_foa(
    input: Audio,
    foa_IR: Audio,
    align: Optional[float] = None,
) -> Audio:
    """
    Filter a mono audio signal with a four-channel (FOA) impulse response

    Each of the first four columns of the impulse response is wrapped into a
    single-channel Audio object and applied to the input with filter_one().
    The four results are stacked column-wise into one four-channel output.

    Parameters
    ----------
    input: Audio
        Input audio signal
    foa_IR: Audio
        FOA impulse response; columns 0 to 3 of its audio array are used.
        The object is not modified by this function.
    align: float
        multiplicative factor to apply to the reverberated sound in order to align its energy level with the second file;
        if None, it is set to the reciprocal of the largest FFT magnitude found over all IR channels

    Returns
    -------
    output: Audio
        Convolved audio signal with FOA IR (name "FOA", 4 channels)
    """

    # convert to float32 on a local array so that the caller's IR object keeps its original data
    ir_audio = np.asarray(foa_IR.audio, dtype=np.float32)

    # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB
    if align is None:
        H = fft(ir_audio, axis=0)
        align = 1.0 / np.max(np.abs(H))

    # filter the mono input with each of the four IR channels in turn
    channel_outputs = []
    for ch in range(4):
        IR_ch = copy(foa_IR)
        IR_ch.name = "MONO"
        IR_ch.num_channels = 1
        IR_ch.audio = np.reshape(ir_audio[:, ch], (-1, 1))
        channel_outputs.append(filter_one(input, IR_ch, align=align).audio)

    # combine into foa output
    y = copy(input)
    y.name = "FOA"
    y.num_channels = 4
    y.audio = np.column_stack(channel_outputs)

    return y


def generate_foa_items(
    cfg: config.TestConfig,
@@ -96,7 +203,7 @@ def generate_foa_items(
        N_sources = len(np.atleast_1d(scene["source"]))

        # read the IR (check if foa or two mono files were provided)
        source_IR = np.atleast_1d(scene["IR"])
        # source_IR = np.atleast_1d(scene["IR"])

        # read the overlap length
        if "overlap" in scene.keys():
@@ -111,7 +218,7 @@ def generate_foa_items(
            print("source file: {}".format(source_file))
            IR_file = np.atleast_1d(scene["IR"])[i]

            logger.info(f"Convolving {source_file} with {source_IR}")
            logger.info(f"Convolving {source_file} with {IR_file}")

            # read source file
            x = audio.fromfile(
@@ -124,10 +231,10 @@ def generate_foa_items(
            )

            # convolve with FOA IR
            x = reverb_foa(x, IR)
            x = filter_foa(x, IR)

            # adjust the level of the foa signal
            _, scale_factor, _ = get_loudness(x, cfg.loudness, "FOA")
            _, scale_factor, _ = get_loudness(x, cfg.loudness, "BINAURAL")
            x.audio *= scale_factor

            # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap)