Commit a6d90601 authored by Archit Tamarapu
Browse files

Merge branch...

Merge branch '109-characterization-background-noise-mixing-for-osba-does-not-work-correctly' into 'main'

Resolve "[characterization] Background noise mixing for OSBA does not work correctly"

See merge request !215
parents 4a791e54 2ef8353c
Loading
Loading
Loading
Loading
+1 −7
Original line number Diff line number Diff line
@@ -156,12 +156,6 @@ def main(args):
            # save process info to revert it later
            cfg.pre2 = cfg.proc_chains[0]["processes"][0]
            # preprocess background noise
            if (
                hasattr(cfg, "preprocessing")
                and hasattr(cfg.pre2, "background_noise")
                and cfg.pre2.background_noise is not None
                and cfg.pre2.background_noise.get("background_noise_path")
            ):
            preprocess_background_noise(cfg)
            # preprocess 2
            preprocess_2(cfg, logger)
+1 −1
Original line number Diff line number Diff line
@@ -427,7 +427,7 @@ def format_conversion(
        if fmt.startswith("MASA"):
            output.metadata_file = input.metadata_file
        elif fmt.startswith("ISM"):  # also includes combined formats
            output.metadata_files = list(output.metadata_files)
            output.metadata_files = list(input.metadata_files)

    else:
        if isinstance(input, audio.BinauralAudio):
+48 −27
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@

import re
from copy import deepcopy
from itertools import repeat
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Optional
@@ -43,11 +44,39 @@ from ivas_processing_scripts.audiotools.audio import Audio, ChannelBasedAudio
from ivas_processing_scripts.audiotools.audioarray import delay_compensation, pad_delay
from ivas_processing_scripts.audiotools.audiofile import read, write
from ivas_processing_scripts.constants import DEFAULT_CONFIG_BINARIES
from ivas_processing_scripts.utils import find_binary, run
from ivas_processing_scripts.utils import apply_func_parallel, find_binary, run

FILTER_TYPES_REGEX = r"[\n][\s]{3}[A-Z0-9]\w+\s+"


def run_filter(
    cmd_base: list[str],
    audio: np.ndarray,
    fs: int,
    chan: int,
    skip_channel: list[int],
    block_size: Optional[int] = None,
):
    """
    Filter one channel by running an external command on temporary files

    The samples are written to a temporary input file, the command is
    executed with the input and output file paths appended, and the
    result is read back from the output file. The temporary directory
    is removed when the function returns.

    Parameters
    ----------
    cmd_base: list[str]
        Command without the file arguments; it is copied, never modified
    audio: np.ndarray
        Samples of the channel to process
    fs: int
        Sampling rate handed to the file writer and reader
    chan: int
        Channel index, used for the skip check and for the temp file names
    skip_channel: list[int]
        Channel indices that are returned unfiltered; None or an empty
        list means that no channel is skipped
    block_size: Optional[int]
        If set (and non-zero), appended to the command as last argument

    Returns
    -------
    np.ndarray
        The unchanged input for a skipped channel, otherwise the data
        read from the output file of the command
    """
    # tolerate None so that direct callers do not have to normalise the argument
    if skip_channel and chan in skip_channel:
        return audio

    with TemporaryDirectory() as tmp_dir:
        tmp_dir = Path(tmp_dir)
        # file names match their role so that the executed command line is readable
        tmp_in = tmp_dir.joinpath(f"tmpFilterIn_{chan}.pcm")
        tmp_out = tmp_dir.joinpath(f"tmpFilterOut_{chan}.pcm")

        # work on a copy, the base command is shared between all channels
        cmd = [*cmd_base, str(tmp_in), str(tmp_out)]
        if block_size:
            cmd.append(str(block_size))

        write(tmp_in, audio, fs)
        run(cmd)
        # read before leaving the context, the files are deleted on exit
        out, _ = read(tmp_out, nchannels=1, fs=fs)
    return out


def filter_itu(
    input: Audio,
    flt_type: str,
@@ -155,32 +184,24 @@ def filter_itu(
        # normal filtering -> size remains
        output = np.zeros_like(input.audio)

    with TemporaryDirectory() as tmp_dir:
        tmp_dir = Path(tmp_dir)

        # process channels separately
        for channel in range(input.num_channels):
            if skip_channel and channel in skip_channel:
                output[:, channel] = input.audio[:, channel]
                continue

            cmd_in_out = cmd.copy()

            tmp_in = tmp_dir.joinpath(f"tmp_filterIn{channel}.pcm")
            tmp_out = tmp_dir.joinpath(f"tmp_filterOut{channel}.pcm")

            cmd_in_out.append(str(tmp_in))
            cmd_in_out.append(str(tmp_out))

            if block_size:
                cmd_in_out.append(str(block_size))

            write(tmp_in, input.audio[:, channel], input.fs)

            run(cmd_in_out)
    # make sure this is an empty list
    skip_channel = skip_channel or []

    filtered = apply_func_parallel(
        run_filter,
        zip(
            repeat(cmd),
            [input.audio[:, ch] for ch in range(input.num_channels)],
            repeat(input.fs),
            range(input.num_channels),
            repeat(skip_channel),
            repeat(block_size),
        ),
        show_progress=False,
    )

            a, _ = read(tmp_out, nchannels=1, fs=input.fs)
            output[:, channel][:, None] = a
    for ch, filt in enumerate(filtered):
        output[:, [ch]] = filt.reshape(-1, 1)

    return output

+1 −1
Original line number Diff line number Diff line
@@ -135,7 +135,7 @@ class Preprocessing2(Processing):
                raise ValueError(
                    f"Background noise path {self.background_noise.get('background_noise_path')} does not exist"
                )
            # load background noise
            # load preprocessed background noise
            if self.background_noise["background_object"] is not None:
                noise_object = self.background_noise["background_object"]
            else:
+12 −3
Original line number Diff line number Diff line
@@ -569,7 +569,14 @@ def remove_preamble(x, out_fmt, fs, repeat_signal, preamble_len_ms, meta, logger


def preprocess_background_noise(cfg):
    # TODO: add checks and errors for sampling rate and number channels compared to input signals
    if not (
        hasattr(cfg, "preprocessing")
        and hasattr(cfg.pre2, "background_noise")
        and cfg.pre2.background_noise is not None
        and cfg.pre2.background_noise.get("background_noise_path")
    ):
        return

    # create audio objects
    input_audio = audio.fromfile(
        cfg.input["fmt"],
@@ -583,7 +590,10 @@ def preprocess_background_noise(cfg):
    else:
        out_fs = cfg.pre.out_fs

    # only consider format conversion, resampling and high-pass filtering
    if hasattr(input_audio, "object_pos"):
        output_audio.object_pos = input_audio.object_pos

    # only apply format conversion, resampling and filtering
    convert(
        input=input_audio,
        output=output_audio,
@@ -593,7 +603,6 @@ def preprocess_background_noise(cfg):
    )

    # save result in cfg
    # cfg.preprocessing_2["background_noise"].update({"background_object": output_audio})
    cfg.proc_chains[0]["processes"][0].background_noise[
        "background_object"
    ] = output_audio