diff --git a/ivas_processing_scripts/__init__.py b/ivas_processing_scripts/__init__.py index 98fa796b8e7bedbc75ed2d515555331f7f42f89d..243ac32b672fe210b9260fd203850d248af32a30 100755 --- a/ivas_processing_scripts/__init__.py +++ b/ivas_processing_scripts/__init__.py @@ -156,13 +156,7 @@ def main(args): # save process info to revert it later cfg.pre2 = cfg.proc_chains[0]["processes"][0] # preprocess background noise - if ( - hasattr(cfg, "preprocessing") - and hasattr(cfg.pre2, "background_noise") - and cfg.pre2.background_noise is not None - and cfg.pre2.background_noise.get("background_noise_path") - ): - preprocess_background_noise(cfg) + preprocess_background_noise(cfg) # preprocess 2 preprocess_2(cfg, logger) diff --git a/ivas_processing_scripts/audiotools/convert/__init__.py b/ivas_processing_scripts/audiotools/convert/__init__.py index 88654b6c13d7e74c3b2bb1cc90dc4312b191eb28..d7d612af5d403858b4b53dabf2a494cb92eb8852 100755 --- a/ivas_processing_scripts/audiotools/convert/__init__.py +++ b/ivas_processing_scripts/audiotools/convert/__init__.py @@ -427,7 +427,7 @@ def format_conversion( if fmt.startswith("MASA"): output.metadata_file = input.metadata_file elif fmt.startswith("ISM"): # also includes combined formats - output.metadata_files = list(output.metadata_files) + output.metadata_files = list(input.metadata_files) else: if isinstance(input, audio.BinauralAudio): diff --git a/ivas_processing_scripts/audiotools/wrappers/filter.py b/ivas_processing_scripts/audiotools/wrappers/filter.py index 062592ed0b4e32d1cf8ad84f4d63a96db51397db..3f6ce956d9c5625079eac2183c3fc38219e343f3 100755 --- a/ivas_processing_scripts/audiotools/wrappers/filter.py +++ b/ivas_processing_scripts/audiotools/wrappers/filter.py @@ -32,6 +32,7 @@ import re from copy import deepcopy +from itertools import repeat from pathlib import Path from tempfile import TemporaryDirectory from typing import Optional @@ -43,11 +44,39 @@ from ivas_processing_scripts.audiotools.audio import Audio, 
def run_filter(
    cmd_base: list[str],
    audio: np.ndarray,
    fs: int,
    chan: int,
    skip_channel: Optional[list[int]],
    block_size: Optional[int] = None,
) -> np.ndarray:
    """Filter a single audio channel by running an external command on it.

    The channel is written to a temporary ``.pcm`` file, the command is
    executed with the input path, the output path and (optionally) the block
    size appended to ``cmd_base``, and the resulting file is read back.

    Parameters
    ----------
    cmd_base : list[str]
        Command line up to, but not including, the input/output file
        arguments. It is not modified; a fresh list is built per call.
    audio : np.ndarray
        Samples of the channel to process.
    fs : int
        Sampling rate passed to the file writer and reader.
    chan : int
        Index of the channel; used for the skip check and to label the
        temporary files.
    skip_channel : Optional[list[int]]
        Channel indices that must be passed through unfiltered. ``None`` or
        an empty list means no channel is skipped.
    block_size : Optional[int]
        If truthy, appended to the command as the last argument.

    Returns
    -------
    np.ndarray
        ``audio`` itself (same object, not a copy) when ``chan`` is listed in
        ``skip_channel``; otherwise the array returned by ``read`` for the
        output file.
        NOTE(review): the two paths may differ in shape (``read`` is called
        with ``nchannels=1``) - callers should reshape as needed.
    """
    # Guard against None so the helper is safe to call on its own, not only
    # through a caller that normalises skip_channel to a list beforehand.
    if skip_channel and chan in skip_channel:
        return audio

    # One private directory per call: concurrent invocations for different
    # channels can never collide on file names.
    with TemporaryDirectory() as tmp_dir:
        tmp_dir = Path(tmp_dir)
        # File names now match their role (they were swapped before).
        tmp_in = tmp_dir / f"tmpFilterIn_{chan}.pcm"
        tmp_out = tmp_dir / f"tmpFilterOut_{chan}.pcm"

        # Build a new list so the shared base command stays untouched.
        cmd = [*cmd_base, str(tmp_in), str(tmp_out)]
        if block_size:
            cmd.append(str(block_size))

        write(tmp_in, audio, fs)
        run(cmd)
        # Read before leaving the context: the directory is deleted on exit.
        out, _ = read(tmp_out, nchannels=1, fs=fs)
        return out
zip( + repeat(cmd), + [input.audio[:, ch] for ch in range(input.num_channels)], + repeat(input.fs), + range(input.num_channels), + repeat(skip_channel), + repeat(block_size), + ), + show_progress=False, + ) + + for ch, filt in enumerate(filtered): + output[:, [ch]] = filt.reshape(-1, 1) return output diff --git a/ivas_processing_scripts/processing/preprocessing_2.py b/ivas_processing_scripts/processing/preprocessing_2.py index 2402a856ef519add7d89e1db535ab96c3af867d9..2a4bef674fb0eebd257f9bbab50ef8d0b4a2384a 100644 --- a/ivas_processing_scripts/processing/preprocessing_2.py +++ b/ivas_processing_scripts/processing/preprocessing_2.py @@ -135,7 +135,7 @@ class Preprocessing2(Processing): raise ValueError( f"Background noise path {self.background_noise.get('background_noise_path')} does not exist" ) - # load background noise + # load preprocessed background noise if self.background_noise["background_object"] is not None: noise_object = self.background_noise["background_object"] else: diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 64368954b9d9736b252db6bd1e0e1520cc66af41..f762ad38bdcb2f677f274f2e8e228dc28172e78a 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -569,7 +569,14 @@ def remove_preamble(x, out_fmt, fs, repeat_signal, preamble_len_ms, meta, logger def preprocess_background_noise(cfg): - # TODO: add checks and errors for sampling rate and number channels compared to input signals + if not ( + hasattr(cfg, "preprocessing") + and hasattr(cfg.pre2, "background_noise") + and cfg.pre2.background_noise is not None + and cfg.pre2.background_noise.get("background_noise_path") + ): + return + # create audio objects input_audio = audio.fromfile( cfg.input["fmt"], @@ -583,7 +590,10 @@ def preprocess_background_noise(cfg): else: out_fs = cfg.pre.out_fs - # only consider format conversion, resampling and high-pass filtering + if 
hasattr(input_audio, "object_pos"): + output_audio.object_pos = input_audio.object_pos + + # only apply format conversion, resampling and filtering convert( input=input_audio, output=output_audio, @@ -593,7 +603,6 @@ def preprocess_background_noise(cfg): ) # save result in cfg - # cfg.preprocessing_2["background_noise"].update({"background_object": output_audio}) cfg.proc_chains[0]["processes"][0].background_noise[ "background_object" ] = output_audio