Commit fcf3171a authored by Anika Treffehn's avatar Anika Treffehn
Browse files

added optional key for padding the input signals

parent 04d03d3a
Loading
Loading
Loading
Loading
Loading
+4 −7
Original line number Diff line number Diff line
@@ -65,13 +65,10 @@ input:
    fmt: "HOA3"
    ### Input sampling rate in Hz needed for headerless audio files; default = 48000
    # fs: 32000
    ### Enable check for input files being aligned to an integer multiple of a given length in ms.
    ### If a file is not aligned, a warning will be issued. If the input format has metadata or force is true, an error is raised instead.
    # aligned_to:
    ### alignment length in ms, is needed if aligned_to is used
    #     len: 20
    ### default: false
    #     force: true
    ### Treatment of items that are not aligned to the 20 ms frame length
    ### options: "padding" (pads with low-level noise to the frame length), "warning" or "error" (raises a warning or an error when items are not aligned),
    ### "ignore" (no special treatment for items that are not aligned); default = "padding" (for ISM and MASA: "error")
    # frame_alignment: "error"

################################################
### Pre-processing on individual items
+3 −0
Original line number Diff line number Diff line
@@ -63,6 +63,9 @@ DEFAULT_CONFIG = {
    "master_seed": 0,
    "prerun_seed": 0,
    "metadata_path": None,
    "input": {
        "frame_alignment": "padding",
    },
    # postprocessing
    "postprocessing": {
        "mask": None,
+56 −14
Original line number Diff line number Diff line
@@ -32,9 +32,10 @@

from typing import Optional
from warnings import warn
from shutil import copyfile

from ivas_processing_scripts.audiotools import audio
from ivas_processing_scripts.audiotools.audiofile import read
from ivas_processing_scripts.audiotools.audiofile import read, write
from ivas_processing_scripts.processing.config import TestConfig
from ivas_processing_scripts.processing.evs import EVS
from ivas_processing_scripts.processing.ivas import IVAS
@@ -44,6 +45,7 @@ from ivas_processing_scripts.processing.preprocessing_2 import Preprocessing2
from ivas_processing_scripts.processing.processing_splitting_scaling import (
    Processing_splitting_scaling,
)
from ivas_processing_scripts.audiotools.audioarray import trim
from ivas_processing_scripts.utils import get_abs_path, list_audio


@@ -521,7 +523,23 @@ def validate_input_files(cfg: TestConfig):
    input_format = cfg.input["fmt"]
    num_chan_expected = audio.fromtype(input_format).num_channels

    for item in cfg.items_list:
    frame_alignment = cfg.input["frame_alignment"]

    # always throw an error for ISM (and MASA) input
    if input_format.startswith("ISM") or input_format.startswith("MASA"):
        frame_alignment = "error"

    if cfg.input["frame_alignment"] == "padding":
        # Create new input directory for padded files
        output_dir = cfg.output_path / "20ms_aligned_files"
        try:
            output_dir.mkdir(exist_ok=False)
        except FileExistsError:
            raise ValueError(
                "Folder for 20ms aligned files already exists. Please move or delete folder"
            )

    for i, item in enumerate(cfg.items_list):
        if "fs" in cfg.input:
            sampling_rate = cfg.input["fs"]
            x, fs = read(item, nchannels=num_chan_expected, fs=sampling_rate)
@@ -544,18 +562,42 @@ def validate_input_files(cfg: TestConfig):
                f"The number of channels in the file ({n_chan_x}) do NOT match with those of format ({num_chan_expected}, {input_format}) specified in the config yaml."
            )

        if (input_aligned_cfg := cfg.input.get("aligned_to", None)) is not None:
            input_fmt_has_metadata = input_format.startswith(
                "ISM"
            ) or input_format.startswith("MASA")
            force_alignment = (
                input_aligned_cfg.get("force", False) or input_fmt_has_metadata
            )
        # check frame alignment of items
        if cfg.input["frame_alignment"] != "ignore":

            alignment_len_samples = (20 / 1000) * fs

            alignment_len_samples = (input_aligned_cfg["len"] / 1000) * fs
            if n_samples_x % alignment_len_samples != 0:
                msg = f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of given alignment length ({input_aligned_cfg['len']} ms)."
                if force_alignment:
                    raise ValueError(msg)
                if frame_alignment == "warning":
                    warn(f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of the frame length.")
                elif frame_alignment == "error":
                    raise ValueError(f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of the frame length.")
                elif frame_alignment == "padding":
                    warn(f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms). Padding to the nearest integer multiple.")
                    # Calculate number of samples needed for padding
                    padding_samples = int(alignment_len_samples - (n_samples_x % alignment_len_samples))
                    # Pad the end of the signal with low-level noise up to the next frame boundary
                    padded_data = trim(
                        x,
                        fs,
                        (0, -padding_samples),
                        pad_noise=True,
                        samples=True,
                    )
                    # Write padded data to output directory
                    write(output_dir / item.name, padded_data, fs)
                    # Update audio file path in list
                    cfg.items_list[i] = output_dir / item.name
                else:
                    raise ValueError(f"Value of key frame_alignment does not match possible options. Value: {frame_alignment}. Options: 'padding', 'ignore', 'warning', 'error'")
            else:
                    warn(msg)
                if frame_alignment == "padding":
                    copyfile(item, output_dir / item.name)
                    # Update audio file path in list
                    cfg.items_list[i] = output_dir / item.name
                else:
                    pass

    if frame_alignment == "padding":
        # Make the output path as the new input path
        cfg.input_path = output_dir