Loading ivas_processing_scripts/__init__.py +4 −0 Original line number Diff line number Diff line Loading @@ -43,6 +43,7 @@ from ivas_processing_scripts.constants import ( ) from ivas_processing_scripts.processing import chains, config from ivas_processing_scripts.processing.processing import ( multiple_of_frame_size, preprocess, preprocess_2, preprocess_background_noise, Loading Loading @@ -126,6 +127,9 @@ def main(args): cfg.metadata_path = metadata # checking if audio is a multiple of frame size multiple_of_frame_size(cfg.items_list, cfg.input["fmt"]) # run preprocessing only once if hasattr(cfg, "preprocessing"): # save process info for background noise Loading ivas_processing_scripts/audiotools/audioarray.py +0 −40 Original line number Diff line number Diff line Loading @@ -688,43 +688,3 @@ def mute_channels( """ x[:, mute] = 0 return x def multiple_of_frame_size( x: np.ndarray, fs: int = 48000, frame_size_in_ms: Optional[int] = 20, ) -> np.ndarray: """ Warn and pad audio if it isn't a multiple of frame size Parameters ---------- x: np.ndarray Input array fs: int Input sampling rate in Hz; default = 48000 frame_size_in_ms: Optional[int] Frame size in milliseconds; default = 20 Returns ------- x: np.ndarray Padded array """ if x.ndim == 1: n_samples_x = x.shape n_chan_x = 1 else: n_samples_x, n_chan_x = x.shape frame_length_samples = (frame_size_in_ms / 1000) * fs if n_samples_x % frame_length_samples != 0: warn( "Audio length is not a multiple of frame length (20 ms). Padding with zeros." 
) pad_length = int(frame_length_samples - (n_samples_x % frame_length_samples)) x = np.pad(x, ((0, int(pad_length)), (0, 0)), "constant") return x ivas_processing_scripts/audiotools/convert/__init__.py +0 −3 Original line number Diff line number Diff line Loading @@ -212,9 +212,6 @@ def process_audio( if fs is None: fs = x.fs """making sure length is a multiple of the frame size""" x.audio = audioarray.multiple_of_frame_size(x.audio, fs) """delay audio""" if delay is not None: if logger: Loading ivas_processing_scripts/processing/processing.py +48 −1 Original line number Diff line number Diff line Loading @@ -35,7 +35,7 @@ from abc import ABC, abstractmethod from itertools import repeat from pathlib import Path from shutil import copyfile from typing import Iterable, Union from typing import Iterable, Optional, Union from warnings import warn import numpy as np Loading Loading @@ -74,6 +74,21 @@ class Processing(ABC): def reorder_items_list(items_list: list, concatenation_order: list) -> list: """ Reorder input items list based on conactenation order Parameters ---------- items_list: list List of input items concatenation_order: list Concatenation order Returns ------- ordered_full_files: list Re-ordered list of input items """ name_to_full = {Path(full_file).name: full_file for full_file in items_list} ordered_full_files = [ name_to_full[name] for name in concatenation_order if name in name_to_full Loading Loading @@ -487,3 +502,35 @@ def preprocess_background_noise(cfg): ] = output_audio return def multiple_of_frame_size( items_list: list, input_format: str, frame_size_in_ms: Optional[int] = 20, ) -> np.ndarray: """ Warn and pad audio if it isn't a multiple of frame size Parameters ---------- items_list: list List of input items frame_size_in_ms: Optional[int] Frame size in milliseconds; default = 20 """ for item in items_list: # read file x, fs = read(item) # warning if audio length not a multiple of frame lenght n_samples_x, n_chan_x = x.shape 
frame_length_samples = (frame_size_in_ms / 1000) * fs if n_samples_x % frame_length_samples != 0: if input_format.startswith("ISM") or input_format.startswith("MASA"): raise ValueError( f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)." ) else: warn( f"The length ({n_samples_x} samples) of audio ({item.name}) is not a multiple of frame length (20 ms)." ) Loading
ivas_processing_scripts/__init__.py +4 −0 Original line number Diff line number Diff line Loading @@ -43,6 +43,7 @@ from ivas_processing_scripts.constants import ( ) from ivas_processing_scripts.processing import chains, config from ivas_processing_scripts.processing.processing import ( multiple_of_frame_size, preprocess, preprocess_2, preprocess_background_noise, Loading Loading @@ -126,6 +127,9 @@ def main(args): cfg.metadata_path = metadata # checking if audio is a multiple of frame size multiple_of_frame_size(cfg.items_list, cfg.input["fmt"]) # run preprocessing only once if hasattr(cfg, "preprocessing"): # save process info for background noise Loading
# From ivas_processing_scripts/audiotools/audioarray.py (hunk @@ -688,43 +688,3 @@)
def multiple_of_frame_size(
    x: np.ndarray,
    fs: int = 48000,
    frame_size_in_ms: Optional[int] = 20,
) -> np.ndarray:
    """
    Warn and zero-pad audio if its length isn't a multiple of the frame size

    Parameters
    ----------
    x: np.ndarray
        Input array; samples run along the first axis. Both 1-D (mono) and
        2-D (samples x channels) arrays are accepted
    fs: int
        Input sampling rate in Hz; default = 48000
    frame_size_in_ms: Optional[int]
        Frame size in milliseconds; default = 20 (None selects the default)

    Returns
    -------
    x: np.ndarray
        The input array unchanged if its length already is a multiple of the
        frame length, otherwise a copy padded with trailing zeros up to the
        next multiple
    """
    if frame_size_in_ms is None:
        frame_size_in_ms = 20

    # the first axis always holds the samples, whatever the number of dimensions
    n_samples_x = x.shape[0]

    # multiply before dividing so integral frame lengths are computed exactly
    frame_length_samples = frame_size_in_ms * fs / 1000

    remainder = n_samples_x % frame_length_samples
    if remainder != 0:
        warn(
            f"Audio length is not a multiple of frame length ({frame_size_in_ms} ms). Padding with zeros."
        )
        pad_length = int(frame_length_samples - remainder)
        # pad only at the end of the sample axis; leave any other axis untouched
        pad_width = [(0, pad_length)] + [(0, 0)] * (x.ndim - 1)
        x = np.pad(x, pad_width, "constant")

    return x
ivas_processing_scripts/audiotools/convert/__init__.py +0 −3 Original line number Diff line number Diff line Loading @@ -212,9 +212,6 @@ def process_audio( if fs is None: fs = x.fs """making sure length is a multiple of the frame size""" x.audio = audioarray.multiple_of_frame_size(x.audio, fs) """delay audio""" if delay is not None: if logger: Loading
# From ivas_processing_scripts/processing/processing.py (hunk @@ -487,3 +502,35 @@)
def multiple_of_frame_size(
    items_list: list,
    input_format: str,
    frame_size_in_ms: Optional[int] = 20,
) -> None:
    """
    Check that the length of every input item is a multiple of the frame size

    The audio is only inspected, never modified or padded.

    Parameters
    ----------
    items_list: list
        List of input items (paths of the audio files to check)
    input_format: str
        Input audio format; formats starting with "ISM" or "MASA" turn a
        length mismatch into an error instead of a warning
    frame_size_in_ms: Optional[int]
        Frame size in milliseconds; default = 20 (None selects the default)

    Raises
    ------
    ValueError
        If an item's length is not a multiple of the frame length and the
        input format starts with "ISM" or "MASA"
    """
    if frame_size_in_ms is None:
        frame_size_in_ms = 20

    # for these formats a mismatch is an error, not just a warning
    strict = input_format.startswith(("ISM", "MASA"))

    for item in items_list:
        # read file
        # NOTE(review): assumes read() returns (samples-first array, sampling rate) - confirm
        x, fs = read(item)

        # the first axis holds the samples for mono (1-D) and multichannel (2-D) audio alike
        n_samples_x = x.shape[0]

        # multiply before dividing so integral frame lengths are computed exactly
        frame_length_samples = frame_size_in_ms * fs / 1000
        if n_samples_x % frame_length_samples == 0:
            continue

        # Path() accepts both str and Path items, as reorder_items_list does
        message = (
            f"The length ({n_samples_x} samples) of audio ({Path(item).name}) "
            f"is not a multiple of frame length ({frame_size_in_ms} ms)."
        )
        if strict:
            raise ValueError(message)
        warn(message)