Commit 8e6f0d73 authored by Anika Treffehn
Browse files

added special loudness scaling

parent 749766c9
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -50,7 +50,6 @@ from ivas_processing_scripts.processing.processing import (
    process_item,
    rename_generated_conditions,
    reorder_items_list,
    scale_resulting_files,
)
from ivas_processing_scripts.utils import DirManager, apply_func_parallel

+1 −1
Original line number Diff line number Diff line
@@ -273,7 +273,7 @@ def process_audio(
            logger.debug(
                f"Applying loudness adjustment to {loudness} LKFS for format {loudness_fmt} using ITU STL bs1770demo"
            )
        x.audio = loudness_norm(x, loudness, loudness_fmt, logger=logger)
        x.audio, _ = loudness_norm(x, loudness, loudness_fmt, logger=logger)

    """limiting"""
    if limit:
+7 −64
Original line number Diff line number Diff line
@@ -225,7 +225,7 @@ def loudness_norm(
    rms: Optional[bool] = False,
    logger: Optional[logging.Logger] = None,
    file_name_logging: Optional[Union[str, Path]] = None,
) -> np.ndarray:
) -> Tuple[np.ndarray, float]:
    """
    Iterative loudness normalization using ITU-R BS.1770-4
    Signal is iteratively scaled after rendering to the specified format
@@ -250,11 +250,14 @@ def loudness_norm(
    -------
    norm: Audio
        Normalized audio
    scaling_factor: float
        applied scaling factor
    """

    measured_loudness = np.inf
    num_iter = 1
    scaled_input = copy.deepcopy(input)
    scaling_factor = 1.0

    # save loudness before and after scaling for the logger info
    loudness_before, scale_factor_new, loundness_fmt_used = get_loudness(
@@ -265,6 +268,7 @@ def loudness_norm(
    while np.abs(measured_loudness - target_loudness) > 0.5 and num_iter <= 10:
        # scale input
        scaled_input.audio *= scale_factor_new
        scaling_factor *= scale_factor_new

        # measure loudness and get scaling factor
        measured_loudness, scale_factor_new, _ = get_loudness(
@@ -291,65 +295,4 @@ def loudness_norm(
            f"Loudness did not converge to desired value, stopping at: {loudness_after:.2f}"
        )

    return scaled_input.audio


def scale_files(
    file_list: list[list[Union[Path, str]]],
    fmt: str,
    loudness: float,
    loudness_format: Optional[str] = None,
    fs: Optional[int] = 48000,
    in_meta: Optional[list] = None,
    logger: Optional[logging.Logger] = None,
) -> None:
    """
    Normalizes the loudness of every listed audio file and overwrites it in place

    Parameters
    ----------
    file_list : list[list[Union[Path, str]]]
        One inner list of file paths per condition folder
    fmt: str
        Audio format shared by all files in file_list
    loudness: float
        Target loudness level in LKFS/dBov
    loudness_format: Optional[str]
        Format used for the loudness measurement
    fs: Optional[int]
        Sampling rate passed on when reading the files
    in_meta: Optional[list]
        ISM metadata, nested like file_list with one additional level that
        holds the metadata entries of a single file; ignored for formats
        that do not start with "ISM"
    logger: Optional[logging.Logger]
        Logger forwarded to the loudness normalization

    Raises
    ------
    ValueError
        If fmt starts with "ISM" and no metadata was supplied
    """

    uses_metadata = fmt.startswith("ISM")

    if uses_metadata and not in_meta:
        raise ValueError("No metadata available for loudness measurement")

    if not uses_metadata:
        # placeholder with the same nesting as file_list so both can be
        # walked in lockstep below; its entries are never read
        in_meta = copy.copy(file_list)

    for condition_files, condition_meta in zip(file_list, in_meta):
        for path, path_meta in zip(condition_files, condition_meta):
            # metadata is only handed to the reader for ISM formats
            reader_extra = (path_meta,) if uses_metadata else ()
            item = audio.fromfile(fmt, path, fs, *reader_extra)

            # bring the item to the requested loudness
            normalized = loudness_norm(
                item,
                loudness,
                loudness_format,
                logger=logger,
                file_name_logging=path,
            )

            # overwrite the source file with the scaled signal
            write(path, normalized, item.fs)
    return scaled_input.audio, scaling_factor
+1 −1
Original line number Diff line number Diff line
@@ -170,7 +170,7 @@ class Preprocessing2(Processing):
            logger.debug(
                f"Scaling of background noise to {self.background_noise['snr']}dB SNR"
            )
            noise_object.audio = loudness_norm(
            noise_object.audio, _ = loudness_norm(
                noise_object,
                loudness_noise,
                out_format,
+20 −54
Original line number Diff line number Diff line
@@ -57,7 +57,6 @@ from ivas_processing_scripts.audiotools.metadata import (
    split_meta_in_file,
    write_ISM_metadata_in_file,
)
from ivas_processing_scripts.audiotools.wrappers.bs1770 import scale_files
from ivas_processing_scripts.constants import LOGGER_DATEFMT, LOGGER_FORMAT
from ivas_processing_scripts.processing.config import TestConfig
from ivas_processing_scripts.utils import apply_func_parallel, list_audio, pairwise
@@ -160,14 +159,11 @@ def concat_setup(cfg: TestConfig, chain, logger: logging.Logger):
    # set input to the concatenated file we have just written to the output dir
    cfg.items_list = [cfg.concat_file]

    # save input sampling rate for splitting at the end
    cfg.pre2.in_fs = fs

    # write out splits
    with open(cfg.concat_file.with_suffix(".splits.log"), "w") as f:
        print(", ".join([str(s) for s in cfg.splits]), file=f)
        print(", ".join([str(sn) for sn in cfg.split_names]), file=f)
        print(", ".join([str(i.stem) for i in cfg.items_list]), file=f)
        print(f"{fs}", file=f)

    logger.info(f"Splits written to file {cfg.concat_file.with_suffix('.splits.log')}")

@@ -176,6 +172,7 @@ def concat_teardown(x, splits, out_fmt, fs, in_fs, logger: logging.Logger):

    if not splits:
        raise ValueError("Splitting not possible without split marker")
    if logger:
        logger.debug("Split files")

    # if sampling rate changed, adjust splits
@@ -214,10 +211,10 @@ def concat_teardown(x, splits, out_fmt, fs, in_fs, logger: logging.Logger):
        #         meta_files=cfg.metadata_path[0],
        #     )
        #     out_meta.append(out_meta_paths)
        split_meta = None
        split_meta = repeat(None)
        pass
    else:
        split_meta = None
        split_meta = repeat(None)

    return split_signals, split_meta

@@ -396,6 +393,7 @@ def process_item(
        p.process(input, output, input_meta, item_logger)

    # copy output and metadata from final process to output file
    if not chain[-1].name == "processing_splitting_scaling":
        copyfile(processing_paths[-1], out_file)
        if processing_paths_meta[-1]:
            for idx, ppm in enumerate(processing_paths_meta[-1]):
@@ -440,11 +438,13 @@ def remove_preamble(x, out_fmt, fs, repeat_signal, preamble, logger):

    # remove first half of signal
    if repeat_signal:
        if logger:
            logger.info("Remove first half of signal")
        x = x[int(len(x) / 2):, :]

    # remove preamble
    if preamble:
        if logger:
            logger.info("Remove preamble")
        x = trim(x, fs, (preamble, 0))

@@ -551,37 +551,3 @@ def rename_generated_conditions(output_path: Path):
                new_filename = f"{file_path.stem}.{subdirectory.name}{file_path.suffix}"
                file_path.rename(subdirectory / new_filename)

def scale_resulting_files(cfg, logger):
    """
    Scales all resulting output files to the loudness configured for postprocessing

    The file (and metadata) lists are taken from cfg.pre2 when cfg has a
    "preprocessing_2" attribute; otherwise they are built from cfg.out_dirs
    and the names in cfg.items_list.

    Parameters
    ----------
    cfg:
        Test configuration; read attributes are postprocessing, out_dirs,
        items_list and, if present, pre2
        (presumably a TestConfig - confirm against caller)
    logger:
        Logger handed on to scale_files
    """

    if hasattr(cfg, "preprocessing_2"):
        out_paths_splits = cfg.pre2.out_paths_splits
        out_meta_splits = cfg.pre2.out_meta_splits
    else:
        # build the output format object once; it does not change per condition
        out_audio = audio.fromtype(cfg.postprocessing["fmt"])
        num_obj = out_audio.num_channels
        is_object_based = isinstance(out_audio, audio.ObjectBasedAudio)

        item_names = [Path(i.name) for i in cfg.items_list]

        out_paths_splits = []
        # metadata is only collected for object based output formats
        out_meta_splits = [] if is_object_based else None
        for out_dir in cfg.out_dirs:
            out_paths_splits.append([out_dir.joinpath(item) for item in item_names])
            if is_object_based:
                out_meta_splits.append(metadata_search(out_dir, item_names, num_obj))

    scale_files(
        out_paths_splits,
        cfg.postprocessing["fmt"],
        cfg.postprocessing["loudness"],
        cfg.postprocessing.get("loudness_fmt", None),
        cfg.postprocessing["fs"],
        out_meta_splits,
        logger,
    )
Loading