Commit 3aa55bf0 authored by Anika Treffehn's avatar Anika Treffehn
Browse files

added spatial distortion

parent 2f469658
Loading
Loading
Loading
Loading
+13 −1
Original line number Diff line number Diff line
@@ -44,7 +44,7 @@ from ivas_processing_scripts.audiotools.convert.masa import convert_masa
from ivas_processing_scripts.audiotools.convert.objectbased import convert_objectbased
from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased
from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm
from ivas_processing_scripts.audiotools.wrappers.esdru import esdru
from ivas_processing_scripts.audiotools.wrappers.esdru import esdru, spatial_distortion
from ivas_processing_scripts.audiotools.wrappers.filter import (
    lpfilter_itu,
    maskfilter_itu,
@@ -169,6 +169,8 @@ def convert(
    limit: Optional[bool] = False,
    mnru_q: Optional[float] = None,
    esdru_alpha: Optional[float] = None,
    spatial_distortion_amplitude: Optional[float] = None,
    spatial_distortion_frequency: Optional[float] = None,
    logger: Optional[logging.Logger] = None,
    **kwargs,
) -> None:
@@ -186,6 +188,8 @@ def convert(
        window=in_window,
        loudness=in_loudness,
        loudness_fmt=in_loudness_fmt,
        spatial_distortion_amplitude=spatial_distortion_amplitude,
        spatial_distortion_frequency=spatial_distortion_frequency,
        logger=logger,
    )

@@ -225,6 +229,8 @@ def process_audio(
    limit: Optional[bool] = False,
    mnru_q: Optional[float] = None,
    esdru_alpha: Optional[float] = None,
    spatial_distortion_amplitude: Optional[float] = None,
    spatial_distortion_frequency: Optional[float] = None,
    logger: Optional[logging.Logger] = None,
) -> None:
    """Perform (pre-/pos-) processing of audio"""
@@ -287,6 +293,12 @@ def process_audio(
            logger.debug("Applying ESDRU Recommendation ITU-T P.811")
        x.audio = esdru(x, esdru_alpha)

    """Spatial distortion"""
    if spatial_distortion_frequency is not None and spatial_distortion_amplitude is not None:
        if logger:
            logger.debug("Applying spatial distortion")
        x.audio = spatial_distortion(x, spatial_distortion_amplitude, spatial_distortion_frequency)

    """loudness normalization"""
    if loudness is not None:
        if logger:
+31 −0
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Optional
from copy import deepcopy

import numpy as np

@@ -126,3 +127,33 @@ def esdru(
        tmp_output_signal, out_fs = read(tmp_output_file, 2, sf)

    return tmp_output_signal


def spatial_distortion(
    input: audio.Audio,
    amplitude: float,
    frequency: float,
) -> np.ndarray:
    """Apply a time-varying rotation to the Y and X channels of an SBA signal.

    The rotation angle oscillates sinusoidally between ``-amplitude`` and
    ``+amplitude`` degrees. Only channels 1 (Y) and 3 (X) are modified; all
    other channels are returned unchanged. The input object is not modified.

    Parameters
    ----------
    input : audio.Audio
        Audio object to distort; must be an ``audio.SceneBasedAudio`` instance.
        Its ``audio`` attribute is indexed as ``[sample, channel]``.
    amplitude : float
        Peak rotation angle in degrees.
    frequency : float
        Oscillation frequency of the rotation angle.
        NOTE(review): the sample index is not divided by a sampling rate
        below, so this value is effectively interpreted in cycles per sample
        (integer values yield an angle of ~0 for every sample). If Hz is
        intended, the phase needs to be scaled by the sampling rate - confirm.

    Returns
    -------
    np.ndarray
        Copy of the input samples with the rotated Y and X channels.

    Raises
    ------
    ValueError
        If ``input`` is not scene-based audio.
    """

    if not isinstance(input, audio.SceneBasedAudio):
        raise ValueError("Spatial distortion currently only implemented for SBA.")

    # Write into a separate copy and read from the untouched input. Basic
    # slicing returns views, so reading Y from the same array that is being
    # written would feed the already rotated Y into the X computation.
    distorted = input.audio.copy()

    # order channels WYZX
    y = input.audio[:, 1]
    x = input.audio[:, 3]

    # angle changes over time
    amplitude_rad = np.deg2rad(amplitude)
    angle = amplitude_rad * np.sin(np.arange(len(y)) * 2 * np.pi * frequency)
    sin_angle = np.sin(angle)
    cos_angle = np.cos(angle)

    # Y channel left-right
    distorted[:, 1] = y * cos_angle + x * sin_angle

    # X channel front-back
    distorted[:, 3] = -y * sin_angle + x * cos_angle

    return distorted
+11 −1
Original line number Diff line number Diff line
@@ -237,6 +237,8 @@ def get_processing_chain(
    tmp_lp_cutoff = post_cfg.get("lp_cutoff")
    tmp_mnru_q = None
    tmp_esdru_alpha = None
    tmp_spatial_dist_amp = None
    tmp_spatial_dist_freq = None
    tx_condition = False
    ivas_jbm = False
    cond_fmt = []
@@ -365,10 +367,16 @@ def get_processing_chain(
        cod_cfg = cond_cfg["cod"]
        dec_cfg = cond_cfg["dec"]

        # enable ESDRU after IVAS condition
        # enable ESDRU and spatial distortion after IVAS condition
        if cond_cfg.get("esdru_alpha", None) is not None:
            tmp_esdru_alpha = cond_cfg.get("esdru_alpha", None)

        if (sd := cond_cfg.get("spatial_distortion", None)) is not None:
            tmp_spatial_dist_amp = sd.get("amplitude", None)
            tmp_spatial_dist_freq = sd.get("frequency", None)
            if tmp_spatial_dist_amp is None or tmp_spatial_dist_freq is None:
                raise ValueError("For spatial distortion amplitude and frequency values need to be given")

        # 9.6 kbit/s NB for EVS LFE coding only applies to EVS conditions
        evs_lfe_9k6bps_nb = cond_cfg.get("evs_lfe_9k6bps_nb", None)

@@ -538,6 +546,8 @@ def get_processing_chain(
                "multiprocessing": cfg.multiprocessing,
                "mnru_q": tmp_mnru_q,
                "esdru_alpha": tmp_esdru_alpha,
                "spatial_distortion_amplitude": tmp_spatial_dist_amp,
                "spatial_distortion_frequency": tmp_spatial_dist_freq,
                "tx_condition": tx_condition,
            }
        )
+3 −2
Original line number Diff line number Diff line
@@ -255,9 +255,10 @@ class TestConfig:
                        f"The following key must be specified for ESDRU: {REQUIRED_KEYS_ESDRU}"
                    )

            if cond_cfg.get("ivas_rend", -1) != -1:
            if cfg["conditions_to_generate"][cond_name].get("ivas_rend", -1) != -1:
                merged_cfg = get_default_config_for_renderer("IVAS", codec_bin_extension)
                merge_dicts(merged_cfg, cond_cfg)
                cond_cfg_rend = cfg["conditions_to_generate"][cond_name]
                merge_dicts(merged_cfg, cond_cfg_rend)
                cfg["conditions_to_generate"][cond_name] = merged_cfg