Loading ivas_processing_scripts/audiotools/convert/__init__.py +13 −1 Original line number Diff line number Diff line Loading @@ -44,7 +44,7 @@ from ivas_processing_scripts.audiotools.convert.masa import convert_masa from ivas_processing_scripts.audiotools.convert.objectbased import convert_objectbased from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm from ivas_processing_scripts.audiotools.wrappers.esdru import esdru from ivas_processing_scripts.audiotools.wrappers.esdru import esdru, spatial_distortion from ivas_processing_scripts.audiotools.wrappers.filter import ( lpfilter_itu, maskfilter_itu, Loading Loading @@ -169,6 +169,8 @@ def convert( limit: Optional[bool] = False, mnru_q: Optional[float] = None, esdru_alpha: Optional[float] = None, spatial_distortion_amplitude: Optional[float] = None, spatial_distortion_frequency: Optional[float] = None, logger: Optional[logging.Logger] = None, **kwargs, ) -> None: Loading @@ -186,6 +188,8 @@ def convert( window=in_window, loudness=in_loudness, loudness_fmt=in_loudness_fmt, spatial_distortion_amplitude=spatial_distortion_amplitude, spatial_distortion_frequency=spatial_distortion_frequency, logger=logger, ) Loading Loading @@ -225,6 +229,8 @@ def process_audio( limit: Optional[bool] = False, mnru_q: Optional[float] = None, esdru_alpha: Optional[float] = None, spatial_distortion_amplitude: Optional[float] = None, spatial_distortion_frequency: Optional[float] = None, logger: Optional[logging.Logger] = None, ) -> None: """Perform (pre-/pos-) processing of audio""" Loading Loading @@ -287,6 +293,12 @@ def process_audio( logger.debug("Applying ESDRU Recommendation ITU-T P.811") x.audio = esdru(x, esdru_alpha) """Spatial distortion""" if spatial_distortion_frequency is not None and spatial_distortion_amplitude is not None: if logger: logger.debug("Applying spatial distortion") x.audio = spatial_distortion(x, 
spatial_distortion_amplitude, spatial_distortion_frequency) """loudness normalization""" if loudness is not None: if logger: Loading ivas_processing_scripts/audiotools/wrappers/esdru.py +31 −0 Original line number Diff line number Diff line Loading @@ -33,6 +33,7 @@ from pathlib import Path from tempfile import TemporaryDirectory from typing import Optional from copy import deepcopy import numpy as np Loading Loading @@ -126,3 +127,33 @@ def esdru( tmp_output_signal, out_fs = read(tmp_output_file, 2, sf) return tmp_output_signal def spatial_distortion( input: audio.Audio, amplitude, frequency, ) -> np.ndarray: if not isinstance(input, audio.SceneBasedAudio): raise ValueError("Spatial distortion currently only implemented for SBA.") input_copy = deepcopy(input) # order channels WYZX y = input_copy.audio[:, 1] x = input_copy.audio[:, 3] # angle changes over time amplitude = np.deg2rad(amplitude) angle = amplitude * np.sin(np.arange(len(y)) * 2 * np.pi * frequency) # Y channel left-right y_new = y * np.cos(angle) + x * np.sin(angle) input_copy.audio[:, 1] = y_new # X channel front-back x_new = -y * np.sin(angle) + x * np.cos(angle) input_copy.audio[:, 3] = x_new return input_copy.audio ivas_processing_scripts/processing/chains.py +11 −1 Original line number Diff line number Diff line Loading @@ -237,6 +237,8 @@ def get_processing_chain( tmp_lp_cutoff = post_cfg.get("lp_cutoff") tmp_mnru_q = None tmp_esdru_alpha = None tmp_spatial_dist_amp = None tmp_spatial_dist_freq = None tx_condition = False ivas_jbm = False cond_fmt = [] Loading Loading @@ -365,10 +367,16 @@ def get_processing_chain( cod_cfg = cond_cfg["cod"] dec_cfg = cond_cfg["dec"] # enable ESDRU after IVAS condition # enable ESDRU and spatial distorition after IVAS condition if cond_cfg.get("esdru_alpha", None) is not None: tmp_esdru_alpha = cond_cfg.get("esdru_alpha", None) if (sd := cond_cfg.get("spatial_distortion", None)) is not None: tmp_spatial_dist_amp = sd.get("amplitude", None) tmp_spatial_dist_freq = 
sd.get("frequency", None) if tmp_spatial_dist_amp is None or tmp_spatial_dist_freq is None: raise ValueError("For spatial distortion amplitude and frequency values need to be given") # 9.6 kbit/s NB for EVS LFE coding only applies to EVS conditions evs_lfe_9k6bps_nb = cond_cfg.get("evs_lfe_9k6bps_nb", None) Loading Loading @@ -538,6 +546,8 @@ def get_processing_chain( "multiprocessing": cfg.multiprocessing, "mnru_q": tmp_mnru_q, "esdru_alpha": tmp_esdru_alpha, "spatial_distortion_amplitude": tmp_spatial_dist_amp, "spatial_distortion_frequency": tmp_spatial_dist_freq, "tx_condition": tx_condition, } ) Loading ivas_processing_scripts/processing/config.py +3 −2 Original line number Diff line number Diff line Loading @@ -255,9 +255,10 @@ class TestConfig: f"The following key must be specified for ESDRU: {REQUIRED_KEYS_ESDRU}" ) if cond_cfg.get("ivas_rend", -1) != -1: if cfg["conditions_to_generate"][cond_name].get("ivas_rend", -1) != -1: merged_cfg = get_default_config_for_renderer("IVAS", codec_bin_extension) merge_dicts(merged_cfg, cond_cfg) cond_cfg_rend = cfg["conditions_to_generate"][cond_name] merge_dicts(merged_cfg, cond_cfg_rend) cfg["conditions_to_generate"][cond_name] = merged_cfg Loading Loading
ivas_processing_scripts/audiotools/convert/__init__.py +13 −1 Original line number Diff line number Diff line Loading @@ -44,7 +44,7 @@ from ivas_processing_scripts.audiotools.convert.masa import convert_masa from ivas_processing_scripts.audiotools.convert.objectbased import convert_objectbased from ivas_processing_scripts.audiotools.convert.scenebased import convert_scenebased from ivas_processing_scripts.audiotools.wrappers.bs1770 import loudness_norm from ivas_processing_scripts.audiotools.wrappers.esdru import esdru from ivas_processing_scripts.audiotools.wrappers.esdru import esdru, spatial_distortion from ivas_processing_scripts.audiotools.wrappers.filter import ( lpfilter_itu, maskfilter_itu, Loading Loading @@ -169,6 +169,8 @@ def convert( limit: Optional[bool] = False, mnru_q: Optional[float] = None, esdru_alpha: Optional[float] = None, spatial_distortion_amplitude: Optional[float] = None, spatial_distortion_frequency: Optional[float] = None, logger: Optional[logging.Logger] = None, **kwargs, ) -> None: Loading @@ -186,6 +188,8 @@ def convert( window=in_window, loudness=in_loudness, loudness_fmt=in_loudness_fmt, spatial_distortion_amplitude=spatial_distortion_amplitude, spatial_distortion_frequency=spatial_distortion_frequency, logger=logger, ) Loading Loading @@ -225,6 +229,8 @@ def process_audio( limit: Optional[bool] = False, mnru_q: Optional[float] = None, esdru_alpha: Optional[float] = None, spatial_distortion_amplitude: Optional[float] = None, spatial_distortion_frequency: Optional[float] = None, logger: Optional[logging.Logger] = None, ) -> None: """Perform (pre-/pos-) processing of audio""" Loading Loading @@ -287,6 +293,12 @@ def process_audio( logger.debug("Applying ESDRU Recommendation ITU-T P.811") x.audio = esdru(x, esdru_alpha) """Spatial distortion""" if spatial_distortion_frequency is not None and spatial_distortion_amplitude is not None: if logger: logger.debug("Applying spatial distortion") x.audio = spatial_distortion(x, 
spatial_distortion_amplitude, spatial_distortion_frequency) """loudness normalization""" if loudness is not None: if logger: Loading
def spatial_distortion(
    input: audio.Audio,
    amplitude,
    frequency,
) -> np.ndarray:
    """Apply a time-varying rotation to the X and Y channels of scene-based audio.

    The rotation angle oscillates sinusoidally between ``-amplitude`` and
    ``+amplitude`` degrees. Only channels 1 (Y) and 3 (X) are modified; all
    other channels are returned unchanged. ``input`` itself is not modified.

    Parameters
    ----------
    input : audio.Audio
        Audio object to process; must be an ``audio.SceneBasedAudio``
        instance. Its ``audio`` array is indexed as ``[sample, channel]``
        with channel order W, Y, Z, X.
    amplitude
        Peak rotation angle in degrees.
    frequency
        Oscillation frequency of the rotation angle.
        NOTE(review): the value is multiplied directly with the sample index,
        i.e. it is interpreted in cycles per sample, not in Hz. An integer
        value therefore yields an angle of (numerically almost) zero for
        every sample. Confirm whether a division by the sampling rate is
        intended here.

    Returns
    -------
    np.ndarray
        Copy of ``input.audio`` with the rotated Y and X channels.

    Raises
    ------
    ValueError
        If ``input`` is not scene-based audio.
    """
    if not isinstance(input, audio.SceneBasedAudio):
        raise ValueError("Spatial distortion currently only implemented for SBA.")

    # Write into a separate array so the caller's audio stays untouched.
    out = input.audio.copy()

    # Channel order is W, Y, Z, X. Read both source channels from the
    # unmodified input: slicing returns views, so reading them from the array
    # that is being written would make the X computation see the already
    # rotated Y channel.
    y = input.audio[:, 1]
    x = input.audio[:, 3]

    # Rotation angle (radians) for every sample.
    angle = np.deg2rad(amplitude) * np.sin(
        np.arange(len(y)) * 2 * np.pi * frequency
    )
    cos_angle = np.cos(angle)
    sin_angle = np.sin(angle)

    # Y channel: left-right
    out[:, 1] = y * cos_angle + x * sin_angle
    # X channel: front-back
    out[:, 3] = -y * sin_angle + x * cos_angle

    return out
ivas_processing_scripts/processing/chains.py +11 −1 Original line number Diff line number Diff line Loading @@ -237,6 +237,8 @@ def get_processing_chain( tmp_lp_cutoff = post_cfg.get("lp_cutoff") tmp_mnru_q = None tmp_esdru_alpha = None tmp_spatial_dist_amp = None tmp_spatial_dist_freq = None tx_condition = False ivas_jbm = False cond_fmt = [] Loading Loading @@ -365,10 +367,16 @@ def get_processing_chain( cod_cfg = cond_cfg["cod"] dec_cfg = cond_cfg["dec"] # enable ESDRU after IVAS condition # enable ESDRU and spatial distorition after IVAS condition if cond_cfg.get("esdru_alpha", None) is not None: tmp_esdru_alpha = cond_cfg.get("esdru_alpha", None) if (sd := cond_cfg.get("spatial_distortion", None)) is not None: tmp_spatial_dist_amp = sd.get("amplitude", None) tmp_spatial_dist_freq = sd.get("frequency", None) if tmp_spatial_dist_amp is None or tmp_spatial_dist_freq is None: raise ValueError("For spatial distortion amplitude and frequency values need to be given") # 9.6 kbit/s NB for EVS LFE coding only applies to EVS conditions evs_lfe_9k6bps_nb = cond_cfg.get("evs_lfe_9k6bps_nb", None) Loading Loading @@ -538,6 +546,8 @@ def get_processing_chain( "multiprocessing": cfg.multiprocessing, "mnru_q": tmp_mnru_q, "esdru_alpha": tmp_esdru_alpha, "spatial_distortion_amplitude": tmp_spatial_dist_amp, "spatial_distortion_frequency": tmp_spatial_dist_freq, "tx_condition": tx_condition, } ) Loading
ivas_processing_scripts/processing/config.py +3 −2 Original line number Diff line number Diff line Loading @@ -255,9 +255,10 @@ class TestConfig: f"The following key must be specified for ESDRU: {REQUIRED_KEYS_ESDRU}" ) if cond_cfg.get("ivas_rend", -1) != -1: if cfg["conditions_to_generate"][cond_name].get("ivas_rend", -1) != -1: merged_cfg = get_default_config_for_renderer("IVAS", codec_bin_extension) merge_dicts(merged_cfg, cond_cfg) cond_cfg_rend = cfg["conditions_to_generate"][cond_name] merge_dicts(merged_cfg, cond_cfg_rend) cfg["conditions_to_generate"][cond_name] = merged_cfg Loading