From 1755aa0b294904b66054b1366da0fdbf08485aa6 Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Mon, 17 Apr 2023 13:12:46 +0200 Subject: [PATCH 01/12] remove pyaudio3dtools and pyprocessing directories --- scripts/generate_test_items.py | 254 ----- .../IIS_BRIR_officialMPEG_222UC_SBA3.mat | 3 - .../IIS_BRIR_officialMPEG_222UC_combined.mat | 3 - .../IIS_BRIR_officialMPEG_222UC_full.mat | 3 - scripts/pyaudio3dtools/EFAP.py | 929 ------------------ .../HRIRs_mat/ORANGE_HRIR_53_48000_SBA3.mat | 3 - .../ORANGE_HRIR_53_48000_combined.mat | 3 - .../HRIRs_mat/ORANGE_HRIR_53_48000_full.mat | 3 - scripts/pyaudio3dtools/__init__.py | 52 - scripts/pyaudio3dtools/audio3dtools.py | 291 ------ scripts/pyaudio3dtools/audioarray.py | 473 --------- scripts/pyaudio3dtools/audiofile.py | 809 --------------- scripts/pyaudio3dtools/binauralrenderer.py | 782 --------------- scripts/pyaudio3dtools/constants.py | 392 -------- scripts/pyaudio3dtools/hoadecoder.py | 186 ---- scripts/pyaudio3dtools/masarenderer.py | 112 --- .../pyaudio3dtools/quaternions/__init__.py | 44 - .../pyaudio3dtools/quaternions/functions.py | 183 ---- scripts/pyaudio3dtools/rotation.py | 346 ------- scripts/pyaudio3dtools/spatialaudioconvert.py | 570 ----------- scripts/pyaudio3dtools/spatialaudioformat.py | 470 --------- scripts/pyaudio3dtools/spatialmetadata.py | 492 ---------- scripts/pyprocessing/__init__.py | 45 - scripts/pyprocessing/evs.py | 238 ----- scripts/pyprocessing/ivas.py | 200 ---- scripts/pyprocessing/prepost_processing.py | 220 ----- scripts/pyprocessing/processing.py | 123 --- scripts/pyprocessing/processing_configs.py | 375 ------- scripts/pyprocessing/utils.py | 148 --- 29 files changed, 7752 deletions(-) delete mode 100755 scripts/generate_test_items.py delete mode 100644 scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_SBA3.mat delete mode 100644 scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_combined.mat delete mode 100644 scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_full.mat delete mode 100644 scripts/pyaudio3dtools/EFAP.py delete mode 100644 scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_SBA3.mat delete mode 100644 scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_combined.mat delete mode 100644 scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_full.mat delete mode 100644 scripts/pyaudio3dtools/__init__.py delete mode 100755 scripts/pyaudio3dtools/audio3dtools.py delete mode 100644 scripts/pyaudio3dtools/audioarray.py delete mode 100644 scripts/pyaudio3dtools/audiofile.py delete mode 100644 scripts/pyaudio3dtools/binauralrenderer.py delete mode 100644 scripts/pyaudio3dtools/constants.py delete mode 100644 scripts/pyaudio3dtools/hoadecoder.py delete mode 100644 scripts/pyaudio3dtools/masarenderer.py delete mode 100644 scripts/pyaudio3dtools/quaternions/__init__.py delete mode 100644 scripts/pyaudio3dtools/quaternions/functions.py delete mode 100644 scripts/pyaudio3dtools/rotation.py delete mode 100644 scripts/pyaudio3dtools/spatialaudioconvert.py delete mode 100644 scripts/pyaudio3dtools/spatialaudioformat.py delete mode 100644 scripts/pyaudio3dtools/spatialmetadata.py delete mode 100644 scripts/pyprocessing/__init__.py delete mode 100644 scripts/pyprocessing/evs.py delete mode 100644 scripts/pyprocessing/ivas.py delete mode 100644 scripts/pyprocessing/prepost_processing.py delete mode 100644 scripts/pyprocessing/processing.py delete mode 100644 scripts/pyprocessing/processing_configs.py delete mode 100644 scripts/pyprocessing/utils.py diff --git a/scripts/generate_test_items.py b/scripts/generate_test_items.py deleted file mode 100755 index ad0717db9b..0000000000 --- a/scripts/generate_test_items.py +++ /dev/null @@ -1,254 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import argparse -import logging -import multiprocessing as mp -import os -from typing import Iterable - -from pyaudio3dtools import audiofile -from pyprocessing import processing, processing_configs, utils - -# Global logging options -logger = logging.getLogger(__name__) -LOGGER_MAIN_LOG_FILENAME = "log.txt" -LOGGER_PROC_ITEM_SUFFIX = "_log.txt" -LOGGER_FORMAT = "%(asctime)s | %(name)-12s | %(levelname)-8s | %(message)s" -LOGGER_DATEFMT = "%m-%d %H:%M" - - -def _run_condition_and_item( - out_folder: str, tmp_folder: str, item: str, proc_chain: Iterable -) -> None: - item_name, _ = os.path.splitext(os.path.basename(item)) - out_item = os.path.join(out_folder, item_name + ".wav") - - # Create a logfile for the item - fh = logging.FileHandler( - os.path.join(tmp_folder, item_name + LOGGER_PROC_ITEM_SUFFIX), mode="w" - ) - fh.setLevel(logging.DEBUG) - formatter = logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT) - fh.setFormatter(formatter) - logger.addHandler(fh) - - processing.process_chain(proc_chain, item, out_item, tmp_folder) - - # Log after completion - logger.removeHandler(fh) - output_nickname = "Done " + os.path.join( - os.path.basename(out_folder), item_name + ".wav" - ) - logger.info(f"{output_nickname:-^100.100}") - - -def main(test_cfg): - # Create pool if multiprocessing is enabled - if test_cfg.enable_multiprocessing: - pool = mp.Pool(mp.cpu_count()) - results = [] - - # pre-process items if required - if test_cfg.preproc_input: - condition = test_cfg.list_of_conditions[0] - out_folder = test_cfg.output_folders[0] - tmp_folder = test_cfg.tmp_folders[0] - - proc_id = condition["id"] - logger.info(" Condition: " + proc_id) - - proc_chain = condition["proc_chain"] - - for item in test_cfg.items_list: - logger.info(" Item: " + item) - if test_cfg.enable_multiprocessing: - results.append( - pool.apply_async( - _run_condition_and_item, - args=(out_folder, tmp_folder, item, proc_chain), - ) - ) - else: - _run_condition_and_item(out_folder, tmp_folder, item, proc_chain) - - if test_cfg.enable_multiprocessing: - pool.close() - pool.join() - for r in results: - r.get() - pool = mp.Pool(mp.cpu_count()) - results = [] - - test_cfg.items_list = utils.list_audio(out_folder, absolute=True) - - test_cfg.list_of_conditions = test_cfg.list_of_conditions[1:] - test_cfg.output_folders = test_cfg.output_folders[1:] - test_cfg.tmp_folders = test_cfg.tmp_folders[1:] - - # concatenate items if required - if test_cfg.concatenate_input: - if len(test_cfg.items_list) > 1: - concat_filename = os.path.join( - test_cfg.output_path, - os.path.basename(test_cfg.input_path) + "_concat.wav", - ) - audiofile.concatenatefiles( - test_cfg.items_list, - concat_filename, - test_cfg.concat_silence_pre, - test_cfg.concat_silence_post, - test_cfg.in_fs, - ) - # simply pad if single item - else: - concat_filename = os.path.join( - test_cfg.output_path, - os.path.splitext(os.path.basename(test_cfg.input_path))[0] - + "_padded.wav", - ) - audiofile.concatenatefiles( - test_cfg.items_list, - concat_filename, - test_cfg.concat_silence_pre, - test_cfg.concat_silence_post, - test_cfg.in_fs, - ) - test_cfg.items_list = [concat_filename] - - for condition, out_folder, tmp_folder in zip( - test_cfg.list_of_conditions, test_cfg.output_folders, test_cfg.tmp_folders - ): - proc_id = condition["id"] - logger.info(f" Condition: {proc_id} in {out_folder}") - - proc_chain = condition["proc_chain"] - - for item in test_cfg.items_list: - logger.info(f" Item: {item}") - if test_cfg.enable_multiprocessing: - results.append( - pool.apply_async( - _run_condition_and_item, - args=(out_folder, tmp_folder, item, proc_chain), - ) - ) - else: - _run_condition_and_item(out_folder, tmp_folder, item, proc_chain) - - if test_cfg.enable_multiprocessing: - pool.close() - pool.join() - for r in results: - r.get() - - # copy over JSON to main output directory - output_json = os.path.join(test_cfg.output_path, test_cfg.name + ".json") - with open(output_json, "w") as fp: - fp.write(test_cfg.json_out) - - # remove concatenated file - if ( - test_cfg.delete_tmp - and test_cfg.concatenate_input - and os.path.exists(concat_filename) - ): - os.remove(concat_filename) - - -if __name__ == "__main__": - # Parse command line - parser = argparse.ArgumentParser( - description="Generate test items. Refer to README.md for detailed usage instructions." - ) - parser.add_argument( - "-i", - "--infile", - required=True, - nargs="+", - help="Configuration file(s): FILE1.json FILE2.json ...", - ) - args = parser.parse_args() - - # Get all test configuration files to process - infile = args.infile - tests_list = [] - for infile in args.infile: - if os.path.isdir(infile): - tests_list.extend( - [ - os.path.join(infile, f) - for f in os.listdir(infile) - if f.endswith((".json")) - ] - ) - else: - tests_list.append(infile) - - # Read configuration file - for test in tests_list: - test_cfg = processing_configs.test_config(test) - - # context manager to create output folders and clean up temporary folders - delete_folders = test_cfg.tmp_folders if test_cfg.delete_tmp else [] - with utils.DirManager( - test_cfg.output_folders + test_cfg.tmp_folders, delete_folders - ): - - # Set up logging handlers - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - console_handler.setFormatter(logging.Formatter("%(message)s")) - - file_handler = logging.FileHandler( - os.path.join(test_cfg.output_path, LOGGER_MAIN_LOG_FILENAME), mode="w" - ) - file_handler.setLevel(logging.INFO) - file_handler.setFormatter( - logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT) - ) - - # Configure loggers - logging.basicConfig( - format=LOGGER_FORMAT, - datefmt=LOGGER_DATEFMT, - level=logging.INFO, - handlers=[console_handler, file_handler], - ) - - # Log some info - logger.info(f"===Generate test: {test}===") - logger.info(f"Test name: {test_cfg.name}") - logger.info(f"Input path: {test_cfg.input_path}") - logger.info(f"Output path: {test_cfg.output_path}") - - main(test_cfg) diff --git a/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_SBA3.mat b/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_SBA3.mat deleted file mode 100644 index 5bc7464f7f..0000000000 --- a/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_SBA3.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b41a527b6ba22b4c100265655ca801ee4d2dba3c3e03dc58f7cc5d99e397d2c3 -size 11795531 diff --git a/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_combined.mat b/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_combined.mat deleted file mode 100644 index 61ba946617..0000000000 --- a/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_combined.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:081a9053c8b04831d97e6f18d641d4737b2c23b076778a9b41c7b3a41d954c32 -size 6348446 diff --git a/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_full.mat b/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_full.mat deleted file mode 100644 index 440c8aedd6..0000000000 --- a/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_full.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0544d1cf80a7cceb156760107d81b10fd787807bb0ea1e74e9aeb552474b3373 -size 13233924 diff --git a/scripts/pyaudio3dtools/EFAP.py b/scripts/pyaudio3dtools/EFAP.py deleted file mode 100644 index dcc615355e..0000000000 --- a/scripts/pyaudio3dtools/EFAP.py +++ /dev/null @@ -1,929 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022 Baseline Development Group with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies OY, Orange, - Panasonic Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The Baseline Development Group consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies OY, Orange, - Panasonic Corporation, Qualcomm Technologies, Inc., and VoiceAge Corporation retain full ownership - rights in their respective contributions in the software. No license of any kind, including but not - limited to patent license, of any foregoing parties is hereby granted by implication, estoppel or - otherwise. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and/or fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import argparse -import os -from enum import Enum -from itertools import combinations -from typing import Optional, Tuple, Union - -import numpy as np - - -def wrap_angles( - azi: float, ele: float, clip_ele: Optional[bool] = False -) -> Tuple[float, float]: - """ - Wrap angles to (-180, 180] azimuth and [-90, 90] elevation - Takes into account hemisphere flips from large elevation changes unless clip_ele is specified - """ - if clip_ele: - ele = min(max(ele, -90), 90) - - if ele != 0 and ele % 90 == 0: - # if elevation is a multiple of 90, azimuth is irrelevant since we are at a pole - azi = 0 - while np.abs(ele) > 90: - ele -= 360 - else: - # wrap elevation value - while np.abs(ele) > 90: - # flip azimuth to other hemisphere - azi += 180 - - # compensate elevation accordingly - if ele > 90: - ele -= 180 - elif ele < -90: - ele += 180 - - # wrap azimuth value - while np.abs(azi) > 180: - azi = (azi + 180) % 360 - if azi < 0: - azi += 360 - azi -= 180 - - # set -180 azimuth to 180 - if azi == -180: - azi = 180 - - return azi, ele - - -class EfapDmxType(Enum): - NONE = 0 - AMPLITUDE = 1 - INTENSITY = 2 - - -class EfapVertex: - """ - Vertex data structure for EFAP - Initialises a vertex from the given spherical coordinate pair, with a flag specifying if it is a ghost loudspeaker - - - Parameters - ---------- - azi : float - Azimuth of vertex - ele : float - Elevation of vertex - is_ghost : bool - Whether the vertex is a ghost, default is False - dmx_type : EfapDmxType - Downmix type for ghost vertices - """ - - def __init__( - self, - azi: float, - ele: float, - is_ghost: Optional[bool] = False, - dmx_type: Optional[EfapDmxType] = EfapDmxType.INTENSITY, - ): - self.azi, self.ele = wrap_angles(azi, ele) - self.pos = np.array( - [ - np.cos(np.deg2rad(azi)) * np.cos(np.deg2rad(ele)), - np.sin(np.deg2rad(azi)) * np.cos(np.deg2rad(ele)), - np.sin(np.deg2rad(ele)), - ] - ) - - idx_azi = np.round(np.abs(90 - np.abs(self.azi))) - idx_ele = 90 - np.round(np.abs(self.ele)) - self.index = ( - idx_azi + 181 * idx_ele - ) # vertices on the median plane have lowest index - - self.is_ghost = is_ghost - self.dmx_type = dmx_type - - def __str__(self): - str_ = f"a{self.azi}e{self.ele}" - if self.is_ghost: - str_ += "*" - return str_ - - def __lt__(self, other): - return self.index < other.index - - -class EFAP: - """ - EFAP data structure - - Initialise EFAP data for computing panning gains - - - Parameters - ---------- - azimuths : np.ndarray - Azimuth positions of the loudspeaker array - elevations : npndarray - Elevation postions of the loudspeaker array - intensity_panning : bool - Whether intensity panning is enabled or not - - Examples - -------- - - >>> from EFAP import EFAP - >>> panner = EFAP([30, -30, 0, 110, -110], [0, 0, 0, 0, 0], False) - >>> panner.pan(15, 45) - array([0.66742381, 0.19069252, 0.66742381, 0.19069252, 0.19069252]) - - """ - - _EFAP_HULL_TOL = 1e-4 # tolerance for a point to be added to the convex hull - _EFAP_MAX_AZI_GAP = 160 # maximum allowed angular gap in the middle layer - _EFAP_POLAR_ELE = 90 # elevation of north / south poles (zenith / nadir) - _EFAP_THRESH_COPLANAR = 1e-3 # tolerance for points to be considered coplanar - _EFAP_THRESH_MID_LAYER = 45 # elevation threshold for loudspeakers to be considered as in the middle layer - _EFAP_THRESH_POLES = 1e-6 # tolerance for a vertex to be considered polar - _EFAP_THRESH_TRI = 1e-10 # tolerance for a point to be inside a triangle - - def __init__( - self, - azimuths: Union[list, np.ndarray], - elevations: Union[list, np.ndarray], - intensity_panning: Optional[bool] = False, - ): - # validation - azimuths = np.array(azimuths) - elevations = np.array(elevations) - if np.squeeze(azimuths).ndim > 1: - raise ValueError("Too many dimensions for loudspeaker azimuth array") - if np.squeeze(elevations).ndim > 1: - raise ValueError("Too many dimensions for loudspeaker elevations array") - if azimuths.shape != elevations.shape: - raise ValueError("Mismatch between loudspeaker azimuths and elevations") - - # set EFIP flag - self.intensity_panning = intensity_panning - - # initialise vertices and add ghost loudspeakers if needed - self.verts = np.array( - [EfapVertex(azi, ele) for azi, ele in zip(azimuths, elevations)] - ) - self._add_ghost_speakers() - - # formulate initial tetrahedron for the convex hull - self._init_simplex() - - # add the remaining vertices to the convex hull in order of their index - for i in np.argsort(self.verts): - if self.verts[i] not in self.verts[self.tris]: - self._add_vertex_to_hull(i) - - # compute downmix matrix with remapped ghost speakers - self._remap_ghost_speakers() - - # set vertices near poles to have NaN azimuth - for v in self.verts: - if ( - v.ele > self._EFAP_POLAR_ELE - self._EFAP_THRESH_POLES - or v.ele < self._EFAP_THRESH_POLES - self._EFAP_POLAR_ELE - ): - v.azi = np.nan - - # combine triangles into polygons - self._tri2poly() - - def _add_ghost_speakers(self) -> None: - """ - Add ghost loudspeakers at the poles, or to fill large horizontal gaps - """ - ele = [v.ele for v in self.verts] - - dmx_type = EfapDmxType.INTENSITY - - # add ghost loudspeakers at the poles if necessary - if max(ele) < self._EFAP_POLAR_ELE: - - if self.intensity_panning: - if max(ele) > self._EFAP_THRESH_MID_LAYER: - dmx_type = EfapDmxType.NONE - else: - dmx_type = EfapDmxType.AMPLITUDE - - self.verts = np.append(self.verts, EfapVertex(0, 90, True, dmx_type)) - - if min(ele) > -self._EFAP_POLAR_ELE: - - if self.intensity_panning: - if min(ele) < -self._EFAP_THRESH_MID_LAYER: - dmx_type = EfapDmxType.NONE - else: - dmx_type = EfapDmxType.AMPLITUDE - - self.verts = np.append(self.verts, EfapVertex(0, -90, True, dmx_type)) - - # check for large gaps in the middle horizontal layer - mid_spkrs = [ - v.azi for v in self.verts if np.abs(v.ele) < self._EFAP_THRESH_MID_LAYER - ] - - # no speakers in middle layer; add a triangle of ghost speakers - if not mid_spkrs: - self.verts = np.append( - self.verts, - [ - EfapVertex(0, 0, True), - EfapVertex(180, 0, True), - EfapVertex(240, 0, True), - ], - ) - # only one speaker in the threshold; add two ghost speakers to form a triangle - elif len(mid_spkrs) == 1: - self.verts = np.append( - self.verts, - [ - EfapVertex(mid_spkrs[0] + 120, 0, True), - EfapVertex(mid_spkrs[0] + 240, 0, True), - ], - ) - # search for and fill gaps greater than MAX_AZI_GAP - else: - mid_spkrs = np.sort(mid_spkrs) - angle_diff = np.diff(np.concatenate([mid_spkrs, [mid_spkrs[0] + 360]])) - sectors = np.ceil(angle_diff / self._EFAP_MAX_AZI_GAP) - - for i, s in enumerate(sectors): - if s > 1: - new_diff = angle_diff[i] / s - num_new = s - 1 - for k in range(int(num_new)): - new_azi = mid_spkrs[i] + (k + 1) * new_diff - self.verts = np.append(self.verts, EfapVertex(new_azi, 0, True)) - - def _init_simplex(self) -> None: - """ - Create an initial tetrahedron / simplex for the convex hull from 4 vertices - """ - # take the first vertex as seed - t = [0] - - # attempt to form an edge with non-zero length - for i, v in enumerate(self.verts): - if ( - v.azi != self.verts[t[0]].azi or v.ele != self.verts[t[0]].ele - ) and i not in t: - t.append(i) - break - else: - raise ValueError("Vertices are conincident!") - - # attempt to form a triangle with non-zero area - for i, v in enumerate(self.verts): - if ( - np.linalg.norm( - np.cross( - self.verts[t[1]].pos - self.verts[t[0]].pos, - v.pos - self.verts[t[0]].pos, - ), - 2, - ) - > self._EFAP_HULL_TOL - and i not in t - ): - t.append(i) - break - else: - raise ValueError("Vertices are colinear!") - - # attempt to form a tetrahedron with non-zero volume - for i, v in enumerate(self.verts): - if ( - np.abs( - np.dot( - np.cross( - self.verts[t[1]].pos - self.verts[t[0]].pos, - self.verts[t[2]].pos - self.verts[t[0]].pos, - ), - v.pos - self.verts[t[0]].pos, - ) - ) - ) > self._EFAP_HULL_TOL and i not in t: - t.append(i) - break - else: - raise ValueError("Vertices are coplanar!") - - # create a list of the triangles of the initial simplex / tetrahedron - t = np.array(t) - self.tris = np.array([t[[0, 1, 2]], t[[0, 1, 3]], t[[0, 2, 3]], t[[1, 2, 3]]]) - - # orient the triangle surface planes outwards from the centroid - self.centroid = np.mean([self.verts[i].pos for i in t], axis=0) - for i, tri in enumerate(self.tris): - self.tris[i, :] = self._flip_plane(tri) - - def _add_vertex_to_hull(self, idx_new_vert: int) -> None: - """ - Add a vertex to the convex hull and update the list of triangles in the hull - """ - # compute the centroid of the current convex hull - self.centroid = np.mean( - [self.verts[i].pos for i in np.unique(self.tris)], axis=0 - ) - - tris_new = [] - visible = [] - - # find which hull surfaces are visible from the new vertex - for i, tri in enumerate(self.tris): - if self._vertex_dist(tri, idx_new_vert) > -1e-6: - visible.append(i) - else: - tris_new.append(tri) - - tris_new = np.array(tris_new) - visible = np.array(visible, dtype=int) - - # find edges of the visible hull surfaces - max_vert = np.amax(self.tris[visible]) + 1 - counter = np.zeros([max_vert, max_vert]) - for i, tri in enumerate(self.tris[visible]): - surface = np.append(tri, tri[0]) - for n in range(3): - a = surface[n] - b = surface[n + 1] - counter[a, b] = counter[a, b] + 1 - - counter += counter.T - - edges = [] - for a in range(max_vert - 1): - for b in range(a + 1, max_vert): - if counter[a, b] == 1: - edges.append([a, b]) - edges = np.vstack(edges) - - # break the edges visible from the new vertex and add the new triangle - for e in edges: - tris_new = np.vstack( - [tris_new, self._flip_plane(np.append(e, idx_new_vert))] - ) - - # update the list of triangles in the convex hull - self.tris = tris_new - - def _remap_ghost_speakers(self) -> None: - """ - Remove unused ghost speakers and compute a downmix matrix for the rest - """ - # find ghosts that are not part of the convex hull - ghosts = [i for i, v in enumerate(self.verts) if v.is_ghost] - unused_ghosts = np.compress( - np.isin(ghosts, np.unique(self.tris), invert=True), ghosts - ) - - if unused_ghosts.size > 0: - # remove the unused ghosts from the triangle array and also adjust indices - self.tris[self.tris > unused_ghosts.min()] -= unused_ghosts.size - # delete them from the vertex array - self.verts = np.delete(self.verts, unused_ghosts) - - # generate initial sound energy distribution matrix - n_vtx = len(self.verts) - n_ghost = len(ghosts) - len(unused_ghosts) - - M = np.eye(n_vtx) - for i, v in enumerate(self.verts): - if v.is_ghost: - neighbours = self._get_neighbours(i) - M[:, i] = np.zeros(n_vtx) - M[neighbours, i] = np.ones(len(neighbours)) / len(neighbours) - - # re-distribute sound energy from ghosts - M2 = M.copy() - for i, v in enumerate(self.verts): - if v.is_ghost: - vec = M[:, i] - while np.sum(vec[-n_ghost:]) > 1e-4: - vec = M @ vec - M2[:, i] = vec - - self.dmx_mat = M2[:-n_ghost, :] - - # amplitude downmix for real loudspeakers - self.dmx_mat[:, :-n_ghost] = np.sqrt(self.dmx_mat[:, :-n_ghost]) - - # distribute ghosts according to downmix type - for i, v in enumerate(self.verts): - if v.is_ghost: - if v.dmx_type == EfapDmxType.NONE: - self.dmx_mat[:, i] = 0 - elif v.dmx_type == EfapDmxType.AMPLITUDE: - pass - else: - self.dmx_mat[:, i] = np.sqrt(self.dmx_mat[:, i]) - - def _tri2poly(self) -> None: - """ - Merge hull triangles into polygons if they are coplanar - """ - polys = [] - - for tri in self.tris: - # find all vertices coplanar with this triangle (including those already in the triangle) - new_poly = np.array( - [ - i - for i, _ in enumerate(self.verts) - if np.abs(self._vertex_dist(tri, i)) < self._EFAP_THRESH_COPLANAR - ] - ) - - # check if we already found this polygon as a complete subset - is_subset = [ - i for i, poly in enumerate(polys) if np.all(np.isin(new_poly, poly)) - ] - is_superset = [ - i for i, poly in enumerate(polys) if np.all(np.isin(poly, new_poly)) - ] - - if is_subset: - continue - elif is_superset: - # remove the other polygon since it will be replaced by the superset polygon - polys_new = [p for i, p in enumerate(polys) if i not in is_superset] - polys = polys_new - - # orient the polygon plane in the same direction as the triangle - P1 = self.verts[tri[0]].pos - P2 = self.verts[tri[1]].pos - P3 = self.verts[tri[2]].pos - - # first base vector - U = P2 - P1 - U = U / np.linalg.norm(U) - - # second base vector - V = P3 - P2 - V = V - np.dot(U, V) * U - V = V / np.linalg.norm(V) - - # center of the first triangle - M = np.mean([P1, P2, P3], axis=0) - - # sort vertices - azi = np.zeros_like(new_poly, dtype=float) - for i, idx_v in enumerate(new_poly): - P = self.verts[idx_v].pos - M - X = np.dot(P, U) - Y = np.dot(P, V) - azi[i] = np.arctan2(Y, X) - - idx = np.argsort(azi) - new_poly = new_poly[idx] - - # add the polygon to the main list - polys.append(new_poly) - - self.polys = polys - - def _pan_EFAP_poly( - self, azimuth: float, elevation: float, poly: np.ndarray, mod: int - ) -> np.ndarray: - """ - Compute panning gains for each vertex in the given polygon - - - Parameters - ---------- - azimuth : float - Azimuth of requested panning position - elevation : float - Elevation of requested panning position - poly : np.ndarray - Array of vertices defining the polygon - - Returns - ------- - poly_gain: np.ndarray - Gains for each vertex in the polygon - """ - poly_gain = np.zeros_like(poly, dtype=float) - - P = np.array([azimuth, elevation]) - # search for the triangle of the polygon in which P belongs - for i in range(1, poly.size + 1): - A = np.array([self.verts[poly[i - 1]].azi, self.verts[poly[i - 1]].ele]) - for j in range(i, poly.size - 2 + i): - idx1 = 1 + (j % poly.size) - idx2 = 1 + (idx1 % poly.size) - B = np.array( - [self.verts[poly[idx1 - 1]].azi, self.verts[poly[idx1 - 1]].ele] - ) - C = np.array( - [self.verts[poly[idx2 - 1]].azi, self.verts[poly[idx2 - 1]].ele] - ) - - if mod: - if not np.isnan(A[0]): - A[0] %= mod - if not np.isnan(B[0]): - B[0] %= mod - if not np.isnan(C[0]): - C[0] %= mod - - if self._in_triangle(P, A, B, C): - N = np.transpose([B[1] - C[1], C[0] - B[0]]) - N = N / np.dot(N, B - A) - poly_gain[i - 1] = 1 - np.dot(P - A, N) - - """ DEBUGGING / TODO """ - # set gains <= -60dB to 0 - poly_gain[np.abs(poly_gain) < 1e-6] = 0 - - return poly_gain - - """ geometric / math helper functions """ - - def _get_neighbours(self, idx_vert: int) -> np.ndarray: - """ - Find triangles containing the given vertex index (neighbouring vertices) - """ - n = self.tris[np.any(np.isin(self.tris, idx_vert), axis=1)] - return np.unique(n[n != idx_vert]) - - def _get_azi_ele(self, idx_vert: int) -> Tuple[float, float]: - """ - Return a tuple of (azi, ele) for a vertex at the given index - """ - return self.verts[idx_vert].azi, self.verts[idx_vert].ele - - def _in_polygon( - self, azimuth: float, elevation: float, poly: np.ndarray - ) -> Tuple[bool, int]: - """ - Determine whether the panning position lies within the given polygon - by iteratively checking its triangles - - Parameters - ---------- - azimuth : float - Azimuth of requested panning position - elevation : float - Elevation of requested panning position - poly : np.ndarray - Array of vertices defining the polygon - - Returns - ------- - in_polygon, mod: Tuple[bool, int] - Flag indicating whether the point is inside the given polygon - Value of wrapping required if used - """ - azi = [self.verts[v].azi for v in poly] - - P = np.array([azimuth, elevation]) - - for tri in combinations(poly, 3): - A = np.array(self._get_azi_ele(tri[0])) - B = np.array(self._get_azi_ele(tri[1])) - C = np.array(self._get_azi_ele(tri[2])) - if self._in_triangle(P, A, B, C): - return True, None - - # if the azimuth difference is large, perform the 2D check again with azimuths wrapped to (-360, 0] and [0, 360) - if np.nanmax(azi) - np.nanmin(azi) > 180: - for tri in combinations(poly, 3): - A = np.array(self._get_azi_ele(tri[0])) - B = np.array(self._get_azi_ele(tri[1])) - C = np.array(self._get_azi_ele(tri[2])) - if not np.isnan(A[0]): - A[0] %= 360 - if not np.isnan(B[0]): - B[0] %= 360 - if not np.isnan(C[0]): - C[0] %= 360 - if self._in_triangle(P, A, B, C): - return True, 360 - - for tri in combinations(poly, 3): - A = np.array(self._get_azi_ele(tri[0])) - B = np.array(self._get_azi_ele(tri[1])) - C = np.array(self._get_azi_ele(tri[2])) - if not np.isnan(A[0]): - A[0] %= -360 - if not np.isnan(B[0]): - B[0] %= -360 - if not np.isnan(C[0]): - C[0] %= -360 - if self._in_triangle(P, A, B, C): - return True, -360 - - return False, None - - def _in_triangle( - self, P: np.ndarray, A: np.ndarray, B: np.ndarray, C: np.ndarray - ) -> bool: - """ - Determine whether the panning position lies within the given triangle - - Parameters - ---------- - P : float - Point under test - A : float - First vertex of the triangle - B : float - Second vertex of the triangle - C : float - Third vertex of the triangle - - - Returns - ------- - bool - Flag indicating whether the point is inside the given triangle - """ - if np.isnan(A[0]): - A[0] = P[0] - - if np.isnan(B[0]): - B[0] = P[0] - - if np.isnan(C[0]): - C[0] = P[0] - - tmpMat = np.transpose([B - A, C - A]) - if (1 / np.linalg.cond(tmpMat)) < self._EFAP_THRESH_TRI: - return False - - Minv = np.linalg.inv(tmpMat) - S = Minv @ (P - A) - - if ( - S[0] < -self._EFAP_THRESH_TRI - or S[1] < -self._EFAP_THRESH_TRI - or S[0] + S[1] > 1 + self._EFAP_THRESH_TRI - ): - return False - - return True - - def _vertex_dist(self, surface: np.ndarray, idx_vert: int) -> float: - """ - Compute the distance of a vertex from a given plane - - Parameters - ---------- - surface : np.ndarray - Array of 3 ordered vertices defining the plane and its orientation - idx_vert: int - Index of the vertex to compute the distance for - - Returns - ------- - float - Distance of the vertex from the given plane - """ - return self._point_plane_dist( - self.verts[surface[0]].pos, - self.verts[surface[1]].pos, - self.verts[surface[2]].pos, - self.verts[idx_vert].pos, - ) - - def _point_plane_dist( - self, P1: np.ndarray, P2: np.ndarray, P3: np.ndarray, X: np.ndarray - ) -> float: - """ - Compute the distance of a vertex from a plane defined by three points - - Parameters - ---------- - P1 : np.ndarray - Cartesian coordinates of the first point - P2 : np.ndarray - Cartesian coordinates of the second point - P3 : np.ndarray - Cartesian coordinates of the third point - X: np.ndarray - Cartesian coordinates of the vertex - - Returns - ------- - float - Distance of the vertex from the given plane - """ - - if np.all(X == P1) or np.all(X == P2) or np.all(X == P3): - return 0 - else: - N = np.cross(P1 - P2, P1 - P3) - return np.dot(X - P1, N / np.linalg.norm(N)) - - def _flip_plane(self, surface: np.ndarray) -> np.ndarray: - """ - Flip the orientation of a plane (invert normal vector) - - Parameters - ---------- - surface : np.ndarray - Array of 3 ordered vertices defining the plane and its orientation - - Returns - ------- - surface : np.ndarray - Reordered vertices with plane normal pointing outwards from the hull centroid - """ - if ( - self._point_plane_dist( - self.verts[surface[0]].pos, - self.verts[surface[1]].pos, - self.verts[surface[2]].pos, - self.centroid, - ) - > 0 - ): - surface = np.flip(surface.copy()) - - return surface - - def _compute_gains_point(self, azimuth: float, elevation: float) -> np.ndarray: - """ - Compute gains for the requested panning position - - - Parameters - ---------- - azimuth : float - Azimuth of requested panning position - elevation : float - Elevation of requested panning position - - Returns - ------- - gains: np.ndarray - Panning gains for the loudspeaker layout - """ - if np.isnan(azimuth) or np.isnan(elevation): - raise ValueError(f"Angles cannot be NaNs : ({azimuth}, {elevation})") - - azimuth, elevation = wrap_angles(azimuth, elevation) - point_pos = [ - np.cos(np.deg2rad(azimuth)) * np.cos(np.deg2rad(elevation)), - np.sin(np.deg2rad(azimuth)) * np.cos(np.deg2rad(elevation)), - np.sin(np.deg2rad(elevation)), - ] - - # filter the polygon list with a quick 2d check - found_polys = [] - for poly in self.polys: - in_poly, mod = self._in_polygon(azimuth, elevation, poly) - if in_poly: - found_polys.append((poly, mod)) - - if not found_polys: - raise AssertionError("Unexpected error during panning") - - # find a visible polygon with the smallest distance - dist = [] - - for poly, mod in found_polys: - surface = self.verts[poly] - d = self._point_plane_dist( - surface[0].pos, - surface[1].pos, - surface[2].pos, - point_pos, - ) - if d >= 0: - dist.append(d) - else: - dist.append(np.inf) - - found_poly, mod = found_polys[np.argmin(dist)] - - # compute gains for the polygon vertices - poly_gain = self._pan_EFAP_poly(azimuth, elevation, found_poly, mod) - - # downmix ghost loudspeakers - gains = np.zeros(self.verts.size) - gains[found_poly] = poly_gain / np.linalg.norm(poly_gain) - gains = gains @ self.dmx_mat.T - gains = gains / np.linalg.norm(gains) - - if self.intensity_panning: - gains = np.sqrt(gains / np.sum(gains)) - - return gains - - """ public functions """ - - def pan( - self, azimuths: float, elevations: float, intensity_panning: bool = False - ) -> np.ndarray: - """ - Compute gains for the requested panning position - - - Parameters - ---------- - azimuth : float - Azimuth of requested panning position - elevation : float - Elevation of requested panning position - intensity_panning : bool - Flag whether to use intensity panning (Default is False == amplitude panning) - - Returns - ------- - gains: np.ndarray - Panning gains for the loudspeaker layout - """ - azimuths = np.array(azimuths) - elevations = np.array(elevations) - if azimuths.size == 1 and elevations.size == 1: - return self._compute_gains_point(azimuths, elevations) - elif np.squeeze(azimuths).ndim == 1 and np.squeeze(elevations).ndim == 1: - gains = [] - for a, e in zip(azimuths, elevations): - gains.append(self._compute_gains_point(a, e)) - return np.vstack(gains) - else: - raise ValueError( - "Azimuth and Elevation arrays cannot have more than one dimension and must be of equal size" - ) - - -def main(args): - """ - Parses a speaker layout text file and prints the panning gains - for the requested position - - - Parameters - ---------- - args : tuple - Command line arguments - - """ - - speaker_positions = np.loadtxt( - os.path.abspath(args.input), delimiter=",", max_rows=2 - ) - panner = EFAP(speaker_positions[0, :], speaker_positions[1, :], args.efip) - print(panner.pan(args.azimuth, args.elevation)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Edge-Fading Amplitude Panning") - parser.add_argument( - "-i", - "--input", - metavar="layout_file", - required=True, - type=str, - help="IVAS compatible loudspeaker layout file (Loudspeaker azimuths in first line, elevations in second, subsequent lines are ignored)", - ) - parser.add_argument( - "-efip", - "-intensity_panning", - default=False, - action="store_true", - help="Intensity panning mode (EFIP)", - ) - parser.add_argument( - "azimuth", - type=float, - help="Azimuth of direction to compute panning gains for (positive-left)", - ) - parser.add_argument( - "elevation", - type=float, - help="Elevation of direction to compute panning gains for (positive-up)", - ) - args = parser.parse_args() - main(args) diff --git a/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_SBA3.mat b/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_SBA3.mat deleted file mode 100644 index 0d113a34af..0000000000 --- a/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_SBA3.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:02c8a25178b36399054c1802f00bb5a8739f3ac950c21b0c760c046b1dba530d -size 36201 diff --git a/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_combined.mat b/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_combined.mat deleted file mode 100644 index e52e031e8c..0000000000 --- a/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_combined.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9660be83192f7babb4f67e19653a94bc02cee7b3071065880cf618547c19d842 -size 20138 diff --git a/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_full.mat b/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_full.mat deleted file mode 100644 index f2c22c39ec..0000000000 --- a/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_full.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:05ae461fc303c8498d5912ebe37cd1601c077c2505baf3da3dbe12b37e8f2cf0 -size 14097574 diff --git a/scripts/pyaudio3dtools/__init__.py b/scripts/pyaudio3dtools/__init__.py deleted file mode 100644 index 9870fb6620..0000000000 --- a/scripts/pyaudio3dtools/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -""" -pyaudio3dtools -==== - -Provides - Basic methods for handling 3D audio in different formats (channel-based, object-based, Ambisonics) - -Imports -------- -functions -class -""" -from . import ( - audioarray, - audiofile, - binauralrenderer, - hoadecoder, - spatialaudioconvert, - spatialaudioformat, - spatialmetadata, -) -from .EFAP import EFAP diff --git a/scripts/pyaudio3dtools/audio3dtools.py b/scripts/pyaudio3dtools/audio3dtools.py deleted file mode 100755 index e6c03d7b73..0000000000 --- a/scripts/pyaudio3dtools/audio3dtools.py +++ /dev/null @@ -1,291 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import argparse -import logging -import os - -from pyaudio3dtools import ( - audiofile, - binauralrenderer, - spatialaudioconvert, - spatialaudioformat, -) - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - - -def main(): - parser = argparse.ArgumentParser( - description="Audio3DTools: Convert/Manipulate spatial audio files." - ) - - """ Required arguments """ - parser.add_argument( - "-i", - "--infiles", - required=True, - type=str, - help="input file *.wav or *.pcm or directory", - default=None, - ) - parser.add_argument( - "-o", - "--outdir", - required=True, - type=str, - help="output file *.wav or directory", - default="out", - ) - parser.add_argument( - "-f", - "--informat", - required=True, - type=str, - metavar="INFORMAT", - help="Input format (use -l/-L for a list)", - default=None, - ) - - """ Additional arguments """ - parser.add_argument( - "-F", - "--outformat", - type=str, - metavar="OUTFORMAT", - help="Output format (default = %(default)s, same as input format). Can be a custom loudspeaker layout file.", - default=None, - ) - parser.add_argument( - "-s", - "--infs", - type=int, - help="Input sampling rate (Hz) (default = %(default)s, deduced for input file)", - default=None, - ) - parser.add_argument( - "-S", - "--outfs", - type=int, - help="Output sampling rate (Hz) (default = %(default)s, same as input)", - default=None, - ) - parser.add_argument( - "-c", - "--inchan", - type=int, - help="Input number of channels (default = %(default)s, deduced for input file)", - default=None, - ) - parser.add_argument( - "-m", - "--metadata", - type=str, - nargs="+", - help="list of input metadata files (only relevant for ISM and MASA input)", - default=None, - ) - parser.add_argument( - "-fc", - "--outfc", - type=int, - help="Cut-off freq for eventual low-pass filtering (default = %(default)s)", - default=None, - ) - parser.add_argument( - "-T", - "--trajectory", - type=str, - help="Head-tracking trajectory file (default = %(default)s)", - default=None, - ) - parser.add_argument( - "-n", - "--normalize", - default=None, - type=int, - help="Normalize to given loudness with --LOUDNESS_TOOL (default = %(default)s)", - ) - - """ Miscellaneous or meta arguments """ - parser.add_argument( - "-b", - "--binaural", - help="Binauralize output *in addition to converting to output format", - action="store_true", - ) - parser.add_argument( - "--binaural_dataset", - type=str, - help="Dataset to use for binaural rendering (default = %(default)s)", - choices=["orange51", "orange52", "orange53", "orange54", "sadie"], - default="orange53", - ) - parser.add_argument( - "-l", - "--list", - help="list all supported spatial audio formats", - action="store_true", - ) - parser.add_argument( - "-L", - "--long", - help="list all supported spatial audio formats with long description", - action="store_true", - ) - parser.add_argument( - "-lt", - "--loudness_tool", - default="bs1770demo", - type=str, - help="Loudness tool to use: bs1770demo [default] or sv56demo (tool must be in $PATH or a path to the binary)", - ) - parser.add_argument( - "-rn", - "--dont-rename", - help="Disable default behaviour of renaming output files _.", - action="store_true", - ) - args = parser.parse_args() - - # Set up logging handlers - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - console_handler.setFormatter(logging.Formatter("%(message)s")) - - # Configure loggers - LOGGER_FORMAT = "%(asctime)s | %(name)-12s | %(levelname)-8s | %(message)s" - LOGGER_DATEFMT = "%m-%d %H:%M" - logging.basicConfig( - format=LOGGER_FORMAT, - datefmt=LOGGER_DATEFMT, - level=logging.INFO, - handlers=[console_handler], - ) - logger.info("Audio3DTools") - logger.info( - "Attention: you are using an older version of the pyaudio3dtools scripts (not including ISM-> binaural reference renderer or loudness tool)" - ) - logger.info("For the newest version see branch python_scripts_updates") - - if args.list is True or args.long is True: - logger.info("===Supported spatial audio formats===") - spatialaudioformat.Format.list_all(args.long) - - elif args.infiles is not None: - logger.info("===Convert spatial audio file===") - # Input folder can be a path, a file or a list of files - if os.path.isdir(args.infiles): - path = args.infiles - audio_list = [ - os.path.join(path, f) for f in os.listdir(path) if f.endswith((".wav")) - ] - else: - audio_list = [args.infiles] - - outdir = args.outdir - _, output_ext = os.path.splitext(os.path.basename(outdir)) - if (len(audio_list) == 1) and ( - (output_ext.lower() == ".wav") or (output_ext.lower() == ".pcm") - ): - outfile = outdir - else: - outfile = None - if not os.path.exists(outdir): - os.makedirs(outdir) - - for infile in audio_list: - logger.info(f" process {infile}") - - _, input_ext = os.path.splitext(os.path.basename(infile)) - - if outfile is None: - outfile = os.path.basename(infile) - if not args.dont_rename: - if args.outformat is not None: - outfile = outfile.replace(input_ext, f"_{args.outformat}.wav") - else: - outfile = outfile.replace(input_ext, ".out.wav") - outfile = os.path.join(outdir, outfile) - - spatialaudioconvert.spatial_audio_convert( - infile, - outfile, - in_format=args.informat, - in_fs=args.infs, - in_nchans=args.inchan, - in_meta_files=args.metadata, - out_format=args.outformat, - out_fs=args.outfs, - out_fc=args.outfc, - output_loudness=args.normalize, - loudness_tool=args.loudness_tool, - trajectory=args.trajectory, - binaural_dataset=args.binaural_dataset, - ) - - logger.info(f" Output {outfile}") - - if args.binaural: - if args.outformat.startswith("BINAURAL"): - raise SystemExit( - "BINAURAL output format can not be binauralized again!" - ) - - _, output_ext = os.path.splitext(os.path.basename(outfile)) - outfile_bin = outfile.replace(output_ext, "_BINAURAL.wav") - logger.info(f" Output binaural {outfile_bin}") - - spatialaudioconvert.spatial_audio_convert( - in_file=outfile, - out_file=outfile_bin, - in_format=args.outformat, - in_fs=args.outfs, - in_meta_files=args.metadata, - out_format="BINAURAL", - output_loudness=args.normalize, - loudness_tool=args.loudness_tool, - trajectory=args.trajectory, - binaural_dataset=args.binaural_dataset, - ) - - outfile = None - else: - raise Exception( - "Input file must be provided for conversion and audio manipulation." - ) - - -if __name__ == "__main__": - main() diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py deleted file mode 100644 index 87fc50b463..0000000000 --- a/scripts/pyaudio3dtools/audioarray.py +++ /dev/null @@ -1,473 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import logging -import math -from typing import Callable, Iterable, Optional, Tuple - -import numpy as np -import multiprocessing as mp -import scipy.signal as sig - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - - -def convert( - x: np.ndarray, - out_nchans: Optional[int] = None, - in_fs: Optional[int] = None, - out_fs: Optional[int] = None, -) -> np.ndarray: - """Convert audio array, change nchannels and sampling rate - - Parameters - ---------- - x: numpy array - Input audio array - out_nchans: Optional[int] - Number of output channels, default out_nchans = in_nchans - in_fs: Optional[int] - Input sampling rate, default 48 kHz if required - out_fs: Optional[int] - Output sampling rate, default out_fs = in_fs - - Returns - ------- - y: numpy array - Ouput converted array - - """ - # Input config - if in_fs is None: - in_fs = 48000 - in_nchans = x.shape[1] - - # Output config - y = x - if out_fs is None: - out_fs = in_fs - if out_nchans is None: - out_nchans = in_nchans - - logger.debug(f"Input audio array: {x.shape[0]} by {x.shape[1]}") - - # Process - if in_nchans == out_nchans and in_fs == out_fs: - logger.debug("Convert file: nothing to be done") - else: - # adjust number of channels - if out_nchans < in_nchans: - y = y[:, 0:out_nchans] - elif out_nchans > in_nchans: - y = np.append(y, np.zeros([y.shape[0], out_nchans - in_nchans]), axis=1) - - # adjust sampling rate - y = resample(y, in_fs, out_fs) - - logger.debug(f"Output audio array: {y.shape[0]} by {y.shape[1]}") - - return y - - -def resample(x: np.ndarray, in_freq: int, out_freq: int) -> np.ndarray: - """Resample a multi-channel audio array - - Parameters - ---------- - x: numpy array - Input array - in_fs: int - Input sampling rate - out_fs: int - Output sampling rate - - Returns - ------- - y: - Output resampled numpy array - - """ - - if in_freq == out_freq or out_freq is None: - y = x - else: - # get gcd of original and deisred frequency - gcd = math.gcd(in_freq, out_freq) - - # calculate up-sampling factor - up_factor = int(out_freq / gcd) - - # calculate downsampling factor - down_factor = int(in_freq / gcd) - - # resample data using polyphase filtering across columns/channels - if x.ndim == 2: - y = sig.resample_poly(x[:, 0], up_factor, down_factor) - y = np.reshape(y, (y.shape[0], 1)) - for k in range(1, x.shape[1]): - a = sig.resample_poly(x[:, k], up_factor, down_factor) - a = np.reshape(a, (a.shape[0], 1)) - y = np.append(y, a, axis=1) - else: - y = sig.resample_poly(x, up_factor, down_factor) - - return y - - -def lpfilter(x: np.ndarray, fc: int, fs: int) -> np.ndarray: - """Low-pass filter a multi-channel audio array - - Parameters - ---------- - x: numpy array - Input array - fc: int - Cutoff frequency in Hz - out_fs: int - Sampling rate in Hz - - Returns - ------- - y: numpy array - Output low-pass filtered array - - """ - if (fc + 500) < (fs / 2.0): - # Design a Chebychev Type II filter, band_pass-band_stop = 500 Hz - N, Wn = sig.cheb2ord(fc / (fs / 2), (fc + 500) / (fs / 2), 3, 60) - b, a = sig.cheby2(N, 60, Wn, "low") - - # Apply the Butterworth filter for each channels, across time axis - # y = sig.lfilter(b, a, axis=0) # non zero-phase filter - y = sig.filtfilt(b, a, x, axis=0) # zero-phae filer, batch processing - else: - y = x - - return y - - -def cut(x: np.ndarray, limits: Tuple[int, int]) -> np.ndarray: - """Cut an audio array - - Parameters - ---------- - x: numpy array - Input array - limits: Tuple[int, int] - first and last samples to extract - - Returns - ------- - y: numpy array - Output cut array - """ - - in_samples, in_channels = x.shape - first_sample = limits[0] - last_sample = limits[1] - - if first_sample == 0 and (last_sample == -1 or last_sample == in_samples): - y = x - else: - if last_sample == -1: - last_sample = in_samples - - signal_start = first_sample - signal_end = last_sample - insert_start = 0 - insert_end = last_sample - first_sample - total_samples = last_sample - first_sample - if first_sample < 0: - samples_to_pad_begin = -first_sample - insert_start = samples_to_pad_begin - insert_end += samples_to_pad_begin - if last_sample > in_samples: - signal_end = in_samples - insert_end = insert_end - last_sample + in_samples - y = np.zeros([total_samples, in_channels], dtype=x.dtype) - y[insert_start:insert_end, :] = x[signal_start:signal_end, :] - - return y - - -def compare(ref: np.ndarray, test: np.ndarray, fs: int, per_frame: bool=True) -> dict: - """Compare two audio arrays - - Parameters - ---------- - ref: numpy array - Input reference array - test: numpy array - Input test array - fs: int - Input sampling rate in Hz - - Returns - ------- - result: dict - Comparison results - """ - framesize = fs // 50 - diff = abs(test - ref) - max_diff = int(diff.max()) - result = { - "bitexact": True, - "max_abs_diff": 0, - "max_abs_diff_pos_sample": 0, - "max_abs_diff_pos_channel": 0, - "nsamples_diff": 0, - "nsamples_diff_percentage": 0.0, - "first_diff_pos_sample": -1, - "first_diff_pos_channel": -1, - "first_diff_pos_frame": -1 - } - if per_frame: - result["max_abs_diff_pos_frame"] = 0 - result["nframes_diff"] = 0 - result["nframes_diff_percentage"] = 0.0 - - if max_diff != 0: - if diff.ndim == 1: - nsamples_total = diff.shape - nchannels = 1 - else: - nsamples_total, nchannels = diff.shape - max_diff_pos = np.nonzero(diff == max_diff) - max_diff_pos = [ - max_diff_pos[0][0], - max_diff_pos[0][0] // framesize, - max_diff_pos[1][0], - ] - - first_diff_pos = np.nonzero(diff) - first_diff_pos = [ - first_diff_pos[0][0], - first_diff_pos[0][0] // framesize, - first_diff_pos[1][0], - ] - - nsamples_diff = np.nonzero(diff)[0].size - nsamples_diff_percentage = nsamples_diff / (nsamples_total * nchannels) * 100.0 - nframes = nsamples_total // framesize - nframes_diff = 0 - - result = { - "bitexact": False, - "max_abs_diff": max_diff, - "max_abs_diff_pos_sample": max_diff_pos[0], - "max_abs_diff_pos_channel": max_diff_pos[2], - "nsamples_diff": nsamples_diff, - "nsamples_diff_percentage": nsamples_diff_percentage, - "first_diff_pos_sample": first_diff_pos[0], - "first_diff_pos_channel": first_diff_pos[2], - "first_diff_pos_frame": first_diff_pos[1], - } - - if per_frame: - for fr in range(nframes): - diff_fr = diff[fr * framesize : ((fr + 1) * framesize), :] - nframes_diff += 1 if diff_fr.nonzero()[0].size > 0 else 0 - nframes_diff_percentage = nframes_diff / nframes * 100.0 - result["max_abs_diff_pos_frame"] = max_diff_pos[1] - result["nframes_diff"] = nframes_diff - result["nframes_diff_percentage"] = nframes_diff_percentage - - return result - - -def getdelay(x: np.ndarray, y: np.ndarray) -> int: - """Get the delay between two audio signals - - Parameters - ---------- - x: numpy array - Input reference array - y: numpy array - Input test array - - Returns - ------- - result: int - delay of y in samples with respect to x (median of individual channel delays) - """ - if x.ndim == 1: - n_samples_x = x.shape - n_chan_x = 1 - else: - n_samples_x, n_chan_x = x.shape - if y.ndim == 1: - n_samples_y = y.shape - n_chan_y = 1 - else: - n_samples_y, n_chan_y = y.shape - if n_chan_x != n_chan_y: - raise ValueError - lags = np.arange(-n_samples_x + 1, n_samples_y) - lag = np.zeros([n_chan_x, 1], dtype=int) - for chan in range(n_chan_x): - correlation = sig.correlate(y[:, chan], x[:, chan], mode="full") - lag[chan] = lags[np.argmax(correlation)] - return int(np.median(lag)) - - -def limiter(x: np.ndarray, fs: int): - """Apply limiting to an audio signal - - Parameters - ---------- - x: numpy array - Input reference array - fs: int - Input sampling frequency - - Returns - ------- - None - """ - limiter_threshold = 32729 # -0.01dB FS - limiter_attack_seconds = 0.005 - attack_constant = 0.01 ** (1.0 / (limiter_attack_seconds * fs)) - release_heuristics_mem = 0.0 - gain = 1.0 - strong_saturation_cnt = 0 - - if x.ndim == 1: - n_samples_x = x.shape - n_chan_x = 1 - else: - n_samples_x, n_chan_x = x.shape - # framing - framesize = fs // 50 - nframes = n_samples_x // framesize - for fr in range(nframes): - apply_limiting = True - fr_sig = x[fr * framesize : ((fr + 1) * framesize), :] - sig_max = np.absolute(fr_sig).max() - release_heuristic = release_heuristics_mem - if sig_max > limiter_threshold: - frame_gain = limiter_threshold / sig_max - release_heuristic = min(1.0, release_heuristic + (4.0 * framesize / fs)) - else: - release_heuristic = max(0.0, release_heuristic - (framesize / fs)) - if gain >= 1.0 - 1e-10: - apply_limiting = False - - frame_gain = 1.0 - - if sig_max > 3 * limiter_threshold and strong_saturation_cnt > 0: - apply_strong_limiting = True - elif sig_max > 10 * limiter_threshold: - strong_saturation_cnt += 20 - apply_strong_limiting = True - else: - strong_saturation_cnt -= 1 - if strong_saturation_cnt < 0: - strong_saturation_cnt = 0 - apply_strong_limiting = False - - if apply_strong_limiting is True: - if frame_gain < 0.3: - frame_gain /= 3.0 - else: - apply_strong_limiting = False - - if frame_gain < 0.1 and apply_strong_limiting is False: - frame_gain = 0.1 - - if apply_limiting is True: - if frame_gain < gain: - fac = attack_constant ** (np.arange(1, framesize + 1, dtype=np.float32)) - else: - release_constant = 0.01 ** ( - 1.0 / (0.005 * (200.0**release_heuristic) * fs) - ) - fac = release_constant ** ( - np.arange(1, framesize + 1, dtype=np.float32) - ) - - fr_gain = np.tile(gain * fac + frame_gain * (1.0 - fac), (n_chan_x, 1)).T - fr_sig *= fr_gain - gain = fr_gain[-1, 0] - else: - gain = 1.0 - - release_heuristics_mem = release_heuristic - # hard limiting for everything that still sticks out - idx_max = np.where(fr_sig > 32767) - fr_sig[idx_max] = 32767 - idx_min = np.where(fr_sig < -32768) - fr_sig[idx_min] = -32768 - - -def get_framewise(x: np.ndarray, chunk_size: int, zero_pad=False) -> np.ndarray: - """Generator to yield a signal frame by frame - If array size is not a multiple of chunk_size, last frame contains the remainder - - Parameters - ---------- - x: numpy array - Input reference array - chunk_size: int - Size of frames to yield - zero_pad: bool - Whether to zero pad the last chunk if there are not enough samples - - Yields - ------- - frame : np.ndarray - One frame of the input audio signal - """ - n_frames = x.shape[0] // chunk_size - for i in range(n_frames): - yield x[i * chunk_size : (i + 1) * chunk_size, :] - if x.shape[0] % chunk_size: - last_chunk = x[n_frames * chunk_size :, :] - if zero_pad: - yield np.pad(last_chunk, [[0, chunk_size - (x.shape[0] % chunk_size)], [0, 0]]) - else: - yield last_chunk - - -def process_async(files: Iterable, func: Callable, **kwargs): - """Applies a function asynchronously to an array of audio files/filenames using a multiprocessing pool""" - - p = mp.pool(mp.cpu_count()) - results = [] - for f in files: - results.append(p.apply_async(func, args=(f, kwargs))) - p.close() - p.join() - for r in results: - r.get() - return results diff --git a/scripts/pyaudio3dtools/audiofile.py b/scripts/pyaudio3dtools/audiofile.py deleted file mode 100644 index 77be42285f..0000000000 --- a/scripts/pyaudio3dtools/audiofile.py +++ /dev/null @@ -1,809 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import os -import platform -import shutil -import struct -import subprocess as sp -import warnings -from importlib import import_module -from tempfile import TemporaryDirectory -from typing import Optional, Tuple - -import numpy as np -import scipy.io.wavfile as wav - -from pyaudio3dtools import audioarray, spatialaudioformat - - -def readfile( - filename: str, nchannels: int = 1, fs: int = 48000, outdtype="float" -) -> Tuple[np.ndarray, int]: - """Read audio file (.pcm or .wav) - - Parameters - ---------- - filename: str - Input file path - nchannels: Optional[int] - Number of input channels, required for .pcm otherwise default = 1 - fs: Optional[int] - Input sampling rate, required for .pcm input file, otherwise default = 48000 (Hz) - outdtype: Optional[int] - Data type of output array, python builtin or np.dtype - - Returns - ------- - x: np array - audio signal array - fs: int - signal sampling frequency - - """ - _, file_extension = os.path.splitext(os.path.basename(filename)) - - if file_extension == ".wav": - fs, data = wav.read(filename) - if data.dtype == np.int32: - data = np.interp( - data, - (np.iinfo(np.int32).min, np.iinfo(np.int32).max), - (np.iinfo(np.int16).min, np.iinfo(np.int16).max), - ) - elif data.dtype == np.float32: - data = np.interp( - data, - (-1, 1), - (np.iinfo(np.int16).min, np.iinfo(np.int16).max), - ) - x = np.array(data, dtype=outdtype) - file_len = x.shape[0] - if x.ndim == 1: - # force to be a mtx - x = np.reshape(x, (file_len, 1)) - elif file_extension == ".pcm" or file_extension == ".raw": - x = np.fromfile(filename, dtype=np.int16).astype(outdtype) - signal_len = len(x) // nchannels - x = x.reshape(signal_len, nchannels) - else: - raise ValueError("Wrong input format. Use wav or pcm") - - return x, fs - - -def writefile(filename: str, x: np.ndarray, fs: int = 48000) -> None: - """Write audio file (.pcm or .wav) - - Parameters - ---------- - filename: str - Output file path (.pcm or .wav) - x: np array - Numpy 2D array of dimension: number of samples x number of channels - fs: Optional[int] - Output sampling rate, required for .pcm input file, otherwise default = 48000 (Hz) - - Returns - ------- - None - - """ - _, file_extension = os.path.splitext(os.path.basename(filename)) - - clipped_samples = np.sum( - np.logical_or(x < np.iinfo(np.int16).min, x > np.iinfo(np.int16).max) - ) - if clipped_samples > 0: - warnings.warn(f" Warning: {clipped_samples} samples clipped") - x = np.clip(x, np.iinfo(np.int16).min, np.iinfo(np.int16).max) - - if file_extension == ".wav": - x = x.astype(np.int16) - wav.write(filename, fs, x) - elif file_extension == ".pcm" or file_extension == ".raw": - x = x.astype("int16").reshape(-1, 1) - x.tofile(filename) - else: - raise ValueError("Wrong input format. Use wav or pcm") - - -def convertfile( - in_file: str, - out_file: str, - in_nchans: Optional[int] = None, - out_nchans: Optional[int] = None, - in_fs: Optional[int] = None, - out_fs: Optional[int] = None, - out_len_samples: Optional[int] = None, - verbose: bool = False, -) -> None: - """Convert audio file, can convert wav from/to pcm, change nchannels and sampling rate - - Parameters - ---------- - in_file: str - Input file path - out_file: str - Output file path - in_nchans: Optional[int] - Number of input channels required for .pcm inpout file - out_nchans: Optional[int] - Number of output channels, default out_nchans = in_nchans - in_fs: Optional[int] - Input sampling rate, required for .pcm input file - out_fs: Optional[int] - Output sampling rate, default out_fs = in_fs - out_len_samples: Optional[int] - Cut file to this length in samples. - Adds zeros at the end if bigger than file length. - - Returns - ------- - None - - """ - # Read input file - if in_fs is None: - in_fs = 48000 - if in_nchans is None: - in_nchans = 1 - x, in_fs = readfile(in_file, nchannels=in_nchans, fs=in_fs) - in_nchans = x.shape[1] - in_len_samples = x.shape[0] - - # Configure output file - y = x - if out_fs is None: - out_fs = in_fs - if out_nchans is None: - out_nchans = in_nchans - - if verbose: - print(f"Input file: {in_file}, sampling rate {str(in_fs)} size {str(x.shape)}") - - # Process - if ( - in_file == out_file - and in_nchans == out_nchans - and in_fs == out_fs - and in_len_samples == out_len_samples - ): - if verbose: - print("Convert file: nothing to be done") - else: - y = audioarray.convert(x, out_nchans=out_nchans, in_fs=in_fs, out_fs=out_fs) - - if out_len_samples is None: - out_len_samples = y.shape[0] - y = audioarray.cut(y, (0, out_len_samples)) - - # write/convert wav format - writefile(out_file, y, fs=out_fs) - if verbose: - print( - f"Written output file: {out_file}, sampling rate {str(out_fs)} size {str(y.shape)}" - ) - - -def concatenatefiles( - in_filenames: list, - out_file: str, - silence_pre: int, - silence_post: int, - in_fs: Optional[int] = 48000, - out_fs: Optional[int] = None, -) -> None: - """Horizontally concatenates audio files into one long file - - Parameters - __________ - in_filenames: list - Input list of filenmames (.pcm or .wav) - out_file: str - Output multi-channel audio file name (.pcm or .wav) - in_fs: Optional[int] = 48000 - Input sampling rate, default 48000 Hz - out_fs: Optional[int] = None - Output sampling rate, default out_fs=in_fs - fs: Optional[int] - Output sampling rate, required for .pcm input file, otherwise default = 48000 (Hz) - - Returns - ------- - None - """ - y = None - - if out_fs is None: - out_fs = in_fs - - # Create silence padding arrays - pad_pre = int(silence_pre * in_fs / 1000) - pad_post = int(silence_post * in_fs / 1000) - - # Read input files - for in_file in in_filenames: - x, in_fs = readfile(in_file, fs=in_fs) - - # pad with silence - pre = np.zeros([pad_pre, x.shape[1]]) - post = np.zeros([pad_post, x.shape[1]]) - x = np.concatenate([pre, x, post]) - - if y is None: - y = x - else: - y = np.concatenate([y, x]) - - y = audioarray.resample(y, in_fs, out_fs) - - writefile(out_file, y, fs=out_fs) - - -def combinefiles( - in_filenames: list, - out_file: str, - out_nchans: Optional[int] = None, - in_fs: Optional[int] = 48000, - out_fs: Optional[int] = None, - verbose: bool = False, -) -> None: - """Combines audio files into one multi-channel file - - Parameters - ---------- - in_filenames: list - Input list of filenmames (.pcm or .wav) - out_file: str - Output multi-channel audio file name (.pcm or .wav) - in_fs: Optional[int] = 48000 - Input sampling rate, default 48000 Hz - out_fs: Optional[int] = None - Output sampling rate, default out_fs=in_fs - fs: Optional[int] - Output sampling rate, required for .pcm input file, otherwise default = 48000 (Hz) - - Returns - ------- - None - - """ - - y = None - - if out_fs is None: - out_fs = in_fs - - # Read input files - for in_file in in_filenames: - # assign correct channel - x, in_fs = readfile(in_file, fs=in_fs) - if y is None: - y = x - else: - if x.shape[0] > y.shape[0]: - x = x[: y.shape[0], :] - elif y.shape[0] > x.shape[0]: - y = y[: x.shape[0], :] - y = np.column_stack([y, x]) - - y = audioarray.resample(y, in_fs, out_fs) - - writefile(out_file, y, fs=out_fs) - - -def splitfiles( - in_file: str, - out_filenames: list, - in_nchans: int, - in_fs: Optional[int] = 48000, - out_fs: Optional[int] = None, - verbose: bool = False, -) -> None: - """Split multi-channel audio files into individual mono files - - Parameters - ---------- - in_file: str - Input file name (.pcm or .wav) - out_filenames: list - List of output file names (.pcm or .wav) - in_fs: Optional[int] = 48000 - Input sampling rate, default 48000 Hz - out_fs: Optional[int] = None - Output sampling rate, default out_fs=in_fs - - Returns - ------- - None - - """ - # validation - if in_nchans is None: - raise ValueError("Number of channels to split must be specified!") - if in_nchans != len(out_filenames): - print( - "Split: Mismatch between number of channels and output filenames length. Truncating output filenames list." - ) - out_filenames = out_filenames[:in_nchans] - - x, in_fs = readfile(in_file, nchannels=in_nchans, fs=in_fs) - - # Write output files - for idx, out_file in enumerate(out_filenames): - # extract correct channel - y = x[:, idx] - - if out_fs is None: - out_fs = in_fs - - y = audioarray.resample(y, in_fs, out_fs) - - writefile(out_file, y, fs=out_fs) - - -def mono( - in_file: str, - out_file: str, - in_nchans: Optional[int] = 2, - in_fs: Optional[int] = 48000, - out_fs: Optional[int] = None, - verbose: bool = False, -) -> None: - """Creates a passive mono downmix for a multi-channel audio file - - Parameters - ---------- - in_file: str - Input file name (.pcm or .wav) - out_file: str - Output mono downmix audio file name (.pcm or .wav) - in_nchans: Optional[int] - Number of input channels, required for .pcm otherwise default = 2 - in_fs: Optional[int] = 48000 - Input sampling rate, required for .pcm, otherwise default = 48000 Hz - out_fs: Optional[int] = in_fs - Output sampling rate, default = in_fs - - Returns - ------- - None - - """ - - # read input - x, in_fs = readfile(in_file, nchannels=in_nchans, fs=in_fs) - - if out_fs is None: - out_fs = in_fs - - # do pasive downmix - m = np.sum(x, 1) - - if out_fs != in_fs: - m = audioarray.resample(m, in_fs, out_fs) - - # write output - writefile(out_file, m, fs=out_fs) - - -def mutefile( - in_file: str, - out_file: str, - in_fs: int = 48000, - in_nchans: Optional[int] = 1, - mute_chans: Optional[list] = None, -) -> None: - """Mute audio channels in file - - Parameters - ---------- - in_file: str - Input multi-channel audio filenmame (.pcm or .wav) - out_file: str - Output multi-channel audio file name (.pcm or .wav) - in_nchans: Optional[int])1 - Number of channels, default = 1, or in *.wav header - mute_chans: Optional[list] = None - Indices of channel to mute, default=None=all - - Returns - ------- - None - - """ - x, in_fs = readfile(in_file, fs=in_fs, nchannels=in_nchans) - - if mute_chans is not None: - mute_chans = np.array(mute_chans) - if len(x.shape) > 1: - x[:, mute_chans[mute_chans < x.shape[1]]] = 0 - else: - x[:, mute_chans[mute_chans < 1]] = 0 - else: - x = np.zeros(x.shape) - - writefile(out_file, x, fs=in_fs) - - -def delayfile( - in_file: str, - out_file: str, - in_fs: int = 48000, - in_nchans: Optional[int] = 1, - delay: float = 0, -) -> None: - """Delay an audio file by a specified duration (ms) - - Parameters - ---------- - in_file: str - Input multi-channel audio filename (.pcm or .wav) - out_file: str - Output multi-channel audio file name (.pcm or .wav) - in_nchans: Optional[int])1 - Number of channels, default = 1, or in *.wav header - delay: float = 0 - Delay in milliseconds (negative values advance file) - - Returns - ------- - None - - """ - delay = int(delay * in_fs / 1000) - delay_abs = np.abs(delay) - - x, in_fs = readfile(in_file, fs=in_fs, nchannels=in_nchans) - - # shift array - x = np.roll(x, delay, axis=0) - - # zero shifted out samples - if delay == 0: - pass - elif delay < 0: - x[-delay_abs:, :] = 0 - elif delay > 0: - x[:delay_abs, :] = 0 - - writefile(out_file, x, fs=in_fs) - - -def loudnessinfo( - in_sig: np.ndarray, - in_fs: Optional[int] = 48000, - in_format: Optional[str] = "MONO", - output_loudness: Optional[int] = -26, - loudness_tool: Optional[str] = "bs1770demo", - use_rms: Optional[bool] = False, -) -> Tuple[float, float]: - """Obtain loudness info about a signal - - Parameters - ---------- - in_sig: np.ndarray - Input audio signal - in_fs: Optional[int] - Input sampling rate - in_format: Optional[str] - Input spatial audio format - output_loudness: Optional[int] - Loudness level in LKFS/dBov - loudness_tool: Optional[str] - Loudness tool to use. Must be in $PATH. - Supported tools: - ITU-R BS.1770-4 / "bs1770demo" (default) - ITU-T P.56 / "sv56demo" - - - Returns - ------- - measured_loudness, scale_factor - - """ - - if platform.system() == "Windows": - null_file = "nul" - else: - null_file = "/dev/null" - - if shutil.which(loudness_tool) is None: - raise FileNotFoundError(f"The binary {loudness_tool} was not found in path!") - - in_spfmt = spatialaudioformat.Format(in_format=in_format) - - if not (in_spfmt.isheadphones or in_spfmt.isloudspeaker or in_spfmt.ambi_order > 1): - raise NotImplementedError( - f"{in_spfmt.name} is currently unsupported with {loudness_tool}." - ) - - if in_sig.shape[1] != in_spfmt.nchannels: - raise ValueError( - f"Mismatch in number of channels in signal of shape {in_sig.shape} of spatial audio format {in_format}!" - ) - - with TemporaryDirectory() as tmp_dir: - tmp_file = os.path.join(tmp_dir, "tmp_loudness.pcm") - - if "bs1770demo" in loudness_tool: - """ - ITU-R BS-1770 - """ - if in_fs != 48000: - raise ValueError(f"{loudness_tool} only supports 48kHz sampling rate!") - - cmd = [ - loudness_tool, - "-nchan", - str(in_spfmt.nchannels), # input nchan - "-lev", - str(output_loudness), # level - "-conf", - "", # config string - tmp_file, - null_file, - ] - if in_spfmt.ambi_order > 0 or in_spfmt.name == "MONO": - cmd[2] = "1" # -nchan - cmd[6] = "0" # -conf - if in_spfmt.isheadphones: - cmd[2] = "2" # -nchan - cmd[6] = "00" # -conf - elif in_spfmt.isloudspeaker: - # if loudspeaker position fulfills the criteria, set the config string to 1 for that index - conf_str = [ - str(int(abs(e) < 30 and (abs(a) >= 60 and abs(a) <= 120))) - for a, e in zip(in_spfmt.ls_azi, in_spfmt.ls_ele) - ] - for lfe in in_spfmt.lfe_index: - conf_str[lfe] = "L" - - cmd[6] = "".join(conf_str) - - elif "sv56demo" in loudness_tool: - """ - ITU-T P.56 - """ - if not (in_spfmt.ambi_order > 0 or in_spfmt.name == "MONO"): - raise ValueError( - f"{in_format} is currently unsupported with {loudness_tool}" - ) - - cmd = [ - loudness_tool, - "-lev", - str(output_loudness), - "-sf", - str(in_fs), - "-blk", - str(int(in_fs * 0.02)), - "-q", - ] - - if use_rms: - cmd.extend(["-rms"]) - - cmd.extend( - [ - tmp_file, - null_file, - ] - ) - - # write temporary file - if in_spfmt.ambi_order > 0 or in_spfmt.name == "MONO": - writefile(tmp_file, in_sig[:, 0], in_fs) - elif in_spfmt.isheadphones: - writefile(tmp_file, in_sig[:, :2], in_fs) - elif in_spfmt.isloudspeaker: - writefile(tmp_file, in_sig, in_fs) - - # run command - try: - result = sp.run(cmd, check=True, capture_output=True, text=True) - except sp.CalledProcessError as e: - raise SystemError( - f"Command returned non-zero exit status ({e.returncode}): {' '.join(e.cmd)}\n{e.stderr}\n{e.stdout}" - ) - - # parse output - if "bs1770demo" in loudness_tool: - measured_loudness = float(result.stdout.splitlines()[3].split(":")[1]) - scale_factor = float(result.stdout.splitlines()[-3].split(":")[1]) - elif "sv56demo" in loudness_tool: - try: - measured_loudness = float( - result.stdout.splitlines()[14] - .replace("Active speech level: ..........", "") - .replace("[dBov]", "") - .strip() - ) - scale_factor = float( - result.stdout.splitlines()[6] - .replace("Norm factor desired is: .......", "") - .replace("[times]", "") - .strip() - ) - except Exception: - raise ValueError(f"Error parsing sv56demo output!\n{result.stdout}") - else: - raise ValueError(f"Unsupported tool {loudness_tool}") - - return measured_loudness, scale_factor - - -def print_plot_play(x: np.ndarray, fs: int, text: Optional[str] = "") -> None: - """1. Prints information about an audio signal, 2. plots the waveform, and 3. Creates player - - Parameters - ---------- - x: np array - Input signal - fs: int - Input sampling rate - text: Optional[str] = '' - text to print - Returns - ------- - None - - """ - - plt = import_module("matplotlib.pyplot") - ipd = import_module("IPython.display") - - print("%s fs = %d, x.shape = %s, x.dtype = %s" % (text, fs, x.shape, x.dtype)) - plt.figure(figsize=(8, 2)) - plt.plot(x, color="gray") - plt.xlim([0, x.shape[0]]) - plt.xlabel("Time (samples)") - plt.ylabel("Amplitude") - plt.tight_layout() - plt.show() - ipd.display(ipd.Audio(data=x, rate=fs)) - - -def get_wav_file_info(filename: str) -> dict: - - """ - Get the format information from a WAV file. - Return a dictionary with the format information - Parameters - ---------- - filename : string or open file handle - Input WAV file. - - Returns - ------- - Dictionary - - """ - - fid = open(filename, "rb") - - try: - - riff = fid.read(4) - - if riff == b"RIFF": - binary_format = "<" - elif riff == b"RIFX": - binary_format = ">" - else: - raise ValueError("No RIFF!") - - wav_size = struct.unpack(f"{binary_format}I", fid.read(4))[0] - - wav_identifier = fid.read(4) - if wav_identifier != b"WAVE": - raise ValueError("No WAVE!") - - fmt_chunk_id = fid.read(4) - - if fmt_chunk_id == b"fmt ": - fmt_size = struct.unpack(f"{binary_format}I", fid.read(4))[0] - wav_format = struct.unpack(f"{binary_format}H", fid.read(2))[0] - channels = struct.unpack(f"{binary_format}H", fid.read(2))[0] - fs = struct.unpack(f"{binary_format}I", fid.read(4))[0] - bytes_per_second = struct.unpack(f"{binary_format}I", fid.read(4))[0] - block_align = struct.unpack(f"{binary_format}H", fid.read(2))[0] - bit_depth = struct.unpack(f"{binary_format}H", fid.read(2))[0] - rem_bytes = fmt_size - 16 - ext_param_size = 0 - ext_param = None - if rem_bytes: - ext_param_size = struct.unpack(f"{binary_format}H", fid.read(2))[0] - - if ext_param_size: - ext_param = fid.read(ext_param_size) - else: - raise ValueError("No or corrupt fmt chunk!") - - finally: - fid.close() - - return { - "size": wav_size, - "format_tag": wav_format, - "channels": channels, - "fs": fs, - "bytes_per_second": bytes_per_second, - "block_align": block_align, - "bit_depth": bit_depth, - "ext_param_size": ext_param_size, - "ext_param": ext_param, - } - - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser(description="Tool for basic operations on audio files") - subparsers = parser.add_subparsers() - - def pre_trim_wrapper(pre_trim_args): - if pre_trim_args.input_file.endswith(".wav"): - input_file_properties = get_wav_file_info(pre_trim_args.input_file) - else: - print("Delay currently only supported with WAV file input") - exit(-1) - - x, _ = readfile(pre_trim_args.input_file, fs=input_file_properties["fs"], nchannels=input_file_properties["channels"]) - trim = int(pre_trim_args.amount_in_ms * input_file_properties["fs"] / 1000) - x = x[trim:] - writefile(pre_trim_args.output_file, x, fs=input_file_properties["fs"]) - - - parser_delay = subparsers.add_parser("pre-trim", help="Trim a given amount of content from the beginning of the file") - parser_delay.add_argument("amount_in_ms", type=float, help="Trim amount milliseconds.") - parser_delay.add_argument("input_file") - parser_delay.add_argument("output_file") - parser_delay.set_defaults(func=pre_trim_wrapper) - - def convert_wrapper(convert_args): - if not convert_args.input_file.endswith(".wav"): - print("Convert currently only supported with WAV file input") - exit(-1) - - convertfile(convert_args.input_file, convert_args.output_file) - - parser_convert = subparsers.add_parser( - "convert", help="Convert file format (output file extension determines output format)" - ) - parser_convert.add_argument("input_file") - parser_convert.add_argument("output_file") - parser_convert.set_defaults(func=convert_wrapper) - - args = parser.parse_args() - args.func(args) diff --git a/scripts/pyaudio3dtools/binauralrenderer.py b/scripts/pyaudio3dtools/binauralrenderer.py deleted file mode 100644 index 2567b433db..0000000000 --- a/scripts/pyaudio3dtools/binauralrenderer.py +++ /dev/null @@ -1,782 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import logging -import os -import timeit -from typing import Tuple - -import numpy as np -import scipy.interpolate as interp -import scipy.io as sio -import scipy.signal as sig -from pyaudio3dtools.rotation import rotateHOA, rotateISM, rotateMC - -from pyaudio3dtools import audioarray, spatialaudioformat, spatialaudioconvert -from pyaudio3dtools.constants import * - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - -"""" Helper functions """ - - -def NS2SA(fs, x): - return int(int(fs / 100) * ((x) / 100) / 100000) - - -def read_hrirs_from_mat( - hrirs_path: str = "/HRIRs_mat/ORANGE_HRIR_53_48000_combined.mat", -) -> np.ndarray: - """Read HRIRs from Matlab dictionary file mat - - Parameters - ---------- - hrirs_path: str - HRTFs file name (.mat) - - Returns - ------- - IR: np.ndarray - array of impulse responses - SourcePosition: np.ndarray - array of source positions corresponding to the impulse responses - - """ - script_path = os.path.dirname(os.path.abspath(__file__)) - hrirs_filename = script_path + hrirs_path - - mat_contents = sio.loadmat(hrirs_filename) - IR = mat_contents["IR"] - try: - SourcePosition = mat_contents["SourcePosition"] - except KeyError: - SourcePosition = None - - logger.debug(f"Loaded HRIRs: {hrirs_filename}, {IR.shape[0]} by {IR.shape[1]}") - - return IR, SourcePosition - - -def get_IR( - in_spfmt: spatialaudioformat.Format, - out_spfmt: spatialaudioformat.Format, - dataset: str, -) -> Tuple[np.ndarray, np.ndarray, float]: - """get_IR - - Parameters - ---------- - in_spfmt: spatialaudioformat - input spatial audio format - out_spfmt: spatialaudioformat - output spatial audio format - dataset: str - name of the HRIRs or BRIRs dataset - - Returns - ------- - IR: np.ndarray - desired impulse response array - SourcePosition: np.ndarray - source positions of corresponding IRs - - """ - # override for BRIRs, currently only one option - if out_spfmt.name == "BINAURAL_ROOM": - dataset = "mozart_iis" - - # dataset file prefix - if dataset.lower().startswith("sadie"): - prefix = "/HRIRs_mat/SADIE_II_D2_48K_24bit_256tap" - elif dataset.lower().startswith("orange"): - prefix = f"/HRIRs_mat/ORANGE_HRIR_{dataset.replace('_full', '')[-2:]}_48000" - elif dataset.lower().startswith("mozart"): - prefix = "/BRIRs_mat/IIS_BRIR_officialMPEG_222UC" - else: - raise ValueError(f"Unsupported dataset '{dataset}' for HRIRs") - - # dataset file suffix - if in_spfmt.name.startswith("ISM") or in_spfmt.altname.startswith("CUSTOM_LS"): - suffix = "full.mat" - elif in_spfmt.isloudspeaker and in_spfmt.nchannels > 1: - suffix = "combined.mat" - elif in_spfmt.ambi_order > 0 or in_spfmt.name.upper() == "MONO": - suffix = "SBA3.mat" - else: - raise ValueError( - f"Unsupported format '{in_spfmt.name}' for dataset '{dataset}' for HRIRs" - ) - - IR, SourcePosition = read_hrirs_from_mat("_".join([prefix, suffix])) - - latency_smp = float(np.min(np.argmax(np.sum(np.abs(IR), axis=(1)), axis=(0)))) - - if in_spfmt.name.startswith("MONO"): - IR = IR[:, :, :1] # use omni/W from SBA - elif in_spfmt.name.startswith("STEREO"): - IR = IR[:, :, :2] # use L and R channels - elif in_spfmt.isloudspeaker and not in_spfmt.altname.startswith("CUSTOM_LS"): - # extract positions from the combined file - tmp_spfmt = spatialaudioformat.Format("COMBINED") - IR_tmp = IR.copy() - IR = np.zeros([IR_tmp.shape[0], IR_tmp.shape[1], in_spfmt.nchannels]) - - ir_index = 0 - for i in range(tmp_spfmt.nchannels): - for j in range(in_spfmt.nchannels): - if ( - tmp_spfmt.ls_azi[i] == in_spfmt.ls_azi[j] - and tmp_spfmt.ls_ele[i] == in_spfmt.ls_ele[j] - ): - if j != in_spfmt.lfe_index[0]: - IR[:, :, ir_index] = IR_tmp[:, :, i] - ir_index += 1 - - return IR, SourcePosition, latency_smp - - -def FindFilter(SourcePosition: np.ndarray, azi: float, ele: float) -> int: - """Find measurement closest to the selected direction, - reimplemented roughly along the lines of ConvBinauralRenderer.m - - Parameters - ---------- - SourcePosition: np.ndarray - Source IR positions - azi: float - desired response azimuth - ele: float - desired response elevation - - Returns - ------- - i_dir: int - index of nearest SourcePosition - """ - if azi < 0: - azi = azi + 360.0 - - if ele < 0: - ele = ele + 360.0 - - delta_azi = np.deg2rad(np.abs(azi - SourcePosition[:, 0])) - dist = np.arccos( - np.sin(np.deg2rad(SourcePosition[:, 2])) * np.sin(np.deg2rad(ele)) - + np.cos(np.deg2rad(SourcePosition[:, 1])) - * np.cos(np.deg2rad(ele)) - * np.cos(delta_azi) - ) - - i_dir = np.argmin(dist) - - # print('Direction closest to {}, {} is {} with angles {}, {} and distance {}\n'.format( - # azi, ele, i_dir, SourcePosition[i_dir,0], SourcePosition[i_dir,1], dist[i_dir] - # ) - # ) - - return i_dir - - -""" Core binaural rendering functions """ - - -def binaural_fftconv( - x: np.ndarray, IR: np.ndarray, nchannels: int, lfe_index: list = [] -) -> np.ndarray: - """Binauralization using fft convolution - - Parameters - ---------- - x: np array - input multi-channel array - IR: np array - HRIRs array - nchannels: int - maximum number of channels to process - lfe_index: list - list of LFE channel indices - - Returns - ------- - y: np.ndarray - output convolved signal array - - """ - y = np.zeros([x.shape[0], 2]) - for chan_idx in range(min(x.shape[1], nchannels)): - if chan_idx not in lfe_index: - y[:, 0] = np.add( - y[:, 0], - sig.fftconvolve( - x[:, chan_idx].astype(float), IR[:, 0, chan_idx] - ).astype(float)[: x.shape[0]], - ) - y[:, 1] = np.add( - y[:, 1], - sig.fftconvolve( - x[:, chan_idx].astype(float), IR[:, 1, chan_idx] - ).astype(float)[: x.shape[0]], - ) - else: - logger.debug(f" Mute LFE channel of index: {str(chan_idx)}") - - return y - - -def binaural_fftconv_framewise( - x: np.ndarray, - IR: np.ndarray, - SourcePosition: np.ndarray, - azi: np.ndarray = None, - ele: np.ndarray = None, - frame_len: int = (IVAS_FRAME_LEN_MS // 4) * 48, - interp_method="linear", - verbose=False, -) -> np.ndarray: - """Binauralization using fft convolution with frame-wise processing - supports rotation on trajectories with interpolation between measured Source - positions, reimplemented roughly along the lines of ConvBinauralRenderer.m - - Parameters - ---------- - x: np.ndarray - input multi-channel array - IR: np.ndarray - HRIRs array - SourcePosition: np.ndarray - positions of the source in the measurements in IR - azi: np.ndarray - azimuth angles for all frames - ele: np.ndarray - elevation angles for all frames - frame_len: int - frame length, optional, default = (IVAS_FRAME_LEN_MS // 4) * 48000 - interp_method: - interpolation method, optional, default = linear - - - Returns - ------- - y: np.ndarray - output binaural signal array - - """ - - sig_len = x.shape[0] - frame_len = (IVAS_FRAME_LEN_MS // 4) * 48 - N_frames = int(sig_len / frame_len) - - N_HRIR_taps = IR.shape[2] - - if azi is None or ele is None: - azi = np.repeat([0.0], N_frames) - ele = np.repeat([0.0], N_frames) - elif len(azi) < N_frames or len(ele) < N_frames: - azi = np.concatenate( - [np.repeat(azi, N_frames // len(azi)), azi[: N_frames % len(azi)]] - ) - ele = np.concatenate( - [np.repeat(ele, N_frames // len(ele)), ele[: N_frames % len(ele)]] - ) - - iGs = np.zeros([N_frames + 1], dtype=int) - mGs = np.zeros([N_frames + 1], dtype=int) - - # store trajectory as a sequence of indices of source positions - # on the HRTF database in a compressed format such that, for - # each new measurement point the trajectory hits, the sample index - # is stored in mGs and the index of the measurement in iG - # the number of measurement points hit by the trajectory is nsp - isp = 0 - iGs[0] = FindFilter(SourcePosition, azi[0], ele[0]) - mGs[0] = 0 - for i_frame in range(1, N_frames): - iG = FindFilter(SourcePosition, azi[i_frame], ele[i_frame]) - if iG != iGs[isp]: - isp += 1 - iGs[isp] = iG - mGs[isp] = i_frame * frame_len + 1 - nsp = isp + 1 - - # set last fence post explicitly - if mGs[nsp] < sig_len: - iGs[nsp] = iG - mGs[nsp] = sig_len - nsp = nsp + 1 - - T_rev = frame_len + N_HRIR_taps - 1 - N_rev = int(np.ceil(T_rev / frame_len)) - - if verbose: - print(" N_rev = ", N_rev) - - fastcode = True - if N_rev > 5: - if verbose: - print( - " __ __ ___ ___ _ _ ___ _ _ ___ " - ) - print( - r" \ \ / / / \ | _ \ | \| | |_ _| | \| | / __|" - ) - print( - r" \ \/\/ / | - | | / | . | | | | . | | (_ |" - ) - print( - r" \_/\_/ |_|_| |_|_\ |_|\_| |___| |_|\_| \___|" - ) - print( - " " - ) - print( - " You are using very long filters! This will be slooooow and use a lot of memory!" - ) - else: - fastcode = False - - if fastcode and verbose: - print( - " __ __ ___ ___ _ _ ___ _ _ ___ " - ) - print( - r" \ \ / / / \ | _ \ | \| | |_ _| | \| | / __|" - ) - print( - r" \ \/\/ / | - | | / | . | | | | . | | (_ |" - ) - print( - r" \_/\_/ |_|_| |_|_\ |_|\_| |___| |_|\_| \___|" - ) - print( - " " - ) - print( - " To speed up the otherwise extremely slow calculation, we only calculate the " - ) - print( - " responses of the latest frame with the latest filters instead of the full " - ) - print( - " integrals. This is much faster but much more prone to clicks. Inspect your " - ) - print( - " output signals carefully! To change this behavior, go to binauralrenderer.py " - ) - print( - " and set fastcode to False. " - ) - - y = np.zeros([sig_len + T_rev, 2]) - y0 = np.zeros([N_rev, sig_len + T_rev, 2]) - - fade_in = np.arange(frame_len) / (frame_len - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - for i_ear in [0, 1]: - - Gs = IR[ - iGs[0:nsp], i_ear, : - ] # Green's function along the trajectory sampled by the measurement points - interp_G = interp.interp1d( - mGs[0:nsp], Gs, kind=interp_method, axis=0 - ) # interpolator for Green's function between those points - - G = interp_G(np.arange(0, sig_len, frame_len)) - - t0 = timeit.default_timer() - - if fastcode: - for i_frame in range(N_frames): - - i1 = i_frame * frame_len - i2 = (i_frame + 1) * frame_len - i2p = i1 + T_rev - - fade_out = np.linspace(0.0, 1.0, T_rev, endpoint=False) - fade_in = 1.0 - fade_out - - for j_frame in [0, 1]: - G_n_m = G[min(j_frame + i_frame, N_frames - 1), :] - y0[j_frame, i1:i2p, i_ear] = sig.oaconvolve( - np.squeeze(x[i1:i2]), G_n_m - ) - - y[i1:i2p, i_ear] = ( - np.squeeze(fade_out) * y0[0, i1:i2p, i_ear] - + np.squeeze(fade_in) * y0[1, i1:i2p, i_ear] - ) - - t1 = timeit.default_timer() - fps = (i_frame + 1) / (t1 - t0) - eta = (2 * N_frames - (i_frame + 1) + i_ear * N_frames) / fps - - if verbose: - print( - " Frame {}/{} on ear {}/2 done at {: 3.1f} fps, ETA {: 6.0f} s ".format( - i_frame + 1, N_frames, i_ear + 1, fps, eta - ), - end="\r", - ) - - else: - for i_frame in range(N_frames): - - i1 = i_frame * frame_len - i2 = (i_frame + 1) * frame_len - i2p = i1 + T_rev - - y0[:] = 0.0 - for j_frame in range( - max(0, i_frame - N_rev), min(i_frame + 1, N_frames) - ): - - j1 = j_frame * frame_len - j2 = (j_frame + 1) * frame_len - j2p = j1 + T_rev - - G0 = G[i_frame] - G1 = G[min(i_frame + 1, N_frames - 1)] - - y0[0, j1:j2p, i_ear] += sig.oaconvolve(np.squeeze(x[j1:j2]), G0) - y0[1, j1:j2p, i_ear] += sig.oaconvolve(np.squeeze(x[j1:j2]), G1) - - y[i1:i2, i_ear] = ( - np.squeeze(fade_out) * y0[0, i1:i2, i_ear] - + np.squeeze(fade_in) * y0[1, i1:i2, i_ear] - ) - - t1 = timeit.default_timer() - fps = (i_frame + 1) / (t1 - t0) - eta = (2 * N_frames - (i_frame + 1) + i_ear * N_frames) / fps - - if verbose: - print( - " Frame {}/{} on ear {}/2 done at {: 3.1f} fps, ETA {: 6.0f} s ".format( - i_frame + 1, N_frames, i_ear + 1, fps, eta - ), - end="\r", - ) - - if verbose: - print("") - - return y[0:sig_len] - - -def binaural_render_LFE( - x: np.ndarray, - fs: int = 48000, - lfe_index: list = [3], - LFE_gain: float = 10 ** (5.5 / 20), - latency_smp: int = 0, -) -> np.ndarray: - """ - Extract LFE from the given input and render - it binaurally, accounting for delay of the - """ - - lfe = x[:, lfe_index].copy() - - # if there is more than one LFE sum them into one - if lfe.shape[1] > 1: - lfe = np.sum(lfe, axis=1) - - # TODO tmu - disabled temporarily here, disabled in C - lfe_delay_ns = 0 - """ - # 120 Hz low-pass filtering for LFE using IVAS filter coefficients - if fs == 48000: - lfe = sig.sosfilt(IVAS_LPF_4_BUTTER_48K_SOS, lfe, axis=0) - else: - raise NotImplementedError("Only 48 kHz supported at the moment!") - - # 3.5ms LP filter delay from IVAS ROM - lfe_delay_ns = 0.0035 * 1e9 - lfe_delay_smp = round(lfe_delay_ns * fs / 1e9) - - # Delay LFE by the same amount as the HRTF delay - lfe = np.roll(lfe, round(latency_smp), axis=0) - lfe[0 : round(latency_smp), :] = 0 - """ - - # apply gain - lfe *= LFE_gain - - # duplicate for each binaural channel - lfe = np.hstack([lfe, lfe]) - - return lfe, lfe_delay_ns - - -""" Format specific wrapper functions """ - - -def render_custom_ls_binaural( - x: np.ndarray, - fs: int, - in_spfmt: spatialaudioformat.Format, - out_spfmt: spatialaudioformat.Format, - IR: np.ndarray, - SourcePosition: np.ndarray, - trajectory: np.ndarray, -) -> np.ndarray: - - ls_azi_all = in_spfmt.ls_azi - ls_ele_all = in_spfmt.ls_ele - lfe_index_all = in_spfmt.lfe_index - - logger.info(" Processing channels on custom LS layout") - azis = ", ".join([f"{a:7.2f}" for a in ls_azi_all]) - eles = ", ".join([f"{e:7.2f}" for e in ls_ele_all]) - logger.info(f" azi: {azis}") - logger.info(f" ele: {eles}") - logger.info(f" lfe_index: {lfe_index_all}") - - if out_spfmt.name == "BINAURAL_ROOM": - tmp_spfmt = spatialaudioformat.Format("7_1_4") - x = spatialaudioconvert.convert_mc(x, in_spfmt, tmp_spfmt) - ls_azi_all = tmp_spfmt.ls_azi - ls_ele_all = tmp_spfmt.ls_ele - lfe_index_all = tmp_spfmt.lfe_index - logger.info(f" {in_spfmt.name} -> {tmp_spfmt.name} -> {out_spfmt.name}") - - frame_len = (IVAS_FRAME_LEN_MS // 4) * (fs // 1000) - sig_len = x.shape[0] - N_frames = int(sig_len / frame_len) - - i_ls = 0 - y = np.zeros([sig_len, 2]) - for i_chan in range(x.shape[1]): - - # skip LFE - if i_chan in lfe_index_all: - continue - - # skip silent (or very low volume) channels - if np.allclose(x[:, i_chan], 0.0, atol=32.0): - continue - - ls_azi = np.repeat(ls_azi_all[i_ls], N_frames) - ls_ele = np.repeat(ls_ele_all[i_ls], N_frames) - - azi, ele = rotateISM(ls_azi, ls_ele, trajectory=trajectory) - - y += binaural_fftconv_framewise( - x[:, i_chan], - IR, - SourcePosition, - frame_len=frame_len, - azi=azi, - ele=ele, - verbose=False, - ) - i_ls += 1 - - return y - - -def render_ism_binaural( - x: np.ndarray, - fs: int, - IR: np.ndarray, - SourcePosition: np.ndarray, - trajectory: np.ndarray, - in_pos: np.ndarray, -) -> np.ndarray: - - frame_len = (IVAS_FRAME_LEN_MS // 4) * (fs // 1000) - sig_len = x.shape[0] - N_frames = int(sig_len / frame_len) - - # get ISM metadata and repeat it nsubframe times - pos_data = [] - for pos in in_pos: - pos_data.extend( - [pos["azimuth"], pos["elevation"]] for _ in range(pos["use_for_frames"]) - ) - pos_data = np.array(pos_data) - pos_data = np.repeat(pos_data, 4, axis=0) - - # extract positions only according to the audio duration - pos_data = pos_data[:N_frames, :] - - azi, ele = rotateISM(pos_data[:, 0], pos_data[:, 1], trajectory=trajectory) - - y = np.zeros([sig_len, 2]) - y += binaural_fftconv_framewise( - x, - IR, - SourcePosition, - frame_len=frame_len, - azi=azi, - ele=ele, - verbose=False, - ) - - return y - - -def render_masa_binaural( - x: np.ndarray, - fs: int, - in_spfmt: spatialaudioformat.Format, - IR: np.ndarray, - SourcePosition: np.ndarray, - trajectory: np.ndarray, -): - y = x[:, :2] - # TODO - return y - - -def render_ambi_ls_binaural( - x: np.ndarray, - fs: int, - in_spfmt: spatialaudioformat.Format, - IR: np.ndarray, - trajectory: np.ndarray, -) -> np.ndarray: - - y = x[:] - if trajectory is not None: - if in_spfmt.ambi_order > 0: - y = rotateHOA(y, trajectory) - if in_spfmt.isloudspeaker: - y = rotateMC(y, trajectory, in_spfmt) - - y = binaural_fftconv(y, IR, in_spfmt.nchannels, in_spfmt.lfe_index) - - return y - - -""" Wrapper function for generic binaural rendering """ - - -def binaural_rendering( - x: np.ndarray, - in_spfmt: spatialaudioformat.Format, - out_spfmt: spatialaudioformat.Format, - dataset: str = "orange53", - fs: int = 48000, - trajectory: str = None, - include_LFE: bool = False, - LFE_gain: float = 10 ** (5.5 / 20), - in_pos: dict = None, -): - """Binaural rendering - - Parameters - ---------- - x: np array - input multi-channel array - in_spfmt_name: str - name of input spatial format - dataset: str - name of the HRIRs or BRIRs dataset - fs: int - input/output sampling-rate (default 48kHz) - trajectory: str - path to trajectory file - - Returns - ------- - y: np.ndarray - output binaural signal array - - """ - - if trajectory is not None: - logger.info( - " performing rotation along trajectory from file {}".format(trajectory) - ) - - # resample to 48 kHz - y = audioarray.resample(x, fs, 48000) - delay_total_ns = 0 - - # get IR corresponding to the input and output formats - IR, SourcePosition, latency_smp = get_IR(in_spfmt, out_spfmt, dataset) - delay_total_ns += latency_smp / float(fs) * 1e9 - - # prepare LFE signal to be added to output - if include_LFE and in_spfmt.isloudspeaker and in_spfmt.lfe_index: - lfe, lfe_delay_ns = binaural_render_LFE( - x, 48000, in_spfmt.lfe_index, LFE_gain, latency_smp - ) - delay_total_ns += lfe_delay_ns - - # get binauralized signal based on format - if in_spfmt.altname.startswith("CUSTOM_LS"): - y = render_custom_ls_binaural( - x, fs, in_spfmt, out_spfmt, IR, SourcePosition, trajectory - ) - elif in_spfmt.name.startswith("ISM"): - if not in_pos: - raise ValueError("ISM metadata empty!") - y = render_ism_binaural( - x, - fs, - IR, - SourcePosition, - trajectory, - in_pos, - ) - elif in_spfmt.name.startswith("MASA"): - y = render_masa_binaural(x, fs, in_spfmt, IR, SourcePosition, trajectory) - elif in_spfmt.ambi_order > 0 or in_spfmt.isloudspeaker: - y = render_ambi_ls_binaural(x, fs, in_spfmt, IR, trajectory) - else: - raise NotImplementedError( - f"{in_spfmt.name} -> {out_spfmt.name}: format conversion not implemented" - ) - - # add LFE signal to output - if include_LFE and in_spfmt.isloudspeaker and in_spfmt.lfe_index: - # delay the binauralized signal by the LFE delay - lfe_delay_smp = NS2SA(fs, int(lfe_delay_ns)) - y = np.roll(y, lfe_delay_smp, axis=0) - y[0:lfe_delay_smp, :] = 0 - y += lfe - - # delay compensation - delay_total_smp = NS2SA(fs, delay_total_ns) - y = np.roll(y, -delay_total_smp, axis=0) - if delay_total_smp > 0: - y[-delay_total_smp:, :] = 0 - - # resample back to original rate - y = audioarray.resample(y, 48000, fs) - - return y diff --git a/scripts/pyaudio3dtools/constants.py b/scripts/pyaudio3dtools/constants.py deleted file mode 100644 index 92fd5a709e..0000000000 --- a/scripts/pyaudio3dtools/constants.py +++ /dev/null @@ -1,392 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022 Baseline Development Group with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies OY, Orange, - Panasonic Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The Baseline Development Group consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies OY, Orange, - Panasonic Corporation, Qualcomm Technologies, Inc., and VoiceAge Corporation retain full ownership - rights in their respective contributions in the software. No license of any kind, including but not - limited to patent license, of any foregoing parties is hereby granted by implication, estoppel or - otherwise. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and/or fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import numpy as np - -IVAS_FRAME_LEN_MS = 20 - -IVAS_CICPX_TO_MONO = np.array( - [ - [ - 1, - 1, - 1, - 1, - 0.79999995, - 0.79999995, - 0.79999995, - 0.79999995, - 0.849999964, - 0.849999964, - 0.849999964, - 0.849999964, - ] - ] -).T - -IVAS_CICPX_TO_STEREO = np.array( - [ - [1, 0], - [0, 1], - [np.sqrt(0.5), np.sqrt(0.5)], - [np.sqrt(0.5), np.sqrt(0.5)], - [0.79999995, 0], - [0, 0.79999995], - [0.79999995, 0], - [0, 0.79999995], - [0.849999964, 0], - [0, 0.849999964], - [0.849999964, 0], - [0, 0.849999964], - ] -) - -# downmix matrices -IVAS_CICP12_TO_6 = np.zeros(8 * 6) -IVAS_CICP12_TO_6[[0, 7, 14, 21, 28, 35, 40, 47]] = 1 -IVAS_CICP12_TO_6 = IVAS_CICP12_TO_6.reshape(8, 6) - -IVAS_CICP14_TO_6 = np.zeros(8 * 6) -IVAS_CICP14_TO_6[[0, 7, 14, 21, 28, 35]] = 1 -IVAS_CICP14_TO_6[[36, 43]] = 0.849999964 -IVAS_CICP14_TO_6 = IVAS_CICP14_TO_6.reshape(8, 6) - -IVAS_CICP16_TO_6 = np.zeros(10 * 6) -IVAS_CICP16_TO_6[[0, 7, 14, 21, 28, 35]] = 1 -IVAS_CICP16_TO_6[[36, 43, 52, 59]] = 0.849999964 -IVAS_CICP16_TO_6 = IVAS_CICP16_TO_6.reshape(10, 6) - -IVAS_CICP16_TO_12 = np.zeros(10 * 8) -IVAS_CICP16_TO_12[[0, 9, 18, 27, 36, 45]] = 1 -IVAS_CICP16_TO_12[[48, 57, 68, 77]] = 0.849999964 -IVAS_CICP16_TO_12 = IVAS_CICP16_TO_12.reshape(10, 8) - -IVAS_CICP16_TO_14 = np.zeros(10 * 8) -IVAS_CICP16_TO_14[[0, 9, 18, 27, 36, 45, 54, 63]] = 1 -IVAS_CICP16_TO_14[[68, 77]] = 0.849999964 -IVAS_CICP16_TO_14 = IVAS_CICP16_TO_14.reshape(10, 8) - -IVAS_CICP19_TO_6 = np.zeros(12 * 6) -IVAS_CICP19_TO_6[[0, 7, 14, 21, 28, 35]] = 1 -IVAS_CICP19_TO_6[[36, 43]] = 0.367322683 -IVAS_CICP19_TO_6[[48, 55, 64, 71]] = 0.849999964 -IVAS_CICP19_TO_6[[40, 47]] = 0.930093586 -IVAS_CICP19_TO_6 = IVAS_CICP19_TO_6.reshape(12, 6) - -IVAS_CICP19_TO_12 = np.zeros(12 * 8) -IVAS_CICP19_TO_12[[0, 9, 18, 27, 38, 47]] = 1 -IVAS_CICP19_TO_12[[48, 57]] = 0.367322683 -IVAS_CICP19_TO_12[[64, 73, 84, 93]] = 0.849999964 -IVAS_CICP19_TO_12[[52, 61]] = 0.930093586 -IVAS_CICP19_TO_12 = IVAS_CICP19_TO_12.reshape(12, 8) - -IVAS_CICP19_TO_14 = np.zeros(12 * 8) -IVAS_CICP19_TO_14[[0, 9, 18, 27, 36, 45, 70, 79]] = 1 -IVAS_CICP19_TO_14[[48, 57]] = 0.367322683 -IVAS_CICP19_TO_14[[84, 93]] = 0.849999964 -IVAS_CICP19_TO_14[[52, 61]] = 0.930093586 -IVAS_CICP19_TO_14 = IVAS_CICP19_TO_14.reshape(12, 8) - -IVAS_CICP19_TO_16 = np.zeros(12 * 10) -IVAS_CICP19_TO_16[[0, 11, 22, 33, 44, 55, 86, 97, 108, 119]] = 1 -IVAS_CICP19_TO_16[[60, 71]] = 0.367322683 -IVAS_CICP19_TO_16[[64, 75]] = 0.930093586 -IVAS_CICP19_TO_16 = IVAS_CICP19_TO_16.reshape(12, 10) - -# upmix matrices -IVAS_MONO_TO_CICPX = np.zeros([1, 12]) -IVAS_MONO_TO_CICPX[0, 2] = 1 - -IVAS_STEREO_TO_CICPX = np.zeros([2, 12]) -IVAS_STEREO_TO_CICPX[0, 0] = 1 -IVAS_STEREO_TO_CICPX[1, 1] = 1 - -IVAS_CICP12_TO_14 = np.zeros(8 * 8) -IVAS_CICP12_TO_14[[0, 9, 18, 27, 36, 45, 52, 61]] = 1 -IVAS_CICP12_TO_14 = IVAS_CICP12_TO_14.reshape(8, 8) - -IVAS_CICP12_TO_16 = np.zeros(8 * 10) -IVAS_CICP12_TO_16[[0, 11, 22, 33, 44, 55, 64, 75]] = 1 -IVAS_CICP12_TO_16 = IVAS_CICP12_TO_16.reshape(8, 10) - -IVAS_CICP12_TO_19 = np.zeros(8 * 12) -IVAS_CICP12_TO_19[[0, 13, 26, 39, 54, 67, 76, 89]] = 1 -IVAS_CICP12_TO_19 = IVAS_CICP12_TO_19.reshape(8, 12) - -IVAS_CICP14_TO_19 = np.zeros(8 * 12) -IVAS_CICP14_TO_19[[0, 13, 26, 39, 52, 65, 80, 93]] = 1 -IVAS_CICP14_TO_19 = IVAS_CICP14_TO_19.reshape(8, 12) - -IVAS_CICP16_TO_19 = np.zeros(10 * 12) -IVAS_CICP16_TO_19[[0, 13, 26, 39, 52, 65, 80, 93, 106, 119]] = 1 -IVAS_CICP16_TO_19 = IVAS_CICP16_TO_19.reshape(10, 12) - -# mapping dict -IVAS_MC_CONVERSION = { - "MONO": { - # upmix - "5_1": IVAS_MONO_TO_CICPX[:, :6], - "7_1": IVAS_MONO_TO_CICPX[:, :8], - "5_1_2": IVAS_MONO_TO_CICPX[:, :8], - "5_1_4": IVAS_MONO_TO_CICPX[:, :10], - "7_1_4": IVAS_MONO_TO_CICPX[:, :12], - }, - "STEREO": { - # upmix - "5_1": IVAS_STEREO_TO_CICPX[:, :6], - "7_1": IVAS_STEREO_TO_CICPX[:, :8], - "5_1_2": IVAS_STEREO_TO_CICPX[:, :8], - "5_1_4": IVAS_STEREO_TO_CICPX[:, :10], - "7_1_4": IVAS_STEREO_TO_CICPX[:, :12], - }, - "5_1": { - # downmix - "MONO": IVAS_CICPX_TO_MONO[:6, :], - "STEREO": IVAS_CICPX_TO_STEREO[:6, :], - # upmix - "7_1": np.pad(np.eye(6), [[0, 0], [0, 2]]), - "5_1_2": np.pad(np.eye(6), [[0, 0], [0, 2]]), - "5_1_4": np.pad(np.eye(6), [[0, 0], [0, 4]]), - "7_1_4": np.pad(np.eye(6), [[0, 0], [0, 6]]), - }, - "7_1": { - # downmix - "MONO": IVAS_CICPX_TO_MONO[:8, :], - "STEREO": IVAS_CICPX_TO_STEREO[:8, :], - "5_1": IVAS_CICP12_TO_6, - # upmix - "5_1_2": IVAS_CICP12_TO_14, - "5_1_4": IVAS_CICP12_TO_16, - "7_1_4": IVAS_CICP12_TO_19, - }, - "5_1_2": { - # downmix - "MONO": np.vstack([IVAS_CICPX_TO_MONO[:6, :], IVAS_CICPX_TO_MONO[-2:, :]]), - "STEREO": np.vstack( - [IVAS_CICPX_TO_STEREO[:6, :], IVAS_CICPX_TO_STEREO[-2:, :]] - ), - "5_1": IVAS_CICP14_TO_6, - "7_1": np.pad(IVAS_CICP14_TO_6, [[0, 0], [0, 2]]), - # upmix - "5_1_4": np.pad(np.eye(8), [[0, 0], [0, 2]]), - "7_1_4": IVAS_CICP14_TO_19, - }, - "5_1_4": { - # downmix - "MONO": np.vstack([IVAS_CICPX_TO_MONO[:6, :], IVAS_CICPX_TO_MONO[-4:, :]]), - "STEREO": np.vstack( - [IVAS_CICPX_TO_STEREO[:6, :], IVAS_CICPX_TO_STEREO[-4:, :]] - ), - "5_1": IVAS_CICP16_TO_6, - "7_1": IVAS_CICP16_TO_12, - "5_1_2": IVAS_CICP16_TO_14, - # upmix - "7_1_4": IVAS_CICP16_TO_19, - }, - "7_1_4": { - # downmix - "MONO": IVAS_CICPX_TO_MONO, - "STEREO": IVAS_CICPX_TO_STEREO, - "5_1": IVAS_CICP19_TO_6, - "7_1": IVAS_CICP19_TO_12, - "5_1_2": IVAS_CICP19_TO_14, - "5_1_4": IVAS_CICP19_TO_16, - }, -} - -# LFE 120 Hz LPF filter coefficients -IVAS_LPF_4_BUTTER_48K_SOS = np.array( - [ - [ - 5.12617881476274e-09, - 1.02523584294987e-08, - 5.12617879059970e-09, - 1, - -1.96875982668433, - 0.969044914826862, - ], - [ - 1, - 1.99999984394358, - 1.00000000471366, - 1, - -1.98677297369091, - 0.987060670205863, - ], - ] -) - -T_DESIGN_11_AZI = np.array( - [ - 132.927291884332, - -83.9349499672527, - 8.47410038634525, - -113.340833834572, - -103.265909909537, - -33.2370360923825, - 21.8564347471830, - -156.539486489880, - -64.2647531387317, - 165.779530068738, - -25.2028339893249, - -97.0037973959711, - 27.8546391256925, - 153.214218975132, - -155.061608694663, - -11.8421354925543, - 80.5387312016125, - -42.0561606270165, - -31.2233262205060, - 38.8379041944063, - 93.7606877469492, - -84.7560200078398, - 7.75536818082863, - -122.276883381108, - 46.8012705252113, - -24.7686335284573, - 99.8904719062334, - -134.783996960185, - -83.0880230164493, - 60.1281736000420, - 152.644656278084, - 29.7576658909417, - 40.7793187974476, - 110.183927562412, - 165.652065916454, - -12.9926632105736, - 79.7359893585681, - -50.5245271190884, - 118.923930267733, - 47.2202861862577, - 171.925276523721, - -62.5145800558502, - -11.1156697680531, - 132.018041099963, - -135.355486412425, - 102.370921576708, - 112.739282398012, - -178.304963670831, - -122.319932198534, - 59.0763464570905, - 151.704200334501, - 21.3763364190503, - -169.005476417779, - 118.980811786769, - -116.089295979010, - 9.64767870353308, - 60.8933243657771, - -156.021526862757, - -63.4602993325163, - 174.929787427393, - -175.288768596346, - -105.951907934032, - -50.1928304519800, - 131.358266702971, - -136.296815007542, - 93.5644603506407, - -97.0840116473627, - -169.158278888619, - -44.1323835471345, - 81.4795403841382, - ] -) - -T_DESIGN_11_ELE = np.array( - [ - 7.69254738757899, - -23.7300652200871, - 23.5127556185301, - 70.4225940747938, - -9.89694439538752, - -70.7513316063095, - -26.4618527647561, - 47.7764936689044, - -7.72047049524459, - 44.5343602375216, - 26.3897904767450, - -44.6578850137166, - 9.76703456924600, - -47.7053318175498, - 7.45302934155972, - -23.5901209534773, - 23.7194484034707, - 70.4382693912270, - -9.83541588740259, - -70.4980825105727, - -26.2949218109204, - 47.6148028805222, - -7.51718499746626, - 44.2862347125773, - 26.6442619674660, - -44.5693707254340, - 9.91271928508000, - -47.9599550372574, - 7.29679922953795, - -23.3445981426306, - 23.6415261666079, - 70.6843143997832, - -9.58140351749889, - -70.3934534122902, - -26.4258159091605, - 47.7510668062369, - -7.30853603036844, - 44.2632768570349, - 26.7140614474957, - -44.3149733480527, - 9.75899721561506, - -48.0361913333593, - 7.43965099805872, - -23.3326075548841, - 23.3868959687598, - 70.8219078016791, - -9.48596399169388, - -70.5801867828491, - -26.6740262349265, - 47.9978414043199, - -7.38276167631068, - 44.4970603752708, - 26.5024990214418, - -44.2461913308458, - 9.51845076548334, - -47.8281351088411, - 7.68427447425834, - -23.5706842106942, - 23.3074499244045, - 70.6586472132300, - -9.68088860263008, - -70.8026785673948, - -26.6963451935976, - 48.0136296461397, - -7.63734823159200, - 44.6651234222196, - 26.3023490002159, - -44.4576351865647, - 9.52341455917443, - -47.6242211091394, - ] -) diff --git a/scripts/pyaudio3dtools/hoadecoder.py b/scripts/pyaudio3dtools/hoadecoder.py deleted file mode 100644 index c37ceb3bdc..0000000000 --- a/scripts/pyaudio3dtools/hoadecoder.py +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import logging -import os -from typing import Optional - -import numpy as np -from scipy.special import lpmv - -from pyaudio3dtools import spatialaudioformat -from pyaudio3dtools.constants import T_DESIGN_11_AZI, T_DESIGN_11_ELE -from pyaudio3dtools.EFAP import EFAP - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - - -def get_hoa_mtx( - ambi_order: int, - spkrlayout: spatialaudioformat, - norm: Optional[str] = "sn3d", - rE_weight: Optional[bool] = False, - intensity_panning: Optional[bool] = True, -) -> np.ndarray: - nharm = spatialaudioformat.Format.nchannels_from_ambiorder(ambi_order) - if spkrlayout.name == "MONO": - mtx_hoa_dec = np.zeros([1, nharm]) - mtx_hoa_dec[0, 0] = 1 - elif spkrlayout.name == "STEREO": - mtx_hoa_dec = np.zeros([2, nharm]) - # Cardioids +/- 90 degrees - mtx_hoa_dec[0, 0] = 0.5 - mtx_hoa_dec[0, 1] = 0.5 - mtx_hoa_dec[1, 0] = 0.5 - mtx_hoa_dec[1, 1] = -0.5 - elif spkrlayout.isloudspeaker: - Y_td = getRSH( - T_DESIGN_11_AZI, - T_DESIGN_11_ELE, - ambi_order, - norm="ortho", - ) - Y_td *= np.sqrt(4 * np.pi) - - n_ls_woLFE = spkrlayout.nchannels - len(spkrlayout.lfe_index) - ls_azi_woLFE = np.delete(spkrlayout.ls_azi, spkrlayout.lfe_index).astype(float) - ls_ele_woLFE = np.delete(spkrlayout.ls_ele, spkrlayout.lfe_index).astype(float) - - panner = EFAP(ls_azi_woLFE, ls_ele_woLFE, intensity_panning) - G_td = panner.pan(T_DESIGN_11_AZI, T_DESIGN_11_ELE) - - mtx_hoa_dec = (G_td.T @ Y_td.T) / T_DESIGN_11_AZI.size - - if norm == "sn3d": - mtx_hoa_dec = mtx_hoa_dec @ np.diag(sn2n(ambi_order)) - elif norm == "ortho": - mtx_hoa_dec *= np.sqrt(4 * np.pi) - - if rE_weight: - a_n = rE_weight(ambi_order) - nrg_pre = np.sqrt(len(n_ls_woLFE) / np.sum(a_n**2)) - mtx_hoa_dec = mtx_hoa_dec @ np.diag(a_n) * nrg_pre - - mtx_hoa_dec = np.insert( - mtx_hoa_dec, spkrlayout.lfe_index, np.zeros(nharm), axis=0 - ) - else: - raise ValueError( - f"Unsupported spatial audio format for ALLRAD: {spkrlayout.name}" - ) - - return mtx_hoa_dec - - -def hoa_linear_decoding(signal_in: np.ndarray, mtx_hoa_dec: np.ndarray) -> np.ndarray: - if not signal_in.shape[1] == mtx_hoa_dec.shape[1]: - raise Exception( - "Input number of channels must be equal to renderer matrix second dimension" - ) - - signal_out = np.dot(signal_in, mtx_hoa_dec.transpose()) - - logger.debug(f"Signal out: {signal_out.shape[0]} by {signal_out.shape[1]}") - - return signal_out - - -def rE_weight(order: int) -> np.ndarray: - return np.array( - [ - lpmv(0, l, np.cos(np.deg2rad(137.9) / (order + 1.51))) - for l in range(order + 1) - for _ in range(-l, l + 1) - ] - ).T - - -def n2sn(order: int) -> np.ndarray: - return np.array( - [1.0 / np.sqrt(2 * l + 1) for l in range(order + 1) for _ in range(-l, l + 1)] - ) - - -def sn2n(order: int) -> np.ndarray: - return np.array( - [np.sqrt(2 * l + 1) for l in range(order + 1) for _ in range(-l, l + 1)] - ) - - -def getRSH( - azi: np.ndarray, - ele: np.ndarray, - ambi_order: int, - norm: Optional[str] = "sn3d", - degrees: Optional[bool] = True, -) -> np.ndarray: - """ - Returns real spherical harmonic response for the given position(s) - """ - if degrees: - azi = np.deg2rad(azi) - ele = np.deg2rad(ele) - - LM = np.array([(l, m) for l in range(ambi_order + 1) for m in range(-l, l + 1)]) - - response = np.zeros([LM.shape[0], azi.shape[0]]) - - # trig_term * legendre * uncondon - for i, (l, m) in enumerate(LM): - # N3D norm - response[i, :] = np.sqrt( - ((2 * l + 1) * np.math.factorial(l - np.abs(m))) - / (4 * np.pi * np.math.factorial(l + np.abs(m))) - ) - - # trig term - if m < 0: - response[i, :] *= np.sqrt(2) * np.sin(azi * np.abs(m)) - elif m == 0: - pass # response[i,:] *= 1 - else: - response[i, :] *= np.sqrt(2) * np.cos(azi * m) - - # legendre polynomial - response[i, :] *= lpmv(np.abs(m), l, np.sin(ele)) * ((-1) ** np.abs(m)) - - if norm == "sn3d": - response *= np.sqrt(4 * np.pi) - response[:] = np.diag(n2sn(ambi_order)) @ response - elif norm == "n3d": - response *= np.sqrt(4 * np.pi) - else: - pass # ortho - - return response diff --git a/scripts/pyaudio3dtools/masarenderer.py b/scripts/pyaudio3dtools/masarenderer.py deleted file mode 100644 index ff9a3612a4..0000000000 --- a/scripts/pyaudio3dtools/masarenderer.py +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - - -import os -import shutil -import subprocess as sp -from tempfile import TemporaryDirectory - -import numpy as np - -from pyaudio3dtools.audiofile import readfile, writefile -from pyaudio3dtools.spatialaudioformat import Format - - -def render_masa( - in_sig: str, - in_meta: str, - in_spfmt: Format, - out_spfmt: Format, -) -> np.ndarray: - """Python wrapper for masaRenderer binaray - - Parameters - ---------- - in_sig: np.ndarray - Input signal with MASA transport channels - in_meta: str - Input MASA metadata file - in_spfmt: Format - Input spatial audio format - out_spfmt: Format - Output spatial audio format - - Returns - ------- - out_sig: np.ndarray - Rendered signal - fs : int - Sampling frequency (always 48 kHz for masaRenderer) - """ - - if shutil.which("masaRenderer") is None: - raise FileNotFoundError("The masaRenderer binary was not found in path!") - - with TemporaryDirectory() as tmp_dir: - MASA_RENDERER_CMD = [ - "masaRenderer", - "", # outputMode -LS51, -LS714 or BINAURAL - "", # input PCM - in_meta[0], - "", # output PCM - ] - - cmd = MASA_RENDERER_CMD[:] - if out_spfmt.name.startswith("BINAURAL"): - cmd[1] = "-BINAURAL" - out_nchan = 2 - elif out_spfmt.name == "5_1": - cmd[1] = "-LS51" - out_nchan = 6 - else: - cmd[1] = "-LS714" - out_nchan = 12 - - tmp_in = os.path.join(tmp_dir, "tmp_masa_in.pcm") - tmp_out = os.path.join(tmp_dir, "tmp_masa_out.pcm") - - cmd[2] = tmp_in - cmd[4] = tmp_out - - writefile(tmp_in, in_sig, 48000) - - try: - result = sp.run(cmd, check=True, capture_output=True, text=True) - except sp.CalledProcessError as e: - raise SystemError( - f"Command returned non-zero exit status ({e.returncode}): {' '.join(e.cmd)}\n{e.stderr}\n{e.stdout}" - ) - - out_sig, _ = readfile(tmp_out, out_nchan, 48000) - - return out_sig diff --git a/scripts/pyaudio3dtools/quaternions/__init__.py b/scripts/pyaudio3dtools/quaternions/__init__.py deleted file mode 100644 index 8f1f04c369..0000000000 --- a/scripts/pyaudio3dtools/quaternions/__init__.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -""" -Quaternions -==== - -Provides - Handling of quaternions in the same conventions as in IVAS and the Matlab scripts - -Imports -------- -functions -""" -from . import functions diff --git a/scripts/pyaudio3dtools/quaternions/functions.py b/scripts/pyaudio3dtools/quaternions/functions.py deleted file mode 100644 index 6b30ccb5ee..0000000000 --- a/scripts/pyaudio3dtools/quaternions/functions.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -from typing import Tuple -import numpy as np - - -def Quat2Euler(quat: np.ndarray, degrees: bool = True): - "Convert Quaternion to Euler angles" - - sinr = +2.0 * (quat[..., 0] * quat[..., 1] + quat[..., 2] * quat[..., 3]) - cosr = +1.0 - 2.0 * (quat[..., 1] * quat[..., 1] + quat[..., 2] * quat[..., 2]) - roll = np.arctan2(sinr, cosr) - - sinp = +2.0 * (quat[..., 0] * quat[..., 2] - quat[..., 3] * quat[..., 1]) - pitch = np.where(np.fabs(sinp) >= 1, np.copysign(np.pi / 2, sinp), np.arcsin(sinp)) - - siny = +2.0 * (quat[..., 0] * quat[..., 3] + quat[..., 1] * quat[..., 2]) - cosy = +1.0 - 2.0 * (quat[..., 2] * quat[..., 2] + quat[..., 3] * quat[..., 3]) - yaw = np.arctan2(siny, cosy) - - ypr = np.array([yaw, pitch, roll]).T - - if degrees: - ypr = np.rad2deg(ypr) - - return ypr - - -def Euler2Quat(ypr: np.ndarray, degrees: bool = True): - "Convert Euler angles to Quaternion" - - if degrees: - ypr = np.deg2rad(ypr) - - if len(ypr.shape) == 2: - N_quat = ypr.shape[0] - quat = np.zeros([N_quat, 4]) - yaw = ypr[:, 0] - pitch = ypr[:, 1] - roll = ypr[:, 2] - else: - quat = np.zeros([4]) - yaw = ypr[0] - pitch = ypr[1] - roll = ypr[2] - - c1 = np.cos(0.5 * yaw) - c2 = np.cos(0.5 * pitch) - c3 = np.cos(0.5 * roll) - - s1 = np.sin(0.5 * yaw) - s2 = np.sin(0.5 * pitch) - s3 = np.sin(0.5 * roll) - - quat[..., 0] = c3 * c2 * c1 + s3 * s2 * s1 - quat[..., 1] = s3 * c2 * c1 - c3 * s2 * s1 - quat[..., 2] = s3 * c2 * s1 + c3 * s2 * c1 - quat[..., 3] = c3 * c2 * s1 - s3 * s2 * c1 - - return quat - - -def Quat2RotMat(quat: np.ndarray): - "Convert quaternion to rotation matrix" - - R = np.zeros([3, 3]) - - if quat[0] != -3: - - # Quaternions - # formula taken from ivas_rotation.c - - R[0, 0] = ( - quat[0] * quat[0] - + quat[1] * quat[1] - - quat[2] * quat[2] - - quat[3] * quat[3] - ) - R[0, 1] = 2.0 * (quat[1] * quat[2] - quat[0] * quat[3]) - R[0, 2] = 2.0 * (quat[1] * quat[3] + quat[0] * quat[2]) - - R[1, 0] = 2.0 * (quat[1] * quat[2] + quat[0] * quat[3]) - R[1, 1] = ( - quat[0] * quat[0] - - quat[1] * quat[1] - + quat[2] * quat[2] - - quat[3] * quat[3] - ) - R[1, 2] = 2.0 * (quat[2] * quat[3] - quat[0] * quat[1]) - - R[2, 0] = 2.0 * (quat[1] * quat[3] - quat[0] * quat[2]) - R[2, 1] = 2.0 * (quat[2] * quat[3] + quat[0] * quat[1]) - R[2, 2] = ( - quat[0] * quat[0] - - quat[1] * quat[1] - - quat[2] * quat[2] - + quat[3] * quat[3] - ) - - else: - - # Euler angles in R_X(roll)*R_Y(pitch)*R_Z(yaw) convention - # - # yaw: rotate scene counter-clockwise in the horizontal plane - # pitch: rotate scene in the median plane, increase elevation with positive values - # roll: rotate scene from the right ear to the top - # - # formula taken from ivas_rotation.c - - c1 = np.cos(quat[3] / 180.0 * np.pi) - c2 = np.cos(quat[2] / 180.0 * np.pi) - c3 = np.cos(quat[1] / 180.0 * np.pi) - - s1 = np.sin(quat[3] / 180.0 * np.pi) - s2 = np.sin(-quat[2] / 180.0 * np.pi) - s3 = np.sin(quat[1] / 180.0 * np.pi) - - R[0, 0] = c2 * c3 - R[0, 1] = -c2 * s3 - R[0, 2] = s2 - - R[1, 0] = c1 * s3 + c3 * s1 * s2 - R[1, 1] = c1 * c3 - s1 * s2 * s3 - R[1, 2] = -c2 * s1 - - R[2, 0] = s1 * s3 - c1 * c3 * s2 - R[2, 1] = c3 * s1 + c1 * s2 * s3 - R[2, 2] = c1 * c2 - - return R - - -def rotateAziEle( - azi: float, ele: float, R: np.ndarray, is_planar: bool = False -) -> Tuple[float, float]: - w = np.cos(np.deg2rad(ele)) - dv = np.array( - [ - w * np.cos(np.deg2rad(azi)), - w * np.sin(np.deg2rad(azi)), - np.sin(np.deg2rad(ele)), - ] - ) - - dv_rot = R @ dv - - azi = np.rad2deg(np.arctan2(dv_rot[1], dv_rot[0])) - if is_planar: - ele = 0 - else: - ele = np.rad2deg(np.arctan2(dv_rot[2], np.sqrt(np.sum(dv_rot[:2] ** 2)))) - - return azi, ele diff --git a/scripts/pyaudio3dtools/rotation.py b/scripts/pyaudio3dtools/rotation.py deleted file mode 100644 index cbd76aef3f..0000000000 --- a/scripts/pyaudio3dtools/rotation.py +++ /dev/null @@ -1,346 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import numpy as np - -from pyaudio3dtools import EFAP, spatialaudioformat -from pyaudio3dtools.constants import * -from pyaudio3dtools.quaternions.functions import Quat2RotMat, rotateAziEle - -######################################################################### -# Helper functions used by Ruedenberg, -# an implementation of the algorithm in -# Ivanic, J. & Ruedenberg, K., J. Phys. Chem. 100, 6342 (1996) -# translated from ivas_rotation.c -######################################################################### - - -def SHrot_p( - i: int, l: int, a: int, b: int, SHrotmat: np.ndarray, R_lm1: np.ndarray -) -> float: - """Helper function to calculate the ps""" - - ri1 = SHrotmat[i + 1 + 1][1 + 1 + 1] - rim1 = SHrotmat[i + 1 + 1][-1 + 1 + 1] - ri0 = SHrotmat[i + 1 + 1][0 + 1 + 1] - - if b == -l: - R_lm1_1 = R_lm1[a + l - 1][0] - R_lm1_2 = R_lm1[a + l - 1][2 * l - 2] - p = ri1 * R_lm1_1 + rim1 * R_lm1_2 - else: - if b == l: - R_lm1_1 = R_lm1[a + l - 1][2 * l - 2] - R_lm1_2 = R_lm1[a + l - 1][0] - p = ri1 * R_lm1_1 - rim1 * R_lm1_2 - else: - R_lm1_1 = R_lm1[a + l - 1][b + l - 1] - p = ri0 * R_lm1_1 - - return p - - -def SHrot_u(l: int, m: int, n: int, SHrotmat: np.ndarray, R_lm1: np.ndarray) -> float: - """Helper function to calculate the us""" - return SHrot_p(0, l, m, n, SHrotmat, R_lm1) - - -def SHrot_v(l: int, m: int, n: int, SHrotmat: np.ndarray, R_lm1: np.ndarray) -> float: - """Helper function to calculate the vs""" - - if m == 0: - p0 = SHrot_p(1, l, 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -1, n, SHrotmat, R_lm1) - return p0 + p1 - else: - if m > 0: - d = 1.0 if (m == 1) else 0.0 - p0 = SHrot_p(1, l, m - 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -m + 1, n, SHrotmat, R_lm1) - return p0 * np.sqrt(1.0 + d) - p1 * (1.0 - d) - else: - d = 1.0 if (m == -1) else 0.0 - p0 = SHrot_p(1, l, m + 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -m - 1, n, SHrotmat, R_lm1) - return p0 * (1.0 - d) + p1 * np.sqrt(1.0 + d) - - -def SHrot_w(l: int, m: int, n: int, SHrotmat: np.ndarray, R_lm1: np.ndarray) -> float: - """Helper function to calculate the w""" - if m == 0: - raise ValueError("ERROR should not be called\n") - else: - if m > 0: - p0 = SHrot_p(1, l, m + 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -m - 1, n, SHrotmat, R_lm1) - return p0 + p1 - else: - p0 = SHrot_p(1, l, m - 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -m + 1, n, SHrotmat, R_lm1) - return p0 - p1 - - -######################################## -# SHD rotation matrix calculation -# translated from ivas_rotation.c -######################################## -def SHrotmatgen(R: np.ndarray, order: int = 3) -> np.ndarray: - """Calculate SHD roatation matrix from that in real space - - Parameters: - ---------- - R: np.ndarray - real-space rotation matrix - - order: Optional[int] - Ambisonics order, default = 3 - - Returns: - ---------- - SHrotmat: np.ndarray - SHD rotation matrix - - """ - dim = (order + 1) * (order + 1) - - SHrotmat = np.zeros([dim, dim]) - R_lm1 = np.zeros([dim, dim]) - R_l = np.zeros([dim, dim]) - - SHrotmat[0][0] = 1.0 - - SHrotmat[1][1] = R[1][1] - SHrotmat[1][2] = R[1][2] - SHrotmat[1][3] = R[1][0] - - SHrotmat[2][1] = R[2][1] - SHrotmat[2][2] = R[2][2] - SHrotmat[2][3] = R[2][0] - - SHrotmat[3][1] = R[0][1] - SHrotmat[3][2] = R[0][2] - SHrotmat[3][3] = R[0][0] - - for i in range(2 * 1 + 1): - for j in range(2 * 1 + 1): - R_lm1[i][j] = SHrotmat[i + 1][j + 1] - - band_idx = 4 - for l in range(2, order + 1): - - R_l[:, :] = 0.0 - - for m in range(-l, l + 1): - - d = 1 if (m == 0) else 0 - absm = abs(m) - sql2mm2 = np.sqrt((l * l - m * m)) - sqdabsm = np.sqrt(((1 + d) * (l + absm - 1) * (l + absm))) - sqlabsm = np.sqrt(((l - absm - 1) * (l - absm))) - - for n in range(-l, l + 1): - if abs(n) == l: - sqdenom = np.sqrt((2 * l) * (2 * l - 1)) - else: - sqdenom = np.sqrt(l * l - n * n) - - u = sql2mm2 / sqdenom - v = sqdabsm / sqdenom * (1 - 2 * d) * 0.5 - w = sqlabsm / sqdenom * (1 - d) * (-0.5) - - if u != 0: - u = u * SHrot_u(l, m, n, SHrotmat, R_lm1) - if v != 0: - v = v * SHrot_v(l, m, n, SHrotmat, R_lm1) - if w != 0: - w = w * SHrot_w(l, m, n, SHrotmat, R_lm1) - R_l[m + l][n + l] = u + v + w - - for i in range(2 * l + 1): - for j in range(2 * l + 1): - SHrotmat[band_idx + i][band_idx + j] = R_l[i][j] - - for i in range(2 * l + 1): - for j in range(2 * l + 1): - R_lm1[i][j] = R_l[i][j] - - band_idx += 2 * l + 1 - - return SHrotmat - - -def rotateHOA(x: np.ndarray, trajectory: str) -> np.ndarray: - """Rotate HOA signal by applying a rotation matrix calculated from the current quaternion - in each subframe - - Parameters: - ---------- - x: np.ndarray - input signal upto HOA3 - trajectory: str - path to trajectory file - - Returns: - ---------- - y: np.ndarray - rotated HOA signal - """ - - trj_data = np.genfromtxt(trajectory, delimiter=",") - trj_frames = trj_data.shape[0] - - sig_len = x.shape[0] - sig_dim = x.shape[1] - frame_len = (IVAS_FRAME_LEN_MS // 4) * 48 - N_frames = int(sig_len / frame_len) - - if sig_dim not in [4, 9, 16]: - raise ValueError("rotateHOA can only handle FOA, HOA2 or HOA3 signals!") - - y = np.zeros([sig_len, sig_dim]) - - fade_in = np.arange(frame_len) / (frame_len - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - R = np.eye(sig_dim) - R_old = np.eye(sig_dim) - for i_frame in range(N_frames): - - i1 = i_frame * frame_len - i2 = (i_frame + 1) * frame_len - - q1 = trj_data[i_frame % trj_frames, :] - R_r = Quat2RotMat(q1) - R[:, :] = SHrotmatgen(R_r, order=int(np.sqrt(sig_dim)) - 1) - - frame_in = x[i1:i2, :] - frame_out = y[i1:i2, :] - - frame_out[:, :] = (fade_in * frame_in @ R.T) + (fade_out * frame_in @ R_old.T) - - R_old[:, :] = R.copy() - - return y - - -def rotateISM( - azi: np.ndarray, - ele: np.ndarray, - trajectory: str = None, -) -> tuple: - - if trajectory is None: - return azi, ele - - trj_data = np.genfromtxt(trajectory, delimiter=",") - trj_frames = trj_data.shape[0] - - N_frames = azi.shape[0] - if ele.shape[0] != azi.shape[0]: - raise ValueError("Inconsistent input in azi and ele") - - azi_rot = np.zeros([N_frames]) - ele_rot = np.zeros([N_frames]) - - for i_frame in range(N_frames): - q = trj_data[i_frame % trj_frames, :] - azi_rot[i_frame], ele_rot[i_frame] = rotateAziEle( - azi[i_frame], ele[i_frame], Quat2RotMat(q) - ) - - return azi_rot, ele_rot - - -def rotateMC(x: np.ndarray, trajectory: str, layout: spatialaudioformat) -> np.ndarray: - """Rotate MC signal by applying a rotation matrix calculated from the current quaternion - in each subframe - - Parameters: - ---------- - x: np.ndarray - input multichannel signal - trajectory: str - path to trajectory file - - Returns: - ---------- - y: np.ndarray - rotated multichannel signal - """ - - # TODO needs optimization, currently slow - trj_data = np.genfromtxt(trajectory, delimiter=",") - trj_frames = trj_data.shape[0] - - sig_len = x.shape[0] - sig_dim = x.shape[1] - frame_len = (IVAS_FRAME_LEN_MS // 4) * 48 - N_frames = int(sig_len / frame_len) - - y = np.zeros([sig_len, sig_dim]) - - # TODO LFE handling here - panner = EFAP.EFAP(layout.ls_azi, layout.ls_ele) - - fade_in = np.arange(frame_len) / (frame_len - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - R = np.eye(layout.nchannels) - R_old = np.eye(layout.nchannels) - - for i_frame in range(N_frames): - - start = i_frame * frame_len - end = (i_frame + 1) * frame_len - - q = trj_data[i_frame % trj_frames, :] - - rotated_pos = np.array( - [ - rotateAziEle(a, e, Quat2RotMat(q)) - for a, e in zip(layout.ls_azi, layout.ls_ele) - ] - ) - R = panner.pan(rotated_pos[:, 0], rotated_pos[:, 1]) - R[:, layout.lfe_index] = np.zeros([layout.nchannels, 1]) - R[layout.lfe_index, layout.lfe_index] = 1 - - frame_in = x[start:end, :] - frame_out = y[start:end, :] - - frame_out[:, :] = (fade_in * frame_in @ R) + (fade_out * frame_in @ R_old) - - R_old = R.copy() - - return y diff --git a/scripts/pyaudio3dtools/spatialaudioconvert.py b/scripts/pyaudio3dtools/spatialaudioconvert.py deleted file mode 100644 index 0040f28ace..0000000000 --- a/scripts/pyaudio3dtools/spatialaudioconvert.py +++ /dev/null @@ -1,570 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import logging -import os -import warnings -from typing import Optional, Tuple - -import numpy as np - -from pyaudio3dtools import ( - EFAP, - audioarray, - audiofile, - binauralrenderer, - hoadecoder, - masarenderer, - spatialaudioformat, - spatialmetadata, -) -from pyaudio3dtools.constants import * - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - - -def spatial_audio_convert( - in_file: str, - out_file: str, - in_format: Optional[str] = None, - in_fs: Optional[int] = None, - in_nchans: Optional[int] = None, - in_meta_files: Optional[list] = None, - out_format: Optional[str] = None, - out_fs: Optional[int] = None, - out_fc: Optional[int] = None, - output_loudness: Optional[int] = None, - loudness_tool: Optional[str] = None, - limit_output: Optional[bool] = False, - cut_preamble_s: Optional[int] = None, - trajectory: Optional[str] = None, - bin_rend_include_LFE: Optional[bool] = True, - bin_rend_LFE_gain: Optional[float] = 10 ** (5.5 / 20), - binaural_dataset: Optional[str] = "orange53", -) -> Tuple[np.ndarray, int]: - """ - Spatial audio conversion between various formats - - Parameters - ---------- - in_file: str - input filename - out_file: str - output filename - - in_format: Optional[str] - input spatial audio format - in_fs: Optional[int] - input sampling frequency - in_nchans: Optional[int] - input number of channels (deduced for .wav) - - out_format: Optional[str] - output spatial audio format - out_fs: Optional[int] - output sampling frequency - out_fc: Optional[int] - output cutoff frequency (low-pass filtering) - - output_loudness: Optional[int] - Loudness level in LKFS/dBov - loudness_tool: Optional[str] - Loudness tool to use. Must be in $PATH. - Supported tools: - ITU-R BS.1770-4 / "bs1770demo" (default) - ITU-T P.56 / "sv56demo" - - limit_output: Optional[bool] - flag whether to apply limiting to the output - cut_preamble_s: Optional[int] - preamble to cut in seconds - - trajectory: Optional[str] - head rotation trajectory file (for binaural rendering) - bin_rend_include_LFE: Optional[bool] - flag to include LFE in binaural rendering - bin_rend_LFE_gain: Optional[float] - gain to apply for LFE in binaural rendering - binaural_dataset: Optional[str] - dataset for binaural HRIR or BRIRs - - Returns - ------- - out_sig : np.ndarray - output signal - out_fs : int - output sampling frequency - """ - - """ get spatial input and audio format configurations """ - if in_format is None: - if in_nchans is not None: - in_format = spatialaudioformat.Format.detect_format(in_nchans) - in_spfmt = spatialaudioformat.Format(in_format) - logger.info(f" Input spatial audio format detected: {in_format}") - else: - logger.info(f" Input spatial audio format: {in_format}") - in_spfmt = spatialaudioformat.Format(in_format) - - if out_format is None: - out_format = in_format - logger.info( - f" Output spatial audio format not specified, defaulting to pass-through: {out_format}" - ) - out_spfmt = spatialaudioformat.Format(out_format) - - """ read input file """ - # Input is either waveform file (.pcm or .wav) or iis metadata (.txt) - _, input_ext = os.path.splitext(os.path.basename(in_file)) - - if input_ext == ".pcm": - if in_fs is None: - if out_fs: - in_fs = out_fs - else: - raise ValueError("Input and output fs not defined.") - if in_nchans is None: - if in_spfmt is not None: - in_nchans = in_spfmt.nchannels - else: - raise ValueError( - "Number of input channels not defined and can't be deduced." - ) - in_sig, in_fs = audiofile.readfile(in_file, fs=in_fs, nchannels=in_nchans) - elif input_ext == ".wav": - in_sig, in_fs = audiofile.readfile(in_file) - if in_format is None: - in_format = spatialaudioformat.Format.detect_format(in_sig.shape[1]) - in_spfmt = spatialaudioformat.Format(in_format) - - # Adjust number of channels if case of HOA, zeroed vert channels if planar - if in_spfmt.ambi_order > 0: - in_sig = audioarray.convert(in_sig, out_nchans=in_spfmt.nchannels) - elif input_ext == ".txt": - metadata_obj = spatialmetadata.Metadata(in_file, audio_fs=in_fs) - in_sig, in_fs = metadata_obj.get_audio_array() - if in_spfmt.name != "META": - logger.info( - f" {in_spfmt.name} specified with .txt input file: overriding to META format" - ) - in_format = "META" - in_spfmt = spatialaudioformat.Format(in_format) - else: - raise Exception(f"Not supported file {input_ext}") - _, in_nchans = in_sig.shape - - """ convert metadata based formats (ISM / META) directly to output format """ - if in_spfmt.name.startswith("META") or in_spfmt.name.startswith("ISM"): - if out_spfmt.name.startswith("META"): - raise Exception("out format must be specified for META (.txt) or ISM input") - - if in_spfmt.name.startswith("ISM"): - if in_meta_files is None: - raise ValueError( - f"Please specify a list of metadata files for {in_spfmt.name}" - ) - if len(in_meta_files) != int(in_spfmt.name[-1]): - raise ValueError( - f"Mismatch between number of streams and number of specified metadata files for {in_spfmt.name}" - ) - - # initialise metadata object for ISM - metadata_obj = spatialmetadata.Metadata() - metadata_obj.init_for_ism(in_file, in_fs, in_nchans, in_meta_files) - - # TODO decide on reference path for BINAURAL_ROOM - if out_spfmt.name.startswith("BINAURAL_ROOM"): - in_format = "7_1_4" - else: - in_format = out_format - in_spfmt = spatialaudioformat.Format(in_format) - - else: - # set input format to output format - # render_meta() handles all conversions - in_format = out_format - in_spfmt = out_spfmt - - in_sig = render_meta( - metadata_obj, - in_spfmt, - dataset=binaural_dataset, - fs=in_fs, - trajectory=trajectory, - include_LFE=bin_rend_include_LFE, - LFE_gain=bin_rend_LFE_gain, - ) - - """ cut preamble """ - if cut_preamble_s is not None: - samples_to_cut = int(cut_preamble_s * in_fs) - if samples_to_cut > 0: - logger.info(f" Cut preample by {samples_to_cut} samples") - in_sig = audioarray.cut(in_sig, (samples_to_cut, -1)) - - """ zero non-planar input ambisonics channels """ - if in_spfmt.ambi_order > 0 and in_spfmt.isplanar: - in_sig = spatialaudioformat.Format.zero_vert_hoa_channels(in_sig) - - """ Spatial audio format conversion """ - out_sig = in_sig - if (in_spfmt.name != out_spfmt.name) and not ( - in_spfmt.isheadphones and out_spfmt.isheadphones - ): - logger.info(f" {in_spfmt.name} -> {out_spfmt.name}") - - # binaural output (except MASA) - if out_spfmt.name.startswith("BINAURAL") and not in_spfmt.name.startswith( - "MASA" - ): - out_sig = binauralrenderer.binaural_rendering( - in_sig, - in_spfmt, - out_spfmt, - dataset=binaural_dataset, - fs=in_fs, - trajectory=trajectory, - include_LFE=bin_rend_include_LFE, - LFE_gain=bin_rend_LFE_gain, - ) - # non-binaural outputs - # HOA conversion - elif in_spfmt.ambi_order > 0: - out_sig = convert_sba(in_sig, in_spfmt, out_spfmt) - - # MC conversion - elif in_spfmt.isloudspeaker: - out_sig = convert_mc(in_sig, in_spfmt, out_spfmt) - - # MASA conversion - elif in_spfmt.name.startswith("MASA"): - out_sig = convert_masa(in_sig, in_fs, in_meta_files, in_spfmt, out_spfmt) - else: - raise NotImplementedError( - f"{in_spfmt.name} -> {out_spfmt.name}: format conversion not implemented" - ) - - """ zero non-planar output ambisonics channels """ - if out_spfmt.ambi_order > 0 and out_spfmt.isplanar: - out_sig = spatialaudioformat.Format.zero_vert_hoa_channels(out_sig) - - """ resampling """ - if (out_fs is not None) and (out_fs != in_fs): - out_sig = audioarray.resample(out_sig, in_fs, out_fs) - else: - out_fs = in_fs - - """ low-pass filtering """ - if out_fc is not None: - logger.info(f" Low-pass filter ({out_fc}Hz)") - out_sig = audioarray.lpfilter(out_sig, out_fc, out_fs) - - """ limiting """ - if limit_output: - logger.info(" apply limiter") - audioarray.limiter(out_sig, out_fs) - - """ loudness normalization """ - if output_loudness: - _, scale_factor = audiofile.loudnessinfo( - out_sig, - out_fs, - out_format, - output_loudness=output_loudness, - loudness_tool=loudness_tool, - ) - out_sig *= scale_factor - - audiofile.writefile(out_file, out_sig, out_fs) - - return out_sig, out_fs - - -def convert_sba( - in_sig: np.ndarray, - in_spfmt: spatialaudioformat.Format, - out_spfmt: spatialaudioformat.Format, -) -> np.ndarray: - """Convert an ambisonics signal to the requested output format""" - # HOA -> LS - if out_spfmt.isloudspeaker: - HOA2LS = hoadecoder.get_hoa_mtx(in_spfmt.ambi_order, out_spfmt) - return hoadecoder.hoa_linear_decoding(in_sig, HOA2LS) - # HOA -> HOA - elif out_spfmt.ambi_order > 0: - return audioarray.convert(in_sig, in_fs=None, out_nchans=out_spfmt.nchannels) - else: - raise NotImplementedError( - f"{in_spfmt.name} -> {out_spfmt.name}: format conversion not implemented" - ) - - -def convert_mc( - in_sig: np.ndarray, - in_spfmt: spatialaudioformat.Format, - out_spfmt: spatialaudioformat.Format, -) -> np.ndarray: - """Convert a multichannel signal to the requested output format""" - # MC -> LS - if in_spfmt.name == "STEREO" and out_spfmt.name == "MONO": - MC2LS = np.vstack([[0.5], [0.5]]) - return in_sig @ MC2LS - elif out_spfmt.isloudspeaker: - try: - MC2LS = IVAS_MC_CONVERSION[in_spfmt.name][out_spfmt.name] - except KeyError: - ls_azi_woLFE = np.delete(out_spfmt.ls_azi, out_spfmt.lfe_index).astype( - float - ) - ls_ele_woLFE = np.delete(out_spfmt.ls_ele, out_spfmt.lfe_index).astype( - float - ) - - panner = EFAP.EFAP(ls_azi_woLFE, ls_ele_woLFE) - - MC2LS = np.vstack( - [ - panner.pan(a, e).T - for i, (a, e) in enumerate(zip(in_spfmt.ls_azi, in_spfmt.ls_ele)) - if i not in in_spfmt.lfe_index - ] - ) - # TODO tmu : implement configurable LFE handling - # pass-through for LFE - MC2LS = np.insert(MC2LS, in_spfmt.lfe_index, 0, axis=0) - MC2LS = np.insert(MC2LS, out_spfmt.lfe_index, 0, axis=1) - MC2LS[in_spfmt.lfe_index, out_spfmt.lfe_index] = 1 - - # TODO tmu temporarily disable LFE rendering to MONO/STEREO - if out_spfmt.name == "MONO" or out_spfmt.name == "STEREO": - MC2LS[in_spfmt.lfe_index, :] = 0 - return in_sig @ MC2LS - # MC -> HOA - elif out_spfmt.ambi_order > 0: - # SH response for loudspeaker positions - MC2HOA = np.hstack( - [ - hoadecoder.getRSH([a], [e], out_spfmt.ambi_order) - for a, e in zip(in_spfmt.ls_azi, in_spfmt.ls_ele) - ] - ).T - - # do not add LFE to output - MC2HOA[in_spfmt.lfe_index] = 0 - - return in_sig @ MC2HOA - else: - raise NotImplementedError( - f"{in_spfmt.name} -> {out_spfmt.name}: format conversion not implemented" - ) - - -def convert_ism( - in_sig: np.ndarray, - in_fs: int, - in_pos: dict, - in_spfmt: spatialaudioformat.Format, - out_spfmt: spatialaudioformat.Format, -) -> np.ndarray: - """Convert an ISM signal to the requested output format""" - pos_data = [] - for pos in in_pos: - pos_data.extend( - [pos["azimuth"], pos["elevation"]] for _ in range(pos["use_for_frames"]) - ) - pos_data = np.array(pos_data) - pos_frames = pos_data.shape[0] - - sig_len = in_sig.shape[0] - frame_len = IVAS_FRAME_LEN_MS * (in_fs // 1000) - - out_sig = np.zeros([sig_len, out_spfmt.nchannels]) - - fade_in = np.arange(frame_len) / (frame_len - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - if out_spfmt.isloudspeaker: - ls_azi_woLFE = np.delete(out_spfmt.ls_azi, out_spfmt.lfe_index) - ls_ele_woLFE = np.delete(out_spfmt.ls_ele, out_spfmt.lfe_index) - panner = EFAP.EFAP(ls_azi_woLFE, ls_ele_woLFE) - - gains_old = None - - for i_frame, (in_frame, out_frame) in enumerate( - zip( - audioarray.get_framewise(in_sig, frame_len), - audioarray.get_framewise(out_sig, frame_len), - ) - ): - # update the crossfade if we have a smaller last frame - if out_frame.shape[0] != frame_len: - frame_size = out_frame.shape[0] - fade_in = np.arange(frame_size) / (frame_size - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - pos = EFAP.wrap_angles(*pos_data[i_frame % pos_frames, :], clip_ele=True) - - # ISM -> MC - if out_spfmt.isloudspeaker: - gains = panner.pan(pos[0], pos[1]) - gains = np.insert(gains, out_spfmt.lfe_index, 0) - gains = gains[:, np.newaxis] - # ISM -> HOA - elif out_spfmt.ambi_order > 0: - gains = hoadecoder.getRSH([pos[0]], [pos[1]], out_spfmt.ambi_order) - else: - raise NotImplementedError( - f"{in_spfmt.name} -> {out_spfmt.name}: format conversion not implemented" - ) - - if gains_old is None: - gains_old = gains.copy() - - out_frame[:] = (fade_in * in_frame @ gains.T) + ( - fade_out * in_frame @ gains_old.T - ) - - gains_old = gains.copy() - - return out_sig - - -def convert_masa( - in_sig: np.ndarray, - in_fs: int, - in_meta: str, - in_spfmt: spatialaudioformat.Format, - out_spfmt: spatialaudioformat.Format, -) -> np.ndarray: - """Convert a MASA signal to the requested output format""" - - if in_fs != 48000: - raise ValueError(f"{in_spfmt.name} rendering only support for 48kHz!") - - tmp_spfmt = out_spfmt - - # MASA -> LS - if out_spfmt.isloudspeaker: - if not (out_spfmt.name == "5_1" or out_spfmt.name == "7_1_4"): - tmp_spfmt = spatialaudioformat.Format("7_1_4") - warnings.warn( - f"{out_spfmt.name} not natively supported by masaRenderer, using {tmp_spfmt.name} as intermediate format" - ) - # MASA -> HOA - elif out_spfmt.ambi_order > 0: - tmp_spfmt = spatialaudioformat.Format("7_1_4") - warnings.warn( - f"{out_spfmt.name} not natively supported by masaRenderer, using {tmp_spfmt.name} as intermediate format" - ) - elif out_spfmt.name == "BINAURAL": - warnings.warn( - f"Using masaRenderer for rendering; any binaural_dataset setting will be ignored!" - ) - else: - raise NotImplementedError( - f"{in_spfmt.name} -> {out_spfmt.name}: format conversion not implemented" - ) - - out_sig = masarenderer.render_masa(in_sig, in_meta, in_spfmt, tmp_spfmt) - - # conversion done - if tmp_spfmt.name == out_spfmt.name: - return out_sig - # only rendered an intermediate format, more conversion needed - else: - return convert_mc(out_sig, tmp_spfmt, out_spfmt) - - -def render_meta( - metadata_obj: spatialmetadata.Metadata, - dest_fmt: spatialaudioformat.Format, - dataset: str, - fs: int, - trajectory: str, - include_LFE: bool = False, - LFE_gain: float = 10 ** (5.5 / 20), -) -> np.ndarray: - """Render mixed scene metadata to the desired format""" - - logger.info(f" META -> {dest_fmt.name}") - - out_sig = np.zeros([metadata_obj.audio_array.shape[0], dest_fmt.nchannels]) - - for object in metadata_obj.objects: - # extract object signal - start = object["track_index"] - stop = start + object["nb_tracks"] - obj_sig = metadata_obj.audio_array[:, start:stop] - # apply gain - if hasattr(object, "gain"): - obj_sig *= object["gain"] - - if dest_fmt.name.startswith("BINAURAL"): - if object["input_type"] == "ism": - src_format = spatialaudioformat.Format(f"ISM") - positions = object["positions"] - if object["input_type"] == "sba": - src_format = spatialaudioformat.Format(f"SBA{object['order']}") - positions = None - elif object["input_type"] == "mc": - src_format = spatialaudioformat.Format(f"CICP{object['cicp_index']}") - positions = None - - out_sig += binauralrenderer.binaural_rendering( - obj_sig, - src_format, - dest_fmt, - dataset=dataset, - fs=fs, - trajectory=trajectory, - include_LFE=include_LFE, - LFE_gain=LFE_gain, - in_pos=positions, - ) - else: - if object["input_type"] == "ism": - src_format = spatialaudioformat.Format("ISM") - out_sig += convert_ism( - obj_sig, fs, object["positions"], src_format, dest_fmt - ) - elif object["input_type"] == "sba": - src_format = object["format"] - out_sig += convert_sba(obj_sig, src_format, dest_fmt) - elif object["input_type"] == "mc": - src_format = object["format"] - out_sig += convert_mc(obj_sig, src_format, dest_fmt) - - return out_sig diff --git a/scripts/pyaudio3dtools/spatialaudioformat.py b/scripts/pyaudio3dtools/spatialaudioformat.py deleted file mode 100644 index 524e4d61f5..0000000000 --- a/scripts/pyaudio3dtools/spatialaudioformat.py +++ /dev/null @@ -1,470 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import os - -import numpy as np - -_format_configs = { - "MONO": { - "name": "MONO", - "ambi_order": 0, - "isplanar": False, - "nchannels": 1, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [0], - "ls_ele": [0], - "lfe_index": [], - "altname": "HOA0", - }, - "STEREO": { - "name": "STEREO", - "ambi_order": -1, - "isplanar": False, - "nchannels": 2, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [30, -30], - "ls_ele": [0, 0], - "lfe_index": [], - "altname": "cicp2", - }, - # binaural formats - "BINAURAL": { - "name": "BINAURAL", - "ambi_order": -1, - "isplanar": None, - "nchannels": 2, - "isloudspeaker": False, - "isheadphones": True, - "lfe_index": [], - "altname": "binaural", - }, - "BINAURAL_ROOM": { - "name": "BINAURAL_ROOM", - "ambi_order": -1, - "isplanar": None, - "nchannels": 2, - "isloudspeaker": False, - "isheadphones": True, - "lfe_index": [], - "altname": "binaural_room", - }, - "BINAURAL_REF": { - "name": "BINAURAL_REF", - "ambi_order": -1, - "isplanar": None, - "nchannels": 2, - "isloudspeaker": False, - "isheadphones": True, - "lfe_index": [], - "altname": "binaural_ref", - }, - "BINAURAL_ROOM_REF": { - "name": "BINAURAL_ROOM_REF", - "ambi_order": -1, - "isplanar": None, - "nchannels": 2, - "isloudspeaker": False, - "isheadphones": True, - "lfe_index": [], - "altname": "binaural_room_ref", - }, - # loudspeaker formats - "5_1": { - "name": "5_1", - "ambi_order": -1, - "isplanar": True, - "nchannels": 6, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [30, -30, 0, 0, 110, -110], - "ls_ele": [0, 0, 0, 0, 0, 0], - "lfe_index": [3], - "altname": "cicp6", - }, - "7_1": { - "name": "7_1", - "ambi_order": -1, - "isplanar": True, - "nchannels": 8, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [30, -30, 0, 0, 110, -110, 135, -135], - "ls_ele": [0, 0, 0, 0, 0, 0, 0, 0], - "lfe_index": [3], - "altname": "cicp12", - }, - "5_1_2": { - "name": "5_1_2", - "ambi_order": -1, - "isplanar": False, - "nchannels": 8, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [30, -30, 0, 0, 110, -110, 30, -30], - "ls_ele": [0, 0, 0, 0, 0, 0, 35, 35], - "lfe_index": [3], - "altname": "cicp14", - }, - "5_1_4": { - "name": "5_1_4", - "ambi_order": -1, - "isplanar": False, - "nchannels": 10, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [30, -30, 0, 0, 110, -110, 30, -30, 110, -110], - "ls_ele": [0, 0, 0, 0, 0, 0, 35, 35, 35, 35], - "lfe_index": [3], - "altname": "cicp16", - }, - "7_1_4": { - "name": "7_1_4", - "ambi_order": -1, - "isplanar": False, - "nchannels": 12, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [30, -30, 0, 0, 135, -135, 90, -90, 30, -30, 135, -135], - "ls_ele": [0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 35], - "lfe_index": [3], - "altname": "cicp19", - }, - "COMBINED": { - "name": "COMBINED", - "ambi_order": -1, - "isplanar": False, - "nchannels": 15, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [ - 30, - -30, - 0, - 135, - -135, - 110, - -110, - 90, - -90, - 30, - -30, - 110, - -110, - 135, - -135, - ], - "ls_ele": [0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 35, 35, 35], - "lfe_index": None, - "altname": "combined", - }, - "CUSTOM_LS": { - "name": "CUSTOM_LS", - "ambi_order": -1, - "isplanar": False, - "nchannels": -1, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": None, - "ls_ele": None, - "lfe_index": None, - "altname": "CUSTOM_LS", - "config_file": "layout.txt", - }, - # ambisonics - "FOA": { - "name": "FOA", - "ambi_order": 1, - "isplanar": False, - "nchannels": 4, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "sba1", - }, - "PLANARFOA": { - "name": "PLANARFOA", - "ambi_order": 1, - "isplanar": True, - "nchannels": 4, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "sba1", - }, - "HOA2": { - "name": "HOA2", - "ambi_order": 2, - "isplanar": False, - "nchannels": 9, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "sba2", - }, - "PLANARHOA2": { - "name": "PLANARHOA2", - "ambi_order": 2, - "isplanar": True, - "nchannels": 9, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "sba2", - }, - "HOA3": { - "name": "HOA3", - "ambi_order": 3, - "isplanar": False, - "nchannels": 16, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "sba3", - }, - "PLANARHOA3": { - "name": "PLANARHOA3", - "ambi_order": 3, - "isplanar": True, - "nchannels": 16, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "sba3", - }, - # ism - "ISM": { - "name": "ISM", - "ambi_order": -1, - "isplanar": None, - "nchannels": -1, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "ism", - }, - "ISM1": { - "name": "ISM1", - "ambi_order": -1, - "isplanar": None, - "nchannels": 1, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "ism1", - }, - "ISM2": { - "name": "ISM2", - "ambi_order": -1, - "isplanar": None, - "nchannels": 2, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "ism2", - }, - "ISM3": { - "name": "ISM3", - "ambi_order": -1, - "isplanar": None, - "nchannels": 3, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "ism3", - }, - "ISM4": { - "name": "ISM4", - "ambi_order": -1, - "isplanar": None, - "nchannels": 4, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "ism4", - }, - # masa - "MASA": { - "name": "MASA", - "ambi_order": -1, - "isplanar": None, - "nchannels": -1, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "masa", - }, - "MASA1": { - "name": "MASA1", - "ambi_order": -1, - "isplanar": None, - "nchannels": 1, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "masa1", - }, - "MASA2": { - "name": "MASA2", - "ambi_order": -1, - "isplanar": None, - "nchannels": 2, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "masa2", - }, - # metadata format - "META": { - "name": "META", - "ambi_order": -1, - "isplanar": None, - "nchannels": -1, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "meta", - }, -} - -# Channel indices of planar Ambisonic components of ACN -_planar_hoa_channels = np.array([0, 1, 3, 4, 8, 9, 15]) -# Channel indices of vertical Ambisonic components of ACN -_vert_hoa_channels = np.array([2, 5, 6, 7, 10, 11, 12, 13, 14]) - - -class Format: - def __init__(self, in_format: str = "FOA"): - self.name = None - self.altname = None - self.ambi_order = -1 - self.nchannels = None - self.isloudspeaker = False - self.isheadphones = False - self.lfe_index = [] - - # if it is a path, then treat as custom layout - if not isinstance(in_format, str) or in_format[-4:].lower() == ".txt": - with open(in_format, "r") as f_ls: - self.ls_azi = [ - float(x.strip()) for x in f_ls.readline().strip().split(",") - ] - self.ls_ele = [ - float(x.strip()) for x in f_ls.readline().strip().split(",") - ] - try: - self.lfe_index = [ - int(x.strip()) for x in f_ls.readline().strip().split(",") - ] - except: - self.lfe_index = [] - - if self.lfe_index: - [self.ls_azi.insert(i, 0.0) for i in self.lfe_index] - [self.ls_ele.insert(i, 0.0) for i in self.lfe_index] - - self.name = os.path.basename(in_format).replace(".txt", "") - self.altname = "CUSTOM_LS" - self.config_file = str(in_format) - self.isloudspeaker = True - self.nchannels = len(self.ls_azi) - self.isplanar = np.all([e == 0.0 for e in self.ls_ele]) - # search in predefined dictionary - else: - for config_name, config_dict in _format_configs.items(): - if ( - in_format.upper() == config_name - or in_format.upper() == config_dict["altname"].upper() - ): - for k, v in _format_configs[config_name].items(): - setattr(self, k, v) - - if not self.name: - raise SystemExit( - f"Spatial audio format '{in_format}' not supported. If 'EXT' is used, please change to ISM or MASA. Ensure it is same as 'in_format'" - ) - - def get_nchannels(self): - return self.nchannels - - def print_info(self): - attrs = vars(self) - for item in attrs: - print(f" {item}: {attrs[item]}") - - @staticmethod - def ambiorder_from_nchannels(out_nchans: int) -> int: - return int(np.sqrt(out_nchans) - 1) - - @staticmethod - def nchannels_from_ambiorder(ambi_order: int) -> int: - return (ambi_order + 1) ** 2 - - @staticmethod - def zero_vert_hoa_channels(x: np.ndarray) -> np.ndarray: - x[:, _vert_hoa_channels[_vert_hoa_channels < x.shape[1]]] = 0.0 - return x - - @staticmethod - def get_vert_hoa_channels() -> np.ndarray: - return _vert_hoa_channels - - @staticmethod - def list_all(long_descition: bool = False): - for key, value in _format_configs.items(): - if long_descition is True: - print(key, value) - else: - print(key) - - @staticmethod - def detect_format(nchannels: int) -> str: - config_name = None - - for k, v in _format_configs.items(): - if v["nchannels"] == nchannels: - config_name = v["name"] - break - - if config_name is None: - raise SystemExit("Spatial audio format not found") - - return config_name - - @staticmethod - def get_format_dict(in_format: str): - for config_name in _format_configs: - if in_format.upper() == config_name: - return _format_configs[config_name] - return None diff --git a/scripts/pyaudio3dtools/spatialmetadata.py b/scripts/pyaudio3dtools/spatialmetadata.py deleted file mode 100644 index 3cf1338d4f..0000000000 --- a/scripts/pyaudio3dtools/spatialmetadata.py +++ /dev/null @@ -1,492 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import logging -import math -import os -from typing import Optional, TextIO - -import numpy as np - -from pyaudio3dtools import audioarray, audiofile, spatialaudioformat - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - - -class Metadata: - def __init__( - self, - metadata_path: Optional[str] = None, - metadata_format: Optional[str] = "iis", - audio_wav_path: Optional[str] = None, - audio_fs: Optional[int] = 48000, - ): - """ - Spatial Metadata - - Parameters - ---------- - metadata_path: Optional[str] - path to metadata file - metadata_format: Optional[str] - format for metadata file, supported: ["iis", "ivas_ism"] - audio_wav_path: Optional[str] - path to corresponding audio files - audio_fs: Optional[int] - audio sampling frequency - - Returns - ------- - self - - """ - - self._delete_all() - if audio_fs is None: - audio_fs = 48000 - self.audio_fs = audio_fs - - # init from input file - if metadata_path is not None and os.path.isfile(metadata_path): - self.read_metadata(metadata_path, metadata_format, audio_wav_path) - - def _delete_all(self) -> None: - self.objects = [] # list of audio objects - self.nb_objects = 0 # Number of objects - self.nb_tracks = 0 # Number of tracks - self.audio_wav = [] # list of wav files - self.audio_array = np.zeros([1, 0]) - self.nb_frames = 0 # Number of frames - - def read_metadata( - self, - metadata_path: str, - metadata_format: str = "iis", - audio_wav_path: Optional[str] = None, - ) -> None: - if metadata_format == "iis": - with open(metadata_path, "r") as file_in: - dirname = os.path.dirname(metadata_path) - self.audio_wav.append(os.path.join(dirname, file_in.readline().strip())) - nb_new_objects = int(file_in.readline()) - - for _ in range(nb_new_objects): - in_type = file_in.readline().strip() - - if in_type.lower() == "ism": - self.objects.append(read_ism_input(file_in, dirname)) - elif in_type.lower() == "sba": - self.objects.append(read_sba_input(file_in)) - elif in_type.lower() == "mc": - self.objects.append(read_mc_input(file_in)) - else: - raise ValueError("Unknown input type in metadata file") - - self._append_audio_array(self.audio_wav[-1]) - self.nb_objects += 1 - - elif metadata_format == "ivas_ism": - if audio_wav_path is None: - raise FileNotFoundError("Wave file not specified!") - - self.audio_wav.append(audio_wav_path) - self.objects.append( - read_ism_ivas_data(metadata_path, object_index=self.nb_objects) - ) - self._append_audio_array(self.audio_wav[-1]) - self.nb_objects += 1 - else: - raise ValueError("Metadata: unknown metadata format") - - def write_metadata( - self, - metadata_path: str, - metadata_format: str = "iis", - audio_output_path: Optional[str] = None, - max_objects: Optional[int] = None, - ) -> list: - metadata_out_list = [] - - if metadata_format == "iis": - with open(metadata_path, "w") as file_out: - if audio_output_path is not None: - file_out.write(f"{audio_output_path}\n") - dirname = os.path.dirname(metadata_path) - self.write_audio_array(os.path.join(dirname, audio_output_path)) - - file_out.write(f"{str(self.nb_objects)}\n") - for object_index in range(self.nb_objects): - if self.objects[object_index]["input_type"] == "ism": - write_ism_input( - file_out, - self.objects[object_index], - metadata_path, - num_frames=self.nb_frames, - ) - elif self.objects[object_index]["input_type"] == "sba": - write_sba_input(file_out, self.objects[object_index]) - elif self.objects[object_index]["input_type"] == "mc": - write_mc_input(file_out, self.objects[object_index]) - else: - raise ValueError("Unknown input type in metadata file") - - metadata_out_list.append(file_out.name) - - elif metadata_format == "ivas_ism": - outfilename, output_ext = os.path.splitext(os.path.basename(metadata_path)) - x = np.zeros([1, 0]) - - for object_index in range(self.nb_objects): - if self.objects[object_index]["input_type"] == "ism": - # Prepare audio wavefrom - if audio_output_path is not None: - chan_start = self.objects[object_index]["track_index"] - chan_end = chan_start + self.objects[object_index]["nb_tracks"] - if x.shape[1] == 0: - x = self.audio_array[:, chan_start:chan_end] - else: - x = np.append( - x, self.audio_array[:, chan_start:chan_end], axis=1 - ) - - # Write positions - with open( - metadata_path.replace( - output_ext, - str(self.objects[object_index]["track_index"]) + output_ext, - ), - "w", - ) as file_out: - write_ism_ivas_data( - file_out, - self.objects[object_index], - num_frames=self.nb_frames, - ) - metadata_out_list.append(file_out.name) - - if (max_objects is not None) and ( - len(metadata_out_list) >= max_objects - ): - break - - # Write audio waveform - if audio_output_path is not None: - audiofile.writefile(audio_output_path, x, fs=self.audio_fs) - - return metadata_out_list - - def print_info(self) -> None: - print(f"Number of objects in the scene: {self.nb_objects}") - for object_index in range(self.nb_objects): - print(f" Object #{object_index} Type: {self.objects[object_index]}") - - def _append_audio_array(self, audio_wav=None, fs=48000, nchan=1, object_index=None): - if audio_wav is None: - audio_wav = self.audio_wav[-1] - if object_index is None: - object_index = -1 - - x, fs = audiofile.readfile(audio_wav, fs=fs, nchannels=nchan) - logger.debug(f"Append {audio_wav}: {x.shape[0]} by {x.shape[1]}") - - # Select appropriate channels & resample if necessary - chan_start = self.objects[object_index]["track_index"] - chan_end = chan_start + self.objects[object_index]["nb_tracks"] - logger.debug(f" channels from {chan_start} to {chan_end}") - x = x[:, chan_start:chan_end] - x = audioarray.resample(x, fs, self.audio_fs) - - # Append array and update track index - self.objects[object_index]["track_index"] = self.audio_array.shape[1] - if self.audio_array.shape[1] == 0: - self.audio_array = x - else: - len_min = min([self.audio_array.shape[0], x.shape[0]]) - self.audio_array = np.append( - self.audio_array[:len_min][:], x[:len_min][:], axis=1 - ) - - self.nb_tracks = self.nb_tracks + x.shape[1] - self.nb_frames = math.ceil(50.0 * self.audio_array.shape[0] / self.audio_fs) - - # init with list of ISM metadata files - def init_for_ism( - self, - in_file: str, - in_fs: int, - in_nchan: int, - metadata_files: list, - ) -> None: - self.audio_wav.append(in_file) - - for csv in metadata_files: - self.objects.append(read_ism_ivas_data(csv, object_index=self.nb_objects)) - self.objects[-1]["track_index"] = self.nb_objects - self._append_audio_array(self.audio_wav[-1], fs=in_fs, nchan=in_nchan) - self.nb_objects += 1 - - # Get audio array with sampling rate - def get_audio_array(self): - return self.audio_array, self.audio_fs - - # Set audio array from file - def set_audio_array(self, audio_path, fs=None): - if fs is None: - fs = self.audio_fs - audiofile.readfile(audio_path, self.audio_array, fs) - self.audio_fs = fs - - # Write in file audio array - def write_audio_array(self, audio_path): - audiofile.writefile(audio_path, self.audio_array, fs=self.audio_fs) - - -################################################## -# Helper functions for IIS metadata -################################################## -def read_ism_input(file_handle: TextIO, dirname: str) -> dict: - """ - Read ISM Input (IIS metadata format) - - Parameters - ---------- - file_handle: TextIO - file pointer - dirname: str - root directory used to read csv files - - Returns - ------- - dict - ISM dictionary with positions - """ - ism = {"input_type": "ism"} - ism["track_index"] = int(file_handle.readline()) - 1 - ism["nb_tracks"] = 1 - ism["positions"] = [] - ism["gain"] = 1 - line = file_handle.readline() - - try: - ism["num_positions"] = int(line) - for _ in range(ism["num_positions"]): - [use_for_frames, azimuth, elevation] = ( - file_handle.readline().strip().split(",") - ) - pos = {} - pos["use_for_frames"] = int(use_for_frames) - pos["azimuth"] = int(azimuth) - pos["elevation"] = int(elevation) - ism["positions"].append(pos) - ism["gain"] = read_gain_value(file_handle) - except: - meta_csv = os.path.join(dirname, line.strip()) - pos_idx = 0 - with open(meta_csv) as file_handle: - for line in file_handle: - current_values = line.strip().split(",") - pos = {} - pos["use_for_frames"] = 1 - pos["azimuth"] = float(current_values[0]) - pos["elevation"] = float(current_values[1]) - ism["positions"].append(pos) - pos_idx += 1 - - ism["num_positions"] = pos_idx - - return ism - - -def write_ism_input( - file_handle: TextIO, - ism_dict: dict, - metadata_path: Optional[str] = None, - num_frames: Optional[int] = None, -) -> None: - """ - Write ISM Input (IIS metadata format) - - Parameters - ---------- - file_handle: TextIO - file pointer - ism_dict: dict - ISM dictionnary with positions - metadata_path: Optional[str] - if given positions writen cvs format - num_frame: Optional[int] - number of frames to be written - - Returns - ------- - None - """ - file_handle.write("ISM\n") - track_index = ism_dict["track_index"] - file_handle.write(f"{str(track_index + 1)}\n") - - if metadata_path is None: - num_positions = ism_dict["num_positions"] - file_handle.write(f"{str(num_positions)}\n") - - positions = ism_dict["positions"] - for pos_idx in range(ism_dict["num_positions"]): - use_for_frames = positions[pos_idx]["use_for_frames"] - azimuth = round(positions[pos_idx]["azimuth"]) - elevation = round(positions[pos_idx]["elevation"]) - - file_handle.write(f"{use_for_frames:04d},{azimuth:+03d},{elevation:+03d}\n") - else: - # Write filename - _, extname = os.path.splitext(metadata_path) - dirname = os.path.dirname(metadata_path) - basename = os.path.basename(metadata_path).replace( - extname, "." + str(track_index) + ".csv" - ) - file_handle.write(f"{basename}\n") - # Write positions - with open(os.path.join(dirname, basename), "w") as file_out: - write_ism_ivas_data(file_out, ism_dict, num_frames=num_frames) - - -def read_sba_input(file_handle: TextIO) -> dict: - sba = {"input_type": "sba"} - sba["track_index"] = int(file_handle.readline()) - 1 - sba["format"] = spatialaudioformat.Format(f"SBA{int(file_handle.readline())}") - sba["order"] = sba["format"].ambi_order - sba["nb_tracks"] = (sba["order"] + 1) ** 2 - sba["gain"] = read_gain_value(file_handle) - return sba - - -def write_sba_input(file_handle: TextIO, sba_dict: dict) -> None: - file_handle.write("SBA\n") - track_index = sba_dict["track_index"] - file_handle.write(f"{str(track_index + 1)}\n") - order = sba_dict["order"] - file_handle.write(f"{str(order)}\n") - - -def read_mc_input(file_handle: TextIO) -> dict: - mc = {"input_type": "mc"} - mc["track_index"] = int(file_handle.readline()) - 1 - mc["format"] = spatialaudioformat.Format(file_handle.readline().strip()) - mc["nb_tracks"] = mc["format"].nchannels - mc["gain"] = read_gain_value(file_handle) - return mc - - -def write_mc_input(file_handle: TextIO, mc_dict: dict) -> None: - file_handle.write("MC\n") - track_index = mc_dict["track_index"] - file_handle.write(f"{str(track_index + 1)}\n") - name = mc_dict["format"].name - file_handle.write(f"{name}\n") - - -def read_gain_value(file_handle: TextIO) -> float: - original_pos = file_handle.tell() - gain = file_handle.readline().lower() - if gain.startswith("gain_db"): - gain = float(gain.replace("gain_db", "")) - return 10 ** (gain / 20) - else: - file_handle.seek(original_pos) - return 1 - - -################################################## -# Helper functions for ISM IVAS metadata -################################################## -def read_ism_ivas_data(metadata_path: str, object_index: int = 0) -> None: - ism = {"input_type": "ism"} - ism["track_index"] = 0 - ism["num_positions"] = 0 - ism["nb_tracks"] = 1 - ism["positions"] = [] - - pos_idx = 0 - - try: - with open(metadata_path) as file_handle: - for line in file_handle: - current_values = line.strip().split(",") - pos = {} - pos["use_for_frames"] = 1 - pos["azimuth"] = float(current_values[0]) - pos["elevation"] = float(current_values[1]) - ism["positions"].append(pos) - pos_idx += 1 - except FileNotFoundError: - # TODO in case of NULL metadata we can also spread the objects spatially - pos = {} - pos["use_for_frames"] = 1 - pos["azimuth"] = 0.0 - pos["elevation"] = 0.0 - ism["positions"].append(pos) - pos_idx += 1 - - ism["num_positions"] = pos_idx - return ism - - -def write_ism_ivas_data( - file_handle: TextIO, ism_dict: dict, num_frames: Optional[int] = None -) -> None: - positions = ism_dict["positions"] - - if num_frames is None: - num_frames = 0 - for pos_idx in range(len(positions)): - num_frames += positions[pos_idx]["use_for_frames"] - - # Constants for all positions - distance = 1.0 - spread = 0.0 - gain = 1.0 - pos_idx = 0 - pos_used_times = 0 - for _ in range(num_frames): - azimuth = float(positions[pos_idx]["azimuth"]) - elevation = float(positions[pos_idx]["elevation"]) - - file_handle.write( - f"{azimuth:+07.2f},{elevation:+06.2f},{distance:05.2f},{spread:06.2f},{gain:04.2f}\n" - ) - - pos_used_times += 1 - - if pos_used_times == positions[pos_idx]["use_for_frames"]: - pos_idx = (pos_idx + 1) % len(positions) - pos_used_times = 0 diff --git a/scripts/pyprocessing/__init__.py b/scripts/pyprocessing/__init__.py deleted file mode 100644 index 5e7b5aa892..0000000000 --- a/scripts/pyprocessing/__init__.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -""" -pyprocessing -==== - -Provides - Methods for processing/coding 3D audio - -Imports -------- -functions -class -""" -from . import evs, ivas, prepost_processing, processing, processing_configs, utils diff --git a/scripts/pyprocessing/evs.py b/scripts/pyprocessing/evs.py deleted file mode 100644 index 6de86f2a27..0000000000 --- a/scripts/pyprocessing/evs.py +++ /dev/null @@ -1,238 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import logging -import os - -from pyaudio3dtools import audiofile, spatialaudioformat, spatialmetadata -from pyaudio3dtools.spatialaudioconvert import render_meta - -from pyprocessing import utils -from pyprocessing.processing import Processing - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - - -class EVS(Processing): - def __init__( - self, - in_format: str, - out_format: str, - bitrate: int, - in_fs: int = 48000, - **kwargs, - ): - super().__init__() - for k, v in kwargs.items(): - setattr(self, k, v) - self.in_format = spatialaudioformat.Format(in_format) - self.out_format = spatialaudioformat.Format(out_format) - if isinstance(bitrate, int): - self.bitrate = [bitrate] - if self.in_format.nchannels > 0: - self.bitrate = [bitrate] * self.in_format.nchannels - elif isinstance(bitrate, list): - self.bitrate = bitrate - if self.in_format.nchannels > 0: - self.bitrate.extend([0] * (self.in_format.nchannels - len(bitrate))) - self.in_fs = in_fs - self.exec_enc = utils.get_exec_path(kwargs["cod_bin"]) - self.exec_dec = utils.get_exec_path(kwargs["dec_bin"]) - - if in_format != out_format: - raise ValueError(f"EVS_: output format must be equal to input format.") - if not os.path.exists(self.exec_enc): - raise FileNotFoundError( - f"The EVS encoder binary was not found at the given path: {self.exec_enc}" - ) - if not os.path.exists(self.exec_dec): - raise FileNotFoundError( - f"The EVS decoder binary was not found at the given path: {self.exec_dec}" - ) - - def process(self, input_path: str, output_path: str, tmp_path: str) -> None: - logger.debug(f"EVS {input_path} -> {output_path}") - # Read input file - _, input_ext = os.path.splitext(os.path.basename(input_path)) - _, output_ext = os.path.splitext(os.path.basename(output_path)) - if input_ext == ".txt": - metadata_obj = spatialmetadata.Metadata(input_path, audio_fs=self.in_fs) - input_multi_channels = output_path.replace(output_ext, ".pcm") - if self.in_format.name[:3] == "ISM": - # extract IVAS ISM metadata and - input_csv = output_path.replace(output_ext, ".csv") - metadata_files = metadata_obj.write_metadata( - input_csv, "ivas_ism", input_multi_channels, max_objects=4 - ) - # change number of ISM - self.in_format.name = "ISM" + str(len(metadata_files)) - self.in_format = spatialaudioformat.Format(self.in_format.name[:4]) - if len(self.bitrate) < len(metadata_files): - self.bitrate.extend( - [self.bitrate[-1]] * (len(metadata_files) - len(self.bitrate)) - ) - else: - in_sig, fs = metadata_obj.get_audio_array() - in_sig = render_meta(metadata_obj, self.in_spfmt) - audiofile.writefile(input_multi_channels, in_sig, self.in_fs) - elif input_ext == ".wav" or input_ext == ".pcm": - input_multi_channels = input_path - else: - raise ValueError(f"EVS: invalid audio input extension: {input_ext}") - - # Split the corresponding channels at enc_fs - split_chans_in = [] - for idx in range(self.in_format.nchannels): - split_chans_in.append( - output_path.replace(output_ext, "." + str(idx) + ".pcm") - ) - - audiofile.splitfiles( - input_multi_channels, - split_chans_in, - in_nchans=self.in_format.nchannels, - in_fs=self.in_fs, - out_fs=self.enc_fs, - ) - - # Zero vertical Ambi channels if planar - mute_next_chan = -1 - if self.in_format.ambi_order > 0 and self.in_format.isplanar: - mute_chans_enum = enumerate(self.in_format.get_vert_hoa_channels()) - _, mute_next_chan = next(mute_chans_enum) - - # run processing - split_chans_out = [] - for idx, split_in in enumerate(split_chans_in): - split_bs = split_in.replace(".pcm", ".bs") - split_out = split_in.replace(".pcm", ".dec.pcm") - - # Zero vertical Ambi channels if planar - if idx == mute_next_chan: - audiofile.mutefile(split_in, split_in, in_fs=self.enc_fs, in_nchans=1) - try: - _, mute_next_chan = next(mute_chans_enum) - except: - pass - - if self.bitrate[idx] > 0: - self.enc(split_in, split_bs, self.bitrate[idx]) - self.dec(split_bs, split_out) - else: - # zero channel - audiofile.convertfile( - split_in, split_out, in_fs=self.enc_fs, out_fs=self.dec_fs - ) - audiofile.mutefile(split_out, split_out, in_fs=self.dec_fs, in_nchans=1) - - split_chans_out.append(split_out) - - # Output file: combine waveform files into one output, and optinaly write iis metadata - if output_ext == ".pcm" or output_ext == ".wav": - audiofile.combinefiles( - split_chans_out, output_path, in_fs=self.dec_fs, out_fs=self.dec_fs - ) - elif output_ext == ".txt": - output_wav = output_path.replace(output_ext, ".wav") - if self.in_format.name[:3] == "ISM": - # Write new metadata - metadata_out_obj = spatialmetadata.Metadata(audio_fs=self.dec_fs) - for idx in range(self.in_format.nchannels): - logger.debug( - "EVS_mono Read metadata " - + metadata_files[idx] - + " with audio file " - + split_chans_out[idx] - ) - metadata_out_obj.read_metadata( - metadata_files[idx], - metadata_format="ivas_ism", - audio_wav_path=split_chans_out[idx], - ) - metadata_out_obj.write_metadata( - output_path, - metadata_format="iis", - audio_output_path=os.path.basename(output_wav), - ) - else: - # pass-trhough mode, rewrite only audio waveform with decoded file - audiofile.combinefiles( - split_chans_out, output_wav, in_fs=self.dec_fs, out_fs=self.dec_fs - ) - metadata_obj.write_metadata(output_path, metadata_format="iis") - else: - raise ValueError(f"EVS: invalid audio input extension: {output_ext}") - - def enc(self, input_wav: str, output_bs: str, bitrate: int) -> None: - input_pcm = output_bs.replace(".bs", ".pcm") - audiofile.convertfile(input_wav, input_pcm, out_nchans=1, out_fs=self.in_fs) - - cmd = [self.exec_enc] - if self.dtx: - cmd.append("-dtx") - - if self.cod_opt: - cmd.extend(self.cod_opt) - - cmd.extend( - [ - "-max_band", - self.max_band, - str(bitrate), - str(self.enc_fs // 1000), - input_pcm, - output_bs, - ] - ) - Processing.run(cmd) - - def dec(self, input_bs: str, output_wav: str) -> None: - cmd = [self.exec_dec] - - if self.dec_opt: - cmd.extend(self.dec_opt) - - cmd.extend([str(self.dec_fs // 1000), input_bs, output_wav]) - Processing.run(cmd) - - def get_processing_file_paths(self, template_out_file: str, input_ext: str) -> list: - if input_ext == ".wav" or input_ext == ".pcm": - return [template_out_file.replace(".wav", "_evs_mono.wav")] - elif input_ext == ".txt": - return [template_out_file.replace(".wav", "_evs_mono.txt")] - else: - raise ValueError(f"EVS: invalid audio input extension: {input_ext}") - - def get_total_bit_rate(self): - return sum(self.bitrate) diff --git a/scripts/pyprocessing/ivas.py b/scripts/pyprocessing/ivas.py deleted file mode 100644 index 0f182fd1fb..0000000000 --- a/scripts/pyprocessing/ivas.py +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import logging -import os -from typing import Optional - -from pyaudio3dtools import audiofile, spatialaudioformat, spatialmetadata -from pyaudio3dtools.spatialaudioconvert import render_meta - -from pyprocessing import utils -from pyprocessing.processing import Processing - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - - -class IVAS(Processing): - def __init__(self, in_format: str, bitrate: int, in_fs: int = 48000, **kwargs): - super().__init__() - for k, v in kwargs.items(): - setattr(self, k, v) - self.in_format = spatialaudioformat.Format(in_format) - self.bitrate = bitrate - self.in_fs = in_fs - self.out_format = spatialaudioformat.Format(kwargs["out_format"]) - self.exec_enc = utils.get_exec_path(kwargs["cod_bin"]) - self.exec_dec = utils.get_exec_path(kwargs["dec_bin"]) - self.ht_file = os.path.abspath(self.ht_file) - - if self.out_format.name == "ISM": - self.out_format.name = "EXT" - - if not os.path.exists(self.exec_enc): - raise FileNotFoundError( - f"The IVAS encoder binary was not found at the given path: {self.exec_enc}" - ) - if not os.path.exists(self.exec_dec): - raise FileNotFoundError( - f"The IVAS decoder binary was not found at the given path: {self.exec_dec}" - ) - - def process(self, input_path: str, output_path: str, tmp_path: str) -> None: - logger.debug(f"IVAS {input_path} -> {output_path}") - - _, output_ext = os.path.splitext(os.path.basename(output_path)) - if output_ext == ".wav": - output_bs = output_path.replace(".wav", ".bs") - else: - raise ValueError(f"IVAS: invalid audio input extension: {output_ext}") - - self.enc(input_path, output_bs) - self.dec(output_bs, output_path) - - def enc(self, input_path: str, output_bs: str) -> None: - logger.debug(f"IVAS encoder {input_path} -> {output_bs}") - - # Only resample and convert if wav, otherwise supposed pcm to be sampled at self.in_fs - _, input_ext = os.path.splitext(os.path.basename(input_path)) - metadata_files = [] - - # for MASA suppose that metadata file as same basename and location as input file - if self.in_format.name.lower()[:4] == "masa": - metadata_files.append(input_path.replace(input_ext, ".met")) - - # Support input file wav, pcm and txt (metadata iis) - if (input_ext == ".wav") or (input_ext == ".pcm"): - input_pcm = output_bs.replace(".bs", ".pcm") - audiofile.convertfile( - input_path, - input_pcm, - in_fs=self.in_fs, - out_fs=self.enc_fs, - in_nchans=self.in_format.nchannels, - out_nchans=self.in_format.nchannels, - ) - elif input_ext == ".txt": - metadata_obj = spatialmetadata.Metadata(input_path, audio_fs=self.enc_fs) - input_pcm = output_bs.replace(".bs", ".pcm") - input_csv = output_bs.replace(".bs", ".csv") - if self.in_format.name[:3] == "ISM": - # extract IVAS ISM metadata and - metadata_files = metadata_obj.write_metadata( - input_csv, "ivas_ism", input_pcm, max_objects=4 - ) - - # change number of ISM - self.in_format.name = "ISM" + str(len(metadata_files)) - self.in_format = spatialaudioformat.Format(self.in_format.name[:4]) - else: - in_sig = render_meta(metadata_obj, self.in_spfmt) - audiofile.writefile(input_pcm, in_sig, self.enc_fs) - else: - raise ValueError(f"IVAS: invalid audio input extension: {input_ext}") - - cmd = [self.exec_enc] - if self.dtx: - cmd.append("-dtx") - - if self.cod_opt: - cmd.extend(self.cod_opt) - - cmd.extend( - [ - *IVAS.parse_config(self.in_format, metadata_files), - "-max_band", - self.max_band, - str(self.bitrate), - str(self.enc_fs // 1000), - input_pcm, - output_bs, - ] - ) - Processing.run(cmd) - - def dec(self, input_bs: str, output_wav: str) -> None: - logger.debug(f"IVAS decoder {input_bs} -> {output_wav}") - - cmd = [self.exec_dec] - if self.head_tracking: - cmd.extend(["-T", self.ht_file]) - - if self.plc: - cmd.extend(["-FEC", str(self.plc_rate)]) - - if self.dec_opt: - cmd.extend(self.dec_opt) - - cmd.extend( - [ - self.out_format.name, - str(self.dec_fs // 1000), - input_bs, - output_wav, - ] - ) - Processing.run(cmd) - - @staticmethod - def parse_config(spformat: spatialaudioformat, metadata_files: Optional[list] = []): - name = spformat.name.lower() - - if name == "mono": - return [""] - elif name == "stereo": - return ["-stereo"] - elif name.startswith("ism"): - # replace any missing files with NULL - while len(metadata_files) < int(name[-1]): - metadata_files.append("NULL") - return ["-ism", name[-1]] + metadata_files[: int(name[-1])] - elif name.startswith("masa"): - return ["-masa", name[-1], metadata_files[0]] - elif spformat.ambi_order > 0: - if spformat.isplanar: - return ["-sba", f"-{spformat.ambi_order}"] - else: - return ["-sba", f"+{spformat.ambi_order}"] - elif spformat.isloudspeaker: - return ["-mc", name] - - raise ValueError(f"IVAS: Invalid input config: {spformat}") - - def get_processing_file_paths( - self, template_out_file: str, input_ext: Optional[str] = None - ) -> list: - return [template_out_file.replace(".wav", "_ivas.wav")] - - def get_total_bit_rate(self): - return self.bitrate diff --git a/scripts/pyprocessing/prepost_processing.py b/scripts/pyprocessing/prepost_processing.py deleted file mode 100644 index 3ea7411cea..0000000000 --- a/scripts/pyprocessing/prepost_processing.py +++ /dev/null @@ -1,220 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import json -import logging -import os -import shutil -from typing import Optional - -from pyaudio3dtools import ( - audioarray, - audiofile, - binauralrenderer, - spatialaudioconvert, - spatialaudioformat, -) - -from pyprocessing import utils -from pyprocessing.processing import Processing - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - - -class PreProcessing(Processing): - def __init__( - self, - out_format: str, - out_fs: int = 48000, - out_fc: Optional[int] = None, - output_loudness: Optional[int] = None, - loudness_tool: Optional[str] = "bs1770demo", - ): - super().__init__() - self.out_format = out_format - self.out_fs = out_fs - self.fc = out_fc - self.output_loudness = output_loudness - self.loudness_tool = loudness_tool - - def process(self, input_path: str, output_path: str, tmp_path: str): - output_nickname = utils.get_nickname(output_path) - logger.info( - f" Pre Processing: convert to {self.out_format} : {output_nickname}" - ) - - try: - spatialaudioconvert.spatial_audio_convert( - input_path, - tmp_path, - out_format=self.out_format, - out_fs=self.out_fs, - output_loudness=self.output_loudness, - loudness_tool=self.loudness_tool, - ) - except Exception as e: - logger.info( - f" Pre Processing: by-pass : {output_nickname}. Encountered exception {e}" - ) - else: - shutil.move(tmp_path, output_path) - - def get_processing_file_paths( - self, template_out_file: str, input_ext: Optional[str] = None - ) -> list: - return [template_out_file.replace(".wav", "_pre.wav")] - - -class PostProcessing(Processing): - def __init__( - self, - in_format: str, - out_format: str, - in_fs: int = 48000, - out_fs: int = 48000, - out_fc: Optional[int] = None, - binaural_rendered: bool = False, - limit_output: bool = False, - cut_preamble: float = 0.0, - split_file_path: str = "", - bin_rend_include_LFE: bool = False, - bin_rend_LFE_gain: Optional[float] = 10 ** (5.5 / 20), - binaural_dataset: Optional[str] = "orange53", - output_loudness: Optional[int] = None, - loudness_tool: Optional[str] = "bs1770demo", - ): - super().__init__() - self.in_spfmt = spatialaudioformat.Format(in_format=in_format) - self.out_spfmt = spatialaudioformat.Format(in_format=out_format) - - self.in_fs = in_fs - self.out_fs = out_fs - self.fc = out_fc - self.binaural_rendered = binaural_rendered - self.cut_preamble = cut_preamble - self.split_file_path = split_file_path - self.bin_rend_include_LFE = bin_rend_include_LFE - self.bin_rend_LFE_gain = bin_rend_LFE_gain - self.binaural_dataset = binaural_dataset - self.limit_output = limit_output - self.output_loudness = output_loudness - self.loudness_tool = loudness_tool - - def process(self, input_path: str, output_path: str, tmp_path: str): - output_nickname = utils.get_nickname(output_path) - logger.info( - f" Post Processing: {self.in_spfmt.name} -> {self.out_spfmt.name} : {output_nickname}" - ) - - # Spatial audio format conversion - spatialaudioconvert.spatial_audio_convert( - input_path, - tmp_path, - in_format=self.in_spfmt.name, - out_format=self.out_spfmt.name, - in_fs=self.in_fs, - out_fs=self.out_fs, - out_fc=self.fc, - cut_preamble_s=self.cut_preamble, - limit_output=self.limit_output, - bin_rend_include_LFE=self.bin_rend_include_LFE, - bin_rend_LFE_gain=self.bin_rend_LFE_gain, - output_loudness=self.output_loudness, - loudness_tool=self.loudness_tool, - binaural_dataset=self.binaural_dataset, - ) - shutil.move(tmp_path, output_path) - - # Binaural rendering - if self.binaural_rendered and "BINAURAL" not in self.out_spfmt.name: - out_sig, fs = audiofile.readfile(output_path) - bin_sig = binauralrenderer.binaural_rendering( - out_sig, - self.out_spfmt, - spatialaudioformat.Format("BINAURAL"), - fs=fs, - include_LFE=self.bin_rend_include_LFE, - LFE_gain=self.bin_rend_LFE_gain, - ) - output_binaural_wav = output_path.replace(".wav", "_BINAURAL.wav") - logger.info( - f" Rendering {self.out_spfmt.name} -> BINAURAL : {output_nickname[:-4]}_BINAURAL.wav" - ) - if self.limit_output: - logger.info(f" limiting") - audioarray.limiter(bin_sig, self.out_fs) - audiofile.writefile(output_binaural_wav, bin_sig, self.out_fs) - - # split file - if self.split_file_path: - # check for the split info - split_file_name = os.path.join( - self.split_file_path, - "".join( - [ - os.path.basename(output_path) - .replace(".wav", "") - .replace("_post", ""), - ".split", - ] - ), - ) - if os.path.exists(split_file_name): - fp = open(split_file_name, "r") - splits = json.load(fp) - fp.close - n_splits = len(splits) - 1 - for split_idx in range(n_splits): - output_path_split = output_path.replace( - ".wav", f"_split{split_idx}.wav" - ) - start = int(splits[split_idx] * self.out_fs) - stop = int(splits[split_idx + 1] * self.out_fs) - split_sig = audioarray.cut(out_sig, (start, stop)) - audiofile.writefile(output_path_split, split_sig, self.out_fs) - if (self.binaural_rendered is True) and ( - self.out_spfmt.name != "BINAURAL" - ): - output_bin_wav_split = output_binaural_wav.replace( - ".wav", f"_split{split_idx}.wav" - ) - split_sig = audioarray.cut(bin_sig, (start, stop)) - audiofile.writefile( - output_bin_wav_split, split_sig, self.out_fs - ) - - def get_processing_file_paths( - self, template_out_file: str, input_ext: Optional[str] = None - ) -> list: - return [template_out_file.replace(".wav", "_post.wav")] diff --git a/scripts/pyprocessing/processing.py b/scripts/pyprocessing/processing.py deleted file mode 100644 index a0533b2b4e..0000000000 --- a/scripts/pyprocessing/processing.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import logging -import os -import subprocess -from abc import ABC, abstractmethod - -global_print_cmd_only = False -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - - -class Processing(ABC): - def __init__(self): - pass - - @abstractmethod - def process(self, input_path: str, output_path: str, tmp_path: str) -> None: - pass - - @abstractmethod - def get_processing_file_paths(self, template_out_file: str) -> list: - pass - - @staticmethod - def run(cmd: list) -> None: - Processing.print_cmd(cmd) - - if not global_print_cmd_only: - try: - result = subprocess.run(cmd, check=True, capture_output=True, text=True) - except subprocess.CalledProcessError as e: - logger.debug(f"Command returned non-zero exit status : {e.returncode}") - logger.debug(e.stderr) - logger.debug(e.stdout) - raise SystemError( - f"Command returned non-zero exit status ({e.returncode}): {' '.join(e.cmd)}\n{e.stderr}\n{e.stdout}" - ) - - logger.debug(result.stderr) - logger.debug(result.stdout) - - @staticmethod - def run_python(cmd: list) -> None: - Processing.print_cmd(cmd) - - if not global_print_cmd_only: - result = subprocess.run( - ["python3"] + cmd, check=True, capture_output=True, text=True - ) - logger.debug(result.stderr) - logger.debug(str(result.stdout)) - - @staticmethod - def print_cmd(cmd: list) -> None: - cmd[:] = [i if i is not None else "MISSING" for i in cmd] - logger.debug(f"Running command: {' '.join(cmd)}") - if "MISSING" in cmd: - logger.error("Missing arg in command") - raise ValueError("Missing arg in command") - - -def process_chain( - proc_chain: list, - input_path: str, - output_wav: str, - tmp_folder_path: str = os.path.abspath("tmp"), -) -> None: - tmp_file_template_name = os.path.join(tmp_folder_path, os.path.basename(output_wav)) - - # Prepare a chain of of input/tmp/output files - processing_paths = [input_path] - for p in proc_chain: - _, input_ext = os.path.splitext(processing_paths[-1]) - processing_paths.extend( - p.get_processing_file_paths(tmp_file_template_name, input_ext=input_ext) - ) - - # Temporary files if needed - tmp_processing_paths = processing_paths[:] - tmp_path_iter = iter(tmp_processing_paths) - next(tmp_path_iter) - - # Replace last with real output - processing_paths[-1] = output_wav - in_path_iter = iter(processing_paths) - out_path_iter = iter(processing_paths) - next(out_path_iter) - - # go through processing chain - for p in proc_chain: - p.process(next(in_path_iter), next(out_path_iter), next(tmp_path_iter)) diff --git a/scripts/pyprocessing/processing_configs.py b/scripts/pyprocessing/processing_configs.py deleted file mode 100644 index 255f183c6d..0000000000 --- a/scripts/pyprocessing/processing_configs.py +++ /dev/null @@ -1,375 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import json -import logging -import os - -from pyprocessing.evs import EVS -from pyprocessing.ivas import IVAS -from pyprocessing.prepost_processing import PostProcessing, PreProcessing -from pyprocessing.utils import list_audio - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - -# Parse a JSON file into class and perform necessary validation -class test_config: - def __init__(self, filename: str): - # Open and read configuration test file - with open(filename, "r") as fp: - config_dict = json.load(fp) - - # Init lists of conditions and associated folders - self.list_of_conditions = list() - self.output_folders = list() - self.tmp_folders = list() - - # Set defaults - self._set_defaults() - - # Set/override class attributes based on JSON file and update internal dict - for key, value in config_dict.items(): - # update subdictionaries in case of ivas and evs - if key.startswith("ivas"): - setattr(self, key, self.DEFAULTS_ivas.copy()) - getattr(self, key).update(value) - self.dict[key] = self.DEFAULTS_ivas.copy() - self.dict[key].update(value) - elif key.startswith("evs"): - setattr(self, key, self.DEFAULTS_evs.copy()) - getattr(self, key).update(value) - self.dict[key] = self.DEFAULTS_evs.copy() - self.dict[key].update(value) - # avoid overwriting the whole subkey, merge instead - elif hasattr(self, key) and isinstance(getattr(self, key), dict): - for k, v in value.items(): - getattr(self, key)[k] = v - self.dict[key][k] = v - else: - setattr(self, key, value) - self.dict[key] = value - - # Check required keys - REQUIRED_KEYS = [ - "name", - "input_path", - "output_path", - "in_format", - "renderer_format", - "conditions_to_generate", - ] - REQUIRED_KEYS_IVAS = ["bitrates", "out_format"] - REQUIRED_KEYS_EVS = ["bitrates"] - MISSING_KEYS = list() - - for key in REQUIRED_KEYS: - if not hasattr(self, key): - MISSING_KEYS.append(key) - elif not getattr(self, key): - MISSING_KEYS.append(key) - - for condition in self.conditions_to_generate: - if condition.startswith("ivas"): - if not hasattr(self, condition): - raise SystemExit( - f"Definition not found for condition {condition}, but was specified in conditions to generate" - ) - else: - for key in REQUIRED_KEYS_IVAS: - if getattr(self, condition).get(key, None) is None: - MISSING_KEYS.append(f"{condition}:{key}") - elif condition.startswith("evs"): - if not hasattr(self, condition): - raise SystemExit( - f"Definition not found for condition {condition}, but was specified in conditions to generate" - ) - else: - for key in REQUIRED_KEYS_EVS: - if getattr(self, condition).get(key, None) is None: - MISSING_KEYS.append(f"{condition}:{key}") - - # Report missing keys to the user - if len(MISSING_KEYS) > 0: - raise KeyError( - f"The following key(s) must be specified in {filename} : {MISSING_KEYS}" - ) - - # Remove DEFAULTS_ keys from dict and self - for key in list(self.dict.keys()): - if key.startswith("DEFAULTS_"): - delattr(self, key) - self.dict.pop(key, None) - - # Store the updated JSON for later output - self.json_out = json.dumps(self.dict, indent=4) - - # Concatenation options - if self.concatenate_input: - if len(self.concat_silence_ms): - self.concat_silence_pre = self.concat_silence_ms[0] - self.concat_silence_post = self.concat_silence_ms[1] - else: - self.concat_silence_pre = self.concat_silence_ms - self.concat_silence_post = self.concat_silence_ms - - # Pre-processing - if self.preproc_input: - self.list_of_conditions.append(self._get_condition_definition("preproc", 0)) - - # Check input directory for items - self.items_list = list_audio( - self.input_path, absolute=True, select_list=self.input_select - ) - - if self.items_list is None or len(self.items_list) == 0: - raise SystemExit( - f"Directory {self.input_path} is either blank, does not exist or all files were filtered out." - ) - - # Check if concatenation is required - if self.concatenate_input and any( - [i.endswith(".txt") for i in self.items_list] - ): - raise SystemExit("Concatenation for text files is unsupported") - - # Go through conditions to generate - for cond in self.conditions_to_generate: - try: - bitrates = getattr(self, cond)["bitrates"] - # If single value, convert to list for convenience - if not hasattr(bitrates, "__len__") and not isinstance(bitrates, str): - bitrates = [bitrates] - for b in bitrates: - if isinstance(b, list): - self.list_of_conditions.append( - self._get_condition_definition(cond, [int(x) for x in b]) - ) - else: - self.list_of_conditions.append( - self._get_condition_definition(cond, int(b)) - ) - bitrate_label = str( - self.list_of_conditions[-1]["proc_chain"][ - 0 - ].get_total_bit_rate() - ) - self.list_of_conditions[-1]["id"] = f"{cond}_{bitrate_label}" - except: - self.list_of_conditions.append(self._get_condition_definition(cond, 0)) - - # create output and temporary folder names for the conditions - for list_cond in self.list_of_conditions: - self.output_folders.append(os.path.join(self.output_path, list_cond["id"])) - self.tmp_folders.append( - os.path.join(self.output_path, "tmp_" + list_cond["id"]) - ) - - def __repr__(self): - return str(vars(self)) - - # default values to enable a sparse JSON input file - update if adding new keys - def _set_defaults(self): - DEFAULT_CONFIG = { - # general options - "enable_multiprocessing": True, - "delete_tmp": False, - # input/preprocessing options - "preproc_input": False, - "input_select": None, - "concatenate_input": False, - "concat_silence_ms": [0, 0], - # sampling rates - "in_fs": 48000, - "out_fs": 48000, - # binaural rendering - "binaural_rendered": False, - "bin_rend_include_LFE": False, - "bin_rend_LFE_gain": 10 ** (5.5 / 20), - "binaural_dataset": "orange53", - # apply limiter in the postprocessing - "limit_output": False, - # loudness adjustments - "preproc_loudness": None, - "output_loudness": None, - "loudness_tool": "bs1770demo", - # condition-specific values - "ref": {"out_fc": 48000}, - "DEFAULTS_ivas": { - "cod_bin": "../IVAS_cod", - "dec_bin": "../IVAS_dec", - "cod_opt": None, - "dec_opt": None, - "enc_fs": 48000, - "dec_fs": 48000, - "max_band": "FB", - "dtx": False, - # head tracking - "head_tracking": False, - "ht_file": "./trajectories/full_circle_in_15s", - # BER/FER - "plc": False, - "plc_rate": 10, - }, - "DEFAULTS_evs": { - "cod_bin": "../IVAS_cod", - "dec_bin": "../IVAS_dec", - "cod_opt": None, - "dec_opt": None, - "enc_fs": 48000, - "dec_fs": 48000, - "max_band": "FB", - "dtx": False, - }, - } - - # needed to output JSON later - self.dict = DEFAULT_CONFIG - - # set defaults from above dict - for key, value in DEFAULT_CONFIG.items(): - setattr(self, key, value) - - # Definitions of processing chains (edit with care) - def _get_condition_definition(self, cond: str, bitrate: int) -> dict: - definition = dict(id=cond, proc_chain=[]) - - if cond.startswith("preproc"): - definition["proc_chain"].extend( - [ - PreProcessing( - out_format=self.in_format, - out_fs=self.in_fs, - output_loudness=self.preproc_loudness, - loudness_tool=self.loudness_tool, - ) - ] - ) - elif cond.startswith("ref"): - definition["proc_chain"].extend( - [ - PostProcessing( - in_format=self.in_format, - out_format=self.renderer_format, - in_fs=self.in_fs, - out_fs=self.out_fs, - out_fc=self.ref["out_fc"], - binaural_rendered=self.binaural_rendered, - bin_rend_include_LFE=self.bin_rend_include_LFE, - bin_rend_LFE_gain=self.bin_rend_LFE_gain, - binaural_dataset=self.binaural_dataset, - limit_output=self.limit_output, - output_loudness=self.output_loudness, - loudness_tool=self.loudness_tool, - ) - ] - ) - elif cond.startswith("lp3k5"): - definition["proc_chain"].extend( - [ - PostProcessing( - in_format=self.in_format, - out_format=self.renderer_format, - in_fs=self.in_fs, - out_fs=self.out_fs, - out_fc=3500, - binaural_rendered=self.binaural_rendered, - binaural_dataset=self.binaural_dataset, - limit_output=self.limit_output, - output_loudness=self.output_loudness, - loudness_tool=self.loudness_tool, - ) - ] - ) - elif cond.startswith("lp7k"): - definition["proc_chain"].extend( - [ - PostProcessing( - in_format=self.in_format, - out_format=self.renderer_format, - in_fs=self.in_fs, - out_fs=self.out_fs, - out_fc=7000, - binaural_rendered=self.binaural_rendered, - binaural_dataset=self.binaural_dataset, - limit_output=self.limit_output, - output_loudness=self.output_loudness, - loudness_tool=self.loudness_tool, - ) - ] - ) - elif cond.startswith("evs"): - definition["proc_chain"].extend( - [ - EVS( - in_format=self.in_format, - out_format=self.in_format, - bitrate=bitrate, - in_fs=self.in_fs, - **getattr(self, cond), - ), - PostProcessing( - in_format=self.in_format, - out_format=self.renderer_format, - in_fs=getattr(self, cond)["dec_fs"], - out_fs=self.out_fs, - binaural_rendered=self.binaural_rendered, - binaural_dataset=self.binaural_dataset, - limit_output=self.limit_output, - ), - ] - ) - elif cond.startswith("ivas"): - definition["proc_chain"].extend( - [ - IVAS( - in_format=self.in_format, - bitrate=bitrate, - in_fs=self.in_fs, - **getattr(self, cond), - ), - PostProcessing( - in_format=getattr(self, cond)["out_format"], - out_format=self.renderer_format, - in_fs=getattr(self, cond)["dec_fs"], - out_fs=self.out_fs, - binaural_rendered=self.binaural_rendered, - binaural_dataset=self.binaural_dataset, - limit_output=self.limit_output, - ), - ] - ) - else: - raise SystemExit(f"Invalid condition: {cond}") - - return definition diff --git a/scripts/pyprocessing/utils.py b/scripts/pyprocessing/utils.py deleted file mode 100644 index e62840fd13..0000000000 --- a/scripts/pyprocessing/utils.py +++ /dev/null @@ -1,148 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import logging -import os -import platform -import shutil -from typing import Union - -""" -Directory/path handling -""" - -ALLOWED_INPUT_EXT = (".wav", ".pcm", ".txt") -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - -# Creates a directory at the given path if it does not exist already -def create_dir(path: str) -> None: - if not os.path.exists(path): - os.makedirs(path) - - -def delete_dir(path: str) -> None: - if os.path.exists(path) and os.path.isdir(path): - logger.debug(f"Deleting path {path}") - shutil.rmtree(path) - - -class DirManager: - """ - Context manager that creates directories if not already present and - automatically cleans up (i.e. deletes) all specified paths - """ - - def __init__( - self, create_paths: Union[str, list], delete_paths: Union[str, list] = list() - ): - self.create_paths = ( - create_paths if type(create_paths) == list else [create_paths] - ) - self.delete_paths = ( - delete_paths if type(delete_paths) == list else [delete_paths] - ) - - def __enter__(self): - for path in self.create_paths: - create_dir(path) - - def __exit__(self, exc_type, exc_value, exc_traceback): - for path in self.delete_paths: - if path in self.create_paths: - delete_dir(path) - else: - print( - "Attempting to delete a tmp dir that was not in create_paths. Do not delete." - ) - - -def list_audio(path: str, absolute: bool = False, select_list: list = None) -> list: - """ - Return list with all files with ALLOWED_INPUT_EXT found under the given path. - - If path is a directory, all files in it are included, if it is a file, just the file - will be in the list. If a select list is provided, files are filtered accordingly. - """ - audio_list = list() - - if os.path.exists(path): - if os.path.isdir(path): - if absolute: - audio_list = [ - os.path.join(path, f) - for f in os.listdir(path) - if f.endswith(ALLOWED_INPUT_EXT) - ] - else: - audio_list = [ - f for f in os.listdir(path) if f.endswith(ALLOWED_INPUT_EXT) - ] - else: - if not absolute: - path = os.path.basename(path) - ext = os.path.splitext(path)[-1].lower() - if ext in ALLOWED_INPUT_EXT: - audio_list.append(path) - - # Filter according to select list - if select_list is not None: - if hasattr(select_list, "__len__") and not isinstance(select_list, str): - select_set = set([os.path.splitext(i)[0] for i in select_list]) - else: - select_set = [os.path.splitext(select_list)[0]] - - audio_list_orig = audio_list - audio_list = [] - for f in audio_list_orig: - f_name = os.path.splitext(os.path.basename(f))[0] - if any(x in f_name for x in select_set): - audio_list.append(f) - - return audio_list - - -def get_exec_path(path: str) -> str: - if platform.system() == "Windows" and os.path.splitext(path)[1] != ".exe": - exe = ".exe" - else: - exe = "" - - return f"{os.path.abspath(path)}{exe}" - - -def get_nickname(path: str) -> str: - nickname = os.path.join( - os.path.basename(os.path.dirname(path)), os.path.basename(path) - ) - return nickname -- GitLab From fb890c5ebc14993dd7ec4f179aff670428c4e727 Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Mon, 17 Apr 2023 17:07:47 +0200 Subject: [PATCH 02/12] [ci + scripts] remove reference to pyaudio3dtools --- .gitmodules | 3 + ci/ivas_voip_be_test.sh | 2 + scripts/README.md | 224 --------------------------- scripts/ivas-processing-scripts | 1 + scripts/pyivastest/IvasModeRunner.py | 10 +- scripts/self_test.py | 14 +- tests/cmp_pcm.py | 10 +- tests/cut_pcm.py | 8 +- tests/renderer/compare_audio.py | 2 +- tests/renderer/constants.py | 6 +- tests/renderer/utils.py | 20 +-- 11 files changed, 44 insertions(+), 256 deletions(-) create mode 100644 .gitmodules create mode 160000 scripts/ivas-processing-scripts diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..eb6c2baca0 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "scripts/ivas-processing-scripts"] + path = scripts/ivas-processing-scripts + url = git@forge.3gpp.org:ivas-codec-pc/ivas-processing-scripts.git diff --git a/ci/ivas_voip_be_test.sh b/ci/ivas_voip_be_test.sh index 592451c345..01a1b5af6b 100755 --- a/ci/ivas_voip_be_test.sh +++ b/ci/ivas_voip_be_test.sh @@ -101,6 +101,7 @@ fi for cut in "$output_dir_voip_dec"/*.wav; do output_path=${cut/$output_dir_voip_dec/$output_dir_voip_dec_trimmed} output_path=${output_path/".wav"/".raw"} + # TODO tmu&knj try to replace with cut_pcm python3 "$python_audiofile_script_path" pre-trim 60 "$cut" "$output_path" | tee -a voip_be_test_output.txt done @@ -115,6 +116,7 @@ fi for ref in "$output_dir_default_dec"/*.wav; do output_path=${ref/$output_dir_default_dec/$output_dir_default_dec_pcm} output_path=${output_path/".wav"/".raw"} + # TODO tmu&knj check replacement python3 "$python_audiofile_script_path" convert "$ref" "$output_path" | tee -a voip_be_test_output.txt done diff --git a/scripts/README.md b/scripts/README.md index fec54ffeac..20e337dab1 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -49,16 +49,6 @@ title: Python scripts for Testing the IVAS code and Generating test items - [`IvasBuildAndRunChecks.py`](#ivasbuildandruncheckspy) - [`testBitexact.py`](#testbitexactpy) - [`self_test.py`](#self_testpy) - - [2. Script for generating listening test items](#2-script-for-generating-listening-test-items) - - [2.1. `generate_test_items.py`](#21-generate_test_itemspy) - - [2.2. Test configuration file](#22-test-configuration-file) - - [2.3. Supported test conditions](#23-supported-test-conditions) - - [2.4. Supported input/output/rendered audio formats](#24-supported-inputoutputrendered-audio-formats) - - [2.5. Processing](#25-processing) - - [2.6. Renderer Metadata definition](#26-renderer-metadata-definition) - - [3. Script for converting formats and binauralizing](#3-script-for-converting-formats-and-binauralizing) - - [3.1. Binauralizing with head rotation](#31-binauralizing-with-head-rotation) - - [3.2. Generating binaural reference signals](#32-generating-binaural-reference-signals) --- @@ -440,217 +430,3 @@ If not, they are built from the working copy. Missing reference conditions and the test conditions are then generated and the reference and test conditions are compared. ------ - - -## 2. Script for generating listening test items - -The `generate_test_items.py` python script helps to quickly setup listening tests with multiple (pre-)processing and post-processing options. - -### 2.1. `generate_test_items.py` - -Script for generating (listening) test items. - -``` -usage: generate_test_items.py [-h] -i INFILE [INFILE ...] - -Generate test items - -optional arguments: - -h, --help show this help message and exit - -i INFILE [INFILE ...], --infile INFILE [INFILE ...] - Configuration file(s): FILE1.json FILE2.json ... -``` - -Example how to call it: - -``` - python3 .\generate_test_items.py -i .\examples\my_test_config.json -``` - -Where `my_test_config.json` is a test configuration file in json format with fields explained in next section. - -### 2.2. Test configuration file - -This is the main file to edit in order to change global configuration options, detailed below. - -*NOTE: Paths specified in the JSON file are relative to the working directory where the script is executed from, NOT the location of the JSON file itself. It is possible (and recommended!) to use absolute paths instead to avoid confusion.* - -| key | values (example) | default | description | -|---------------------------|:------------------:|:-------------:|-----------------------------------------------| -| name | "my_test" | Required | name of the test session | -| author | "myself" | | Author of the configuration file (optional) | -| date | 20210205 | | Date of creation (optional) | -| | | | | -| enable_multiprocessing | True/False | True | Enables multiprocessing, recommended to set to True to make things fast. | -| delete_tmp | True/False | False | Enables deletion of temporary directories (containing intermediate processing files, bitstreams and per-item logfiles etc.). | -| | | | | -| input_path | ./my_items/ | Required | Input directory with *.WAV, *.PCM or *.TXT files to process | -| preproc_input | True/False | False | Whether to execute preprocessing on the input files | -| in_format | HOA3 | Required | Input format for the conditions to generate, see spatial_audio_format | -| in_fs | 32000 | 48000 | Input sampling rate for conditions to generate (assumed to be sampling-rate of input PCM files to process) | -| input_select | ["in", "file2"] | Required | Filenames to filter in the input directory, can be a single value, an array or null. Only compares filenames (therefore "in" in this array would match both "in.wav" and "in.pcm") | -| | | | | -| concatenate_input | True/False | False | Whether to (horizontally) concatenate files in the input directory | -| concat_silence_ms | [1000, 1000] | [0, 0] | Specifies the pre- and post-silence duration to pad concatenation with in ms. If a single value is specified it will be used for BOTH pre- and post-padding | -| preproc_loudness | -26 | | Loudness to preprocess input to (dBov / LKFS depending on tool). Only processed if preproc_input is True. | -| | | | | -| output_path | ./out/ | | Output root directory hosting generated items & log | -| out_fs | 48000 | 48000 | Output sampling rate for conditions to generate | -| output_loudness | -26 | | Loudness level for output file (dBov / LKFS depending on tool). | -| | | | | -| renderer_format | 7_1_4 or CICP19 | Required | Format to be rendered (using offline rendering, will be bypassed if = out_format) | -| binaural_rendered | True/False | False | Extra binauralization of the rendered outputs (using offline rendering) | -| include_LFE | True/False | False | Whether to include LFE in binural rendering | -| gain_factor | float value | 1.0 | Gain factor to be applied to LFE channel | -| loudness_tool | "sv56demo" | "bs1770demo" | Tool to use for loudness adjustment. Currently only sv56demo and bs1770demo are supported for appropriate format configurations. Optionally can be a path to the binary. | -| | | | | -| lt_mode | "MUSHRA" | | Automatically generates a NAME.ltg file with generate_lt_file.py in output_path according to the specified mode | -| conditions_to_generate | ["ref", "ivas"] | Required | list of conditions to be generated, for ivas and evs, multiple conditions can be specified with an \_ separator (i.e. "ivas_branch", "ivas_trunk" etc.) | -| | | | | -| ref | | | | -| - out_fc | 32000 | 48000 | cut-off frequency to be applied to the reference condition in post | -| ivas | | | | -| - bitrates | [16400, 128000] | Required | Bitrate(s) used for IVAS encoder | -| - enc_fs | 48000 | 48000 | Sampling rate for input to the encoder (pre-processing) | -| - max_band | wb, swb, fb etc. | FB | Maximum encoded bandwidth | -| - out_format | 7_1_4 or CICP19 | Required | Output format for IVAS, see spatial_audio_format | -| - dec_fs | 48000 | 48000 | Sampling rate for decoder output | -| - dtx | True/False | False | Enable DTX mode | -| - head_tracking | True/False | False | Enable head tracking | -| - ht_file | | "./trajectories/full_circle_in_15s" | Head rotation file | -| - plc | True/False | False | Enables forward error correction `IVAS_dec -FEC X` | -| - plc_rate | 0-10 | 10 | Percentage of erased frames | -| - cod_bin | "../../../IVAS_cod"| "../IVAS_cod" | path to encoder binary | -| - dec_bin | "../../../IVAS_dec"| "../IVAS_dec" | path to decoder binary | -| - cod_opt | ["-ucct", "1"] | | list of additional encoder options | -| - dec_opt | ["-q"] | | list of additional decoder options | -| evs | | | | -| - bitrates | [13200, 164000] | Required | Bitrate used for multi-stream EVS condition per stream/channel | -| - enc_fs | 48000 | 48000 | Sampling rate for input to the encoder (pre-processing) | -| - max_band | wb, swb, fb etc. | FB | Maximum encoded bandwidth | -| - dec_fs | 48000 | 480000 | Sampling rate for decoder output | -| - dtx | True/False | False | Enable DTX mode | -| - cod_bin | ../../../IVAS_cod | "../IVAS_cod" | path to binary | -| - dec_bin | ../../../IVAS_dec | "../IVAS_dec" | path to binary | -| | | | | - ---- -### 2.3. Supported test conditions - -The following conditions are the conditions which can be generated currently by `generate_test_items.py`. - -| Supported conditions | Description | -|:--------------------:|-----------------------------------------------------------| -| ref | Uncoded (reference) | -| lp3k5 | Uncoded low-passed at 3.5 kHz (anchor) | -| lp7k | Uncoded low-passed at 7 kHz (anchor) | -| evs_mono | Coded with multi-stream EVS codec, !!metadata not coded!! | -| ivas | Coded with IVAS codec | - - -Multiple conditions for evs_mono and ivas can be specified by using underscore separators e.g. `"ivas_1" : {...}, "ivas_2" : {...}` -(also see `test_SBA.json` for an example) - ---- - -### 2.4. Supported input/output/rendered audio formats - -| spatial_audio_format | Input/Ouput/Rendered | Description | -|--------------------------------------------------|----------------------|------------------------------------------------| -| MONO | yes/yes/yes | mono signals | -| STEREO | yes/yes/yes | stereo signals | -| ISM or ISMx | yes/no/no | Objects with metadata, description using renderer metadata | -| MASA or MASAx | yes/no/no | mono or stereo signals with spatial metadata !!!metadata must share same basename as waveform file but with .met extension!!! | -| FOA/HOA2/HOA3 or PLANAR(FOA/HOAx) | yes/yes/yes | Ambisonic signals or planar ambisonic signals | -| BINAURAL/BINAURAL_ROOM | no/yes/yes | Binaural signals | -| 5_1/5_1_2/5_1_4/7_1/7_1_4 or CICP[6/12/14/16/19] | yes/yes/yes | Multi-channel signals for predefined loudspeaker layout | -| META | yes/yes/no | Audio scene described by a renderer config | - ---- - -### 2.5. Processing - -The processing chain is as follows: - -1. Preprocessing - - **Condition**: `preproc_input == true` - - Input files converted to `in_format` -2. Processing - - **Condition**: Performed depending on key in `conditions_to_generate` - - Coding/decoding from `in_format` to `out_format` -3. Postprocessing - 1. Rendering to `renderer_format` - - **Condition**: `out_format != renderer_format` - - output files converted from `out_format` to `renderer_format` - 1. Binaural Rendering - - **Condition**: `binaural_rendered == true` and `out_format` is not a BINAURAL type - - output files converted from `out_format` to `BINAURAL` - ---- - -### 2.6. Renderer Metadata definition - -To run, the renderer requires a config file describing the input scene.The expected format of the config file is as follows: - ---- - -- Line 1: Path to a "multitrack" audio file. This should be a single multichannel wav/pcm file that contains all input audio. For example channels 1-4 can be an FOA scene,channel 5 - an object and channels 6-11 - a 5.1 channel bed. If the path is not absolute, it is considered relative to the renderer executable, not the config file. This path has lower priority than the one given on the command line: *The path in the config file is ignored if the --inputAudio argument to the renderer executable is specified.* - ---- - -- Line 2: Contains number of inputs. An input can either be an Ambisonics scene, anobject or a channel bed.This is NOT the total number of channels in the input audio file.The renderer currently supports simultaneously: *Up to 2 SBA inputs, Up to 2 MC inputs* Up to 16 ISM inputsThese limits can be freely changed with pre-processor macros, if needed. - ---- -- Following lines: -Define each of the inputs. Inputs can be listed in any order - they are NOT required to be listed in the same order as in the audio file. -Input definitions: - - First line of an input definition contains the input type: SBA, MC or ISM.Following lines depend on the input type:SBAIndex of the first channel of this input in the multitrack file (1-indexed)Ambisonics orderMCIndex of the first channel of this input in the multitrack file (1-indexed)CICP index of the speaker layoutISMIndex of this input's audio in the multitrack file (1-indexed)Path to ISM metadata file (if not absolute, relative to executable location)ORISMIndex of this input's audio in the multitrack file (1-indexed)Number N of positions defined, followed by N lines in form: -stay in position for x frames, azimuth, elevation(ISM position metadata defined this way is looped if there are more framesof audio than given positions) - ---- -Example config -The following example defines a scene with 4 inputs: *ISM with trajectory defined in a separate file. Channel 12 in the input file.* Ambisonics, order 1. Channels 1-4 in the input audio file. *CICP6 channel bed. Channels 5-10 in the input audio file.* ISM with 2 defined positions (-90,0) and (90,0). Channel 11 in the input file. The object will start at position (-90,0) and stay there for 5 frames, then move to (90,0) and stay there for 5 frames. This trajectory is looped over the duration of the input audio file. - -``` -./input_audio.wav4ISM12path/to/IVAS_ISM_metadata.csv -3 -SBA -1 -1 -MC -5 -6 -ISM -1 -1 -25,-90,05,90, -``` - -## 3. Script for converting formats and binauralizing - -The script audio3dtools.py can convert between different input and output formats and binauralize signals. - -Execute `python -m pyaudio3dtools.audio3dtools --help` for usage. - -### 3.1. Binauralizing with head rotation - -This example binauralizes a HOA3 signal with a head-rotation trajectory. Head rotation is peformed in SHD. It is supported for HOA3 and META input formats. For META input format, the audioscene is first prerendered to HOA3 and then rotated and binauralized. - -``` -python -m pyaudio3dtools.audio3dtools -i hoa3_input.wav -o . -F BINAURAL -T .\trajectories\full_circle_in_15s -``` - -### 3.2. Generating binaural reference signals - -Currently MC input signals are supported. The reference processing can be activated by selecting BINAURAL[_ROOM]_REF as output format. The signals are generated by convolving the channels with the filters from the database that are closes to the current position of the virtual LS. All interpolation methods supported by numpy can be chosen between the measured points along the trajectory. - -``` -python -m pyaudio3dtools.audio3dtools -i cicp6_input.wav -o . -F BINAURAL_REF -T .\trajectories\full_circle_in_15s -``` - -### 3.3. Rendering ISM to Custom loudspeakers with auxiliary binaural output -ISM metadata can either be specified via an input text file in the Renderer Metadata definition format, or via the commandline using the same style as IVAS: -``` -python -m pyaudio3dtools.audio3dtools -i ism2.wav -f ISM2 -m ism1.csv NULL -F 7_1_4 -o . -b -T .\trajectories\full_circle_in_15s -``` diff --git a/scripts/ivas-processing-scripts b/scripts/ivas-processing-scripts new file mode 160000 index 0000000000..e007ca5c1a --- /dev/null +++ b/scripts/ivas-processing-scripts @@ -0,0 +1 @@ +Subproject commit e007ca5c1afa77bed87c50f3634813493fb7ed18 diff --git a/scripts/pyivastest/IvasModeRunner.py b/scripts/pyivastest/IvasModeRunner.py index 114991c374..95f490e447 100644 --- a/scripts/pyivastest/IvasModeRunner.py +++ b/scripts/pyivastest/IvasModeRunner.py @@ -38,6 +38,7 @@ import time import logging import logging.handlers import platform +import sys import traceback from copy import deepcopy import re @@ -45,9 +46,10 @@ import json from pyivastest import IvasModeCollector from pyivastest import constants -from pyaudio3dtools.spatialaudioformat import Format as spformat -import pyaudio3dtools.audiofile as af -import pyaudio3dtools.audioarray as ar + +sys.path.append("../ivas-processing-scripts") +import ivas_processing_scripts.audiotools.audiofile as af +import ivas_processing_scripts.audiotools.audioarray as ar BW_TO_SR = {"nb": 8, "wb": 16, "swb": 32, "fb": 48} IN_CONFIG_TO_COPY_AUDIO = { "SBA": [], @@ -1659,4 +1661,4 @@ class IvasModeRunner(IvasModeCollector.IvasModeCollector): class NoInputForAnyModesFound(Exception): - pass \ No newline at end of file + pass diff --git a/scripts/self_test.py b/scripts/self_test.py index 2be585882b..f3b620fd03 100755 --- a/scripts/self_test.py +++ b/scripts/self_test.py @@ -43,7 +43,7 @@ import pyivastest.ivas_svn as svn import pyivastest.constants as constants import operator import sys -import pyaudio3dtools +import ivas_processing_scripts.audiotools import platform import numpy import multiprocessing @@ -350,11 +350,11 @@ class SelfTest(IvasScriptsCommon.IvasScript): # one frame diff, might be a delay change, still test for BE, but cut to the same length first n_samples_for_test = min(n_samples_test, n_samples_ref) if n_samples_for_test != n_samples_ref: - sig_ref = pyaudio3dtools.audioarray.cut( + sig_ref = audiotools.audioarray.cut( sig_ref, (0, n_samples_for_test) ) else: - sig_test = pyaudio3dtools.audioarray.cut( + sig_test = audiotools.audioarray.cut( sig_test, (0, n_samples_for_test) ) else: @@ -362,7 +362,7 @@ class SelfTest(IvasScriptsCommon.IvasScript): test_sample_num_diff = True if not test_failed: try: - cmp_result = pyaudio3dtools.audioarray.compare( + cmp_result = audiotools.audioarray.compare( sig_test, sig_ref, fs ) if not cmp_result["bitexact"]: @@ -519,7 +519,7 @@ class SelfTest(IvasScriptsCommon.IvasScript): decoded_item_ref_mono = None decoded_item_test_mono = None if mode_dict["in_fs"] > pesq_fs or in_nchans > 1: - pyaudio3dtools.audiofile.mono( + audiotools.audiofile.mono( orig_item, orig_mono, in_nchans=in_nchans, @@ -553,7 +553,7 @@ class SelfTest(IvasScriptsCommon.IvasScript): os.close(fd_ref) if mode_dict["out_fs"] > pesq_fs or n_channels > 1: - pyaudio3dtools.audiofile.mono( + audiotools.audiofile.mono( decoded_item_ref, decoded_item_ref_mono, in_nchans=n_channels, @@ -617,7 +617,7 @@ class SelfTest(IvasScriptsCommon.IvasScript): fd_test, decoded_item_test_mono = tempfile.mkstemp((".pcm")) os.close(fd_test) if mode_dict["out_fs"] > pesq_fs or n_channels > 1: - pyaudio3dtools.audiofile.mono( + audiotools.audiofile.mono( decoded_item_test, decoded_item_test_mono, in_nchans=n_channels, diff --git a/tests/cmp_pcm.py b/tests/cmp_pcm.py index a54aa2cf11..d8450d4ac2 100755 --- a/tests/cmp_pcm.py +++ b/tests/cmp_pcm.py @@ -6,8 +6,10 @@ import argparse THIS_PATH = os.path.join(os.getcwd(), __file__) sys.path.append(os.path.join(os.path.dirname(THIS_PATH), "../scripts")) +sys.path.append(os.path.join(os.path.dirname(THIS_PATH), "../scripts/ivas-processing-scripts")) -import pyaudio3dtools +from ivas_processing_scripts.audiotools.audiofile import read +from ivas_processing_scripts.audiotools.audioarray import compare import pyivastest import numpy as np @@ -31,8 +33,8 @@ def cmp_pcm(file1, file2, out_config, fs) -> (int, str): out_config_in_file_names = out_config nchannels = pyivastest.constants.OC_TO_NCHANNELS[out_config.upper()] - s1, _ = pyaudio3dtools.audiofile.readfile(file1, nchannels, fs, outdtype=np.int16) - s2, _ = pyaudio3dtools.audiofile.readfile(file2, nchannels, fs, outdtype=np.int16) + s1, _ = read(file1, nchannels, fs, outdtype=np.int16) + s2, _ = read(file2, nchannels, fs, outdtype=np.int16) if s1.shape != s2.shape: print( @@ -41,7 +43,7 @@ def cmp_pcm(file1, file2, out_config, fs) -> (int, str): ) return 1, "FAIL: File lengths differ" - cmp_result = pyaudio3dtools.audioarray.compare(s1, s2, fs, per_frame=False) + cmp_result = compare(s1, s2, fs, per_frame=False) if cmp_result["bitexact"]: return 0, "SUCCESS: Files are bitexact" diff --git a/tests/cut_pcm.py b/tests/cut_pcm.py index 99a6f6fc10..0ca1e93d35 100755 --- a/tests/cut_pcm.py +++ b/tests/cut_pcm.py @@ -51,9 +51,9 @@ import numpy as np from pathlib import Path HERE = Path(__file__).parent.resolve() -SCRIPTS_DIR = str(HERE.joinpath("../scripts").resolve()) +SCRIPTS_DIR = str(HERE.joinpath("../scripts/ivas-processing-scripts").resolve()) sys.path.append(SCRIPTS_DIR) -from pyaudio3dtools import audiofile +from ivas_processing_scripts.audiotools import audiofile def usage(): @@ -76,7 +76,7 @@ def cut_samples(in_file, out_file, num_channels, sample_rate, start, duration, g dur_sec = float(duration) gain_f = float(gain) - s, fs = audiofile.readfile(in_file, num_channels, fs, outdtype="float") + s, fs = audiofile.read(in_file, num_channels, fs, outdtype="float") num_in_samples = s.shape[0] num_samples_to_skip = int(start_sec * fs) @@ -89,7 +89,7 @@ def cut_samples(in_file, out_file, num_channels, sample_rate, start, duration, g s_out = s[num_samples_to_skip:num_samples_to_skip + dur_samples, :] * gain_f - audiofile.writefile(out_file, s_out, fs) + audiofile.write(out_file, s_out, fs) def main(argv): diff --git a/tests/renderer/compare_audio.py b/tests/renderer/compare_audio.py index bf3ce26c93..0201f6cb7f 100644 --- a/tests/renderer/compare_audio.py +++ b/tests/renderer/compare_audio.py @@ -35,7 +35,7 @@ import numpy as np from .constants import SCRIPTS_DIR sys.path.append(str(SCRIPTS_DIR)) -from pyaudio3dtools.audioarray import getdelay +from ivas_processing_scripts.audiotools.audioarray import getdelay def compare_audio_arrays( diff --git a/tests/renderer/constants.py b/tests/renderer/constants.py index e29696adec..65f9de7242 100644 --- a/tests/renderer/constants.py +++ b/tests/renderer/constants.py @@ -202,7 +202,7 @@ pass_snr = dict() # not relevant for tests anymore, should be deprecated soon _pass_snr = { #################################################################### # - # External Renderer vs Standalone and pyaudio3dtools renderers tests + # External Renderer vs Standalone and python renderers tests # #################################################################### # Failure reason: Renderer uses getRSH() with int16_t vs float in python @@ -225,7 +225,7 @@ _pass_snr = { "test_ambisonics_binaural_headrotation[HOA2-BINAURAL-full_circle_in_15s]": 18, "test_ambisonics_binaural_headrotation[HOA3-BINAURAL-full_circle_in_15s]": 15, # Failure reason: Crend unit test does not support intermediate conversion to 7_1_4 or SHD BRIRs - # Comparison with pyaudio3dtools results in bad SNR + # Comparison with python renderer results in bad SNR "test_ambisonics_binaural_headrotation[FOA-BINAURAL_ROOM-full_circle_in_15s]": 0, "test_ambisonics_binaural_headrotation[FOA-BINAURAL_ROOM-rotate_yaw_pitch_roll1]": 0, "test_ambisonics_binaural_headrotation[HOA2-BINAURAL_ROOM-full_circle_in_15s]": 0, @@ -254,7 +254,7 @@ _pass_snr = { "test_custom_ls_output[HOA3-itu_4+5+1]": 30, "test_custom_ls_output[HOA3-t_design_4]": 32, # Failure reason: TD Object Renderer standalone does not support custom LS input - # Comparison with pyaudio3dtools results in bad SNR + # Comparison with python renderer results in bad SNR "test_custom_ls_input_binaural[16ch_8+4+4-BINAURAL]": 8, "test_custom_ls_input_binaural[16ch_8+4+4-BINAURAL_ROOM]": 0, "test_custom_ls_input_binaural[4d4-BINAURAL]": 6, diff --git a/tests/renderer/utils.py b/tests/renderer/utils.py index d2af91f60c..470c451add 100644 --- a/tests/renderer/utils.py +++ b/tests/renderer/utils.py @@ -40,7 +40,8 @@ from .compare_audio import compare_audio_arrays from .constants import * sys.path.append(SCRIPTS_DIR) -import pyaudio3dtools +from ivas_processing_scripts.audiotools.audiofile import read +from ivas_processing_scripts.audiotools.convert import convert_file # fixture returns test information, enabling per-testcase SNR @pytest.fixture @@ -195,7 +196,7 @@ def run_renderer( run_cmd(cmd) - return pyaudio3dtools.audiofile.readfile(out_file) + return read(out_file) def run_pyscripts( @@ -206,7 +207,7 @@ def run_pyscripts( trj_file: Optional[str] = None, is_comparetest: Optional[bool] = False, ) -> Tuple[np.ndarray, int]: - """Reference creation with pyaudio3dtools""" + """Reference creation with python""" if trj_file is not None: trj_name = f"_{trj_file.stem}" else: @@ -234,17 +235,18 @@ def run_pyscripts( out_file = str(OUTPUT_PATH_REF.joinpath(f"{in_name}_to_{out_name}{trj_name}.wav")) - pyaudio3dtools.spatialaudioconvert.spatial_audio_convert( + convert_file( in_file, out_file, - in_format=in_fmt, - out_format=out_fmt, - in_meta_files=in_meta_files, + 48000, + in_fmt, + out_fmt, + in_meta=in_meta_files, trajectory=trj_file, limit_output=True, ) - return pyaudio3dtools.audiofile.readfile(out_file) + return read(out_file) def compare_renderer_vs_mergetarget(test_info, in_fmt, out_fmt, **kwargs): @@ -267,4 +269,4 @@ def compare_renderer_vs_pyscripts(test_info, in_fmt, out_fmt, **kwargs): def compare_renderer_args(test_info, in_fmt, out_fmt, ref_kwargs: Dict, cut_kwargs: Dict): ref, ref_fs = run_renderer(in_fmt, out_fmt, **ref_kwargs) cut, cut_fs = run_renderer(in_fmt, out_fmt, **cut_kwargs) - check_BE(test_info, ref, ref_fs, cut, cut_fs) \ No newline at end of file + check_BE(test_info, ref, ref_fs, cut, cut_fs) -- GitLab From e1e236b3faba59db5819c5439536ddf501f3dd04 Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Fri, 28 Apr 2023 10:02:35 +0200 Subject: [PATCH 03/12] update submodule to ivas-codec-tests tag of IVAS Processing Scripts repo --- scripts/ivas-processing-scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ivas-processing-scripts b/scripts/ivas-processing-scripts index e007ca5c1a..5add879ec7 160000 --- a/scripts/ivas-processing-scripts +++ b/scripts/ivas-processing-scripts @@ -1 +1 @@ -Subproject commit e007ca5c1afa77bed87c50f3634813493fb7ed18 +Subproject commit 5add879ec7da16feb0296c2e09d3092db1b01152 -- GitLab From a8929f6475f5d35255facd4c907fc1af8c176626 Mon Sep 17 00:00:00 2001 From: knj Date: Fri, 28 Apr 2023 12:08:34 +0200 Subject: [PATCH 04/12] add checks for submodule in the test helper scripts --- tests/prepare_pytests.py | 16 +++++++++++++--- tests/run_pytests.py | 16 +++++++++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/tests/prepare_pytests.py b/tests/prepare_pytests.py index 9074a6704a..ae508391b0 100755 --- a/tests/prepare_pytests.py +++ b/tests/prepare_pytests.py @@ -34,13 +34,23 @@ __doc__ = """ Script to prepare the pytest tests. """ -import os import sys +from pathlib import Path + +try: + here = Path(__file__).parent.resolve() + scripts_dir = str(here.joinpath("../scripts/ivas-processing-scripts").resolve()) + sys.path.append(scripts_dir) + import ivas_processing_scripts +except ImportError: + print("Can not import ivas_processing_scripts - please run 'git submodule update --init' and try again") + sys.exit(0) + +import os import argparse import subprocess import platform -from pathlib import Path from create_short_testvectors import create_short_testvectors BIN_EXT = ".exe" if platform.system() == "Windows" else "" @@ -96,7 +106,7 @@ def main(argv): if platform.system() == "Windows": base_cmd = ["pytest"] else: - base_cmd = ["python3", "-m", "pytest"] + base_cmd = ["python3.10", "-m", "pytest"] if args.param_file: base_cmd += ["tests/test_param_file.py", "--param_file", args.param_file] else: diff --git a/tests/run_pytests.py b/tests/run_pytests.py index d20de6ef72..b00469963a 100755 --- a/tests/run_pytests.py +++ b/tests/run_pytests.py @@ -37,12 +37,22 @@ Test prerequisites are checked for and check failures are reported. When prerequisites are met, the pytest test is executed. """ -import os import sys +from pathlib import Path + +try: + here = Path(__file__).parent.resolve() + scripts_dir = str(here.joinpath("../scripts/ivas-processing-scripts").resolve()) + sys.path.append(scripts_dir) + import ivas_processing_scripts +except ImportError: + print("Can not import ivas_processing_scripts - please run 'git submodule update --init' and try again") + sys.exit(0) + +import os import argparse import subprocess import platform -from pathlib import Path BIN_EXT = ".exe" if platform.system() == "Windows" else "" HERE = Path(__file__).parent.resolve() @@ -91,7 +101,7 @@ def main(argv): if platform.system() == "Windows": cmd = ["pytest"] else: - cmd = ["python3", "-m", "pytest"] + cmd = ["python3.10", "-m", "pytest"] if args.param_file: cmd += ["tests/test_param_file.py", "--param_file", args.param_file] else: -- GitLab From dfbbb3438c3d7ca15396a225282aa6422bdc1c14 Mon Sep 17 00:00:00 2001 From: knj Date: Fri, 28 Apr 2023 12:29:46 +0200 Subject: [PATCH 05/12] configure CI to use submodule for scripts --- .gitlab-ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 082cf40cab..8c909fe327 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -9,6 +9,8 @@ variables: OUT_FORMATS_BINAURAL: "BINAURAL BINAURAL_ROOM" EXIT_CODE_NON_BE: 123 EXIT_CODE_FAIL: 1 + GIT_SUBMODULE_STRATEGY: normal + GIT_SUBMODULE_FORCE_HTTPS: "true" default: interruptible: true # Make all jobs by default interruptible -- GitLab From 807eb30e23b3e201328768a32ec9f40b27689e38 Mon Sep 17 00:00:00 2001 From: knj Date: Fri, 28 Apr 2023 12:52:11 +0200 Subject: [PATCH 06/12] explicitly use https for submodule cloning --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index eb6c2baca0..6b1b6ecc3b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "scripts/ivas-processing-scripts"] path = scripts/ivas-processing-scripts - url = git@forge.3gpp.org:ivas-codec-pc/ivas-processing-scripts.git + url = https://forge.3gpp.org/rep/ivas-codec-pc/ivas-processing-scripts.git -- GitLab From d00235264b2f49dd16aa11fb1a91d83643ae752d Mon Sep 17 00:00:00 2001 From: knj Date: Fri, 28 Apr 2023 12:57:08 +0200 Subject: [PATCH 07/12] change submodule strategy --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8c909fe327..9a8a3eb0ef 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -9,7 +9,7 @@ variables: OUT_FORMATS_BINAURAL: "BINAURAL BINAURAL_ROOM" EXIT_CODE_NON_BE: 123 EXIT_CODE_FAIL: 1 - GIT_SUBMODULE_STRATEGY: normal + GIT_SUBMODULE_STRATEGY: recursive GIT_SUBMODULE_FORCE_HTTPS: "true" default: -- GitLab From 81df7feffe12cde6fbf0333fafa994e97e98f2b9 Mon Sep 17 00:00:00 2001 From: knj Date: Fri, 28 Apr 2023 13:05:53 +0200 Subject: [PATCH 08/12] add debug printout to job --- .gitlab-ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9a8a3eb0ef..28c45d73e5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -388,6 +388,10 @@ renderer-smoke-test: needs: ["build-codec-linux-make"] stage: test script: + - ls scripts + - ls -altr scripts/ivas-processing-scripts + - cd scripts + - python3 -c 'import ivas_processing_scripts' - make -j IVAS_rend - python3 -m pytest -q -n auto -rA --junit-xml=report-junit.xml tests/renderer/test_renderer.py artifacts: -- GitLab From f9d1d5e1904e61e4b7178f1c776ae11c11dcf951 Mon Sep 17 00:00:00 2001 From: knj Date: Fri, 28 Apr 2023 13:19:19 +0200 Subject: [PATCH 09/12] fix path for import --- tests/renderer/compare_audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/renderer/compare_audio.py b/tests/renderer/compare_audio.py index 0201f6cb7f..0ae0931348 100644 --- a/tests/renderer/compare_audio.py +++ b/tests/renderer/compare_audio.py @@ -34,7 +34,7 @@ import numpy as np from .constants import SCRIPTS_DIR -sys.path.append(str(SCRIPTS_DIR)) +sys.path.append(str(SCRIPTS_DIR.joinpath("ivas-processing-scripts"))) from ivas_processing_scripts.audiotools.audioarray import getdelay -- GitLab From 1d411c668df5d9303e93e62782559195f3035896 Mon Sep 17 00:00:00 2001 From: knj Date: Fri, 28 Apr 2023 13:30:33 +0200 Subject: [PATCH 10/12] fix import --- scripts/pyivastest/IvasModeRunner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyivastest/IvasModeRunner.py b/scripts/pyivastest/IvasModeRunner.py index 95f490e447..0025f028ac 100644 --- a/scripts/pyivastest/IvasModeRunner.py +++ b/scripts/pyivastest/IvasModeRunner.py @@ -47,7 +47,7 @@ import json from pyivastest import IvasModeCollector from pyivastest import constants -sys.path.append("../ivas-processing-scripts") +sys.path.append(f"{os.path.dirname(__file__)}/../ivas-processing-scripts") import ivas_processing_scripts.audiotools.audiofile as af import ivas_processing_scripts.audiotools.audioarray as ar BW_TO_SR = {"nb": 8, "wb": 16, "swb": 32, "fb": 48} -- GitLab From a87c10168d788d60db3b80e7ab05ba15b64c7cf0 Mon Sep 17 00:00:00 2001 From: knj Date: Fri, 28 Apr 2023 13:48:27 +0200 Subject: [PATCH 11/12] remove debug printout --- .gitlab-ci.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 28c45d73e5..9a8a3eb0ef 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -388,10 +388,6 @@ renderer-smoke-test: needs: ["build-codec-linux-make"] stage: test script: - - ls scripts - - ls -altr scripts/ivas-processing-scripts - - cd scripts - - python3 -c 'import ivas_processing_scripts' - make -j IVAS_rend - python3 -m pytest -q -n auto -rA --junit-xml=report-junit.xml tests/renderer/test_renderer.py artifacts: -- GitLab From 22c0cfcf9d6d057d8fa0b9370b16323946c7add0 Mon Sep 17 00:00:00 2001 From: knj Date: Fri, 28 Apr 2023 13:50:36 +0200 Subject: [PATCH 12/12] for testing: only run some jobs on specific runner --- .gitlab-ci.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9a8a3eb0ef..2bd61eac49 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -342,6 +342,8 @@ codec-msan: - .test-job-linux - .rules-merge-request stage: test + tags: + - test-fhg-linux-runner1 needs: ["build-codec-sanitizers-linux"] script: - *print-common-info @@ -364,6 +366,8 @@ codec-asan: - .test-job-linux - .rules-merge-request stage: test + tags: + - test-fhg-linux-runner1 needs: ["build-codec-sanitizers-linux"] script: - *print-common-info @@ -387,6 +391,8 @@ renderer-smoke-test: - .rules-merge-request needs: ["build-codec-linux-make"] stage: test + tags: + - test-fhg-linux-runner1 script: - make -j IVAS_rend - python3 -m pytest -q -n auto -rA --junit-xml=report-junit.xml tests/renderer/test_renderer.py @@ -407,6 +413,8 @@ renderer-asan: - .rules-merge-request needs: ["build-codec-linux-cmake"] stage: test + tags: + - test-fhg-linux-runner1 script: - python3 ci/disable_ram_counting.py - cmake -B cmake-build -G "Unix Makefiles" -DCLANG=asan -DCOPY_EXECUTABLES_FROM_BUILD_DIR=true @@ -430,6 +438,8 @@ renderer-msan: - .rules-merge-request needs: ["build-codec-linux-cmake"] stage: test + tags: + - test-fhg-linux-runner1 script: - python3 ci/disable_ram_counting.py - cmake -B cmake-build -G "Unix Makefiles" -DCLANG=msan -DCOPY_EXECUTABLES_FROM_BUILD_DIR=true @@ -455,6 +465,8 @@ renderer-pytest-on-merge-request: # TODO: set reasonable timeout, will most likely take less timeout: "20 minutes" stage: compare + tags: + - test-fhg-linux-runner1 script: - *print-common-info @@ -506,6 +518,8 @@ ivas-pytest-on-merge-request: - .test-job-linux - .rules-merge-request stage: compare + tags: + - test-fhg-linux-runner1 needs: ["build-codec-linux-cmake", "codec-smoke-test"] timeout: "10 minutes" script: @@ -601,6 +615,8 @@ voip-be-on-merge-request: - .test-job-linux-needs-testv-dir - .rules-merge-request stage: compare # Or should it be test? Comparison is done within one git revision + tags: + - test-fhg-linux-runner1 needs: ["build-codec-linux-make", codec-smoke-test] timeout: "10 minutes" script: -- GitLab