diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 760f908abd8558b580ebdfd9d8d39bcadac4d2fc..4dfee99996265e094a59744edeb5486fed5d198d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,6 +10,8 @@ variables: OUT_FORMATS_BINAURAL: "BINAURAL BINAURAL_ROOM_IR BINAURAL_ROOM_REVERB" EXIT_CODE_NON_BE: 123 EXIT_CODE_FAIL: 1 + GIT_SUBMODULE_STRATEGY: recursive + GIT_SUBMODULE_FORCE_HTTPS: "true" PROCESSING_SCRIPTS_BIN_DIR: "/test-bin" TESTS_DIR_CODEC_BE_ON_MR: "tests/codec_be_on_mr_nonselection" @@ -348,6 +350,8 @@ codec-msan: - .test-job-linux - .rules-merge-request stage: test + tags: + - test-fhg-linux-runner1 needs: ["build-codec-sanitizers-linux"] script: - *print-common-info @@ -370,6 +374,8 @@ codec-asan: - .test-job-linux - .rules-merge-request stage: test + tags: + - test-fhg-linux-runner1 needs: ["build-codec-sanitizers-linux"] script: - *print-common-info @@ -393,6 +399,8 @@ renderer-smoke-test: - .rules-merge-request needs: ["build-codec-linux-make"] stage: test + tags: + - test-fhg-linux-runner1 script: - make -j IVAS_rend - python3 -m pytest -q -n auto -rA --junit-xml=report-junit.xml tests/renderer/test_renderer.py @@ -414,6 +422,8 @@ renderer-asan: - .rules-merge-request needs: ["build-codec-linux-cmake"] stage: test + tags: + - test-fhg-linux-runner1 script: - cmake -B cmake-build -G "Unix Makefiles" -DCLANG=asan -DCOPY_EXECUTABLES_FROM_BUILD_DIR=true - cmake --build cmake-build -- -j @@ -437,6 +447,8 @@ renderer-msan: - .rules-merge-request needs: ["build-codec-linux-cmake"] stage: test + tags: + - test-fhg-linux-runner1 script: - cmake -B cmake-build -G "Unix Makefiles" -DCLANG=msan -DCOPY_EXECUTABLES_FROM_BUILD_DIR=true - cmake --build cmake-build -- -j @@ -462,6 +474,8 @@ renderer-pytest-on-merge-request: # TODO: set reasonable timeout, will most likely take less timeout: "20 minutes" stage: compare + tags: + - test-fhg-linux-runner1 script: - *print-common-info @@ -592,6 +606,8 @@ ivas-pytest-on-merge-request: - .test-job-linux - .rules-merge-request stage: compare + tags: + - test-fhg-linux-runner1 needs: ["build-codec-linux-cmake", "codec-smoke-test"] timeout: "10 minutes" script: @@ -689,6 +705,8 @@ voip-be-on-merge-request: - .test-job-linux-needs-testv-dir - .rules-merge-request stage: compare # Or should it be test? Comparison is done within one git revision + tags: + - test-fhg-linux-runner1 needs: ["build-codec-linux-make", codec-smoke-test] timeout: "10 minutes" script: diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..6b1b6ecc3b73ed3f6803b7cf30bf0b728e5b8e1a --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "scripts/ivas-processing-scripts"] + path = scripts/ivas-processing-scripts + url = https://forge.3gpp.org/rep/ivas-codec-pc/ivas-processing-scripts.git diff --git a/ci/ivas_voip_be_test.sh b/ci/ivas_voip_be_test.sh index 592451c345541b0e3c2d57424a4171bdbb40d460..01a1b5af6b851c2b0488bb94448fee14916f5a20 100755 --- a/ci/ivas_voip_be_test.sh +++ b/ci/ivas_voip_be_test.sh @@ -101,6 +101,7 @@ fi for cut in "$output_dir_voip_dec"/*.wav; do output_path=${cut/$output_dir_voip_dec/$output_dir_voip_dec_trimmed} output_path=${output_path/".wav"/".raw"} + # TODO tmu&knj try to replace with cut_pcm python3 "$python_audiofile_script_path" pre-trim 60 "$cut" "$output_path" | tee -a voip_be_test_output.txt done @@ -115,6 +116,7 @@ fi for ref in "$output_dir_default_dec"/*.wav; do output_path=${ref/$output_dir_default_dec/$output_dir_default_dec_pcm} output_path=${output_path/".wav"/".raw"} + # TODO tmu&knj check replacement python3 "$python_audiofile_script_path" convert "$ref" "$output_path" | tee -a voip_be_test_output.txt done diff --git a/scripts/generate_test_items.py b/scripts/generate_test_items.py deleted file mode 100755 index ad0717db9bd25c438436b88617d74eeb53107283..0000000000000000000000000000000000000000 --- a/scripts/generate_test_items.py +++ /dev/null @@ -1,254 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import argparse -import logging -import multiprocessing as mp -import os -from typing import Iterable - -from pyaudio3dtools import audiofile -from pyprocessing import processing, processing_configs, utils - -# Global logging options -logger = logging.getLogger(__name__) -LOGGER_MAIN_LOG_FILENAME = "log.txt" -LOGGER_PROC_ITEM_SUFFIX = "_log.txt" -LOGGER_FORMAT = "%(asctime)s | %(name)-12s | %(levelname)-8s | %(message)s" -LOGGER_DATEFMT = "%m-%d %H:%M" - - -def _run_condition_and_item( - out_folder: str, tmp_folder: str, item: str, proc_chain: Iterable -) -> None: - item_name, _ = os.path.splitext(os.path.basename(item)) - out_item = os.path.join(out_folder, item_name + ".wav") - - # Create a logfile for the item - fh = logging.FileHandler( - os.path.join(tmp_folder, item_name + LOGGER_PROC_ITEM_SUFFIX), mode="w" - ) - fh.setLevel(logging.DEBUG) - formatter = logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT) - fh.setFormatter(formatter) - logger.addHandler(fh) - - processing.process_chain(proc_chain, item, out_item, tmp_folder) - - # Log after completion - logger.removeHandler(fh) - output_nickname = "Done " + os.path.join( - os.path.basename(out_folder), item_name + ".wav" - ) - logger.info(f"{output_nickname:-^100.100}") - - -def main(test_cfg): - # Create pool if multiprocessing is enabled - if test_cfg.enable_multiprocessing: - pool = mp.Pool(mp.cpu_count()) - results = [] - - # pre-process items if required - if test_cfg.preproc_input: - condition = test_cfg.list_of_conditions[0] - out_folder = test_cfg.output_folders[0] - tmp_folder = test_cfg.tmp_folders[0] - - proc_id = condition["id"] - logger.info(" Condition: " + proc_id) - - proc_chain = condition["proc_chain"] - - for item in test_cfg.items_list: - logger.info(" Item: " + item) - if test_cfg.enable_multiprocessing: - results.append( - pool.apply_async( - _run_condition_and_item, - args=(out_folder, tmp_folder, item, proc_chain), - ) - ) - else: - _run_condition_and_item(out_folder, tmp_folder, item, proc_chain) - - if test_cfg.enable_multiprocessing: - pool.close() - pool.join() - for r in results: - r.get() - pool = mp.Pool(mp.cpu_count()) - results = [] - - test_cfg.items_list = utils.list_audio(out_folder, absolute=True) - - test_cfg.list_of_conditions = test_cfg.list_of_conditions[1:] - test_cfg.output_folders = test_cfg.output_folders[1:] - test_cfg.tmp_folders = test_cfg.tmp_folders[1:] - - # concatenate items if required - if test_cfg.concatenate_input: - if len(test_cfg.items_list) > 1: - concat_filename = os.path.join( - test_cfg.output_path, - os.path.basename(test_cfg.input_path) + "_concat.wav", - ) - audiofile.concatenatefiles( - test_cfg.items_list, - concat_filename, - test_cfg.concat_silence_pre, - test_cfg.concat_silence_post, - test_cfg.in_fs, - ) - # simply pad if single item - else: - concat_filename = os.path.join( - test_cfg.output_path, - os.path.splitext(os.path.basename(test_cfg.input_path))[0] - + "_padded.wav", - ) - audiofile.concatenatefiles( - test_cfg.items_list, - concat_filename, - test_cfg.concat_silence_pre, - test_cfg.concat_silence_post, - test_cfg.in_fs, - ) - test_cfg.items_list = [concat_filename] - - for condition, out_folder, tmp_folder in zip( - test_cfg.list_of_conditions, test_cfg.output_folders, test_cfg.tmp_folders - ): - proc_id = condition["id"] - logger.info(f" Condition: {proc_id} in {out_folder}") - - proc_chain = condition["proc_chain"] - - for item in test_cfg.items_list: - logger.info(f" Item: {item}") - if test_cfg.enable_multiprocessing: - results.append( - pool.apply_async( - _run_condition_and_item, - args=(out_folder, tmp_folder, item, proc_chain), - ) - ) - else: - _run_condition_and_item(out_folder, tmp_folder, item, proc_chain) - - if test_cfg.enable_multiprocessing: - pool.close() - pool.join() - for r in results: - r.get() - - # copy over JSON to main output directory - output_json = os.path.join(test_cfg.output_path, test_cfg.name + ".json") - with open(output_json, "w") as fp: - fp.write(test_cfg.json_out) - - # remove concatenated file - if ( - test_cfg.delete_tmp - and test_cfg.concatenate_input - and os.path.exists(concat_filename) - ): - os.remove(concat_filename) - - -if __name__ == "__main__": - # Parse command line - parser = argparse.ArgumentParser( - description="Generate test items. Refer to README.md for detailed usage instructions." - ) - parser.add_argument( - "-i", - "--infile", - required=True, - nargs="+", - help="Configuration file(s): FILE1.json FILE2.json ...", - ) - args = parser.parse_args() - - # Get all test configuration files to process - infile = args.infile - tests_list = [] - for infile in args.infile: - if os.path.isdir(infile): - tests_list.extend( - [ - os.path.join(infile, f) - for f in os.listdir(infile) - if f.endswith((".json")) - ] - ) - else: - tests_list.append(infile) - - # Read configuration file - for test in tests_list: - test_cfg = processing_configs.test_config(test) - - # context manager to create output folders and clean up temporary folders - delete_folders = test_cfg.tmp_folders if test_cfg.delete_tmp else [] - with utils.DirManager( - test_cfg.output_folders + test_cfg.tmp_folders, delete_folders - ): - - # Set up logging handlers - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - console_handler.setFormatter(logging.Formatter("%(message)s")) - - file_handler = logging.FileHandler( - os.path.join(test_cfg.output_path, LOGGER_MAIN_LOG_FILENAME), mode="w" - ) - file_handler.setLevel(logging.INFO) - file_handler.setFormatter( - logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT) - ) - - # Configure loggers - logging.basicConfig( - format=LOGGER_FORMAT, - datefmt=LOGGER_DATEFMT, - level=logging.INFO, - handlers=[console_handler, file_handler], - ) - - # Log some info - logger.info(f"===Generate test: {test}===") - logger.info(f"Test name: {test_cfg.name}") - logger.info(f"Input path: {test_cfg.input_path}") - logger.info(f"Output path: {test_cfg.output_path}") - - main(test_cfg) diff --git a/scripts/ivas-processing-scripts b/scripts/ivas-processing-scripts new file mode 160000 index 0000000000000000000000000000000000000000..5add879ec7da16feb0296c2e09d3092db1b01152 --- /dev/null +++ b/scripts/ivas-processing-scripts @@ -0,0 +1 @@ +Subproject commit 5add879ec7da16feb0296c2e09d3092db1b01152 diff --git a/scripts/pyaudio3dtools/__init__.py b/scripts/pyaudio3dtools/__init__.py deleted file mode 100644 index 33a5d39126901fd6a7f2b2d04168d09cf83ad151..0000000000000000000000000000000000000000 --- a/scripts/pyaudio3dtools/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -""" -pyaudio3dtools -==== - -Provides - Basic methods for handling 3D audio in different formats (channel-based, object-based, Ambisonics) - -Imports -------- -functions -class -""" -from . import ( - audioarray, - audiofile, - spatialaudioformat, -) diff --git a/scripts/pyaudio3dtools/audio3dtools.py b/scripts/pyaudio3dtools/audio3dtools.py deleted file mode 100755 index cfb7acb9f566d7c4a31b6607c120d2bc5571022e..0000000000000000000000000000000000000000 --- a/scripts/pyaudio3dtools/audio3dtools.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import argparse -import logging -import os - -from pyaudio3dtools import ( - audiofile, - spatialaudioformat, -) - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - - -def main(): - print( - f"These scripts have been deprecated! Please check out and use the latest version from https://forge.3gpp.org/rep/ivas-codec-pc/ivas-processing-scripts.git" - ) - - -if __name__ == "__main__": - main() diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py deleted file mode 100644 index 740c40c3c6fda3cdc9f0112d3dfbfad9c6f5e1dc..0000000000000000000000000000000000000000 --- a/scripts/pyaudio3dtools/audioarray.py +++ /dev/null @@ -1,475 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import logging -import math -from typing import Callable, Iterable, Optional, Tuple - -import numpy as np -import multiprocessing as mp -import scipy.signal as sig - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - - -def convert( - x: np.ndarray, - out_nchans: Optional[int] = None, - in_fs: Optional[int] = None, - out_fs: Optional[int] = None, -) -> np.ndarray: - """Convert audio array, change nchannels and sampling rate - - Parameters - ---------- - x: numpy array - Input audio array - out_nchans: Optional[int] - Number of output channels, default out_nchans = in_nchans - in_fs: Optional[int] - Input sampling rate, default 48 kHz if required - out_fs: Optional[int] - Output sampling rate, default out_fs = in_fs - - Returns - ------- - y: numpy array - Ouput converted array - - """ - # Input config - if in_fs is None: - in_fs = 48000 - in_nchans = x.shape[1] - - # Output config - y = x - if out_fs is None: - out_fs = in_fs - if out_nchans is None: - out_nchans = in_nchans - - logger.debug(f"Input audio array: {x.shape[0]} by {x.shape[1]}") - - # Process - if in_nchans == out_nchans and in_fs == out_fs: - logger.debug("Convert file: nothing to be done") - else: - # adjust number of channels - if out_nchans < in_nchans: - y = y[:, 0:out_nchans] - elif out_nchans > in_nchans: - y = np.append(y, np.zeros([y.shape[0], out_nchans - in_nchans]), axis=1) - - # adjust sampling rate - y = resample(y, in_fs, out_fs) - - logger.debug(f"Output audio array: {y.shape[0]} by {y.shape[1]}") - - return y - - -def resample(x: np.ndarray, in_freq: int, out_freq: int) -> np.ndarray: - """Resample a multi-channel audio array - - Parameters - ---------- - x: numpy array - Input array - in_fs: int - Input sampling rate - out_fs: int - Output sampling rate - - Returns - ------- - y: - Output resampled numpy array - - """ - - if in_freq == out_freq or out_freq is None: - y = x - else: - # get gcd of original and deisred frequency - gcd = math.gcd(in_freq, out_freq) - - # calculate up-sampling factor - up_factor = int(out_freq / gcd) - - # calculate downsampling factor - down_factor = int(in_freq / gcd) - - # resample data using polyphase filtering across columns/channels - if x.ndim == 2: - y = sig.resample_poly(x[:, 0], up_factor, down_factor) - y = np.reshape(y, (y.shape[0], 1)) - for k in range(1, x.shape[1]): - a = sig.resample_poly(x[:, k], up_factor, down_factor) - a = np.reshape(a, (a.shape[0], 1)) - y = np.append(y, a, axis=1) - else: - y = sig.resample_poly(x, up_factor, down_factor) - - return y - - -def lpfilter(x: np.ndarray, fc: int, fs: int) -> np.ndarray: - """Low-pass filter a multi-channel audio array - - Parameters - ---------- - x: numpy array - Input array - fc: int - Cutoff frequency in Hz - out_fs: int - Sampling rate in Hz - - Returns - ------- - y: numpy array - Output low-pass filtered array - - """ - if (fc + 500) < (fs / 2.0): - # Design a Chebychev Type II filter, band_pass-band_stop = 500 Hz - N, Wn = sig.cheb2ord(fc / (fs / 2), (fc + 500) / (fs / 2), 3, 60) - b, a = sig.cheby2(N, 60, Wn, "low") - - # Apply the Butterworth filter for each channels, across time axis - # y = sig.lfilter(b, a, axis=0) # non zero-phase filter - y = sig.filtfilt(b, a, x, axis=0) # zero-phae filer, batch processing - else: - y = x - - return y - - -def cut(x: np.ndarray, limits: Tuple[int, int]) -> np.ndarray: - """Cut an audio array - - Parameters - ---------- - x: numpy array - Input array - limits: Tuple[int, int] - first and last samples to extract - - Returns - ------- - y: numpy array - Output cut array - """ - - in_samples, in_channels = x.shape - first_sample = limits[0] - last_sample = limits[1] - - if first_sample == 0 and (last_sample == -1 or last_sample == in_samples): - y = x - else: - if last_sample == -1: - last_sample = in_samples - - signal_start = first_sample - signal_end = last_sample - insert_start = 0 - insert_end = last_sample - first_sample - total_samples = last_sample - first_sample - if first_sample < 0: - samples_to_pad_begin = -first_sample - insert_start = samples_to_pad_begin - insert_end += samples_to_pad_begin - if last_sample > in_samples: - signal_end = in_samples - insert_end = insert_end - last_sample + in_samples - y = np.zeros([total_samples, in_channels], dtype=x.dtype) - y[insert_start:insert_end, :] = x[signal_start:signal_end, :] - - return y - - -def compare(ref: np.ndarray, test: np.ndarray, fs: int, per_frame: bool = True) -> dict: - """Compare two audio arrays - - Parameters - ---------- - ref: numpy array - Input reference array - test: numpy array - Input test array - fs: int - Input sampling rate in Hz - - Returns - ------- - result: dict - Comparison results - """ - framesize = fs // 50 - diff = abs(test - ref) - max_diff = int(diff.max()) - result = { - "bitexact": True, - "max_abs_diff": 0, - "max_abs_diff_pos_sample": 0, - "max_abs_diff_pos_channel": 0, - "nsamples_diff": 0, - "nsamples_diff_percentage": 0.0, - "first_diff_pos_sample": -1, - "first_diff_pos_channel": -1, - "first_diff_pos_frame": -1, - } - if per_frame: - result["max_abs_diff_pos_frame"] = 0 - result["nframes_diff"] = 0 - result["nframes_diff_percentage"] = 0.0 - - if max_diff != 0: - if diff.ndim == 1: - nsamples_total = diff.shape - nchannels = 1 - else: - nsamples_total, nchannels = diff.shape - max_diff_pos = np.nonzero(diff == max_diff) - max_diff_pos = [ - max_diff_pos[0][0], - max_diff_pos[0][0] // framesize, - max_diff_pos[1][0], - ] - - first_diff_pos = np.nonzero(diff) - first_diff_pos = [ - first_diff_pos[0][0], - first_diff_pos[0][0] // framesize, - first_diff_pos[1][0], - ] - - nsamples_diff = np.nonzero(diff)[0].size - nsamples_diff_percentage = nsamples_diff / (nsamples_total * nchannels) * 100.0 - nframes = nsamples_total // framesize - nframes_diff = 0 - - result = { - "bitexact": False, - "max_abs_diff": max_diff, - "max_abs_diff_pos_sample": max_diff_pos[0], - "max_abs_diff_pos_channel": max_diff_pos[2], - "nsamples_diff": nsamples_diff, - "nsamples_diff_percentage": nsamples_diff_percentage, - "first_diff_pos_sample": first_diff_pos[0], - "first_diff_pos_channel": first_diff_pos[2], - "first_diff_pos_frame": first_diff_pos[1], - } - - if per_frame: - for fr in range(nframes): - diff_fr = diff[fr * framesize : ((fr + 1) * framesize), :] - nframes_diff += 1 if diff_fr.nonzero()[0].size > 0 else 0 - nframes_diff_percentage = nframes_diff / nframes * 100.0 - result["max_abs_diff_pos_frame"] = max_diff_pos[1] - result["nframes_diff"] = nframes_diff - result["nframes_diff_percentage"] = nframes_diff_percentage - - return result - - -def getdelay(x: np.ndarray, y: np.ndarray) -> int: - """Get the delay between two audio signals - - Parameters - ---------- - x: numpy array - Input reference array - y: numpy array - Input test array - - Returns - ------- - result: int - delay of y in samples with respect to x (median of individual channel delays) - """ - if x.ndim == 1: - n_samples_x = x.shape - n_chan_x = 1 - else: - n_samples_x, n_chan_x = x.shape - if y.ndim == 1: - n_samples_y = y.shape - n_chan_y = 1 - else: - n_samples_y, n_chan_y = y.shape - if n_chan_x != n_chan_y: - raise ValueError - lags = np.arange(-n_samples_x + 1, n_samples_y) - lag = np.zeros([n_chan_x, 1], dtype=int) - for chan in range(n_chan_x): - correlation = sig.correlate(y[:, chan], x[:, chan], mode="full") - lag[chan] = lags[np.argmax(correlation)] - return int(np.median(lag)) - - -def limiter(x: np.ndarray, fs: int): - """Apply limiting to an audio signal - - Parameters - ---------- - x: numpy array - Input reference array - fs: int - Input sampling frequency - - Returns - ------- - None - """ - limiter_threshold = 32729 # -0.01dB FS - limiter_attack_seconds = 0.005 - attack_constant = 0.01 ** (1.0 / (limiter_attack_seconds * fs)) - release_heuristics_mem = 0.0 - gain = 1.0 - strong_saturation_cnt = 0 - - if x.ndim == 1: - n_samples_x = x.shape - n_chan_x = 1 - else: - n_samples_x, n_chan_x = x.shape - # framing - framesize = fs // 50 - nframes = n_samples_x // framesize - for fr in range(nframes): - apply_limiting = True - fr_sig = x[fr * framesize : ((fr + 1) * framesize), :] - sig_max = np.absolute(fr_sig).max() - release_heuristic = release_heuristics_mem - if sig_max > limiter_threshold: - frame_gain = limiter_threshold / sig_max - release_heuristic = min(1.0, release_heuristic + (4.0 * framesize / fs)) - else: - release_heuristic = max(0.0, release_heuristic - (framesize / fs)) - if gain >= 1.0 - 1e-10: - apply_limiting = False - - frame_gain = 1.0 - - if sig_max > 3 * limiter_threshold and strong_saturation_cnt > 0: - apply_strong_limiting = True - elif sig_max > 10 * limiter_threshold: - strong_saturation_cnt += 20 - apply_strong_limiting = True - else: - strong_saturation_cnt -= 1 - if strong_saturation_cnt < 0: - strong_saturation_cnt = 0 - apply_strong_limiting = False - - if apply_strong_limiting is True: - if frame_gain < 0.3: - frame_gain /= 3.0 - else: - apply_strong_limiting = False - - if frame_gain < 0.1 and apply_strong_limiting is False: - frame_gain = 0.1 - - if apply_limiting is True: - if frame_gain < gain: - fac = attack_constant ** (np.arange(1, framesize + 1, dtype=np.float32)) - else: - release_constant = 0.01 ** ( - 1.0 / (0.005 * (200.0**release_heuristic) * fs) - ) - fac = release_constant ** ( - np.arange(1, framesize + 1, dtype=np.float32) - ) - - fr_gain = np.tile(gain * fac + frame_gain * (1.0 - fac), (n_chan_x, 1)).T - fr_sig *= fr_gain - gain = fr_gain[-1, 0] - else: - gain = 1.0 - - release_heuristics_mem = release_heuristic - # hard limiting for everything that still sticks out - idx_max = np.where(fr_sig > 32767) - fr_sig[idx_max] = 32767 - idx_min = np.where(fr_sig < -32768) - fr_sig[idx_min] = -32768 - - -def get_framewise(x: np.ndarray, chunk_size: int, zero_pad=False) -> np.ndarray: - """Generator to yield a signal frame by frame - If array size is not a multiple of chunk_size, last frame contains the remainder - - Parameters - ---------- - x: numpy array - Input reference array - chunk_size: int - Size of frames to yield - zero_pad: bool - Whether to zero pad the last chunk if there are not enough samples - - Yields - ------- - frame : np.ndarray - One frame of the input audio signal - """ - n_frames = x.shape[0] // chunk_size - for i in range(n_frames): - yield x[i * chunk_size : (i + 1) * chunk_size, :] - if x.shape[0] % chunk_size: - last_chunk = x[n_frames * chunk_size :, :] - if zero_pad: - yield np.pad( - last_chunk, [[0, chunk_size - (x.shape[0] % chunk_size)], [0, 0]] - ) - else: - yield last_chunk - - -def process_async(files: Iterable, func: Callable, **kwargs): - """Applies a function asynchronously to an array of audio files/filenames using a multiprocessing pool""" - - p = mp.pool(mp.cpu_count()) - results = [] - for f in files: - results.append(p.apply_async(func, args=(f, kwargs))) - p.close() - p.join() - for r in results: - r.get() - return results diff --git a/scripts/pyaudio3dtools/audiofile.py b/scripts/pyaudio3dtools/audiofile.py deleted file mode 100644 index 5b6ffcdced2c3dadee3224d98d960123df002ec3..0000000000000000000000000000000000000000 --- a/scripts/pyaudio3dtools/audiofile.py +++ /dev/null @@ -1,817 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import os -import platform -import shutil -import struct -import subprocess as sp -import warnings -from importlib import import_module -from tempfile import TemporaryDirectory -from typing import Optional, Tuple - -import numpy as np -import scipy.io.wavfile as wav - -from pyaudio3dtools import audioarray, spatialaudioformat - - -def readfile( - filename: str, nchannels: int = 1, fs: int = 48000, outdtype="float" -) -> Tuple[np.ndarray, int]: - """Read audio file (.pcm or .wav) - - Parameters - ---------- - filename: str - Input file path - nchannels: Optional[int] - Number of input channels, required for .pcm otherwise default = 1 - fs: Optional[int] - Input sampling rate, required for .pcm input file, otherwise default = 48000 (Hz) - outdtype: Optional[int] - Data type of output array, python builtin or np.dtype - - Returns - ------- - x: np array - audio signal array - fs: int - signal sampling frequency - - """ - _, file_extension = os.path.splitext(os.path.basename(filename)) - - if file_extension == ".wav": - fs, data = wav.read(filename) - if data.dtype == np.int32: - data = np.interp( - data, - (np.iinfo(np.int32).min, np.iinfo(np.int32).max), - (np.iinfo(np.int16).min, np.iinfo(np.int16).max), - ) - elif data.dtype == np.float32: - data = np.interp( - data, - (-1, 1), - (np.iinfo(np.int16).min, np.iinfo(np.int16).max), - ) - x = np.array(data, dtype=outdtype) - file_len = x.shape[0] - if x.ndim == 1: - # force to be a mtx - x = np.reshape(x, (file_len, 1)) - elif file_extension == ".pcm" or file_extension == ".raw": - x = np.fromfile(filename, dtype=np.int16).astype(outdtype) - signal_len = len(x) // nchannels - x = x.reshape(signal_len, nchannels) - else: - raise ValueError("Wrong input format. Use wav or pcm") - - return x, fs - - -def writefile(filename: str, x: np.ndarray, fs: int = 48000) -> None: - """Write audio file (.pcm or .wav) - - Parameters - ---------- - filename: str - Output file path (.pcm or .wav) - x: np array - Numpy 2D array of dimension: number of samples x number of channels - fs: Optional[int] - Output sampling rate, required for .pcm input file, otherwise default = 48000 (Hz) - - Returns - ------- - None - - """ - _, file_extension = os.path.splitext(os.path.basename(filename)) - - clipped_samples = np.sum( - np.logical_or(x < np.iinfo(np.int16).min, x > np.iinfo(np.int16).max) - ) - if clipped_samples > 0: - warnings.warn(f" Warning: {clipped_samples} samples clipped") - x = np.clip(x, np.iinfo(np.int16).min, np.iinfo(np.int16).max) - - if file_extension == ".wav": - x = x.astype(np.int16) - wav.write(filename, fs, x) - elif file_extension == ".pcm" or file_extension == ".raw": - x = x.astype("int16").reshape(-1, 1) - x.tofile(filename) - else: - raise ValueError("Wrong input format. Use wav or pcm") - - -def convertfile( - in_file: str, - out_file: str, - in_nchans: Optional[int] = None, - out_nchans: Optional[int] = None, - in_fs: Optional[int] = None, - out_fs: Optional[int] = None, - out_len_samples: Optional[int] = None, - verbose: bool = False, -) -> None: - """Convert audio file, can convert wav from/to pcm, change nchannels and sampling rate - - Parameters - ---------- - in_file: str - Input file path - out_file: str - Output file path - in_nchans: Optional[int] - Number of input channels required for .pcm inpout file - out_nchans: Optional[int] - Number of output channels, default out_nchans = in_nchans - in_fs: Optional[int] - Input sampling rate, required for .pcm input file - out_fs: Optional[int] - Output sampling rate, default out_fs = in_fs - out_len_samples: Optional[int] - Cut file to this length in samples. - Adds zeros at the end if bigger than file length. - - Returns - ------- - None - - """ - # Read input file - if in_fs is None: - in_fs = 48000 - if in_nchans is None: - in_nchans = 1 - x, in_fs = readfile(in_file, nchannels=in_nchans, fs=in_fs) - in_nchans = x.shape[1] - in_len_samples = x.shape[0] - - # Configure output file - y = x - if out_fs is None: - out_fs = in_fs - if out_nchans is None: - out_nchans = in_nchans - - if verbose: - print(f"Input file: {in_file}, sampling rate {str(in_fs)} size {str(x.shape)}") - - # Process - if ( - in_file == out_file - and in_nchans == out_nchans - and in_fs == out_fs - and in_len_samples == out_len_samples - ): - if verbose: - print("Convert file: nothing to be done") - else: - y = audioarray.convert(x, out_nchans=out_nchans, in_fs=in_fs, out_fs=out_fs) - - if out_len_samples is None: - out_len_samples = y.shape[0] - y = audioarray.cut(y, (0, out_len_samples)) - - # write/convert wav format - writefile(out_file, y, fs=out_fs) - if verbose: - print( - f"Written output file: {out_file}, sampling rate {str(out_fs)} size {str(y.shape)}" - ) - - -def concatenatefiles( - in_filenames: list, - out_file: str, - silence_pre: int, - silence_post: int, - in_fs: Optional[int] = 48000, - out_fs: Optional[int] = None, -) -> None: - """Horizontally concatenates audio files into one long file - - Parameters - __________ - in_filenames: list - Input list of filenmames (.pcm or .wav) - out_file: str - Output multi-channel audio file name (.pcm or .wav) - in_fs: Optional[int] = 48000 - Input sampling rate, default 48000 Hz - out_fs: Optional[int] = None - Output sampling rate, default out_fs=in_fs - fs: Optional[int] - Output sampling rate, required for .pcm input file, otherwise default = 48000 (Hz) - - Returns - ------- - None - """ - y = None - - if out_fs is None: - out_fs = in_fs - - # Create silence padding arrays - pad_pre = int(silence_pre * in_fs / 1000) - pad_post = int(silence_post * in_fs / 1000) - - # Read input files - for in_file in in_filenames: - x, in_fs = readfile(in_file, fs=in_fs) - - # pad with silence - pre = np.zeros([pad_pre, x.shape[1]]) - post = np.zeros([pad_post, x.shape[1]]) - x = np.concatenate([pre, x, post]) - - if y is None: - y = x - else: - y = np.concatenate([y, x]) - - y = audioarray.resample(y, in_fs, out_fs) - - writefile(out_file, y, fs=out_fs) - - -def combinefiles( - in_filenames: list, - out_file: str, - out_nchans: Optional[int] = None, - in_fs: Optional[int] = 48000, - out_fs: Optional[int] = None, - verbose: bool = False, -) -> None: - """Combines audio files into one multi-channel file - - Parameters - ---------- - in_filenames: list - Input list of filenmames (.pcm or .wav) - out_file: str - Output multi-channel audio file name (.pcm or .wav) - in_fs: Optional[int] = 48000 - Input sampling rate, default 48000 Hz - out_fs: Optional[int] = None - Output sampling rate, default out_fs=in_fs - fs: Optional[int] - Output sampling rate, required for .pcm input file, otherwise default = 48000 (Hz) - - Returns - ------- - None - - """ - - y = None - - if out_fs is None: - out_fs = in_fs - - # Read input files - for in_file in in_filenames: - # assign correct channel - x, in_fs = readfile(in_file, fs=in_fs) - if y is None: - y = x - else: - if x.shape[0] > y.shape[0]: - x = x[: y.shape[0], :] - elif y.shape[0] > x.shape[0]: - y = y[: x.shape[0], :] - y = np.column_stack([y, x]) - - y = audioarray.resample(y, in_fs, out_fs) - - writefile(out_file, y, fs=out_fs) - - -def splitfiles( - in_file: str, - out_filenames: list, - in_nchans: int, - in_fs: Optional[int] = 48000, - out_fs: Optional[int] = None, - verbose: bool = False, -) -> None: - """Split multi-channel audio files into individual mono files - - Parameters - ---------- - in_file: str - Input file name (.pcm or .wav) - out_filenames: list - List of output file names (.pcm or .wav) - in_fs: Optional[int] = 48000 - Input sampling rate, default 48000 Hz - out_fs: Optional[int] = None - Output sampling rate, default out_fs=in_fs - - Returns - ------- - None - - """ - # validation - if in_nchans is None: - raise ValueError("Number of channels to split must be specified!") - if in_nchans != len(out_filenames): - print( - "Split: Mismatch between number of channels and output filenames length. Truncating output filenames list." - ) - out_filenames = out_filenames[:in_nchans] - - x, in_fs = readfile(in_file, nchannels=in_nchans, fs=in_fs) - - # Write output files - for idx, out_file in enumerate(out_filenames): - # extract correct channel - y = x[:, idx] - - if out_fs is None: - out_fs = in_fs - - y = audioarray.resample(y, in_fs, out_fs) - - writefile(out_file, y, fs=out_fs) - - -def mono( - in_file: str, - out_file: str, - in_nchans: Optional[int] = 2, - in_fs: Optional[int] = 48000, - out_fs: Optional[int] = None, - verbose: bool = False, -) -> None: - """Creates a passive mono downmix for a multi-channel audio file - - Parameters - ---------- - in_file: str - Input file name (.pcm or .wav) - out_file: str - Output mono downmix audio file name (.pcm or .wav) - in_nchans: Optional[int] - Number of input channels, required for .pcm otherwise default = 2 - in_fs: Optional[int] = 48000 - Input sampling rate, required for .pcm, otherwise default = 48000 Hz - out_fs: Optional[int] = in_fs - Output sampling rate, default = in_fs - - Returns - ------- - None - - """ - - # read input - x, in_fs = readfile(in_file, nchannels=in_nchans, fs=in_fs) - - if out_fs is None: - out_fs = in_fs - - # do pasive downmix - m = np.sum(x, 1) - - if out_fs != in_fs: - m = audioarray.resample(m, in_fs, out_fs) - - # write output - writefile(out_file, m, fs=out_fs) - - -def mutefile( - in_file: str, - out_file: str, - in_fs: int = 48000, - in_nchans: Optional[int] = 1, - mute_chans: Optional[list] = None, -) -> None: - """Mute audio channels in file - - Parameters - ---------- - in_file: str - Input multi-channel audio filenmame (.pcm or .wav) - out_file: str - Output multi-channel audio file name (.pcm or .wav) - in_nchans: Optional[int])1 - Number of channels, default = 1, or in *.wav header - mute_chans: Optional[list] = None - Indices of channel to mute, default=None=all - - Returns - ------- - None - - """ - x, in_fs = readfile(in_file, fs=in_fs, nchannels=in_nchans) - - if mute_chans is not None: - mute_chans = np.array(mute_chans) - if len(x.shape) > 1: - x[:, mute_chans[mute_chans < x.shape[1]]] = 0 - else: - x[:, mute_chans[mute_chans < 1]] = 0 - else: - x = np.zeros(x.shape) - - writefile(out_file, x, fs=in_fs) - - -def delayfile( - in_file: str, - out_file: str, - in_fs: int = 48000, - in_nchans: Optional[int] = 1, - delay: float = 0, -) -> None: - """Delay an audio file by a specified duration (ms) - - Parameters - ---------- - in_file: str - Input multi-channel audio filename (.pcm or .wav) - out_file: str - Output multi-channel audio file name (.pcm or .wav) - in_nchans: Optional[int])1 - Number of channels, default = 1, or in *.wav header - delay: float = 0 - Delay in milliseconds (negative values advance file) - - Returns - ------- - None - - """ - delay = int(delay * in_fs / 1000) - delay_abs = np.abs(delay) - - x, in_fs = readfile(in_file, fs=in_fs, nchannels=in_nchans) - - # shift array - x = np.roll(x, delay, axis=0) - - # zero shifted out samples - if delay == 0: - pass - elif delay < 0: - x[-delay_abs:, :] = 0 - elif delay > 0: - x[:delay_abs, :] = 0 - - writefile(out_file, x, fs=in_fs) - - -def loudnessinfo( - in_sig: np.ndarray, - in_fs: Optional[int] = 48000, - in_format: Optional[str] = "MONO", - output_loudness: Optional[int] = -26, - loudness_tool: Optional[str] = "bs1770demo", - use_rms: Optional[bool] = False, -) -> Tuple[float, float]: - """Obtain loudness info about a signal - - Parameters - ---------- - in_sig: np.ndarray - Input audio signal - in_fs: Optional[int] - Input sampling rate - in_format: Optional[str] - Input spatial audio format - output_loudness: Optional[int] - Loudness level in LKFS/dBov - loudness_tool: Optional[str] - Loudness tool to use. Must be in $PATH. - Supported tools: - ITU-R BS.1770-4 / "bs1770demo" (default) - ITU-T P.56 / "sv56demo" - - - Returns - ------- - measured_loudness, scale_factor - - """ - - if platform.system() == "Windows": - null_file = "nul" - else: - null_file = "/dev/null" - - if shutil.which(loudness_tool) is None: - raise FileNotFoundError(f"The binary {loudness_tool} was not found in path!") - - in_spfmt = spatialaudioformat.Format(in_format=in_format) - - if not (in_spfmt.isheadphones or in_spfmt.isloudspeaker or in_spfmt.ambi_order > 1): - raise NotImplementedError( - f"{in_spfmt.name} is currently unsupported with {loudness_tool}." - ) - - if in_sig.shape[1] != in_spfmt.nchannels: - raise ValueError( - f"Mismatch in number of channels in signal of shape {in_sig.shape} of spatial audio format {in_format}!" - ) - - with TemporaryDirectory() as tmp_dir: - tmp_file = os.path.join(tmp_dir, "tmp_loudness.pcm") - - if "bs1770demo" in loudness_tool: - """ - ITU-R BS-1770 - """ - if in_fs != 48000: - raise ValueError(f"{loudness_tool} only supports 48kHz sampling rate!") - - cmd = [ - loudness_tool, - "-nchan", - str(in_spfmt.nchannels), # input nchan - "-lev", - str(output_loudness), # level - "-conf", - "", # config string - tmp_file, - null_file, - ] - if in_spfmt.ambi_order > 0 or in_spfmt.name == "MONO": - cmd[2] = "1" # -nchan - cmd[6] = "0" # -conf - if in_spfmt.isheadphones: - cmd[2] = "2" # -nchan - cmd[6] = "00" # -conf - elif in_spfmt.isloudspeaker: - # if loudspeaker position fulfills the criteria, set the config string to 1 for that index - conf_str = [ - str(int(abs(e) < 30 and (abs(a) >= 60 and abs(a) <= 120))) - for a, e in zip(in_spfmt.ls_azi, in_spfmt.ls_ele) - ] - for lfe in in_spfmt.lfe_index: - conf_str[lfe] = "L" - - cmd[6] = "".join(conf_str) - - elif "sv56demo" in loudness_tool: - """ - ITU-T P.56 - """ - if not (in_spfmt.ambi_order > 0 or in_spfmt.name == "MONO"): - raise ValueError( - f"{in_format} is currently unsupported with {loudness_tool}" - ) - - cmd = [ - loudness_tool, - "-lev", - str(output_loudness), - "-sf", - str(in_fs), - "-blk", - str(int(in_fs * 0.02)), - "-q", - ] - - if use_rms: - cmd.extend(["-rms"]) - - cmd.extend( - [ - tmp_file, - null_file, - ] - ) - - # write temporary file - if in_spfmt.ambi_order > 0 or in_spfmt.name == "MONO": - writefile(tmp_file, in_sig[:, 0], in_fs) - elif in_spfmt.isheadphones: - writefile(tmp_file, in_sig[:, :2], in_fs) - elif in_spfmt.isloudspeaker: - writefile(tmp_file, in_sig, in_fs) - - # run command - try: - result = sp.run(cmd, check=True, capture_output=True, text=True) - except sp.CalledProcessError as e: - raise SystemError( - f"Command returned non-zero exit status ({e.returncode}): {' '.join(e.cmd)}\n{e.stderr}\n{e.stdout}" - ) - - # parse output - if "bs1770demo" in loudness_tool: - measured_loudness = float(result.stdout.splitlines()[3].split(":")[1]) - scale_factor = float(result.stdout.splitlines()[-3].split(":")[1]) - elif "sv56demo" in loudness_tool: - try: - measured_loudness = float( - result.stdout.splitlines()[14] - .replace("Active speech level: ..........", "") - .replace("[dBov]", "") - .strip() - ) - scale_factor = float( - result.stdout.splitlines()[6] - .replace("Norm factor desired is: .......", "") - .replace("[times]", "") - .strip() - ) - except Exception: - raise ValueError(f"Error parsing sv56demo output!\n{result.stdout}") - else: - raise ValueError(f"Unsupported tool {loudness_tool}") - - return measured_loudness, scale_factor - - -def print_plot_play(x: np.ndarray, fs: int, text: Optional[str] = "") -> None: - """1. Prints information about an audio signal, 2. plots the waveform, and 3. Creates player - - Parameters - ---------- - x: np array - Input signal - fs: int - Input sampling rate - text: Optional[str] = '' - text to print - Returns - ------- - None - - """ - - plt = import_module("matplotlib.pyplot") - ipd = import_module("IPython.display") - - print("%s fs = %d, x.shape = %s, x.dtype = %s" % (text, fs, x.shape, x.dtype)) - plt.figure(figsize=(8, 2)) - plt.plot(x, color="gray") - plt.xlim([0, x.shape[0]]) - plt.xlabel("Time (samples)") - plt.ylabel("Amplitude") - plt.tight_layout() - plt.show() - ipd.display(ipd.Audio(data=x, rate=fs)) - - -def get_wav_file_info(filename: str) -> dict: - """ - Get the format information from a WAV file. - Return a dictionary with the format information - Parameters - ---------- - filename : string or open file handle - Input WAV file. - - Returns - ------- - Dictionary - - """ - - fid = open(filename, "rb") - - try: - riff = fid.read(4) - - if riff == b"RIFF": - binary_format = "<" - elif riff == b"RIFX": - binary_format = ">" - else: - raise ValueError("No RIFF!") - - wav_size = struct.unpack(f"{binary_format}I", fid.read(4))[0] - - wav_identifier = fid.read(4) - if wav_identifier != b"WAVE": - raise ValueError("No WAVE!") - - fmt_chunk_id = fid.read(4) - - if fmt_chunk_id == b"fmt ": - fmt_size = struct.unpack(f"{binary_format}I", fid.read(4))[0] - wav_format = struct.unpack(f"{binary_format}H", fid.read(2))[0] - channels = struct.unpack(f"{binary_format}H", fid.read(2))[0] - fs = struct.unpack(f"{binary_format}I", fid.read(4))[0] - bytes_per_second = struct.unpack(f"{binary_format}I", fid.read(4))[0] - block_align = struct.unpack(f"{binary_format}H", fid.read(2))[0] - bit_depth = struct.unpack(f"{binary_format}H", fid.read(2))[0] - rem_bytes = fmt_size - 16 - ext_param_size = 0 - ext_param = None - if rem_bytes: - ext_param_size = struct.unpack(f"{binary_format}H", fid.read(2))[0] - - if ext_param_size: - ext_param = fid.read(ext_param_size) - else: - raise ValueError("No or corrupt fmt chunk!") - - finally: - fid.close() - - return { - "size": wav_size, - "format_tag": wav_format, - "channels": channels, - "fs": fs, - "bytes_per_second": bytes_per_second, - "block_align": block_align, - "bit_depth": bit_depth, - "ext_param_size": ext_param_size, - "ext_param": ext_param, - } - - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser( - description="Tool for basic operations on audio files" - ) - subparsers = parser.add_subparsers() - - def pre_trim_wrapper(pre_trim_args): - if pre_trim_args.input_file.endswith(".wav"): - input_file_properties = get_wav_file_info(pre_trim_args.input_file) - else: - print("Delay currently only supported with WAV file input") - exit(-1) - - x, _ = readfile( - pre_trim_args.input_file, - fs=input_file_properties["fs"], - nchannels=input_file_properties["channels"], - ) - trim = int(pre_trim_args.amount_in_ms * input_file_properties["fs"] / 1000) - x = x[trim:] - writefile(pre_trim_args.output_file, x, fs=input_file_properties["fs"]) - - parser_delay = subparsers.add_parser( - "pre-trim", help="Trim a given amount of content from the beginning of the file" - ) - parser_delay.add_argument( - "amount_in_ms", type=float, help="Trim amount milliseconds." - ) - parser_delay.add_argument("input_file") - parser_delay.add_argument("output_file") - parser_delay.set_defaults(func=pre_trim_wrapper) - - def convert_wrapper(convert_args): - if not convert_args.input_file.endswith(".wav"): - print("Convert currently only supported with WAV file input") - exit(-1) - - convertfile(convert_args.input_file, convert_args.output_file) - - parser_convert = subparsers.add_parser( - "convert", - help="Convert file format (output file extension determines output format)", - ) - parser_convert.add_argument("input_file") - parser_convert.add_argument("output_file") - parser_convert.set_defaults(func=convert_wrapper) - - args = parser.parse_args() - args.func(args) diff --git a/scripts/pyaudio3dtools/constants.py b/scripts/pyaudio3dtools/constants.py deleted file mode 100644 index 92fd5a709e2ec7db67b8bdf7afbeff7cd2ebd56f..0000000000000000000000000000000000000000 --- a/scripts/pyaudio3dtools/constants.py +++ /dev/null @@ -1,392 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022 Baseline Development Group with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies OY, Orange, - Panasonic Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The Baseline Development Group consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies OY, Orange, - Panasonic Corporation, Qualcomm Technologies, Inc., and VoiceAge Corporation retain full ownership - rights in their respective contributions in the software. No license of any kind, including but not - limited to patent license, of any foregoing parties is hereby granted by implication, estoppel or - otherwise. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and/or fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import numpy as np - -IVAS_FRAME_LEN_MS = 20 - -IVAS_CICPX_TO_MONO = np.array( - [ - [ - 1, - 1, - 1, - 1, - 0.79999995, - 0.79999995, - 0.79999995, - 0.79999995, - 0.849999964, - 0.849999964, - 0.849999964, - 0.849999964, - ] - ] -).T - -IVAS_CICPX_TO_STEREO = np.array( - [ - [1, 0], - [0, 1], - [np.sqrt(0.5), np.sqrt(0.5)], - [np.sqrt(0.5), np.sqrt(0.5)], - [0.79999995, 0], - [0, 0.79999995], - [0.79999995, 0], - [0, 0.79999995], - [0.849999964, 0], - [0, 0.849999964], - [0.849999964, 0], - [0, 0.849999964], - ] -) - -# downmix matrices -IVAS_CICP12_TO_6 = np.zeros(8 * 6) -IVAS_CICP12_TO_6[[0, 7, 14, 21, 28, 35, 40, 47]] = 1 -IVAS_CICP12_TO_6 = IVAS_CICP12_TO_6.reshape(8, 6) - -IVAS_CICP14_TO_6 = np.zeros(8 * 6) -IVAS_CICP14_TO_6[[0, 7, 14, 21, 28, 35]] = 1 -IVAS_CICP14_TO_6[[36, 43]] = 0.849999964 -IVAS_CICP14_TO_6 = IVAS_CICP14_TO_6.reshape(8, 6) - -IVAS_CICP16_TO_6 = np.zeros(10 * 6) -IVAS_CICP16_TO_6[[0, 7, 14, 21, 28, 35]] = 1 -IVAS_CICP16_TO_6[[36, 43, 52, 59]] = 0.849999964 -IVAS_CICP16_TO_6 = IVAS_CICP16_TO_6.reshape(10, 6) - -IVAS_CICP16_TO_12 = np.zeros(10 * 8) -IVAS_CICP16_TO_12[[0, 9, 18, 27, 36, 45]] = 1 -IVAS_CICP16_TO_12[[48, 57, 68, 77]] = 0.849999964 -IVAS_CICP16_TO_12 = IVAS_CICP16_TO_12.reshape(10, 8) - -IVAS_CICP16_TO_14 = np.zeros(10 * 8) -IVAS_CICP16_TO_14[[0, 9, 18, 27, 36, 45, 54, 63]] = 1 -IVAS_CICP16_TO_14[[68, 77]] = 0.849999964 -IVAS_CICP16_TO_14 = IVAS_CICP16_TO_14.reshape(10, 8) - -IVAS_CICP19_TO_6 = np.zeros(12 * 6) -IVAS_CICP19_TO_6[[0, 7, 14, 21, 28, 35]] = 1 -IVAS_CICP19_TO_6[[36, 43]] = 0.367322683 -IVAS_CICP19_TO_6[[48, 55, 64, 71]] = 0.849999964 -IVAS_CICP19_TO_6[[40, 47]] = 0.930093586 -IVAS_CICP19_TO_6 = IVAS_CICP19_TO_6.reshape(12, 6) - -IVAS_CICP19_TO_12 = np.zeros(12 * 8) -IVAS_CICP19_TO_12[[0, 9, 18, 27, 38, 47]] = 1 -IVAS_CICP19_TO_12[[48, 57]] = 0.367322683 -IVAS_CICP19_TO_12[[64, 73, 84, 93]] = 0.849999964 -IVAS_CICP19_TO_12[[52, 61]] = 0.930093586 -IVAS_CICP19_TO_12 = IVAS_CICP19_TO_12.reshape(12, 8) - -IVAS_CICP19_TO_14 = np.zeros(12 * 8) -IVAS_CICP19_TO_14[[0, 9, 18, 27, 36, 45, 70, 79]] = 1 -IVAS_CICP19_TO_14[[48, 57]] = 0.367322683 -IVAS_CICP19_TO_14[[84, 93]] = 0.849999964 -IVAS_CICP19_TO_14[[52, 61]] = 0.930093586 -IVAS_CICP19_TO_14 = IVAS_CICP19_TO_14.reshape(12, 8) - -IVAS_CICP19_TO_16 = np.zeros(12 * 10) -IVAS_CICP19_TO_16[[0, 11, 22, 33, 44, 55, 86, 97, 108, 119]] = 1 -IVAS_CICP19_TO_16[[60, 71]] = 0.367322683 -IVAS_CICP19_TO_16[[64, 75]] = 0.930093586 -IVAS_CICP19_TO_16 = IVAS_CICP19_TO_16.reshape(12, 10) - -# upmix matrices -IVAS_MONO_TO_CICPX = np.zeros([1, 12]) -IVAS_MONO_TO_CICPX[0, 2] = 1 - -IVAS_STEREO_TO_CICPX = np.zeros([2, 12]) -IVAS_STEREO_TO_CICPX[0, 0] = 1 -IVAS_STEREO_TO_CICPX[1, 1] = 1 - -IVAS_CICP12_TO_14 = np.zeros(8 * 8) -IVAS_CICP12_TO_14[[0, 9, 18, 27, 36, 45, 52, 61]] = 1 -IVAS_CICP12_TO_14 = IVAS_CICP12_TO_14.reshape(8, 8) - -IVAS_CICP12_TO_16 = np.zeros(8 * 10) -IVAS_CICP12_TO_16[[0, 11, 22, 33, 44, 55, 64, 75]] = 1 -IVAS_CICP12_TO_16 = IVAS_CICP12_TO_16.reshape(8, 10) - -IVAS_CICP12_TO_19 = np.zeros(8 * 12) -IVAS_CICP12_TO_19[[0, 13, 26, 39, 54, 67, 76, 89]] = 1 -IVAS_CICP12_TO_19 = IVAS_CICP12_TO_19.reshape(8, 12) - -IVAS_CICP14_TO_19 = np.zeros(8 * 12) -IVAS_CICP14_TO_19[[0, 13, 26, 39, 52, 65, 80, 93]] = 1 -IVAS_CICP14_TO_19 = IVAS_CICP14_TO_19.reshape(8, 12) - -IVAS_CICP16_TO_19 = np.zeros(10 * 12) -IVAS_CICP16_TO_19[[0, 13, 26, 39, 52, 65, 80, 93, 106, 119]] = 1 -IVAS_CICP16_TO_19 = IVAS_CICP16_TO_19.reshape(10, 12) - -# mapping dict -IVAS_MC_CONVERSION = { - "MONO": { - # upmix - "5_1": IVAS_MONO_TO_CICPX[:, :6], - "7_1": IVAS_MONO_TO_CICPX[:, :8], - "5_1_2": IVAS_MONO_TO_CICPX[:, :8], - "5_1_4": IVAS_MONO_TO_CICPX[:, :10], - "7_1_4": IVAS_MONO_TO_CICPX[:, :12], - }, - "STEREO": { - # upmix - "5_1": IVAS_STEREO_TO_CICPX[:, :6], - "7_1": IVAS_STEREO_TO_CICPX[:, :8], - "5_1_2": IVAS_STEREO_TO_CICPX[:, :8], - "5_1_4": IVAS_STEREO_TO_CICPX[:, :10], - "7_1_4": IVAS_STEREO_TO_CICPX[:, :12], - }, - "5_1": { - # downmix - "MONO": IVAS_CICPX_TO_MONO[:6, :], - "STEREO": IVAS_CICPX_TO_STEREO[:6, :], - # upmix - "7_1": np.pad(np.eye(6), [[0, 0], [0, 2]]), - "5_1_2": np.pad(np.eye(6), [[0, 0], [0, 2]]), - "5_1_4": np.pad(np.eye(6), [[0, 0], [0, 4]]), - "7_1_4": np.pad(np.eye(6), [[0, 0], [0, 6]]), - }, - "7_1": { - # downmix - "MONO": IVAS_CICPX_TO_MONO[:8, :], - "STEREO": IVAS_CICPX_TO_STEREO[:8, :], - "5_1": IVAS_CICP12_TO_6, - # upmix - "5_1_2": IVAS_CICP12_TO_14, - "5_1_4": IVAS_CICP12_TO_16, - "7_1_4": IVAS_CICP12_TO_19, - }, - "5_1_2": { - # downmix - "MONO": np.vstack([IVAS_CICPX_TO_MONO[:6, :], IVAS_CICPX_TO_MONO[-2:, :]]), - "STEREO": np.vstack( - [IVAS_CICPX_TO_STEREO[:6, :], IVAS_CICPX_TO_STEREO[-2:, :]] - ), - "5_1": IVAS_CICP14_TO_6, - "7_1": np.pad(IVAS_CICP14_TO_6, [[0, 0], [0, 2]]), - # upmix - "5_1_4": np.pad(np.eye(8), [[0, 0], [0, 2]]), - "7_1_4": IVAS_CICP14_TO_19, - }, - "5_1_4": { - # downmix - "MONO": np.vstack([IVAS_CICPX_TO_MONO[:6, :], IVAS_CICPX_TO_MONO[-4:, :]]), - "STEREO": np.vstack( - [IVAS_CICPX_TO_STEREO[:6, :], IVAS_CICPX_TO_STEREO[-4:, :]] - ), - "5_1": IVAS_CICP16_TO_6, - "7_1": IVAS_CICP16_TO_12, - "5_1_2": IVAS_CICP16_TO_14, - # upmix - "7_1_4": IVAS_CICP16_TO_19, - }, - "7_1_4": { - # downmix - "MONO": IVAS_CICPX_TO_MONO, - "STEREO": IVAS_CICPX_TO_STEREO, - "5_1": IVAS_CICP19_TO_6, - "7_1": IVAS_CICP19_TO_12, - "5_1_2": IVAS_CICP19_TO_14, - "5_1_4": IVAS_CICP19_TO_16, - }, -} - -# LFE 120 Hz LPF filter coefficients -IVAS_LPF_4_BUTTER_48K_SOS = np.array( - [ - [ - 5.12617881476274e-09, - 1.02523584294987e-08, - 5.12617879059970e-09, - 1, - -1.96875982668433, - 0.969044914826862, - ], - [ - 1, - 1.99999984394358, - 1.00000000471366, - 1, - -1.98677297369091, - 0.987060670205863, - ], - ] -) - -T_DESIGN_11_AZI = np.array( - [ - 132.927291884332, - -83.9349499672527, - 8.47410038634525, - -113.340833834572, - -103.265909909537, - -33.2370360923825, - 21.8564347471830, - -156.539486489880, - -64.2647531387317, - 165.779530068738, - -25.2028339893249, - -97.0037973959711, - 27.8546391256925, - 153.214218975132, - -155.061608694663, - -11.8421354925543, - 80.5387312016125, - -42.0561606270165, - -31.2233262205060, - 38.8379041944063, - 93.7606877469492, - -84.7560200078398, - 7.75536818082863, - -122.276883381108, - 46.8012705252113, - -24.7686335284573, - 99.8904719062334, - -134.783996960185, - -83.0880230164493, - 60.1281736000420, - 152.644656278084, - 29.7576658909417, - 40.7793187974476, - 110.183927562412, - 165.652065916454, - -12.9926632105736, - 79.7359893585681, - -50.5245271190884, - 118.923930267733, - 47.2202861862577, - 171.925276523721, - -62.5145800558502, - -11.1156697680531, - 132.018041099963, - -135.355486412425, - 102.370921576708, - 112.739282398012, - -178.304963670831, - -122.319932198534, - 59.0763464570905, - 151.704200334501, - 21.3763364190503, - -169.005476417779, - 118.980811786769, - -116.089295979010, - 9.64767870353308, - 60.8933243657771, - -156.021526862757, - -63.4602993325163, - 174.929787427393, - -175.288768596346, - -105.951907934032, - -50.1928304519800, - 131.358266702971, - -136.296815007542, - 93.5644603506407, - -97.0840116473627, - -169.158278888619, - -44.1323835471345, - 81.4795403841382, - ] -) - -T_DESIGN_11_ELE = np.array( - [ - 7.69254738757899, - -23.7300652200871, - 23.5127556185301, - 70.4225940747938, - -9.89694439538752, - -70.7513316063095, - -26.4618527647561, - 47.7764936689044, - -7.72047049524459, - 44.5343602375216, - 26.3897904767450, - -44.6578850137166, - 9.76703456924600, - -47.7053318175498, - 7.45302934155972, - -23.5901209534773, - 23.7194484034707, - 70.4382693912270, - -9.83541588740259, - -70.4980825105727, - -26.2949218109204, - 47.6148028805222, - -7.51718499746626, - 44.2862347125773, - 26.6442619674660, - -44.5693707254340, - 9.91271928508000, - -47.9599550372574, - 7.29679922953795, - -23.3445981426306, - 23.6415261666079, - 70.6843143997832, - -9.58140351749889, - -70.3934534122902, - -26.4258159091605, - 47.7510668062369, - -7.30853603036844, - 44.2632768570349, - 26.7140614474957, - -44.3149733480527, - 9.75899721561506, - -48.0361913333593, - 7.43965099805872, - -23.3326075548841, - 23.3868959687598, - 70.8219078016791, - -9.48596399169388, - -70.5801867828491, - -26.6740262349265, - 47.9978414043199, - -7.38276167631068, - 44.4970603752708, - 26.5024990214418, - -44.2461913308458, - 9.51845076548334, - -47.8281351088411, - 7.68427447425834, - -23.5706842106942, - 23.3074499244045, - 70.6586472132300, - -9.68088860263008, - -70.8026785673948, - -26.6963451935976, - 48.0136296461397, - -7.63734823159200, - 44.6651234222196, - 26.3023490002159, - -44.4576351865647, - 9.52341455917443, - -47.6242211091394, - ] -) diff --git a/scripts/pyaudio3dtools/spatialaudioformat.py b/scripts/pyaudio3dtools/spatialaudioformat.py deleted file mode 100644 index 524e4d61f50fe50d05f75093be563b015f0468e9..0000000000000000000000000000000000000000 --- a/scripts/pyaudio3dtools/spatialaudioformat.py +++ /dev/null @@ -1,470 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import os - -import numpy as np - -_format_configs = { - "MONO": { - "name": "MONO", - "ambi_order": 0, - "isplanar": False, - "nchannels": 1, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [0], - "ls_ele": [0], - "lfe_index": [], - "altname": "HOA0", - }, - "STEREO": { - "name": "STEREO", - "ambi_order": -1, - "isplanar": False, - "nchannels": 2, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [30, -30], - "ls_ele": [0, 0], - "lfe_index": [], - "altname": "cicp2", - }, - # binaural formats - "BINAURAL": { - "name": "BINAURAL", - "ambi_order": -1, - "isplanar": None, - "nchannels": 2, - "isloudspeaker": False, - "isheadphones": True, - "lfe_index": [], - "altname": "binaural", - }, - "BINAURAL_ROOM": { - "name": "BINAURAL_ROOM", - "ambi_order": -1, - "isplanar": None, - "nchannels": 2, - "isloudspeaker": False, - "isheadphones": True, - "lfe_index": [], - "altname": "binaural_room", - }, - "BINAURAL_REF": { - "name": "BINAURAL_REF", - "ambi_order": -1, - "isplanar": None, - "nchannels": 2, - "isloudspeaker": False, - "isheadphones": True, - "lfe_index": [], - "altname": "binaural_ref", - }, - "BINAURAL_ROOM_REF": { - "name": "BINAURAL_ROOM_REF", - "ambi_order": -1, - "isplanar": None, - "nchannels": 2, - "isloudspeaker": False, - "isheadphones": True, - "lfe_index": [], - "altname": "binaural_room_ref", - }, - # loudspeaker formats - "5_1": { - "name": "5_1", - "ambi_order": -1, - "isplanar": True, - "nchannels": 6, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [30, -30, 0, 0, 110, -110], - "ls_ele": [0, 0, 0, 0, 0, 0], - "lfe_index": [3], - "altname": "cicp6", - }, - "7_1": { - "name": "7_1", - "ambi_order": -1, - "isplanar": True, - "nchannels": 8, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [30, -30, 0, 0, 110, -110, 135, -135], - "ls_ele": [0, 0, 0, 0, 0, 0, 0, 0], - "lfe_index": [3], - "altname": "cicp12", - }, - "5_1_2": { - "name": "5_1_2", - "ambi_order": -1, - "isplanar": False, - "nchannels": 8, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [30, -30, 0, 0, 110, -110, 30, -30], - "ls_ele": [0, 0, 0, 0, 0, 0, 35, 35], - "lfe_index": [3], - "altname": "cicp14", - }, - "5_1_4": { - "name": "5_1_4", - "ambi_order": -1, - "isplanar": False, - "nchannels": 10, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [30, -30, 0, 0, 110, -110, 30, -30, 110, -110], - "ls_ele": [0, 0, 0, 0, 0, 0, 35, 35, 35, 35], - "lfe_index": [3], - "altname": "cicp16", - }, - "7_1_4": { - "name": "7_1_4", - "ambi_order": -1, - "isplanar": False, - "nchannels": 12, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [30, -30, 0, 0, 135, -135, 90, -90, 30, -30, 135, -135], - "ls_ele": [0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 35], - "lfe_index": [3], - "altname": "cicp19", - }, - "COMBINED": { - "name": "COMBINED", - "ambi_order": -1, - "isplanar": False, - "nchannels": 15, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": [ - 30, - -30, - 0, - 135, - -135, - 110, - -110, - 90, - -90, - 30, - -30, - 110, - -110, - 135, - -135, - ], - "ls_ele": [0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 35, 35, 35], - "lfe_index": None, - "altname": "combined", - }, - "CUSTOM_LS": { - "name": "CUSTOM_LS", - "ambi_order": -1, - "isplanar": False, - "nchannels": -1, - "isloudspeaker": True, - "isheadphones": False, - "ls_azi": None, - "ls_ele": None, - "lfe_index": None, - "altname": "CUSTOM_LS", - "config_file": "layout.txt", - }, - # ambisonics - "FOA": { - "name": "FOA", - "ambi_order": 1, - "isplanar": False, - "nchannels": 4, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "sba1", - }, - "PLANARFOA": { - "name": "PLANARFOA", - "ambi_order": 1, - "isplanar": True, - "nchannels": 4, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "sba1", - }, - "HOA2": { - "name": "HOA2", - "ambi_order": 2, - "isplanar": False, - "nchannels": 9, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "sba2", - }, - "PLANARHOA2": { - "name": "PLANARHOA2", - "ambi_order": 2, - "isplanar": True, - "nchannels": 9, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "sba2", - }, - "HOA3": { - "name": "HOA3", - "ambi_order": 3, - "isplanar": False, - "nchannels": 16, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "sba3", - }, - "PLANARHOA3": { - "name": "PLANARHOA3", - "ambi_order": 3, - "isplanar": True, - "nchannels": 16, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "sba3", - }, - # ism - "ISM": { - "name": "ISM", - "ambi_order": -1, - "isplanar": None, - "nchannels": -1, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "ism", - }, - "ISM1": { - "name": "ISM1", - "ambi_order": -1, - "isplanar": None, - "nchannels": 1, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "ism1", - }, - "ISM2": { - "name": "ISM2", - "ambi_order": -1, - "isplanar": None, - "nchannels": 2, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "ism2", - }, - "ISM3": { - "name": "ISM3", - "ambi_order": -1, - "isplanar": None, - "nchannels": 3, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "ism3", - }, - "ISM4": { - "name": "ISM4", - "ambi_order": -1, - "isplanar": None, - "nchannels": 4, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "ism4", - }, - # masa - "MASA": { - "name": "MASA", - "ambi_order": -1, - "isplanar": None, - "nchannels": -1, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "masa", - }, - "MASA1": { - "name": "MASA1", - "ambi_order": -1, - "isplanar": None, - "nchannels": 1, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "masa1", - }, - "MASA2": { - "name": "MASA2", - "ambi_order": -1, - "isplanar": None, - "nchannels": 2, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "masa2", - }, - # metadata format - "META": { - "name": "META", - "ambi_order": -1, - "isplanar": None, - "nchannels": -1, - "isloudspeaker": False, - "isheadphones": False, - "lfe_index": [], - "altname": "meta", - }, -} - -# Channel indices of planar Ambisonic components of ACN -_planar_hoa_channels = np.array([0, 1, 3, 4, 8, 9, 15]) -# Channel indices of vertical Ambisonic components of ACN -_vert_hoa_channels = np.array([2, 5, 6, 7, 10, 11, 12, 13, 14]) - - -class Format: - def __init__(self, in_format: str = "FOA"): - self.name = None - self.altname = None - self.ambi_order = -1 - self.nchannels = None - self.isloudspeaker = False - self.isheadphones = False - self.lfe_index = [] - - # if it is a path, then treat as custom layout - if not isinstance(in_format, str) or in_format[-4:].lower() == ".txt": - with open(in_format, "r") as f_ls: - self.ls_azi = [ - float(x.strip()) for x in f_ls.readline().strip().split(",") - ] - self.ls_ele = [ - float(x.strip()) for x in f_ls.readline().strip().split(",") - ] - try: - self.lfe_index = [ - int(x.strip()) for x in f_ls.readline().strip().split(",") - ] - except: - self.lfe_index = [] - - if self.lfe_index: - [self.ls_azi.insert(i, 0.0) for i in self.lfe_index] - [self.ls_ele.insert(i, 0.0) for i in self.lfe_index] - - self.name = os.path.basename(in_format).replace(".txt", "") - self.altname = "CUSTOM_LS" - self.config_file = str(in_format) - self.isloudspeaker = True - self.nchannels = len(self.ls_azi) - self.isplanar = np.all([e == 0.0 for e in self.ls_ele]) - # search in predefined dictionary - else: - for config_name, config_dict in _format_configs.items(): - if ( - in_format.upper() == config_name - or in_format.upper() == config_dict["altname"].upper() - ): - for k, v in _format_configs[config_name].items(): - setattr(self, k, v) - - if not self.name: - raise SystemExit( - f"Spatial audio format '{in_format}' not supported. If 'EXT' is used, please change to ISM or MASA. Ensure it is same as 'in_format'" - ) - - def get_nchannels(self): - return self.nchannels - - def print_info(self): - attrs = vars(self) - for item in attrs: - print(f" {item}: {attrs[item]}") - - @staticmethod - def ambiorder_from_nchannels(out_nchans: int) -> int: - return int(np.sqrt(out_nchans) - 1) - - @staticmethod - def nchannels_from_ambiorder(ambi_order: int) -> int: - return (ambi_order + 1) ** 2 - - @staticmethod - def zero_vert_hoa_channels(x: np.ndarray) -> np.ndarray: - x[:, _vert_hoa_channels[_vert_hoa_channels < x.shape[1]]] = 0.0 - return x - - @staticmethod - def get_vert_hoa_channels() -> np.ndarray: - return _vert_hoa_channels - - @staticmethod - def list_all(long_descition: bool = False): - for key, value in _format_configs.items(): - if long_descition is True: - print(key, value) - else: - print(key) - - @staticmethod - def detect_format(nchannels: int) -> str: - config_name = None - - for k, v in _format_configs.items(): - if v["nchannels"] == nchannels: - config_name = v["name"] - break - - if config_name is None: - raise SystemExit("Spatial audio format not found") - - return config_name - - @staticmethod - def get_format_dict(in_format: str): - for config_name in _format_configs: - if in_format.upper() == config_name: - return _format_configs[config_name] - return None diff --git a/scripts/pyivastest/IvasModeRunner.py b/scripts/pyivastest/IvasModeRunner.py index 85515e824d25aec5ddd840002e6fa448b98ee8fb..9d774e61e0c13de42266739011613910afe4dbe0 100644 --- a/scripts/pyivastest/IvasModeRunner.py +++ b/scripts/pyivastest/IvasModeRunner.py @@ -38,6 +38,7 @@ import time import logging import logging.handlers import platform +import sys import traceback from copy import deepcopy import re @@ -45,9 +46,10 @@ import json from pyivastest import IvasModeCollector from pyivastest import constants -from pyaudio3dtools.spatialaudioformat import Format as spformat -import pyaudio3dtools.audiofile as af -import pyaudio3dtools.audioarray as ar + +sys.path.append(f"{os.path.dirname(__file__)}/../ivas-processing-scripts") +import ivas_processing_scripts.audiotools.audiofile as af +import ivas_processing_scripts.audiotools.audioarray as ar BW_TO_SR = {"nb": 8, "wb": 16, "swb": 32, "fb": 48} IN_CONFIG_TO_COPY_AUDIO = { "SBA": [], @@ -1664,4 +1666,4 @@ class IvasModeRunner(IvasModeCollector.IvasModeCollector): class NoInputForAnyModesFound(Exception): - pass \ No newline at end of file + pass diff --git a/scripts/self_test.py b/scripts/self_test.py index d7118832db57d14fad8db5451230c3ab79ae5de2..925084262e3cfb0807744381e956e2a4a976f935 100755 --- a/scripts/self_test.py +++ b/scripts/self_test.py @@ -43,7 +43,7 @@ import pyivastest.ivas_svn as svn import pyivastest.constants as constants import operator import sys -import pyaudio3dtools +import ivas_processing_scripts.audiotools import platform import numpy import multiprocessing @@ -350,11 +350,11 @@ class SelfTest(IvasScriptsCommon.IvasScript): # one frame diff, might be a delay change, still test for BE, but cut to the same length first n_samples_for_test = min(n_samples_test, n_samples_ref) if n_samples_for_test != n_samples_ref: - sig_ref = pyaudio3dtools.audioarray.cut( + sig_ref = audiotools.audioarray.cut( sig_ref, (0, n_samples_for_test) ) else: - sig_test = pyaudio3dtools.audioarray.cut( + sig_test = audiotools.audioarray.cut( sig_test, (0, n_samples_for_test) ) else: @@ -362,7 +362,7 @@ class SelfTest(IvasScriptsCommon.IvasScript): test_sample_num_diff = True if not test_failed: try: - cmp_result = pyaudio3dtools.audioarray.compare( + cmp_result = audiotools.audioarray.compare( sig_test, sig_ref, fs ) if not cmp_result["bitexact"]: @@ -519,7 +519,7 @@ class SelfTest(IvasScriptsCommon.IvasScript): decoded_item_ref_mono = None decoded_item_test_mono = None if mode_dict["in_fs"] > pesq_fs or in_nchans > 1: - pyaudio3dtools.audiofile.mono( + audiotools.audiofile.mono( orig_item, orig_mono, in_nchans=in_nchans, @@ -553,7 +553,7 @@ class SelfTest(IvasScriptsCommon.IvasScript): os.close(fd_ref) if mode_dict["out_fs"] > pesq_fs or n_channels > 1: - pyaudio3dtools.audiofile.mono( + audiotools.audiofile.mono( decoded_item_ref, decoded_item_ref_mono, in_nchans=n_channels, @@ -617,7 +617,7 @@ class SelfTest(IvasScriptsCommon.IvasScript): fd_test, decoded_item_test_mono = tempfile.mkstemp((".pcm")) os.close(fd_test) if mode_dict["out_fs"] > pesq_fs or n_channels > 1: - pyaudio3dtools.audiofile.mono( + audiotools.audiofile.mono( decoded_item_test, decoded_item_test_mono, in_nchans=n_channels, diff --git a/tests/cmp_pcm.py b/tests/cmp_pcm.py index a54aa2cf11fc56b6b5650adbe2b19855281f5227..d8450d4ac202972cbb271f9972842982ca38284d 100755 --- a/tests/cmp_pcm.py +++ b/tests/cmp_pcm.py @@ -6,8 +6,10 @@ import argparse THIS_PATH = os.path.join(os.getcwd(), __file__) sys.path.append(os.path.join(os.path.dirname(THIS_PATH), "../scripts")) +sys.path.append(os.path.join(os.path.dirname(THIS_PATH), "../scripts/ivas-processing-scripts")) -import pyaudio3dtools +from ivas_processing_scripts.audiotools.audiofile import read +from ivas_processing_scripts.audiotools.audioarray import compare import pyivastest import numpy as np @@ -31,8 +33,8 @@ def cmp_pcm(file1, file2, out_config, fs) -> (int, str): out_config_in_file_names = out_config nchannels = pyivastest.constants.OC_TO_NCHANNELS[out_config.upper()] - s1, _ = pyaudio3dtools.audiofile.readfile(file1, nchannels, fs, outdtype=np.int16) - s2, _ = pyaudio3dtools.audiofile.readfile(file2, nchannels, fs, outdtype=np.int16) + s1, _ = read(file1, nchannels, fs, outdtype=np.int16) + s2, _ = read(file2, nchannels, fs, outdtype=np.int16) if s1.shape != s2.shape: print( @@ -41,7 +43,7 @@ def cmp_pcm(file1, file2, out_config, fs) -> (int, str): ) return 1, "FAIL: File lengths differ" - cmp_result = pyaudio3dtools.audioarray.compare(s1, s2, fs, per_frame=False) + cmp_result = compare(s1, s2, fs, per_frame=False) if cmp_result["bitexact"]: return 0, "SUCCESS: Files are bitexact" diff --git a/tests/cut_pcm.py b/tests/cut_pcm.py index 99a6f6fc10bb6a2327a144327b8eb7fae165cba5..0ca1e93d3562821718cf3cff0e634291a83f8788 100755 --- a/tests/cut_pcm.py +++ b/tests/cut_pcm.py @@ -51,9 +51,9 @@ import numpy as np from pathlib import Path HERE = Path(__file__).parent.resolve() -SCRIPTS_DIR = str(HERE.joinpath("../scripts").resolve()) +SCRIPTS_DIR = str(HERE.joinpath("../scripts/ivas-processing-scripts").resolve()) sys.path.append(SCRIPTS_DIR) -from pyaudio3dtools import audiofile +from ivas_processing_scripts.audiotools import audiofile def usage(): @@ -76,7 +76,7 @@ def cut_samples(in_file, out_file, num_channels, sample_rate, start, duration, g dur_sec = float(duration) gain_f = float(gain) - s, fs = audiofile.readfile(in_file, num_channels, fs, outdtype="float") + s, fs = audiofile.read(in_file, num_channels, fs, outdtype="float") num_in_samples = s.shape[0] num_samples_to_skip = int(start_sec * fs) @@ -89,7 +89,7 @@ def cut_samples(in_file, out_file, num_channels, sample_rate, start, duration, g s_out = s[num_samples_to_skip:num_samples_to_skip + dur_samples, :] * gain_f - audiofile.writefile(out_file, s_out, fs) + audiofile.write(out_file, s_out, fs) def main(argv): diff --git a/tests/prepare_pytests.py b/tests/prepare_pytests.py index 9e50ea90d11462ca46c64c9c299c8911d63e7f39..3f0737e11a6f28c9a2f6a74fcab7e1f0dae25290 100755 --- a/tests/prepare_pytests.py +++ b/tests/prepare_pytests.py @@ -34,13 +34,23 @@ __doc__ = """ Script to prepare the pytest tests. """ -import os import sys +from pathlib import Path + +try: + here = Path(__file__).parent.resolve() + scripts_dir = str(here.joinpath("../scripts/ivas-processing-scripts").resolve()) + sys.path.append(scripts_dir) + import ivas_processing_scripts +except ImportError: + print("Can not import ivas_processing_scripts - please run 'git submodule update --init' and try again") + sys.exit(0) + +import os import argparse import subprocess import platform -from pathlib import Path from create_short_testvectors import create_short_testvectors BIN_EXT = ".exe" if platform.system() == "Windows" else "" @@ -96,7 +106,7 @@ def main(argv): if platform.system() == "Windows": base_cmd = ["pytest"] else: - base_cmd = ["python3", "-m", "pytest"] + base_cmd = ["python3.10", "-m", "pytest"] if args.param_file: base_cmd += ["tests/codec_be_on_mr_nonselection/test_param_file.py", "--param_file", args.param_file] else: diff --git a/tests/renderer/compare_audio.py b/tests/renderer/compare_audio.py index 3fc5c064a8e49b44e028b820502f40ab1ca9d619..7cb2bcf488941720056deb7aa687bbbd2dbd18ff 100644 --- a/tests/renderer/compare_audio.py +++ b/tests/renderer/compare_audio.py @@ -34,8 +34,8 @@ import numpy as np from .constants import SCRIPTS_DIR -sys.path.append(str(SCRIPTS_DIR)) -from pyaudio3dtools.audioarray import getdelay +sys.path.append(str(SCRIPTS_DIR.joinpath("ivas-processing-scripts"))) +from ivas_processing_scripts.audiotools.audioarray import getdelay def compare_audio_arrays( diff --git a/tests/renderer/constants.py b/tests/renderer/constants.py index 877a1454c2a425e13477f2a751f4dea23e071d6c..3b06e75698bf2ae01b38272b02dcce6d8bab42f7 100644 --- a/tests/renderer/constants.py +++ b/tests/renderer/constants.py @@ -205,7 +205,7 @@ pass_snr = dict() # not relevant for tests anymore, should be deprecated soon _pass_snr = { #################################################################### # - # External Renderer vs Standalone and pyaudio3dtools renderers tests + # External Renderer vs Standalone and python renderers tests # #################################################################### # Failure reason: Renderer uses getRSH() with int16_t vs float in python @@ -257,7 +257,7 @@ _pass_snr = { "test_custom_ls_output[HOA3-itu_4+5+1]": 30, "test_custom_ls_output[HOA3-t_design_4]": 32, # Failure reason: TD Object Renderer standalone does not support custom LS input - # Comparison with pyaudio3dtools results in bad SNR + # Comparison with python renderer results in bad SNR "test_custom_ls_input_binaural[16ch_8+4+4-BINAURAL]": 8, "test_custom_ls_input_binaural[16ch_8+4+4-BINAURAL_ROOM_IR]": 0, "test_custom_ls_input_binaural[4d4-BINAURAL]": 6, diff --git a/tests/renderer/utils.py b/tests/renderer/utils.py index 356902ff47674ff111dbe7541bde3bac8e24bf4e..dee2745a156f04418e081d0768573971873dacf6 100644 --- a/tests/renderer/utils.py +++ b/tests/renderer/utils.py @@ -40,7 +40,8 @@ from .compare_audio import compare_audio_arrays from .constants import * sys.path.append(SCRIPTS_DIR) -import pyaudio3dtools +from ivas_processing_scripts.audiotools.audiofile import read +from ivas_processing_scripts.audiotools.convert import convert_file # fixture returns test information, enabling per-testcase SNR @pytest.fixture @@ -202,7 +203,7 @@ def run_renderer( run_cmd(cmd) - return pyaudio3dtools.audiofile.readfile(out_file) + return read(out_file) def compare_renderer_vs_mergetarget(test_info, in_fmt, out_fmt, **kwargs): ref, ref_fs = run_renderer( @@ -224,4 +225,4 @@ def compare_renderer_vs_pyscripts(test_info, in_fmt, out_fmt, **kwargs): def compare_renderer_args(test_info, in_fmt, out_fmt, ref_kwargs: Dict, cut_kwargs: Dict): ref, ref_fs = run_renderer(in_fmt, out_fmt, **ref_kwargs) cut, cut_fs = run_renderer(in_fmt, out_fmt, **cut_kwargs) - check_BE(test_info, ref, ref_fs, cut, cut_fs) \ No newline at end of file + check_BE(test_info, ref, ref_fs, cut, cut_fs) diff --git a/tests/run_pytests.py b/tests/run_pytests.py index 91ab8f27d531fa9e648360aec94339c5d2e1c7f7..33fdbacbee893eb84ceeb4473eb4d2670bc277ef 100755 --- a/tests/run_pytests.py +++ b/tests/run_pytests.py @@ -37,12 +37,22 @@ Test prerequisites are checked for and check failures are reported. When prerequisites are met, the pytest test is executed. """ -import os import sys +from pathlib import Path + +try: + here = Path(__file__).parent.resolve() + scripts_dir = str(here.joinpath("../scripts/ivas-processing-scripts").resolve()) + sys.path.append(scripts_dir) + import ivas_processing_scripts +except ImportError: + print("Can not import ivas_processing_scripts - please run 'git submodule update --init' and try again") + sys.exit(0) + +import os import argparse import subprocess import platform -from pathlib import Path BIN_EXT = ".exe" if platform.system() == "Windows" else "" HERE = Path(__file__).parent.resolve() @@ -91,7 +101,7 @@ def main(argv): if platform.system() == "Windows": cmd = ["pytest"] else: - cmd = ["python3", "-m", "pytest"] + cmd = ["python3.10", "-m", "pytest"] if args.param_file: cmd += ["tests/codec_be_on_mr_nonselection/test_param_file.py", "--param_file", args.param_file] else: