From d24d9698ebd461ad282d1b8adf71207911dad2e1 Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Fri, 5 May 2023 11:03:30 +0200 Subject: [PATCH 1/3] clean up non-CI related pyaudio3dtools functionality --- scripts/README.md | 224 ----- .../IIS_BRIR_officialMPEG_222UC_SBA3.mat | 3 - .../IIS_BRIR_officialMPEG_222UC_combined.mat | 3 - .../IIS_BRIR_officialMPEG_222UC_full.mat | 3 - scripts/pyaudio3dtools/EFAP.py | 929 ------------------ .../HRIRs_mat/ORANGE_HRIR_53_48000_SBA3.mat | 3 - .../ORANGE_HRIR_53_48000_combined.mat | 3 - .../HRIRs_mat/ORANGE_HRIR_53_48000_full.mat | 3 - scripts/pyaudio3dtools/__init__.py | 5 - scripts/pyaudio3dtools/audio3dtools.py | 239 +---- scripts/pyaudio3dtools/audioarray.py | 10 +- scripts/pyaudio3dtools/audiofile.py | 24 +- scripts/pyaudio3dtools/binauralrenderer.py | 782 --------------- scripts/pyaudio3dtools/hoadecoder.py | 186 ---- scripts/pyaudio3dtools/masarenderer.py | 112 --- .../pyaudio3dtools/quaternions/__init__.py | 44 - .../pyaudio3dtools/quaternions/functions.py | 183 ---- scripts/pyaudio3dtools/rotation.py | 346 ------- scripts/pyaudio3dtools/spatialaudioconvert.py | 570 ----------- scripts/pyaudio3dtools/spatialmetadata.py | 492 ---------- scripts/pyprocessing/__init__.py | 45 - scripts/pyprocessing/evs.py | 238 ----- scripts/pyprocessing/ivas.py | 200 ---- scripts/pyprocessing/prepost_processing.py | 220 ----- scripts/pyprocessing/processing.py | 123 --- scripts/pyprocessing/processing_configs.py | 375 ------- scripts/pyprocessing/utils.py | 148 --- tests/renderer/utils.py | 53 - 28 files changed, 24 insertions(+), 5542 deletions(-) delete mode 100644 scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_SBA3.mat delete mode 100644 scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_combined.mat delete mode 100644 scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_full.mat delete mode 100644 scripts/pyaudio3dtools/EFAP.py delete mode 100644 scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_SBA3.mat delete mode 100644 scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_combined.mat delete mode 100644 scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_full.mat delete mode 100644 scripts/pyaudio3dtools/binauralrenderer.py delete mode 100644 scripts/pyaudio3dtools/hoadecoder.py delete mode 100644 scripts/pyaudio3dtools/masarenderer.py delete mode 100644 scripts/pyaudio3dtools/quaternions/__init__.py delete mode 100644 scripts/pyaudio3dtools/quaternions/functions.py delete mode 100644 scripts/pyaudio3dtools/rotation.py delete mode 100644 scripts/pyaudio3dtools/spatialaudioconvert.py delete mode 100644 scripts/pyaudio3dtools/spatialmetadata.py delete mode 100644 scripts/pyprocessing/__init__.py delete mode 100644 scripts/pyprocessing/evs.py delete mode 100644 scripts/pyprocessing/ivas.py delete mode 100644 scripts/pyprocessing/prepost_processing.py delete mode 100644 scripts/pyprocessing/processing.py delete mode 100644 scripts/pyprocessing/processing_configs.py delete mode 100644 scripts/pyprocessing/utils.py diff --git a/scripts/README.md b/scripts/README.md index fec54ffeac..4e8a9b3664 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -38,7 +38,6 @@ title: Python scripts for Testing the IVAS code and Generating test items - [Python scripts for Testing the IVAS code and Generating test items](#python-scripts-for-testing-the-ivas-code-and-generating-test-items) - [Contents](#contents) - [0. 
Requirements](#0-requirements)
  - [- numpy and scipy for `generate_test_items.py`, `testBitexact.py` and `self_test.py`](#--numpy-and-scipy-for-generate_test_itemspy-testbitexactpy-and-self_testpy)
  - [1. Scripts and classes for testing IVAS code](#1--scripts-and-classes-for-testing-ivas-code)
    - [1.1 Classes](#11-classes)
    - [1.2 Output directory structure](#12-output-directory-structure)
@@ -49,16 +48,6 @@ title: Python scripts for Testing the IVAS code and Generating test items
      - [`IvasBuildAndRunChecks.py`](#ivasbuildandruncheckspy)
      - [`testBitexact.py`](#testbitexactpy)
      - [`self_test.py`](#self_testpy)
  - [2. Script for generating listening test items](#2-script-for-generating-listening-test-items)
    - [2.1. `generate_test_items.py`](#21-generate_test_itemspy)
    - [2.2. Test configuration file](#22-test-configuration-file)
    - [2.3. Supported test conditions](#23-supported-test-conditions)
    - [2.4. Supported input/output/rendered audio formats](#24-supported-inputoutputrendered-audio-formats)
    - [2.5. Processing](#25-processing)
    - [2.6. Renderer Metadata definition](#26-renderer-metadata-definition)
  - [3. Script for converting formats and binauralizing](#3-script-for-converting-formats-and-binauralizing)
    - [3.1. Binauralizing with head rotation](#31-binauralizing-with-head-rotation)
    - [3.2. Generating binaural reference signals](#32-generating-binaural-reference-signals)

---

@@ -441,216 +430,3 @@ Missing reference conditions and the test
conditions are then generated and the
reference and test conditions are compared.

-----

## 2. Script for generating listening test items

The `generate_test_items.py` Python script helps to quickly set up listening tests with multiple (pre-)processing and post-processing options.

### 2.1. `generate_test_items.py`

Script for generating (listening) test items.

```
usage: generate_test_items.py [-h] -i INFILE [INFILE ...]

Generate test items

optional arguments:
  -h, --help            show this help message and exit
  -i INFILE [INFILE ...], --infile INFILE [INFILE ...]
                        Configuration file(s): FILE1.json FILE2.json ...
```

Example of how to call it:

```
python3 .\generate_test_items.py -i .\examples\my_test_config.json
```

Where `my_test_config.json` is a test configuration file in JSON format, with the fields explained in the next section.

### 2.2. Test configuration file

This is the main file to edit in order to change global configuration options, detailed below.

*NOTE: Paths specified in the JSON file are relative to the working directory where the script is executed from, NOT the location of the JSON file itself. It is possible (and recommended!) to use absolute paths instead to avoid confusion.*

| key | values (example) | default | description |
|---------------------------|:------------------:|:-------------:|-----------------------------------------------|
| name | "my_test" | Required | name of the test session |
| author | "myself" | | Author of the configuration file (optional) |
| date | 20210205 | | Date of creation (optional) |
| | | | |
| enable_multiprocessing | True/False | True | Enables multiprocessing; recommended to set to True to speed up processing. |
| delete_tmp | True/False | False | Enables deletion of temporary directories (containing intermediate processing files, bitstreams and per-item logfiles etc.). |
| -| | | | | -| input_path | ./my_items/ | Required | Input directory with *.WAV, *.PCM or *.TXT files to process | -| preproc_input | True/False | False | Whether to execute preprocessing on the input files | -| in_format | HOA3 | Required | Input format for the conditions to generate, see spatial_audio_format | -| in_fs | 32000 | 48000 | Input sampling rate for conditions to generate (assumed to be sampling-rate of input PCM files to process) | -| input_select | ["in", "file2"] | Required | Filenames to filter in the input directory, can be a single value, an array or null. Only compares filenames (therefore "in" in this array would match both "in.wav" and "in.pcm") | -| | | | | -| concatenate_input | True/False | False | Whether to (horizontally) concatenate files in the input directory | -| concat_silence_ms | [1000, 1000] | [0, 0] | Specifies the pre- and post-silence duration to pad concatenation with in ms. If a single value is specified it will be used for BOTH pre- and post-padding | -| preproc_loudness | -26 | | Loudness to preprocess input to (dBov / LKFS depending on tool). Only processed if preproc_input is True. | -| | | | | -| output_path | ./out/ | | Output root directory hosting generated items & log | -| out_fs | 48000 | 48000 | Output sampling rate for conditions to generate | -| output_loudness | -26 | | Loudness level for output file (dBov / LKFS depending on tool). | -| | | | | -| renderer_format | 7_1_4 or CICP19 | Required | Format to be rendered (using offline rendering, will be bypassed if = out_format) | -| binaural_rendered | True/False | False | Extra binauralization of the rendered outputs (using offline rendering) | -| include_LFE | True/False | False | Whether to include LFE in binural rendering | -| gain_factor | float value | 1.0 | Gain factor to be applied to LFE channel | -| loudness_tool | "sv56demo" | "bs1770demo" | Tool to use for loudness adjustment. Currently only sv56demo and bs1770demo are supported for appropriate format configurations. Optionally can be a path to the binary. | -| | | | | -| lt_mode | "MUSHRA" | | Automatically generates a NAME.ltg file with generate_lt_file.py in output_path according to the specified mode | -| conditions_to_generate | ["ref", "ivas"] | Required | list of conditions to be generated, for ivas and evs, multiple conditions can be specified with an \_ separator (i.e. "ivas_branch", "ivas_trunk" etc.) | -| | | | | -| ref | | | | -| - out_fc | 32000 | 48000 | cut-off frequency to be applied to the reference condition in post | -| ivas | | | | -| - bitrates | [16400, 128000] | Required | Bitrate(s) used for IVAS encoder | -| - enc_fs | 48000 | 48000 | Sampling rate for input to the encoder (pre-processing) | -| - max_band | wb, swb, fb etc. 
---

### 2.3. Supported test conditions

The following conditions can currently be generated by `generate_test_items.py`.

| Supported conditions | Description |
|:--------------------:|-----------------------------------------------------------|
| ref | Uncoded (reference) |
| lp3k5 | Uncoded low-passed at 3.5 kHz (anchor) |
| lp7k | Uncoded low-passed at 7 kHz (anchor) |
| evs_mono | Coded with multi-stream EVS codec, !!metadata not coded!! |
| ivas | Coded with IVAS codec |

Multiple conditions for evs_mono and ivas can be specified by using underscore separators, e.g. `"ivas_1" : {...}, "ivas_2" : {...}`
(also see `test_SBA.json` for an example).

---

### 2.4. Supported input/output/rendered audio formats

| spatial_audio_format | Input/Output/Rendered | Description |
|--------------------------------------------------|----------------------|------------------------------------------------|
| MONO | yes/yes/yes | mono signals |
| STEREO | yes/yes/yes | stereo signals |
| ISM or ISMx | yes/no/no | Objects with metadata, description using renderer metadata |
| MASA or MASAx | yes/no/no | mono or stereo signals with spatial metadata !!!metadata must share same basename as waveform file but with .met extension!!! |
| FOA/HOA2/HOA3 or PLANAR(FOA/HOAx) | yes/yes/yes | Ambisonic signals or planar ambisonic signals |
| BINAURAL/BINAURAL_ROOM | no/yes/yes | Binaural signals |
| 5_1/5_1_2/5_1_4/7_1/7_1_4 or CICP[6/12/14/16/19] | yes/yes/yes | Multi-channel signals for predefined loudspeaker layout |
| META | yes/yes/no | Audio scene described by a renderer config |

---

### 2.5. Processing

The processing chain is as follows:

1. Preprocessing
   - **Condition**: `preproc_input == true`
   - Input files converted to `in_format`
2. Processing
   - **Condition**: Performed depending on key in `conditions_to_generate`
   - Coding/decoding from `in_format` to `out_format`
3. Postprocessing
   1. Rendering to `renderer_format`
      - **Condition**: `out_format != renderer_format`
      - output files converted from `out_format` to `renderer_format`
   
2. Binaural Rendering
      - **Condition**: `binaural_rendered == true` and `out_format` is not a BINAURAL type
      - output files converted from `out_format` to `BINAURAL`

---

### 2.6. Renderer Metadata definition

To run, the renderer requires a config file describing the input scene. The expected format of the config file is as follows:

---

- Line 1: Path to a "multitrack" audio file. This should be a single multichannel wav/pcm file that contains all input audio. For example, channels 1-4 can be an FOA scene, channel 5 an object, and channels 6-11 a 5.1 channel bed. If the path is not absolute, it is considered relative to the renderer executable, not the config file. This path has lower priority than the one given on the command line: *The path in the config file is ignored if the --inputAudio argument to the renderer executable is specified.*

---

- Line 2: Contains the number of inputs. An input can either be an Ambisonics scene, an object or a channel bed. This is NOT the total number of channels in the input audio file. The renderer currently supports simultaneously:
  - Up to 2 SBA inputs
  - Up to 2 MC inputs
  - Up to 16 ISM inputs

  These limits can be freely changed with pre-processor macros, if needed.

---

- Following lines: Define each of the inputs. Inputs can be listed in any order - they are NOT required to be listed in the same order as in the audio file. Input definitions:
  - The first line of an input definition contains the input type: SBA, MC or ISM. The following lines depend on the input type:
    - SBA:
      - Index of the first channel of this input in the multitrack file (1-indexed)
      - Ambisonics order
    - MC:
      - Index of the first channel of this input in the multitrack file (1-indexed)
      - CICP index of the speaker layout
    - ISM:
      - Index of this input's audio in the multitrack file (1-indexed)
      - Path to ISM metadata file (if not absolute, relative to executable location)
    - OR ISM:
      - Index of this input's audio in the multitrack file (1-indexed)
      - Number N of positions defined, followed by N lines in the form: stay in position for x frames, azimuth, elevation (ISM position metadata defined this way is looped if there are more frames of audio than given positions)

---

Example config

The following example defines a scene with 4 inputs:

- ISM with trajectory defined in a separate file. Channel 12 in the input file.
- Ambisonics, order 1. Channels 1-4 in the input audio file.
- CICP6 channel bed. Channels 5-10 in the input audio file.
- ISM with 2 defined positions (-90,0) and (90,0). Channel 11 in the input file. The object will start at position (-90,0) and stay there for 5 frames, then move to (90,0) and stay there for 5 frames. This trajectory is looped over the duration of the input audio file.

```
./input_audio.wav
4
ISM
12
path/to/IVAS_ISM_metadata.csv
SBA
1
1
MC
5
6
ISM
11
2
5,-90,0
5,90,0
```

## 3. Script for converting formats and binauralizing

The script audio3dtools.py can convert between different input and output formats and binauralize signals.

Execute `python -m pyaudio3dtools.audio3dtools --help` for usage.

### 3.1. Binauralizing with head rotation

This example binauralizes an HOA3 signal with a head-rotation trajectory. Head rotation is performed in the spherical harmonics domain (SHD). It is supported for HOA3 and META input formats. For the META input format, the audio scene is first pre-rendered to HOA3 and then rotated and binauralized. A minimal sketch of the underlying idea follows the example command.

```
python -m pyaudio3dtools.audio3dtools -i hoa3_input.wav -o . -F BINAURAL -T .\trajectories\full_circle_in_15s
```
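To illustrate what rotation in the SHD means, here is a minimal sketch for the first-order (FOA) case only; it is not the implementation used by these scripts. It assumes ACN channel ordering (W, Y, Z, X), one yaw angle per frame, and a hypothetical frame length of 960 samples (20 ms at 48 kHz). Rotating HOA3 would additionally require higher-order spherical-harmonic rotation matrices.

```
import numpy as np

def rotate_foa_yaw(x: np.ndarray, yaw_deg: np.ndarray, frame_len: int = 960) -> np.ndarray:
    """Apply a per-frame yaw rotation to an FOA signal in the SHD.

    x: (n_samples, 4) FOA signal, columns W, Y, Z, X (ACN order, assumed).
    yaw_deg: one yaw angle in degrees per frame (len >= n_samples // frame_len).
    """
    y = x.astype(float).copy()
    for i in range(x.shape[0] // frame_len):
        s = slice(i * frame_len, (i + 1) * frame_len)
        c = np.cos(np.deg2rad(yaw_deg[i]))
        sn = np.sin(np.deg2rad(yaw_deg[i]))
        # W (omni, ch 0) and Z (vertical, ch 2) are invariant under yaw;
        # Y and X mix like a 2D vector rotation. The sign of sn depends on
        # whether the angle describes head rotation or scene rotation.
        y[s, 1] = c * x[s, 1] - sn * x[s, 3]  # Y'
        y[s, 3] = sn * x[s, 1] + c * x[s, 3]  # X'
    return y
```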
### 3.2. Generating binaural reference signals

Currently MC input signals are supported. The reference processing can be activated by selecting BINAURAL[_ROOM]_REF as output format. The signals are generated by convolving the channels with the filters from the database that are closest to the current position of the virtual loudspeakers. Between the measured points along the trajectory, any interpolation method supported by numpy can be chosen.

```
python -m pyaudio3dtools.audio3dtools -i cicp6_input.wav -o . -F BINAURAL_REF -T .\trajectories\full_circle_in_15s
```

### 3.3. Rendering ISM to Custom loudspeakers with auxiliary binaural output

ISM metadata can either be specified via an input text file in the Renderer Metadata definition format, or via the command line using the same style as IVAS:

```
python -m pyaudio3dtools.audio3dtools -i ism2.wav -f ISM2 -m ism1.csv NULL -F 7_1_4 -o . -b -T .\trajectories\full_circle_in_15s
```
diff --git a/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_SBA3.mat b/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_SBA3.mat
deleted file mode 100644
index 5bc7464f7f..0000000000
--- a/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_SBA3.mat
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b41a527b6ba22b4c100265655ca801ee4d2dba3c3e03dc58f7cc5d99e397d2c3
-size 11795531
diff --git a/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_combined.mat b/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_combined.mat
deleted file mode 100644
index 61ba946617..0000000000
--- a/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_combined.mat
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:081a9053c8b04831d97e6f18d641d4737b2c23b076778a9b41c7b3a41d954c32
-size 6348446
diff --git a/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_full.mat b/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_full.mat
deleted file mode 100644
index 440c8aedd6..0000000000
--- a/scripts/pyaudio3dtools/BRIRs_mat/IIS_BRIR_officialMPEG_222UC_full.mat
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0544d1cf80a7cceb156760107d81b10fd787807bb0ea1e74e9aeb552474b3373
-size 13233924
diff --git a/scripts/pyaudio3dtools/EFAP.py b/scripts/pyaudio3dtools/EFAP.py
deleted file mode 100644
index dcc615355e..0000000000
--- a/scripts/pyaudio3dtools/EFAP.py
+++ /dev/null
@@ -1,929 +0,0 @@
-#!/usr/bin/env python3
-
-"""
- (C) 2022 Baseline Development Group with portions copyright Dolby International AB, Ericsson AB,
- Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
- Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies OY, Orange,
- Panasonic Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation. All Rights Reserved.
-
- This software is protected by copyright law and by international treaties.
- The Baseline Development Group consisting of Dolby International AB, Ericsson AB,
- Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
- Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies OY, Orange,
- Panasonic Corporation, Qualcomm Technologies, Inc., and VoiceAge Corporation retain full ownership
- rights in their respective contributions in the software. 
No license of any kind, including but not - limited to patent license, of any foregoing parties is hereby granted by implication, estoppel or - otherwise. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and/or fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import argparse -import os -from enum import Enum -from itertools import combinations -from typing import Optional, Tuple, Union - -import numpy as np - - -def wrap_angles( - azi: float, ele: float, clip_ele: Optional[bool] = False -) -> Tuple[float, float]: - """ - Wrap angles to (-180, 180] azimuth and [-90, 90] elevation - Takes into account hemisphere flips from large elevation changes unless clip_ele is specified - """ - if clip_ele: - ele = min(max(ele, -90), 90) - - if ele != 0 and ele % 90 == 0: - # if elevation is a multiple of 90, azimuth is irrelevant since we are at a pole - azi = 0 - while np.abs(ele) > 90: - ele -= 360 - else: - # wrap elevation value - while np.abs(ele) > 90: - # flip azimuth to other hemisphere - azi += 180 - - # compensate elevation accordingly - if ele > 90: - ele -= 180 - elif ele < -90: - ele += 180 - - # wrap azimuth value - while np.abs(azi) > 180: - azi = (azi + 180) % 360 - if azi < 0: - azi += 360 - azi -= 180 - - # set -180 azimuth to 180 - if azi == -180: - azi = 180 - - return azi, ele - - -class EfapDmxType(Enum): - NONE = 0 - AMPLITUDE = 1 - INTENSITY = 2 - - -class EfapVertex: - """ - Vertex data structure for EFAP - Initialises a vertex from the given spherical coordinate pair, with a flag specifying if it is a ghost loudspeaker - - - Parameters - ---------- - azi : float - Azimuth of vertex - ele : float - Elevation of vertex - is_ghost : bool - Whether the vertex is a ghost, default is False - dmx_type : EfapDmxType - Downmix type for ghost vertices - """ - - def __init__( - self, - azi: float, - ele: float, - is_ghost: Optional[bool] = False, - dmx_type: Optional[EfapDmxType] = EfapDmxType.INTENSITY, - ): - self.azi, self.ele = wrap_angles(azi, ele) - self.pos = np.array( - [ - np.cos(np.deg2rad(azi)) * np.cos(np.deg2rad(ele)), - np.sin(np.deg2rad(azi)) * np.cos(np.deg2rad(ele)), - np.sin(np.deg2rad(ele)), - ] - ) - - idx_azi = np.round(np.abs(90 - np.abs(self.azi))) - idx_ele = 90 - np.round(np.abs(self.ele)) - self.index = ( - idx_azi + 181 * idx_ele - ) # vertices on the median plane have lowest index - - self.is_ghost = is_ghost - self.dmx_type = dmx_type - - def __str__(self): - str_ = f"a{self.azi}e{self.ele}" - if self.is_ghost: - str_ += "*" - return str_ - - def __lt__(self, other): - return self.index < other.index - - -class EFAP: - """ - EFAP data structure - - Initialise EFAP data for computing panning gains - - - Parameters - ---------- - azimuths : np.ndarray - Azimuth positions of the loudspeaker array - elevations : npndarray - Elevation postions of the loudspeaker array - intensity_panning : bool - 
Whether intensity panning is enabled or not - - Examples - -------- - - >>> from EFAP import EFAP - >>> panner = EFAP([30, -30, 0, 110, -110], [0, 0, 0, 0, 0], False) - >>> panner.pan(15, 45) - array([0.66742381, 0.19069252, 0.66742381, 0.19069252, 0.19069252]) - - """ - - _EFAP_HULL_TOL = 1e-4 # tolerance for a point to be added to the convex hull - _EFAP_MAX_AZI_GAP = 160 # maximum allowed angular gap in the middle layer - _EFAP_POLAR_ELE = 90 # elevation of north / south poles (zenith / nadir) - _EFAP_THRESH_COPLANAR = 1e-3 # tolerance for points to be considered coplanar - _EFAP_THRESH_MID_LAYER = 45 # elevation threshold for loudspeakers to be considered as in the middle layer - _EFAP_THRESH_POLES = 1e-6 # tolerance for a vertex to be considered polar - _EFAP_THRESH_TRI = 1e-10 # tolerance for a point to be inside a triangle - - def __init__( - self, - azimuths: Union[list, np.ndarray], - elevations: Union[list, np.ndarray], - intensity_panning: Optional[bool] = False, - ): - # validation - azimuths = np.array(azimuths) - elevations = np.array(elevations) - if np.squeeze(azimuths).ndim > 1: - raise ValueError("Too many dimensions for loudspeaker azimuth array") - if np.squeeze(elevations).ndim > 1: - raise ValueError("Too many dimensions for loudspeaker elevations array") - if azimuths.shape != elevations.shape: - raise ValueError("Mismatch between loudspeaker azimuths and elevations") - - # set EFIP flag - self.intensity_panning = intensity_panning - - # initialise vertices and add ghost loudspeakers if needed - self.verts = np.array( - [EfapVertex(azi, ele) for azi, ele in zip(azimuths, elevations)] - ) - self._add_ghost_speakers() - - # formulate initial tetrahedron for the convex hull - self._init_simplex() - - # add the remaining vertices to the convex hull in order of their index - for i in np.argsort(self.verts): - if self.verts[i] not in self.verts[self.tris]: - self._add_vertex_to_hull(i) - - # compute downmix matrix with remapped ghost speakers - self._remap_ghost_speakers() - - # set vertices near poles to have NaN azimuth - for v in self.verts: - if ( - v.ele > self._EFAP_POLAR_ELE - self._EFAP_THRESH_POLES - or v.ele < self._EFAP_THRESH_POLES - self._EFAP_POLAR_ELE - ): - v.azi = np.nan - - # combine triangles into polygons - self._tri2poly() - - def _add_ghost_speakers(self) -> None: - """ - Add ghost loudspeakers at the poles, or to fill large horizontal gaps - """ - ele = [v.ele for v in self.verts] - - dmx_type = EfapDmxType.INTENSITY - - # add ghost loudspeakers at the poles if necessary - if max(ele) < self._EFAP_POLAR_ELE: - - if self.intensity_panning: - if max(ele) > self._EFAP_THRESH_MID_LAYER: - dmx_type = EfapDmxType.NONE - else: - dmx_type = EfapDmxType.AMPLITUDE - - self.verts = np.append(self.verts, EfapVertex(0, 90, True, dmx_type)) - - if min(ele) > -self._EFAP_POLAR_ELE: - - if self.intensity_panning: - if min(ele) < -self._EFAP_THRESH_MID_LAYER: - dmx_type = EfapDmxType.NONE - else: - dmx_type = EfapDmxType.AMPLITUDE - - self.verts = np.append(self.verts, EfapVertex(0, -90, True, dmx_type)) - - # check for large gaps in the middle horizontal layer - mid_spkrs = [ - v.azi for v in self.verts if np.abs(v.ele) < self._EFAP_THRESH_MID_LAYER - ] - - # no speakers in middle layer; add a triangle of ghost speakers - if not mid_spkrs: - self.verts = np.append( - self.verts, - [ - EfapVertex(0, 0, True), - EfapVertex(180, 0, True), - EfapVertex(240, 0, True), - ], - ) - # only one speaker in the threshold; add two ghost speakers to form a triangle - elif 
len(mid_spkrs) == 1: - self.verts = np.append( - self.verts, - [ - EfapVertex(mid_spkrs[0] + 120, 0, True), - EfapVertex(mid_spkrs[0] + 240, 0, True), - ], - ) - # search for and fill gaps greater than MAX_AZI_GAP - else: - mid_spkrs = np.sort(mid_spkrs) - angle_diff = np.diff(np.concatenate([mid_spkrs, [mid_spkrs[0] + 360]])) - sectors = np.ceil(angle_diff / self._EFAP_MAX_AZI_GAP) - - for i, s in enumerate(sectors): - if s > 1: - new_diff = angle_diff[i] / s - num_new = s - 1 - for k in range(int(num_new)): - new_azi = mid_spkrs[i] + (k + 1) * new_diff - self.verts = np.append(self.verts, EfapVertex(new_azi, 0, True)) - - def _init_simplex(self) -> None: - """ - Create an initial tetrahedron / simplex for the convex hull from 4 vertices - """ - # take the first vertex as seed - t = [0] - - # attempt to form an edge with non-zero length - for i, v in enumerate(self.verts): - if ( - v.azi != self.verts[t[0]].azi or v.ele != self.verts[t[0]].ele - ) and i not in t: - t.append(i) - break - else: - raise ValueError("Vertices are conincident!") - - # attempt to form a triangle with non-zero area - for i, v in enumerate(self.verts): - if ( - np.linalg.norm( - np.cross( - self.verts[t[1]].pos - self.verts[t[0]].pos, - v.pos - self.verts[t[0]].pos, - ), - 2, - ) - > self._EFAP_HULL_TOL - and i not in t - ): - t.append(i) - break - else: - raise ValueError("Vertices are colinear!") - - # attempt to form a tetrahedron with non-zero volume - for i, v in enumerate(self.verts): - if ( - np.abs( - np.dot( - np.cross( - self.verts[t[1]].pos - self.verts[t[0]].pos, - self.verts[t[2]].pos - self.verts[t[0]].pos, - ), - v.pos - self.verts[t[0]].pos, - ) - ) - ) > self._EFAP_HULL_TOL and i not in t: - t.append(i) - break - else: - raise ValueError("Vertices are coplanar!") - - # create a list of the triangles of the initial simplex / tetrahedron - t = np.array(t) - self.tris = np.array([t[[0, 1, 2]], t[[0, 1, 3]], t[[0, 2, 3]], t[[1, 2, 3]]]) - - # orient the triangle surface planes outwards from the centroid - self.centroid = np.mean([self.verts[i].pos for i in t], axis=0) - for i, tri in enumerate(self.tris): - self.tris[i, :] = self._flip_plane(tri) - - def _add_vertex_to_hull(self, idx_new_vert: int) -> None: - """ - Add a vertex to the convex hull and update the list of triangles in the hull - """ - # compute the centroid of the current convex hull - self.centroid = np.mean( - [self.verts[i].pos for i in np.unique(self.tris)], axis=0 - ) - - tris_new = [] - visible = [] - - # find which hull surfaces are visible from the new vertex - for i, tri in enumerate(self.tris): - if self._vertex_dist(tri, idx_new_vert) > -1e-6: - visible.append(i) - else: - tris_new.append(tri) - - tris_new = np.array(tris_new) - visible = np.array(visible, dtype=int) - - # find edges of the visible hull surfaces - max_vert = np.amax(self.tris[visible]) + 1 - counter = np.zeros([max_vert, max_vert]) - for i, tri in enumerate(self.tris[visible]): - surface = np.append(tri, tri[0]) - for n in range(3): - a = surface[n] - b = surface[n + 1] - counter[a, b] = counter[a, b] + 1 - - counter += counter.T - - edges = [] - for a in range(max_vert - 1): - for b in range(a + 1, max_vert): - if counter[a, b] == 1: - edges.append([a, b]) - edges = np.vstack(edges) - - # break the edges visible from the new vertex and add the new triangle - for e in edges: - tris_new = np.vstack( - [tris_new, self._flip_plane(np.append(e, idx_new_vert))] - ) - - # update the list of triangles in the convex hull - self.tris = tris_new - - def 
_remap_ghost_speakers(self) -> None: - """ - Remove unused ghost speakers and compute a downmix matrix for the rest - """ - # find ghosts that are not part of the convex hull - ghosts = [i for i, v in enumerate(self.verts) if v.is_ghost] - unused_ghosts = np.compress( - np.isin(ghosts, np.unique(self.tris), invert=True), ghosts - ) - - if unused_ghosts.size > 0: - # remove the unused ghosts from the triangle array and also adjust indices - self.tris[self.tris > unused_ghosts.min()] -= unused_ghosts.size - # delete them from the vertex array - self.verts = np.delete(self.verts, unused_ghosts) - - # generate initial sound energy distribution matrix - n_vtx = len(self.verts) - n_ghost = len(ghosts) - len(unused_ghosts) - - M = np.eye(n_vtx) - for i, v in enumerate(self.verts): - if v.is_ghost: - neighbours = self._get_neighbours(i) - M[:, i] = np.zeros(n_vtx) - M[neighbours, i] = np.ones(len(neighbours)) / len(neighbours) - - # re-distribute sound energy from ghosts - M2 = M.copy() - for i, v in enumerate(self.verts): - if v.is_ghost: - vec = M[:, i] - while np.sum(vec[-n_ghost:]) > 1e-4: - vec = M @ vec - M2[:, i] = vec - - self.dmx_mat = M2[:-n_ghost, :] - - # amplitude downmix for real loudspeakers - self.dmx_mat[:, :-n_ghost] = np.sqrt(self.dmx_mat[:, :-n_ghost]) - - # distribute ghosts according to downmix type - for i, v in enumerate(self.verts): - if v.is_ghost: - if v.dmx_type == EfapDmxType.NONE: - self.dmx_mat[:, i] = 0 - elif v.dmx_type == EfapDmxType.AMPLITUDE: - pass - else: - self.dmx_mat[:, i] = np.sqrt(self.dmx_mat[:, i]) - - def _tri2poly(self) -> None: - """ - Merge hull triangles into polygons if they are coplanar - """ - polys = [] - - for tri in self.tris: - # find all vertices coplanar with this triangle (including those already in the triangle) - new_poly = np.array( - [ - i - for i, _ in enumerate(self.verts) - if np.abs(self._vertex_dist(tri, i)) < self._EFAP_THRESH_COPLANAR - ] - ) - - # check if we already found this polygon as a complete subset - is_subset = [ - i for i, poly in enumerate(polys) if np.all(np.isin(new_poly, poly)) - ] - is_superset = [ - i for i, poly in enumerate(polys) if np.all(np.isin(poly, new_poly)) - ] - - if is_subset: - continue - elif is_superset: - # remove the other polygon since it will be replaced by the superset polygon - polys_new = [p for i, p in enumerate(polys) if i not in is_superset] - polys = polys_new - - # orient the polygon plane in the same direction as the triangle - P1 = self.verts[tri[0]].pos - P2 = self.verts[tri[1]].pos - P3 = self.verts[tri[2]].pos - - # first base vector - U = P2 - P1 - U = U / np.linalg.norm(U) - - # second base vector - V = P3 - P2 - V = V - np.dot(U, V) * U - V = V / np.linalg.norm(V) - - # center of the first triangle - M = np.mean([P1, P2, P3], axis=0) - - # sort vertices - azi = np.zeros_like(new_poly, dtype=float) - for i, idx_v in enumerate(new_poly): - P = self.verts[idx_v].pos - M - X = np.dot(P, U) - Y = np.dot(P, V) - azi[i] = np.arctan2(Y, X) - - idx = np.argsort(azi) - new_poly = new_poly[idx] - - # add the polygon to the main list - polys.append(new_poly) - - self.polys = polys - - def _pan_EFAP_poly( - self, azimuth: float, elevation: float, poly: np.ndarray, mod: int - ) -> np.ndarray: - """ - Compute panning gains for each vertex in the given polygon - - - Parameters - ---------- - azimuth : float - Azimuth of requested panning position - elevation : float - Elevation of requested panning position - poly : np.ndarray - Array of vertices defining the polygon - - Returns - ------- - 
poly_gain: np.ndarray - Gains for each vertex in the polygon - """ - poly_gain = np.zeros_like(poly, dtype=float) - - P = np.array([azimuth, elevation]) - # search for the triangle of the polygon in which P belongs - for i in range(1, poly.size + 1): - A = np.array([self.verts[poly[i - 1]].azi, self.verts[poly[i - 1]].ele]) - for j in range(i, poly.size - 2 + i): - idx1 = 1 + (j % poly.size) - idx2 = 1 + (idx1 % poly.size) - B = np.array( - [self.verts[poly[idx1 - 1]].azi, self.verts[poly[idx1 - 1]].ele] - ) - C = np.array( - [self.verts[poly[idx2 - 1]].azi, self.verts[poly[idx2 - 1]].ele] - ) - - if mod: - if not np.isnan(A[0]): - A[0] %= mod - if not np.isnan(B[0]): - B[0] %= mod - if not np.isnan(C[0]): - C[0] %= mod - - if self._in_triangle(P, A, B, C): - N = np.transpose([B[1] - C[1], C[0] - B[0]]) - N = N / np.dot(N, B - A) - poly_gain[i - 1] = 1 - np.dot(P - A, N) - - """ DEBUGGING / TODO """ - # set gains <= -60dB to 0 - poly_gain[np.abs(poly_gain) < 1e-6] = 0 - - return poly_gain - - """ geometric / math helper functions """ - - def _get_neighbours(self, idx_vert: int) -> np.ndarray: - """ - Find triangles containing the given vertex index (neighbouring vertices) - """ - n = self.tris[np.any(np.isin(self.tris, idx_vert), axis=1)] - return np.unique(n[n != idx_vert]) - - def _get_azi_ele(self, idx_vert: int) -> Tuple[float, float]: - """ - Return a tuple of (azi, ele) for a vertex at the given index - """ - return self.verts[idx_vert].azi, self.verts[idx_vert].ele - - def _in_polygon( - self, azimuth: float, elevation: float, poly: np.ndarray - ) -> Tuple[bool, int]: - """ - Determine whether the panning position lies within the given polygon - by iteratively checking its triangles - - Parameters - ---------- - azimuth : float - Azimuth of requested panning position - elevation : float - Elevation of requested panning position - poly : np.ndarray - Array of vertices defining the polygon - - Returns - ------- - in_polygon, mod: Tuple[bool, int] - Flag indicating whether the point is inside the given polygon - Value of wrapping required if used - """ - azi = [self.verts[v].azi for v in poly] - - P = np.array([azimuth, elevation]) - - for tri in combinations(poly, 3): - A = np.array(self._get_azi_ele(tri[0])) - B = np.array(self._get_azi_ele(tri[1])) - C = np.array(self._get_azi_ele(tri[2])) - if self._in_triangle(P, A, B, C): - return True, None - - # if the azimuth difference is large, perform the 2D check again with azimuths wrapped to (-360, 0] and [0, 360) - if np.nanmax(azi) - np.nanmin(azi) > 180: - for tri in combinations(poly, 3): - A = np.array(self._get_azi_ele(tri[0])) - B = np.array(self._get_azi_ele(tri[1])) - C = np.array(self._get_azi_ele(tri[2])) - if not np.isnan(A[0]): - A[0] %= 360 - if not np.isnan(B[0]): - B[0] %= 360 - if not np.isnan(C[0]): - C[0] %= 360 - if self._in_triangle(P, A, B, C): - return True, 360 - - for tri in combinations(poly, 3): - A = np.array(self._get_azi_ele(tri[0])) - B = np.array(self._get_azi_ele(tri[1])) - C = np.array(self._get_azi_ele(tri[2])) - if not np.isnan(A[0]): - A[0] %= -360 - if not np.isnan(B[0]): - B[0] %= -360 - if not np.isnan(C[0]): - C[0] %= -360 - if self._in_triangle(P, A, B, C): - return True, -360 - - return False, None - - def _in_triangle( - self, P: np.ndarray, A: np.ndarray, B: np.ndarray, C: np.ndarray - ) -> bool: - """ - Determine whether the panning position lies within the given triangle - - Parameters - ---------- - P : float - Point under test - A : float - First vertex of the triangle - B : float - Second 
vertex of the triangle - C : float - Third vertex of the triangle - - - Returns - ------- - bool - Flag indicating whether the point is inside the given triangle - """ - if np.isnan(A[0]): - A[0] = P[0] - - if np.isnan(B[0]): - B[0] = P[0] - - if np.isnan(C[0]): - C[0] = P[0] - - tmpMat = np.transpose([B - A, C - A]) - if (1 / np.linalg.cond(tmpMat)) < self._EFAP_THRESH_TRI: - return False - - Minv = np.linalg.inv(tmpMat) - S = Minv @ (P - A) - - if ( - S[0] < -self._EFAP_THRESH_TRI - or S[1] < -self._EFAP_THRESH_TRI - or S[0] + S[1] > 1 + self._EFAP_THRESH_TRI - ): - return False - - return True - - def _vertex_dist(self, surface: np.ndarray, idx_vert: int) -> float: - """ - Compute the distance of a vertex from a given plane - - Parameters - ---------- - surface : np.ndarray - Array of 3 ordered vertices defining the plane and its orientation - idx_vert: int - Index of the vertex to compute the distance for - - Returns - ------- - float - Distance of the vertex from the given plane - """ - return self._point_plane_dist( - self.verts[surface[0]].pos, - self.verts[surface[1]].pos, - self.verts[surface[2]].pos, - self.verts[idx_vert].pos, - ) - - def _point_plane_dist( - self, P1: np.ndarray, P2: np.ndarray, P3: np.ndarray, X: np.ndarray - ) -> float: - """ - Compute the distance of a vertex from a plane defined by three points - - Parameters - ---------- - P1 : np.ndarray - Cartesian coordinates of the first point - P2 : np.ndarray - Cartesian coordinates of the second point - P3 : np.ndarray - Cartesian coordinates of the third point - X: np.ndarray - Cartesian coordinates of the vertex - - Returns - ------- - float - Distance of the vertex from the given plane - """ - - if np.all(X == P1) or np.all(X == P2) or np.all(X == P3): - return 0 - else: - N = np.cross(P1 - P2, P1 - P3) - return np.dot(X - P1, N / np.linalg.norm(N)) - - def _flip_plane(self, surface: np.ndarray) -> np.ndarray: - """ - Flip the orientation of a plane (invert normal vector) - - Parameters - ---------- - surface : np.ndarray - Array of 3 ordered vertices defining the plane and its orientation - - Returns - ------- - surface : np.ndarray - Reordered vertices with plane normal pointing outwards from the hull centroid - """ - if ( - self._point_plane_dist( - self.verts[surface[0]].pos, - self.verts[surface[1]].pos, - self.verts[surface[2]].pos, - self.centroid, - ) - > 0 - ): - surface = np.flip(surface.copy()) - - return surface - - def _compute_gains_point(self, azimuth: float, elevation: float) -> np.ndarray: - """ - Compute gains for the requested panning position - - - Parameters - ---------- - azimuth : float - Azimuth of requested panning position - elevation : float - Elevation of requested panning position - - Returns - ------- - gains: np.ndarray - Panning gains for the loudspeaker layout - """ - if np.isnan(azimuth) or np.isnan(elevation): - raise ValueError(f"Angles cannot be NaNs : ({azimuth}, {elevation})") - - azimuth, elevation = wrap_angles(azimuth, elevation) - point_pos = [ - np.cos(np.deg2rad(azimuth)) * np.cos(np.deg2rad(elevation)), - np.sin(np.deg2rad(azimuth)) * np.cos(np.deg2rad(elevation)), - np.sin(np.deg2rad(elevation)), - ] - - # filter the polygon list with a quick 2d check - found_polys = [] - for poly in self.polys: - in_poly, mod = self._in_polygon(azimuth, elevation, poly) - if in_poly: - found_polys.append((poly, mod)) - - if not found_polys: - raise AssertionError("Unexpected error during panning") - - # find a visible polygon with the smallest distance - dist = [] - - for poly, mod 
in found_polys: - surface = self.verts[poly] - d = self._point_plane_dist( - surface[0].pos, - surface[1].pos, - surface[2].pos, - point_pos, - ) - if d >= 0: - dist.append(d) - else: - dist.append(np.inf) - - found_poly, mod = found_polys[np.argmin(dist)] - - # compute gains for the polygon vertices - poly_gain = self._pan_EFAP_poly(azimuth, elevation, found_poly, mod) - - # downmix ghost loudspeakers - gains = np.zeros(self.verts.size) - gains[found_poly] = poly_gain / np.linalg.norm(poly_gain) - gains = gains @ self.dmx_mat.T - gains = gains / np.linalg.norm(gains) - - if self.intensity_panning: - gains = np.sqrt(gains / np.sum(gains)) - - return gains - - """ public functions """ - - def pan( - self, azimuths: float, elevations: float, intensity_panning: bool = False - ) -> np.ndarray: - """ - Compute gains for the requested panning position - - - Parameters - ---------- - azimuth : float - Azimuth of requested panning position - elevation : float - Elevation of requested panning position - intensity_panning : bool - Flag whether to use intensity panning (Default is False == amplitude panning) - - Returns - ------- - gains: np.ndarray - Panning gains for the loudspeaker layout - """ - azimuths = np.array(azimuths) - elevations = np.array(elevations) - if azimuths.size == 1 and elevations.size == 1: - return self._compute_gains_point(azimuths, elevations) - elif np.squeeze(azimuths).ndim == 1 and np.squeeze(elevations).ndim == 1: - gains = [] - for a, e in zip(azimuths, elevations): - gains.append(self._compute_gains_point(a, e)) - return np.vstack(gains) - else: - raise ValueError( - "Azimuth and Elevation arrays cannot have more than one dimension and must be of equal size" - ) - - -def main(args): - """ - Parses a speaker layout text file and prints the panning gains - for the requested position - - - Parameters - ---------- - args : tuple - Command line arguments - - """ - - speaker_positions = np.loadtxt( - os.path.abspath(args.input), delimiter=",", max_rows=2 - ) - panner = EFAP(speaker_positions[0, :], speaker_positions[1, :], args.efip) - print(panner.pan(args.azimuth, args.elevation)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Edge-Fading Amplitude Panning") - parser.add_argument( - "-i", - "--input", - metavar="layout_file", - required=True, - type=str, - help="IVAS compatible loudspeaker layout file (Loudspeaker azimuths in first line, elevations in second, subsequent lines are ignored)", - ) - parser.add_argument( - "-efip", - "-intensity_panning", - default=False, - action="store_true", - help="Intensity panning mode (EFIP)", - ) - parser.add_argument( - "azimuth", - type=float, - help="Azimuth of direction to compute panning gains for (positive-left)", - ) - parser.add_argument( - "elevation", - type=float, - help="Elevation of direction to compute panning gains for (positive-up)", - ) - args = parser.parse_args() - main(args) diff --git a/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_SBA3.mat b/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_SBA3.mat deleted file mode 100644 index 0d113a34af..0000000000 --- a/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_SBA3.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:02c8a25178b36399054c1802f00bb5a8739f3ac950c21b0c760c046b1dba530d -size 36201 diff --git a/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_combined.mat b/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_combined.mat deleted file mode 100644 index 
e52e031e8c..0000000000 --- a/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_combined.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9660be83192f7babb4f67e19653a94bc02cee7b3071065880cf618547c19d842 -size 20138 diff --git a/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_full.mat b/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_full.mat deleted file mode 100644 index f2c22c39ec..0000000000 --- a/scripts/pyaudio3dtools/HRIRs_mat/ORANGE_HRIR_53_48000_full.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:05ae461fc303c8498d5912ebe37cd1601c077c2505baf3da3dbe12b37e8f2cf0 -size 14097574 diff --git a/scripts/pyaudio3dtools/__init__.py b/scripts/pyaudio3dtools/__init__.py index 9870fb6620..33a5d39126 100644 --- a/scripts/pyaudio3dtools/__init__.py +++ b/scripts/pyaudio3dtools/__init__.py @@ -43,10 +43,5 @@ class from . import ( audioarray, audiofile, - binauralrenderer, - hoadecoder, - spatialaudioconvert, spatialaudioformat, - spatialmetadata, ) -from .EFAP import EFAP diff --git a/scripts/pyaudio3dtools/audio3dtools.py b/scripts/pyaudio3dtools/audio3dtools.py index e6c03d7b73..abf1163041 100755 --- a/scripts/pyaudio3dtools/audio3dtools.py +++ b/scripts/pyaudio3dtools/audio3dtools.py @@ -47,245 +47,10 @@ logger.setLevel(logging.DEBUG) def main(): - parser = argparse.ArgumentParser( - description="Audio3DTools: Convert/Manipulate spatial audio files." + raise NotImplementedError( + f"These scripts have been deprecated! Please check out and use the latest version from https://forge.3gpp.org/rep/ivas-codec-pc/ivas-processing-scripts.git" ) - """ Required arguments """ - parser.add_argument( - "-i", - "--infiles", - required=True, - type=str, - help="input file *.wav or *.pcm or directory", - default=None, - ) - parser.add_argument( - "-o", - "--outdir", - required=True, - type=str, - help="output file *.wav or directory", - default="out", - ) - parser.add_argument( - "-f", - "--informat", - required=True, - type=str, - metavar="INFORMAT", - help="Input format (use -l/-L for a list)", - default=None, - ) - - """ Additional arguments """ - parser.add_argument( - "-F", - "--outformat", - type=str, - metavar="OUTFORMAT", - help="Output format (default = %(default)s, same as input format). 
Can be a custom loudspeaker layout file.", - default=None, - ) - parser.add_argument( - "-s", - "--infs", - type=int, - help="Input sampling rate (Hz) (default = %(default)s, deduced for input file)", - default=None, - ) - parser.add_argument( - "-S", - "--outfs", - type=int, - help="Output sampling rate (Hz) (default = %(default)s, same as input)", - default=None, - ) - parser.add_argument( - "-c", - "--inchan", - type=int, - help="Input number of channels (default = %(default)s, deduced for input file)", - default=None, - ) - parser.add_argument( - "-m", - "--metadata", - type=str, - nargs="+", - help="list of input metadata files (only relevant for ISM and MASA input)", - default=None, - ) - parser.add_argument( - "-fc", - "--outfc", - type=int, - help="Cut-off freq for eventual low-pass filtering (default = %(default)s)", - default=None, - ) - parser.add_argument( - "-T", - "--trajectory", - type=str, - help="Head-tracking trajectory file (default = %(default)s)", - default=None, - ) - parser.add_argument( - "-n", - "--normalize", - default=None, - type=int, - help="Normalize to given loudness with --LOUDNESS_TOOL (default = %(default)s)", - ) - - """ Miscellaneous or meta arguments """ - parser.add_argument( - "-b", - "--binaural", - help="Binauralize output *in addition to converting to output format", - action="store_true", - ) - parser.add_argument( - "--binaural_dataset", - type=str, - help="Dataset to use for binaural rendering (default = %(default)s)", - choices=["orange51", "orange52", "orange53", "orange54", "sadie"], - default="orange53", - ) - parser.add_argument( - "-l", - "--list", - help="list all supported spatial audio formats", - action="store_true", - ) - parser.add_argument( - "-L", - "--long", - help="list all supported spatial audio formats with long description", - action="store_true", - ) - parser.add_argument( - "-lt", - "--loudness_tool", - default="bs1770demo", - type=str, - help="Loudness tool to use: bs1770demo [default] or sv56demo (tool must be in $PATH or a path to the binary)", - ) - parser.add_argument( - "-rn", - "--dont-rename", - help="Disable default behaviour of renaming output files _.", - action="store_true", - ) - args = parser.parse_args() - - # Set up logging handlers - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - console_handler.setFormatter(logging.Formatter("%(message)s")) - - # Configure loggers - LOGGER_FORMAT = "%(asctime)s | %(name)-12s | %(levelname)-8s | %(message)s" - LOGGER_DATEFMT = "%m-%d %H:%M" - logging.basicConfig( - format=LOGGER_FORMAT, - datefmt=LOGGER_DATEFMT, - level=logging.INFO, - handlers=[console_handler], - ) - logger.info("Audio3DTools") - logger.info( - "Attention: you are using an older version of the pyaudio3dtools scripts (not including ISM-> binaural reference renderer or loudness tool)" - ) - logger.info("For the newest version see branch python_scripts_updates") - - if args.list is True or args.long is True: - logger.info("===Supported spatial audio formats===") - spatialaudioformat.Format.list_all(args.long) - - elif args.infiles is not None: - logger.info("===Convert spatial audio file===") - # Input folder can be a path, a file or a list of files - if os.path.isdir(args.infiles): - path = args.infiles - audio_list = [ - os.path.join(path, f) for f in os.listdir(path) if f.endswith((".wav")) - ] - else: - audio_list = [args.infiles] - - outdir = args.outdir - _, output_ext = os.path.splitext(os.path.basename(outdir)) - if (len(audio_list) == 1) and ( - 
(output_ext.lower() == ".wav") or (output_ext.lower() == ".pcm") - ): - outfile = outdir - else: - outfile = None - if not os.path.exists(outdir): - os.makedirs(outdir) - - for infile in audio_list: - logger.info(f" process {infile}") - - _, input_ext = os.path.splitext(os.path.basename(infile)) - - if outfile is None: - outfile = os.path.basename(infile) - if not args.dont_rename: - if args.outformat is not None: - outfile = outfile.replace(input_ext, f"_{args.outformat}.wav") - else: - outfile = outfile.replace(input_ext, ".out.wav") - outfile = os.path.join(outdir, outfile) - - spatialaudioconvert.spatial_audio_convert( - infile, - outfile, - in_format=args.informat, - in_fs=args.infs, - in_nchans=args.inchan, - in_meta_files=args.metadata, - out_format=args.outformat, - out_fs=args.outfs, - out_fc=args.outfc, - output_loudness=args.normalize, - loudness_tool=args.loudness_tool, - trajectory=args.trajectory, - binaural_dataset=args.binaural_dataset, - ) - - logger.info(f" Output {outfile}") - - if args.binaural: - if args.outformat.startswith("BINAURAL"): - raise SystemExit( - "BINAURAL output format can not be binauralized again!" - ) - - _, output_ext = os.path.splitext(os.path.basename(outfile)) - outfile_bin = outfile.replace(output_ext, "_BINAURAL.wav") - logger.info(f" Output binaural {outfile_bin}") - - spatialaudioconvert.spatial_audio_convert( - in_file=outfile, - out_file=outfile_bin, - in_format=args.outformat, - in_fs=args.outfs, - in_meta_files=args.metadata, - out_format="BINAURAL", - output_loudness=args.normalize, - loudness_tool=args.loudness_tool, - trajectory=args.trajectory, - binaural_dataset=args.binaural_dataset, - ) - - outfile = None - else: - raise Exception( - "Input file must be provided for conversion and audio manipulation." 
- ) - if __name__ == "__main__": main() diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py index 87fc50b463..740c40c3c6 100644 --- a/scripts/pyaudio3dtools/audioarray.py +++ b/scripts/pyaudio3dtools/audioarray.py @@ -221,7 +221,7 @@ def cut(x: np.ndarray, limits: Tuple[int, int]) -> np.ndarray: return y -def compare(ref: np.ndarray, test: np.ndarray, fs: int, per_frame: bool=True) -> dict: +def compare(ref: np.ndarray, test: np.ndarray, fs: int, per_frame: bool = True) -> dict: """Compare two audio arrays Parameters @@ -250,7 +250,7 @@ def compare(ref: np.ndarray, test: np.ndarray, fs: int, per_frame: bool=True) -> "nsamples_diff_percentage": 0.0, "first_diff_pos_sample": -1, "first_diff_pos_channel": -1, - "first_diff_pos_frame": -1 + "first_diff_pos_frame": -1, } if per_frame: result["max_abs_diff_pos_frame"] = 0 @@ -269,7 +269,7 @@ def compare(ref: np.ndarray, test: np.ndarray, fs: int, per_frame: bool=True) -> max_diff_pos[0][0] // framesize, max_diff_pos[1][0], ] - + first_diff_pos = np.nonzero(diff) first_diff_pos = [ first_diff_pos[0][0], @@ -454,7 +454,9 @@ def get_framewise(x: np.ndarray, chunk_size: int, zero_pad=False) -> np.ndarray: if x.shape[0] % chunk_size: last_chunk = x[n_frames * chunk_size :, :] if zero_pad: - yield np.pad(last_chunk, [[0, chunk_size - (x.shape[0] % chunk_size)], [0, 0]]) + yield np.pad( + last_chunk, [[0, chunk_size - (x.shape[0] % chunk_size)], [0, 0]] + ) else: yield last_chunk diff --git a/scripts/pyaudio3dtools/audiofile.py b/scripts/pyaudio3dtools/audiofile.py index 77be42285f..5b6ffcdced 100644 --- a/scripts/pyaudio3dtools/audiofile.py +++ b/scripts/pyaudio3dtools/audiofile.py @@ -695,7 +695,6 @@ def print_plot_play(x: np.ndarray, fs: int, text: Optional[str] = "") -> None: def get_wav_file_info(filename: str) -> dict: - """ Get the format information from a WAV file. Return a dictionary with the format information @@ -713,7 +712,6 @@ def get_wav_file_info(filename: str) -> dict: fid = open(filename, "rb") try: - riff = fid.read(4) if riff == b"RIFF": @@ -769,7 +767,9 @@ def get_wav_file_info(filename: str) -> dict: if __name__ == "__main__": import argparse - parser = argparse.ArgumentParser(description="Tool for basic operations on audio files") + parser = argparse.ArgumentParser( + description="Tool for basic operations on audio files" + ) subparsers = parser.add_subparsers() def pre_trim_wrapper(pre_trim_args): @@ -779,14 +779,21 @@ if __name__ == "__main__": print("Delay currently only supported with WAV file input") exit(-1) - x, _ = readfile(pre_trim_args.input_file, fs=input_file_properties["fs"], nchannels=input_file_properties["channels"]) + x, _ = readfile( + pre_trim_args.input_file, + fs=input_file_properties["fs"], + nchannels=input_file_properties["channels"], + ) trim = int(pre_trim_args.amount_in_ms * input_file_properties["fs"] / 1000) x = x[trim:] writefile(pre_trim_args.output_file, x, fs=input_file_properties["fs"]) - - parser_delay = subparsers.add_parser("pre-trim", help="Trim a given amount of content from the beginning of the file") - parser_delay.add_argument("amount_in_ms", type=float, help="Trim amount milliseconds.") + parser_delay = subparsers.add_parser( + "pre-trim", help="Trim a given amount of content from the beginning of the file" + ) + parser_delay.add_argument( + "amount_in_ms", type=float, help="Trim amount milliseconds." 
+ ) parser_delay.add_argument("input_file") parser_delay.add_argument("output_file") parser_delay.set_defaults(func=pre_trim_wrapper) @@ -799,7 +806,8 @@ if __name__ == "__main__": convertfile(convert_args.input_file, convert_args.output_file) parser_convert = subparsers.add_parser( - "convert", help="Convert file format (output file extension determines output format)" + "convert", + help="Convert file format (output file extension determines output format)", ) parser_convert.add_argument("input_file") parser_convert.add_argument("output_file") diff --git a/scripts/pyaudio3dtools/binauralrenderer.py b/scripts/pyaudio3dtools/binauralrenderer.py deleted file mode 100644 index 2567b433db..0000000000 --- a/scripts/pyaudio3dtools/binauralrenderer.py +++ /dev/null @@ -1,782 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. 
-""" - -import logging -import os -import timeit -from typing import Tuple - -import numpy as np -import scipy.interpolate as interp -import scipy.io as sio -import scipy.signal as sig -from pyaudio3dtools.rotation import rotateHOA, rotateISM, rotateMC - -from pyaudio3dtools import audioarray, spatialaudioformat, spatialaudioconvert -from pyaudio3dtools.constants import * - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - -"""" Helper functions """ - - -def NS2SA(fs, x): - return int(int(fs / 100) * ((x) / 100) / 100000) - - -def read_hrirs_from_mat( - hrirs_path: str = "/HRIRs_mat/ORANGE_HRIR_53_48000_combined.mat", -) -> np.ndarray: - """Read HRIRs from Matlab dictionary file mat - - Parameters - ---------- - hrirs_path: str - HRTFs file name (.mat) - - Returns - ------- - IR: np.ndarray - array of impulse responses - SourcePosition: np.ndarray - array of source positions corresponding to the impulse responses - - """ - script_path = os.path.dirname(os.path.abspath(__file__)) - hrirs_filename = script_path + hrirs_path - - mat_contents = sio.loadmat(hrirs_filename) - IR = mat_contents["IR"] - try: - SourcePosition = mat_contents["SourcePosition"] - except KeyError: - SourcePosition = None - - logger.debug(f"Loaded HRIRs: {hrirs_filename}, {IR.shape[0]} by {IR.shape[1]}") - - return IR, SourcePosition - - -def get_IR( - in_spfmt: spatialaudioformat.Format, - out_spfmt: spatialaudioformat.Format, - dataset: str, -) -> Tuple[np.ndarray, np.ndarray, float]: - """get_IR - - Parameters - ---------- - in_spfmt: spatialaudioformat - input spatial audio format - out_spfmt: spatialaudioformat - output spatial audio format - dataset: str - name of the HRIRs or BRIRs dataset - - Returns - ------- - IR: np.ndarray - desired impulse response array - SourcePosition: np.ndarray - source positions of corresponding IRs - - """ - # override for BRIRs, currently only one option - if out_spfmt.name == "BINAURAL_ROOM": - dataset = "mozart_iis" - - # dataset file prefix - if dataset.lower().startswith("sadie"): - prefix = "/HRIRs_mat/SADIE_II_D2_48K_24bit_256tap" - elif dataset.lower().startswith("orange"): - prefix = f"/HRIRs_mat/ORANGE_HRIR_{dataset.replace('_full', '')[-2:]}_48000" - elif dataset.lower().startswith("mozart"): - prefix = "/BRIRs_mat/IIS_BRIR_officialMPEG_222UC" - else: - raise ValueError(f"Unsupported dataset '{dataset}' for HRIRs") - - # dataset file suffix - if in_spfmt.name.startswith("ISM") or in_spfmt.altname.startswith("CUSTOM_LS"): - suffix = "full.mat" - elif in_spfmt.isloudspeaker and in_spfmt.nchannels > 1: - suffix = "combined.mat" - elif in_spfmt.ambi_order > 0 or in_spfmt.name.upper() == "MONO": - suffix = "SBA3.mat" - else: - raise ValueError( - f"Unsupported format '{in_spfmt.name}' for dataset '{dataset}' for HRIRs" - ) - - IR, SourcePosition = read_hrirs_from_mat("_".join([prefix, suffix])) - - latency_smp = float(np.min(np.argmax(np.sum(np.abs(IR), axis=(1)), axis=(0)))) - - if in_spfmt.name.startswith("MONO"): - IR = IR[:, :, :1] # use omni/W from SBA - elif in_spfmt.name.startswith("STEREO"): - IR = IR[:, :, :2] # use L and R channels - elif in_spfmt.isloudspeaker and not in_spfmt.altname.startswith("CUSTOM_LS"): - # extract positions from the combined file - tmp_spfmt = spatialaudioformat.Format("COMBINED") - IR_tmp = IR.copy() - IR = np.zeros([IR_tmp.shape[0], IR_tmp.shape[1], in_spfmt.nchannels]) - - ir_index = 0 - for i in range(tmp_spfmt.nchannels): - for j in range(in_spfmt.nchannels): - if ( - 
tmp_spfmt.ls_azi[i] == in_spfmt.ls_azi[j] - and tmp_spfmt.ls_ele[i] == in_spfmt.ls_ele[j] - ): - if j != in_spfmt.lfe_index[0]: - IR[:, :, ir_index] = IR_tmp[:, :, i] - ir_index += 1 - - return IR, SourcePosition, latency_smp - - -def FindFilter(SourcePosition: np.ndarray, azi: float, ele: float) -> int: - """Find measurement closest to the selected direction, - reimplemented roughly along the lines of ConvBinauralRenderer.m - - Parameters - ---------- - SourcePosition: np.ndarray - Source IR positions - azi: float - desired response azimuth - ele: float - desired response elevation - - Returns - ------- - i_dir: int - index of nearest SourcePosition - """ - if azi < 0: - azi = azi + 360.0 - - if ele < 0: - ele = ele + 360.0 - - delta_azi = np.deg2rad(np.abs(azi - SourcePosition[:, 0])) - dist = np.arccos( - np.sin(np.deg2rad(SourcePosition[:, 2])) * np.sin(np.deg2rad(ele)) - + np.cos(np.deg2rad(SourcePosition[:, 1])) - * np.cos(np.deg2rad(ele)) - * np.cos(delta_azi) - ) - - i_dir = np.argmin(dist) - - # print('Direction closest to {}, {} is {} with angles {}, {} and distance {}\n'.format( - # azi, ele, i_dir, SourcePosition[i_dir,0], SourcePosition[i_dir,1], dist[i_dir] - # ) - # ) - - return i_dir - - -""" Core binaural rendering functions """ - - -def binaural_fftconv( - x: np.ndarray, IR: np.ndarray, nchannels: int, lfe_index: list = [] -) -> np.ndarray: - """Binauralization using fft convolution - - Parameters - ---------- - x: np array - input multi-channel array - IR: np array - HRIRs array - nchannels: int - maximum number of channels to process - lfe_index: list - list of LFE channel indices - - Returns - ------- - y: np.ndarray - output convolved signal array - - """ - y = np.zeros([x.shape[0], 2]) - for chan_idx in range(min(x.shape[1], nchannels)): - if chan_idx not in lfe_index: - y[:, 0] = np.add( - y[:, 0], - sig.fftconvolve( - x[:, chan_idx].astype(float), IR[:, 0, chan_idx] - ).astype(float)[: x.shape[0]], - ) - y[:, 1] = np.add( - y[:, 1], - sig.fftconvolve( - x[:, chan_idx].astype(float), IR[:, 1, chan_idx] - ).astype(float)[: x.shape[0]], - ) - else: - logger.debug(f" Mute LFE channel of index: {str(chan_idx)}") - - return y - - -def binaural_fftconv_framewise( - x: np.ndarray, - IR: np.ndarray, - SourcePosition: np.ndarray, - azi: np.ndarray = None, - ele: np.ndarray = None, - frame_len: int = (IVAS_FRAME_LEN_MS // 4) * 48, - interp_method="linear", - verbose=False, -) -> np.ndarray: - """Binauralization using fft convolution with frame-wise processing - supports rotation on trajectories with interpolation between measured Source - positions, reimplemented roughly along the lines of ConvBinauralRenderer.m - - Parameters - ---------- - x: np.ndarray - input multi-channel array - IR: np.ndarray - HRIRs array - SourcePosition: np.ndarray - positions of the source in the measurements in IR - azi: np.ndarray - azimuth angles for all frames - ele: np.ndarray - elevation angles for all frames - frame_len: int - frame length, optional, default = (IVAS_FRAME_LEN_MS // 4) * 48000 - interp_method: - interpolation method, optional, default = linear - - - Returns - ------- - y: np.ndarray - output binaural signal array - - """ - - sig_len = x.shape[0] - frame_len = (IVAS_FRAME_LEN_MS // 4) * 48 - N_frames = int(sig_len / frame_len) - - N_HRIR_taps = IR.shape[2] - - if azi is None or ele is None: - azi = np.repeat([0.0], N_frames) - ele = np.repeat([0.0], N_frames) - elif len(azi) < N_frames or len(ele) < N_frames: - azi = np.concatenate( - [np.repeat(azi, N_frames // len(azi)), 
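# Illustrative sketch of the nearest-measurement search in FindFilter above:
# angular (great-circle) distance between the target direction and each
# measured source position, smallest wins. Column layout [azi, ele] in
# degrees is assumed for this toy grid.
import numpy as np

def nearest_direction(pos_deg: np.ndarray, azi: float, ele: float) -> int:
    azi_m, ele_m = np.deg2rad(pos_deg[:, 0]), np.deg2rad(pos_deg[:, 1])
    azi_t, ele_t = np.deg2rad(azi), np.deg2rad(ele)
    dist = np.arccos(
        np.clip(
            np.sin(ele_m) * np.sin(ele_t)
            + np.cos(ele_m) * np.cos(ele_t) * np.cos(azi_m - azi_t),
            -1.0, 1.0,
        )
    )
    return int(np.argmin(dist))

grid = np.array([[0.0, 0.0], [90.0, 0.0], [0.0, 90.0]])
assert nearest_direction(grid, 80.0, 5.0) == 1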
azi[: N_frames % len(azi)]] - ) - ele = np.concatenate( - [np.repeat(ele, N_frames // len(ele)), ele[: N_frames % len(ele)]] - ) - - iGs = np.zeros([N_frames + 1], dtype=int) - mGs = np.zeros([N_frames + 1], dtype=int) - - # store trajectory as a sequence of indices of source positions - # on the HRTF database in a compressed format such that, for - # each new measurement point the trajectory hits, the sample index - # is stored in mGs and the index of the measurement in iG - # the number of measurement points hit by the trajectory is nsp - isp = 0 - iGs[0] = FindFilter(SourcePosition, azi[0], ele[0]) - mGs[0] = 0 - for i_frame in range(1, N_frames): - iG = FindFilter(SourcePosition, azi[i_frame], ele[i_frame]) - if iG != iGs[isp]: - isp += 1 - iGs[isp] = iG - mGs[isp] = i_frame * frame_len + 1 - nsp = isp + 1 - - # set last fence post explicitly - if mGs[nsp] < sig_len: - iGs[nsp] = iG - mGs[nsp] = sig_len - nsp = nsp + 1 - - T_rev = frame_len + N_HRIR_taps - 1 - N_rev = int(np.ceil(T_rev / frame_len)) - - if verbose: - print(" N_rev = ", N_rev) - - fastcode = True - if N_rev > 5: - if verbose: - print( - " __ __ ___ ___ _ _ ___ _ _ ___ " - ) - print( - r" \ \ / / / \ | _ \ | \| | |_ _| | \| | / __|" - ) - print( - r" \ \/\/ / | - | | / | . | | | | . | | (_ |" - ) - print( - r" \_/\_/ |_|_| |_|_\ |_|\_| |___| |_|\_| \___|" - ) - print( - " " - ) - print( - " You are using very long filters! This will be slooooow and use a lot of memory!" - ) - else: - fastcode = False - - if fastcode and verbose: - print( - " __ __ ___ ___ _ _ ___ _ _ ___ " - ) - print( - r" \ \ / / / \ | _ \ | \| | |_ _| | \| | / __|" - ) - print( - r" \ \/\/ / | - | | / | . | | | | . | | (_ |" - ) - print( - r" \_/\_/ |_|_| |_|_\ |_|\_| |___| |_|\_| \___|" - ) - print( - " " - ) - print( - " To speed up the otherwise extremely slow calculation, we only calculate the " - ) - print( - " responses of the latest frame with the latest filters instead of the full " - ) - print( - " integrals. This is much faster but much more prone to clicks. Inspect your " - ) - print( - " output signals carefully! To change this behavior, go to binauralrenderer.py " - ) - print( - " and set fastcode to False. 
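# Illustrative sketch of the per-frame crossfade at the heart of
# binaural_fftconv_framewise above: each frame is convolved with the current
# and the next filter and the two results are linearly crossfaded to avoid
# clicks at filter switches. Synthetic 1-D data assumed; the real code also
# carries the convolution tail T_rev across frames.
import numpy as np
import scipy.signal as sig

frame_len, taps = 480, 64
x = np.random.default_rng(1).standard_normal(frame_len)
g_old = np.zeros(taps); g_old[0] = 1.0   # passthrough
g_new = np.zeros(taps); g_new[1] = 1.0   # one-sample delay

t_rev = frame_len + taps - 1
fade_in = np.linspace(0.0, 1.0, t_rev, endpoint=False)
fade_out = 1.0 - fade_in

y = fade_out * sig.oaconvolve(x, g_old) + fade_in * sig.oaconvolve(x, g_new)
assert y.shape[0] == t_rev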
" - ) - - y = np.zeros([sig_len + T_rev, 2]) - y0 = np.zeros([N_rev, sig_len + T_rev, 2]) - - fade_in = np.arange(frame_len) / (frame_len - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - for i_ear in [0, 1]: - - Gs = IR[ - iGs[0:nsp], i_ear, : - ] # Green's function along the trajectory sampled by the measurement points - interp_G = interp.interp1d( - mGs[0:nsp], Gs, kind=interp_method, axis=0 - ) # interpolator for Green's function between those points - - G = interp_G(np.arange(0, sig_len, frame_len)) - - t0 = timeit.default_timer() - - if fastcode: - for i_frame in range(N_frames): - - i1 = i_frame * frame_len - i2 = (i_frame + 1) * frame_len - i2p = i1 + T_rev - - fade_out = np.linspace(0.0, 1.0, T_rev, endpoint=False) - fade_in = 1.0 - fade_out - - for j_frame in [0, 1]: - G_n_m = G[min(j_frame + i_frame, N_frames - 1), :] - y0[j_frame, i1:i2p, i_ear] = sig.oaconvolve( - np.squeeze(x[i1:i2]), G_n_m - ) - - y[i1:i2p, i_ear] = ( - np.squeeze(fade_out) * y0[0, i1:i2p, i_ear] - + np.squeeze(fade_in) * y0[1, i1:i2p, i_ear] - ) - - t1 = timeit.default_timer() - fps = (i_frame + 1) / (t1 - t0) - eta = (2 * N_frames - (i_frame + 1) + i_ear * N_frames) / fps - - if verbose: - print( - " Frame {}/{} on ear {}/2 done at {: 3.1f} fps, ETA {: 6.0f} s ".format( - i_frame + 1, N_frames, i_ear + 1, fps, eta - ), - end="\r", - ) - - else: - for i_frame in range(N_frames): - - i1 = i_frame * frame_len - i2 = (i_frame + 1) * frame_len - i2p = i1 + T_rev - - y0[:] = 0.0 - for j_frame in range( - max(0, i_frame - N_rev), min(i_frame + 1, N_frames) - ): - - j1 = j_frame * frame_len - j2 = (j_frame + 1) * frame_len - j2p = j1 + T_rev - - G0 = G[i_frame] - G1 = G[min(i_frame + 1, N_frames - 1)] - - y0[0, j1:j2p, i_ear] += sig.oaconvolve(np.squeeze(x[j1:j2]), G0) - y0[1, j1:j2p, i_ear] += sig.oaconvolve(np.squeeze(x[j1:j2]), G1) - - y[i1:i2, i_ear] = ( - np.squeeze(fade_out) * y0[0, i1:i2, i_ear] - + np.squeeze(fade_in) * y0[1, i1:i2, i_ear] - ) - - t1 = timeit.default_timer() - fps = (i_frame + 1) / (t1 - t0) - eta = (2 * N_frames - (i_frame + 1) + i_ear * N_frames) / fps - - if verbose: - print( - " Frame {}/{} on ear {}/2 done at {: 3.1f} fps, ETA {: 6.0f} s ".format( - i_frame + 1, N_frames, i_ear + 1, fps, eta - ), - end="\r", - ) - - if verbose: - print("") - - return y[0:sig_len] - - -def binaural_render_LFE( - x: np.ndarray, - fs: int = 48000, - lfe_index: list = [3], - LFE_gain: float = 10 ** (5.5 / 20), - latency_smp: int = 0, -) -> np.ndarray: - """ - Extract LFE from the given input and render - it binaurally, accounting for delay of the - """ - - lfe = x[:, lfe_index].copy() - - # if there is more than one LFE sum them into one - if lfe.shape[1] > 1: - lfe = np.sum(lfe, axis=1) - - # TODO tmu - disabled temporarily here, disabled in C - lfe_delay_ns = 0 - """ - # 120 Hz low-pass filtering for LFE using IVAS filter coefficients - if fs == 48000: - lfe = sig.sosfilt(IVAS_LPF_4_BUTTER_48K_SOS, lfe, axis=0) - else: - raise NotImplementedError("Only 48 kHz supported at the moment!") - - # 3.5ms LP filter delay from IVAS ROM - lfe_delay_ns = 0.0035 * 1e9 - lfe_delay_smp = round(lfe_delay_ns * fs / 1e9) - - # Delay LFE by the same amount as the HRTF delay - lfe = np.roll(lfe, round(latency_smp), axis=0) - lfe[0 : round(latency_smp), :] = 0 - """ - - # apply gain - lfe *= LFE_gain - - # duplicate for each binaural channel - lfe = np.hstack([lfe, lfe]) - - return lfe, lfe_delay_ns - - -""" Format specific wrapper functions """ - - -def render_custom_ls_binaural( - x: np.ndarray, - 
fs: int, - in_spfmt: spatialaudioformat.Format, - out_spfmt: spatialaudioformat.Format, - IR: np.ndarray, - SourcePosition: np.ndarray, - trajectory: np.ndarray, -) -> np.ndarray: - - ls_azi_all = in_spfmt.ls_azi - ls_ele_all = in_spfmt.ls_ele - lfe_index_all = in_spfmt.lfe_index - - logger.info(" Processing channels on custom LS layout") - azis = ", ".join([f"{a:7.2f}" for a in ls_azi_all]) - eles = ", ".join([f"{e:7.2f}" for e in ls_ele_all]) - logger.info(f" azi: {azis}") - logger.info(f" ele: {eles}") - logger.info(f" lfe_index: {lfe_index_all}") - - if out_spfmt.name == "BINAURAL_ROOM": - tmp_spfmt = spatialaudioformat.Format("7_1_4") - x = spatialaudioconvert.convert_mc(x, in_spfmt, tmp_spfmt) - ls_azi_all = tmp_spfmt.ls_azi - ls_ele_all = tmp_spfmt.ls_ele - lfe_index_all = tmp_spfmt.lfe_index - logger.info(f" {in_spfmt.name} -> {tmp_spfmt.name} -> {out_spfmt.name}") - - frame_len = (IVAS_FRAME_LEN_MS // 4) * (fs // 1000) - sig_len = x.shape[0] - N_frames = int(sig_len / frame_len) - - i_ls = 0 - y = np.zeros([sig_len, 2]) - for i_chan in range(x.shape[1]): - - # skip LFE - if i_chan in lfe_index_all: - continue - - # skip silent (or very low volume) channels - if np.allclose(x[:, i_chan], 0.0, atol=32.0): - continue - - ls_azi = np.repeat(ls_azi_all[i_ls], N_frames) - ls_ele = np.repeat(ls_ele_all[i_ls], N_frames) - - azi, ele = rotateISM(ls_azi, ls_ele, trajectory=trajectory) - - y += binaural_fftconv_framewise( - x[:, i_chan], - IR, - SourcePosition, - frame_len=frame_len, - azi=azi, - ele=ele, - verbose=False, - ) - i_ls += 1 - - return y - - -def render_ism_binaural( - x: np.ndarray, - fs: int, - IR: np.ndarray, - SourcePosition: np.ndarray, - trajectory: np.ndarray, - in_pos: np.ndarray, -) -> np.ndarray: - - frame_len = (IVAS_FRAME_LEN_MS // 4) * (fs // 1000) - sig_len = x.shape[0] - N_frames = int(sig_len / frame_len) - - # get ISM metadata and repeat it nsubframe times - pos_data = [] - for pos in in_pos: - pos_data.extend( - [pos["azimuth"], pos["elevation"]] for _ in range(pos["use_for_frames"]) - ) - pos_data = np.array(pos_data) - pos_data = np.repeat(pos_data, 4, axis=0) - - # extract positions only according to the audio duration - pos_data = pos_data[:N_frames, :] - - azi, ele = rotateISM(pos_data[:, 0], pos_data[:, 1], trajectory=trajectory) - - y = np.zeros([sig_len, 2]) - y += binaural_fftconv_framewise( - x, - IR, - SourcePosition, - frame_len=frame_len, - azi=azi, - ele=ele, - verbose=False, - ) - - return y - - -def render_masa_binaural( - x: np.ndarray, - fs: int, - in_spfmt: spatialaudioformat.Format, - IR: np.ndarray, - SourcePosition: np.ndarray, - trajectory: np.ndarray, -): - y = x[:, :2] - # TODO - return y - - -def render_ambi_ls_binaural( - x: np.ndarray, - fs: int, - in_spfmt: spatialaudioformat.Format, - IR: np.ndarray, - trajectory: np.ndarray, -) -> np.ndarray: - - y = x[:] - if trajectory is not None: - if in_spfmt.ambi_order > 0: - y = rotateHOA(y, trajectory) - if in_spfmt.isloudspeaker: - y = rotateMC(y, trajectory, in_spfmt) - - y = binaural_fftconv(y, IR, in_spfmt.nchannels, in_spfmt.lfe_index) - - return y - - -""" Wrapper function for generic binaural rendering """ - - -def binaural_rendering( - x: np.ndarray, - in_spfmt: spatialaudioformat.Format, - out_spfmt: spatialaudioformat.Format, - dataset: str = "orange53", - fs: int = 48000, - trajectory: str = None, - include_LFE: bool = False, - LFE_gain: float = 10 ** (5.5 / 20), - in_pos: dict = None, -): - """Binaural rendering - - Parameters - ---------- - x: np array - input multi-channel 
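# Illustrative sketch of the ISM metadata expansion in render_ism_binaural
# above: each (azimuth, elevation) entry is held for "use_for_frames" frames
# and then repeated 4x to subframe resolution before being clipped to the
# audio length. Field names are taken from the deleted code; values here are
# synthetic.
import numpy as np

in_pos = [
    {"azimuth": 30.0, "elevation": 0.0, "use_for_frames": 2},
    {"azimuth": -30.0, "elevation": 10.0, "use_for_frames": 1},
]
pos_data = []
for pos in in_pos:
    pos_data.extend(
        [pos["azimuth"], pos["elevation"]] for _ in range(pos["use_for_frames"])
    )
pos_data = np.repeat(np.array(pos_data), 4, axis=0)   # 4 subframes per frame
assert pos_data.shape == (12, 2)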
array - in_spfmt_name: str - name of input spatial format - dataset: str - name of the HRIRs or BRIRs dataset - fs: int - input/output sampling-rate (default 48kHz) - trajectory: str - path to trajectory file - - Returns - ------- - y: np.ndarray - output binaural signal array - - """ - - if trajectory is not None: - logger.info( - " performing rotation along trajectory from file {}".format(trajectory) - ) - - # resample to 48 kHz - y = audioarray.resample(x, fs, 48000) - delay_total_ns = 0 - - # get IR corresponding to the input and output formats - IR, SourcePosition, latency_smp = get_IR(in_spfmt, out_spfmt, dataset) - delay_total_ns += latency_smp / float(fs) * 1e9 - - # prepare LFE signal to be added to output - if include_LFE and in_spfmt.isloudspeaker and in_spfmt.lfe_index: - lfe, lfe_delay_ns = binaural_render_LFE( - x, 48000, in_spfmt.lfe_index, LFE_gain, latency_smp - ) - delay_total_ns += lfe_delay_ns - - # get binauralized signal based on format - if in_spfmt.altname.startswith("CUSTOM_LS"): - y = render_custom_ls_binaural( - x, fs, in_spfmt, out_spfmt, IR, SourcePosition, trajectory - ) - elif in_spfmt.name.startswith("ISM"): - if not in_pos: - raise ValueError("ISM metadata empty!") - y = render_ism_binaural( - x, - fs, - IR, - SourcePosition, - trajectory, - in_pos, - ) - elif in_spfmt.name.startswith("MASA"): - y = render_masa_binaural(x, fs, in_spfmt, IR, SourcePosition, trajectory) - elif in_spfmt.ambi_order > 0 or in_spfmt.isloudspeaker: - y = render_ambi_ls_binaural(x, fs, in_spfmt, IR, trajectory) - else: - raise NotImplementedError( - f"{in_spfmt.name} -> {out_spfmt.name}: format conversion not implemented" - ) - - # add LFE signal to output - if include_LFE and in_spfmt.isloudspeaker and in_spfmt.lfe_index: - # delay the binauralized signal by the LFE delay - lfe_delay_smp = NS2SA(fs, int(lfe_delay_ns)) - y = np.roll(y, lfe_delay_smp, axis=0) - y[0:lfe_delay_smp, :] = 0 - y += lfe - - # delay compensation - delay_total_smp = NS2SA(fs, delay_total_ns) - y = np.roll(y, -delay_total_smp, axis=0) - if delay_total_smp > 0: - y[-delay_total_smp:, :] = 0 - - # resample back to original rate - y = audioarray.resample(y, 48000, fs) - - return y diff --git a/scripts/pyaudio3dtools/hoadecoder.py b/scripts/pyaudio3dtools/hoadecoder.py deleted file mode 100644 index c37ceb3bdc..0000000000 --- a/scripts/pyaudio3dtools/hoadecoder.py +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. 
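# Illustrative sketch of the delay compensation at the end of
# binaural_rendering above: the rendered signal is advanced by the
# accumulated renderer delay with a negative roll and the wrapped-around
# tail is zeroed. Toy ramp signal assumed.
import numpy as np

y = np.arange(10, dtype=float).reshape(-1, 1)
delay_total_smp = 3
y = np.roll(y, -delay_total_smp, axis=0)
if delay_total_smp > 0:
    y[-delay_total_smp:, :] = 0
assert y[0, 0] == 3.0 and np.all(y[-3:] == 0)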
This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import logging -import os -from typing import Optional - -import numpy as np -from scipy.special import lpmv - -from pyaudio3dtools import spatialaudioformat -from pyaudio3dtools.constants import T_DESIGN_11_AZI, T_DESIGN_11_ELE -from pyaudio3dtools.EFAP import EFAP - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - - -def get_hoa_mtx( - ambi_order: int, - spkrlayout: spatialaudioformat, - norm: Optional[str] = "sn3d", - rE_weight: Optional[bool] = False, - intensity_panning: Optional[bool] = True, -) -> np.ndarray: - nharm = spatialaudioformat.Format.nchannels_from_ambiorder(ambi_order) - if spkrlayout.name == "MONO": - mtx_hoa_dec = np.zeros([1, nharm]) - mtx_hoa_dec[0, 0] = 1 - elif spkrlayout.name == "STEREO": - mtx_hoa_dec = np.zeros([2, nharm]) - # Cardioids +/- 90 degrees - mtx_hoa_dec[0, 0] = 0.5 - mtx_hoa_dec[0, 1] = 0.5 - mtx_hoa_dec[1, 0] = 0.5 - mtx_hoa_dec[1, 1] = -0.5 - elif spkrlayout.isloudspeaker: - Y_td = getRSH( - T_DESIGN_11_AZI, - T_DESIGN_11_ELE, - ambi_order, - norm="ortho", - ) - Y_td *= np.sqrt(4 * np.pi) - - n_ls_woLFE = spkrlayout.nchannels - len(spkrlayout.lfe_index) - ls_azi_woLFE = np.delete(spkrlayout.ls_azi, spkrlayout.lfe_index).astype(float) - ls_ele_woLFE = np.delete(spkrlayout.ls_ele, spkrlayout.lfe_index).astype(float) - - panner = EFAP(ls_azi_woLFE, ls_ele_woLFE, intensity_panning) - G_td = panner.pan(T_DESIGN_11_AZI, T_DESIGN_11_ELE) - - mtx_hoa_dec = (G_td.T @ Y_td.T) / T_DESIGN_11_AZI.size - - if norm == "sn3d": - mtx_hoa_dec = mtx_hoa_dec @ np.diag(sn2n(ambi_order)) - elif norm == "ortho": - mtx_hoa_dec *= np.sqrt(4 * np.pi) - - if rE_weight: - a_n = rE_weight(ambi_order) - nrg_pre = np.sqrt(len(n_ls_woLFE) / np.sum(a_n**2)) - mtx_hoa_dec = mtx_hoa_dec @ np.diag(a_n) * nrg_pre - - mtx_hoa_dec = np.insert( - mtx_hoa_dec, spkrlayout.lfe_index, np.zeros(nharm), axis=0 - ) - else: - raise ValueError( - f"Unsupported spatial audio format for ALLRAD: {spkrlayout.name}" - ) - - return mtx_hoa_dec - - -def hoa_linear_decoding(signal_in: np.ndarray, mtx_hoa_dec: np.ndarray) -> np.ndarray: - if not signal_in.shape[1] == mtx_hoa_dec.shape[1]: - raise Exception( - "Input number of channels must be equal to renderer matrix second dimension" - ) - - signal_out = np.dot(signal_in, mtx_hoa_dec.transpose()) - - logger.debug(f"Signal out: {signal_out.shape[0]} by {signal_out.shape[1]}") - - return signal_out - - -def rE_weight(order: int) -> np.ndarray: - return 
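# Illustrative sketch of the STEREO branch of get_hoa_mtx above: left/right
# cardioids at +/-90 degrees reduce to W/2 +/- Y/2 on an SN3D FOA signal
# (ACN channel order W, Y, Z, X assumed).
import numpy as np

mtx = np.zeros((2, 4))
mtx[0, 0], mtx[0, 1] = 0.5, 0.5      # L = 0.5*W + 0.5*Y
mtx[1, 0], mtx[1, 1] = 0.5, -0.5     # R = 0.5*W - 0.5*Y

foa = np.random.default_rng(6).standard_normal((480, 4))
stereo = foa @ mtx.T
assert stereo.shape == (480, 2)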
np.array( - [ - lpmv(0, l, np.cos(np.deg2rad(137.9) / (order + 1.51))) - for l in range(order + 1) - for _ in range(-l, l + 1) - ] - ).T - - -def n2sn(order: int) -> np.ndarray: - return np.array( - [1.0 / np.sqrt(2 * l + 1) for l in range(order + 1) for _ in range(-l, l + 1)] - ) - - -def sn2n(order: int) -> np.ndarray: - return np.array( - [np.sqrt(2 * l + 1) for l in range(order + 1) for _ in range(-l, l + 1)] - ) - - -def getRSH( - azi: np.ndarray, - ele: np.ndarray, - ambi_order: int, - norm: Optional[str] = "sn3d", - degrees: Optional[bool] = True, -) -> np.ndarray: - """ - Returns real spherical harmonic response for the given position(s) - """ - if degrees: - azi = np.deg2rad(azi) - ele = np.deg2rad(ele) - - LM = np.array([(l, m) for l in range(ambi_order + 1) for m in range(-l, l + 1)]) - - response = np.zeros([LM.shape[0], azi.shape[0]]) - - # trig_term * legendre * uncondon - for i, (l, m) in enumerate(LM): - # N3D norm - response[i, :] = np.sqrt( - ((2 * l + 1) * np.math.factorial(l - np.abs(m))) - / (4 * np.pi * np.math.factorial(l + np.abs(m))) - ) - - # trig term - if m < 0: - response[i, :] *= np.sqrt(2) * np.sin(azi * np.abs(m)) - elif m == 0: - pass # response[i,:] *= 1 - else: - response[i, :] *= np.sqrt(2) * np.cos(azi * m) - - # legendre polynomial - response[i, :] *= lpmv(np.abs(m), l, np.sin(ele)) * ((-1) ** np.abs(m)) - - if norm == "sn3d": - response *= np.sqrt(4 * np.pi) - response[:] = np.diag(n2sn(ambi_order)) @ response - elif norm == "n3d": - response *= np.sqrt(4 * np.pi) - else: - pass # ortho - - return response diff --git a/scripts/pyaudio3dtools/masarenderer.py b/scripts/pyaudio3dtools/masarenderer.py deleted file mode 100644 index ff9a3612a4..0000000000 --- a/scripts/pyaudio3dtools/masarenderer.py +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. 
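# Illustrative sketch of the SN3D <-> N3D channel scaling from hoadecoder
# above: each ambisonic degree l carries the factor sqrt(2l+1) (to N3D) or
# its inverse (to SN3D), repeated over the 2l+1 orders of that degree.
import numpy as np

def sn2n(order: int) -> np.ndarray:
    return np.array(
        [np.sqrt(2 * l + 1) for l in range(order + 1) for _ in range(-l, l + 1)]
    )

# FOA: W unscaled, the three first-degree channels scaled by sqrt(3)
assert np.allclose(sn2n(1), [1.0, np.sqrt(3), np.sqrt(3), np.sqrt(3)])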
- - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - - -import os -import shutil -import subprocess as sp -from tempfile import TemporaryDirectory - -import numpy as np - -from pyaudio3dtools.audiofile import readfile, writefile -from pyaudio3dtools.spatialaudioformat import Format - - -def render_masa( - in_sig: str, - in_meta: str, - in_spfmt: Format, - out_spfmt: Format, -) -> np.ndarray: - """Python wrapper for masaRenderer binaray - - Parameters - ---------- - in_sig: np.ndarray - Input signal with MASA transport channels - in_meta: str - Input MASA metadata file - in_spfmt: Format - Input spatial audio format - out_spfmt: Format - Output spatial audio format - - Returns - ------- - out_sig: np.ndarray - Rendered signal - fs : int - Sampling frequency (always 48 kHz for masaRenderer) - """ - - if shutil.which("masaRenderer") is None: - raise FileNotFoundError("The masaRenderer binary was not found in path!") - - with TemporaryDirectory() as tmp_dir: - MASA_RENDERER_CMD = [ - "masaRenderer", - "", # outputMode -LS51, -LS714 or BINAURAL - "", # input PCM - in_meta[0], - "", # output PCM - ] - - cmd = MASA_RENDERER_CMD[:] - if out_spfmt.name.startswith("BINAURAL"): - cmd[1] = "-BINAURAL" - out_nchan = 2 - elif out_spfmt.name == "5_1": - cmd[1] = "-LS51" - out_nchan = 6 - else: - cmd[1] = "-LS714" - out_nchan = 12 - - tmp_in = os.path.join(tmp_dir, "tmp_masa_in.pcm") - tmp_out = os.path.join(tmp_dir, "tmp_masa_out.pcm") - - cmd[2] = tmp_in - cmd[4] = tmp_out - - writefile(tmp_in, in_sig, 48000) - - try: - result = sp.run(cmd, check=True, capture_output=True, text=True) - except sp.CalledProcessError as e: - raise SystemError( - f"Command returned non-zero exit status ({e.returncode}): {' '.join(e.cmd)}\n{e.stderr}\n{e.stdout}" - ) - - out_sig, _ = readfile(tmp_out, out_nchan, 48000) - - return out_sig diff --git a/scripts/pyaudio3dtools/quaternions/__init__.py b/scripts/pyaudio3dtools/quaternions/__init__.py deleted file mode 100644 index 8f1f04c369..0000000000 --- a/scripts/pyaudio3dtools/quaternions/__init__.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. 
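# Illustrative sketch of the external-binary pattern used by render_masa
# above: run the tool with subprocess and surface stderr on failure.
# sys.executable stands in for the real "masaRenderer" binary so this
# sketch actually runs.
import subprocess as sp
import sys

cmd = [sys.executable, "-c", "print('rendered')"]
try:
    result = sp.run(cmd, check=True, capture_output=True, text=True)
except sp.CalledProcessError as e:
    raise SystemError(f"non-zero exit ({e.returncode}): {e.stderr}")
print(result.stdout.strip())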
This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -""" -Quaternions -==== - -Provides - Handling of quaternions in the same conventions as in IVAS and the Matlab scripts - -Imports -------- -functions -""" -from . import functions diff --git a/scripts/pyaudio3dtools/quaternions/functions.py b/scripts/pyaudio3dtools/quaternions/functions.py deleted file mode 100644 index 6b30ccb5ee..0000000000 --- a/scripts/pyaudio3dtools/quaternions/functions.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. 
- - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -from typing import Tuple -import numpy as np - - -def Quat2Euler(quat: np.ndarray, degrees: bool = True): - "Convert Quaternion to Euler angles" - - sinr = +2.0 * (quat[..., 0] * quat[..., 1] + quat[..., 2] * quat[..., 3]) - cosr = +1.0 - 2.0 * (quat[..., 1] * quat[..., 1] + quat[..., 2] * quat[..., 2]) - roll = np.arctan2(sinr, cosr) - - sinp = +2.0 * (quat[..., 0] * quat[..., 2] - quat[..., 3] * quat[..., 1]) - pitch = np.where(np.fabs(sinp) >= 1, np.copysign(np.pi / 2, sinp), np.arcsin(sinp)) - - siny = +2.0 * (quat[..., 0] * quat[..., 3] + quat[..., 1] * quat[..., 2]) - cosy = +1.0 - 2.0 * (quat[..., 2] * quat[..., 2] + quat[..., 3] * quat[..., 3]) - yaw = np.arctan2(siny, cosy) - - ypr = np.array([yaw, pitch, roll]).T - - if degrees: - ypr = np.rad2deg(ypr) - - return ypr - - -def Euler2Quat(ypr: np.ndarray, degrees: bool = True): - "Convert Euler angles to Quaternion" - - if degrees: - ypr = np.deg2rad(ypr) - - if len(ypr.shape) == 2: - N_quat = ypr.shape[0] - quat = np.zeros([N_quat, 4]) - yaw = ypr[:, 0] - pitch = ypr[:, 1] - roll = ypr[:, 2] - else: - quat = np.zeros([4]) - yaw = ypr[0] - pitch = ypr[1] - roll = ypr[2] - - c1 = np.cos(0.5 * yaw) - c2 = np.cos(0.5 * pitch) - c3 = np.cos(0.5 * roll) - - s1 = np.sin(0.5 * yaw) - s2 = np.sin(0.5 * pitch) - s3 = np.sin(0.5 * roll) - - quat[..., 0] = c3 * c2 * c1 + s3 * s2 * s1 - quat[..., 1] = s3 * c2 * c1 - c3 * s2 * s1 - quat[..., 2] = s3 * c2 * s1 + c3 * s2 * c1 - quat[..., 3] = c3 * c2 * s1 - s3 * s2 * c1 - - return quat - - -def Quat2RotMat(quat: np.ndarray): - "Convert quaternion to rotation matrix" - - R = np.zeros([3, 3]) - - if quat[0] != -3: - - # Quaternions - # formula taken from ivas_rotation.c - - R[0, 0] = ( - quat[0] * quat[0] - + quat[1] * quat[1] - - quat[2] * quat[2] - - quat[3] * quat[3] - ) - R[0, 1] = 2.0 * (quat[1] * quat[2] - quat[0] * quat[3]) - R[0, 2] = 2.0 * (quat[1] * quat[3] + quat[0] * quat[2]) - - R[1, 0] = 2.0 * (quat[1] * quat[2] + quat[0] * quat[3]) - R[1, 1] = ( - quat[0] * quat[0] - - quat[1] * quat[1] - + quat[2] * quat[2] - - quat[3] * quat[3] - ) - R[1, 2] = 2.0 * (quat[2] * quat[3] - quat[0] * quat[1]) - - R[2, 0] = 2.0 * (quat[1] * quat[3] - quat[0] * quat[2]) - R[2, 1] = 2.0 * (quat[2] * quat[3] + quat[0] * quat[1]) - R[2, 2] = ( - quat[0] * quat[0] - - quat[1] * quat[1] - - quat[2] * quat[2] - + quat[3] * quat[3] - ) - - else: - - # Euler angles in R_X(roll)*R_Y(pitch)*R_Z(yaw) convention - # - # yaw: rotate scene counter-clockwise in the horizontal plane - # pitch: rotate scene in the median plane, increase elevation with positive values - # roll: rotate scene from the right ear to the top - # - # formula taken from ivas_rotation.c - - c1 = np.cos(quat[3] / 180.0 * np.pi) - c2 = np.cos(quat[2] / 180.0 * np.pi) - c3 = np.cos(quat[1] / 180.0 * np.pi) - - s1 = np.sin(quat[3] / 180.0 * np.pi) - s2 = np.sin(-quat[2] / 180.0 * np.pi) - s3 = np.sin(quat[1] / 180.0 * np.pi) - - R[0, 0] = c2 * c3 - R[0, 1] = -c2 * s3 - R[0, 2] = s2 - - R[1, 0] = c1 * s3 + c3 * s1 * s2 - R[1, 1] = c1 * c3 - s1 * s2 * s3 - R[1, 2] = -c2 * s1 - - R[2, 0] = s1 * s3 - c1 * c3 * s2 - R[2, 1] = c3 * s1 + c1 * s2 * s3 - R[2, 2] = c1 * c2 - 
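# Illustrative sketch checking the quaternion branch of Quat2RotMat above:
# for a unit quaternion (w, x, y, z) the resulting matrix is a proper
# rotation, i.e. R @ R.T = I and det(R) = 1. Same component ordering and
# formula as the deleted code.
import numpy as np

q = np.array([0.5, 0.5, 0.5, 0.5])       # 120 deg about (1,1,1)/sqrt(3)
w, x, y, z = q
R = np.array([
    [w*w + x*x - y*y - z*z, 2*(x*y - w*z),         2*(x*z + w*y)],
    [2*(x*y + w*z),         w*w - x*x + y*y - z*z, 2*(y*z - w*x)],
    [2*(x*z - w*y),         2*(y*z + w*x),         w*w - x*x - y*y + z*z],
])
assert np.allclose(R @ R.T, np.eye(3)) and np.isclose(np.linalg.det(R), 1.0)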
- return R - - -def rotateAziEle( - azi: float, ele: float, R: np.ndarray, is_planar: bool = False -) -> Tuple[float, float]: - w = np.cos(np.deg2rad(ele)) - dv = np.array( - [ - w * np.cos(np.deg2rad(azi)), - w * np.sin(np.deg2rad(azi)), - np.sin(np.deg2rad(ele)), - ] - ) - - dv_rot = R @ dv - - azi = np.rad2deg(np.arctan2(dv_rot[1], dv_rot[0])) - if is_planar: - ele = 0 - else: - ele = np.rad2deg(np.arctan2(dv_rot[2], np.sqrt(np.sum(dv_rot[:2] ** 2)))) - - return azi, ele diff --git a/scripts/pyaudio3dtools/rotation.py b/scripts/pyaudio3dtools/rotation.py deleted file mode 100644 index cbd76aef3f..0000000000 --- a/scripts/pyaudio3dtools/rotation.py +++ /dev/null @@ -1,346 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. -""" - -import numpy as np - -from pyaudio3dtools import EFAP, spatialaudioformat -from pyaudio3dtools.constants import * -from pyaudio3dtools.quaternions.functions import Quat2RotMat, rotateAziEle - -######################################################################### -# Helper functions used by Ruedenberg, -# an implementation of the algorithm in -# Ivanic, J. & Ruedenberg, K., J. Phys. Chem. 
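# Illustrative sketch of rotateAziEle above: a direction (azi, ele) is
# converted to a unit vector, rotated by R, and converted back. With the
# front axis at +x (as in the deleted code), a +90 degree yaw about z moves
# azimuth 0 to azimuth 90.
import numpy as np

def rotate_azi_ele(azi, ele, R):
    w = np.cos(np.deg2rad(ele))
    v = np.array([w * np.cos(np.deg2rad(azi)),
                  w * np.sin(np.deg2rad(azi)),
                  np.sin(np.deg2rad(ele))])
    v = R @ v
    return (np.rad2deg(np.arctan2(v[1], v[0])),
            np.rad2deg(np.arctan2(v[2], np.hypot(v[0], v[1]))))

yaw90 = np.array([[0.0, -1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
azi, ele = rotate_azi_ele(0.0, 0.0, yaw90)
assert np.isclose(azi, 90.0) and np.isclose(ele, 0.0)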
100, 6342 (1996) -# translated from ivas_rotation.c -######################################################################### - - -def SHrot_p( - i: int, l: int, a: int, b: int, SHrotmat: np.ndarray, R_lm1: np.ndarray -) -> float: - """Helper function to calculate the ps""" - - ri1 = SHrotmat[i + 1 + 1][1 + 1 + 1] - rim1 = SHrotmat[i + 1 + 1][-1 + 1 + 1] - ri0 = SHrotmat[i + 1 + 1][0 + 1 + 1] - - if b == -l: - R_lm1_1 = R_lm1[a + l - 1][0] - R_lm1_2 = R_lm1[a + l - 1][2 * l - 2] - p = ri1 * R_lm1_1 + rim1 * R_lm1_2 - else: - if b == l: - R_lm1_1 = R_lm1[a + l - 1][2 * l - 2] - R_lm1_2 = R_lm1[a + l - 1][0] - p = ri1 * R_lm1_1 - rim1 * R_lm1_2 - else: - R_lm1_1 = R_lm1[a + l - 1][b + l - 1] - p = ri0 * R_lm1_1 - - return p - - -def SHrot_u(l: int, m: int, n: int, SHrotmat: np.ndarray, R_lm1: np.ndarray) -> float: - """Helper function to calculate the us""" - return SHrot_p(0, l, m, n, SHrotmat, R_lm1) - - -def SHrot_v(l: int, m: int, n: int, SHrotmat: np.ndarray, R_lm1: np.ndarray) -> float: - """Helper function to calculate the vs""" - - if m == 0: - p0 = SHrot_p(1, l, 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -1, n, SHrotmat, R_lm1) - return p0 + p1 - else: - if m > 0: - d = 1.0 if (m == 1) else 0.0 - p0 = SHrot_p(1, l, m - 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -m + 1, n, SHrotmat, R_lm1) - return p0 * np.sqrt(1.0 + d) - p1 * (1.0 - d) - else: - d = 1.0 if (m == -1) else 0.0 - p0 = SHrot_p(1, l, m + 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -m - 1, n, SHrotmat, R_lm1) - return p0 * (1.0 - d) + p1 * np.sqrt(1.0 + d) - - -def SHrot_w(l: int, m: int, n: int, SHrotmat: np.ndarray, R_lm1: np.ndarray) -> float: - """Helper function to calculate the w""" - if m == 0: - raise ValueError("ERROR should not be called\n") - else: - if m > 0: - p0 = SHrot_p(1, l, m + 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -m - 1, n, SHrotmat, R_lm1) - return p0 + p1 - else: - p0 = SHrot_p(1, l, m - 1, n, SHrotmat, R_lm1) - p1 = SHrot_p(-1, l, -m + 1, n, SHrotmat, R_lm1) - return p0 - p1 - - -######################################## -# SHD rotation matrix calculation -# translated from ivas_rotation.c -######################################## -def SHrotmatgen(R: np.ndarray, order: int = 3) -> np.ndarray: - """Calculate SHD roatation matrix from that in real space - - Parameters: - ---------- - R: np.ndarray - real-space rotation matrix - - order: Optional[int] - Ambisonics order, default = 3 - - Returns: - ---------- - SHrotmat: np.ndarray - SHD rotation matrix - - """ - dim = (order + 1) * (order + 1) - - SHrotmat = np.zeros([dim, dim]) - R_lm1 = np.zeros([dim, dim]) - R_l = np.zeros([dim, dim]) - - SHrotmat[0][0] = 1.0 - - SHrotmat[1][1] = R[1][1] - SHrotmat[1][2] = R[1][2] - SHrotmat[1][3] = R[1][0] - - SHrotmat[2][1] = R[2][1] - SHrotmat[2][2] = R[2][2] - SHrotmat[2][3] = R[2][0] - - SHrotmat[3][1] = R[0][1] - SHrotmat[3][2] = R[0][2] - SHrotmat[3][3] = R[0][0] - - for i in range(2 * 1 + 1): - for j in range(2 * 1 + 1): - R_lm1[i][j] = SHrotmat[i + 1][j + 1] - - band_idx = 4 - for l in range(2, order + 1): - - R_l[:, :] = 0.0 - - for m in range(-l, l + 1): - - d = 1 if (m == 0) else 0 - absm = abs(m) - sql2mm2 = np.sqrt((l * l - m * m)) - sqdabsm = np.sqrt(((1 + d) * (l + absm - 1) * (l + absm))) - sqlabsm = np.sqrt(((l - absm - 1) * (l - absm))) - - for n in range(-l, l + 1): - if abs(n) == l: - sqdenom = np.sqrt((2 * l) * (2 * l - 1)) - else: - sqdenom = np.sqrt(l * l - n * n) - - u = sql2mm2 / sqdenom - v = sqdabsm / sqdenom * (1 - 2 * d) * 0.5 - w = sqlabsm / sqdenom * (1 - d) * (-0.5) - - 
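# Illustrative sketch of the first-order block of SHrotmatgen above: for
# l = 1 the SH rotation is the spatial rotation with rows and columns
# permuted to ACN (y, z, x) ordering, matching the explicit
# SHrotmat[1..3][1..3] assignments in the deleted code.
import numpy as np

R = np.array([[0.0, -1.0, 0.0],          # 90 deg yaw about z
              [1.0,  0.0, 0.0],
              [0.0,  0.0, 1.0]])

perm = [1, 2, 0]                         # (x, y, z) -> (y, z, x)
sh1 = R[np.ix_(perm, perm)]              # the l = 1 SH rotation block

expected = np.array([[R[1][1], R[1][2], R[1][0]],
                     [R[2][1], R[2][2], R[2][0]],
                     [R[0][1], R[0][2], R[0][0]]])
assert np.array_equal(sh1, expected)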
if u != 0: - u = u * SHrot_u(l, m, n, SHrotmat, R_lm1) - if v != 0: - v = v * SHrot_v(l, m, n, SHrotmat, R_lm1) - if w != 0: - w = w * SHrot_w(l, m, n, SHrotmat, R_lm1) - R_l[m + l][n + l] = u + v + w - - for i in range(2 * l + 1): - for j in range(2 * l + 1): - SHrotmat[band_idx + i][band_idx + j] = R_l[i][j] - - for i in range(2 * l + 1): - for j in range(2 * l + 1): - R_lm1[i][j] = R_l[i][j] - - band_idx += 2 * l + 1 - - return SHrotmat - - -def rotateHOA(x: np.ndarray, trajectory: str) -> np.ndarray: - """Rotate HOA signal by applying a rotation matrix calculated from the current quaternion - in each subframe - - Parameters: - ---------- - x: np.ndarray - input signal upto HOA3 - trajectory: str - path to trajectory file - - Returns: - ---------- - y: np.ndarray - rotated HOA signal - """ - - trj_data = np.genfromtxt(trajectory, delimiter=",") - trj_frames = trj_data.shape[0] - - sig_len = x.shape[0] - sig_dim = x.shape[1] - frame_len = (IVAS_FRAME_LEN_MS // 4) * 48 - N_frames = int(sig_len / frame_len) - - if sig_dim not in [4, 9, 16]: - raise ValueError("rotateHOA can only handle FOA, HOA2 or HOA3 signals!") - - y = np.zeros([sig_len, sig_dim]) - - fade_in = np.arange(frame_len) / (frame_len - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - R = np.eye(sig_dim) - R_old = np.eye(sig_dim) - for i_frame in range(N_frames): - - i1 = i_frame * frame_len - i2 = (i_frame + 1) * frame_len - - q1 = trj_data[i_frame % trj_frames, :] - R_r = Quat2RotMat(q1) - R[:, :] = SHrotmatgen(R_r, order=int(np.sqrt(sig_dim)) - 1) - - frame_in = x[i1:i2, :] - frame_out = y[i1:i2, :] - - frame_out[:, :] = (fade_in * frame_in @ R.T) + (fade_out * frame_in @ R_old.T) - - R_old[:, :] = R.copy() - - return y - - -def rotateISM( - azi: np.ndarray, - ele: np.ndarray, - trajectory: str = None, -) -> tuple: - - if trajectory is None: - return azi, ele - - trj_data = np.genfromtxt(trajectory, delimiter=",") - trj_frames = trj_data.shape[0] - - N_frames = azi.shape[0] - if ele.shape[0] != azi.shape[0]: - raise ValueError("Inconsistent input in azi and ele") - - azi_rot = np.zeros([N_frames]) - ele_rot = np.zeros([N_frames]) - - for i_frame in range(N_frames): - q = trj_data[i_frame % trj_frames, :] - azi_rot[i_frame], ele_rot[i_frame] = rotateAziEle( - azi[i_frame], ele[i_frame], Quat2RotMat(q) - ) - - return azi_rot, ele_rot - - -def rotateMC(x: np.ndarray, trajectory: str, layout: spatialaudioformat) -> np.ndarray: - """Rotate MC signal by applying a rotation matrix calculated from the current quaternion - in each subframe - - Parameters: - ---------- - x: np.ndarray - input multichannel signal - trajectory: str - path to trajectory file - - Returns: - ---------- - y: np.ndarray - rotated multichannel signal - """ - - # TODO needs optimization, currently slow - trj_data = np.genfromtxt(trajectory, delimiter=",") - trj_frames = trj_data.shape[0] - - sig_len = x.shape[0] - sig_dim = x.shape[1] - frame_len = (IVAS_FRAME_LEN_MS // 4) * 48 - N_frames = int(sig_len / frame_len) - - y = np.zeros([sig_len, sig_dim]) - - # TODO LFE handling here - panner = EFAP.EFAP(layout.ls_azi, layout.ls_ele) - - fade_in = np.arange(frame_len) / (frame_len - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - R = np.eye(layout.nchannels) - R_old = np.eye(layout.nchannels) - - for i_frame in range(N_frames): - - start = i_frame * frame_len - end = (i_frame + 1) * frame_len - - q = trj_data[i_frame % trj_frames, :] - - rotated_pos = np.array( - [ - rotateAziEle(a, e, Quat2RotMat(q)) - for a, e in 
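# Illustrative sketch of the per-frame crossfade shared by rotateHOA and
# rotateMC above: the current and previous rotation matrices are both
# applied and blended with linear fades so the rotation changes smoothly
# across frame boundaries. FOA-sized synthetic signal; a diagonal matrix
# stands in for a real SH rotation.
import numpy as np

frame_len, dim = 480, 4
x = np.random.default_rng(3).standard_normal((frame_len, dim))

fade_in = (np.arange(frame_len) / (frame_len - 1))[:, np.newaxis]
fade_out = 1.0 - fade_in

R_old = np.eye(dim)
R = np.diag([1.0, -1.0, 1.0, -1.0])
y = fade_in * (x @ R.T) + fade_out * (x @ R_old.T)
assert np.allclose(y[0], x[0]) and np.allclose(y[-1], x[-1] @ R.T)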
zip(layout.ls_azi, layout.ls_ele) - ] - ) - R = panner.pan(rotated_pos[:, 0], rotated_pos[:, 1]) - R[:, layout.lfe_index] = np.zeros([layout.nchannels, 1]) - R[layout.lfe_index, layout.lfe_index] = 1 - - frame_in = x[start:end, :] - frame_out = y[start:end, :] - - frame_out[:, :] = (fade_in * frame_in @ R) + (fade_out * frame_in @ R_old) - - R_old = R.copy() - - return y diff --git a/scripts/pyaudio3dtools/spatialaudioconvert.py b/scripts/pyaudio3dtools/spatialaudioconvert.py deleted file mode 100644 index 0040f28ace..0000000000 --- a/scripts/pyaudio3dtools/spatialaudioconvert.py +++ /dev/null @@ -1,570 +0,0 @@ -#!/usr/bin/env python3 - -""" - (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository. All Rights Reserved. - - This software is protected by copyright law and by international treaties. - The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, - Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., - Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, - Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other - contributors to this repository retain full ownership rights in their respective contributions in - the software. This notice grants no license of any kind, including but not limited to patent - license, nor is any license granted by implication, estoppel or otherwise. - - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making - contributions. - - This software is provided "AS IS", without any express or implied warranties. The software is in the - development stage. It is intended exclusively for experts who have experience with such software and - solely for the purpose of inspection. All implied warranties of non-infringement, merchantability - and fitness for a particular purpose are hereby disclaimed and excluded. - - Any dispute, controversy or claim arising under or in relation to providing this software shall be - submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in - accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and - the United Nations Convention on Contracts on the International Sales of Goods. 
-""" - -import logging -import os -import warnings -from typing import Optional, Tuple - -import numpy as np - -from pyaudio3dtools import ( - EFAP, - audioarray, - audiofile, - binauralrenderer, - hoadecoder, - masarenderer, - spatialaudioformat, - spatialmetadata, -) -from pyaudio3dtools.constants import * - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - - -def spatial_audio_convert( - in_file: str, - out_file: str, - in_format: Optional[str] = None, - in_fs: Optional[int] = None, - in_nchans: Optional[int] = None, - in_meta_files: Optional[list] = None, - out_format: Optional[str] = None, - out_fs: Optional[int] = None, - out_fc: Optional[int] = None, - output_loudness: Optional[int] = None, - loudness_tool: Optional[str] = None, - limit_output: Optional[bool] = False, - cut_preamble_s: Optional[int] = None, - trajectory: Optional[str] = None, - bin_rend_include_LFE: Optional[bool] = True, - bin_rend_LFE_gain: Optional[float] = 10 ** (5.5 / 20), - binaural_dataset: Optional[str] = "orange53", -) -> Tuple[np.ndarray, int]: - """ - Spatial audio conversion between various formats - - Parameters - ---------- - in_file: str - input filename - out_file: str - output filename - - in_format: Optional[str] - input spatial audio format - in_fs: Optional[int] - input sampling frequency - in_nchans: Optional[int] - input number of channels (deduced for .wav) - - out_format: Optional[str] - output spatial audio format - out_fs: Optional[int] - output sampling frequency - out_fc: Optional[int] - output cutoff frequency (low-pass filtering) - - output_loudness: Optional[int] - Loudness level in LKFS/dBov - loudness_tool: Optional[str] - Loudness tool to use. Must be in $PATH. - Supported tools: - ITU-R BS.1770-4 / "bs1770demo" (default) - ITU-T P.56 / "sv56demo" - - limit_output: Optional[bool] - flag whether to apply limiting to the output - cut_preamble_s: Optional[int] - preamble to cut in seconds - - trajectory: Optional[str] - head rotation trajectory file (for binaural rendering) - bin_rend_include_LFE: Optional[bool] - flag to include LFE in binaural rendering - bin_rend_LFE_gain: Optional[float] - gain to apply for LFE in binaural rendering - binaural_dataset: Optional[str] - dataset for binaural HRIR or BRIRs - - Returns - ------- - out_sig : np.ndarray - output signal - out_fs : int - output sampling frequency - """ - - """ get spatial input and audio format configurations """ - if in_format is None: - if in_nchans is not None: - in_format = spatialaudioformat.Format.detect_format(in_nchans) - in_spfmt = spatialaudioformat.Format(in_format) - logger.info(f" Input spatial audio format detected: {in_format}") - else: - logger.info(f" Input spatial audio format: {in_format}") - in_spfmt = spatialaudioformat.Format(in_format) - - if out_format is None: - out_format = in_format - logger.info( - f" Output spatial audio format not specified, defaulting to pass-through: {out_format}" - ) - out_spfmt = spatialaudioformat.Format(out_format) - - """ read input file """ - # Input is either waveform file (.pcm or .wav) or iis metadata (.txt) - _, input_ext = os.path.splitext(os.path.basename(in_file)) - - if input_ext == ".pcm": - if in_fs is None: - if out_fs: - in_fs = out_fs - else: - raise ValueError("Input and output fs not defined.") - if in_nchans is None: - if in_spfmt is not None: - in_nchans = in_spfmt.nchannels - else: - raise ValueError( - "Number of input channels not defined and can't be deduced." 
- ) - in_sig, in_fs = audiofile.readfile(in_file, fs=in_fs, nchannels=in_nchans) - elif input_ext == ".wav": - in_sig, in_fs = audiofile.readfile(in_file) - if in_format is None: - in_format = spatialaudioformat.Format.detect_format(in_sig.shape[1]) - in_spfmt = spatialaudioformat.Format(in_format) - - # Adjust number of channels if case of HOA, zeroed vert channels if planar - if in_spfmt.ambi_order > 0: - in_sig = audioarray.convert(in_sig, out_nchans=in_spfmt.nchannels) - elif input_ext == ".txt": - metadata_obj = spatialmetadata.Metadata(in_file, audio_fs=in_fs) - in_sig, in_fs = metadata_obj.get_audio_array() - if in_spfmt.name != "META": - logger.info( - f" {in_spfmt.name} specified with .txt input file: overriding to META format" - ) - in_format = "META" - in_spfmt = spatialaudioformat.Format(in_format) - else: - raise Exception(f"Not supported file {input_ext}") - _, in_nchans = in_sig.shape - - """ convert metadata based formats (ISM / META) directly to output format """ - if in_spfmt.name.startswith("META") or in_spfmt.name.startswith("ISM"): - if out_spfmt.name.startswith("META"): - raise Exception("out format must be specified for META (.txt) or ISM input") - - if in_spfmt.name.startswith("ISM"): - if in_meta_files is None: - raise ValueError( - f"Please specify a list of metadata files for {in_spfmt.name}" - ) - if len(in_meta_files) != int(in_spfmt.name[-1]): - raise ValueError( - f"Mismatch between number of streams and number of specified metadata files for {in_spfmt.name}" - ) - - # initialise metadata object for ISM - metadata_obj = spatialmetadata.Metadata() - metadata_obj.init_for_ism(in_file, in_fs, in_nchans, in_meta_files) - - # TODO decide on reference path for BINAURAL_ROOM - if out_spfmt.name.startswith("BINAURAL_ROOM"): - in_format = "7_1_4" - else: - in_format = out_format - in_spfmt = spatialaudioformat.Format(in_format) - - else: - # set input format to output format - # render_meta() handles all conversions - in_format = out_format - in_spfmt = out_spfmt - - in_sig = render_meta( - metadata_obj, - in_spfmt, - dataset=binaural_dataset, - fs=in_fs, - trajectory=trajectory, - include_LFE=bin_rend_include_LFE, - LFE_gain=bin_rend_LFE_gain, - ) - - """ cut preamble """ - if cut_preamble_s is not None: - samples_to_cut = int(cut_preamble_s * in_fs) - if samples_to_cut > 0: - logger.info(f" Cut preample by {samples_to_cut} samples") - in_sig = audioarray.cut(in_sig, (samples_to_cut, -1)) - - """ zero non-planar input ambisonics channels """ - if in_spfmt.ambi_order > 0 and in_spfmt.isplanar: - in_sig = spatialaudioformat.Format.zero_vert_hoa_channels(in_sig) - - """ Spatial audio format conversion """ - out_sig = in_sig - if (in_spfmt.name != out_spfmt.name) and not ( - in_spfmt.isheadphones and out_spfmt.isheadphones - ): - logger.info(f" {in_spfmt.name} -> {out_spfmt.name}") - - # binaural output (except MASA) - if out_spfmt.name.startswith("BINAURAL") and not in_spfmt.name.startswith( - "MASA" - ): - out_sig = binauralrenderer.binaural_rendering( - in_sig, - in_spfmt, - out_spfmt, - dataset=binaural_dataset, - fs=in_fs, - trajectory=trajectory, - include_LFE=bin_rend_include_LFE, - LFE_gain=bin_rend_LFE_gain, - ) - # non-binaural outputs - # HOA conversion - elif in_spfmt.ambi_order > 0: - out_sig = convert_sba(in_sig, in_spfmt, out_spfmt) - - # MC conversion - elif in_spfmt.isloudspeaker: - out_sig = convert_mc(in_sig, in_spfmt, out_spfmt) - - # MASA conversion - elif in_spfmt.name.startswith("MASA"): - out_sig = convert_masa(in_sig, in_fs, in_meta_files, 
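# Illustrative sketch of the preamble cut in spatial_audio_convert above: a
# preamble length in seconds becomes a sample count at the input rate and is
# removed from the head of the signal (audioarray.cut did the slicing in the
# deleted code). Synthetic input assumed.
import numpy as np

in_fs, cut_preamble_s = 48000, 0.5
in_sig = np.zeros((2 * in_fs, 2))        # 2 s of stereo input
samples_to_cut = int(cut_preamble_s * in_fs)
in_sig = in_sig[samples_to_cut:, :]
assert in_sig.shape[0] == int(1.5 * in_fs)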
-
-
-def convert_sba(
-    in_sig: np.ndarray,
-    in_spfmt: spatialaudioformat.Format,
-    out_spfmt: spatialaudioformat.Format,
-) -> np.ndarray:
-    """Convert an ambisonics signal to the requested output format"""
-    # HOA -> LS
-    if out_spfmt.isloudspeaker:
-        HOA2LS = hoadecoder.get_hoa_mtx(in_spfmt.ambi_order, out_spfmt)
-        return hoadecoder.hoa_linear_decoding(in_sig, HOA2LS)
-    # HOA -> HOA
-    elif out_spfmt.ambi_order > 0:
-        return audioarray.convert(in_sig, in_fs=None, out_nchans=out_spfmt.nchannels)
-    else:
-        raise NotImplementedError(
-            f"{in_spfmt.name} -> {out_spfmt.name}: format conversion not implemented"
-        )
-
-
-def convert_mc(
-    in_sig: np.ndarray,
-    in_spfmt: spatialaudioformat.Format,
-    out_spfmt: spatialaudioformat.Format,
-) -> np.ndarray:
-    """Convert a multichannel signal to the requested output format"""
-    # MC -> LS
-    if in_spfmt.name == "STEREO" and out_spfmt.name == "MONO":
-        MC2LS = np.vstack([[0.5], [0.5]])
-        return in_sig @ MC2LS
-    elif out_spfmt.isloudspeaker:
-        try:
-            MC2LS = IVAS_MC_CONVERSION[in_spfmt.name][out_spfmt.name]
-        except KeyError:
-            ls_azi_woLFE = np.delete(out_spfmt.ls_azi, out_spfmt.lfe_index).astype(
-                float
-            )
-            ls_ele_woLFE = np.delete(out_spfmt.ls_ele, out_spfmt.lfe_index).astype(
-                float
-            )
-
-            panner = EFAP.EFAP(ls_azi_woLFE, ls_ele_woLFE)
-
-            MC2LS = np.vstack(
-                [
-                    panner.pan(a, e).T
-                    for i, (a, e) in enumerate(zip(in_spfmt.ls_azi, in_spfmt.ls_ele))
-                    if i not in in_spfmt.lfe_index
-                ]
-            )
-            # TODO tmu : implement configurable LFE handling
-            # pass-through for LFE
-            MC2LS = np.insert(MC2LS, in_spfmt.lfe_index, 0, axis=0)
-            MC2LS = np.insert(MC2LS, out_spfmt.lfe_index, 0, axis=1)
-            MC2LS[in_spfmt.lfe_index, out_spfmt.lfe_index] = 1
-
-        # TODO tmu temporarily disable LFE rendering to MONO/STEREO
-        if out_spfmt.name == "MONO" or out_spfmt.name == "STEREO":
-            MC2LS[in_spfmt.lfe_index, :] = 0
-        return in_sig @ MC2LS
-    # MC -> HOA
-    elif out_spfmt.ambi_order > 0:
-        # SH response for loudspeaker positions
-        MC2HOA = np.hstack(
-            [
-                hoadecoder.getRSH([a], [e], out_spfmt.ambi_order)
-                for a, e in zip(in_spfmt.ls_azi, in_spfmt.ls_ele)
-            ]
-        ).T
-
-        # do not add LFE to output
-        MC2HOA[in_spfmt.lfe_index] = 0
-
-        return in_sig @ MC2HOA
-    else:
-        raise NotImplementedError(
-            f"{in_spfmt.name} -> {out_spfmt.name}: format conversion not implemented"
-        )
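The `(nsamples, nchans_in) @ (nchans_in, nchans_out)` matrix convention used by these converters can be checked in isolation; a small self-contained sketch with synthetic data, not tied to any IVAS layout:

```
import numpy as np

# Passive stereo-to-mono downmix, as in convert_mc: y = x @ M
x = np.random.randn(480, 2)    # 10 ms of stereo at 48 kHz
M = np.vstack([[0.5], [0.5]])  # (2, 1) downmix matrix
y = x @ M                      # (480, 1) mono output
assert y.shape == (480, 1)
```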
"""Convert an ISM signal to the requested output format""" - pos_data = [] - for pos in in_pos: - pos_data.extend( - [pos["azimuth"], pos["elevation"]] for _ in range(pos["use_for_frames"]) - ) - pos_data = np.array(pos_data) - pos_frames = pos_data.shape[0] - - sig_len = in_sig.shape[0] - frame_len = IVAS_FRAME_LEN_MS * (in_fs // 1000) - - out_sig = np.zeros([sig_len, out_spfmt.nchannels]) - - fade_in = np.arange(frame_len) / (frame_len - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - if out_spfmt.isloudspeaker: - ls_azi_woLFE = np.delete(out_spfmt.ls_azi, out_spfmt.lfe_index) - ls_ele_woLFE = np.delete(out_spfmt.ls_ele, out_spfmt.lfe_index) - panner = EFAP.EFAP(ls_azi_woLFE, ls_ele_woLFE) - - gains_old = None - - for i_frame, (in_frame, out_frame) in enumerate( - zip( - audioarray.get_framewise(in_sig, frame_len), - audioarray.get_framewise(out_sig, frame_len), - ) - ): - # update the crossfade if we have a smaller last frame - if out_frame.shape[0] != frame_len: - frame_size = out_frame.shape[0] - fade_in = np.arange(frame_size) / (frame_size - 1) - fade_in = fade_in[:, np.newaxis] - fade_out = 1.0 - fade_in - - pos = EFAP.wrap_angles(*pos_data[i_frame % pos_frames, :], clip_ele=True) - - # ISM -> MC - if out_spfmt.isloudspeaker: - gains = panner.pan(pos[0], pos[1]) - gains = np.insert(gains, out_spfmt.lfe_index, 0) - gains = gains[:, np.newaxis] - # ISM -> HOA - elif out_spfmt.ambi_order > 0: - gains = hoadecoder.getRSH([pos[0]], [pos[1]], out_spfmt.ambi_order) - else: - raise NotImplementedError( - f"{in_spfmt.name} -> {out_spfmt.name}: format conversion not implemented" - ) - - if gains_old is None: - gains_old = gains.copy() - - out_frame[:] = (fade_in * in_frame @ gains.T) + ( - fade_out * in_frame @ gains_old.T - ) - - gains_old = gains.copy() - - return out_sig - - -def convert_masa( - in_sig: np.ndarray, - in_fs: int, - in_meta: str, - in_spfmt: spatialaudioformat.Format, - out_spfmt: spatialaudioformat.Format, -) -> np.ndarray: - """Convert a MASA signal to the requested output format""" - - if in_fs != 48000: - raise ValueError(f"{in_spfmt.name} rendering only support for 48kHz!") - - tmp_spfmt = out_spfmt - - # MASA -> LS - if out_spfmt.isloudspeaker: - if not (out_spfmt.name == "5_1" or out_spfmt.name == "7_1_4"): - tmp_spfmt = spatialaudioformat.Format("7_1_4") - warnings.warn( - f"{out_spfmt.name} not natively supported by masaRenderer, using {tmp_spfmt.name} as intermediate format" - ) - # MASA -> HOA - elif out_spfmt.ambi_order > 0: - tmp_spfmt = spatialaudioformat.Format("7_1_4") - warnings.warn( - f"{out_spfmt.name} not natively supported by masaRenderer, using {tmp_spfmt.name} as intermediate format" - ) - elif out_spfmt.name == "BINAURAL": - warnings.warn( - f"Using masaRenderer for rendering; any binaural_dataset setting will be ignored!" 
-
-
-def convert_masa(
-    in_sig: np.ndarray,
-    in_fs: int,
-    in_meta: str,
-    in_spfmt: spatialaudioformat.Format,
-    out_spfmt: spatialaudioformat.Format,
-) -> np.ndarray:
-    """Convert a MASA signal to the requested output format"""
-
-    if in_fs != 48000:
-        raise ValueError(f"{in_spfmt.name} rendering only supported for 48kHz!")
-
-    tmp_spfmt = out_spfmt
-
-    # MASA -> LS
-    if out_spfmt.isloudspeaker:
-        if not (out_spfmt.name == "5_1" or out_spfmt.name == "7_1_4"):
-            tmp_spfmt = spatialaudioformat.Format("7_1_4")
-            warnings.warn(
-                f"{out_spfmt.name} not natively supported by masaRenderer, using {tmp_spfmt.name} as intermediate format"
-            )
-    # MASA -> HOA
-    elif out_spfmt.ambi_order > 0:
-        tmp_spfmt = spatialaudioformat.Format("7_1_4")
-        warnings.warn(
-            f"{out_spfmt.name} not natively supported by masaRenderer, using {tmp_spfmt.name} as intermediate format"
-        )
-    elif out_spfmt.name == "BINAURAL":
-        warnings.warn(
-            "Using masaRenderer for rendering; any binaural_dataset setting will be ignored!"
-        )
-    else:
-        raise NotImplementedError(
-            f"{in_spfmt.name} -> {out_spfmt.name}: format conversion not implemented"
-        )
-
-    out_sig = masarenderer.render_masa(in_sig, in_meta, in_spfmt, tmp_spfmt)
-
-    # conversion done
-    if tmp_spfmt.name == out_spfmt.name:
-        return out_sig
-    # only rendered an intermediate format, more conversion needed
-    else:
-        return convert_mc(out_sig, tmp_spfmt, out_spfmt)
-
-
-def render_meta(
-    metadata_obj: spatialmetadata.Metadata,
-    dest_fmt: spatialaudioformat.Format,
-    dataset: str,
-    fs: int,
-    trajectory: str,
-    include_LFE: bool = False,
-    LFE_gain: float = 10 ** (5.5 / 20),
-) -> np.ndarray:
-    """Render mixed scene metadata to the desired format"""
-
-    logger.info(f"    META -> {dest_fmt.name}")
-
-    out_sig = np.zeros([metadata_obj.audio_array.shape[0], dest_fmt.nchannels])
-
-    for obj in metadata_obj.objects:
-        # extract object signal
-        start = obj["track_index"]
-        stop = start + obj["nb_tracks"]
-        obj_sig = metadata_obj.audio_array[:, start:stop]
-        # apply gain (objects are dicts, so test for the key rather than an attribute)
-        if "gain" in obj:
-            obj_sig *= obj["gain"]
-
-        if dest_fmt.name.startswith("BINAURAL"):
-            if obj["input_type"] == "ism":
-                src_format = spatialaudioformat.Format("ISM")
-                positions = obj["positions"]
-            elif obj["input_type"] == "sba":
-                src_format = spatialaudioformat.Format(f"SBA{obj['order']}")
-                positions = None
-            elif obj["input_type"] == "mc":
-                src_format = spatialaudioformat.Format(f"CICP{obj['cicp_index']}")
-                positions = None
-
-            out_sig += binauralrenderer.binaural_rendering(
-                obj_sig,
-                src_format,
-                dest_fmt,
-                dataset=dataset,
-                fs=fs,
-                trajectory=trajectory,
-                include_LFE=include_LFE,
-                LFE_gain=LFE_gain,
-                in_pos=positions,
-            )
-        else:
-            if obj["input_type"] == "ism":
-                src_format = spatialaudioformat.Format("ISM")
-                out_sig += convert_ism(
-                    obj_sig, fs, obj["positions"], src_format, dest_fmt
-                )
-            elif obj["input_type"] == "sba":
-                src_format = obj["format"]
-                out_sig += convert_sba(obj_sig, src_format, dest_fmt)
-            elif obj["input_type"] == "mc":
-                src_format = obj["format"]
-                out_sig += convert_mc(obj_sig, src_format, dest_fmt)
-
-    return out_sig
diff --git a/scripts/pyaudio3dtools/spatialmetadata.py b/scripts/pyaudio3dtools/spatialmetadata.py
deleted file mode 100644
index 3cf1338d4f..0000000000
--- a/scripts/pyaudio3dtools/spatialmetadata.py
+++ /dev/null
@@ -1,492 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-    (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository. All Rights Reserved.
-
-    This software is protected by copyright law and by international treaties.
-    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository retain full ownership rights in their respective contributions in
-    the software. This notice grants no license of any kind, including but not limited to patent
-    license, nor is any license granted by implication, estoppel or otherwise.
-
-    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
-    contributions.
-
-    This software is provided "AS IS", without any express or implied warranties. The software is in the
-    development stage. It is intended exclusively for experts who have experience with such software and
-    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
-    and fitness for a particular purpose are hereby disclaimed and excluded.
-
-    Any dispute, controversy or claim arising under or in relation to providing this software shall be
-    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
-    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
-    the United Nations Convention on Contracts on the International Sales of Goods.
-"""
-
-import logging
-import math
-import os
-from typing import Optional, TextIO
-
-import numpy as np
-
-from pyaudio3dtools import audioarray, audiofile, spatialaudioformat
-
-main_logger = logging.getLogger("__main__")
-logger = main_logger.getChild(__name__)
-logger.setLevel(logging.DEBUG)
-
-
-class Metadata:
-    def __init__(
-        self,
-        metadata_path: Optional[str] = None,
-        metadata_format: Optional[str] = "iis",
-        audio_wav_path: Optional[str] = None,
-        audio_fs: Optional[int] = 48000,
-    ):
-        """
-        Spatial Metadata
-
-        Parameters
-        ----------
-        metadata_path: Optional[str]
-            path to metadata file
-        metadata_format: Optional[str]
-            format for metadata file, supported: ["iis", "ivas_ism"]
-        audio_wav_path: Optional[str]
-            path to corresponding audio files
-        audio_fs: Optional[int]
-            audio sampling frequency
-
-        Returns
-        -------
-        self
-
-        """
-
-        self._delete_all()
-        if audio_fs is None:
-            audio_fs = 48000
-        self.audio_fs = audio_fs
-
-        # init from input file
-        if metadata_path is not None and os.path.isfile(metadata_path):
-            self.read_metadata(metadata_path, metadata_format, audio_wav_path)
-
-    def _delete_all(self) -> None:
-        self.objects = []  # list of audio objects
-        self.nb_objects = 0  # Number of objects
-        self.nb_tracks = 0  # Number of tracks
-        self.audio_wav = []  # list of wav files
-        self.audio_array = np.zeros([1, 0])
-        self.nb_frames = 0  # Number of frames
-
-    def read_metadata(
-        self,
-        metadata_path: str,
-        metadata_format: str = "iis",
-        audio_wav_path: Optional[str] = None,
-    ) -> None:
-        if metadata_format == "iis":
-            with open(metadata_path, "r") as file_in:
-                dirname = os.path.dirname(metadata_path)
-                self.audio_wav.append(os.path.join(dirname, file_in.readline().strip()))
-                nb_new_objects = int(file_in.readline())
-
-                for _ in range(nb_new_objects):
-                    in_type = file_in.readline().strip()
-
-                    if in_type.lower() == "ism":
-                        self.objects.append(read_ism_input(file_in, dirname))
-                    elif in_type.lower() == "sba":
-                        self.objects.append(read_sba_input(file_in))
-                    elif in_type.lower() == "mc":
-                        self.objects.append(read_mc_input(file_in))
-                    else:
-                        raise ValueError("Unknown input type in metadata file")
-
-                    self._append_audio_array(self.audio_wav[-1])
-                    self.nb_objects += 1
-
-        elif metadata_format == "ivas_ism":
-            if audio_wav_path is None:
-                raise FileNotFoundError("Wave file not specified!")
-
-            self.audio_wav.append(audio_wav_path)
-            self.objects.append(
-                read_ism_ivas_data(metadata_path, object_index=self.nb_objects)
-            )
-
-            self._append_audio_array(self.audio_wav[-1])
-            self.nb_objects += 1
-        else:
-            raise ValueError("Metadata: unknown metadata format")
-
-    def write_metadata(
-        self,
-        metadata_path: str,
-        metadata_format: str = "iis",
-        audio_output_path: Optional[str] = None,
-        max_objects: Optional[int] = None,
-    ) -> list:
-        metadata_out_list = []
-
-        if metadata_format == "iis":
-            with open(metadata_path, "w") as file_out:
-                if audio_output_path is not None:
-                    file_out.write(f"{audio_output_path}\n")
-                    dirname = os.path.dirname(metadata_path)
-                    self.write_audio_array(os.path.join(dirname, audio_output_path))
-
-                file_out.write(f"{str(self.nb_objects)}\n")
-                for object_index in range(self.nb_objects):
-                    if self.objects[object_index]["input_type"] == "ism":
-                        write_ism_input(
-                            file_out,
-                            self.objects[object_index],
-                            metadata_path,
-                            num_frames=self.nb_frames,
-                        )
-                    elif self.objects[object_index]["input_type"] == "sba":
-                        write_sba_input(file_out, self.objects[object_index])
-                    elif self.objects[object_index]["input_type"] == "mc":
-                        write_mc_input(file_out, self.objects[object_index])
-                    else:
-                        raise ValueError("Unknown input type in metadata file")
-
-                metadata_out_list.append(file_out.name)
-
-        elif metadata_format == "ivas_ism":
-            outfilename, output_ext = os.path.splitext(os.path.basename(metadata_path))
-            x = np.zeros([1, 0])
-
-            for object_index in range(self.nb_objects):
-                if self.objects[object_index]["input_type"] == "ism":
-                    # Prepare audio waveform
-                    if audio_output_path is not None:
-                        chan_start = self.objects[object_index]["track_index"]
-                        chan_end = chan_start + self.objects[object_index]["nb_tracks"]
-                        if x.shape[1] == 0:
-                            x = self.audio_array[:, chan_start:chan_end]
-                        else:
-                            x = np.append(
-                                x, self.audio_array[:, chan_start:chan_end], axis=1
-                            )
-
-                    # Write positions
-                    with open(
-                        metadata_path.replace(
-                            output_ext,
-                            str(self.objects[object_index]["track_index"]) + output_ext,
-                        ),
-                        "w",
-                    ) as file_out:
-                        write_ism_ivas_data(
-                            file_out,
-                            self.objects[object_index],
-                            num_frames=self.nb_frames,
-                        )
-                    metadata_out_list.append(file_out.name)
-
-                    if (max_objects is not None) and (
-                        len(metadata_out_list) >= max_objects
-                    ):
-                        break
-
-            # Write audio waveform
-            if audio_output_path is not None:
-                audiofile.writefile(audio_output_path, x, fs=self.audio_fs)
-
-        return metadata_out_list
-
-    def print_info(self) -> None:
-        print(f"Number of objects in the scene: {self.nb_objects}")
-        for object_index in range(self.nb_objects):
-            print(f"    Object #{object_index} Type: {self.objects[object_index]}")
-
-    def _append_audio_array(self, audio_wav=None, fs=48000, nchan=1, object_index=None):
-        if audio_wav is None:
-            audio_wav = self.audio_wav[-1]
-        if object_index is None:
-            object_index = -1
-
-        x, fs = audiofile.readfile(audio_wav, fs=fs, nchannels=nchan)
-        logger.debug(f"Append {audio_wav}: {x.shape[0]} by {x.shape[1]}")
-
-        # Select appropriate channels & resample if necessary
-        chan_start = self.objects[object_index]["track_index"]
-        chan_end = chan_start + self.objects[object_index]["nb_tracks"]
-        logger.debug(f"    channels from {chan_start} to {chan_end}")
-        x = x[:, chan_start:chan_end]
-        x = audioarray.resample(x, fs, self.audio_fs)
-
-        # Append array and update track index
-        self.objects[object_index]["track_index"] = self.audio_array.shape[1]
-        if self.audio_array.shape[1] == 0:
-            self.audio_array = x
-        else:
-            len_min = min([self.audio_array.shape[0], x.shape[0]])
-            self.audio_array = np.append(
-                self.audio_array[:len_min, :], x[:len_min, :], axis=1
-            )
-
-        self.nb_tracks = self.nb_tracks + x.shape[1]
-        self.nb_frames = math.ceil(50.0 * self.audio_array.shape[0] / self.audio_fs)
-
-    # init with list of ISM metadata files
-    def init_for_ism(
-        self,
-        in_file: str,
-        in_fs: int,
-        in_nchan: int,
-        metadata_files: list,
-    ) -> None:
-        self.audio_wav.append(in_file)
-
-        for csv in metadata_files:
-            self.objects.append(read_ism_ivas_data(csv, object_index=self.nb_objects))
-            self.objects[-1]["track_index"] = self.nb_objects
-            self._append_audio_array(self.audio_wav[-1], fs=in_fs, nchan=in_nchan)
-            self.nb_objects += 1
-
-    # Get audio array with sampling rate
-    def get_audio_array(self):
-        return self.audio_array, self.audio_fs
-
-    # Set audio array from file
-    def set_audio_array(self, audio_path, fs=None):
-        if fs is None:
-            fs = self.audio_fs
-        audiofile.readfile(audio_path, self.audio_array, fs)
-        self.audio_fs = fs
-
-    # Write audio array to file
-    def write_audio_array(self, audio_path):
-        audiofile.writefile(audio_path, self.audio_array, fs=self.audio_fs)
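A minimal sketch of driving this class directly; the paths are placeholders, and "iis" is the default metadata format:

```
from pyaudio3dtools.spatialmetadata import Metadata

# Parse a scene description and split it into per-object IVAS ISM csv files
scene = Metadata("my_scene.txt", metadata_format="iis")
scene.print_info()
csv_files = scene.write_metadata(
    "my_scene_ism.csv",
    metadata_format="ivas_ism",
    audio_output_path="my_scene_ism.wav",
    max_objects=4,
)
```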
-
-
-##################################################
-# Helper functions for IIS metadata
-##################################################
-def read_ism_input(file_handle: TextIO, dirname: str) -> dict:
-    """
-    Read ISM Input (IIS metadata format)
-
-    Parameters
-    ----------
-    file_handle: TextIO
-        file pointer
-    dirname: str
-        root directory used to read csv files
-
-    Returns
-    -------
-    dict
-        ISM dictionary with positions
-    """
-    ism = {"input_type": "ism"}
-    ism["track_index"] = int(file_handle.readline()) - 1
-    ism["nb_tracks"] = 1
-    ism["positions"] = []
-    ism["gain"] = 1
-    line = file_handle.readline()
-
-    try:
-        ism["num_positions"] = int(line)
-        for _ in range(ism["num_positions"]):
-            [use_for_frames, azimuth, elevation] = (
-                file_handle.readline().strip().split(",")
-            )
-            pos = {}
-            pos["use_for_frames"] = int(use_for_frames)
-            pos["azimuth"] = int(azimuth)
-            pos["elevation"] = int(elevation)
-            ism["positions"].append(pos)
-        ism["gain"] = read_gain_value(file_handle)
-    except ValueError:
-        # not a position count, so the line must name a csv file with positions
-        meta_csv = os.path.join(dirname, line.strip())
-        pos_idx = 0
-        with open(meta_csv) as file_handle:
-            for line in file_handle:
-                current_values = line.strip().split(",")
-                pos = {}
-                pos["use_for_frames"] = 1
-                pos["azimuth"] = float(current_values[0])
-                pos["elevation"] = float(current_values[1])
-                ism["positions"].append(pos)
-                pos_idx += 1
-
-        ism["num_positions"] = pos_idx
-
-    return ism
-
-
-def write_ism_input(
-    file_handle: TextIO,
-    ism_dict: dict,
-    metadata_path: Optional[str] = None,
-    num_frames: Optional[int] = None,
-) -> None:
-    """
-    Write ISM Input (IIS metadata format)
-
-    Parameters
-    ----------
-    file_handle: TextIO
-        file pointer
-    ism_dict: dict
-        ISM dictionary with positions
-    metadata_path: Optional[str]
-        if given, positions are written to a separate file in csv format
-    num_frames: Optional[int]
-        number of frames to be written
-
-    Returns
-    -------
-    None
-    """
-    file_handle.write("ISM\n")
-    track_index = ism_dict["track_index"]
-    file_handle.write(f"{str(track_index + 1)}\n")
-
-    if metadata_path is None:
-        num_positions = ism_dict["num_positions"]
-        file_handle.write(f"{str(num_positions)}\n")
-
-        positions = ism_dict["positions"]
-        for pos_idx in range(ism_dict["num_positions"]):
-            use_for_frames = positions[pos_idx]["use_for_frames"]
-            azimuth = round(positions[pos_idx]["azimuth"])
-            elevation = round(positions[pos_idx]["elevation"])
-
-            file_handle.write(f"{use_for_frames:04d},{azimuth:+03d},{elevation:+03d}\n")
-    else:
-        # Write filename
-        _, extname = os.path.splitext(metadata_path)
-        dirname = os.path.dirname(metadata_path)
-        basename = os.path.basename(metadata_path).replace(
-            extname, "." + str(track_index) + ".csv"
-        )
-        file_handle.write(f"{basename}\n")
-        # Write positions
-        with open(os.path.join(dirname, basename), "w") as file_out:
-            write_ism_ivas_data(file_out, ism_dict, num_frames=num_frames)
-
-
-def read_sba_input(file_handle: TextIO) -> dict:
-    sba = {"input_type": "sba"}
-    sba["track_index"] = int(file_handle.readline()) - 1
-    sba["format"] = spatialaudioformat.Format(f"SBA{int(file_handle.readline())}")
-    sba["order"] = sba["format"].ambi_order
-    sba["nb_tracks"] = (sba["order"] + 1) ** 2
-    sba["gain"] = read_gain_value(file_handle)
-    return sba
-
-
-def write_sba_input(file_handle: TextIO, sba_dict: dict) -> None:
-    file_handle.write("SBA\n")
-    track_index = sba_dict["track_index"]
-    file_handle.write(f"{str(track_index + 1)}\n")
-    order = sba_dict["order"]
-    file_handle.write(f"{str(order)}\n")
-
-
-def read_mc_input(file_handle: TextIO) -> dict:
-    mc = {"input_type": "mc"}
-    mc["track_index"] = int(file_handle.readline()) - 1
-    mc["format"] = spatialaudioformat.Format(file_handle.readline().strip())
-    mc["nb_tracks"] = mc["format"].nchannels
-    mc["gain"] = read_gain_value(file_handle)
-    return mc
-
-
-def write_mc_input(file_handle: TextIO, mc_dict: dict) -> None:
-    file_handle.write("MC\n")
-    track_index = mc_dict["track_index"]
-    file_handle.write(f"{str(track_index + 1)}\n")
-    name = mc_dict["format"].name
-    file_handle.write(f"{name}\n")
-
-
-def read_gain_value(file_handle: TextIO) -> float:
-    original_pos = file_handle.tell()
-    gain = file_handle.readline().lower()
-    if gain.startswith("gain_db"):
-        gain = float(gain.replace("gain_db", ""))
-        return 10 ** (gain / 20)
-    else:
-        file_handle.seek(original_pos)
-        return 1
-
-
-##################################################
-# Helper functions for ISM IVAS metadata
-##################################################
-def read_ism_ivas_data(metadata_path: str, object_index: int = 0) -> dict:
-    ism = {"input_type": "ism"}
-    ism["track_index"] = 0
-    ism["num_positions"] = 0
-    ism["nb_tracks"] = 1
-    ism["positions"] = []
-
-    pos_idx = 0
-
-    try:
-        with open(metadata_path) as file_handle:
-            for line in file_handle:
-                current_values = line.strip().split(",")
-                pos = {}
-                pos["use_for_frames"] = 1
-                pos["azimuth"] = float(current_values[0])
-                pos["elevation"] = float(current_values[1])
-                ism["positions"].append(pos)
-                pos_idx += 1
-    except FileNotFoundError:
-        # TODO in case of NULL metadata we can also spread the objects spatially
-        pos = {}
-        pos["use_for_frames"] = 1
-        pos["azimuth"] = 0.0
-        pos["elevation"] = 0.0
-        ism["positions"].append(pos)
-        pos_idx += 1
-
-    ism["num_positions"] = pos_idx
-    return ism
-
-
-def write_ism_ivas_data(
-    file_handle: TextIO, ism_dict: dict, num_frames: Optional[int] = None
-) -> None:
-    positions = ism_dict["positions"]
-
-    if num_frames is None:
-        num_frames = 0
-        for pos_idx in range(len(positions)):
-            num_frames += positions[pos_idx]["use_for_frames"]
-
-    # Constants for all positions
-    distance = 1.0
-    spread = 0.0
-    gain = 1.0
-    pos_idx = 0
-    pos_used_times = 0
-    for _ in range(num_frames):
-        azimuth = float(positions[pos_idx]["azimuth"])
-        elevation = float(positions[pos_idx]["elevation"])
-
-        file_handle.write(
-            f"{azimuth:+07.2f},{elevation:+06.2f},{distance:05.2f},{spread:06.2f},{gain:04.2f}\n"
-        )
-
-        pos_used_times += 1
-
-        if pos_used_times == positions[pos_idx]["use_for_frames"]:
-            pos_idx = (pos_idx + 1) % len(positions)
-            pos_used_times = 0
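Put together, the readers above imply a simple line-oriented scene file. A hypothetical example (all values invented) that `Metadata` would accept: first the wav filename and the object count, then per object its type, 1-based start track, and type-specific lines, with an optional `gain_db` line after ISM/SBA/MC blocks:

```
audio.wav
2
ISM
1
2
0100,+30,+00
0100,-30,+15
gain_db -3
SBA
2
3
```

Here object 1 is an ISM stream on track 1 with two positions (frames,azimuth,elevation, in the writer's `%04d,%+03d,%+03d` layout) attenuated by 3 dB, and object 2 is order-3 ambisonics starting at track 2.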
diff --git a/scripts/pyprocessing/__init__.py b/scripts/pyprocessing/__init__.py
deleted file mode 100644
index 5e7b5aa892..0000000000
--- a/scripts/pyprocessing/__init__.py
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-    (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository. All Rights Reserved.
-
-    This software is protected by copyright law and by international treaties.
-    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository retain full ownership rights in their respective contributions in
-    the software. This notice grants no license of any kind, including but not limited to patent
-    license, nor is any license granted by implication, estoppel or otherwise.
-
-    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
-    contributions.
-
-    This software is provided "AS IS", without any express or implied warranties. The software is in the
-    development stage. It is intended exclusively for experts who have experience with such software and
-    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
-    and fitness for a particular purpose are hereby disclaimed and excluded.
-
-    Any dispute, controversy or claim arising under or in relation to providing this software shall be
-    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
-    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
-    the United Nations Convention on Contracts on the International Sales of Goods.
-"""
-
-"""
-pyprocessing
-====
-
-Provides
-    Methods for processing/coding 3D audio
-
-Imports
--------
-functions
-class
-"""
-from . import evs, ivas, prepost_processing, processing, processing_configs, utils
diff --git a/scripts/pyprocessing/evs.py b/scripts/pyprocessing/evs.py
deleted file mode 100644
index 6de86f2a27..0000000000
--- a/scripts/pyprocessing/evs.py
+++ /dev/null
@@ -1,238 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-    (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository. All Rights Reserved.
-
-    This software is protected by copyright law and by international treaties.
-    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository retain full ownership rights in their respective contributions in
-    the software. This notice grants no license of any kind, including but not limited to patent
-    license, nor is any license granted by implication, estoppel or otherwise.
-
-    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
-    contributions.
-
-    This software is provided "AS IS", without any express or implied warranties. The software is in the
-    development stage. It is intended exclusively for experts who have experience with such software and
-    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
-    and fitness for a particular purpose are hereby disclaimed and excluded.
-
-    Any dispute, controversy or claim arising under or in relation to providing this software shall be
-    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
-    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
-    the United Nations Convention on Contracts on the International Sales of Goods.
-"""
-
-import logging
-import os
-
-from pyaudio3dtools import audiofile, spatialaudioformat, spatialmetadata
-from pyaudio3dtools.spatialaudioconvert import render_meta
-
-from pyprocessing import utils
-from pyprocessing.processing import Processing
-
-main_logger = logging.getLogger("__main__")
-logger = main_logger.getChild(__name__)
-logger.setLevel(logging.DEBUG)
-
-
-class EVS(Processing):
-    def __init__(
-        self,
-        in_format: str,
-        out_format: str,
-        bitrate: int,
-        in_fs: int = 48000,
-        **kwargs,
-    ):
-        super().__init__()
-        for k, v in kwargs.items():
-            setattr(self, k, v)
-        self.in_format = spatialaudioformat.Format(in_format)
-        self.out_format = spatialaudioformat.Format(out_format)
-        if isinstance(bitrate, int):
-            self.bitrate = [bitrate]
-            if self.in_format.nchannels > 0:
-                self.bitrate = [bitrate] * self.in_format.nchannels
-        elif isinstance(bitrate, list):
-            self.bitrate = bitrate
-            if self.in_format.nchannels > 0:
-                self.bitrate.extend([0] * (self.in_format.nchannels - len(bitrate)))
-        self.in_fs = in_fs
-        self.exec_enc = utils.get_exec_path(kwargs["cod_bin"])
-        self.exec_dec = utils.get_exec_path(kwargs["dec_bin"])
-
-        if in_format != out_format:
-            raise ValueError("EVS: output format must equal the input format.")
-        if not os.path.exists(self.exec_enc):
-            raise FileNotFoundError(
-                f"The EVS encoder binary was not found at the given path: {self.exec_enc}"
-            )
-        if not os.path.exists(self.exec_dec):
-            raise FileNotFoundError(
-                f"The EVS decoder binary was not found at the given path: {self.exec_dec}"
-            )
-
-    def process(self, input_path: str, output_path: str, tmp_path: str) -> None:
-        logger.debug(f"EVS {input_path} -> {output_path}")
-        # Read input file
-        _, input_ext = os.path.splitext(os.path.basename(input_path))
-        _, output_ext = os.path.splitext(os.path.basename(output_path))
-        if input_ext == ".txt":
-            metadata_obj = spatialmetadata.Metadata(input_path, audio_fs=self.in_fs)
-            input_multi_channels = output_path.replace(output_ext, ".pcm")
-            if self.in_format.name[:3] == "ISM":
-                # extract IVAS ISM metadata and audio
-                input_csv = output_path.replace(output_ext, ".csv")
-                metadata_files = metadata_obj.write_metadata(
-                    input_csv, "ivas_ism", input_multi_channels, max_objects=4
-                )
-                # change number of ISM
-                self.in_format.name = "ISM" + str(len(metadata_files))
-                self.in_format = spatialaudioformat.Format(self.in_format.name[:4])
-                if len(self.bitrate) < len(metadata_files):
-                    self.bitrate.extend(
-                        [self.bitrate[-1]] * (len(metadata_files) - len(self.bitrate))
-                    )
-            else:
-                in_sig, fs = metadata_obj.get_audio_array()
-                in_sig = render_meta(metadata_obj, self.in_spfmt)
-                audiofile.writefile(input_multi_channels, in_sig, self.in_fs)
-        elif input_ext == ".wav" or input_ext == ".pcm":
-            input_multi_channels = input_path
-        else:
-            raise ValueError(f"EVS: invalid audio input extension: {input_ext}")
-
-        # Split the corresponding channels at enc_fs
-        split_chans_in = []
-        for idx in range(self.in_format.nchannels):
-            split_chans_in.append(
-                output_path.replace(output_ext, "." + str(idx) + ".pcm")
-            )
-
-        audiofile.splitfiles(
-            input_multi_channels,
-            split_chans_in,
-            in_nchans=self.in_format.nchannels,
-            in_fs=self.in_fs,
-            out_fs=self.enc_fs,
-        )
-
-        # Zero vertical Ambi channels if planar
-        mute_next_chan = -1
-        if self.in_format.ambi_order > 0 and self.in_format.isplanar:
-            mute_chans_enum = enumerate(self.in_format.get_vert_hoa_channels())
-            _, mute_next_chan = next(mute_chans_enum)
-
-        # run processing
-        split_chans_out = []
-        for idx, split_in in enumerate(split_chans_in):
-            split_bs = split_in.replace(".pcm", ".bs")
-            split_out = split_in.replace(".pcm", ".dec.pcm")
-
-            # Zero vertical Ambi channels if planar
-            if idx == mute_next_chan:
-                audiofile.mutefile(split_in, split_in, in_fs=self.enc_fs, in_nchans=1)
-                try:
-                    _, mute_next_chan = next(mute_chans_enum)
-                except StopIteration:
-                    pass
-
-            if self.bitrate[idx] > 0:
-                self.enc(split_in, split_bs, self.bitrate[idx])
-                self.dec(split_bs, split_out)
-            else:
-                # zero channel
-                audiofile.convertfile(
-                    split_in, split_out, in_fs=self.enc_fs, out_fs=self.dec_fs
-                )
-                audiofile.mutefile(split_out, split_out, in_fs=self.dec_fs, in_nchans=1)
-
-            split_chans_out.append(split_out)
-
-        # Output file: combine waveform files into one output, and optionally write iis metadata
-        if output_ext == ".pcm" or output_ext == ".wav":
-            audiofile.combinefiles(
-                split_chans_out, output_path, in_fs=self.dec_fs, out_fs=self.dec_fs
-            )
-        elif output_ext == ".txt":
-            output_wav = output_path.replace(output_ext, ".wav")
-            if self.in_format.name[:3] == "ISM":
-                # Write new metadata
-                metadata_out_obj = spatialmetadata.Metadata(audio_fs=self.dec_fs)
-                for idx in range(self.in_format.nchannels):
-                    logger.debug(
-                        "EVS_mono Read metadata "
-                        + metadata_files[idx]
-                        + " with audio file "
-                        + split_chans_out[idx]
-                    )
-                    metadata_out_obj.read_metadata(
-                        metadata_files[idx],
-                        metadata_format="ivas_ism",
-                        audio_wav_path=split_chans_out[idx],
-                    )
-                metadata_out_obj.write_metadata(
-                    output_path,
-                    metadata_format="iis",
-                    audio_output_path=os.path.basename(output_wav),
-                )
-            else:
-                # pass-through mode, rewrite only audio waveform with decoded file
-                audiofile.combinefiles(
-                    split_chans_out, output_wav, in_fs=self.dec_fs, out_fs=self.dec_fs
-                )
-                metadata_obj.write_metadata(output_path, metadata_format="iis")
-        else:
-            raise ValueError(f"EVS: invalid audio output extension: {output_ext}")
-
-    def enc(self, input_wav: str, output_bs: str, bitrate: int) -> None:
-        input_pcm = output_bs.replace(".bs", ".pcm")
-        audiofile.convertfile(input_wav, input_pcm, out_nchans=1, out_fs=self.in_fs)
-
-        cmd = [self.exec_enc]
-        if self.dtx:
-            cmd.append("-dtx")
-
-        if self.cod_opt:
-            cmd.extend(self.cod_opt)
-
-        cmd.extend(
-            [
-                "-max_band",
-                self.max_band,
-                str(bitrate),
-                str(self.enc_fs // 1000),
-                input_pcm,
-                output_bs,
-            ]
-        )
-        Processing.run(cmd)
-
-    def dec(self, input_bs: str, output_wav: str) -> None:
-        cmd = [self.exec_dec]
-
-        if self.dec_opt:
-            cmd.extend(self.dec_opt)
-
-        cmd.extend([str(self.dec_fs // 1000), input_bs, output_wav])
-        Processing.run(cmd)
-
-    def get_processing_file_paths(self, template_out_file: str, input_ext: str) -> list:
-        if input_ext == ".wav" or input_ext == ".pcm":
-            return [template_out_file.replace(".wav", "_evs_mono.wav")]
-        elif input_ext == ".txt":
-            return [template_out_file.replace(".wav", "_evs_mono.txt")]
-        else:
-            raise ValueError(f"EVS: invalid audio input extension: {input_ext}")
-
-    def get_total_bit_rate(self):
-        return sum(self.bitrate)
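A hypothetical instantiation of the multi-mono EVS wrapper; the binary paths, format name, and keyword values below are invented for illustration (the real defaults live in processing_configs.py):

```
from pyprocessing.evs import EVS

evs = EVS(
    in_format="FOA", out_format="FOA", bitrate=32000, in_fs=48000,
    cod_bin="../EVS_cod", dec_bin="../EVS_dec",  # placeholder binaries
    cod_opt=None, dec_opt=None,
    enc_fs=48000, dec_fs=48000, max_band="FB", dtx=False,
)
evs.process("item_foa.wav", "item_foa_evs.wav", tmp_path="tmp")
```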
diff --git a/scripts/pyprocessing/ivas.py b/scripts/pyprocessing/ivas.py
deleted file mode 100644
index 0f182fd1fb..0000000000
--- a/scripts/pyprocessing/ivas.py
+++ /dev/null
@@ -1,200 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-    (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository. All Rights Reserved.
-
-    This software is protected by copyright law and by international treaties.
-    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository retain full ownership rights in their respective contributions in
-    the software. This notice grants no license of any kind, including but not limited to patent
-    license, nor is any license granted by implication, estoppel or otherwise.
-
-    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
-    contributions.
-
-    This software is provided "AS IS", without any express or implied warranties. The software is in the
-    development stage. It is intended exclusively for experts who have experience with such software and
-    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
-    and fitness for a particular purpose are hereby disclaimed and excluded.
-
-    Any dispute, controversy or claim arising under or in relation to providing this software shall be
-    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
-    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
-    the United Nations Convention on Contracts on the International Sales of Goods.
-"""
-
-import logging
-import os
-from typing import Optional
-
-from pyaudio3dtools import audiofile, spatialaudioformat, spatialmetadata
-from pyaudio3dtools.spatialaudioconvert import render_meta
-
-from pyprocessing import utils
-from pyprocessing.processing import Processing
-
-main_logger = logging.getLogger("__main__")
-logger = main_logger.getChild(__name__)
-logger.setLevel(logging.DEBUG)
-
-
-class IVAS(Processing):
-    def __init__(self, in_format: str, bitrate: int, in_fs: int = 48000, **kwargs):
-        super().__init__()
-        for k, v in kwargs.items():
-            setattr(self, k, v)
-        self.in_format = spatialaudioformat.Format(in_format)
-        self.bitrate = bitrate
-        self.in_fs = in_fs
-        self.out_format = spatialaudioformat.Format(kwargs["out_format"])
-        self.exec_enc = utils.get_exec_path(kwargs["cod_bin"])
-        self.exec_dec = utils.get_exec_path(kwargs["dec_bin"])
-        self.ht_file = os.path.abspath(self.ht_file)
-
-        if self.out_format.name == "ISM":
-            self.out_format.name = "EXT"
-
-        if not os.path.exists(self.exec_enc):
-            raise FileNotFoundError(
-                f"The IVAS encoder binary was not found at the given path: {self.exec_enc}"
-            )
-        if not os.path.exists(self.exec_dec):
-            raise FileNotFoundError(
-                f"The IVAS decoder binary was not found at the given path: {self.exec_dec}"
-            )
-
-    def process(self, input_path: str, output_path: str, tmp_path: str) -> None:
-        logger.debug(f"IVAS {input_path} -> {output_path}")
-
-        _, output_ext = os.path.splitext(os.path.basename(output_path))
-        if output_ext == ".wav":
-            output_bs = output_path.replace(".wav", ".bs")
-        else:
-            raise ValueError(f"IVAS: invalid audio output extension: {output_ext}")
-
-        self.enc(input_path, output_bs)
-        self.dec(output_bs, output_path)
-
-    def enc(self, input_path: str, output_bs: str) -> None:
-        logger.debug(f"IVAS encoder {input_path} -> {output_bs}")
-
-        # Only resample and convert if wav, otherwise raw pcm is assumed to be sampled at self.in_fs
-        _, input_ext = os.path.splitext(os.path.basename(input_path))
-        metadata_files = []
-
-        # for MASA, assume the metadata file has the same basename and location as the input file
-        if self.in_format.name.lower()[:4] == "masa":
-            metadata_files.append(input_path.replace(input_ext, ".met"))
-
-        # Supported input files: wav, pcm and txt (iis metadata)
-        if (input_ext == ".wav") or (input_ext == ".pcm"):
-            input_pcm = output_bs.replace(".bs", ".pcm")
-            audiofile.convertfile(
-                input_path,
-                input_pcm,
-                in_fs=self.in_fs,
-                out_fs=self.enc_fs,
-                in_nchans=self.in_format.nchannels,
-                out_nchans=self.in_format.nchannels,
-            )
-        elif input_ext == ".txt":
-            metadata_obj = spatialmetadata.Metadata(input_path, audio_fs=self.enc_fs)
-            input_pcm = output_bs.replace(".bs", ".pcm")
-            input_csv = output_bs.replace(".bs", ".csv")
-            if self.in_format.name[:3] == "ISM":
-                # extract IVAS ISM metadata and audio
-                metadata_files = metadata_obj.write_metadata(
-                    input_csv, "ivas_ism", input_pcm, max_objects=4
-                )
-
-                # change number of ISM
-                self.in_format.name = "ISM" + str(len(metadata_files))
-                self.in_format = spatialaudioformat.Format(self.in_format.name[:4])
-            else:
-                in_sig = render_meta(metadata_obj, self.in_spfmt)
-                audiofile.writefile(input_pcm, in_sig, self.enc_fs)
-        else:
-            raise ValueError(f"IVAS: invalid audio input extension: {input_ext}")
-
-        cmd = [self.exec_enc]
-        if self.dtx:
-            cmd.append("-dtx")
-
-        if self.cod_opt:
-            cmd.extend(self.cod_opt)
-
-        cmd.extend(
-            [
-                *IVAS.parse_config(self.in_format, metadata_files),
-                "-max_band",
-                self.max_band,
-                str(self.bitrate),
-                str(self.enc_fs // 1000),
-                input_pcm,
-                output_bs,
-            ]
-        )
-        Processing.run(cmd)
-
-    def dec(self, input_bs: str, output_wav: str) -> None:
-        logger.debug(f"IVAS decoder {input_bs} -> {output_wav}")
-
-        cmd = [self.exec_dec]
-        if self.head_tracking:
-            cmd.extend(["-T", self.ht_file])
-
-        if self.plc:
-            cmd.extend(["-FEC", str(self.plc_rate)])
-
-        if self.dec_opt:
-            cmd.extend(self.dec_opt)
-
-        cmd.extend(
-            [
-                self.out_format.name,
-                str(self.dec_fs // 1000),
-                input_bs,
-                output_wav,
-            ]
-        )
-        Processing.run(cmd)
-
-    @staticmethod
-    def parse_config(spformat: spatialaudioformat.Format, metadata_files: Optional[list] = []):
-        name = spformat.name.lower()
-
-        if name == "mono":
-            return [""]
-        elif name == "stereo":
-            return ["-stereo"]
-        elif name.startswith("ism"):
-            # replace any missing files with NULL
-            while len(metadata_files) < int(name[-1]):
-                metadata_files.append("NULL")
-            return ["-ism", name[-1]] + metadata_files[: int(name[-1])]
-        elif name.startswith("masa"):
-            return ["-masa", name[-1], metadata_files[0]]
-        elif spformat.ambi_order > 0:
-            if spformat.isplanar:
-                return ["-sba", f"-{spformat.ambi_order}"]
-            else:
-                return ["-sba", f"+{spformat.ambi_order}"]
-        elif spformat.isloudspeaker:
-            return ["-mc", name]
-
-        raise ValueError(f"IVAS: Invalid input config: {spformat}")
-
-    def get_processing_file_paths(
-        self, template_out_file: str, input_ext: Optional[str] = None
-    ) -> list:
-        return [template_out_file.replace(".wav", "_ivas.wav")]
-
-    def get_total_bit_rate(self):
-        return self.bitrate
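The flag construction in `parse_config` can be sanity-checked in isolation; a sketch assuming the format names below are accepted by `spatialaudioformat.Format`:

```
from pyaudio3dtools import spatialaudioformat
from pyprocessing.ivas import IVAS

print(IVAS.parse_config(spatialaudioformat.Format("STEREO")))  # ['-stereo']
print(IVAS.parse_config(spatialaudioformat.Format("SBA3")))    # ['-sba', '+3']
print(IVAS.parse_config(spatialaudioformat.Format("ISM2"), ["a.csv"]))
# ['-ism', '2', 'a.csv', 'NULL']  (missing metadata files padded with NULL)
```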
diff --git a/scripts/pyprocessing/prepost_processing.py b/scripts/pyprocessing/prepost_processing.py
deleted file mode 100644
index 3ea7411cea..0000000000
--- a/scripts/pyprocessing/prepost_processing.py
+++ /dev/null
@@ -1,220 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-    (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository. All Rights Reserved.
-
-    This software is protected by copyright law and by international treaties.
-    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository retain full ownership rights in their respective contributions in
-    the software. This notice grants no license of any kind, including but not limited to patent
-    license, nor is any license granted by implication, estoppel or otherwise.
-
-    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
-    contributions.
-
-    This software is provided "AS IS", without any express or implied warranties. The software is in the
-    development stage. It is intended exclusively for experts who have experience with such software and
-    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
-    and fitness for a particular purpose are hereby disclaimed and excluded.
-
-    Any dispute, controversy or claim arising under or in relation to providing this software shall be
-    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
-    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
-    the United Nations Convention on Contracts on the International Sales of Goods.
-"""
-
-import json
-import logging
-import os
-import shutil
-from typing import Optional
-
-from pyaudio3dtools import (
-    audioarray,
-    audiofile,
-    binauralrenderer,
-    spatialaudioconvert,
-    spatialaudioformat,
-)
-
-from pyprocessing import utils
-from pyprocessing.processing import Processing
-
-main_logger = logging.getLogger("__main__")
-logger = main_logger.getChild(__name__)
-logger.setLevel(logging.DEBUG)
-
-
-class PreProcessing(Processing):
-    def __init__(
-        self,
-        out_format: str,
-        out_fs: int = 48000,
-        out_fc: Optional[int] = None,
-        output_loudness: Optional[int] = None,
-        loudness_tool: Optional[str] = "bs1770demo",
-    ):
-        super().__init__()
-        self.out_format = out_format
-        self.out_fs = out_fs
-        self.fc = out_fc
-        self.output_loudness = output_loudness
-        self.loudness_tool = loudness_tool
-
-    def process(self, input_path: str, output_path: str, tmp_path: str):
-        output_nickname = utils.get_nickname(output_path)
-        logger.info(
-            f"    Pre Processing: convert to {self.out_format} : {output_nickname}"
-        )
-
-        try:
-            spatialaudioconvert.spatial_audio_convert(
-                input_path,
-                tmp_path,
-                out_format=self.out_format,
-                out_fs=self.out_fs,
-                output_loudness=self.output_loudness,
-                loudness_tool=self.loudness_tool,
-            )
-        except Exception as e:
-            logger.info(
-                f"    Pre Processing: by-pass : {output_nickname}. Encountered exception {e}"
-            )
-        else:
-            shutil.move(tmp_path, output_path)
-
-    def get_processing_file_paths(
-        self, template_out_file: str, input_ext: Optional[str] = None
-    ) -> list:
-        return [template_out_file.replace(".wav", "_pre.wav")]
-
-
-class PostProcessing(Processing):
-    def __init__(
-        self,
-        in_format: str,
-        out_format: str,
-        in_fs: int = 48000,
-        out_fs: int = 48000,
-        out_fc: Optional[int] = None,
-        binaural_rendered: bool = False,
-        limit_output: bool = False,
-        cut_preamble: float = 0.0,
-        split_file_path: str = "",
-        bin_rend_include_LFE: bool = False,
-        bin_rend_LFE_gain: Optional[float] = 10 ** (5.5 / 20),
-        binaural_dataset: Optional[str] = "orange53",
-        output_loudness: Optional[int] = None,
-        loudness_tool: Optional[str] = "bs1770demo",
-    ):
-        super().__init__()
-        self.in_spfmt = spatialaudioformat.Format(in_format=in_format)
-        self.out_spfmt = spatialaudioformat.Format(in_format=out_format)
-
-        self.in_fs = in_fs
-        self.out_fs = out_fs
-        self.fc = out_fc
-        self.binaural_rendered = binaural_rendered
-        self.cut_preamble = cut_preamble
-        self.split_file_path = split_file_path
-        self.bin_rend_include_LFE = bin_rend_include_LFE
-        self.bin_rend_LFE_gain = bin_rend_LFE_gain
-        self.binaural_dataset = binaural_dataset
-        self.limit_output = limit_output
-        self.output_loudness = output_loudness
-        self.loudness_tool = loudness_tool
-
-    def process(self, input_path: str, output_path: str, tmp_path: str):
-        output_nickname = utils.get_nickname(output_path)
-        logger.info(
-            f"    Post Processing: {self.in_spfmt.name} -> {self.out_spfmt.name} : {output_nickname}"
-        )
-
-        # Spatial audio format conversion
-        spatialaudioconvert.spatial_audio_convert(
-            input_path,
-            tmp_path,
-            in_format=self.in_spfmt.name,
-            out_format=self.out_spfmt.name,
-            in_fs=self.in_fs,
-            out_fs=self.out_fs,
-            out_fc=self.fc,
-            cut_preamble_s=self.cut_preamble,
-            limit_output=self.limit_output,
-            bin_rend_include_LFE=self.bin_rend_include_LFE,
-            bin_rend_LFE_gain=self.bin_rend_LFE_gain,
-            output_loudness=self.output_loudness,
-            loudness_tool=self.loudness_tool,
-            binaural_dataset=self.binaural_dataset,
-        )
-        shutil.move(tmp_path, output_path)
-
-        # read back the converted output (also needed for file splitting below)
-        out_sig, fs = audiofile.readfile(output_path)
-
-        # Binaural rendering
-        if self.binaural_rendered and "BINAURAL" not in self.out_spfmt.name:
-            bin_sig = binauralrenderer.binaural_rendering(
-                out_sig,
-                self.out_spfmt,
-                spatialaudioformat.Format("BINAURAL"),
-                fs=fs,
-                include_LFE=self.bin_rend_include_LFE,
-                LFE_gain=self.bin_rend_LFE_gain,
-            )
-            output_binaural_wav = output_path.replace(".wav", "_BINAURAL.wav")
-            logger.info(
-                f"    Rendering {self.out_spfmt.name} -> BINAURAL : {output_nickname[:-4]}_BINAURAL.wav"
-            )
-            if self.limit_output:
-                logger.info("    limiting")
-                audioarray.limiter(bin_sig, self.out_fs)
-            audiofile.writefile(output_binaural_wav, bin_sig, self.out_fs)
-
-        # split file
-        if self.split_file_path:
-            # check for the split info
-            split_file_name = os.path.join(
-                self.split_file_path,
-                "".join(
-                    [
-                        os.path.basename(output_path)
-                        .replace(".wav", "")
-                        .replace("_post", ""),
-                        ".split",
-                    ]
-                ),
-            )
-            if os.path.exists(split_file_name):
-                with open(split_file_name, "r") as fp:
-                    splits = json.load(fp)
-                n_splits = len(splits) - 1
-                for split_idx in range(n_splits):
-                    output_path_split = output_path.replace(
-                        ".wav", f"_split{split_idx}.wav"
-                    )
-                    start = int(splits[split_idx] * self.out_fs)
-                    stop = int(splits[split_idx + 1] * self.out_fs)
-                    split_sig = audioarray.cut(out_sig, (start, stop))
-                    audiofile.writefile(output_path_split, split_sig, self.out_fs)
-                    if (self.binaural_rendered is True) and (
-                        self.out_spfmt.name != "BINAURAL"
-                    ):
-                        output_bin_wav_split = output_binaural_wav.replace(
-                            ".wav", f"_split{split_idx}.wav"
-                        )
-                        split_sig = audioarray.cut(bin_sig, (start, stop))
-                        audiofile.writefile(
-                            output_bin_wav_split, split_sig, self.out_fs
-                        )
-
-    def get_processing_file_paths(
-        self, template_out_file: str, input_ext: Optional[str] = None
-    ) -> list:
-        return [template_out_file.replace(".wav", "_post.wav")]
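The `.split` companion file read above is plain JSON: a list of segment boundaries in seconds, with one more entry than the number of segments. A hypothetical `item1.split`:

```
[0.0, 8.0, 16.0, 24.0]
```

With `out_fs = 48000` this yields three 8-second segments, written as `*_split0.wav` through `*_split2.wav` (and matching `*_BINAURAL_split*.wav` files when binaural rendering is enabled).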
diff --git a/scripts/pyprocessing/processing.py b/scripts/pyprocessing/processing.py
deleted file mode 100644
index a0533b2b4e..0000000000
--- a/scripts/pyprocessing/processing.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-    (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository. All Rights Reserved.
-
-    This software is protected by copyright law and by international treaties.
-    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository retain full ownership rights in their respective contributions in
-    the software. This notice grants no license of any kind, including but not limited to patent
-    license, nor is any license granted by implication, estoppel or otherwise.
-
-    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
-    contributions.
-
-    This software is provided "AS IS", without any express or implied warranties. The software is in the
-    development stage. It is intended exclusively for experts who have experience with such software and
-    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
-    and fitness for a particular purpose are hereby disclaimed and excluded.
-
-    Any dispute, controversy or claim arising under or in relation to providing this software shall be
-    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
-    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
-    the United Nations Convention on Contracts on the International Sales of Goods.
-"""
-
-import logging
-import os
-import subprocess
-from abc import ABC, abstractmethod
-
-global_print_cmd_only = False
-main_logger = logging.getLogger("__main__")
-logger = main_logger.getChild(__name__)
-logger.setLevel(logging.DEBUG)
-
-
-class Processing(ABC):
-    def __init__(self):
-        pass
-
-    @abstractmethod
-    def process(self, input_path: str, output_path: str, tmp_path: str) -> None:
-        pass
-
-    @abstractmethod
-    def get_processing_file_paths(self, template_out_file: str) -> list:
-        pass
-
-    @staticmethod
-    def run(cmd: list) -> None:
-        Processing.print_cmd(cmd)
-
-        if not global_print_cmd_only:
-            try:
-                result = subprocess.run(cmd, check=True, capture_output=True, text=True)
-            except subprocess.CalledProcessError as e:
-                logger.debug(f"Command returned non-zero exit status : {e.returncode}")
-                logger.debug(e.stderr)
-                logger.debug(e.stdout)
-                raise SystemError(
-                    f"Command returned non-zero exit status ({e.returncode}): {' '.join(e.cmd)}\n{e.stderr}\n{e.stdout}"
-                )
-
-            logger.debug(result.stderr)
-            logger.debug(result.stdout)
-
-    @staticmethod
-    def run_python(cmd: list) -> None:
-        Processing.print_cmd(cmd)
-
-        if not global_print_cmd_only:
-            result = subprocess.run(
-                ["python3"] + cmd, check=True, capture_output=True, text=True
-            )
-            logger.debug(result.stderr)
-            logger.debug(str(result.stdout))
-
-    @staticmethod
-    def print_cmd(cmd: list) -> None:
-        cmd[:] = [i if i is not None else "MISSING" for i in cmd]
-        logger.debug(f"Running command: {' '.join(cmd)}")
-        if "MISSING" in cmd:
-            logger.error("Missing arg in command")
-            raise ValueError("Missing arg in command")
-
-
-def process_chain(
-    proc_chain: list,
-    input_path: str,
-    output_wav: str,
-    tmp_folder_path: str = os.path.abspath("tmp"),
-) -> None:
-    tmp_file_template_name = os.path.join(tmp_folder_path, os.path.basename(output_wav))
-
-    # Prepare a chain of input/tmp/output files
-    processing_paths = [input_path]
-    for p in proc_chain:
-        _, input_ext = os.path.splitext(processing_paths[-1])
-        processing_paths.extend(
-            p.get_processing_file_paths(tmp_file_template_name, input_ext=input_ext)
-        )
-
-    # Temporary files if needed
-    tmp_processing_paths = processing_paths[:]
-    tmp_path_iter = iter(tmp_processing_paths)
-    next(tmp_path_iter)
-
-    # Replace last with real output
-    processing_paths[-1] = output_wav
-    in_path_iter = iter(processing_paths)
-    out_path_iter = iter(processing_paths)
-    next(out_path_iter)
-
-    # go through processing chain
-    for p in proc_chain:
-        p.process(next(in_path_iter), next(out_path_iter), next(tmp_path_iter))
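How the pieces compose; a sketch with placeholder paths and condition parameters:

```
from pyprocessing.processing import process_chain
from pyprocessing.prepost_processing import PreProcessing, PostProcessing

chain = [
    PreProcessing(out_format="5_1"),
    PostProcessing(in_format="5_1", out_format="BINAURAL"),
]
# Each stage consumes its predecessor's output; intermediates land in tmp/
process_chain(chain, "item_in.wav", "item_out.wav")
```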
-""" - -import json -import logging -import os - -from pyprocessing.evs import EVS -from pyprocessing.ivas import IVAS -from pyprocessing.prepost_processing import PostProcessing, PreProcessing -from pyprocessing.utils import list_audio - -main_logger = logging.getLogger("__main__") -logger = main_logger.getChild(__name__) -logger.setLevel(logging.DEBUG) - -# Parse a JSON file into class and perform necessary validation -class test_config: - def __init__(self, filename: str): - # Open and read configuration test file - with open(filename, "r") as fp: - config_dict = json.load(fp) - - # Init lists of conditions and associated folders - self.list_of_conditions = list() - self.output_folders = list() - self.tmp_folders = list() - - # Set defaults - self._set_defaults() - - # Set/override class attributes based on JSON file and update internal dict - for key, value in config_dict.items(): - # update subdictionaries in case of ivas and evs - if key.startswith("ivas"): - setattr(self, key, self.DEFAULTS_ivas.copy()) - getattr(self, key).update(value) - self.dict[key] = self.DEFAULTS_ivas.copy() - self.dict[key].update(value) - elif key.startswith("evs"): - setattr(self, key, self.DEFAULTS_evs.copy()) - getattr(self, key).update(value) - self.dict[key] = self.DEFAULTS_evs.copy() - self.dict[key].update(value) - # avoid overwriting the whole subkey, merge instead - elif hasattr(self, key) and isinstance(getattr(self, key), dict): - for k, v in value.items(): - getattr(self, key)[k] = v - self.dict[key][k] = v - else: - setattr(self, key, value) - self.dict[key] = value - - # Check required keys - REQUIRED_KEYS = [ - "name", - "input_path", - "output_path", - "in_format", - "renderer_format", - "conditions_to_generate", - ] - REQUIRED_KEYS_IVAS = ["bitrates", "out_format"] - REQUIRED_KEYS_EVS = ["bitrates"] - MISSING_KEYS = list() - - for key in REQUIRED_KEYS: - if not hasattr(self, key): - MISSING_KEYS.append(key) - elif not getattr(self, key): - MISSING_KEYS.append(key) - - for condition in self.conditions_to_generate: - if condition.startswith("ivas"): - if not hasattr(self, condition): - raise SystemExit( - f"Definition not found for condition {condition}, but was specified in conditions to generate" - ) - else: - for key in REQUIRED_KEYS_IVAS: - if getattr(self, condition).get(key, None) is None: - MISSING_KEYS.append(f"{condition}:{key}") - elif condition.startswith("evs"): - if not hasattr(self, condition): - raise SystemExit( - f"Definition not found for condition {condition}, but was specified in conditions to generate" - ) - else: - for key in REQUIRED_KEYS_EVS: - if getattr(self, condition).get(key, None) is None: - MISSING_KEYS.append(f"{condition}:{key}") - - # Report missing keys to the user - if len(MISSING_KEYS) > 0: - raise KeyError( - f"The following key(s) must be specified in {filename} : {MISSING_KEYS}" - ) - - # Remove DEFAULTS_ keys from dict and self - for key in list(self.dict.keys()): - if key.startswith("DEFAULTS_"): - delattr(self, key) - self.dict.pop(key, None) - - # Store the updated JSON for later output - self.json_out = json.dumps(self.dict, indent=4) - - # Concatenation options - if self.concatenate_input: - if len(self.concat_silence_ms): - self.concat_silence_pre = self.concat_silence_ms[0] - self.concat_silence_post = self.concat_silence_ms[1] - else: - self.concat_silence_pre = self.concat_silence_ms - self.concat_silence_post = self.concat_silence_ms - - # Pre-processing - if self.preproc_input: - 
-            self.list_of_conditions.append(self._get_condition_definition("preproc", 0))
-
-        # Check the input directory for items
-        self.items_list = list_audio(
-            self.input_path, absolute=True, select_list=self.input_select
-        )
-
-        if self.items_list is None or len(self.items_list) == 0:
-            raise SystemExit(
-                f"Directory {self.input_path} is either empty, does not exist, or all files were filtered out."
-            )
-
-        # Check if concatenation is required
-        if self.concatenate_input and any(
-            [i.endswith(".txt") for i in self.items_list]
-        ):
-            raise SystemExit("Concatenation of text files is unsupported")
-
-        # Go through conditions to generate
-        for cond in self.conditions_to_generate:
-            try:
-                bitrates = getattr(self, cond)["bitrates"]
-                # If a single value was given, convert it to a list for convenience
-                if not hasattr(bitrates, "__len__") and not isinstance(bitrates, str):
-                    bitrates = [bitrates]
-                for b in bitrates:
-                    if isinstance(b, list):
-                        self.list_of_conditions.append(
-                            self._get_condition_definition(cond, [int(x) for x in b])
-                        )
-                    else:
-                        self.list_of_conditions.append(
-                            self._get_condition_definition(cond, int(b))
-                        )
-                    bitrate_label = str(
-                        self.list_of_conditions[-1]["proc_chain"][0].get_total_bit_rate()
-                    )
-                    self.list_of_conditions[-1]["id"] = f"{cond}_{bitrate_label}"
-            except (AttributeError, KeyError):
-                # Conditions without a bitrate definition (e.g. ref, lp3k5, lp7k)
-                self.list_of_conditions.append(self._get_condition_definition(cond, 0))
-
-        # Create output and temporary folder names for the conditions
-        for list_cond in self.list_of_conditions:
-            self.output_folders.append(os.path.join(self.output_path, list_cond["id"]))
-            self.tmp_folders.append(
-                os.path.join(self.output_path, "tmp_" + list_cond["id"])
-            )
-
-    def __repr__(self):
-        return str(vars(self))
-
-    # default values to enable a sparse JSON input file - update if adding new keys
-    def _set_defaults(self):
-        DEFAULT_CONFIG = {
-            # general options
-            "enable_multiprocessing": True,
-            "delete_tmp": False,
-            # input/preprocessing options
-            "preproc_input": False,
-            "input_select": None,
-            "concatenate_input": False,
-            "concat_silence_ms": [0, 0],
-            # sampling rates
-            "in_fs": 48000,
-            "out_fs": 48000,
-            # binaural rendering
-            "binaural_rendered": False,
-            "bin_rend_include_LFE": False,
-            "bin_rend_LFE_gain": 10 ** (5.5 / 20),
-            "binaural_dataset": "orange53",
-            # apply limiter in the postprocessing
-            "limit_output": False,
-            # loudness adjustments
-            "preproc_loudness": None,
-            "output_loudness": None,
-            "loudness_tool": "bs1770demo",
-            # condition-specific values
-            "ref": {"out_fc": 48000},
-            "DEFAULTS_ivas": {
-                "cod_bin": "../IVAS_cod",
-                "dec_bin": "../IVAS_dec",
-                "cod_opt": None,
-                "dec_opt": None,
-                "enc_fs": 48000,
-                "dec_fs": 48000,
-                "max_band": "FB",
-                "dtx": False,
-                # head tracking
-                "head_tracking": False,
-                "ht_file": "./trajectories/full_circle_in_15s",
-                # BER/FER
-                "plc": False,
-                "plc_rate": 10,
-            },
-            "DEFAULTS_evs": {
-                "cod_bin": "../IVAS_cod",
-                "dec_bin": "../IVAS_dec",
-                "cod_opt": None,
-                "dec_opt": None,
-                "enc_fs": 48000,
-                "dec_fs": 48000,
-                "max_band": "FB",
-                "dtx": False,
-            },
-        }
-
-        # needed to output the JSON later
-        self.dict = DEFAULT_CONFIG
-
-        # set defaults from the dict above
-        for key, value in DEFAULT_CONFIG.items():
-            setattr(self, key, value)
-
-    # Definitions of processing chains (edit with care)
-    def _get_condition_definition(self, cond: str, bitrate: int) -> dict:
-        definition = dict(id=cond, proc_chain=[])
-
-        if cond.startswith("preproc"):
-            definition["proc_chain"].extend(
-                [
-                    PreProcessing(
-                        out_format=self.in_format,
-                        out_fs=self.in_fs,
-                        output_loudness=self.preproc_loudness,
-                        loudness_tool=self.loudness_tool,
-                    )
-                ]
-            )
-        elif cond.startswith("ref"):
-            definition["proc_chain"].extend(
-                [
-                    PostProcessing(
-                        in_format=self.in_format,
-                        out_format=self.renderer_format,
-                        in_fs=self.in_fs,
-                        out_fs=self.out_fs,
-                        out_fc=self.ref["out_fc"],
-                        binaural_rendered=self.binaural_rendered,
-                        bin_rend_include_LFE=self.bin_rend_include_LFE,
-                        bin_rend_LFE_gain=self.bin_rend_LFE_gain,
-                        binaural_dataset=self.binaural_dataset,
-                        limit_output=self.limit_output,
-                        output_loudness=self.output_loudness,
-                        loudness_tool=self.loudness_tool,
-                    )
-                ]
-            )
-        elif cond.startswith("lp3k5"):
-            definition["proc_chain"].extend(
-                [
-                    PostProcessing(
-                        in_format=self.in_format,
-                        out_format=self.renderer_format,
-                        in_fs=self.in_fs,
-                        out_fs=self.out_fs,
-                        out_fc=3500,
-                        binaural_rendered=self.binaural_rendered,
-                        binaural_dataset=self.binaural_dataset,
-                        limit_output=self.limit_output,
-                        output_loudness=self.output_loudness,
-                        loudness_tool=self.loudness_tool,
-                    )
-                ]
-            )
-        elif cond.startswith("lp7k"):
-            definition["proc_chain"].extend(
-                [
-                    PostProcessing(
-                        in_format=self.in_format,
-                        out_format=self.renderer_format,
-                        in_fs=self.in_fs,
-                        out_fs=self.out_fs,
-                        out_fc=7000,
-                        binaural_rendered=self.binaural_rendered,
-                        binaural_dataset=self.binaural_dataset,
-                        limit_output=self.limit_output,
-                        output_loudness=self.output_loudness,
-                        loudness_tool=self.loudness_tool,
-                    )
-                ]
-            )
-        elif cond.startswith("evs"):
-            definition["proc_chain"].extend(
-                [
-                    EVS(
-                        in_format=self.in_format,
-                        out_format=self.in_format,
-                        bitrate=bitrate,
-                        in_fs=self.in_fs,
-                        **getattr(self, cond),
-                    ),
-                    PostProcessing(
-                        in_format=self.in_format,
-                        out_format=self.renderer_format,
-                        in_fs=getattr(self, cond)["dec_fs"],
-                        out_fs=self.out_fs,
-                        binaural_rendered=self.binaural_rendered,
-                        binaural_dataset=self.binaural_dataset,
-                        limit_output=self.limit_output,
-                    ),
-                ]
-            )
-        elif cond.startswith("ivas"):
-            definition["proc_chain"].extend(
-                [
-                    IVAS(
-                        in_format=self.in_format,
-                        bitrate=bitrate,
-                        in_fs=self.in_fs,
-                        **getattr(self, cond),
-                    ),
-                    PostProcessing(
-                        in_format=getattr(self, cond)["out_format"],
-                        out_format=self.renderer_format,
-                        in_fs=getattr(self, cond)["dec_fs"],
-                        out_fs=self.out_fs,
-                        binaural_rendered=self.binaural_rendered,
-                        binaural_dataset=self.binaural_dataset,
-                        limit_output=self.limit_output,
-                    ),
-                ]
-            )
-        else:
-            raise SystemExit(f"Invalid condition: {cond}")
-
-        return definition
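
The removed `test_config` above first applies `_set_defaults()` and then overlays the user's JSON, so a configuration file only needs the required keys plus one block per `ivas*`/`evs*` condition. A sketch of a minimal file that would pass the `REQUIRED_KEYS` validation; every path, format name, and bitrate below is an illustrative placeholder, not a value taken from this repository:

    # Hypothetical minimal configuration for test_config (placeholder values).
    import json

    minimal_cfg = {
        "name": "my_test",
        "input_path": "./items_in",
        "output_path": "./items_out",
        "in_format": "FOA",
        "renderer_format": "BINAURAL",
        "conditions_to_generate": ["ref", "ivas_a"],
        # conditions starting with "ivas" are merged over DEFAULTS_ivas and
        # must define at least "bitrates" and "out_format"
        "ivas_a": {"bitrates": [32000, 64000], "out_format": "FOA"},
    }

    with open("my_test_config.json", "w") as fp:
        json.dump(minimal_cfg, fp, indent=4)

    # cfg = test_config("my_test_config.json")
    # (instantiation additionally requires input_path to contain audio items)
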
diff --git a/scripts/pyprocessing/utils.py b/scripts/pyprocessing/utils.py
deleted file mode 100644
index e62840fd13..0000000000
--- a/scripts/pyprocessing/utils.py
+++ /dev/null
@@ -1,148 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-    (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository. All Rights Reserved.
-
-    This software is protected by copyright law and by international treaties.
-    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
-    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
-    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
-    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
-    contributors to this repository retain full ownership rights in their respective contributions in
-    the software. This notice grants no license of any kind, including but not limited to patent
-    license, nor is any license granted by implication, estoppel or otherwise.
-
-    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
-    contributions.
-
-    This software is provided "AS IS", without any express or implied warranties. The software is in the
-    development stage. It is intended exclusively for experts who have experience with such software and
-    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
-    and fitness for a particular purpose are hereby disclaimed and excluded.
-
-    Any dispute, controversy or claim arising under or in relation to providing this software shall be
-    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
-    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
-    the United Nations Convention on Contracts on the International Sales of Goods.
-"""
-
-import logging
-import os
-import platform
-import shutil
-from typing import Union
-
-"""
-Directory/path handling
-"""
-
-ALLOWED_INPUT_EXT = (".wav", ".pcm", ".txt")
-main_logger = logging.getLogger("__main__")
-logger = main_logger.getChild(__name__)
-logger.setLevel(logging.DEBUG)
-
-
-# Create a directory at the given path if it does not already exist
-def create_dir(path: str) -> None:
-    if not os.path.exists(path):
-        os.makedirs(path)
-
-
-def delete_dir(path: str) -> None:
-    if os.path.exists(path) and os.path.isdir(path):
-        logger.debug(f"Deleting path {path}")
-        shutil.rmtree(path)
-
-
-class DirManager:
-    """
-    Context manager that creates directories if not already present and
-    automatically cleans up (i.e. deletes) all specified paths
-    """
-
-    def __init__(
-        self, create_paths: Union[str, list], delete_paths: Union[str, list] = list()
-    ):
-        self.create_paths = (
-            create_paths if isinstance(create_paths, list) else [create_paths]
-        )
-        self.delete_paths = (
-            delete_paths if isinstance(delete_paths, list) else [delete_paths]
-        )
-
-    def __enter__(self):
-        for path in self.create_paths:
-            create_dir(path)
-
-    def __exit__(self, exc_type, exc_value, exc_traceback):
-        for path in self.delete_paths:
-            if path in self.create_paths:
-                delete_dir(path)
-            else:
-                logger.warning(
-                    f"Refusing to delete {path}: it was not in create_paths."
-                )
-
-
-def list_audio(path: str, absolute: bool = False, select_list: list = None) -> list:
-    """
-    Return a list of all files with ALLOWED_INPUT_EXT found under the given path.
-
-    If path is a directory, all files in it are included; if it is a file, just that file
-    will be in the list. If a select list is provided, files are filtered accordingly.
-    """
-    audio_list = list()
-
-    if os.path.exists(path):
-        if os.path.isdir(path):
-            if absolute:
-                audio_list = [
-                    os.path.join(path, f)
-                    for f in os.listdir(path)
-                    if f.endswith(ALLOWED_INPUT_EXT)
-                ]
-            else:
-                audio_list = [
-                    f for f in os.listdir(path) if f.endswith(ALLOWED_INPUT_EXT)
-                ]
-        else:
-            if not absolute:
-                path = os.path.basename(path)
-            ext = os.path.splitext(path)[-1].lower()
-            if ext in ALLOWED_INPUT_EXT:
-                audio_list.append(path)
-
-    # Filter according to the select list
-    if select_list is not None:
-        if hasattr(select_list, "__len__") and not isinstance(select_list, str):
-            select_set = set([os.path.splitext(i)[0] for i in select_list])
-        else:
-            select_set = [os.path.splitext(select_list)[0]]
-
-        audio_list_orig = audio_list
-        audio_list = []
-        for f in audio_list_orig:
-            f_name = os.path.splitext(os.path.basename(f))[0]
-            if any(x in f_name for x in select_set):
-                audio_list.append(f)
-
-    return audio_list
-
-
-def get_exec_path(path: str) -> str:
-    if platform.system() == "Windows" and os.path.splitext(path)[1] != ".exe":
-        exe = ".exe"
-    else:
-        exe = ""
-
-    return f"{os.path.abspath(path)}{exe}"
-
-
-def get_nickname(path: str) -> str:
-    nickname = os.path.join(
-        os.path.basename(os.path.dirname(path)), os.path.basename(path)
-    )
-    return nickname
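
A usage sketch for the removed helpers; the directory and item names are hypothetical placeholders. The useful property of `DirManager` is that `__exit__` only deletes paths that were also in `create_paths`, so a stray entry in `delete_paths` cannot wipe a pre-existing directory:

    # Hypothetical usage of DirManager and list_audio (names are placeholders).
    with DirManager(create_paths=["out/cond1", "out/tmp_cond1"], delete_paths=["out/tmp_cond1"]):
        items = list_audio("items_in", absolute=True, select_list=["item1"])
        # ... process each item into out/cond1, writing scratch files into out/tmp_cond1 ...
    # on exit, out/tmp_cond1 is removed; out/cond1 and its contents are kept
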
- """ - audio_list = list() - - if os.path.exists(path): - if os.path.isdir(path): - if absolute: - audio_list = [ - os.path.join(path, f) - for f in os.listdir(path) - if f.endswith(ALLOWED_INPUT_EXT) - ] - else: - audio_list = [ - f for f in os.listdir(path) if f.endswith(ALLOWED_INPUT_EXT) - ] - else: - if not absolute: - path = os.path.basename(path) - ext = os.path.splitext(path)[-1].lower() - if ext in ALLOWED_INPUT_EXT: - audio_list.append(path) - - # Filter according to select list - if select_list is not None: - if hasattr(select_list, "__len__") and not isinstance(select_list, str): - select_set = set([os.path.splitext(i)[0] for i in select_list]) - else: - select_set = [os.path.splitext(select_list)[0]] - - audio_list_orig = audio_list - audio_list = [] - for f in audio_list_orig: - f_name = os.path.splitext(os.path.basename(f))[0] - if any(x in f_name for x in select_set): - audio_list.append(f) - - return audio_list - - -def get_exec_path(path: str) -> str: - if platform.system() == "Windows" and os.path.splitext(path)[1] != ".exe": - exe = ".exe" - else: - exe = "" - - return f"{os.path.abspath(path)}{exe}" - - -def get_nickname(path: str) -> str: - nickname = os.path.join( - os.path.basename(os.path.dirname(path)), os.path.basename(path) - ) - return nickname diff --git a/tests/renderer/utils.py b/tests/renderer/utils.py index d2af91f60c..2d248a470a 100644 --- a/tests/renderer/utils.py +++ b/tests/renderer/utils.py @@ -29,8 +29,6 @@ import logging import subprocess as sp import sys -from pathlib import Path -from tempfile import TemporaryDirectory from typing import Optional, Tuple, Dict import numpy as np @@ -40,7 +38,6 @@ from .compare_audio import compare_audio_arrays from .constants import * sys.path.append(SCRIPTS_DIR) -import pyaudio3dtools # fixture returns test information, enabling per-testcase SNR @pytest.fixture @@ -197,56 +194,6 @@ def run_renderer( return pyaudio3dtools.audiofile.readfile(out_file) - -def run_pyscripts( - in_fmt, - out_fmt, - metadata_input: Optional[str] = None, - in_meta_files: Optional[list] = None, - trj_file: Optional[str] = None, - is_comparetest: Optional[bool] = False, -) -> Tuple[np.ndarray, int]: - """Reference creation with pyaudio3dtools""" - if trj_file is not None: - trj_name = f"_{trj_file.stem}" - else: - trj_name = "" - - if not isinstance(out_fmt, str): - out_name = f"{out_fmt.stem}" - else: - out_name = out_fmt - - if is_comparetest: - FORMAT_TO_FILE = FORMAT_TO_FILE_COMPARETEST - else: - FORMAT_TO_FILE = FORMAT_TO_FILE_SMOKETEST - - if metadata_input is not None: - in_file = metadata_input - in_name = metadata_input.stem - elif isinstance(in_fmt, Path): - in_file = FORMAT_TO_FILE[in_fmt.stem] - in_name = in_fmt.stem - else: - in_file = FORMAT_TO_FILE[in_fmt] - in_name = in_fmt - - out_file = str(OUTPUT_PATH_REF.joinpath(f"{in_name}_to_{out_name}{trj_name}.wav")) - - pyaudio3dtools.spatialaudioconvert.spatial_audio_convert( - in_file, - out_file, - in_format=in_fmt, - out_format=out_fmt, - in_meta_files=in_meta_files, - trajectory=trj_file, - limit_output=True, - ) - - return pyaudio3dtools.audiofile.readfile(out_file) - - def compare_renderer_vs_mergetarget(test_info, in_fmt, out_fmt, **kwargs): ref, ref_fs = run_renderer( in_fmt, -- GitLab From 19d5598404d85e5b747fad1a3fe8729cf2f47aea Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Fri, 5 May 2023 11:06:56 +0200 Subject: [PATCH 2/3] fix renderer tests --- tests/renderer/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/renderer/utils.py 
From 19d5598404d85e5b747fad1a3fe8729cf2f47aea Mon Sep 17 00:00:00 2001
From: Archit Tamarapu
Date: Fri, 5 May 2023 11:06:56 +0200
Subject: [PATCH 2/3] fix renderer tests

---
 tests/renderer/utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/renderer/utils.py b/tests/renderer/utils.py
index 2d248a470a..f494181249 100644
--- a/tests/renderer/utils.py
+++ b/tests/renderer/utils.py
@@ -29,6 +29,8 @@
 import logging
 import subprocess as sp
 import sys
+from pathlib import Path
+from tempfile import TemporaryDirectory
 from typing import Optional, Tuple, Dict
 
 import numpy as np
@@ -38,6 +40,7 @@ from .compare_audio import compare_audio_arrays
 from .constants import *
 
 sys.path.append(SCRIPTS_DIR)
+import pyaudio3dtools
 
 # fixture returns test information, enabling per-testcase SNR
 @pytest.fixture
-- 
GitLab


From dddf9a0fcf1d57969331335346a094ba242a58d8 Mon Sep 17 00:00:00 2001
From: Archit Tamarapu
Date: Fri, 5 May 2023 11:12:18 +0200
Subject: [PATCH 3/3] update printout

---
 scripts/pyaudio3dtools/audio3dtools.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/scripts/pyaudio3dtools/audio3dtools.py b/scripts/pyaudio3dtools/audio3dtools.py
index abf1163041..cfb7acb9f5 100755
--- a/scripts/pyaudio3dtools/audio3dtools.py
+++ b/scripts/pyaudio3dtools/audio3dtools.py
@@ -36,8 +36,6 @@ import os
 
 from pyaudio3dtools import (
     audiofile,
-    binauralrenderer,
-    spatialaudioconvert,
     spatialaudioformat,
 )
 
@@ -47,7 +45,7 @@ logger.setLevel(logging.DEBUG)
 
 
 def main():
-    raise NotImplementedError(
+    print(
         f"These scripts have been deprecated! Please check out and use the latest version from https://forge.3gpp.org/rep/ivas-codec-pc/ivas-processing-scripts.git"
     )
-- 
GitLab
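
With `main()` now printing instead of raising, running the deprecated entry point exits cleanly rather than with a traceback. Assuming the module keeps the usual `if __name__ == "__main__": main()` guard (not shown in this hunk), the expected behavior is:

    $ python3 scripts/pyaudio3dtools/audio3dtools.py
    These scripts have been deprecated! Please check out and use the latest version from https://forge.3gpp.org/rep/ivas-codec-pc/ivas-processing-scripts.git
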