From 66b1c883738079db474f840c9b27f65c15ef6467 Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Mon, 24 Oct 2022 10:57:18 +0200 Subject: [PATCH 1/6] add a new generic function for multiprocessing and cleanup audio3dtools.py --- scripts/pyaudio3dtools/audio3dtools.py | 218 ++++++++++++------------- scripts/pyaudio3dtools/audioarray.py | 17 +- 2 files changed, 125 insertions(+), 110 deletions(-) diff --git a/scripts/pyaudio3dtools/audio3dtools.py b/scripts/pyaudio3dtools/audio3dtools.py index 99ae71351b..d459615eae 100644 --- a/scripts/pyaudio3dtools/audio3dtools.py +++ b/scripts/pyaudio3dtools/audio3dtools.py @@ -46,7 +46,114 @@ logger = main_logger.getChild(__name__) logger.setLevel(logging.DEBUG) -def main(): +def main(args): + # Set up logging handlers + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_handler.setFormatter(logging.Formatter("%(message)s")) + + # Configure loggers + LOGGER_FORMAT = "%(asctime)s | %(name)-12s | %(levelname)-8s | %(message)s" + LOGGER_DATEFMT = "%m-%d %H:%M" + logging.basicConfig( + format=LOGGER_FORMAT, + datefmt=LOGGER_DATEFMT, + level=logging.INFO, + handlers=[console_handler], + ) + logger.info("Audio3DTools") + + if args.list is True or args.long is True: + logger.info("===Supported spatial audio formats===") + spatialaudioformat.Format.list_all(args.long) + + elif args.infiles is not None: + logger.info("===Convert spatial audio file===") + # Input folder can be a path, a file or a list of files + if os.path.isdir(args.infiles): + path = args.infiles + audio_list = [ + os.path.join(path, f) for f in os.listdir(path) if f.endswith((".wav")) + ] + else: + audio_list = [args.infiles] + + outdir = args.outdir + _, output_ext = os.path.splitext(os.path.basename(outdir)) + if (len(audio_list) == 1) and ( + (output_ext.lower() == ".wav") or (output_ext.lower() == ".pcm") + ): + outfile = outdir + else: + outfile = None + if not os.path.exists(outdir): + os.makedirs(outdir) + + for infile in audio_list: + logger.info(f" process {infile}") + + _, input_ext = os.path.splitext(os.path.basename(infile)) + + if outfile is None: + outfile = os.path.basename(infile) + if not args.dont_rename: + if args.outformat is not None: + outfile = outfile.replace(input_ext, f"_{args.outformat}.wav") + else: + outfile = outfile.replace(input_ext, ".out.wav") + outfile = os.path.join(outdir, outfile) + + spatialaudioconvert.spatial_audio_convert( + infile, + outfile, + in_format=args.informat, + in_fs=args.infs, + in_nchans=args.inchan, + in_meta_files=args.metadata, + in_ls_layout_file=args.layoutfile, + out_format=args.outformat, + out_fs=args.outfs, + out_fc=args.outfc, + output_loudness=args.normalize, + loudness_tool=args.loudness_tool, + trajectory=args.trajectory, + binaural_dataset=args.binaural_dataset, + ) + + logger.info(f" Output {outfile}") + + if args.binaural: + if args.outformat.startswith("BINAURAL"): + raise SystemExit( + "BINAURAL output format can not be binauralized again!" + ) + + _, output_ext = os.path.splitext(os.path.basename(outfile)) + outfile_bin = outfile.replace(output_ext, "_BINAURAL.wav") + logger.info(f" Output binaural {outfile_bin}") + + spatialaudioconvert.spatial_audio_convert( + in_file=outfile, + out_file=outfile_bin, + in_format=args.outformat, + in_fs=args.outfs, + in_meta_files=args.metadata, + in_ls_layout_file=args.layoutfile, + out_format="BINAURAL", + output_loudness=args.normalize, + loudness_tool=args.loudness_tool, + trajectory=args.trajectory, + binaural_dataset=args.binaural_dataset, + ) + + outfile = None + else: + raise Exception( + "Input file must be provided for conversion and audio manipulation." + ) + + +if __name__ == "__main__": parser = argparse.ArgumentParser( description="Audio3DTools: Convert/Manipulate spatial audio files." ) @@ -186,111 +293,4 @@ def main(): ) args = parser.parse_args() - # Set up logging handlers - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - console_handler.setFormatter(logging.Formatter("%(message)s")) - - # Configure loggers - LOGGER_FORMAT = "%(asctime)s | %(name)-12s | %(levelname)-8s | %(message)s" - LOGGER_DATEFMT = "%m-%d %H:%M" - logging.basicConfig( - format=LOGGER_FORMAT, - datefmt=LOGGER_DATEFMT, - level=logging.INFO, - handlers=[console_handler], - ) - logger.info("Audio3DTools") - - if args.list is True or args.long is True: - logger.info("===Supported spatial audio formats===") - spatialaudioformat.Format.list_all(args.long) - - elif args.infiles is not None: - logger.info("===Convert spatial audio file===") - # Input folder can be a path, a file or a list of files - if os.path.isdir(args.infiles): - path = args.infiles - audio_list = [ - os.path.join(path, f) for f in os.listdir(path) if f.endswith((".wav")) - ] - else: - audio_list = [args.infiles] - - outdir = args.outdir - _, output_ext = os.path.splitext(os.path.basename(outdir)) - if (len(audio_list) == 1) and ( - (output_ext.lower() == ".wav") or (output_ext.lower() == ".pcm") - ): - outfile = outdir - else: - outfile = None - if not os.path.exists(outdir): - os.makedirs(outdir) - - for infile in audio_list: - logger.info(f" process {infile}") - - _, input_ext = os.path.splitext(os.path.basename(infile)) - - if outfile is None: - outfile = os.path.basename(infile) - if not args.dont_rename: - if args.outformat is not None: - outfile = outfile.replace(input_ext, f"_{args.outformat}.wav") - else: - outfile = outfile.replace(input_ext, ".out.wav") - outfile = os.path.join(outdir, outfile) - - spatialaudioconvert.spatial_audio_convert( - infile, - outfile, - in_format=args.informat, - in_fs=args.infs, - in_nchans=args.inchan, - in_meta_files=args.metadata, - in_ls_layout_file=args.layoutfile, - out_format=args.outformat, - out_fs=args.outfs, - out_fc=args.outfc, - output_loudness=args.normalize, - loudness_tool=args.loudness_tool, - trajectory=args.trajectory, - binaural_dataset=args.binaural_dataset, - ) - - logger.info(f" Output {outfile}") - - if args.binaural: - if args.outformat.startswith("BINAURAL"): - raise SystemExit( - "BINAURAL output format can not be binauralized again!" - ) - - _, output_ext = os.path.splitext(os.path.basename(outfile)) - outfile_bin = outfile.replace(output_ext, "_BINAURAL.wav") - logger.info(f" Output binaural {outfile_bin}") - - spatialaudioconvert.spatial_audio_convert( - in_file=outfile, - out_file=outfile_bin, - in_format=args.outformat, - in_fs=args.outfs, - in_meta_files=args.metadata, - in_ls_layout_file=args.layoutfile, - out_format="BINAURAL", - output_loudness=args.normalize, - loudness_tool=args.loudness_tool, - trajectory=args.trajectory, - binaural_dataset=args.binaural_dataset, - ) - - outfile = None - else: - raise Exception( - "Input file must be provided for conversion and audio manipulation." - ) - - -if __name__ == "__main__": - main() + main(args) diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py index 16569e1ec7..5dbc43f769 100644 --- a/scripts/pyaudio3dtools/audioarray.py +++ b/scripts/pyaudio3dtools/audioarray.py @@ -32,9 +32,10 @@ import logging import math -from typing import Optional, Tuple +from typing import Callable, Iterable, Optional, Tuple import numpy as np +import multiprocessing as mp import scipy.signal as sig main_logger = logging.getLogger("__main__") @@ -430,3 +431,17 @@ def get_framewise(x: np.ndarray, chunk_size: int) -> np.ndarray: yield x[i * chunk_size : (i + 1) * chunk_size, :] if x.shape[0] % chunk_size: yield x[n_frames * chunk_size :, :] + + +def process_async(files: Iterable, func: Callable, **kwargs): + """Applies a function asynchronously to an array of audio files/filenames using a multiprocessing pool""" + + p = mp.pool(mp.cpu_count()) + results = [] + for f in files: + results.append(p.apply_async(func, args=(f, kwargs))) + p.close() + p.join() + for r in results: + r.get() + return results -- GitLab From e3c5bb7fdfa409238898f38d7a7b25effeaf0694 Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Wed, 26 Oct 2022 14:34:22 +0200 Subject: [PATCH 2/6] [pyaudio3dtools] fix PCM support for ISM input --- scripts/pyaudio3dtools/spatialaudioconvert.py | 2 +- scripts/pyaudio3dtools/spatialmetadata.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/pyaudio3dtools/spatialaudioconvert.py b/scripts/pyaudio3dtools/spatialaudioconvert.py index 3d71dc0e6f..29ac5f5d79 100644 --- a/scripts/pyaudio3dtools/spatialaudioconvert.py +++ b/scripts/pyaudio3dtools/spatialaudioconvert.py @@ -196,7 +196,7 @@ def spatial_audio_convert( # initialise metadata object for ISM metadata_obj = spatialmetadata.Metadata() - metadata_obj.init_for_ism(in_file, in_fs, in_meta_files) + metadata_obj.init_for_ism(in_file, in_fs, in_nchans, in_meta_files) # TODO alternative paths for binaural rendering for now if out_format.startswith("BINAURAL_ROOM"): diff --git a/scripts/pyaudio3dtools/spatialmetadata.py b/scripts/pyaudio3dtools/spatialmetadata.py index 829bd298bc..9fa42a2e06 100644 --- a/scripts/pyaudio3dtools/spatialmetadata.py +++ b/scripts/pyaudio3dtools/spatialmetadata.py @@ -211,13 +211,13 @@ class Metadata: for object_index in range(self.nb_objects): print(f" Object #{object_index} Type: {self.objects[object_index]}") - def _append_audio_array(self, audio_wav=None, fs=48000, object_index=None): + def _append_audio_array(self, audio_wav=None, fs=48000, nchan=1, object_index=None): if audio_wav is None: audio_wav = self.audio_wav[-1] if object_index is None: object_index = -1 - x, fs = audiofile.readfile(audio_wav, fs=fs) + x, fs = audiofile.readfile(audio_wav, fs=fs, nchannels=nchan) logger.debug(f"Append {audio_wav}: {x.shape[0]} by {x.shape[1]}") # Select appropriate channels & resample if necessary @@ -245,6 +245,7 @@ class Metadata: self, in_file: str, in_fs: int, + in_nchan: int, metadata_files: list, ) -> None: self.audio_wav.append(in_file) @@ -252,7 +253,7 @@ class Metadata: for csv in metadata_files: self.objects.append(read_ism_ivas_data(csv, object_index=self.nb_objects)) self.objects[-1]["track_index"] = self.nb_objects - self._append_audio_array(self.audio_wav[-1], fs=in_fs) + self._append_audio_array(self.audio_wav[-1], fs=in_fs, nchan=in_nchan) self.nb_objects += 1 # Get audio array with sampling rate -- GitLab From 81a87199b60d10b575f1818c1f8a49111f314e90 Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Wed, 26 Oct 2022 14:47:49 +0200 Subject: [PATCH 3/6] [pyaudio3dtools] add a warning and automatically adjust when a very low sampling rate is specified --- scripts/pyaudio3dtools/audio3dtools.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/scripts/pyaudio3dtools/audio3dtools.py b/scripts/pyaudio3dtools/audio3dtools.py index d459615eae..e9feccce80 100644 --- a/scripts/pyaudio3dtools/audio3dtools.py +++ b/scripts/pyaudio3dtools/audio3dtools.py @@ -33,6 +33,7 @@ import argparse import logging import os +import warnings from pyaudio3dtools import ( audiofile, @@ -63,6 +64,18 @@ def main(args): ) logger.info("Audio3DTools") + if args.infs is not None and args.infs < 1000: + warnings.warn( + f"Input sampling rate specified as {args.infs} Hz! Assuming {args.infs*1000} Hz" + ) + args.infs *= 1000 + + if args.outfs is not None and args.outfs < 1000: + warnings.warn( + f"Input sampling rate specified as {args.outfs} Hz! Assuming {args.outfs*1000} Hz" + ) + args.outfs *= 1000 + if args.list is True or args.long is True: logger.info("===Supported spatial audio formats===") spatialaudioformat.Format.list_all(args.long) -- GitLab From e5ef00eeac582354b52f6e9d0b63c31a5e1c9d0f Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Fri, 28 Oct 2022 10:59:17 +0200 Subject: [PATCH 4/6] fix for binaural rendering in generate_test_items.py --- scripts/pyprocessing/prepost_processing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/pyprocessing/prepost_processing.py b/scripts/pyprocessing/prepost_processing.py index dee95b77a6..a2c014b6a9 100644 --- a/scripts/pyprocessing/prepost_processing.py +++ b/scripts/pyprocessing/prepost_processing.py @@ -161,7 +161,8 @@ class PostProcessing(Processing): out_sig, fs = audiofile.readfile(output_path) bin_sig = binauralrenderer.binaural_rendering( out_sig, - self.out_spfmt.name, + self.out_spfmt, + spatialaudioformat.Format("BINAURAL"), fs=fs, include_LFE=self.bin_rend_include_LFE, LFE_gain=self.bin_rend_LFE_gain, -- GitLab From e33c9bf6735fa3a65d5bf0c72fde4a24edfb5b03 Mon Sep 17 00:00:00 2001 From: Archit Tamarapu Date: Fri, 11 Nov 2022 16:34:46 +0100 Subject: [PATCH 5/6] [scripts] fix for ISM rendering when input file has extra samples - fix a missing comma in cicp13.txt ls layout file - correctly set executable bit on some python files --- scripts/ls_layouts/cicp13.txt | 6 +++--- scripts/pyaudio3dtools/audio3dtools.py | 0 scripts/pyaudio3dtools/audioarray.py | 10 ++++++++-- scripts/pyaudio3dtools/audiofile.py | 0 scripts/pyaudio3dtools/spatialaudioconvert.py | 7 +++++++ scripts/pyivastest/IvasModeRunner.py | 0 6 files changed, 18 insertions(+), 5 deletions(-) mode change 100644 => 100755 scripts/pyaudio3dtools/audio3dtools.py mode change 100755 => 100644 scripts/pyaudio3dtools/audiofile.py mode change 100755 => 100644 scripts/pyivastest/IvasModeRunner.py diff --git a/scripts/ls_layouts/cicp13.txt b/scripts/ls_layouts/cicp13.txt index 5ff15f86c1..d0510c1d8f 100644 --- a/scripts/ls_layouts/cicp13.txt +++ b/scripts/ls_layouts/cicp13.txt @@ -1,3 +1,3 @@ -0, 30 -30, 60, -60, 90, -90, 135, -135, 180, 0, 45, -45, 90, -90, 0, 135, -135, 180, 0, 45, -45 -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 35, 35, 90, 35, 35, 35, -15, -15, -15 -3, 9 \ No newline at end of file +0, 30, -30, 60, -60, 90, -90, 135, -135, 180, 0, 45, -45, 90, -90, 0, 135, -135, 180, 0, 45, -45 +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 35, 35, 90, 35, 35, 35, -15, -15, -15 +3, 9 diff --git a/scripts/pyaudio3dtools/audio3dtools.py b/scripts/pyaudio3dtools/audio3dtools.py old mode 100644 new mode 100755 diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py index 0a918e5f1d..1906e82033 100644 --- a/scripts/pyaudio3dtools/audioarray.py +++ b/scripts/pyaudio3dtools/audioarray.py @@ -410,7 +410,7 @@ def limiter(x: np.ndarray, fs: int): fr_sig[idx_min] = -32768 -def get_framewise(x: np.ndarray, chunk_size: int) -> np.ndarray: +def get_framewise(x: np.ndarray, chunk_size: int, zero_pad=False) -> np.ndarray: """Generator to yield a signal frame by frame If array size is not a multiple of chunk_size, last frame contains the remainder @@ -420,6 +420,8 @@ def get_framewise(x: np.ndarray, chunk_size: int) -> np.ndarray: Input reference array chunk_size: int Size of frames to yield + zero_pad: bool + Whether to zero pad the last chunk if there are not enough samples Yields ------- @@ -430,7 +432,11 @@ def get_framewise(x: np.ndarray, chunk_size: int) -> np.ndarray: for i in range(n_frames): yield x[i * chunk_size : (i + 1) * chunk_size, :] if x.shape[0] % chunk_size: - yield x[n_frames * chunk_size :, :] + last_chunk = x[n_frames * chunk_size :, :] + if zero_pad: + yield np.pad(last_chunk, [[0, x.shape[0] % chunk_size], [0, 0]]) + else: + yield last_chunk def process_async(files: Iterable, func: Callable, **kwargs): diff --git a/scripts/pyaudio3dtools/audiofile.py b/scripts/pyaudio3dtools/audiofile.py old mode 100755 new mode 100644 diff --git a/scripts/pyaudio3dtools/spatialaudioconvert.py b/scripts/pyaudio3dtools/spatialaudioconvert.py index 430d362ab2..1ed144943f 100644 --- a/scripts/pyaudio3dtools/spatialaudioconvert.py +++ b/scripts/pyaudio3dtools/spatialaudioconvert.py @@ -426,6 +426,13 @@ def convert_ism( audioarray.get_framewise(out_sig, frame_len), ) ): + # update the crossfade if we have a smaller last frame + if out_frame.shape[0] != frame_len: + frame_size = out_frame.shape[0] + fade_in = np.arange(frame_size) / (frame_size - 1) + fade_in = fade_in[:, np.newaxis] + fade_out = 1.0 - fade_in + pos = EFAP.wrap_angles(*pos_data[i_frame % pos_frames, :], clip_ele=True) # ISM -> MC diff --git a/scripts/pyivastest/IvasModeRunner.py b/scripts/pyivastest/IvasModeRunner.py old mode 100755 new mode 100644 -- GitLab From 4759c7bb4aabbc11d952f2f5ec94c24863941463 Mon Sep 17 00:00:00 2001 From: Treffehn Date: Fri, 25 Nov 2022 16:19:38 +0100 Subject: [PATCH 6/6] fixed get framewise --- scripts/pyaudio3dtools/audioarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py index 1906e82033..917cdf59c6 100644 --- a/scripts/pyaudio3dtools/audioarray.py +++ b/scripts/pyaudio3dtools/audioarray.py @@ -434,7 +434,7 @@ def get_framewise(x: np.ndarray, chunk_size: int, zero_pad=False) -> np.ndarray: if x.shape[0] % chunk_size: last_chunk = x[n_frames * chunk_size :, :] if zero_pad: - yield np.pad(last_chunk, [[0, x.shape[0] % chunk_size], [0, 0]]) + yield np.pad(last_chunk, [[0, chunk_size - (x.shape[0] % chunk_size)], [0, 0]]) else: yield last_chunk -- GitLab