From 18a46e06aa059f001e0a542dc9a3a7e6a8803f4b Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Tue, 10 Feb 2026 13:14:42 +0100 Subject: [PATCH 1/3] write out tmp files for wav-diff in 24bit format --- scripts/pyaudio3dtools/audioarray.py | 206 ++++++++++++++++----------- tests/cmp_pcm.py | 7 +- 2 files changed, 130 insertions(+), 83 deletions(-) diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py index 7793e978c1..c08252cc36 100644 --- a/scripts/pyaudio3dtools/audioarray.py +++ b/scripts/pyaudio3dtools/audioarray.py @@ -38,6 +38,9 @@ import platform import shutil import subprocess import tempfile +import wave +import struct +import itertools from pathlib import Path from enum import Enum from typing import Callable, Iterable, Optional, Tuple, Union @@ -46,6 +49,8 @@ import numpy as np import scipy.io.wavfile as wavfile import scipy.signal as sig +from . import audiofile + main_logger = logging.getLogger("__main__") logger = main_logger.getChild(__name__) logger.setLevel(logging.DEBUG) @@ -329,83 +334,16 @@ def compare( result["nframes_diff"] = 0 result["nframes_diff_percentage"] = 0.0 + assert ref.shape == test.shape + if ref.ndim == 1: + nsamples_total = ref.shape + nchannels = 1 + else: + nsamples_total, nchannels = ref.shape + # MLD (wav-diff) tool is run first, since it uses the input signals without length difference check for JBM test cases. if get_mld: - - def parse_wav_diff(proc: subprocess.CompletedProcess) -> float: - if proc.returncode: - raise ChildProcessError(f"{proc.stderr}\n{proc.stdout}") - line = proc.stdout.splitlines()[-1].strip() - start = line.find(">") + 1 - stop = line.rfind("<") - mld = float(line[start:stop].strip()) - - return mld - - mld_max = 0 - toolsdir = Path(__file__).parent.parent.joinpath("tools") - - curr_platform = platform.system() - if curr_platform not in {"Windows", "Linux", "Darwin"}: - raise NotImplementedError( - f"wav-diff tool not available for {curr_platform}" - ) - - search_path = toolsdir.joinpath(curr_platform.replace("Windows", "Win32")) - wdiff = search_path.joinpath("wav-diff").with_suffix( - ".exe" if curr_platform == "Windows" else "" - ) - - if not wdiff.exists(): - wdiff = shutil.which("wav-diff") - if wdiff is None: - raise FileNotFoundError( - f"wav-diff tool not found in {search_path} or PATH!" - ) - - with tempfile.TemporaryDirectory() as tmpdir: - tmpfile_ref = Path(tmpdir).joinpath("ref.wav") - tmpfile_test = Path(tmpdir).joinpath("test.wav") - - ### need to resample to 48kHz for MLD computation to be correct - ### write out and delete tmp variables to reduce memory usage - if fs != 48000: - ref_tmp = np.clip( - resample(ref.astype(float), fs, 48000), -32768, 32767 - ).astype(np.int16) - wavfile.write(str(tmpfile_ref), 48000, ref_tmp) - del ref_tmp - test_tmp = np.clip( - resample(test.astype(float), fs, 48000), -32768, 32767 - ).astype(np.int16) - wavfile.write(str(tmpfile_test), 48000, test_tmp) - del test_tmp - else: - wavfile.write(str(tmpfile_ref), 48000, ref.astype(np.int16)) - wavfile.write(str(tmpfile_test), 48000, test.astype(np.int16)) - - cmd = [ - str(wdiff), - "--print-ctest-measurement", - # wav-diff return code is 1 if differences are found which - # would cause parse_wav_diff to raise an Exception on these cases - "--no-fail", - str(tmpfile_ref), - str(tmpfile_test), - ] - if ref_jbm_tf and test_jbm_tf: - cmd.extend( - [ - "--ref-jbm-trace", - str(ref_jbm_tf), - "--cut-jbm-trace", - str(test_jbm_tf), - ] - ) - proc = subprocess.run(cmd, capture_output=True, text=True) - mld_max = parse_wav_diff(proc) - - result["MLD"] = mld_max + result["MLD"] = run_wavdiff(ref, test, fs, nchannels, ref_jbm_tf, test_jbm_tf) # Run remanining tests after checking if the lenght differs @@ -440,11 +378,6 @@ def compare( max_diff = int(diff.max()) if max_diff != 0: - if diff.ndim == 1: - nsamples_total = diff.shape - nchannels = 1 - else: - nsamples_total, nchannels = diff.shape max_diff_pos = np.nonzero(diff == max_diff) max_diff_pos = [ max_diff_pos[0][0], @@ -499,6 +432,119 @@ def compare( return result +def parse_wav_diff(proc: subprocess.CompletedProcess) -> float: + if proc.returncode: + raise ChildProcessError(f"{proc.stderr}\n{proc.stdout}") + line = proc.stdout.splitlines()[-1].strip() + start = line.find(">") + 1 + stop = line.rfind("<") + mld = float(line[start:stop].strip()) + + return mld + + +def run_wavdiff( + ref: np.ndarray, test: np.ndarray, fs, nchannels, ref_jbm_tf, test_jbm_tf +) -> float: + mld_max = 0 + + toolsdir = Path(__file__).parent.parent.joinpath("tools") + + curr_platform = platform.system() + if curr_platform not in {"Windows", "Linux", "Darwin"}: + raise NotImplementedError(f"wav-diff tool not available for {curr_platform}") + + search_path = toolsdir.joinpath(curr_platform.replace("Windows", "Win32")) + wdiff = search_path.joinpath("wav-diff").with_suffix( + ".exe" if curr_platform == "Windows" else "" + ) + + if not wdiff.exists(): + wdiff = shutil.which("wav-diff") + if wdiff is None: + raise FileNotFoundError( + f"wav-diff tool not found in {search_path} or PATH!" + ) + + with tempfile.TemporaryDirectory() as tmpdir: + tmpfile_ref = Path(tmpdir).joinpath("ref.wav") + tmpfile_test = Path(tmpdir).joinpath("test.wav") + + ### need to resample to 48kHz for MLD computation to be correct + ### write out and delete tmp variables to reduce memory usage + ### write 24bit wav files to handle cases where scaling or resampling + ### goes outside of 16bit range + ref_tmp = ref.astype(np.int32) + test_tmp = test.astype(np.int32) + + if fs != 48000: + ref_tmp = np.clip( + resample(ref.astype(float), fs, 48000), -32768, 32767 + ).astype(np.int32) + test_tmp = np.clip( + resample(test.astype(float), fs, 48000), -32768, 32767 + ).astype(np.int32) + + bytes_per_sample = 3 + with wave.open(str(tmpfile_ref), mode="wb") as ref_tmp_wav: + data_bytes = ref_tmp.astype(" int: """Get the delay between two audio signals diff --git a/tests/cmp_pcm.py b/tests/cmp_pcm.py index a646e20ae5..32cc3c28aa 100755 --- a/tests/cmp_pcm.py +++ b/tests/cmp_pcm.py @@ -96,9 +96,10 @@ def cmp_pcm( for s1, s2 in zip(np.split(s1, split_idx), np.split(s2, split_idx)): # Apply scalefac if specified. Useful in case scaling has been applied on the input, and the inverse is scaling is supplied in scalefac. - if scalefac != 1: - s1 = np.round(s1 * scalefac, 0) # Need rounding for max abs diff search - s2 = np.round(s2 * scalefac, 0) + # Need rounding for max abs diff search + # This has the side-effect of chaning the dtype of the arrays to float + s1 = np.round(s1 * scalefac, 0) + s2 = np.round(s2 * scalefac, 0) cmp_result = pyaudio3dtools.audioarray.compare( s1, -- GitLab From ab3bfd3a2e100a790fa9855500fd511b784194e9 Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Tue, 10 Feb 2026 13:22:08 +0100 Subject: [PATCH 2/3] cleanup --- scripts/pyaudio3dtools/audioarray.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py index c08252cc36..c556971f16 100644 --- a/scripts/pyaudio3dtools/audioarray.py +++ b/scripts/pyaudio3dtools/audioarray.py @@ -39,17 +39,14 @@ import shutil import subprocess import tempfile import wave -import struct import itertools from pathlib import Path from enum import Enum from typing import Callable, Iterable, Optional, Tuple, Union import numpy as np -import scipy.io.wavfile as wavfile import scipy.signal as sig -from . import audiofile main_logger = logging.getLogger("__main__") logger = main_logger.getChild(__name__) @@ -478,12 +475,8 @@ def run_wavdiff( test_tmp = test.astype(np.int32) if fs != 48000: - ref_tmp = np.clip( - resample(ref.astype(float), fs, 48000), -32768, 32767 - ).astype(np.int32) - test_tmp = np.clip( - resample(test.astype(float), fs, 48000), -32768, 32767 - ).astype(np.int32) + ref_tmp = resample(ref.astype(float), fs, 48000).astype(np.int32) + test_tmp = resample(test.astype(float), fs, 48000).astype(np.int32) bytes_per_sample = 3 with wave.open(str(tmpfile_ref), mode="wb") as ref_tmp_wav: @@ -500,10 +493,6 @@ def run_wavdiff( ) ref_tmp_wav.writeframes(data_bytes) - # for i in range(0, len(data_bytes), 4): - # chunk = data_bytes[i : i + bytes_per_sample] - # ref_tmp_wav.writeframes(chunk) - with wave.open(str(tmpfile_test), mode="wb") as test_tmp_wav: data_bytes = test_tmp.astype(" Date: Tue, 10 Feb 2026 14:52:52 +0100 Subject: [PATCH 3/3] set sampling rate correctly in run_wavdiff --- scripts/pyaudio3dtools/audioarray.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py index c556971f16..0f405a023f 100644 --- a/scripts/pyaudio3dtools/audioarray.py +++ b/scripts/pyaudio3dtools/audioarray.py @@ -478,6 +478,8 @@ def run_wavdiff( ref_tmp = resample(ref.astype(float), fs, 48000).astype(np.int32) test_tmp = resample(test.astype(float), fs, 48000).astype(np.int32) + fs = 48000 + bytes_per_sample = 3 with wave.open(str(tmpfile_ref), mode="wb") as ref_tmp_wav: data_bytes = ref_tmp.astype("