From 18a46e06aa059f001e0a542dc9a3a7e6a8803f4b Mon Sep 17 00:00:00 2001
From: Jan Kiene <jan.kiene@iis.fraunhofer.de>
Date: Tue, 10 Feb 2026 13:14:42 +0100
Subject: [PATCH 1/3] write out tmp files for wav-diff in 24bit format

---
 scripts/pyaudio3dtools/audioarray.py | 206 ++++++++++++++++-----------
 tests/cmp_pcm.py                     |   7 +-
 2 files changed, 130 insertions(+), 83 deletions(-)

diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py
index 7793e978c1..c08252cc36 100644
--- a/scripts/pyaudio3dtools/audioarray.py
+++ b/scripts/pyaudio3dtools/audioarray.py
@@ -38,6 +38,9 @@ import platform
 import shutil
 import subprocess
 import tempfile
+import wave
+import struct
+import itertools
 from pathlib import Path
 from enum import Enum
 from typing import Callable, Iterable, Optional, Tuple, Union
@@ -46,6 +49,8 @@ import numpy as np
 import scipy.io.wavfile as wavfile
 import scipy.signal as sig
 
+from . import audiofile
+
 main_logger = logging.getLogger("__main__")
 logger = main_logger.getChild(__name__)
 logger.setLevel(logging.DEBUG)
@@ -329,83 +334,16 @@ def compare(
         result["nframes_diff"] = 0
         result["nframes_diff_percentage"] = 0.0
 
+    assert ref.shape == test.shape
+    if ref.ndim == 1:
+        nsamples_total = ref.shape
+        nchannels = 1
+    else:
+        nsamples_total, nchannels = ref.shape
+
     # MLD (wav-diff) tool is run first, since it uses the input signals without length difference check for JBM test cases.
     if get_mld:
-
-        def parse_wav_diff(proc: subprocess.CompletedProcess) -> float:
-            if proc.returncode:
-                raise ChildProcessError(f"{proc.stderr}\n{proc.stdout}")
-            line = proc.stdout.splitlines()[-1].strip()
-            start = line.find(">") + 1
-            stop = line.rfind("<")
-            mld = float(line[start:stop].strip())
-
-            return mld
-
-        mld_max = 0
-        toolsdir = Path(__file__).parent.parent.joinpath("tools")
-
-        curr_platform = platform.system()
-        if curr_platform not in {"Windows", "Linux", "Darwin"}:
-            raise NotImplementedError(
-                f"wav-diff tool not available for {curr_platform}"
-            )
-
-        search_path = toolsdir.joinpath(curr_platform.replace("Windows", "Win32"))
-        wdiff = search_path.joinpath("wav-diff").with_suffix(
-            ".exe" if curr_platform == "Windows" else ""
-        )
-
-        if not wdiff.exists():
-            wdiff = shutil.which("wav-diff")
-            if wdiff is None:
-                raise FileNotFoundError(
-                    f"wav-diff tool not found in {search_path} or PATH!"
-                )
-
-        with tempfile.TemporaryDirectory() as tmpdir:
-            tmpfile_ref = Path(tmpdir).joinpath("ref.wav")
-            tmpfile_test = Path(tmpdir).joinpath("test.wav")
-
-            ### need to resample to 48kHz for MLD computation to be correct
-            ### write out and delete tmp variables to reduce memory usage
-            if fs != 48000:
-                ref_tmp = np.clip(
-                    resample(ref.astype(float), fs, 48000), -32768, 32767
-                ).astype(np.int16)
-                wavfile.write(str(tmpfile_ref), 48000, ref_tmp)
-                del ref_tmp
-                test_tmp = np.clip(
-                    resample(test.astype(float), fs, 48000), -32768, 32767
-                ).astype(np.int16)
-                wavfile.write(str(tmpfile_test), 48000, test_tmp)
-                del test_tmp
-            else:
-                wavfile.write(str(tmpfile_ref), 48000, ref.astype(np.int16))
-                wavfile.write(str(tmpfile_test), 48000, test.astype(np.int16))
-
-            cmd = [
-                str(wdiff),
-                "--print-ctest-measurement",
-                # wav-diff return code is 1 if differences are found which
-                # would cause parse_wav_diff to raise an Exception on these cases
-                "--no-fail",
-                str(tmpfile_ref),
-                str(tmpfile_test),
-            ]
-            if ref_jbm_tf and test_jbm_tf:
-                cmd.extend(
-                    [
-                        "--ref-jbm-trace",
-                        str(ref_jbm_tf),
-                        "--cut-jbm-trace",
-                        str(test_jbm_tf),
-                    ]
-                )
-            proc = subprocess.run(cmd, capture_output=True, text=True)
-            mld_max = parse_wav_diff(proc)
-
-        result["MLD"] = mld_max
+        result["MLD"] = run_wavdiff(ref, test, fs, nchannels, ref_jbm_tf, test_jbm_tf)
 
     # Run remanining tests after checking if the lenght differs
 
@@ -440,11 +378,6 @@ def compare(
     max_diff = int(diff.max())
 
     if max_diff != 0:
-        if diff.ndim == 1:
-            nsamples_total = diff.shape
-            nchannels = 1
-        else:
-            nsamples_total, nchannels = diff.shape
         max_diff_pos = np.nonzero(diff == max_diff)
         max_diff_pos = [
             max_diff_pos[0][0],
@@ -499,6 +432,119 @@ def compare(
     return result
 
 
+def parse_wav_diff(proc: subprocess.CompletedProcess) -> float:
+    if proc.returncode:
+        raise ChildProcessError(f"{proc.stderr}\n{proc.stdout}")
+    line = proc.stdout.splitlines()[-1].strip()
+    start = line.find(">") + 1
+    stop = line.rfind("<")
+    mld = float(line[start:stop].strip())
+
+    return mld
+
+
+def run_wavdiff(
+    ref: np.ndarray, test: np.ndarray, fs, nchannels, ref_jbm_tf, test_jbm_tf
+) -> float:
+    mld_max = 0
+
+    toolsdir = Path(__file__).parent.parent.joinpath("tools")
+
+    curr_platform = platform.system()
+    if curr_platform not in {"Windows", "Linux", "Darwin"}:
+        raise NotImplementedError(f"wav-diff tool not available for {curr_platform}")
+
+    search_path = toolsdir.joinpath(curr_platform.replace("Windows", "Win32"))
+    wdiff = search_path.joinpath("wav-diff").with_suffix(
+        ".exe" if curr_platform == "Windows" else ""
+    )
+
+    if not wdiff.exists():
+        wdiff = shutil.which("wav-diff")
+        if wdiff is None:
+            raise FileNotFoundError(
+                f"wav-diff tool not found in {search_path} or PATH!"
+            )
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpfile_ref = Path(tmpdir).joinpath("ref.wav")
+        tmpfile_test = Path(tmpdir).joinpath("test.wav")
+
+        ### need to resample to 48kHz for MLD computation to be correct
+        ### write out and delete tmp variables to reduce memory usage
+        ### write 24bit wav files to handle cases where scaling or resampling
+        ### goes outside of 16bit range
+        ref_tmp = ref.astype(np.int32)
+        test_tmp = test.astype(np.int32)
+
+        if fs != 48000:
+            ref_tmp = np.clip(
+                resample(ref.astype(float), fs, 48000), -32768, 32767
+            ).astype(np.int32)
+            test_tmp = np.clip(
+                resample(test.astype(float), fs, 48000), -32768, 32767
+            ).astype(np.int32)
+
+        bytes_per_sample = 3
+        with wave.open(str(tmpfile_ref), mode="wb") as ref_tmp_wav:
+            data_bytes = ref_tmp.astype("<i").tobytes()
+            ref_tmp_wav.setnchannels(nchannels)
+            ref_tmp_wav.setsampwidth(bytes_per_sample)
+            ref_tmp_wav.setframerate(fs)
+
+            # only take first three bytes per 32 bit chunk
+            data_bytes = bytes(
+                itertools.chain.from_iterable(
+                    [data_bytes[i : i + 3] for i in range(0, len(data_bytes), 4)]
+                )
+            )
+            ref_tmp_wav.writeframes(data_bytes)
+
+            # for i in range(0, len(data_bytes), 4):
+            #     chunk = data_bytes[i : i + bytes_per_sample]
+            #     ref_tmp_wav.writeframes(chunk)
+
+        with wave.open(str(tmpfile_test), mode="wb") as test_tmp_wav:
+            data_bytes = test_tmp.astype("<i").tobytes()
+            test_tmp_wav.setnchannels(nchannels)
+            test_tmp_wav.setsampwidth(bytes_per_sample)
+            test_tmp_wav.setframerate(fs)
+
+            # only take first three bytes per 32 bit chunk
+            data_bytes = bytes(
+                itertools.chain.from_iterable(
+                    [data_bytes[i : i + 3] for i in range(0, len(data_bytes), 4)]
+                )
+            )
+            test_tmp_wav.writeframes(data_bytes)
+
+        del ref_tmp
+        del test_tmp
+
+        cmd = [
+            str(wdiff),
+            "--print-ctest-measurement",
+            # wav-diff return code is 1 if differences are found which
+            # would cause parse_wav_diff to raise an Exception on these cases
+            "--no-fail",
+            str(tmpfile_ref),
+            str(tmpfile_test),
+        ]
+        if ref_jbm_tf and test_jbm_tf:
+            cmd.extend(
+                [
+                    "--ref-jbm-trace",
+                    str(ref_jbm_tf),
+                    "--cut-jbm-trace",
+                    str(test_jbm_tf),
+                ]
+            )
+        proc = subprocess.run(cmd, capture_output=True, text=True)
+        mld_max = parse_wav_diff(proc)
+
+    return mld_max
+
+
 def getdelay(x: np.ndarray, y: np.ndarray) -> int:
     """Get the delay between two audio signals
 
diff --git a/tests/cmp_pcm.py b/tests/cmp_pcm.py
index a646e20ae5..32cc3c28aa 100755
--- a/tests/cmp_pcm.py
+++ b/tests/cmp_pcm.py
@@ -96,9 +96,10 @@ def cmp_pcm(
 
     for s1, s2 in zip(np.split(s1, split_idx), np.split(s2, split_idx)):
         # Apply scalefac if specified. Useful in case scaling has been applied on the input, and the inverse is scaling is supplied in scalefac.
-        if scalefac != 1:
-            s1 = np.round(s1 * scalefac, 0)  # Need rounding for max abs diff search
-            s2 = np.round(s2 * scalefac, 0)
+        # Need rounding for max abs diff search
+        # This has the side-effect of changing the dtype of the arrays to float
+        s1 = np.round(s1 * scalefac, 0)
+        s2 = np.round(s2 * scalefac, 0)
 
         cmp_result = pyaudio3dtools.audioarray.compare(
             s1,
-- 
GitLab


From ab3bfd3a2e100a790fa9855500fd511b784194e9 Mon Sep 17 00:00:00 2001
From: Jan Kiene <jan.kiene@iis.fraunhofer.de>
Date: Tue, 10 Feb 2026 13:22:08 +0100
Subject: [PATCH 2/3] cleanup

---
 scripts/pyaudio3dtools/audioarray.py | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py
index c08252cc36..c556971f16 100644
--- a/scripts/pyaudio3dtools/audioarray.py
+++ b/scripts/pyaudio3dtools/audioarray.py
@@ -39,17 +39,14 @@ import shutil
 import subprocess
 import tempfile
 import wave
-import struct
 import itertools
 from pathlib import Path
 from enum import Enum
 from typing import Callable, Iterable, Optional, Tuple, Union
 
 import numpy as np
-import scipy.io.wavfile as wavfile
 import scipy.signal as sig
 
-from . import audiofile
 
 main_logger = logging.getLogger("__main__")
 logger = main_logger.getChild(__name__)
@@ -478,12 +475,8 @@ def run_wavdiff(
         test_tmp = test.astype(np.int32)
 
         if fs != 48000:
-            ref_tmp = np.clip(
-                resample(ref.astype(float), fs, 48000), -32768, 32767
-            ).astype(np.int32)
-            test_tmp = np.clip(
-                resample(test.astype(float), fs, 48000), -32768, 32767
-            ).astype(np.int32)
+            ref_tmp = resample(ref.astype(float), fs, 48000).astype(np.int32)
+            test_tmp = resample(test.astype(float), fs, 48000).astype(np.int32)
 
         bytes_per_sample = 3
         with wave.open(str(tmpfile_ref), mode="wb") as ref_tmp_wav:
@@ -500,10 +493,6 @@ def run_wavdiff(
             )
             ref_tmp_wav.writeframes(data_bytes)
 
-            # for i in range(0, len(data_bytes), 4):
-            #     chunk = data_bytes[i : i + bytes_per_sample]
-            #     ref_tmp_wav.writeframes(chunk)
-
         with wave.open(str(tmpfile_test), mode="wb") as test_tmp_wav:
             data_bytes = test_tmp.astype("<i").tobytes()
             test_tmp_wav.setnchannels(nchannels)
-- 
GitLab


From 476b27745fba699f64ca80f7eb824759c440f9b3 Mon Sep 17 00:00:00 2001
From: Jan Kiene <jan.kiene@iis.fraunhofer.de>
Date: Tue, 10 Feb 2026 14:52:52 +0100
Subject: [PATCH 3/3] set sampling rate correctly in run_wavdiff

---
 scripts/pyaudio3dtools/audioarray.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py
index c556971f16..0f405a023f 100644
--- a/scripts/pyaudio3dtools/audioarray.py
+++ b/scripts/pyaudio3dtools/audioarray.py
@@ -478,6 +478,8 @@ def run_wavdiff(
             ref_tmp = resample(ref.astype(float), fs, 48000).astype(np.int32)
             test_tmp = resample(test.astype(float), fs, 48000).astype(np.int32)
 
+            fs = 48000
+
         bytes_per_sample = 3
         with wave.open(str(tmpfile_ref), mode="wb") as ref_tmp_wav:
             data_bytes = ref_tmp.astype("<i").tobytes()
-- 
GitLab