diff --git a/scripts/pyaudio3dtools/audioarray.py b/scripts/pyaudio3dtools/audioarray.py
index 5741c1ada40fd956bf9a8022261e05719d2b3ae7..1ed31dff4da773915a04a018a4690c94d59398c0 100644
--- a/scripts/pyaudio3dtools/audioarray.py
+++ b/scripts/pyaudio3dtools/audioarray.py
@@ -39,7 +39,8 @@ import shutil
 import subprocess
 import tempfile
 from pathlib import Path
-from typing import Callable, Iterable, Optional, Tuple
+from enum import Enum
+from typing import Callable, Iterable, Optional, Tuple, Union
 
 import numpy as np
 import scipy.io.wavfile as wavfile
@@ -228,6 +229,12 @@ def cut(x: np.ndarray, limits: Tuple[int, int]) -> np.ndarray:
     return y
 
 
+class HandleDifferingLengths(str, Enum):  # how compare() treats ref/test signals of unequal length
+    FAIL = "fail"  # raise an error
+    PAD = "pad"  # pad the shorter signal with zeros
+    CUT = "cut"  # cut the longer signal to the length of the shorter one
+
+
 def compare(
     ref: np.ndarray,
     test: np.ndarray,
@@ -239,8 +246,9 @@ def compare(
     ssnr_thresh_high: float = np.inf,
     apply_thresholds_to_ref_only: bool = False,
     test_start_offset_ms: int = 0,
-    ref_jbm_tf: Optional[Path] = None,
-    test_jbm_tf: Optional[Path] = None,
+    ref_jbm_tf: Optional[Union[Path, str]] = None,
+    test_jbm_tf: Optional[Union[Path, str]] = None,
+    handle_differing_lengths: HandleDifferingLengths = "fail",
 ) -> dict:
     """Compare two audio arrays
 
@@ -271,6 +279,17 @@ def compare(
     test_start_offset_ms: (non-negative) int
         offset in miliseconds for test signal. If > 0, the corresponding number of samples
         will be removed from the test array like so: test = test[sample_offset:, :].
+    ref_jbm_tf: str|Path
+        tracefile for ref signal for wav-diff based MLD comparison of JBM output
+    test_jbm_tf: str|Path
+        tracefile for test signal for wav-diff based MLD comparison of JBM output
+    handle_differing_lengths: one of "fail", "pad", "cut"
+        how to handle differing lengths in the input signals
+        "fail" - raise error
+        "pad" - pad shorter file with zeros
+        "cut" - cut longer file to length of shorter one
+
+        Note that wav-diff (used for mld) is fed the signals as they were before this padding/cutting
 
     Returns
     -------
@@ -286,11 +305,38 @@ def compare(
     test = test[test_start_offset_samples:, :]
 
     framesize = fs // 50
-    if ref.shape[0] != test.shape[0]:
-        min_len = min(ref.shape[0], test.shape[0])
-        diff = abs(test[:min_len, :] - ref[:min_len, :])
-    else:
-        diff = abs(test - ref)
+
+    lengths_differ = ref.shape[0] != test.shape[0]
+
+    test_orig = test.copy()
+    ref_orig = ref.copy()
+
+    if lengths_differ:
+        if handle_differing_lengths == "fail":
+            raise RuntimeError(
+                f"Input signals have different lengths: ref - {ref.shape[0]}, test - {test.shape[0]}"
+            )
+        elif handle_differing_lengths == "cut":
+            min_len = min(ref.shape[0], test.shape[0])
+            ref = ref[:min_len, :]
+            test = test[:min_len, :]
+        elif handle_differing_lengths == "pad":
+            max_len = max(ref.shape[0], test.shape[0])
+            ref = np.pad(
+                ref,
+                ((0, max_len - ref.shape[0]), (0, 0)),
+                mode="constant",
+                constant_values=0,
+            )
+            test = np.pad(
+                test,
+                ((0, max_len - test.shape[0]), (0, 0)),
+                mode="constant",
+                constant_values=0,
+            )
+
+    diff = abs(test - ref)
+
     max_diff = int(diff.max())
     result = {
         "bitexact": True,
@@ -363,6 +409,8 @@ def compare(
         if get_mld:
 
             def parse_wav_diff(proc: subprocess.CompletedProcess) -> float:
+                if proc.returncode:
+                    raise ChildProcessError(f"{proc.stderr}\n{proc.stdout}")
                 line = proc.stdout.splitlines()[-1].strip()
                 start = line.find(">") + 1
                 stop = line.rfind("<")
@@ -370,11 +418,6 @@ def compare(
 
                 return mld
 
-                # TODO probably needs a fix to show up in pytest
-                if proc.returncode:
-                    print(f"{proc.stderr}\n{proc.stdout}")
-                return mld_max
-
             mld_max = 0
             toolsdir = Path(__file__).parent.parent.joinpath("tools")
 
@@ -397,18 +440,17 @@ def compare(
                 tmpfile_ref = Path(tmpdir).joinpath("ref.wav")
                 tmpfile_test = Path(tmpdir).joinpath("test.wav")
 
-
                 ### need to resample to 48kHz for MLD computation to be correct
                 if fs != 48000:
                     ref_tmp = np.clip(
-                        resample(ref.astype(float), fs, 48000), -32768, 32767
+                        resample(ref_orig.astype(float), fs, 48000), -32768, 32767
                     )
                     test_tmp = np.clip(
-                        resample(test.astype(float), fs, 48000), -32768, 32767
+                        resample(test_orig.astype(float), fs, 48000), -32768, 32767
                     )
                 else:
-                    ref_tmp = ref.copy()
-                    test_tmp = test.copy()
+                    ref_tmp = ref_orig.copy()
+                    test_tmp = test_orig.copy()
 
                 wavfile.write(str(tmpfile_ref), 48000, ref_tmp.astype(np.int16))
                 wavfile.write(str(tmpfile_test), 48000, test_tmp.astype(np.int16))
@@ -416,6 +458,9 @@ def compare(
                 cmd = [
                     str(wdiff),
                     "--print-ctest-measurement",
+                    # wav-diff exits with return code 1 when it finds differences, which
+                    # would make parse_wav_diff raise; --no-fail is passed to prevent that
+                    "--no-fail",
                     str(tmpfile_ref),
                     str(tmpfile_test),
                 ]
diff --git a/tests/cmp_pcm.py b/tests/cmp_pcm.py
index 9b837156bbe5b6b864d58b9a6d750d9557ca37eb..17a8803cae5a730a9159b8f823619642318a49d8 100755
--- a/tests/cmp_pcm.py
+++ b/tests/cmp_pcm.py
@@ -15,6 +15,7 @@ sys.path.append(os.path.join(os.path.dirname(THIS_PATH), "../scripts"))
 import numpy as np
 import pyaudio3dtools
 import pyivastest
+
 # Hack to resolve import when using from command line or from within scripts.
 try:
     from .constants import ODG_PATTERN_PQEVALAUDIO
@@ -38,7 +39,7 @@ def cmp_pcm(
     odg_ref=None,
     ref_jbm_tf: Optional[Path] = None,
     cut_jbm_tf: Optional[Path] = None,
-) -> (int, str):
+) -> tuple[int, str]:
     """
     Compare 2 PCM files for bitexactness
     """
@@ -77,11 +78,9 @@ def cmp_pcm(
         reason = "FAIL: Number of channels differ."
         return 1, reason
 
+    handle_differing_lengths = "fail"
     if allow_differing_lengths:
-        # to allow for MLD comparison, pad shorter file
-        max_len = max(s1.shape[0], s2.shape[0])
-        s1 = np.pad(s1,((0,max_len - s1.shape[0]),(0,0)),mode='constant',constant_values=0)
-        s2 = np.pad(s2,((0,max_len - s2.shape[0]),(0,0)),mode='constant',constant_values=0)
+        handle_differing_lengths = "pad"
     elif s1.shape != s2.shape:
         print(
             f"file size in samples: file 1 = {s1.shape[0]},",
@@ -103,6 +102,7 @@ def cmp_pcm(
         ssnr_thresh_low=-50,
         ref_jbm_tf=ref_jbm_tf,
         test_jbm_tf=cut_jbm_tf,
+        handle_differing_lengths=handle_differing_lengths,
     )
 
     output_differs = 0
@@ -149,15 +149,25 @@ def cmp_pcm(
                 32767,
             ).astype(np.int16)
 
-        pqeval_output = pqevalaudio_wrapper(odg_files[odg_input], odg_files[odg_ref], 48000)
+        pqeval_output = pqevalaudio_wrapper(
+            odg_files[odg_input], odg_files[odg_ref], 48000
+        )
         match_odg = re.search(ODG_PATTERN_PQEVALAUDIO, pqeval_output)
-        odg_ref = float(match_odg.groups()[0])
+        try:
+            odg_ref = float(match_odg.groups()[0])
+        except AttributeError:
+            raise OdgParsingFailed("Could not get Odg for ref signal")
 
-        pqeval_output = pqevalaudio_wrapper(odg_files[odg_input], odg_files[odg_test], 48000)
+        pqeval_output = pqevalaudio_wrapper(
+            odg_files[odg_input], odg_files[odg_test], 48000
+        )
         match_odg = re.search(ODG_PATTERN_PQEVALAUDIO, pqeval_output)
-        odg_test = float(match_odg.groups()[0])
+        try:
+            odg_test = float(match_odg.groups()[0])
+        except AttributeError:
+            raise OdgParsingFailed("Could not get Odg for test signal")
 
-        odg = odg_test - odg_ref # Todo: store both rather than difference?
+        odg = odg_test - odg_ref  # Todo: store both rather than difference?
 
         msg = f"ODG: {odg}"
         reason += " - " + msg
@@ -166,6 +176,10 @@ def cmp_pcm(
     return output_differs, reason
 
 
+class OdgParsingFailed(Exception):  # raised by cmp_pcm when ODG_PATTERN_PQEVALAUDIO finds no match in the pqevalaudio_wrapper output
+    pass
+
+
 def pqevalaudio_wrapper(
     ref_sig: np.ndarray,
     eval_sig: np.ndarray,