From 8dd8ba6e34bbd8826ca0373a6d97a0f89b070998 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@dolby.com>
Date: Wed, 25 Mar 2026 15:48:52 +0100
Subject: [PATCH] Improve conformance script printouts and logging

---
 scripts/ivas_conformance/runConformance.py | 468 ++++++++++++++-------
 1 file changed, 306 insertions(+), 162 deletions(-)

diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py
index e26fb6032..23fe3479a 100644
--- a/scripts/ivas_conformance/runConformance.py
+++ b/scripts/ivas_conformance/runConformance.py
@@ -253,6 +253,42 @@ class MLDConformance:
         with open(self.failedCmdsFile, "r") as f:
             return sum(1 for line in f if line.strip())
 
+    def getFailedCommandsRange(self, start: int, end: int) -> list[str]:
+        if not os.path.exists(self.failedCmdsFile) or end <= start:
+            return []
+        with open(self.failedCmdsFile, "r") as f:
+            lines = [line.strip() for line in f if line.strip()]
+        return lines[start:end]
+
+    def appendRunlog(self, command: str = "", output: str = "", context: str = "") -> None:
+        if not getattr(self, "logFile", None):
+            return
+        with open(self.logFile, "a") as fd:
+            if context:
+                fd.write(context + "\n")
+            if command:
+                fd.write(command + "\n")
+            if output:
+                fd.write(output)
+                if not output.endswith("\n"):
+                    fd.write("\n")
+
+    def appendFailed(self, command: str = "", output: str = "", context: str = "") -> None:
+        if not getattr(self, "failedCmdsFile", None):
+            return
+        with open(self.failedCmdsFile, "a") as fd:
+            if context:
+                fd.write(context + "\n")
+            if command:
+                fd.write(command + "\n")
+            if output:
+                fd.write(output)
+                if not output.endswith("\n"):
+                    fd.write("\n")
+
+    def formatTestHeader(self, testPrefix: str, phase: str, pytestTag: str) -> str:
+        return f"{testPrefix} {phase}: {pytestTag}\n---------------------------"
+
     def setupCommon(self):
         self.Commands = dict()
         self.TestDesc = dict[
@@ -615,22 +651,30 @@ class MLDConformance:
             return (non_be, None, None, None)
         else:
             if not os.path.exists(testDesc.refOutput) or not os.path.exists(testDesc.dutOutput):
-                with open(self.failedCmdsFile, "a") as f:
-                    f.write(
-                        f"Missing file for compare: ref={testDesc.refOutput}, dut={testDesc.dutOutput}\n"
-                    )
-                return (None, None, None, None)
+                msg = (
+                    f"Missing file for compare: ref={testDesc.refOutput}, dut={testDesc.dutOutput}"
+                )
+                self.appendFailed(context=f"[{tag}:{dutPytestTag}] {msg}")
+                return (None, None, (msg, ""), None)
+
+            validate_err = self.validateAudioPair(testDesc.refOutput, testDesc.dutOutput)
+            if validate_err:
+                self.appendFailed(context=f"[{tag}:{dutPytestTag}] {validate_err}")
+                return (None, None, (validate_err, ""), None)
+
             non_be = int(
                 not filecmp.cmp(testDesc.refOutput, testDesc.dutOutput, shallow=False)
             )
             ##### skip MLD verification for files with only 1 frame as MLD does not run with such files. Possible solution: append 0s and then compare #####
             if testDesc.rawCmdline.find("_cut.192.fer") == -1:
-                max_mld = self.mld(
+                max_mld, mld_error = self.mld(
                     tag,
                     dutPytestTag,
                     refFile=testDesc.refOutput,
                     dutFile=testDesc.dutOutput,
                 )
+                if mld_error is not None:
+                    return (None, None, mld_error, None)
                 return (non_be, max_mld, None, None)
             return (non_be, None, None, None)
 
@@ -669,15 +713,21 @@ class MLDConformance:
             if rc != 0:
                 return (None, None, (dutDecCmd, err_output), dutDecCmd)
             if not os.path.exists(refDecOutputFile) or not os.path.exists(dutDecOutputFile):
-                with open(self.failedCmdsFile, "a") as f:
-                    f.write(
-                        f"Missing file for compare: ref={refDecOutputFile}, dut={dutDecOutputFile}\n"
-                    )
-                return (None, None, None, dutDecCmd)
+                msg = f"Missing file for compare: ref={refDecOutputFile}, dut={dutDecOutputFile}"
+                self.appendFailed(context=f"[{tag}:{encPytestTag}] {msg}")
+                return (None, None, (msg, ""), dutDecCmd)
+
+            validate_err = self.validateAudioPair(refDecOutputFile, dutDecOutputFile)
+            if validate_err:
+                self.appendFailed(context=f"[{tag}:{encPytestTag}] {validate_err}")
+                return (None, None, (validate_err, ""), dutDecCmd)
+
             non_be = int(not filecmp.cmp(refDecOutputFile, dutDecOutputFile, shallow=False))
-            max_mld = self.mld(
+            max_mld, mld_error = self.mld(
                 tag, encPytestTag, refFile=refDecOutputFile, dutFile=dutDecOutputFile
             )
+            if mld_error is not None:
+                return (None, None, mld_error, dutDecCmd)
             return (non_be, max_mld, None, dutDecCmd)
 
     def analyseOneIsarEncoderTest(self, tag: str, pytestTag: str):
@@ -693,7 +743,8 @@ class MLDConformance:
             return (non_be, None, None, None)
         else:
             refDecOutputFile = testDesc.refOutput.replace(".splt.bit", ".wav")
-            dutDecOutputFile = testDesc.dutOutput.replace(".splt.bit", ".wav")
+            dutDecOutputFile = testDesc.dutOutput.replace(".splt.bit", "_CUT_REFDECODED.wav")
+            
             # Decode the encoded output with Reference ISAR decoder
             dutDecCmd = testDesc.refDecCmdline.split()
             for idx, cmd in enumerate(dutDecCmd):
@@ -712,15 +763,21 @@ class MLDConformance:
             if rc != 0:
                 return (None, None, (dutDecCmd, err_output), dutDecCmd)
             if not os.path.exists(refDecOutputFile) or not os.path.exists(dutDecOutputFile):
-                with open(self.failedCmdsFile, "a") as f:
-                    f.write(
-                        f"Missing file for compare: ref={refDecOutputFile}, dut={dutDecOutputFile}\n"
-                    )
-                return (None, None, None, dutDecCmd)
+                msg = f"Missing file for compare: ref={refDecOutputFile}, dut={dutDecOutputFile}"
+                self.appendFailed(context=f"[{tag}:{pytestTag}] {msg}")
+                return (None, None, (msg, ""), dutDecCmd)
+
+            validate_err = self.validateAudioPair(refDecOutputFile, dutDecOutputFile)
+            if validate_err:
+                self.appendFailed(context=f"[{tag}:{pytestTag}] {validate_err}")
+                return (None, None, (validate_err, ""), dutDecCmd)
+
             non_be = int(not filecmp.cmp(refDecOutputFile, dutDecOutputFile, shallow=False))
-            max_mld = self.mld(
+            max_mld, mld_error = self.mld(
                 tag, pytestTag, refFile=refDecOutputFile, dutFile=dutDecOutputFile
             )
+            if mld_error is not None:
+                return (None, None, mld_error, dutDecCmd)
             return (non_be, max_mld, None, dutDecCmd)
 
     def getRendOutputFile(self, command: str):
@@ -808,6 +865,7 @@ class MLDConformance:
     ):
         # Run CUT Cmdline
         testPrefix = f"[{tag} {testIndex}/{totalTests}]"
+        self.appendRunlog(context=self.formatTestHeader(testPrefix, "Running test", pyTestsTag))
         testDesc = self.TestDesc[tag][pyTestsTag]
         rc, err_output = self.process(
             command=testDesc.dutCmdline,
@@ -827,7 +885,8 @@ class MLDConformance:
         self, tag: str, pyTestsTag: str, testIndex: int = 0, totalTests: int = 0
     ):
         testPrefix = f"[{tag} {testIndex}/{totalTests}]"
-        failed_before = self.getFailedCommandCount()
+        header = self.formatTestHeader(testPrefix, "Analyzing test", pyTestsTag)
+        self.appendRunlog(context=header)
         non_be = None
         max_mld = None
         errorDetails = None
@@ -845,10 +904,16 @@ class MLDConformance:
         else:
             assert False, f"Un-implemented Tag {tag}"
 
-        failed_after = self.getFailedCommandCount()
-        if failed_after > failed_before:
+        if errorDetails is not None:
+            if errorDetails:
+                cmd, err_output = errorDetails
+                self.appendFailed(context=header, command=cmd, output=(err_output or ""))
+            elif executedCommand:
+                self.appendFailed(context=header, command=executedCommand)
+            else:
+                self.appendFailed(context=header)
             self.stats()
-            return (testPrefix, pyTestsTag, "ERROR", None, errorDetails, executedCommand)
+            return (testPrefix, pyTestsTag, "ERROR", None, errorDetails, executedCommand, None, None)
 
         if self.args.be_test:
             verdict = "NON-BE" if non_be else "BE"
@@ -859,8 +924,9 @@ class MLDConformance:
                 result_text = f"{verdict}, MLD=N/A"
             else:
                 result_text = f"{verdict}, MLD_MAX={max_mld}"
+
         self.stats()
-        return (testPrefix, pyTestsTag, "OK", result_text, None, executedCommand)
+        return (testPrefix, pyTestsTag, "OK", result_text, None, executedCommand, verdict, max_mld)
 
     def analyseOneCommandFromTuple(self, args):
         return self.analyseOneCommand(*args)
@@ -930,7 +996,7 @@ class MLDConformance:
         failed_after = self.getFailedCommandCount()
         failed_delta = failed_after - failed_before
         if failed_delta == 0:
-            print(f"[{tag}] OK")
+            print(f"[{tag}] OK\n")
             return True
 
         print(
@@ -939,8 +1005,13 @@ class MLDConformance:
         return False
 
     def analyseTag(self, tag: str) -> bool:
-        failed_before = self.getFailedCommandCount()
+        corridor_threshold = 0.1
+        command_fail_count = 0
+        be_count = 0
         non_be_count = 0
+        failure_count = 0
+        worst_failure = None
+
         # reset MLD, Sample Stats
         if self.args.be_test:
             with open(self.BEcsv[tag], "w") as f:
@@ -949,7 +1020,8 @@ class MLDConformance:
             open(self.mldcsv[tag], "w").close()
             with open(self.sampleStats[tag], "w") as f:
                 f.write(f"PYTESTTAG, MAXDIFF, RMSdB, BEFRAMES_PERCENT, MAX_MLD\n")
-        selectedTests = list()
+
+        selectedTests = []
         if self.filter:
             for pyTestsTag in self.TestDesc[tag].keys():
                 if self.filter in self.TestDesc[tag][pyTestsTag].rawCmdline:
@@ -962,96 +1034,85 @@ class MLDConformance:
             f"Analysing tests for {tag}  {'Filter=' + self.filter if self.filter else ''} ({self.totalTests} tests)",
             flush=True,
         )
+
+        def handle_test_result(
+            testPrefix,
+            pyTestsTag,
+            runStatus,
+            analysisResult,
+            errorDetails,
+            executedCommand,
+            verdict,
+            test_max_mld,
+        ):
+            nonlocal command_fail_count, be_count, non_be_count, failure_count, worst_failure
+
+            if runStatus != "OK":
+                command_fail_count += 1
+            elif verdict == "BE":
+                be_count += 1
+            elif verdict == "NON-BE":
+                non_be_count += 1
+                if test_max_mld is not None and test_max_mld > corridor_threshold:
+                    failure_count += 1
+                    fail_header = self.formatTestHeader(testPrefix, "Analyzing test", pyTestsTag)
+                    self.appendFailed(
+                        context=(
+                            fail_header
+                            + f"\n[{tag}:{pyTestsTag}] FAILURE (MLD_MAX={test_max_mld} > threshold={corridor_threshold})"
+                        ),
+                        command=(executedCommand or ""),
+                    )
+                    if worst_failure is None or test_max_mld > worst_failure["mld"]:
+                        worst_failure = {
+                            "prefix": testPrefix,
+                            "tag": pyTestsTag,
+                            "mld": float(test_max_mld),
+                        }
+
+            if self.args.verbose:
+                if runStatus == "OK":
+                    if not analysisResult:
+                        print(
+                            f"{testPrefix} Analyzing test: {pyTestsTag} ... MLD analysis failed!",
+                            flush=True,
+                        )
+                    else:
+                        print(
+                            f"{testPrefix} Analyzing test: {pyTestsTag} ... {analysisResult}",
+                            flush=True,
+                        )
+                else:
+                    print(
+                        f"{testPrefix} Analyzing test: {pyTestsTag} ... {runStatus}",
+                        flush=True,
+                    )
+
+            if runStatus != "OK":
+                if errorDetails:
+                    cmd, err_output = errorDetails
+                    print(f"{testPrefix} Failed command: {cmd}", flush=True)
+                    if err_output:
+                        print(
+                            err_output,
+                            end="" if err_output.endswith("\n") else "\n",
+                            flush=True,
+                        )
+                elif executedCommand:
+                    print(f"{testPrefix} Failed command: {executedCommand}", flush=True)
+
         if not self.args.no_multi_processing:
             with Pool() as pool:
                 args = [
                     (tag, pyTestsTag, idx, self.totalTests)
                     for idx, pyTestsTag in enumerate(selectedTests, start=1)
                 ]
-                for (
-                    testPrefix,
-                    pyTestsTag,
-                    runStatus,
-                    analysisResult,
-                    errorDetails,
-                    executedCommand,
-                ) in pool.imap_unordered(self.analyseOneCommandFromTuple, args):
-                    if (
-                        runStatus == "OK"
-                        and analysisResult
-                        and str(analysisResult).startswith("NON-BE")
-                    ):
-                        non_be_count += 1
-                    if self.args.verbose:
-                        if runStatus == "OK":
-                            if not analysisResult:
-                                print(
-                                    f"{testPrefix} Analyzing test: {pyTestsTag} ... MLD analysis failed!",
-                                    flush=True,
-                                )
-                            else:
-                                print(
-                                    f"{testPrefix} Analyzing test: {pyTestsTag} ... {analysisResult}",
-                                    flush=True,
-                                )
-                        else:
-                            print(
-                                f"{testPrefix} Analyzing test: {pyTestsTag} ... {runStatus}",
-                                flush=True,
-                            )
-   
-                            if errorDetails:
-                                cmd, err_output = errorDetails
-                                print(f"{testPrefix} Failed command: {cmd}", flush=True)
-                                if err_output:
-                                    print(
-                                        err_output,
-                                        end="" if err_output.endswith("\n") else "\n",
-                                        flush=True,
-                                    )
+                for result in pool.imap_unordered(self.analyseOneCommandFromTuple, args):
+                    handle_test_result(*result)
         else:
             for idx, pyTestsTag in enumerate(selectedTests, start=1):
-                (
-                    testPrefix,
-                    pyTestsTag,
-                    runStatus,
-                    analysisResult,
-                    errorDetails,
-                    executedCommand,
-                ) = self.analyseOneCommand(tag, pyTestsTag, idx, self.totalTests)
-                if (
-                    runStatus == "OK"
-                    and analysisResult
-                    and str(analysisResult).startswith("NON-BE")
-                ):
-                    non_be_count += 1
-                if self.args.verbose:
-                    if runStatus == "OK":
-                        if not analysisResult:
-                            print(
-                                f"{testPrefix} Analyzing test: {pyTestsTag} ... MLD analysis failed!",
-                                flush=True,
-                            )
-                        else:
-                            print(
-                                f"{testPrefix} Analyzing test: {pyTestsTag} ... {analysisResult}",
-                                flush=True,
-                            )
-                    else:
-                        print(
-                            f"{testPrefix} Analyzing test: {pyTestsTag} ... {runStatus}",
-                            flush=True,
-                        )
-
-                        if errorDetails:
-                            cmd, err_output = errorDetails
-                            print(f"{testPrefix} Failed command: {cmd}", flush=True)
-                            if err_output:
-                                print(
-                                    err_output,
-                                    end="" if err_output.endswith("\n") else "\n",
-                                    flush=True,
-                                )
+                result = self.analyseOneCommand(tag, pyTestsTag, idx, self.totalTests)
+                handle_test_result(*result)
 
         self.flushErrorBlocks()
 
@@ -1059,39 +1120,31 @@ class MLDConformance:
             self.doBEanalysis(selectTag=tag)
             analysis_ok = True
             corridor_fail_count = 0
-            max_mld_value = None
         else:
-            analysis_ok, corridor_fail_count, max_mld_value = self.doAnalysis(
-                selectTag=tag
-            )
+            analysis_ok, corridor_fail_count, _ = self.doAnalysis(selectTag=tag)
 
-        failed_after = self.getFailedCommandCount()
-        failed_delta = failed_after - failed_before
-        max_mld_text = (
-            f", MAX_MLD={max_mld_value}" if max_mld_value is not None else ""
-        )
         if self.args.regenerate_mld_ref:
-            return failed_delta == 0 and analysis_ok
+            return command_fail_count == 0 and analysis_ok
 
-        if failed_delta == 0 and non_be_count == 0 and analysis_ok:
-            print(f"[{tag}] OK{max_mld_text}")
+        if command_fail_count == 0 and failure_count == 0 and analysis_ok:
+            print(
+                f"[{tag}] OK (ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, MLD CORRIDOR FAILURES={failure_count})\n"
+            )
             return True
 
-        reasons = []
-        if failed_delta > 0:
-            reasons.append(f"{failed_delta} command(s) failed")
-        if non_be_count > 0:
-            reasons.append(f"{non_be_count} NON-BE result(s)")
-        if corridor_fail_count > 0:
-            reasons.append(f"MLD corridor failed in {corridor_fail_count} cases")
-        reason_text = ", ".join(reasons) if reasons else "analysis mismatch"
-        tag_text = f"\033[97m[{tag}]\033[00m"
-        fail_text = f"\033[91mFAILED ({reason_text})\033[00m"
-        if failed_delta > 0:
-            print(f"{tag_text} {fail_text}{max_mld_text}. See {self.failedCmdsFile}")
-        else:
-            print(f"{tag_text} {fail_text}{max_mld_text}")
-        print()
+        print(
+            f"[{tag}] FAILED (ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, MLD CORRIDOR FAILURES={failure_count})"
+        )
+        if worst_failure is not None:
+            print(
+                f"[{tag}] Worst MLD corridor failure: {worst_failure['prefix']} {worst_failure['tag']} "
+                f"(NON-BE, MLD_MAX={worst_failure['mld']})"
+            )
+        elif corridor_fail_count > 0:
+            print(
+                f"[{tag}] Worst MLD corridor failure: unavailable (frame tag/count mismatch in corridor comparison)"
+            )
+        print(f"[{tag}] See {self.failedCmdsFile}\n")
         return False
 
     def process(
@@ -1126,8 +1179,7 @@ class MLDConformance:
         if self.args.verbose and emitConsole:
             print(f"{prefix}Command: {command}", flush=True)
         if self.args.dryrun:
-            with open(self.logFile, "a") as fd:
-                fd.write(command + "\n")
+            self.appendRunlog(command=command)
             return 0
 
         c = subprocess.run(
@@ -1139,14 +1191,10 @@ class MLDConformance:
         )
 
         # Keep runlog format: command line followed by the tool output.
-        with open(self.logFile, "a") as fd:
-            fd.write(command + "\n")
-            if c.stdout:
-                fd.write(c.stdout)
+        self.appendRunlog(command=command, output=(c.stdout or ""))
 
         if c.returncode:
-            with open(self.failedCmdsFile, "a") as f:
-                f.write(command + "\n")
+            self.appendFailed(command=command, output=(c.stdout or ""))
             if emitConsole:
                 print(f"{prefix}Failed command: {command}", flush=True)
                 if c.stdout:
@@ -1172,15 +1220,40 @@ class MLDConformance:
         )
         return (maxDiff, rmsdB, beSamplesPercent)
 
+    def validateAudioPair(self, refFile: str, dutFile: str):
+        try:
+            refSamples, _ = readfile(refFile, outdtype="float")
+            dutSamples, _ = readfile(dutFile, outdtype="float")
+        except Exception as exc:
+            return f"Failed reading audio files: ref={refFile}, dut={dutFile}, error={exc}"
+
+        if refSamples.ndim != dutSamples.ndim:
+            return (
+                f"Audio dimensionality mismatch: ref={refFile} (ndim={refSamples.ndim}), "
+                f"dut={dutFile} (ndim={dutSamples.ndim})"
+            )
+        if refSamples.shape[1] != dutSamples.shape[1]:
+            return (
+                f"Audio channel mismatch: ref={refFile} (channels={refSamples.shape[1]}), "
+                f"dut={dutFile} (channels={dutSamples.shape[1]})"
+            )
+        if refSamples.shape[0] != dutSamples.shape[0]:
+            return (
+                f"Audio length mismatch: ref={refFile} (samples={refSamples.shape[0]}), "
+                f"dut={dutFile} (samples={dutSamples.shape[0]})"
+            )
+        return None
+
     def mld(self, tag, pytestTag, refFile, dutFile):
         mldThisFile = np.zeros(0)
-        with open(self.failedCmdsFile, "a") as f:
-            if not os.path.exists(refFile):
-                f.write(f"File does not exists: {refFile}\n")
-                return None
-            if not os.path.exists(dutFile):
-                f.write(f"File does not exists : {dutFile}\n")
-                return None
+        if not os.path.exists(refFile):
+            msg = f"File does not exists: {refFile}"
+            self.appendFailed(context=f"[{tag}:{pytestTag}] {msg}")
+            return None, (msg, "")
+        if not os.path.exists(dutFile):
+            msg = f"File does not exists : {dutFile}"
+            self.appendFailed(context=f"[{tag}:{pytestTag}] {msg}")
+            return None, (msg, "")
         with tempfile.TemporaryDirectory() as tmpdir:
             refSamples, fsR = readfile(refFile, outdtype="float")
             dutSamples, fsD = readfile(dutFile, outdtype="float")
@@ -1215,6 +1288,7 @@ class MLDConformance:
 
                 command = [
                     self.wavdiffbin,
+                    "--no-fail",
                     "-s",
                     refFileMono,
                     dutFileMono,
@@ -1230,10 +1304,60 @@ class MLDConformance:
                         shell=True,
                     )
 
-                mldThisChan = np.loadtxt(
-                    mldFile, delimiter=";", dtype=float, skiprows=1
+                wavdiff_output = ""
+                try:
+                    with open(mldFile, "r") as fd:
+                        wavdiff_output = fd.read().strip()
+                except OSError:
+                    wavdiff_output = ""
+
+                # Keep runlog readable: omit dense per-segment numeric rows from wav-diff output.
+                wavdiff_log_lines = []
+                wavdiff_rows_omitted = 0
+                for line in wavdiff_output.splitlines():
+                    if re.match(r"^\s*[-+]?\d+(?:\.\d+)?;[-+]?\d+(?:\.\d+)?;[-+]?\d+(?:\.\d+)?\s*$", line):
+                        wavdiff_rows_omitted += 1
+                    else:
+                        wavdiff_log_lines.append(line)
+                if wavdiff_rows_omitted > 0:
+                    wavdiff_log_lines.append(
+                        f"[... omitted {wavdiff_rows_omitted} per-segment MLD rows ...]"
+                    )
+                wavdiff_log_output = "\n".join(wavdiff_log_lines)
+
+                self.appendRunlog(
+                    context=f"[{tag}:{pytestTag}] wav-diff (channel={ch}, rc={c.returncode})",
+                    command=" ".join(command),
+                    output=wavdiff_log_output,
                 )
-                mldThisChan = mldThisChan[:, 2]
+
+                if c.returncode != 0:
+                    self.appendFailed(
+                        context=f"[{tag}:{pytestTag}] wav-diff execution failed (channel={ch}, rc={c.returncode})",
+                        command=" ".join(command),
+                        output=wavdiff_output,
+                    )
+                    return None, (" ".join(command), wavdiff_output)
+
+                mldRaw = np.loadtxt(mldFile, delimiter=";", dtype=float, skiprows=1)
+                if mldRaw.size == 0:
+                    self.appendFailed(
+                        context=f"[{tag}:{pytestTag}] wav-diff produced no MLD data (channel={ch}, rc={c.returncode})",
+                        command=" ".join(command),
+                        output=wavdiff_output,
+                    )
+                    return None, (" ".join(command), wavdiff_output)
+
+                mldRaw = np.atleast_2d(mldRaw)
+                if mldRaw.shape[1] < 3:
+                    self.appendFailed(
+                        context=f"[{tag}:{pytestTag}] wav-diff MLD output has invalid format (channel={ch}, rc={c.returncode})",
+                        command=" ".join(command),
+                        output=wavdiff_output,
+                    )
+                    return None, (" ".join(command), wavdiff_output)
+
+                mldThisChan = mldRaw[:, 2]
                 if ch == 0:
                     mldThisFile = mldThisChan
                 else:
@@ -1252,8 +1376,8 @@ class MLDConformance:
                 f.write(
                     f"{pytestTag}, {maxDiff}, {rmsdB}, {beSamplesPercent}, {mldThisFile.max()}\n"
                 )
-            return float(mldThisFile.max())
-        return None
+            return float(mldThisFile.max()), None
+        return None, ("wav-diff", "MLD output contained no usable samples")
 
     def beTest(
         self, tag, pytestTag, refFile, dutFile, DUTmdFileList=[], REFmdFileList=[]
@@ -1333,15 +1457,24 @@ class MLDConformance:
             diff = dutMLD - refMLD
             maxDiff = float(diff.max()) if diff.size else 0.0
             corridor_failed = maxDiff > threshold
+            if corridor_failed:
+                msg = (
+                    f"[{tag}] MLD corridor failed: max(dut-ref)={maxDiff} exceeds threshold={threshold}"
+                )
+                self.appendRunlog(context=msg)
+                self.appendFailed(context=msg)
         else:
             ref_count = refMLD.shape[0]
             dut_count = dutMLD.shape[0]
             ref_preview = ", ".join(refTags[:3]) if ref_count else "<empty>"
             dut_preview = ", ".join(dutTags[:3]) if dut_count else "<empty>"
-            print(
-                f"\033[93mWarning: {tag} corridor comparison skipped because reference and DUT frame tags do not match "
-                f"(ref_count={ref_count}, dut_count={dut_count}, ref_first=[{ref_preview}], dut_first=[{dut_preview}]).\033[00m"
+            warn_msg = (
+                f"Warning: {tag} corridor comparison skipped because reference and DUT frame tags do not match "
+                f"(ref_count={ref_count}, dut_count={dut_count}, ref_first=[{ref_preview}], dut_first=[{dut_preview}])."
             )
+            print(f"\033[93m{warn_msg}\033[00m")
+            self.appendRunlog(context=warn_msg)
+            self.appendFailed(context=warn_msg)
             corridor_failed = True
 
         return not corridor_failed
@@ -1358,6 +1491,10 @@ class MLDConformance:
                     delimiter=",",
                     dtype=[("MLD", "f8"), ("pyTestTag", "<U256")],
                 )
+                mdlCutWithTags = np.atleast_1d(mdlCutWithTags)
+                if mdlCutWithTags.size == 0:
+                    continue
+
                 bePercent = np.loadtxt(
                     self.sampleStats[tag],
                     delimiter=",",
@@ -1372,6 +1509,11 @@ class MLDConformance:
                     skiprows=1,
                     usecols=1,
                 )
+                bePercent = np.atleast_1d(bePercent)
+                maxDiff = np.atleast_1d(maxDiff)
+                if bePercent.size == 0 or maxDiff.size == 0:
+                    continue
+
                 bePercentAvg = np.average(bePercent)
                 maxDiffmax = np.max(maxDiff) * 32768.0
                 mdlValues = mdlCutWithTags["MLD"]
@@ -1430,9 +1572,10 @@ class MLDConformance:
                         all_ok = all_ok and corridor_ok
                         corridor_fail_count += int(not corridor_ok)
                     else:
-                        print(
-                            f"\033[91mMissing reference MLD file for {tag} : {refMldFile} \033[00m"
-                        )
+                        missing_msg = f"Missing reference MLD file for {tag} : {refMldFile}"
+                        print(f"\033[91m{missing_msg} \033[00m")
+                        self.appendRunlog(context=missing_msg)
+                        self.appendFailed(context=missing_msg)
                         all_ok = False
                         corridor_fail_count += 1
 
@@ -1575,6 +1718,7 @@ if __name__ == "__main__":
 
     if not args.regenerate_mld_ref:
         print("Summary of results:")
+        print("---------------------")
         for tag in testTags:
             tag_status = "OK" if tag_results.get(tag, False) else "FAILED"
             print(f"[{tag}] {tag_status}")
-- 
GitLab