From 8dd8ba6e34bbd8826ca0373a6d97a0f89b070998 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Wed, 25 Mar 2026 15:48:52 +0100 Subject: [PATCH] Improve conformance script printouts and logging --- scripts/ivas_conformance/runConformance.py | 468 ++++++++++++++------- 1 file changed, 306 insertions(+), 162 deletions(-) diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py index e26fb6032..23fe3479a 100644 --- a/scripts/ivas_conformance/runConformance.py +++ b/scripts/ivas_conformance/runConformance.py @@ -253,6 +253,42 @@ class MLDConformance: with open(self.failedCmdsFile, "r") as f: return sum(1 for line in f if line.strip()) + def getFailedCommandsRange(self, start: int, end: int) -> list[str]: + if not os.path.exists(self.failedCmdsFile) or end <= start: + return [] + with open(self.failedCmdsFile, "r") as f: + lines = [line.strip() for line in f if line.strip()] + return lines[start:end] + + def appendRunlog(self, command: str = "", output: str = "", context: str = "") -> None: + if not getattr(self, "logFile", None): + return + with open(self.logFile, "a") as fd: + if context: + fd.write(context + "\n") + if command: + fd.write(command + "\n") + if output: + fd.write(output) + if not output.endswith("\n"): + fd.write("\n") + + def appendFailed(self, command: str = "", output: str = "", context: str = "") -> None: + if not getattr(self, "failedCmdsFile", None): + return + with open(self.failedCmdsFile, "a") as fd: + if context: + fd.write(context + "\n") + if command: + fd.write(command + "\n") + if output: + fd.write(output) + if not output.endswith("\n"): + fd.write("\n") + + def formatTestHeader(self, testPrefix: str, phase: str, pytestTag: str) -> str: + return f"{testPrefix} {phase}: {pytestTag}\n---------------------------" + def setupCommon(self): self.Commands = dict() self.TestDesc = dict[ @@ -615,22 +651,30 @@ class MLDConformance: return (non_be, None, None, None) else: if not os.path.exists(testDesc.refOutput) or not os.path.exists(testDesc.dutOutput): - with open(self.failedCmdsFile, "a") as f: - f.write( - f"Missing file for compare: ref={testDesc.refOutput}, dut={testDesc.dutOutput}\n" - ) - return (None, None, None, None) + msg = ( + f"Missing file for compare: ref={testDesc.refOutput}, dut={testDesc.dutOutput}" + ) + self.appendFailed(context=f"[{tag}:{dutPytestTag}] {msg}") + return (None, None, (msg, ""), None) + + validate_err = self.validateAudioPair(testDesc.refOutput, testDesc.dutOutput) + if validate_err: + self.appendFailed(context=f"[{tag}:{dutPytestTag}] {validate_err}") + return (None, None, (validate_err, ""), None) + non_be = int( not filecmp.cmp(testDesc.refOutput, testDesc.dutOutput, shallow=False) ) ##### skip MLD verification for files with only 1 frame as MLD does not run with such files. Possible solution: append 0s and then compare ##### if testDesc.rawCmdline.find("_cut.192.fer") == -1: - max_mld = self.mld( + max_mld, mld_error = self.mld( tag, dutPytestTag, refFile=testDesc.refOutput, dutFile=testDesc.dutOutput, ) + if mld_error is not None: + return (None, None, mld_error, None) return (non_be, max_mld, None, None) return (non_be, None, None, None) @@ -669,15 +713,21 @@ class MLDConformance: if rc != 0: return (None, None, (dutDecCmd, err_output), dutDecCmd) if not os.path.exists(refDecOutputFile) or not os.path.exists(dutDecOutputFile): - with open(self.failedCmdsFile, "a") as f: - f.write( - f"Missing file for compare: ref={refDecOutputFile}, dut={dutDecOutputFile}\n" - ) - return (None, None, None, dutDecCmd) + msg = f"Missing file for compare: ref={refDecOutputFile}, dut={dutDecOutputFile}" + self.appendFailed(context=f"[{tag}:{encPytestTag}] {msg}") + return (None, None, (msg, ""), dutDecCmd) + + validate_err = self.validateAudioPair(refDecOutputFile, dutDecOutputFile) + if validate_err: + self.appendFailed(context=f"[{tag}:{encPytestTag}] {validate_err}") + return (None, None, (validate_err, ""), dutDecCmd) + non_be = int(not filecmp.cmp(refDecOutputFile, dutDecOutputFile, shallow=False)) - max_mld = self.mld( + max_mld, mld_error = self.mld( tag, encPytestTag, refFile=refDecOutputFile, dutFile=dutDecOutputFile ) + if mld_error is not None: + return (None, None, mld_error, dutDecCmd) return (non_be, max_mld, None, dutDecCmd) def analyseOneIsarEncoderTest(self, tag: str, pytestTag: str): @@ -693,7 +743,8 @@ class MLDConformance: return (non_be, None, None, None) else: refDecOutputFile = testDesc.refOutput.replace(".splt.bit", ".wav") - dutDecOutputFile = testDesc.dutOutput.replace(".splt.bit", ".wav") + dutDecOutputFile = testDesc.dutOutput.replace(".splt.bit", "_CUT_REFDECODED.wav") + # Decode the encoded output with Reference ISAR decoder dutDecCmd = testDesc.refDecCmdline.split() for idx, cmd in enumerate(dutDecCmd): @@ -712,15 +763,21 @@ class MLDConformance: if rc != 0: return (None, None, (dutDecCmd, err_output), dutDecCmd) if not os.path.exists(refDecOutputFile) or not os.path.exists(dutDecOutputFile): - with open(self.failedCmdsFile, "a") as f: - f.write( - f"Missing file for compare: ref={refDecOutputFile}, dut={dutDecOutputFile}\n" - ) - return (None, None, None, dutDecCmd) + msg = f"Missing file for compare: ref={refDecOutputFile}, dut={dutDecOutputFile}" + self.appendFailed(context=f"[{tag}:{pytestTag}] {msg}") + return (None, None, (msg, ""), dutDecCmd) + + validate_err = self.validateAudioPair(refDecOutputFile, dutDecOutputFile) + if validate_err: + self.appendFailed(context=f"[{tag}:{pytestTag}] {validate_err}") + return (None, None, (validate_err, ""), dutDecCmd) + non_be = int(not filecmp.cmp(refDecOutputFile, dutDecOutputFile, shallow=False)) - max_mld = self.mld( + max_mld, mld_error = self.mld( tag, pytestTag, refFile=refDecOutputFile, dutFile=dutDecOutputFile ) + if mld_error is not None: + return (None, None, mld_error, dutDecCmd) return (non_be, max_mld, None, dutDecCmd) def getRendOutputFile(self, command: str): @@ -808,6 +865,7 @@ class MLDConformance: ): # Run CUT Cmdline testPrefix = f"[{tag} {testIndex}/{totalTests}]" + self.appendRunlog(context=self.formatTestHeader(testPrefix, "Running test", pyTestsTag)) testDesc = self.TestDesc[tag][pyTestsTag] rc, err_output = self.process( command=testDesc.dutCmdline, @@ -827,7 +885,8 @@ class MLDConformance: self, tag: str, pyTestsTag: str, testIndex: int = 0, totalTests: int = 0 ): testPrefix = f"[{tag} {testIndex}/{totalTests}]" - failed_before = self.getFailedCommandCount() + header = self.formatTestHeader(testPrefix, "Analyzing test", pyTestsTag) + self.appendRunlog(context=header) non_be = None max_mld = None errorDetails = None @@ -845,10 +904,16 @@ class MLDConformance: else: assert False, f"Un-implemented Tag {tag}" - failed_after = self.getFailedCommandCount() - if failed_after > failed_before: + if errorDetails is not None: + if errorDetails: + cmd, err_output = errorDetails + self.appendFailed(context=header, command=cmd, output=(err_output or "")) + elif executedCommand: + self.appendFailed(context=header, command=executedCommand) + else: + self.appendFailed(context=header) self.stats() - return (testPrefix, pyTestsTag, "ERROR", None, errorDetails, executedCommand) + return (testPrefix, pyTestsTag, "ERROR", None, errorDetails, executedCommand, None, None) if self.args.be_test: verdict = "NON-BE" if non_be else "BE" @@ -859,8 +924,9 @@ class MLDConformance: result_text = f"{verdict}, MLD=N/A" else: result_text = f"{verdict}, MLD_MAX={max_mld}" + self.stats() - return (testPrefix, pyTestsTag, "OK", result_text, None, executedCommand) + return (testPrefix, pyTestsTag, "OK", result_text, None, executedCommand, verdict, max_mld) def analyseOneCommandFromTuple(self, args): return self.analyseOneCommand(*args) @@ -930,7 +996,7 @@ class MLDConformance: failed_after = self.getFailedCommandCount() failed_delta = failed_after - failed_before if failed_delta == 0: - print(f"[{tag}] OK") + print(f"[{tag}] OK\n") return True print( @@ -939,8 +1005,13 @@ class MLDConformance: return False def analyseTag(self, tag: str) -> bool: - failed_before = self.getFailedCommandCount() + corridor_threshold = 0.1 + command_fail_count = 0 + be_count = 0 non_be_count = 0 + failure_count = 0 + worst_failure = None + # reset MLD, Sample Stats if self.args.be_test: with open(self.BEcsv[tag], "w") as f: @@ -949,7 +1020,8 @@ class MLDConformance: open(self.mldcsv[tag], "w").close() with open(self.sampleStats[tag], "w") as f: f.write(f"PYTESTTAG, MAXDIFF, RMSdB, BEFRAMES_PERCENT, MAX_MLD\n") - selectedTests = list() + + selectedTests = [] if self.filter: for pyTestsTag in self.TestDesc[tag].keys(): if self.filter in self.TestDesc[tag][pyTestsTag].rawCmdline: @@ -962,96 +1034,85 @@ class MLDConformance: f"Analysing tests for {tag} {'Filter=' + self.filter if self.filter else ''} ({self.totalTests} tests)", flush=True, ) + + def handle_test_result( + testPrefix, + pyTestsTag, + runStatus, + analysisResult, + errorDetails, + executedCommand, + verdict, + test_max_mld, + ): + nonlocal command_fail_count, be_count, non_be_count, failure_count, worst_failure + + if runStatus != "OK": + command_fail_count += 1 + elif verdict == "BE": + be_count += 1 + elif verdict == "NON-BE": + non_be_count += 1 + if test_max_mld is not None and test_max_mld > corridor_threshold: + failure_count += 1 + fail_header = self.formatTestHeader(testPrefix, "Analyzing test", pyTestsTag) + self.appendFailed( + context=( + fail_header + + f"\n[{tag}:{pyTestsTag}] FAILURE (MLD_MAX={test_max_mld} > threshold={corridor_threshold})" + ), + command=(executedCommand or ""), + ) + if worst_failure is None or test_max_mld > worst_failure["mld"]: + worst_failure = { + "prefix": testPrefix, + "tag": pyTestsTag, + "mld": float(test_max_mld), + } + + if self.args.verbose: + if runStatus == "OK": + if not analysisResult: + print( + f"{testPrefix} Analyzing test: {pyTestsTag} ... MLD analysis failed!", + flush=True, + ) + else: + print( + f"{testPrefix} Analyzing test: {pyTestsTag} ... {analysisResult}", + flush=True, + ) + else: + print( + f"{testPrefix} Analyzing test: {pyTestsTag} ... {runStatus}", + flush=True, + ) + + if runStatus != "OK": + if errorDetails: + cmd, err_output = errorDetails + print(f"{testPrefix} Failed command: {cmd}", flush=True) + if err_output: + print( + err_output, + end="" if err_output.endswith("\n") else "\n", + flush=True, + ) + elif executedCommand: + print(f"{testPrefix} Failed command: {executedCommand}", flush=True) + if not self.args.no_multi_processing: with Pool() as pool: args = [ (tag, pyTestsTag, idx, self.totalTests) for idx, pyTestsTag in enumerate(selectedTests, start=1) ] - for ( - testPrefix, - pyTestsTag, - runStatus, - analysisResult, - errorDetails, - executedCommand, - ) in pool.imap_unordered(self.analyseOneCommandFromTuple, args): - if ( - runStatus == "OK" - and analysisResult - and str(analysisResult).startswith("NON-BE") - ): - non_be_count += 1 - if self.args.verbose: - if runStatus == "OK": - if not analysisResult: - print( - f"{testPrefix} Analyzing test: {pyTestsTag} ... MLD analysis failed!", - flush=True, - ) - else: - print( - f"{testPrefix} Analyzing test: {pyTestsTag} ... {analysisResult}", - flush=True, - ) - else: - print( - f"{testPrefix} Analyzing test: {pyTestsTag} ... {runStatus}", - flush=True, - ) - - if errorDetails: - cmd, err_output = errorDetails - print(f"{testPrefix} Failed command: {cmd}", flush=True) - if err_output: - print( - err_output, - end="" if err_output.endswith("\n") else "\n", - flush=True, - ) + for result in pool.imap_unordered(self.analyseOneCommandFromTuple, args): + handle_test_result(*result) else: for idx, pyTestsTag in enumerate(selectedTests, start=1): - ( - testPrefix, - pyTestsTag, - runStatus, - analysisResult, - errorDetails, - executedCommand, - ) = self.analyseOneCommand(tag, pyTestsTag, idx, self.totalTests) - if ( - runStatus == "OK" - and analysisResult - and str(analysisResult).startswith("NON-BE") - ): - non_be_count += 1 - if self.args.verbose: - if runStatus == "OK": - if not analysisResult: - print( - f"{testPrefix} Analyzing test: {pyTestsTag} ... MLD analysis failed!", - flush=True, - ) - else: - print( - f"{testPrefix} Analyzing test: {pyTestsTag} ... {analysisResult}", - flush=True, - ) - else: - print( - f"{testPrefix} Analyzing test: {pyTestsTag} ... {runStatus}", - flush=True, - ) - - if errorDetails: - cmd, err_output = errorDetails - print(f"{testPrefix} Failed command: {cmd}", flush=True) - if err_output: - print( - err_output, - end="" if err_output.endswith("\n") else "\n", - flush=True, - ) + result = self.analyseOneCommand(tag, pyTestsTag, idx, self.totalTests) + handle_test_result(*result) self.flushErrorBlocks() @@ -1059,39 +1120,31 @@ class MLDConformance: self.doBEanalysis(selectTag=tag) analysis_ok = True corridor_fail_count = 0 - max_mld_value = None else: - analysis_ok, corridor_fail_count, max_mld_value = self.doAnalysis( - selectTag=tag - ) + analysis_ok, corridor_fail_count, _ = self.doAnalysis(selectTag=tag) - failed_after = self.getFailedCommandCount() - failed_delta = failed_after - failed_before - max_mld_text = ( - f", MAX_MLD={max_mld_value}" if max_mld_value is not None else "" - ) if self.args.regenerate_mld_ref: - return failed_delta == 0 and analysis_ok + return command_fail_count == 0 and analysis_ok - if failed_delta == 0 and non_be_count == 0 and analysis_ok: - print(f"[{tag}] OK{max_mld_text}") + if command_fail_count == 0 and failure_count == 0 and analysis_ok: + print( + f"[{tag}] OK (ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, MLD CORRIDOR FAILURES={failure_count})\n" + ) return True - reasons = [] - if failed_delta > 0: - reasons.append(f"{failed_delta} command(s) failed") - if non_be_count > 0: - reasons.append(f"{non_be_count} NON-BE result(s)") - if corridor_fail_count > 0: - reasons.append(f"MLD corridor failed in {corridor_fail_count} cases") - reason_text = ", ".join(reasons) if reasons else "analysis mismatch" - tag_text = f"\033[97m[{tag}]\033[00m" - fail_text = f"\033[91mFAILED ({reason_text})\033[00m" - if failed_delta > 0: - print(f"{tag_text} {fail_text}{max_mld_text}. See {self.failedCmdsFile}") - else: - print(f"{tag_text} {fail_text}{max_mld_text}") - print() + print( + f"[{tag}] FAILED (ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, MLD CORRIDOR FAILURES={failure_count})" + ) + if worst_failure is not None: + print( + f"[{tag}] Worst MLD corridor failure: {worst_failure['prefix']} {worst_failure['tag']} " + f"(NON-BE, MLD_MAX={worst_failure['mld']})" + ) + elif corridor_fail_count > 0: + print( + f"[{tag}] Worst MLD corridor failure: unavailable (frame tag/count mismatch in corridor comparison)" + ) + print(f"[{tag}] See {self.failedCmdsFile}\n") return False def process( @@ -1126,8 +1179,7 @@ class MLDConformance: if self.args.verbose and emitConsole: print(f"{prefix}Command: {command}", flush=True) if self.args.dryrun: - with open(self.logFile, "a") as fd: - fd.write(command + "\n") + self.appendRunlog(command=command) return 0 c = subprocess.run( @@ -1139,14 +1191,10 @@ class MLDConformance: ) # Keep runlog format: command line followed by the tool output. - with open(self.logFile, "a") as fd: - fd.write(command + "\n") - if c.stdout: - fd.write(c.stdout) + self.appendRunlog(command=command, output=(c.stdout or "")) if c.returncode: - with open(self.failedCmdsFile, "a") as f: - f.write(command + "\n") + self.appendFailed(command=command, output=(c.stdout or "")) if emitConsole: print(f"{prefix}Failed command: {command}", flush=True) if c.stdout: @@ -1172,15 +1220,40 @@ class MLDConformance: ) return (maxDiff, rmsdB, beSamplesPercent) + def validateAudioPair(self, refFile: str, dutFile: str): + try: + refSamples, _ = readfile(refFile, outdtype="float") + dutSamples, _ = readfile(dutFile, outdtype="float") + except Exception as exc: + return f"Failed reading audio files: ref={refFile}, dut={dutFile}, error={exc}" + + if refSamples.ndim != dutSamples.ndim: + return ( + f"Audio dimensionality mismatch: ref={refFile} (ndim={refSamples.ndim}), " + f"dut={dutFile} (ndim={dutSamples.ndim})" + ) + if refSamples.shape[1] != dutSamples.shape[1]: + return ( + f"Audio channel mismatch: ref={refFile} (channels={refSamples.shape[1]}), " + f"dut={dutFile} (channels={dutSamples.shape[1]})" + ) + if refSamples.shape[0] != dutSamples.shape[0]: + return ( + f"Audio length mismatch: ref={refFile} (samples={refSamples.shape[0]}), " + f"dut={dutFile} (samples={dutSamples.shape[0]})" + ) + return None + def mld(self, tag, pytestTag, refFile, dutFile): mldThisFile = np.zeros(0) - with open(self.failedCmdsFile, "a") as f: - if not os.path.exists(refFile): - f.write(f"File does not exists: {refFile}\n") - return None - if not os.path.exists(dutFile): - f.write(f"File does not exists : {dutFile}\n") - return None + if not os.path.exists(refFile): + msg = f"File does not exists: {refFile}" + self.appendFailed(context=f"[{tag}:{pytestTag}] {msg}") + return None, (msg, "") + if not os.path.exists(dutFile): + msg = f"File does not exists : {dutFile}" + self.appendFailed(context=f"[{tag}:{pytestTag}] {msg}") + return None, (msg, "") with tempfile.TemporaryDirectory() as tmpdir: refSamples, fsR = readfile(refFile, outdtype="float") dutSamples, fsD = readfile(dutFile, outdtype="float") @@ -1215,6 +1288,7 @@ class MLDConformance: command = [ self.wavdiffbin, + "--no-fail", "-s", refFileMono, dutFileMono, @@ -1230,10 +1304,60 @@ class MLDConformance: shell=True, ) - mldThisChan = np.loadtxt( - mldFile, delimiter=";", dtype=float, skiprows=1 + wavdiff_output = "" + try: + with open(mldFile, "r") as fd: + wavdiff_output = fd.read().strip() + except OSError: + wavdiff_output = "" + + # Keep runlog readable: omit dense per-segment numeric rows from wav-diff output. + wavdiff_log_lines = [] + wavdiff_rows_omitted = 0 + for line in wavdiff_output.splitlines(): + if re.match(r"^\s*[-+]?\d+(?:\.\d+)?;[-+]?\d+(?:\.\d+)?;[-+]?\d+(?:\.\d+)?\s*$", line): + wavdiff_rows_omitted += 1 + else: + wavdiff_log_lines.append(line) + if wavdiff_rows_omitted > 0: + wavdiff_log_lines.append( + f"[... omitted {wavdiff_rows_omitted} per-segment MLD rows ...]" + ) + wavdiff_log_output = "\n".join(wavdiff_log_lines) + + self.appendRunlog( + context=f"[{tag}:{pytestTag}] wav-diff (channel={ch}, rc={c.returncode})", + command=" ".join(command), + output=wavdiff_log_output, ) - mldThisChan = mldThisChan[:, 2] + + if c.returncode != 0: + self.appendFailed( + context=f"[{tag}:{pytestTag}] wav-diff execution failed (channel={ch}, rc={c.returncode})", + command=" ".join(command), + output=wavdiff_output, + ) + return None, (" ".join(command), wavdiff_output) + + mldRaw = np.loadtxt(mldFile, delimiter=";", dtype=float, skiprows=1) + if mldRaw.size == 0: + self.appendFailed( + context=f"[{tag}:{pytestTag}] wav-diff produced no MLD data (channel={ch}, rc={c.returncode})", + command=" ".join(command), + output=wavdiff_output, + ) + return None, (" ".join(command), wavdiff_output) + + mldRaw = np.atleast_2d(mldRaw) + if mldRaw.shape[1] < 3: + self.appendFailed( + context=f"[{tag}:{pytestTag}] wav-diff MLD output has invalid format (channel={ch}, rc={c.returncode})", + command=" ".join(command), + output=wavdiff_output, + ) + return None, (" ".join(command), wavdiff_output) + + mldThisChan = mldRaw[:, 2] if ch == 0: mldThisFile = mldThisChan else: @@ -1252,8 +1376,8 @@ class MLDConformance: f.write( f"{pytestTag}, {maxDiff}, {rmsdB}, {beSamplesPercent}, {mldThisFile.max()}\n" ) - return float(mldThisFile.max()) - return None + return float(mldThisFile.max()), None + return None, ("wav-diff", "MLD output contained no usable samples") def beTest( self, tag, pytestTag, refFile, dutFile, DUTmdFileList=[], REFmdFileList=[] @@ -1333,15 +1457,24 @@ class MLDConformance: diff = dutMLD - refMLD maxDiff = float(diff.max()) if diff.size else 0.0 corridor_failed = maxDiff > threshold + if corridor_failed: + msg = ( + f"[{tag}] MLD corridor failed: max(dut-ref)={maxDiff} exceeds threshold={threshold}" + ) + self.appendRunlog(context=msg) + self.appendFailed(context=msg) else: ref_count = refMLD.shape[0] dut_count = dutMLD.shape[0] ref_preview = ", ".join(refTags[:3]) if ref_count else "" dut_preview = ", ".join(dutTags[:3]) if dut_count else "" - print( - f"\033[93mWarning: {tag} corridor comparison skipped because reference and DUT frame tags do not match " - f"(ref_count={ref_count}, dut_count={dut_count}, ref_first=[{ref_preview}], dut_first=[{dut_preview}]).\033[00m" + warn_msg = ( + f"Warning: {tag} corridor comparison skipped because reference and DUT frame tags do not match " + f"(ref_count={ref_count}, dut_count={dut_count}, ref_first=[{ref_preview}], dut_first=[{dut_preview}])." ) + print(f"\033[93m{warn_msg}\033[00m") + self.appendRunlog(context=warn_msg) + self.appendFailed(context=warn_msg) corridor_failed = True return not corridor_failed @@ -1358,6 +1491,10 @@ class MLDConformance: delimiter=",", dtype=[("MLD", "f8"), ("pyTestTag", "