From 3dad267c3b637bfc32022114375ea2a296a8d1ea Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Sun, 29 Mar 2026 13:19:43 +0200 Subject: [PATCH 1/9] conformance: add MASA MD analysis and references --- scripts/ivas_conformance/runConformance.py | 551 ++++++++++++++++++--- 1 file changed, 488 insertions(+), 63 deletions(-) diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py index ecf964b73..65cc7eb5a 100644 --- a/scripts/ivas_conformance/runConformance.py +++ b/scripts/ivas_conformance/runConformance.py @@ -31,6 +31,7 @@ the United Nations Convention on Contracts on the International Sales of Goods. """ import argparse +import csv import os import platform import re @@ -38,6 +39,7 @@ import numpy as np import subprocess import tempfile import sys +import shlex from typing import Tuple from multiprocessing import Pool from dataclasses import dataclass @@ -229,6 +231,14 @@ ReferenceMldFiles = { "ISAR": "mld_ref_ISAR.csv", } +ReferenceMasaMDFiles = { + "ENC": "masa_ref_ENC.csv", + "DEC": "masa_ref_DEC.csv", + "REND": "masa_ref_REND.csv", +} + +PROGRAM_VERSION = "4.0" + @dataclass class TestDescriptor: @@ -368,14 +378,24 @@ class MLDConformance: if exe_platform == "Windows": exe_platform = "Win32" self.wavdiffbin = os.path.join(self.toolsdir, exe_platform, "wav-diff") + + if exe_platform == "Windows" or exe_platform == "Win64" or exe_platform == "Win32": + self.masadiffbin = os.path.join(self.toolsdir, "Win32", "masaDiffTool.exe") + elif exe_platform == "Darwin": + self.masadiffbin = os.path.join(self.toolsdir, "Darwin", "masaDiffTool") + else: + self.masadiffbin = os.path.join(self.toolsdir, "Linux", "masaDiffTool") + self.CutBins = dict() self.mldcsv = dict() + self.masaMDcsv = dict() self.BEcsv = dict() self.sampleStats = dict() for tag in IVAS_Bins.keys(): self.CutBins[tag] = os.path.join(self.cut_build_path, IVAS_Bins[tag]) self.mldcsv[tag] = os.path.join(self.outputDir, f"mld_{tag}.csv") + self.masaMDcsv[tag] = os.path.join(self.outputDir, f"masa_{tag}.csv") self.BEcsv[tag] = os.path.join(self.outputDir, f"BE_{tag}.csv") self.sampleStats[tag] = os.path.join( self.outputDir, f"sampleStats_{tag}.csv" @@ -675,6 +695,31 @@ class MLDConformance: ) if mld_error is not None: return (None, None, mld_error, None) + + if tag in {"DEC", "REND"} and "EXT" in self._outputFormatsInCommand(testDesc.rawCmdline): + masaMD_scores, masaMD_error = self.masaMD( + tag, + dutPytestTag, + refMdFile=testDesc.refOutput + ".met", + dutMdFile=testDesc.dutOutput + ".met", + ) + if masaMD_error is not None: + return (None, None, masaMD_error, None) + if masaMD_scores is not None and masaMD_scores.size > 0: + masaMDWithTags = np.column_stack( + ( + masaMD_scores, + np.array( + [ + f"{dutPytestTag}-FRM{x:05d}" + for x in range(masaMD_scores.size) + ] + ), + ) + ) + with open(self.masaMDcsv[tag], "ab") as f: + np.savetxt(f, masaMDWithTags, fmt="%s", delimiter=",") + return (non_be, max_mld, None, None) return (non_be, None, None, None) @@ -728,6 +773,31 @@ class MLDConformance: ) if mld_error is not None: return (None, None, mld_error, dutDecCmd) + + if "EXT" in self._outputFormatsInCommand(testDesc.rawCmdline): + masaMD_scores, masaMD_error = self.masaMD( + tag, + encPytestTag, + refMdFile=refDecOutputFile + ".met", + dutMdFile=dutDecOutputFile + ".met", + ) + if masaMD_error is not None: + return (None, None, masaMD_error, dutDecCmd) + if masaMD_scores is not None and masaMD_scores.size > 0: + masaMDWithTags = np.column_stack( + ( + masaMD_scores, + np.array( + [ + f"{encPytestTag}-FRM{x:05d}" + for x in range(masaMD_scores.size) + ] + ), + ) + ) + with open(self.masaMDcsv[tag], "ab") as f: + np.savetxt(f, masaMDWithTags, fmt="%s", delimiter=",") + return (non_be, max_mld, None, dutDecCmd) def analyseOneIsarEncoderTest(self, tag: str, pytestTag: str): @@ -931,6 +1001,139 @@ class MLDConformance: def analyseOneCommandFromTuple(self, args): return self.analyseOneCommand(*args) + def _extractKbpsValues(self, rawCmdline: str) -> list[float]: + """Extract all bitrate values from command line (e.g., from 'at_32_kbps' or 'from_32_kbps_to_96_kbps').""" + values = [] + for match in re.findall(r"(\d+(?:_\d+)?)_kbps", rawCmdline.lower()): + values.append(float(match.replace("_", "."))) + return values + + def _isBitrateAtMost80(self, rawCmdline: str) -> bool: + """Check if all bitrates in command line are <= 80 kbps. + + For bitrate switching tests (e.g., 'from_32_kbps_to_96_kbps'), this checks + that the upper (target) bitrate does not exceed 80 kbps.""" + values = self._extractKbpsValues(rawCmdline) + return bool(values) and max(values) <= 80.0 + + def _outputFormatsInCommand(self, rawCmdline: str) -> set[str]: + text = rawCmdline.upper() + formats = set() + + # Match format token between '_out_' and '_out' to avoid matching input-format words. + # Example: '..._48kHz_out_MONO_out_...' -> captures MONO only. + for match in re.finditer(r"_OUT_([A-Z0-9_]+?)_OUT(?:\b|_)", text): + fmt = match.group(1) + if fmt == "EXTERNAL": + fmt = "EXT" + if fmt in DECODER_OUTPUT_FORMATS: + formats.add(fmt) + + # Avoid matching words like EXTENDED; key on EXT_OUT style output naming. + if ( + "_EXT_OUT" in text + or "_EXTERNAL_OUT" in text + or " EXT_OUT" in text + or " EXTERNAL_OUT" in text + ): + formats.add("EXT") + + return formats + + def _matchesAllTerms(self, rawCmdline: str, terms: list[str]) -> bool: + text = rawCmdline.lower() + return all(term.lower() in text for term in terms) + + def _matchesAnyTerm(self, rawCmdline: str, terms: list[str]) -> bool: + text = rawCmdline.lower() + return any(term.lower() in text for term in terms) + + def _expectedMasaMDMetFiles(self, tag: str) -> list[str]: + """Return expected DUT-side .met files for a tag's selected tests.""" + if tag not in {"ENC", "DEC", "REND"}: + return [] + + met_files = [] + for pyTestsTag in self.getSelectedTestsForTag(tag): + testDesc = self.TestDesc[tag][pyTestsTag] + + if tag == "ENC": + if not isinstance(testDesc, BitstreamTestDescriptor): + continue + dut_dec_output = testDesc.dutOutput.replace(".192", "_CUT_REFDECODED.wav") + met_files.append(dut_dec_output + ".met") + else: + if not isinstance(testDesc, TestDescriptor): + continue + met_files.append(testDesc.dutOutput + ".met") + + return met_files + + def _hasAnyProducedMasaMDMetFile(self, tag: str) -> bool: + return any(os.path.exists(path) for path in self._expectedMasaMDMetFiles(tag)) + + def _matchesLevel1(self, tag: str, rawCmdline: str) -> bool: + if tag == "ENC": + return self._isBitrateAtMost80(rawCmdline) + + if tag == "DEC": + formats = self._outputFormatsInCommand(rawCmdline) + requested_formats = set(getattr(self.args, "filter_decoder_formats", [])) + + ext_ok = "EXT" in formats and self._isBitrateAtMost80(rawCmdline) + mono_ok = "MONO" in formats + stereo_ok = "STEREO" in formats + default_level1_dec_ok = ext_ok or mono_ok or stereo_ok + + if requested_formats: + # Plain decoder format tokens are restrictive under LEVEL1. + return default_level1_dec_ok and bool(formats.intersection(requested_formats)) + + return default_level1_dec_ok + + # For REND/ISAR/ISAR_ENC under LEVEL1, tag-level inclusion is decided at testTags parsing. + return True + + def _testPassesFilter(self, tag: str, rawCmdline: str) -> bool: + level = getattr(self.args, "filter_level", "LEVEL3") + restrictive_terms = getattr(self.args, "filter_restrictive_terms", []) + additive_terms = getattr(self.args, "filter_add_terms", []) + subtractive_terms = getattr(self.args, "filter_remove_terms", []) + requested_formats = set(getattr(self.args, "filter_decoder_formats", [])) + + # '-' terms always remove tests from the final selection. + if subtractive_terms and self._matchesAnyTerm(rawCmdline, subtractive_terms): + return False + + passes_level = True + if level == "LEVEL1": + passes_level = self._matchesLevel1(tag, rawCmdline) + + passes_requested_formats = True + if tag in {"ENC", "DEC"} and requested_formats: + cmd_formats = self._outputFormatsInCommand(rawCmdline) + passes_requested_formats = bool(cmd_formats.intersection(requested_formats)) + + passes_restrictive_terms = self._matchesAllTerms(rawCmdline, restrictive_terms) + base_selected = passes_level and passes_restrictive_terms and passes_requested_formats + + if base_selected: + return True + + # '+' terms add tests even if they fail restrictive filters. + if additive_terms and self._matchesAnyTerm(rawCmdline, additive_terms): + return True + + return False + + def getSelectedTestsForTag(self, tag: str) -> list[str]: + selected = [] + for pyTestsTag in self.TestDesc[tag].keys(): + rawCmdline = self.TestDesc[tag][pyTestsTag].rawCmdline + if self._testPassesFilter(tag, rawCmdline): + selected.append(pyTestsTag) + return selected + def runTag(self, tag: str) -> bool: failed_before = self.getFailedCommandCount() selectedTests = list() @@ -1018,6 +1221,7 @@ class MLDConformance: f.write(f"PYTESTTAG, BE=0 NON-BE=1\n") else: open(self.mldcsv[tag], "w").close() + open(self.masaMDcsv[tag], "w").close() with open(self.sampleStats[tag], "w") as f: f.write(f"PYTESTTAG, MAXDIFF, RMSdB, BEFRAMES_PERCENT, MAX_MLD\n") @@ -1244,6 +1448,117 @@ class MLDConformance: ) return None + def _parseMasaMDFrameScores(self, csvFile: str) -> np.ndarray: + frame_scores = [] + with open(csvFile, "r", newline="") as fd: + reader = csv.reader(fd) + # Skip header + next(reader, None) + for row in reader: + if not row: + continue + + # vlad: MASA MD CSV columns are: frame index, then per-frame diff metrics. + # We Use a single scalar per frame (max abs over all diff columns) + # so corridor handling is the same as MLD's one-value-per-frame pipeline. + metric_values = [] + for value in row[1:]: + text = value.strip() + if not text: + continue + try: + metric_values.append(abs(float(text))) + except ValueError: + continue + + if metric_values: + frame_scores.append(max(metric_values)) + + if not frame_scores: + return np.zeros(0, dtype=float) + return np.array(frame_scores, dtype=float) + + def masaMD(self, tag: str, pytestTag: str, refMdFile: str, dutMdFile: str): + ref_exists = os.path.exists(refMdFile) + dut_exists = os.path.exists(dutMdFile) + + if not ref_exists and not dut_exists: + # MASA metadata sidecars are optional for some test/output combinations. + # If both sides are missing, skip MASA diff for this test. + return np.zeros(0, dtype=float), None + + if ref_exists != dut_exists: + msg = ( + f"MASA Metadata file mismatch: ref={refMdFile} (exists={ref_exists}), " + f"dut={dutMdFile} (exists={dut_exists})" + ) + self.appendFailed(context=f"[{tag}:{pytestTag}] {msg}") + return None, (msg, "") + + if not os.path.exists(self.masadiffbin): + msg = f"masaDiffTool not found: {self.masadiffbin}" + self.appendFailed(context=f"[{tag}:{pytestTag}] {msg}") + return None, (msg, "") + + with tempfile.TemporaryDirectory() as tmpdir: + masaMDCsvFile = os.path.join( + tmpdir, f"{tempfile.gettempprefix()}_{tag}_{pytestTag}_masa.csv" + ) + command = [self.masadiffbin, "--csv", masaMDCsvFile, refMdFile, dutMdFile] + command_str = " ".join(shlex.quote(x) for x in command) + + c = subprocess.run( + command_str, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + shell=True, + ) + + tool_output = c.stdout or "" + self.appendRunlog( + context=f"[{tag}:{pytestTag}] masaDiffTool (rc={c.returncode})", + command=command_str, + output=tool_output, + ) + + if not os.path.exists(masaMDCsvFile): + self.appendFailed( + context=( + f"[{tag}:{pytestTag}] masaDiffTool did not produce CSV output " + f"(rc={c.returncode})" + ), + command=command_str, + output=tool_output, + ) + return None, (command_str, tool_output) + + try: + frame_scores = self._parseMasaMDFrameScores(masaMDCsvFile) + except Exception as exc: + err_msg = f"Failed to parse masaDiffTool CSV '{masaMDCsvFile}': {exc}" + self.appendFailed( + context=f"[{tag}:{pytestTag}] {err_msg}", + command=command_str, + output=tool_output, + ) + return None, (command_str, err_msg) + + # vlad: a Non-zero return code from masaDiffTool can indicate detected differences, + # but which is valid for non-BE analysis. Treat missing/invalid CSV as failure. + if frame_scores.size == 0: + self.appendFailed( + context=( + f"[{tag}:{pytestTag}] masaDiffTool produced empty frame scores " + f"(rc={c.returncode})" + ), + command=command_str, + output=tool_output, + ) + return None, (command_str, tool_output) + + return frame_scores, None + def mld(self, tag, pytestTag, refFile, dutFile): mldThisFile = np.zeros(0) if not os.path.exists(refFile): @@ -1442,40 +1757,57 @@ class MLDConformance: else: print(f"<{tag}> PASSED BE TEST") - def computeCorridor(self, mldRefWithTags, mldCutWithTags, tag, threshold=0.1): - indRef = np.argsort(mldRefWithTags["pyTestTag"]) - indDut = np.argsort(mldCutWithTags["pyTestTag"]) - refTags = mldRefWithTags["pyTestTag"][indRef] - dutTags = mldCutWithTags["pyTestTag"][indDut] - refMLD = mldRefWithTags["MLD"][indRef] - dutMLD = mldCutWithTags["MLD"][indDut] - - same_shape = refMLD.shape == dutMLD.shape + def computeCorridor( + self, + refWithTags, + cutWithTags, + tag, + threshold=0.1, + metricKey="MLD", + metricLabel="MLD", + ): + indRef = np.argsort(refWithTags["pyTestTag"]) + indDut = np.argsort(cutWithTags["pyTestTag"]) + refTags = refWithTags["pyTestTag"][indRef] + dutTags = cutWithTags["pyTestTag"][indDut] + refMetric = refWithTags[metricKey][indRef] + dutMetric = cutWithTags[metricKey][indDut] + + same_shape = refMetric.shape == dutMetric.shape same_tags = same_shape and np.array_equal(refTags, dutTags) if same_tags: - diff = dutMLD - refMLD + diff = dutMetric - refMetric maxDiff = float(diff.max()) if diff.size else 0.0 corridor_failed = maxDiff > threshold if corridor_failed: msg = ( - f"[{tag}] MLD corridor failed: max(dut-ref)={maxDiff} exceeds threshold={threshold}" + f"[{tag}] {metricLabel} corridor failed: max(dut-ref)={maxDiff} exceeds threshold={threshold}" ) self.appendRunlog(context=msg) self.appendFailed(context=msg) else: - ref_count = refMLD.shape[0] - dut_count = dutMLD.shape[0] - ref_preview = ", ".join(refTags[:3]) if ref_count else "" - dut_preview = ", ".join(dutTags[:3]) if dut_count else "" - warn_msg = ( - f"Warning: {tag} corridor comparison skipped because reference and DUT frame tags do not match " - f"(ref_count={ref_count}, dut_count={dut_count}, ref_first=[{ref_preview}], dut_first=[{dut_preview}])." - ) - print(f"\033[93m{warn_msg}\033[00m") - self.appendRunlog(context=warn_msg) - self.appendFailed(context=warn_msg) - corridor_failed = True + ref_count = refMetric.shape[0] + dut_count = dutMetric.shape[0] + # If filters are active, frame count mismatch is expected (DUT has fewer tests than reference). + # Skip the warning and don't treat it as a failure in this case. + if getattr(self.args, "filter_display", None): + skip_msg = ( + f"[{tag}] {metricLabel} corridor comparison skipped (filtered test set: ref_count={ref_count}, dut_count={dut_count})" + ) + self.appendRunlog(context=skip_msg) + corridor_failed = False + else: + ref_preview = ", ".join(refTags[:3]) if ref_count else "" + dut_preview = ", ".join(dutTags[:3]) if dut_count else "" + warn_msg = ( + f"Warning: {tag} {metricLabel} corridor comparison skipped because reference and DUT frame tags do not match " + f"(ref_count={ref_count}, dut_count={dut_count}, ref_first=[{ref_preview}], dut_first=[{dut_preview}])." + ) + print(f"\033[93m{warn_msg}\033[00m") + self.appendRunlog(context=warn_msg) + self.appendFailed(context=warn_msg) + corridor_failed = True return not corridor_failed @@ -1485,16 +1817,16 @@ class MLDConformance: max_mld_value = None keys = IVAS_Bins.keys() if selectTag == "all" else [selectTag] for tag in keys: - if os.path.exists(self.mldcsv[tag]): + mdlCutWithTags = None + if os.path.exists(self.mldcsv[tag]) and os.path.getsize(self.mldcsv[tag]) > 0: mdlCutWithTags = np.loadtxt( self.mldcsv[tag], delimiter=",", dtype=[("MLD", "f8"), ("pyTestTag", " 0: bePercent = np.loadtxt( self.sampleStats[tag], delimiter=",", @@ -1511,43 +1843,49 @@ class MLDConformance: ) bePercent = np.atleast_1d(bePercent) maxDiff = np.atleast_1d(maxDiff) - if bePercent.size == 0 or maxDiff.size == 0: - continue - - bePercentAvg = np.average(bePercent) - maxDiffmax = np.max(maxDiff) * 32768.0 - mdlValues = mdlCutWithTags["MLD"] - N = mdlValues.shape[0] - if N == 0: - continue - tag_max_mld = float(mdlValues.max()) - if max_mld_value is None: - max_mld_value = tag_max_mld - else: - max_mld_value = max(max_mld_value, tag_max_mld) - m0 = np.sum(mdlValues == 0) - m05 = np.sum(mdlValues <= 0.5) - m1 = np.sum(mdlValues <= 1.0) - m2 = np.sum(mdlValues <= 2.0) - m5 = np.sum(mdlValues <= 5.0) - - PCNT = lambda num: int(1000 * num / N) / 10.0 - print(f"\n##########################################################") - print(f"<{tag}> Total Frames: {N}") - print(f"<{tag}> MAX MLD across all frames : {mdlValues.max()}") - print(f"<{tag}> Frames with MLD == 0 : {m0} frames ({PCNT(m0)}%)") - print(f"<{tag}> Frames with MLD <= 0.5 : {m05} frames ({PCNT(m05)}%)") - print(f"<{tag}> Frames with MLD <= 1 : {m1} frames ({PCNT(m1)}%)") - print(f"<{tag}> Frames with MLD <= 2 : {m2} frames ({PCNT(m2)}%)") - print(f"<{tag}> Frames with MLD <= 5 : {m5} frames ({PCNT(m5)}%)") - print(f"<{tag}> BE samples percentage = {bePercentAvg}") - print( - f"<{tag}> max absolute diff = {maxDiffmax}, sample range (-32768, 32767)" - ) - print("##########################################################\n") + if bePercent.size > 0 and maxDiff.size > 0: + bePercentAvg = np.average(bePercent) + maxDiffmax = np.max(maxDiff) * 32768.0 + mdlValues = mdlCutWithTags["MLD"] + N = mdlValues.shape[0] + if N > 0: + tag_max_mld = float(mdlValues.max()) + if max_mld_value is None: + max_mld_value = tag_max_mld + else: + max_mld_value = max(max_mld_value, tag_max_mld) + m0 = np.sum(mdlValues == 0) + m05 = np.sum(mdlValues <= 0.5) + m1 = np.sum(mdlValues <= 1.0) + m2 = np.sum(mdlValues <= 2.0) + m5 = np.sum(mdlValues <= 5.0) + + PCNT = lambda num: int(1000 * num / N) / 10.0 + print(f"\n##########################################################") + print(f"<{tag}> Total Frames: {N}") + print(f"<{tag}> MAX MLD across all frames : {mdlValues.max()}") + print( + f"<{tag}> Frames with MLD == 0 : {m0} frames ({PCNT(m0)}%)" + ) + print( + f"<{tag}> Frames with MLD <= 0.5 : {m05} frames ({PCNT(m05)}%)" + ) + print( + f"<{tag}> Frames with MLD <= 1 : {m1} frames ({PCNT(m1)}%)" + ) + print( + f"<{tag}> Frames with MLD <= 2 : {m2} frames ({PCNT(m2)}%)" + ) + print( + f"<{tag}> Frames with MLD <= 5 : {m5} frames ({PCNT(m5)}%)" + ) + print(f"<{tag}> BE samples percentage = {bePercentAvg}") + print( + f"<{tag}> max absolute diff = {maxDiffmax}, sample range (-32768, 32767)" + ) if self.args.regenerate_mld_ref: - # Directly write DUT MLD values to mld_ref2 without reference comparison + # Directly write DUT MLD values to mld_ref2 without reference comparison. new_mld_dir = os.path.join(self.testvDir, "mld_ref2") if not os.path.exists(new_mld_dir): os.makedirs(new_mld_dir, exist_ok=True) @@ -1567,7 +1905,12 @@ class MLDConformance: dtype=[("MLD", "f8"), ("pyTestTag", " 0: + masaMDCutWithTags = np.loadtxt( + self.masaMDcsv[tag], + delimiter=",", + dtype=[("MASA", "f8"), ("pyTestTag", " 0: + masaMDValues = masaMDCutWithTags["MASA"] + nMasaMD = masaMDValues.shape[0] + if nMasaMD > 0: + m0 = np.sum(masaMDValues == 0) + m001 = np.sum(masaMDValues <= 0.01) + m01 = np.sum(masaMDValues <= 0.1) + m1 = np.sum(masaMDValues <= 1.0) + + PCNT = lambda num: int(1000 * num / nMasaMD) / 10.0 + print(f"<{tag}> Total MASA Frames: {nMasaMD}") + print(f"<{tag}> MAX MASA metadata diff across all frames : {masaMDValues.max()}") + print( + f"<{tag}> Frames with MASA metadata diff == 0 : {m0} frames ({PCNT(m0)}%)" + ) + print( + f"<{tag}> Frames with MASA metadata diff <= 0.01 : {m001} frames ({PCNT(m001)}%)" + ) + print( + f"<{tag}> Frames with MASA metadata diff <= 0.1 : {m01} frames ({PCNT(m01)}%)" + ) + print( + f"<{tag}> Frames with MASA metadata diff <= 1 : {m1} frames ({PCNT(m1)}%)" + ) + print("##########################################################\n") + + if self.args.regenerate_mld_ref: + # Regeneration gate for MASA refs: require that at least one expected + # metadata sidecar (.met) exists for this tag in the selected run. + if self._hasAnyProducedMasaMDMetFile(tag): + new_mld_dir = os.path.join(self.testvDir, "mld_ref2") + if not os.path.exists(new_mld_dir): + os.makedirs(new_mld_dir, exist_ok=True) + refMasaMDFile2 = os.path.join( + self.testvDir, "mld_ref2", ReferenceMasaMDFiles[tag] + ) + with open(refMasaMDFile2, "w") as f: + if masaMDCutWithTags is not None and masaMDCutWithTags.size > 0: + np.savetxt(f, masaMDCutWithTags, fmt="%s", delimiter=",") + elif masaMDCutWithTags is not None and masaMDCutWithTags.size > 0: + refMasaMDFile = os.path.join( + self.testvDir, "mld_ref", ReferenceMasaMDFiles[tag] + ) + if os.path.exists(refMasaMDFile): + masaMDRefWithTags = np.loadtxt( + refMasaMDFile, + delimiter=",", + dtype=[("MASA", "f8"), ("pyTestTag", " Date: Sun, 29 Mar 2026 13:20:21 +0200 Subject: [PATCH 2/9] conformance: report MASA results and enable REND met-trigger --- scripts/ivas_conformance/runConformance.py | 48 ++++++++++++++++------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py index 65cc7eb5a..f998c0e6f 100644 --- a/scripts/ivas_conformance/runConformance.py +++ b/scripts/ivas_conformance/runConformance.py @@ -696,12 +696,19 @@ class MLDConformance: if mld_error is not None: return (None, None, mld_error, None) - if tag in {"DEC", "REND"} and "EXT" in self._outputFormatsInCommand(testDesc.rawCmdline): + refMdFile = testDesc.refOutput + ".met" + dutMdFile = testDesc.dutOutput + ".met" + runMasaMD = ( + (tag == "DEC" and "EXT" in self._outputFormatsInCommand(testDesc.rawCmdline)) + or (tag == "REND" and (os.path.exists(refMdFile) or os.path.exists(dutMdFile))) + ) + + if runMasaMD: masaMD_scores, masaMD_error = self.masaMD( tag, dutPytestTag, - refMdFile=testDesc.refOutput + ".met", - dutMdFile=testDesc.dutOutput + ".met", + refMdFile=refMdFile, + dutMdFile=dutMdFile, ) if masaMD_error is not None: return (None, None, masaMD_error, None) @@ -1324,21 +1331,33 @@ class MLDConformance: self.doBEanalysis(selectTag=tag) analysis_ok = True corridor_fail_count = 0 + masa_comparison_done = False + masa_corridor_fail_count = 0 else: - analysis_ok, corridor_fail_count, _ = self.doAnalysis(selectTag=tag) + ( + analysis_ok, + corridor_fail_count, + _, + masa_comparison_done, + masa_corridor_fail_count, + ) = self.doAnalysis(selectTag=tag) if self.args.regenerate_mld_ref: return command_fail_count == 0 and analysis_ok + result_details = ( + f"ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, " + f"MLD CORRIDOR FAILURES={failure_count}" + ) + if masa_comparison_done: + result_details += f", MASA MD CORRIDOR FAILURES={masa_corridor_fail_count}" + + print() if command_fail_count == 0 and failure_count == 0 and analysis_ok: - print( - f"[{tag}] OK (ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, MLD CORRIDOR FAILURES={failure_count})\n" - ) + print(f"[{tag}] OK ({result_details})\n") return True - print( - f"[{tag}] FAILED (ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, MLD CORRIDOR FAILURES={failure_count})" - ) + print(f"[{tag}] FAILED ({result_details})") if worst_failure is not None: print( f"[{tag}] Worst MLD corridor failure: {worst_failure['prefix']} {worst_failure['tag']} " @@ -1814,6 +1833,8 @@ class MLDConformance: def doAnalysis(self, selectTag="all"): all_ok = True corridor_fail_count = 0 + masa_corridor_fail_count = 0 + masa_comparison_done = False max_mld_value = None keys = IVAS_Bins.keys() if selectTag == "all" else [selectTag] for tag in keys: @@ -1972,6 +1993,7 @@ class MLDConformance: if masaMDCutWithTags is not None and masaMDCutWithTags.size > 0: np.savetxt(f, masaMDCutWithTags, fmt="%s", delimiter=",") elif masaMDCutWithTags is not None and masaMDCutWithTags.size > 0: + masa_comparison_done = True refMasaMDFile = os.path.join( self.testvDir, "mld_ref", ReferenceMasaMDFiles[tag] ) @@ -1991,6 +2013,7 @@ class MLDConformance: ) all_ok = all_ok and corridor_ok corridor_fail_count += int(not corridor_ok) + masa_corridor_fail_count += int(not corridor_ok) else: missing_msg = f"Missing reference MASA file for {tag} : {refMasaMDFile}" print(f"\033[91m{missing_msg} \033[00m") @@ -1998,8 +2021,9 @@ class MLDConformance: self.appendFailed(context=missing_msg) all_ok = False corridor_fail_count += 1 + masa_corridor_fail_count += 1 - return all_ok, corridor_fail_count, max_mld_value + return all_ok, corridor_fail_count, max_mld_value, masa_comparison_done, masa_corridor_fail_count if __name__ == "__main__": @@ -2133,7 +2157,7 @@ if __name__ == "__main__": tag_results = {} for tag in testTags: if args.report_only: - tag_ok, _, _ = conformance.doAnalysis(selectTag=tag) + tag_ok, _, _, _, _ = conformance.doAnalysis(selectTag=tag) elif not args.analyse: tag_ok = conformance.runTag(tag) else: -- GitLab From 4f27f91a3e6da6342b6f50a38408da6587ef4571 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Sun, 29 Mar 2026 13:20:52 +0200 Subject: [PATCH 3/9] docs: clarify non-BE MLD and MASA MD analysis flow --- scripts/ivas_conformance/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/ivas_conformance/README.md b/scripts/ivas_conformance/README.md index d56974ef0..21b822094 100644 --- a/scripts/ivas_conformance/README.md +++ b/scripts/ivas_conformance/README.md @@ -145,7 +145,8 @@ Analysing tests for ISAR (1252 tests) ### Perform the MLD based non-BE analysis on the CUT outputs on reference platform (Ubuntu 24.04) -The MLD-based non-BE analysis is performed to the CUT outputs with the command below. Encoded outputs will be decoded using the reference decoder executables as part of the process. The MLD analysis is then performed between the CUT and reference decoded outputs (only ".wav" files are compared). Comparison to MLD corridor is also done as part of this process. An example passing output is shown below. If all test sets print `MLD Corridor passed for...` and there were no non-BE metadata comparisons in BE-test, then CUT outputs are Non-BE conformant. +The non-BE analysis below compares CUT and reference outputs by running MLD on audio (`.wav`) and, when MASA metadata are generated, for the matching reference/DUT `.met` files. For encoder tests, encoded CUT bitstreams are first decoded with the reference decoder before analysis. Per-frame MLD and MASA metadata values are written to `scripts/CUT_OUTPUTS` and checked against corridor references in `testvec/testv/mld_ref` (`mld_ref_.csv` and `masa_ref_.csv`). + ```shell PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --ref_build_path=testvec/bin --analyse -- GitLab From d9c0afe190b5ebef2f1b50c64927a7aff26381d2 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Fri, 10 Apr 2026 14:49:10 +0200 Subject: [PATCH 4/9] replace shlex with subprocess.run --- scripts/ivas_conformance/runConformance.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py index ad0d89487..fb9106fab 100644 --- a/scripts/ivas_conformance/runConformance.py +++ b/scripts/ivas_conformance/runConformance.py @@ -38,20 +38,18 @@ import re import numpy as np import subprocess import tempfile +import filecmp import sys -import shlex from typing import Tuple from multiprocessing import Pool +import warnings +import math from dataclasses import dataclass from typing import Union import shutil import scipy.io.wavfile as wav import warnings import math -import scipy.signal as sig -import filecmp -import time - sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")) @@ -1529,14 +1527,13 @@ class MLDConformance: tmpdir, f"{tempfile.gettempprefix()}_{tag}_{pytestTag}_masa.csv" ) command = [self.masadiffbin, "--csv", masaMDCsvFile, refMdFile, dutMdFile] - command_str = " ".join(shlex.quote(x) for x in command) + command_str = " ".join(command) c = subprocess.run( - command_str, + command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, - shell=True, ) tool_output = c.stdout or "" -- GitLab From a0745a93711e3feae202073fe44ff37fad0d64e2 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Fri, 10 Apr 2026 15:05:34 +0200 Subject: [PATCH 5/9] cleanup duplicate imports and restore missing symbols --- scripts/ivas_conformance/runConformance.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py index fb9106fab..14841530b 100644 --- a/scripts/ivas_conformance/runConformance.py +++ b/scripts/ivas_conformance/runConformance.py @@ -40,6 +40,7 @@ import subprocess import tempfile import filecmp import sys +import time from typing import Tuple from multiprocessing import Pool import warnings @@ -48,8 +49,7 @@ from dataclasses import dataclass from typing import Union import shutil import scipy.io.wavfile as wav -import warnings -import math +import scipy.signal as sig sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")) @@ -197,6 +197,8 @@ IVAS_Bins = { "ISAR": "ISAR_post_rend", } +DECODER_OUTPUT_FORMATS = {"MONO", "STEREO", "EXT"} + def validate_build_binaries(parser, build_path: str, build_label: str) -> None: """Validate that a build path exists and contains all IVAS binaries.""" -- GitLab From ea312fbc2dd76475f24a384d5edc3cf8f418eebc Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Mon, 13 Apr 2026 09:52:42 +0200 Subject: [PATCH 6/9] renaming --- scripts/ivas_conformance/runConformance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py index 14841530b..af8d67b22 100644 --- a/scripts/ivas_conformance/runConformance.py +++ b/scripts/ivas_conformance/runConformance.py @@ -1507,7 +1507,7 @@ class MLDConformance: dut_exists = os.path.exists(dutMdFile) if not ref_exists and not dut_exists: - # MASA metadata sidecars are optional for some test/output combinations. + # MASA metadata companion files (.met) are optional for some test/output combinations. # If both sides are missing, skip MASA diff for this test. return np.zeros(0, dtype=float), None @@ -1985,7 +1985,7 @@ class MLDConformance: if self.args.regenerate_mld_ref: # Regeneration gate for MASA refs: require that at least one expected - # metadata sidecar (.met) exists for this tag in the selected run. + # metadata companion file (.met) exists for this tag in the selected run. if self._hasAnyProducedMasaMDMetFile(tag): new_mld_dir = os.path.join(self.testvDir, "mld_ref2") if not os.path.exists(new_mld_dir): -- GitLab From 5185f537c8585b1a4888bdbb9de2445c9b591d28 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Mon, 13 Apr 2026 20:28:15 +0200 Subject: [PATCH 7/9] fix: BE test summary reports FAILED when non-BE tests are present --- scripts/ivas_conformance/runConformance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py index af8d67b22..76e6e98ed 100644 --- a/scripts/ivas_conformance/runConformance.py +++ b/scripts/ivas_conformance/runConformance.py @@ -1321,7 +1321,7 @@ class MLDConformance: if self.args.be_test: self.doBEanalysis(selectTag=tag) - analysis_ok = True + analysis_ok = non_be_count == 0 corridor_fail_count = 0 masa_comparison_done = False masa_corridor_fail_count = 0 -- GitLab From 39be217b568933255c89599e964cbabfe22b20c8 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Mon, 13 Apr 2026 20:41:01 +0200 Subject: [PATCH 8/9] refine BE conformance summary output --- scripts/ivas_conformance/runConformance.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py index 76e6e98ed..25468facf 100644 --- a/scripts/ivas_conformance/runConformance.py +++ b/scripts/ivas_conformance/runConformance.py @@ -1320,8 +1320,7 @@ class MLDConformance: self.flushErrorBlocks() if self.args.be_test: - self.doBEanalysis(selectTag=tag) - analysis_ok = non_be_count == 0 + analysis_ok, be_failure_csv = self.doBEanalysis(selectTag=tag) corridor_fail_count = 0 masa_comparison_done = False masa_corridor_fail_count = 0 @@ -1350,7 +1349,9 @@ class MLDConformance: return True print(f"[{tag}] FAILED ({result_details})") - if worst_failure is not None: + if self.args.be_test and be_failure_csv is not None: + print(f"[{tag}] BE test failed, check {be_failure_csv}") + elif worst_failure is not None: print( f"[{tag}] Worst MLD corridor failure: {worst_failure['prefix']} {worst_failure['tag']} " f"(NON-BE, MLD_MAX={worst_failure['mld']})" @@ -1776,9 +1777,10 @@ class MLDConformance: usecols=1, ) if np.sum(BEresult) > 0: - print(f"<{tag}> FAILED BE TEST, check {self.BEcsv[tag]}") - else: - print(f"<{tag}> PASSED BE TEST") + return False, self.BEcsv[tag] + return True, None + + return True, None def computeCorridor( self, -- GitLab From 07b2d08086853b4d95b534bd01463207fb8cc477 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Mon, 13 Apr 2026 20:43:32 +0200 Subject: [PATCH 9/9] omit MLD corridor count from BE summary --- scripts/ivas_conformance/runConformance.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py index 25468facf..1a0935353 100644 --- a/scripts/ivas_conformance/runConformance.py +++ b/scripts/ivas_conformance/runConformance.py @@ -1336,10 +1336,9 @@ class MLDConformance: if self.args.regenerate_mld_ref: return command_fail_count == 0 and analysis_ok - result_details = ( - f"ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, " - f"MLD CORRIDOR FAILURES={failure_count}" - ) + result_details = f"ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}" + if not self.args.be_test: + result_details += f", MLD CORRIDOR FAILURES={failure_count}" if masa_comparison_done: result_details += f", MASA MD CORRIDOR FAILURES={masa_corridor_fail_count}" -- GitLab