From 3dad267c3b637bfc32022114375ea2a296a8d1ea Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@dolby.com>
Date: Sun, 29 Mar 2026 13:19:43 +0200
Subject: [PATCH 1/9] conformance: add MASA MD analysis and references

---
 scripts/ivas_conformance/runConformance.py | 551 ++++++++++++++++++---
 1 file changed, 488 insertions(+), 63 deletions(-)

diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py
index ecf964b73..65cc7eb5a 100644
--- a/scripts/ivas_conformance/runConformance.py
+++ b/scripts/ivas_conformance/runConformance.py
@@ -31,6 +31,7 @@ the United Nations Convention on Contracts on the International Sales of Goods.
 """
 
 import argparse
+import csv
 import os
 import platform
 import re
@@ -38,6 +39,7 @@ import numpy as np
 import subprocess
 import tempfile
 import sys
+import shlex
 from typing import Tuple
 from multiprocessing import Pool
 from dataclasses import dataclass
@@ -229,6 +231,14 @@ ReferenceMldFiles = {
     "ISAR": "mld_ref_ISAR.csv",
 }
 
+ReferenceMasaMDFiles = {  # per-tag MASA metadata reference CSVs, looked up under <testvDir>/mld_ref
+    "ENC": "masa_ref_ENC.csv",
+    "DEC": "masa_ref_DEC.csv",
+    "REND": "masa_ref_REND.csv",
+}
+
+PROGRAM_VERSION = "4.0"  # printed by the --version command-line option
+
 
 @dataclass
 class TestDescriptor:
@@ -368,14 +378,24 @@ class MLDConformance:
             if exe_platform == "Windows":
                 exe_platform = "Win32"  
                 self.wavdiffbin = os.path.join(self.toolsdir, exe_platform, "wav-diff")          
+
+        if exe_platform == "Windows" or exe_platform == "Win64" or exe_platform == "Win32":
+            self.masadiffbin = os.path.join(self.toolsdir, "Win32", "masaDiffTool.exe")
+        elif exe_platform == "Darwin":
+            self.masadiffbin = os.path.join(self.toolsdir, "Darwin", "masaDiffTool")
+        else:
+            self.masadiffbin = os.path.join(self.toolsdir, "Linux", "masaDiffTool")
+
         self.CutBins = dict()
         self.mldcsv = dict()
+        self.masaMDcsv = dict()
         self.BEcsv = dict()
         self.sampleStats = dict()
 
         for tag in IVAS_Bins.keys():
             self.CutBins[tag] = os.path.join(self.cut_build_path, IVAS_Bins[tag])
             self.mldcsv[tag] = os.path.join(self.outputDir, f"mld_{tag}.csv")
+            self.masaMDcsv[tag] = os.path.join(self.outputDir, f"masa_{tag}.csv")
             self.BEcsv[tag] = os.path.join(self.outputDir, f"BE_{tag}.csv")
             self.sampleStats[tag] = os.path.join(
                 self.outputDir, f"sampleStats_{tag}.csv"
@@ -675,6 +695,31 @@ class MLDConformance:
                 )
                 if mld_error is not None:
                     return (None, None, mld_error, None)
+
+                if tag in {"DEC", "REND"} and "EXT" in self._outputFormatsInCommand(testDesc.rawCmdline):
+                    masaMD_scores, masaMD_error = self.masaMD(
+                        tag,
+                        dutPytestTag,
+                        refMdFile=testDesc.refOutput + ".met",
+                        dutMdFile=testDesc.dutOutput + ".met",
+                    )
+                    if masaMD_error is not None:
+                        return (None, None, masaMD_error, None)
+                    if masaMD_scores is not None and masaMD_scores.size > 0:
+                        masaMDWithTags = np.column_stack(
+                            (
+                                masaMD_scores,
+                                np.array(
+                                    [
+                                        f"{dutPytestTag}-FRM{x:05d}"
+                                        for x in range(masaMD_scores.size)
+                                    ]
+                                ),
+                            )
+                        )
+                        with open(self.masaMDcsv[tag], "ab") as f:
+                            np.savetxt(f, masaMDWithTags, fmt="%s", delimiter=",")
+
                 return (non_be, max_mld, None, None)
             return (non_be, None, None, None)
 
@@ -728,6 +773,31 @@ class MLDConformance:
             )
             if mld_error is not None:
                 return (None, None, mld_error, dutDecCmd)
+
+            if "EXT" in self._outputFormatsInCommand(testDesc.rawCmdline):
+                masaMD_scores, masaMD_error = self.masaMD(
+                    tag,
+                    encPytestTag,
+                    refMdFile=refDecOutputFile + ".met",
+                    dutMdFile=dutDecOutputFile + ".met",
+                )
+                if masaMD_error is not None:
+                    return (None, None, masaMD_error, dutDecCmd)
+                if masaMD_scores is not None and masaMD_scores.size > 0:
+                    masaMDWithTags = np.column_stack(
+                        (
+                            masaMD_scores,
+                            np.array(
+                                [
+                                    f"{encPytestTag}-FRM{x:05d}"
+                                    for x in range(masaMD_scores.size)
+                                ]
+                            ),
+                        )
+                    )
+                    with open(self.masaMDcsv[tag], "ab") as f:
+                        np.savetxt(f, masaMDWithTags, fmt="%s", delimiter=",")
+
             return (non_be, max_mld, None, dutDecCmd)
 
     def analyseOneIsarEncoderTest(self, tag: str, pytestTag: str):
@@ -931,6 +1001,139 @@ class MLDConformance:
     def analyseOneCommandFromTuple(self, args):
         return self.analyseOneCommand(*args)
 
+    def _extractKbpsValues(self, rawCmdline: str) -> list[float]:
+        """Return every '<N>_kbps' value found in the command line, in order of appearance.
+
+        Matching is case-insensitive; '_' inside the number reads as a decimal point ('13_2_kbps' -> 13.2)."""
+        tokens = re.findall(r"(\d+(?:_\d+)?)_kbps", rawCmdline.lower())
+        return [float(token.replace("_", ".")) for token in tokens]
+
+    def _isBitrateAtMost80(self, rawCmdline: str) -> bool:
+        """Check that the command line names a bitrate and that none exceeds 80 kbps.
+
+        The maximum over all extracted values is tested, so a switching test such as
+        'from_32_kbps_to_96_kbps' fails if either endpoint is above 80 kbps."""
+        values = self._extractKbpsValues(rawCmdline)
+        return bool(values) and max(values) <= 80.0
+
+    def _outputFormatsInCommand(self, rawCmdline: str) -> set[str]:
+        """Collect the decoder output formats named in a test command line.
+
+        Matching is case-insensitive and 'EXTERNAL' is normalised to 'EXT'; tokens
+        found by the regex are kept only when listed in DECODER_OUTPUT_FORMATS.
+        """
+        upper_cmd = rawCmdline.upper()
+        found = set()
+
+        # A format token sits between '_out_' and the next '_out', which keeps
+        # input-format words out: '..._48kHz_out_MONO_out_...' yields MONO only.
+        for hit in re.finditer(r"_OUT_([A-Z0-9_]+?)_OUT(?:\b|_)", upper_cmd):
+            token = hit.group(1)
+            name = "EXT" if token == "EXTERNAL" else token
+            if name in DECODER_OUTPUT_FORMATS:
+                found.add(name)
+
+        # Key on EXT_OUT-style naming so that words like EXTENDED do not match.
+        ext_markers = ("_EXT_OUT", "_EXTERNAL_OUT", " EXT_OUT", " EXTERNAL_OUT")
+        if any(marker in upper_cmd for marker in ext_markers):
+            found.add("EXT")
+
+        return found
+
+    def _matchesAllTerms(self, rawCmdline: str, terms: list[str]) -> bool:
+        text = rawCmdline.lower()  # terms are matched as case-insensitive substrings
+        return all(term.lower() in text for term in terms)  # an empty term list yields True
+
+    def _matchesAnyTerm(self, rawCmdline: str, terms: list[str]) -> bool:
+        text = rawCmdline.lower()  # terms are matched as case-insensitive substrings
+        return any(term.lower() in text for term in terms)  # an empty term list yields False
+
+    def _expectedMasaMDMetFiles(self, tag: str) -> list[str]:
+        """List the DUT-side '.met' paths expected for the selected tests of a tag.
+
+        ENC paths are derived from dutOutput with '.192' replaced by
+        '_CUT_REFDECODED.wav'; DEC and REND use dutOutput as is. Other tags give [].
+        """
+        if tag not in {"ENC", "DEC", "REND"}:
+            return []
+
+        wanted_type = BitstreamTestDescriptor if tag == "ENC" else TestDescriptor
+        sidecars = []
+        for test_id in self.getSelectedTestsForTag(tag):
+            desc = self.TestDesc[tag][test_id]
+            if not isinstance(desc, wanted_type):
+                continue  # descriptor of another kind: nothing to expect
+            base = desc.dutOutput
+            if tag == "ENC":
+                base = base.replace(".192", "_CUT_REFDECODED.wav")
+            sidecars.append(base + ".met")
+        return sidecars
+
+    def _hasAnyProducedMasaMDMetFile(self, tag: str) -> bool:
+        return any(map(os.path.exists, self._expectedMasaMDMetFiles(tag)))  # True once one expected .met exists
+
+    def _matchesLevel1(self, tag: str, rawCmdline: str) -> bool:
+        """Decide whether a test belongs to the LEVEL1 subset for a tag.
+
+        ENC defers to _isBitrateAtMost80(). DEC needs MONO or STEREO output, or EXT
+        output passing the same bitrate check, and any requested decoder format.
+        """
+        if tag == "ENC":
+            return self._isBitrateAtMost80(rawCmdline)
+        if tag != "DEC":
+            # For REND/ISAR/ISAR_ENC under LEVEL1, tag-level inclusion is decided at testTags parsing.
+            return True
+
+        out_formats = self._outputFormatsInCommand(rawCmdline)
+        wanted = set(getattr(self.args, "filter_decoder_formats", []))
+        level1_ok = bool(out_formats & {"MONO", "STEREO"}) or (
+            "EXT" in out_formats and self._isBitrateAtMost80(rawCmdline)
+        )
+        # Plain decoder format tokens are restrictive under LEVEL1.
+        if wanted and not out_formats & wanted:
+            return False
+        return level1_ok
+
+    def _testPassesFilter(self, tag: str, rawCmdline: str) -> bool:
+        """Decide whether one test survives the filters stored on self.args.
+
+        A '-' term match always excludes the test. Otherwise the test is kept
+        when it satisfies the level, decoder-format and restrictive-term
+        filters together, or when it matches any '+' term.
+        """
+        opts = self.args
+        level = getattr(opts, "filter_level", "LEVEL3")
+        must_have = getattr(opts, "filter_restrictive_terms", [])
+        extras = getattr(opts, "filter_add_terms", [])
+        banned = getattr(opts, "filter_remove_terms", [])
+        wanted_formats = set(getattr(opts, "filter_decoder_formats", []))
+
+        # '-' terms always remove tests from the final selection.
+        if banned and self._matchesAnyTerm(rawCmdline, banned):
+            return False
+
+        # Base selection: every active filter has to agree.
+        checks = [level != "LEVEL1" or self._matchesLevel1(tag, rawCmdline)]
+        if tag in {"ENC", "DEC"} and wanted_formats:
+            in_cmd = self._outputFormatsInCommand(rawCmdline)
+            checks.append(bool(in_cmd & wanted_formats))
+        checks.append(self._matchesAllTerms(rawCmdline, must_have))
+
+        if all(checks):
+            return True
+
+        # '+' terms add tests even if they fail restrictive filters.
+        added_back = bool(extras) and self._matchesAnyTerm(rawCmdline, extras)
+        return added_back
+
+    def getSelectedTestsForTag(self, tag: str) -> list[str]:
+        """Return the pytest tags of a tag group whose command line passes the active filters."""
+        return [
+            test_id
+            for test_id, desc in self.TestDesc[tag].items()
+            if self._testPassesFilter(tag, desc.rawCmdline)
+        ]
+
     def runTag(self, tag: str) -> bool:
         failed_before = self.getFailedCommandCount()
         selectedTests = list()
@@ -1018,6 +1221,7 @@ class MLDConformance:
                 f.write(f"PYTESTTAG, BE=0  NON-BE=1\n")
         else:
             open(self.mldcsv[tag], "w").close()
+            open(self.masaMDcsv[tag], "w").close()
             with open(self.sampleStats[tag], "w") as f:
                 f.write(f"PYTESTTAG, MAXDIFF, RMSdB, BEFRAMES_PERCENT, MAX_MLD\n")
 
@@ -1244,6 +1448,117 @@ class MLDConformance:
             )
         return None
 
+    def _parseMasaMDFrameScores(self, csvFile: str) -> np.ndarray:
+        """Reduce a masaDiffTool CSV to one score per frame.
+
+        The first row is treated as a header and the first column of each row is
+        skipped; the score of a row is the largest absolute value among its
+        remaining numeric cells. Blank or non-numeric cells are ignored, and rows
+        without any numeric cell contribute no score.
+        """
+
+        def magnitudes(cells):
+            # Yield |value| for every cell that parses as a float.
+            for cell in cells:
+                try:
+                    yield abs(float(cell.strip()))
+                except ValueError:
+                    continue
+
+        scores = []
+        with open(csvFile, "r", newline="") as fd:
+            rows = csv.reader(fd)
+            next(rows, None)  # skip the header row
+            for row in rows:
+                # One scalar per frame keeps corridor handling identical to the
+                # one-value-per-frame MLD pipeline.
+                row_values = list(magnitudes(row[1:]))
+                if row_values:
+                    scores.append(max(row_values))
+
+        return np.array(scores, dtype=float)
+
+    def masaMD(self, tag: str, pytestTag: str, refMdFile: str, dutMdFile: str):
+        """Compare reference and DUT MASA metadata files with masaDiffTool.
+
+        Returns (frame_scores, None) on success, (empty array, None) when neither
+        .met file exists, and (None, (command_or_message, output)) on failure;
+        every failure is also recorded through appendFailed().
+        """
+        where = f"[{tag}:{pytestTag}]"
+        ref_exists = os.path.exists(refMdFile)
+        dut_exists = os.path.exists(dutMdFile)
+
+        if not ref_exists and not dut_exists:
+            # MASA metadata sidecars are optional for some test/output combinations.
+            # If both sides are missing, skip MASA diff for this test.
+            return np.zeros(0, dtype=float), None
+
+        if ref_exists != dut_exists:
+            msg = (
+                f"MASA Metadata file mismatch: ref={refMdFile} (exists={ref_exists}), "
+                f"dut={dutMdFile} (exists={dut_exists})"
+            )
+            self.appendFailed(context=f"{where} {msg}")
+            return None, (msg, "")
+
+        if not os.path.exists(self.masadiffbin):
+            msg = f"masaDiffTool not found: {self.masadiffbin}"
+            self.appendFailed(context=f"{where} {msg}")
+            return None, (msg, "")
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            masaMDCsvFile = os.path.join(
+                tmpdir, f"{tempfile.gettempprefix()}_{tag}_{pytestTag}_masa.csv"
+            )
+            command = [self.masadiffbin, "--csv", masaMDCsvFile, refMdFile, dutMdFile]
+            # POSIX-quoted rendering, kept for the run log and failure reports only.
+            command_str = shlex.join(command)
+
+            # Execute the argument list directly instead of a shell string:
+            # shlex quoting is POSIX-only, so a shell=True invocation cannot
+            # run under cmd.exe on Windows once a path gets single-quoted.
+            try:
+                c = subprocess.run(
+                    command,
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.STDOUT,
+                    text=True,
+                )
+            except OSError as exc:
+                # Without a shell, a launch failure raises instead of yielding a non-zero rc.
+                msg = f"masaDiffTool could not be executed: {exc}"
+                self.appendFailed(context=f"{where} {msg}", command=command_str, output=msg)
+                return None, (command_str, msg)
+
+            tool_output = c.stdout or ""
+            self.appendRunlog(
+                context=f"{where} masaDiffTool (rc={c.returncode})",
+                command=command_str,
+                output=tool_output,
+            )
+
+            if not os.path.exists(masaMDCsvFile):
+                ctx = f"{where} masaDiffTool did not produce CSV output (rc={c.returncode})"
+                self.appendFailed(context=ctx, command=command_str, output=tool_output)
+                return None, (command_str, tool_output)
+
+            try:
+                frame_scores = self._parseMasaMDFrameScores(masaMDCsvFile)
+            except Exception as exc:
+                err_msg = f"Failed to parse masaDiffTool CSV '{masaMDCsvFile}': {exc}"
+                self.appendFailed(context=f"{where} {err_msg}", command=command_str, output=tool_output)
+                return None, (command_str, err_msg)
+
+            # A non-zero return code may only mean that differences were found, which
+            # is valid for non-BE analysis; a missing or empty CSV is the real failure.
+            if frame_scores.size == 0:
+                ctx = f"{where} masaDiffTool produced empty frame scores (rc={c.returncode})"
+                self.appendFailed(context=ctx, command=command_str, output=tool_output)
+                return None, (command_str, tool_output)
+
+            return frame_scores, None
+
     def mld(self, tag, pytestTag, refFile, dutFile):
         mldThisFile = np.zeros(0)
         if not os.path.exists(refFile):
@@ -1442,40 +1757,57 @@ class MLDConformance:
                 else:
                     print(f"<{tag}> PASSED BE TEST")
 
-    def computeCorridor(self, mldRefWithTags, mldCutWithTags, tag, threshold=0.1):
-        indRef = np.argsort(mldRefWithTags["pyTestTag"])
-        indDut = np.argsort(mldCutWithTags["pyTestTag"])
-        refTags = mldRefWithTags["pyTestTag"][indRef]
-        dutTags = mldCutWithTags["pyTestTag"][indDut]
-        refMLD = mldRefWithTags["MLD"][indRef]
-        dutMLD = mldCutWithTags["MLD"][indDut]
-
-        same_shape = refMLD.shape == dutMLD.shape
+    def computeCorridor(
+        self,
+        refWithTags,
+        cutWithTags,
+        tag,
+        threshold=0.1,
+        metricKey="MLD",
+        metricLabel="MLD",
+    ):
+        indRef = np.argsort(refWithTags["pyTestTag"])
+        indDut = np.argsort(cutWithTags["pyTestTag"])
+        refTags = refWithTags["pyTestTag"][indRef]
+        dutTags = cutWithTags["pyTestTag"][indDut]
+        refMetric = refWithTags[metricKey][indRef]
+        dutMetric = cutWithTags[metricKey][indDut]
+
+        same_shape = refMetric.shape == dutMetric.shape
         same_tags = same_shape and np.array_equal(refTags, dutTags)
 
         if same_tags:
-            diff = dutMLD - refMLD
+            diff = dutMetric - refMetric
             maxDiff = float(diff.max()) if diff.size else 0.0
             corridor_failed = maxDiff > threshold
             if corridor_failed:
                 msg = (
-                    f"[{tag}] MLD corridor failed: max(dut-ref)={maxDiff} exceeds threshold={threshold}"
+                    f"[{tag}] {metricLabel} corridor failed: max(dut-ref)={maxDiff} exceeds threshold={threshold}"
                 )
                 self.appendRunlog(context=msg)
                 self.appendFailed(context=msg)
         else:
-            ref_count = refMLD.shape[0]
-            dut_count = dutMLD.shape[0]
-            ref_preview = ", ".join(refTags[:3]) if ref_count else "<empty>"
-            dut_preview = ", ".join(dutTags[:3]) if dut_count else "<empty>"
-            warn_msg = (
-                f"Warning: {tag} corridor comparison skipped because reference and DUT frame tags do not match "
-                f"(ref_count={ref_count}, dut_count={dut_count}, ref_first=[{ref_preview}], dut_first=[{dut_preview}])."
-            )
-            print(f"\033[93m{warn_msg}\033[00m")
-            self.appendRunlog(context=warn_msg)
-            self.appendFailed(context=warn_msg)
-            corridor_failed = True
+            ref_count = refMetric.shape[0]
+            dut_count = dutMetric.shape[0]
+            # If filters are active, frame count mismatch is expected (DUT has fewer tests than reference).
+            # Skip the warning and don't treat it as a failure in this case.
+            if getattr(self.args, "filter_display", None):
+                skip_msg = (
+                    f"[{tag}] {metricLabel} corridor comparison skipped (filtered test set: ref_count={ref_count}, dut_count={dut_count})"
+                )
+                self.appendRunlog(context=skip_msg)
+                corridor_failed = False
+            else:
+                ref_preview = ", ".join(refTags[:3]) if ref_count else "<empty>"
+                dut_preview = ", ".join(dutTags[:3]) if dut_count else "<empty>"
+                warn_msg = (
+                    f"Warning: {tag} {metricLabel} corridor comparison skipped because reference and DUT frame tags do not match "
+                    f"(ref_count={ref_count}, dut_count={dut_count}, ref_first=[{ref_preview}], dut_first=[{dut_preview}])."
+                )
+                print(f"\033[93m{warn_msg}\033[00m")
+                self.appendRunlog(context=warn_msg)
+                self.appendFailed(context=warn_msg)
+                corridor_failed = True
 
         return not corridor_failed
 
@@ -1485,16 +1817,16 @@ class MLDConformance:
         max_mld_value = None
         keys = IVAS_Bins.keys() if selectTag == "all" else [selectTag]
         for tag in keys:
-            if os.path.exists(self.mldcsv[tag]):
+            mdlCutWithTags = None
+            if os.path.exists(self.mldcsv[tag]) and os.path.getsize(self.mldcsv[tag]) > 0:
                 mdlCutWithTags = np.loadtxt(
                     self.mldcsv[tag],
                     delimiter=",",
                     dtype=[("MLD", "f8"), ("pyTestTag", "<U256")],
                 )
                 mdlCutWithTags = np.atleast_1d(mdlCutWithTags)
-                if mdlCutWithTags.size == 0:
-                    continue
 
+            if mdlCutWithTags is not None and mdlCutWithTags.size > 0:
                 bePercent = np.loadtxt(
                     self.sampleStats[tag],
                     delimiter=",",
@@ -1511,43 +1843,49 @@ class MLDConformance:
                 )
                 bePercent = np.atleast_1d(bePercent)
                 maxDiff = np.atleast_1d(maxDiff)
-                if bePercent.size == 0 or maxDiff.size == 0:
-                    continue
-
-                bePercentAvg = np.average(bePercent)
-                maxDiffmax = np.max(maxDiff) * 32768.0
-                mdlValues = mdlCutWithTags["MLD"]
-                N = mdlValues.shape[0]
-                if N == 0:
-                    continue
-                tag_max_mld = float(mdlValues.max())
-                if max_mld_value is None:
-                    max_mld_value = tag_max_mld
-                else:
-                    max_mld_value = max(max_mld_value, tag_max_mld)
-                m0 = np.sum(mdlValues == 0)
-                m05 = np.sum(mdlValues <= 0.5)
-                m1 = np.sum(mdlValues <= 1.0)
-                m2 = np.sum(mdlValues <= 2.0)
-                m5 = np.sum(mdlValues <= 5.0)
-
-                PCNT = lambda num: int(1000 * num / N) / 10.0
-                print(f"\n##########################################################")
-                print(f"<{tag}> Total Frames: {N}")
-                print(f"<{tag}> MAX MLD across all frames : {mdlValues.max()}")
-                print(f"<{tag}> Frames with MLD == 0 : {m0} frames ({PCNT(m0)}%)")
-                print(f"<{tag}> Frames with MLD <= 0.5 : {m05} frames ({PCNT(m05)}%)")
-                print(f"<{tag}> Frames with MLD <= 1 : {m1} frames ({PCNT(m1)}%)")
-                print(f"<{tag}> Frames with MLD <= 2 : {m2} frames ({PCNT(m2)}%)")
-                print(f"<{tag}> Frames with MLD <= 5 : {m5} frames ({PCNT(m5)}%)")
-                print(f"<{tag}> BE samples percentage = {bePercentAvg}")
-                print(
-                    f"<{tag}> max absolute diff = {maxDiffmax}, sample range (-32768, 32767)"
-                )
-                print("##########################################################\n")
+                if bePercent.size > 0 and maxDiff.size > 0:
+                    bePercentAvg = np.average(bePercent)
+                    maxDiffmax = np.max(maxDiff) * 32768.0
+                    mdlValues = mdlCutWithTags["MLD"]
+                    N = mdlValues.shape[0]
+                    if N > 0:
+                        tag_max_mld = float(mdlValues.max())
+                        if max_mld_value is None:
+                            max_mld_value = tag_max_mld
+                        else:
+                            max_mld_value = max(max_mld_value, tag_max_mld)
+                        m0 = np.sum(mdlValues == 0)
+                        m05 = np.sum(mdlValues <= 0.5)
+                        m1 = np.sum(mdlValues <= 1.0)
+                        m2 = np.sum(mdlValues <= 2.0)
+                        m5 = np.sum(mdlValues <= 5.0)
+
+                        PCNT = lambda num: int(1000 * num / N) / 10.0
+                        print(f"\n##########################################################")
+                        print(f"<{tag}> Total Frames: {N}")
+                        print(f"<{tag}> MAX MLD across all frames : {mdlValues.max()}")
+                        print(
+                            f"<{tag}> Frames with MLD == 0 : {m0} frames ({PCNT(m0)}%)"
+                        )
+                        print(
+                            f"<{tag}> Frames with MLD <= 0.5 : {m05} frames ({PCNT(m05)}%)"
+                        )
+                        print(
+                            f"<{tag}> Frames with MLD <= 1 : {m1} frames ({PCNT(m1)}%)"
+                        )
+                        print(
+                            f"<{tag}> Frames with MLD <= 2 : {m2} frames ({PCNT(m2)}%)"
+                        )
+                        print(
+                            f"<{tag}> Frames with MLD <= 5 : {m5} frames ({PCNT(m5)}%)"
+                        )
+                        print(f"<{tag}> BE samples percentage = {bePercentAvg}")
+                        print(
+                            f"<{tag}> max absolute diff = {maxDiffmax}, sample range (-32768, 32767)"
+                        )
 
                 if self.args.regenerate_mld_ref:
-                    # Directly write DUT MLD values to mld_ref2 without reference comparison
+                    # Directly write DUT MLD values to mld_ref2 without reference comparison.
                     new_mld_dir = os.path.join(self.testvDir, "mld_ref2")
                     if not os.path.exists(new_mld_dir):
                         os.makedirs(new_mld_dir, exist_ok=True)
@@ -1567,7 +1905,12 @@ class MLDConformance:
                             dtype=[("MLD", "f8"), ("pyTestTag", "<U256")],
                         )
                         corridor_ok = self.computeCorridor(
-                            mldRefWithTags, mdlCutWithTags, tag
+                            mldRefWithTags,
+                            mdlCutWithTags,
+                            tag,
+                            threshold=0.1,
+                            metricKey="MLD",
+                            metricLabel="MLD",
                         )
                         all_ok = all_ok and corridor_ok
                         corridor_fail_count += int(not corridor_ok)
@@ -1579,13 +1922,95 @@ class MLDConformance:
                         all_ok = False
                         corridor_fail_count += 1
 
-        return all_ok, corridor_fail_count, max_mld_value
+            masaMDCutWithTags = None
+            if os.path.exists(self.masaMDcsv[tag]) and os.path.getsize(self.masaMDcsv[tag]) > 0:
+                masaMDCutWithTags = np.loadtxt(
+                    self.masaMDcsv[tag],
+                    delimiter=",",
+                    dtype=[("MASA", "f8"), ("pyTestTag", "<U256")],
+                )
+                masaMDCutWithTags = np.atleast_1d(masaMDCutWithTags)
+
+            if tag in ReferenceMasaMDFiles:
+                if masaMDCutWithTags is not None and masaMDCutWithTags.size > 0:
+                    masaMDValues = masaMDCutWithTags["MASA"]
+                    nMasaMD = masaMDValues.shape[0]
+                    if nMasaMD > 0:
+                        m0 = np.sum(masaMDValues == 0)
+                        m001 = np.sum(masaMDValues <= 0.01)
+                        m01 = np.sum(masaMDValues <= 0.1)
+                        m1 = np.sum(masaMDValues <= 1.0)
+
+                        PCNT = lambda num: int(1000 * num / nMasaMD) / 10.0
+                        print(f"<{tag}> Total MASA Frames: {nMasaMD}")
+                        print(f"<{tag}> MAX MASA metadata diff across all frames : {masaMDValues.max()}")
+                        print(
+                            f"<{tag}> Frames with MASA metadata diff == 0 : {m0} frames ({PCNT(m0)}%)"
+                        )
+                        print(
+                            f"<{tag}> Frames with MASA metadata diff <= 0.01 : {m001} frames ({PCNT(m001)}%)"
+                        )
+                        print(
+                            f"<{tag}> Frames with MASA metadata  diff <= 0.1 : {m01} frames ({PCNT(m01)}%)"
+                        )
+                        print(
+                            f"<{tag}> Frames with MASA metadata diff <= 1 : {m1} frames ({PCNT(m1)}%)"
+                        )
+                        print("##########################################################\n")
+
+                if self.args.regenerate_mld_ref:
+                    # Regeneration gate for MASA refs: require that at least one expected
+                    # metadata sidecar (.met) exists for this tag in the selected run.
+                    if self._hasAnyProducedMasaMDMetFile(tag):
+                        new_mld_dir = os.path.join(self.testvDir, "mld_ref2")
+                        if not os.path.exists(new_mld_dir):
+                            os.makedirs(new_mld_dir, exist_ok=True)
+                        refMasaMDFile2 = os.path.join(
+                            self.testvDir, "mld_ref2", ReferenceMasaMDFiles[tag]
+                        )
+                        with open(refMasaMDFile2, "w") as f:
+                            if masaMDCutWithTags is not None and masaMDCutWithTags.size > 0:
+                                np.savetxt(f, masaMDCutWithTags, fmt="%s", delimiter=",")
+                elif masaMDCutWithTags is not None and masaMDCutWithTags.size > 0:
+                    refMasaMDFile = os.path.join(
+                        self.testvDir, "mld_ref", ReferenceMasaMDFiles[tag]
+                    )
+                    if os.path.exists(refMasaMDFile):
+                        masaMDRefWithTags = np.loadtxt(
+                            refMasaMDFile,
+                            delimiter=",",
+                            dtype=[("MASA", "f8"), ("pyTestTag", "<U256")],
+                        )
+                        corridor_ok = self.computeCorridor(
+                            masaMDRefWithTags,
+                            masaMDCutWithTags,
+                            tag,
+                            threshold=0.0,
+                            metricKey="MASA",
+                            metricLabel="MASA",
+                        )
+                        all_ok = all_ok and corridor_ok
+                        corridor_fail_count += int(not corridor_ok)
+                    else:
+                        missing_msg = f"Missing reference MASA file for {tag} : {refMasaMDFile}"
+                        print(f"\033[91m{missing_msg} \033[00m")
+                        self.appendRunlog(context=missing_msg)
+                        self.appendFailed(context=missing_msg)
+                        all_ok = False
+                        corridor_fail_count += 1
+
+                return all_ok, corridor_fail_count, max_mld_value
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="Compare .wav files in two folders using mld per frame"
     )
+    parser.add_argument(
+        "--version",
+        action="version",
+        version=f"%(prog)s {PROGRAM_VERSION}",
+    )
 
     parser.add_argument(
         "--testvecDir",
-- 
GitLab


From 1d2493852e6930e50bd71fb2ad5942136fe72fcf Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@dolby.com>
Date: Sun, 29 Mar 2026 13:20:21 +0200
Subject: [PATCH 2/9] conformance: report MASA results and enable REND
 met-trigger

---
 scripts/ivas_conformance/runConformance.py | 48 ++++++++++++++++------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py
index 65cc7eb5a..f998c0e6f 100644
--- a/scripts/ivas_conformance/runConformance.py
+++ b/scripts/ivas_conformance/runConformance.py
@@ -696,12 +696,19 @@ class MLDConformance:
                 if mld_error is not None:
                     return (None, None, mld_error, None)
 
-                if tag in {"DEC", "REND"} and "EXT" in self._outputFormatsInCommand(testDesc.rawCmdline):
+                refMdFile = testDesc.refOutput + ".met"
+                dutMdFile = testDesc.dutOutput + ".met"
+                runMasaMD = (
+                    (tag == "DEC" and "EXT" in self._outputFormatsInCommand(testDesc.rawCmdline))
+                    or (tag == "REND" and (os.path.exists(refMdFile) or os.path.exists(dutMdFile)))
+                )
+
+                if runMasaMD:
                     masaMD_scores, masaMD_error = self.masaMD(
                         tag,
                         dutPytestTag,
-                        refMdFile=testDesc.refOutput + ".met",
-                        dutMdFile=testDesc.dutOutput + ".met",
+                        refMdFile=refMdFile,
+                        dutMdFile=dutMdFile,
                     )
                     if masaMD_error is not None:
                         return (None, None, masaMD_error, None)
@@ -1324,21 +1331,33 @@ class MLDConformance:
             self.doBEanalysis(selectTag=tag)
             analysis_ok = True
             corridor_fail_count = 0
+            masa_comparison_done = False
+            masa_corridor_fail_count = 0
         else:
-            analysis_ok, corridor_fail_count, _ = self.doAnalysis(selectTag=tag)
+            (
+                analysis_ok,
+                corridor_fail_count,
+                _,
+                masa_comparison_done,
+                masa_corridor_fail_count,
+            ) = self.doAnalysis(selectTag=tag)
 
         if self.args.regenerate_mld_ref:
             return command_fail_count == 0 and analysis_ok
 
+        result_details = (
+            f"ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, "
+            f"MLD CORRIDOR FAILURES={failure_count}"
+        )
+        if masa_comparison_done:
+            result_details += f", MASA MD CORRIDOR FAILURES={masa_corridor_fail_count}"
+
+        print()
         if command_fail_count == 0 and failure_count == 0 and analysis_ok:
-            print(
-                f"[{tag}] OK (ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, MLD CORRIDOR FAILURES={failure_count})\n"
-            )
+            print(f"[{tag}] OK ({result_details})\n")
             return True
 
-        print(
-            f"[{tag}] FAILED (ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, MLD CORRIDOR FAILURES={failure_count})"
-        )
+        print(f"[{tag}] FAILED ({result_details})")
         if worst_failure is not None:
             print(
                 f"[{tag}] Worst MLD corridor failure: {worst_failure['prefix']} {worst_failure['tag']} "
@@ -1814,6 +1833,8 @@ class MLDConformance:
     def doAnalysis(self, selectTag="all"):
         all_ok = True
         corridor_fail_count = 0
+        masa_corridor_fail_count = 0
+        masa_comparison_done = False
         max_mld_value = None
         keys = IVAS_Bins.keys() if selectTag == "all" else [selectTag]
         for tag in keys:
@@ -1972,6 +1993,7 @@ class MLDConformance:
                             if masaMDCutWithTags is not None and masaMDCutWithTags.size > 0:
                                 np.savetxt(f, masaMDCutWithTags, fmt="%s", delimiter=",")
                 elif masaMDCutWithTags is not None and masaMDCutWithTags.size > 0:
+                    masa_comparison_done = True
                     refMasaMDFile = os.path.join(
                         self.testvDir, "mld_ref", ReferenceMasaMDFiles[tag]
                     )
@@ -1991,6 +2013,7 @@ class MLDConformance:
                         )
                         all_ok = all_ok and corridor_ok
                         corridor_fail_count += int(not corridor_ok)
+                        masa_corridor_fail_count += int(not corridor_ok)
                     else:
                         missing_msg = f"Missing reference MASA file for {tag} : {refMasaMDFile}"
                         print(f"\033[91m{missing_msg} \033[00m")
@@ -1998,8 +2021,9 @@ class MLDConformance:
                         self.appendFailed(context=missing_msg)
                         all_ok = False
                         corridor_fail_count += 1
+                        masa_corridor_fail_count += 1
 
-                return all_ok, corridor_fail_count, max_mld_value
+        return all_ok, corridor_fail_count, max_mld_value, masa_comparison_done, masa_corridor_fail_count
 
 
 if __name__ == "__main__":
@@ -2133,7 +2157,7 @@ if __name__ == "__main__":
     tag_results = {}
     for tag in testTags:
         if args.report_only:
-            tag_ok, _, _ = conformance.doAnalysis(selectTag=tag)
+            tag_ok, _, _, _, _ = conformance.doAnalysis(selectTag=tag)
         elif not args.analyse:
             tag_ok = conformance.runTag(tag)
         else:
-- 
GitLab


From 4f27f91a3e6da6342b6f50a38408da6587ef4571 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@dolby.com>
Date: Sun, 29 Mar 2026 13:20:52 +0200
Subject: [PATCH 3/9] docs: clarify non-BE MLD and MASA MD analysis flow

---
 scripts/ivas_conformance/README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/ivas_conformance/README.md b/scripts/ivas_conformance/README.md
index d56974ef0..21b822094 100644
--- a/scripts/ivas_conformance/README.md
+++ b/scripts/ivas_conformance/README.md
@@ -145,7 +145,8 @@ Analysing tests for ISAR   (1252 tests)
 
 ### Perform the MLD based non-BE analysis on the CUT outputs on reference platform (Ubuntu 24.04)
 
-The MLD-based non-BE analysis is performed to the CUT outputs with the command below. Encoded outputs will be decoded using the reference decoder executables as part of the process. The MLD analysis is then performed between the CUT and reference decoded outputs (only ".wav" files are compared). Comparison to MLD corridor is also done as part of this process. An example passing output is shown below. If all test sets print `MLD Corridor passed for...` and there were no non-BE metadata comparisons in BE-test, then CUT outputs are Non-BE conformant.
+The non-BE analysis below compares CUT and reference outputs by running MLD on the audio (`.wav`) files and, when MASA metadata are generated, by running the MASA metadata diff tool (`masaDiffTool`) on the matching reference/DUT `.met` files. For encoder tests, the encoded CUT bitstreams are first decoded with the reference decoder before analysis. The per-frame MLD and MASA metadata values are written to `scripts/CUT_OUTPUTS` and checked against the corridor references in `testvec/testv/mld_ref` (`mld_ref_<TAG>.csv` and `masa_ref_<TAG>.csv`).
+
 
 ```shell
 PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --ref_build_path=testvec/bin --analyse
-- 
GitLab


From d9c0afe190b5ebef2f1b50c64927a7aff26381d2 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@dolby.com>
Date: Fri, 10 Apr 2026 14:49:10 +0200
Subject: [PATCH 4/9] run masaDiffTool via argument list instead of shell string

---
 scripts/ivas_conformance/runConformance.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py
index ad0d89487..fb9106fab 100644
--- a/scripts/ivas_conformance/runConformance.py
+++ b/scripts/ivas_conformance/runConformance.py
@@ -38,20 +38,18 @@ import re
 import numpy as np
 import subprocess
 import tempfile
+import filecmp
 import sys
-import shlex
 from typing import Tuple
 from multiprocessing import Pool
+import warnings
+import math
 from dataclasses import dataclass
 from typing import Union
 import shutil
 import scipy.io.wavfile as wav
 import warnings
 import math
-import scipy.signal as sig
-import filecmp
-import time
-
 sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
 
 
@@ -1529,14 +1527,13 @@ class MLDConformance:
                 tmpdir, f"{tempfile.gettempprefix()}_{tag}_{pytestTag}_masa.csv"
             )
             command = [self.masadiffbin, "--csv", masaMDCsvFile, refMdFile, dutMdFile]
-            command_str = " ".join(shlex.quote(x) for x in command)
+            command_str = " ".join(command)
 
             c = subprocess.run(
-                command_str,
+                command,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.STDOUT,
                 text=True,
-                shell=True,
             )
 
             tool_output = c.stdout or ""
-- 
GitLab


From a0745a93711e3feae202073fe44ff37fad0d64e2 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@dolby.com>
Date: Fri, 10 Apr 2026 15:05:34 +0200
Subject: [PATCH 5/9] cleanup duplicate imports and restore missing symbols

---
 scripts/ivas_conformance/runConformance.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py
index fb9106fab..14841530b 100644
--- a/scripts/ivas_conformance/runConformance.py
+++ b/scripts/ivas_conformance/runConformance.py
@@ -40,6 +40,7 @@ import subprocess
 import tempfile
 import filecmp
 import sys
+import time
 from typing import Tuple
 from multiprocessing import Pool
 import warnings
@@ -48,8 +49,7 @@ from dataclasses import dataclass
 from typing import Union
 import shutil
 import scipy.io.wavfile as wav
-import warnings
-import math
+import scipy.signal as sig
 sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
 
 
@@ -197,6 +197,8 @@ IVAS_Bins = {
     "ISAR": "ISAR_post_rend",
 }
 
+DECODER_OUTPUT_FORMATS = {"MONO", "STEREO", "EXT"}
+
 
 def validate_build_binaries(parser, build_path: str, build_label: str) -> None:
     """Validate that a build path exists and contains all IVAS binaries."""
-- 
GitLab


From ea312fbc2dd76475f24a384d5edc3cf8f418eebc Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@dolby.com>
Date: Mon, 13 Apr 2026 09:52:42 +0200
Subject: [PATCH 6/9] rename MASA metadata "sidecar" to "companion file" in comments

---
 scripts/ivas_conformance/runConformance.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py
index 14841530b..af8d67b22 100644
--- a/scripts/ivas_conformance/runConformance.py
+++ b/scripts/ivas_conformance/runConformance.py
@@ -1507,7 +1507,7 @@ class MLDConformance:
         dut_exists = os.path.exists(dutMdFile)
 
         if not ref_exists and not dut_exists:
-            # MASA metadata sidecars are optional for some test/output combinations.
+            # MASA metadata companion files (.met) are optional for some test/output combinations.
             # If both sides are missing, skip MASA diff for this test.
             return np.zeros(0, dtype=float), None
 
@@ -1985,7 +1985,7 @@ class MLDConformance:
 
                 if self.args.regenerate_mld_ref:
                     # Regeneration gate for MASA refs: require that at least one expected
-                    # metadata sidecar (.met) exists for this tag in the selected run.
+                    # metadata companion file (.met) exists for this tag in the selected run.
                     if self._hasAnyProducedMasaMDMetFile(tag):
                         new_mld_dir = os.path.join(self.testvDir, "mld_ref2")
                         if not os.path.exists(new_mld_dir):
-- 
GitLab


From 5185f537c8585b1a4888bdbb9de2445c9b591d28 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@dolby.com>
Date: Mon, 13 Apr 2026 20:28:15 +0200
Subject: [PATCH 7/9] fix: BE test summary reports FAILED when non-BE tests are
 present

---
 scripts/ivas_conformance/runConformance.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py
index af8d67b22..76e6e98ed 100644
--- a/scripts/ivas_conformance/runConformance.py
+++ b/scripts/ivas_conformance/runConformance.py
@@ -1321,7 +1321,7 @@ class MLDConformance:
 
         if self.args.be_test:
             self.doBEanalysis(selectTag=tag)
-            analysis_ok = True
+            analysis_ok = non_be_count == 0
             corridor_fail_count = 0
             masa_comparison_done = False
             masa_corridor_fail_count = 0
-- 
GitLab


From 39be217b568933255c89599e964cbabfe22b20c8 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@dolby.com>
Date: Mon, 13 Apr 2026 20:41:01 +0200
Subject: [PATCH 8/9] refine BE conformance summary output

---
 scripts/ivas_conformance/runConformance.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py
index 76e6e98ed..25468facf 100644
--- a/scripts/ivas_conformance/runConformance.py
+++ b/scripts/ivas_conformance/runConformance.py
@@ -1320,8 +1320,7 @@ class MLDConformance:
         self.flushErrorBlocks()
 
         if self.args.be_test:
-            self.doBEanalysis(selectTag=tag)
-            analysis_ok = non_be_count == 0
+            analysis_ok, be_failure_csv = self.doBEanalysis(selectTag=tag)
             corridor_fail_count = 0
             masa_comparison_done = False
             masa_corridor_fail_count = 0
@@ -1350,7 +1349,9 @@ class MLDConformance:
             return True
 
         print(f"[{tag}] FAILED ({result_details})")
-        if worst_failure is not None:
+        if self.args.be_test and be_failure_csv is not None:
+            print(f"[{tag}] BE test failed, check {be_failure_csv}")
+        elif worst_failure is not None:
             print(
                 f"[{tag}] Worst MLD corridor failure: {worst_failure['prefix']} {worst_failure['tag']} "
                 f"(NON-BE, MLD_MAX={worst_failure['mld']})"
@@ -1776,9 +1777,10 @@ class MLDConformance:
                     usecols=1,
                 )
                 if np.sum(BEresult) > 0:
-                    print(f"<{tag}> FAILED BE TEST, check {self.BEcsv[tag]}")
-                else:
-                    print(f"<{tag}> PASSED BE TEST")
+                    return False, self.BEcsv[tag]
+                return True, None
+
+        return True, None
 
     def computeCorridor(
         self,
-- 
GitLab


From 07b2d08086853b4d95b534bd01463207fb8cc477 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@dolby.com>
Date: Mon, 13 Apr 2026 20:43:32 +0200
Subject: [PATCH 9/9] omit MLD corridor count from BE summary

---
 scripts/ivas_conformance/runConformance.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py
index 25468facf..1a0935353 100644
--- a/scripts/ivas_conformance/runConformance.py
+++ b/scripts/ivas_conformance/runConformance.py
@@ -1336,10 +1336,9 @@ class MLDConformance:
         if self.args.regenerate_mld_ref:
             return command_fail_count == 0 and analysis_ok
 
-        result_details = (
-            f"ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, "
-            f"MLD CORRIDOR FAILURES={failure_count}"
-        )
+        result_details = f"ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}"
+        if not self.args.be_test:
+            result_details += f", MLD CORRIDOR FAILURES={failure_count}"
         if masa_comparison_done:
             result_details += f", MASA MD CORRIDOR FAILURES={masa_corridor_fail_count}"
 
-- 
GitLab