diff --git a/scripts/ivas_conformance/README.md b/scripts/ivas_conformance/README.md
index 19763d14bab556954f85608cd6410bb43eb446d1..1aaced54d2036e6caa2dc8b5e2b23073924c9b7e 100644
--- a/scripts/ivas_conformance/README.md
+++ b/scripts/ivas_conformance/README.md
@@ -1,4 +1,4 @@
-# IVAS Conformance Scripts
+# IVAS Conformance Scripts, ver 3.1
 
 This folder contains scripts for running IVAS conformance tests.
 
@@ -48,12 +48,11 @@ Running the conformance tests requires around 30 gb of disk space and around 6 g
 
 To run CUT binaries on the targeted platform, it is necessary to have the correct setup for python and dependency packages.
 
-- Create virtual environment for Python 3.13 and install requirements
+- Create a virtual environment for Python 3.13 and install the requirements from the root folder of the IVAS codebase
 
   ```shell
   python3.13 -m venv pyConformance
   source pyConformance/bin/activate
-  cd ivas-codec
   python -m pip install -r scripts/ivas_conformance/requirements.txt
   ```
 
@@ -67,25 +66,37 @@ PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDi
 
 <details>
 <summary>Example Output of CUT execution</summary>
-<pre><code>
+
+```console
 Accumulating commands from Readme_IVAS_dec.txt
 Accumulating commands from Readme_IVAS_rend.txt
 Accumulating commands from Readme_IVAS_enc.txt
 Accumulating commands from Readme_IVAS_ISAR_post_rend.txt
 Accumulating commands from Readme_IVAS_ISAR_dec.txt
 Accumulating commands from Readme_IVAS_JBM_dec.txt
+
 No of tests :
     ENC : 381
     DEC : 637
     REND : 666
     ISAR_ENC : 1032
     ISAR : 1032
-Executing tests for ENC   (381 tests)
-Executing tests for DEC   (637 tests)
-Executing tests for REND   (666 tests)
-Executing tests for ISAR_ENC   (1032 tests)
-Executing tests for ISAR   (1032 tests)
-</code></pre>
+
+Executing tests for ENC  (381 tests):
+---------------------------
+[ENC] OK
+
+...
+
+Summary of results:
+---------------------
+[ENC] OK
+[DEC] OK
+[REND] OK
+[ISAR_ENC] OK
+[ISAR] OK
+```
+
 </details>
 
 This should generate outputs in scripts/CUT_OUTPUTS folder which looks like below:-
@@ -106,7 +117,7 @@ If CUT test execution is done on a different platform, the scripts/CUT_OUTPUTS m
 
 ### Perform the BE comparison on the CUT outputs on reference platform
 
-The BE comparison is performed to the CUT outputs using the command below. Encoded outputs will be decoded using the reference decoder executables as part of the process. The BE comparison is then performed between the CUT and reference decoded outputs. This includes comparison of ".wav"-files, and ".csv" and ".met" metadata files. If any non-BE results are observed, this is reported on the command-line and link to an analysis ".csv" file is given. The analysis file shows which exact files were non-BE. An example passing output is shown below. If all test sets print `PASSED BE TEST`, then CUT outputs are BE-conformant.
+The BE comparison is performed on the CUT outputs using the command below. Encoded outputs are decoded using the reference decoder executables as part of the process. The BE comparison is then performed between the CUT and reference decoded outputs. This includes comparison of `.wav` files and `.csv`/`.met` metadata files. If non-BE results are observed, they are reported on the command line and in the generated analysis CSV output.
   
 ```shell
 PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --ref_build_path=testvec/bin --analyse --be-test
@@ -114,30 +125,39 @@ PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDi
 
 <details>
 <summary>Example Output of BE comparison</summary>
-<pre><code>
+
+```console
 Accumulating commands from Readme_IVAS_dec.txt
 Accumulating commands from Readme_IVAS_enc.txt
 Accumulating commands from Readme_IVAS_rend.txt
 Accumulating commands from Readme_IVAS_JBM_dec.txt
 Accumulating commands from Readme_IVAS_ISAR_dec.txt
 Accumulating commands from Readme_IVAS_ISAR_post_rend.txt
+
 No of tests :
     ENC : 374
     DEC : 638
     REND : 911
     ISAR_ENC : 1032
     ISAR : 1252
-Analysing tests for ENC   (374 tests)
-&lt;ENC&gt; PASSED BE TEST
-Analysing tests for DEC   (638 tests)
-&lt;DEC&gt; PASSED BE TEST
-Analysing tests for REND   (911 tests)
-&lt;REND&gt; PASSED BE TEST
-Analysing tests for ISAR_ENC   (1032 tests)
-&lt;ISAR_ENC&gt; PASSED BE TEST
-Analysing tests for ISAR   (1252 tests)
-&lt;ISAR&gt; PASSED BE TEST
-</code></pre>
+
+Analysing tests for ENC  (374 tests):
+---------------------------
+
+[ENC] OK (ERRORS=0, BE=374, NON-BE=0, MLD CORRIDOR FAILURES=0)
+[DEC] OK (ERRORS=0, BE=638, NON-BE=0, MLD CORRIDOR FAILURES=0)
+
+...
+
+Summary of results:
+---------------------
+[ENC] OK
+[DEC] OK
+[REND] OK
+[ISAR_ENC] OK
+[ISAR] OK
+```
+
 </details>
 
 ### Perform the MLD based non-BE analysis on the CUT outputs on reference platform (Ubuntu 24.04)
@@ -151,187 +171,105 @@ PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDi
 
 <details>
 <summary>Example Output of non-BE analysis</summary>
-<pre><code>
+
+```console
 Accumulating commands from Readme_IVAS_dec.txt
 Accumulating commands from Readme_IVAS_enc.txt
 Accumulating commands from Readme_IVAS_rend.txt
 Accumulating commands from Readme_IVAS_JBM_dec.txt
 Accumulating commands from Readme_IVAS_ISAR_dec.txt
 Accumulating commands from Readme_IVAS_ISAR_post_rend.txt
+
 No of tests :
     ENC : 374
     DEC : 638
     REND : 911
     ISAR_ENC : 1032
     ISAR : 1252
-Analysing tests for ENC   (374 tests)
-
-##########################################################
-&lt;ENC&gt; Total Frames: 3074220
-&lt;ENC&gt; MAX MLD across all frames : 0.0
-&lt;ENC&gt; Frames with MLD == 0 : 3074220 frames (100.0%)
-&lt;ENC&gt; Frames with MLD <= 0.5 : 3074220 frames (100.0%)
-&lt;ENC&gt; Frames with MLD <= 1 : 3074220 frames (100.0%)
-&lt;ENC&gt; Frames with MLD <= 2 : 3074220 frames (100.0%)
-&lt;ENC&gt; Frames with MLD <= 5 : 3074220 frames (100.0%)
-&lt;ENC&gt; BE samples percentage = 100.0
-&lt;ENC&gt; max absolute diff = 0.0, sample range (-32768, 32767)
-##########################################################
-
-MLD Corridor passed for ENC with max MLD diff of 0.0
-Analysing tests for DEC   (638 tests)
-
-##########################################################
-&lt;DEC&gt; Total Frames: 5079252
-&lt;DEC&gt; MAX MLD across all frames : 0.0
-&lt;DEC&gt; Frames with MLD == 0 : 5079252 frames (100.0%)
-&lt;DEC&gt; Frames with MLD <= 0.5 : 5079252 frames (100.0%)
-&lt;DEC&gt; Frames with MLD <= 1 : 5079252 frames (100.0%)
-&lt;DEC&gt; Frames with MLD <= 2 : 5079252 frames (100.0%)
-&lt;DEC&gt; Frames with MLD <= 5 : 5079252 frames (100.0%)
-&lt;DEC&gt; BE samples percentage = 100.0
-&lt;DEC&gt; max absolute diff = 0.0, sample range (-32768, 32767)
-##########################################################
 
-MLD Corridor passed for DEC with max MLD diff of 0.0
-Analysing tests for REND   (911 tests)
+Analysing tests for ENC  (374 tests):
+---------------------------
 
 ##########################################################
-&lt;REND&gt; Total Frames: 5576907
-&lt;REND&gt; MAX MLD across all frames : 0.0
-&lt;REND&gt; Frames with MLD == 0 : 5576907 frames (100.0%)
-&lt;REND&gt; Frames with MLD <= 0.5 : 5576907 frames (100.0%)
-&lt;REND&gt; Frames with MLD <= 1 : 5576907 frames (100.0%)
-&lt;REND&gt; Frames with MLD <= 2 : 5576907 frames (100.0%)
-&lt;REND&gt; Frames with MLD <= 5 : 5576907 frames (100.0%)
-&lt;REND&gt; BE samples percentage = 100.0
-&lt;REND&gt; max absolute diff = 0.0, sample range (-32768, 32767)
+<ENC> Total Frames: 3074220
+<ENC> MAX MLD across all frames : 0.0
+<ENC> Frames with MLD == 0 : 3074220 frames (100.0%)
+<ENC> Frames with MLD <= 0.5 : 3074220 frames (100.0%)
+<ENC> Frames with MLD <= 1 : 3074220 frames (100.0%)
+<ENC> Frames with MLD <= 2 : 3074220 frames (100.0%)
+<ENC> Frames with MLD <= 5 : 3074220 frames (100.0%)
+<ENC> BE samples percentage = 100.0
+<ENC> max absolute diff = 0.0, sample range (-32768, 32767)
 ##########################################################
 
-MLD Corridor passed for REND with max MLD diff of 0.0
-Analysing tests for ISAR_ENC   (1032 tests)
+[ENC] OK (ERRORS=0, BE=374, NON-BE=0, MLD CORRIDOR FAILURES=0)
 
-##########################################################
-&lt;ISAR_ENC&gt; Total Frames: 2125956
-&lt;ISAR_ENC&gt; MAX MLD across all frames : 0.0
-&lt;ISAR_ENC&gt; Frames with MLD == 0 : 2125956 frames (100.0%)
-&lt;ISAR_ENC&gt; Frames with MLD <= 0.5 : 2125956 frames (100.0%)
-&lt;ISAR_ENC&gt; Frames with MLD <= 1 : 2125956 frames (100.0%)
-&lt;ISAR_ENC&gt; Frames with MLD <= 2 : 2125956 frames (100.0%)
-&lt;ISAR_ENC&gt; Frames with MLD <= 5 : 2125956 frames (100.0%)
-&lt;ISAR_ENC&gt; BE samples percentage = 100.0
-&lt;ISAR_ENC&gt; max absolute diff = 0.0, sample range (-32768, 32767)
-##########################################################
+...
 
-MLD Corridor passed for ISAR_ENC with max MLD diff of 0.0
-Analysing tests for ISAR   (1252 tests)
-
-##########################################################
-&lt;ISAR&gt; Total Frames: 2590956
-&lt;ISAR&gt; MAX MLD across all frames : 0.0
-&lt;ISAR&gt; Frames with MLD == 0 : 2590956 frames (100.0%)
-&lt;ISAR&gt; Frames with MLD <= 0.5 : 2590956 frames (100.0%)
-&lt;ISAR&gt; Frames with MLD <= 1 : 2590956 frames (100.0%)
-&lt;ISAR&gt; Frames with MLD <= 2 : 2590956 frames (100.0%)
-&lt;ISAR&gt; Frames with MLD <= 5 : 2590956 frames (100.0%)
-&lt;ISAR&gt; BE samples percentage = 100.0
-&lt;ISAR&gt; max absolute diff = 0.0, sample range (-32768, 32767)
-##########################################################
+Summary of results:
+---------------------
+[ENC] OK
+[DEC] OK
+[REND] OK
+[ISAR_ENC] OK
+[ISAR] OK
+```
 
-MLD Corridor passed for ISAR with max MLD diff of 0.0
-</code></pre>
 </details>
 
 ## Executing specific tests only
 
-All CUT tests can be run specifically for IVAS Encoder,IVAS Decoder,IVAS Renderer, ISAR Encoder and ISAR Decoder only. The commandline allows for ```-test-mode=<PARAM>``` for this functionality, examples:
-
-- Run CUT IVAS Encoder Tests Only (on Target Platform)
-
-  ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --cut_build_path=CUT_BIN_DIR --test-mode=ENC
-  ```
-
-- Analyse BE conformance for CUT IVAS Encoder Outputs Only (on Reference Platform)
-
-  ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --ref_build_path=testvec/bin --test-mode=ENC --analyse --be-test
-  ```
-
-- Analyse NON-BE conformance for CUT IVAS Encoder Outputs Only (on Reference Platform)
-
-  ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --ref_build_path=testvec/bin --test-mode=ENC --analyse
-  ```
-
-- Run CUT IVAS Decoder Tests Only (on Target Platform)
-
-  ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --cut_build_path=CUT_BIN_DIR --test-mode=DEC
-  ```
-
-- Analyse BE conformance for CUT IVAS Decoder Outputs Only
-
-  ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --test-mode=DEC --analyse --be-test
-  ```
-
-- Analyse NON-BE conformance CUT IVAS Decoder Outputs Only (on Reference Platform)
-
-  ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec  --test-mode=DEC --analyse
-  ```
-
-- Run CUT IVAS Renderer Tests Only (on Target Platform)
-
-  ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --cut_build_path=CUT_BIN_DIR --test-mode=REND
-  ```
-
-- Analyse BE conformance for CUT Renderer Outputs Only
-
-  ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --test-mode=REND --analyse --be-test
-  ```
+Use `--filter TOKEN` to select test groups, output formats, and apply substring matching.
 
-- Analyse NON-BE conformance CUT Renderer Outputs Only
+- Token types:
+  - `ENC`, `DEC`, `REND`, `ISAR`, `ISAR_ENC`: test groups.
+  - `MONO`, `STEREO`, `EXT`, `HOA`, `SBA`, `MC`: output-format tokens.
+    - `HOA` expands to `HOA2`, `HOA3`.
+    - `SBA` expands to `FOA`, `HOA2`, `HOA3`.
+    - `MC` expands to `5_1`, `7_1`, `5_1_4`, `5_1_2`, `7_1_4`.
+  - Any other token is treated as a case-insensitive substring match.
+- Token modifiers:
+  - `TOKEN`: restrictive token. Multiple restrictive tokens combine with logical AND.
+  - `+TOKEN`: additive token. Adds matching tests even if they would otherwise be excluded.
+  - `-TOKEN`: subtractive token. Removes matching tests from the final selection.
+  - `TOKEN*`: wildcard token. Matches all known tokens starting with the given prefix.
+  - `+TOKEN*`, `-TOKEN*`: wildcard token with add/remove behavior.
+  - **Note**: wildcards must be quoted in shell (e.g. `'ISAR*'`) to prevent the shell from expanding them as filename globs before the script receives them.
 
-  ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --test-mode=REND --analyse
-  ```
+Examples (non-BE):
 
-- Run CUT ISAR Encoder Tests Only (on Target Platform)
+- Default behavior: run all test groups (ENC, DEC, REND, ISAR, ISAR_ENC)
 
   ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --cut_build_path=CUT_BIN_DIR --test-mode=ISAR_ENC
+  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --ref_build_path=testvec/bin --analyse
   ```
 
-- Analyse BE conformance for CUT ISAR Encoder Outputs Only (on Reference Platform)
+- Run only REND, ISAR, and ISAR_ENC test groups
 
   ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --ref_build_path=testvec/bin --test-mode=ISAR_ENC --analyse --be-test
+  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --ref_build_path=testvec/bin --analyse --filter REND ISAR ISAR_ENC
   ```
 
-- Analyse NON-BE conformance for CUT ISAR Encoder Outputs Only (on Reference Platform)
+- Run DEC tests with HOA output formats
 
   ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --ref_build_path=testvec/bin --test-mode=ISAR_ENC --analyse
+  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --ref_build_path=testvec/bin --analyse --filter DEC HOA
   ```
 
-- Run CUT ISAR Decoder Tests Only (on Target Platform)
+- Run DEC tests excluding those matching 'voip'
 
   ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --cut_build_path=CUT_BIN_DIR --test-mode=ISAR
+  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --ref_build_path=testvec/bin --analyse --filter DEC -voip
   ```
 
-- Analyse BE conformance for CUT ISAR Decoder Outputs Only
+- Run DEC tests with any BINAURAL output format
 
   ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec  --test-mode=ISAR --analyse --be-test
+  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --ref_build_path=testvec/bin --analyse --filter DEC 'BINAURAL*'
   ```
 
-- Analyse NON-BE conformance CUT ISAR Decoder Outputs Only
+- Run ISAR and ISAR_ENC groups, excluding 'voip' tests
 
   ```shell
-  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec  --test-mode=ISAR --analyse
+  PYTHONPATH=scripts python scripts/ivas_conformance/runConformance.py --testvecDir $PWD/testvec --ref_build_path=testvec/bin --analyse --filter 'ISAR*' -voip
   ```
diff --git a/scripts/ivas_conformance/README_Package_Gen.md b/scripts/ivas_conformance/README_Package_Gen.md
index ed313db63de83a60af0b23678ad87fd4e76da71b..157bb6efc8653b17ce1da2b11d94ad5d457bdd08 100644
--- a/scripts/ivas_conformance/README_Package_Gen.md
+++ b/scripts/ivas_conformance/README_Package_Gen.md
@@ -42,12 +42,11 @@ This folder contains scripts for running IVAS conformance tests.
   sudo apt install python3.13 python3.13-venv
   ```
 
-- Create virtual environment for Python 3.13 and install requirements
+- Create a virtual environment for Python 3.13 and install the requirements from the root folder of the IVAS codebase
 
   ```shell
   python3.13 -m venv pyConformance
   source pyConformance/bin/activate
-  cd ivas-codec
   python -m pip install -r tests/requirements.txt
   ```
 
diff --git a/scripts/ivas_conformance/runConformance.py b/scripts/ivas_conformance/runConformance.py
index 574d7be136bdd1debb5b0f91a3dc187255308428..87150fa9e79bbab883639f6871655ceae947ad82 100644
--- a/scripts/ivas_conformance/runConformance.py
+++ b/scripts/ivas_conformance/runConformance.py
@@ -43,7 +43,6 @@ import sys
 import time
 from typing import Tuple
 from multiprocessing import Pool
-import warnings
 import math
 from dataclasses import dataclass
 from typing import Union
@@ -53,6 +52,64 @@ import scipy.signal as sig
 sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
 
 
+
+def _preprocess_filter_args():
+    """Preprocess sys.argv to handle --filter with minus-prefixed tokens.
+    
+    Argparse stops consuming args when it encounters e.g. '-TERM', treating it as a flag.
+    This function escapes filter tokens by wrapping minus-prefixed tokens with a marker,
+    allowing argparse to treat them as regular arguments.
+    
+    Stores the escape mapping in _FILTER_ESCAPES for later unescaping.
+    
+    Returns: a new argv list (sys.argv itself is not modified) with minus-prefixed filter tokens escaped
+    """
+    global _FILTER_ESCAPES
+    _FILTER_ESCAPES = {}
+    marker_prefix = "@FILT_"
+    result_argv = []
+    i = 0
+    
+    while i < len(sys.argv):
+        arg = sys.argv[i]
+        
+        # When we see --filter, process all following tokens until the next option (--name or a two-character -X flag) or the end of argv
+        if arg == "--filter":
+            result_argv.append(arg)
+            i += 1
+            
+            # Collect all filter tokens, escaping those that look like flags
+            while i < len(sys.argv):
+                tok = sys.argv[i]
+                
+                # Stop if we hit the next option (--something or -X where X is not escaped)
+                if tok.startswith("--") or (tok.startswith("-") and len(tok) <= 2 and tok != "-"):
+                    # NOTE: both paths below break, so any --option or two-character -X token ends the filter list
+                    known_short_options = {"-h", "--help"}
+                    if tok in known_short_options or tok.startswith("--"):
+                        break
+                    break
+                
+                # Escape tokens starting with - to protect them from argparse
+                if tok.startswith("-"):
+                    escaped = f"{marker_prefix}{len(_FILTER_ESCAPES)}"
+                    _FILTER_ESCAPES[escaped] = tok
+                    result_argv.append(escaped)
+                else:
+                    result_argv.append(tok)
+                
+                i += 1
+        else:
+            result_argv.append(arg)
+            i += 1
+    
+
+    return result_argv
+
+
+# Module-level dict to track filter token escapes
+_FILTER_ESCAPES = {}
+
 def readfile(
     filename: str, nchannels: int = 1, fs: int = 48000, outdtype="float"
 ) -> Tuple[np.ndarray, int]:
@@ -131,7 +188,7 @@ def writefile(filename: str, x: np.ndarray, fs: int = 48000) -> None:
         np.logical_or(x < np.iinfo(np.int16).min, x > np.iinfo(np.int16).max)
     )
     if clipped_samples > 0:
-        warnings.warn(f"  Warning: {clipped_samples} samples clipped")
+        # Do not warn about clipping: some implementations are expected to produce out-of-range values, and the reference implementation clips them. The clipped samples are only counted, and the values are clipped to the int16 range before writing to the file to avoid overflow issues.
         x = np.clip(x, np.iinfo(np.int16).min, np.iinfo(np.int16).max)
 
     if file_extension == ".wav":
@@ -197,7 +254,34 @@ IVAS_Bins = {
     "ISAR": "ISAR_post_rend",
 }
 
-DECODER_OUTPUT_FORMATS = {"MONO", "STEREO", "EXT"}
+is_windows = platform.system() == "Windows"
+
+# Automatically append .exe on Windows
+if is_windows:
+    IVAS_Bins = {k: f"{v}.exe" for k, v in IVAS_Bins.items()}
+
+DECODER_OUTPUT_FORMATS = {
+    "MONO",
+    "STEREO",
+    "BINAURAL",
+    "BINAURAL_ROOM_IR",
+    "BINAURAL_ROOM_REVERB",
+    "5_1",
+    "7_1",
+    "5_1_4",
+    "5_1_2",
+    "7_1_4",
+    "FOA",
+    "HOA2",
+    "HOA3",
+    "EXT",
+}
+
+DECODER_OUTPUT_FORMAT_ALIASES = {
+    "HOA": {"HOA2", "HOA3"},
+    "SBA": {"FOA", "HOA2", "HOA3"},
+    "MC": {"5_1", "7_1", "5_1_4", "5_1_2", "7_1_4"},
+}
 
 
 def validate_build_binaries(parser, build_path: str, build_label: str) -> None:
@@ -209,16 +293,10 @@ def validate_build_binaries(parser, build_path: str, build_label: str) -> None:
         )
 
     missing = []
-    is_windows = platform.system() == "Windows"
     for tag, binary in IVAS_Bins.items():
         candidate = os.path.join(abs_build_path, binary)
-        candidate_exe = f"{candidate}.exe"
-        exists = os.path.isfile(candidate) or (
-            is_windows and os.path.isfile(candidate_exe)
-        )
-        if not exists:
-            shown = candidate_exe if is_windows else candidate
-            missing.append(f"{tag}: {shown}")
+        if not os.path.isfile(candidate):
+            missing.append(f"{tag}: {candidate}")
 
     if missing:
         parser.error(f"Missing {build_label} binaries:\n  - " + "\n  - ".join(missing))
@@ -238,7 +316,7 @@ ReferenceMasaMDFiles = {
     "REND": "masa_ref_REND.csv",
 }
 
-PROGRAM_VERSION = "4.0"
+PROGRAM_VERSION = "3.1"
 
 
 @dataclass
@@ -375,7 +453,7 @@ class MLDConformance:
 
     def setupDUT(self):
         self.cut_build_path = self.args.cut_build_path
-        self.filter = self.args.filter
+        self.filter = getattr(self.args, "filter_display", self.args.filter)
         exe_platform = platform.system()
         if exe_platform == "Windows":
             exe_platform = "Win64"
@@ -614,10 +692,6 @@ class MLDConformance:
                 )
             else:
                 print(f"{pyTestTag} not found in ISAR decoder")
-        print("No of tests :")
-        for tag in testDesciptor.keys():
-            print(f"    {tag} : {len(testDesciptor[tag])}")
-
         return testDesciptor
 
     def genEncoderReferences(self, tag: str, encPytestTag: str):
@@ -962,7 +1036,8 @@ class MLDConformance:
         self, tag: str, pyTestsTag: str, testIndex: int = 0, totalTests: int = 0
     ):
         # Run CUT Cmdline
-        testPrefix = f"[{tag} {testIndex}/{totalTests}]"
+        _w = len(str(totalTests))
+        testPrefix = f"[{tag} {testIndex:{_w}}/{totalTests}]"
         self.appendRunlog(
             context=self.formatTestHeader(testPrefix, "Running test", pyTestsTag)
         )
@@ -984,7 +1059,8 @@ class MLDConformance:
     def analyseOneCommand(
         self, tag: str, pyTestsTag: str, testIndex: int = 0, totalTests: int = 0
     ):
-        testPrefix = f"[{tag} {testIndex}/{totalTests}]"
+        _w = len(str(totalTests))
+        testPrefix = f"[{tag} {testIndex:{_w}}/{totalTests}]"
         header = self.formatTestHeader(testPrefix, "Analyzing test", pyTestsTag)
         self.appendRunlog(context=header)
         non_be = None
@@ -1061,20 +1137,29 @@ class MLDConformance:
     def analyseOneCommandFromTuple(self, args):
         return self.analyseOneCommand(*args)
 
-    def _extractKbpsValues(self, rawCmdline: str) -> list[float]:
-        """Extract all bitrate values from command line (e.g., from 'at_32_kbps' or 'from_32_kbps_to_96_kbps')."""
-        values = []
-        for match in re.findall(r"(\d+(?:_\d+)?)_kbps", rawCmdline.lower()):
-            values.append(float(match.replace("_", ".")))
-        return values
+    def _expectedMasaMDMetFiles(self, tag: str) -> list[str]:
+        """Return expected DUT-side .met files for a tag's selected tests."""
+        if tag not in {"ENC", "DEC", "REND"}:
+            return []
 
-    def _isBitrateAtMost80(self, rawCmdline: str) -> bool:
-        """Check if all bitrates in command line are <= 80 kbps.
-        
-        For bitrate switching tests (e.g., 'from_32_kbps_to_96_kbps'), this checks 
-        that the upper (target) bitrate does not exceed 80 kbps."""
-        values = self._extractKbpsValues(rawCmdline)
-        return bool(values) and max(values) <= 80.0
+        met_files = []
+        for pyTestsTag in self.getSelectedTestsForTag(tag):
+            testDesc = self.TestDesc[tag][pyTestsTag]
+
+            if tag == "ENC":
+                if not isinstance(testDesc, BitstreamTestDescriptor):
+                    continue
+                dut_dec_output = testDesc.dutOutput.replace(".192", "_CUT_REFDECODED.wav")
+                met_files.append(dut_dec_output + ".met")
+            else:
+                if not isinstance(testDesc, TestDescriptor):
+                    continue
+                met_files.append(testDesc.dutOutput + ".met")
+
+        return met_files
+
+    def _hasAnyProducedMasaMDMetFile(self, tag: str) -> bool:
+        return any(os.path.exists(path) for path in self._expectedMasaMDMetFiles(tag))
 
     def _outputFormatsInCommand(self, rawCmdline: str) -> set[str]:
         text = rawCmdline.upper()
@@ -1108,54 +1193,7 @@ class MLDConformance:
         text = rawCmdline.lower()
         return any(term.lower() in text for term in terms)
 
-    def _expectedMasaMDMetFiles(self, tag: str) -> list[str]:
-        """Return expected DUT-side .met files for a tag's selected tests."""
-        if tag not in {"ENC", "DEC", "REND"}:
-            return []
-
-        met_files = []
-        for pyTestsTag in self.getSelectedTestsForTag(tag):
-            testDesc = self.TestDesc[tag][pyTestsTag]
-
-            if tag == "ENC":
-                if not isinstance(testDesc, BitstreamTestDescriptor):
-                    continue
-                dut_dec_output = testDesc.dutOutput.replace(".192", "_CUT_REFDECODED.wav")
-                met_files.append(dut_dec_output + ".met")
-            else:
-                if not isinstance(testDesc, TestDescriptor):
-                    continue
-                met_files.append(testDesc.dutOutput + ".met")
-
-        return met_files
-
-    def _hasAnyProducedMasaMDMetFile(self, tag: str) -> bool:
-        return any(os.path.exists(path) for path in self._expectedMasaMDMetFiles(tag))
-
-    def _matchesLevel1(self, tag: str, rawCmdline: str) -> bool:
-        if tag == "ENC":
-            return self._isBitrateAtMost80(rawCmdline)
-
-        if tag == "DEC":
-            formats = self._outputFormatsInCommand(rawCmdline)
-            requested_formats = set(getattr(self.args, "filter_decoder_formats", []))
-
-            ext_ok = "EXT" in formats and self._isBitrateAtMost80(rawCmdline)
-            mono_ok = "MONO" in formats
-            stereo_ok = "STEREO" in formats
-            default_level1_dec_ok = ext_ok or mono_ok or stereo_ok
-
-            if requested_formats:
-                # Plain decoder format tokens are restrictive under LEVEL1.
-                return default_level1_dec_ok and bool(formats.intersection(requested_formats))
-
-            return default_level1_dec_ok
-
-        # For REND/ISAR/ISAR_ENC under LEVEL1, tag-level inclusion is decided at testTags parsing.
-        return True
-
     def _testPassesFilter(self, tag: str, rawCmdline: str) -> bool:
-        level = getattr(self.args, "filter_level", "LEVEL3")
         restrictive_terms = getattr(self.args, "filter_restrictive_terms", [])
         additive_terms = getattr(self.args, "filter_add_terms", [])
         subtractive_terms = getattr(self.args, "filter_remove_terms", [])
@@ -1165,17 +1203,13 @@ class MLDConformance:
         if subtractive_terms and self._matchesAnyTerm(rawCmdline, subtractive_terms):
             return False
 
-        passes_level = True
-        if level == "LEVEL1":
-            passes_level = self._matchesLevel1(tag, rawCmdline)
-
         passes_requested_formats = True
-        if tag in {"ENC", "DEC"} and requested_formats:
+        if requested_formats:
             cmd_formats = self._outputFormatsInCommand(rawCmdline)
             passes_requested_formats = bool(cmd_formats.intersection(requested_formats))
 
         passes_restrictive_terms = self._matchesAllTerms(rawCmdline, restrictive_terms)
-        base_selected = passes_level and passes_restrictive_terms and passes_requested_formats
+        base_selected = passes_restrictive_terms and passes_requested_formats
 
         if base_selected:
             return True
@@ -1196,19 +1230,14 @@ class MLDConformance:
 
     def runTag(self, tag: str) -> bool:
         failed_before = self.getFailedCommandCount()
-        selectedTests = list()
-        if self.filter:
-            for pyTestsTag in self.TestDesc[tag].keys():
-                if self.filter in self.TestDesc[tag][pyTestsTag].rawCmdline:
-                    selectedTests.append(pyTestsTag)
-        else:
-            selectedTests = list(self.TestDesc[tag].keys())
+        selectedTests = self.getSelectedTestsForTag(tag)
 
         self.totalTests = len(selectedTests)
         print(
-            f"Executing tests for {tag}  {'Filter=' + self.filter if self.filter else ''} ({self.totalTests} tests)",
+            f"Executing tests for {tag}  ({self.totalTests} tests):",
             flush=True,
         )
+        print("---------------------------")
         if not self.args.no_multi_processing:
             with Pool() as pool:
                 args = [
@@ -1289,19 +1318,14 @@ class MLDConformance:
             with open(self.sampleStats[tag], "w") as f:
                 f.write(f"PYTESTTAG, MAXDIFF, RMSdB, BEFRAMES_PERCENT, MAX_MLD\n")
 
-        selectedTests = []
-        if self.filter:
-            for pyTestsTag in self.TestDesc[tag].keys():
-                if self.filter in self.TestDesc[tag][pyTestsTag].rawCmdline:
-                    selectedTests.append(pyTestsTag)
-        else:
-            selectedTests = list(self.TestDesc[tag].keys())
+        selectedTests = self.getSelectedTestsForTag(tag)
 
         self.totalTests = len(selectedTests)
         print(
-            f"Analysing tests for {tag}  {'Filter=' + self.filter if self.filter else ''} ({self.totalTests} tests)",
+            f"Analysing tests for {tag}  ({self.totalTests} tests):",
             flush=True,
         )
+        print("---------------------------")
 
         def handle_test_result(
             testPrefix,
@@ -1410,6 +1434,7 @@ class MLDConformance:
         if self.args.regenerate_mld_ref:
             return command_fail_count == 0 and analysis_ok
 
+        print()
         result_details = f"ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}"
         if not self.args.be_test:
             result_details += f", MLD CORRIDOR FAILURES={failure_count}"
@@ -1418,13 +1443,15 @@ class MLDConformance:
 
         print()
         if command_fail_count == 0 and failure_count == 0 and analysis_ok:
-            print(f"[{tag}] OK ({result_details})\n")
+            print(
+                f"[{tag}] OK (ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, MLD CORRIDOR FAILURES={failure_count})\n"
+            )
             return True
 
-        print(f"[{tag}] FAILED ({result_details})")
-        if self.args.be_test and be_failure_csv is not None:
-            print(f"[{tag}] BE test failed, check {be_failure_csv}")
-        elif worst_failure is not None:
+        print(
+            f"[{tag}] FAILED (ERRORS={command_fail_count}, BE={be_count}, NON-BE={non_be_count}, MLD CORRIDOR FAILURES={failure_count})"
+        )
+        if worst_failure is not None:
             print(
                 f"[{tag}] Worst MLD corridor failure: {worst_failure['prefix']} {worst_failure['tag']} "
                 f"(NON-BE, MLD_MAX={worst_failure['mld']})"
@@ -1444,7 +1471,7 @@ class MLDConformance:
         contextPrefix: str = "",
         emitConsole: bool = True,
         returnOutput: bool = False,
-    ) -> int:
+    ) -> Union[int, Tuple[int, str]]:
         contextPrefix = contextPrefix or (f"[{tag}]" if tag else "")
         return self._process(
             command,
@@ -1463,12 +1490,14 @@ class MLDConformance:
         contextPrefix: str = "",
         emitConsole: bool = True,
         returnOutput: bool = False,
-    ) -> int:
+    ) -> Union[int, Tuple[int, str]]:
         prefix = (contextPrefix + " ") if contextPrefix else ""
         if self.args.verbose and emitConsole:
             print(f"{prefix}Command: {command}", flush=True)
         if self.args.dryrun:
             self.appendRunlog(command=command)
+            if returnOutput:
+                return 0, ""
             return 0
 
         c = subprocess.run(
@@ -1560,7 +1589,7 @@ class MLDConformance:
                 if not row:
                     continue
 
-                # vlad: MASA MD CSV columns are: frame index, then per-frame diff metrics.
+                # MASA MD CSV columns are: frame index, then per-frame diff metrics.
                 # We Use a single scalar per frame (max abs over all diff columns)
                 # so corridor handling is the same as  MLD's one-value-per-frame pipeline.
                 metric_values = []
@@ -1645,7 +1674,7 @@ class MLDConformance:
                 )
                 return None, (command_str, err_msg)
 
-            # vlad: a Non-zero return code from masaDiffTool can indicate detected differences,
+            # A non-zero return code from masaDiffTool can indicate detected differences,
             # but which is valid for non-BE analysis. Treat missing/invalid CSV as failure.
             if frame_scores.size == 0:
                 self.appendFailed(
@@ -1851,6 +1880,12 @@ class MLDConformance:
         keys = IVAS_Bins.keys() if selectTag == "all" else [selectTag]
         for tag in keys:
             if os.path.exists(self.BEcsv[tag]):
+                # For filtered runs it is valid to have zero selected tests for a tag.
+                # In that case the BE csv contains only the header; skip loadtxt to avoid warnings.
+                with open(self.BEcsv[tag], "r") as f:
+                    populated_rows = sum(1 for row in f if row.strip())  # header included
+                if populated_rows <= 1:
+                    continue
                 BEresult = np.loadtxt(
                     self.BEcsv[tag],
                     delimiter=",",
@@ -1858,11 +1893,10 @@ class MLDConformance:
                     skiprows=1,
                     usecols=1,
                 )
-                if np.sum(BEresult) > 0:
-                    return False, self.BEcsv[tag]
-                return True, None
-
-        return True, None
+                # if np.sum(BEresult) > 0:
+                #     print(f"<{tag}> FAILED BE TEST, check {self.BEcsv[tag]}")
+                # else:
+                #     print(f"<{tag}> PASSED BE TEST")
 
     def computeCorridor(
         self,
@@ -1908,7 +1942,7 @@ class MLDConformance:
                 ref_preview = ", ".join(refTags[:3]) if ref_count else "<empty>"
                 dut_preview = ", ".join(dutTags[:3]) if dut_count else "<empty>"
                 warn_msg = (
-                    f"Warning: {tag} {metricLabel} corridor comparison skipped because reference and DUT frame tags do not match "
+                    f"Warning: {tag} corridor comparison skipped because reference and DUT frame tags do not match "
                     f"(ref_count={ref_count}, dut_count={dut_count}, ref_first=[{ref_preview}], dut_first=[{dut_preview}])."
                 )
                 print(f"\033[93m{warn_msg}\033[00m")
@@ -1926,6 +1960,7 @@ class MLDConformance:
         max_mld_value = None
         keys = IVAS_Bins.keys() if selectTag == "all" else [selectTag]
         for tag in keys:
+            mld_header_printed = False
             mdlCutWithTags = None
             if os.path.exists(self.mldcsv[tag]) and os.path.getsize(self.mldcsv[tag]) > 0:
                 mdlCutWithTags = np.loadtxt(
@@ -1970,6 +2005,7 @@ class MLDConformance:
                         m5 = np.sum(mdlValues <= 5.0)
 
                         PCNT = lambda num: int(1000 * num / N) / 10.0
+                        mld_header_printed = True
                         print(f"\n##########################################################")
                         print(f"<{tag}> Total Frames: {N}")
                         print(f"<{tag}> MAX MLD across all frames : {mdlValues.max()}")
@@ -2042,6 +2078,15 @@ class MLDConformance:
                 )
                 masaMDCutWithTags = np.atleast_1d(masaMDCutWithTags)
 
+            will_print_masa = (
+                tag in ReferenceMasaMDFiles
+                and masaMDCutWithTags is not None
+                and masaMDCutWithTags.size > 0
+            )
+            if mld_header_printed and not will_print_masa:
+                print("##########################################################\n")
+                mld_header_printed = False
+
             if tag in ReferenceMasaMDFiles:
                 if masaMDCutWithTags is not None and masaMDCutWithTags.size > 0:
                     masaMDValues = masaMDCutWithTags["MASA"]
@@ -2067,6 +2112,7 @@ class MLDConformance:
                         print(
                             f"<{tag}> Frames with MASA metadata diff <= 1 : {m1} frames ({PCNT(m1)}%)"
                         )
+                        mld_header_printed = False
                         print("##########################################################\n")
 
                 if self.args.regenerate_mld_ref:
@@ -2118,12 +2164,8 @@ class MLDConformance:
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="Compare .wav files in two folders using mld per frame"
-    )
-    parser.add_argument(
-        "--version",
-        action="version",
-        version=f"%(prog)s {PROGRAM_VERSION}",
+        description="Compare .wav files in two folders using mld per frame",
+        formatter_class=argparse.RawTextHelpFormatter
     )
 
     parser.add_argument(
@@ -2171,16 +2213,30 @@ if __name__ == "__main__":
 
     parser.add_argument(
         "--filter",
-        type=str,
-        default=None,
-        help="Filter test based on text provided",
-    )
-    parser.add_argument(
-        "--test-mode",
-        type=str,
+        nargs="+",
         default=None,
-        choices=["ENC", "DEC", "REND", "ISAR", "ISAR_ENC"],
-        help='Choose one test group to run ["ENC", "DEC", "REND", "ISAR", "ISAR_ENC"]. If omitted, all are run.',
+        metavar="TOKEN",
+        help=(
+            "Select which tests to run. Default (no --filter): all test groups are run.\n"  # no tag token -> every IVAS_Bins tag
+            "\n"
+            "Token types:\n"
+            "  ENC, DEC, REND, ISAR, ISAR_ENC      — Test groups.\n"
+            "  MONO, STEREO, EXT, HOA, SBA, MC     — Output formats. Aliases: HOA→{HOA2,HOA3}, SBA→{FOA,HOA2,HOA3}, MC→{5_1,7_1,5_1_4,5_1_2,7_1_4}.\n"
+            "  (any other)                         — Substring match (case-insensitive). Multiple terms combine with AND.\n"
+            "\n"
+            "Token modifiers:\n"
+            "  +TOKEN                              — Add matching tests to selection (even if they would be excluded).\n"
+            "  -TOKEN                              — Remove matching tests from selection.\n"
+            "  TOKEN*                              — Wildcard: match all tokens starting with TOKEN (e.g., ISAR* → {ISAR, ISAR_ENC}; BINAURAL* → {BINAURAL, BINAURAL_IR, BINAURAL_ROOM_IR, ...}).\n"
+            "  +TOKEN*, -TOKEN*                    — Wildcard with add/remove modifiers (e.g., +ISAR*, -BINAURAL*).\n"
+            "\n"
+            "Examples:\n"
+            "  --filter DEC HOA                    Run DEC tests with HOA2/HOA3 outputs.\n"
+            "  --filter DEC -voip                  Run DEC tests except those matching 'voip'.\n"
+            "  --filter ISAR*                      Run only the ISAR and ISAR_ENC groups (wildcard expansion).\n"  # unsigned tag tokens restrict
+            "  --filter DEC +BINAURAL*             Run DEC + all BINAURAL output formats.\n"
+            "  --filter +ISAR* -voip               Add ISAR/ISAR_ENC groups then remove 'voip' tests.\n"
+        ),
     )
     parser.add_argument(
         "--be-test",
@@ -2207,11 +2263,10 @@ if __name__ == "__main__":
         help="Do not run DUT, use existing mld and bitdiff stats files to generate analysis only",
     )
     parser.add_argument(
-        "-c",
         "--clean-output-dir",
         default=False,
         action="store_true",
-        help="Do not run DUT, use existing mld and bitdiff stats files to generate analysis only",
+        help="Delete and recreate the CUT_OUTPUTS directory before running, discarding any previous DUT outputs",
     )
     parser.add_argument(
         "--regenerate-mld-ref",
@@ -2219,7 +2274,21 @@ if __name__ == "__main__":
         action="store_true",
         help="Run analysis and unconditionally regenerate mld_ref2 files for all tags",
     )
-    args = parser.parse_args()
+    # Preprocess sys.argv to handle --filter with minus-prefixed tokens like -JBM
+    modified_argv = _preprocess_filter_args()
+    args = parser.parse_args(modified_argv[1:])  # Skip program name; parse_args expects args without it
+
+    # Unescape filter tokens that were escaped during sys.argv preprocessing.
+    # NOTE(review): _preprocess_filter_args() presumably swaps minus-prefixed
+    # tokens (e.g. -JBM) for placeholder keys recorded in _FILTER_ESCAPES so that
+    # argparse does not treat them as option flags - confirm against that helper.
+    if args.filter:
+        # Map every placeholder back to the token the user typed; tokens that
+        # were never escaped have no entry and are passed through unchanged.
+        args.filter = [
+            _FILTER_ESCAPES[tok] if tok in _FILTER_ESCAPES else tok
+            for tok in args.filter
+        ]
 
     if not os.path.isdir(args.testvecDir):
         parser.error(
@@ -2231,19 +2300,142 @@ if __name__ == "__main__":
     if args.cut_build_path:
         validate_build_binaries(parser, args.cut_build_path, "CUT")
 
+    # Parse --filter into optional tag selection + optional format/substring filters.
+    raw_filter = " ".join(args.filter) if args.filter else ""
+    filter_tokens = [tok for tok in re.split(r"[\s,]+", raw_filter.strip()) if tok]
+
+    valid_tags = set(IVAS_Bins.keys())
+    valid_decoder_formats = set(DECODER_OUTPUT_FORMATS).union(
+        DECODER_OUTPUT_FORMAT_ALIASES.keys()
+    )
+
+    tag_tokens = []
+    tag_add_tokens = []
+    tag_remove_tokens = []
+    decoder_format_tokens = []
+    restrictive_terms = []
+    additive_terms = []
+    subtractive_terms = []
+
+    for tok in filter_tokens:
+        sign = ""
+        base_tok = tok
+        if len(tok) > 1 and tok[0] in {"+", "-"}:
+            sign = tok[0]
+            base_tok = tok[1:]
+
+        upper_tok = base_tok.upper()
+
+        # Prefix wildcard selection for tags and decoder output formats, e.g. ISAR*, BINAURAL*.
+        if upper_tok.endswith("*") and len(upper_tok) > 1:
+            prefix = upper_tok[:-1]
+            matched_tags = sorted(t for t in valid_tags if t.startswith(prefix))
+            matched_format_tokens = sorted(
+                f for f in valid_decoder_formats if f.startswith(prefix)
+            )
+
+            if not matched_tags and not matched_format_tokens:
+                parser.error(
+                    f"Wildcard token '{tok}' did not match any known tag or decoder output format."
+                )
+
+            if sign == "+":
+                tag_add_tokens.extend(matched_tags)
+            elif sign == "-":
+                tag_remove_tokens.extend(matched_tags)
+            else:
+                tag_tokens.extend(matched_tags)
+
+            expanded_wildcard_formats = []
+            for fmt in matched_format_tokens:
+                expanded_wildcard_formats.extend(
+                    sorted(DECODER_OUTPUT_FORMAT_ALIASES.get(fmt, {fmt}))
+                )
+
+            if sign == "+":
+                additive_terms.extend(expanded_wildcard_formats)
+            elif sign == "-":
+                subtractive_terms.extend(expanded_wildcard_formats)
+            else:
+                decoder_format_tokens.extend(expanded_wildcard_formats)
+
+            continue
+
+        if upper_tok in valid_tags:
+            if sign == "+":
+                tag_add_tokens.append(upper_tok)
+            elif sign == "-":
+                tag_remove_tokens.append(upper_tok)
+            else:
+                tag_tokens.append(upper_tok)
+        elif upper_tok in valid_decoder_formats:
+            # Exact decoder output format or alias name (no wildcard). Aliases such as
+            # MC or SBA are expanded to their concrete formats for signed tokens too,
+            # matching what the wildcard branch above does; previously +MC / -MC were
+            # stored as the raw text "MC" instead of the aliased formats.
+            expanded_formats = sorted(DECODER_OUTPUT_FORMAT_ALIASES.get(upper_tok, {upper_tok}))
+            signed_targets = {"+": additive_terms, "-": subtractive_terms}
+            signed_targets.get(sign, decoder_format_tokens).extend(expanded_formats)
+        else:
+            if sign == "+":
+                additive_terms.append(base_tok)
+            elif sign == "-":
+                subtractive_terms.append(base_tok)
+            else:
+                restrictive_terms.append(base_tok)
+
+    # Preserve order while removing duplicates.
+    tag_tokens = list(dict.fromkeys(tag_tokens))
+    tag_add_tokens = list(dict.fromkeys(tag_add_tokens))
+    tag_remove_tokens = list(dict.fromkeys(tag_remove_tokens))
+    decoder_format_tokens = list(dict.fromkeys(decoder_format_tokens))
+
+    # Default with no --filter: run all tags.
+    # Plain tag_tokens restrict selected tags; +tag_tokens add tags.
+    if tag_tokens:
+        selected_tag_set = set(tag_tokens)
+    else:
+        selected_tag_set = set(IVAS_Bins.keys())
+    # Signed tag tokens adjust the base selection: additions first, removals last.
+    selected_tag_set.update(tag_add_tokens)
+    # difference_update ignores tags that are not selected, like discard() does.
+    selected_tag_set.difference_update(tag_remove_tokens)
+    testTags = [tag for tag in IVAS_Bins.keys() if tag in selected_tag_set]
+
+    args.filter_display = raw_filter if raw_filter else None
+    args.filter_restrictive_terms = restrictive_terms
+    args.filter_add_terms = additive_terms
+    args.filter_remove_terms = subtractive_terms
+    args.filter_decoder_formats = decoder_format_tokens
+
     conformance = MLDConformance(args)
     conformance.accumulateCommands()
 
+    if args.filter_display:
+        print(f"Applying filter: {args.filter_display}")
+        print()
+
     if args.regenerate_enc_refs:
+        print()
+        print("Reference regeneration targets:")
+        print(f"    ISAR_ENC : {len(conformance.TestDesc['ISAR_ENC'])}")
+        print(f"    ENC : {len(conformance.TestDesc['ENC'])}")
+        print()
         conformance.runReferenceGeneration(encTag="ISAR_ENC")
         conformance.runReferenceGeneration(encTag="ENC")
         sys.exit(0)
 
+    print()
+    print("No of tests:")
+    for tag in IVAS_Bins.keys():
+        n = len(conformance.getSelectedTestsForTag(tag)) if tag in testTags else 0
+        print(f"    {tag} : {n}")
+    print()
+
     # If --regenerate-mld-ref is set, treat as --analyse
     if args.regenerate_mld_ref:
         args.analyse = True
 
-    testTags = IVAS_Bins.keys() if args.test_mode is None else [args.test_mode]
     tag_results = {}
     for tag in testTags:
         if args.report_only:
@@ -2261,3 +2453,5 @@ if __name__ == "__main__":
         for tag in testTags:
             tag_status = "OK" if tag_results.get(tag, False) else "FAILED"
             print(f"[{tag}] {tag_status}")
+        print()
+