Commit 6379031b authored by Jan Kiene's avatar Jan Kiene
Browse files

Merge branch 'ci/refactor-histogram-creation' into kiene/tmp-branch-for-ltv-split-testing

parents d5b5b8ec e96ede2c
Loading
Loading
Loading
Loading
+0 −184
Original line number Diff line number Diff line
#!/usr/bin/env python3

import argparse
import math
import numpy as np

# These next three lines are added as a precaution in case the gitlab runner
# needs DISPLAY to render the plots, even if they are written to file.
import matplotlib

matplotlib.use("Agg")
import matplotlib.pyplot as plt
import csv
import os
from parse_xml_report import IVAS_FORMATS, EVS_FORMATS, IVAS_CATEGORIES, EVS_CATEGORIES

"""
Parses a CSV report and creates a summary report.
"""


# Main routine: parse CLI arguments, bin the requested measure per
# format/category, write a CSV summary and (optionally) one stacked
# histogram image per format.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Parses a CSV report and creates a summary report."
    )
    parser.add_argument(
        "csv_report",
        type=str,
        help="CSV report file of test cases, e.g. report.csv",
    )
    parser.add_argument(
        "csv_summary", type=str, help="Output CSV file, e.g. summary.csv"
    )
    parser.add_argument(
        "csv_image",
        type=str,
        nargs="?",
        help="Summary image file, e.g. summary.png",
        default=None,
    )
    parser.add_argument(
        "--measure",
        type=str,
        nargs=1,
        help="Measure, any of: MLD, DIFF, SSNR, ODG, default: MLD",
        default=["MLD"],
    )
    parser.add_argument(
        "--evs",
        action="store_true",
        help="Parse using EVS 26.444 formats",
        default=False,
    )
    parser.add_argument(
        "--diff",
        action="store_true",
        help="Use limits for diff scores",
        default=False,
    )
    args = parser.parse_args()
    csv_report = args.csv_report
    csv_summary = args.csv_summary
    csv_image = args.csv_image
    measure = args.measure[0]

    # Select the format/category regex tables for the requested test suite.
    if args.evs:
        FORMATS = EVS_FORMATS
        CATEGORIES = EVS_CATEGORIES
    else:
        FORMATS = IVAS_FORMATS
        CATEGORIES = IVAS_CATEGORIES

    # Map measure name -> (CSV column label, histogram bin edges).
    # In --diff mode the edges are derived from the data further below (None).
    if args.diff:
        limits_per_measure = {
            "MLD": ("MLD", None),
            "DIFF": ("MAXIMUM ABS DIFF", None),
            "SSNR": ("MIN_SSNR", None),
            "ODG": ("MIN_ODG", None),
            "DELTA_ODG": ("DELTA_ODG", None),
        }
    else:
        limits_per_measure = {
            "MLD": ("MLD", [0, 1, 2, 3, 4, 5, 10, 20, math.inf]),
            "DIFF": (
                "MAXIMUM ABS DIFF",
                [0, 16, 256, 1024, 2048, 4096, 8192, 16384, 32769],
            ),
            # BUGFIX: the edge list contained 40 twice, which produced an
            # always-empty zero-width histogram bin; the duplicate is removed.
            "SSNR": ("MIN_SSNR", [-math.inf, 0, 10, 20, 30, 40, 50, 60, 100]),
            "ODG": (
                "MIN_ODG",
                [-5, -4, -3, -2, -1, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.5],
            ),
            "DELTA_ODG": (
                "DELTA_ODG",
                [-5, -4, -3, -2, -1, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.5],
            ),
        }
    (measure_label, limits) = limits_per_measure[measure]

    # Load CSV report into {testcase: {column header: value}}.
    results_sorted = {}
    with open(csv_report, "r") as fp:
        reader = csv.reader(fp, delimiter=";")
        header = next(reader)
        keys = header[1:]
        for row in reader:
            testcase = row[0]
            results_sorted[testcase] = {}
            for k, val in zip(keys, row[1:]):
                results_sorted[testcase][k] = val

    # --diff mode: derive bin edges from the data.  Round the start down and
    # the step up to two significant digits, then take 10 equidistant edges.
    if limits is None:
        vals = [
            float(x)
            for x in [
                m[measure_label]
                for m in results_sorted.values()
                if m[measure_label] != "None" and m[measure_label] != ""
            ]
        ]
        start = min(vals)
        f = 10 ** (2 - int(np.floor(np.log10(abs(start)))) - 1)
        start = np.floor(start * f) / f
        step = (max(vals) - start) / 10
        f = 10 ** (2 - int(np.floor(np.log10(abs(step)))) - 1)
        step = np.ceil(step * f) / f
        # BUGFIX: the stop value must be relative to start; the original
        # np.arange(start, 10 * step, step) produced a wrong (possibly empty)
        # edge list whenever start was not close to zero.
        limits = np.arange(start, start + 10 * step, step)

    # Output CSV file and per-format stacked histograms.
    with open(csv_summary, "w") as fp:
        limits_labels = [f"{a:g}" for a in limits] + [
            "",
            "None",
        ]  # Put None cases in separate bin
        headerline = "Format;Category;" + ";".join(limits_labels) + "\n"
        fp.write(headerline)

        for fmt in FORMATS:
            fig, ax = plt.subplots()
            bottom = np.zeros(len(limits_labels))
            for cat in CATEGORIES:
                values = [
                    x
                    for x in [
                        m[measure_label]
                        for m in results_sorted.values()
                        if m["Format"] == fmt and m["Category"] == cat
                    ]
                ]
                # Create separate bin for None (errors)
                val = [float(x) for x in values if x != "None" and x != ""]
                none = [sum([1 for x in values if x == "None" or x == ""])]
                hist, _ = np.histogram(val, limits)
                data = np.array(list(hist) + [0] + none + [0])

                # CSV output.  BUGFIX: join with ";" like the header line;
                # the original used "; " which put stray spaces in the cells.
                line = f"{fmt};{cat};{';'.join(map(str, data))}\n"
                fp.write(line)

                # Matplotlib histogram: stack categories on top of each other.
                ax.bar(
                    limits_labels,
                    data,
                    1,
                    align="edge",
                    edgecolor="black",
                    linewidth=0.5,
                    label=cat,
                    bottom=bottom,
                )
                bottom += data

            # Histogram layout
            ax.set_title(fmt)
            ax.legend(loc="best")
            ax.set_xlabel(measure_label)
            if "DIFF" in measure_label:
                ax.set_xticks(range(len(limits_labels)), limits_labels, rotation=35)
            ax.set_ylabel("Number of test cases")

            fig.set_figheight(4)
            fig.set_figwidth(6)
            if csv_image:
                base, ext = os.path.splitext(csv_image)
                plt.savefig(f"{base}_{fmt}{ext}")
            # Release the figure so iterating many formats does not
            # accumulate open matplotlib figures.
            plt.close(fig)
+141 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3

import argparse
import math
import pathlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from typing import List


# Fixed histogram bin edges per quality measure.
# BUGFIX: MIN_SSNR contained the edge 40 twice, which created a zero-width,
# always-empty bin in np.histogram; the duplicate is removed.
BINS_FOR_MEASURES = {
    "MLD": [0, 1, 2, 3, 4, 5, 10, 20, math.inf],
    "MAX_ABS_DIFF": [0, 16, 256, 1024, 2048, 4096, 8192, 16384, 32769],
    "MIN_SSNR": [-math.inf, 0, 10, 20, 30, 40, 50, 60, 100],
    "MIN_ODG": [-5, -4, -3, -2, -1, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.5],
    "DELTA_ODG": [-5, -4, -3, -2, -1, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.5],
}

# Measures plotted when --measures is not given on the command line.
DEFAULT_MEASURES = ["MAX_ABS_DIFF", "MLD", "MIN_SSNR", "MIN_ODG"]


def get_bins_for_diff(data: pd.Series):
    """Return 10 equally spaced bin edges spanning the value range of *data*."""
    lowest = data.min()
    highest = data.max()
    return np.linspace(lowest, highest, num=10)


def create_histograms(
    df: pd.DataFrame,
    measures: List[str],
    output_folder: pathlib.Path,
    display_only: bool,
    bins_for_measures=BINS_FOR_MEASURES,
    prefix="",
):
    """Create one stacked histogram per (measure, format) from the report.

    For every measure one figure per format is drawn, with a stacked bar
    series per category.  Test cases whose ``result`` column is "ERROR" are
    counted into a dedicated "ERROR" bin instead of the value histogram.

    Parameters
    ----------
    df : pd.DataFrame
        Report table; must contain the columns "format", "category",
        "result" and one column per measure (named ``prefix + measure``).
    measures : List[str]
        Measure names to plot.
    output_folder : pathlib.Path
        Folder the PNG files are written to (created if missing).
    display_only : bool
        If True, show the figures interactively instead of writing files.
    bins_for_measures : dict
        Measure name -> bin edges; measures without an entry fall back to
        data-driven bins via get_bins_for_diff().
    prefix : str
        Common column prefix used when collecting measures from *df*.
    """
    formats = df["format"].unique()
    categories = df["category"].unique()

    if not display_only:
        output_folder.mkdir(exist_ok=True, parents=True)

    for measure in measures:
        measure_in_df = prefix + measure
        # BUGFIX: dict.get() evaluated the data-driven fallback eagerly for
        # every measure (wasted work, and a KeyError if the column is absent
        # even though fixed bins exist).  Only derive bins when needed.
        if measure in bins_for_measures:
            bins = bins_for_measures[measure]
        else:
            bins = get_bins_for_diff(df[measure_in_df])
        # One x-axis label per bin edge, plus an empty spacer and "ERROR".
        bin_labels = [f"{edge}" for edge in bins] + ["", "ERROR"]
        for fmt in formats:
            fig, ax = plt.subplots()
            ax.xaxis.set_major_formatter("{x:.1f}")
            bottom = np.zeros(len(bin_labels))
            for cat in categories:
                data_mask = np.logical_and(df["format"] == fmt, df["category"] == cat)
                df_slice = df[data_mask]
                error_mask = df_slice["result"] == "ERROR"
                n_errors = np.sum(error_mask)
                df_hist = df_slice[np.logical_not(error_mask)]

                counts, _ = np.histogram(df_hist[measure_in_df], bins)

                # BUGFIX: np.concat only exists as an alias in NumPy >= 2.0;
                # np.concatenate is the portable spelling.
                data = np.concatenate([counts, [0], [n_errors], [0]])
                ax.bar(
                    bin_labels,
                    data,
                    1,
                    align="edge",
                    edgecolor="black",
                    linewidth=0.5,
                    label=cat,
                    bottom=bottom,
                )
                bottom += data

            # Histogram layout
            ax.set_title(fmt)
            ax.legend(loc="best")
            ax.set_xlabel(measure)
            if "DIFF" in measure:
                ax.set_xticks(range(len(bin_labels)), bin_labels, rotation=35)
            ax.set_ylabel("Number of test cases")

            fig.set_figheight(4)
            fig.set_figwidth(6)
            plt.tight_layout()

            if not display_only:
                image_file = f"histogram_{measure}_{fmt}.png"
                image_path = output_folder.joinpath(image_file)
                plt.savefig(image_path)
                # Release the saved figure so long runs do not accumulate
                # open matplotlib figures.
                plt.close(fig)

    if display_only:
        plt.show()


# CLI entry point: read the CSV report and delegate to create_histograms().
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Parses a csv file generated by parse_xml_report and creates histograms for the given measures."
    )
    parser.add_argument(
        "csv_report",
        type=str,
        help="CSV report file as generated by parse_xml_report.py",
    )
    parser.add_argument(
        "output_folder",
        type=pathlib.Path,
        # BUGFIX: the help text was cut off mid-sentence.
        help="Output folder for writing the histogram images",
    )
    parser.add_argument(
        "--display-only",
        action="store_true",
        help="Do not write the output files, but display the graphs instead.",
    )
    parser.add_argument(
        "--no-bins",
        action="store_true",
        # BUGFIX: this script creates histograms, not spectrograms.
        help="""Do not use the hardcoded bins for creating the histograms.
Use this for visualising diff scores.""",
    )
    allowed_measures = " ".join(BINS_FOR_MEASURES.keys())
    parser.add_argument(
        "--measures",
        nargs="+",
        default=DEFAULT_MEASURES,
        help=f"Measures to plot from the csv file. One of {allowed_measures}",
    )
    parser.add_argument(
        "--prefix",
        default="",
        # BUGFIX: the option is a prefix; the help text said "suffix".
        help="Common prefix to use when collecting measures from the input csv file",
    )
    args = parser.parse_args()
    df = pd.read_csv(args.csv_report)

    # With --no-bins an empty table forces data-driven bins for all measures.
    bins_for_measures = BINS_FOR_MEASURES
    if args.no_bins:
        bins_for_measures = {}

    create_histograms(
        df,
        args.measures,
        args.output_folder,
        args.display_only,
        bins_for_measures,
        args.prefix,
    )
+121 −185
Original line number Diff line number Diff line
#!/usr/bin/env python3

import argparse
import re
import pandas as pd
from xml.etree import ElementTree
from collections import Counter


SPLIT_STRING = "_split"
WHOLE_STRING = "_whole"


class TestcaseParser(dict):
    def __init__(self, testcases: list):
        super().__init__()

        for tc in testcases:
            self.parse_testcase(tc)

    def parse_testcase(self, testcase):
        """
Parse a junit report and create a summary report.
        Get all properties + name for a testcase
        """

PROPERTIES = ["MLD", "MAXIMUM ABS DIFF", "MIN_SSNR", "MIN_ODG"]

IVAS_FORMATS = {
    "Stereo": r"stereo",
    "ISM": r"ISM",
    "Multichannel": r"Multi-channel|MC",
    "MASA": r"(?<!O)MASA",
    "SBA": r"(?<!O)SBA",
    "OSBA": r"OSBA",
    "OMASA": r"OMASA",
    "Renderer": r"renderer",
}
        filename = testcase.get(
            "file", testcase.get("classname").replace(".", "/") + ".py"
        )
        fulltestname = filename + "::" + testcase.get("name")

EVS_FORMATS = {
    "AMRWBIO_dec": r"Readme_AMRWB_IO_dec",
    "AMRWBIO_enc": r"Readme_AMRWB_IO_enc",
    "EVS_dec": r"Readme_EVS_dec",
    "EVS_enc": r"Readme_EVS_enc",
    "EVS_JBM_dec": r"Readme_JBM_dec",
        result = get_result_from_testcase(testcase)
        # for ERRORS, two testcases are recorded, one with FAIL and one with ERROR
        # if we already have this testcase, do a sanity check and set result to ERROR
        if fulltestname in self:
            results = [self[fulltestname]["result"], result]
            assert any(r == "ERROR" for r in results)
            self[fulltestname]["result"] = "ERROR"
            return

        ret = {}
        ret["testcase"] = fulltestname
        ret["result"] = result
        properties = {
            p.get("name"): p.get("value") for p in testcase.findall(".//property")
        }

NO_FORMATS = {"Default": r".*"}
        ### handle split comparison results
        split_props = {k: v for k, v in properties.items() if SPLIT_STRING in k}
        whole_props = {k: v for k, v in properties.items() if WHOLE_STRING in k}

IVAS_CATEGORIES = {
    "Normal operation": r".*",
    "DTX": r"DTX",
    "PLC": r"%",
    "Bitrate switching": r"br sw|bitrate switching",
    "JBM": r"JBM",
}
        if len(split_props) > 0 and len(whole_props) > 0:
            measures_from_split = set(
                [m.split(SPLIT_STRING)[0] for m in split_props.keys()]
            )
            measures_from_whole = set(
                [m.split(WHOLE_STRING)[0] for m in whole_props.keys()]
            )
            assert measures_from_split == measures_from_whole
            measures = measures_from_whole

            # collect existing split suffixes by evaluating one of the measures only
            m_tmp = measures.pop()
            splits = sorted(
                [
                    k.split(SPLIT_STRING)[-1]
                    for k in split_props.keys()
                    if k.startswith(m_tmp)
                ]
            )

EVS_CATEGORIES = {
    "Normal operation": r".*",
    "DTX": r"DTX",
    "PLC": r"b10|f06|EPF",
    "Bitrate switching": r"sw",
    "JBM": r"JBM",
            # record each split under a separate key
            # the dict per key has the same fulltestname and an additional key "split"
            # this way, the resulting DataFrame in the end can be split by testnames
            for s in splits:
                split_key = f"{fulltestname} - {s}"
                ret_split = {"testcase": fulltestname, "split": s}
                for m in measures:
                    ret_split.update({m: split_props[m + SPLIT_STRING + f"{s}"]})
                self[split_key] = ret_split

        # it can be the case that there are no splits defined in the pytest suite, e.g. for the renderer
        # then, there are only "_whole" values recorded where we only need to remove the suffix
        # this if also handles the split case - if there are splits, there was also a "_whole" comparison done
        if len(whole_props) > 0:
            properties = {
                k.replace(WHOLE_STRING, ""): v for k, v in whole_props.items()
            }
            properties["split"] = "whole"

NO_CATEGORIES = {"N/A": r".*"}
        ret.update(properties)
        self[fulltestname] = ret

    def to_df(self) -> pd.DataFrame:
        testcases = list(self.values())
        df = pd.DataFrame(testcases)
        return df

def get_format_from_fulltestname(fulltestname: str) -> str:
    # For the format, favor the earliest match in the test case name
    fmt = min(
        [
            (f, re.search(FORMATS[f], fulltestname, re.IGNORECASE).end())
            for f in FORMATS
            if re.search(FORMATS[f], fulltestname, re.IGNORECASE) is not None
        ],
        key=lambda x: x[1],
    )[0]
    return fmt

def xml_to_dataframe(xml_report: str) -> pd.DataFrame:
    tree = ElementTree.parse(xml_report)
    root = tree.getroot()

    testcases = root[0].findall("testcase")
    testcases = [tc for tc in testcases if tc.find("skipped") is None]

    testcase_parser = TestcaseParser(testcases)
    testcase_df = testcase_parser.to_df()

def get_category_from_fulltestname(fulltestname: str) -> str:
    cat = [
        c for c in CATEGORIES if re.search(CATEGORIES[c], fulltestname, re.IGNORECASE)
    ][-1]
    return cat
    return testcase_df


def get_testresult(testcase: ElementTree.Element) -> str:
def get_result_from_testcase(testcase: ElementTree.Element) -> str:
    if testcase.find("failure") is not None:
        testresult = "FAIL"
    elif testcase.find("error") is not None:
@@ -81,10 +119,21 @@ def get_testresult(testcase: ElementTree.Element) -> str:
    return testresult


# Main routine
def main(xml_report, csv_file):
    df = xml_to_dataframe(xml_report)
    df.to_csv(csv_file, index=False)

    n_testcases = len(df)
    count = Counter(df["result"])

    print(
        f"Parsed testsuite with {n_testcases} tests: {count['PASS']} passes, {count['FAIL']} failures and {count['ERROR']} errors."
    )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Parse a junit report and create an MLD summary report."
        description="Parse junit report from IVAS pytest suite and convert to csv file"
    )
    parser.add_argument(
        "xml_report",
@@ -92,119 +141,6 @@ if __name__ == "__main__":
        help="XML junit report input file, e.g. report-junit.xml",
    )
    parser.add_argument("csv_file", type=str, help="Output CSV file, e.g. report.csv")
    parser.add_argument(
        "--evs",
        action="store_true",
        help="Parse using EVS 26.444 formats",
    )
    parser.add_argument(
        "--clipping",
        action="store_true",
        help="Extract clipping information. Available if encoder has been run with DEBUGGING active.",
    )
    parser.add_argument(
        "--delta_odg",
        action="store_true",
        help="Extract Delta ODG information.",
    )
    parser.add_argument(
        "--skip_formats",
        action="store_true",
        help="Parse without formats and categories. Suitable for general tests which do not match the IVAS categories.",
    )
    args = parser.parse_args()
    xml_report = args.xml_report
    csv_file = args.csv_file
    FORMATS = IVAS_FORMATS
    CATEGORIES = IVAS_CATEGORIES
    if args.evs:
        FORMATS = EVS_FORMATS
        CATEGORIES = EVS_CATEGORIES
    else:
        FORMATS = IVAS_FORMATS
        CATEGORIES = IVAS_CATEGORIES
    if args.clipping:
        PROPERTIES += ["ENC_CORE_OVL", "MAX_OVL", "MIN_OVL"]
    if args.delta_odg:
        PROPERTIES += ["DELTA_ODG"]
    if args.skip_formats:
        FORMATS = NO_FORMATS
        CATEGORIES = NO_CATEGORIES

    tree = ElementTree.parse(xml_report)

    testsuite = tree.find(".//testsuite")
    testcases = tree.findall(".//testcase")

    # Prepare result structure
    results = {}
    for fmt in FORMATS:
        results[fmt] = {}
        for cat in CATEGORIES:
            results[fmt][cat] = {}
    count = {"PASS": 0, "FAIL": 0, "ERROR": 0}

    # filter out skipped testcases
    testcases = [tc for tc in testcases if tc.find(".//skipped") is None]

    for testcase in testcases:
        filename = testcase.get(
            "file", testcase.get("classname").replace(".", "/") + ".py"
        )
        fulltestname = filename + "::" + testcase.get("name")

        # only include the properties listed above
        # we need to find all occurences with any suffixes to also handle the split-comparison
        # runs correctly
        properties_found = {
            p.get("name"): p.get("value")
            for p in testcase.findall(".//property")
            if "CHANNEL" not in p.get("name")
            and any(p_listed in p.get("name") for p_listed in PROPERTIES)
        }

        # Identify format and category (mode of operation)
        # For the format, favor the earliest match in the test case name
        fmt = get_format_from_fulltestname(fulltestname)
        # Note that only one category is selected, even though several may match, e.g. bitrate switching + JBM. Here the last match is picked.
        cat = get_category_from_fulltestname(fulltestname)

        testresult = get_testresult(testcase)

        # get all present suffixes
        pattern = re.compile("|".join(PROPERTIES))
        suffixes = set(pattern.sub("", p) for p in properties_found)

        # record the result for all suffixes
        # For ERROR cases, both a FAIL and an ERROR result is generated.
        # Here, a FAIL would be overwritten with an ERROR result since it has the same name.
        for s in suffixes:
            fulltestname_suffix = f"{fulltestname}{s}"
            results[fmt][cat][fulltestname_suffix] = {"Result": testresult}
            for propertyname in PROPERTIES:
                results[fmt][cat][fulltestname_suffix][propertyname] = properties_found[
                    f"{propertyname}{s}"
                ]
        count[testresult] += 1

    header = ["testcase", "Format", "Category", "Result"] + PROPERTIES

    # Write CSV file
    with open(csv_file, "w") as outfile:
        headerline = ";".join(header) + "\n"
        outfile.write(headerline)
        for fmt in FORMATS:
            for cat in CATEGORIES:
                results[fmt][cat] = dict(sorted(results[fmt][cat].items()))
                for test in results[fmt][cat]:
                    line = (
                        ";".join(
                            [test, fmt, cat] + list(results[fmt][cat][test].values())
                        )
                        + "\n"
                    )
                    outfile.write(line)

    print(
        f"Parsed testsuite with {count['PASS']+count['FAIL']+count['ERROR']} tests: {count['PASS']} passes, {count['FAIL']} failures and {count['ERROR']} errors."
    )
    args = parser.parse_args()
    main(args.xml_report, args.csv_file)
+28 −5
Original line number Diff line number Diff line
@@ -54,6 +54,7 @@ from tests.conftest import (
    compare_dmx_signals,
    log_dbg_msg,
    get_split_idx,
    get_format_from_enc_opts,
)
from tests.testconfig import PARAM_FILE
from tests.constants import (
@@ -61,6 +62,11 @@ from tests.constants import (
    MAX_ENC_STATS_DIFF,
    SCRIPTS_DIR,
    MAX_ENC_DIFF,
    CAT_NORMAL,
    CAT_DTX,
    CAT_BITRATE_SWITCHING,
    CAT_JBM,
    CAT_PLC,
)
from tests.renderer.utils import check_and_makedir, binauralize_input_and_output

@@ -298,6 +304,10 @@ def run_test(
                "All non-passthrough modes are skipped when --compare-to-input is set"
            )

    testcase_props = {}
    testcase_props["format"] = get_format_from_enc_opts(enc_opts)
    testcase_props["category"] = CAT_NORMAL

    tag_str = convert_test_string_to_tag(test_tag)

    # evaluate encoder options
@@ -317,6 +327,9 @@ def run_test(
    bitrate = enc_split.pop()
    in_sr = sampling_rate

    if "-dtx" in enc_opts:
        testcase_props["category"] = CAT_DTX

    # bitrate can be a filename: change it to an absolute path
    if not bitrate.isdigit():
        if compare_enc_dmx:
@@ -324,11 +337,20 @@ def run_test(
                "Rate switching + --compare_enc_dmx currently skipped due to DEBUGGING code limitations with varying number of transport channels"
            )
        bitrate = Path(bitrate[3:]).absolute()
        testcase_props["category"] = CAT_BITRATE_SWITCHING

    testv_base = testv_file.split("/")[-1]
    if testv_base.endswith(".pcm"):
        testv_base = testv_base[:-4]

    if sim_opts != "":
        testcase_props["category"] = CAT_JBM
    if eid_opts != "":
        testcase_props["category"] = CAT_PLC

    for k, v in testcase_props.items():
        dut_encoder_frontend.record_property(k, v)

    assert bitstream_file == "bit"
    # in the parameter file, only "bit" is used as bitstream file name
    # -> construct bitstream filename
@@ -375,8 +397,8 @@ def run_test(

            # avoid double recording of the encoder diff
            if encoder_only:
                props = parse_properties(cmp_result_msg, False, [MAX_ENC_DIFF])
                for k, v in props.items():
                result_props = parse_properties(cmp_result_msg, False, [MAX_ENC_DIFF])
                for k, v in result_props.items():
                    dut_encoder_frontend.record_property(k, v)

    if encoder_only:
@@ -417,7 +439,6 @@ def run_test(
        )

    # check for eid-xor command line

    if eid_opts != "":
        eid_split = eid_opts.split()
        assert len(eid_split) >= 3, "eid-xor expects at least 3 parameters"
@@ -654,8 +675,10 @@ def run_test(
        for output_differs, reason, suffix in zip(
            output_differs_parts, reason_parts, prop_suffix
        ):
            props = parse_properties(reason, output_differs, props_to_record, suffix)
            for k, v in props.items():
            result_props = parse_properties(
                reason, output_differs, props_to_record, suffix
            )
            for k, v in result_props.items():
                dut_decoder_frontend.record_property(k, v)

        metadata_differs = False
+54 −1

File changed.

Preview size limit exceeded, changes collapsed.

Loading