Commit ce97dbf5 authored by Jan Kiene's avatar Jan Kiene
Browse files

Merge branch 'ci/split-output-files-b4-comparison-fixed' into 'main'

[BASOP-CI] add split comparison for ltv files 2

See merge request !2089
parents db3a2561 dac4ceb6
Loading
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -37,6 +37,7 @@ Comparing:

<br>
<h2><a href="summary_{job_name}.html">Summary page</a></h2>
<h2><a href="summary__split_{job_name}.html">Split comparison summary page</a></h2>
<br>
<br>

+31 −30
Original line number Diff line number Diff line
@@ -4,11 +4,11 @@ from typing import List
from create_report_pages import SUBPAGE_TMPL_CSS, FORMATS


title = {
TITLE_4_MEASURE = {
    "MLD": "Maximum MLD across channels",
    "DIFF": "Maximim absolute difference across channels",
    "SSNR": "Minimum SSNR across channels",
    "ODG": "Minimum PEAQ ODG across channels",
    "MAX_ABS_DIFF": "Maximum absolute difference across channels",
    "MIN_SSNR": "Minimum SSNR across channels",
    "MIN_ODG": "Minimum PEAQ ODG across channels",
    "DELTA_ODG": "PEAQ ODG using binauralized input and output",
}

@@ -16,9 +16,13 @@ SUMMARY_PAGE_TMPL_HTML = """

<h1>Summary for job {job_name}, ID: {id_current}</h1>

<hr>

{images}

"""
IMAGE_HTML_TMPL = "<img src={image_dir}/histogram_{measure}_{format}.png>"
SUBHEADING_HTML_TMP = "<h2>{subtitle}</h2>\n"


def create_summary_page(
@@ -26,53 +30,50 @@ def create_summary_page(
    id_current: int,
    job_name: str,
    measures: List[str],
    image_dir: str,
):
    images = histogram_summary(job_name, measures)
    html = "\n<hr>\n".join(
        [
            SUBHEADING_HTML_TMP.format(subtitle=TITLE_4_MEASURE[m])
            + " ".join(
                [
                    IMAGE_HTML_TMPL.format(measure=m, format=f, image_dir=image_dir)
                    for f in FORMATS
                ]
            )
            for m in measures
        ]
    )

    new_summary_page = SUBPAGE_TMPL_CSS + SUMMARY_PAGE_TMPL_HTML.format(
        id_current=id_current,
        job_name=job_name,
        images=images,
        images=html,
    )
    with open(html_out, "w") as f:
        f.write(new_summary_page)


def histogram_summary(
    job_name: str,
    measures: List[str],
):
    """Build the HTML fragment listing the summary histograms of a job.

    For every entry of ``measures`` the fragment contains a heading taken
    from ``title``, one ``<img>`` tag per entry of ``FORMATS`` and a link to
    the csv summary of that measure, followed by a horizontal rule. The
    fragment itself starts with a horizontal rule.

    Args:
        job_name: Job name; used to form the ``images_<job_name>`` directory
            that image and csv links point into.
        measures: Measure keys to include, each must be a key of ``title``.

    Returns:
        The concatenated HTML as a single string.
    """
    sections = ["<hr>"]
    for measure in measures:
        heading = f"<h2>{title[measure]}</h2>\n"
        img_tags = " ".join(
            f"<img src=images_{job_name}/summary_{measure}_{fmt}.png>"
            for fmt in FORMATS
        )
        csv_link = f'\n<br><a href="images_{job_name}/summary_{measure}.csv">summary_{measure}.csv</a><hr>\n\n'
        sections.append(heading + img_tags + csv_link)
    return "".join(sections)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("html_out")
    parser.add_argument("id_current", type=int)
    parser.add_argument("job_name")
    parser.add_argument("image_dir")
    parser.add_argument(
        "--measures",
        nargs="+",
        help=f"List of measures to include in summary. Allowed values: {' '.join(title.keys())}",
        default=["MLD", "DIFF", "SSNR", "ODG"],
        help=f"List of measures to include in summary. Allowed values: {' '.join(TITLE_4_MEASURE.keys())}",
        # exclude DELTA_ODG here
        default=list(TITLE_4_MEASURE.keys())[:-1],
    )
    args = parser.parse_args()

    if not all([m in title for m in args.measures]):
        raise ValueError(f"Invalid list of measures: {args.measures}, expected one of {' '.join(title.keys())}")
    if not all([m in TITLE_4_MEASURE for m in args.measures]):
        raise ValueError(
            f"Invalid list of measures: {args.measures}, expected one of {' '.join(TITLE_4_MEASURE.keys())}"
        )

    create_summary_page(
        args.html_out,
        args.id_current,
        args.job_name,
        args.measures,
        args.html_out, args.id_current, args.job_name, args.measures, args.image_dir
    )
+0 −156
Original line number Diff line number Diff line
#!/usr/bin/env python3

import argparse
import math
import numpy as np

# These next three lines are added as a precaution in case the gitlab runner
# needs DISPLAY to render the plots, even if they are written to file.
import matplotlib

matplotlib.use("Agg")
import matplotlib.pyplot as plt
import csv
import os
from parse_xml_report import IVAS_FORMATS, EVS_FORMATS, IVAS_CATEGORIES, EVS_CATEGORIES

"""
Parses a CSV report and creates a summary report.
"""


# Main routine: read a ";"-separated CSV report, count test cases per
# histogram bin for one measure, write the counts to a summary CSV and
# optionally save one stacked bar chart per format.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Parses a CSV report and creates a summary report."
    )
    parser.add_argument(
        "csv_report",
        type=str,
        help="CSV report file of test cases, e.g. report.csv",
    )
    parser.add_argument(
        "csv_summary", type=str, help="Output CSV file, e.g. summary.csv"
    )
    # Optional positional: when omitted, no image files are written.
    parser.add_argument(
        "csv_image",
        type=str,
        nargs="?",
        help="Summary image file, e.g. summary.png",
        default=None,
    )
    # nargs=1 yields a one-element list, unpacked below via args.measure[0].
    parser.add_argument(
        "--measure",
        type=str,
        nargs=1,
        help="Measure, any of: MLD, DIFF, SSNR, ODG, default: MLD",
        default=["MLD"],
    )
    parser.add_argument(
        "--evs",
        action="store_true",
        help="Parse using EVS 26.444 formats",
        default=False,
    )
    parser.add_argument(
        "--diff",
        action="store_true",
        help="Use limits for diff scores",
        default=False,
    )
    args = parser.parse_args()
    csv_report = args.csv_report
    csv_summary = args.csv_summary
    csv_image = args.csv_image
    measure = args.measure[0]
    # Select the format/category lists imported from parse_xml_report.
    if args.evs:
        FORMATS = EVS_FORMATS
        CATEGORIES = EVS_CATEGORIES
    else:
        FORMATS = IVAS_FORMATS
        CATEGORIES = IVAS_CATEGORIES
    # Map each measure key to (column name in the CSV report, bin edges).
    # With --diff the edges are None and are derived from the data further down.
    if args.diff:
        limits_per_measure = {
            "MLD": ("MLD", None),
            "DIFF": ("MAXIMUM ABS DIFF", None),
            "SSNR": ("MIN_SSNR", None),
            "ODG": ("MIN_ODG", None),
            "DELTA_ODG": ("DELTA_ODG", None),
        }
    else:
        # NOTE(review): the SSNR edges list 40 twice, which creates a
        # zero-width bin and a repeated label - looks unintended, confirm.
        limits_per_measure = {
            "MLD": ("MLD", [0, 1, 2, 3, 4, 5, 10, 20, math.inf]),
            "DIFF": ("MAXIMUM ABS DIFF", [0, 16, 256, 1024, 2048, 4096, 8192, 16384, 32769]),
            "SSNR": ("MIN_SSNR", [-math.inf, 0, 10, 20, 30, 40, 40, 50, 60, 100]),
            "ODG": ("MIN_ODG", [-5, -2, -1, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.5]),
            "DELTA_ODG": ("DELTA_ODG", [-5, -2, -1, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.5]),
        }
    # Raises KeyError for a measure key that is not listed above.
    (measure_label, limits) = limits_per_measure[measure]

    # Load CSV report into {testcase: {column name: value string}}; the first
    # column is the test case name, the header row supplies the other keys.
    results_sorted = {}
    with open(csv_report, "r") as fp:
        reader = csv.reader(fp, delimiter=";")
        header = next(reader)
        keys = header[1:]
        for row in reader:
            testcase = row[0]
            results_sorted[testcase] = {}
            for k, val in zip(keys, row[1:]):
                results_sorted[testcase][k] = val

    # No fixed limits (--diff): derive bin edges from the observed values.
    if limits is None:
        # Values recorded as "None" or empty are excluded here.
        vals = [float(x) for x in [m[measure_label] for m in results_sorted.values() if m[measure_label] != "None" and m[measure_label] != ""]]
        start = min(vals)
        # f scales start so that flooring keeps two significant digits.
        # NOTE(review): log10(0) is -inf, so a minimum of exactly 0 (or a
        # step of 0 below) would make the int() conversion fail - confirm
        # whether that can occur in practice.
        f = 10 ** (2 - int(np.floor(np.log10(abs(start)))) - 1)
        start = np.floor(start*f)/f
        step = (max(vals) - start)/10
        # Same scaling for the step, rounded up to two significant digits.
        f = 10 ** (2 - int(np.floor(np.log10(abs(step)))) - 1)
        step = np.ceil(step*f)/f
        # NOTE(review): the stop value is 10*step, not start + 10*step, so
        # the edges only span the data when start is close to 0 - confirm
        # the intended upper bound.
        limits = np.arange(start, 10*step, step)

    # Output CSV file: one line per (format, category) with the bin counts.
    with open(csv_summary, "w") as fp:
        limits_labels = [f"{a:g}" for a in limits] + ["","None"] # Put None cases in separate bin
        headerline = f"Format;Category;" + ";".join(limits_labels) + "\n"
        fp.write(headerline)

        for fmt in FORMATS:
            # One figure per format; categories are stacked via `bottom`.
            fig, ax = plt.subplots()
            bottom = np.zeros(len(limits_labels))
            for cat in CATEGORIES:
                # Raw value strings of this measure for the current format/category.
                values = [
                    x
                    for x in [
                        m[measure_label]
                        for m in results_sorted.values()
                        if m["Format"] == fmt and m["Category"] == cat
                    ]
                ]
                # Create separate bin for None (errors)
                val = [float(x) for x in values if x != "None" and x != ""]
                none = [sum([1 for x in values if x == "None" or x == ""])]
                hist, _ = np.histogram(val, limits)
                # Layout: bin counts, 0 for the last edge label, the None
                # count, then a trailing 0 - same length as limits_labels.
                data = np.array(list(hist) + [0] + none + [0])

                # CSV output
                # The counts are joined with "; " (semicolon plus space),
                # unlike the plain ";" used in the header line.
                line = f"{fmt};{cat};{'; '.join(map(str,data))}\n"
                fp.write(line)

                # Matplotlib histogram
                # align='edge' with width 1 draws each bar from its own tick
                # to the next one.
                ax.bar(limits_labels, data, 1, align='edge', edgecolor='black', linewidth=0.5, label=cat, bottom=bottom)
                bottom += data

            # Histogram layout
            ax.set_title(fmt)
            ax.legend(loc="best")
            ax.set_xlabel(measure_label)
            # Only the "MAXIMUM ABS DIFF" column name contains "DIFF"; its
            # tick labels are rotated.
            if "DIFF" in measure_label:
                ax.set_xticks(range(len(limits_labels)), limits_labels, rotation=35)
            ax.set_ylabel("Number of test cases")

            fig.set_figheight(4)
            fig.set_figwidth(6)
            # The image name gets the format inserted before the extension,
            # e.g. summary.png -> summary_<fmt>.png.
            if csv_image:
                base, ext = os.path.splitext(csv_image)
                plt.savefig(f"{base}_{fmt}{ext}")
+200 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3

import argparse
import math
import pathlib
import sys
import pandas as pd
import numpy as np
from typing import List


# hack for avoiding missing DISPLAY variable in headless CI runners
import matplotlib

matplotlib.use("Agg")
import matplotlib.pyplot as plt


# Fixed histogram bin edges per measure. Edges must be unique: a repeated
# edge creates a zero-width bin that can never receive a value and yields a
# duplicated tick label / csv column name (MIN_SSNR used to list 40 twice).
BINS_FOR_MEASURES = {
    "MLD": [0, 1, 2, 3, 4, 5, 10, 20, math.inf],
    "MAX_ABS_DIFF": [0, 16, 256, 1024, 2048, 4096, 8192, 16384, 32769],
    "MIN_SSNR": [-math.inf, 0, 10, 20, 30, 40, 50, 60, 100, math.inf],
    "MIN_ODG": [-5, -4, -3, -2, -1, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.5],
    "DELTA_ODG": [-5, -4, -3, -2, -1, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.5],
}

# Measures plotted when --measures is not given on the command line.
DEFAULT_MEASURES = ["MAX_ABS_DIFF", "MLD", "MIN_SSNR", "MIN_ODG"]

### !!! Note: this is duplicated in tests/constants.py. If you change this here, ALSO ADAPT IT THERE!!!
### (importing from there failed for unknown reasons in some jobs on some runners and this has not been properly investigated yet)
### The lines below are the original import-based solution, kept here for reference.

# HERE = pathlib.Path(__file__).parent
# ROOT_DIR = HERE.parent
# sys.path.append(str(ROOT_DIR))
# from tests.constants import CAT_NORMAL, CAT_BITRATE_SWITCHING, CAT_DTX, CAT_JBM, CAT_PLC

# Category names; create_histograms() uses them as keys of
# COLORS_FOR_CATEGORIES when looking up the values of the "category" column.
CAT_NORMAL = "normal operation"
CAT_DTX = "DTX"
CAT_PLC = "PLC"
CAT_BITRATE_SWITCHING = "bitrate switching"
CAT_JBM = "JBM"

# Fixed bar colour per category so that a category looks the same in every plot.
COLORS_FOR_CATEGORIES = {
    CAT_DTX: "tab:blue",
    CAT_PLC: "tab:orange",
    CAT_NORMAL: "tab:green",
    CAT_JBM: "tab:red",
    CAT_BITRATE_SWITCHING: "tab:purple",
}


def get_bins_for_diff(data: pd.Series) -> np.ndarray:
    """Return ten bin edges, rounded to two decimals, that span ``data``.

    The edges are evenly spaced between the minimum and maximum of ``data``.
    Rounding to two decimals can move the first edge above the minimum or the
    last edge below the maximum; ``np.histogram`` would then silently drop
    those extreme values. In that case the affected outer edge is widened by
    0.01 so that every value still falls into a bin.

    Args:
        data: Values of one measure column.

    Returns:
        Array of 10 non-decreasing bin edges.
    """
    lowest = data.min()
    highest = data.max()
    bins = np.round(np.linspace(lowest, highest, num=10), decimals=2)

    # Rounding moves an edge by at most 0.005, so one 0.01 step is enough to
    # bring the extreme value back inside the outer bin.
    if bins[0] > lowest:
        bins[0] = np.round(bins[0] - 0.01, decimals=2)
    if bins[-1] < highest:
        bins[-1] = np.round(bins[-1] + 0.01, decimals=2)

    return bins


def create_histograms(
    df: pd.DataFrame,
    measures: List[str],
    output_folder: pathlib.Path,
    display_only: bool,
    bins_for_measures=BINS_FOR_MEASURES,
    prefix="",
    write_out_histograms=False,
):
    """Plot stacked per-category histograms for each measure and format.

    For every measure one figure per value of the "format" column is created.
    Within a figure the test cases are stacked by the "category" column. Rows
    whose "result" column equals "ERROR" are not binned but counted in a
    separate slot to the right of the regular bins.

    Args:
        df: Report data with the columns "format", "category", "result" and
            one column ``prefix + measure`` per requested measure.
        measures: Names of the measures to plot.
        output_folder: Folder that receives the png (and optionally csv)
            files; created when something is going to be written.
        display_only: If True, no images are saved and the figures are shown
            with ``plt.show()`` after all of them have been created.
        bins_for_measures: Mapping from measure name to bin edges. Measures
            missing from the mapping get edges from ``get_bins_for_diff``.
        prefix: String prepended to the measure name to get the column name.
        write_out_histograms: If True, the counts of each measure are also
            written to ``histogram_<measure>.csv`` in ``output_folder``.
    """
    formats = df["format"].unique()
    categories = df["category"].unique()

    if not display_only or write_out_histograms:
        output_folder.mkdir(exist_ok=True, parents=True)

    for measure in measures:
        measure_in_df = prefix + measure

        # Only derive bins from the data when no fixed bins are configured.
        bins = bins_for_measures.get(measure)
        if bins is None:
            bins = get_bins_for_diff(df[measure_in_df])

        # One label per bin edge, then a spacer and the error slot.
        labels = [f"{edge}" for edge in bins] + ["", "ERROR"]
        # Bars are placed at numeric positions instead of by label string:
        # matplotlib maps equal strings to the same categorical position, so
        # duplicate labels (e.g. after rounding) would pile bars on top of
        # each other and shift them relative to the ticks set further down.
        positions = np.arange(len(labels))

        hist_rows = []

        for fmt in formats:
            fig, ax = plt.subplots()
            bottom = np.zeros(len(labels))
            for cat in categories:
                data_mask = np.logical_and(df["format"] == fmt, df["category"] == cat)
                df_slice = df[data_mask]
                error_mask = df_slice["result"] == "ERROR"
                n_errors = np.sum(error_mask)
                df_slice = df_slice[np.logical_not(error_mask)]

                counts, _ = np.histogram(df_slice[measure_in_df], bins)

                # len(bins) - 1 counts, an empty slot at the last edge, the
                # error count and a trailing empty slot -> len(labels) values.
                # With align="edge" every bar spans from its tick to the next.
                data = np.concatenate([counts, [0], [n_errors], [0]])
                ax.bar(
                    positions,
                    data,
                    1,
                    align="edge",
                    edgecolor="black",
                    linewidth=0.5,
                    label=cat,
                    bottom=bottom,
                    # Unknown categories fall back to matplotlib's default
                    # colour instead of raising KeyError.
                    color=COLORS_FOR_CATEGORIES.get(cat),
                )
                bottom += data

                # In the csv the error count is stored under the "ERROR" column.
                hist_rows.append([fmt, cat] + list(counts) + [0] + [0, n_errors])

            # Histogram layout
            ax.set_title(fmt)
            ax.legend(loc="best")
            ax.set_xlabel(measure)
            if "DIFF" in measure or len(bins_for_measures) == 0:
                # Data-derived or large edge values need more room per label.
                ax.set_xticks(positions, labels, rotation=35)
            else:
                ax.set_xticks(positions, labels)
            ax.set_ylabel("Number of test cases")

            fig.set_figheight(4)
            fig.set_figwidth(6)
            plt.tight_layout()

            if not display_only:
                image_file = f"histogram_{measure}_{fmt}.png"
                image_path = output_folder.joinpath(image_file)
                plt.savefig(image_path)
                # Figures are only kept open when they are shown at the end.
                plt.close(fig)

        if write_out_histograms:
            # Build the table in one go; this also tolerates duplicate labels.
            df_hist = pd.DataFrame(hist_rows, columns=["format", "category"] + labels)
            df_hist.to_csv(
                output_folder.joinpath(f"histogram_{measure}.csv"), index=False
            )

    if display_only:
        plt.show()


# Command line entry point: read the csv report, drop incomplete rows and
# create the histograms for the requested measures.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Parses a csv file generated by parse_xml_report and creates histograms for the given measures."
    )
    parser.add_argument(
        "csv_report",
        type=str,
        help="CSV report file as generated by parse_xml_report.py",
    )
    parser.add_argument(
        "output_folder",
        type=pathlib.Path,
        help="Output folder for writing the histogram images and csv files",
    )
    parser.add_argument(
        "--display-only",
        action="store_true",
        help="Do not write the output files, but display the graphs instead.",
    )
    parser.add_argument(
        "--no-bins",
        action="store_true",
        help="""Do not use the hardcoded bins for creating the histograms.
Use this for visualising diff scores.""",
    )
    allowed_measures = " ".join(BINS_FOR_MEASURES.keys())
    parser.add_argument(
        "--measures",
        nargs="+",
        default=DEFAULT_MEASURES,
        help=f"Measures to plot from the csv file. One of {allowed_measures}",
    )
    parser.add_argument(
        "--prefix",
        default="",
        help="Common prefix to use when collecting measures from the input csv file",
    )
    parser.add_argument(
        "--write-out-histograms",
        action="store_true",
        help="Write out the histogram values to csv",
    )
    args = parser.parse_args()
    df = pd.read_csv(args.csv_report)

    # Filter out rows with a missing format or category value. Both columns
    # are needed to assign a row to a histogram; a row with only one of them
    # would otherwise add a NaN format/category to the plots.
    df = df.dropna(subset=["format", "category"])

    # An empty mapping makes create_histograms derive the bins from the data.
    bins_for_measures = BINS_FOR_MEASURES
    if args.no_bins:
        bins_for_measures = {}

    create_histograms(
        df,
        args.measures,
        args.output_folder,
        args.display_only,
        bins_for_measures,
        args.prefix,
        args.write_out_histograms,
    )
+10 −6
Original line number Diff line number Diff line
@@ -33,25 +33,29 @@ the United Nations Convention on Contracts on the International Sales of Goods.
import pandas as pd
import argparse
import sys
import os
import pathlib

COLUMNS_TO_COMPARE = [
    "MLD",
    "MAXIMUM ABS DIFF",
    "MAX_ABS_DIFF",
    "MIN_SSNR",
    "MIN_ODG",
]


def main(args):
    """Write the per-testcase difference between two csv reports.

    Both reports are sorted by testcase, format and category and then
    subtracted row by row (test minus reference) for every column listed in
    ``COLUMNS_TO_COMPARE``. All other columns are taken from the reference
    report.

    Args:
        args: Parsed arguments providing ``csv_ref``, ``csv_test`` and
            ``csv_diff`` (paths of the two inputs and of the output file).

    Returns:
        0 on success.
    """
    sort_keys = ["testcase", "format", "category"]

    # sort_values() keeps the original index labels and Series arithmetic
    # aligns on the index, so without resetting the index the subtraction
    # would still pair the rows in their unsorted file order.
    df_ref = (
        pd.read_csv(args.csv_ref).sort_values(by=sort_keys).reset_index(drop=True)
    )
    df_test = (
        pd.read_csv(args.csv_test).sort_values(by=sort_keys).reset_index(drop=True)
    )

    for col in COLUMNS_TO_COMPARE:
        df_ref[col] = df_test[col] - df_ref[col]
    df_ref.to_csv(args.csv_diff, index=False)
    return 0


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("csv_ref")
Loading