Commit 3ad4e872 authored by Jan Kiene's avatar Jan Kiene
Browse files

Merge branch 'basop-ci/add-commandlines-to-regression-check-output' into 'basop-ci-branch'

[Basop ci] add commandlines to regression check output

See merge request !1931
parents a34eabb1 e112140d
Loading
Loading
Loading
Loading
+130 −4
Original line number Diff line number Diff line
@@ -35,6 +35,9 @@ import argparse
import sys
import os
import pathlib
import re
import xml.etree.ElementTree as ET
from typing import Tuple


# set positive threshold for "lower is better" metrics, negative for "higher is better"
@@ -48,8 +51,16 @@ COLS_2_THRESHOLDS = {
OUTFILE_CRASHES = "changes_crashes.csv"
OUTFILE_SCORES = "changes_{}.csv"

PATTERN_ENC = r"... encoder command:\s*(.*)\s*... encoder stdout:"
PATTERN_DEC = r"... decoder command:\s*(.*)\s*... decoder stdout:"
PATTERN_EID = r"eid-xor command:\s*(.*)\s*"
PATTERN_NETSIM = r"netsim command:\s*(.*)\s*"

PATTERNS = [PATTERN_ENC, PATTERN_DEC, PATTERN_EID, PATTERN_NETSIM]


def main(args):
    xml_report = args.xml_report
    df_curr = pd.read_csv(args.csv_current, sep=";")
    df_prev = pd.read_csv(args.csv_previous, sep=";")
    df_merged = pd.merge(df_curr, df_prev, on="testcase", suffixes=["-curr", "-prev"])
@@ -77,7 +88,6 @@ def main(args):
    df_crashes_introduced = df_merged[mask_crash_introduced][display_cols].reset_index(
        drop=True
    )
    df_crashes_introduced.to_csv(OUTFILE_CRASHES, sep=";")

    if sum(mask_crash_introduced) > 0:
        regressions_found = True
@@ -85,15 +95,32 @@ def main(args):
        print(df_crashes_introduced)
        print()

        if xml_report is not None:
            cmdlines_crashes_introduced = get_command_lines_for_testcases(
                df_crashes_introduced["testcase"], xml_report, args.inject_cwd
            )
            df_crashes_introduced = pd.merge(
                df_crashes_introduced, cmdlines_crashes_introduced, on="testcase"
            )
    df_crashes_introduced.to_csv(OUTFILE_CRASHES, sep=";")

    if args.show_improvements and sum(mask_crash_fixed) > 0:
        df_crashes_fixed = df_merged[mask_crash_fixed][display_cols].reset_index(
            drop=True
        )
        df_crashes_fixed.to_csv(OUTFILE_CRASHES, mode="a", sep=";")
        print("---------------Testcases that fixed crashes---------------")
        print(df_crashes_fixed)
        print()

        if xml_report is not None:
            cmdlines_crashes_fixed = get_command_lines_for_testcases(
                df_crashes_fixed["testcase"], xml_report, args.inject_cwd
            )
            df_crashes_fixed = pd.merge(
                df_crashes_fixed, cmdlines_crashes_fixed, on="testcase"
            )
        df_crashes_fixed.to_csv(OUTFILE_CRASHES, mode="a", sep=";")

    # remove rows with ERRORs in either of the csv files before comparing the numerical columns
    mask_no_errors = (df_merged[col_curr] != "ERROR") & (df_merged[col_prev] != "ERROR")
    df_merged = df_merged[mask_no_errors].reset_index(drop=True)
@@ -115,7 +142,6 @@ def main(args):
        display_cols = ["testcase", col_curr, col_prev, col_diff]
        outfile = OUTFILE_SCORES.format(col.replace(" ", "_"))
        df_worse = df_merged[mask_worse][display_cols].reset_index(drop=True)
        df_worse.to_csv(outfile, sep=";")
        if sum(mask_worse) > 0:
            regressions_found = True
            print(
@@ -124,18 +150,107 @@ def main(args):
            print(df_worse)
            print()

            if xml_report is not None:
                cmdlines_worse = get_command_lines_for_testcases(
                    df_worse["testcase"], xml_report, args.inject_cwd
                )
                df_worse = pd.merge(df_worse, cmdlines_worse, on="testcase")
        df_worse.to_csv(outfile, sep=";")

        if args.show_improvements and sum(mask_better) > 0:
            df_better = df_merged[mask_better][display_cols].reset_index(drop=True)
            df_better.to_csv(outfile, mode="a", sep=";")
            print(
                f"---------------Testcases that got better wrt to {col}---------------"
            )
            print(df_better)
            print()

            if xml_report is not None:
                cmdlines_better = get_command_lines_for_testcases(
                    df_better["testcase"], xml_report, args.inject_cwd
                )
                df_better = pd.merge(df_better, cmdlines_better, on="testcase")
            df_better.to_csv(outfile, mode="a", sep=";")

    return int(regressions_found)


def get_command_lines_for_testcases(
    testcases: pd.Series, xml_report: pathlib.Path, cwd: pathlib.Path
) -> pd.DataFrame:
    """Collect the logged command lines for the given testcases from an XML report.

    The report is parsed and every ``testcase`` element whose ``name``
    attribute is contained in ``testcases`` contributes one row. The command
    lines are extracted from the text of the element's ``system-out`` child;
    if that child is missing or has no text, all command columns of the row
    are empty strings.

    Args:
        testcases: Names of the testcases to look up.
        xml_report: Path of the XML report file to parse.
        cwd: Directory that is passed on to the command line extraction for
            shortening absolute paths.

    Returns:
        DataFrame with the columns ``testcase``, ``enc_cmd``, ``dec_cmd``,
        ``eid-xor_cmd`` and ``netsim_cmd``, one row per matching element in
        the order the elements appear in the report.
    """
    # build the lookup once instead of scanning the series for every element
    wanted = set(testcases)

    cmdlines = {
        "testcase": [],
        "enc_cmd": [],
        "dec_cmd": [],
        "eid-xor_cmd": [],
        "netsim_cmd": [],
    }
    # iterparse only emits "end" events by default, so each element is
    # complete (including its children) when it is seen here
    for _, elem in ET.iterparse(xml_report):
        if elem.tag != "testcase":
            continue
        # .get() so that a testcase element without a name is skipped, not fatal
        testcase_name = elem.attrib.get("name")
        if testcase_name not in wanted:
            continue

        enc_cmd = dec_cmd = eid_cmd = netsim_cmd = ""
        system_out = elem.find("system-out")
        # an empty <system-out/> has text None, which must not reach re.search
        if system_out is not None and system_out.text:
            (
                enc_cmd,
                dec_cmd,
                eid_cmd,
                netsim_cmd,
            ) = extract_cmdlines(system_out.text, cwd)

        cmdlines["testcase"].append(testcase_name)
        cmdlines["enc_cmd"].append(enc_cmd)
        cmdlines["dec_cmd"].append(dec_cmd)
        cmdlines["eid-xor_cmd"].append(eid_cmd)
        cmdlines["netsim_cmd"].append(netsim_cmd)

    return pd.DataFrame(cmdlines)


def extract_cmdlines(text: str, cwd: pathlib.Path) -> list[str]:
    """Pull the command lines out of the captured output of one testcase.

    Every pattern in ``PATTERNS`` is searched for in ``text``. The first
    capture group of a hit is cleaned up with ``postprocess_cmdline``; a
    pattern without a hit yields an empty string.

    Args:
        text: Captured output to search.
        cwd: Directory handed to ``postprocess_cmdline``.

    Returns:
        One string per pattern, in the order of ``PATTERNS``.
    """
    # lazy generator: each hit is post-processed before the next pattern is searched
    hits = (re.search(pattern, text) for pattern in PATTERNS)
    return [
        "" if hit is None else postprocess_cmdline(hit.group(1), cwd)
        for hit in hits
    ]


def postprocess_cmdline(cmdline: str, cwd: pathlib.Path) -> str:
    """Shorten a logged command line by pruning flags and long paths.

    The command line is split on whitespace and each token is treated as
    follows:

    - the quiet flag ``-q`` is dropped,
    - for bitstream files (absolute path with suffix ``.192``) and the output
      file (absolute path with suffix ``.wav`` in the last position) only the
      file name is kept,
    - any other absolute path is made relative to ``cwd``; a path that does
      not lie below ``cwd`` is kept unchanged,
    - all other tokens are passed through unchanged.

    Args:
        cmdline: The command line as a single string.
        cwd: Directory that absolute paths are made relative to.

    Returns:
        The processed tokens joined by single spaces.
    """
    tokens = cmdline.split()
    last_idx = len(tokens) - 1
    processed = []

    # enumerate gives the real position; list.index would return the first
    # occurrence and misjudge a last token that repeats an earlier one
    for idx, token in enumerate(tokens):
        if token == "-q":
            continue

        token_path = pathlib.Path(token)
        if not token_path.is_absolute():
            processed.append(token)
            continue

        is_bitstream = token_path.suffix == ".192"
        is_output_wav = token_path.suffix == ".wav" and idx == last_idx
        if is_bitstream or is_output_wav:
            processed.append(token_path.name)
            continue

        try:
            processed.append(str(token_path.relative_to(cwd)))
        except ValueError:
            # path is not located below cwd, so it cannot be made relative
            processed.append(token)

    return " ".join(processed)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("csv_current")
@@ -147,6 +262,17 @@ if __name__ == "__main__":
        default=COLS_2_THRESHOLDS.keys(),
    )
    parser.add_argument("--show_improvements", action="store_true")
    parser.add_argument(
        "--xml_report",
        help="XMLxml_report report file from pytest run. Pass to add command lines to the output files.",
        default=None,
    )
    parser.add_argument(
        "--inject_cwd",
        help="Use this as cwd when pruning the long paths in the command lines. Debug option for testing.",
        default=pathlib.Path(__file__).parent.absolute(),
        type=pathlib.Path,
    )

    args = parser.parse_args()
    sys.exit(main(args))