From 65c6d89216307ec1728657a883c7a33f922d9027 Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Wed, 8 Jan 2025 16:49:41 +0100 Subject: [PATCH 1/4] add parsing of command lines into the check_for_changes script --- .../basop_check_for_changes_in_testcases.py | 87 ++++++++++++++++++- 1 file changed, 83 insertions(+), 4 deletions(-) diff --git a/scripts/basop_check_for_changes_in_testcases.py b/scripts/basop_check_for_changes_in_testcases.py index e4116cb76d..0942bbb537 100644 --- a/scripts/basop_check_for_changes_in_testcases.py +++ b/scripts/basop_check_for_changes_in_testcases.py @@ -35,6 +35,9 @@ import argparse import sys import os import pathlib +import re +import xml.etree.ElementTree as ET +from typing import Tuple # set positive threshold for "lower is better" metrics, negative for "higher is better" @@ -48,8 +51,12 @@ COLS_2_THRESHOLDS = { OUTFILE_CRASHES = "changes_crashes.csv" OUTFILE_SCORES = "changes_{}.csv" +PATTERN_ENC = r"... encoder command:\s*(.*)\s*... encoder stdout:" +PATTERN_DEC = r"... decoder command:\s*(.*)\s*... decoder stdout:" + def main(args): + xml_report = args.xml_report df_curr = pd.read_csv(args.csv_current, sep=";") df_prev = pd.read_csv(args.csv_previous, sep=";") df_merged = pd.merge(df_curr, df_prev, on="testcase", suffixes=["-curr", "-prev"]) @@ -77,7 +84,6 @@ def main(args): df_crashes_introduced = df_merged[mask_crash_introduced][display_cols].reset_index( drop=True ) - df_crashes_introduced.to_csv(OUTFILE_CRASHES, sep=";") if sum(mask_crash_introduced) > 0: regressions_found = True @@ -85,15 +91,32 @@ def main(args): print(df_crashes_introduced) print() + if xml_report is not None: + cmdlines_crashes_introduced = get_command_lines_for_testcases( + df_crashes_introduced["testcase"], xml_report + ) + df_crashes_introduced = pd.merge( + df_crashes_introduced, cmdlines_crashes_introduced, on="testcase" + ) + df_crashes_introduced.to_csv(OUTFILE_CRASHES, sep=";") + if args.show_improvements and sum(mask_crash_fixed) > 0: df_crashes_fixed = df_merged[mask_crash_fixed][display_cols].reset_index( drop=True ) - df_crashes_fixed.to_csv(OUTFILE_CRASHES, mode="a", sep=";") print("---------------Testcases that fixed crashes---------------") print(df_crashes_fixed) print() + if xml_report is not None: + cmdlines_crashes_fixed = get_command_lines_for_testcases( + df_crashes_fixed["testcase"], xml_report + ) + df_crashes_fixed = pd.merge( + df_crashes_fixed, cmdlines_crashes_fixed, on="testcase" + ) + df_crashes_fixed.to_csv(OUTFILE_CRASHES, mode="a", sep=";") + # remove columns with ERRORs in any of the csv files before comparing the numerical columns mask_no_errors = (df_merged[col_curr] != "ERROR") & (df_merged[col_prev] != "ERROR") df_merged = df_merged[mask_no_errors].reset_index(drop=True) @@ -115,7 +138,6 @@ def main(args): display_cols = ["testcase", col_curr, col_prev, col_diff] outfile = OUTFILE_SCORES.format(col.replace(" ", "_")) df_worse = df_merged[mask_worse][display_cols].reset_index(drop=True) - df_worse.to_csv(outfile, sep=";") if sum(mask_worse) > 0: regressions_found = True print( @@ -124,18 +146,70 @@ def main(args): print(df_worse) print() + if xml_report is not None: + cmdlines_worse = get_command_lines_for_testcases( + df_worse["testcase"], xml_report + ) + df_worse = pd.merge(df_worse, cmdlines_worse, on="testcase") + df_worse.to_csv(outfile, sep=";") + if args.show_improvements and sum(mask_better) > 0: df_better = df_merged[mask_better][display_cols].reset_index(drop=True) - df_better.to_csv(outfile, mode="a", sep=";") print( f"---------------Testcases that got better wrt to {col}---------------" ) print(df_better) print() + if xml_report is not None: + cmdlines_better = get_command_lines_for_testcases( + df_better["testcase"], xml_report + ) + df_better = pd.merge(df_better, cmdlines_better, on="testcase") + df_better.to_csv(outfile, mode="a", sep=";") + return int(regressions_found) +def get_command_lines_for_testcases( + testcases: pd.Series, xml_report: pathlib.Path +) -> pd.DataFrame: + testcase_elems = [ + e + for _, e in ET.iterparse(xml_report) + if e.tag == "testcase" and e.attrib["name"] in testcases.values + ] + + cmdlines = {"testcase": [], "enc_cmd": [], "dec_cmd": []} + for elem in testcase_elems: + testcase_name = elem.attrib["name"] + enc_cmd = "" + dec_cmd = "" + if (system_out := elem.find("system-out")) is not None: + enc_cmd, dec_cmd = extract_cmdlines(system_out.text) + + cmdlines["testcase"].append(testcase_name) + cmdlines["enc_cmd"].append(enc_cmd) + cmdlines["dec_cmd"].append(dec_cmd) + + return pd.DataFrame(cmdlines) + + +def extract_cmdlines(text: str) -> Tuple[str, str]: + enc_cmdline = "" + dec_cmdline = "" + + match_enc = re.search(PATTERN_ENC, text) + match_dec = re.search(PATTERN_DEC, text) + if match_enc is not None and match_dec is not None: + enc_cmdline = match_enc.group(1) + dec_cmdline = match_dec.group(1) + + # TODO: post-process paths + + return enc_cmdline, dec_cmdline + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("csv_current") @@ -147,6 +221,11 @@ if __name__ == "__main__": default=COLS_2_THRESHOLDS.keys(), ) parser.add_argument("--show_improvements", action="store_true") + parser.add_argument( + "--xml_report", + help="XMLxml_report report file from pytest run. Pass to add command lines to the output files.", + default=None, + ) args = parser.parse_args() sys.exit(main(args)) -- GitLab From 36824cdea534df54cc4115ec214b8a03614ac7dc Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Wed, 8 Jan 2025 17:23:59 +0100 Subject: [PATCH 2/4] normalize paths in command lines --- .../basop_check_for_changes_in_testcases.py | 48 +++++++++++++++---- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/scripts/basop_check_for_changes_in_testcases.py b/scripts/basop_check_for_changes_in_testcases.py index 0942bbb537..5d904fee2d 100644 --- a/scripts/basop_check_for_changes_in_testcases.py +++ b/scripts/basop_check_for_changes_in_testcases.py @@ -93,7 +93,7 @@ def main(args): if xml_report is not None: cmdlines_crashes_introduced = get_command_lines_for_testcases( - df_crashes_introduced["testcase"], xml_report + df_crashes_introduced["testcase"], xml_report, args.inject_cwd ) df_crashes_introduced = pd.merge( df_crashes_introduced, cmdlines_crashes_introduced, on="testcase" @@ -110,7 +110,7 @@ def main(args): if xml_report is not None: cmdlines_crashes_fixed = get_command_lines_for_testcases( - df_crashes_fixed["testcase"], xml_report + df_crashes_fixed["testcase"], xml_report, args.inject_cwd ) df_crashes_fixed = pd.merge( df_crashes_fixed, cmdlines_crashes_fixed, on="testcase" @@ -148,7 +148,7 @@ def main(args): if xml_report is not None: cmdlines_worse = get_command_lines_for_testcases( - df_worse["testcase"], xml_report + df_worse["testcase"], xml_report, args.inject_cwd ) df_worse = pd.merge(df_worse, cmdlines_worse, on="testcase") df_worse.to_csv(outfile, sep=";") @@ -163,7 +163,7 @@ def main(args): if xml_report is not None: cmdlines_better = get_command_lines_for_testcases( - df_better["testcase"], xml_report + df_better["testcase"], xml_report, args.inject_cwd ) df_better = pd.merge(df_better, cmdlines_better, on="testcase") df_better.to_csv(outfile, mode="a", sep=";") @@ -172,7 +172,7 @@ def main(args): def get_command_lines_for_testcases( - testcases: pd.Series, xml_report: pathlib.Path + testcases: pd.Series, xml_report: pathlib.Path, cwd: pathlib.Path ) -> pd.DataFrame: testcase_elems = [ e @@ -186,7 +186,7 @@ def get_command_lines_for_testcases( enc_cmd = "" dec_cmd = "" if (system_out := elem.find("system-out")) is not None: - enc_cmd, dec_cmd = extract_cmdlines(system_out.text) + enc_cmd, dec_cmd = extract_cmdlines(system_out.text, cwd) cmdlines["testcase"].append(testcase_name) cmdlines["enc_cmd"].append(enc_cmd) @@ -195,7 +195,7 @@ def get_command_lines_for_testcases( return pd.DataFrame(cmdlines) -def extract_cmdlines(text: str) -> Tuple[str, str]: +def extract_cmdlines(text: str, cwd: pathlib.Path) -> Tuple[str, str]: enc_cmdline = "" dec_cmdline = "" @@ -205,11 +205,37 @@ def extract_cmdlines(text: str) -> Tuple[str, str]: enc_cmdline = match_enc.group(1) dec_cmdline = match_dec.group(1) - # TODO: post-process paths + enc_cmdline = postprocess_cmdline(enc_cmdline, cwd) + dec_cmdline = postprocess_cmdline(dec_cmdline, cwd) return enc_cmdline, dec_cmdline +def postprocess_cmdline(cmdline: str, cwd: pathlib.Path) -> str: + cmdline_split = cmdline.split() + cmdline_proc = [] + + # change absolute paths into relative ones + # remove the "quite" flag + # for output and bitstream files only keep the filename + for elem in cmdline_split: + print(elem) + if elem == "-q": + continue + elif (elem_as_path := pathlib.Path(elem)).is_absolute(): + if elem_as_path.suffix == ".192" or ( + elem_as_path.suffix == ".wav" + and cmdline_split.index(elem) == len(cmdline_split) - 1 + ): + cmdline_proc.append(elem_as_path.name) + else: + cmdline_proc.append(str(elem_as_path.relative_to(cwd))) + else: + cmdline_proc.append(elem) + + return " ".join(cmdline_proc) + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("csv_current") @@ -226,6 +252,12 @@ if __name__ == "__main__": help="XMLxml_report report file from pytest run. Pass to add command lines to the output files.", default=None, ) + parser.add_argument( + "--inject_cwd", + help="Use this as cwd when pruning the long paths in the command lines. Debug option for testing.", + default=pathlib.Path(__file__).parent.absolute(), + type=pathlib.Path, + ) args = parser.parse_args() sys.exit(main(args)) -- GitLab From 366f249569a6ed229e174a6090fe36a532399702 Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Thu, 9 Jan 2025 11:29:59 +0100 Subject: [PATCH 3/4] remove leftover print from debugging --- scripts/basop_check_for_changes_in_testcases.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/basop_check_for_changes_in_testcases.py b/scripts/basop_check_for_changes_in_testcases.py index 5d904fee2d..ed67aa279d 100644 --- a/scripts/basop_check_for_changes_in_testcases.py +++ b/scripts/basop_check_for_changes_in_testcases.py @@ -219,7 +219,6 @@ def postprocess_cmdline(cmdline: str, cwd: pathlib.Path) -> str: # remove the "quite" flag # for output and bitstream files only keep the filename for elem in cmdline_split: - print(elem) if elem == "-q": continue elif (elem_as_path := pathlib.Path(elem)).is_absolute(): -- GitLab From 0696cb4a9e3ca61cc7347f369fc69b4bae9f1f05 Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Mon, 13 Jan 2025 17:11:33 +0100 Subject: [PATCH 4/4] add capturing of eid-xor and network sim cmdlines --- .../basop_check_for_changes_in_testcases.py | 46 +++++++++++++------ 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/scripts/basop_check_for_changes_in_testcases.py b/scripts/basop_check_for_changes_in_testcases.py index ed67aa279d..c7c2e0f785 100644 --- a/scripts/basop_check_for_changes_in_testcases.py +++ b/scripts/basop_check_for_changes_in_testcases.py @@ -53,6 +53,10 @@ OUTFILE_SCORES = "changes_{}.csv" PATTERN_ENC = r"... encoder command:\s*(.*)\s*... encoder stdout:" PATTERN_DEC = r"... decoder command:\s*(.*)\s*... decoder stdout:" +PATTERN_EID = r"eid-xor command:\s*(.*)\s*" +PATTERN_NETSIM = r"netsim command:\s*(.*)\s*" + +PATTERNS = [PATTERN_ENC, PATTERN_DEC, PATTERN_EID, PATTERN_NETSIM] def main(args): @@ -180,35 +184,47 @@ def get_command_lines_for_testcases( if e.tag == "testcase" and e.attrib["name"] in testcases.values ] - cmdlines = {"testcase": [], "enc_cmd": [], "dec_cmd": []} + cmdlines = { + "testcase": [], + "enc_cmd": [], + "dec_cmd": [], + "eid-xor_cmd": [], + "netsim_cmd": [], + } for elem in testcase_elems: testcase_name = elem.attrib["name"] enc_cmd = "" dec_cmd = "" + eid_cmd = "" + netsim_cmd = "" if (system_out := elem.find("system-out")) is not None: - enc_cmd, dec_cmd = extract_cmdlines(system_out.text, cwd) + ( + enc_cmd, + dec_cmd, + eid_cmd, + netsim_cmd, + ) = extract_cmdlines(system_out.text, cwd) cmdlines["testcase"].append(testcase_name) cmdlines["enc_cmd"].append(enc_cmd) cmdlines["dec_cmd"].append(dec_cmd) + cmdlines["eid-xor_cmd"].append(eid_cmd) + cmdlines["netsim_cmd"].append(netsim_cmd) return pd.DataFrame(cmdlines) -def extract_cmdlines(text: str, cwd: pathlib.Path) -> Tuple[str, str]: - enc_cmdline = "" - dec_cmdline = "" - - match_enc = re.search(PATTERN_ENC, text) - match_dec = re.search(PATTERN_DEC, text) - if match_enc is not None and match_dec is not None: - enc_cmdline = match_enc.group(1) - dec_cmdline = match_dec.group(1) - - enc_cmdline = postprocess_cmdline(enc_cmdline, cwd) - dec_cmdline = postprocess_cmdline(dec_cmdline, cwd) +def extract_cmdlines(text: str, cwd: pathlib.Path) -> list[str]: + cmdlines = [] + for p in PATTERNS: + m = re.search(p, text) + if m is not None: + cmdline = postprocess_cmdline(m.group(1), cwd) + cmdlines.append(cmdline) + else: + cmdlines.append("") - return enc_cmdline, dec_cmdline + return cmdlines def postprocess_cmdline(cmdline: str, cwd: pathlib.Path) -> str: -- GitLab