# scripts/parse_usan_errors_from_xml_report.py
"""Collect unique USAN runtime errors from an XML test report into a CSV file.

The report is expected to contain ``testcase`` elements (children of the
root's first child element), each carrying a ``system-out`` element with the
captured output of the test.  Every sanitizer report found in that output
(from a ``<file>:<line>:<col>: runtime error:`` line up to the following
``SUMMARY:`` line) becomes one row of the CSV, de-duplicated by source
location.

Usage: ``parse_usan_errors_from_xml_report.py <xml_report> <outfile>``
"""

import argparse
import re
from enum import Enum
from typing import List
from xml.etree import ElementTree

import pandas as pd

# Text separating the source location from the message in a sanitizer report.
_RUNTIME_ERROR_MARKER = ": runtime error:"

# First line of a sanitizer report: "<lib_xxx|apps>/<file>.[ch]:<line>:<col>: runtime error:".
# Compiled once at import time instead of once per parsed test case.
_ERROR_START = re.compile(r"(lib_.+|apps)\/(.*\.[ch]):(\d+):(\d+): runtime error:")

# Executables whose command lines are extracted from the captured output.
_EXECUTABLES = ("IVAS_cod", "IVAS_dec", "IVAS_rend", "ISAR_post_rend")


class _ParserState(Enum):
    """Whether the line scanner is currently inside a sanitizer report."""

    OUT = 0
    IN = 1


class UsanError:
    """One sanitizer runtime error plus the command lines of its test case.

    Instances compare and hash by ``err_location`` only, so two reports of the
    same source location are considered the same error even if their
    tracebacks or command lines differ.
    """

    def __init__(self, err: str, commandlines: dict):
        """Store the raw report and derive the error location from its first line.

        Args:
            err: Full report text; the first line holds the location followed
                by ``": runtime error:"``.
            commandlines: Mapping of executable name to the command line found
                in the same test output.
        """
        self.err = err
        self.commandlines = commandlines
        first_line = err.split("\n")[0]
        self.err_location = first_line.strip().split(_RUNTIME_ERROR_MARKER)[0]

    def __hash__(self):
        return hash(self.err_location)

    def __eq__(self, other):
        # Returning NotImplemented lets Python fall back to its default
        # comparison instead of failing on objects without `err_location`.
        if not isinstance(other, UsanError):
            return NotImplemented
        return self.err_location == other.err_location

    def __repr__(self):
        return f"<USAN err: {self.err_location}>"

    def to_dict(self) -> dict:
        """Return the error as a flat dict (location, traceback, command lines)."""
        # Everything after the first line of the report is the traceback.
        traceback = self.err.partition("\n")[2]
        return {
            "location": self.err_location,
            "traceback": traceback,
            **self.commandlines,
        }


def parse_commandlines_from_sysout(sysout: str) -> dict:
    """Find the command line of each known executable in captured test output.

    Args:
        sysout: Captured output of one test case.

    Returns:
        Dict keyed by executable name; the value is the stripped line that
        mentions the executable, or ``""`` when none was found.

    Raises:
        AssertionError: If a second command line is found for an executable.
    """
    commandlines = {exe: "" for exe in _EXECUTABLES}

    for line in sysout.splitlines():
        # The executable name is repeated in the sanitizer traceback (on the
        # "_start" frame); such lines are never command lines.
        if "_start" in line:
            continue
        for exe in commandlines:
            if exe in line:
                assert commandlines[exe] == "", f"more than one command line for {exe}"
                commandlines[exe] = line.strip()
                break

    return commandlines


def parse_errors_from_sysout(sysout: str) -> List[UsanError]:
    """Extract all sanitizer runtime errors from captured test output.

    A report starts at a line matching the ``runtime error:`` location pattern
    and ends at the next line starting with ``SUMMARY:`` (inclusive).

    Args:
        sysout: Captured output of one test case.

    Returns:
        One ``UsanError`` per report, in order of appearance.  All errors of
        the call share the same command-line dict.

    Raises:
        AssertionError: If a report starts inside another report, or a
            ``SUMMARY:`` line appears outside of a report.
    """
    commandlines = parse_commandlines_from_sysout(sysout)
    errors = []

    state = _ParserState.OUT
    accu = []
    for raw_line in sysout.splitlines():
        line = raw_line.strip()

        if _ERROR_START.match(line) is not None:
            assert state == _ParserState.OUT, "runtime error inside an open report"
            state = _ParserState.IN
            accu = []

        if state == _ParserState.IN:
            accu.append(line)

        if line.startswith("SUMMARY:"):
            assert state == _ParserState.IN, "SUMMARY line outside of a report"
            errors.append(UsanError("\n".join(accu), commandlines))
            state = _ParserState.OUT

    return errors


def main(args):
    """Parse ``args.xml_report`` and write the unique errors to ``args.outfile``.

    Args:
        args: Namespace with ``xml_report`` (input XML path) and ``outfile``
            (output CSV path).

    Raises:
        AssertionError: If a test case has no ``system-out`` element.
    """
    tree = ElementTree.parse(args.xml_report)
    root = tree.getroot()

    errors = []
    for tc in root[0].findall("testcase"):
        sysout = tc.find("system-out")
        assert sysout is not None, "testcase without <system-out> element"
        # An empty <system-out/> element has text None; treat it as no output.
        errors.extend(parse_errors_from_sysout(sysout.text or ""))

    # dict.fromkeys de-duplicates like set() but keeps first-seen order, so the
    # CSV row order is reproducible between runs.
    unique_errors = list(dict.fromkeys(errors))

    df = pd.DataFrame([e.to_dict() for e in unique_errors])
    df.to_csv(args.outfile, index=False)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("xml_report")
    parser.add_argument("outfile")
    args = parser.parse_args()
    main(args)
# scripts/parse_usan_errors_from_xml_report.py
"""Collect unique USAN runtime errors from an XML test report into a CSV file.

The report is expected to contain ``testcase`` elements (children of the
root's first child element), each carrying a ``system-out`` element with the
captured output of the test.  Every sanitizer report found in that output
(from a ``<file>:<line>:<col>: runtime error:`` line up to the following
``SUMMARY:`` line) becomes one row of the CSV, de-duplicated by source
location.

Usage: ``parse_usan_errors_from_xml_report.py <xml_report> <outfile>``
"""

import argparse
import re
from enum import Enum
from typing import List
from xml.etree import ElementTree

import pandas as pd

# Text separating the source location from the message in a sanitizer report.
_RUNTIME_ERROR_MARKER = ": runtime error:"

# First line of a sanitizer report: "<lib_xxx|apps>/<file>.[ch]:<line>:<col>: runtime error:".
# Compiled once at import time instead of once per parsed test case.
_ERROR_START = re.compile(r"(lib_.+|apps)\/(.*\.[ch]):(\d+):(\d+): runtime error:")

# Executables whose command lines are extracted from the captured output.
_EXECUTABLES = ("IVAS_cod", "IVAS_dec", "IVAS_rend", "ISAR_post_rend")


class _ParserState(Enum):
    """Whether the line scanner is currently inside a sanitizer report."""

    OUT = 0
    IN = 1


class UsanError:
    """One sanitizer runtime error plus the command lines of its test case.

    Instances compare and hash by ``err_location`` only, so two reports of the
    same source location are considered the same error even if their
    tracebacks or command lines differ.
    """

    def __init__(self, err: str, commandlines: dict):
        """Store the raw report and derive the error location from its first line.

        Args:
            err: Full report text; the first line holds the location followed
                by ``": runtime error:"``.
            commandlines: Mapping of executable name to the command line found
                in the same test output.
        """
        self.err = err
        self.commandlines = commandlines
        first_line = err.split("\n")[0]
        self.err_location = first_line.strip().split(_RUNTIME_ERROR_MARKER)[0]

    def __hash__(self):
        return hash(self.err_location)

    def __eq__(self, other):
        # Returning NotImplemented lets Python fall back to its default
        # comparison instead of failing on objects without `err_location`.
        if not isinstance(other, UsanError):
            return NotImplemented
        return self.err_location == other.err_location

    def __repr__(self):
        return f"<USAN err: {self.err_location}>"

    def to_dict(self) -> dict:
        """Return the error as a flat dict (location, traceback, command lines)."""
        # Everything after the first line of the report is the traceback.
        traceback = self.err.partition("\n")[2]
        return {
            "location": self.err_location,
            "traceback": traceback,
            **self.commandlines,
        }


def parse_commandlines_from_sysout(sysout: str) -> dict:
    """Find the command line of each known executable in captured test output.

    Args:
        sysout: Captured output of one test case.

    Returns:
        Dict keyed by executable name; the value is the stripped line that
        mentions the executable, or ``""`` when none was found.

    Raises:
        AssertionError: If a second command line is found for an executable.
    """
    commandlines = {exe: "" for exe in _EXECUTABLES}

    for line in sysout.splitlines():
        # The executable name is repeated in the sanitizer traceback (on the
        # "_start" frame); such lines are never command lines.
        if "_start" in line:
            continue
        for exe in commandlines:
            if exe in line:
                assert commandlines[exe] == "", f"more than one command line for {exe}"
                commandlines[exe] = line.strip()
                break

    return commandlines


def parse_errors_from_sysout(sysout: str) -> List[UsanError]:
    """Extract all sanitizer runtime errors from captured test output.

    A report starts at a line matching the ``runtime error:`` location pattern
    and ends at the next line starting with ``SUMMARY:`` (inclusive).

    Args:
        sysout: Captured output of one test case.

    Returns:
        One ``UsanError`` per report, in order of appearance.  All errors of
        the call share the same command-line dict.

    Raises:
        AssertionError: If a report starts inside another report, or a
            ``SUMMARY:`` line appears outside of a report.
    """
    commandlines = parse_commandlines_from_sysout(sysout)
    errors = []

    state = _ParserState.OUT
    accu = []
    for raw_line in sysout.splitlines():
        line = raw_line.strip()

        if _ERROR_START.match(line) is not None:
            assert state == _ParserState.OUT, "runtime error inside an open report"
            state = _ParserState.IN
            accu = []

        if state == _ParserState.IN:
            accu.append(line)

        if line.startswith("SUMMARY:"):
            assert state == _ParserState.IN, "SUMMARY line outside of a report"
            errors.append(UsanError("\n".join(accu), commandlines))
            state = _ParserState.OUT

    return errors


def main(args):
    """Parse ``args.xml_report`` and write the unique errors to ``args.outfile``.

    Args:
        args: Namespace with ``xml_report`` (input XML path) and ``outfile``
            (output CSV path).

    Raises:
        AssertionError: If a test case has no ``system-out`` element.
    """
    tree = ElementTree.parse(args.xml_report)
    root = tree.getroot()

    errors = []
    for tc in root[0].findall("testcase"):
        sysout = tc.find("system-out")
        assert sysout is not None, "testcase without <system-out> element"
        # An empty <system-out/> element has text None; treat it as no output.
        errors.extend(parse_errors_from_sysout(sysout.text or ""))

    # dict.fromkeys de-duplicates like set() but keeps first-seen order, so the
    # CSV row order is reproducible between runs.
    unique_errors = list(dict.fromkeys(errors))

    df = pd.DataFrame([e.to_dict() for e in unique_errors])
    df.to_csv(args.outfile, index=False)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("xml_report")
    parser.add_argument("outfile")
    args = parser.parse_args()
    main(args)