#!/usr/bin/env python3
"""Collect unique sanitizer findings from an XML test report into a CSV file.

Source path: scripts/parse_sanitizer_errors_from_xml_report.py

Every ``testcase`` element of the report is inspected.  The text of its
``system-out`` children (or, when nothing is found there, of its ``failure``
children) is scanned for UndefinedBehaviorSanitizer, MemorySanitizer and
AddressSanitizer/LeakSanitizer reports.  Findings are de-duplicated by source
location and written to a CSV file together with the command lines that were
found in the same text.
"""

import argparse
import logging
import os
import re
from enum import Enum
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Tuple
from xml.etree import ElementTree

from numpy import trace  # noqa: F401 -- unused here; kept so the file's import set is unchanged
import pandas as pd

logging.basicConfig(level=logging.INFO)

# Executables whose command lines are looked for in the captured output.
# The order is also the column order in the CSV file.
_EXECUTABLES = (
    "IVAS_cod",
    "networkSimulator_g192",
    "eid-xor",
    "IVAS_dec",
    "IVAS_rend",
    "ISAR_post_rend",
)

# Column layout of the CSV file; matches the key order of SanitizerError.to_dict().
_CSV_COLUMNS = ["testcase", "sanitizer", "location", "type", "traceback", *_EXECUTABLES]

# Lines that open a sanitizer report.
_PATTERN_USAN = re.compile(r"(lib_.+|apps)\/(.*\.[ch]):(\d+):(\d+): runtime error:")
_PATTERN_MSAN = re.compile(r" MemorySanitizer: ")
_PATTERN_ASAN = re.compile(r"==\d+==ERROR: .+Sanitizer: ")

# Part of a "SUMMARY: <sanitizer>" line that follows the sanitizer name:
# group 1 is the error type, group 2 the "<path>.[ch]:<line>:<col>" location.
_SUMMARY_TAIL = r": ([a-z-]*) (.*\/.*\.[ch]:\d+:\d+) in"


class ReportFormatError(AssertionError, ValueError):
    """Raised when the captured output does not look like expected sanitizer output.

    It derives from ``AssertionError`` because these conditions used to be plain
    ``assert`` statements, so code catching the old failures keeps working, while
    the checks are no longer stripped when Python runs with ``-O``.
    """


class _ParserState(Enum):
    """Whether the line scanner is currently outside or inside a sanitizer report."""

    OUT = 0
    IN = 1


def _strip_cwd(text: str, cwd: Path) -> str:
    """Return *text* relative to *cwd* if it is an absolute path below *cwd*.

    Relative paths and absolute paths outside of *cwd* are returned unchanged.
    """
    path = Path(text)
    if not path.is_absolute():
        return text
    try:
        return str(path.relative_to(cwd))
    except ValueError:
        # not located below cwd -> nothing sensible to prune
        return text


class SanitizerError:
    """One sanitizer report together with the context it was found in.

    Two instances compare equal (and hash equal) when they point to the same
    source location, regardless of sanitizer, test case or command lines.
    """

    # name of the sanitizer as printed in its "SUMMARY: <name>: ..." line
    SUMMARY_ID = ""

    def __init__(
        self, traceback: str, commandlines: dict, testcase: str, cwd: Path = Path(".")
    ) -> None:
        """Store the report and derive error type and location from *traceback*.

        Args:
            traceback: Lines of the sanitizer report joined with newlines; the
                last line is expected to be the ``SUMMARY:`` line.
            commandlines: Mapping of executable name to the command line found
                for it (empty string if none was found).
            testcase: Name of the test case the report belongs to.
            cwd: Directory used to turn absolute locations into relative ones.

        Raises:
            ReportFormatError: If the location cannot be parsed from *traceback*.
        """
        self.traceback = traceback
        self.commandlines = commandlines
        self.testcase = testcase
        self.type, self.location = self.parse_type_and_location(traceback, cwd)

    def __hash__(self):
        return hash(self.location)

    def __eq__(self, other):
        if not isinstance(other, SanitizerError):
            return NotImplemented
        return self.location == other.location

    def __repr__(self):
        return f"<{self.__class__.__name__} at {self.location}>"

    def __lt__(self, other):
        """Order by location; for equal locations the better documented one is smaller.

        "Better documented" means that more command lines were found, i.e. that
        fewer entries of ``commandlines`` are empty.
        """
        if not isinstance(other, SanitizerError):
            return NotImplemented
        if self.location != other.location:
            return self.location < other.location
        return self._num_missing_commandlines() < other._num_missing_commandlines()

    def _num_missing_commandlines(self) -> int:
        """Return how many executables have no command line recorded."""
        return sum(1 for cmdline in self.commandlines.values() if cmdline == "")

    def to_dict(self) -> dict:
        """Return the finding as a flat dict, one key per CSV column."""
        return {
            "testcase": self.testcase,
            "sanitizer": self.__class__.__name__.replace("Error", "").upper(),
            "location": self.location,
            "type": self.type,
            "traceback": self.traceback,
            **self.commandlines,
        }

    def _parse_summary_line(self, traceback: str) -> Tuple[str, str]:
        """Extract ``(type, location)`` from the last line of *traceback*.

        Raises:
            ReportFormatError: If the last line is not a matching
                ``SUMMARY: <SUMMARY_ID>: <type> <file>:<line>:<col> in`` line.
        """
        last_line = traceback.split("\n")[-1].strip()
        match = re.match(
            r"SUMMARY: " + re.escape(self.SUMMARY_ID) + _SUMMARY_TAIL, last_line
        )
        if match is None:
            raise ReportFormatError(
                f"Cannot parse {self.SUMMARY_ID} summary line: {last_line!r}"
            )
        error_type, location = match.groups()
        return error_type, location

    def parse_type_and_location(self, traceback, cwd) -> Tuple[str, str]:
        """Return the error type and the (cwd-relative if possible) location.

        Raises:
            ReportFormatError: If the summary line cannot be parsed.
        """
        error_type, location = self._parse_summary_line(traceback)
        return error_type, _strip_cwd(location, cwd)


class UsanError(SanitizerError):
    """Finding reported by UndefinedBehaviorSanitizer."""

    SUMMARY_ID = "UndefinedBehaviorSanitizer"


class MsanError(SanitizerError):
    """Finding reported by MemorySanitizer."""

    SUMMARY_ID = "MemorySanitizer"


class AsanError(SanitizerError):
    """Finding reported by AddressSanitizer or by its leak checker."""

    SUMMARY_ID = "AddressSanitizer"

    def parse_type_and_location(self, traceback, cwd) -> Tuple[str, str]:
        """Return error type and location for an address error or a leak report.

        Address errors are read from the ``SUMMARY:`` line.  Leak reports get
        the fixed type ``"memory leaks"`` and the location of the first ``#1``
        stack frame (empty string if there is none).

        Raises:
            ReportFormatError: If the summary line of an address error cannot
                be parsed.
            NotImplementedError: If the first line names neither sanitizer.
        """
        lines = traceback.split("\n")
        first_line = lines[0].strip()
        if "AddressSanitizer" in first_line:
            error_type, location = self._parse_summary_line(traceback)
        elif "LeakSanitizer" in first_line:
            error_type = "memory leaks"
            location = ""
            # Only the first leak is used for the location, even if more are
            # listed - perfect accuracy is not needed here.  This assumes that
            # frame #0 is always the executable itself without a source file.
            for line in lines:
                tokens = line.split()
                if tokens and tokens[0] == "#1":
                    location = tokens[-1]
                    break
        else:
            raise NotImplementedError("Unknown Asan type")
        return error_type, _strip_cwd(location, cwd)


def parse_commandlines_from_sysout(sysout: str, cwd: Path) -> dict:
    """Find the first command line of each known executable in *sysout*.

    Args:
        sysout: Captured output of one test case.
        cwd: Directory used to shorten absolute paths in the command lines.

    Returns:
        Dict with one key per known executable; the value is the post-processed
        command line or ``""`` if none was found.
    """
    commandlines: Dict[str, str] = {exe: "" for exe in _EXECUTABLES}
    for line in sysout.splitlines():
        stripped = line.strip()
        # Executable names are repeated in sanitizer tracebacks ("in _start")
        # and in CalledProcessError messages of the renderer tests; neither
        # is a command line.
        if " in _start " in line or "CalledProcessError" in line:
            continue
        for exe in _EXECUTABLES:
            # Lines starting with the name are labels such as "eid-xor command:".
            if exe not in line or stripped.startswith(exe):
                continue
            if commandlines[exe]:
                logging.debug(
                    "Commandline for %s already found, skip second one.", exe
                )
            else:
                commandlines[exe] = postprocess_cmdline(stripped, cwd, exe)
            # assumption: only one commandline per line
            break
    return commandlines


def postprocess_cmdline(cmdline: str, cwd: Path, exe: str) -> str:
    """Normalize a command line so that it is comparable between runs.

    Everything before the first token containing *exe* is dropped (needed for
    the renderer tests, whose lines carry a prefix), the quiet flag ``-q`` is
    removed, bitstream files (``.192``), ``.netsimtrace`` files and a trailing
    ``.wav`` output file are reduced to their file name, and other absolute
    paths are made relative to *cwd* where possible.

    Args:
        cmdline: Line containing the command line.
        cwd: Directory that absolute paths are made relative to.
        exe: Name of the executable the command line belongs to.

    Returns:
        The normalized command line with single spaces between tokens.
    """
    tokens = cmdline.split()
    start = next((i for i, token in enumerate(tokens) if exe in token), 0)
    tokens = tokens[start:]

    last = len(tokens) - 1
    processed = []
    for position, token in enumerate(tokens):
        if token == "-q":
            continue
        path = Path(token)
        if not path.is_absolute():
            processed.append(token)
        elif path.suffix in (".192", ".netsimtrace") or (
            path.suffix == ".wav" and position == last
        ):
            # output and bitstream files: only the file name is meaningful
            processed.append(path.name)
        else:
            processed.append(_strip_cwd(token, cwd))
    return " ".join(processed)


def parse_errors_from_sysout(
    sysout: Optional[str], testcase_name: str, cwd: Path
) -> List[SanitizerError]:
    """Extract all sanitizer reports from the captured output of one test case.

    A report starts at a line matching one of the sanitizer start patterns and
    ends at the next line starting with ``SUMMARY:``.

    Args:
        sysout: Captured output; ``None`` or an empty string yields no errors.
        testcase_name: Name stored with every finding.
        cwd: Directory used to shorten absolute paths.

    Returns:
        The findings in the order in which they appear in *sysout*.

    Raises:
        ReportFormatError: If reports overlap, a line starts more than one kind
            of report, or a ``SUMMARY:`` line appears outside of a report.
    """
    logging.debug(testcase_name)
    if not sysout:
        return []

    commandlines = parse_commandlines_from_sysout(sysout, cwd)
    errors: List[SanitizerError] = []
    state = _ParserState.OUT
    accu: List[str] = []
    err_cls = None
    for raw_line in sysout.splitlines():
        # hack for the renderer cases, where lines carry a leading "E"
        line = raw_line.removeprefix("E").strip()
        is_summary = line.startswith("SUMMARY:")

        started = []
        if _PATTERN_USAN.search(line) is not None:
            started.append(UsanError)
        # the MemorySanitizer summary line matches the start pattern as well
        if _PATTERN_MSAN.search(line) is not None and not is_summary:
            started.append(MsanError)
        if _PATTERN_ASAN.search(line) is not None:
            started.append(AsanError)

        if len(started) > 1:
            raise ReportFormatError(f"Line starts several sanitizer reports: {line!r}")
        if started:
            if state is not _ParserState.OUT:
                raise ReportFormatError(
                    f"New sanitizer report before end of previous one: {line!r}"
                )
            state = _ParserState.IN
            accu = []
            err_cls = started[0]

        if state is _ParserState.IN:
            accu.append(line)

        if is_summary:
            if state is not _ParserState.IN:
                raise ReportFormatError(
                    f"SUMMARY line outside of a sanitizer report: {line!r}"
                )
            errors.append(err_cls("\n".join(accu), commandlines, testcase_name, cwd))
            state = _ParserState.OUT

    return errors


def _errors_in_children(
    testcase: ElementTree.Element, tag: str, name: str, cwd: Path
) -> List[SanitizerError]:
    """Parse the text of all *tag* children of *testcase* for sanitizer reports."""
    found: List[SanitizerError] = []
    for child in testcase.findall(tag):
        found.extend(parse_errors_from_sysout(child.text, name, cwd))
    return found


def _deduplicate(errors: Iterable[SanitizerError]) -> List[SanitizerError]:
    """Keep one finding per location, sorted by location.

    Among findings with the same location the smallest one according to
    ``SanitizerError.__lt__`` is kept; on ties the first one seen wins.
    """
    best: Dict[str, SanitizerError] = {}
    for error in errors:
        kept = best.get(error.location)
        if kept is None or error < kept:
            best[error.location] = error
    return [best[location] for location in sorted(best)]


def main(args):
    """Parse the XML report and write the unique findings to a CSV file.

    Args:
        args: Namespace with the attributes ``xml_report`` (input file),
            ``outfile`` (CSV file to write) and ``inject_cwd`` (directory used
            to shorten absolute paths).
    """
    root = ElementTree.parse(args.xml_report).getroot()

    errors: List[SanitizerError] = []
    for tc in root.iter("testcase"):
        tc_name = tc.attrib["name"]
        errors_found = _errors_in_children(tc, "system-out", tc_name, args.inject_cwd)
        # hack for the renderer tests, which report differently:
        # if nothing is found in system-out, try in failure
        if not errors_found:
            errors_found = _errors_in_children(tc, "failure", tc_name, args.inject_cwd)
        errors.extend(errors_found)

    unique_errors = _deduplicate(errors)
    print(f"Found {len(unique_errors)} unique errors")

    # explicit columns keep the header stable even if nothing was found
    df = pd.DataFrame([e.to_dict() for e in unique_errors], columns=_CSV_COLUMNS)
    df.to_csv(args.outfile, index=False)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("xml_report")
    parser.add_argument("outfile")
    parser.add_argument(
        "--inject_cwd",
        help="Use this as cwd when pruning the long paths in the command lines. Debug option for testing.",
        default=Path(os.getcwd()).absolute(),
        type=Path,
    )
    args = parser.parse_args()
    main(args)
scripts/ubsan.supp +1 −0 Original line number Diff line number Diff line Loading @@ -32,6 +32,7 @@ implicit-signed-integer-truncation:lib_dec.c implicit-signed-integer-truncation:longarith.c implicit-signed-integer-truncation:tcq_position_arith.c implicit-signed-integer-truncation:tools.c implicit-signed-integer-truncation:ivas_objectRenderer_hrFilt.c shift-base:basop32.c shift-base:enh40.c shift-base:enh40.h Loading