Merge branch 'kiene/ubsan-error-reporting' into 'main' (0afaaf6c) · Commits · IVAS Codec Public Collaboration / IVAS Codec

scripts/parse_sanitizer_errors_from_xml_report.py

0 → 100644

+291 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3

		from numpy import trace
		import pandas as pd
		from xml.etree import ElementTree
		import argparse
		from enum import Enum
		from typing import List, Tuple
		import re
		import os
		from pathlib import Path
		import logging


		# Configure the root logger at INFO level; the logging.debug() calls in this
		# script are therefore suppressed unless this level is lowered.
		logging.basicConfig(level=logging.INFO)


		class SanitizerError:
		    """One sanitizer finding extracted from a test log.

		    Two instances are equal (and hash alike) when they point at the same
		    source location, so a ``set`` keeps one finding per location. Ordering
		    is by location first; among findings at the same location, the one with
		    more recovered command lines sorts first.

		    Subclasses set ``SUMMARY_ID`` to the sanitizer name that appears in the
		    closing ``SUMMARY: <name>: ...`` line of a report.
		    """

		    SUMMARY_ID = ""

		    def __init__(
		        self, traceback: str, commandlines: dict, testcase: str, cwd: Path = Path(".")
		    ) -> None:
		        """Store the report and derive error type and location from it.

		        Args:
		            traceback: Report text; its last line must be the SUMMARY line.
		            commandlines: Mapping of executable name to command line, with
		                ``""`` for executables whose command line was not found.
		            testcase: Name of the test case the report belongs to.
		            cwd: Base directory used to shorten absolute locations.

		        Raises:
		            ValueError: If the SUMMARY line cannot be parsed.
		        """
		        self.traceback = traceback
		        self.commandlines = commandlines
		        self.testcase = testcase
		        self.type, self.location = self.parse_type_and_location(traceback, cwd)

		    def __hash__(self):
		        return hash(self.location)

		    def __eq__(self, other):
		        if not isinstance(other, SanitizerError):
		            return NotImplemented
		        return self.location == other.location

		    def __repr__(self):
		        return f"<{self.__class__.__name__} at {self.location}>"

		    def __lt__(self, other):
		        if not isinstance(other, SanitizerError):
		            return NotImplemented
		        # order by string comparison of location as first criterion
		        if self.location != other.location:
		            return self.location < other.location
		        # same location: the smaller one is the one with more found
		        # (i.e. non-empty) command lines, so it comes first when sorting
		        num_found_self = sum(1 for c in self.commandlines.values() if c != "")
		        num_found_other = sum(1 for c in other.commandlines.values() if c != "")
		        return num_found_self > num_found_other

		    def to_dict(self) -> dict:
		        """Return a flat dict of this finding, one key per output column."""
		        return {
		            "testcase": self.testcase,
		            "sanitizer": self.__class__.__name__.replace("Error", "").upper(),
		            "location": self.location,
		            "type": self.type,
		            "traceback": self.traceback,
		            **self.commandlines,
		        }

		    def parse_type_and_location(self, traceback, cwd) -> Tuple[str, str]:
		        """Extract ``(type, location)`` from the SUMMARY line of ``traceback``.

		        The location has the form ``path/file.c:line:col``. An absolute path
		        is made relative to ``cwd`` when it lies below it and is kept
		        unchanged otherwise.

		        Raises:
		            ValueError: If the last line is not a parsable SUMMARY line.
		        """
		        last_line = traceback.split("\n")[-1].strip()
		        m = re.match(
		            r"SUMMARY: " + self.SUMMARY_ID + r": ([a-z-]+) (.+\.[ch]:\d+:\d+) in",
		            last_line,
		        )
		        if m is None:
		            raise ValueError(
		                f"Cannot parse {self.SUMMARY_ID} summary line: {last_line!r}"
		            )

		        err_type, location = m.groups()

		        if Path(location).is_absolute():
		            try:
		                location = str(Path(location).relative_to(cwd))
		            except ValueError:
		                # location is not below cwd - keep the absolute path
		                pass
		        return err_type, location


		# Finding reported by UndefinedBehaviorSanitizer; only sets the sanitizer
		# name that is looked up in the report's "SUMMARY: ..." line.
		class UsanError(SanitizerError):
		SUMMARY_ID = "UndefinedBehaviorSanitizer"


		# Finding reported by MemorySanitizer; only sets the sanitizer name that
		# is looked up in the report's "SUMMARY: ..." line.
		class MsanError(SanitizerError):
		SUMMARY_ID = "MemorySanitizer"


		class AsanError(SanitizerError):
		    """Finding reported by AddressSanitizer, including LeakSanitizer reports.

		    Both report flavours end in a ``SUMMARY: AddressSanitizer: ...`` line,
		    so the flavour is detected from the first line of the traceback.
		    """

		    SUMMARY_ID = "AddressSanitizer"

		    def parse_type_and_location(self, traceback, cwd) -> Tuple[str, str]:
		        """Extract ``(type, location)`` from an ASan or LSan report.

		        For AddressSanitizer reports both values come from the SUMMARY line.
		        For LeakSanitizer reports the type is ``"memory leaks"`` and the
		        location is the last token of the first ``#1`` stack frame (empty
		        string if there is none). An absolute location is made relative to
		        ``cwd`` when it lies below it and is kept unchanged otherwise.

		        Raises:
		            ValueError: If an AddressSanitizer SUMMARY line cannot be parsed.
		            NotImplementedError: If the first line names neither sanitizer.
		        """
		        lines = traceback.split("\n")
		        first_line = lines[0].strip()

		        if "AddressSanitizer" in first_line:
		            last_line = lines[-1].strip()
		            # upper-case letters are allowed because ASan also reports
		            # kinds such as "SEGV"
		            m = re.match(
		                r"SUMMARY: "
		                + self.SUMMARY_ID
		                + r": ([A-Za-z-]+) (.+\.[ch]:\d+:\d+) in",
		                last_line,
		            )
		            if m is None:
		                raise ValueError(
		                    f"Cannot parse {self.SUMMARY_ID} summary line: {last_line!r}"
		                )
		            err_type, location = m.groups()
		        elif "LeakSanitizer" in first_line:
		            err_type = "memory leaks"
		            location = ""

		            # for location, we just pick from the first leak, even if there are more in there
		            # perfect accuracy not needed here
		            for line in lines:
		                # this assumes that number #0 always is the executable itself and has no file associated
		                if line.strip().startswith("#1"):
		                    location = line.split()[-1]
		                    break
		        else:
		            raise NotImplementedError("Unknown Asan type")

		        if Path(location).is_absolute():
		            try:
		                location = str(Path(location).relative_to(cwd))
		            except ValueError:
		                # location is not below cwd - keep the absolute path
		                pass
		        return err_type, location


		def parse_commandlines_from_sysout(sysout: str, cwd: Path) -> dict:
		    """Collect the first command line of each known executable from a log.

		    Returns a dict keyed by executable name; the value is the post-processed
		    command line, or ``""`` when no command line was found for it.
		    """
		    executables = (
		        "IVAS_cod",
		        "networkSimulator_g192",
		        "eid-xor",
		        "IVAS_dec",
		        "IVAS_rend",
		        "ISAR_post_rend",
		    )
		    commandlines = dict.fromkeys(executables, "")

		    for raw_line in sysout.splitlines():
		        # sanitizer tracebacks repeat the executable name (" in _start "),
		        # and the renderer tests mention it in CalledProcessError lines -
		        # neither of those is a command line
		        if " in _start " in raw_line or "CalledProcessError" in raw_line:
		            continue

		        stripped = raw_line.strip()
		        for exe in executables:
		            if exe not in raw_line:
		                continue
		            # lines starting with the name are headers like "eid-xor command:"
		            if stripped.startswith(exe):
		                continue

		            if commandlines[exe] == "":
		                commandlines[exe] = postprocess_cmdline(stripped, cwd, exe)
		            else:
		                logging.debug(
		                    f"Commandline for {exe} already found, skip second one."
		                )

		            # assumption: only one commandline per line
		            break

		    return commandlines


		def postprocess_cmdline(cmdline: str, cwd: Path, exe: str) -> str:
		    """Shorten a logged command line for reporting.

		    Everything before the last whitespace-separated token containing ``exe``
		    is dropped (the line is kept whole if no token contains it). Then:

		    * the quiet flag ``-q`` is removed,
		    * absolute paths ending in ``.192`` or ``.netsimtrace``, and a ``.wav``
		      path in last position, are reduced to the bare file name,
		    * other absolute paths are made relative to ``cwd``; a path that is not
		      below ``cwd`` is kept unchanged,
		    * every other token is kept unchanged.

		    Returns the remaining tokens joined by single spaces.
		    """
		    # only use line with commandline from the token that includes the exe name
		    # reason again the renderer tests...
		    # token offsets are taken from the match itself so that an earlier
		    # substring occurrence of the same text cannot be picked up
		    start = 0
		    for token in re.finditer(r"\S+", cmdline):
		        if exe in token.group():
		            start = token.start()

		    tokens = cmdline[start:].split()
		    last_idx = len(tokens) - 1
		    shortened = []

		    for idx, elem in enumerate(tokens):
		        # remove the "quiet" flag
		        if elem == "-q":
		            continue

		        elem_as_path = Path(elem)
		        if not elem_as_path.is_absolute():
		            shortened.append(elem)
		            continue

		        # for output and bitstream files only keep the filename
		        name_only = elem_as_path.suffix in (".192", ".netsimtrace") or (
		            elem_as_path.suffix == ".wav" and idx == last_idx
		        )
		        if name_only:
		            shortened.append(elem_as_path.name)
		            continue

		        # change absolute paths into relative ones
		        try:
		            shortened.append(str(elem_as_path.relative_to(cwd)))
		        except ValueError:
		            # path is not below cwd - keep it as it is
		            shortened.append(elem)

		    return " ".join(shortened)


		def parse_errors_from_sysout(
		    sysout: str, testcase_name: str, cwd: Path
		) -> List[SanitizerError]:
		    """Extract all sanitizer reports from the captured output of one test.

		    A report starts at a line matching one of the sanitizer start patterns
		    and ends at the next line starting with ``SUMMARY:``. Every report
		    becomes one ``UsanError``, ``MsanError`` or ``AsanError`` carrying the
		    command lines found in the same output.

		    Args:
		        sysout: Captured output text of the test case.
		        testcase_name: Name stored in every returned error.
		        cwd: Base directory used to shorten absolute paths.

		    Returns:
		        The errors in order of appearance.

		    Raises:
		        ValueError: If reports are nested, a line starts more than one kind
		            of report, or a SUMMARY line appears outside of a report.
		    """
		    logging.debug(testcase_name)
		    commandlines = parse_commandlines_from_sysout(sysout, cwd)
		    errors = []

		    # "|" must be unescaped here so that it separates the lib_* and apps
		    # alternatives instead of matching a literal pipe character
		    pattern_usan = re.compile(r"(lib_.+|apps)\/(.*\.[ch]):(\d+):(\d+): runtime error:")
		    pattern_msan = re.compile(r" MemorySanitizer: ")
		    pattern_asan = re.compile(r"==\d+==ERROR: .+Sanitizer: ")

		    in_report = False
		    accu = []
		    err_cls = None
		    for raw_line in sysout.splitlines():
		        # hack for the weird renderer cases
		        line = raw_line.removeprefix("E").strip()
		        is_summary = line.startswith("SUMMARY:")

		        started = []
		        if pattern_usan.search(line) is not None:
		            started.append(UsanError)
		        # the MSan SUMMARY line also names the sanitizer but ends a report
		        if pattern_msan.search(line) is not None and not is_summary:
		            started.append(MsanError)
		        if pattern_asan.search(line) is not None:
		            started.append(AsanError)

		        if len(started) > 1:
		            raise ValueError(
		                f"{testcase_name}: line starts several sanitizer reports: {line!r}"
		            )

		        if started:
		            if in_report:
		                raise ValueError(
		                    f"{testcase_name}: new sanitizer report before SUMMARY of previous one: {line!r}"
		                )
		            in_report = True
		            accu = []
		            err_cls = started[0]

		        if in_report:
		            accu.append(line)

		        if is_summary:
		            if not in_report:
		                raise ValueError(
		                    f"{testcase_name}: SUMMARY line without preceding report: {line!r}"
		                )
		            errors.append(err_cls("\n".join(accu), commandlines, testcase_name, cwd))
		            in_report = False

		    return errors


		def main(args):
		    """Parse a JUnit-style XML report and write unique sanitizer errors as CSV.

		    Reads ``args.xml_report``, scans every ``<testcase>`` element for
		    sanitizer reports, keeps one error per source location and writes the
		    result to ``args.outfile``. ``args.inject_cwd`` is the base directory
		    used to shorten absolute paths.
		    """
		    root = ElementTree.parse(args.xml_report).getroot()

		    errors = []
		    # iter() finds the test cases whether the root is <testsuites> or a
		    # single <testsuite>, and also when there is more than one suite
		    for tc in root.iter("testcase"):
		        tc_name = tc.attrib["name"]
		        errors_found = []
		        # hack for weird renderer tests that don't play the same game as everyone else...
		        # if we don't find anything in system-out, try in failure
		        for tag in ("system-out", "failure"):
		            for node in tc.findall(tag):
		                # an empty element has text None
		                errors_found.extend(
		                    parse_errors_from_sysout(node.text or "", tc_name, args.inject_cwd)
		                )
		            if errors_found:
		                break
		        errors.extend(errors_found)

		    # a set keeps the first of several equal elements, so sorting before
		    # building it decides which duplicate survives
		    unique_errors = sorted(set(sorted(errors)))
		    print(f"Found {len(unique_errors)} unique errors")

		    df = pd.DataFrame([e.to_dict() for e in unique_errors])
		    df.to_csv(args.outfile, index=False)


		if __name__ == "__main__":
		    # Command-line entry point: two positional file arguments plus an
		    # optional override of the directory used to shorten paths.
		    cli = argparse.ArgumentParser()
		    for positional in ("xml_report", "outfile"):
		        cli.add_argument(positional)
		    cli.add_argument(
		        "--inject_cwd",
		        type=Path,
		        default=Path(os.getcwd()).absolute(),
		        help="Use this as cwd when pruning the long paths in the command lines. Debug option for testing.",
		    )

		    main(cli.parse_args())

scripts/ubsan.supp

+1 −0

Original line number	Diff line number	Diff line
		@@ -32,6 +32,7 @@ implicit-signed-integer-truncation:lib_dec.c
		implicit-signed-integer-truncation:longarith.c
		implicit-signed-integer-truncation:tcq_position_arith.c
		implicit-signed-integer-truncation:tools.c
		implicit-signed-integer-truncation:ivas_objectRenderer_hrFilt.c
		shift-base:basop32.c
		shift-base:enh40.c
		shift-base:enh40.h