Commit 1c4e9d8f authored by Jan Kiene's avatar Jan Kiene
Browse files

add script for collecting USAN errors from logs

parent fdcce24c
Loading
Loading
Loading
Loading
+106 −0
Original line number Diff line number Diff line
import pandas as pd
from xml.etree import ElementTree
import argparse
from enum import Enum
from typing import List
import re


class UsanError:
    """A single sanitizer runtime error extracted from a test log.

    The error location is the part of the first line of ``err`` that
    precedes ``": runtime error:"``. Identity (equality and hashing) is
    based on that location only, so two reports from the same location are
    considered the same error even if their message, traceback or command
    lines differ.
    """

    def __init__(self, err: str, commandlines: dict):
        """Store the raw error text and derive its location.

        Args:
            err: Full text of the error; the first line carries the
                location, all following lines are kept as the traceback.
            commandlines: Extra key/value pairs merged into the dict
                produced by :meth:`to_dict`.
        """
        self.err = err
        self.commandlines = commandlines
        first_line = err.split("\n", 1)[0]
        self.err_location = first_line.strip().split(": runtime error:")[0]

    def __hash__(self):
        # Must agree with __eq__: only the location identifies an error.
        return hash(self.err_location)

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing ``err_location`` attribute.
        if not isinstance(other, UsanError):
            return NotImplemented
        return self.err_location == other.err_location

    def __repr__(self):
        return f"<USAN err: {self.err_location}>"

    def to_dict(self) -> dict:
        """Return a flat dict with location, traceback and the command lines.

        ``traceback`` is everything in ``err`` after the first line. Entries
        of ``commandlines`` are merged in last.
        """
        return {
            "location": self.err_location,
            "traceback": self.err.partition("\n")[2],
            **self.commandlines,
        }


def parse_commandlines_from_sysout(sysout: str) -> dict:
    """Locate the command line of each known executable in captured output.

    Every line of ``sysout`` is checked for the executable names
    ``IVAS_cod``, ``IVAS_dec``, ``IVAS_rend`` and ``ISAR_post_rend`` (in
    that order); a line is attributed to the first name it contains.

    Returns:
        A dict keyed by executable name. The value is the stripped line
        that mentions the executable, or ``""`` if no such line was found.

    Raises:
        AssertionError: If a second line is found for the same executable.
    """
    found = dict.fromkeys(
        ("IVAS_cod", "IVAS_dec", "IVAS_rend", "ISAR_post_rend"), ""
    )

    for raw_line in sysout.splitlines():
        # The executable name is repeated in the sanitizer traceback; such
        # lines contain "_start" and must not be taken as a command line.
        if "_start" in raw_line:
            continue

        # First executable whose name occurs in this line, if any.
        hit = next(
            (name for name in found if re.search(name, raw_line) is not None),
            None,
        )
        if hit is None:
            continue

        assert found[hit] == ""
        found[hit] = raw_line.strip()

    return found


def parse_errors_from_sysout(sysout: str) -> List[UsanError]:
    """Extract all runtime-error reports from the captured output of a test.

    A report starts at a line matching ``<lib_*|apps>/<file>.[ch]:<line>:
    <col>: runtime error:`` and ends at (and includes) the next line that
    starts with ``SUMMARY:``. All lines are stripped before being stored.
    Each report becomes one ``UsanError`` that carries the command lines
    parsed from the same output.

    Raises:
        AssertionError: If a new report starts before the previous one was
            closed, or a ``SUMMARY:`` line appears outside of a report.
    """
    cmdlines = parse_commandlines_from_sysout(sysout)
    error_start = re.compile(
        r"(lib_.+|apps)\/(.*\.[ch]):(\d+):(\d+): runtime error:"
    )

    found = []
    collecting = False  # True while inside a report
    block_lines = []
    for raw_line in sysout.splitlines():
        text = raw_line.strip()

        if error_start.match(text) is not None:
            # Reports must not be nested.
            assert not collecting
            collecting = True
            block_lines = []

        if collecting:
            block_lines.append(text)

        if text.startswith("SUMMARY:"):
            # The summary line closes the report it belongs to.
            assert collecting
            found.append(UsanError("\n".join(block_lines), cmdlines))
            collecting = False

    return found


def main(args):
    """Collect the unique sanitizer errors of an XML test report into a CSV.

    Args:
        args: Parsed command-line arguments providing ``xml_report`` (path
            of the XML report to read) and ``outfile`` (path of the CSV
            file to write).

    Raises:
        AssertionError: If a test case has no ``system-out`` element.
    """
    root = ElementTree.parse(args.xml_report).getroot()

    errors = []
    # iter() walks the whole tree, so test cases are found in every suite,
    # whether the root is a single suite or a wrapper around several.
    for tc in root.iter("testcase"):
        sysout = tc.find("system-out")
        assert sysout is not None

        # An empty <system-out/> element has ``text`` None; treat it as
        # "no output" rather than failing while splitting lines.
        errors.extend(parse_errors_from_sysout(sysout.text or ""))

    # De-duplicate while keeping first-seen order so that the row order of
    # the CSV is reproducible between runs (a set would order the rows by
    # randomized string hashes).
    unique_errors = list(dict.fromkeys(errors))

    df = pd.DataFrame([e.to_dict() for e in unique_errors])
    df.to_csv(args.outfile, index=False)


if __name__ == "__main__":
    # Two positional arguments: the XML report to read and the file the
    # collected errors are written to.
    cli = argparse.ArgumentParser()
    cli.add_argument("xml_report")
    cli.add_argument("outfile")

    main(cli.parse_args())