Unverified Commit 3a4fc5e3 authored by norvell's avatar norvell
Browse files

Change mld output file from detect_regressions.py

parent 268a18d1
Loading
Loading
Loading
Loading
Loading
+58 −17
Original line number Diff line number Diff line
@@ -6,7 +6,13 @@ import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def main(logs_dir, output_filename, measure, days):

def main(args):

    logs_dir = args.logs_dir
    output_filename = args.output_filename
    measure = args.measure
    days = args.days

    input_path = Path(logs_dir)
    logs = [f for f in input_path.iterdir() if f.is_dir()]
@@ -39,14 +45,18 @@ def main(logs_dir, output_filename, measure, days):
    for date, jobs in logdict.items():
        for job, testcases in jobs.items():
            for testcase, value in testcases.items():
                csv_rows.append((job, testcase, formatdict[date][job][testcase], date, value))
                csv_rows.append(
                    (job, testcase, formatdict[date][job][testcase], date, value)
                )

    result = pd.DataFrame(csv_rows, columns=["job", "testcase", "format", "date", "value"])
    result = pd.DataFrame(
        csv_rows, columns=["job", "testcase", "format", "date", "value"]
    )
    result = result.pivot(
        index=["job", "testcase", "format"], columns="date", values="value"
    ).reset_index()

    # Keep only tests for which results exist in the last run
    # Keep only tests for which results exist in any of the days
    if days == -1:
        rng = result.columns[3:]  # Whole history
    else:
@@ -66,24 +76,26 @@ def main(logs_dir, output_filename, measure, days):
    values = result.iloc[:, 3:]
    date = values.columns

    formats = result['format'].dropna().unique().tolist()
    formats = result["format"].dropna().unique().tolist()

    plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days+1):-1])
    plottext = pd.DataFrame("", index=formats, columns=dates[-(days+1):-1])
    plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days) : -1])
    plottext = pd.DataFrame("", index=formats, columns=dates[-(days) : -1])

    all_indices = []

    for i in range(days):
        currdate = dates[-(days-i+1)] # Make robust for shorter history
        prevdate = dates[-(days-i+2)]
        currdate = dates[-(days - i)]  # Make robust for shorter history
        prevdate = dates[-(days - i + 1)]

        idx = ratio.groupby("format")[currdate].idxmax()
        all_indices.append(idx)
        all_indices.extend(idx.tolist())

        # Store worst case per format for plotting
        for f in formats:
            plotdata.loc[f, currdate] = ratio.iloc[idx[f]][currdate]
            plottext.loc[f, currdate] = f"Job: {result.iloc[idx[f]]['job']}<br>Testcase: {result.iloc[idx[f]]['testcase']} <br>Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>Date: {currdate}"
            plottext.loc[f, currdate] = (
                f"Job: {result.iloc[idx[f]]['job']}<br>Testcase: {result.iloc[idx[f]]['testcase']} <br>Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>Date: {currdate}"
            )

    fig = make_subplots(
        rows=5,
@@ -122,6 +134,32 @@ def main(logs_dir, output_filename, measure, days):
    # Save to html
    fig.write_html(output_filename)

    # Write CSV-file
    if args.csv:
        output = result.iloc[all_indices].copy()
        cols = ["job","testcase","format"]
        cols.extend(date[-days:].tolist())
        output = output.loc[:,cols]
        values = output.iloc[:, 3:]
        last_date = values.columns[-1]
        output.insert(3, "min_date", values.idxmin(axis=1))
        output.insert(4, "min_sha", output["min_date"].map(sha))
        output.insert(5, "curr_value", output[last_date])
        output.insert(6, "min_value", values.min(axis=1))
        output.insert(7, "diff", output["curr_value"] - output["min_value"])
        output.insert(8, "ratio", output["curr_value"] / output["min_value"])
        output.loc[output["min_value"] == 0, "ratio"] = (
            1  # Set ratio to 1 for denominator 0
        )
        output["min_sha"] = (
            "'" + output["min_sha"]
        )  # Add apostrophe to prevent Excel reading this as a number
        output.sort_values(
            by=["format", "ratio"], ascending=[True, False], inplace=True
        )
        output.to_csv(args.csv, sep=";", index=False)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="logs dir")
    parser.add_argument(
@@ -146,8 +184,11 @@ if __name__ == "__main__":
        help="Number of days in history, (default: whole history)",
        default=-1,
    )
    parser.add_argument(
        "--csv",
        type=str,
        help="CSV output file",
    )

    args = parser.parse_args()

    main(args.logs_dir, args.output_filename, args.measure, args.days)
    main(args)