Loading scripts/detect_regressions.py +58 −17 Original line number Diff line number Diff line Loading @@ -6,7 +6,13 @@ import pandas as pd import plotly.graph_objects as go from plotly.subplots import make_subplots def main(logs_dir, output_filename, measure, days): def main(args): logs_dir = args.logs_dir output_filename = args.output_filename measure = args.measure days = args.days input_path = Path(logs_dir) logs = [f for f in input_path.iterdir() if f.is_dir()] Loading Loading @@ -39,14 +45,18 @@ def main(logs_dir, output_filename, measure, days): for date, jobs in logdict.items(): for job, testcases in jobs.items(): for testcase, value in testcases.items(): csv_rows.append((job, testcase, formatdict[date][job][testcase], date, value)) csv_rows.append( (job, testcase, formatdict[date][job][testcase], date, value) ) result = pd.DataFrame(csv_rows, columns=["job", "testcase", "format", "date", "value"]) result = pd.DataFrame( csv_rows, columns=["job", "testcase", "format", "date", "value"] ) result = result.pivot( index=["job", "testcase", "format"], columns="date", values="value" ).reset_index() # Keep only tests for which results exist in the last run # Keep only tests for which results exist in any of the days if days == -1: rng = result.columns[3:] # Whole history else: Loading @@ -66,24 +76,26 @@ def main(logs_dir, output_filename, measure, days): values = result.iloc[:, 3:] date = values.columns formats = result['format'].dropna().unique().tolist() formats = result["format"].dropna().unique().tolist() plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days+1):-1]) plottext = pd.DataFrame("", index=formats, columns=dates[-(days+1):-1]) plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days) : -1]) plottext = pd.DataFrame("", index=formats, columns=dates[-(days) : -1]) all_indices = [] for i in range(days): currdate = dates[-(days-i+1)] # Make robust for shorter history prevdate = dates[-(days-i+2)] currdate = dates[-(days - i)] # Make 
robust for shorter history prevdate = dates[-(days - i + 1)] idx = ratio.groupby("format")[currdate].idxmax() all_indices.append(idx) all_indices.extend(idx.tolist()) # Store worst case per format for plotting for f in formats: plotdata.loc[f, currdate] = ratio.iloc[idx[f]][currdate] plottext.loc[f, currdate] = f"Job: {result.iloc[idx[f]]['job']}<br>Testcase: {result.iloc[idx[f]]['testcase']} <br>Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>Date: {currdate}" plottext.loc[f, currdate] = ( f"Job: {result.iloc[idx[f]]['job']}<br>Testcase: {result.iloc[idx[f]]['testcase']} <br>Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>Date: {currdate}" ) fig = make_subplots( rows=5, Loading Loading @@ -122,6 +134,32 @@ def main(logs_dir, output_filename, measure, days): # Save to html fig.write_html(output_filename) # Write CSV-file if args.csv: output = result.iloc[all_indices].copy() cols = ["job","testcase","format"] cols.extend(date[-days:].tolist()) output = output.loc[:,cols] values = output.iloc[:, 3:] last_date = values.columns[-1] output.insert(3, "min_date", values.idxmin(axis=1)) output.insert(4, "min_sha", output["min_date"].map(sha)) output.insert(5, "curr_value", output[last_date]) output.insert(6, "min_value", values.min(axis=1)) output.insert(7, "diff", output["curr_value"] - output["min_value"]) output.insert(8, "ratio", output["curr_value"] / output["min_value"]) output.loc[output["min_value"] == 0, "ratio"] = ( 1 # Set ratio to 1 for denominator 0 ) output["min_sha"] = ( "'" + output["min_sha"] ) # Add apostrophe to prevent Excel reading this as a number output.sort_values( by=["format", "ratio"], ascending=[True, False], inplace=True ) output.to_csv(args.csv, sep=";", index=False) if __name__ == "__main__": parser = argparse.ArgumentParser(description="logs dir") parser.add_argument( Loading @@ -146,8 +184,11 @@ if __name__ == "__main__": help="Number of days in history, (default: whole history)", default=-1, ) parser.add_argument( 
"--csv", type=str, help="CSV output file", ) args = parser.parse_args() main(args.logs_dir, args.output_filename, args.measure, args.days) main(args) Loading
scripts/detect_regressions.py +58 −17 Original line number Diff line number Diff line Loading @@ -6,7 +6,13 @@ import pandas as pd import plotly.graph_objects as go from plotly.subplots import make_subplots def main(logs_dir, output_filename, measure, days): def main(args): logs_dir = args.logs_dir output_filename = args.output_filename measure = args.measure days = args.days input_path = Path(logs_dir) logs = [f for f in input_path.iterdir() if f.is_dir()] Loading Loading @@ -39,14 +45,18 @@ def main(logs_dir, output_filename, measure, days): for date, jobs in logdict.items(): for job, testcases in jobs.items(): for testcase, value in testcases.items(): csv_rows.append((job, testcase, formatdict[date][job][testcase], date, value)) csv_rows.append( (job, testcase, formatdict[date][job][testcase], date, value) ) result = pd.DataFrame(csv_rows, columns=["job", "testcase", "format", "date", "value"]) result = pd.DataFrame( csv_rows, columns=["job", "testcase", "format", "date", "value"] ) result = result.pivot( index=["job", "testcase", "format"], columns="date", values="value" ).reset_index() # Keep only tests for which results exist in the last run # Keep only tests for which results exist in any of the days if days == -1: rng = result.columns[3:] # Whole history else: Loading @@ -66,24 +76,26 @@ def main(logs_dir, output_filename, measure, days): values = result.iloc[:, 3:] date = values.columns formats = result['format'].dropna().unique().tolist() formats = result["format"].dropna().unique().tolist() plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days+1):-1]) plottext = pd.DataFrame("", index=formats, columns=dates[-(days+1):-1]) plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days) : -1]) plottext = pd.DataFrame("", index=formats, columns=dates[-(days) : -1]) all_indices = [] for i in range(days): currdate = dates[-(days-i+1)] # Make robust for shorter history prevdate = dates[-(days-i+2)] currdate = dates[-(days - i)] # Make robust for 
shorter history prevdate = dates[-(days - i + 1)] idx = ratio.groupby("format")[currdate].idxmax() all_indices.append(idx) all_indices.extend(idx.tolist()) # Store worst case per format for plotting for f in formats: plotdata.loc[f, currdate] = ratio.iloc[idx[f]][currdate] plottext.loc[f, currdate] = f"Job: {result.iloc[idx[f]]['job']}<br>Testcase: {result.iloc[idx[f]]['testcase']} <br>Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>Date: {currdate}" plottext.loc[f, currdate] = ( f"Job: {result.iloc[idx[f]]['job']}<br>Testcase: {result.iloc[idx[f]]['testcase']} <br>Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>Date: {currdate}" ) fig = make_subplots( rows=5, Loading Loading @@ -122,6 +134,32 @@ def main(logs_dir, output_filename, measure, days): # Save to html fig.write_html(output_filename) # Write CSV-file if args.csv: output = result.iloc[all_indices].copy() cols = ["job","testcase","format"] cols.extend(date[-days:].tolist()) output = output.loc[:,cols] values = output.iloc[:, 3:] last_date = values.columns[-1] output.insert(3, "min_date", values.idxmin(axis=1)) output.insert(4, "min_sha", output["min_date"].map(sha)) output.insert(5, "curr_value", output[last_date]) output.insert(6, "min_value", values.min(axis=1)) output.insert(7, "diff", output["curr_value"] - output["min_value"]) output.insert(8, "ratio", output["curr_value"] / output["min_value"]) output.loc[output["min_value"] == 0, "ratio"] = ( 1 # Set ratio to 1 for denominator 0 ) output["min_sha"] = ( "'" + output["min_sha"] ) # Add apostrophe to prevent Excel reading this as a number output.sort_values( by=["format", "ratio"], ascending=[True, False], inplace=True ) output.to_csv(args.csv, sep=";", index=False) if __name__ == "__main__": parser = argparse.ArgumentParser(description="logs dir") parser.add_argument( Loading @@ -146,8 +184,11 @@ if __name__ == "__main__": help="Number of days in history, (default: whole history)", default=-1, ) parser.add_argument( "--csv", 
type=str, help="CSV output file", ) args = parser.parse_args() main(args.logs_dir, args.output_filename, args.measure, args.days) main(args)