Loading scripts/find_regressions_from_logs.py +19 −3 Original line number Diff line number Diff line Loading @@ -4,7 +4,7 @@ import argparse from pathlib import Path import pandas as pd def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_thr, curr_value_thr): def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_thr, curr_value_thr, max_per_format): input_path = Path(logs_dir) logs = [f for f in input_path.iterdir() if f.is_dir()] Loading Loading @@ -58,10 +58,14 @@ def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_ remove_days = len_history - days result = result.drop(result.columns[2:2+remove_days], axis=1) values = result.iloc[:, 2:] # Filter out all-NA rows (testcase has no data in range) all_na = values.isna().all(axis=1) result = result[~all_na].reset_index(drop=True) values = result.iloc[:, 2:] last_date = values.columns[-1] result.insert(2, "format", f[last_date]) result.insert(2, "format", f.loc[~all_na, last_date].reset_index(drop=True)) result.insert(3, "min_date", values.idxmin(axis=1)) result.insert(4, "min_sha", result["min_date"].map(sha)) result.insert(5, "curr_value", values[last_date]) Loading @@ -78,6 +82,12 @@ def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_ result = result[result["ratio"] > ratio_thr] result = result[result["curr_value"] > curr_value_thr] # Sort by format and then ratio result = result.sort_values(by=["format", "ratio"], ascending=[True, False]) if max_per_format is not None: result = result.groupby("format").head(max_per_format) result["min_sha"] = "'" + result["min_sha"] # Add apostrophe to prevent Excel reading this as a number result.to_csv(output_filename, sep=";", index=False) Loading Loading @@ -132,8 +142,14 @@ if __name__ == "__main__": help="Include test cases with curr_value above curr_value_thr, (default: 0.0)", default=0.0, ) parser.add_argument( "--max_per_format", type=int, help="Max number of 
listed regressions per format (default: no limit)", default=None, ) args = parser.parse_args() main(args.logs_dir, args.output_filename, args.measure, args.days, args.all_results, args.diff_thr, args.ratio_thr, args.curr_value_thr) main(args.logs_dir, args.output_filename, args.measure, args.days, args.all_results, args.diff_thr, args.ratio_thr, args.curr_value_thr, args.max_per_format) Loading
scripts/find_regressions_from_logs.py +19 −3 Original line number Diff line number Diff line Loading @@ -4,7 +4,7 @@ import argparse from pathlib import Path import pandas as pd def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_thr, curr_value_thr): def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_thr, curr_value_thr, max_per_format): input_path = Path(logs_dir) logs = [f for f in input_path.iterdir() if f.is_dir()] Loading Loading @@ -58,10 +58,14 @@ def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_ remove_days = len_history - days result = result.drop(result.columns[2:2+remove_days], axis=1) values = result.iloc[:, 2:] # Filter out all-NA rows (testcase has no data in range) all_na = values.isna().all(axis=1) result = result[~all_na].reset_index(drop=True) values = result.iloc[:, 2:] last_date = values.columns[-1] result.insert(2, "format", f[last_date]) result.insert(2, "format", f.loc[~all_na, last_date].reset_index(drop=True)) result.insert(3, "min_date", values.idxmin(axis=1)) result.insert(4, "min_sha", result["min_date"].map(sha)) result.insert(5, "curr_value", values[last_date]) Loading @@ -78,6 +82,12 @@ def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_ result = result[result["ratio"] > ratio_thr] result = result[result["curr_value"] > curr_value_thr] # Sort by format and then ratio result = result.sort_values(by=["format", "ratio"], ascending=[True, False]) if max_per_format is not None: result = result.groupby("format").head(max_per_format) result["min_sha"] = "'" + result["min_sha"] # Add apostrophe to prevent Excel reading this as a number result.to_csv(output_filename, sep=";", index=False) Loading Loading @@ -132,8 +142,14 @@ if __name__ == "__main__": help="Include test cases with curr_value above curr_value_thr, (default: 0.0)", default=0.0, ) parser.add_argument( "--max_per_format", type=int, help="Max number of listed 
regressions per format (default: no limit)", default=None, ) args = parser.parse_args() main(args.logs_dir, args.output_filename, args.measure, args.days, args.all_results, args.diff_thr, args.ratio_thr, args.curr_value_thr) main(args.logs_dir, args.output_filename, args.measure, args.days, args.all_results, args.diff_thr, args.ratio_thr, args.curr_value_thr, args.max_per_format)