diff --git a/scripts/find_regressions_from_logs.py b/scripts/find_regressions_from_logs.py index 37f377afec471dc884cda7d6c775c21c097ec623..f64071995b5e7a4396b442cb59311f24756ed91b 100644 --- a/scripts/find_regressions_from_logs.py +++ b/scripts/find_regressions_from_logs.py @@ -4,7 +4,7 @@ import argparse from pathlib import Path import pandas as pd -def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_thr, curr_value_thr): +def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_thr, curr_value_thr, max_per_format): input_path = Path(logs_dir) logs = [f for f in input_path.iterdir() if f.is_dir()] @@ -85,6 +85,9 @@ def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_ # Sort by format and then ratio result = result.sort_values(by=["format", "ratio"], ascending=[True, False]) + if max_per_format is not None: + result = result.groupby("format").head(max_per_format) + result["min_sha"] = "'" + result["min_sha"] # Add apostrophy to prevent Excel reading this as a number result.to_csv(output_filename, sep=";", index=False) @@ -139,8 +142,14 @@ if __name__ == "__main__": help="Include test cases with curr_value above curr_value_thr, (default: 0.0)", default=0.0, ) + parser.add_argument( + "--max_per_format", + type=int, + help="Max number of listed regressions per format (default: no limit)", + default=None, + ) args = parser.parse_args() - main(args.logs_dir, args.output_filename, args.measure, args.days, args.all_results, args.diff_thr, args.ratio_thr, args.curr_value_thr) + main(args.logs_dir, args.output_filename, args.measure, args.days, args.all_results, args.diff_thr, args.ratio_thr, args.curr_value_thr, args.max_per_format)