diff --git a/scripts/find_regressions_from_logs.py b/scripts/find_regressions_from_logs.py index 67a7447ddfd1c4d3cf0257fe587892e5b7b086c7..37f377afec471dc884cda7d6c775c21c097ec623 100644 --- a/scripts/find_regressions_from_logs.py +++ b/scripts/find_regressions_from_logs.py @@ -58,10 +58,14 @@ def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_ remove_days = len_history - days result = result.drop(result.columns[2:2+remove_days], axis=1) + values = result.iloc[:, 2:] + # Filter out all-NA rows (testcase has no data in range) + all_na = values.isna().all(axis=1) + result = result[~all_na].reset_index(drop=True) values = result.iloc[:, 2:] last_date = values.columns[-1] - result.insert(2, "format", f[last_date]) + result.insert(2, "format", f.loc[~all_na, last_date].reset_index(drop=True)) result.insert(3, "min_date", values.idxmin(axis=1)) result.insert(4, "min_sha", result["min_date"].map(sha)) result.insert(5, "curr_value", values[last_date]) @@ -78,6 +82,9 @@ def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_ result = result[result["ratio"] > ratio_thr] result = result[result["curr_value"] > curr_value_thr] + # Sort by format and then ratio + result = result.sort_values(by=["format", "ratio"], ascending=[True, False]) + result["min_sha"] = "'" + result["min_sha"] # Add apostrophy to prevent Excel reading this as a number result.to_csv(output_filename, sep=";", index=False)