Loading scripts/find_regressions_from_logs2.py +20 −32 Original line number Diff line number Diff line Loading @@ -39,63 +39,51 @@ def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_ for date, jobs in logdict.items(): for job, testcases in jobs.items(): for testcase, value in testcases.items(): csv_rows.append((job, testcase, date, value)) formats.append((job, testcase, date, formatdict[date][job][testcase])) csv_rows.append((job, testcase, formatdict[date][job][testcase], date, value)) result = pd.DataFrame(csv_rows, columns=["job", "testcase", "date", "value"]) result = pd.DataFrame(csv_rows, columns=["job", "testcase", "format", "date", "value"]) result = result.pivot( index=["job", "testcase"], columns="date", values="value" index=["job", "testcase", "format"], columns="date", values="value" ).reset_index() f = pd.DataFrame(formats, columns=["job", "testcase", "date", "format"]) f = f.pivot( index=["job", "testcase"], columns="date", values="format" ).reset_index() # Keep only tests for which results exist in the last run if days == -1: rng = result.columns[3:] # Whole history else: rng = result.columns[-days:] result = result.dropna(subset=rng) result = result.reset_index(drop=True) ratio = result.copy() dates = result.iloc[:, 2:].columns ratio = ratio.reset_index() dates = result.iloc[:, 3:].columns # Calculate ratios ratio[dates[0]] = 1 # Set first ratio to 1 ratio[dates[0]] = 1.0 # Set first ratio to 1.0 for prevdate, currdate in zip( dates[0:-1], dates[1:]): ratio[currdate] = result[currdate] / result[prevdate] values = result.iloc[:, 2:] values = result.iloc[:, 3:] date = values.columns last_date = date[-1] result.insert(2, "format", f[last_date]) ratio.insert(2, "format", f[last_date]) formats = result['format'].dropna().unique().tolist() plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days+1):-1]) plottext = pd.DataFrame("", index=formats, columns=dates[-(days+1):-1]) all_indices = [] for i in range(days): currdate = dates[-(days-i+1)] # Make robust for shorter history prevdate = dates[-(days-i+2)] idx = ratio.groupby("format")[currdate].nlargest(10).index.get_level_values(1) tmp = result[["job","testcase","format",prevdate,currdate]].iloc[idx,:].copy().reset_index() tmp.insert(3, "prev_date", prevdate) tmp.insert(4, "prev_sha", sha[prevdate]) tmp.insert(5, "curr_date", currdate) tmp.insert(6, "curr_sha", sha[prevdate]) tmp.insert(7, "diff", tmp[currdate] - tmp[prevdate]) tmp.insert(8, "ratio", tmp[currdate] / tmp[prevdate]) tmp.loc[tmp[prevdate] == 0, "ratio"] = ( 1 # Set ratio to 1 for denominator 0 ) tmp["prev_sha"] = "'" + tmp["prev_sha"] # Add apostrophy to prevent Excel reading this as a number tmp["curr_sha"] = "'" + tmp["curr_sha"] # Add apostrophy to prevent Excel reading this as a number csv_filename = f"regressions_{measure}_{currdate}.csv" tmp.to_csv(csv_filename, sep=";", index=False) idx = ratio.groupby("format")[currdate].idxmax() all_indices.append(idx) # Store worst case per format for plotting idx = tmp.groupby("format")["ratio"].idxmax() for f in formats: plotdata.loc[f, currdate] = tmp.iloc[idx[f]]["ratio"] plottext.loc[f, currdate] = f"{tmp.iloc[idx[f]]['job']} - {tmp.iloc[idx[f]]['testcase']} - Max {measure} ratio: {tmp.iloc[idx[f]]['ratio']:.2f}" plotdata.loc[f, currdate] = ratio.iloc[idx[f]][currdate] plottext.loc[f, currdate] = f"{result.iloc[idx[f]]['job']} - {result.iloc[idx[f]]['testcase']} - Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>Date: {currdate}" fig = make_subplots( rows=5, Loading Loading
scripts/find_regressions_from_logs2.py +20 −32 Original line number Diff line number Diff line Loading @@ -39,63 +39,51 @@ def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_ for date, jobs in logdict.items(): for job, testcases in jobs.items(): for testcase, value in testcases.items(): csv_rows.append((job, testcase, date, value)) formats.append((job, testcase, date, formatdict[date][job][testcase])) csv_rows.append((job, testcase, formatdict[date][job][testcase], date, value)) result = pd.DataFrame(csv_rows, columns=["job", "testcase", "date", "value"]) result = pd.DataFrame(csv_rows, columns=["job", "testcase", "format", "date", "value"]) result = result.pivot( index=["job", "testcase"], columns="date", values="value" index=["job", "testcase", "format"], columns="date", values="value" ).reset_index() f = pd.DataFrame(formats, columns=["job", "testcase", "date", "format"]) f = f.pivot( index=["job", "testcase"], columns="date", values="format" ).reset_index() # Keep only tests for which results exist in the last run if days == -1: rng = result.columns[3:] # Whole history else: rng = result.columns[-days:] result = result.dropna(subset=rng) result = result.reset_index(drop=True) ratio = result.copy() dates = result.iloc[:, 2:].columns ratio = ratio.reset_index() dates = result.iloc[:, 3:].columns # Calculate ratios ratio[dates[0]] = 1 # Set first ratio to 1 ratio[dates[0]] = 1.0 # Set first ratio to 1.0 for prevdate, currdate in zip( dates[0:-1], dates[1:]): ratio[currdate] = result[currdate] / result[prevdate] values = result.iloc[:, 2:] values = result.iloc[:, 3:] date = values.columns last_date = date[-1] result.insert(2, "format", f[last_date]) ratio.insert(2, "format", f[last_date]) formats = result['format'].dropna().unique().tolist() plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days+1):-1]) plottext = pd.DataFrame("", index=formats, columns=dates[-(days+1):-1]) all_indices = [] for i in range(days): currdate = dates[-(days-i+1)] # Make robust for shorter history prevdate = dates[-(days-i+2)] idx = ratio.groupby("format")[currdate].nlargest(10).index.get_level_values(1) tmp = result[["job","testcase","format",prevdate,currdate]].iloc[idx,:].copy().reset_index() tmp.insert(3, "prev_date", prevdate) tmp.insert(4, "prev_sha", sha[prevdate]) tmp.insert(5, "curr_date", currdate) tmp.insert(6, "curr_sha", sha[prevdate]) tmp.insert(7, "diff", tmp[currdate] - tmp[prevdate]) tmp.insert(8, "ratio", tmp[currdate] / tmp[prevdate]) tmp.loc[tmp[prevdate] == 0, "ratio"] = ( 1 # Set ratio to 1 for denominator 0 ) tmp["prev_sha"] = "'" + tmp["prev_sha"] # Add apostrophy to prevent Excel reading this as a number tmp["curr_sha"] = "'" + tmp["curr_sha"] # Add apostrophy to prevent Excel reading this as a number csv_filename = f"regressions_{measure}_{currdate}.csv" tmp.to_csv(csv_filename, sep=";", index=False) idx = ratio.groupby("format")[currdate].idxmax() all_indices.append(idx) # Store worst case per format for plotting idx = tmp.groupby("format")["ratio"].idxmax() for f in formats: plotdata.loc[f, currdate] = tmp.iloc[idx[f]]["ratio"] plottext.loc[f, currdate] = f"{tmp.iloc[idx[f]]['job']} - {tmp.iloc[idx[f]]['testcase']} - Max {measure} ratio: {tmp.iloc[idx[f]]['ratio']:.2f}" plotdata.loc[f, currdate] = ratio.iloc[idx[f]][currdate] plottext.loc[f, currdate] = f"{result.iloc[idx[f]]['job']} - {result.iloc[idx[f]]['testcase']} - Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>Date: {currdate}" fig = make_subplots( rows=5, Loading