From 9ba5d516c5bbfe8dcba2c43e43908dc36a11de39 Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Tue, 27 Jan 2026 11:29:40 +0100 Subject: [PATCH 01/10] Fixes for the long term logs --- ci/process_long_term_logs.py | 90 +++++++++++++++--------------------- 1 file changed, 37 insertions(+), 53 deletions(-) diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py index baabc7d93a..1c7d1a668f 100644 --- a/ci/process_long_term_logs.py +++ b/ci/process_long_term_logs.py @@ -39,7 +39,7 @@ def parse_csv_data(csv_data): return concat_df -def plot_data(df, output_filename): +def plot_data(df, output_filename, days): """plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save to html file.""" # Convert 'date' to datetime @@ -47,30 +47,25 @@ def plot_data(df, output_filename): df["MLD"] = pd.to_numeric(df["MLD"], errors="coerce") df["MAX_ABS_DIFF"] = pd.to_numeric(df["MAX_ABS_DIFF"], errors="coerce") + # Filter out rows older than "days" + cutoff = df["date"].max() - pd.Timedelta(days=days) + df = df[df["date"] > cutoff].reset_index(drop=True) + # Drop rows with NaT and NaN - clean_df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) + df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) # Group by 'format' and 'date' to get rows with max 'MLD' per group - max_mld = ( - clean_df.groupby(["format", "date"]) - .apply(lambda x: x.loc[x["MLD"].idxmax()]) - .reset_index(drop=True) - ) - - # Group by 'format' and 'date' to get rows with max 'MAX_ABS_DIFF' per - # group - max_abs_diff = ( - clean_df.groupby(["format", "date"]) - .apply(lambda x: x.loc[x["MAX_ABS_DIFF"].idxmax()]) - .reset_index(drop=True) - ) + idx = df.groupby(['format', 'date'])['MLD'].idxmax() + max = df.loc[idx].reset_index(drop=True) + idx = df.groupby(['format', 'date'])['MLD'].idxmin() + min = df.loc[idx].reset_index(drop=True) + mean = df.groupby(['format', 'date'])['MLD'].mean().to_frame('mean').reset_index() - formats = sorted(clean_df["format"].unique()) + formats = 
sorted(df["format"].unique()) fig = make_subplots( rows=5, cols=2, - specs=[[{"secondary_y": True}] * 2] * 5, subplot_titles=[f"{i}" for i in formats], shared_xaxes="columns", ) @@ -79,9 +74,7 @@ def plot_data(df, output_filename): row = i // 2 + 1 col = i % 2 + 1 - data_mld = max_mld[max_mld["format"] == fmt].sort_values("date") - data_diff = max_abs_diff[max_abs_diff["format"] - == fmt].sort_values("date") + data_mld = max[max["format"] == fmt].sort_values("date") # Add max 'MLD' to primary y-axis fig.add_trace( @@ -89,54 +82,51 @@ def plot_data(df, output_filename): x=data_mld["date"], y=data_mld["MLD"], mode="lines+markers", - name=f" {fmt} - Max MLD", + name=f"Max MLD", hovertext=[ - f"Testcase: {tc}
MLD: {mld:.4f}
MAX_ABS_DIFF:" - f"{abs_diff}
Format:" + f"Testcase: {tc}
MLD: {mld:.4f}" f" {format}
Date: {date.date()}" - for tc, mld, abs_diff, format, date in zip( + for tc, mld, format, date in zip( data_mld["testcase"], data_mld["MLD"], - data_mld["MAX_ABS_DIFF"], data_mld["format"], data_mld["date"], ) + ], hoverinfo="text", ), row=row, col=col, - secondary_y=False, ) - # Add max 'MAX_ABS_DIFF' to secondary y-axis + data_mld = mean[mean["format"] == fmt].sort_values("date") + + # Add mean 'MLD' to primary y-axis fig.add_trace( go.Scatter( - x=data_diff["date"], - y=data_diff["MAX_ABS_DIFF"], + x=data_mld["date"], + y=data_mld["mean"], mode="lines+markers", - name=f"{fmt} - Max MAX_ABS_DIFF", + name=f"Mean MLD", hovertext=[ - f"Testcase: {tc}
MLD: {mld:.4f}
MAX_ABS_DIFF:" - f" {abs_diff:.4f}
Format:" + f"Mean MLD: {mld:.4f}" f" {format}
Date: {date.date()}" - for tc, mld, abs_diff, format, date in zip( - data_diff["testcase"], - data_diff["MLD"], - data_diff["MAX_ABS_DIFF"], - data_diff["format"], - data_diff["date"], + for mld, format, date in zip( + data_mld["mean"], + data_mld["format"], + data_mld["date"], ) + ], hoverinfo="text", ), row=row, col=col, - secondary_y=True, ) fig.update_layout( - title_text="Long-term regression: max MLD and max MAX_ABS_DIFF", + title_text="History: MLD", legend=dict(x=1, y=1, orientation="v"), hovermode="x unified", ) @@ -144,18 +134,6 @@ def plot_data(df, output_filename): fig.update_xaxes(automargin=True) fig.update_yaxes(automargin=True) - # Update y-axes titles per subplot - for i in range(10): - yaxis_num = i * 2 + 1 - yaxis2_num = yaxis_num + 1 - fig["layout"][f"yaxis{yaxis_num}"].update( - title="Max MLD", titlefont=dict(color="blue"), tickfont=dict(color="blue") - ) - fig["layout"][f"yaxis{yaxis2_num}"].update( - title="Max MAX_ABS_DIFF", - titlefont=dict(color="green"), - tickfont=dict(color="green"), - ) # Save to html fig.write_html(output_filename) @@ -173,8 +151,14 @@ if __name__ == "__main__": type=str, help="Filename of the generated plot. e.g" ". long_term_regression.html", ) + parser.add_argument( + "--days", + type=int, + help="Number of days in history. 
Default: 30", + default=30, + ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) data = parse_csv_data(csv_data) - plot_data(data, args.output_filename) + plot_data(data, args.output_filename, args.days) -- GitLab From b52261bc264381e71b2b3a952caab6658d2f48f2 Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Tue, 27 Jan 2026 13:45:39 +0100 Subject: [PATCH 02/10] Make measure a command line parameter, to allow creating separate html filer for MLD and MAX_ABS_DIFF --- ci/process_long_term_logs.py | 45 +++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py index 1c7d1a668f..3fcd733dc7 100644 --- a/ci/process_long_term_logs.py +++ b/ci/process_long_term_logs.py @@ -39,27 +39,26 @@ def parse_csv_data(csv_data): return concat_df -def plot_data(df, output_filename, days): - """plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save +def plot_data(df, output_filename, days, measure): + """plot max values for measure and data and save to html file.""" # Convert 'date' to datetime df["date"] = pd.to_datetime(df["date"], errors="coerce") - df["MLD"] = pd.to_numeric(df["MLD"], errors="coerce") - df["MAX_ABS_DIFF"] = pd.to_numeric(df["MAX_ABS_DIFF"], errors="coerce") + df["MLD"] = pd.to_numeric(df[measure], errors="coerce") # Filter out rows older than "days" cutoff = df["date"].max() - pd.Timedelta(days=days) df = df[df["date"] > cutoff].reset_index(drop=True) # Drop rows with NaT and NaN - df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) + df = df.dropna(subset=["date", measure]) # Group by 'format' and 'date' to get rows with max 'MLD' per group - idx = df.groupby(['format', 'date'])['MLD'].idxmax() + idx = df.groupby(['format', 'date'])[measure].idxmax() max = df.loc[idx].reset_index(drop=True) - idx = df.groupby(['format', 'date'])['MLD'].idxmin() + idx = df.groupby(['format', 'date'])[measure].idxmin() min = 
df.loc[idx].reset_index(drop=True) - mean = df.groupby(['format', 'date'])['MLD'].mean().to_frame('mean').reset_index() + mean = df.groupby(['format', 'date'])[measure].mean().to_frame('mean').reset_index() formats = sorted(df["format"].unique()) @@ -76,25 +75,27 @@ def plot_data(df, output_filename, days): data_mld = max[max["format"] == fmt].sort_values("date") - # Add max 'MLD' to primary y-axis + # Add max measure to plots fig.add_trace( go.Scatter( x=data_mld["date"], - y=data_mld["MLD"], + y=data_mld[measure], mode="lines+markers", - name=f"Max MLD", + name=f"Max {measure}", hovertext=[ - f"Testcase: {tc}
MLD: {mld:.4f}" + f"Testcase: {tc}
{measure}: {mld:.4f}" f" {format}
Date: {date.date()}" for tc, mld, format, date in zip( data_mld["testcase"], - data_mld["MLD"], + data_mld[measure], data_mld["format"], data_mld["date"], ) ], hoverinfo="text", + marker_color="red", + showlegend=(i==0), ), row=row, col=col, @@ -108,9 +109,9 @@ def plot_data(df, output_filename, days): x=data_mld["date"], y=data_mld["mean"], mode="lines+markers", - name=f"Mean MLD", + name=f"Mean {measure}", hovertext=[ - f"Mean MLD: {mld:.4f}" + f"Mean {measure}: {mld:.4f}" f" {format}
Date: {date.date()}" for mld, format, date in zip( data_mld["mean"], @@ -120,13 +121,15 @@ def plot_data(df, output_filename, days): ], hoverinfo="text", + marker_color="blue", + showlegend=(i==0), ), row=row, col=col, ) fig.update_layout( - title_text="History: MLD", + title_text=f"History: {measure}", legend=dict(x=1, y=1, orientation="v"), hovermode="x unified", ) @@ -156,9 +159,15 @@ if __name__ == "__main__": type=int, help="Number of days in history. Default: 30", default=30, - ) + ) + parser.add_argument( + "--measure", + type=str, + help="Measure for analysis: MLD, MAX_ABS_DIFF, MIN_ODG, default: MLD", + default="MLD", + ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) data = parse_csv_data(csv_data) - plot_data(data, args.output_filename, args.days) + plot_data(data, args.output_filename, args.days, args.measure) -- GitLab From 29820a98657419869d183a5cc4d7119d9c60d66c Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Tue, 27 Jan 2026 14:09:39 +0100 Subject: [PATCH 03/10] Cleanup of text hover message --- ci/process_long_term_logs.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py index 3fcd733dc7..112365693e 100644 --- a/ci/process_long_term_logs.py +++ b/ci/process_long_term_logs.py @@ -54,11 +54,11 @@ def plot_data(df, output_filename, days, measure): df = df.dropna(subset=["date", measure]) # Group by 'format' and 'date' to get rows with max 'MLD' per group - idx = df.groupby(['format', 'date'])[measure].idxmax() + idx = df.groupby(["format", "date"])[measure].idxmax() max = df.loc[idx].reset_index(drop=True) - idx = df.groupby(['format', 'date'])[measure].idxmin() + idx = df.groupby(["format", "date"])[measure].idxmin() min = df.loc[idx].reset_index(drop=True) - mean = df.groupby(['format', 'date'])[measure].mean().to_frame('mean').reset_index() + mean = df.groupby(["format", 
"date"])[measure].mean().to_frame("mean").reset_index() formats = sorted(df["format"].unique()) @@ -83,19 +83,17 @@ def plot_data(df, output_filename, days, measure): mode="lines+markers", name=f"Max {measure}", hovertext=[ - f"Testcase: {tc}
{measure}: {mld:.4f}" - f" {format}
Date: {date.date()}" - for tc, mld, format, date in zip( + f"Testcase: {tc}
Max {measure}: {mld:.4f}" + f"
Date: {date.date()}" + for tc, mld, date in zip( data_mld["testcase"], data_mld[measure], - data_mld["format"], data_mld["date"], ) - ], hoverinfo="text", marker_color="red", - showlegend=(i==0), + showlegend=(i == 0), ), row=row, col=col, @@ -103,7 +101,7 @@ def plot_data(df, output_filename, days, measure): data_mld = mean[mean["format"] == fmt].sort_values("date") - # Add mean 'MLD' to primary y-axis + # Add mean measure to plots fig.add_trace( go.Scatter( x=data_mld["date"], @@ -111,25 +109,22 @@ def plot_data(df, output_filename, days, measure): mode="lines+markers", name=f"Mean {measure}", hovertext=[ - f"Mean {measure}: {mld:.4f}" - f" {format}
Date: {date.date()}" - for mld, format, date in zip( + f"Mean {measure}: {mld:.4f}" f"
Date: {date.date()}" + for mld, date in zip( data_mld["mean"], - data_mld["format"], data_mld["date"], ) - ], hoverinfo="text", marker_color="blue", - showlegend=(i==0), + showlegend=(i == 0), ), row=row, col=col, ) fig.update_layout( - title_text=f"History: {measure}", + title_text=f"History: {measure}", legend=dict(x=1, y=1, orientation="v"), hovermode="x unified", ) @@ -137,7 +132,6 @@ def plot_data(df, output_filename, days, measure): fig.update_xaxes(automargin=True) fig.update_yaxes(automargin=True) - # Save to html fig.write_html(output_filename) @@ -165,7 +159,7 @@ if __name__ == "__main__": type=str, help="Measure for analysis: MLD, MAX_ABS_DIFF, MIN_ODG, default: MLD", default="MLD", - ) + ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) -- GitLab From 4f6b4a0fbf502a6688d74626e3966055c3cf188c Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Mon, 2 Feb 2026 16:09:17 +0100 Subject: [PATCH 04/10] Add scripts/find_regressions_from_logs2.py as alternative analysis script --- scripts/find_regressions_from_logs2.py | 189 +++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 scripts/find_regressions_from_logs2.py diff --git a/scripts/find_regressions_from_logs2.py b/scripts/find_regressions_from_logs2.py new file mode 100644 index 0000000000..cd07ead015 --- /dev/null +++ b/scripts/find_regressions_from_logs2.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 + +import argparse +from pathlib import Path +import pandas as pd +import plotly.graph_objects as go +from plotly.subplots import make_subplots + +def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_thr, curr_value_thr): + + input_path = Path(logs_dir) + logs = [f for f in input_path.iterdir() if f.is_dir()] + + # Build dict of scores + formatdict = {} + sha = {} + logdict = {} + for log in logs: + date = log.name + logdict[date] = {} + formatdict[date] = {} + for logfile in log.glob("*.csv"): + tmp = logfile.name.split("-") + job = 
"-".join(tmp[3:-4]) + sha[date] = tmp[-1].split(".")[0] + data = pd.read_csv(logfile, usecols=["testcase", measure, "format"]) + logdict[date][job] = {} + formatdict[date][job] = {} + + for testcase, value, format in zip( + data["testcase"], data[measure], data["format"] + ): + formatdict[date][job][testcase] = format + logdict[date][job][testcase] = value + + # Restructure dict + csv_rows = [] + formats = [] + for date, jobs in logdict.items(): + for job, testcases in jobs.items(): + for testcase, value in testcases.items(): + csv_rows.append((job, testcase, date, value)) + formats.append((job, testcase, date, formatdict[date][job][testcase])) + + result = pd.DataFrame(csv_rows, columns=["job", "testcase", "date", "value"]) + result = result.pivot( + index=["job", "testcase"], columns="date", values="value" + ).reset_index() + + f = pd.DataFrame(formats, columns=["job", "testcase", "date", "format"]) + f = f.pivot( + index=["job", "testcase"], columns="date", values="format" + ).reset_index() + + ratio = result.copy() + dates = result.iloc[:, 2:].columns + + # Calculate ratios + ratio[dates[0]] = 1 # Set first ratio to 1 + for prevdate, currdate in zip( dates[0:-1], dates[1:]): + ratio[currdate] = result[currdate] / result[prevdate] + + values = result.iloc[:, 2:] + date = values.columns + last_date = date[-1] + result.insert(2, "format", f[last_date]) + ratio.insert(2, "format", f[last_date]) + + formats = result['format'].dropna().unique().tolist() + + plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days+1):-1]) + plottext = pd.DataFrame("", index=formats, columns=dates[-(days+1):-1]) + + for i in range(days): + currdate = dates[-(days-i+1)] # Make robust for shorter history + prevdate = dates[-(days-i+2)] + idx = ratio.groupby("format")[currdate].nlargest(10).index.get_level_values(1) + tmp = result[["job","testcase","format",prevdate,currdate]].iloc[idx,:].copy().reset_index() + tmp.insert(3, "prev_date", prevdate) + tmp.insert(4, "prev_sha", 
sha[prevdate]) + tmp.insert(5, "curr_date", currdate) + tmp.insert(6, "curr_sha", sha[prevdate]) + tmp.insert(7, "diff", tmp[currdate] - tmp[prevdate]) + tmp.insert(8, "ratio", tmp[currdate] / tmp[prevdate]) + tmp.loc[tmp[prevdate] == 0, "ratio"] = ( + 1 # Set ratio to 1 for denominator 0 + ) + tmp["prev_sha"] = "'" + tmp["prev_sha"] # Add apostrophy to prevent Excel reading this as a number + tmp["curr_sha"] = "'" + tmp["curr_sha"] # Add apostrophy to prevent Excel reading this as a number + + csv_filename = f"regressions_{measure}_{currdate}.csv" + tmp.to_csv(csv_filename, sep=";", index=False) + + # Store worst case per format for plotting + idx = tmp.groupby("format")["ratio"].idxmax() + for f in formats: + plotdata.loc[f, currdate] = tmp.iloc[idx[f]]["ratio"] + plottext.loc[f, currdate] = f"{tmp.iloc[idx[f]]['job']} - {tmp.iloc[idx[f]]['testcase']} - Max {measure} ratio: {tmp.iloc[idx[f]]['ratio']:.2f}" + + fig = make_subplots( + rows=5, + cols=2, + subplot_titles=[f"{i}" for i in formats], + shared_xaxes="columns", + ) + + for i, fmt in enumerate(formats): + row = i // 2 + 1 + col = i % 2 + 1 + + fig.add_trace( + go.Scatter( + x=pd.to_datetime(plotdata.columns), + y=plotdata.loc[fmt], + mode="lines+markers", + name=f"Max {measure}", + hovertext=plottext.loc[fmt], + hoverinfo="text", + showlegend=False, + ), + row=row, + col=col, + ) + + fig.update_layout( + title_text=f"Regression detection: Max {measure} ratio", + legend=dict(x=1, y=1, orientation="v"), + hovermode="x unified", + ) + + fig.update_xaxes(automargin=True) + fig.update_yaxes(automargin=True) + + # Save to html + fig.write_html(output_filename) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="logs dir") + parser.add_argument( + "logs_dir", + type=str, + help="Logs dir, e.g. logs", + ) + parser.add_argument( + "output_filename", + type=str, + help="Filename of the combined csv file. 
e.g mld.csv", + ) + parser.add_argument( + "--measure", + type=str, + help="Measure for summary, one of MLD MIN_SSNR MAX_ABS_DIFF MIN_ODG, (default: MLD)", + default="MLD", + ) + parser.add_argument( + "--days", + type=int, + help="Number of days in history, (default: whole history)", + default=-1, + ) + parser.add_argument( + "--all_results", + action="store_true", + help="Output all results, including cases without regression (default: off)", + default=False, + ) + parser.add_argument( + "--diff_thr", + type=float, + help="Include test cases with diff above diff_thr, (default: 0.0)", + default=0.0, + ) + parser.add_argument( + "--ratio_thr", + type=float, + help="Include test cases with ratio above ratio_thr, (default: 1.0)", + default=1.0, + ) + parser.add_argument( + "--curr_value_thr", + type=float, + help="Include test cases with curr_value above curr_value_thr, (default: 0.0)", + default=0.0, + ) + + args = parser.parse_args() + + main(args.logs_dir, args.output_filename, args.measure, args.days, args.all_results, args.diff_thr, args.ratio_thr, args.curr_value_thr) + -- GitLab From d10423218891f16dd76ac46cacd1f8fb7ca2c54e Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Thu, 19 Feb 2026 13:22:43 +0100 Subject: [PATCH 05/10] Updates to regression analysis --- scripts/find_regressions_from_logs2.py | 52 ++++++++++---------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/scripts/find_regressions_from_logs2.py b/scripts/find_regressions_from_logs2.py index cd07ead015..8de7f0b4ec 100644 --- a/scripts/find_regressions_from_logs2.py +++ b/scripts/find_regressions_from_logs2.py @@ -39,63 +39,51 @@ def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_ for date, jobs in logdict.items(): for job, testcases in jobs.items(): for testcase, value in testcases.items(): - csv_rows.append((job, testcase, date, value)) - formats.append((job, testcase, date, formatdict[date][job][testcase])) + csv_rows.append((job, testcase, 
formatdict[date][job][testcase], date, value)) - result = pd.DataFrame(csv_rows, columns=["job", "testcase", "date", "value"]) + result = pd.DataFrame(csv_rows, columns=["job", "testcase", "format", "date", "value"]) result = result.pivot( - index=["job", "testcase"], columns="date", values="value" + index=["job", "testcase", "format"], columns="date", values="value" ).reset_index() - f = pd.DataFrame(formats, columns=["job", "testcase", "date", "format"]) - f = f.pivot( - index=["job", "testcase"], columns="date", values="format" - ).reset_index() + # Keep only tests for which results exist in the last run + if days == -1: + rng = result.columns[3:] # Whole history + else: + rng = result.columns[-days:] + result = result.dropna(subset=rng) + result = result.reset_index(drop=True) ratio = result.copy() - dates = result.iloc[:, 2:].columns + ratio = ratio.reset_index() + dates = result.iloc[:, 3:].columns # Calculate ratios - ratio[dates[0]] = 1 # Set first ratio to 1 + ratio[dates[0]] = 1.0 # Set first ratio to 1.0 for prevdate, currdate in zip( dates[0:-1], dates[1:]): ratio[currdate] = result[currdate] / result[prevdate] - values = result.iloc[:, 2:] + values = result.iloc[:, 3:] date = values.columns - last_date = date[-1] - result.insert(2, "format", f[last_date]) - ratio.insert(2, "format", f[last_date]) formats = result['format'].dropna().unique().tolist() plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days+1):-1]) plottext = pd.DataFrame("", index=formats, columns=dates[-(days+1):-1]) + all_indices = [] + for i in range(days): currdate = dates[-(days-i+1)] # Make robust for shorter history prevdate = dates[-(days-i+2)] - idx = ratio.groupby("format")[currdate].nlargest(10).index.get_level_values(1) - tmp = result[["job","testcase","format",prevdate,currdate]].iloc[idx,:].copy().reset_index() - tmp.insert(3, "prev_date", prevdate) - tmp.insert(4, "prev_sha", sha[prevdate]) - tmp.insert(5, "curr_date", currdate) - tmp.insert(6, "curr_sha", 
sha[prevdate]) - tmp.insert(7, "diff", tmp[currdate] - tmp[prevdate]) - tmp.insert(8, "ratio", tmp[currdate] / tmp[prevdate]) - tmp.loc[tmp[prevdate] == 0, "ratio"] = ( - 1 # Set ratio to 1 for denominator 0 - ) - tmp["prev_sha"] = "'" + tmp["prev_sha"] # Add apostrophy to prevent Excel reading this as a number - tmp["curr_sha"] = "'" + tmp["curr_sha"] # Add apostrophy to prevent Excel reading this as a number - csv_filename = f"regressions_{measure}_{currdate}.csv" - tmp.to_csv(csv_filename, sep=";", index=False) + idx = ratio.groupby("format")[currdate].idxmax() + all_indices.append(idx) # Store worst case per format for plotting - idx = tmp.groupby("format")["ratio"].idxmax() for f in formats: - plotdata.loc[f, currdate] = tmp.iloc[idx[f]]["ratio"] - plottext.loc[f, currdate] = f"{tmp.iloc[idx[f]]['job']} - {tmp.iloc[idx[f]]['testcase']} - Max {measure} ratio: {tmp.iloc[idx[f]]['ratio']:.2f}" + plotdata.loc[f, currdate] = ratio.iloc[idx[f]][currdate] + plottext.loc[f, currdate] = f"{result.iloc[idx[f]]['job']} - {result.iloc[idx[f]]['testcase']} - Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}
Date: {currdate}" fig = make_subplots( rows=5, -- GitLab From 13845e017e52d846ceef0ecd8a480e90bbf9cd3f Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Thu, 19 Feb 2026 14:54:13 +0100 Subject: [PATCH 06/10] Add filtering options for testcase --- ci/process_long_term_logs.py | 46 ++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py index 112365693e..e5ae8afd5d 100644 --- a/ci/process_long_term_logs.py +++ b/ci/process_long_term_logs.py @@ -1,7 +1,9 @@ +#!/usr/bin/env python3 + import os import pandas as pd import argparse -import plotly.express as px +import re import plotly.graph_objects as go from plotly.subplots import make_subplots @@ -39,9 +41,13 @@ def parse_csv_data(csv_data): return concat_df -def plot_data(df, output_filename, days, measure): +def plot_data(df, args): """plot max values for measure and data and save to html file.""" + + measure = args.measure + days = args.days + # Convert 'date' to datetime df["date"] = pd.to_datetime(df["date"], errors="coerce") df["MLD"] = pd.to_numeric(df[measure], errors="coerce") @@ -53,6 +59,21 @@ def plot_data(df, output_filename, days, measure): # Drop rows with NaT and NaN df = df.dropna(subset=["date", measure]) + # Filter test cases based on include/reject/match arguments + if args.include: + mask = pd.Series(False, index=df.index) + for tag in args.include: + mask |= df["testcase"].str.contains(tag, case=False, na=False) + df = df[mask] + if args.reject: + mask = pd.Series(False, index=df.index) + for tag in args.reject: + mask |= df["testcase"].str.contains(tag, case=False, na=False) + df = df[~mask] + if args.match: + pattern = re.compile(args.match, re.IGNORECASE) + df = df[df["testcase"].str.contains(pattern, na=False)] + # Group by 'format' and 'date' to get rows with max 'MLD' per group idx = df.groupby(["format", "date"])[measure].idxmax() max = df.loc[idx].reset_index(drop=True) @@ -133,7 +154,7 @@ 
def plot_data(df, output_filename, days, measure): fig.update_yaxes(automargin=True) # Save to html - fig.write_html(output_filename) + fig.write_html(args.output_filename) if __name__ == "__main__": @@ -160,8 +181,25 @@ if __name__ == "__main__": help="Measure for analysis: MLD, MAX_ABS_DIFF, MIN_ODG, default: MLD", default="MLD", ) + parser.add_argument( + "--include", + nargs="+", + type=str, + help="List of tags to include", + ) + parser.add_argument( + "--reject", + nargs="+", + type=str, + help="List of tags to reject", + ) + parser.add_argument( + "--match", + type=str, + help="Regex pattern for selecting tests", + ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) data = parse_csv_data(csv_data) - plot_data(data, args.output_filename, args.days, args.measure) + plot_data(data, args) -- GitLab From 94cbb18ac6eb9231a963b59709582cb80e096bbc Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Thu, 19 Feb 2026 15:02:49 +0100 Subject: [PATCH 07/10] Fixes for MIN_ODG and MIN_SSNR --- ci/process_long_term_logs.py | 45 ++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py index e5ae8afd5d..cce9d1c22b 100644 --- a/ci/process_long_term_logs.py +++ b/ci/process_long_term_logs.py @@ -25,9 +25,9 @@ def read_csv_files(root_dir): def parse_csv_data(csv_data): - """keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF' and add + """keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF', 'MIN_ODG', 'MIN_SSNR' and add 'date' column.""" - cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF"] + cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF", "MIN_ODG", "MIN_SSNR"] parsed_data = {} for key, df in csv_data.items(): cols = [col for col in cols_to_keep if col in df.columns] @@ -50,7 +50,7 @@ def plot_data(df, args): # Convert 'date' to datetime df["date"] = pd.to_datetime(df["date"], errors="coerce") - df["MLD"] = pd.to_numeric(df[measure], 
errors="coerce") + df[measure] = pd.to_numeric(df[measure], errors="coerce") # Filter out rows older than "days" cutoff = df["date"].max() - pd.Timedelta(days=days) @@ -94,22 +94,27 @@ def plot_data(df, args): row = i // 2 + 1 col = i % 2 + 1 - data_mld = max[max["format"] == fmt].sort_values("date") + if "MIN" in measure: + data = min[min["format"] == fmt].sort_values("date") + maxmin_str = "Min" + else: + data = max[max["format"] == fmt].sort_values("date") + maxmin_str = "Max" # Add max measure to plots fig.add_trace( go.Scatter( - x=data_mld["date"], - y=data_mld[measure], + x=data["date"], + y=data[measure], mode="lines+markers", - name=f"Max {measure}", + name=f"{maxmin_str} {measure}", hovertext=[ - f"Testcase: {tc}
Max {measure}: {mld:.4f}" + f"Testcase: {tc}
{maxmin_str} {measure}: {value:.4f}" f"
Date: {date.date()}" - for tc, mld, date in zip( - data_mld["testcase"], - data_mld[measure], - data_mld["date"], + for tc, value, date in zip( + data["testcase"], + data[measure], + data["date"], ) ], hoverinfo="text", @@ -120,20 +125,20 @@ def plot_data(df, args): col=col, ) - data_mld = mean[mean["format"] == fmt].sort_values("date") + data = mean[mean["format"] == fmt].sort_values("date") # Add mean measure to plots fig.add_trace( go.Scatter( - x=data_mld["date"], - y=data_mld["mean"], + x=data["date"], + y=data["mean"], mode="lines+markers", name=f"Mean {measure}", hovertext=[ - f"Mean {measure}: {mld:.4f}" f"
Date: {date.date()}" - for mld, date in zip( - data_mld["mean"], - data_mld["date"], + f"Mean {measure}: {value:.4f}" f"
Date: {date.date()}" + for value, date in zip( + data["mean"], + data["date"], ) ], hoverinfo="text", @@ -178,7 +183,7 @@ if __name__ == "__main__": parser.add_argument( "--measure", type=str, - help="Measure for analysis: MLD, MAX_ABS_DIFF, MIN_ODG, default: MLD", + help="Measure for analysis: MLD, MAX_ABS_DIFF, MIN_ODG, MIN_SSNR, default: MLD", default="MLD", ) parser.add_argument( -- GitLab From 8742c1bbd7cb9c75b2d134eb78ea8f814f9ffa77 Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Thu, 19 Feb 2026 15:28:39 +0100 Subject: [PATCH 08/10] Add filtering options for jobs --- ci/process_long_term_logs.py | 48 ++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py index cce9d1c22b..1eec7fff35 100644 --- a/ci/process_long_term_logs.py +++ b/ci/process_long_term_logs.py @@ -26,14 +26,17 @@ def read_csv_files(root_dir): def parse_csv_data(csv_data): """keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF', 'MIN_ODG', 'MIN_SSNR' and add - 'date' column.""" + 'date' and 'job' column.""" cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF", "MIN_ODG", "MIN_SSNR"] parsed_data = {} for key, df in csv_data.items(): + tmp = key.split("-") + job = "-".join(tmp[4:-4]) cols = [col for col in cols_to_keep if col in df.columns] date = os.path.basename(os.path.dirname(key)) new_df = df[cols].copy() new_df["date"] = date + new_df["job"] = job parsed_data[key] = new_df # concatenate all dataframe in the dictionary @@ -74,6 +77,22 @@ def plot_data(df, args): pattern = re.compile(args.match, re.IGNORECASE) df = df[df["testcase"].str.contains(pattern, na=False)] + # Filter jobs based on job-include/job-reject/job-match arguments + if args.job_include: + mask = pd.Series(False, index=df.index) + for tag in args.job_include: + mask |= df["job"].str.contains(tag, case=False, na=False) + df = df[mask] + if args.job_reject: + mask = pd.Series(False, index=df.index) + for tag in 
args.job_reject: + mask |= df["job"].str.contains(tag, case=False, na=False) + df = df[~mask] + if args.job_match: + pattern = re.compile(args.job_match, re.IGNORECASE) + df = df[df["job"].str.contains(pattern, na=False)] + + # Group by 'format' and 'date' to get rows with max 'MLD' per group idx = df.groupby(["format", "date"])[measure].idxmax() max = df.loc[idx].reset_index(drop=True) @@ -110,8 +129,10 @@ def plot_data(df, args): name=f"{maxmin_str} {measure}", hovertext=[ f"Testcase: {tc}
{maxmin_str} {measure}: {value:.4f}" + f"
Job: {job}" f"
Date: {date.date()}" - for tc, value, date in zip( + for job, tc, value, date in zip( + data["job"], data["testcase"], data[measure], data["date"], @@ -190,18 +211,35 @@ if __name__ == "__main__": "--include", nargs="+", type=str, - help="List of tags to include", + help="List of tags to include in testcases", ) parser.add_argument( "--reject", nargs="+", type=str, - help="List of tags to reject", + help="List of tags to reject in testcases", ) parser.add_argument( "--match", type=str, - help="Regex pattern for selecting tests", + help="Regex pattern for selecting testcases", + ) + parser.add_argument( + "--job-include", + nargs="+", + type=str, + help="List of tags to include in jobs", + ) + parser.add_argument( + "--job-reject", + nargs="+", + type=str, + help="List of tags to reject in jobs", + ) + parser.add_argument( + "--job-match", + type=str, + help="Regex pattern for selecting jobs", ) args = parser.parse_args() -- GitLab From 268a18d1139ccefe36a8376bdadfe45c508ec922 Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Thu, 19 Feb 2026 15:46:31 +0100 Subject: [PATCH 09/10] Changed name of detect_regressions.py --- ...ns_from_logs2.py => detect_regressions.py} | 32 +++---------------- 1 file changed, 4 insertions(+), 28 deletions(-) rename scripts/{find_regressions_from_logs2.py => detect_regressions.py} (78%) diff --git a/scripts/find_regressions_from_logs2.py b/scripts/detect_regressions.py similarity index 78% rename from scripts/find_regressions_from_logs2.py rename to scripts/detect_regressions.py index 8de7f0b4ec..64de5a7a23 100644 --- a/scripts/find_regressions_from_logs2.py +++ b/scripts/detect_regressions.py @@ -6,7 +6,7 @@ import pandas as pd import plotly.graph_objects as go from plotly.subplots import make_subplots -def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_thr, curr_value_thr): +def main(logs_dir, output_filename, measure, days): input_path = Path(logs_dir) logs = [f for f in input_path.iterdir() if 
f.is_dir()] @@ -83,7 +83,7 @@ def main(logs_dir, output_filename, measure, days, all_results, diff_thr, ratio_ # Store worst case per format for plotting for f in formats: plotdata.loc[f, currdate] = ratio.iloc[idx[f]][currdate] - plottext.loc[f, currdate] = f"{result.iloc[idx[f]]['job']} - {result.iloc[idx[f]]['testcase']} - Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}
<br>Date: {currdate}" + plottext.loc[f, currdate] = f"Job: {result.iloc[idx[f]]['job']}<br>Testcase: {result.iloc[idx[f]]['testcase']}<br>Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>
Date: {currdate}" fig = make_subplots( rows=5, @@ -132,7 +132,7 @@ if __name__ == "__main__": parser.add_argument( "output_filename", type=str, - help="Filename of the combined csv file. e.g mld.csv", + help="Output html file. e.g mld.html", ) parser.add_argument( "--measure", @@ -146,32 +146,8 @@ if __name__ == "__main__": help="Number of days in history, (default: whole history)", default=-1, ) - parser.add_argument( - "--all_results", - action="store_true", - help="Output all results, including cases without regression (default: off)", - default=False, - ) - parser.add_argument( - "--diff_thr", - type=float, - help="Include test cases with diff above diff_thr, (default: 0.0)", - default=0.0, - ) - parser.add_argument( - "--ratio_thr", - type=float, - help="Include test cases with ratio above ratio_thr, (default: 1.0)", - default=1.0, - ) - parser.add_argument( - "--curr_value_thr", - type=float, - help="Include test cases with curr_value above curr_value_thr, (default: 0.0)", - default=0.0, - ) args = parser.parse_args() - main(args.logs_dir, args.output_filename, args.measure, args.days, args.all_results, args.diff_thr, args.ratio_thr, args.curr_value_thr) + main(args.logs_dir, args.output_filename, args.measure, args.days) -- GitLab From 3a4fc5e31ddd08b4536d164c089cd9631fd1e2ed Mon Sep 17 00:00:00 2001 From: Erik Norvell Date: Thu, 19 Feb 2026 16:54:03 +0100 Subject: [PATCH 10/10] Change mld output file from detect_regressions.py --- scripts/detect_regressions.py | 75 +++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 17 deletions(-) diff --git a/scripts/detect_regressions.py b/scripts/detect_regressions.py index 64de5a7a23..4aa98941f6 100644 --- a/scripts/detect_regressions.py +++ b/scripts/detect_regressions.py @@ -6,7 +6,13 @@ import pandas as pd import plotly.graph_objects as go from plotly.subplots import make_subplots -def main(logs_dir, output_filename, measure, days): + +def main(args): + + logs_dir = args.logs_dir + output_filename 
= args.output_filename + measure = args.measure + days = args.days input_path = Path(logs_dir) logs = [f for f in input_path.iterdir() if f.is_dir()] @@ -39,16 +45,20 @@ def main(logs_dir, output_filename, measure, days): for date, jobs in logdict.items(): for job, testcases in jobs.items(): for testcase, value in testcases.items(): - csv_rows.append((job, testcase, formatdict[date][job][testcase], date, value)) + csv_rows.append( + (job, testcase, formatdict[date][job][testcase], date, value) + ) - result = pd.DataFrame(csv_rows, columns=["job", "testcase", "format", "date", "value"]) + result = pd.DataFrame( + csv_rows, columns=["job", "testcase", "format", "date", "value"] + ) result = result.pivot( index=["job", "testcase", "format"], columns="date", values="value" ).reset_index() - # Keep only tests for which results exist in the last run + # Keep only tests for which results exist in any of the days if days == -1: - rng = result.columns[3:] # Whole history + rng = result.columns[3:] # Whole history else: rng = result.columns[-days:] result = result.dropna(subset=rng) @@ -59,31 +69,33 @@ def main(logs_dir, output_filename, measure, days): dates = result.iloc[:, 3:].columns # Calculate ratios - ratio[dates[0]] = 1.0 # Set first ratio to 1.0 - for prevdate, currdate in zip( dates[0:-1], dates[1:]): + ratio[dates[0]] = 1.0 # Set first ratio to 1.0 + for prevdate, currdate in zip(dates[0:-1], dates[1:]): ratio[currdate] = result[currdate] / result[prevdate] values = result.iloc[:, 3:] date = values.columns - formats = result['format'].dropna().unique().tolist() + formats = result["format"].dropna().unique().tolist() - plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days+1):-1]) - plottext = pd.DataFrame("", index=formats, columns=dates[-(days+1):-1]) + plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days) : -1]) + plottext = pd.DataFrame("", index=formats, columns=dates[-(days) : -1]) all_indices = [] for i in range(days): - currdate = 
dates[-(days-i+1)] # Make robust for shorter history - prevdate = dates[-(days-i+2)] + currdate = dates[-(days - i)] # Make robust for shorter history + prevdate = dates[-(days - i + 1)] idx = ratio.groupby("format")[currdate].idxmax() - all_indices.append(idx) + all_indices.extend(idx.tolist()) # Store worst case per format for plotting for f in formats: plotdata.loc[f, currdate] = ratio.iloc[idx[f]][currdate] - plottext.loc[f, currdate] = f"Job: {result.iloc[idx[f]]['job']}
<br>Testcase: {result.iloc[idx[f]]['testcase']}<br>Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>
Date: {currdate}" + plottext.loc[f, currdate] = ( + f"Job: {result.iloc[idx[f]]['job']}<br>Testcase: {result.iloc[idx[f]]['testcase']}<br>Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>
Date: {currdate}" + ) fig = make_subplots( rows=5, @@ -122,6 +134,32 @@ def main(logs_dir, output_filename, measure, days): # Save to html fig.write_html(output_filename) + # Write CSV-file + if args.csv: + output = result.iloc[all_indices].copy() + cols = ["job","testcase","format"] + cols.extend(date[-days:].tolist()) + output = output.loc[:,cols] + values = output.iloc[:, 3:] + last_date = values.columns[-1] + output.insert(3, "min_date", values.idxmin(axis=1)) + output.insert(4, "min_sha", output["min_date"].map(sha)) + output.insert(5, "curr_value", output[last_date]) + output.insert(6, "min_value", values.min(axis=1)) + output.insert(7, "diff", output["curr_value"] - output["min_value"]) + output.insert(8, "ratio", output["curr_value"] / output["min_value"]) + output.loc[output["min_value"] == 0, "ratio"] = ( + 1 # Set ratio to 1 for denominator 0 + ) + output["min_sha"] = ( + "'" + output["min_sha"] + ) # Add apostrophy to prevent Excel reading this as a number + output.sort_values( + by=["format", "ratio"], ascending=[True, False], inplace=True + ) + output.to_csv(args.csv, sep=";", index=False) + + if __name__ == "__main__": parser = argparse.ArgumentParser(description="logs dir") parser.add_argument( @@ -146,8 +184,11 @@ if __name__ == "__main__": help="Number of days in history, (default: whole history)", default=-1, ) + parser.add_argument( + "--csv", + type=str, + help="CSV output file", + ) args = parser.parse_args() - - main(args.logs_dir, args.output_filename, args.measure, args.days) - + main(args) -- GitLab