Loading ci/process_long_term_logs.py +139 −71 Original line number Diff line number Diff line #!/usr/bin/env python3 import os import pandas as pd import argparse import plotly.express as px import re import plotly.graph_objects as go from plotly.subplots import make_subplots Loading @@ -23,15 +25,18 @@ def read_csv_files(root_dir): def parse_csv_data(csv_data): """keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF' and add 'date' column.""" cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF"] """keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF', 'MIN_ODG', 'MIN_SSNR' and add 'date' and 'job' column.""" cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF", "MIN_ODG", "MIN_SSNR"] parsed_data = {} for key, df in csv_data.items(): tmp = key.split("-") job = "-".join(tmp[4:-4]) cols = [col for col in cols_to_keep if col in df.columns] date = os.path.basename(os.path.dirname(key)) new_df = df[cols].copy() new_df["date"] = date new_df["job"] = job parsed_data[key] = new_df # concatenate all dataframe in the dictionary Loading @@ -39,38 +44,67 @@ def parse_csv_data(csv_data): return concat_df def plot_data(df, output_filename): """plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save def plot_data(df, args): """plot max values for measure and data and save to html file.""" measure = args.measure days = args.days # Convert 'date' to datetime df["date"] = pd.to_datetime(df["date"], errors="coerce") df["MLD"] = pd.to_numeric(df["MLD"], errors="coerce") df["MAX_ABS_DIFF"] = pd.to_numeric(df["MAX_ABS_DIFF"], errors="coerce") df[measure] = pd.to_numeric(df[measure], errors="coerce") # Filter out rows older than "days" cutoff = df["date"].max() - pd.Timedelta(days=days) df = df[df["date"] > cutoff].reset_index(drop=True) # Drop rows with NaT and NaN clean_df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) df = df.dropna(subset=["date", measure]) # Filter test cases based on include/reject/match arguments if args.include: mask = pd.Series(False, 
index=df.index) for tag in args.include: mask |= df["testcase"].str.contains(tag, case=False, na=False) df = df[mask] if args.reject: mask = pd.Series(False, index=df.index) for tag in args.reject: mask |= df["testcase"].str.contains(tag, case=False, na=False) df = df[~mask] if args.match: pattern = re.compile(args.match, re.IGNORECASE) df = df[df["testcase"].str.contains(pattern, na=False)] # Filter jobs based on job-include/job-reject/job-match arguments if args.job_include: mask = pd.Series(False, index=df.index) for tag in args.job_include: mask |= df["job"].str.contains(tag, case=False, na=False) df = df[mask] if args.job_reject: mask = pd.Series(False, index=df.index) for tag in args.job_reject: mask |= df["job"].str.contains(tag, case=False, na=False) df = df[~mask] if args.job_match: pattern = re.compile(args.job_match, re.IGNORECASE) df = df[df["job"].str.contains(pattern, na=False)] # Group by 'format' and 'date' to get rows with max 'MLD' per group max_mld = ( clean_df.groupby(["format", "date"]) .apply(lambda x: x.loc[x["MLD"].idxmax()]) .reset_index(drop=True) ) # Group by 'format' and 'date' to get rows with max 'MAX_ABS_DIFF' per # group max_abs_diff = ( clean_df.groupby(["format", "date"]) .apply(lambda x: x.loc[x["MAX_ABS_DIFF"].idxmax()]) .reset_index(drop=True) ) # Group by 'format' and 'date' to get rows with max 'MLD' per group idx = df.groupby(["format", "date"])[measure].idxmax() max = df.loc[idx].reset_index(drop=True) idx = df.groupby(["format", "date"])[measure].idxmin() min = df.loc[idx].reset_index(drop=True) mean = df.groupby(["format", "date"])[measure].mean().to_frame("mean").reset_index() formats = sorted(clean_df["format"].unique()) formats = sorted(df["format"].unique()) fig = make_subplots( rows=5, cols=2, specs=[[{"secondary_y": True}] * 2] * 5, subplot_titles=[f"{i}" for i in formats], shared_xaxes="columns", ) Loading @@ -79,64 +113,65 @@ def plot_data(df, output_filename): row = i // 2 + 1 col = i % 2 + 1 data_mld = 
max_mld[max_mld["format"] == fmt].sort_values("date") data_diff = max_abs_diff[max_abs_diff["format"] == fmt].sort_values("date") if "MIN" in measure: data = min[min["format"] == fmt].sort_values("date") maxmin_str = "Min" else: data = max[max["format"] == fmt].sort_values("date") maxmin_str = "Max" # Add max 'MLD' to primary y-axis # Add max measure to plots fig.add_trace( go.Scatter( x=data_mld["date"], y=data_mld["MLD"], x=data["date"], y=data[measure], mode="lines+markers", name=f" {fmt} - Max MLD", name=f"{maxmin_str} {measure}", hovertext=[ f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:" f"{abs_diff}<br>Format:" f" {format}<br>Date: {date.date()}" for tc, mld, abs_diff, format, date in zip( data_mld["testcase"], data_mld["MLD"], data_mld["MAX_ABS_DIFF"], data_mld["format"], data_mld["date"], f"Testcase: {tc}<br>{maxmin_str} {measure}: {value:.4f}" f"<br>Job: {job}" f"<br>Date: {date.date()}" for job, tc, value, date in zip( data["job"], data["testcase"], data[measure], data["date"], ) ], hoverinfo="text", marker_color="red", showlegend=(i == 0), ), row=row, col=col, secondary_y=False, ) # Add max 'MAX_ABS_DIFF' to secondary y-axis data = mean[mean["format"] == fmt].sort_values("date") # Add mean measure to plots fig.add_trace( go.Scatter( x=data_diff["date"], y=data_diff["MAX_ABS_DIFF"], x=data["date"], y=data["mean"], mode="lines+markers", name=f"{fmt} - Max MAX_ABS_DIFF", name=f"Mean {measure}", hovertext=[ f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:" f" {abs_diff:.4f}<br>Format:" f" {format}<br>Date: {date.date()}" for tc, mld, abs_diff, format, date in zip( data_diff["testcase"], data_diff["MLD"], data_diff["MAX_ABS_DIFF"], data_diff["format"], data_diff["date"], f"Mean {measure}: {value:.4f}" f"<br>Date: {date.date()}" for value, date in zip( data["mean"], data["date"], ) ], hoverinfo="text", marker_color="blue", showlegend=(i == 0), ), row=row, col=col, secondary_y=True, ) fig.update_layout( title_text="Long-term regression: max MLD and max 
MAX_ABS_DIFF", title_text=f"History: {measure}", legend=dict(x=1, y=1, orientation="v"), hovermode="x unified", ) Loading @@ -144,21 +179,8 @@ def plot_data(df, output_filename): fig.update_xaxes(automargin=True) fig.update_yaxes(automargin=True) # Update y-axes titles per subplot for i in range(10): yaxis_num = i * 2 + 1 yaxis2_num = yaxis_num + 1 fig["layout"][f"yaxis{yaxis_num}"].update( title="Max MLD", titlefont=dict(color="blue"), tickfont=dict(color="blue") ) fig["layout"][f"yaxis{yaxis2_num}"].update( title="Max MAX_ABS_DIFF", titlefont=dict(color="green"), tickfont=dict(color="green"), ) # Save to html fig.write_html(output_filename) fig.write_html(args.output_filename) if __name__ == "__main__": Loading @@ -173,8 +195,54 @@ if __name__ == "__main__": type=str, help="Filename of the generated plot. e.g" ". long_term_regression.html", ) parser.add_argument( "--days", type=int, help="Number of days in history. Default: 30", default=30, ) parser.add_argument( "--measure", type=str, help="Measure for analysis: MLD, MAX_ABS_DIFF, MIN_ODG, MIN_SSNR, default: MLD", default="MLD", ) parser.add_argument( "--include", nargs="+", type=str, help="List of tags to include in testcases", ) parser.add_argument( "--reject", nargs="+", type=str, help="List of tags to reject in testcases", ) parser.add_argument( "--match", type=str, help="Regex pattern for selecting testcases", ) parser.add_argument( "--job-include", nargs="+", type=str, help="List of tags to include in jobs", ) parser.add_argument( "--job-reject", nargs="+", type=str, help="List of tags to reject in jobs", ) parser.add_argument( "--job-match", type=str, help="Regex pattern for selecting jobs", ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) data = parse_csv_data(csv_data) plot_data(data, args.output_filename) plot_data(data, args) scripts/detect_regressions.py 0 → 100644 +194 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 import argparse from pathlib import 
Path import pandas as pd import plotly.graph_objects as go from plotly.subplots import make_subplots def main(args): logs_dir = args.logs_dir output_filename = args.output_filename measure = args.measure days = args.days input_path = Path(logs_dir) logs = [f for f in input_path.iterdir() if f.is_dir()] # Build dict of scores formatdict = {} sha = {} logdict = {} for log in logs: date = log.name logdict[date] = {} formatdict[date] = {} for logfile in log.glob("*.csv"): tmp = logfile.name.split("-") job = "-".join(tmp[3:-4]) sha[date] = tmp[-1].split(".")[0] data = pd.read_csv(logfile, usecols=["testcase", measure, "format"]) logdict[date][job] = {} formatdict[date][job] = {} for testcase, value, format in zip( data["testcase"], data[measure], data["format"] ): formatdict[date][job][testcase] = format logdict[date][job][testcase] = value # Restructure dict csv_rows = [] formats = [] for date, jobs in logdict.items(): for job, testcases in jobs.items(): for testcase, value in testcases.items(): csv_rows.append( (job, testcase, formatdict[date][job][testcase], date, value) ) result = pd.DataFrame( csv_rows, columns=["job", "testcase", "format", "date", "value"] ) result = result.pivot( index=["job", "testcase", "format"], columns="date", values="value" ).reset_index() # Keep only tests for which results exist in any of the days if days == -1: rng = result.columns[3:] # Whole history else: rng = result.columns[-days:] result = result.dropna(subset=rng) result = result.reset_index(drop=True) ratio = result.copy() ratio = ratio.reset_index() dates = result.iloc[:, 3:].columns # Calculate ratios ratio[dates[0]] = 1.0 # Set first ratio to 1.0 for prevdate, currdate in zip(dates[0:-1], dates[1:]): ratio[currdate] = result[currdate] / result[prevdate] values = result.iloc[:, 3:] date = values.columns formats = result["format"].dropna().unique().tolist() plotdata = pd.DataFrame(0.0, index=formats, columns=dates[-(days) : -1]) plottext = pd.DataFrame("", index=formats, 
columns=dates[-(days) : -1]) all_indices = [] for i in range(days): currdate = dates[-(days - i)] # Make robust for shorter history prevdate = dates[-(days - i + 1)] idx = ratio.groupby("format")[currdate].idxmax() all_indices.extend(idx.tolist()) # Store worst case per format for plotting for f in formats: plotdata.loc[f, currdate] = ratio.iloc[idx[f]][currdate] plottext.loc[f, currdate] = ( f"Job: {result.iloc[idx[f]]['job']}<br>Testcase: {result.iloc[idx[f]]['testcase']} <br>Max {measure} ratio: {ratio.iloc[idx[f]][currdate]:.2f}<br>Date: {currdate}" ) fig = make_subplots( rows=5, cols=2, subplot_titles=[f"{i}" for i in formats], shared_xaxes="columns", ) for i, fmt in enumerate(formats): row = i // 2 + 1 col = i % 2 + 1 fig.add_trace( go.Scatter( x=pd.to_datetime(plotdata.columns), y=plotdata.loc[fmt], mode="lines+markers", name=f"Max {measure}", hovertext=plottext.loc[fmt], hoverinfo="text", showlegend=False, ), row=row, col=col, ) fig.update_layout( title_text=f"Regression detection: Max {measure} ratio", legend=dict(x=1, y=1, orientation="v"), hovermode="x unified", ) fig.update_xaxes(automargin=True) fig.update_yaxes(automargin=True) # Save to html fig.write_html(output_filename) # Write CSV-file if args.csv: output = result.iloc[all_indices].copy() cols = ["job","testcase","format"] cols.extend(date[-days:].tolist()) output = output.loc[:,cols] values = output.iloc[:, 3:] last_date = values.columns[-1] output.insert(3, "min_date", values.idxmin(axis=1)) output.insert(4, "min_sha", output["min_date"].map(sha)) output.insert(5, "curr_value", output[last_date]) output.insert(6, "min_value", values.min(axis=1)) output.insert(7, "diff", output["curr_value"] - output["min_value"]) output.insert(8, "ratio", output["curr_value"] / output["min_value"]) output.loc[output["min_value"] == 0, "ratio"] = ( 1 # Set ratio to 1 for denominator 0 ) output["min_sha"] = ( "'" + output["min_sha"] ) # Add apostrophy to prevent Excel reading this as a number output.sort_values( 
by=["format", "ratio"], ascending=[True, False], inplace=True ) output.to_csv(args.csv, sep=";", index=False) if __name__ == "__main__": parser = argparse.ArgumentParser(description="logs dir") parser.add_argument( "logs_dir", type=str, help="Logs dir, e.g. logs", ) parser.add_argument( "output_filename", type=str, help="Output html file. e.g mld.html", ) parser.add_argument( "--measure", type=str, help="Measure for summary, one of MLD MIN_SSNR MAX_ABS_DIFF MIN_ODG, (default: MLD)", default="MLD", ) parser.add_argument( "--days", type=int, help="Number of days in history, (default: whole history)", default=-1, ) parser.add_argument( "--csv", type=str, help="CSV output file", ) args = parser.parse_args() main(args) Loading
ci/process_long_term_logs.py +139 −71 Original line number Diff line number Diff line #!/usr/bin/env python3 import os import pandas as pd import argparse import plotly.express as px import re import plotly.graph_objects as go from plotly.subplots import make_subplots Loading @@ -23,15 +25,18 @@ def read_csv_files(root_dir): def parse_csv_data(csv_data): """keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF' and add 'date' column.""" cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF"] """keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF', 'MIN_ODG', 'MIN_SSNR' and add 'date' and 'job' column.""" cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF", "MIN_ODG", "MIN_SSNR"] parsed_data = {} for key, df in csv_data.items(): tmp = key.split("-") job = "-".join(tmp[4:-4]) cols = [col for col in cols_to_keep if col in df.columns] date = os.path.basename(os.path.dirname(key)) new_df = df[cols].copy() new_df["date"] = date new_df["job"] = job parsed_data[key] = new_df # concatenate all dataframe in the dictionary Loading @@ -39,38 +44,67 @@ def parse_csv_data(csv_data): return concat_df def plot_data(df, output_filename): """plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save def plot_data(df, args): """plot max values for measure and data and save to html file.""" measure = args.measure days = args.days # Convert 'date' to datetime df["date"] = pd.to_datetime(df["date"], errors="coerce") df["MLD"] = pd.to_numeric(df["MLD"], errors="coerce") df["MAX_ABS_DIFF"] = pd.to_numeric(df["MAX_ABS_DIFF"], errors="coerce") df[measure] = pd.to_numeric(df[measure], errors="coerce") # Filter out rows older than "days" cutoff = df["date"].max() - pd.Timedelta(days=days) df = df[df["date"] > cutoff].reset_index(drop=True) # Drop rows with NaT and NaN clean_df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) df = df.dropna(subset=["date", measure]) # Filter test cases based on include/reject/match arguments if args.include: mask = pd.Series(False, index=df.index) for 
tag in args.include: mask |= df["testcase"].str.contains(tag, case=False, na=False) df = df[mask] if args.reject: mask = pd.Series(False, index=df.index) for tag in args.reject: mask |= df["testcase"].str.contains(tag, case=False, na=False) df = df[~mask] if args.match: pattern = re.compile(args.match, re.IGNORECASE) df = df[df["testcase"].str.contains(pattern, na=False)] # Filter jobs based on job-include/job-reject/job-match arguments if args.job_include: mask = pd.Series(False, index=df.index) for tag in args.job_include: mask |= df["job"].str.contains(tag, case=False, na=False) df = df[mask] if args.job_reject: mask = pd.Series(False, index=df.index) for tag in args.job_reject: mask |= df["job"].str.contains(tag, case=False, na=False) df = df[~mask] if args.job_match: pattern = re.compile(args.job_match, re.IGNORECASE) df = df[df["job"].str.contains(pattern, na=False)] # Group by 'format' and 'date' to get rows with max 'MLD' per group max_mld = ( clean_df.groupby(["format", "date"]) .apply(lambda x: x.loc[x["MLD"].idxmax()]) .reset_index(drop=True) ) # Group by 'format' and 'date' to get rows with max 'MAX_ABS_DIFF' per # group max_abs_diff = ( clean_df.groupby(["format", "date"]) .apply(lambda x: x.loc[x["MAX_ABS_DIFF"].idxmax()]) .reset_index(drop=True) ) # Group by 'format' and 'date' to get rows with max 'MLD' per group idx = df.groupby(["format", "date"])[measure].idxmax() max = df.loc[idx].reset_index(drop=True) idx = df.groupby(["format", "date"])[measure].idxmin() min = df.loc[idx].reset_index(drop=True) mean = df.groupby(["format", "date"])[measure].mean().to_frame("mean").reset_index() formats = sorted(clean_df["format"].unique()) formats = sorted(df["format"].unique()) fig = make_subplots( rows=5, cols=2, specs=[[{"secondary_y": True}] * 2] * 5, subplot_titles=[f"{i}" for i in formats], shared_xaxes="columns", ) Loading @@ -79,64 +113,65 @@ def plot_data(df, output_filename): row = i // 2 + 1 col = i % 2 + 1 data_mld = max_mld[max_mld["format"] == 
fmt].sort_values("date") data_diff = max_abs_diff[max_abs_diff["format"] == fmt].sort_values("date") if "MIN" in measure: data = min[min["format"] == fmt].sort_values("date") maxmin_str = "Min" else: data = max[max["format"] == fmt].sort_values("date") maxmin_str = "Max" # Add max 'MLD' to primary y-axis # Add max measure to plots fig.add_trace( go.Scatter( x=data_mld["date"], y=data_mld["MLD"], x=data["date"], y=data[measure], mode="lines+markers", name=f" {fmt} - Max MLD", name=f"{maxmin_str} {measure}", hovertext=[ f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:" f"{abs_diff}<br>Format:" f" {format}<br>Date: {date.date()}" for tc, mld, abs_diff, format, date in zip( data_mld["testcase"], data_mld["MLD"], data_mld["MAX_ABS_DIFF"], data_mld["format"], data_mld["date"], f"Testcase: {tc}<br>{maxmin_str} {measure}: {value:.4f}" f"<br>Job: {job}" f"<br>Date: {date.date()}" for job, tc, value, date in zip( data["job"], data["testcase"], data[measure], data["date"], ) ], hoverinfo="text", marker_color="red", showlegend=(i == 0), ), row=row, col=col, secondary_y=False, ) # Add max 'MAX_ABS_DIFF' to secondary y-axis data = mean[mean["format"] == fmt].sort_values("date") # Add mean measure to plots fig.add_trace( go.Scatter( x=data_diff["date"], y=data_diff["MAX_ABS_DIFF"], x=data["date"], y=data["mean"], mode="lines+markers", name=f"{fmt} - Max MAX_ABS_DIFF", name=f"Mean {measure}", hovertext=[ f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:" f" {abs_diff:.4f}<br>Format:" f" {format}<br>Date: {date.date()}" for tc, mld, abs_diff, format, date in zip( data_diff["testcase"], data_diff["MLD"], data_diff["MAX_ABS_DIFF"], data_diff["format"], data_diff["date"], f"Mean {measure}: {value:.4f}" f"<br>Date: {date.date()}" for value, date in zip( data["mean"], data["date"], ) ], hoverinfo="text", marker_color="blue", showlegend=(i == 0), ), row=row, col=col, secondary_y=True, ) fig.update_layout( title_text="Long-term regression: max MLD and max MAX_ABS_DIFF", 
title_text=f"History: {measure}", legend=dict(x=1, y=1, orientation="v"), hovermode="x unified", ) Loading @@ -144,21 +179,8 @@ def plot_data(df, output_filename): fig.update_xaxes(automargin=True) fig.update_yaxes(automargin=True) # Update y-axes titles per subplot for i in range(10): yaxis_num = i * 2 + 1 yaxis2_num = yaxis_num + 1 fig["layout"][f"yaxis{yaxis_num}"].update( title="Max MLD", titlefont=dict(color="blue"), tickfont=dict(color="blue") ) fig["layout"][f"yaxis{yaxis2_num}"].update( title="Max MAX_ABS_DIFF", titlefont=dict(color="green"), tickfont=dict(color="green"), ) # Save to html fig.write_html(output_filename) fig.write_html(args.output_filename) if __name__ == "__main__": Loading @@ -173,8 +195,54 @@ if __name__ == "__main__": type=str, help="Filename of the generated plot. e.g" ". long_term_regression.html", ) parser.add_argument( "--days", type=int, help="Number of days in history. Default: 30", default=30, ) parser.add_argument( "--measure", type=str, help="Measure for analysis: MLD, MAX_ABS_DIFF, MIN_ODG, MIN_SSNR, default: MLD", default="MLD", ) parser.add_argument( "--include", nargs="+", type=str, help="List of tags to include in testcases", ) parser.add_argument( "--reject", nargs="+", type=str, help="List of tags to reject in testcases", ) parser.add_argument( "--match", type=str, help="Regex pattern for selecting testcases", ) parser.add_argument( "--job-include", nargs="+", type=str, help="List of tags to include in jobs", ) parser.add_argument( "--job-reject", nargs="+", type=str, help="List of tags to reject in jobs", ) parser.add_argument( "--job-match", type=str, help="Regex pattern for selecting jobs", ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) data = parse_csv_data(csv_data) plot_data(data, args.output_filename) plot_data(data, args)
#!/usr/bin/env python3
"""Detect regressions in long-term test logs (scripts/detect_regressions.py).

The logs directory is expected to contain one sub-directory per date, each
holding CSV files with at least the columns ``testcase``, ``format`` and the
selected measure.  For every (job, testcase, format) the day-over-day ratio of
the measure is computed.  The largest ratio per format and date is plotted to
an HTML file, and the corresponding rows can optionally be exported as CSV.
"""

import argparse
from pathlib import Path

import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Columns identifying one test series; they precede the per-date columns in
# the pivoted table.
KEY_COLUMNS = ["job", "testcase", "format"]


def _load_results(logs_dir, measure):
    """Read all CSV logs below *logs_dir* into one table.

    Returns a tuple ``(result, sha)``:

    * ``result`` has the columns ``job``, ``testcase``, ``format`` followed by
      one column per date (sub-directory name) holding the value of *measure*.
    * ``sha`` maps each date to the part of the log file name after the last
      ``-`` and before the first ``.`` (used as ``min_sha`` in the CSV).

    Raises ValueError if no CSV rows are found.
    """
    scores = {}  # (date, job) -> {testcase: (format, value)}
    sha = {}
    for log in Path(logs_dir).iterdir():
        if not log.is_dir():
            continue
        date = log.name
        for logfile in log.glob("*.csv"):
            parts = logfile.name.split("-")
            job = "-".join(parts[3:-4])
            sha[date] = parts[-1].split(".")[0]
            data = pd.read_csv(logfile, usecols=["testcase", measure, "format"])
            # A later file for the same date/job replaces the earlier one and a
            # repeated testcase keeps its last row, so the pivot below never
            # sees duplicate keys.
            scores[(date, job)] = {
                testcase: (fmt, value)
                for testcase, value, fmt in zip(
                    data["testcase"], data[measure], data["format"]
                )
            }

    rows = [
        (job, testcase, fmt, date, value)
        for (date, job), testcases in scores.items()
        for testcase, (fmt, value) in testcases.items()
    ]
    if not rows:
        raise ValueError(f"No CSV log data found in '{logs_dir}'")

    result = pd.DataFrame(rows, columns=KEY_COLUMNS + ["date", "value"])
    result = result.pivot(
        index=KEY_COLUMNS, columns="date", values="value"
    ).reset_index()
    return result, sha


def _write_csv(result, worst_rows, plot_dates, sha, csv_path):
    """Write the selected worst-case rows of *result* to *csv_path*.

    For each row the minimum value within *plot_dates* is compared with the
    value on the last date (difference and ratio).
    """
    date_cols = plot_dates.tolist()
    output = result.loc[worst_rows, KEY_COLUMNS + date_cols].copy()
    values = output[date_cols]
    last_date = date_cols[-1]

    output.insert(3, "min_date", values.idxmin(axis=1))
    output.insert(4, "min_sha", output["min_date"].map(sha))
    output.insert(5, "curr_value", output[last_date])
    output.insert(6, "min_value", values.min(axis=1))
    output.insert(7, "diff", output["curr_value"] - output["min_value"])
    output.insert(8, "ratio", output["curr_value"] / output["min_value"])
    # Set ratio to 1 for denominator 0
    output.loc[output["min_value"] == 0, "ratio"] = 1
    # Add apostrophe to prevent Excel reading this as a number
    output["min_sha"] = "'" + output["min_sha"]
    output.sort_values(by=["format", "ratio"], ascending=[True, False], inplace=True)
    output.to_csv(csv_path, sep=";", index=False)


def main(args):
    """Build the regression plot (and optional CSV) described by *args*.

    *args* must provide ``logs_dir``, ``output_filename``, ``measure``,
    ``days`` and ``csv``.  ``days <= 0`` (the default is -1) selects the whole
    history; a value larger than the available history is clamped to it.
    """
    measure = args.measure
    days = args.days

    result, sha = _load_results(args.logs_dir, measure)

    all_dates = result.columns[len(KEY_COLUMNS):]
    n_days = len(all_dates) if days <= 0 else min(days, len(all_dates))
    plot_dates = all_dates[-n_days:]

    # Keep only tests that have a result on every one of the selected days
    result = result.dropna(subset=plot_dates).reset_index(drop=True)

    # Day-over-day ratios; the first date has no predecessor and is set to 1.0
    ratio = result.copy()
    ratio[all_dates[0]] = 1.0
    for prevdate, currdate in zip(all_dates[:-1], all_dates[1:]):
        ratio[currdate] = result[currdate] / result[prevdate]

    formats = result["format"].dropna().unique().tolist()
    # NaN (not 0.0) marks "no valid ratio" so such points appear as gaps
    plotdata = pd.DataFrame(float("nan"), index=formats, columns=plot_dates)
    plottext = pd.DataFrame("", index=formats, columns=plot_dates)

    # NOTE(review): the largest ratio is treated as the worst case for every
    # measure; for MIN_* measures a regression is presumably a decrease --
    # confirm the intended direction.
    worst_rows = []
    by_format = ratio.groupby("format")
    for currdate in plot_dates:
        for fmt, group in by_format:
            # The ratio is NaN when the previous day is missing or for 0/0;
            # skip formats that have no valid ratio on this date.
            candidates = group[currdate].dropna()
            if candidates.empty:
                continue
            row = candidates.idxmax()
            worst_rows.append(row)
            worst = ratio.at[row, currdate]
            plotdata.loc[fmt, currdate] = worst
            plottext.loc[fmt, currdate] = (
                f"Job: {result.at[row, 'job']}<br>Testcase: {result.at[row, 'testcase']} <br>Max {measure} ratio: {worst:.2f}<br>Date: {currdate}"
            )
    # A test can be the worst case on several dates; list it only once
    worst_rows = list(dict.fromkeys(worst_rows))

    # Two plots per row; keep the original 5x2 grid and grow it when there
    # are more than ten formats.
    n_rows = max(5, (len(formats) + 1) // 2)
    fig = make_subplots(
        rows=n_rows,
        cols=2,
        subplot_titles=[f"{fmt}" for fmt in formats],
        shared_xaxes="columns",
    )

    for i, fmt in enumerate(formats):
        fig.add_trace(
            go.Scatter(
                x=pd.to_datetime(plotdata.columns),
                y=plotdata.loc[fmt],
                mode="lines+markers",
                name=f"Max {measure}",
                hovertext=plottext.loc[fmt],
                hoverinfo="text",
                showlegend=False,
            ),
            row=i // 2 + 1,
            col=i % 2 + 1,
        )

    fig.update_layout(
        title_text=f"Regression detection: Max {measure} ratio",
        legend=dict(x=1, y=1, orientation="v"),
        hovermode="x unified",
    )
    fig.update_xaxes(automargin=True)
    fig.update_yaxes(automargin=True)

    # Save to html
    fig.write_html(args.output_filename)

    # Write CSV-file
    if args.csv:
        _write_csv(result, worst_rows, plot_dates, sha, args.csv)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="logs dir")
    parser.add_argument(
        "logs_dir",
        type=str,
        help="Logs dir, e.g. logs",
    )
    parser.add_argument(
        "output_filename",
        type=str,
        help="Output html file. e.g mld.html",
    )
    parser.add_argument(
        "--measure",
        type=str,
        help="Measure for summary, one of MLD MIN_SSNR MAX_ABS_DIFF MIN_ODG, (default: MLD)",
        default="MLD",
    )
    parser.add_argument(
        "--days",
        type=int,
        help="Number of days in history, (default: whole history)",
        default=-1,
    )
    parser.add_argument(
        "--csv",
        type=str,
        help="CSV output file",
    )
    args = parser.parse_args()
    main(args)