"""Plot long-term regression logs (ci/process_long_term_logs.py).

Walks a directory tree for CSV logs, keeps the 'testcase', 'format', 'MLD'
and 'MAX_ABS_DIFF' columns, tags every row with the name of the directory
its CSV file was found in (used as the 'date'), and writes an HTML page
with one subplot per format showing the per-date maximum of both metrics.
"""
import argparse
import math
import os
import sys

import pandas as pd

# Columns copied from every CSV log; 'date' is appended by parse_csv_data().
_COLS_TO_KEEP = ["testcase", "format", "MLD", "MAX_ABS_DIFF"]
_PARSED_COLS = _COLS_TO_KEEP + ["date"]

# Number of subplot columns in the generated figure.
_SUBPLOT_COLS = 2


def read_csv_files(root_dir):
    """Read every ``*.csv`` file below *root_dir* into a DataFrame.

    Args:
        root_dir: Directory that is walked recursively.

    Returns:
        dict mapping the path of each CSV file to its DataFrame.

    Raises:
        SystemExit: with status -1 as soon as one file cannot be read; the
            reason is printed to stderr first.
    """
    csv_data = {}
    for subdir, _dirs, files in os.walk(root_dir):
        for file in files:
            if not file.endswith(".csv"):
                continue
            file_path = os.path.join(subdir, file)
            try:
                csv_data[file_path] = pd.read_csv(file_path)
            except (OSError, ValueError) as e:
                # pandas' parser/empty-data errors and decoding errors are
                # ValueError subclasses; file-system problems are OSError.
                print(f"Failed to read {file_path}: {e}", file=sys.stderr)
                sys.exit(-1)
    return csv_data


def parse_csv_data(csv_data):
    """Keep 'testcase', 'format', 'MLD', 'MAX_ABS_DIFF' and add 'date'.

    The 'date' of a row is the name of the directory that directly contains
    the CSV file the row came from.

    Args:
        csv_data: dict mapping CSV file paths to DataFrames, as returned by
            read_csv_files().

    Returns:
        One DataFrame with all rows concatenated and the columns
        'testcase', 'format', 'MLD', 'MAX_ABS_DIFF', 'date'. A column that
        no input provides is filled with NaN; an empty *csv_data* yields an
        empty DataFrame with those columns.
    """
    frames = []
    for path, df in csv_data.items():
        present = [col for col in _COLS_TO_KEEP if col in df.columns]
        frame = df[present].copy()
        frame["date"] = os.path.basename(os.path.dirname(path))
        frames.append(frame)

    if not frames:
        # pd.concat() raises on an empty sequence.
        return pd.DataFrame(columns=_PARSED_COLS)

    # reindex guarantees the full column set even if no file had a column.
    return pd.concat(frames, ignore_index=True).reindex(columns=_PARSED_COLS)


def _prepare_metrics(df):
    """Return a cleaned copy of *df* with typed columns and a fresh index.

    'date' is converted to datetime and both metrics to numbers; values
    that cannot be converted become NaT/NaN and their rows are dropped,
    together with rows that have no 'format'. The caller's frame is left
    untouched.
    """
    clean_df = df.copy()
    clean_df["date"] = pd.to_datetime(clean_df["date"], errors="coerce")
    clean_df["MLD"] = pd.to_numeric(clean_df["MLD"], errors="coerce")
    clean_df["MAX_ABS_DIFF"] = pd.to_numeric(
        clean_df["MAX_ABS_DIFF"], errors="coerce"
    )
    clean_df = clean_df.dropna(
        subset=["format", "date", "MLD", "MAX_ABS_DIFF"]
    )
    # A unique index is required for the idxmax()/.loc lookup below.
    return clean_df.reset_index(drop=True)


def _rows_with_group_max(clean_df, metric):
    """Return, for each ('format', 'date') group, the row maximising *metric*."""
    if clean_df.empty:
        return clean_df
    # idxmax() on the grouped column avoids groupby.apply(), whose handling
    # of the grouping columns is deprecated in recent pandas releases.
    idx = clean_df.groupby(["format", "date"])[metric].idxmax()
    return clean_df.loc[idx].reset_index(drop=True)


def _hover_text(rows):
    """Build one hover label per row of *rows*."""
    # '<br>' is the line break Plotly understands inside hover labels.
    # NOTE(review): the reviewed copy showed raw line breaks at these
    # positions - confirm '<br>' matches the committed file.
    return [
        f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:"
        f" {abs_diff:.4f}<br>Format:"
        f" {fmt}<br>Date: {date.date()}"
        for tc, mld, abs_diff, fmt, date in zip(
            rows["testcase"],
            rows["MLD"],
            rows["MAX_ABS_DIFF"],
            rows["format"],
            rows["date"],
        )
    ]


def plot_data(df, output_filename):
    """Plot max values for 'MLD' and 'MAX_ABS_DIFF' and save to html file.

    For every format one subplot is drawn (two subplots per row). Each
    subplot shows, per date, the largest 'MLD' on the primary y-axis and
    the largest 'MAX_ABS_DIFF' on the secondary y-axis; hovering a point
    shows the row that produced the maximum.

    Args:
        df: DataFrame with the columns 'testcase', 'format', 'MLD',
            'MAX_ABS_DIFF' and 'date', e.g. from parse_csv_data(). It is
            not modified.
        output_filename: Path of the HTML file to write.
    """
    # Imported here so the CSV helpers above can be imported and used
    # without plotly being installed.
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots

    clean_df = _prepare_metrics(df)
    formats = sorted(clean_df["format"].unique())

    # Size the grid from the data; keep at least one row so an empty input
    # still produces a (blank) page instead of an error.
    n_rows = max(1, math.ceil(len(formats) / _SUBPLOT_COLS))
    fig = make_subplots(
        rows=n_rows,
        cols=_SUBPLOT_COLS,
        specs=[
            [{"secondary_y": True} for _ in range(_SUBPLOT_COLS)]
            for _ in range(n_rows)
        ],
        subplot_titles=[f"{fmt}" for fmt in formats],
        shared_xaxes="columns",
    )

    # (metric column, legend label, drawn on the secondary y-axis?)
    series = (
        ("MLD", "Max MLD", False),
        ("MAX_ABS_DIFF", "Max MAX_ABS_DIFF", True),
    )
    maxima = {
        metric: _rows_with_group_max(clean_df, metric)
        for metric, _label, _secondary in series
    }

    for i, fmt in enumerate(formats):
        row, col = divmod(i, _SUBPLOT_COLS)
        for metric, label, secondary in series:
            rows = maxima[metric]
            data = rows[rows["format"] == fmt].sort_values("date")
            fig.add_trace(
                go.Scatter(
                    x=data["date"],
                    y=data[metric],
                    mode="lines+markers",
                    name=f"{fmt} - {label}",
                    hovertext=_hover_text(data),
                    hoverinfo="text",
                ),
                row=row + 1,
                col=col + 1,
                secondary_y=secondary,
            )

    fig.update_layout(
        title_text="Long-term regression: max MLD and max MAX_ABS_DIFF",
        legend=dict(x=1, y=1, orientation="v"),
        hovermode="x unified",
    )

    fig.update_xaxes(automargin=True)
    fig.update_yaxes(automargin=True)

    # Style all primary / secondary y-axes in one call each. 'title.font'
    # replaces the deprecated 'titlefont' attribute.
    fig.update_yaxes(
        title=dict(text="Max MLD", font=dict(color="blue")),
        tickfont=dict(color="blue"),
        secondary_y=False,
    )
    fig.update_yaxes(
        title=dict(text="Max MAX_ABS_DIFF", font=dict(color="green")),
        tickfont=dict(color="green"),
        secondary_y=True,
    )

    # Save to html
    fig.write_html(output_filename)


def main():
    """Parse the command line, read and merge the logs, write the plot."""
    parser = argparse.ArgumentParser(description="Plot long term logs")
    parser.add_argument(
        "root_dir",
        type=str,
        help="Root directory containing subdirectories with CSV log files",
    )
    parser.add_argument(
        "output_filename",
        type=str,
        help="Filename of the generated plot. e.g. long_term_regression.html",
    )
    args = parser.parse_args()

    csv_data = read_csv_files(args.root_dir)
    data = parse_csv_data(csv_data)
    plot_data(data, args.output_filename)


if __name__ == "__main__":
    main()