diff --git a/ci/process_long_term_logs.py b/ci/process_long_term_logs.py
new file mode 100644
index 0000000000000000000000000000000000000000..baabc7d93a1c28bf0804b9a939a6968810675fb7
--- /dev/null
+++ b/ci/process_long_term_logs.py
@@ -0,0 +1,180 @@
+import os
+import pandas as pd
+import argparse
+import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+
+
+def read_csv_files(root_dir):
+    """Read every CSV file below ``root_dir`` into pandas dataframes.
+
+    The directory tree is walked recursively with ``os.walk`` and each file
+    whose name ends with ``.csv`` is loaded with ``pd.read_csv``.
+
+    Args:
+        root_dir: Directory that is searched recursively for CSV files.
+
+    Returns:
+        dict mapping each CSV file path (containing directory joined with
+        the file name) to the dataframe read from it.
+
+    Raises:
+        SystemExit: If a CSV file cannot be read. The message names the
+            file and the underlying error; the process exits non-zero.
+    """
+    csv_data = {}
+    for subdir, dirs, files in os.walk(root_dir):
+        # Sorting in place makes os.walk descend in a stable order, so the
+        # result does not depend on the file system's listing order.
+        dirs.sort()
+        for file in sorted(files):
+            if not file.endswith(".csv"):
+                continue
+            file_path = os.path.join(subdir, file)
+            try:
+                csv_data[file_path] = pd.read_csv(file_path)
+            except Exception as e:
+                # The builtin ``exit`` is injected by the ``site`` module for
+                # interactive sessions and may be missing; raise SystemExit
+                # directly so the script still terminates with an error.
+                raise SystemExit(f"Failed to read {file_path}: {e}") from e
+    return csv_data
+
+
+def parse_csv_data(csv_data):
+    """Reduce the per-file dataframes to the plotted columns and merge them.
+
+    From every dataframe only the columns 'testcase', 'format', 'MLD' and
+    'MAX_ABS_DIFF' are kept (those that are present), and a 'date' column
+    is added that holds the name of the directory containing the file.
+
+    Args:
+        csv_data: dict mapping a CSV file path to the dataframe read from
+            that file.
+
+    Returns:
+        One dataframe with the rows of all inputs and a renumbered index.
+        If ``csv_data`` is empty, an empty dataframe with the columns
+        'testcase', 'format', 'MLD', 'MAX_ABS_DIFF' and 'date' is returned.
+    """
+    cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF"]
+    if not csv_data:
+        # pd.concat raises ValueError for an empty sequence; hand back an
+        # empty frame with the expected schema instead.
+        return pd.DataFrame(columns=cols_to_keep + ["date"])
+
+    frames = []
+    for path, df in csv_data.items():
+        cols = [col for col in cols_to_keep if col in df.columns]
+        # The 'date' value is the name of the file's parent directory.
+        date = os.path.basename(os.path.dirname(path))
+        frames.append(df[cols].assign(date=date))
+
+    return pd.concat(frames, ignore_index=True)
+
+
+def _hover_text(rows):
+    """Return one hover label per row of ``rows``.
+
+    ``rows`` must provide the columns 'testcase', 'MLD', 'MAX_ABS_DIFF',
+    'format' and 'date' (as timestamps). ``<br>`` separates the lines of
+    the label.
+    """
+    return [
+        f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF: {abs_diff:.4f}"
+        f"<br>Format: {fmt}<br>Date: {date.date()}"
+        for tc, mld, abs_diff, fmt, date in zip(
+            rows["testcase"],
+            rows["MLD"],
+            rows["MAX_ABS_DIFF"],
+            rows["format"],
+            rows["date"],
+        )
+    ]
+
+
+def plot_data(df, output_filename):
+    """Plot the per-date maxima of 'MLD' and 'MAX_ABS_DIFF' and save as HTML.
+
+    For each 'format' value one subplot is drawn. It shows, per date, the
+    largest 'MLD' (primary y-axis) and the largest 'MAX_ABS_DIFF' (secondary
+    y-axis); the hover label names the testcase that produced the maximum.
+    Rows whose 'date', 'MLD' or 'MAX_ABS_DIFF' cannot be converted to a
+    datetime/number are ignored.
+
+    Args:
+        df: Dataframe with the columns 'testcase', 'format', 'MLD',
+            'MAX_ABS_DIFF' and 'date'. It is not modified.
+        output_filename: Path of the HTML file to write.
+    """
+    # Work on a copy so the caller's dataframe is left untouched.
+    df = df.copy()
+    df["date"] = pd.to_datetime(df["date"], errors="coerce")
+    df["MLD"] = pd.to_numeric(df["MLD"], errors="coerce")
+    df["MAX_ABS_DIFF"] = pd.to_numeric(df["MAX_ABS_DIFF"], errors="coerce")
+
+    # Drop rows with NaT/NaN. The index is renumbered so the labels returned
+    # by idxmax below identify exactly one row each.
+    clean_df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]).reset_index(
+        drop=True
+    )
+
+    if clean_df.empty:
+        max_mld = max_abs_diff = clean_df
+    else:
+        # Select the full row holding the maximum per (format, date) group.
+        # Looking the rows up by idxmax keeps the 'format' and 'date' columns
+        # without relying on groupby.apply passing the grouping columns.
+        grouped = clean_df.groupby(["format", "date"])
+        max_mld = clean_df.loc[grouped["MLD"].idxmax()]
+        max_abs_diff = clean_df.loc[grouped["MAX_ABS_DIFF"].idxmax()]
+
+    formats = sorted(clean_df["format"].unique())
+
+    # Size the grid from the data: two subplots per row, at least one row.
+    n_cols = 2
+    n_rows = max(1, (len(formats) + n_cols - 1) // n_cols)
+
+    fig = make_subplots(
+        rows=n_rows,
+        cols=n_cols,
+        specs=[[{"secondary_y": True}] * n_cols for _ in range(n_rows)],
+        subplot_titles=[f"{fmt}" for fmt in formats],
+        shared_xaxes="columns",
+    )
+
+    for i, fmt in enumerate(formats):
+        row = i // n_cols + 1
+        col = i % n_cols + 1
+
+        # Max 'MLD' goes on the primary y-axis, max 'MAX_ABS_DIFF' on the
+        # secondary y-axis of the same subplot.
+        for frame, column, secondary in (
+            (max_mld, "MLD", False),
+            (max_abs_diff, "MAX_ABS_DIFF", True),
+        ):
+            rows = frame[frame["format"] == fmt].sort_values("date")
+            fig.add_trace(
+                go.Scatter(
+                    x=rows["date"],
+                    y=rows[column],
+                    mode="lines+markers",
+                    name=f"{fmt} - Max {column}",
+                    hovertext=_hover_text(rows),
+                    hoverinfo="text",
+                ),
+                row=row,
+                col=col,
+                secondary_y=secondary,
+            )
+
+    fig.update_layout(
+        title_text="Long-term regression: max MLD and max MAX_ABS_DIFF",
+        legend=dict(x=1, y=1, orientation="v"),
+        hovermode="x unified",
+    )
+
+    fig.update_xaxes(automargin=True)
+    fig.update_yaxes(automargin=True)
+
+    # Title and colour every primary/secondary y-axis. ``title.font`` is used
+    # instead of the deprecated ``titlefont`` attribute.
+    fig.update_yaxes(
+        title=dict(text="Max MLD", font=dict(color="blue")),
+        tickfont=dict(color="blue"),
+        secondary_y=False,
+    )
+    fig.update_yaxes(
+        title=dict(text="Max MAX_ABS_DIFF", font=dict(color="green")),
+        tickfont=dict(color="green"),
+        secondary_y=True,
+    )
+
+    # Save to html
+    fig.write_html(output_filename)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: read all CSV logs below the given root
+    # directory, merge them and write the plot to the given HTML file.
+    cli = argparse.ArgumentParser(description="Plot long term logs")
+    positional_help = {
+        "root_dir": (
+            "Root directory containing subdirectories with CSV log files"
+        ),
+        "output_filename": (
+            "Filename of the generated plot. e.g. long_term_regression.html"
+        ),
+    }
+    for arg_name, help_text in positional_help.items():
+        cli.add_argument(arg_name, type=str, help=help_text)
+    options = cli.parse_args()
+
+    frames_by_path = read_csv_files(options.root_dir)
+    merged = parse_csv_data(frames_by_path)
+    plot_data(merged, options.output_filename)