Loading ci/process_long_term_logs.py +37 −53 Original line number Diff line number Diff line Loading @@ -39,7 +39,7 @@ def parse_csv_data(csv_data): return concat_df def plot_data(df, output_filename): def plot_data(df, output_filename, days): """plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save to html file.""" # Convert 'date' to datetime Loading @@ -47,30 +47,25 @@ def plot_data(df, output_filename): df["MLD"] = pd.to_numeric(df["MLD"], errors="coerce") df["MAX_ABS_DIFF"] = pd.to_numeric(df["MAX_ABS_DIFF"], errors="coerce") # Filter out rows older than "days" cutoff = df["date"].max() - pd.Timedelta(days=days) df = df[df["date"] > cutoff].reset_index(drop=True) # Drop rows with NaT and NaN clean_df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) # Group by 'format' and 'date' to get rows with max 'MLD' per group max_mld = ( clean_df.groupby(["format", "date"]) .apply(lambda x: x.loc[x["MLD"].idxmax()]) .reset_index(drop=True) ) # Group by 'format' and 'date' to get rows with max 'MAX_ABS_DIFF' per # group max_abs_diff = ( clean_df.groupby(["format", "date"]) .apply(lambda x: x.loc[x["MAX_ABS_DIFF"].idxmax()]) .reset_index(drop=True) ) idx = df.groupby(['format', 'date'])['MLD'].idxmax() max = df.loc[idx].reset_index(drop=True) idx = df.groupby(['format', 'date'])['MLD'].idxmin() min = df.loc[idx].reset_index(drop=True) mean = df.groupby(['format', 'date'])['MLD'].mean().to_frame('mean').reset_index() formats = sorted(clean_df["format"].unique()) formats = sorted(df["format"].unique()) fig = make_subplots( rows=5, cols=2, specs=[[{"secondary_y": True}] * 2] * 5, subplot_titles=[f"{i}" for i in formats], shared_xaxes="columns", ) Loading @@ -79,9 +74,7 @@ def plot_data(df, output_filename): row = i // 2 + 1 col = i % 2 + 1 data_mld = max_mld[max_mld["format"] == fmt].sort_values("date") data_diff = max_abs_diff[max_abs_diff["format"] == fmt].sort_values("date") data_mld = max[max["format"] == fmt].sort_values("date") # Add max 'MLD' to primary y-axis fig.add_trace( Loading @@ -89,54 +82,51 @@ def plot_data(df, output_filename): x=data_mld["date"], y=data_mld["MLD"], mode="lines+markers", name=f" {fmt} - Max MLD", name=f"Max MLD", hovertext=[ f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:" f"{abs_diff}<br>Format:" f"Testcase: {tc}<br>MLD: {mld:.4f}" f" {format}<br>Date: {date.date()}" for tc, mld, abs_diff, format, date in zip( for tc, mld, format, date in zip( data_mld["testcase"], data_mld["MLD"], data_mld["MAX_ABS_DIFF"], data_mld["format"], data_mld["date"], ) ], hoverinfo="text", ), row=row, col=col, secondary_y=False, ) # Add max 'MAX_ABS_DIFF' to secondary y-axis data_mld = mean[mean["format"] == fmt].sort_values("date") # Add mean 'MLD' to primary y-axis fig.add_trace( go.Scatter( x=data_diff["date"], y=data_diff["MAX_ABS_DIFF"], x=data_mld["date"], y=data_mld["mean"], mode="lines+markers", name=f"{fmt} - Max MAX_ABS_DIFF", name=f"Mean MLD", hovertext=[ f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:" f" {abs_diff:.4f}<br>Format:" f"Mean MLD: {mld:.4f}" f" {format}<br>Date: {date.date()}" for tc, mld, abs_diff, format, date in zip( data_diff["testcase"], data_diff["MLD"], data_diff["MAX_ABS_DIFF"], data_diff["format"], data_diff["date"], for mld, format, date in zip( data_mld["mean"], data_mld["format"], data_mld["date"], ) ], hoverinfo="text", ), row=row, col=col, secondary_y=True, ) fig.update_layout( title_text="Long-term regression: max MLD and max MAX_ABS_DIFF", title_text="History: MLD", legend=dict(x=1, y=1, orientation="v"), hovermode="x unified", ) Loading @@ -144,18 +134,6 @@ def plot_data(df, output_filename): fig.update_xaxes(automargin=True) fig.update_yaxes(automargin=True) # Update y-axes titles per subplot for i in range(10): yaxis_num = i * 2 + 1 yaxis2_num = yaxis_num + 1 fig["layout"][f"yaxis{yaxis_num}"].update( title="Max MLD", titlefont=dict(color="blue"), tickfont=dict(color="blue") ) fig["layout"][f"yaxis{yaxis2_num}"].update( title="Max MAX_ABS_DIFF", titlefont=dict(color="green"), tickfont=dict(color="green"), ) # Save to html fig.write_html(output_filename) Loading @@ -173,8 +151,14 @@ if __name__ == "__main__": type=str, help="Filename of the generated plot. e.g" ". long_term_regression.html", ) parser.add_argument( "--days", type=int, help="Number of days in history. Default: 30", default=30, ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) data = parse_csv_data(csv_data) plot_data(data, args.output_filename) plot_data(data, args.output_filename, args.days) Loading
ci/process_long_term_logs.py +37 −53 Original line number Diff line number Diff line Loading @@ -39,7 +39,7 @@ def parse_csv_data(csv_data): return concat_df def plot_data(df, output_filename): def plot_data(df, output_filename, days): """plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save to html file.""" # Convert 'date' to datetime Loading @@ -47,30 +47,25 @@ def plot_data(df, output_filename): df["MLD"] = pd.to_numeric(df["MLD"], errors="coerce") df["MAX_ABS_DIFF"] = pd.to_numeric(df["MAX_ABS_DIFF"], errors="coerce") # Filter out rows older than "days" cutoff = df["date"].max() - pd.Timedelta(days=days) df = df[df["date"] > cutoff].reset_index(drop=True) # Drop rows with NaT and NaN clean_df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) # Group by 'format' and 'date' to get rows with max 'MLD' per group max_mld = ( clean_df.groupby(["format", "date"]) .apply(lambda x: x.loc[x["MLD"].idxmax()]) .reset_index(drop=True) ) # Group by 'format' and 'date' to get rows with max 'MAX_ABS_DIFF' per # group max_abs_diff = ( clean_df.groupby(["format", "date"]) .apply(lambda x: x.loc[x["MAX_ABS_DIFF"].idxmax()]) .reset_index(drop=True) ) idx = df.groupby(['format', 'date'])['MLD'].idxmax() max = df.loc[idx].reset_index(drop=True) idx = df.groupby(['format', 'date'])['MLD'].idxmin() min = df.loc[idx].reset_index(drop=True) mean = df.groupby(['format', 'date'])['MLD'].mean().to_frame('mean').reset_index() formats = sorted(clean_df["format"].unique()) formats = sorted(df["format"].unique()) fig = make_subplots( rows=5, cols=2, specs=[[{"secondary_y": True}] * 2] * 5, subplot_titles=[f"{i}" for i in formats], shared_xaxes="columns", ) Loading @@ -79,9 +74,7 @@ def plot_data(df, output_filename): row = i // 2 + 1 col = i % 2 + 1 data_mld = max_mld[max_mld["format"] == fmt].sort_values("date") data_diff = max_abs_diff[max_abs_diff["format"] == fmt].sort_values("date") data_mld = max[max["format"] == fmt].sort_values("date") # Add max 'MLD' to primary y-axis fig.add_trace( Loading @@ -89,54 +82,51 @@ def plot_data(df, output_filename): x=data_mld["date"], y=data_mld["MLD"], mode="lines+markers", name=f" {fmt} - Max MLD", name=f"Max MLD", hovertext=[ f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:" f"{abs_diff}<br>Format:" f"Testcase: {tc}<br>MLD: {mld:.4f}" f" {format}<br>Date: {date.date()}" for tc, mld, abs_diff, format, date in zip( for tc, mld, format, date in zip( data_mld["testcase"], data_mld["MLD"], data_mld["MAX_ABS_DIFF"], data_mld["format"], data_mld["date"], ) ], hoverinfo="text", ), row=row, col=col, secondary_y=False, ) # Add max 'MAX_ABS_DIFF' to secondary y-axis data_mld = mean[mean["format"] == fmt].sort_values("date") # Add mean 'MLD' to primary y-axis fig.add_trace( go.Scatter( x=data_diff["date"], y=data_diff["MAX_ABS_DIFF"], x=data_mld["date"], y=data_mld["mean"], mode="lines+markers", name=f"{fmt} - Max MAX_ABS_DIFF", name=f"Mean MLD", hovertext=[ f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:" f" {abs_diff:.4f}<br>Format:" f"Mean MLD: {mld:.4f}" f" {format}<br>Date: {date.date()}" for tc, mld, abs_diff, format, date in zip( data_diff["testcase"], data_diff["MLD"], data_diff["MAX_ABS_DIFF"], data_diff["format"], data_diff["date"], for mld, format, date in zip( data_mld["mean"], data_mld["format"], data_mld["date"], ) ], hoverinfo="text", ), row=row, col=col, secondary_y=True, ) fig.update_layout( title_text="Long-term regression: max MLD and max MAX_ABS_DIFF", title_text="History: MLD", legend=dict(x=1, y=1, orientation="v"), hovermode="x unified", ) Loading @@ -144,18 +134,6 @@ def plot_data(df, output_filename): fig.update_xaxes(automargin=True) fig.update_yaxes(automargin=True) # Update y-axes titles per subplot for i in range(10): yaxis_num = i * 2 + 1 yaxis2_num = yaxis_num + 1 fig["layout"][f"yaxis{yaxis_num}"].update( title="Max MLD", titlefont=dict(color="blue"), tickfont=dict(color="blue") ) fig["layout"][f"yaxis{yaxis2_num}"].update( title="Max MAX_ABS_DIFF", titlefont=dict(color="green"), tickfont=dict(color="green"), ) # Save to html fig.write_html(output_filename) Loading @@ -173,8 +151,14 @@ if __name__ == "__main__": type=str, help="Filename of the generated plot. e.g" ". long_term_regression.html", ) parser.add_argument( "--days", type=int, help="Number of days in history. Default: 30", default=30, ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) data = parse_csv_data(csv_data) plot_data(data, args.output_filename) plot_data(data, args.output_filename, args.days)