Loading ci/process_long_term_logs.py +94 −61 Original line number Diff line number Diff line Loading @@ -11,13 +11,13 @@ def read_csv_files(root_dir): csv_data = {} for subdir, dirs, files in os.walk(root_dir): for file in files: if file.endswith('.csv'): if file.endswith(".csv"): file_path = os.path.join(subdir, file) try: df = pd.read_csv(file_path) csv_data[file_path] = df except Exception as e: print(f'Failed to read {file_path}: {e}') print(f"Failed to read {file_path}: {e}") exit(-1) return csv_data Loading @@ -28,11 +28,10 @@ def parse_csv_data(csv_data): cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF"] parsed_data = {} for key, df in csv_data.items(): cols = [ col for col in cols_to_keep if col in df.columns] cols = [col for col in cols_to_keep if col in df.columns] date = os.path.basename(os.path.dirname(key)) new_df = df[cols].copy() new_df['date'] = date new_df["date"] = date parsed_data[key] = new_df # concatenate all dataframe in the dictionary Loading @@ -44,73 +43,102 @@ def plot_data(df, output_filename): """plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save to html file.""" # Convert 'date' to datetime df['date'] = pd.to_datetime(df['date'], errors='coerce') df['MLD'] = pd.to_numeric(df['MLD'], errors='coerce') df['MAX_ABS_DIFF'] = pd.to_numeric(df['MAX_ABS_DIFF'], errors='coerce') df["date"] = pd.to_datetime(df["date"], errors="coerce") df["MLD"] = pd.to_numeric(df["MLD"], errors="coerce") df["MAX_ABS_DIFF"] = pd.to_numeric(df["MAX_ABS_DIFF"], errors="coerce") # Drop rows with NaT and NaN clean_df = df.dropna(subset=['date', 'MLD', 'MAX_ABS_DIFF']) clean_df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) # Group by 'format' and 'date' to get rows with max 'MLD' per group max_mld = clean_df.groupby(['format', 'date']).apply( lambda x: x.loc[x['MLD'].idxmax()]).reset_index(drop=True) max_mld = ( clean_df.groupby(["format", "date"]) .apply(lambda x: x.loc[x["MLD"].idxmax()]) .reset_index(drop=True) ) # Group by 'format' and 'date' to get rows with max 'MAX_ABS_DIFF' per # group max_diff = clean_df.groupby(['format', 'date']).apply( lambda x: x.loc[x['MAX_ABS_DIFF'].idxmax()]).reset_index(drop=True) max_abs_diff = ( clean_df.groupby(["format", "date"]) .apply(lambda x: x.loc[x["MAX_ABS_DIFF"].idxmax()]) .reset_index(drop=True) ) formats = sorted(clean_df['format'].unique()) formats = sorted(clean_df["format"].unique()) fig = make_subplots( rows=5, cols=2, rows=5, cols=2, specs=[[{"secondary_y": True}] * 2] * 5, subplot_titles=[f'{i}' for i in formats], shared_xaxes='columns' subplot_titles=[f"{i}" for i in formats], shared_xaxes="columns", ) for i, fmt in enumerate(formats): row = i // 2 + 1 col = i % 2 + 1 data_mld = max_mld[max_mld['format'] == fmt].sort_values('date') data_diff = max_diff[max_diff['format'] == fmt].sort_values('date') data_mld = max_mld[max_mld["format"] == fmt].sort_values("date") data_diff = max_abs_diff[max_abs_diff["format"] == fmt].sort_values("date") # Add max 'MLD' to primary y-axis fig.add_trace( go.Scatter( x=data_mld['date'], y=data_mld['MLD'], mode='lines+markers', name=f' {fmt} - Max MLD', hovertext=[f"Testcase: {tc}<br>MAX_ABS_DIFF: {diff}<br>Format:" x=data_mld["date"], y=data_mld["MLD"], mode="lines+markers", name=f" {fmt} - Max MLD", hovertext=[ f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:" f"{abs_diff}<br>Format:" f" {format}<br>Date: {date.date()}" for tc, diff, format, date in zip( data_mld['testcase'], data_mld['MAX_ABS_DIFF'], data_mld['format'], data_mld['date'])], hoverinfo='text+y' for tc, mld, abs_diff, format, date in zip( data_mld["testcase"], data_mld["MLD"], data_mld["MAX_ABS_DIFF"], data_mld["format"], data_mld["date"], ) ], hoverinfo="text", ), row=row, col=col, secondary_y=False row=row, col=col, secondary_y=False, ) # Add max 'MAX_ABS_DIFF' to secondary y-axis fig.add_trace( go.Scatter( x=data_diff['date'], y=data_diff['MAX_ABS_DIFF'], mode='lines+markers', name=f'{fmt} - Max MAX_ABS_DIFF', hovertext=[f"Testcase: {tc}<br>MLD: {mld:.4f}<br>Format:" x=data_diff["date"], y=data_diff["MAX_ABS_DIFF"], mode="lines+markers", name=f"{fmt} - Max MAX_ABS_DIFF", hovertext=[ f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:" f" {abs_diff:.4f}<br>Format:" f" {format}<br>Date: {date.date()}" for tc, mld, format, date in zip( data_diff['testcase'], data_diff['MLD'], data_mld['format'], data_diff['date'])], hoverinfo='text+y' for tc, mld, abs_diff, format, date in zip( data_diff["testcase"], data_diff["MLD"], data_diff["MAX_ABS_DIFF"], data_diff["format"], data_diff["date"], ) ], hoverinfo="text", ), row=row, col=col, secondary_y=True row=row, col=col, secondary_y=True, ) fig.update_layout( title_text='Long-term regression: max MLD and max MAX_ABS_DIFF', legend=dict(x=1, y=1, orientation='v'), hovermode='x unified' title_text="Long-term regression: max MLD and max MAX_ABS_DIFF", legend=dict(x=1, y=1, orientation="v"), hovermode="x unified", ) fig.update_xaxes(automargin=True) Loading @@ -120,26 +148,31 @@ def plot_data(df, output_filename): for i in range(10): yaxis_num = i * 2 + 1 yaxis2_num = yaxis_num + 1 fig['layout'][f'yaxis{yaxis_num}'].update( title='Max MLD', titlefont=dict(color='blue'), tickfont=dict(color='blue')) fig['layout'][f'yaxis{yaxis2_num}'].update( title='Max MAX_ABS_DIFF', titlefont=dict(color='green'), tickfont=dict(color='green')) fig["layout"][f"yaxis{yaxis_num}"].update( title="Max MLD", titlefont=dict(color="blue"), tickfont=dict(color="blue") ) fig["layout"][f"yaxis{yaxis2_num}"].update( title="Max MAX_ABS_DIFF", titlefont=dict(color="green"), tickfont=dict(color="green"), ) # Save to html fig.write_html(output_filename) if __name__ == '__main__': parser = argparse.ArgumentParser( description='Plot long term logs') if __name__ == "__main__": parser = argparse.ArgumentParser(description="Plot long term logs") parser.add_argument( 'root_dir', type=str, help='Root directory containing subdirectories' ' with CSV log files') "root_dir", type=str, help="Root directory containing subdirectories" " with CSV log files", ) parser.add_argument( 'output_filename', type=str, help='Filename of the generated plot. e.g' '. long_term_regression.html') "output_filename", type=str, help="Filename of the generated plot. e.g" ". long_term_regression.html", ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) Loading Loading
ci/process_long_term_logs.py +94 −61 Original line number Diff line number Diff line Loading @@ -11,13 +11,13 @@ def read_csv_files(root_dir): csv_data = {} for subdir, dirs, files in os.walk(root_dir): for file in files: if file.endswith('.csv'): if file.endswith(".csv"): file_path = os.path.join(subdir, file) try: df = pd.read_csv(file_path) csv_data[file_path] = df except Exception as e: print(f'Failed to read {file_path}: {e}') print(f"Failed to read {file_path}: {e}") exit(-1) return csv_data Loading @@ -28,11 +28,10 @@ def parse_csv_data(csv_data): cols_to_keep = ["testcase", "format", "MLD", "MAX_ABS_DIFF"] parsed_data = {} for key, df in csv_data.items(): cols = [ col for col in cols_to_keep if col in df.columns] cols = [col for col in cols_to_keep if col in df.columns] date = os.path.basename(os.path.dirname(key)) new_df = df[cols].copy() new_df['date'] = date new_df["date"] = date parsed_data[key] = new_df # concatenate all dataframe in the dictionary Loading @@ -44,73 +43,102 @@ def plot_data(df, output_filename): """plot max values for 'MLD' and 'MAX_ABS_DIFF' data and save to html file.""" # Convert 'date' to datetime df['date'] = pd.to_datetime(df['date'], errors='coerce') df['MLD'] = pd.to_numeric(df['MLD'], errors='coerce') df['MAX_ABS_DIFF'] = pd.to_numeric(df['MAX_ABS_DIFF'], errors='coerce') df["date"] = pd.to_datetime(df["date"], errors="coerce") df["MLD"] = pd.to_numeric(df["MLD"], errors="coerce") df["MAX_ABS_DIFF"] = pd.to_numeric(df["MAX_ABS_DIFF"], errors="coerce") # Drop rows with NaT and NaN clean_df = df.dropna(subset=['date', 'MLD', 'MAX_ABS_DIFF']) clean_df = df.dropna(subset=["date", "MLD", "MAX_ABS_DIFF"]) # Group by 'format' and 'date' to get rows with max 'MLD' per group max_mld = clean_df.groupby(['format', 'date']).apply( lambda x: x.loc[x['MLD'].idxmax()]).reset_index(drop=True) max_mld = ( clean_df.groupby(["format", "date"]) .apply(lambda x: x.loc[x["MLD"].idxmax()]) .reset_index(drop=True) ) # Group by 'format' and 'date' to get rows with max 'MAX_ABS_DIFF' per # group max_diff = clean_df.groupby(['format', 'date']).apply( lambda x: x.loc[x['MAX_ABS_DIFF'].idxmax()]).reset_index(drop=True) max_abs_diff = ( clean_df.groupby(["format", "date"]) .apply(lambda x: x.loc[x["MAX_ABS_DIFF"].idxmax()]) .reset_index(drop=True) ) formats = sorted(clean_df['format'].unique()) formats = sorted(clean_df["format"].unique()) fig = make_subplots( rows=5, cols=2, rows=5, cols=2, specs=[[{"secondary_y": True}] * 2] * 5, subplot_titles=[f'{i}' for i in formats], shared_xaxes='columns' subplot_titles=[f"{i}" for i in formats], shared_xaxes="columns", ) for i, fmt in enumerate(formats): row = i // 2 + 1 col = i % 2 + 1 data_mld = max_mld[max_mld['format'] == fmt].sort_values('date') data_diff = max_diff[max_diff['format'] == fmt].sort_values('date') data_mld = max_mld[max_mld["format"] == fmt].sort_values("date") data_diff = max_abs_diff[max_abs_diff["format"] == fmt].sort_values("date") # Add max 'MLD' to primary y-axis fig.add_trace( go.Scatter( x=data_mld['date'], y=data_mld['MLD'], mode='lines+markers', name=f' {fmt} - Max MLD', hovertext=[f"Testcase: {tc}<br>MAX_ABS_DIFF: {diff}<br>Format:" x=data_mld["date"], y=data_mld["MLD"], mode="lines+markers", name=f" {fmt} - Max MLD", hovertext=[ f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:" f"{abs_diff}<br>Format:" f" {format}<br>Date: {date.date()}" for tc, diff, format, date in zip( data_mld['testcase'], data_mld['MAX_ABS_DIFF'], data_mld['format'], data_mld['date'])], hoverinfo='text+y' for tc, mld, abs_diff, format, date in zip( data_mld["testcase"], data_mld["MLD"], data_mld["MAX_ABS_DIFF"], data_mld["format"], data_mld["date"], ) ], hoverinfo="text", ), row=row, col=col, secondary_y=False row=row, col=col, secondary_y=False, ) # Add max 'MAX_ABS_DIFF' to secondary y-axis fig.add_trace( go.Scatter( x=data_diff['date'], y=data_diff['MAX_ABS_DIFF'], mode='lines+markers', name=f'{fmt} - Max MAX_ABS_DIFF', hovertext=[f"Testcase: {tc}<br>MLD: {mld:.4f}<br>Format:" x=data_diff["date"], y=data_diff["MAX_ABS_DIFF"], mode="lines+markers", name=f"{fmt} - Max MAX_ABS_DIFF", hovertext=[ f"Testcase: {tc}<br>MLD: {mld:.4f}<br>MAX_ABS_DIFF:" f" {abs_diff:.4f}<br>Format:" f" {format}<br>Date: {date.date()}" for tc, mld, format, date in zip( data_diff['testcase'], data_diff['MLD'], data_mld['format'], data_diff['date'])], hoverinfo='text+y' for tc, mld, abs_diff, format, date in zip( data_diff["testcase"], data_diff["MLD"], data_diff["MAX_ABS_DIFF"], data_diff["format"], data_diff["date"], ) ], hoverinfo="text", ), row=row, col=col, secondary_y=True row=row, col=col, secondary_y=True, ) fig.update_layout( title_text='Long-term regression: max MLD and max MAX_ABS_DIFF', legend=dict(x=1, y=1, orientation='v'), hovermode='x unified' title_text="Long-term regression: max MLD and max MAX_ABS_DIFF", legend=dict(x=1, y=1, orientation="v"), hovermode="x unified", ) fig.update_xaxes(automargin=True) Loading @@ -120,26 +148,31 @@ def plot_data(df, output_filename): for i in range(10): yaxis_num = i * 2 + 1 yaxis2_num = yaxis_num + 1 fig['layout'][f'yaxis{yaxis_num}'].update( title='Max MLD', titlefont=dict(color='blue'), tickfont=dict(color='blue')) fig['layout'][f'yaxis{yaxis2_num}'].update( title='Max MAX_ABS_DIFF', titlefont=dict(color='green'), tickfont=dict(color='green')) fig["layout"][f"yaxis{yaxis_num}"].update( title="Max MLD", titlefont=dict(color="blue"), tickfont=dict(color="blue") ) fig["layout"][f"yaxis{yaxis2_num}"].update( title="Max MAX_ABS_DIFF", titlefont=dict(color="green"), tickfont=dict(color="green"), ) # Save to html fig.write_html(output_filename) if __name__ == '__main__': parser = argparse.ArgumentParser( description='Plot long term logs') if __name__ == "__main__": parser = argparse.ArgumentParser(description="Plot long term logs") parser.add_argument( 'root_dir', type=str, help='Root directory containing subdirectories' ' with CSV log files') "root_dir", type=str, help="Root directory containing subdirectories" " with CSV log files", ) parser.add_argument( 'output_filename', type=str, help='Filename of the generated plot. e.g' '. long_term_regression.html') "output_filename", type=str, help="Filename of the generated plot. e.g" ". long_term_regression.html", ) args = parser.parse_args() csv_data = read_csv_files(args.root_dir) Loading