Loading scripts/create_histograms.py +23 −3 Original line number Diff line number Diff line Loading @@ -37,17 +37,22 @@ def create_histograms( display_only: bool, bins_for_measures=BINS_FOR_MEASURES, prefix="", write_out_histograms=False, ): formats = df["format"].unique() categories = df["category"].unique() if not display_only: if not display_only or write_out_histograms: output_folder.mkdir(exist_ok=True, parents=True) for measure in measures: measure_in_df = prefix + measure bins = bins_for_measures.get(measure, get_bins_for_diff(df[measure_in_df])) x = [f"{x}" for x in bins] + ["", "ERROR"] df_hist = pd.DataFrame(columns=["format", "category"] + x) hist_row_count = 0 for fmt in formats: fig, ax = plt.subplots() ax.xaxis.set_major_formatter("{x:.1f}") Loading @@ -57,9 +62,9 @@ def create_histograms( df_slice = df[data_mask] error_mask = df_slice["result"] == "ERROR" n_errors = np.sum(error_mask) df_hist = df_slice[np.logical_not(error_mask)] df_slice = df_slice[np.logical_not(error_mask)] counts, _ = np.histogram(df_hist[measure_in_df], bins) counts, _ = np.histogram(df_slice[measure_in_df], bins) data = np.concatenate([counts, [0], [n_errors], [0]]) ax.bar( Loading @@ -74,6 +79,10 @@ def create_histograms( ) bottom += data hist_row = [fmt, cat] + list(counts) + [0] + [0, n_errors] df_hist.loc[hist_row_count] = hist_row hist_row_count += 1 # Histogram layout ax.set_title(fmt) ax.legend(loc="best") Loading @@ -94,6 +103,11 @@ def create_histograms( plt.savefig(image_path) plt.close(fig) if write_out_histograms: df_hist.to_csv( output_folder.joinpath(f"histogram_{measure}.csv"), index=False ) if display_only: plt.show() Loading Loading @@ -133,6 +147,11 @@ Use this for visualising diff scores.""", default="", help="Common suffix to use when collecting measures from the input csv file", ) parser.add_argument( "--write-out-histograms", action="store_true", help="Write out the histogram values to csv", ) args = parser.parse_args() df = pd.read_csv(args.csv_report) Loading @@ -152,4 +171,5 @@ Use this for visualising diff scores.""", args.display_only, bins_for_measures, args.prefix, args.write_out_histograms, ) Loading
scripts/create_histograms.py +23 −3 Original line number Diff line number Diff line Loading @@ -37,17 +37,22 @@ def create_histograms( display_only: bool, bins_for_measures=BINS_FOR_MEASURES, prefix="", write_out_histograms=False, ): formats = df["format"].unique() categories = df["category"].unique() if not display_only: if not display_only or write_out_histograms: output_folder.mkdir(exist_ok=True, parents=True) for measure in measures: measure_in_df = prefix + measure bins = bins_for_measures.get(measure, get_bins_for_diff(df[measure_in_df])) x = [f"{x}" for x in bins] + ["", "ERROR"] df_hist = pd.DataFrame(columns=["format", "category"] + x) hist_row_count = 0 for fmt in formats: fig, ax = plt.subplots() ax.xaxis.set_major_formatter("{x:.1f}") Loading @@ -57,9 +62,9 @@ def create_histograms( df_slice = df[data_mask] error_mask = df_slice["result"] == "ERROR" n_errors = np.sum(error_mask) df_hist = df_slice[np.logical_not(error_mask)] df_slice = df_slice[np.logical_not(error_mask)] counts, _ = np.histogram(df_hist[measure_in_df], bins) counts, _ = np.histogram(df_slice[measure_in_df], bins) data = np.concatenate([counts, [0], [n_errors], [0]]) ax.bar( Loading @@ -74,6 +79,10 @@ def create_histograms( ) bottom += data hist_row = [fmt, cat] + list(counts) + [0] + [0, n_errors] df_hist.loc[hist_row_count] = hist_row hist_row_count += 1 # Histogram layout ax.set_title(fmt) ax.legend(loc="best") Loading @@ -94,6 +103,11 @@ def create_histograms( plt.savefig(image_path) plt.close(fig) if write_out_histograms: df_hist.to_csv( output_folder.joinpath(f"histogram_{measure}.csv"), index=False ) if display_only: plt.show() Loading Loading @@ -133,6 +147,11 @@ Use this for visualising diff scores.""", default="", help="Common suffix to use when collecting measures from the input csv file", ) parser.add_argument( "--write-out-histograms", action="store_true", help="Write out the histogram values to csv", ) args = parser.parse_args() df = pd.read_csv(args.csv_report) Loading @@ -152,4 +171,5 @@ Use this for visualising diff scores.""", args.display_only, bins_for_measures, args.prefix, args.write_out_histograms, )