Commit 4a27ea2f authored by Jan Kiene
Browse files

Merge branch 'ci/split-output-files-b4-comparison' into kiene/tmp-branch-for-ltv-split-testing

parents ae0041cc e69f9945
Loading
Loading
Loading
Loading
+38 −3
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@
import argparse
import math
import pathlib
import sys
import pandas as pd
import numpy as np
from typing import List
@@ -25,6 +26,19 @@ BINS_FOR_MEASURES = {

# Names of the measures handled by default.
# NOTE(review): presumably the fallback when no measures are selected on the
# command line; the usage is outside this view, so confirm.
DEFAULT_MEASURES = ["MAX_ABS_DIFF", "MLD", "MIN_SSNR", "MIN_ODG"]

# Directory containing this script, and that directory's parent.
HERE = pathlib.Path(__file__).parent
ROOT_DIR = HERE.parent
# Extend the module search path with ROOT_DIR before importing from `tests`;
# the import below has to stay after this line.
# NOTE(review): assumes the `tests` package lives directly under ROOT_DIR; confirm.
sys.path.append(str(ROOT_DIR))
from tests.constants import CAT_NORMAL, CAT_BITRATE_SWITCHING, CAT_DTX, CAT_JBM, CAT_PLC

# Fixed bar colour for each test category. create_histograms indexes this
# mapping with the category value when drawing that category's bars, so a
# category missing from here raises KeyError there. The values are matplotlib
# "tab:" named colours.
COLORS_FOR_CATEGORIES = {
    CAT_DTX: "tab:blue",
    CAT_PLC: "tab:orange",
    CAT_NORMAL: "tab:green",
    CAT_JBM: "tab:red",
    CAT_BITRATE_SWITCHING: "tab:purple",
}


def get_bins_for_diff(data: pd.Series):
    """Return ten evenly spaced bin edges spanning the value range of ``data``.

    The first edge is the smallest value in ``data`` and the last edge is the
    largest one, which gives nine equal-width bins.
    """
    lowest = data.min()
    highest = data.max()
    bin_edges = np.linspace(lowest, highest, num=10)
    return bin_edges
@@ -37,17 +51,22 @@ def create_histograms(
    display_only: bool,
    bins_for_measures=BINS_FOR_MEASURES,
    prefix="",
    write_out_histograms=False,
):
    formats = df["format"].unique()
    categories = df["category"].unique()

    if not display_only:
    if not display_only or write_out_histograms:
        output_folder.mkdir(exist_ok=True, parents=True)

    for measure in measures:
        measure_in_df = prefix + measure
        bins = bins_for_measures.get(measure, get_bins_for_diff(df[measure_in_df]))
        x = [f"{x}" for x in bins] + ["", "ERROR"]

        df_hist = pd.DataFrame(columns=["format", "category"] + x)
        hist_row_count = 0

        for fmt in formats:
            fig, ax = plt.subplots()
            ax.xaxis.set_major_formatter("{x:.1f}")
@@ -57,9 +76,9 @@ def create_histograms(
                df_slice = df[data_mask]
                error_mask = df_slice["result"] == "ERROR"
                n_errors = np.sum(error_mask)
                df_hist = df_slice[np.logical_not(error_mask)]
                df_slice = df_slice[np.logical_not(error_mask)]

                counts, _ = np.histogram(df_hist[measure_in_df], bins)
                counts, _ = np.histogram(df_slice[measure_in_df], bins)

                data = np.concatenate([counts, [0], [n_errors], [0]])
                ax.bar(
@@ -71,9 +90,14 @@ def create_histograms(
                    linewidth=0.5,
                    label=cat,
                    bottom=bottom,
                    color=COLORS_FOR_CATEGORIES[cat],
                )
                bottom += data

                hist_row = [fmt, cat] + list(counts) + [0] + [0, n_errors]
                df_hist.loc[hist_row_count] = hist_row
                hist_row_count += 1

            # Histogram layout
            ax.set_title(fmt)
            ax.legend(loc="best")
@@ -94,6 +118,11 @@ def create_histograms(
                plt.savefig(image_path)
                plt.close(fig)

        if write_out_histograms:
            df_hist.to_csv(
                output_folder.joinpath(f"histogram_{measure}.csv"), index=False
            )

    if display_only:
        plt.show()

@@ -133,6 +162,11 @@ Use this for visualising diff scores.""",
        default="",
        help="Common suffix to use when collecting measures from the input csv file",
    )
    parser.add_argument(
        "--write-out-histograms",
        action="store_true",
        help="Write out the histogram values to csv",
    )
    args = parser.parse_args()
    df = pd.read_csv(args.csv_report)

@@ -152,4 +186,5 @@ Use this for visualising diff scores.""",
        args.display_only,
        bins_for_measures,
        args.prefix,
        args.write_out_histograms,
    )