Commit 8e29b2b9 authored by Archit Tamarapu
Browse files

[ci] add script to parse loudness measurement data

parent 925bcbb2
Loading
Loading
Loading
Loading
+105 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pathlib import Path

# CSV file holding the loudness measurements. The name is relative, so it is
# looked up in the current working directory when the script is run.
LOUDNESS_DATA_FILENAME = "loudness.csv"


def plot_loudness_by_bandwidth(df, in_fmt, out_fmt, out_dir):
    """Plot output loudness against bitrate and save the figure as a PNG.

    One panel is drawn per unique value of the ``bandwidth`` column of ``df``.
    Within each panel the rows matching ``in_fmt``/``out_fmt`` are drawn as a
    scatter plot, using a circle marker for ``dtx == True`` and a square
    marker for ``dtx == False``. The ``input_loudness`` of the first matching
    row is drawn as a dashed horizontal reference line.

    Args:
        df: DataFrame with the columns ``format``, ``outformat``, ``bitrate``,
            ``bandwidth``, ``dtx``, ``input_loudness`` and ``output_loudness``.
        in_fmt: Input format to select; ``None`` disables the input-format
            filter (the file name and title then contain the text "None").
        out_fmt: Output format to select.
        out_dir: ``pathlib.Path`` of the directory that receives
            ``{in_fmt}_to_{out_fmt}.png``.

    Returns:
        None. The plot is written to disk and the figure is closed afterwards.
    """
    # set output file
    out_file = out_dir.joinpath(f"{in_fmt}_to_{out_fmt}.png")

    # filter for the output format
    filtered_df = df[df["outformat"] == out_fmt].copy()

    # filter by input format if specified
    if in_fmt is not None:
        filtered_df = filtered_df[filtered_df["format"] == in_fmt]

    # the first matching row supplies the reference input loudness
    if filtered_df.empty:
        input_loudness = None
    else:
        input_loudness = filtered_df["input_loudness"].iloc[0]

    # fixed y range so that all generated plots are directly comparable
    y_min, y_max = -36, -16
    y_ticks = np.arange(y_min, y_max + 0.5, 0.5)

    # bitrates and bandwidths are taken from the unfiltered table so that every
    # plot shares the same x axis and the same set of panels
    bitrates = sorted(df["bitrate"].unique())
    bitrate_to_idx = {br: idx for idx, br in enumerate(bitrates)}
    bandwidths = df["bandwidth"].unique()

    # a legend is only useful when both DTX settings are present
    dtx_values = filtered_df["dtx"].unique()
    show_legend = len(dtx_values) > 1

    # one panel per bandwidth (at least one, so an empty table still yields a
    # valid figure); squeeze=False keeps `axes` two-dimensional for any count
    n_panels = max(len(bandwidths), 1)
    fig, axes = plt.subplots(
        1, n_panels, figsize=(6 * n_panels, 5), squeeze=False
    )

    try:
        for ax, bw in zip(axes[0], bandwidths):
            bw_data = filtered_df[filtered_df["bandwidth"] == bw]

            # plot for each dtx value
            for dtx_val in [True, False]:
                subset = bw_data[bw_data["dtx"] == dtx_val]
                if subset.empty:
                    continue

                # equal spacing for bitrates on x axis
                x_positions = [bitrate_to_idx[br] for br in subset["bitrate"]]
                y_values = subset["output_loudness"]

                marker = "o" if dtx_val else "s"
                label = f"dtx={dtx_val}" if show_legend else None
                ax.scatter(
                    x_positions, y_values, label=label, marker=marker, s=80, alpha=0.7
                )

            # plot input loudness as horizontal reference line
            if input_loudness is not None:
                label_input = "Input Loudness" if show_legend else None
                ax.axhline(
                    y=input_loudness,
                    color="red",
                    linestyle="--",
                    linewidth=2,
                    alpha=0.7,
                    label=label_input,
                )

            ax.set_xticks(range(len(bitrates)))
            ax.set_xticklabels(bitrates, rotation=45, ha="right")

            ax.set_ylim(y_min, y_max)
            ax.set_yticks(y_ticks)

            ax.set_xlabel("Bitrate (kbps)", fontsize=11)
            ax.set_ylabel("Output Loudness (LKFS)", fontsize=11)
            ax.set_title(str(bw).upper(), fontsize=12)
            ax.grid(True, alpha=0.3)

            # only show legend if there are multiple DTX values
            if show_legend:
                ax.legend()

        title = f"Output Loudness vs Bitrate : {in_fmt} to {out_fmt}"
        fig.suptitle(title, fontsize=14)

        # leave room at the top for the suptitle
        fig.tight_layout(rect=[0, 0, 1, 0.96])
        fig.savefig(out_file)
    finally:
        # release the figure; otherwise every call leaves one more open figure
        # registered with pyplot
        plt.close(fig)


# plots are written to a "plots" directory two levels up from this file's path
out_dir = Path(__file__).parent.parent / "plots"
out_dir.mkdir(parents=True, exist_ok=True)

# load the loudness measurements
df = pd.read_csv(LOUDNESS_DATA_FILENAME)

# one figure per (input format, output format) combination found in the data
output_formats = df["outformat"].unique()
for in_fmt in df["format"].unique():
    for out_fmt in output_formats:
        plot_loudness_by_bandwidth(df, in_fmt, out_fmt, out_dir)