Commit 51221781 authored by Archit Tamarapu
Browse files

add jupyter notebook for measuring loudness levels

parent 843dbdf7
Loading
Loading
Loading
Loading
Loading
+313 −0
Original line number Diff line number Diff line
%% Cell type:code id: tags:

``` python
# imports
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import sys
from tempfile import TemporaryDirectory
from itertools import product
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
import multiprocessing as mp

IVAS_PROCESSING_SCRIPTS_DIR = "../ivas-processing-scripts"
sys.path.append(IVAS_PROCESSING_SCRIPTS_DIR)

from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness
from ivas_processing_scripts.audiotools.audio import fromfile
from ivas_processing_scripts.audiotools.audiofile import read, write
from ivas_processing_scripts.audiotools.audioarray import resample
from ivas_processing_scripts.utils import run

%matplotlib inline
```

%% Cell type:code id: tags:

``` python
# constants
# Command-line templates for the IVAS binaries. The empty strings are
# placeholders; the helper functions copy a template and fill the placeholders
# by index. The trailing comments give each placeholder's index and meaning.
IVAS_COD_CMD = [
    "./IVAS_cod",
    "-mc",
    "",  # 2 format
    "",  # 3 bitrate
    "",  # 4 fs
    "",  # 5 in_file
    "",  # 6 bs
]
IVAS_DEC_CMD = [
    "./IVAS_dec",
    "BINAURAL",
    "",  # 2 fs
    "",  # 3 bit
    "",  # 4 out_file
]
IVAS_REND_CMD = [
    "./IVAS_rend",
    "-i",
    "",  # 2 in_file
    "-if",
    "",  # 4 in_fmt
    "-o",
    "",  # 6 out_file
    "-of",
    "BINAURAL",
]
# sampling rates in kHz (the helper functions multiply them by 1000 before
# comparing against the rate read from an input file)
IVAS_FS = [16, 32, 48]
# bitrates evaluated for 5.1 input, grouped by the MC mode they are labelled with
MC_MODE_TO_BITRATE_51 = {
    "McMASA": [13200, 16400, 24400, 32000],
    "ParamMC": [48000, 64000, 80000],
    "DiscMC": [96000, 128000, 160000, 192000, 256000, 384000, 512000],
}
# Reverse lookup (bitrate -> MC mode). Derived from the table above instead of
# being written out by hand, so the two mappings cannot get out of sync.
# Insertion order follows the mode order and, within a mode, the bitrate order.
MC_BITRATE_TO_MODE_51 = {
    bitrate: mc_mode
    for mc_mode, bitrates in MC_MODE_TO_BITRATE_51.items()
    for bitrate in bitrates
}

# directory that is searched for the 5.1 input *.wav items
INPUT_51 = Path("~/ivas/items/5_1_loudness").expanduser()
```

%% Cell type:code id: tags:

``` python
# helper functions
def process_ivas_codec(
    format: str, bitrate: int, in_fs: int, out_fs: int, in_file: Path
) -> tuple:
    """Encode and decode one item with the IVAS codec and measure its loudness.

    The input file is read, resampled to ``in_fs`` kHz if its sampling rate
    differs, and written to a temporary directory. It is then encoded with the
    ``IVAS_COD_CMD`` template, decoded with the ``IVAS_DEC_CMD`` template, and
    the decoded file is loaded as "BINAURAL" audio and passed to
    ``get_loudness``. All intermediate files are removed when the temporary
    directory is cleaned up.

    Parameters
    ----------
    format : str
        Value placed in the encoder's format slot (index 2 of the template).
        The name shadows the builtin but is kept so existing callers are
        unaffected.
    bitrate : int
        Value placed in the encoder's bitrate slot.
    in_fs : int
        Input sampling rate in kHz, used for resampling and passed to the
        encoder.
    out_fs : int
        Sampling rate passed to the decoder.
    in_file : Path
        Audio file to process.

    Returns
    -------
    tuple
        The result of ``get_loudness`` unchanged. The evaluation code unpacks
        it as three values and uses the first as the loudness, so this is not
        a bare float. NOTE(review): the exact container type and the meaning
        of the other two values should be confirmed against ``get_loudness``.
    """
    in_rate_hz = in_fs * 1000

    with TemporaryDirectory() as tmp_dir_name:
        tmp_dir = Path(tmp_dir_name)
        tmp_in = tmp_dir.joinpath(f"{in_file.stem}_in.wav")
        tmp_bs = tmp_dir.joinpath(f"{in_file.stem}.192")
        tmp_out = tmp_dir.joinpath(f"{in_file.stem}_out.wav")

        # bring the item to the requested input sampling rate
        data, fs = read(in_file)
        if fs != in_rate_hz:
            data = resample(data, fs, in_rate_hz)
        write(tmp_in, data, in_rate_hz)

        # fill the encoder template on a copy so the shared constant stays untouched
        cod_cmd = IVAS_COD_CMD[:]
        cod_cmd[2] = format
        cod_cmd[3] = str(bitrate)
        cod_cmd[4] = str(in_fs)
        cod_cmd[5] = str(tmp_in)
        cod_cmd[6] = str(tmp_bs)

        run(cod_cmd)

        # same for the decoder template
        dec_cmd = IVAS_DEC_CMD[:]
        dec_cmd[2] = str(out_fs)
        dec_cmd[3] = str(tmp_bs)
        dec_cmd[4] = str(tmp_out)

        run(dec_cmd)

        # the measurement must happen before the temporary directory is removed
        audio = fromfile("BINAURAL", tmp_out)

        return get_loudness(audio)


def process_ivas_rend(format: str, in_fs: int, in_file: Path):
    """Render one item with the IVAS renderer and measure its loudness.

    The input file is read, resampled to ``in_fs`` kHz if its sampling rate
    differs, and written to a temporary directory. The ``IVAS_REND_CMD``
    template is filled with the temporary input path, ``format`` as the input
    format and the temporary output path, and then executed. The rendered file
    is loaded as "BINAURAL" audio and the result of ``get_loudness`` for it is
    returned unchanged.
    """
    target_rate = in_fs * 1000

    with TemporaryDirectory() as scratch:
        scratch_dir = Path(scratch)
        rend_input = scratch_dir / f"{in_file.stem}_in.wav"
        rend_output = scratch_dir / f"{in_file.stem}_out.wav"

        # bring the item to the requested sampling rate
        samples, source_rate = read(in_file)
        if source_rate != target_rate:
            samples = resample(samples, source_rate, target_rate)
        write(rend_input, samples, target_rate)

        # fill the placeholder slots on a copy of the shared template
        rend_cmd = list(IVAS_REND_CMD)
        slot_values = ((2, str(rend_input)), (4, format), (6, str(rend_output)))
        for slot, value in slot_values:
            rend_cmd[slot] = value
        run(rend_cmd)

        # measure while the temporary directory still exists
        rendered = fromfile("BINAURAL", rend_output)
        return get_loudness(rendered)
```

%% Cell type:code id: tags:

``` python
# dataframe setup
# column layout of the codec results table (one row per processed item/condition)
DF_COLS_CODEC = [
    "MC Format",
    "MC Mode",
    "Input File",
    "Bitrate",
    "Sampling rate (input)",
    "Sampling rate (output)",
    "Loudness",
]
# column layout of the renderer results table
DF_COLS_RENDERER = [
    "MC Format",
    "Input File",
    "Sampling rate (input)",
    "Loudness",
]
# both tables start out empty, carrying only their column headers
df_codec, df_renderer = (
    pd.DataFrame(columns=column_names)
    for column_names in (DF_COLS_CODEC, DF_COLS_RENDERER)
)
```

%% Cell type:code id: tags:

``` python
# Codec Evaluation
# If changing to ProcessPoolExecutor, macOS needs the "fork" context to work in a notebook
# In this case ThreadPoolExecutor should suffice for the mostly I/O bound workload
# with ProcessPoolExecutor(mp_context=mp.get_context("fork")) as e:
with ThreadPoolExecutor() as e:
    futures = dict()
    for f in INPUT_51.glob("*.wav"):
        for bitrate, mc_mode in MC_BITRATE_TO_MODE_51.items():
            for in_fs, out_fs in product(IVAS_FS, IVAS_FS):
                # store the futures in a dict with args so we can fetch both when the task is complete
                futures[
                    e.submit(process_ivas_codec, "5_1", bitrate, in_fs, out_fs, f)
                ] = ["5_1", mc_mode, f.stem, bitrate, in_fs, out_fs]

    # collect results in submission order; result() blocks until the task is
    # done and re-raises any exception the task hit
    records = []
    for future, args in futures.items():
        # only the first of the three returned values (the loudness) is kept
        loudness, _, _ = future.result()
        records.append(dict(zip(DF_COLS_CODEC, [*args, loudness])))

# Build the table once from all rows instead of concatenating row by row:
# this avoids copying the growing frame for every result, does not depend on
# how pandas treats the empty placeholder frame in a concat, and makes
# re-running this cell replace the table rather than append duplicate rows.
df_codec = pd.DataFrame.from_records(records, columns=DF_COLS_CODEC)
```

%% Cell type:code id: tags:

``` python
# write the per-item codec loudness table as markdown, then show it as the cell output
codec_table_path = Path("./codec_loudness.md")
df_codec.to_markdown(codec_table_path, index=False)
df_codec
```

%% Cell type:code id: tags:

``` python
# Renderer Evaluation
# If changing to ProcessPoolExecutor, macOS needs the "fork" context to work in a notebook
# In this case ThreadPoolExecutor should suffice for the mostly I/O bound workload
# with ProcessPoolExecutor(mp_context=mp.get_context("fork")) as e:
with ThreadPoolExecutor() as e:
    # map each future to the descriptive columns of its result row
    futures = dict()
    for f in INPUT_51.glob("*.wav"):
        for in_fs in IVAS_FS:
            futures[e.submit(process_ivas_rend, "5_1", in_fs, f)] = [
                "5_1",
                f.stem,
                in_fs,
            ]

    # collect results in submission order; result() blocks until the task is
    # done and re-raises any exception the task hit
    records = []
    for future, args in futures.items():
        # only the first of the three returned values (the loudness) is kept
        loudness, _, _ = future.result()
        records.append(dict(zip(DF_COLS_RENDERER, [*args, loudness])))

# Build the table once from all rows instead of concatenating row by row:
# this avoids copying the growing frame for every result, does not depend on
# how pandas treats the empty placeholder frame in a concat, and makes
# re-running this cell replace the table rather than append duplicate rows.
df_renderer = pd.DataFrame.from_records(records, columns=DF_COLS_RENDERER)
```

%% Cell type:code id: tags:

``` python
# write the per-item renderer loudness table as markdown, then show it as the cell output
renderer_table_path = Path("./renderer_loudness.md")
df_renderer.to_markdown(renderer_table_path, index=False)
df_renderer
```

%% Cell type:code id: tags:

``` python
# get loudness difference to -26 LKFS and IVAS_rend output level
target_lkfs = -26
# renderer loudness gets its own column name so it can sit next to the codec loudness
rend_reference = df_renderer.rename(columns={"Loudness": "Loudness (IVAS_rend)"})
merge_keys = ["Input File", "Sampling rate (input)", "MC Format"]

# every codec row is paired with the renderer row for the same item, input rate and format
df = df_codec.copy().merge(rend_reference, on=merge_keys)
df["Loudness Difference (-26 LKFS)"] = df["Loudness"] - target_lkfs
df["Loudness Difference (IVAS_rend)"] = df["Loudness"] - df["Loudness (IVAS_rend)"]
df.to_markdown(Path("./codec_vs_rend_loudness.md"), index=False)

# compute mean loudness over all files
condition_keys = [
    "MC Format",
    "MC Mode",
    "Bitrate",
    "Sampling rate (input)",
    "Sampling rate (output)",
]
# the file name is dropped first so that only numeric columns are averaged
per_condition = df.drop(columns="Input File").groupby(condition_keys)
df = per_condition.mean().reset_index()
df.to_markdown(Path("./mean_binaural_loudness.md"), index=False)
df
```

%% Output


    
        MC Format  MC Mode  Bitrate  Sampling rate (input)  \
    0         5_1   DiscMC    96000                     16
    1         5_1   DiscMC    96000                     16
    2         5_1   DiscMC    96000                     16
    3         5_1   DiscMC    96000                     32
    4         5_1   DiscMC    96000                     32
    ..        ...      ...      ...                    ...
    121       5_1  ParamMC    80000                     32
    122       5_1  ParamMC    80000                     32
    123       5_1  ParamMC    80000                     48
    124       5_1  ParamMC    80000                     48
    125       5_1  ParamMC    80000                     48
    
         Sampling rate (output)   Loudness  Loudness (IVAS_rend)  \
    0                        16 -21.760427            -21.579920
    1                        32 -21.885873            -21.579920
    2                        48 -22.188446            -21.579920
    3                        16 -21.726489            -21.420983
    4                        32 -21.584996            -21.420983
    ..                      ...        ...                   ...
    121                      32 -20.672214            -21.420983
    122                      48 -20.659830            -21.420983
    123                      16 -21.071618            -21.485968
    124                      32 -20.602037            -21.485968
    125                      48 -20.526470            -21.485968
    
         Loudness Difference (-26 LKFS)  Loudness Difference (IVAS_rend)
    0                          4.239573                        -0.180507
    1                          4.114127                        -0.305953
    2                          3.811554                        -0.608525
    3                          4.273511                        -0.305506
    4                          4.415004                        -0.164013
    ..                              ...                              ...
    121                        5.327786                         0.748769
    122                        5.340170                         0.761153
    123                        4.928382                         0.414350
    124                        5.397963                         0.883931
    125                        5.473530                         0.959498
    
    [126 rows x 9 columns]