add jupyter notebook for measuring loudness levels (51221781) · Commits · IVAS Codec Public Collaboration / IVAS Codec

Binaural_Loudness.ipynb

0 → 100644

+313 −0

Original line number	Diff line number	Diff line
		%% Cell type:code id: tags:

		``` python
		# imports
		import pandas as pd
		import matplotlib.pyplot as plt
		from pathlib import Path
		import sys
		from tempfile import TemporaryDirectory
		from itertools import product
		from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
		import multiprocessing as mp

		IVAS_PROCESSING_SCRIPTS_DIR = "../ivas-processing-scripts"
		sys.path.append(IVAS_PROCESSING_SCRIPTS_DIR)

		from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness
		from ivas_processing_scripts.audiotools.audio import fromfile
		from ivas_processing_scripts.audiotools.audiofile import read, write
		from ivas_processing_scripts.audiotools.audioarray import resample
		from ivas_processing_scripts.utils import run

		%matplotlib inline
		```

		%% Cell type:code id: tags:

		``` python
		# constants
		# Command-line templates. The empty strings are placeholders; the trailing
		# comment on each one gives its list index and the value expected there.
		IVAS_COD_CMD = [
		    "./IVAS_cod",
		    "-mc",
		    "",  # 2 format
		    "",  # 3 bitrate
		    "",  # 4 fs
		    "",  # 5 in_file
		    "",  # 6 bs
		]
		IVAS_DEC_CMD = [
		    "./IVAS_dec",
		    "BINAURAL",
		    "",  # 2 fs
		    "",  # 3 bit
		    "",  # 4 out_file
		]
		IVAS_REND_CMD = [
		    "./IVAS_rend",
		    "-i",
		    "",  # 2 in_file
		    "-if",
		    "",  # 4 in_fmt
		    "-o",
		    "",  # 6 out_file
		    "-of",
		    "BINAURAL",
		]

		# Sampling rates to evaluate; the helper functions multiply these by 1000
		# before comparing them with the rate read from a file.
		IVAS_FS = [16, 32, 48]

		# Bitrates grouped by the multichannel mode label used in the result tables.
		MC_MODE_TO_BITRATE_51 = {
		    "McMASA": [13200, 16400, 24400, 32000],
		    "ParamMC": [48000, 64000, 80000],
		    "DiscMC": [96000, 128000, 160000, 192000, 256000, 384000, 512000],
		}
		# Inverse lookup (bitrate -> mode), derived from the mapping above so the
		# two tables cannot drift apart.
		MC_BITRATE_TO_MODE_51 = {
		    rate: mode for mode, rates in MC_MODE_TO_BITRATE_51.items() for rate in rates
		}

		# Directory that is searched for the *.wav input items.
		INPUT_51 = Path("~/ivas/items/5_1_loudness").expanduser()
		```

		%% Cell type:code id: tags:

		``` python
		# helper functions
		def process_ivas_codec(
		    format: str, bitrate: int, in_fs: int, out_fs: int, in_file: Path
		) -> tuple:
		    """Encode and decode ``in_file`` to binaural and measure the output loudness.

		    The input is read, resampled to ``in_fs`` when its rate differs, encoded
		    with the ``IVAS_COD_CMD`` template, decoded with the ``IVAS_DEC_CMD``
		    template and the decoded file is passed to ``get_loudness``. All
		    intermediate files live in a temporary directory that is removed on exit.

		    Args:
		        format: Value placed in the encoder's format slot (after ``-mc``).
		            NOTE: shadows the ``format`` builtin; the name is kept so existing
		            callers are unaffected.
		        bitrate: Value placed in the encoder's bitrate slot.
		        in_fs: Encoder sampling rate; multiplied by 1000 before it is compared
		            with the rate read from ``in_file``.
		        out_fs: Value placed in the decoder's sampling-rate slot.
		        in_file: Path of the input audio file.

		    Returns:
		        The value returned by ``get_loudness`` unchanged. The evaluation cell
		        unpacks it into three values and keeps the first as the loudness.
		    """
		    target_fs = in_fs * 1000

		    with TemporaryDirectory() as tmp_dir:
		        work_dir = Path(tmp_dir)
		        tmp_in = work_dir / f"{in_file.stem}_in.wav"
		        tmp_bs = work_dir / f"{in_file.stem}.192"
		        tmp_out = work_dir / f"{in_file.stem}_out.wav"

		        # Always write a working copy so the encoder input matches in_fs,
		        # resampling only when the file's own rate differs.
		        data, fs = read(in_file)
		        if fs != target_fs:
		            data = resample(data, fs, target_fs)
		        write(tmp_in, data, target_fs)

		        # Fill template slots 2-6: format, bitrate, fs, in_file, bitstream.
		        cod_cmd = list(IVAS_COD_CMD)
		        cod_cmd[2:7] = [format, str(bitrate), str(in_fs), str(tmp_in), str(tmp_bs)]
		        run(cod_cmd)

		        # Fill template slots 2-4: fs, bitstream, out_file.
		        dec_cmd = list(IVAS_DEC_CMD)
		        dec_cmd[2:5] = [str(out_fs), str(tmp_bs), str(tmp_out)]
		        run(dec_cmd)

		        # Measure before leaving the block, while the decoded file still exists.
		        audio = fromfile("BINAURAL", tmp_out)
		        return get_loudness(audio)


		def process_ivas_rend(format: str, in_fs: int, in_file: Path) -> tuple:
		    """Render ``in_file`` to binaural with the renderer and measure its loudness.

		    The input is read, resampled to ``in_fs`` when its rate differs, passed
		    through the ``IVAS_REND_CMD`` template and the rendered file is handed to
		    ``get_loudness``. All intermediate files live in a temporary directory
		    that is removed on exit.

		    Args:
		        format: Value placed in the renderer's input-format slot (after
		            ``-if``). NOTE: shadows the ``format`` builtin; the name is kept
		            so existing callers are unaffected.
		        in_fs: Sampling rate of the renderer input; multiplied by 1000 before
		            it is compared with the rate read from ``in_file``.
		        in_file: Path of the input audio file.

		    Returns:
		        The value returned by ``get_loudness`` unchanged. The evaluation cell
		        unpacks it into three values and keeps the first as the loudness.
		    """
		    target_fs = in_fs * 1000

		    with TemporaryDirectory() as tmp_dir:
		        work_dir = Path(tmp_dir)
		        tmp_in = work_dir / f"{in_file.stem}_in.wav"
		        tmp_out = work_dir / f"{in_file.stem}_out.wav"

		        # Always write a working copy so the renderer input matches in_fs,
		        # resampling only when the file's own rate differs.
		        data, fs = read(in_file)
		        if fs != target_fs:
		            data = resample(data, fs, target_fs)
		        write(tmp_in, data, target_fs)

		        # Fill the template slots: 2 = in_file, 4 = in_fmt, 6 = out_file.
		        cmd = list(IVAS_REND_CMD)
		        cmd[2] = str(tmp_in)
		        cmd[4] = format
		        cmd[6] = str(tmp_out)
		        run(cmd)

		        # Measure before leaving the block, while the rendered file still exists.
		        audio = fromfile("BINAURAL", tmp_out)
		        return get_loudness(audio)
		```

		%% Cell type:code id: tags:

		``` python
		# dataframe setup
		# Column layout of the codec results; the final entry holds the measurement.
		DF_COLS_CODEC = [
		    "MC Format",
		    "MC Mode",
		    "Input File",
		    "Bitrate",
		    "Sampling rate (input)",
		    "Sampling rate (output)",
		    "Loudness",
		]
		# Column layout of the renderer results.
		DF_COLS_RENDERER = ["MC Format", "Input File", "Sampling rate (input)", "Loudness"]

		# Start both result tables empty, with only their column headers defined.
		df_codec, df_renderer = (
		    pd.DataFrame(columns=column_names)
		    for column_names in (DF_COLS_CODEC, DF_COLS_RENDERER)
		)
		```

		%% Cell type:code id: tags:

		``` python
		# Codec Evaluation
		# If changing to ProcessPoolExecutor, macOS needs the "fork" context to work in a notebook
		# In this case ThreadPoolExecutor should suffice for the mostly I/O bound workload
		# with ProcessPoolExecutor(mp_context=mp.get_context("fork")) as e:
		with ThreadPoolExecutor() as e:
		    futures = {}
		    # sorted() keeps the row order reproducible; glob order is filesystem-dependent
		    for f in sorted(INPUT_51.glob("*.wav")):
		        for bitrate, mc_mode in MC_BITRATE_TO_MODE_51.items():
		            for in_fs, out_fs in product(IVAS_FS, IVAS_FS):
		                # store the futures in a dict with args so we can fetch both when the task is complete
		                task = e.submit(process_ivas_codec, "5_1", bitrate, in_fs, out_fs, f)
		                futures[task] = ["5_1", mc_mode, f.stem, bitrate, in_fs, out_fs]

		# collect results (leaving the executor block has waited for every task)
		records = []
		for future, args in futures.items():
		    # only the first of the three returned values is the loudness we tabulate
		    loudness, _, _ = future.result()
		    records.append(dict(zip(DF_COLS_CODEC, [*args, loudness])))

		# Build the table in one step instead of concatenating row by row: this
		# avoids copying the frame for every result, avoids concatenating onto an
		# empty frame, and makes re-running the cell replace the rows rather than
		# append duplicates.
		df_codec = pd.DataFrame.from_records(records, columns=DF_COLS_CODEC)
		```

		%% Cell type:code id: tags:

		``` python
		# Save the codec results as a Markdown table (row index omitted), then show
		# the frame as the cell output.
		codec_table_path = Path("./codec_loudness.md")
		df_codec.to_markdown(codec_table_path, index=False)
		df_codec
		```

		%% Cell type:code id: tags:

		``` python
		# Renderer Evaluation
		# If changing to ProcessPoolExecutor, macOS needs the "fork" context to work in a notebook
		# In this case ThreadPoolExecutor should suffice for the mostly I/O bound workload
		# with ProcessPoolExecutor(mp_context=mp.get_context("fork")) as e:
		with ThreadPoolExecutor() as e:
		    futures = {}
		    # sorted() keeps the row order reproducible; glob order is filesystem-dependent
		    for f in sorted(INPUT_51.glob("*.wav")):
		        for in_fs in IVAS_FS:
		            # store the futures in a dict with args so we can fetch both when the task is complete
		            task = e.submit(process_ivas_rend, "5_1", in_fs, f)
		            futures[task] = ["5_1", f.stem, in_fs]

		# collect results (leaving the executor block has waited for every task)
		records = []
		for future, args in futures.items():
		    # only the first of the three returned values is the loudness we tabulate
		    loudness, _, _ = future.result()
		    records.append(dict(zip(DF_COLS_RENDERER, [*args, loudness])))

		# Build the table in one step instead of concatenating row by row: this
		# avoids copying the frame for every result, avoids concatenating onto an
		# empty frame, and makes re-running the cell replace the rows rather than
		# append duplicates.
		df_renderer = pd.DataFrame.from_records(records, columns=DF_COLS_RENDERER)
		```

		%% Cell type:code id: tags:

		``` python
		# Save the renderer results as a Markdown table (row index omitted), then
		# show the frame as the cell output.
		renderer_table_path = Path("./renderer_loudness.md")
		df_renderer.to_markdown(renderer_table_path, index=False)
		df_renderer
		```

		%% Cell type:code id: tags:

		``` python
		# get loudness difference to -26 LKFS and IVAS_rend output level
		# Attach the renderer measurement of the same item, input rate and format to
		# every codec row, under its own column name.
		rend_reference = df_renderer.rename(columns={"Loudness": "Loudness (IVAS_rend)"})
		df = df_codec.copy().merge(
		    rend_reference,
		    on=["Input File", "Sampling rate (input)", "MC Format"],
		)
		df["Loudness Difference (-26 LKFS)"] = df["Loudness"] - (-26)
		df["Loudness Difference (IVAS_rend)"] = df["Loudness"] - df["Loudness (IVAS_rend)"]
		df.to_markdown(Path("./codec_vs_rend_loudness.md"), index=False)

		# compute mean loudness over all files
		# The per-file column is removed first so that it is not part of the average.
		df = df.drop(columns="Input File")
		condition_columns = [
		    "MC Format",
		    "MC Mode",
		    "Bitrate",
		    "Sampling rate (input)",
		    "Sampling rate (output)",
		]
		per_condition = df.groupby(condition_columns)
		df = per_condition.mean().reset_index()
		df.to_markdown(Path("./mean_binaural_loudness.md"), index=False)
		df
		```

		%% Output



		MC Format MC Mode Bitrate Sampling rate (input) \
		0 5_1 DiscMC 96000 16
		1 5_1 DiscMC 96000 16
		2 5_1 DiscMC 96000 16
		3 5_1 DiscMC 96000 32
		4 5_1 DiscMC 96000 32
		.. ... ... ... ...
		121 5_1 ParamMC 80000 32
		122 5_1 ParamMC 80000 32
		123 5_1 ParamMC 80000 48
		124 5_1 ParamMC 80000 48
		125 5_1 ParamMC 80000 48

		Sampling rate (output) Loudness Loudness (IVAS_rend) \
		0 16 -21.760427 -21.579920
		1 32 -21.885873 -21.579920
		2 48 -22.188446 -21.579920
		3 16 -21.726489 -21.420983
		4 32 -21.584996 -21.420983
		.. ... ... ...
		121 32 -20.672214 -21.420983
		122 48 -20.659830 -21.420983
		123 16 -21.071618 -21.485968
		124 32 -20.602037 -21.485968
		125 48 -20.526470 -21.485968

		Loudness Difference (-26 LKFS) Loudness Difference (IVAS_rend)
		0 4.239573 -0.180507
		1 4.114127 -0.305953
		2 3.811554 -0.608525
		3 4.273511 -0.305506
		4 4.415004 -0.164013
		.. ... ...
		121 5.327786 0.748769
		122 5.340170 0.761153
		123 4.928382 0.414350
		124 5.397963 0.883931
		125 5.473530 0.959498

		[126 rows x 9 columns]