make first minimal test config run for MASA P800-8 (9d2a29fc) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

experiments/selection/P800-8/config/P800-8.yml

0 → 100644

+307 −0

Original line number	Diff line number	Diff line
		---
		################################################
		# General configuration
		################################################

		name: P800-8
		master_seed: 5
		prerun_seed: 2
		multiprocessing: false

		input_path: "experiments/selection/P800-8/proc_input"
		output_path: "experiments/selection/P800-8/proc_output"

		################################################
		### Input configuration
		################################################
		input:
		fmt: "FOA"
		fs: 48000

		################################################
		### Pre-processing on individual items
		################################################
		preprocessing:
		mask: "HP50"
		loudness: -26
		window: 100

		################################################
		### Pre-processing on whole signal(s)
		################################################
		preprocessing_2:
		concatenate_input: true
		# concatenation_order: []
		preamble: 10000
		preamble_noise: true

		#################################################
		### Bitstream processing
		#################################################

		################################################
		### Configuration for conditions under test
		################################################
		conditions_to_generate:
		### Reference and anchor conditions ##########################
		# c01:
		# type: ref
		# c02:
		# type: mnru
		# q: 28
		# c03:
		# type: mnru
		# q: 24
		# c04:
		# type: mnru
		# q: 20
		# c05:
		# type: mnru
		# q: 16
		# c06:
		# type: esdru
		# alpha: 0.7
		# c07:
		# type: esdru
		# alpha: 0.4
		# c08:
		# type: esdru
		# alpha: 0.1

		# ### EVS condition ################################
		# c09:
		# type: evs
		# bitrates:
		# - 7200
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# c10:
		# type: evs
		# bitrates:
		# - 8000
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# c11:
		# type: evs
		# bitrates:
		# - 9600
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# c12:
		# type: evs
		# bitrates:
		# - 13200
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# c13:
		# type: evs
		# bitrates:
		# - 16400
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# c14:
		# type: evs
		# bitrates:
		# - 24400
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# c15:
		# type: evs
		# bitrates:
		# - 32000
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# c16:
		# type: evs
		# bitrates:
		# - 7200
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# tx:
		# type: "FER"
		# error_rate: 5
		# c17:
		# type: evs
		# bitrates:
		# - 8000
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# tx:
		# type: "FER"
		# error_rate: 5
		# c18:
		# type: evs
		# bitrates:
		# - 9600
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# tx:
		# type: "FER"
		# error_rate: 5
		# c19:
		# type: evs
		# bitrates:
		# - 13200
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# tx:
		# type: "FER"
		# error_rate: 5
		# c20:
		# type: evs
		# bitrates:
		# - 16400
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# tx:
		# type: "FER"
		# error_rate: 5
		# c21:
		# type: evs
		# bitrates:
		# - 24400
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# tx:
		# type: "FER"
		# error_rate: 5
		# c22:
		# type: evs
		# bitrates:
		# - 32000
		# cod:
		# opts: ["-max_band", "FB"]
		# dec:
		# tx:
		# type: "FER"
		# error_rate: 5

		# ### IVAS condition ###############################
		# c23:
		# type: ivas
		# bitrates:
		# - 13200
		# cod:
		# dec:
		# fmt: "STEREO"
		# c24:
		# type: ivas
		# bitrates:
		# - 16400
		# cod:
		# dec:
		# fmt: "STEREO"
		# c25:
		# type: ivas
		# bitrates:
		# - 24400
		# cod:
		# dec:
		# fmt: "STEREO"
		# c26:
		# type: ivas
		# bitrates:
		# - 32000
		# cod:
		# dec:
		# fmt: "STEREO"
		# c27:
		# type: ivas
		# bitrates:
		# - 48000
		# cod:
		# dec:
		# fmt: "STEREO"
		# c28:
		# type: ivas
		# bitrates:
		# - 13200
		# cod:
		# dec:
		# fmt: "STEREO"
		# tx:
		# type: "FER"
		# error_rate: 5
		# c29:
		# type: ivas
		# bitrates:
		# - 16400
		# cod:
		# dec:
		# fmt: "STEREO"
		# tx:
		# type: "FER"
		# error_rate: 5
		# c30:
		# type: ivas
		# bitrates:
		# - 24400
		# cod:
		# dec:
		# fmt: "STEREO"
		# tx:
		# type: "FER"
		# error_rate: 5
		# c31:
		# type: ivas
		# bitrates:
		# - 32000
		# cod:
		# dec:
		# fmt: "STEREO"
		# tx:
		# type: "FER"
		# error_rate: 5
		# c32:
		# type: ivas
		# bitrates:
		# - 48000
		# cod:
		# dec:
		# fmt: "STEREO"
		# tx:
		# type: "FER"
		# error_rate: 5

		# c33:
		# type: ivas
		# bitrates:
		# - 24400
		# cod:
		# opts: ["-dtx"]
		# dec:
		# fmt: "STEREO"

		c34:
		type: ivas
		bitrates:
		- 13200
		cod:
		fmt: "MASA2"
		opts: ["-dtx"]
		dec:
		fmt: "MASA2"
		tx:
		type: "FER"
		error_rate: 5

		################################################
		### Post-processing
		################################################
		postprocessing:
		fmt: "BINAURAL"
		fs: 48000
		loudness: -26

ivas_processing_scripts/audiotools/convert/init.py

+14 −2

Original line number	Diff line number	Diff line
		@@ -33,6 +33,7 @@
		import logging
		from pathlib import Path, PurePath
		from typing import Optional, Union
		from numpy import empty

		from ivas_processing_scripts.audiotools import audio, audioarray, metadata
		from ivas_processing_scripts.audiotools.audiofile import write
		@@ -72,6 +73,9 @@ def convert_file(
		if not isinstance(in_fmt, PurePath) and in_fmt.startswith("META"):
		input = metadata.Metadata(in_file)
		else:
		if in_fmt.startswith("MASA") and in_meta is None:
		# if no MD file is provided, default to the audio file name (including the .wav or .pcm extension!) + ".met"
		in_meta = [in_file.parent / (in_file.name + ".met")]
		input = audio.fromfile(in_fmt, in_file, in_fs, in_meta)

		# try to set reasonable defaults if missing
		@@ -89,6 +93,14 @@ def convert_file(
		out_fmt = input.name

		output = audio.fromtype(out_fmt)

		if isinstance(output, audio.MetadataAssistedSpatialAudio):
		# create dummy audio array to allow inference of MASA mode
		num_tcs = int(output.name[-1])
		output.audio = empty((1, num_tcs))

		# fabricate metadata file name
		output.metadata_files = [Path(out_file).with_suffix(".met")]
		if isinstance(output, audio.ObjectBasedAudio):
		try:
		output.object_pos = input.object_pos
		@@ -291,8 +303,8 @@ def format_conversion(
		"""Convert one audio format to another"""

		# validation
		if isinstance(output, audio.MetadataAssistedSpatialAudio):
		raise NotImplementedError("MASA is not supported as an output for rendering!")
		if isinstance(output, audio.MetadataAssistedSpatialAudio) and not isinstance(input, audio.SceneBasedAudio):
		raise NotImplementedError("Can only convert to MASA from SBA")

		if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name:
		raise NotImplementedError(

ivas_processing_scripts/audiotools/convert/scenebased.py

+21 −0

Original line number	Diff line number	Diff line
		@@ -53,6 +53,7 @@ from ivas_processing_scripts.audiotools.convert.binaural import binaural_fftconv
		from ivas_processing_scripts.audiotools.EFAP import EFAP
		from ivas_processing_scripts.audiotools.rotation import Quat2RotMat, SHrotmatgen
		from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu
		from ivas_processing_scripts.audiotools.wrappers.masaAnalyzer import masaAnalyzer

		""" SceneBasedAudio functions """

		@@ -75,6 +76,11 @@ def convert_scenebased(
		# SBA -> SBA
		elif isinstance(out, audio.SceneBasedAudio):
		render_sba_to_sba(sba, out)

		# SBA -> MASA
		elif isinstance(out, audio.MetadataAssistedSpatialAudio) and sba.name == "FOA":
		render_sba_to_masa(sba, out)

		else:
		raise NotImplementedError(
		f"Conversion from {sba.name} to {out.name} is unsupported!"
		@@ -177,6 +183,21 @@ def render_sba_to_sba(
		zero_vert_channels(sba_out)


		def render_sba_to_masa(
		    sba_in: audio.SceneBasedAudio,
		    masa_out: audio.MetadataAssistedSpatialAudio,
		) -> None:
		    """Convert an FOA signal to MASA by running the MASA analyzer on it.

		    The number of transport channels is taken from the second dimension of
		    ``masa_out.audio`` and the metadata output path from
		    ``masa_out.metadata_files[0]``, so both must be set before calling.
		    On return ``masa_out.audio`` is replaced by the analyzer's audio.

		    Parameters
		    ----------
		    sba_in: audio.SceneBasedAudio
		        Scene-based input audio, has to be FOA
		    masa_out: audio.MetadataAssistedSpatialAudio
		        MASA output object, modified in place

		    Raises
		    ------
		    NotImplementedError
		        If the input is not FOA
		    """
		    # explicit check instead of an assert, which would be stripped under "python -O"
		    if sba_in.name != "FOA":
		        raise NotImplementedError(
		            f"Conversion from {sba_in.name} to MASA is unsupported, only FOA input is possible!"
		        )

		    # two directions are only possible from HOA2, so FOA input always uses one
		    num_dirs = 1
		    num_tcs = masa_out.audio.shape[1]
		    md_out_path = masa_out.metadata_files[0]

		    masa = masaAnalyzer(sba_in, num_tcs, num_dirs, md_out_path)
		    masa_out.audio = masa.audio


		def rotate_sba(
		sba: audio.SceneBasedAudio,
		trajectory: str,

ivas_processing_scripts/audiotools/wrappers/masaRenderer.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -87,7 +87,7 @@ def masaRenderer(
		str(binary),
		output_mode,
		"", # 2 -> inputPcm
		str(masa.metadata_files.resolve()),
		str(masa.metadata_file.resolve()),
		"", # 4 -> outputPcm
		]

ivas_processing_scripts/processing/chains.py

+15 −0

Original line number	Diff line number	Diff line
		@@ -360,6 +360,21 @@ def get_processing_chain(
		else:
		preamble = 0

		# if the encoding format differs from the format after the preprocessing, add a format conversion step before the codec
		if tmp_in_fmt != cod_cfg["fmt"]:
		chain["processes"].append(
		Preprocessing(
		{
		"in_fs": tmp_in_fs,
		"in_fmt": tmp_in_fmt,
		"out_fs": tmp_in_fs,
		"out_fmt": cod_cfg["fmt"],
		"multiprocessing": cfg.multiprocessing,
		}
		)
		)
		tmp_in_fmt = cod_cfg["fmt"]

		chain["processes"].append(
		IVAS(
		{