Commit 9d2a29fc authored by Jan Kiene
Browse files

make first minimal test config run for MASA P800-8

- only one IVAS condition tested currently
parent 4a75fbba
Loading
Loading
Loading
Loading
+307 −0
Original line number Diff line number Diff line
---
################################################
# General configuration
################################################

# Identifier of this experiment; the same label appears in the paths below.
name: 'P800-8'
# Seed values for the run (how each seed is consumed is not shown in this
# file -- confirm against the processing scripts).
master_seed: 5
prerun_seed: 2
# Multiprocessing is switched off for this configuration.
multiprocessing: false

# Directories for the items to process and for the generated output.
input_path: 'experiments/selection/P800-8/proc_input'
output_path: 'experiments/selection/P800-8/proc_output'

################################################
### Input configuration
################################################
input:
    # Audio format of the input items.
    fmt: 'FOA'
    # Sampling rate of the input items (presumably in Hz -- confirm).
    fs: 48000

################################################
### Pre-processing on individual items
################################################
preprocessing:
    # Filter mask applied per item (presumably a 50 Hz high-pass -- confirm).
    mask: 'HP50'
    # Loudness target per item (unit is not stated in this file -- confirm).
    loudness: -26
    # Window setting (unit is not stated in this file -- confirm).
    window: 100

################################################
### Pre-processing on whole signal(s)
################################################
preprocessing_2:
    # Process the input items as one concatenated signal.
    concatenate_input: true
    # Optional explicit ordering of the concatenated items; currently unset.
    # concatenation_order: []
    # Preamble length (unit is not stated in this file -- confirm).
    preamble: 10000
    # Enable noise for the preamble.
    preamble_noise: true

#################################################
### Bitstream processing
#################################################

################################################
### Configuration for conditions under test
################################################
conditions_to_generate:
    ### Reference and anchor conditions ##########################
    # c01:
    #     type: ref
    # c02:
    #     type: mnru
    #     q: 28
    # c03:
    #     type: mnru
    #     q: 24
    # c04:
    #     type: mnru
    #     q: 20
    # c05:
    #     type: mnru
    #     q: 16
    # c06:
    #     type: esdru
    #     alpha: 0.7
    # c07:
    #     type: esdru
    #     alpha: 0.4
    # c08:
    #     type: esdru
    #     alpha: 0.1

    # ### EVS condition ################################
    # c09:
    #     type: evs
    #     bitrates:
    #         - 7200
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    # c10:
    #     type: evs
    #     bitrates:
    #         - 8000
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    # c11:
    #     type: evs
    #     bitrates:
    #         - 9600
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    # c12:
    #     type: evs
    #     bitrates:
    #         - 13200
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    # c13:
    #     type: evs
    #     bitrates:
    #         - 16400
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    # c14:
    #     type: evs
    #     bitrates:
    #         - 24400
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    # c15:
    #     type: evs
    #     bitrates:
    #         - 32000
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    # c16:
    #     type: evs
    #     bitrates:
    #         - 7200
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    #     tx:
    #         type: "FER"
    #         error_rate: 5
    # c17:
    #     type: evs
    #     bitrates:
    #         - 8000
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    #     tx:
    #         type: "FER"
    #         error_rate: 5
    # c18:
    #     type: evs
    #     bitrates:
    #         - 9600
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    #     tx:
    #         type: "FER"
    #         error_rate: 5
    # c19:
    #     type: evs
    #     bitrates:
    #         - 13200
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    #     tx:
    #         type: "FER"
    #         error_rate: 5
    # c20:
    #     type: evs
    #     bitrates:
    #         - 16400
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    #     tx:
    #         type: "FER"
    #         error_rate: 5
    # c21:
    #     type: evs
    #     bitrates:
    #         - 24400
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    #     tx:
    #         type: "FER"
    #         error_rate: 5
    # c22:
    #     type: evs
    #     bitrates:
    #         - 32000
    #     cod:
    #         opts: ["-max_band", "FB"]
    #     dec:
    #     tx:
    #         type: "FER"
    #         error_rate: 5

    # ### IVAS condition ###############################
    # c23:
    #     type: ivas
    #     bitrates:
    #         - 13200
    #     cod:
    #     dec:
    #         fmt: "STEREO"
    # c24:
    #     type: ivas
    #     bitrates:
    #         - 16400
    #     cod:
    #     dec:
    #         fmt: "STEREO"
    # c25:
    #     type: ivas
    #     bitrates:
    #         - 24400
    #     cod:
    #     dec:
    #         fmt: "STEREO"
    # c26:
    #     type: ivas
    #     bitrates:
    #         - 32000
    #     cod:
    #     dec:
    #         fmt: "STEREO"
    # c27:
    #     type: ivas
    #     bitrates:
    #         - 48000
    #     cod:
    #     dec:
    #         fmt: "STEREO"
    # c28:
    #     type: ivas
    #     bitrates:
    #         - 13200
    #     cod:
    #     dec:
    #         fmt: "STEREO"
    #     tx:
    #         type: "FER"
    #         error_rate: 5
    # c29:
    #     type: ivas
    #     bitrates:
    #         - 16400
    #     cod:
    #     dec:
    #         fmt: "STEREO"
    #     tx:
    #         type: "FER"
    #         error_rate: 5
    # c30:
    #     type: ivas
    #     bitrates:
    #         - 24400
    #     cod:
    #     dec:
    #         fmt: "STEREO"
    #     tx:
    #         type: "FER"
    #         error_rate: 5
    # c31:
    #     type: ivas
    #     bitrates:
    #         - 32000
    #     cod:
    #     dec:
    #         fmt: "STEREO"
    #     tx:
    #         type: "FER"
    #         error_rate: 5
    # c32:
    #     type: ivas
    #     bitrates:
    #         - 48000
    #     cod:
    #     dec:
    #         fmt: "STEREO"
    #     tx:
    #         type: "FER"
    #         error_rate: 5

    # c33:
    #     type: ivas
    #     bitrates:
    #         - 24400
    #     cod:
    #         opts: ["-dtx"]
    #     dec:
    #         fmt: "STEREO"

    # Only active condition: IVAS with one bitrate, MASA2 coding and FER.
    c34:
        type: ivas
        bitrates: [13200]
        cod:
            # Encoder-side format; it differs from the FOA input format.
            fmt: 'MASA2'
            # Extra option(s) passed on the encoder side.
            opts:
                - '-dtx'
        dec:
            fmt: 'MASA2'
        tx:
            type: 'FER'
            # Error rate for the FER setting (presumably percent -- confirm).
            error_rate: 5

################################################
### Post-processing
################################################
postprocessing:
    # Format of the post-processed output.
    fmt: 'BINAURAL'
    # Output sampling rate (presumably in Hz -- confirm).
    fs: 48000
    # Loudness target (unit is not stated in this file -- confirm).
    loudness: -26
+14 −2
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@
import logging
from pathlib import Path, PurePath
from typing import Optional, Union
from numpy import empty

from ivas_processing_scripts.audiotools import audio, audioarray, metadata
from ivas_processing_scripts.audiotools.audiofile import write
@@ -72,6 +73,9 @@ def convert_file(
    if not isinstance(in_fmt, PurePath) and in_fmt.startswith("META"):
        input = metadata.Metadata(in_file)
    else:
        if in_fmt.startswith("MASA") and in_meta is None:
            # if no MD file is provided, default to the input file name (including .wav or .pcm!) + ".met"
            in_meta = [in_file.parent / (in_file.name + ".met")]
        input = audio.fromfile(in_fmt, in_file, in_fs, in_meta)

    # try to set reasonable defaults if missing
@@ -89,6 +93,14 @@ def convert_file(
            out_fmt = input.name

    output = audio.fromtype(out_fmt)

    if isinstance(output, audio.MetadataAssistedSpatialAudio):
        # create dummy audio array to allow inference of MASA mode
        num_tcs = int(output.name[-1])
        output.audio = empty((1, num_tcs))

        # fabricate metadata file name
        output.metadata_files = [Path(out_file).with_suffix(".met")]
    if isinstance(output, audio.ObjectBasedAudio):
        try:
            output.object_pos = input.object_pos
@@ -291,8 +303,8 @@ def format_conversion(
    """Convert one audio format to another"""

    # validation
    if isinstance(output, audio.MetadataAssistedSpatialAudio):
        raise NotImplementedError("MASA is not supported as an output for rendering!")
    if isinstance(output, audio.MetadataAssistedSpatialAudio) and not isinstance(input, audio.SceneBasedAudio):
        raise NotImplementedError("Can only convert to MASA from SBA")

    if isinstance(output, audio.ObjectBasedAudio) and input.name != output.name:
        raise NotImplementedError(
+21 −0
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@ from ivas_processing_scripts.audiotools.convert.binaural import binaural_fftconv
from ivas_processing_scripts.audiotools.EFAP import EFAP
from ivas_processing_scripts.audiotools.rotation import Quat2RotMat, SHrotmatgen
from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu
from ivas_processing_scripts.audiotools.wrappers.masaAnalyzer import masaAnalyzer

""" SceneBasedAudio functions """

@@ -75,6 +76,11 @@ def convert_scenebased(
    # SBA -> SBA
    elif isinstance(out, audio.SceneBasedAudio):
        render_sba_to_sba(sba, out)

    # SBA -> MASA
    elif isinstance(out, audio.MetadataAssistedSpatialAudio) and sba.name == "FOA":
        render_sba_to_masa(sba, out)

    else:
        raise NotImplementedError(
            f"Conversion from {sba.name} to {out.name} is unsupported!"
@@ -177,6 +183,21 @@ def render_sba_to_sba(
        zero_vert_channels(sba_out)


def render_sba_to_masa(
    sba_in: audio.SceneBasedAudio,
    masa_out: audio.MetadataAssistedSpatialAudio,
) -> None:
    """
    Run masaAnalyzer on an FOA input and store the resulting audio in masa_out

    Parameters
    ----------
    sba_in: audio.SceneBasedAudio
        Scene-based input; its name has to be "FOA" (checked with an assert)
    masa_out: audio.MetadataAssistedSpatialAudio
        MASA output object. The second dimension of its current audio array
        gives the number of transport channels, and the first entry of its
        metadata_files is passed to masaAnalyzer as metadata output path.
        Its audio attribute is overwritten with the analyzer result.
    """
    assert sba_in.name == "FOA"

    # a single direction is used here - two directions are only possible from HOA2
    n_directions = 1
    # transport channel count is read from the audio array already present on the output
    n_transport = masa_out.audio.shape[1]
    # NOTE(review): masaAnalyzer presumably writes the metadata to this path - confirm
    metadata_path = masa_out.metadata_files[0]

    analyzed = masaAnalyzer(sba_in, n_transport, n_directions, metadata_path)
    masa_out.audio = analyzed.audio


def rotate_sba(
    sba: audio.SceneBasedAudio,
    trajectory: str,
+1 −1
Original line number Diff line number Diff line
@@ -87,7 +87,7 @@ def masaRenderer(
        str(binary),
        output_mode,
        "",  # 2 -> inputPcm
        str(masa.metadata_files.resolve()),
        str(masa.metadata_file.resolve()),
        "",  # 4 -> outputPcm
    ]

+15 −0
Original line number Diff line number Diff line
@@ -360,6 +360,21 @@ def get_processing_chain(
        else:
            preamble = 0

        # if the encoding format differs from the format after the preprocessing, add format conversion stuff
        if tmp_in_fmt != cod_cfg["fmt"]:
            chain["processes"].append(
                Preprocessing(
                    {
                        "in_fs": tmp_in_fs,
                        "in_fmt": tmp_in_fmt,
                        "out_fs": tmp_in_fs,
                        "out_fmt": cod_cfg["fmt"],
                        "multiprocessing": cfg.multiprocessing,
                    }
                )
            )
            tmp_in_fmt = cod_cfg["fmt"]

        chain["processes"].append(
            IVAS(
                {
Loading