Commit 0774f358 authored by Vladimir Malenovsky
Browse files

fixes in OMASA item generation script

parent 5cd9308f
Loading
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -72,6 +72,10 @@ def masaAnalyzer(
    else:
        binary = find_binary("masaAnalyzer")

    # enforce metadata_out_filename to be a Path object
    if metadata_out_filename is not None and not isinstance(metadata_out_filename, Path):
        metadata_out_filename = Path(metadata_out_filename)

    if num_tcs not in [1, 2]:
        raise ValueError(f"Only 1 or 2 TCs supported, but {num_tcs} was given.")

+6 −1
Original line number Diff line number Diff line
@@ -83,11 +83,16 @@ def masaRenderer(
        output_mode = "-BINAURAL"
        num_channels = 2

    # enforce masa_metadata_file to be a Path object
    masa_metadata_file = masa.metadata_file
    if masa_metadata_file is not None and not isinstance(masa_metadata_file, Path):
        masa_metadata_file = Path(masa_metadata_file)

    cmd = [
        str(binary),
        output_mode,
        "",  # 2 -> inputPcm
        str(masa.metadata_file.resolve()),
        str(masa_metadata_file.resolve()),
        "",  # 4 -> outputPcm
    ]

+31 −14
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@
#  accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
#  the United Nations Convention on Contracts on the International Sales of Goods.
#
import pdb

import csv
import logging
import os
@@ -47,8 +47,6 @@ from ivas_processing_scripts.audiotools.convert.omasa import convert_omasa
from ivas_processing_scripts.generation import config
from ivas_processing_scripts.utils import apply_func_parallel

import pdb

SEED_RANDOM_NOISE = 0


@@ -152,14 +150,13 @@ def generate_omasa_items(

    # set multiprocessing
    if "multiprocessing" not in cfg.__dict__:
        cfg.multiprocessing = True
        cfg.multiprocessing = False

    apply_func_parallel(
        generate_scene,
        zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)),
        None,
        # "mp" if cfg.multiprocessing else None,
        None,
        type = "mp" if cfg.multiprocessing else None,
        show_progress = None,
    )

    return
@@ -168,15 +165,36 @@ def generate_omasa_items(
def generate_scene(
    scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger
):
    """
    Processes a single scene to generate OMASA items with metadata.

    Args:
        scene_name (str): The name of the scene being processed.
        scene (dict): A dictionary containing scene description, including source files, azimuth, elevation, and other parameters.
        cfg (config.TestConfig): Configuration object containing settings for processing, such as input/output paths, sampling rate, and loudness levels.
        logger (logging.Logger): Logger instance for logging information and errors.

    Expected Behavior:
        - Reads audio source files and processes them based on the scene description.
        - Generates metadata files and appends them to the OMASA object.
        - Writes the processed audio and metadata to output files.
        - Handles various audio formats (e.g., MONO, FOA, HOA2) and applies transformations like loudness normalization, trimming, and padding.
    """
    logger.info( f"Processing scene {scene_name}:")
    
    # extract the number of audio sources
    N_sources = len(np.atleast_1d(scene["source"]))
    N_ISMs = N_sources-1

    # initialize output array
    # initialize output dirs
    omasa_format = f"ISM{N_ISMs}MASA{cfg.masa_tc}DIR{cfg.masa_dirs}"
    output_filename = os.path.join( cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + append_str_filename(os.path.basename(scene["name"]), f"_s{scene_name}_{omasa_format}") )

    dir_path = os.path.dirname(output_filename)
    if dir_path and not os.path.exists(dir_path):
        os.makedirs(dir_path, exist_ok=True)

    # initialize output OMASA object
    y = audio.OMASAAudio(omasa_format)

    # repeat for all source files
@@ -236,7 +254,7 @@ def generate_scene(
            logger.info(f"Error: Input format of the source file with {N_channels} channels is not supported!")
            sys.exit(-1)
    
        if fmt in ["FOA", "HOA2"]:
        if fmt in ["FOA", "HOA2", "HOA3"]:
            # generate MASA metadata .met filename (should end with .met)
            y.metadata_files.append(os.path.splitext(output_filename)[0]+".met")
        elif fmt == "MONO":
@@ -262,7 +280,7 @@ def generate_scene(
        audioarray.cut(x.audio, [0, N_frames * frame_len])
        
        # adjust the level of the source file
        if fmt in ["FOA", "HOA2"]:
        if fmt in ["FOA", "HOA2", "HOA3"]:
            x.audio, _ = loudness_norm(x, level, loudness_format="STEREO", rms=True)
        else:
            x.audio, _ = loudness_norm(x, level, loudness_format="MONO")
@@ -277,9 +295,9 @@ def generate_scene(
            N_pad = int(frame_len - len(x.audio) % frame_len)
            x.audio = audioarray.trim(x.audio, x.fs, limits=[0, -N_pad], samples=True)

        # convert FOA to MASA
        if fmt in ["FOA", "HOA2"]:
            x_masa = audio.MetadataAssistedSpatialAudio(f"MASA2DIR1")
        # convert FOA/HOA2/HOA3 to MASA
        if fmt in ["FOA", "HOA2", "HOA3"]:
            x_masa = audio.MetadataAssistedSpatialAudio(f"MASA{cfg.masa_tc}DIR{cfg.masa_dirs}")
            x_masa.metadata_file = y.metadata_files[i]
            render_sba_to_masa(x, x_masa)
            y.audio = x_masa.audio
@@ -320,7 +338,6 @@ def generate_scene(
            else scene["elevation"]
        )

        # pdb.set_trace()
        N_frames = int(np.rint((len(y.audio) / y.fs * 50)))

        # read azimuth information and convert to an array