Commit 89477466 authored by Vladimir Malenovsky
Browse files

simplification of the top-level functions generate_[ism|stereo]_items()

parent 18b3e256
Loading
Loading
Loading
Loading
Loading
+4 −7
Original line number Diff line number Diff line
@@ -6,12 +6,6 @@
### Output format
format: "ISM1"

### Date; default = YYYYMMDD_HH.MM.SS
# date: 2023.06.30

### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false
# delete_tmp: true

### Output sampling rate in Hz needed for headerless audio files; default = 48000
fs: 48000

@@ -29,10 +23,13 @@ output_path: "./items_ISM1"
### Target loudness in LKFS; default = null (no loudness normalization applied)
loudness: -26

### Pre-amble and Post-amble length in seconds (default = None)
### Pre-amble and Post-amble length in seconds (default = 0.0)
preamble: 0.5
postamble: 0.5

### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence)
add_low_level_random_noise: true


################################################
### Scene description
+0 −6
Original line number Diff line number Diff line
@@ -6,12 +6,6 @@
### Output format
format: "ISM2"

### Date; default = YYYYMMDD_HH.MM.SS
# date: 2023.06.30

### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false
# delete_tmp: true

### Output sampling rate in Hz needed for headerless audio files; default = 48000
fs: 48000

+2 −8
Original line number Diff line number Diff line
@@ -6,12 +6,6 @@
### Output format
format: "STEREO"

### Date; default = YYYYMMDD_HH.MM.SS
# date: 2023.06.30

### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false
# delete_tmp: true

### Output sampling rate in Hz needed for headerless audio files; default = 48000
fs: 48000

@@ -26,8 +20,8 @@ IR_fs: 32000
### Input path to mono files
input_path: "./items_mono"

### Input path to stereo impulse response files
IR_path: "./IR"
### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR'
# IR_path: "./IR"

### Output path for generated test items and metadata files
output_path: "./items_STEREO"
+2 −28
Original line number Diff line number Diff line
@@ -84,36 +84,10 @@ def main(args):
    # generate input items
    if cfg.format.startswith("ISM"):
        # generate ISM items with metadata according to scene description
        process_ism_items.generate_ism_items(
            cfg.format,
            cfg.loudness,
            cfg.input_path,
            cfg.output_path,
            cfg.scenes,
            logger,
            fs=cfg.fs,
            preamble=cfg.preamble,
            postamble=cfg.postamble,
            add_low_level_random_noise=getattr(cfg, "add_low_level_random_noise", False), 
            # TODO@VM dict.get() can provide a default value if the key is not found
            # please check if this is a viable solution - I kept getting "AttributeError: 'TestConfig' object has no attribute 'add_low_level_random_noise'"
        )
        process_ism_items.generate_ism_items(cfg, logger)
    elif cfg.format == "STEREO":
        # generate STEREO items according to scene description
        process_stereo_items.generate_stereo_items(
            cfg.format,
            cfg.loudness,
            cfg.input_path,
            cfg.IR_path,
            cfg.output_path,
            cfg.scenes,
            logger,
            fs=cfg.fs,
            IR_fs=cfg.IR_fs,
            preamble=cfg.preamble,
            postamble=cfg.postamble,
            add_low_level_random_noise=cfg.add_low_level_random_noise,
        )
        process_stereo_items.generate_stereo_items(cfg, logger)

    # copy configuration to output directory
    with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f:
+35 −24
Original line number Diff line number Diff line
@@ -33,12 +33,11 @@
import csv
import logging
import os
import numpy as np
from math import floor
from pathlib import Path
from typing import Optional

import numpy as np

from ivas_processing_scripts.generation import config
from ivas_processing_scripts.audiotools import audio, audiofile
from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness

@@ -52,23 +51,34 @@ def csv_formatdata(data):


def generate_ism_items(
    format: str,
    target_level: int,
    input_path: Path,
    output_path: Path,
    scenes: dict,
    cfg : config.TestConfig,
    logger: logging.Logger,
    fs: Optional[int] = 48000,
    preamble: Optional[float] = 0.0,
    postamble: Optional[float] = 0.0,
    add_low_level_random_noise: Optional[bool] = False,
):
    """Generate ISM items with metadata from mono items based on scene description"""

    # get the number of scenes
    N_scenes = len(scenes)
    N_scenes = len(cfg.scenes)

    # set the target level
    if "loudness" not in cfg.__dict__:
        cfg.loudness = -26

    # set the fs
    if "fs" not in cfg.__dict__:
        cfg.fs = 48000

    for scene_name, scene in scenes.items():
    # set the pre-amble and post-amble
    if "preamble" not in cfg.__dict__:
        cfg.preamble = 0.0
        
    if "postamble" not in cfg.__dict__:
        cfg.postamble = 0.0
           
    # set the flag for adding low-level random background noise
    if "add_low_level_random_noise" not in cfg.__dict__:
        cfg.add_low_level_random_noise = False
        
    for scene_name, scene in cfg.scenes.items():
        logger.info(f"Processing {scene_name} out of {N_scenes} scenes")

        # extract the number of audio sources
@@ -89,6 +99,7 @@ def generate_ism_items(

        # repeat for all source files
        for i in range(N_sources):
        
            # parse parameters from the scene description
            source_file = np.atleast_1d(scene["source"])[i]
            source_azi = np.atleast_1d(scene["azimuth"])[i]
@@ -99,7 +110,7 @@ def generate_ism_items(
            )

            # read source file
            x = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs)
            x = audio.fromfile("MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs)
 
            # get the number of frames (multiple of 20ms)
            N_frames = int(len(x.audio) / x.fs * 50)
@@ -109,7 +120,7 @@ def generate_ism_items(
            # x.audio = x.audio[:N_trim]

            # adjust the level of the source file
            _, scale_factor = get_loudness(x, target_level, "MONO")
            _, scale_factor = get_loudness(x, cfg.loudness, "MONO")
            x.audio *= scale_factor

            # read azimuth information and create array
@@ -271,9 +282,9 @@ def generate_ism_items(
                y_meta = np.concatenate([y_meta, x_meta])

        # append pre-amble and post-amble to all sources
        if preamble != 0.0:
        if cfg.preamble != 0.0:
            # ensure that pre-amble is a multiple of 20ms
            N_pre = int(floor(preamble * 50) / 50 * y.fs)
            N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)

            # insert all-zero preamble to all sources
            pre = np.zeros((N_pre, y.audio.shape[1]))
@@ -285,9 +296,9 @@ def generate_ism_items(
            )  # !!!! TBD - check if we should insert neutral position or the first position of the metadata
            y_meta = np.concatenate([pre, y_meta], axis=1)

        if postamble != 0.0:
        if cfg.postamble != 0.0:
            # ensure that post-amble is a multiple of 20ms
            N_post = int(floor(postamble * 50) / 50 * y.fs)
            N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)

            # append all-zero postamble to all sources
            post = np.zeros((N_post, y.audio.shape[1]))
@@ -300,7 +311,7 @@ def generate_ism_items(
            y_meta = np.concatenate([y_meta, post], axis=1)

        # add random noise
        if add_low_level_random_noise:
        if cfg.add_low_level_random_noise:
            # create uniformly distributed noise between -4 and 4
            np.random.seed(SEED_RANDOM_NOISE)
            noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype(
@@ -313,7 +324,7 @@ def generate_ism_items(
        # write individual ISM audio streams to the output file in an interleaved format
        output_filename = scene["name"]
        audiofile.write(
            os.path.join(output_path, output_filename), y.audio, y.fs
            os.path.join(cfg.output_path, output_filename), y.audio, y.fs
        )  # !!!! TBD: replace all os.path.xxx operations with the Path object

        # write individual ISM metadata to output files in .csv format
@@ -322,7 +333,7 @@ def generate_ism_items(
            csv_filename = os.path.normpath(f"{output_filename}.{i}.csv")

            with open(
                os.path.join(output_path, csv_filename),
                os.path.join(cfg.output_path, csv_filename),
                "w",
                newline="",
                encoding="utf-8",
Loading