simplification of the top-level functions generate_[ism|stereo]_items() (89477466) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

item_gen_configs/ISM1_CONFIG.yml

+4 −7

Original line number	Diff line number	Diff line
		@@ -6,12 +6,6 @@
		### Output format
		format: "ISM1"

		### Date; default = YYYYMMDD_HH.MM.SS
		# date: 2023.06.30

		### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false
		# delete_tmp: true

		### Output sampling rate in Hz needed for headerless audio files; default = 48000
		fs: 48000

		@@ -29,10 +23,13 @@ output_path: "./items_ISM1"
		### Target loudness in LKFS; default = null (no loudness normalization applied)
		loudness: -26

		### Pre-amble and Post-amble length in seconds (default = None)
		### Pre-amble and Post-amble length in seconds (default = 0.0)
		preamble: 0.5
		postamble: 0.5

		### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence)
		add_low_level_random_noise: true


		################################################
		### Scene description

item_gen_configs/ISM2_CONFIG.yml

+0 −6

Original line number	Diff line number	Diff line
		@@ -6,12 +6,6 @@
		### Output format
		format: "ISM2"

		### Date; default = YYYYMMDD_HH.MM.SS
		# date: 2023.06.30

		### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false
		# delete_tmp: true

		### Output sampling rate in Hz needed for headerless audio files; default = 48000
		fs: 48000

item_gen_configs/STEREO_CONFIG.yml

+2 −8

Original line number	Diff line number	Diff line
		@@ -6,12 +6,6 @@
		### Output format
		format: "STEREO"

		### Date; default = YYYYMMDD_HH.MM.SS
		# date: 2023.06.30

		### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false
		# delete_tmp: true

		### Output sampling rate in Hz needed for headerless audio files; default = 48000
		fs: 48000

		@@ -26,8 +20,8 @@ IR_fs: 32000
		### Input path to mono files
		input_path: "./items_mono"

		### Input path to stereo impulse response files
		IR_path: "./IR"
		### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR'
		# IR_path: "./IR"

		### Output path for generated test items and metadata files
		output_path: "./items_STEREO"

ivas_processing_scripts/generation/init.py

+2 −28

Original line number	Diff line number	Diff line
		@@ -84,36 +84,10 @@ def main(args):
		# generate input items
		if cfg.format.startswith("ISM"):
		# generate ISM items with metadata according to scene description
		process_ism_items.generate_ism_items(
		cfg.format,
		cfg.loudness,
		cfg.input_path,
		cfg.output_path,
		cfg.scenes,
		logger,
		fs=cfg.fs,
		preamble=cfg.preamble,
		postamble=cfg.postamble,
		add_low_level_random_noise=getattr(cfg, "add_low_level_random_noise", False),
		# TODO@VM dict.get() can provide a default value if the key is not found
		# please check if this is a viable solution - I kept getting "AttributeError: 'TestConfig' object has no attribute 'add_low_level_random_noise'"
		)
		process_ism_items.generate_ism_items(cfg, logger)
		elif cfg.format == "STEREO":
		# generate STEREO items according to scene description
		process_stereo_items.generate_stereo_items(
		cfg.format,
		cfg.loudness,
		cfg.input_path,
		cfg.IR_path,
		cfg.output_path,
		cfg.scenes,
		logger,
		fs=cfg.fs,
		IR_fs=cfg.IR_fs,
		preamble=cfg.preamble,
		postamble=cfg.postamble,
		add_low_level_random_noise=cfg.add_low_level_random_noise,
		)
		process_stereo_items.generate_stereo_items(cfg, logger)

		# copy configuration to output directory
		with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f:

ivas_processing_scripts/generation/process_ism_items.py

+35 −24

Original line number	Diff line number	Diff line
		@@ -33,12 +33,11 @@
		import csv
		import logging
		import os
		import numpy as np
		from math import floor
		from pathlib import Path
		from typing import Optional

		import numpy as np

		from ivas_processing_scripts.generation import config
		from ivas_processing_scripts.audiotools import audio, audiofile
		from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness

		@@ -52,23 +51,34 @@ def csv_formatdata(data):


		def generate_ism_items(
		format: str,
		target_level: int,
		input_path: Path,
		output_path: Path,
		scenes: dict,
		cfg : config.TestConfig,
		logger: logging.Logger,
		fs: Optional[int] = 48000,
		preamble: Optional[float] = 0.0,
		postamble: Optional[float] = 0.0,
		add_low_level_random_noise: Optional[bool] = False,
		):
		"""Generate ISM items with metadata from mono items based on scene description"""

		# get the number of scenes
		N_scenes = len(scenes)
		N_scenes = len(cfg.scenes)

		# set the target level
		if "loudness" not in cfg.__dict__:
		cfg.loudness = -26

		# set the fs
		if "fs" not in cfg.__dict__:
		cfg.fs = 48000

		for scene_name, scene in scenes.items():
		# set the pre-amble and post-amble
		if "preamble" not in cfg.__dict__:
		cfg.preamble = 0.0

		if "postamble" not in cfg.__dict__:
		cfg.postamble = 0.0

		# set the flag for adding low-level random background noise
		if "add_low_level_random_noise" not in cfg.__dict__:
		cfg.add_low_level_random_noise = False

		for scene_name, scene in cfg.scenes.items():
		logger.info(f"Processing {scene_name} out of {N_scenes} scenes")

		# extract the number of audio sources
		@@ -89,6 +99,7 @@ def generate_ism_items(

		# repeat for all source files
		for i in range(N_sources):

		# parse parameters from the scene description
		source_file = np.atleast_1d(scene["source"])[i]
		source_azi = np.atleast_1d(scene["azimuth"])[i]
		@@ -99,7 +110,7 @@ def generate_ism_items(
		)

		# read source file
		x = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs)
		x = audio.fromfile("MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs)

		# get the number of frames (multiple of 20ms)
		N_frames = int(len(x.audio) / x.fs * 50)
		@@ -109,7 +120,7 @@ def generate_ism_items(
		# x.audio = x.audio[:N_trim]

		# adjust the level of the source file
		_, scale_factor = get_loudness(x, target_level, "MONO")
		_, scale_factor = get_loudness(x, cfg.loudness, "MONO")
		x.audio *= scale_factor

		# read azimuth information and create array
		@@ -271,9 +282,9 @@ def generate_ism_items(
		y_meta = np.concatenate([y_meta, x_meta])

		# append pre-amble and post-amble to all sources
		if preamble != 0.0:
		if cfg.preamble != 0.0:
		# ensure that the pre-amble is a multiple of 20ms
		N_pre = int(floor(preamble * 50) / 50 * y.fs)
		N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)

		# insert all-zero preamble to all sources
		pre = np.zeros((N_pre, y.audio.shape[1]))
		@@ -285,9 +296,9 @@ def generate_ism_items(
		) # !!!! TBD - check if we should insert neutral position or the first position of the metadata
		y_meta = np.concatenate([pre, y_meta], axis=1)

		if postamble != 0.0:
		if cfg.postamble != 0.0:
		# ensure that the post-amble is a multiple of 20ms
		N_post = int(floor(postamble * 50) / 50 * y.fs)
		N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)

		# append all-zero postamble to all sources
		post = np.zeros((N_post, y.audio.shape[1]))
		@@ -300,7 +311,7 @@ def generate_ism_items(
		y_meta = np.concatenate([y_meta, post], axis=1)

		# add random noise
		if add_low_level_random_noise:
		if cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype(
		@@ -313,7 +324,7 @@ def generate_ism_items(
		# write individual ISM audio streams to the output file in an interleaved format
		output_filename = scene["name"]
		audiofile.write(
		os.path.join(output_path, output_filename), y.audio, y.fs
		os.path.join(cfg.output_path, output_filename), y.audio, y.fs
		) # !!!! TBD: replace all os.path.xxx operations with the Path object

		# write individual ISM metadata to output files in .csv format
		@@ -322,7 +333,7 @@ def generate_ism_items(
		csv_filename = os.path.normpath(f"{output_filename}.{i}.csv")

		with open(
		os.path.join(output_path, csv_filename),
		os.path.join(cfg.output_path, csv_filename),
		"w",
		newline="",
		encoding="utf-8",