formatting (f2d3f6e9) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

item_generation_scripts/init.py

+4 −5

Original line number	Diff line number	Diff line
		@@ -30,11 +30,12 @@
		# the United Nations Convention on Contracts on the International Sales of Goods.
		#

		import os
		import logging
		import os
		import pdb
		from itertools import repeat

		import yaml
		import pdb

		from item_generation_scripts.constants import (
		LOGGER_DATEFMT,
		@@ -42,7 +43,6 @@ from item_generation_scripts.constants import (
		LOGGER_SUFFIX,
		)
		from item_generation_scripts.processing import config, process_ism_items
		from item_generation_scripts.processing import config
		from item_generation_scripts.utils import create_dir


		@@ -73,7 +73,6 @@ def logging_init(args, cfg):


		def main(args):

		# parse configuration
		cfg = config.TestConfig(args.config)

		@@ -93,7 +92,7 @@ def main(args):
		cfg.input_path,
		cfg.output_path,
		cfg.scenes,
		logger
		logger,
		)

		# copy configuration to output directory

item_generation_scripts/processing/config.py

+1 −2

Original line number	Diff line number	Diff line
		@@ -38,7 +38,7 @@ import yaml
		from item_generation_scripts.constants import (
		DEFAULT_CONFIG,
		DEFAULT_CONFIG_ISM2,
		REQUIRED_KEYS
		REQUIRED_KEYS,
		)


		@@ -127,4 +127,3 @@ class TestConfig:
		# Report missing keys to the user
		if MISSING_KEYS:
		raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}")

item_generation_scripts/processing/process_ism_items.py

+95 −72

Original line number	Diff line number	Diff line
		@@ -31,25 +31,25 @@
		#


		import csv
		import logging
		import os
		import sys
		import shutil
		import numpy as np
		import logging
		import csv
		import subprocess as sp
		import sys
		from pathlib import Path

		import numpy as np

		from item_generation_scripts.audiotools import (
		audio,
		audioarray,
		audiofile,
		binauralobjectrenderer,
		metadata
		metadata,
		)

		from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness
		from item_generation_scripts.audiotools import audio


		# function for converting nd numpy array to strings with 2 decimal digits
		def csv_formatdata(data):
		@@ -63,9 +63,8 @@ def generate_ism_items(
		input_path: Path,
		output_path: Path,
		scenes: dict,
		logger: logging.Logger
		logger: logging.Logger,
		):

		"""Generate ISM items with metadata from mono items based on scene description"""

		# get the number of scenes
		@@ -75,26 +74,26 @@ def generate_ism_items(
		logger.info(f"Processing {scene_name} out of {N_scenes} scenes")

		# extract the number of audio sources
		N_sources = len(np.atleast_1d(scene['source']))
		N_sources = len(np.atleast_1d(scene["source"]))

		y = None
		y_meta = None
		for i in range(N_sources):

		source_file = np.atleast_1d(scene['source'])[i]
		source_azi = np.atleast_1d(scene['azimuth'])[i]
		source_ele = np.atleast_1d(scene['elevation'])[i]
		source_type = 'speech' #### !!!! TBD - support generic audio + background noise and speech in the .yml file
		source_delay = np.atleast_1d(scene['delay'])[i]

		logger.info(f"Encoding {source_file} at position(s) {source_azi},{source_ele}")
		source_file = np.atleast_1d(scene["source"])[i]
		source_azi = np.atleast_1d(scene["azimuth"])[i]
		source_ele = np.atleast_1d(scene["elevation"])[i]
		source_type = "speech" #### !!!! TBD - support generic audio + background noise and speech in the .yml file
		source_delay = np.atleast_1d(scene["delay"])[i]

		logger.info(
		f"Encoding {source_file} at position(s) {source_azi},{source_ele}"
		)

		# read source file
		# x, fs = audiofile.read(os.path.join(input_path, source_file)) #### !!!! TBD - check the support for headerless .raw files
		# pdb.set_trace()
		audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file))


		x = audio_object.audio
		fs = audio_object.fs

		@@ -108,9 +107,13 @@ def generate_ism_items(

		# read azimuth information and create array
		if isinstance(source_azi, str):
		if ':' in source_azi:
		source_azi = source_azi.split(':')
		azi = np.arange(float(eval(source_azi[0])), float(eval(source_azi[2])), float(eval(source_azi[1])))
		if ":" in source_azi:
		source_azi = source_azi.split(":")
		azi = np.arange(
		float(eval(source_azi[0])),
		float(eval(source_azi[2])),
		float(eval(source_azi[1])),
		)
		else:
		azi = np.array(float(eval(source_azi)), ndmin=1)[:N_frames]
		else:
		@@ -129,13 +132,19 @@ def generate_ism_items(

		# check if azimuth is from -180 .. +180
		if any(azi > 180) or any(azi < -180):
		logger.error(f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}")
		logger.error(
		f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}"
		)

		# read elevation information and create array
		if isinstance(source_ele, str):
		if ':' in source_ele:
		source_ele = source_ele.split(':')
		ele = np.arange(float(eval(source_ele[0])), float(eval(source_ele[2])), float(eval(source_ele[1])))
		if ":" in source_ele:
		source_ele = source_ele.split(":")
		ele = np.arange(
		float(eval(source_ele[0])),
		float(eval(source_ele[2])),
		float(eval(source_ele[1])),
		)
		else:
		ele = np.array(float(eval(source_ele)), ndmin=1)[:N_frames]
		else:
		@@ -151,10 +160,14 @@ def generate_ism_items(

		# check if elevation is from -90 .. +90
		if any(ele > 90) or any(ele < -90):
		logger.error(f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}")
		logger.error(
		f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}"
		)

		# additional metadata
		dist = np.ones(N_frames) #### !!!! TBD - check what to do with these metadata
		dist = np.ones(
		N_frames
		) #### !!!! TBD - check what to do with these metadata
		spread = np.zeros(N_frames)
		gain = np.ones(N_frames)

		@@ -167,7 +180,9 @@ def generate_ism_items(
		x = np.concatenate([pre, x])

		# apply delay to metadata as well
		pre = np.tile([0.00,0.00,1.00,0.00,1.00], (int(source_delay * 50), 1))
		pre = np.tile(
		[0.00, 0.00, 1.00, 0.00, 1.00], (int(source_delay * 50), 1)
		)
		# pre = np.zeros((int(source_delay * 50), x_meta.shape[1]))
		x_meta = np.concatenate([pre, x_meta])

		@@ -194,26 +209,34 @@ def generate_ism_items(
		if x_meta.shape[1] > y_meta.shape[1]:
		N_delta = x_meta.shape[1] - y_meta.shape[1]
		y_meta = y_meta.reshape(y_meta.shape[1], -1) # reshape to 2d array
		y_meta = np.vstack((y_meta, np.tile(y_meta[-1,:], (N_delta, 1)))) # repeat last row N_delta times and append to the array
		y_meta = y_meta.reshape(N_srcs, -1, N_meta_features) # reshape back to 3d array
		y_meta = np.vstack(
		(y_meta, np.tile(y_meta[-1, :], (N_delta, 1)))
		) # repeat last row N_delta times and append to the array
		y_meta = y_meta.reshape(
		N_srcs, -1, N_meta_features
		) # reshape back to 3d array
		elif y_meta.shape[1] > x_meta.shape[1]:
		N_delta = y_meta.shape[1] - x_meta.shape[1]
		x_meta = x_meta.reshape(x_meta.shape[1], -1) # reshape to 2d array
		x_meta = np.vstack((x_meta, np.tile(x_meta[-1,:], (N_delta, 1)))) # repeat last row N_delta times and append to the array
		x_meta = np.vstack(
		(x_meta, np.tile(x_meta[-1, :], (N_delta, 1)))
		) # repeat last row N_delta times and append to the array
		x_meta = np.expand_dims(x_meta, axis=0) # reshape back to 3d array

		y_meta = np.concatenate([y_meta, x_meta])

		# write individual ISM audio streams to the output file in an interleaved format
		output_filename = scene['name']
		audiofile.write(os.path.join(output_path, output_filename), y, fs) ### !!!! replace all os.path.xxx operations with the Path object
		output_filename = scene["name"]
		audiofile.write(
		os.path.join(output_path, output_filename), y, fs
		) ### !!!! replace all os.path.xxx operations with the Path object

		# write individual ISM metadata to output files in .csv format
		for i in range(N_sources):
		# generate .csv filename (should end with .0.csv, .1.csv, ...)
		csv_filename = os.path.normpath(f"{output_filename}.{i}.csv")

		with open(os.path.join(output_path, csv_filename), 'w') as f:
		with open(os.path.join(output_path, csv_filename), "w") as f:
		# create csv writer
		writer = csv.writer(f)

item_generation_scripts/constants.py

+3 −3

File changed.

Contains only whitespace changes.