formatting (d4665a98) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

ivas_processing_scripts/generation/init.py

+4 −4

Original line number	Diff line number	Diff line
		@@ -42,10 +42,10 @@ from ivas_processing_scripts.constants import (
		)
		from ivas_processing_scripts.generation import (
		config,
		process_ambi_items,
		process_ism1_items,
		process_ism2_items,
		process_stereo_items,
		process_ambi_items,
		)
		from ivas_processing_scripts.utils import create_dir

ivas_processing_scripts/generation/process_ambi_items.py

+58 −23

Original line number	Diff line number	Diff line
		@@ -32,8 +32,9 @@

		import logging
		import os
		from itertools import groupby, repeat
		from math import floor
		from itertools import repeat, groupby

		import numpy as np

		from ivas_processing_scripts.audiotools import audio, audiofile, convert
		@@ -50,6 +51,7 @@ def csv_formatdata(data):
		for row in data:
		yield ["%0.2f" % v for v in row]


		# function for searching sequences of the same character and replacing them with another string
		def replace_char_seq_with_string(str, char_seq, repl_str):
		result = []
		@@ -131,26 +133,42 @@ def generate_ambi_items(
		cfg.use_input_prefix = ""
		else:
		# replace file designators
		cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab )
		cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language )
		cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp )
		cfg.use_input_prefix = replace_char_seq_with_string(
		cfg.use_input_prefix, "l", cfg.listening_lab
		)
		cfg.use_input_prefix = replace_char_seq_with_string(
		cfg.use_input_prefix, "LL", cfg.language
		)
		cfg.use_input_prefix = replace_char_seq_with_string(
		cfg.use_input_prefix, "eee", cfg.exp
		)

		# set the prefix for all IR filenames
		if "use_IR_prefix" not in cfg.__dict__:
		cfg.use_IR_prefix = ""
		else:
		# replace file designators
		cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "p", cfg.provider )
		cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "LL", cfg.language )
		cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "eee", cfg.exp )
		cfg.use_IR_prefix = replace_char_seq_with_string(
		cfg.use_IR_prefix, "p", cfg.provider
		)
		cfg.use_IR_prefix = replace_char_seq_with_string(
		cfg.use_IR_prefix, "LL", cfg.language
		)
		cfg.use_IR_prefix = replace_char_seq_with_string(
		cfg.use_IR_prefix, "eee", cfg.exp
		)

		# set the prefix for all output filenames
		if "use_output_prefix" not in cfg.__dict__:
		cfg.use_output_prefix = None
		else:
		# replace file designators
		cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab )
		cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp )
		cfg.use_output_prefix = replace_char_seq_with_string(
		cfg.use_output_prefix, "l", cfg.listening_lab
		)
		cfg.use_output_prefix = replace_char_seq_with_string(
		cfg.use_output_prefix, "eee", cfg.exp
		)

		# set multiprocessing
		if "multiprocessing" not in cfg.__dict__:
		@@ -190,7 +208,6 @@ def generate_ambi_scene(

		y = audio.SceneBasedAudio(ambi_format)
		for i in range(N_sources):

		# parse parameters from the scene description
		source_file = np.atleast_1d(scene["source"])[i]
		IR_file = np.atleast_1d(scene["IR"])[i]
		@@ -198,10 +215,26 @@ def generate_ambi_scene(
		logger.info(f"Convolving {source_file} with {IR_file}")

		# read source file
		x = audio.fromfile("MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs)
		x = audio.fromfile(
		"MONO",
		os.path.join(
		cfg.input_path,
		os.path.dirname(source_file),
		cfg.use_input_prefix + os.path.basename(source_file),
		),
		fs=cfg.fs,
		)

		# read the IR file
		IR = audio.fromfile(ambi_format, os.path.join(cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file)), fs=cfg.IR_fs)
		IR = audio.fromfile(
		ambi_format,
		os.path.join(
		cfg.IR_path,
		os.path.dirname(IR_file),
		cfg.use_IR_prefix + os.path.basename(IR_file),
		),
		fs=cfg.IR_fs,
		)

		# convolve with the FOA/HOA2 IR
		if ambi_format == "FOA":
		@@ -289,16 +322,18 @@ def generate_ambi_scene(
		if cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype(
		"float"
		)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")

		# superimpose
		y.audio += noise

		# write the reverberated audio into output file
		audiofile.write(
		os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])),
		os.path.join(
		cfg.output_path,
		os.path.dirname(scene["name"]),
		cfg.use_output_prefix + os.path.basename(scene["name"]),
		),
		y.audio,
		y.fs,
		)

ivas_processing_scripts/generation/process_ism1_items.py

+52 −30

Original line number	Diff line number	Diff line
		@@ -33,8 +33,8 @@
		import csv
		import logging
		import os
		from itertools import groupby, repeat
		from math import floor
		from itertools import repeat, groupby

		import numpy as np

		@@ -51,6 +51,7 @@ def csv_formatdata(data):
		for row in data:
		yield ["%0.2f" % v for v in row]


		# function for searching sequences of the same character and replacing them with another string
		def replace_char_seq_with_string(str, char_seq, repl_str):
		result = []
		@@ -117,17 +118,27 @@ def generate_ism1_items(
		cfg.use_input_prefix = ""
		else:
		# replace file designators
		cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab )
		cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language )
		cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp )
		cfg.use_input_prefix = replace_char_seq_with_string(
		cfg.use_input_prefix, "l", cfg.listening_lab
		)
		cfg.use_input_prefix = replace_char_seq_with_string(
		cfg.use_input_prefix, "LL", cfg.language
		)
		cfg.use_input_prefix = replace_char_seq_with_string(
		cfg.use_input_prefix, "eee", cfg.exp
		)

		# set the prefix for all output filenames
		if "use_output_prefix" not in cfg.__dict__:
		cfg.use_output_prefix = None
		else:
		# replace file designators
		cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab )
		cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp )
		cfg.use_output_prefix = replace_char_seq_with_string(
		cfg.use_output_prefix, "l", cfg.listening_lab
		)
		cfg.use_output_prefix = replace_char_seq_with_string(
		cfg.use_output_prefix, "eee", cfg.exp
		)

		# set multiprocessing
		if "multiprocessing" not in cfg.__dict__:
		@@ -142,6 +153,7 @@ def generate_ism1_items(

		return


		def generate_ism1_scene(
		scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger
		):
		@@ -170,14 +182,18 @@ def generate_ism1_scene(
		for i in range(N_sources):
		# parse parameters from the scene description
		source_file = (
		scene["source"][i]
		if isinstance(scene["source"], list)
		else scene["source"]
		scene["source"][i] if isinstance(scene["source"], list) else scene["source"]
		)

		# read source file
		x = audio.fromfile(
		"MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs
		"MONO",
		os.path.join(
		cfg.input_path,
		os.path.dirname(source_file),
		cfg.use_input_prefix + os.path.basename(source_file),
		),
		fs=cfg.fs,
		)

		# get the number of frames (multiple of 20ms)
		@@ -302,9 +318,7 @@ def generate_ism1_scene(

		# insert neutral position as a pre-amble to all sources
		N_pre = int(N_pre / frame_len)
		pre = np.tile(
		[0.00, 0.00, 1.00, 0.00, 1.00], (N_pre, 1)
		)
		pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pre, 1))
		y_meta = np.concatenate([pre, y_meta], axis=0)

		if cfg.postamble != 0.0:
		@@ -317,29 +331,37 @@ def generate_ism1_scene(

		# append neutral position as a post-amble to all sources
		N_post = int(N_post / frame_len)
		post = np.tile(
		[0.00, 0.00, 1.00, 0.00, 1.00], (N_post, 1)
		)
		post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_post, 1))
		y_meta = np.concatenate([y_meta, post], axis=0)

		# add random noise
		if cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype(
		"float"
		)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")

		# superimpose
		y.audio += noise

		# write ISM audio stream to the output file
		audiofile.write(
		os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), y.audio, y.fs
		os.path.join(
		cfg.output_path,
		os.path.dirname(scene["name"]),
		cfg.use_output_prefix + os.path.basename(scene["name"]),
		),
		y.audio,
		y.fs,
		)

		# write ISM metadata to the output file in .0.csv format
		csv_filename = os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.splitext(os.path.basename(scene["name"]))[0] + f".0.csv")
		csv_filename = os.path.join(
		cfg.output_path,
		os.path.dirname(scene["name"]),
		cfg.use_output_prefix
		+ os.path.splitext(os.path.basename(scene["name"]))[0]
		+ f".0.csv",
		)

		with open(
		csv_filename,

ivas_processing_scripts/generation/process_ism2_items.py

+55 −39

Original line number	Diff line number	Diff line
		@@ -29,12 +29,12 @@
		# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
		# the United Nations Convention on Contracts on the International Sales of Goods.
		#
		import pdb
		import csv
		import logging
		import os
		import pdb
		from itertools import groupby, repeat
		from math import floor
		from itertools import repeat, groupby

		import numpy as np

		@@ -51,6 +51,7 @@ def csv_formatdata(data):
		for row in data:
		yield ["%0.2f" % v for v in row]


		# function for searching sequences of the same character and replacing them with another string
		def replace_char_seq_with_string(str, char_seq, repl_str):
		result = []
		@@ -117,17 +118,27 @@ def generate_ism2_items(
		cfg.use_input_prefix = ""
		else:
		# replace file designators
		cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab )
		cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language )
		cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp )
		cfg.use_input_prefix = replace_char_seq_with_string(
		cfg.use_input_prefix, "l", cfg.listening_lab
		)
		cfg.use_input_prefix = replace_char_seq_with_string(
		cfg.use_input_prefix, "LL", cfg.language
		)
		cfg.use_input_prefix = replace_char_seq_with_string(
		cfg.use_input_prefix, "eee", cfg.exp
		)

		# set the prefix for all output filenames
		if "use_output_prefix" not in cfg.__dict__:
		cfg.use_output_prefix = None
		else:
		# replace file designators
		cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab )
		cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp )
		cfg.use_output_prefix = replace_char_seq_with_string(
		cfg.use_output_prefix, "l", cfg.listening_lab
		)
		cfg.use_output_prefix = replace_char_seq_with_string(
		cfg.use_output_prefix, "eee", cfg.exp
		)

		# set multiprocessing
		if "multiprocessing" not in cfg.__dict__:
		@@ -142,6 +153,7 @@ def generate_ism2_items(

		return


		def generate_ism2_scene(
		scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger
		):
		@@ -166,9 +178,7 @@ def generate_ism2_scene(
		for i in range(N_sources):
		# parse parameters from the scene description
		source_file = (
		scene["source"][i]
		if isinstance(scene["source"], list)
		else scene["source"]
		scene["source"][i] if isinstance(scene["source"], list) else scene["source"]
		)
		source_azi = (
		scene["azimuth"][i]
		@@ -181,13 +191,17 @@ def generate_ism2_scene(
		else scene["elevation"]
		)

		logger.info(
		f"Encoding {source_file} at position(s) {source_azi},{source_ele}"
		)
		logger.info(f"Encoding {source_file} at position(s) {source_azi},{source_ele}")

		# read source file
		x = audio.fromfile(
		"MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs
		"MONO",
		os.path.join(
		cfg.input_path,
		os.path.dirname(source_file),
		cfg.use_input_prefix + os.path.basename(source_file),
		),
		fs=cfg.fs,
		)

		# get the number of frames (multiple of 20ms)
		@@ -283,9 +297,7 @@ def generate_ism2_scene(
		# insert neutral position as a pre-amble
		N_delay = int(N_delay / frame_len)
		# use neutral position for padding
		pre = np.tile(
		[0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1)
		)
		pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1))
		x_meta = np.concatenate([pre, x_meta])

		# pad with zeros to ensure that the signal length is a multiple of 20ms
		@@ -299,9 +311,7 @@ def generate_ism2_scene(
		N_pad = int(len(x.audio) / frame_len) - len(x_meta)
		if N_pad > 0:
		# use neutral position for padding
		post = np.tile(
		[0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1)
		)
		post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1))
		x_meta = np.concatenate([x_meta, post])

		# add source signal to the array of all source signals
		@@ -370,9 +380,7 @@ def generate_ism2_scene(

		# insert neutral position as a pre-amble to all sources
		N_pre = int(N_pre / frame_len)
		pre = np.tile(
		[0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1)
		)
		pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1))
		y_meta = np.concatenate([pre, y_meta], axis=1)

		if cfg.postamble != 0.0:
		@@ -385,31 +393,39 @@ def generate_ism2_scene(

		# append neutral position as a post-amble to all sources
		N_post = int(N_post / frame_len)
		post = np.tile(
		[0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1)
		)
		post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1))
		y_meta = np.concatenate([y_meta, post], axis=1)

		# add random noise
		if cfg.add_low_level_random_noise:
		# create uniformly distributed noise between -4 and 4
		np.random.seed(SEED_RANDOM_NOISE)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype(
		"float"
		)
		noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")

		# superimpose
		y.audio += noise

		# write individual ISM audio streams to the output file in an interleaved format
		audiofile.write(
		os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), y.audio, y.fs
		os.path.join(
		cfg.output_path,
		os.path.dirname(scene["name"]),
		cfg.use_output_prefix + os.path.basename(scene["name"]),
		),
		y.audio,
		y.fs,
		)

		# write individual ISM metadata to output files in .csv format
		for i in range(N_sources):
		# generate .csv filename (should end with .0.csv, .1.csv, ...)
		csv_filename = os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.splitext(os.path.basename(scene["name"]))[0] + f".{i}.csv")
		csv_filename = os.path.join(
		cfg.output_path,
		os.path.dirname(scene["name"]),
		cfg.use_output_prefix
		+ os.path.splitext(os.path.basename(scene["name"]))[0]
		+ f".{i}.csv",
		)

		with open(
		csv_filename,

ivas_processing_scripts/generation/process_stereo_items.py

+55 −19

Original line number	Diff line number	Diff line
		@@ -32,7 +32,7 @@

		import logging
		import os
		from itertools import repeat, groupby
		from itertools import groupby, repeat
		from math import floor

		import numpy as np
		@@ -51,6 +51,7 @@ def csv_formatdata(data):
		for row in data:
		yield ["%0.2f" % v for v in row]


		# function for searching sequences of the same character and replacing them with another string
		def replace_char_seq_with_string(str, char_seq, repl_str):
		result = []
		@@ -125,26 +126,42 @@ def generate_stereo_items(
		cfg.use_input_prefix = ""
		else:
		# replace file designators
		cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab )
		cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language )
		cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp )
		cfg.use_input_prefix = replace_char_seq_with_string(
		cfg.use_input_prefix, "l", cfg.listening_lab
		)
		cfg.use_input_prefix = replace_char_seq_with_string(
		cfg.use_input_prefix, "LL", cfg.language
		)
		cfg.use_input_prefix = replace_char_seq_with_string(
		cfg.use_input_prefix, "eee", cfg.exp
		)

		# set the prefix for all IR filenames
		if "use_IR_prefix" not in cfg.__dict__:
		cfg.use_IR_prefix = ""
		else:
		# replace file designators
		cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "p", cfg.provider )
		cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "LL", cfg.language )
		cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "eee", cfg.exp )
		cfg.use_IR_prefix = replace_char_seq_with_string(
		cfg.use_IR_prefix, "p", cfg.provider
		)
		cfg.use_IR_prefix = replace_char_seq_with_string(
		cfg.use_IR_prefix, "LL", cfg.language
		)
		cfg.use_IR_prefix = replace_char_seq_with_string(
		cfg.use_IR_prefix, "eee", cfg.exp
		)

		# set the prefix for all output filenames
		if "use_output_prefix" not in cfg.__dict__:
		cfg.use_output_prefix = None
		else:
		# replace file designators
		cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab )
		cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp )
		cfg.use_output_prefix = replace_char_seq_with_string(
		cfg.use_output_prefix, "l", cfg.listening_lab
		)
		cfg.use_output_prefix = replace_char_seq_with_string(
		cfg.use_output_prefix, "eee", cfg.exp
		)

		# set multiprocessing
		if "multiprocessing" not in cfg.__dict__:
		@@ -178,7 +195,6 @@ def generate_stereo_scene(

		y = audio.ChannelBasedAudio("STEREO")
		for i in range(N_sources):

		# parse parameters from the scene description
		source_file = np.atleast_1d(scene["source"])[i]
		IR_file = np.atleast_1d(scene["IR"])[i]
		@@ -186,10 +202,26 @@ def generate_stereo_scene(
		logger.info(f"Convolving {source_file} with {IR_file}")

		# read source file
		x = audio.fromfile("MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs)
		x = audio.fromfile(
		"MONO",
		os.path.join(
		cfg.input_path,
		os.path.dirname(source_file),
		cfg.use_input_prefix + os.path.basename(source_file),
		),
		fs=cfg.fs,
		)

		# read the IR file
		IR = audio.fromfile("STEREO", os.path.join(cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file)), fs=cfg.IR_fs)
		IR = audio.fromfile(
		"STEREO",
		os.path.join(
		cfg.IR_path,
		os.path.dirname(IR_file),
		cfg.use_IR_prefix + os.path.basename(IR_file),
		),
		fs=cfg.IR_fs,
		)

		# convolve with stereo IR
		x = reverb_stereo(x, IR)
		@@ -281,7 +313,11 @@ def generate_stereo_scene(

		# write the reverberated audio into output file
		audiofile.write(
		os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])),
		os.path.join(
		cfg.output_path,
		os.path.dirname(scene["name"]),
		cfg.use_output_prefix + os.path.basename(scene["name"]),
		),
		y.audio,
		y.fs,
		)