Loading item_generation_scripts/__init__.py +4 −5 Original line number Diff line number Diff line Loading @@ -30,11 +30,12 @@ # the United Nations Convention on Contracts on the International Sales of Goods. # import os import logging import os import pdb from itertools import repeat import yaml import pdb from item_generation_scripts.constants import ( LOGGER_DATEFMT, Loading @@ -42,7 +43,6 @@ from item_generation_scripts.constants import ( LOGGER_SUFFIX, ) from item_generation_scripts.processing import config, process_ism_items from item_generation_scripts.processing import config from item_generation_scripts.utils import create_dir Loading Loading @@ -73,7 +73,6 @@ def logging_init(args, cfg): def main(args): # parse configuration cfg = config.TestConfig(args.config) Loading @@ -93,7 +92,7 @@ def main(args): cfg.input_path, cfg.output_path, cfg.scenes, logger logger, ) # copy configuration to output directory Loading item_generation_scripts/processing/config.py +1 −2 Original line number Diff line number Diff line Loading @@ -38,7 +38,7 @@ import yaml from item_generation_scripts.constants import ( DEFAULT_CONFIG, DEFAULT_CONFIG_ISM2, REQUIRED_KEYS REQUIRED_KEYS, ) Loading Loading @@ -127,4 +127,3 @@ class TestConfig: # Report missing keys to the user if MISSING_KEYS: raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}") item_generation_scripts/processing/process_ism_items.py +95 −72 Original line number Diff line number Diff line Loading @@ -31,25 +31,25 @@ # import csv import logging import os import sys import shutil import numpy as np import logging import csv import subprocess as sp import sys from pathlib import Path import numpy as np from item_generation_scripts.audiotools import ( audio, audioarray, audiofile, binauralobjectrenderer, metadata metadata, ) from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness from item_generation_scripts.audiotools import audio # function for converting nd numpy array to strings with 2 decimal digits def csv_formatdata(data): Loading @@ -63,9 +63,8 @@ def generate_ism_items( input_path: Path, output_path: Path, scenes: dict, logger: logging.Logger logger: logging.Logger, ): """Generate ISM items with metadata from mono items based on scene description""" # get the number of scenes Loading @@ -75,26 +74,26 @@ def generate_ism_items( logger.info(f"Processing {scene_name} out of {N_scenes} scenes") # extract the number of audio sources N_sources = len(np.atleast_1d(scene['source'])) N_sources = len(np.atleast_1d(scene["source"])) y = None y_meta = None for i in range(N_sources): source_file = np.atleast_1d(scene['source'])[i] source_azi = np.atleast_1d(scene['azimuth'])[i] source_ele = np.atleast_1d(scene['elevation'])[i] source_type = 'speech' #### !!!! TBD - support generic audio + background noise and speech in the .yml file source_delay = np.atleast_1d(scene['delay'])[i] logger.info(f"Encoding {source_file} at position(s) {source_azi},{source_ele}") source_file = np.atleast_1d(scene["source"])[i] source_azi = np.atleast_1d(scene["azimuth"])[i] source_ele = np.atleast_1d(scene["elevation"])[i] source_type = "speech" #### !!!! TBD - support generic audio + background noise and speech in the .yml file source_delay = np.atleast_1d(scene["delay"])[i] logger.info( f"Encoding {source_file} at position(s) {source_azi},{source_ele}" ) # read source file # x, fs = audiofile.read(os.path.join(input_path, source_file)) #### !!!! TBD - check the support for headerless .raw files # pdb.set_trace() audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file)) x = audio_object.audio fs = audio_object.fs Loading @@ -108,9 +107,13 @@ def generate_ism_items( # read azimuth information and create array if isinstance(source_azi, str): if ':' in source_azi: source_azi = source_azi.split(':') azi = np.arange(float(eval(source_azi[0])), float(eval(source_azi[2])), float(eval(source_azi[1]))) if ":" in source_azi: source_azi = source_azi.split(":") azi = np.arange( float(eval(source_azi[0])), float(eval(source_azi[2])), float(eval(source_azi[1])), ) else: azi = np.array(float(eval(source_azi)), ndmin=1)[:N_frames] else: Loading @@ -129,13 +132,19 @@ def generate_ism_items( # check if azimuth is from -180 .. +180 if any(azi > 180) or any(azi < -180): logger.error(f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}") logger.error( f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}" ) # read elevation information and create array if isinstance(source_ele, str): if ':' in source_ele: source_ele = source_ele.split(':') ele = np.arange(float(eval(source_ele[0])), float(eval(source_ele[2])), float(eval(source_ele[1]))) if ":" in source_ele: source_ele = source_ele.split(":") ele = np.arange( float(eval(source_ele[0])), float(eval(source_ele[2])), float(eval(source_ele[1])), ) else: ele = np.array(float(eval(source_ele)), ndmin=1)[:N_frames] else: Loading @@ -151,10 +160,14 @@ def generate_ism_items( # check if elevation is from -90 .. +90 if any(ele > 90) or any(ele < -90): logger.error(f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}") logger.error( f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}" ) # additional metadata dist = np.ones(N_frames) #### !!!! TBD - check what to do with these metadata dist = np.ones( N_frames ) #### !!!! TBD - check what to do with these metadata spread = np.zeros(N_frames) gain = np.ones(N_frames) Loading @@ -167,7 +180,9 @@ def generate_ism_items( x = np.concatenate([pre, x]) # apply delay to metadata as well pre = np.tile([0.00,0.00,1.00,0.00,1.00], (int(source_delay * 50), 1)) pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (int(source_delay * 50), 1) ) # pre = np.zeros((int(source_delay * 50), x_meta.shape[1])) x_meta = np.concatenate([pre, x_meta]) Loading @@ -194,26 +209,34 @@ def generate_ism_items( if x_meta.shape[1] > y_meta.shape[1]: N_delta = x_meta.shape[1] - y_meta.shape[1] y_meta = y_meta.reshape(y_meta.shape[1], -1) # reshape to 2d array y_meta = np.vstack((y_meta, np.tile(y_meta[-1,:], (N_delta, 1)))) # repeat last row N_delta times and append to the array y_meta = y_meta.reshape(N_srcs, -1, N_meta_features) # reshape back to 3d array y_meta = np.vstack( (y_meta, np.tile(y_meta[-1, :], (N_delta, 1))) ) # repeat last row N_delta times and append to the array y_meta = y_meta.reshape( N_srcs, -1, N_meta_features ) # reshape back to 3d array elif y_meta.shape[1] > x_meta.shape[1]: N_delta = y_meta.shape[1] - x_meta.shape[1] x_meta = x_meta.reshape(x_meta.shape[1], -1) # reshape to 2d array x_meta = np.vstack((x_meta, np.tile(x_meta[-1,:], (N_delta, 1)))) # repeat last row N_delta times and append to the array x_meta = np.vstack( (x_meta, np.tile(x_meta[-1, :], (N_delta, 1))) ) # repeat last row N_delta times and append to the array x_meta = np.expand_dims(x_meta, axis=0) # reshape back to 3d array y_meta = np.concatenate([y_meta, x_meta]) # write individual ISM audio streams to the output file in an interleaved format output_filename = scene['name'] audiofile.write(os.path.join(output_path, output_filename), y, fs) ### !!!! replace all os.path.xxx operations with the Path object output_filename = scene["name"] audiofile.write( os.path.join(output_path, output_filename), y, fs ) ### !!!! replace all os.path.xxx operations with the Path object # write individual ISM metadata to output files in .csv format for i in range(N_sources): # generate .csv filename (should end with .0.csv, .1.csv, ...) csv_filename = os.path.normpath(f"{output_filename}.{i}.csv") with open(os.path.join(output_path, csv_filename), 'w') as f: with open(os.path.join(output_path, csv_filename), "w") as f: # create csv writer writer = csv.writer(f) Loading item_generation_scripts/constants.py +3 −3 File changed.Contains only whitespace changes. Show changes Loading
item_generation_scripts/__init__.py +4 −5 Original line number Diff line number Diff line Loading @@ -30,11 +30,12 @@ # the United Nations Convention on Contracts on the International Sales of Goods. # import os import logging import os import pdb from itertools import repeat import yaml import pdb from item_generation_scripts.constants import ( LOGGER_DATEFMT, Loading @@ -42,7 +43,6 @@ from item_generation_scripts.constants import ( LOGGER_SUFFIX, ) from item_generation_scripts.processing import config, process_ism_items from item_generation_scripts.processing import config from item_generation_scripts.utils import create_dir Loading Loading @@ -73,7 +73,6 @@ def logging_init(args, cfg): def main(args): # parse configuration cfg = config.TestConfig(args.config) Loading @@ -93,7 +92,7 @@ def main(args): cfg.input_path, cfg.output_path, cfg.scenes, logger logger, ) # copy configuration to output directory Loading
item_generation_scripts/processing/config.py +1 −2 Original line number Diff line number Diff line Loading @@ -38,7 +38,7 @@ import yaml from item_generation_scripts.constants import ( DEFAULT_CONFIG, DEFAULT_CONFIG_ISM2, REQUIRED_KEYS REQUIRED_KEYS, ) Loading Loading @@ -127,4 +127,3 @@ class TestConfig: # Report missing keys to the user if MISSING_KEYS: raise KeyError(f"The following key(s) must be specified : {MISSING_KEYS}")
item_generation_scripts/processing/process_ism_items.py +95 −72 Original line number Diff line number Diff line Loading @@ -31,25 +31,25 @@ # import csv import logging import os import sys import shutil import numpy as np import logging import csv import subprocess as sp import sys from pathlib import Path import numpy as np from item_generation_scripts.audiotools import ( audio, audioarray, audiofile, binauralobjectrenderer, metadata metadata, ) from item_generation_scripts.audiotools.wrappers.bs1770 import get_loudness from item_generation_scripts.audiotools import audio # function for converting nd numpy array to strings with 2 decimal digits def csv_formatdata(data): Loading @@ -63,9 +63,8 @@ def generate_ism_items( input_path: Path, output_path: Path, scenes: dict, logger: logging.Logger logger: logging.Logger, ): """Generate ISM items with metadata from mono items based on scene description""" # get the number of scenes Loading @@ -75,26 +74,26 @@ def generate_ism_items( logger.info(f"Processing {scene_name} out of {N_scenes} scenes") # extract the number of audio sources N_sources = len(np.atleast_1d(scene['source'])) N_sources = len(np.atleast_1d(scene["source"])) y = None y_meta = None for i in range(N_sources): source_file = np.atleast_1d(scene['source'])[i] source_azi = np.atleast_1d(scene['azimuth'])[i] source_ele = np.atleast_1d(scene['elevation'])[i] source_type = 'speech' #### !!!! TBD - support generic audio + background noise and speech in the .yml file source_delay = np.atleast_1d(scene['delay'])[i] logger.info(f"Encoding {source_file} at position(s) {source_azi},{source_ele}") source_file = np.atleast_1d(scene["source"])[i] source_azi = np.atleast_1d(scene["azimuth"])[i] source_ele = np.atleast_1d(scene["elevation"])[i] source_type = "speech" #### !!!! TBD - support generic audio + background noise and speech in the .yml file source_delay = np.atleast_1d(scene["delay"])[i] logger.info( f"Encoding {source_file} at position(s) {source_azi},{source_ele}" ) # read source file # x, fs = audiofile.read(os.path.join(input_path, source_file)) #### !!!! TBD - check the support for headerless .raw files # pdb.set_trace() audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file)) x = audio_object.audio fs = audio_object.fs Loading @@ -108,9 +107,13 @@ def generate_ism_items( # read azimuth information and create array if isinstance(source_azi, str): if ':' in source_azi: source_azi = source_azi.split(':') azi = np.arange(float(eval(source_azi[0])), float(eval(source_azi[2])), float(eval(source_azi[1]))) if ":" in source_azi: source_azi = source_azi.split(":") azi = np.arange( float(eval(source_azi[0])), float(eval(source_azi[2])), float(eval(source_azi[1])), ) else: azi = np.array(float(eval(source_azi)), ndmin=1)[:N_frames] else: Loading @@ -129,13 +132,19 @@ def generate_ism_items( # check if azimuth is from -180 .. +180 if any(azi > 180) or any(azi < -180): logger.error(f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}") logger.error( f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}" ) # read elevation information and create array if isinstance(source_ele, str): if ':' in source_ele: source_ele = source_ele.split(':') ele = np.arange(float(eval(source_ele[0])), float(eval(source_ele[2])), float(eval(source_ele[1]))) if ":" in source_ele: source_ele = source_ele.split(":") ele = np.arange( float(eval(source_ele[0])), float(eval(source_ele[2])), float(eval(source_ele[1])), ) else: ele = np.array(float(eval(source_ele)), ndmin=1)[:N_frames] else: Loading @@ -151,10 +160,14 @@ def generate_ism_items( # check if elevation is from -90 .. +90 if any(ele > 90) or any(ele < -90): logger.error(f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}") logger.error( f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}" ) # additional metadata dist = np.ones(N_frames) #### !!!! TBD - check what to do with these metadata dist = np.ones( N_frames ) #### !!!! TBD - check what to do with these metadata spread = np.zeros(N_frames) gain = np.ones(N_frames) Loading @@ -167,7 +180,9 @@ def generate_ism_items( x = np.concatenate([pre, x]) # apply delay to metadata as well pre = np.tile([0.00,0.00,1.00,0.00,1.00], (int(source_delay * 50), 1)) pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (int(source_delay * 50), 1) ) # pre = np.zeros((int(source_delay * 50), x_meta.shape[1])) x_meta = np.concatenate([pre, x_meta]) Loading @@ -194,26 +209,34 @@ def generate_ism_items( if x_meta.shape[1] > y_meta.shape[1]: N_delta = x_meta.shape[1] - y_meta.shape[1] y_meta = y_meta.reshape(y_meta.shape[1], -1) # reshape to 2d array y_meta = np.vstack((y_meta, np.tile(y_meta[-1,:], (N_delta, 1)))) # repeat last row N_delta times and append to the array y_meta = y_meta.reshape(N_srcs, -1, N_meta_features) # reshape back to 3d array y_meta = np.vstack( (y_meta, np.tile(y_meta[-1, :], (N_delta, 1))) ) # repeat last row N_delta times and append to the array y_meta = y_meta.reshape( N_srcs, -1, N_meta_features ) # reshape back to 3d array elif y_meta.shape[1] > x_meta.shape[1]: N_delta = y_meta.shape[1] - x_meta.shape[1] x_meta = x_meta.reshape(x_meta.shape[1], -1) # reshape to 2d array x_meta = np.vstack((x_meta, np.tile(x_meta[-1,:], (N_delta, 1)))) # repeat last row N_delta times and append to the array x_meta = np.vstack( (x_meta, np.tile(x_meta[-1, :], (N_delta, 1))) ) # repeat last row N_delta times and append to the array x_meta = np.expand_dims(x_meta, axis=0) # reshape back to 3d array y_meta = np.concatenate([y_meta, x_meta]) # write individual ISM audio streams to the output file in an interleaved format output_filename = scene['name'] audiofile.write(os.path.join(output_path, output_filename), y, fs) ### !!!! replace all os.path.xxx operations with the Path object output_filename = scene["name"] audiofile.write( os.path.join(output_path, output_filename), y, fs ) ### !!!! replace all os.path.xxx operations with the Path object # write individual ISM metadata to output files in .csv format for i in range(N_sources): # generate .csv filename (should end with .0.csv, .1.csv, ...) csv_filename = os.path.normpath(f"{output_filename}.{i}.csv") with open(os.path.join(output_path, csv_filename), 'w') as f: with open(os.path.join(output_path, csv_filename), "w") as f: # create csv writer writer = csv.writer(f) Loading
item_generation_scripts/constants.py +3 −3 File changed.Contains only whitespace changes. Show changes