Loading item_generation_scripts/__init__.py +1 −0 Original line number Diff line number Diff line Loading @@ -91,6 +91,7 @@ def main(args): cfg.output_path, cfg.scenes, logger, fs=cfg.fs ) # copy configuration to output directory Loading item_generation_scripts/config/ISM1_CONFIG.yml +39 −39 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ format: "ISM1" # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 # fs: 32000 fs: 48000 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. Loading @@ -21,10 +21,10 @@ format: "ISM1" ### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions ### Input path to mono files input_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/items_mono" input_path: "./items_mono" ### Output path for generated test items and metadata files output_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/output" output_path: "./output" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 Loading @@ -51,7 +51,7 @@ scenes: a1: name: "G1S1.wav" description: "Talker sitting at a table" source: "f2s5a_Talker1.wav" source: "test_single.wav" azimuth: 0 elevation: 0 delay: 0 Loading @@ -59,7 +59,7 @@ scenes: a2: name: "G6S2.wav" description: "Talker sitting at a table" source: "f5s10a_Talker1.wav" source: "test_single.wav" azimuth: 60 elevation: 0 delay: 0 Loading @@ -67,7 +67,7 @@ scenes: a3: name: "G5S3.wav" description: "Talker sitting at a table" source: "f2s5a_Talker1.wav" source: "test_single.wav" azimuth: 120 elevation: 0 delay: 0 Loading @@ -75,7 +75,7 @@ scenes: a4: name: "G4S4.wav" description: "Talker sitting at a table" source: "m4s11b_Talker1.wav" source: "test_single.wav" azimuth: 180 elevation: 0 delay: 0 Loading @@ -83,7 +83,7 @@ scenes: a5: name: "G3S5.wav" 
description: "Talker sitting at a table" source: "m1s4a_Talker1.wav" source: "test_single.wav" azimuth: 240 elevation: 0 delay: 0 Loading @@ -91,7 +91,7 @@ scenes: a6: name: "G2S6.wav" description: "Talker sitting at a table" source: "f5s10a_Talker1.wav" source: "test_single.wav" azimuth: 300 elevation: 0 delay: 0 Loading @@ -99,7 +99,7 @@ scenes: b1: name: "G2S1.wav" description: "standing talker." source: "f5s10b_Talker1.wav" source: "test_single.wav" azimuth: 120 elevation: 35 delay: 0 Loading @@ -107,7 +107,7 @@ scenes: b2: name: "G1S2.wav" description: "standing talker." source: "f2s1a_Talker1.wav" source: "test_single.wav" azimuth: 180 elevation: 35 delay: 0 Loading @@ -115,7 +115,7 @@ scenes: b3: name: "G6S3.wav" description: "standing talker." source: "f5s10b_Talker1.wav" source: "test_single.wav" azimuth: 240 elevation: 35 delay: 0 Loading @@ -123,7 +123,7 @@ scenes: b4: name: "G5S4.wav" description: "standing talker." source: "f2s1a_Talker1.wav" source: "test_single.wav" azimuth: 300 elevation: 35 delay: 0 Loading @@ -131,7 +131,7 @@ scenes: b5: name: "G4S5.wav" description: "standing talker." source: "m4s11a_Talker1.wav" source: "test_single.wav" azimuth: 0 elevation: 35 delay: 0 Loading @@ -139,7 +139,7 @@ scenes: b6: name: "G3S6.wav" description: "standing talker." source: "m1s2b_Talker1.wav" source: "test_single.wav" azimuth: 60 elevation: 35 delay: 0 Loading @@ -147,7 +147,7 @@ scenes: c1: name: "G3S1.wav" description: "Smaller talker (child) walking around a table." source: "m1s6b_Talker1.wav" source: "test_single.wav" azimuth: "0:1:360" elevation: 0 delay: 0 Loading @@ -155,7 +155,7 @@ scenes: c2: name: "G2S2.wav" description: "Smaller talker (child) walking around a table." source: "f5s14a_Talker1.wav" source: "test_single.wav" azimuth: "60:1:60+360" elevation: 0 delay: 0 Loading @@ -163,7 +163,7 @@ scenes: c3: name: "G1S3.wav" description: "Smaller talker (child) walking around a table." 
source: "f2s6a_Talker1.wav" source: "test_single.wav" azimuth: "120:1:120+360" elevation: 0 delay: 0 Loading @@ -171,7 +171,7 @@ scenes: c4: name: "G6S4.wav" description: "Smaller talker (child) walking around a table." source: "f5s14a_Talker1.wav" source: "test_single.wav" azimuth: "180:1:180+360" elevation: 0 delay: 0 Loading @@ -179,7 +179,7 @@ scenes: c5: name: "G5S5.wav" description: "Smaller talker (child) walking around a table." source: "f2s6a_Talker1.wav" source: "test_single.wav" azimuth: "240:1:240+360" elevation: 0 delay: 0 Loading @@ -187,7 +187,7 @@ scenes: c6: name: "G4S6.wav" description: "Smaller talker (child) walking around a table." source: "m4s13a_Talker1.wav" source: "test_single.wav" azimuth: "300:1:300+360" elevation: 0 delay: 0 Loading @@ -195,7 +195,7 @@ scenes: d1: name: "G4S1.wav" description: "Talker walking around the table." source: "m4s12b_Talker1.wav" source: "test_single.wav" azimuth: "0:-1:-360" elevation: 35 delay: 0 Loading @@ -203,7 +203,7 @@ scenes: d2: name: "G3S2.wav" description: "Talker walking around the table." source: "m1s12a_Talker1.wav" source: "test_single.wav" azimuth: "60:-1:60-360" elevation: 35 delay: 0 Loading @@ -211,7 +211,7 @@ scenes: d3: name: "G2S3.wav" description: "Talker walking around the table." source: "f5s15b_Talker1.wav" source: "test_single.wav" azimuth: "120:-1:120-360" elevation: 35 delay: 0 Loading @@ -219,7 +219,7 @@ scenes: d4: name: "G1S4.wav" description: "Talker walking around the table." source: "f2s3b_Talker1.wav" source: "test_single.wav" azimuth: "180:-1:180-360" elevation: 35 delay: 0 Loading @@ -227,7 +227,7 @@ scenes: d5: name: "G6S5.wav" description: "Talker walking around the table." source: "f5s15b_Talker1.wav" source: "test_single.wav" azimuth: "240:-1:240-360" elevation: 35 delay: 0 Loading @@ -235,7 +235,7 @@ scenes: d6: name: "G5S6.wav" description: "Talker walking around the table." 
source: "f2s3b_Talker1.wav" source: "test_single.wav" azimuth: "300:-1:300-360" elevation: 35 delay: 0 Loading @@ -243,7 +243,7 @@ scenes: e1: name: "G5S1.wav" description: "Elevation displacement." source: "f2s4a_Talker1.wav" source: "test_single.wav" azimuth: 240 elevation: "-90:0.5:90" delay: 0 Loading @@ -251,7 +251,7 @@ scenes: e2: name: "G4S2.wav" description: "Elevation displacement." source: "m4s16a_Talker1.wav" source: "test_single.wav" azimuth: 300 elevation: 0 delay: 0 Loading @@ -259,7 +259,7 @@ scenes: e3: name: "G3S3.wav" description: "Elevation displacement." source: "m1s16b_Talker1.wav" source: "test_single.wav" azimuth: 0 elevation: "-90:0.5:90" delay: 0 Loading @@ -267,7 +267,7 @@ scenes: e4: name: "G2S4.wav" description: "Elevation displacement." source: "f5s19a_Talker1.wav" source: "test_single.wav" azimuth: 60 elevation: "-90:0.5:90" delay: 0 Loading @@ -275,7 +275,7 @@ scenes: e5: name: "G1S5.wav" description: "Elevation displacement." source: "f2s4a_Talker1.wav" source: "test_single.wav" azimuth: 120 elevation: "-90:0.5:90" delay: 0 Loading @@ -283,7 +283,7 @@ scenes: e6: name: "G6S6.wav" description: "Elevation displacement." source: "f5s19a_Talker1.wav" source: "test_single.wav" azimuth: 180 elevation: "-90:0.5:90" delay: 0 Loading @@ -291,7 +291,7 @@ scenes: f1: name: "G6S1.wav" description: "Azimuth and elevation displacement." source: "f5s15a_Talker1.wav" source: "test_single.wav" azimuth: "60:0.5:60+180" elevation: "35:-0.2:-35" delay: 0 Loading @@ -299,7 +299,7 @@ scenes: f2: name: "G5S2.wav" description: "Azimuth and elevation displacement." source: "f2s7b_Talker1.wav" source: "test_single.wav" azimuth: "120:0.5:120+180" elevation: "35:-0.2:-35" delay: 0 Loading @@ -307,7 +307,7 @@ scenes: f3: name: "G4S3.wav" description: "Azimuth and elevation displacement." 
source: "m4s14a_Talker1.wav" source: "test_single.wav" azimuth: "180:0.5:180+180" elevation: "35:-0.2:-35" delay: 0 Loading @@ -315,7 +315,7 @@ scenes: f4: name: "G3S4.wav" description: "Azimuth and elevation displacement." source: "m1s7a_Talker1.wav" source: "test_single.wav" azimuth: "240:0.5:240+180" elevation: "35:-0.2:-35" delay: 0 Loading @@ -323,7 +323,7 @@ scenes: f5: name: "G2S5.wav" description: "Azimuth and elevation displacement." source: "f5s15a_Talker1.wav" source: "test_single.wav" azimuth: "300:0.5:300+180" elevation: "35:-0.2:-35" delay: 0 Loading @@ -331,7 +331,7 @@ scenes: f6: name: "G1S6.wav" description: "Azimuth and elevation displacement." source: "f2s7b_Talker1.wav" source: "test_single.wav" azimuth: "0:0.5:0+180" elevation: "35:-0.2:-35" delay: 0 item_generation_scripts/config/ISM2_CONFIG.yml +3 −3 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ format: "ISM2" # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 # fs: 32000 fs: 48000 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. 
Loading @@ -21,10 +21,10 @@ format: "ISM2" ### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions ### Input path to mono files input_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/items_mono" input_path: "./items_mono" ### Output path for generated test items and metadata files output_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/output" output_path: "./output" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 Loading item_generation_scripts/processing/config.py +1 −4 Original line number Diff line number Diff line Loading @@ -35,10 +35,7 @@ from pathlib import Path import yaml from item_generation_scripts.constants import ( DEFAULT_CONFIG, REQUIRED_KEYS, ) from item_generation_scripts.constants import DEFAULT_CONFIG, REQUIRED_KEYS def merge_dicts(base: dict, other: dict) -> None: Loading item_generation_scripts/processing/process_ism_items.py +4 −9 Original line number Diff line number Diff line Loading @@ -35,6 +35,7 @@ import csv import logging import os from pathlib import Path from typing import Optional import numpy as np Loading @@ -55,6 +56,7 @@ def generate_ism_items( output_path: Path, scenes: dict, logger: logging.Logger, fs: Optional[int] = 48000, ): """Generate ISM items with metadata from mono items based on scene description""" Loading @@ -73,7 +75,6 @@ def generate_ism_items( source_file = np.atleast_1d(scene["source"])[i] source_azi = np.atleast_1d(scene["azimuth"])[i] source_ele = np.atleast_1d(scene["elevation"])[i] # source_type = "speech" # !!!! TBD - support generic audio + background noise and speech in the .yml file source_delay = np.atleast_1d(scene["delay"])[i] logger.info( Loading @@ -81,10 +82,7 @@ def generate_ism_items( ) # read source file # x, fs = audiofile.read(os.path.join(input_path, source_file)) # !!!! 
TBD - check the support for headerless .raw files # pdb.set_trace() audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file)) audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs) x = audio_object.audio fs = audio_object.fs Loading @@ -93,7 +91,6 @@ def generate_ism_items( # adjust the level of the source file _, scale_factor = get_loudness(audio_object, target_level, "MONO") # print(f"Scaling loudness with factor: {scale_factor}") x *= scale_factor # read azimuth information and create array Loading Loading @@ -156,9 +153,7 @@ def generate_ism_items( ) # additional metadata dist = np.ones( N_frames ) # !!!! TBD - check what to do with these metadata dist = np.ones(N_frames) # !!!! TBD - check what to do with these metadata spread = np.zeros(N_frames) gain = np.ones(N_frames) Loading Loading
item_generation_scripts/__init__.py +1 −0 Original line number Diff line number Diff line Loading @@ -91,6 +91,7 @@ def main(args): cfg.output_path, cfg.scenes, logger, fs=cfg.fs ) # copy configuration to output directory Loading
item_generation_scripts/config/ISM1_CONFIG.yml +39 −39 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ format: "ISM1" # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 # fs: 32000 fs: 48000 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. Loading @@ -21,10 +21,10 @@ format: "ISM1" ### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions ### Input path to mono files input_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/items_mono" input_path: "./items_mono" ### Output path for generated test items and metadata files output_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/output" output_path: "./output" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 Loading @@ -51,7 +51,7 @@ scenes: a1: name: "G1S1.wav" description: "Talker sitting at a table" source: "f2s5a_Talker1.wav" source: "test_single.wav" azimuth: 0 elevation: 0 delay: 0 Loading @@ -59,7 +59,7 @@ scenes: a2: name: "G6S2.wav" description: "Talker sitting at a table" source: "f5s10a_Talker1.wav" source: "test_single.wav" azimuth: 60 elevation: 0 delay: 0 Loading @@ -67,7 +67,7 @@ scenes: a3: name: "G5S3.wav" description: "Talker sitting at a table" source: "f2s5a_Talker1.wav" source: "test_single.wav" azimuth: 120 elevation: 0 delay: 0 Loading @@ -75,7 +75,7 @@ scenes: a4: name: "G4S4.wav" description: "Talker sitting at a table" source: "m4s11b_Talker1.wav" source: "test_single.wav" azimuth: 180 elevation: 0 delay: 0 Loading @@ -83,7 +83,7 @@ scenes: a5: name: "G3S5.wav" description: "Talker sitting at a table" source: "m1s4a_Talker1.wav" source: "test_single.wav" azimuth: 240 elevation: 0 delay: 0 Loading @@ -91,7 +91,7 @@ scenes: a6: name: "G2S6.wav" description: "Talker sitting at a table" source: 
"f5s10a_Talker1.wav" source: "test_single.wav" azimuth: 300 elevation: 0 delay: 0 Loading @@ -99,7 +99,7 @@ scenes: b1: name: "G2S1.wav" description: "standing talker." source: "f5s10b_Talker1.wav" source: "test_single.wav" azimuth: 120 elevation: 35 delay: 0 Loading @@ -107,7 +107,7 @@ scenes: b2: name: "G1S2.wav" description: "standing talker." source: "f2s1a_Talker1.wav" source: "test_single.wav" azimuth: 180 elevation: 35 delay: 0 Loading @@ -115,7 +115,7 @@ scenes: b3: name: "G6S3.wav" description: "standing talker." source: "f5s10b_Talker1.wav" source: "test_single.wav" azimuth: 240 elevation: 35 delay: 0 Loading @@ -123,7 +123,7 @@ scenes: b4: name: "G5S4.wav" description: "standing talker." source: "f2s1a_Talker1.wav" source: "test_single.wav" azimuth: 300 elevation: 35 delay: 0 Loading @@ -131,7 +131,7 @@ scenes: b5: name: "G4S5.wav" description: "standing talker." source: "m4s11a_Talker1.wav" source: "test_single.wav" azimuth: 0 elevation: 35 delay: 0 Loading @@ -139,7 +139,7 @@ scenes: b6: name: "G3S6.wav" description: "standing talker." source: "m1s2b_Talker1.wav" source: "test_single.wav" azimuth: 60 elevation: 35 delay: 0 Loading @@ -147,7 +147,7 @@ scenes: c1: name: "G3S1.wav" description: "Smaller talker (child) walking around a table." source: "m1s6b_Talker1.wav" source: "test_single.wav" azimuth: "0:1:360" elevation: 0 delay: 0 Loading @@ -155,7 +155,7 @@ scenes: c2: name: "G2S2.wav" description: "Smaller talker (child) walking around a table." source: "f5s14a_Talker1.wav" source: "test_single.wav" azimuth: "60:1:60+360" elevation: 0 delay: 0 Loading @@ -163,7 +163,7 @@ scenes: c3: name: "G1S3.wav" description: "Smaller talker (child) walking around a table." source: "f2s6a_Talker1.wav" source: "test_single.wav" azimuth: "120:1:120+360" elevation: 0 delay: 0 Loading @@ -171,7 +171,7 @@ scenes: c4: name: "G6S4.wav" description: "Smaller talker (child) walking around a table." 
source: "f5s14a_Talker1.wav" source: "test_single.wav" azimuth: "180:1:180+360" elevation: 0 delay: 0 Loading @@ -179,7 +179,7 @@ scenes: c5: name: "G5S5.wav" description: "Smaller talker (child) walking around a table." source: "f2s6a_Talker1.wav" source: "test_single.wav" azimuth: "240:1:240+360" elevation: 0 delay: 0 Loading @@ -187,7 +187,7 @@ scenes: c6: name: "G4S6.wav" description: "Smaller talker (child) walking around a table." source: "m4s13a_Talker1.wav" source: "test_single.wav" azimuth: "300:1:300+360" elevation: 0 delay: 0 Loading @@ -195,7 +195,7 @@ scenes: d1: name: "G4S1.wav" description: "Talker walking around the table." source: "m4s12b_Talker1.wav" source: "test_single.wav" azimuth: "0:-1:-360" elevation: 35 delay: 0 Loading @@ -203,7 +203,7 @@ scenes: d2: name: "G3S2.wav" description: "Talker walking around the table." source: "m1s12a_Talker1.wav" source: "test_single.wav" azimuth: "60:-1:60-360" elevation: 35 delay: 0 Loading @@ -211,7 +211,7 @@ scenes: d3: name: "G2S3.wav" description: "Talker walking around the table." source: "f5s15b_Talker1.wav" source: "test_single.wav" azimuth: "120:-1:120-360" elevation: 35 delay: 0 Loading @@ -219,7 +219,7 @@ scenes: d4: name: "G1S4.wav" description: "Talker walking around the table." source: "f2s3b_Talker1.wav" source: "test_single.wav" azimuth: "180:-1:180-360" elevation: 35 delay: 0 Loading @@ -227,7 +227,7 @@ scenes: d5: name: "G6S5.wav" description: "Talker walking around the table." source: "f5s15b_Talker1.wav" source: "test_single.wav" azimuth: "240:-1:240-360" elevation: 35 delay: 0 Loading @@ -235,7 +235,7 @@ scenes: d6: name: "G5S6.wav" description: "Talker walking around the table." source: "f2s3b_Talker1.wav" source: "test_single.wav" azimuth: "300:-1:300-360" elevation: 35 delay: 0 Loading @@ -243,7 +243,7 @@ scenes: e1: name: "G5S1.wav" description: "Elevation displacement." 
source: "f2s4a_Talker1.wav" source: "test_single.wav" azimuth: 240 elevation: "-90:0.5:90" delay: 0 Loading @@ -251,7 +251,7 @@ scenes: e2: name: "G4S2.wav" description: "Elevation displacement." source: "m4s16a_Talker1.wav" source: "test_single.wav" azimuth: 300 elevation: 0 delay: 0 Loading @@ -259,7 +259,7 @@ scenes: e3: name: "G3S3.wav" description: "Elevation displacement." source: "m1s16b_Talker1.wav" source: "test_single.wav" azimuth: 0 elevation: "-90:0.5:90" delay: 0 Loading @@ -267,7 +267,7 @@ scenes: e4: name: "G2S4.wav" description: "Elevation displacement." source: "f5s19a_Talker1.wav" source: "test_single.wav" azimuth: 60 elevation: "-90:0.5:90" delay: 0 Loading @@ -275,7 +275,7 @@ scenes: e5: name: "G1S5.wav" description: "Elevation displacement." source: "f2s4a_Talker1.wav" source: "test_single.wav" azimuth: 120 elevation: "-90:0.5:90" delay: 0 Loading @@ -283,7 +283,7 @@ scenes: e6: name: "G6S6.wav" description: "Elevation displacement." source: "f5s19a_Talker1.wav" source: "test_single.wav" azimuth: 180 elevation: "-90:0.5:90" delay: 0 Loading @@ -291,7 +291,7 @@ scenes: f1: name: "G6S1.wav" description: "Azimuth and elevation displacement." source: "f5s15a_Talker1.wav" source: "test_single.wav" azimuth: "60:0.5:60+180" elevation: "35:-0.2:-35" delay: 0 Loading @@ -299,7 +299,7 @@ scenes: f2: name: "G5S2.wav" description: "Azimuth and elevation displacement." source: "f2s7b_Talker1.wav" source: "test_single.wav" azimuth: "120:0.5:120+180" elevation: "35:-0.2:-35" delay: 0 Loading @@ -307,7 +307,7 @@ scenes: f3: name: "G4S3.wav" description: "Azimuth and elevation displacement." source: "m4s14a_Talker1.wav" source: "test_single.wav" azimuth: "180:0.5:180+180" elevation: "35:-0.2:-35" delay: 0 Loading @@ -315,7 +315,7 @@ scenes: f4: name: "G3S4.wav" description: "Azimuth and elevation displacement." 
source: "m1s7a_Talker1.wav" source: "test_single.wav" azimuth: "240:0.5:240+180" elevation: "35:-0.2:-35" delay: 0 Loading @@ -323,7 +323,7 @@ scenes: f5: name: "G2S5.wav" description: "Azimuth and elevation displacement." source: "f5s15a_Talker1.wav" source: "test_single.wav" azimuth: "300:0.5:300+180" elevation: "35:-0.2:-35" delay: 0 Loading @@ -331,7 +331,7 @@ scenes: f6: name: "G1S6.wav" description: "Azimuth and elevation displacement." source: "f2s7b_Talker1.wav" source: "test_single.wav" azimuth: "0:0.5:0+180" elevation: "35:-0.2:-35" delay: 0
item_generation_scripts/config/ISM2_CONFIG.yml +3 −3 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ format: "ISM2" # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 # fs: 32000 fs: 48000 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. Loading @@ -21,10 +21,10 @@ format: "ISM2" ### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions ### Input path to mono files input_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/items_mono" input_path: "./items_mono" ### Output path for generated test items and metadata files output_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/output" output_path: "./output" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 Loading
item_generation_scripts/processing/config.py +1 −4 Original line number Diff line number Diff line Loading @@ -35,10 +35,7 @@ from pathlib import Path import yaml from item_generation_scripts.constants import ( DEFAULT_CONFIG, REQUIRED_KEYS, ) from item_generation_scripts.constants import DEFAULT_CONFIG, REQUIRED_KEYS def merge_dicts(base: dict, other: dict) -> None: Loading
item_generation_scripts/processing/process_ism_items.py +4 −9 Original line number Diff line number Diff line Loading @@ -35,6 +35,7 @@ import csv import logging import os from pathlib import Path from typing import Optional import numpy as np Loading @@ -55,6 +56,7 @@ def generate_ism_items( output_path: Path, scenes: dict, logger: logging.Logger, fs: Optional[int] = 48000, ): """Generate ISM items with metadata from mono items based on scene description""" Loading @@ -73,7 +75,6 @@ def generate_ism_items( source_file = np.atleast_1d(scene["source"])[i] source_azi = np.atleast_1d(scene["azimuth"])[i] source_ele = np.atleast_1d(scene["elevation"])[i] # source_type = "speech" # !!!! TBD - support generic audio + background noise and speech in the .yml file source_delay = np.atleast_1d(scene["delay"])[i] logger.info( Loading @@ -81,10 +82,7 @@ def generate_ism_items( ) # read source file # x, fs = audiofile.read(os.path.join(input_path, source_file)) # !!!! TBD - check the support for headerless .raw files # pdb.set_trace() audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file)) audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs) x = audio_object.audio fs = audio_object.fs Loading @@ -93,7 +91,6 @@ def generate_ism_items( # adjust the level of the source file _, scale_factor = get_loudness(audio_object, target_level, "MONO") # print(f"Scaling loudness with factor: {scale_factor}") x *= scale_factor # read azimuth information and create array Loading Loading @@ -156,9 +153,7 @@ def generate_ism_items( ) # additional metadata dist = np.ones( N_frames ) # !!!! TBD - check what to do with these metadata dist = np.ones(N_frames) # !!!! TBD - check what to do with these metadata spread = np.zeros(N_frames) gain = np.ones(N_frames) Loading