Loading item_generation_scripts/config/ISM1_CONFIG.yml +1 −1 Original line number Diff line number Diff line Loading @@ -189,7 +189,7 @@ scenes: elevation: 35 d3: name: "G3S2.wav" name: "G2S3.wav" description: "Talker walking around the table." source: "test_single.wav" azimuth: "120:-1:120-360" Loading item_generation_scripts/config/ISM2_CONFIG.yml +36 −36 Original line number Diff line number Diff line Loading @@ -55,7 +55,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [0, 50] elevation: [0, 0] delay: [0, 1] delay: [0, 0] a2: name: "G6S2.wav" Loading @@ -63,7 +63,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [50, 350] elevation: [0, 0] delay: [0, 1] delay: [0, 0] a3: name: "G5S3.wav" Loading @@ -71,7 +71,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [40, 290] elevation: [0, 0] delay: [0, 1] delay: [0, 0] a4: name: "G4S4.wav" Loading @@ -79,7 +79,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [30, 230] elevation: [15, 15] delay: [0, 1] delay: [0, 0] a5: name: "G3S5.wav" Loading @@ -87,7 +87,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [20, 170] elevation: [15, 15] delay: [0, 1] delay: [0, 0] a6: name: "G2S6.wav" Loading @@ -95,7 +95,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [10, 110] elevation: [15, 15] delay: [0, 1] delay: [0, 0] b1: name: "G2S1.wav" Loading @@ -103,7 +103,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [20, 170] elevation: [30, 30] delay: [0, 1] delay: [0, 1.5] b2: name: "G1S2.wav" Loading @@ -111,7 +111,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [10, 110] elevation: [30, 30] delay: [0, 1] delay: [0, 1.5] b3: name: "G6S3.wav" Loading @@ -119,7 +119,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [0, 50] elevation: [30, 30] delay: [0, 1] delay: [0, 1.5] b4: name: "G5S4.wav" Loading @@ -127,7 +127,7 @@ scenes: source: ["test_double.wav", 
"test_double.wav"] azimuth: [50, 350] elevation: [60, 60] delay: [0, 1] delay: [0, 1.5] b5: name: "G4S5.wav" Loading @@ -135,7 +135,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [40, 290] elevation: [60, 60] delay: [0, 1] delay: [0, 1.5] b6: name: "G3S6.wav" Loading @@ -143,7 +143,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [30, 230] elevation: [60, 60] delay: [0, 1] delay: [0, 1.5] c1: name: "G3S1.wav" Loading @@ -151,7 +151,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [40, 290] elevation: [0, 60] delay: [0, 1] delay: [0, 0] c2: name: "G2S2.wav" Loading @@ -159,7 +159,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [30, 230] elevation: [0, 60] delay: [0, 1] delay: [0, 0] c3: name: "G1S3.wav" Loading @@ -167,7 +167,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [20, 170] elevation: [0, 60] delay: [0, 1] delay: [0, 0] c4: name: "G6S4.wav" Loading @@ -183,7 +183,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [0, 50] elevation: [0, 60] delay: [0, 1] delay: [0, 0] c6: name: "G4S6.wav" Loading @@ -191,7 +191,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [50, 350] elevation: [0, 60] delay: [0, 1] delay: [0, 0] d1: name: "G4S1.wav" Loading @@ -199,7 +199,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [50, "180:1:120 + 360"] elevation: [0, 60] delay: [0, 1] delay: [0, 1.5] d2: name: "G3S2.wav" Loading @@ -207,15 +207,15 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [300, "-70:-1:-10 - 360"] elevation: [0, 60] delay: [0, 1] delay: [0, 1.5] d3: name: "G3S2.wav" name: "G2S3.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
source: ["test_double.wav", "test_double.wav"] azimuth: [250, "-20:-1:-320"] elevation: [0, 60] delay: [0, 1] delay: [0, 1.5] d4: name: "G1S4.wav" Loading @@ -223,7 +223,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [200, "30:-1:-270"] elevation: [0, 60] delay: [0, 1] delay: [0, 1.5] d5: name: "G6S5.wav" Loading @@ -231,7 +231,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [150, "80:1:20 + 360"] elevation: [0, 60] delay: [0, 1] delay: [0, 1.5] d6: name: "G5S6.wav" Loading @@ -239,7 +239,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [100, "130:1:70 + 360"] elevation: [0, 60] delay: [0, 1] delay: [0, 1.5] e1: name: "G5S1.wav" Loading @@ -247,7 +247,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["80:1:20 + 360", "80:1:20 + 360"] elevation: [10, 60] delay: [0, 1] delay: [0, 1.5] e2: name: "G4S2.wav" Loading @@ -255,7 +255,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["130:1:70 + 360", "130:1:70 + 360"] elevation: [10, 60] delay: [0, 1] delay: [0, 1.5] e3: name: "G3S3.wav" Loading @@ -263,7 +263,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["180:1:120 + 360", "180:1:120 + 360"] elevation: [10, 60] delay: [0, 1] delay: [0, 1.5] e4: name: "G2S4.wav" Loading @@ -271,7 +271,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] elevation: [10, 60] delay: [0, 1] delay: [0, 1.5] e5: name: "G1S5.wav" Loading @@ -279,7 +279,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["-20:-1:-320", "-20:-1:-320"] elevation: [10, 60] delay: [0, 1] delay: [0, 1.5] e6: name: "G6S6.wav" Loading @@ -287,7 +287,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["30:-1:-270", "30:-1:-270"] elevation: [10, 60] delay: [0, 1] delay: [0, 1.5] f1: name: "G6S1.wav" Loading @@ -295,7 +295,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["60:1:0 + 360", 
"60:-1:120 - 360"] elevation: [20, 50] delay: [0, 1] delay: [0, 0] f2: name: "G5S2.wav" Loading @@ -303,7 +303,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["0:1:300", "0:-1:60 - 360"] elevation: [20, 50] delay: [0, 1] delay: [0, 0] f3: name: "G4S3.wav" Loading @@ -311,7 +311,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["300:1:240 + 360", "300:-1:0"] elevation: [20, 50] delay: [0, 1] delay: [0, 0] f4: name: "G3S4.wav" Loading @@ -319,7 +319,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["240:1:180 + 360", "240:-1:-60"] elevation: [20, 50] delay: [0, 1] delay: [0, 0] f5: name: "G2S5.wav" Loading @@ -327,7 +327,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["180:1:120 + 360", "180:-1:-120"] elevation: [20, 50] delay: [0, 1] delay: [0, 0] f6: name: "G1S6.wav" Loading @@ -335,5 +335,5 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] elevation: [20, 50] delay: [0, 1] delay: [0, 0] No newline at end of file item_generation_scripts/config/STEREO_CONFIG.yml 0 → 100644 +306 −0 Original line number Diff line number Diff line --- ################################################ # General configuration ################################################ ### Output format format: "STEREO" ### Date; default = YYYYMMDD_HH.MM.SS # date: 2023.06.30 ### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 fs: 48000 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. ### Do not use file names with dots "." in them! 
This is not supported, use "_" instead ### For Windows users: please use double backslash '\\' in paths and add '.exe' to executable definitions ### Input path to mono files input_path: "./items_mono" ### Input path to stereo impulse response files input_path_IR: "./IR" ### Output path for generated test items and metadata files output_path: "./output" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 ################################################ ### Scene description ################################################ ### Each scene must start with the sceneN tag ### Specify the mono source filename (the program will search for it in the input_path folder) ### Specify azimuth and elevation for each input source ### Specify the delay in seconds for each input source ### Note 1: use [val1, val2, ...] for multiple sources in a scene ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20 ms frames ### Note 3: we use a right-handed coordinate system with azi = 0 pointing from the nose to the screen ### azimuth: float, [-180,180]; positive indicates left ### elevation: float, [-90,90]; positive indicates up ### distance: float, TBD; default: 1 ### spread: float, [0,360]; spread angle in degrees, from 0° to 360° ### gain: float, [0,1] scenes: a1: name: "G1S1.wav" description: "Large anechoic room with AB microphone pickup." 
source: ["test_double.wav", "test_double.wav"] IR: ["LAABP04.L.IR32", "LAABP04.R.IR32"] delay: [0, 0] a5: name: "G3S5.wav" description: "Large anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["LAABP05.L.IR32", "LAABP05.R.IR32"] delay: [0, 0] a6: name: "G2S6.wav" description: "Large anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["LAABP06.L.IR32", "LAABP06.R.IR32"] delay: [0, 0] b1: name: "G2S1.wav" description: "Small anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] delay: [0, 1.5] b2: name: "G1S2.wav" description: "Small anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] delay: [0, 1.5] b3: name: "G6S3.wav" description: "Small anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] delay: [0, 1.5] b4: name: "G5S4.wav" description: "Small anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] delay: [0, 1.5] b5: name: "G4S5.wav" description: "Small anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] delay: [0, 1.5] b6: name: "G3S6.wav" description: "Small anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] delay: [0, 1.5] c1: name: "G3S1.wav" description: "Small anechoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] delay: [0, 0] c2: name: "G2S2.wav" description: "Small anechoic room with MS microphone pickup." 
source: ["test_double.wav", "test_double.wav"] IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] delay: [0, 0] c3: name: "G1S3.wav" description: "Small anechoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] delay: [0, 0] c4: name: "G6S4.wav" description: "Small anechoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] delay: [0, 1] c5: name: "G5S5.wav" description: "Small anechoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] delay: [0, 0] c6: name: "G4S6.wav" description: "Small anechoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] delay: [0, 0] d1: name: "G4S1.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 1.5] d2: name: "G3S2.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 1.5] d3: name: "G3S2.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 1.5] d4: name: "G1S4.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 1.5] d5: name: "G6S5.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 1.5] d6: name: "G5S6.wav" description: "Small echoic room with MS microphone pickup." 
source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 1.5] e1: name: "G5S1.wav" description: "Small echoic room with binaural microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] delay: [0, 1.5] e2: name: "G4S2.wav" description: "Small echoic room with binaural microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] delay: [0, 1.5] e3: name: "G3S3.wav" description: "Small echoic room with binaural microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] delay: [0, 1.5] e4: name: "G2S4.wav" description: "Small echoic room with binaural microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] delay: [0, 1.5] e5: name: "G1S5.wav" description: "Small echoic room with binaural microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] delay: [0, 1.5] e6: name: "G6S6.wav" description: "Small echoic room with binaural microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] delay: [0, 1.5] f1: name: "G6S1.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 0] f2: name: "G5S2.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 0] f3: name: "G4S3.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 0] f4: name: "G3S4.wav" description: "Small echoic room with MS microphone pickup." 
source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 0] f5: name: "G2S5.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 0] f6: name: "G1S6.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 0] No newline at end of file item_generation_scripts/processing/process_ism_items.py +1 −1 Original line number Diff line number Diff line Loading @@ -227,7 +227,7 @@ def generate_ism_items( # generate .csv filename (should end with .0.csv, .1.csv, ...) csv_filename = os.path.normpath(f"{output_filename}.{i}.csv") with open(os.path.join(output_path, csv_filename), "w") as f: with open(os.path.join(output_path, csv_filename), 'w', newline='', encoding='utf-8') as f: # create csv writer writer = csv.writer(f) Loading Loading
item_generation_scripts/config/ISM1_CONFIG.yml +1 −1 Original line number Diff line number Diff line Loading @@ -189,7 +189,7 @@ scenes: elevation: 35 d3: name: "G3S2.wav" name: "G2S3.wav" description: "Talker walking around the table." source: "test_single.wav" azimuth: "120:-1:120-360" Loading
item_generation_scripts/config/ISM2_CONFIG.yml +36 −36 Original line number Diff line number Diff line Loading @@ -55,7 +55,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [0, 50] elevation: [0, 0] delay: [0, 1] delay: [0, 0] a2: name: "G6S2.wav" Loading @@ -63,7 +63,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [50, 350] elevation: [0, 0] delay: [0, 1] delay: [0, 0] a3: name: "G5S3.wav" Loading @@ -71,7 +71,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [40, 290] elevation: [0, 0] delay: [0, 1] delay: [0, 0] a4: name: "G4S4.wav" Loading @@ -79,7 +79,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [30, 230] elevation: [15, 15] delay: [0, 1] delay: [0, 0] a5: name: "G3S5.wav" Loading @@ -87,7 +87,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [20, 170] elevation: [15, 15] delay: [0, 1] delay: [0, 0] a6: name: "G2S6.wav" Loading @@ -95,7 +95,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [10, 110] elevation: [15, 15] delay: [0, 1] delay: [0, 0] b1: name: "G2S1.wav" Loading @@ -103,7 +103,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [20, 170] elevation: [30, 30] delay: [0, 1] delay: [0, 1.5] b2: name: "G1S2.wav" Loading @@ -111,7 +111,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [10, 110] elevation: [30, 30] delay: [0, 1] delay: [0, 1.5] b3: name: "G6S3.wav" Loading @@ -119,7 +119,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [0, 50] elevation: [30, 30] delay: [0, 1] delay: [0, 1.5] b4: name: "G5S4.wav" Loading @@ -127,7 +127,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [50, 350] elevation: [60, 60] delay: [0, 1] delay: [0, 1.5] b5: name: "G4S5.wav" Loading @@ -135,7 +135,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [40, 290] elevation: [60, 60] delay: [0, 1] delay: [0, 1.5] b6: name: "G3S6.wav" Loading @@ -143,7 +143,7 @@ 
scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [30, 230] elevation: [60, 60] delay: [0, 1] delay: [0, 1.5] c1: name: "G3S1.wav" Loading @@ -151,7 +151,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [40, 290] elevation: [0, 60] delay: [0, 1] delay: [0, 0] c2: name: "G2S2.wav" Loading @@ -159,7 +159,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [30, 230] elevation: [0, 60] delay: [0, 1] delay: [0, 0] c3: name: "G1S3.wav" Loading @@ -167,7 +167,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [20, 170] elevation: [0, 60] delay: [0, 1] delay: [0, 0] c4: name: "G6S4.wav" Loading @@ -183,7 +183,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [0, 50] elevation: [0, 60] delay: [0, 1] delay: [0, 0] c6: name: "G4S6.wav" Loading @@ -191,7 +191,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [50, 350] elevation: [0, 60] delay: [0, 1] delay: [0, 0] d1: name: "G4S1.wav" Loading @@ -199,7 +199,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [50, "180:1:120 + 360"] elevation: [0, 60] delay: [0, 1] delay: [0, 1.5] d2: name: "G3S2.wav" Loading @@ -207,15 +207,15 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [300, "-70:-1:-10 - 360"] elevation: [0, 60] delay: [0, 1] delay: [0, 1.5] d3: name: "G3S2.wav" name: "G2S3.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
source: ["test_double.wav", "test_double.wav"] azimuth: [250, "-20:-1:-320"] elevation: [0, 60] delay: [0, 1] delay: [0, 1.5] d4: name: "G1S4.wav" Loading @@ -223,7 +223,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [200, "30:-1:-270"] elevation: [0, 60] delay: [0, 1] delay: [0, 1.5] d5: name: "G6S5.wav" Loading @@ -231,7 +231,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [150, "80:1:20 + 360"] elevation: [0, 60] delay: [0, 1] delay: [0, 1.5] d6: name: "G5S6.wav" Loading @@ -239,7 +239,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: [100, "130:1:70 + 360"] elevation: [0, 60] delay: [0, 1] delay: [0, 1.5] e1: name: "G5S1.wav" Loading @@ -247,7 +247,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["80:1:20 + 360", "80:1:20 + 360"] elevation: [10, 60] delay: [0, 1] delay: [0, 1.5] e2: name: "G4S2.wav" Loading @@ -255,7 +255,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["130:1:70 + 360", "130:1:70 + 360"] elevation: [10, 60] delay: [0, 1] delay: [0, 1.5] e3: name: "G3S3.wav" Loading @@ -263,7 +263,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["180:1:120 + 360", "180:1:120 + 360"] elevation: [10, 60] delay: [0, 1] delay: [0, 1.5] e4: name: "G2S4.wav" Loading @@ -271,7 +271,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] elevation: [10, 60] delay: [0, 1] delay: [0, 1.5] e5: name: "G1S5.wav" Loading @@ -279,7 +279,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["-20:-1:-320", "-20:-1:-320"] elevation: [10, 60] delay: [0, 1] delay: [0, 1.5] e6: name: "G6S6.wav" Loading @@ -287,7 +287,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["30:-1:-270", "30:-1:-270"] elevation: [10, 60] delay: [0, 1] delay: [0, 1.5] f1: name: "G6S1.wav" Loading @@ -295,7 +295,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["60:1:0 + 360", 
"60:-1:120 - 360"] elevation: [20, 50] delay: [0, 1] delay: [0, 0] f2: name: "G5S2.wav" Loading @@ -303,7 +303,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["0:1:300", "0:-1:60 - 360"] elevation: [20, 50] delay: [0, 1] delay: [0, 0] f3: name: "G4S3.wav" Loading @@ -311,7 +311,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["300:1:240 + 360", "300:-1:0"] elevation: [20, 50] delay: [0, 1] delay: [0, 0] f4: name: "G3S4.wav" Loading @@ -319,7 +319,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["240:1:180 + 360", "240:-1:-60"] elevation: [20, 50] delay: [0, 1] delay: [0, 0] f5: name: "G2S5.wav" Loading @@ -327,7 +327,7 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["180:1:120 + 360", "180:-1:-120"] elevation: [20, 50] delay: [0, 1] delay: [0, 0] f6: name: "G1S6.wav" Loading @@ -335,5 +335,5 @@ scenes: source: ["test_double.wav", "test_double.wav"] azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] elevation: [20, 50] delay: [0, 1] delay: [0, 0] No newline at end of file
item_generation_scripts/config/STEREO_CONFIG.yml 0 → 100644 +306 −0 Original line number Diff line number Diff line --- ################################################ # General configuration ################################################ ### Output format format: "STEREO" ### Date; default = YYYYMMDD_HH.MM.SS # date: 2023.06.30 ### Deletion of temporary directories containing intermediate processing files, bitstreams etc.; default = false # delete_tmp: true ### Output sampling rate in Hz needed for headerless audio files; default = 48000 fs: 48000 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. ### Do not use file names with dots "." in them! This is not supported, use "_" instead ### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions ### Input path to mono files input_path: "./items_mono" ### Input path to stereo impulse response files input_path_IR: "./IR" ### Output path for generated test items and metadata files output_path: "./output" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 ################################################ ### Scene description ################################################ ### Each scene must start with the sceneN tag ### Specify the mono source filename (the program will search for it in the input_path folder) ### Specify azimuth and elevation for each input source ### Specify the delay in seconds for each input source ### Note 1: use [val1, val2, ...] 
for multiple sources in a scene ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20 ms frames ### Note 3: we use a right-handed coordinate system with azi = 0 pointing from the nose to the screen ### azimuth: float, [-180,180]; positive indicates left ### elevation: float, [-90,90]; positive indicates up ### distance: float, TBD; default: 1 ### spread: float, [0,360]; spread angle in degrees, from 0° to 360° ### gain: float, [0,1] scenes: a1: name: "G1S1.wav" description: "Large anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["LAABP01.L.IR32", "LAABP01.R.IR32"] delay: [0, 0] a2: name: "G6S2.wav" description: "Large anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["LAABP02.L.IR32", "LAABP02.R.IR32"] delay: [0, 0] a3: name: "G5S3.wav" description: "Large anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["LAABP03.L.IR32", "LAABP03.R.IR32"] delay: [0, 0] a4: name: "G4S4.wav" description: "Large anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["LAABP04.L.IR32", "LAABP04.R.IR32"] delay: [0, 0] a5: name: "G3S5.wav" description: "Large anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["LAABP05.L.IR32", "LAABP05.R.IR32"] delay: [0, 0] a6: name: "G2S6.wav" description: "Large anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["LAABP06.L.IR32", "LAABP06.R.IR32"] delay: [0, 0] b1: name: "G2S1.wav" description: "Small anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] delay: [0, 1.5] b2: name: "G1S2.wav" description: "Small anechoic room with AB microphone pickup." 
source: ["test_double.wav", "test_double.wav"] IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] delay: [0, 1.5] b3: name: "G6S3.wav" description: "Small anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] delay: [0, 1.5] b4: name: "G5S4.wav" description: "Small anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] delay: [0, 1.5] b5: name: "G4S5.wav" description: "Small anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] delay: [0, 1.5] b6: name: "G3S6.wav" description: "Small anechoic room with AB microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAABP01.L.IR32", "SAABP01.R.IR32"] delay: [0, 1.5] c1: name: "G3S1.wav" description: "Small anechoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] delay: [0, 0] c2: name: "G2S2.wav" description: "Small anechoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] delay: [0, 0] c3: name: "G1S3.wav" description: "Small anechoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] delay: [0, 0] c4: name: "G6S4.wav" description: "Small anechoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] delay: [0, 1] c5: name: "G5S5.wav" description: "Small anechoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] delay: [0, 0] c6: name: "G4S6.wav" description: "Small anechoic room with MS microphone pickup." 
source: ["test_double.wav", "test_double.wav"] IR: ["SAMSP01.L.IR32", "SAMSP01.R.IR32"] delay: [0, 0] d1: name: "G4S1.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 1.5] d2: name: "G3S2.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 1.5] d3: name: "G3S2.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 1.5] d4: name: "G1S4.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 1.5] d5: name: "G6S5.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 1.5] d6: name: "G5S6.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 1.5] e1: name: "G5S1.wav" description: "Small echoic room with binaural microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] delay: [0, 1.5] e2: name: "G4S2.wav" description: "Small echoic room with binaural microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] delay: [0, 1.5] e3: name: "G3S3.wav" description: "Small echoic room with binaural microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] delay: [0, 1.5] e4: name: "G2S4.wav" description: "Small echoic room with binaural microphone pickup." 
source: ["test_double.wav", "test_double.wav"] IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] delay: [0, 1.5] e5: name: "G1S5.wav" description: "Small echoic room with binaural microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] delay: [0, 1.5] e6: name: "G6S6.wav" description: "Small echoic room with binaural microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEBIP01.L.IR32", "SEBIP01.R.IR32"] delay: [0, 1.5] f1: name: "G6S1.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 0] f2: name: "G5S2.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 0] f3: name: "G4S3.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 0] f4: name: "G3S4.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 0] f5: name: "G2S5.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 0] f6: name: "G1S6.wav" description: "Small echoic room with MS microphone pickup." source: ["test_double.wav", "test_double.wav"] IR: ["SEMSP01.L.IR32", "SEMSP01.R.IR32"] delay: [0, 0] No newline at end of file
item_generation_scripts/processing/process_ism_items.py +1 −1 Original line number Diff line number Diff line Loading @@ -227,7 +227,7 @@ def generate_ism_items( # generate .csv filename (should end with .0.csv, .1.csv, ...) csv_filename = os.path.normpath(f"{output_filename}.{i}.csv") with open(os.path.join(output_path, csv_filename), "w") as f: with open(os.path.join(output_path, csv_filename), 'w', newline='', encoding='utf-8') as f: # create csv writer writer = csv.writer(f) Loading