diff --git a/item_gen_configs/P800-1.yml b/item_gen_configs/P800-1.yml new file mode 100644 index 0000000000000000000000000000000000000000..b1e958e27a43ef3ace455e45c365fa2330756bc4 --- /dev/null +++ b/item_gen_configs/P800-1.yml @@ -0,0 +1,303 @@ +--- +################################################ +# General configuration +################################################ + +### Output format +format: "STEREO" + +### Output sampling rate in Hz needed for headerless audio files; default = 48000 +fs: 48000 + +### IR sampling rate in Hz needed for headerless audio files; default = 48000 +IR_fs: 32000 + +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + +### Input path to mono files +input_path: "./items_mono" + +### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' +# IR_path: "./IR" + +### Output path for generated test items and metadata files +output_path: "experiments/selection/P800-1/proc_input" + +### Target loudness in LKFS; default = null (no loudness normalization applied) +loudness: -26 + +### Pre-amble and Post-amble length in seconds (default = 0.0) +preamble: 1.0 +postamble: 1.0 + +### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) +add_low_level_random_noise: true + + +################################################ +### Scene description +################################################ + +### Each scene must have a unique name +### Specify the mono source filenames (the program will search for it in the input_path folder) +### Specify the stereo IR source filenames (the program will search for it in the IR_path folder) +### Specify the 
overlap length in seconds for each input source (negative value creates a gap) +### Note 1: use [val1, val2, ...] for multiple sources in a scene +### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames + +scenes: + cat1_1: + name: "C1P1.wav" + description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." + source: ["f1.wav", "m1.wav"] + IR: ["SAABP01.wav", "SAABP07.wav"] + overlap: 1.0 + + cat1_2: + name: "C1P2.wav" + description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." + source: ["m2.wav", "f2.wav"] + IR: ["SAABP05.wav", "SAABP03.wav"] + overlap: 1.0 + + cat1_3: + name: "C1P3.wav" + description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." + source: ["f3.wav", "m3.wav"] + IR: ["SAABP02.wav", "SAABP06.wav"] + overlap: 1.0 + + cat1_4: + name: "C1P4.wav" + description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." + source: ["m1.wav", "f1.wav"] + IR: ["SAABP04.wav", "SAABP01.wav"] + overlap: 1.0 + + cat1_5: + name: "C1P5.wav" + description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." + source: ["f2.wav", "m2.wav"] + IR: ["SAABP03.wav", "SAABP04.wav"] + overlap: 1.0 + + cat1_6: + name: "C1P6.wav" + description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." + source: ["m3.wav", "f3.wav"] + IR: ["SAABP07.wav", "SAABP02.wav"] + overlap: 1.0 + + cat2_1: + name: "C2P1.wav" + description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." + source: ["m3.wav", "f3.wav"] + IR: ["LAABP05.wav", "LAABP11.wav"] + overlap: -1.0 + + cat2_2: + name: "C2P2.wav" + description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." 
+ source: ["f1.wav", "m1.wav"] + IR: ["LAABP01.wav", "LAABP06.wav"] + overlap: -1.0 + + cat2_3: + name: "C2P3.wav" + description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." + source: ["m2.wav", "f2.wav"] + IR: ["LAABP03.wav", "LAABP07.wav"] + overlap: -1.0 + + cat2_4: + name: "C2P4.wav" + description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." + source: ["f3.wav", "m3.wav"] + IR: ["LAABP05.wav", "LAABP08.wav"] + overlap: -1.0 + + cat2_5: + name: "C2P5.wav" + description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." + source: ["m1.wav", "f1.wav"] + IR: ["LAABP09.wav", "LAABP07.wav"] + overlap: -1.0 + + cat2_6: + name: "C2P6.wav" + description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." + source: ["f2.wav", "m2.wav"] + IR: ["LAABP10.wav", "LAABP09.wav"] + overlap: -1.0 + + cat3_1: + name: "C3P1.wav" + description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." + source: ["f2.wav", "m2.wav"] + IR: ["SAMSP01.wav", "SAMSP07.wav"] + overlap: -1.0 + + cat3_2: + name: "C3P2.wav" + description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." + source: ["m3.wav", "f3.wav"] + IR: ["SAMSP05.wav", "SAMSP03.wav"] + overlap: -1.0 + + cat3_3: + name: "C3P3.wav" + description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." + source: ["f1.wav", "m1.wav"] + IR: ["SAMSP02.wav", "SAMSP06.wav"] + overlap: -1.0 + + cat3_4: + name: "C3P4.wav" + description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." + source: ["m2.wav", "f2.wav"] + IR: ["SAMSP04.wav", "SAMSP01.wav"] + overlap: -1.0 + + cat3_5: + name: "C3P5.wav" + description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." 
+ source: ["f3.wav", "m3.wav"] + IR: ["SAMSP03.wav", "SAMSP04.wav"] + overlap: -1.0 + + cat3_6: + name: "C3P6.wav" + description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." + source: ["m1.wav", "f1.wav"] + IR: ["SAMSP07.wav", "SAMSP02.wav"] + overlap: -1.0 + + cat4_1: + name: "C4P1.wav" + description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." + source: ["m1.wav", "f1.wav"] + IR: ["SEABP01.wav", "SEABP07.wav"] + overlap: 1.0 + + cat4_2: + name: "C4P2.wav" + description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." + source: ["f2.wav", "m2.wav"] + IR: ["SEABP05.wav", "SEABP03.wav"] + overlap: 1.0 + + cat4_3: + name: "C4P3.wav" + description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." + source: ["m3.wav", "f3.wav"] + IR: ["SEABP02.wav", "SEABP06.wav"] + overlap: 1.0 + + cat4_4: + name: "C4P4.wav" + description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." + source: ["f1.wav", "m1.wav"] + IR: ["SEABP04.wav", "SEABP01.wav"] + overlap: 1.0 + + cat4_5: + name: "C4P5.wav" + description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." + source: ["m2.wav", "f2.wav"] + IR: ["SEABP03.wav", "SEABP04.wav"] + overlap: 1.0 + + cat4_6: + name: "C4P6.wav" + description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." + source: ["f3.wav", "m3.wav"] + IR: ["SEABP07.wav", "SEABP02.wav"] + overlap: 1.0 + + cat5_1: + name: "C5P1.wav" + description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." + source: ["f3.wav", "m3.wav"] + IR: ["LEABP02.wav", "LEABP08.wav"] + overlap: 1.0 + + cat5_2: + name: "C5P2.wav" + description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." 
+ source: ["m1.wav", "f1.wav"] + IR: ["LEABP09.wav", "LEABP04.wav"] + overlap: 1.0 + + cat5_3: + name: "C5P3.wav" + description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." + source: ["f2.wav", "m2.wav"] + IR: ["LEABP06.wav", "LEABP10.wav"] + overlap: 1.0 + + cat5_4: + name: "C5P4.wav" + description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." + source: ["m3.wav", "f3.wav"] + IR: ["LEABP11.wav", "LEABP08.wav"] + overlap: 1.0 + + cat5_5: + name: "C5P5.wav" + description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." + source: ["f1.wav", "m1.wav"] + IR: ["LEABP10.wav", "LEABP12.wav"] + overlap: 1.0 + + cat5_6: + name: "C5P6.wav" + description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." + source: ["m2.wav", "f2.wav"] + IR: ["LEABP12.wav", "LEABP01.wav"] + overlap: 1.0 + + cat6_1: + name: "C6P1.wav" + description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." + source: ["m2.wav", "f2.wav"] + IR: ["SEABP01.wav", "SEABP07.wav"] + overlap: -1.0 + + cat6_2: + name: "C6P2.wav" + description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." + source: ["f3.wav", "m3.wav"] + IR: ["SEABP05.wav", "SEABP03.wav"] + overlap: -1.0 + + cat6_3: + name: "C6P3.wav" + description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." + source: ["m1.wav", "f1.wav"] + IR: ["SEABP02.wav", "SEABP06.wav"] + overlap: -1.0 + + cat6_4: + name: "C6P4.wav" + description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." + source: ["f2.wav", "m2.wav"] + IR: ["SEABP04.wav", "SEABP01.wav"] + overlap: -1.0 + + cat6_5: + name: "C6P5.wav" + description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." 
+ source: ["m3.wav", "f3.wav"] + IR: ["SEABP03.wav", "SEABP04.wav"] + overlap: -1.0 + + cat6_6: + name: "C6P6.wav" + description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." + source: ["f1.wav", "m1.wav"] + IR: ["SEABP07.wav", "SEABP02.wav"] + overlap: -1.0 + \ No newline at end of file diff --git a/item_gen_configs/P800-2.yml b/item_gen_configs/P800-2.yml new file mode 100644 index 0000000000000000000000000000000000000000..906fcd3f49dc556085f61460b9a0825ba2ce1daa --- /dev/null +++ b/item_gen_configs/P800-2.yml @@ -0,0 +1,303 @@ +--- +################################################ +# General configuration +################################################ + +### Output format +format: "STEREO" + +### Output sampling rate in Hz needed for headerless audio files; default = 48000 +fs: 48000 + +### IR sampling rate in Hz needed for headerless audio files; default = 48000 +IR_fs: 32000 + +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! 
This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + +### Input path to mono files +input_path: "./items_mono" + +### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' +# IR_path: "./IR" + +### Output path for generated test items and metadata files +output_path: "experiments/selection/P800-2/proc_input" + +### Target loudness in LKFS; default = null (no loudness normalization applied) +loudness: -26 + +### Pre-amble and Post-amble length in seconds (default = 0.0) +preamble: 1.0 +postamble: 1.0 + +### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) +add_low_level_random_noise: true + + +################################################ +### Scene description +################################################ + +### Each scene must have a unique name +### Specify the mono source filenames (the program will search for it in the input_path folder) +### Specify the stereo IR source filenames (the program will search for it in the IR_path folder) +### Specify the overlap length in seconds for each input source (negative value creates a gap) +### Note 1: use [val1, val2, ...] for multiple sources in a scene +### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames + +scenes: + cat1_1: + name: "C1P1.wav" + description: "Car with AB microphone pickup, no overlap between the talkers, car noise." + source: ["f1.wav", "m1.wav"] + IR: ["", ""] + overlap: -1.0 + + cat1_2: + name: "C1P2.wav" + description: "Car with AB microphone pickup, no overlap between the talkers, car noise." + source: ["m2.wav", "f2.wav"] + IR: ["", ""] + overlap: -1.0 + + cat1_3: + name: "C1P3.wav" + description: "Car with AB microphone pickup, no overlap between the talkers, car noise." 
+ source: ["f3.wav", "m3.wav"] + IR: ["", ""] + overlap: -1.0 + + cat1_4: + name: "C1P4.wav" + description: "Car with AB microphone pickup, no overlap between the talkers, car noise." + source: ["m1.wav", "f1.wav"] + IR: ["", ""] + overlap: -1.0 + + cat1_5: + name: "C1P5.wav" + description: "Car with AB microphone pickup, no overlap between the talkers, car noise." + source: ["f2.wav", "m2.wav"] + IR: ["", ""] + overlap: -1.0 + + cat1_6: + name: "C1P6.wav" + description: "Car with AB microphone pickup, no overlap between the talkers, car noise." + source: ["m3.wav", "f3.wav"] + IR: ["", ""] + overlap: -1.0 + + cat2_1: + name: "C2P1.wav" + description: "Car with AB microphone pickup, no overlap between the talkers, street noise." + source: ["m3.wav", "f3.wav"] + IR: ["", ""] + overlap: -1.0 + + cat2_2: + name: "C2P2.wav" + description: "Car with AB microphone pickup, no overlap between the talkers, street noise." + source: ["f1.wav", "m1.wav"] + IR: ["", ""] + overlap: -1.0 + + cat2_3: + name: "C2P3.wav" + description: "Car with AB microphone pickup, no overlap between the talkers, street noise." + source: ["m2.wav", "f2.wav"] + IR: ["", ""] + overlap: -1.0 + + cat2_4: + name: "C2P4.wav" + description: "Car with AB microphone pickup, no overlap between the talkers, street noise." + source: ["f3.wav", "m3.wav"] + IR: ["", ""] + overlap: -1.0 + + cat2_5: + name: "C2P5.wav" + description: "Car with AB microphone pickup, no overlap between the talkers, street noise." + source: ["m1.wav", "f1.wav"] + IR: ["", ""] + overlap: -1.0 + + cat2_6: + name: "C2P6.wav" + description: "Car with AB microphone pickup, no overlap between the talkers, street noise." + source: ["f2.wav", "m2.wav"] + IR: ["", ""] + overlap: -1.0 + + cat3_1: + name: "C3P1.wav" + description: "Small echoic room with MS microphone pickup, no overlap between the talkers, office noise." 
+ source: ["f2.wav", "m2.wav"] + IR: ["SEMSP01.wav", "SEMSP07.wav"] + overlap: -1.0 + + cat3_2: + name: "C3P2.wav" + description: "Small echoic room with MS microphone pickup, no overlap between the talkers, office noise." + source: ["m3.wav", "f3.wav"] + IR: ["SEMSP05.wav", "SEMSP03.wav"] + overlap: -1.0 + + cat3_3: + name: "C3P3.wav" + description: "Small echoic room with MS microphone pickup, no overlap between the talkers, office noise." + source: ["f1.wav", "m1.wav"] + IR: ["SEMSP02.wav", "SEMSP06.wav"] + overlap: -1.0 + + cat3_4: + name: "C3P4.wav" + description: "Small echoic room with MS microphone pickup, no overlap between the talkers, office noise." + source: ["m2.wav", "f2.wav"] + IR: ["SEMSP04.wav", "SEMSP01.wav"] + overlap: -1.0 + + cat3_5: + name: "C3P5.wav" + description: "Small echoic room with MS microphone pickup, no overlap between the talkers, office noise." + source: ["f3.wav", "m3.wav"] + IR: ["SEMSP03.wav", "SEMSP04.wav"] + overlap: -1.0 + + cat3_6: + name: "C3P6.wav" + description: "Small echoic room with MS microphone pickup, no overlap between the talkers, office noise." + source: ["m1.wav", "f1.wav"] + IR: ["SEMSP07.wav", "SEMSP02.wav"] + overlap: -1.0 + + cat4_1: + name: "C4P1.wav" + description: "Small echoic room with AB microphone pickup, partial overlap between the talkers, office noise." + source: ["m1.wav", "f1.wav"] + IR: ["SEABP01.wav", "SEABP07.wav"] + overlap: 1.0 + + cat4_2: + name: "C4P2.wav" + description: "Small echoic room with AB microphone pickup, partial overlap between the talkers, office noise." + source: ["f2.wav", "m2.wav"] + IR: ["SEABP05.wav", "SEABP03.wav"] + overlap: 1.0 + + cat4_3: + name: "C4P3.wav" + description: "Small echoic room with AB microphone pickup, partial overlap between the talkers, office noise." 
+ source: ["m3.wav", "f3.wav"] + IR: ["SEABP02.wav", "SEABP06.wav"] + overlap: 1.0 + + cat4_4: + name: "C4P4.wav" + description: "Small echoic room with AB microphone pickup, partial overlap between the talkers, office noise." + source: ["f1.wav", "m1.wav"] + IR: ["SEABP04.wav", "SEABP01.wav"] + overlap: 1.0 + + cat4_5: + name: "C4P5.wav" + description: "Small echoic room with AB microphone pickup, partial overlap between the talkers, office noise." + source: ["m2.wav", "f2.wav"] + IR: ["SEABP03.wav", "SEABP04.wav"] + overlap: 1.0 + + cat4_6: + name: "C4P6.wav" + description: "Small echoic room with AB microphone pickup, partial overlap between the talkers, office noise." + source: ["f3.wav", "m3.wav"] + IR: ["SEABP07.wav", "SEABP02.wav"] + overlap: 1.0 + + cat5_1: + name: "C5P1.wav" + description: "Large echoic room with AB microphone pickup, partial overlap between the talkers, office noise." + source: ["f3.wav", "m3.wav"] + IR: ["LEABP02.wav", "LEABP08.wav"] + overlap: 1.0 + + cat5_2: + name: "C5P2.wav" + description: "Large echoic room with AB microphone pickup, partial overlap between the talkers, office noise." + source: ["m1.wav", "f1.wav"] + IR: ["LEABP09.wav", "LEABP04.wav"] + overlap: 1.0 + + cat5_3: + name: "C5P3.wav" + description: "Large echoic room with AB microphone pickup, partial overlap between the talkers, office noise." + source: ["f2.wav", "m2.wav"] + IR: ["LEABP06.wav", "LEABP10.wav"] + overlap: 1.0 + + cat5_4: + name: "C5P4.wav" + description: "Large echoic room with AB microphone pickup, partial overlap between the talkers, office noise." + source: ["m3.wav", "f3.wav"] + IR: ["LEABP11.wav", "LEABP08.wav"] + overlap: 1.0 + + cat5_5: + name: "C5P5.wav" + description: "Large echoic room with AB microphone pickup, partial overlap between the talkers, office noise." 
+ source: ["f1.wav", "m1.wav"] + IR: ["LEABP10.wav", "LEABP12.wav"] + overlap: 1.0 + + cat5_6: + name: "C5P6.wav" + description: "Large echoic room with AB microphone pickup, partial overlap between the talkers, office noise." + source: ["m2.wav", "f2.wav"] + IR: ["LEABP12.wav", "LEABP01.wav"] + overlap: 1.0 + + cat6_1: + name: "C6P1.wav" + description: "Small echoic room with binaural microphone pickup, no overlap between the talkers, office noise." + source: ["m2.wav", "f2.wav"] + IR: ["SEABP01.wav", "SEABP07.wav"] + overlap: -1.0 + + cat6_2: + name: "C6P2.wav" + description: "Small echoic room with binaural microphone pickup, no overlap between the talkers, office noise." + source: ["f3.wav", "m3.wav"] + IR: ["SEABP05.wav", "SEABP03.wav"] + overlap: -1.0 + + cat6_3: + name: "C6P3.wav" + description: "Small echoic room with binaural microphone pickup, no overlap between the talkers, office noise." + source: ["m1.wav", "f1.wav"] + IR: ["SEABP02.wav", "SEABP06.wav"] + overlap: -1.0 + + cat6_4: + name: "C6P4.wav" + description: "Small echoic room with binaural microphone pickup, no overlap between the talkers, office noise." + source: ["f2.wav", "m2.wav"] + IR: ["SEABP04.wav", "SEABP01.wav"] + overlap: -1.0 + + cat6_5: + name: "C6P5.wav" + description: "Small echoic room with binaural microphone pickup, no overlap between the talkers, office noise." + source: ["m3.wav", "f3.wav"] + IR: ["SEABP03.wav", "SEABP04.wav"] + overlap: -1.0 + + cat6_6: + name: "C6P6.wav" + description: "Small echoic room with binaural microphone pickup, no overlap between the talkers, office noise." 
+ source: ["f1.wav", "m1.wav"] + IR: ["SEABP07.wav", "SEABP02.wav"] + overlap: -1.0 + \ No newline at end of file diff --git a/item_gen_configs/P800-6.yml b/item_gen_configs/P800-6.yml new file mode 100644 index 0000000000000000000000000000000000000000..da77847835fb1a05cbceb3b26a9c4ad8f4ea0b90 --- /dev/null +++ b/item_gen_configs/P800-6.yml @@ -0,0 +1,303 @@ +--- +################################################ +# General configuration +################################################ + +### Output format +format: "ISM1" + +### Output sampling rate in Hz needed for headerless audio files; default = 48000 +fs: 48000 + +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + +### Input path to mono files +input_path: "./items_mono" + +### Output path for generated test items and metadata files +output_path: "experiments/selection/P800-6/proc_input" + +### Target loudness in LKFS; default = null (no loudness normalization applied) +loudness: -26 + +### Pre-amble and Post-amble length in seconds (default = 0.0) +# preamble: 0.5 +# postamble: 0.5 + +### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) +add_low_level_random_noise: true + + +################################################ +### Scene description +################################################ + +### Each scene must start with the sceneN tag +### Specify the mono source filename (the program will search for it in the input_path folder) +### Specify azimuth and elevation for each input source +### Note 1: use [val1, val2, ...] 
for multiple sources in a scene +### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames + +### Note 3: we're using right-handed coordinate system with azi = 0 pointing from the nose to the screen +### azimuth: float, [-180,180]; positive indicates left +### elevation: float, [-90,90]; positive indicates up +### distance: float, tbd: default: 1 +### spread: float, [0,360]; spread in angles from 0 ... 360˚ +### gain: float, [0,1] + +scenes: + a1: + name: "G1S1.wav" + description: "Talker sitting at a table" + source: "m1.wav" + azimuth: 0 + elevation: 0 + + a2: + name: "G6S2.wav" + description: "Talker sitting at a table" + source: "f3.wav" + azimuth: 60 + elevation: 0 + + a3: + name: "G5S3.wav" + description: "Talker sitting at a table" + source: "m3.wav" + azimuth: 120 + elevation: 0 + + a4: + name: "G4S4.wav" + description: "Talker sitting at a table" + source: "f2.wav" + azimuth: 180 + elevation: 0 + + a5: + name: "G3S5.wav" + description: "Talker sitting at a table" + source: "m2.wav" + azimuth: 240 + elevation: 0 + + a6: + name: "G2S6.wav" + description: "Talker sitting at a table" + source: "f1.wav" + azimuth: 300 + elevation: 0 + + b1: + name: "G2S1.wav" + description: "standing talker." + source: "f1.wav" + azimuth: 120 + elevation: 35 + + b2: + name: "G1S2.wav" + description: "standing talker." + source: "m1.wav" + azimuth: 180 + elevation: 35 + + b3: + name: "G6S3.wav" + description: "standing talker." + source: "f3.wav" + azimuth: 240 + elevation: 35 + + b4: + name: "G5S4.wav" + description: "standing talker." + source: "m3.wav" + azimuth: 300 + elevation: 35 + + b5: + name: "G4S5.wav" + description: "standing talker." + source: "f2.wav" + azimuth: 0 + elevation: 35 + + b6: + name: "G3S6.wav" + description: "standing talker." + source: "m2.wav" + azimuth: 60 + elevation: 35 + + c1: + name: "G3S1.wav" + description: "Smaller talker (child) walking around a table." 
+ source: "m2.wav" + azimuth: "0:1:360" + elevation: 0 + + c2: + name: "G2S2.wav" + description: "Smaller talker (child) walking around a table." + source: "f1.wav" + azimuth: "60:1:60+360" + elevation: 0 + + c3: + name: "G1S3.wav" + description: "Smaller talker (child) walking around a table." + source: "m1.wav" + azimuth: "120:1:120+360" + elevation: 0 + + c4: + name: "G6S4.wav" + description: "Smaller talker (child) walking around a table." + source: "f3.wav" + azimuth: "180:1:180+360" + elevation: 0 + + c5: + name: "G5S5.wav" + description: "Smaller talker (child) walking around a table." + source: "m3.wav" + azimuth: "240:1:240+360" + elevation: 0 + + c6: + name: "G4S6.wav" + description: "Smaller talker (child) walking around a table." + source: "f2.wav" + azimuth: "300:1:300+360" + elevation: 0 + + d1: + name: "G4S1.wav" + description: "Talker walking around the table." + source: "f2.wav" + azimuth: "0:-1:-360" + elevation: 35 + + d2: + name: "G3S2.wav" + description: "Talker walking around the table." + source: "m2.wav" + azimuth: "60:-1:60-360" + elevation: 35 + + d3: + name: "G2S3.wav" + description: "Talker walking around the table." + source: "f1.wav" + azimuth: "120:-1:120-360" + elevation: 35 + + d4: + name: "G1S4.wav" + description: "Talker walking around the table." + source: "m1.wav" + azimuth: "180:-1:180-360" + elevation: 35 + + d5: + name: "G6S5.wav" + description: "Talker walking around the table." + source: "f3.wav" + azimuth: "240:-1:240-360" + elevation: 35 + + d6: + name: "G5S6.wav" + description: "Talker walking around the table." + source: "m3.wav" + azimuth: "300:-1:300-360" + elevation: 35 + + e1: + name: "G5S1.wav" + description: "Elevation displacement." + source: "m3.wav" + azimuth: 240 + elevation: "-90:0.5:90" + + e2: + name: "G4S2.wav" + description: "Elevation displacement." + source: "f2.wav" + azimuth: 300 + elevation: "-90:0.5:90" + + e3: + name: "G3S3.wav" + description: "Elevation displacement." 
+ source: "m2.wav" + azimuth: 0 + elevation: "-90:0.5:90" + + e4: + name: "G2S4.wav" + description: "Elevation displacement." + source: "f1.wav" + azimuth: 60 + elevation: "-90:0.5:90" + + e5: + name: "G1S5.wav" + description: "Elevation displacement." + source: "m1.wav" + azimuth: 120 + elevation: "-90:0.5:90" + + e6: + name: "G6S6.wav" + description: "Elevation displacement." + source: "f3.wav" + azimuth: 180 + elevation: "-90:0.5:90" + + f1: + name: "G6S1.wav" + description: "Azimuth and elevation displacement." + source: "f3.wav" + azimuth: "60:0.5:60+180" + elevation: "35:-0.2:-35" + + f2: + name: "G5S2.wav" + description: "Azimuth and elevation displacement." + source: "m3.wav" + azimuth: "120:0.5:120+180" + elevation: "35:-0.2:-35" + + f3: + name: "G4S3.wav" + description: "Azimuth and elevation displacement." + source: "f2.wav" + azimuth: "180:0.5:180+180" + elevation: "35:-0.2:-35" + + f4: + name: "G3S4.wav" + description: "Azimuth and elevation displacement." + source: "m2.wav" + azimuth: "240:0.5:240+180" + elevation: "35:-0.2:-35" + + f5: + name: "G2S5.wav" + description: "Azimuth and elevation displacement." + source: "f1.wav" + azimuth: "300:0.5:300+180" + elevation: "35:-0.2:-35" + + f6: + name: "G1S6.wav" + description: "Azimuth and elevation displacement." 
+ source: "m1.wav" + azimuth: "0:0.5:0+180" + elevation: "35:-0.2:-35" + \ No newline at end of file diff --git a/item_gen_configs/P800-7.yml b/item_gen_configs/P800-7.yml new file mode 100644 index 0000000000000000000000000000000000000000..0ec52ef4e06a9a4c2020a3b3b1dc004e52ffffd3 --- /dev/null +++ b/item_gen_configs/P800-7.yml @@ -0,0 +1,339 @@ +--- +################################################ +# General configuration +################################################ + +### Output format +format: "ISM2" + +### Output sampling rate in Hz needed for headerless audio files; default = 48000 +fs: 48000 + +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + +### Input path to mono files +input_path: "./items_mono" + +### Output path for generated test items and metadata files +output_path: "./items_ISM2" + +### Target loudness in LKFS; default = null (no loudness normalization applied) +loudness: -26 + +### Pre-amble and Post-amble length in seconds (default = 0.0) +preamble: 1.0 +postamble: 1.0 + +### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) +add_low_level_random_noise: true + +################################################ +### Scene description +################################################ + +### Each scene must start with the sceneN tag +### Specify the mono source filename (the program will search for it in the input_path folder) +### Specify azimuth and elevation for each input source +### Specify the overlap length in seconds for each input source (negative value creates a gap) +### Note 1: use [val1, val2, ...] 
### for multiple sources in a scene
### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames
###   NOTE(review): several moving-source entries below write the stop field as an arithmetic
###   expression (e.g. "180:1:120 + 360"); presumably the generator evaluates it - confirm.

### Note 3: a right-handed coordinate system is used, with azimuth = 0 pointing from the nose to the screen
###   azimuth: float, [-180,180]; positive indicates left
###   elevation: float, [-90,90]; positive indicates up
###   distance: float, range tbd; default = 1
###   spread: float, [0,360]; angular spread in degrees (0 ... 360°)
###   gain: float, [0,1]
###   NOTE(review): several scenes below use azimuth values outside [-180,180]
###   (e.g. 350, 290, or sweeps ending at 600); presumably the generator wraps them - confirm.

### Output names follow the pattern "G<g>S<s>.wav". Within this file every G number is always
### paired with the same two sources (G1: m1/f1, G2: m2/f2, G3: m3/f3, G4: f1/m2, G5: f2/m3,
### G6: f3/m1) and S is the scene index (1-6) inside its group, so all 36 names are unique.

scenes:
  ### Group a: two seated talkers at fixed positions, gap between utterances (overlap = -1.0)
  a1:
    name: "G1S1.wav"
    description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
    source: ["m1.wav", "f1.wav"]
    azimuth: [0, 50]
    elevation: [0, 0]
    overlap: -1.0

  a2:
    name: "G6S2.wav"
    description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
    source: ["f3.wav", "m1.wav"]
    azimuth: [50, 350]
    elevation: [0, 0]
    overlap: -1.0

  a3:
    name: "G5S3.wav"
    description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
    source: ["f2.wav", "m3.wav"]
    azimuth: [40, 290]
    elevation: [0, 0]
    overlap: -1.0

  a4:
    name: "G4S4.wav"
    description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
    source: ["f1.wav", "m2.wav"]
    azimuth: [30, 230]
    elevation: [15, 15]
    overlap: -1.0

  a5:
    name: "G3S5.wav"
    description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
    source: ["m3.wav", "f3.wav"]
    azimuth: [20, 170]
    elevation: [15, 15]
    overlap: -1.0

  a6:
    name: "G2S6.wav"
    description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
    source: ["m2.wav", "f2.wav"]
    azimuth: [10, 110]
    elevation: [15, 15]
    overlap: -1.0

  ### Group b: two standing talkers at fixed positions, overlapping utterances (overlap = 1.0)
  b1:
    name: "G2S1.wav"
    description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
    source: ["m2.wav", "f2.wav"]
    azimuth: [20, 170]
    elevation: [30, 30]
    overlap: 1.0

  b2:
    name: "G1S2.wav"
    description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
    source: ["m1.wav", "f1.wav"]
    azimuth: [10, 110]
    elevation: [30, 30]
    overlap: 1.0

  b3:
    name: "G6S3.wav"
    description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
    source: ["f3.wav", "m1.wav"]
    azimuth: [0, 50]
    elevation: [30, 30]
    overlap: 1.0

  b4:
    name: "G5S4.wav"
    description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
    source: ["f2.wav", "m3.wav"]
    azimuth: [50, 350]
    elevation: [60, 60]
    overlap: 1.0

  b5:
    name: "G4S5.wav"
    description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
    source: ["f1.wav", "m2.wav"]
    azimuth: [40, 290]
    elevation: [60, 60]
    overlap: 1.0

  b6:
    name: "G3S6.wav"
    description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
    source: ["m3.wav", "f3.wav"]
    azimuth: [30, 230]
    elevation: [60, 60]
    overlap: 1.0

  ### Group c: first talker seated (elevation 0), second standing (elevation 60), gap between utterances
  c1:
    name: "G3S1.wav"
    description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances."
    source: ["m3.wav", "f3.wav"]
    azimuth: [40, 290]
    elevation: [0, 60]
    overlap: -1.0

  c2:
    name: "G2S2.wav"
    description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances."
    source: ["m2.wav", "f2.wav"]
    azimuth: [30, 230]
    elevation: [0, 60]
    overlap: -1.0

  c3:
    name: "G1S3.wav"
    description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances."
    source: ["m1.wav", "f1.wav"]
    azimuth: [20, 170]
    elevation: [0, 60]
    overlap: -1.0

  c4:
    name: "G6S4.wav"
    description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances."
    source: ["f3.wav", "m1.wav"]
    azimuth: [10, 110]
    elevation: [0, 60]
    overlap: -1.0

  c5:
    name: "G5S5.wav"
    description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances."
    source: ["f2.wav", "m3.wav"]
    azimuth: [0, 50]
    elevation: [0, 60]
    overlap: -1.0

  c6:
    name: "G4S6.wav"
    description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances."
    source: ["f1.wav", "m2.wav"]
    azimuth: [50, 350]
    elevation: [0, 60]
    overlap: -1.0

  ### Group d: first talker at a fixed azimuth, second talker's azimuth given as a
  ### "start:step:stop" sweep, overlapping utterances
  d1:
    name: "G4S1.wav"
    description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
    source: ["f1.wav", "m2.wav"]
    azimuth: [50, "180:1:120 + 360"]
    elevation: [0, 60]
    overlap: 1.0

  d2:
    name: "G3S2.wav"
    description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
    source: ["m3.wav", "f3.wav"]
    azimuth: [300, "-70:-1:-10 - 360"]
    elevation: [0, 60]
    overlap: 1.0

  d3:
    name: "G2S3.wav"
    description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
    source: ["m2.wav", "f2.wav"]
    azimuth: [250, "-20:-1:-320"]
    elevation: [0, 60]
    overlap: 1.0

  d4:
    name: "G1S4.wav"
    description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
    source: ["m1.wav", "f1.wav"]
    azimuth: [200, "30:-1:-270"]
    elevation: [0, 60]
    overlap: 1.0

  d5:
    name: "G6S5.wav"
    description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
    source: ["f3.wav", "m1.wav"]
    azimuth: [150, "80:1:20 + 360"]
    elevation: [0, 60]
    overlap: 1.0

  d6:
    name: "G5S6.wav"
    description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
    source: ["f2.wav", "m3.wav"]
    azimuth: [100, "130:1:70 + 360"]
    elevation: [0, 60]
    overlap: 1.0

  ### Group e: both talkers use the same azimuth sweep at different elevations (10 and 60),
  ### overlapping utterances
  e1:
    name: "G5S1.wav"
    description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
    source: ["f2.wav", "m3.wav"]
    azimuth: ["80:1:20 + 360", "80:1:20 + 360"]
    elevation: [10, 60]
    overlap: 1.0

  e2:
    name: "G4S2.wav"
    description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
    source: ["f1.wav", "m2.wav"]
    azimuth: ["130:1:70 + 360", "130:1:70 + 360"]
    elevation: [10, 60]
    overlap: 1.0

  e3:
    name: "G3S3.wav"
    description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
    source: ["m3.wav", "f3.wav"]
    azimuth: ["180:1:120 + 360", "180:1:120 + 360"]
    elevation: [10, 60]
    overlap: 1.0

  e4:
    name: "G2S4.wav"
    description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
    source: ["m2.wav", "f2.wav"]
    azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"]
    elevation: [10, 60]
    overlap: 1.0

  e5:
    name: "G1S5.wav"
    description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
    source: ["m1.wav", "f1.wav"]
    azimuth: ["-20:-1:-320", "-20:-1:-320"]
    elevation: [10, 60]
    overlap: 1.0

  e6:
    name: "G6S6.wav"
    description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
    source: ["f3.wav", "m1.wav"]
    azimuth: ["30:-1:-270", "30:-1:-270"]
    elevation: [10, 60]
    overlap: 1.0

  ### Group f: both talkers start at the same azimuth and sweep with steps of opposite sign,
  ### gap between utterances
  f1:
    name: "G6S1.wav"
    description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
    source: ["f3.wav", "m1.wav"]
    azimuth: ["60:1:0 + 360", "60:-1:120 - 360"]
    elevation: [20, 50]
    overlap: -1.0

  f2:
    name: "G5S2.wav"
    description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
    source: ["f2.wav", "m3.wav"]
    azimuth: ["0:1:300", "0:-1:60 - 360"]
    elevation: [20, 50]
    overlap: -1.0

  f3:
    name: "G4S3.wav"
    description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
    source: ["f1.wav", "m2.wav"]
    azimuth: ["300:1:240 + 360", "300:-1:0"]
    elevation: [20, 50]
    overlap: -1.0

  f4:
    name: "G3S4.wav"
    description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
    source: ["m3.wav", "f3.wav"]
    azimuth: ["240:1:180 + 360", "240:-1:-60"]
    elevation: [20, 50]
    overlap: -1.0

  f5:
    name: "G2S5.wav"
    description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
    source: ["m2.wav", "f2.wav"]
    azimuth: ["180:1:120 + 360", "180:-1:-120"]
    elevation: [20, 50]
    overlap: -1.0

  f6:
    name: "G1S6.wav"
    description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
    source: ["m1.wav", "f1.wav"]
    azimuth: ["120:1:60 + 360", "120:-1:180 - 360"]
    elevation: [20, 50]
    overlap: -1.0