Loading item_gen_configs/P800-6.yml +50 −40 Original line number Diff line number Diff line Loading @@ -35,7 +35,7 @@ add_low_level_random_noise: true ### Scene description ################################################ ### Each scene must start with the sceneN tag ### Each scene must start with a unique tag ### Specify the mono source filename (the program will search for it in the input_path folder) ### Specify azimuth and elevation for each input source ### Note 1: use [val1, val2, ...] for multiple sources in a scene Loading @@ -44,13 +44,23 @@ add_low_level_random_noise: true ### Note 3: we're using right-handed coordinate system with azi = 0 pointing from the nose to the screen ### azimuth: float, [-180,180]; positive indicates left ### elevation: float, [-90,90]; positive indicates up ### distance: float, tbd: default: 1 ### spread: float, [0,360]; spread in angles from 0 ... 360˚ ### gain: float, [0,1] ### Naming convention for P.800 items ### The filenames of the input content samples are represented by: ### leeeayszz.wav ### The filenames of the accompanying metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: ### leeeayszz.met for metadata-assisted spatial audio ### leeeayszz.wav.o.csv for object-based audio ### where: ### l stands for the listening lab designator (a through d according to Table 2) ### eee stands for the experiment designator, e.g. 
p01 (see Table 1) ### a stands for audio, and y is the per experiment category according to IVAS-8a ### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07, where 07 is the preliminary ### o stands for object number; 0, 1, 2, 3 scenes: a1: name: "G1S1.wav" name: "lp06a1s01" description: "Talker sitting at a table" source: ["m1_s1.wav", "m1_s7.wav"] azimuth: 0 Loading @@ -58,7 +68,7 @@ scenes: overlap: -0.5 a2: name: "G6S2.wav" name: "lp06a1s02" description: "Talker sitting at a table" source: ["f3_s2.wav", "f3_s8.wav"] azimuth: 60 Loading @@ -66,238 +76,238 @@ scenes: overlap: -0.5 a3: name: "G5S3.wav" name: "lp06a1s03" description: "Talker sitting at a table" source: ["m3_s3.wav", "m3_s9.wav"] azimuth: 120 elevation: 0 a4: name: "G4S4.wav" name: "lp06a1s04" description: "Talker sitting at a table" source: ["f2_s4.wav", "f2_s10.wav"] azimuth: 180 elevation: 0 a5: name: "G3S5.wav" name: "lp06a1s05" description: "Talker sitting at a table" source: ["m2_s5.wav", "m2_s11.wav"] azimuth: 240 elevation: 0 a6: name: "G2S6.wav" name: "lp06a1s06" description: "Talker sitting at a table" source: ["f1_s6.wav", "f1_s12.wav"] azimuth: 300 elevation: 0 b1: name: "G2S1.wav" name: "lp06a2s01" description: "standing talker." source: ["f1_s1.wav", "f1_s7.wav"] azimuth: 120 elevation: 35 b2: name: "G1S2.wav" name: "lp06a2s02" description: "standing talker." source: ["m1_s2.wav", "m1_s8.wav"] azimuth: 180 elevation: 35 b3: name: "G6S3.wav" name: "lp06a2s03" description: "standing talker." source: ["f3_s3.wav", "f3_s9.wav"] azimuth: 240 elevation: 35 b4: name: "G5S4.wav" name: "lp06a2s04" description: "standing talker." source: ["m3_s4.wav", "m3_s10.wav"] azimuth: 300 elevation: 35 b5: name: "G4S5.wav" name: "lp06a2s05" description: "standing talker." source: ["f2_s5.wav", "f2_s11.wav"] azimuth: 0 elevation: 35 b6: name: "G3S6.wav" name: "lp06a2s06" description: "standing talker." 
source: ["m2_s6.wav", "m2_s12.wav"] azimuth: 60 elevation: 35 c1: name: "G3S1.wav" name: "lp06a3s01" description: "Smaller talker (child) walking around a table." source: ["m2_s1.wav", "m2_s7.wav"] azimuth: "0:1:360" elevation: 0 c2: name: "G2S2.wav" name: "lp06a3s02" description: "Smaller talker (child) walking around a table." source: ["f1_s2.wav", "f1_s8.wav"] azimuth: "60:1:60+360" elevation: 0 c3: name: "G1S3.wav" name: "lp06a3s03" description: "Smaller talker (child) walking around a table." source: ["m1_s3.wav", "m1_s9.wav"] azimuth: "120:1:120+360" elevation: 0 c4: name: "G6S4.wav" name: "lp06a3s04" description: "Smaller talker (child) walking around a table." source: ["f3_s4.wav", "f3_s10.wav"] azimuth: "180:1:180+360" elevation: 0 c5: name: "G5S5.wav" name: "lp06a3s05" description: "Smaller talker (child) walking around a table." source: ["m3_s5.wav", "m3_s11.wav"] azimuth: "240:1:240+360" elevation: 0 c6: name: "G4S6.wav" name: "lp06a3s06" description: "Smaller talker (child) walking around a table." source: ["f2_s6.wav", "f2_s12.wav"] azimuth: "300:1:300+360" elevation: 0 d1: name: "G4S1.wav" name: "lp06a4s01" description: "Talker walking around the table." source: ["f2_s1.wav", "f2_s7.wav"] azimuth: "0:-1:-360" elevation: 35 d2: name: "G3S2.wav" name: "lp06a4s02" description: "Talker walking around the table." source: ["m2_s2.wav", "m2_s8.wav"] azimuth: "60:-1:60-360" elevation: 35 d3: name: "G2S3.wav" name: "lp06a4s03" description: "Talker walking around the table." source: ["f1_s3.wav", "f1_s9.wav"] azimuth: "120:-1:120-360" elevation: 35 d4: name: "G1S4.wav" name: "lp06a4s04" description: "Talker walking around the table." source: ["m1_s4.wav", "m1_s10.wav"] azimuth: "180:-1:180-360" elevation: 35 d5: name: "G6S5.wav" name: "lp06a4s05" description: "Talker walking around the table." source: ["f3_s5.wav", "f3_s11.wav"] azimuth: "240:-1:240-360" elevation: 35 d6: name: "G5S6.wav" name: "lp06a4s06" description: "Talker walking around the table." 
source: ["m3_s6.wav", "m3_s12.wav"] azimuth: "300:-1:300-360" elevation: 35 e1: name: "G5S1.wav" name: "lp06a5s01" description: "Elevation displacement." source: ["m3_s1.wav", "m3_s7.wav"] azimuth: 240 elevation: "-90:0.5:90" e2: name: "G4S2.wav" name: "lp06a5s02" description: "Elevation displacement." source: ["f2_s2.wav", "f2_s8.wav"] azimuth: 300 elevation: 0 e3: name: "G3S3.wav" name: "lp06a5s03" description: "Elevation displacement." source: ["m2_s3.wav", "m2_s9.wav"] azimuth: 0 elevation: "-90:0.5:90" e4: name: "G2S4.wav" name: "lp06a5s04" description: "Elevation displacement." source: ["f1_s4.wav", "f1_s10.wav"] azimuth: 60 elevation: "-90:0.5:90" e5: name: "G1S5.wav" name: "lp06a5s05" description: "Elevation displacement." source: ["m1_s5.wav", "m1_s11.wav"] azimuth: 120 elevation: "-90:0.5:90" e6: name: "G6S6.wav" name: "lp06a5s06" description: "Elevation displacement." source: ["f3_s6.wav", "f3_s12.wav"] azimuth: 180 elevation: "-90:0.5:90" f1: name: "G6S1.wav" name: "lp06a6s01" description: "Azimuth and elevation displacement." source: ["f3_s1.wav", "f3_s7.wav"] azimuth: "60:0.5:60+180" elevation: "35:-0.2:-35" f2: name: "G5S2.wav" name: "lp06a6s02" description: "Azimuth and elevation displacement." source: ["m3_s2.wav", "m3_s8.wav"] azimuth: "120:0.5:120+180" elevation: "35:-0.2:-35" f3: name: "G4S3.wav" name: "lp06a6s03" description: "Azimuth and elevation displacement." source: ["f2_s3.wav", "f2_s9.wav"] azimuth: "180:0.5:180+180" elevation: "35:-0.2:-35" f4: name: "G3S4.wav" name: "lp06a6s04" description: "Azimuth and elevation displacement." source: ["m2_s4.wav", "m2_s10.wav"] azimuth: "240:0.5:240+180" elevation: "35:-0.2:-35" f5: name: "G2S5.wav" name: "lp06a6s05" description: "Azimuth and elevation displacement." source: ["f1_s5.wav", "f1_s11.wav"] azimuth: "300:0.5:300+180" elevation: "35:-0.2:-35" f6: name: "G1S6.wav" name: "lp06a6s06" description: "Azimuth and elevation displacement." 
source: ["m1_s6.wav", "m1_s12.wav"] azimuth: "0:0.5:0+180" Loading item_gen_configs/P800-7.yml +50 −41 Original line number Diff line number Diff line Loading @@ -34,23 +34,32 @@ add_low_level_random_noise: true ### Scene description ################################################ ### Each scene must start with the sceneN tag ### Each scene must start with a unique tag ### Specify the mono source filename (the program will search for it in the input_path folder) ### Specify azimuth and elevation for each input source ### Specify the overlap length in seconds for each input source (negative value creates a gap) ### Note 1: use [val1, val2, ...] for multiple sources in a scene ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames ### Note 3: we're using right-handed coordinate system with azi = 0 pointing from the nose to the screen ### azimuth: float, [-180,180]; positive indicates left ### elevation: float, [-90,90]; positive indicates up ### distance: float, tbd: default: 1 ### spread: float, [0,360]; spread in angles from 0 ... 360˚ ### gain: float, [0,1] ### Naming convention for P.800 items ### The filenames of the input content samples are represented by: ### leeeayszz.wav ### The filenames of the accompanying metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: ### leeeayszz.met for metadata-assisted spatial audio ### leeeayszz.wav.o.csv for object-based audio ### where: ### l stands for the listening lab designator (a through d according to Table 2) ### eee stands for the experiment designator, e.g. 
p01 (see Table 1) ### a stands for audio, and y is the per experiment category according to IVAS-8a ### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07, where 07 is the preliminary ### o stands for object number; 0, 1, 2, 3 scenes: a1: name: "G1S1.wav" name: "lp07a1s01" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["m1_s1.wav", "f1_s1.wav"] azimuth: [0, 50] Loading @@ -58,7 +67,7 @@ scenes: overlap: -1.0 a2: name: "G6S2.wav" name: "lp07a1s02" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["f3_s8.wav", "m1_s8.wav"] azimuth: [50, 350] Loading @@ -66,7 +75,7 @@ scenes: overlap: -1.0 a3: name: "G5S3.wav" name: "lp07a1s03" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["f2_s9.wav", "m3_s9.wav"] azimuth: [40, 290] Loading @@ -74,7 +83,7 @@ scenes: overlap: -1.0 a4: name: "G4S4.wav" name: "lp07a1s04" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["f1_s10.wav", "m2_s10.wav"] azimuth: [30, 230] Loading @@ -82,7 +91,7 @@ scenes: overlap: -1.0 a5: name: "G3S5.wav" name: "lp07a1s05" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["m3_s5.wav", "f3_s5.wav"] azimuth: [20, 170] Loading @@ -90,7 +99,7 @@ scenes: overlap: -1.0 a6: name: "G2S6.wav" name: "lp07a1s06" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." 
source: ["m2_s6.wav", "f2_s6.wav"] azimuth: [10, 110] Loading @@ -98,7 +107,7 @@ scenes: overlap: -1.0 b1: name: "G2S1.wav" name: "lp07a2s01" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["m2_s1.wav", "f2_s1.wav"] azimuth: [20, 170] Loading @@ -106,7 +115,7 @@ scenes: overlap: 1.0 b2: name: "G1S2.wav" name: "lp07a2s02" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["m1_s2.wav", "f1_s2.wav"] azimuth: [10, 110] Loading @@ -114,7 +123,7 @@ scenes: overlap: 1.0 b3: name: "G6S3.wav" name: "lp07a2s03" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["f3_s9.wav", "m1_s9.wav"] azimuth: [0, 50] Loading @@ -122,7 +131,7 @@ scenes: overlap: 1.0 b4: name: "G5S4.wav" name: "lp07a2s04" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["f2_s10.wav", "m3_s10.wav"] azimuth: [50, 350] Loading @@ -130,7 +139,7 @@ scenes: overlap: 1.0 b5: name: "G4S5.wav" name: "lp07a2s05" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["f1_s11.wav", "m2_s11.wav"] azimuth: [40, 290] Loading @@ -138,7 +147,7 @@ scenes: overlap: 1.0 b6: name: "G3S6.wav" name: "lp07a2s06" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["m3_s6.wav", "f3_s6.wav"] azimuth: [30, 230] Loading @@ -146,7 +155,7 @@ scenes: overlap: 1.0 c1: name: "G3S1.wav" name: "lp07a3s01" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." 
source: ["m3_s1.wav", "f3_s1.wav"] azimuth: [40, 290] Loading @@ -154,7 +163,7 @@ scenes: overlap: -1.0 c2: name: "G2S2.wav" name: "lp07a3s02" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["m2_s2.wav", "f2_s2.wav"] azimuth: [30, 230] Loading @@ -162,7 +171,7 @@ scenes: overlap: -1.0 c3: name: "G1S3.wav" name: "lp07a3s03" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["m1_s3.wav", "f1_s3.wav"] azimuth: [20, 170] Loading @@ -170,7 +179,7 @@ scenes: overlap: -1.0 c4: name: "G6S4.wav" name: "lp07a3s04" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["f3_s10.wav", "m1_s10.wav"] azimuth: [10, 110] Loading @@ -178,7 +187,7 @@ scenes: overlap: -1.0 c5: name: "G5S5.wav" name: "lp07a3s05" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["f2_s11.wav", "m3_s11.wav"] azimuth: [0, 50] Loading @@ -186,7 +195,7 @@ scenes: overlap: -1.0 c6: name: "G4S6.wav" name: "lp07a3s06" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["f1_s12.wav", "m2_s12.wav"] azimuth: [50, 350] Loading @@ -194,7 +203,7 @@ scenes: overlap: -1.0 d1: name: "G4S1.wav" name: "lp07a4s01" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["f1_s7.wav", "m2_s7.wav"] azimuth: [50, "180:1:120 + 360"] Loading @@ -202,7 +211,7 @@ scenes: overlap: 1.0 d2: name: "G3S2.wav" name: "lp07a4s02" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
source: ["m3_s2.wav", "f3_s2.wav"] azimuth: [300, "-70:-1:-10 - 360"] Loading @@ -210,7 +219,7 @@ scenes: overlap: 1.0 d3: name: "G2S3.wav" name: "lp07a4s03" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["m2_s3.wav", "f2_s3.wav"] azimuth: [250, "-20:-1:-320"] Loading @@ -218,7 +227,7 @@ scenes: overlap: 1.0 d4: name: "G1S4.wav" name: "lp07a4s04" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["m1_s4.wav", "f1_s4.wav"] azimuth: [200, "30:-1:-270"] Loading @@ -226,7 +235,7 @@ scenes: overlap: 1.0 d5: name: "G6S5.wav" name: "lp07a4s05" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["f3_s11.wav", "m1_s11.wav"] azimuth: [150, "80:1:20 + 360"] Loading @@ -234,7 +243,7 @@ scenes: overlap: 1.0 d6: name: "G5S6.wav" name: "lp07a4s06" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
source: ["f2_s12.wav", "m3_s12.wav"] azimuth: [100, "130:1:70 + 360"] Loading @@ -242,7 +251,7 @@ scenes: overlap: 1.0 e1: name: "G5S1.wav" name: "lp07a5s01" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["f2_s7.wav", "m3_s7.wav"] azimuth: ["80:1:20 + 360", "80:1:20 + 360"] Loading @@ -250,7 +259,7 @@ scenes: overlap: 1.0 e2: name: "G4S2.wav" name: "lp07a5s02" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["f1_s8.wav", "m2_s8.wav"] azimuth: ["130:1:70 + 360", "130:1:70 + 360"] Loading @@ -258,7 +267,7 @@ scenes: overlap: 1.0 e3: name: "G3S3.wav" name: "lp07a5s03" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["m3_s3.wav", "f3_s3.wav"] azimuth: ["180:1:120 + 360", "180:1:120 + 360"] Loading @@ -266,7 +275,7 @@ scenes: overlap: 1.0 e4: name: "G2S4.wav" name: "lp07a5s04" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["m2_s4.wav", "f2_s4.wav"] azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] Loading @@ -274,7 +283,7 @@ scenes: overlap: 1.0 e5: name: "G1S5.wav" name: "lp07a5s05" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["m1_s5.wav", "f1_s5.wav"] azimuth: ["-20:-1:-320", "-20:-1:-320"] Loading @@ -282,7 +291,7 @@ scenes: overlap: 1.0 e6: name: "G6S6.wav" name: "lp07a5s06" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["f3_s12.wav", "m1_s12.wav"] azimuth: ["30:-1:-270", "30:-1:-270"] Loading @@ -290,7 +299,7 @@ scenes: overlap: 1.0 f1: name: "G6S1.wav" name: "lp07a6s01" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." 
source: ["f3_s7.wav", "m1_s7.wav"] azimuth: ["60:1:0 + 360", "60:-1:120 - 360"] Loading @@ -298,7 +307,7 @@ scenes: overlap: -1.0 f2: name: "G5S2.wav" name: "lp07a6s02" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["f2_s8.wav", "m3_s8.wav"] azimuth: ["0:1:300", "0:-1:60 - 360"] Loading @@ -306,7 +315,7 @@ scenes: overlap: -1.0 f3: name: "G4S3.wav" name: "lp07a6s03" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["f1_s9.wav", "m2_s9.wav"] azimuth: ["300:1:240 + 360", "300:-1:0"] Loading @@ -314,7 +323,7 @@ scenes: overlap: -1.0 f4: name: "G3S4.wav" name: "lp07a6s04" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["m3_s4.wav", "f3_s4.wav"] azimuth: ["240:1:180 + 360", "240:-1:-60"] Loading @@ -322,7 +331,7 @@ scenes: overlap: -1.0 f5: name: "G2S5.wav" name: "lp07a6s05" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["m2_s5.wav", "f2_s5.wav"] azimuth: ["180:1:120 + 360", "180:-1:-120"] Loading @@ -330,7 +339,7 @@ scenes: overlap: -1.0 f6: name: "G1S6.wav" name: "lp07a6s06" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["m1_s6.wav", "f1_s6.wav"] azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] Loading Loading
item_gen_configs/P800-6.yml +50 −40 Original line number Diff line number Diff line Loading @@ -35,7 +35,7 @@ add_low_level_random_noise: true ### Scene description ################################################ ### Each scene must start with the sceneN tag ### Each scene must start with a unique tag ### Specify the mono source filename (the program will search for it in the input_path folder) ### Specify azimuth and elevation for each input source ### Note 1: use [val1, val2, ...] for multiple sources in a scene Loading @@ -44,13 +44,23 @@ add_low_level_random_noise: true ### Note 3: we're using right-handed coordinate system with azi = 0 pointing from the nose to the screen ### azimuth: float, [-180,180]; positive indicates left ### elevation: float, [-90,90]; positive indicates up ### distance: float, tbd: default: 1 ### spread: float, [0,360]; spread in angles from 0 ... 360˚ ### gain: float, [0,1] ### Naming convention for P.800 items ### The filenames of the input content samples are represented by: ### leeeayszz.wav ### The filenames of the accompanying metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: ### leeeayszz.met for metadata-assisted spatial audio ### leeeayszz.wav.o.csv for object-based audio ### where: ### l stands for the listening lab designator (a through d according to Table 2) ### eee stands for the experiment designator, e.g. 
p01 (see Table 1) ### a stands for audio, and y is the per experiment category according to IVAS-8a ### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07, where 07 is the preliminary ### o stands for object number; 0, 1, 2, 3 scenes: a1: name: "G1S1.wav" name: "lp06a1s01" description: "Talker sitting at a table" source: ["m1_s1.wav", "m1_s7.wav"] azimuth: 0 Loading @@ -58,7 +68,7 @@ scenes: overlap: -0.5 a2: name: "G6S2.wav" name: "lp06a1s02" description: "Talker sitting at a table" source: ["f3_s2.wav", "f3_s8.wav"] azimuth: 60 Loading @@ -66,238 +76,238 @@ scenes: overlap: -0.5 a3: name: "G5S3.wav" name: "lp06a1s03" description: "Talker sitting at a table" source: ["m3_s3.wav", "m3_s9.wav"] azimuth: 120 elevation: 0 a4: name: "G4S4.wav" name: "lp06a1s04" description: "Talker sitting at a table" source: ["f2_s4.wav", "f2_s10.wav"] azimuth: 180 elevation: 0 a5: name: "G3S5.wav" name: "lp06a1s05" description: "Talker sitting at a table" source: ["m2_s5.wav", "m2_s11.wav"] azimuth: 240 elevation: 0 a6: name: "G2S6.wav" name: "lp06a1s06" description: "Talker sitting at a table" source: ["f1_s6.wav", "f1_s12.wav"] azimuth: 300 elevation: 0 b1: name: "G2S1.wav" name: "lp06a2s01" description: "standing talker." source: ["f1_s1.wav", "f1_s7.wav"] azimuth: 120 elevation: 35 b2: name: "G1S2.wav" name: "lp06a2s02" description: "standing talker." source: ["m1_s2.wav", "m1_s8.wav"] azimuth: 180 elevation: 35 b3: name: "G6S3.wav" name: "lp06a2s03" description: "standing talker." source: ["f3_s3.wav", "f3_s9.wav"] azimuth: 240 elevation: 35 b4: name: "G5S4.wav" name: "lp06a2s04" description: "standing talker." source: ["m3_s4.wav", "m3_s10.wav"] azimuth: 300 elevation: 35 b5: name: "G4S5.wav" name: "lp06a2s05" description: "standing talker." source: ["f2_s5.wav", "f2_s11.wav"] azimuth: 0 elevation: 35 b6: name: "G3S6.wav" name: "lp06a2s06" description: "standing talker." 
source: ["m2_s6.wav", "m2_s12.wav"] azimuth: 60 elevation: 35 c1: name: "G3S1.wav" name: "lp06a3s01" description: "Smaller talker (child) walking around a table." source: ["m2_s1.wav", "m2_s7.wav"] azimuth: "0:1:360" elevation: 0 c2: name: "G2S2.wav" name: "lp06a3s02" description: "Smaller talker (child) walking around a table." source: ["f1_s2.wav", "f1_s8.wav"] azimuth: "60:1:60+360" elevation: 0 c3: name: "G1S3.wav" name: "lp06a3s03" description: "Smaller talker (child) walking around a table." source: ["m1_s3.wav", "m1_s9.wav"] azimuth: "120:1:120+360" elevation: 0 c4: name: "G6S4.wav" name: "lp06a3s04" description: "Smaller talker (child) walking around a table." source: ["f3_s4.wav", "f3_s10.wav"] azimuth: "180:1:180+360" elevation: 0 c5: name: "G5S5.wav" name: "lp06a3s05" description: "Smaller talker (child) walking around a table." source: ["m3_s5.wav", "m3_s11.wav"] azimuth: "240:1:240+360" elevation: 0 c6: name: "G4S6.wav" name: "lp06a3s06" description: "Smaller talker (child) walking around a table." source: ["f2_s6.wav", "f2_s12.wav"] azimuth: "300:1:300+360" elevation: 0 d1: name: "G4S1.wav" name: "lp06a4s01" description: "Talker walking around the table." source: ["f2_s1.wav", "f2_s7.wav"] azimuth: "0:-1:-360" elevation: 35 d2: name: "G3S2.wav" name: "lp06a4s02" description: "Talker walking around the table." source: ["m2_s2.wav", "m2_s8.wav"] azimuth: "60:-1:60-360" elevation: 35 d3: name: "G2S3.wav" name: "lp06a4s03" description: "Talker walking around the table." source: ["f1_s3.wav", "f1_s9.wav"] azimuth: "120:-1:120-360" elevation: 35 d4: name: "G1S4.wav" name: "lp06a4s04" description: "Talker walking around the table." source: ["m1_s4.wav", "m1_s10.wav"] azimuth: "180:-1:180-360" elevation: 35 d5: name: "G6S5.wav" name: "lp06a4s05" description: "Talker walking around the table." source: ["f3_s5.wav", "f3_s11.wav"] azimuth: "240:-1:240-360" elevation: 35 d6: name: "G5S6.wav" name: "lp06a4s06" description: "Talker walking around the table." 
source: ["m3_s6.wav", "m3_s12.wav"] azimuth: "300:-1:300-360" elevation: 35 e1: name: "G5S1.wav" name: "lp06a5s01" description: "Elevation displacement." source: ["m3_s1.wav", "m3_s7.wav"] azimuth: 240 elevation: "-90:0.5:90" e2: name: "G4S2.wav" name: "lp06a5s02" description: "Elevation displacement." source: ["f2_s2.wav", "f2_s8.wav"] azimuth: 300 elevation: 0 e3: name: "G3S3.wav" name: "lp06a5s03" description: "Elevation displacement." source: ["m2_s3.wav", "m2_s9.wav"] azimuth: 0 elevation: "-90:0.5:90" e4: name: "G2S4.wav" name: "lp06a5s04" description: "Elevation displacement." source: ["f1_s4.wav", "f1_s10.wav"] azimuth: 60 elevation: "-90:0.5:90" e5: name: "G1S5.wav" name: "lp06a5s05" description: "Elevation displacement." source: ["m1_s5.wav", "m1_s11.wav"] azimuth: 120 elevation: "-90:0.5:90" e6: name: "G6S6.wav" name: "lp06a5s06" description: "Elevation displacement." source: ["f3_s6.wav", "f3_s12.wav"] azimuth: 180 elevation: "-90:0.5:90" f1: name: "G6S1.wav" name: "lp06a6s01" description: "Azimuth and elevation displacement." source: ["f3_s1.wav", "f3_s7.wav"] azimuth: "60:0.5:60+180" elevation: "35:-0.2:-35" f2: name: "G5S2.wav" name: "lp06a6s02" description: "Azimuth and elevation displacement." source: ["m3_s2.wav", "m3_s8.wav"] azimuth: "120:0.5:120+180" elevation: "35:-0.2:-35" f3: name: "G4S3.wav" name: "lp06a6s03" description: "Azimuth and elevation displacement." source: ["f2_s3.wav", "f2_s9.wav"] azimuth: "180:0.5:180+180" elevation: "35:-0.2:-35" f4: name: "G3S4.wav" name: "lp06a6s04" description: "Azimuth and elevation displacement." source: ["m2_s4.wav", "m2_s10.wav"] azimuth: "240:0.5:240+180" elevation: "35:-0.2:-35" f5: name: "G2S5.wav" name: "lp06a6s05" description: "Azimuth and elevation displacement." source: ["f1_s5.wav", "f1_s11.wav"] azimuth: "300:0.5:300+180" elevation: "35:-0.2:-35" f6: name: "G1S6.wav" name: "lp06a6s06" description: "Azimuth and elevation displacement." 
source: ["m1_s6.wav", "m1_s12.wav"] azimuth: "0:0.5:0+180" Loading
item_gen_configs/P800-7.yml +50 −41 Original line number Diff line number Diff line Loading @@ -34,23 +34,32 @@ add_low_level_random_noise: true ### Scene description ################################################ ### Each scene must start with the sceneN tag ### Each scene must start with a unique tag ### Specify the mono source filename (the program will search for it in the input_path folder) ### Specify azimuth and elevation for each input source ### Specify the overlap length in seconds for each input source (negative value creates a gap) ### Note 1: use [val1, val2, ...] for multiple sources in a scene ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames ### Note 3: we're using right-handed coordinate system with azi = 0 pointing from the nose to the screen ### azimuth: float, [-180,180]; positive indicates left ### elevation: float, [-90,90]; positive indicates up ### distance: float, tbd: default: 1 ### spread: float, [0,360]; spread in angles from 0 ... 360˚ ### gain: float, [0,1] ### Naming convention for P.800 items ### The filenames of the input content samples are represented by: ### leeeayszz.wav ### The filenames of the accompanying metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: ### leeeayszz.met for metadata-assisted spatial audio ### leeeayszz.wav.o.csv for object-based audio ### where: ### l stands for the listening lab designator (a through d according to Table 2) ### eee stands for the experiment designator, e.g. 
p01 (see Table 1) ### a stands for audio, and y is the per experiment category according to IVAS-8a ### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07, where 07 is the preliminary ### o stands for object number; 0, 1, 2, 3 scenes: a1: name: "G1S1.wav" name: "lp07a1s01" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["m1_s1.wav", "f1_s1.wav"] azimuth: [0, 50] Loading @@ -58,7 +67,7 @@ scenes: overlap: -1.0 a2: name: "G6S2.wav" name: "lp07a1s02" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["f3_s8.wav", "m1_s8.wav"] azimuth: [50, 350] Loading @@ -66,7 +75,7 @@ scenes: overlap: -1.0 a3: name: "G5S3.wav" name: "lp07a1s03" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["f2_s9.wav", "m3_s9.wav"] azimuth: [40, 290] Loading @@ -74,7 +83,7 @@ scenes: overlap: -1.0 a4: name: "G4S4.wav" name: "lp07a1s04" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["f1_s10.wav", "m2_s10.wav"] azimuth: [30, 230] Loading @@ -82,7 +91,7 @@ scenes: overlap: -1.0 a5: name: "G3S5.wav" name: "lp07a1s05" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["m3_s5.wav", "f3_s5.wav"] azimuth: [20, 170] Loading @@ -90,7 +99,7 @@ scenes: overlap: -1.0 a6: name: "G2S6.wav" name: "lp07a1s06" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." 
source: ["m2_s6.wav", "f2_s6.wav"] azimuth: [10, 110] Loading @@ -98,7 +107,7 @@ scenes: overlap: -1.0 b1: name: "G2S1.wav" name: "lp07a2s01" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["m2_s1.wav", "f2_s1.wav"] azimuth: [20, 170] Loading @@ -106,7 +115,7 @@ scenes: overlap: 1.0 b2: name: "G1S2.wav" name: "lp07a2s02" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["m1_s2.wav", "f1_s2.wav"] azimuth: [10, 110] Loading @@ -114,7 +123,7 @@ scenes: overlap: 1.0 b3: name: "G6S3.wav" name: "lp07a2s03" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["f3_s9.wav", "m1_s9.wav"] azimuth: [0, 50] Loading @@ -122,7 +131,7 @@ scenes: overlap: 1.0 b4: name: "G5S4.wav" name: "lp07a2s04" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["f2_s10.wav", "m3_s10.wav"] azimuth: [50, 350] Loading @@ -130,7 +139,7 @@ scenes: overlap: 1.0 b5: name: "G4S5.wav" name: "lp07a2s05" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["f1_s11.wav", "m2_s11.wav"] azimuth: [40, 290] Loading @@ -138,7 +147,7 @@ scenes: overlap: 1.0 b6: name: "G3S6.wav" name: "lp07a2s06" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["m3_s6.wav", "f3_s6.wav"] azimuth: [30, 230] Loading @@ -146,7 +155,7 @@ scenes: overlap: 1.0 c1: name: "G3S1.wav" name: "lp07a3s01" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." 
source: ["m3_s1.wav", "f3_s1.wav"] azimuth: [40, 290] Loading @@ -154,7 +163,7 @@ scenes: overlap: -1.0 c2: name: "G2S2.wav" name: "lp07a3s02" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["m2_s2.wav", "f2_s2.wav"] azimuth: [30, 230] Loading @@ -162,7 +171,7 @@ scenes: overlap: -1.0 c3: name: "G1S3.wav" name: "lp07a3s03" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["m1_s3.wav", "f1_s3.wav"] azimuth: [20, 170] Loading @@ -170,7 +179,7 @@ scenes: overlap: -1.0 c4: name: "G6S4.wav" name: "lp07a3s04" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["f3_s10.wav", "m1_s10.wav"] azimuth: [10, 110] Loading @@ -178,7 +187,7 @@ scenes: overlap: -1.0 c5: name: "G5S5.wav" name: "lp07a3s05" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["f2_s11.wav", "m3_s11.wav"] azimuth: [0, 50] Loading @@ -186,7 +195,7 @@ scenes: overlap: -1.0 c6: name: "G4S6.wav" name: "lp07a3s06" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["f1_s12.wav", "m2_s12.wav"] azimuth: [50, 350] Loading @@ -194,7 +203,7 @@ scenes: overlap: -1.0 d1: name: "G4S1.wav" name: "lp07a4s01" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["f1_s7.wav", "m2_s7.wav"] azimuth: [50, "180:1:120 + 360"] Loading @@ -202,7 +211,7 @@ scenes: overlap: 1.0 d2: name: "G3S2.wav" name: "lp07a4s02" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
source: ["m3_s2.wav", "f3_s2.wav"] azimuth: [300, "-70:-1:-10 - 360"] Loading @@ -210,7 +219,7 @@ scenes: overlap: 1.0 d3: name: "G2S3.wav" name: "lp07a4s03" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["m2_s3.wav", "f2_s3.wav"] azimuth: [250, "-20:-1:-320"] Loading @@ -218,7 +227,7 @@ scenes: overlap: 1.0 d4: name: "G1S4.wav" name: "lp07a4s04" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["m1_s4.wav", "f1_s4.wav"] azimuth: [200, "30:-1:-270"] Loading @@ -226,7 +235,7 @@ scenes: overlap: 1.0 d5: name: "G6S5.wav" name: "lp07a4s05" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["f3_s11.wav", "m1_s11.wav"] azimuth: [150, "80:1:20 + 360"] Loading @@ -234,7 +243,7 @@ scenes: overlap: 1.0 d6: name: "G5S6.wav" name: "lp07a4s06" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
source: ["f2_s12.wav", "m3_s12.wav"] azimuth: [100, "130:1:70 + 360"] Loading @@ -242,7 +251,7 @@ scenes: overlap: 1.0 e1: name: "G5S1.wav" name: "lp07a5s01" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["f2_s7.wav", "m3_s7.wav"] azimuth: ["80:1:20 + 360", "80:1:20 + 360"] Loading @@ -250,7 +259,7 @@ scenes: overlap: 1.0 e2: name: "G4S2.wav" name: "lp07a5s02" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["f1_s8.wav", "m2_s8.wav"] azimuth: ["130:1:70 + 360", "130:1:70 + 360"] Loading @@ -258,7 +267,7 @@ scenes: overlap: 1.0 e3: name: "G3S3.wav" name: "lp07a5s03" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["m3_s3.wav", "f3_s3.wav"] azimuth: ["180:1:120 + 360", "180:1:120 + 360"] Loading @@ -266,7 +275,7 @@ scenes: overlap: 1.0 e4: name: "G2S4.wav" name: "lp07a5s04" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["m2_s4.wav", "f2_s4.wav"] azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] Loading @@ -274,7 +283,7 @@ scenes: overlap: 1.0 e5: name: "G1S5.wav" name: "lp07a5s05" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["m1_s5.wav", "f1_s5.wav"] azimuth: ["-20:-1:-320", "-20:-1:-320"] Loading @@ -282,7 +291,7 @@ scenes: overlap: 1.0 e6: name: "G6S6.wav" name: "lp07a5s06" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["f3_s12.wav", "m1_s12.wav"] azimuth: ["30:-1:-270", "30:-1:-270"] Loading @@ -290,7 +299,7 @@ scenes: overlap: 1.0 f1: name: "G6S1.wav" name: "lp07a6s01" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." 
source: ["f3_s7.wav", "m1_s7.wav"] azimuth: ["60:1:0 + 360", "60:-1:120 - 360"] Loading @@ -298,7 +307,7 @@ scenes: overlap: -1.0 f2: name: "G5S2.wav" name: "lp07a6s02" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["f2_s8.wav", "m3_s8.wav"] azimuth: ["0:1:300", "0:-1:60 - 360"] Loading @@ -306,7 +315,7 @@ scenes: overlap: -1.0 f3: name: "G4S3.wav" name: "lp07a6s03" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["f1_s9.wav", "m2_s9.wav"] azimuth: ["300:1:240 + 360", "300:-1:0"] Loading @@ -314,7 +323,7 @@ scenes: overlap: -1.0 f4: name: "G3S4.wav" name: "lp07a6s04" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["m3_s4.wav", "f3_s4.wav"] azimuth: ["240:1:180 + 360", "240:-1:-60"] Loading @@ -322,7 +331,7 @@ scenes: overlap: -1.0 f5: name: "G2S5.wav" name: "lp07a6s05" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["m2_s5.wav", "f2_s5.wav"] azimuth: ["180:1:120 + 360", "180:-1:-120"] Loading @@ -330,7 +339,7 @@ scenes: overlap: -1.0 f6: name: "G1S6.wav" name: "lp07a6s06" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["m1_s6.wav", "f1_s6.wav"] azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] Loading