From c9fbdb5db4a291e323d8fec6c61cdb3ddaeed7a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20Toftg=C3=A5rd?= Date: Fri, 9 Jun 2023 18:48:19 +0200 Subject: [PATCH 1/9] Align with test plan - correct elevation for some scenes - correct assignment of categories (talker/talker pair) - correct the source files for preliminaries, to correspond to test plan categories - metadata to cover the whole samples --- .../P800-6/config/item_gen_P800-6.yml | 256 +++++++++--------- .../P800-7/config/item_gen_P800-7.yml | 256 +++++++++--------- .../generation/process_ism1_items.py | 109 +++----- .../generation/process_ism2_items.py | 203 ++++++-------- 4 files changed, 379 insertions(+), 445 deletions(-) diff --git a/experiments/selection/P800-6/config/item_gen_P800-6.yml b/experiments/selection/P800-6/config/item_gen_P800-6.yml index 1cddf5bc..844032e5 100644 --- a/experiments/selection/P800-6/config/item_gen_P800-6.yml +++ b/experiments/selection/P800-6/config/item_gen_P800-6.yml @@ -31,8 +31,8 @@ postamble: 1.0 add_low_level_random_noise: true ### File designators, default is "l" for listening lab, "EN" for language, "p06" for exp and "g" for provider -listening_lab: "l" -language: "EN" +listening_lab: "a" +language: "JP" exp: "p06" provider: "g" @@ -86,339 +86,339 @@ use_output_prefix: "leee" scenes: - cat1_1: - name: "cat1/a1s01.wav" + a1: + name: "a1s01" description: "Talker sitting at a table" source: ["m1s01.wav", "m1s07.wav"] azimuth: 0 elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat1_2: - name: "cat1/a1s02.wav" + a2: + name: "a6s02" description: "Talker sitting at a table" source: ["f3s02.wav", "f3s08.wav"] azimuth: 60 elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat1_3: - name: "cat1/a1s03.wav" + a3: + name: "a5s03" description: "Talker sitting at a table" source: ["m3s03.wav", "m3s09.wav"] azimuth: 120 elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat1_4: - name: "cat1/a1s04.wav" + a4: + name: "a4s04" description: "Talker sitting at a table" source: ["f2s04.wav", 
"f2s10.wav"] azimuth: 180 elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat1_5: - name: "cat1/a1s05.wav" + a5: + name: "a3s05" description: "Talker sitting at a table" source: ["m2s05.wav", "m2s11.wav"] azimuth: 240 elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat1_6: - name: "cat1/a1s06.wav" + a6: + name: "a2s06" description: "Talker sitting at a table" source: ["f1s06.wav", "f1s12.wav"] azimuth: 300 elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat1_7: - name: "cat1/a1s07.wav" + a7: + name: "a2s07" description: "Preliminary: Talker sitting at a table" source: ["f1s13.wav", "f1s14.wav"] azimuth: 0 elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat2_1: - name: "cat2/a2s01.wav" + b1: + name: "a2s01" description: "Standing talker." source: ["f1s01.wav", "f1s07.wav"] azimuth: 120 elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat2_2: - name: "cat2/a2s02.wav" + b2: + name: "a1s02" description: "Standing talker." source: ["m1s02.wav", "m1s08.wav"] azimuth: 180 elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat2_3: - name: "cat2/a2s03.wav" + b3: + name: "a6s03" description: "Standing talker." source: ["f3s03.wav", "f3s09.wav"] azimuth: 240 elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat2_4: - name: "cat2/a2s04.wav" + b4: + name: "a5s04" description: "Standing talker." source: ["m3s04.wav", "m3s10.wav"] azimuth: 300 elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat2_5: - name: "cat2/a2s05.wav" + b5: + name: "a4s05" description: "Standing talker." source: ["f2s05.wav", "f2s11.wav"] azimuth: 0 elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat2_6: - name: "cat2/a2s06.wav" + b6: + name: "a3s06" description: "Standing talker." source: ["m2s06.wav", "m2s12.wav"] azimuth: 60 elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat2_7: - name: "cat2/a2s07.wav" + b7: + name: "a1s07" description: "Preliminary: Standing talker." 
source: ["m1s13.wav", "m1s14.wav"] azimuth: 180 elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat3_1: - name: "cat3/a3s01.wav" + c1: + name: "a3s01" description: "Smaller talker (child) walking around a table." source: ["m2s01.wav", "m2s07.wav"] azimuth: "0:1:360" elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat3_2: - name: "cat3/a3s02.wav" + c2: + name: "a2s02" description: "Smaller talker (child) walking around a table." source: ["f1s02.wav", "f1s08.wav"] azimuth: "60:1:60+360" elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat3_3: - name: "cat3/a3s03.wav" + c3: + name: "a1s03" description: "Smaller talker (child) walking around a table." source: ["m1s03.wav", "m1s09.wav"] azimuth: "120:1:120+360" elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat3_4: - name: "cat3/a3s04.wav" + c4: + name: "a6s04" description: "Smaller talker (child) walking around a table." source: ["f3s04.wav", "f3s10.wav"] azimuth: "180:1:180+360" elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat3_5: - name: "cat3/a3s05.wav" + c5: + name: "a5s05" description: "Smaller talker (child) walking around a table." source: ["m3s05.wav", "m3s11.wav"] azimuth: "240:1:240+360" elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat3_6: - name: "cat3/a3s06.wav" + c6: + name: "a4s06" description: "Smaller talker (child) walking around a table." source: ["f2s06.wav", "f2s12.wav"] azimuth: "300:1:300+360" elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat3_7: - name: "cat3/a3s07.wav" + c7: + name: "a4s07" description: "Preliminary: Smaller talker (child) walking around a table." source: ["f2s13.wav", "f2s14.wav"] azimuth: "120:1:120+360" elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat4_1: - name: "cat4/a4s01.wav" + d1: + name: "a4s01" description: "Talker walking around the table." source: ["f2s01.wav", "f2s07.wav"] azimuth: "0:-1:-360" elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat4_2: - name: "cat4/a4s02.wav" + d2: + name: "a3s02" description: "Talker walking around the table." 
source: ["m2s02.wav", "m2s08.wav"] azimuth: "60:-1:60-360" elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat4_3: - name: "cat4/a4s03.wav" + d3: + name: "a2s03" description: "Talker walking around the table." source: ["f1s03.wav", "f1s09.wav"] azimuth: "120:-1:120-360" elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat4_4: - name: "cat4/a4s04.wav" + d4: + name: "a1s04" description: "Talker walking around the table." source: ["m1s04.wav", "m1s10.wav"] azimuth: "180:-1:180-360" elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat4_5: - name: "cat4/a4s05.wav" + d5: + name: "a6s05" description: "Talker walking around the table." source: ["f3s05.wav", "f3s11.wav"] azimuth: "240:-1:240-360" elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat4_6: - name: "cat4/a4s06.wav" + d6: + name: "a5s06" description: "Talker walking around the table." source: ["m3s06.wav", "m3s12.wav"] azimuth: "300:-1:300-360" elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat4_7: - name: "cat4/a4s07.wav" + d7: + name: "a3s07" description: "Preliminary: Talker walking around the table." source: ["m2s13.wav", "m2s14.wav"] azimuth: "180:-1:180-360" elevation: 35 - overlap: -1.0 + overlap: -0.5 - cat5_1: - name: "cat5/a5s01.wav" + e1: + name: "a5s01" description: "Elevation displacement." source: ["m3s01.wav", "m3s07.wav"] azimuth: 240 elevation: "-90:0.5:90" - overlap: -1.0 + overlap: -0.5 - cat5_2: - name: "cat5/a5s02.wav" + e2: + name: "a4s02" description: "Elevation displacement." source: ["f2s02.wav", "f2s08.wav"] azimuth: 300 elevation: 0 - overlap: -1.0 + overlap: -0.5 - cat5_3: - name: "cat5/a5s03.wav" + e3: + name: "a3s03" description: "Elevation displacement." source: ["m2s03.wav", "m2s09.wav"] azimuth: 0 elevation: "-90:0.5:90" - overlap: -1.0 + overlap: -0.5 - cat5_4: - name: "cat5/a5s04.wav" + e4: + name: "a2s04" description: "Elevation displacement." 
source: ["f1s04.wav", "f1s10.wav"] azimuth: 60 elevation: "-90:0.5:90" - overlap: -1.0 + overlap: -0.5 - cat5_5: - name: "cat5/a5s05.wav" + e5: + name: "a1s05" description: "Elevation displacement." source: ["m1s05.wav", "m1s11.wav"] azimuth: 120 elevation: "-90:0.5:90" - overlap: -1.0 + overlap: -0.5 - cat5_6: - name: "cat5/a5s06.wav" + e6: + name: "a6s06" description: "Elevation displacement." source: ["f3s06.wav", "f3s12.wav"] azimuth: 180 elevation: "-90:0.5:90" - overlap: -1.0 + overlap: -0.5 - cat5_7: - name: "cat5/a5s07.wav" + e7: + name: "a6s07" description: "Preliminary: Elevation displacement." source: ["f3s13.wav", "f3s14.wav"] azimuth: 120 elevation: "-90:0.5:90" - overlap: -1.0 + overlap: -0.5 - cat6_1: - name: "cat6/a6s01.wav" + f1: + name: "a6s01" description: "Azimuth and elevation displacement." source: ["f3s01.wav", "f3s07.wav"] azimuth: "60:0.5:60+180" elevation: "35:-0.2:-35" - overlap: -1.0 + overlap: -0.5 - cat6_2: - name: "cat6/a6s02.wav" + f2: + name: "a5s02" description: "Azimuth and elevation displacement." source: ["m3s02.wav", "m3s08.wav"] azimuth: "120:0.5:120+180" elevation: "35:-0.2:-35" - overlap: -1.0 + overlap: -0.5 - cat6_3: - name: "cat6/a6s03.wav" + f3: + name: "a4s03" description: "Azimuth and elevation displacement." source: ["f2s03.wav", "f2s09.wav"] azimuth: "180:0.5:180+180" elevation: "35:-0.2:-35" - overlap: -1.0 + overlap: -0.5 - cat6_4: - name: "cat6/a6s04.wav" + f4: + name: "a3s04" description: "Azimuth and elevation displacement." source: ["m2s04.wav", "m2s10.wav"] azimuth: "240:0.5:240+180" elevation: "35:-0.2:-35" - overlap: -1.0 + overlap: -0.5 - cat6_5: - name: "cat6/a6s05.wav" + f5: + name: "a2s05" description: "Azimuth and elevation displacement." source: ["f1s05.wav", "f1s11.wav"] azimuth: "300:0.5:300+180" elevation: "35:-0.2:-35" - overlap: -1.0 + overlap: -0.5 - cat6_6: - name: "cat6/a6s06.wav" + f6: + name: "a1s06" description: "Azimuth and elevation displacement." 
source: ["m1s06.wav", "m1s12.wav"] azimuth: "0:0.5:0+180" elevation: "35:-0.2:-35" - overlap: -1.0 + overlap: -0.5 - cat6_7: - name: "cat6/a6s07.wav" + f7: + name: "a5s07" description: "Preliminary: Azimuth and elevation displacement." source: ["m3s13.wav", "m3s14.wav"] azimuth: "0:0.5:0+180" elevation: "35:-0.2:-35" - overlap: -1.0 + overlap: -0.5 \ No newline at end of file diff --git a/experiments/selection/P800-7/config/item_gen_P800-7.yml b/experiments/selection/P800-7/config/item_gen_P800-7.yml index 6a5f7a2e..54a8695e 100644 --- a/experiments/selection/P800-7/config/item_gen_P800-7.yml +++ b/experiments/selection/P800-7/config/item_gen_P800-7.yml @@ -31,8 +31,8 @@ postamble: 1.0 add_low_level_random_noise: true ### File designators, default is "l" for listening lab, "EN" for language, "p07" for exp and "g" for provider -listening_lab: "l" -language: "EN" +listening_lab: "a" +language: "DK" exp: "p07" provider: "g" @@ -85,339 +85,339 @@ use_output_prefix: "leee" ### o stands for the object number; 0, 1, 2, 3 scenes: - cat1_1: - name: "cat1/a1s01.wav" + a1: + name: "a1s01" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["m1s01.wav", "f1s01.wav"] azimuth: [0, 50] elevation: [0, 0] overlap: -1.0 - cat1_2: - name: "cat1/a1s02.wav" + a2: + name: "a6s02" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["f3s08.wav", "m1s08.wav"] azimuth: [50, 350] elevation: [0, 0] overlap: -1.0 - cat1_3: - name: "cat1/a1s03.wav" + a3: + name: "a5s03" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." 
source: ["f2s09.wav", "m3s09.wav"] azimuth: [40, 290] elevation: [0, 0] overlap: -1.0 - cat1_4: - name: "cat1/a1s04.wav" + a4: + name: "a4s04" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["f1s10.wav", "m2s10.wav"] azimuth: [30, 230] - elevation: [15, 15] + elevation: [0, 0] overlap: -1.0 - cat1_5: - name: "cat1/a1s05.wav" + a5: + name: "a3s05" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["m3s05.wav", "f3s05.wav"] azimuth: [20, 170] - elevation: [15, 15] + elevation: [0, 0] overlap: -1.0 - cat1_6: - name: "cat1/a1s06.wav" + a6: + name: "a2s06" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["m2s06.wav", "f2s06.wav"] azimuth: [10, 110] - elevation: [15, 15] + elevation: [0, 0] overlap: -1.0 - cat1_7: - name: "cat1/a1s07.wav" + a7: + name: "a1s07" description: "Preliminary: Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["m1s13.wav", "f1s13.wav"] azimuth: [0, 50] elevation: [0, 0] overlap: -1.0 - cat2_1: - name: "cat2/a2s01.wav" + b1: + name: "a2s01" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["m2s01.wav", "f2s01.wav"] azimuth: [20, 170] - elevation: [30, 30] + elevation: [35, 35] overlap: 1.0 - cat2_2: - name: "cat2/a2s02.wav" + b2: + name: "a1s02" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." 
source: ["m1s02.wav", "f1s02.wav"] azimuth: [10, 110] - elevation: [30, 30] + elevation: [35, 35] overlap: 1.0 - cat2_3: - name: "cat2/a2s03.wav" + b3: + name: "a6s03" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["f3s09.wav", "m1s09.wav"] azimuth: [0, 50] - elevation: [30, 30] + elevation: [35, 35] overlap: 1.0 - cat2_4: - name: "cat2/a2s04.wav" + b4: + name: "a5s04" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["f2s10.wav", "m3s10.wav"] azimuth: [50, 350] - elevation: [60, 60] + elevation: [35, 35] overlap: 1.0 - cat2_5: - name: "cat2/a2s05.wav" + b5: + name: "a4s05" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["f1s11.wav", "m2s11.wav"] azimuth: [40, 290] - elevation: [60, 60] + elevation: [35, 35] overlap: 1.0 - cat2_6: - name: "cat2/a2s06.wav" + b6: + name: "a3s06" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["m3s06.wav", "f3s06.wav"] azimuth: [30, 230] - elevation: [60, 60] + elevation: [35, 35] overlap: 1.0 - cat2_7: - name: "cat2/a2s07.wav" + b7: + name: "a2s07" description: "Preliminary: Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["f2s13.wav", "m2s13.wav"] + source: ["m2s13.wav", "f2s13.wav"] azimuth: [10, 110] - elevation: [30, 30] + elevation: [35, 35] overlap: 1.0 - cat3_1: - name: "cat3/a3s01.wav" + c1: + name: "a3s01" description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." 
source: ["m3s01.wav", "f3s01.wav"] azimuth: [40, 290] - elevation: [0, 60] + elevation: [0, 45] overlap: -1.0 - cat3_2: - name: "cat3/a3s02.wav" + c2: + name: "a2s02" description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["m2s02.wav", "f2s02.wav"] azimuth: [30, 230] - elevation: [0, 60] + elevation: [0, 45] overlap: -1.0 - cat3_3: - name: "cat3/a3s03.wav" + c3: + name: "a1s03" description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["m1s03.wav", "f1s03.wav"] azimuth: [20, 170] - elevation: [0, 60] + elevation: [0, 45] overlap: -1.0 - cat3_4: - name: "cat3/a3s04.wav" + c4: + name: "a6s04" description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["f3s10.wav", "m1s10.wav"] azimuth: [10, 110] - elevation: [0, 60] + elevation: [0, 45] overlap: -1.0 - cat3_5: - name: "cat3/a3s05.wav" + c5: + name: "a5s05" description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["f2s11.wav", "m3s11.wav"] azimuth: [0, 50] - elevation: [0, 60] + elevation: [0, 45] overlap: -1.0 - cat3_6: - name: "cat3/a3s06.wav" + c6: + name: "a4s06" description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["f1s12.wav", "m2s12.wav"] azimuth: [50, 350] elevation: [0, 60] overlap: -1.0 - cat3_7: - name: "cat3/a3s07.wav" + c7: + name: "a3s07" description: "Preliminary: One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["m3s13.wav", "f3s13.wav"] azimuth: [20, 170] elevation: [0, 60] overlap: -1.0 - cat4_1: - name: "cat4/a4s01.wav" + d1: + name: "a4s01" description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
source: ["f1s07.wav", "m2s07.wav"] azimuth: [50, "180:1:120 + 360"] - elevation: [0, 60] - overlap: 1.0 + elevation: [0, 45] + overlap: 1.0 - cat4_2: - name: "cat4/a4s02.wav" + d2: + name: "a3s02" description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["m3s02.wav", "f3s02.wav"] azimuth: [300, "-70:-1:-10 - 360"] - elevation: [0, 60] + elevation: [0, 45] overlap: 1.0 - cat4_3: - name: "cat4/a4s03.wav" + d3: + name: "a2s03" description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["m2s03.wav", "f2s03.wav"] azimuth: [250, "-20:-1:-320"] - elevation: [0, 60] + elevation: [0, 45] overlap: 1.0 - cat4_4: - name: "cat4/a4s04.wav" + d4: + name: "a1s04" description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["m1s04.wav", "f1s04.wav"] azimuth: [200, "30:-1:-270"] - elevation: [0, 60] + elevation: [0, 45] overlap: 1.0 - cat4_5: - name: "cat4/a4s05.wav" + d5: + name: "a6s05" description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["f3s11.wav", "m1s11.wav"] azimuth: [150, "80:1:20 + 360"] - elevation: [0, 60] + elevation: [0, 45] overlap: 1.0 - cat4_6: - name: "cat4/a4s06.wav" + d6: + name: "a5s06" description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["f2s12.wav", "m3s12.wav"] azimuth: [100, "130:1:70 + 360"] - elevation: [0, 60] + elevation: [0, 45] overlap: 1.0 - cat4_7: - name: "cat4/a4s07.wav" + d7: + name: "a4s07" description: "Preliminary: One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
- source: ["f1s14.wav", "m1s14.wav"] + source: ["f1s14.wav", "m2s14.wav"] azimuth: [200, "30:-1:-270"] - elevation: [0, 60] + elevation: [0, 45] overlap: 1.0 - cat5_1: - name: "cat5/a5s01.wav" + e1: + name: "a5s01" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["f2s07.wav", "m3s07.wav"] azimuth: ["80:1:20 + 360", "80:1:20 + 360"] - elevation: [10, 60] + elevation: [45, 45] overlap: 1.0 - cat5_2: - name: "cat5/a5s02.wav" + e2: + name: "a4s02" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["f1s08.wav", "m2s08.wav"] azimuth: ["130:1:70 + 360", "130:1:70 + 360"] - elevation: [10, 60] + elevation: [45, 45] overlap: 1.0 - cat5_3: - name: "cat5/a5s03.wav" + e3: + name: "a3s03" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["m3s03.wav", "f3s03.wav"] azimuth: ["180:1:120 + 360", "180:1:120 + 360"] - elevation: [10, 60] + elevation: [45, 45] overlap: 1.0 - cat5_4: - name: "cat5/a5s04.wav" + e4: + name: "a2s04" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["m2s04.wav", "f2s04.wav"] azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] - elevation: [10, 60] + elevation: [45, 45] overlap: 1.0 - cat5_5: - name: "cat5/a5s05.wav" + e5: + name: "a1s05" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["m1s05.wav", "f1s05.wav"] azimuth: ["-20:-1:-320", "-20:-1:-320"] - elevation: [10, 60] + elevation: [45, 45] overlap: 1.0 - cat5_6: - name: "cat5/a5s06.wav" + e6: + name: "a6s06" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["f3s12.wav", "m1s12.wav"] azimuth: ["30:-1:-270", "30:-1:-270"] - elevation: [10, 60] + elevation: [45, 45] overlap: 1.0 - cat5_7: - name: "cat5/a5s07.wav" + e7: + name: "a5s07" description: "Preliminary: Two talkers walking side-by-side 
around the table, ~30% overlapping utterances" - source: ["m2s14.wav", "f2s14.wav"] + source: ["f2s14.wav", "m3s14.wav"] azimuth: ["-20:-1:-320", "-20:-1:-320"] - elevation: [10, 60] + elevation: [45, 45] overlap: 1.0 - cat6_1: - name: "cat6/a6s01.wav" + f1: + name: "a6s01" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["f3s07.wav", "m1s07.wav"] azimuth: ["60:1:0 + 360", "60:-1:120 - 360"] - elevation: [20, 50] - overlap: -1.0 + elevation: [30, 30] + overlap: -1.0 - cat6_2: - name: "cat6/a6s02.wav" + f2: + name: "a5s02" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["f2s08.wav", "m3s08.wav"] azimuth: ["0:1:300", "0:-1:60 - 360"] - elevation: [20, 50] + elevation: [30, 30] overlap: -1.0 - cat6_3: - name: "cat6/a6s03.wav" + f3: + name: "a4s03" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["f1s09.wav", "m2s09.wav"] azimuth: ["300:1:240 + 360", "300:-1:0"] - elevation: [20, 50] + elevation: [30, 30] overlap: -1.0 - cat6_4: - name: "cat6/a6s04.wav" + f4: + name: "a3s04" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["m3s04.wav", "f3s04.wav"] azimuth: ["240:1:180 + 360", "240:-1:-60"] - elevation: [20, 50] + elevation: [30, 30] overlap: -1.0 - cat6_5: - name: "cat6/a6s05.wav" + f5: + name: "a2s05" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["m2s05.wav", "f2s05.wav"] azimuth: ["180:1:120 + 360", "180:-1:-120"] - elevation: [20, 50] + elevation: [30, 30] overlap: -1.0 - cat6_6: - name: "cat6/a6s06.wav" + f6: + name: "a1s06" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." 
source: ["m1s06.wav", "f1s06.wav"] azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] - elevation: [20, 50] + elevation: [30, 30] overlap: -1.0 - cat6_7: - name: "cat6/a6s07.wav" + f7: + name: "a6s07" description: "Preliminary: Two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["f3s14.wav", "m3s14.wav"] + source: ["f3s14.wav", "m1s14.wav"] azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] - elevation: [20, 50] + elevation: [30, 30] overlap: -1.0 \ No newline at end of file diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py index f6d14b1c..55791a19 100644 --- a/ivas_processing_scripts/generation/process_ism1_items.py +++ b/ivas_processing_scripts/generation/process_ism1_items.py @@ -234,6 +234,33 @@ def generate_ism1_scene( y.audio.resize(x.audio.shape, refcheck=False) y.audio += x.audio + # append pre-amble and post-amble to all sources + if cfg.preamble != 0.0: + # ensure that pre-amble is a multiple of 20ms + N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) + + # insert all-zero preamble to all sources + pre = np.zeros((N_pre, y.audio.shape[1])) + y.audio = np.concatenate([pre, y.audio]) + + if cfg.postamble != 0.0: + # ensure that post-amble is a multiple of 20ms + N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) + + # append all-zero postamble to all sources + post = np.zeros((N_post, y.audio.shape[1])) + y.audio = np.concatenate([y.audio, post]) + + # add random noise + if cfg.add_low_level_random_noise: + # create uniformly distributed noise between -4 and 4 + np.random.seed(SEED_RANDOM_NOISE) + noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") + + # superimpose + y.audio += noise + + # process azimuth and elevation source_azi = scene["azimuth"] source_ele = scene["elevation"] @@ -244,22 +271,18 @@ def generate_ism1_scene( if isinstance(source_azi, str): if ":" in source_azi: source_azi = 
source_azi.split(":") - azi = np.arange( + azi = np.linspace( float(eval(source_azi[0])), float(eval(source_azi[2])), - float(eval(source_azi[1])), + N_frames ) else: - azi = np.array(float(eval(source_azi)), ndmin=1)[:N_frames] + azi = np.array(float(eval(source_azi)), ndmin=1) else: azi = np.array(source_azi, ndmin=1)[:N_frames] - - # ensure that azimuth array has N_frames values - if len(azi) > N_frames: - # cut the array of azimuth values - azi = azi[:N_frames] - elif len(azi) < N_frames: - # replicate the last azimuth value + + if len(azi) < N_frames: + # replicate the last elevation azi = np.append(azi, np.full(N_frames - len(azi), azi[-1])) # convert azimuth from 0 .. 360 to -180 .. +180 @@ -275,21 +298,17 @@ def generate_ism1_scene( if isinstance(source_ele, str): if ":" in source_ele: source_ele = source_ele.split(":") - ele = np.arange( + ele = np.linspace( float(eval(source_ele[0])), float(eval(source_ele[2])), - float(eval(source_ele[1])), + N_frames ) else: - ele = np.array(float(eval(source_ele)), ndmin=1)[:N_frames] + ele = np.array(float(eval(source_ele)), ndmin=1) else: ele = np.array(source_ele, ndmin=1)[:N_frames] - - # ensure that elevation array has N_frames values - if len(ele) > N_frames: - # cut the array of elevation values - ele = ele[:N_frames] - elif len(ele) < N_frames: + + if len(ele) < N_frames: # replicate the last elevation ele = np.append(ele, np.full(N_frames - len(ele), ele[-1])) @@ -298,57 +317,15 @@ def generate_ism1_scene( logger.error( f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}" ) - - # additional metadata (default values) - radius = np.ones(N_frames) - spread = np.zeros(N_frames) - gain = np.ones(N_frames) - # arrange all metadata fields column-wise into a matrix - y_meta = np.column_stack((azi, ele, radius, spread, gain)) - - # append pre-amble and post-amble to all sources - if cfg.preamble != 0.0: - # ensure that pre-amble is a multiple of 20ms - N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) - 
- # insert all-zero preamble to all sources - pre = np.zeros((N_pre, y.audio.shape[1])) - y.audio = np.concatenate([pre, y.audio]) - - # insert neutral position as a pre-amble to all sources - N_pre = int(N_pre / frame_len) - pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pre, 1)) - y_meta = np.concatenate([pre, y_meta], axis=0) - - if cfg.postamble != 0.0: - # ensure that post-amble is a multiple of 20ms - N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) - - # append all-zero postamble to all sources - post = np.zeros((N_post, y.audio.shape[1])) - y.audio = np.concatenate([y.audio, post]) - - # append neutral position as a post-amble to all sources - N_post = int(N_post / frame_len) - post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_post, 1)) - y_meta = np.concatenate([y_meta, post], axis=0) - - # add random noise - if cfg.add_low_level_random_noise: - # create uniformly distributed noise between -4 and 4 - np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") - - # superimpose - y.audio += noise + y_meta = np.column_stack((azi, ele)) # write ISM audio stream to the output file audiofile.write( os.path.join( cfg.output_path, - os.path.dirname(scene["name"]), - cfg.use_output_prefix + os.path.basename(scene["name"]), + "cat"+scene["name"][1], + cfg.use_output_prefix + os.path.basename(scene["name"]+".wav"), ), y.audio, y.fs, @@ -357,8 +334,8 @@ def generate_ism1_scene( # write ISM metadata to the output file in .0.csv format csv_filename = os.path.join( cfg.output_path, - os.path.dirname(scene["name"]), - cfg.use_output_prefix + os.path.basename(scene["name"]) + ".0.csv", + "cat"+scene["name"][1], + cfg.use_output_prefix + os.path.basename(scene["name"]) + ".wav.0.csv", ) with open( diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py index c1c09645..4a1c100a 100644 --- 
a/ivas_processing_scripts/generation/process_ism2_items.py +++ b/ivas_processing_scripts/generation/process_ism2_items.py @@ -215,73 +215,6 @@ def generate_ism2_scene( _, scale_factor, _ = get_loudness(x, cfg.loudness, "MONO") x.audio *= scale_factor - # read azimuth information and create array - if isinstance(source_azi, str): - if ":" in source_azi: - source_azi = source_azi.split(":") - azi = np.arange( - float(eval(source_azi[0])), - float(eval(source_azi[2])), - float(eval(source_azi[1])), - ) - else: - azi = np.array(float(eval(source_azi)), ndmin=1)[:N_frames] - else: - azi = np.array(source_azi, ndmin=1)[:N_frames] - - # ensure that azimuth array has N_frames values - if len(azi) > N_frames: - # cut the array of azimuth values - azi = azi[:N_frames] - elif len(azi) < N_frames: - # replicate the last azimuth - azi = np.append(azi, np.full(N_frames - len(azi), azi[-1])) - - # convert azimuth from 0 .. 360 to -180 .. +180 - azi = (azi + 180) % 360 - 180 - - # check if azimuth is from -180 .. +180 - if any(azi > 180) or any(azi < -180): - logger.error( - f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}" - ) - - # read elevation information and create array - if isinstance(source_ele, str): - if ":" in source_ele: - source_ele = source_ele.split(":") - ele = np.arange( - float(eval(source_ele[0])), - float(eval(source_ele[2])), - float(eval(source_ele[1])), - ) - else: - ele = np.array(float(eval(source_ele)), ndmin=1)[:N_frames] - else: - ele = np.array(source_ele, ndmin=1)[:N_frames] - - # ensure that elevation array has N_frames values - if len(ele) > N_frames: - # cut the array of elevation values - ele = ele[:N_frames] - elif len(ele) < N_frames: - # replicate the last elevation - ele = np.append(ele, np.full(N_frames - len(ele), ele[-1])) - - # check if elevation is from -90 .. 
+90 - if any(ele > 90) or any(ele < -90): - logger.error( - f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}" - ) - - # additional metadata (default values) - radius = np.ones(N_frames) - spread = np.zeros(N_frames) - gain = np.ones(N_frames) - - # arrange all metadata fields column-wise into a matrix - x_meta = np.column_stack((azi, ele, radius, spread, gain)) - # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) if i > 0: # get the length of the first source file @@ -294,12 +227,6 @@ def generate_ism2_scene( pre = np.zeros((N_delay, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) - # insert neutral position as a pre-amble - N_delay = int(N_delay / frame_len) - # use neutral position for padding - pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1)) - x_meta = np.concatenate([pre, x_meta]) - # pad with zeros to ensure that the signal length is a multiple of 20ms if len(x.audio) % frame_len != 0: # pad the source signal @@ -307,13 +234,6 @@ def generate_ism2_scene( post = np.zeros((N_pad, x.audio.shape[1])) x.audio = np.concatenate([x.audio, post]) - # pad the metadata - N_pad = int(len(x.audio) / frame_len) - len(x_meta) - if N_pad > 0: - # use neutral position for padding - post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1)) - x_meta = np.concatenate([x_meta, post]) - # add source signal to the array of all source signals y.fs = x.fs if y.audio is None: @@ -340,35 +260,6 @@ def generate_ism2_scene( ) y.audio = np.hstack((y.audio, x.audio)) - # add metadata to the array of all metadata - # make sure x_meta is a 3d array - x_meta = x_meta[np.newaxis, :] - if y_meta is None: - y_meta = x_meta - else: - N_srcs = y_meta.shape[0] - N_meta_features = y_meta.shape[2] - - # append the last position of the metadata to have equal length of all metadata - if x_meta.shape[1] > y_meta.shape[1]: - N_delta = x_meta.shape[1] - y_meta.shape[1] - # reshape to 2d array - y_meta = 
y_meta.reshape(y_meta.shape[1], -1) - # repeat last row N_delta times and append to the array - y_meta = np.vstack((y_meta, np.tile(y_meta[-1, :], (N_delta, 1)))) - # reshape back to 3d array - y_meta = y_meta.reshape(N_srcs, -1, N_meta_features) - elif y_meta.shape[1] > x_meta.shape[1]: - N_delta = y_meta.shape[1] - x_meta.shape[1] - # reshape to 2d array - x_meta = x_meta.reshape(x_meta.shape[1], -1) - # repeat last row N_delta times and append to the array - x_meta = np.vstack((x_meta, np.tile(x_meta[-1, :], (N_delta, 1)))) - # reshape back to 3d array - x_meta = np.expand_dims(x_meta, axis=0) - - y_meta = np.concatenate([y_meta, x_meta]) - # append pre-amble and post-amble to all sources if cfg.preamble != 0.0: # ensure that pre-amble is a multiple of 20ms @@ -378,11 +269,6 @@ def generate_ism2_scene( pre = np.zeros((N_pre, y.audio.shape[1])) y.audio = np.concatenate([pre, y.audio]) - # insert neutral position as a pre-amble to all sources - N_pre = int(N_pre / frame_len) - pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1)) - y_meta = np.concatenate([pre, y_meta], axis=1) - if cfg.postamble != 0.0: # ensure that post-mable is a multiple of 20ms N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) @@ -391,11 +277,6 @@ def generate_ism2_scene( post = np.zeros((N_post, y.audio.shape[1])) y.audio = np.concatenate([y.audio, post]) - # append neutral position as a post-amble to all sources - N_post = int(N_post / frame_len) - post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1)) - y_meta = np.concatenate([y_meta, post], axis=1) - # add random noise if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 @@ -405,12 +286,88 @@ def generate_ism2_scene( # superimpose y.audio += noise + # create metadata files + for i in range(N_sources): + # parse metadata parameters from the scene description + source_azi = ( + scene["azimuth"][i] + if isinstance(scene["azimuth"], list) + else 
scene["azimuth"] + ) + source_ele = ( + scene["elevation"][i] + if isinstance(scene["elevation"], list) + else scene["elevation"] + ) + + N_frames = int(len(y.audio) / y.fs * 50) + + # read azimuth information and create array + if isinstance(source_azi, str): + if ":" in source_azi: + source_azi = source_azi.split(":") + azi = np.linspace( + float(eval(source_azi[0])), + float(eval(source_azi[2])), + N_frames + ) + else: + azi = np.array(float(eval(source_azi)), ndmin=1) + else: + azi = np.array(source_azi, ndmin=1)[:N_frames] + + if len(azi) < N_frames: + # replicate the last elevation + azi = np.append(azi, np.full(N_frames - len(azi), azi[-1])) + + # convert azimuth from 0 .. 360 to -180 .. +180 + azi = (azi + 180) % 360 - 180 + + # check if azimuth is from -180 .. +180 + if any(azi > 180) or any(azi < -180): + logger.error( + f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}" + ) + + # read elevation information and create array + if isinstance(source_ele, str): + if ":" in source_ele: + source_ele = source_ele.split(":") + ele = np.linspace( + float(eval(source_ele[0])), + float(eval(source_ele[2])), + N_frames + ) + else: + ele = np.array(float(eval(source_ele)), ndmin=1) + else: + ele = np.array(source_ele, ndmin=1)[:N_frames] + + if len(ele) < N_frames: + # replicate the last elevation + ele = np.append(ele, np.full(N_frames - len(ele), ele[-1])) + + # check if elevation is from -90 .. 
+90 + if any(ele > 90) or any(ele < -90): + logger.error( + f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}" + ) + + # arrange all metadata fields column-wise into a matrix + x_meta = np.column_stack((azi, ele)) + + x_meta = x_meta[np.newaxis, :] + if y_meta is None: + y_meta = x_meta + else: + y_meta = np.concatenate([y_meta, x_meta]) + # write individual ISM audio streams to the output file in an interleaved format audiofile.write( os.path.join( cfg.output_path, - os.path.dirname(scene["name"]), - cfg.use_output_prefix + os.path.basename(scene["name"]), + "cat"+scene["name"][1], + cfg.use_output_prefix + os.path.basename(scene["name"] + ".wav"), ), y.audio, y.fs, @@ -421,8 +378,8 @@ def generate_ism2_scene( # generate .csv filename (should end with .0.csv, .1.csv, ...) csv_filename = os.path.join( cfg.output_path, - os.path.dirname(scene["name"]), - cfg.use_output_prefix + os.path.basename(scene["name"]) + f".{i}.csv", + "cat"+scene["name"][1], + cfg.use_output_prefix + os.path.basename(scene["name"]) + f".wav.{i}.csv", ) with open( -- GitLab From 45f06fd00f879eb53cf4cf76a29c937dd8a1b112 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 13 Jun 2023 11:01:02 +0200 Subject: [PATCH 2/9] correction of categories per talker/talker pairs in ISM1 and ISM2 tests --- .../P800-6/config/item_gen_P800-6.yml | 493 +++++++++-------- .../P800-7/config/item_gen_P800-7.yml | 520 +++++++++--------- .../generation/process_ism1_items.py | 4 +- .../generation/process_ism2_items.py | 4 +- 4 files changed, 510 insertions(+), 511 deletions(-) diff --git a/experiments/selection/P800-6/config/item_gen_P800-6.yml b/experiments/selection/P800-6/config/item_gen_P800-6.yml index 844032e5..101f07f6 100644 --- a/experiments/selection/P800-6/config/item_gen_P800-6.yml +++ b/experiments/selection/P800-6/config/item_gen_P800-6.yml @@ -86,339 +86,338 @@ use_output_prefix: "leee" scenes: - a1: - name: "a1s01" + cat1_1: + name: "cat1/a1s01.wav" description: "Talker 
sitting at a table" source: ["m1s01.wav", "m1s07.wav"] azimuth: 0 elevation: 0 overlap: -0.5 - a2: - name: "a6s02" - description: "Talker sitting at a table" - source: ["f3s02.wav", "f3s08.wav"] - azimuth: 60 + cat1_2: + name: "cat1/a1s02.wav" + description: "Standing talker." + source: ["m1s02.wav", "m1s08.wav"] + azimuth: 180 + elevation: 35 + overlap: -0.5 + + cat1_3: + name: "cat1/a1s03.wav" + description: "Smaller talker (child) walking around a table." + source: ["m1s03.wav", "m1s09.wav"] + azimuth: "120:1:120+360" elevation: 0 overlap: -0.5 - - a3: - name: "a5s03" - description: "Talker sitting at a table" - source: ["m3s03.wav", "m3s09.wav"] + + cat1_4: + name: "cat1/a1s04.wav" + description: "Talker walking around the table." + source: ["m1s04.wav", "m1s10.wav"] + azimuth: "180:-1:180-360" + elevation: 35 + overlap: -0.5 + + cat1_5: + name: "cat1/a1s05.wav" + description: "Elevation displacement." + source: ["m1s05.wav", "m1s11.wav"] azimuth: 120 - elevation: 0 + elevation: "-90:0.5:90" overlap: -0.5 - a4: - name: "a4s04" - description: "Talker sitting at a table" - source: ["f2s04.wav", "f2s10.wav"] + cat1_6: + name: "cat1/a1s06.wav" + description: "Azimuth and elevation displacement." + source: ["m1s06.wav", "m1s12.wav"] + azimuth: "0:0.5:0+180" + elevation: "35:-0.2:-35" + overlap: -0.5 + + cat1_7: + name: "cat1/a1s07.wav" + description: "Preliminary: Standing talker." + source: ["m1s13.wav", "m1s14.wav"] azimuth: 180 - elevation: 0 + elevation: 35 + overlap: -0.5 + + cat2_1: + name: "cat2/a2s01.wav" + description: "Standing talker." + source: ["f1s01.wav", "f1s07.wav"] + azimuth: 120 + elevation: 35 overlap: -0.5 - a5: - name: "a3s05" - description: "Talker sitting at a table" - source: ["m2s05.wav", "m2s11.wav"] - azimuth: 240 + cat2_2: + name: "cat2/a2s02.wav" + description: "Smaller talker (child) walking around a table." 
+ source: ["f1s02.wav", "f1s08.wav"] + azimuth: "60:1:60+360" elevation: 0 overlap: -0.5 - a6: - name: "a2s06" + cat2_3: + name: "cat2/a2s03.wav" + description: "Talker walking around the table." + source: ["f1s03.wav", "f1s09.wav"] + azimuth: "120:-1:120-360" + elevation: 35 + overlap: -0.5 + + cat2_4: + name: "cat2/a2s04.wav" + description: "Elevation displacement." + source: ["f1s04.wav", "f1s10.wav"] + azimuth: 60 + elevation: "-90:0.5:90" + overlap: -0.5 + + cat2_5: + name: "cat2/a2s05.wav" + description: "Azimuth and elevation displacement." + source: ["f1s05.wav", "f1s11.wav"] + azimuth: "300:0.5:300+180" + elevation: "35:-0.2:-35" + overlap: -0.5 + + cat2_6: + name: "cat2/a2s06.wav" description: "Talker sitting at a table" source: ["f1s06.wav", "f1s12.wav"] azimuth: 300 elevation: 0 overlap: -0.5 - - a7: - name: "a2s07" + + cat2_7: + name: "cat2/a2s07.wav" description: "Preliminary: Talker sitting at a table" source: ["f1s13.wav", "f1s14.wav"] azimuth: 0 elevation: 0 - overlap: -0.5 + overlap: -0.5 - b1: - name: "a2s01" - description: "Standing talker." - source: ["f1s01.wav", "f1s07.wav"] - azimuth: 120 - elevation: 35 + cat3_1: + name: "cat3/a3s01.wav" + description: "Smaller talker (child) walking around a table." + source: ["m2s01.wav", "m2s07.wav"] + azimuth: "0:1:360" + elevation: 0 overlap: -0.5 - - b2: - name: "a1s02" - description: "Standing talker." - source: ["m1s02.wav", "m1s08.wav"] - azimuth: 180 + + cat3_2: + name: "cat3/a3s02.wav" + description: "Talker walking around the table." + source: ["m2s02.wav", "m2s08.wav"] + azimuth: "60:-1:60-360" elevation: 35 overlap: -0.5 - - b3: - name: "a6s03" - description: "Standing talker." - source: ["f3s03.wav", "f3s09.wav"] - azimuth: 240 - elevation: 35 + + cat3_3: + name: "cat3/a3s03.wav" + description: "Elevation displacement." + source: ["m2s03.wav", "m2s09.wav"] + azimuth: 0 + elevation: "-90:0.5:90" overlap: -0.5 - - b4: - name: "a5s04" - description: "Standing talker." 
- source: ["m3s04.wav", "m3s10.wav"] - azimuth: 300 - elevation: 35 + + cat3_4: + name: "cat3/a3s04.wav" + description: "Azimuth and elevation displacement." + source: ["m2s04.wav", "m2s10.wav"] + azimuth: "240:0.5:240+180" + elevation: "35:-0.2:-35" overlap: -0.5 - b5: - name: "a4s05" - description: "Standing talker." - source: ["f2s05.wav", "f2s11.wav"] - azimuth: 0 - elevation: 35 + cat3_5: + name: "cat3/a3s05.wav" + description: "Talker sitting at a table" + source: ["m2s05.wav", "m2s11.wav"] + azimuth: 240 + elevation: 0 overlap: -0.5 - b6: - name: "a3s06" + cat3_6: + name: "cat3/a3s06.wav" description: "Standing talker." source: ["m2s06.wav", "m2s12.wav"] azimuth: 60 elevation: 35 overlap: -0.5 - - b7: - name: "a1s07" - description: "Preliminary: Standing talker." - source: ["m1s13.wav", "m1s14.wav"] - azimuth: 180 + + cat3_7: + name: "cat3/a3s07.wav" + description: "Preliminary: Talker walking around the table." + source: ["m2s13.wav", "m2s14.wav"] + azimuth: "180:-1:180-360" elevation: 35 overlap: -0.5 - c1: - name: "a3s01" - description: "Smaller talker (child) walking around a table." - source: ["m2s01.wav", "m2s07.wav"] - azimuth: "0:1:360" - elevation: 0 + cat4_1: + name: "cat4/a4s01.wav" + description: "Talker walking around the table." + source: ["f2s01.wav", "f2s07.wav"] + azimuth: "0:-1:-360" + elevation: 35 overlap: -0.5 - c2: - name: "a2s02" - description: "Smaller talker (child) walking around a table." - source: ["f1s02.wav", "f1s08.wav"] - azimuth: "60:1:60+360" + cat4_2: + name: "cat4/a4s02.wav" + description: "Elevation displacement." + source: ["f2s02.wav", "f2s08.wav"] + azimuth: 300 elevation: 0 overlap: -0.5 - - c3: - name: "a1s03" - description: "Smaller talker (child) walking around a table." - source: ["m1s03.wav", "m1s09.wav"] - azimuth: "120:1:120+360" - elevation: 0 + + cat4_3: + name: "cat4/a4s03.wav" + description: "Azimuth and elevation displacement." 
+ source: ["f2s03.wav", "f2s09.wav"] + azimuth: "180:0.5:180+180" + elevation: "35:-0.2:-35" overlap: -0.5 - - c4: - name: "a6s04" - description: "Smaller talker (child) walking around a table." - source: ["f3s04.wav", "f3s10.wav"] - azimuth: "180:1:180+360" + + cat4_4: + name: "cat4/a4s04.wav" + description: "Talker sitting at a table" + source: ["f2s04.wav", "f2s10.wav"] + azimuth: 180 elevation: 0 overlap: -0.5 - - c5: - name: "a5s05" - description: "Smaller talker (child) walking around a table." - source: ["m3s05.wav", "m3s11.wav"] - azimuth: "240:1:240+360" - elevation: 0 + + cat4_5: + name: "cat4/a4s05.wav" + description: "Standing talker." + source: ["f2s05.wav", "f2s11.wav"] + azimuth: 0 + elevation: 35 overlap: -0.5 - - c6: - name: "a4s06" + + cat4_6: + name: "cat4/a4s06.wav" description: "Smaller talker (child) walking around a table." source: ["f2s06.wav", "f2s12.wav"] azimuth: "300:1:300+360" elevation: 0 overlap: -0.5 - - c7: - name: "a4s07" + + cat4_7: + name: "cat4/a4s07.wav" description: "Preliminary: Smaller talker (child) walking around a table." source: ["f2s13.wav", "f2s14.wav"] azimuth: "120:1:120+360" elevation: 0 + overlap: -0.5 + + cat5_1: + name: "cat5/a5s01.wav" + description: "Elevation displacement." + source: ["m3s01.wav", "m3s07.wav"] + azimuth: 240 + elevation: "-90:0.5:90" overlap: -0.5 - - d1: - name: "a4s01" - description: "Talker walking around the table." - source: ["f2s01.wav", "f2s07.wav"] - azimuth: "0:-1:-360" - elevation: 35 - overlap: -0.5 - - d2: - name: "a3s02" - description: "Talker walking around the table." - source: ["m2s02.wav", "m2s08.wav"] - azimuth: "60:-1:60-360" - elevation: 35 + + cat5_2: + name: "cat5/a5s02.wav" + description: "Azimuth and elevation displacement." + source: ["m3s02.wav", "m3s08.wav"] + azimuth: "120:0.5:120+180" + elevation: "35:-0.2:-35" overlap: -0.5 - - d3: - name: "a2s03" - description: "Talker walking around the table." 
- source: ["f1s03.wav", "f1s09.wav"] - azimuth: "120:-1:120-360" - elevation: 35 + + cat5_3: + name: "cat5/a5s03.wav" + description: "Talker sitting at a table" + source: ["m3s03.wav", "m3s09.wav"] + azimuth: 120 + elevation: 0 overlap: -0.5 - - d4: - name: "a1s04" - description: "Talker walking around the table." - source: ["m1s04.wav", "m1s10.wav"] - azimuth: "180:-1:180-360" + + cat5_4: + name: "cat5/a5s04.wav" + description: "Standing talker." + source: ["m3s04.wav", "m3s10.wav"] + azimuth: 300 elevation: 35 overlap: -0.5 - - d5: - name: "a6s05" - description: "Talker walking around the table." - source: ["f3s05.wav", "f3s11.wav"] - azimuth: "240:-1:240-360" - elevation: 35 + + cat5_5: + name: "cat5/a5s05.wav" + description: "Smaller talker (child) walking around a table." + source: ["m3s05.wav", "m3s11.wav"] + azimuth: "240:1:240+360" + elevation: 0 overlap: -0.5 - - d6: - name: "a5s06" + + cat5_6: + name: "cat5/a5s06.wav" description: "Talker walking around the table." source: ["m3s06.wav", "m3s12.wav"] azimuth: "300:-1:300-360" elevation: 35 overlap: -0.5 + + cat5_7: + name: "cat5/a5s07.wav" + description: "Preliminary: Azimuth and elevation displacement." + source: ["m3s13.wav", "m3s14.wav"] + azimuth: "0:0.5:0+180" + elevation: "35:-0.2:-35" + overlap: -0.5 - d7: - name: "a3s07" - description: "Preliminary: Talker walking around the table." - source: ["m2s13.wav", "m2s14.wav"] - azimuth: "180:-1:180-360" - elevation: 35 - overlap: -0.5 - - e1: - name: "a5s01" - description: "Elevation displacement." - source: ["m3s01.wav", "m3s07.wav"] - azimuth: 240 - elevation: "-90:0.5:90" + cat6_1: + name: "cat6/a6s01.wav" + description: "Azimuth and elevation displacement." + source: ["f3s01.wav", "f3s07.wav"] + azimuth: "60:0.5:60+180" + elevation: "35:-0.2:-35" overlap: -0.5 - - e2: - name: "a4s02" - description: "Elevation displacement." 
- source: ["f2s02.wav", "f2s08.wav"] - azimuth: 300 + + cat6_2: + name: "cat6/a6s02.wav" + description: "Talker sitting at a table" + source: ["f3s02.wav", "f3s08.wav"] + azimuth: 60 elevation: 0 overlap: -0.5 - e3: - name: "a3s03" - description: "Elevation displacement." - source: ["m2s03.wav", "m2s09.wav"] - azimuth: 0 - elevation: "-90:0.5:90" + cat6_3: + name: "cat6/a6s03.wav" + description: "Standing talker." + source: ["f3s03.wav", "f3s09.wav"] + azimuth: 240 + elevation: 35 overlap: -0.5 - - e4: - name: "a2s04" - description: "Elevation displacement." - source: ["f1s04.wav", "f1s10.wav"] - azimuth: 60 - elevation: "-90:0.5:90" + + cat6_4: + name: "cat6/a6s04.wav" + description: "Smaller talker (child) walking around a table." + source: ["f3s04.wav", "f3s10.wav"] + azimuth: "180:1:180+360" + elevation: 0 overlap: -0.5 - - e5: - name: "a1s05" - description: "Elevation displacement." - source: ["m1s05.wav", "m1s11.wav"] - azimuth: 120 - elevation: "-90:0.5:90" + + cat6_5: + name: "cat6/a6s05.wav" + description: "Talker walking around the table." + source: ["f3s05.wav", "f3s11.wav"] + azimuth: "240:-1:240-360" + elevation: 35 overlap: -0.5 - - e6: - name: "a6s06" + + cat6_6: + name: "cat6/a6s06.wav" description: "Elevation displacement." source: ["f3s06.wav", "f3s12.wav"] azimuth: 180 elevation: "-90:0.5:90" overlap: -0.5 - - e7: - name: "a6s07" + + cat6_7: + name: "cat6/a6s07.wav" description: "Preliminary: Elevation displacement." source: ["f3s13.wav", "f3s14.wav"] azimuth: 120 elevation: "-90:0.5:90" overlap: -0.5 - - f1: - name: "a6s01" - description: "Azimuth and elevation displacement." - source: ["f3s01.wav", "f3s07.wav"] - azimuth: "60:0.5:60+180" - elevation: "35:-0.2:-35" - overlap: -0.5 - - f2: - name: "a5s02" - description: "Azimuth and elevation displacement." - source: ["m3s02.wav", "m3s08.wav"] - azimuth: "120:0.5:120+180" - elevation: "35:-0.2:-35" - overlap: -0.5 - - f3: - name: "a4s03" - description: "Azimuth and elevation displacement." 
- source: ["f2s03.wav", "f2s09.wav"] - azimuth: "180:0.5:180+180" - elevation: "35:-0.2:-35" - overlap: -0.5 - - f4: - name: "a3s04" - description: "Azimuth and elevation displacement." - source: ["m2s04.wav", "m2s10.wav"] - azimuth: "240:0.5:240+180" - elevation: "35:-0.2:-35" - overlap: -0.5 - - f5: - name: "a2s05" - description: "Azimuth and elevation displacement." - source: ["f1s05.wav", "f1s11.wav"] - azimuth: "300:0.5:300+180" - elevation: "35:-0.2:-35" - overlap: -0.5 - - f6: - name: "a1s06" - description: "Azimuth and elevation displacement." - source: ["m1s06.wav", "m1s12.wav"] - azimuth: "0:0.5:0+180" - elevation: "35:-0.2:-35" - overlap: -0.5 - - f7: - name: "a5s07" - description: "Preliminary: Azimuth and elevation displacement." - source: ["m3s13.wav", "m3s14.wav"] - azimuth: "0:0.5:0+180" - elevation: "35:-0.2:-35" - overlap: -0.5 - \ No newline at end of file diff --git a/experiments/selection/P800-7/config/item_gen_P800-7.yml b/experiments/selection/P800-7/config/item_gen_P800-7.yml index 54a8695e..ec79e31b 100644 --- a/experiments/selection/P800-7/config/item_gen_P800-7.yml +++ b/experiments/selection/P800-7/config/item_gen_P800-7.yml @@ -85,336 +85,336 @@ use_output_prefix: "leee" ### o stands for the object number; 0, 1, 2, 3 scenes: - a1: - name: "a1s01" + cat1_1: + name: "cat1/a1s01.wav" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["m1s01.wav", "f1s01.wav"] azimuth: [0, 50] elevation: [0, 0] overlap: -1.0 - a2: - name: "a6s02" - description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["f3s08.wav", "m1s08.wav"] - azimuth: [50, 350] - elevation: [0, 0] - overlap: -1.0 + cat1_2: + name: "cat1/a1s02.wav" + description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." 
+ source: ["m1s02.wav", "f1s02.wav"] + azimuth: [10, 110] + elevation: [35, 35] + overlap: 1.0 - a3: - name: "a5s03" - description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["f2s09.wav", "m3s09.wav"] - azimuth: [40, 290] - elevation: [0, 0] - overlap: -1.0 - - a4: - name: "a4s04" - description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["f1s10.wav", "m2s10.wav"] - azimuth: [30, 230] - elevation: [0, 0] - overlap: -1.0 - - a5: - name: "a3s05" - description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["m3s05.wav", "f3s05.wav"] + cat1_3: + name: "cat1/a1s03.wav" + description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." + source: ["m1s03.wav", "f1s03.wav"] azimuth: [20, 170] - elevation: [0, 0] - overlap: -1.0 - - a6: - name: "a2s06" - description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["m2s06.wav", "f2s06.wav"] - azimuth: [10, 110] - elevation: [0, 0] - overlap: -1.0 - - a7: - name: "a1s07" + elevation: [0, 45] + overlap: -1.0 + + cat1_4: + name: "cat1/a1s04.wav" + description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
+ source: ["m1s04.wav", "f1s04.wav"] + azimuth: [200, "30:-1:-270"] + elevation: [0, 45] + overlap: 1.0 + + cat1_5: + name: "cat1/a1s05.wav" + description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["m1s05.wav", "f1s05.wav"] + azimuth: ["-20:-1:-320", "-20:-1:-320"] + elevation: [45, 45] + overlap: 1.0 + + cat1_6: + name: "cat1/a1s06.wav" + description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["m1s06.wav", "f1s06.wav"] + azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] + elevation: [30, 30] + overlap: -1.0 + + cat1_7: + name: "cat1/a1s07.wav" description: "Preliminary: Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." source: ["m1s13.wav", "f1s13.wav"] azimuth: [0, 50] elevation: [0, 0] overlap: -1.0 - - b1: - name: "a2s01" + + cat2_1: + name: "cat2/a2s01.wav" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["m2s01.wav", "f2s01.wav"] azimuth: [20, 170] elevation: [35, 35] overlap: 1.0 - - b2: - name: "a1s02" - description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["m1s02.wav", "f1s02.wav"] - azimuth: [10, 110] - elevation: [35, 35] - overlap: 1.0 - - b3: - name: "a6s03" - description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["f3s09.wav", "m1s09.wav"] - azimuth: [0, 50] - elevation: [35, 35] - overlap: 1.0 - - b4: - name: "a5s04" - description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." 
- source: ["f2s10.wav", "m3s10.wav"] - azimuth: [50, 350] - elevation: [35, 35] - overlap: 1.0 + + cat2_2: + name: "cat2/a2s02.wav" + description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." + source: ["m2s02.wav", "f2s02.wav"] + azimuth: [30, 230] + elevation: [0, 45] + overlap: -1.0 - b5: - name: "a4s05" - description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["f1s11.wav", "m2s11.wav"] - azimuth: [40, 290] - elevation: [35, 35] - overlap: 1.0 + cat2_3: + name: "cat2/a2s03.wav" + description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." + source: ["m2s03.wav", "f2s03.wav"] + azimuth: [250, "-20:-1:-320"] + elevation: [0, 45] + overlap: 1.0 - b6: - name: "a3s06" - description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["m3s06.wav", "f3s06.wav"] - azimuth: [30, 230] - elevation: [35, 35] - overlap: 1.0 + cat2_4: + name: "cat2/a2s04.wav" + description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["m2s04.wav", "f2s04.wav"] + azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] + elevation: [45, 45] + overlap: 1.0 + + cat2_5: + name: "cat2/a2s05.wav" + description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["m2s05.wav", "f2s05.wav"] + azimuth: ["180:1:120 + 360", "180:-1:-120"] + elevation: [30, 30] + overlap: -1.0 + + cat2_6: + name: "cat2/a2s06.wav" + description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." 
+ source: ["m2s06.wav", "f2s06.wav"] + azimuth: [10, 110] + elevation: [0, 0] + overlap: -1.0 - b7: - name: "a2s07" + cat2_7: + name: "cat2/a2s07.wav" description: "Preliminary: Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." source: ["m2s13.wav", "f2s13.wav"] azimuth: [10, 110] elevation: [35, 35] overlap: 1.0 - c1: - name: "a3s01" + cat3_1: + name: "cat3/a3s01.wav" description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["m3s01.wav", "f3s01.wav"] azimuth: [40, 290] elevation: [0, 45] overlap: -1.0 - c2: - name: "a2s02" - description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["m2s02.wav", "f2s02.wav"] - azimuth: [30, 230] + cat3_2: + name: "cat3/a3s02.wav" + description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." + source: ["m3s02.wav", "f3s02.wav"] + azimuth: [300, "-70:-1:-10 - 360"] elevation: [0, 45] - overlap: -1.0 - - c3: - name: "a1s03" - description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["m1s03.wav", "f1s03.wav"] + overlap: 1.0 + + cat3_3: + name: "cat3/a3s03.wav" + description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["m3s03.wav", "f3s03.wav"] + azimuth: ["180:1:120 + 360", "180:1:120 + 360"] + elevation: [45, 45] + overlap: 1.0 + + cat3_4: + name: "cat3/a3s04.wav" + description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["m3s04.wav", "f3s04.wav"] + azimuth: ["240:1:180 + 360", "240:-1:-60"] + elevation: [30, 30] + overlap: -1.0 + + cat3_5: + name: "cat3/a3s05.wav" + description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." 
+ source: ["m3s05.wav", "f3s05.wav"] azimuth: [20, 170] - elevation: [0, 45] - overlap: -1.0 - - c4: - name: "a6s04" - description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["f3s10.wav", "m1s10.wav"] - azimuth: [10, 110] - elevation: [0, 45] - overlap: -1.0 - - c5: - name: "a5s05" - description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["f2s11.wav", "m3s11.wav"] - azimuth: [0, 50] - elevation: [0, 45] - overlap: -1.0 - - c6: - name: "a4s06" - description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["f1s12.wav", "m2s12.wav"] - azimuth: [50, 350] - elevation: [0, 60] - overlap: -1.0 - - c7: - name: "a3s07" + elevation: [0, 0] + overlap: -1.0 + + cat3_6: + name: "cat3/a3s06.wav" + description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." + source: ["m3s06.wav", "f3s06.wav"] + azimuth: [30, 230] + elevation: [35, 35] + overlap: 1.0 + + cat3_7: + name: "cat3/a3s07.wav" description: "Preliminary: One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." source: ["m3s13.wav", "f3s13.wav"] azimuth: [20, 170] elevation: [0, 60] overlap: -1.0 - - d1: - name: "a4s01" + + cat4_1: + name: "cat4/a4s01.wav" description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["f1s07.wav", "m2s07.wav"] azimuth: [50, "180:1:120 + 360"] elevation: [0, 45] overlap: 1.0 - - d2: - name: "a3s02" - description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
- source: ["m3s02.wav", "f3s02.wav"] - azimuth: [300, "-70:-1:-10 - 360"] - elevation: [0, 45] - overlap: 1.0 - - d3: - name: "a2s03" - description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["m2s03.wav", "f2s03.wav"] - azimuth: [250, "-20:-1:-320"] - elevation: [0, 45] - overlap: 1.0 - - d4: - name: "a1s04" - description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["m1s04.wav", "f1s04.wav"] - azimuth: [200, "30:-1:-270"] - elevation: [0, 45] - overlap: 1.0 - - d5: - name: "a6s05" - description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["f3s11.wav", "m1s11.wav"] - azimuth: [150, "80:1:20 + 360"] - elevation: [0, 45] - overlap: 1.0 - - d6: - name: "a5s06" - description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["f2s12.wav", "m3s12.wav"] - azimuth: [100, "130:1:70 + 360"] - elevation: [0, 45] - overlap: 1.0 - - d7: - name: "a4s07" + + cat4_2: + name: "cat4/a4s02.wav" + description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["f1s08.wav", "m2s08.wav"] + azimuth: ["130:1:70 + 360", "130:1:70 + 360"] + elevation: [45, 45] + overlap: 1.0 + + cat4_3: + name: "cat4/a4s03.wav" + description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["f1s09.wav", "m2s09.wav"] + azimuth: ["300:1:240 + 360", "300:-1:0"] + elevation: [30, 30] + overlap: -1.0 + + cat4_4: + name: "cat4/a4s04.wav" + description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." 
+ source: ["f1s10.wav", "m2s10.wav"] + azimuth: [30, 230] + elevation: [0, 0] + overlap: -1.0 + + cat4_5: + name: "cat4/a4s05.wav" + description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." + source: ["f1s11.wav", "m2s11.wav"] + azimuth: [40, 290] + elevation: [35, 35] + overlap: 1.0 + + cat4_6: + name: "cat4/a4s06.wav" + description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." + source: ["f1s12.wav", "m2s12.wav"] + azimuth: [50, 350] + elevation: [0, 60] + overlap: -1.0 + + cat4_7: + name: "cat4/a4s07.wav" description: "Preliminary: One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." source: ["f1s14.wav", "m2s14.wav"] azimuth: [200, "30:-1:-270"] elevation: [0, 45] overlap: 1.0 - - e1: - name: "a5s01" + + cat5_1: + name: "cat5/a5s01.wav" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["f2s07.wav", "m3s07.wav"] azimuth: ["80:1:20 + 360", "80:1:20 + 360"] elevation: [45, 45] overlap: 1.0 - - e2: - name: "a4s02" - description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["f1s08.wav", "m2s08.wav"] - azimuth: ["130:1:70 + 360", "130:1:70 + 360"] - elevation: [45, 45] - overlap: 1.0 - - e3: - name: "a3s03" - description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["m3s03.wav", "f3s03.wav"] - azimuth: ["180:1:120 + 360", "180:1:120 + 360"] - elevation: [45, 45] - overlap: 1.0 - - e4: - name: "a2s04" - description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["m2s04.wav", "f2s04.wav"] - azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] - elevation: [45, 45] - overlap: 1.0 - - e5: - name: "a1s05" - description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: 
["m1s05.wav", "f1s05.wav"] - azimuth: ["-20:-1:-320", "-20:-1:-320"] - elevation: [45, 45] + + cat5_2: + name: "cat5/a5s02.wav" + description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["f2s08.wav", "m3s08.wav"] + azimuth: ["0:1:300", "0:-1:60 - 360"] + elevation: [30, 30] + overlap: -1.0 + + cat5_3: + name: "cat5/a5s03.wav" + description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." + source: ["f2s09.wav", "m3s09.wav"] + azimuth: [40, 290] + elevation: [0, 0] + overlap: -1.0 + + cat5_4: + name: "cat5/a5s04.wav" + description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." + source: ["f2s10.wav", "m3s10.wav"] + azimuth: [50, 350] + elevation: [35, 35] + overlap: 1.0 + + cat5_5: + name: "cat5/a5s05.wav" + description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." + source: ["f2s11.wav", "m3s11.wav"] + azimuth: [0, 50] + elevation: [0, 45] + overlap: -1.0 + + cat5_6: + name: "cat5/a5s06.wav" + description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
+ source: ["f2s12.wav", "m3s12.wav"] + azimuth: [100, "130:1:70 + 360"] + elevation: [0, 45] overlap: 1.0 - - e6: - name: "a6s06" - description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["f3s12.wav", "m1s12.wav"] - azimuth: ["30:-1:-270", "30:-1:-270"] - elevation: [45, 45] - overlap: 1.0 - - e7: - name: "a5s07" + + cat5_7: + name: "cat5/a5s07.wav" description: "Preliminary: Two talkers walking side-by-side around the table, ~30% overlapping utterances" source: ["f2s14.wav", "m3s14.wav"] azimuth: ["-20:-1:-320", "-20:-1:-320"] elevation: [45, 45] overlap: 1.0 - - f1: - name: "a6s01" + + cat6_1: + name: "cat6/a6s01.wav" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." source: ["f3s07.wav", "m1s07.wav"] azimuth: ["60:1:0 + 360", "60:-1:120 - 360"] elevation: [30, 30] overlap: -1.0 + + cat6_2: + name: "cat6/a6s02.wav" + description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." + source: ["f3s08.wav", "m1s08.wav"] + azimuth: [50, 350] + elevation: [0, 0] + overlap: -1.0 - f2: - name: "a5s02" - description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["f2s08.wav", "m3s08.wav"] - azimuth: ["0:1:300", "0:-1:60 - 360"] - elevation: [30, 30] - overlap: -1.0 + cat6_3: + name: "cat6/a6s03.wav" + description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." + source: ["f3s09.wav", "m1s09.wav"] + azimuth: [0, 50] + elevation: [35, 35] + overlap: 1.0 - f3: - name: "a4s03" - description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." 
- source: ["f1s09.wav", "m2s09.wav"] - azimuth: ["300:1:240 + 360", "300:-1:0"] - elevation: [30, 30] + cat6_4: + name: "cat6/a6s04.wav" + description: "One talker sitting at a table, second talker standing beside the table, non-overlapping utterances." + source: ["f3s10.wav", "m1s10.wav"] + azimuth: [10, 110] + elevation: [0, 45] overlap: -1.0 + + cat6_5: + name: "cat6/a6s05.wav" + description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." + source: ["f3s11.wav", "m1s11.wav"] + azimuth: [150, "80:1:20 + 360"] + elevation: [0, 45] + overlap: 1.0 + + cat6_6: + name: "cat6/a6s06.wav" + description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["f3s12.wav", "m1s12.wav"] + azimuth: ["30:-1:-270", "30:-1:-270"] + elevation: [45, 45] + overlap: 1.0 - f4: - name: "a3s04" - description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["m3s04.wav", "f3s04.wav"] - azimuth: ["240:1:180 + 360", "240:-1:-60"] - elevation: [30, 30] - overlap: -1.0 - - f5: - name: "a2s05" - description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["m2s05.wav", "f2s05.wav"] - azimuth: ["180:1:120 + 360", "180:-1:-120"] - elevation: [30, 30] - overlap: -1.0 - - f6: - name: "a1s06" - description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["m1s06.wav", "f1s06.wav"] - azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] - elevation: [30, 30] - overlap: -1.0 - - f7: - name: "a6s07" + cat6_7: + name: "cat6/a6s07.wav" description: "Preliminary: Two talkers walking around the table in opposite directions, non-overlapping utterances." 
source: ["f3s14.wav", "m1s14.wav"] azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py index 55791a19..89ce4d1f 100644 --- a/ivas_processing_scripts/generation/process_ism1_items.py +++ b/ivas_processing_scripts/generation/process_ism1_items.py @@ -334,8 +334,8 @@ def generate_ism1_scene( # write ISM metadata to the output file in .0.csv format csv_filename = os.path.join( cfg.output_path, - "cat"+scene["name"][1], - cfg.use_output_prefix + os.path.basename(scene["name"]) + ".wav.0.csv", + os.path.dirname(scene["name"]), + cfg.use_output_prefix + os.path.basename(scene["name"]) + ".0.csv", ) with open( diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py index 4a1c100a..d50edbfb 100644 --- a/ivas_processing_scripts/generation/process_ism2_items.py +++ b/ivas_processing_scripts/generation/process_ism2_items.py @@ -378,8 +378,8 @@ def generate_ism2_scene( # generate .csv filename (should end with .0.csv, .1.csv, ...) 
csv_filename = os.path.join( cfg.output_path, - "cat"+scene["name"][1], - cfg.use_output_prefix + os.path.basename(scene["name"]) + f".wav.{i}.csv", + os.path.dirname(scene["name"]), + cfg.use_output_prefix + os.path.basename(scene["name"]) + f".{i}.csv", ) with open( -- GitLab From 7e6fa6c2d094244c79c15f19ae266f6c303ae858 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 13 Jun 2023 11:07:25 +0200 Subject: [PATCH 3/9] correction of categories per talker/talker pairs in ISM1 and ISM2 tests --- .../generation/process_ism2_items.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py index d50edbfb..af59b9c7 100644 --- a/ivas_processing_scripts/generation/process_ism2_items.py +++ b/ivas_processing_scripts/generation/process_ism2_items.py @@ -362,17 +362,6 @@ def generate_ism2_scene( else: y_meta = np.concatenate([y_meta, x_meta]) - # write individual ISM audio streams to the output file in an interleaved format - audiofile.write( - os.path.join( - cfg.output_path, - "cat"+scene["name"][1], - cfg.use_output_prefix + os.path.basename(scene["name"] + ".wav"), - ), - y.audio, - y.fs, - ) - # write individual ISM metadata to output files in .csv format for i in range(N_sources): # generate .csv filename (should end with .0.csv, .1.csv, ...) 
-- GitLab From 87febe56f735a700a5c40511e5eb051e4bec4b40 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 13 Jun 2023 11:13:04 +0200 Subject: [PATCH 4/9] correction of categories per talker/talker pairs in ISM1 and ISM2 tests --- .../generation/process_ism1_items.py | 4 ++-- .../generation/process_ism2_items.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py index 89ce4d1f..221562d3 100644 --- a/ivas_processing_scripts/generation/process_ism1_items.py +++ b/ivas_processing_scripts/generation/process_ism1_items.py @@ -324,8 +324,8 @@ def generate_ism1_scene( audiofile.write( os.path.join( cfg.output_path, - "cat"+scene["name"][1], - cfg.use_output_prefix + os.path.basename(scene["name"]+".wav"), + os.path.dirname(scene["name"]), + cfg.use_output_prefix + os.path.basename(scene["name"]), ), y.audio, y.fs, diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py index af59b9c7..4ece488d 100644 --- a/ivas_processing_scripts/generation/process_ism2_items.py +++ b/ivas_processing_scripts/generation/process_ism2_items.py @@ -362,6 +362,17 @@ def generate_ism2_scene( else: y_meta = np.concatenate([y_meta, x_meta]) + # write individual ISM audio streams to the output file in an interleaved format + audiofile.write( + os.path.join( + cfg.output_path, + os.path.dirname(scene["name"]), + cfg.use_output_prefix + os.path.basename(scene["name"]), + ), + y.audio, + y.fs, + ) + # write individual ISM metadata to output files in .csv format for i in range(N_sources): # generate .csv filename (should end with .0.csv, .1.csv, ...) 
-- GitLab From 25c01c513fe0060d9ad58003a2f95bcefa6f9105 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 15 Jun 2023 12:36:21 +0200 Subject: [PATCH 5/9] correction of ranges of azimuth and elevation maintaining speed of talker movement --- .../generation/process_ism1_items.py | 42 ++++++++++--------- .../generation/process_ism2_items.py | 39 +++++++++-------- 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py index 221562d3..d6ffc6d7 100644 --- a/ivas_processing_scripts/generation/process_ism1_items.py +++ b/ivas_processing_scripts/generation/process_ism1_items.py @@ -267,9 +267,10 @@ def generate_ism1_scene( N_frames = int(len(y.audio) / y.fs * 50) - # read azimuth information and create array + # read azimuth information and convert to an array if isinstance(source_azi, str): if ":" in source_azi: + # start with the initial azimuth value and apply step N_frames times source_azi = source_azi.split(":") azi = np.linspace( float(eval(source_azi[0])), @@ -277,46 +278,49 @@ def generate_ism1_scene( N_frames ) else: - azi = np.array(float(eval(source_azi)), ndmin=1) + # replicate static azimuth value N_frames times + azi = np.repeat(float(eval(source_azi)), N_frames) else: - azi = np.array(source_azi, ndmin=1)[:N_frames] + # replicate static azimuth value N_frames times + azi = np.repeat(float(source_azi), N_frames) - if len(azi) < N_frames: - # replicate the last elevation - azi = np.append(azi, np.full(N_frames - len(azi), azi[-1])) - # convert azimuth from 0 .. 360 to -180 .. +180 azi = (azi + 180) % 360 - 180 - # check if azimuth is from -180 .. +180 + # check, if azimuth is from -180 .. 
+180 if any(azi > 180) or any(azi < -180): logger.error( f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}" ) - # read elevation information and create array + # read elevation information and convert to an array if isinstance(source_ele, str): if ":" in source_ele: + # convert into array (initial_value:step:stop_value) + # note: the stop_value value is +-90 degrees depending on the sign of the step source_ele = source_ele.split(":") - ele = np.linspace( + ele = np.arange( float(eval(source_ele[0])), - float(eval(source_ele[2])), - N_frames - ) + np.sign(float(eval(source_ele[1]))) * 90, + float(eval(source_ele[1])) + )[:N_frames] + + # repeat the last elevation value, if array is shorter than N_frames + if len(ele) < N_frames: + ele = np.append(ele, np.full(N_frames - len(ele), ele[-1])) else: - ele = np.array(float(eval(source_ele)), ndmin=1) + # replicate static elevation value N_frames times + ele = np.repeat(float(eval(source_ele)), N_frames) else: - ele = np.array(source_ele, ndmin=1)[:N_frames] + # replicate static elevation value N_frames times + ele = np.repeat(float(source_ele), N_frames) - if len(ele) < N_frames: - # replicate the last elevation - ele = np.append(ele, np.full(N_frames - len(ele), ele[-1])) - # check if elevation is from -90 .. 
+90 if any(ele > 90) or any(ele < -90): logger.error( f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}" ) + # arrange all metadata fields column-wise into a matrix y_meta = np.column_stack((azi, ele)) diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py index 4ece488d..9935bc20 100644 --- a/ivas_processing_scripts/generation/process_ism2_items.py +++ b/ivas_processing_scripts/generation/process_ism2_items.py @@ -302,9 +302,10 @@ def generate_ism2_scene( N_frames = int(len(y.audio) / y.fs * 50) - # read azimuth information and create array + # read azimuth information and convert to an array if isinstance(source_azi, str): if ":" in source_azi: + # start with the initial azimuth value and apply step N_frames times source_azi = source_azi.split(":") azi = np.linspace( float(eval(source_azi[0])), @@ -312,13 +313,11 @@ def generate_ism2_scene( N_frames ) else: - azi = np.array(float(eval(source_azi)), ndmin=1) + # replicate static azimuth value N_frames times + azi = np.repeat(float(eval(source_azi)), N_frames) else: - azi = np.array(source_azi, ndmin=1)[:N_frames] - - if len(azi) < N_frames: - # replicate the last elevation - azi = np.append(azi, np.full(N_frames - len(azi), azi[-1])) + # replicate static azimuth value N_frames times + azi = np.repeat(float(source_azi), N_frames) # convert azimuth from 0 .. 360 to -180 .. 
+180 azi = (azi + 180) % 360 - 180 @@ -329,23 +328,27 @@ def generate_ism2_scene( f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}" ) - # read elevation information and create array + # read elevation information and convert to an array if isinstance(source_ele, str): if ":" in source_ele: + # convert into array (initial_value:step:stop_value) + # note: the stop_value value is +-90 degrees depending on the sign of the step source_ele = source_ele.split(":") - ele = np.linspace( + ele = np.arange( float(eval(source_ele[0])), - float(eval(source_ele[2])), - N_frames - ) + np.sign(float(eval(source_ele[1]))) * 90, + float(eval(source_ele[1])) + )[:N_frames] + + # repeat the last elevation value, if array is shorter than N_frames + if len(ele) < N_frames: + ele = np.append(ele, np.full(N_frames - len(ele), ele[-1])) else: - ele = np.array(float(eval(source_ele)), ndmin=1) + # replicate static elevation value N_frames times + ele = np.repeat(float(eval(source_ele)), N_frames) else: - ele = np.array(source_ele, ndmin=1)[:N_frames] - - if len(ele) < N_frames: - # replicate the last elevation - ele = np.append(ele, np.full(N_frames - len(ele), ele[-1])) + # replicate static elevation value N_frames times + ele = np.repeat(float(source_ele), N_frames) # check if elevation is from -90 .. 
+90 if any(ele > 90) or any(ele < -90): -- GitLab From 8a48abb14089de26a6ea20889ca64226ea63313e Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 15 Jun 2023 12:37:30 +0200 Subject: [PATCH 6/9] reduce elevation displacement step from 0.5 to 0.3 degrees in scene e --- .../P800-6/config/item_gen_P800-6.yml | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/experiments/selection/P800-6/config/item_gen_P800-6.yml b/experiments/selection/P800-6/config/item_gen_P800-6.yml index 101f07f6..1a7584a2 100644 --- a/experiments/selection/P800-6/config/item_gen_P800-6.yml +++ b/experiments/selection/P800-6/config/item_gen_P800-6.yml @@ -31,11 +31,17 @@ postamble: 1.0 add_low_level_random_noise: true ### File designators, default is "l" for listening lab, "EN" for language, "p06" for exp and "g" for provider -listening_lab: "a" -language: "JP" -exp: "p06" +# listening_lab: "a" +# language: "JP" +# exp: "p06" +# provider: "g" + +listening_lab: "l" +language: "EN" +exp: "p01" provider: "g" + ### Use prefix for all input filenames (default: "") ### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) use_input_prefix: "lLLeee" @@ -123,7 +129,7 @@ scenes: description: "Elevation displacement." source: ["m1s05.wav", "m1s11.wav"] azimuth: 120 - elevation: "-90:0.5:90" + elevation: "-90:0.3:90" overlap: -0.5 cat1_6: @@ -171,7 +177,7 @@ scenes: description: "Elevation displacement." source: ["f1s04.wav", "f1s10.wav"] azimuth: 60 - elevation: "-90:0.5:90" + elevation: "-90:0.3:90" overlap: -0.5 cat2_5: @@ -219,7 +225,7 @@ scenes: description: "Elevation displacement." source: ["m2s03.wav", "m2s09.wav"] azimuth: 0 - elevation: "-90:0.5:90" + elevation: "-90:0.3:90" overlap: -0.5 cat3_4: @@ -267,7 +273,7 @@ scenes: description: "Elevation displacement." 
source: ["f2s02.wav", "f2s08.wav"] azimuth: 300 - elevation: 0 + elevation: "-90:0.3:90" overlap: -0.5 cat4_3: @@ -315,7 +321,7 @@ scenes: description: "Elevation displacement." source: ["m3s01.wav", "m3s07.wav"] azimuth: 240 - elevation: "-90:0.5:90" + elevation: "-90:0.3:90" overlap: -0.5 cat5_2: @@ -411,7 +417,7 @@ scenes: description: "Elevation displacement." source: ["f3s06.wav", "f3s12.wav"] azimuth: 180 - elevation: "-90:0.5:90" + elevation: "-90:0.3:90" overlap: -0.5 cat6_7: @@ -419,5 +425,5 @@ scenes: description: "Preliminary: Elevation displacement." source: ["f3s13.wav", "f3s14.wav"] azimuth: 120 - elevation: "-90:0.5:90" + elevation: "-90:0.3:90" overlap: -0.5 -- GitLab From 0966754905ecda68a5d781586accd099c1b39b1f Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 15 Jun 2023 12:39:03 +0200 Subject: [PATCH 7/9] remove debugging modifications --- .../selection/P800-6/config/item_gen_P800-6.yml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/experiments/selection/P800-6/config/item_gen_P800-6.yml b/experiments/selection/P800-6/config/item_gen_P800-6.yml index 1a7584a2..c4321aba 100644 --- a/experiments/selection/P800-6/config/item_gen_P800-6.yml +++ b/experiments/selection/P800-6/config/item_gen_P800-6.yml @@ -31,14 +31,9 @@ postamble: 1.0 add_low_level_random_noise: true ### File designators, default is "l" for listening lab, "EN" for language, "p06" for exp and "g" for provider -# listening_lab: "a" -# language: "JP" -# exp: "p06" -# provider: "g" - -listening_lab: "l" -language: "EN" -exp: "p01" +listening_lab: "a" +language: "JP" +exp: "p06" provider: "g" -- GitLab From 3e9a392ce247d4d7d985520084fdc99ed7875001 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 15 Jun 2023 14:14:51 +0200 Subject: [PATCH 8/9] correction in azimuth range --- ivas_processing_scripts/generation/process_ism1_items.py | 6 +++--- ivas_processing_scripts/generation/process_ism2_items.py | 6 +++--- 2 files changed, 6 
insertions(+), 6 deletions(-) diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py index d6ffc6d7..ee1df7d8 100644 --- a/ivas_processing_scripts/generation/process_ism1_items.py +++ b/ivas_processing_scripts/generation/process_ism1_items.py @@ -272,10 +272,10 @@ def generate_ism1_scene( if ":" in source_azi: # start with the initial azimuth value and apply step N_frames times source_azi = source_azi.split(":") - azi = np.linspace( + azi = np.arange( float(eval(source_azi[0])), - float(eval(source_azi[2])), - N_frames + float(eval(source_azi[0])) + N_frames * float(eval(source_azi[1])), + float(eval(source_azi[1])) ) else: # replicate static azimuth value N_frames times diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py index 9935bc20..4afa730d 100644 --- a/ivas_processing_scripts/generation/process_ism2_items.py +++ b/ivas_processing_scripts/generation/process_ism2_items.py @@ -307,10 +307,10 @@ def generate_ism2_scene( if ":" in source_azi: # start with the initial azimuth value and apply step N_frames times source_azi = source_azi.split(":") - azi = np.linspace( + azi = np.arange( float(eval(source_azi[0])), - float(eval(source_azi[2])), - N_frames + float(eval(source_azi[0])) + N_frames * float(eval(source_azi[1])), + float(eval(source_azi[1])) ) else: # replicate static azimuth value N_frames times -- GitLab From 251368432f0d9976353050da3fae8f9ce00f097e Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Fri, 16 Jun 2023 09:04:14 +0200 Subject: [PATCH 9/9] formatting --- .../generation/process_ism1_items.py | 15 +++++++-------- .../generation/process_ism2_items.py | 8 ++++---- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py index ee1df7d8..6c45ad65 100644 --- 
a/ivas_processing_scripts/generation/process_ism1_items.py +++ b/ivas_processing_scripts/generation/process_ism1_items.py @@ -260,14 +260,13 @@ def generate_ism1_scene( # superimpose y.audio += noise - # process azimuth and elevation source_azi = scene["azimuth"] source_ele = scene["elevation"] N_frames = int(len(y.audio) / y.fs * 50) - # read azimuth information and convert to an array + # read azimuth information and convert to an array if isinstance(source_azi, str): if ":" in source_azi: # start with the initial azimuth value and apply step N_frames times @@ -275,7 +274,7 @@ def generate_ism1_scene( azi = np.arange( float(eval(source_azi[0])), float(eval(source_azi[0])) + N_frames * float(eval(source_azi[1])), - float(eval(source_azi[1])) + float(eval(source_azi[1])), ) else: # replicate static azimuth value N_frames times @@ -283,7 +282,7 @@ def generate_ism1_scene( else: # replicate static azimuth value N_frames times azi = np.repeat(float(source_azi), N_frames) - + # convert azimuth from 0 .. 360 to -180 .. +180 azi = (azi + 180) % 360 - 180 @@ -302,9 +301,9 @@ def generate_ism1_scene( ele = np.arange( float(eval(source_ele[0])), np.sign(float(eval(source_ele[1]))) * 90, - float(eval(source_ele[1])) + float(eval(source_ele[1])), )[:N_frames] - + # repeat the last elevation value, if array is shorter than N_frames if len(ele) < N_frames: ele = np.append(ele, np.full(N_frames - len(ele), ele[-1])) @@ -314,13 +313,13 @@ def generate_ism1_scene( else: # replicate static elevation value N_frames times ele = np.repeat(float(source_ele), N_frames) - + # check if elevation is from -90 .. 
+90 if any(ele > 90) or any(ele < -90): logger.error( f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}" ) - + # arrange all metadata fields column-wise into a matrix y_meta = np.column_stack((azi, ele)) diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py index 4afa730d..2f3fc0c0 100644 --- a/ivas_processing_scripts/generation/process_ism2_items.py +++ b/ivas_processing_scripts/generation/process_ism2_items.py @@ -302,7 +302,7 @@ def generate_ism2_scene( N_frames = int(len(y.audio) / y.fs * 50) - # read azimuth information and convert to an array + # read azimuth information and convert to an array if isinstance(source_azi, str): if ":" in source_azi: # start with the initial azimuth value and apply step N_frames times @@ -310,7 +310,7 @@ def generate_ism2_scene( azi = np.arange( float(eval(source_azi[0])), float(eval(source_azi[0])) + N_frames * float(eval(source_azi[1])), - float(eval(source_azi[1])) + float(eval(source_azi[1])), ) else: # replicate static azimuth value N_frames times @@ -318,7 +318,7 @@ def generate_ism2_scene( else: # replicate static azimuth value N_frames times azi = np.repeat(float(source_azi), N_frames) - + # convert azimuth from 0 .. 360 to -180 .. +180 azi = (azi + 180) % 360 - 180 @@ -337,7 +337,7 @@ def generate_ism2_scene( ele = np.arange( float(eval(source_ele[0])), np.sign(float(eval(source_ele[1]))) * 90, - float(eval(source_ele[1])) + float(eval(source_ele[1])), )[:N_frames] # repeat the last elevation value, if array is shorter than N_frames -- GitLab