From c9fbdb5db4a291e323d8fec6c61cdb3ddaeed7a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20Toftg=C3=A5rd?= <tomas.toftgard@ericsson.com>
Date: Fri, 9 Jun 2023 18:48:19 +0200
Subject: [PATCH 1/9] Align with test plan - correct elevation for some scenes
 - correct assignment of categories (talker/talker pair) - correct the source
 files for preliminaries, to correspond to test plan categories - metadata to
 cover the whole samples

---
 .../P800-6/config/item_gen_P800-6.yml         | 256 +++++++++---------
 .../P800-7/config/item_gen_P800-7.yml         | 256 +++++++++---------
 .../generation/process_ism1_items.py          | 109 +++-----
 .../generation/process_ism2_items.py          | 203 ++++++--------
 4 files changed, 379 insertions(+), 445 deletions(-)

diff --git a/experiments/selection/P800-6/config/item_gen_P800-6.yml b/experiments/selection/P800-6/config/item_gen_P800-6.yml
index 1cddf5bc..844032e5 100644
--- a/experiments/selection/P800-6/config/item_gen_P800-6.yml
+++ b/experiments/selection/P800-6/config/item_gen_P800-6.yml
@@ -31,8 +31,8 @@ postamble: 1.0
 add_low_level_random_noise: true
 
 ### File designators, default is "l" for listening lab, "EN" for language, "p06" for exp and "g" for provider
-listening_lab: "l"
-language: "EN"
+listening_lab: "a"
+language: "JP"
 exp: "p06"
 provider: "g"
 
@@ -86,339 +86,339 @@ use_output_prefix: "leee"
 
 
 scenes:
-    cat1_1: 
-        name: "cat1/a1s01.wav"
+    a1: 
+        name: "a1s01"
         description: "Talker sitting at a table"
         source: ["m1s01.wav", "m1s07.wav"]
         azimuth: 0 
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
         
-    cat1_2:
-        name: "cat1/a1s02.wav"
+    a2: 
+        name: "a6s02"
         description: "Talker sitting at a table"
         source: ["f3s02.wav", "f3s08.wav"]
         azimuth: 60 
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
         
-    cat1_3:
-        name: "cat1/a1s03.wav"
+    a3: 
+        name: "a5s03"
         description: "Talker sitting at a table"
         source: ["m3s03.wav", "m3s09.wav"]
         azimuth: 120 
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
 
-    cat1_4:
-        name: "cat1/a1s04.wav"
+    a4: 
+        name: "a4s04"
         description: "Talker sitting at a table"
         source: ["f2s04.wav", "f2s10.wav"]
         azimuth: 180 
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
 
-    cat1_5:
-        name: "cat1/a1s05.wav"
+    a5: 
+        name: "a3s05"
         description: "Talker sitting at a table"
         source: ["m2s05.wav", "m2s11.wav"]
         azimuth: 240 
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
 
-    cat1_6:
-        name: "cat1/a1s06.wav"
+    a6: 
+        name: "a2s06"
         description: "Talker sitting at a table"
         source: ["f1s06.wav", "f1s12.wav"]
         azimuth: 300 
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
 
-    cat1_7: 
-        name: "cat1/a1s07.wav"
+    a7: 
+        name: "a2s07"
         description: "Preliminary: Talker sitting at a table"
         source: ["f1s13.wav", "f1s14.wav"]
         azimuth: 0 
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
 
-    cat2_1:
-        name: "cat2/a2s01.wav"
+    b1: 
+        name: "a2s01"
         description: "Standing talker."
         source: ["f1s01.wav", "f1s07.wav"]
         azimuth: 120 
         elevation: 35 
-        overlap: -1.0
+        overlap: -0.5
  
-    cat2_2:
-        name: "cat2/a2s02.wav"
+    b2: 
+        name: "a1s02"
         description: "Standing talker."
         source: ["m1s02.wav", "m1s08.wav"]
         azimuth: 180 
         elevation: 35 
-        overlap: -1.0
+        overlap: -0.5
  
-    cat2_3:
-        name: "cat2/a2s03.wav"
+    b3: 
+        name: "a6s03"
         description: "Standing talker."
         source: ["f3s03.wav", "f3s09.wav"]
         azimuth: 240 
         elevation: 35 
-        overlap: -1.0
+        overlap: -0.5
  
-    cat2_4:
-        name: "cat2/a2s04.wav"
+    b4: 
+        name: "a5s04"
         description: "Standing talker."
         source: ["m3s04.wav", "m3s10.wav"]
         azimuth: 300 
         elevation: 35 
-        overlap: -1.0
+        overlap: -0.5
 
-    cat2_5:
-        name: "cat2/a2s05.wav"
+    b5: 
+        name: "a4s05"
         description: "Standing talker."
         source: ["f2s05.wav", "f2s11.wav"]
         azimuth: 0 
         elevation: 35 
-        overlap: -1.0
+        overlap: -0.5
 
-    cat2_6:
-        name: "cat2/a2s06.wav"
+    b6: 
+        name: "a3s06"
         description: "Standing talker."
         source: ["m2s06.wav", "m2s12.wav"]
         azimuth: 60 
         elevation: 35 
-        overlap: -1.0
+        overlap: -0.5
 
-    cat2_7:
-        name: "cat2/a2s07.wav"
+    b7:
+        name: "a1s07"
         description: "Preliminary: Standing talker."
         source: ["m1s13.wav", "m1s14.wav"]
         azimuth: 180 
         elevation: 35 
-        overlap: -1.0
+        overlap: -0.5
 
-    cat3_1:
-        name: "cat3/a3s01.wav"
+    c1: 
+        name: "a3s01"
         description: "Smaller talker (child) walking around a table."
         source: ["m2s01.wav", "m2s07.wav"]
         azimuth: "0:1:360"
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
 
-    cat3_2:
-        name: "cat3/a3s02.wav"
+    c2: 
+        name: "a2s02"
         description: "Smaller talker (child) walking around a table."
         source: ["f1s02.wav", "f1s08.wav"]
         azimuth: "60:1:60+360" 
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
   
-    cat3_3:
-        name: "cat3/a3s03.wav"
+    c3: 
+        name: "a1s03"
         description: "Smaller talker (child) walking around a table."
         source: ["m1s03.wav", "m1s09.wav"]
         azimuth: "120:1:120+360" 
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
   
-    cat3_4:
-        name: "cat3/a3s04.wav"
+    c4: 
+        name: "a6s04"
         description: "Smaller talker (child) walking around a table."
         source: ["f3s04.wav", "f3s10.wav"]
         azimuth: "180:1:180+360" 
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
   
-    cat3_5:
-        name: "cat3/a3s05.wav"
+    c5: 
+        name: "a5s05"
         description: "Smaller talker (child) walking around a table."
         source: ["m3s05.wav", "m3s11.wav"]
         azimuth: "240:1:240+360"
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
   
-    cat3_6:
-        name: "cat3/a3s06.wav"
+    c6: 
+        name: "a4s06"
         description: "Smaller talker (child) walking around a table."
         source: ["f2s06.wav", "f2s12.wav"]
         azimuth: "300:1:300+360" 
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
  
-    cat3_7:
-        name: "cat3/a3s07.wav"
+    c7:
+        name: "a4s07"
         description: "Preliminary: Smaller talker (child) walking around a table."
         source: ["f2s13.wav", "f2s14.wav"]
         azimuth: "120:1:120+360" 
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
  
-    cat4_1:
-        name: "cat4/a4s01.wav"
+    d1: 
+        name: "a4s01"
         description: "Talker walking around the table."
         source: ["f2s01.wav", "f2s07.wav"]
         azimuth: "0:-1:-360"
         elevation: 35 
-        overlap: -1.0
+        overlap: -0.5
         
-    cat4_2:
-        name: "cat4/a4s02.wav"
+    d2: 
+        name: "a3s02"
         description: "Talker walking around the table."
         source: ["m2s02.wav", "m2s08.wav"]
         azimuth: "60:-1:60-360" 
         elevation: 35 
-        overlap: -1.0
+        overlap: -0.5
         
-    cat4_3:
-        name: "cat4/a4s03.wav"
+    d3: 
+        name: "a2s03"
         description: "Talker walking around the table."
         source: ["f1s03.wav", "f1s09.wav"]
         azimuth: "120:-1:120-360" 
         elevation: 35 
-        overlap: -1.0
+        overlap: -0.5
  
-    cat4_4:
-        name: "cat4/a4s04.wav"
+    d4: 
+        name: "a1s04"
         description: "Talker walking around the table."
         source: ["m1s04.wav", "m1s10.wav"]
         azimuth: "180:-1:180-360" 
         elevation: 35 
-        overlap: -1.0
+        overlap: -0.5
  
-    cat4_5:
-        name: "cat4/a4s05.wav"
+    d5: 
+        name: "a6s05"
         description: "Talker walking around the table."
         source: ["f3s05.wav", "f3s11.wav"]
         azimuth: "240:-1:240-360"
         elevation: 35 
-        overlap: -1.0
+        overlap: -0.5
  
-    cat4_6:
-        name: "cat4/a4s06.wav"
+    d6: 
+        name: "a5s06"
         description: "Talker walking around the table."
         source: ["m3s06.wav", "m3s12.wav"]
         azimuth: "300:-1:300-360" 
         elevation: 35
-        overlap: -1.0
+        overlap: -0.5
 
-    cat4_7:
-        name: "cat4/a4s07.wav"
+    d7:
+        name: "a3s07"
         description: "Preliminary: Talker walking around the table."
         source: ["m2s13.wav", "m2s14.wav"]
         azimuth: "180:-1:180-360" 
         elevation: 35 
-        overlap: -1.0
+        overlap: -0.5
  
-    cat5_1:
-        name: "cat5/a5s01.wav"
+    e1: 
+        name: "a5s01"
         description: "Elevation displacement."
         source: ["m3s01.wav", "m3s07.wav"]
         azimuth: 240 
         elevation: "-90:0.5:90" 
-        overlap: -1.0
+        overlap: -0.5
  
-    cat5_2:
-        name: "cat5/a5s02.wav"
+    e2: 
+        name: "a4s02"
         description: "Elevation displacement."
         source: ["f2s02.wav", "f2s08.wav"]
         azimuth: 300 
         elevation: 0 
-        overlap: -1.0
+        overlap: -0.5
         
-    cat5_3:
-        name: "cat5/a5s03.wav"
+    e3: 
+        name: "a3s03"
         description: "Elevation displacement."
         source: ["m2s03.wav", "m2s09.wav"]
         azimuth: 0 
         elevation: "-90:0.5:90"  
-        overlap: -1.0
+        overlap: -0.5
   
-    cat5_4:
-        name: "cat5/a5s04.wav"
+    e4: 
+        name: "a2s04"
         description: "Elevation displacement."
         source: ["f1s04.wav", "f1s10.wav"]
         azimuth: 60 
         elevation: "-90:0.5:90"  
-        overlap: -1.0
+        overlap: -0.5
   
-    cat5_5:
-        name: "cat5/a5s05.wav"
+    e5: 
+        name: "a1s05"
         description: "Elevation displacement."
         source: ["m1s05.wav", "m1s11.wav"]
         azimuth: 120 
         elevation: "-90:0.5:90"  
-        overlap: -1.0
+        overlap: -0.5
   
-    cat5_6:
-        name: "cat5/a5s06.wav"
+    e6: 
+        name: "a6s06"
         description: "Elevation displacement."
         source: ["f3s06.wav", "f3s12.wav"]
         azimuth: 180 
         elevation: "-90:0.5:90"  
-        overlap: -1.0
+        overlap: -0.5
  
-    cat5_7:
-        name: "cat5/a5s07.wav"
+    e7:
+        name: "a6s07"
         description: "Preliminary: Elevation displacement."
         source: ["f3s13.wav", "f3s14.wav"]
         azimuth: 120 
         elevation: "-90:0.5:90"  
-        overlap: -1.0
+        overlap: -0.5
  
-    cat6_1:
-        name: "cat6/a6s01.wav"
+    f1: 
+        name: "a6s01"
         description: "Azimuth and elevation displacement."
         source: ["f3s01.wav", "f3s07.wav"]
         azimuth: "60:0.5:60+180" 
         elevation: "35:-0.2:-35"
-        overlap: -1.0
+        overlap: -0.5
  
-    cat6_2:
-        name: "cat6/a6s02.wav"
+    f2: 
+        name: "a5s02"
         description: "Azimuth and elevation displacement."
         source: ["m3s02.wav", "m3s08.wav"]
         azimuth: "120:0.5:120+180" 
         elevation: "35:-0.2:-35" 
-        overlap: -1.0
+        overlap: -0.5
   
-    cat6_3:
-        name: "cat6/a6s03.wav"
+    f3: 
+        name: "a4s03"
         description: "Azimuth and elevation displacement."
         source: ["f2s03.wav", "f2s09.wav"]
         azimuth: "180:0.5:180+180" 
         elevation: "35:-0.2:-35" 
-        overlap: -1.0
+        overlap: -0.5
   
-    cat6_4:
-        name: "cat6/a6s04.wav"
+    f4: 
+        name: "a3s04"
         description: "Azimuth and elevation displacement."
         source: ["m2s04.wav", "m2s10.wav"]
         azimuth: "240:0.5:240+180" 
         elevation: "35:-0.2:-35"
-        overlap: -1.0
+        overlap: -0.5
   
-    cat6_5:
-        name: "cat6/a6s05.wav"
+    f5: 
+        name: "a2s05"
         description: "Azimuth and elevation displacement."
         source: ["f1s05.wav", "f1s11.wav"]
         azimuth: "300:0.5:300+180" 
         elevation: "35:-0.2:-35" 
-        overlap: -1.0
+        overlap: -0.5
   
-    cat6_6:
-        name: "cat6/a6s06.wav"
+    f6: 
+        name: "a1s06"
         description: "Azimuth and elevation displacement."
         source: ["m1s06.wav", "m1s12.wav"]
         azimuth: "0:0.5:0+180" 
         elevation: "35:-0.2:-35" 
-        overlap: -1.0
+        overlap: -0.5
 
-    cat6_7:
-        name: "cat6/a6s07.wav"
+    f7:
+        name: "a5s07"
         description: "Preliminary: Azimuth and elevation displacement."
         source: ["m3s13.wav", "m3s14.wav"]
         azimuth: "0:0.5:0+180" 
         elevation: "35:-0.2:-35" 
-        overlap: -1.0
+        overlap: -0.5
   
\ No newline at end of file
diff --git a/experiments/selection/P800-7/config/item_gen_P800-7.yml b/experiments/selection/P800-7/config/item_gen_P800-7.yml
index 6a5f7a2e..54a8695e 100644
--- a/experiments/selection/P800-7/config/item_gen_P800-7.yml
+++ b/experiments/selection/P800-7/config/item_gen_P800-7.yml
@@ -31,8 +31,8 @@ postamble: 1.0
 add_low_level_random_noise: true
 
 ### File designators, default is "l" for listening lab, "EN" for language, "p07" for exp and "g" for provider
-listening_lab: "l"
-language: "EN"
+listening_lab: "a"
+language: "DK"
 exp: "p07"
 provider: "g"
 
@@ -85,339 +85,339 @@ use_output_prefix: "leee"
 ###   o stands for the object number; 0, 1, 2, 3
 
 scenes:
-    cat1_1: 
-        name: "cat1/a1s01.wav"
+    a1: 
+        name: "a1s01"
         description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
         source: ["m1s01.wav", "f1s01.wav"]
         azimuth: [0, 50]
         elevation: [0, 0]
         overlap: -1.0
         
-    cat1_2:
-        name: "cat1/a1s02.wav"
+    a2: 
+        name: "a6s02"
         description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
         source: ["f3s08.wav", "m1s08.wav"]
         azimuth: [50, 350]
         elevation: [0, 0]
         overlap: -1.0
         
-    cat1_3:
-        name: "cat1/a1s03.wav"
+    a3: 
+        name: "a5s03"
         description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
         source: ["f2s09.wav", "m3s09.wav"]
         azimuth: [40, 290]
         elevation: [0, 0]
         overlap: -1.0
 
-    cat1_4:
-        name: "cat1/a1s04.wav"
+    a4: 
+        name: "a4s04"
         description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
         source: ["f1s10.wav", "m2s10.wav"]
         azimuth: [30, 230]
-        elevation: [15, 15]
+        elevation: [0, 0]
         overlap: -1.0
 
-    cat1_5:
-        name: "cat1/a1s05.wav"
+    a5: 
+        name: "a3s05"
         description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
         source: ["m3s05.wav", "f3s05.wav"]
         azimuth: [20, 170]
-        elevation: [15, 15]
+        elevation: [0, 0]
         overlap: -1.0
 
-    cat1_6:
-        name: "cat1/a1s06.wav"
+    a6: 
+        name: "a2s06"
         description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
         source: ["m2s06.wav", "f2s06.wav"]
         azimuth: [10, 110]
-        elevation: [15, 15]
+        elevation: [0, 0]
         overlap: -1.0
 
-    cat1_7: 
-        name: "cat1/a1s07.wav"
+    a7: 
+        name: "a1s07"
         description: "Preliminary: Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
         source: ["m1s13.wav", "f1s13.wav"]
         azimuth: [0, 50]
         elevation: [0, 0]
         overlap: -1.0
 
-    cat2_1:
-        name: "cat2/a2s01.wav"
+    b1: 
+        name: "a2s01"
         description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
         source: ["m2s01.wav", "f2s01.wav"]
         azimuth: [20, 170]
-        elevation: [30, 30]
+        elevation: [35, 35]
         overlap: 1.0
  
-    cat2_2:
-        name: "cat2/a2s02.wav"
+    b2: 
+        name: "a1s02"
         description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
         source: ["m1s02.wav", "f1s02.wav"]
         azimuth: [10, 110]
-        elevation: [30, 30]
+        elevation: [35, 35]
         overlap: 1.0
  
-    cat2_3:
-        name: "cat2/a2s03.wav"
+    b3: 
+        name: "a6s03"
         description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
         source: ["f3s09.wav", "m1s09.wav"]
         azimuth: [0, 50]
-        elevation: [30, 30]
+        elevation: [35, 35]
         overlap: 1.0
  
-    cat2_4:
-        name: "cat2/a2s04.wav"
+    b4: 
+        name: "a5s04"
         description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
         source: ["f2s10.wav", "m3s10.wav"]
         azimuth: [50, 350]
-        elevation: [60, 60]
+        elevation: [35, 35]
         overlap: 1.0 
 
-    cat2_5:
-        name: "cat2/a2s05.wav"
+    b5: 
+        name: "a4s05"
         description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
         source: ["f1s11.wav", "m2s11.wav"]
         azimuth: [40, 290]
-        elevation: [60, 60]
+        elevation: [35, 35]
         overlap: 1.0 
 
-    cat2_6:
-        name: "cat2/a2s06.wav"
+    b6: 
+        name: "a3s06"
         description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
         source: ["m3s06.wav", "f3s06.wav"]
         azimuth: [30, 230]
-        elevation: [60, 60]
+        elevation: [35, 35]
         overlap: 1.0 
 
-    cat2_7:
-        name: "cat2/a2s07.wav"
+    b7:
+        name: "a2s07"
         description: "Preliminary: Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
-        source: ["f2s13.wav", "m2s13.wav"]
+        source: ["m2s13.wav", "f2s13.wav"]
         azimuth: [10, 110]
-        elevation: [30, 30]
+        elevation: [35, 35]
         overlap: 1.0
 
-    cat3_1:
-        name: "cat3/a3s01.wav"
+    c1: 
+        name: "a3s01"
         description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
         source: ["m3s01.wav", "f3s01.wav"]
         azimuth: [40, 290]
-        elevation: [0, 60]
+        elevation: [0, 45]
         overlap: -1.0 
 
-    cat3_2:
-        name: "cat3/a3s02.wav"
+    c2: 
+        name: "a2s02"
         description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
         source: ["m2s02.wav", "f2s02.wav"]
         azimuth: [30, 230]
-        elevation: [0, 60]
+        elevation: [0, 45]
         overlap: -1.0 
   
-    cat3_3:
-        name: "cat3/a3s03.wav"
+    c3: 
+        name: "a1s03"
         description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
         source: ["m1s03.wav", "f1s03.wav"]
         azimuth: [20, 170]
-        elevation: [0, 60]
+        elevation: [0, 45]
         overlap: -1.0   
   
-    cat3_4:
-        name: "cat3/a3s04.wav"
+    c4: 
+        name: "a6s04"
         description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
         source: ["f3s10.wav", "m1s10.wav"]
         azimuth: [10, 110]
-        elevation: [0, 60]
+        elevation: [0, 45]
         overlap: -1.0     
   
-    cat3_5:
-        name: "cat3/a3s05.wav"
+    c5: 
+        name: "a5s05"
         description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
         source: ["f2s11.wav", "m3s11.wav"]
         azimuth: [0, 50]
-        elevation: [0, 60]
+        elevation: [0, 45]
         overlap: -1.0     
   
-    cat3_6:
-        name: "cat3/a3s06.wav"
+    c6: 
+        name: "a4s06"
         description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
         source: ["f1s12.wav", "m2s12.wav"]
         azimuth: [50, 350]
         elevation: [0, 60]
         overlap: -1.0      
  
-    cat3_7:
-        name: "cat3/a3s07.wav"
+    c7:
+        name: "a3s07"
         description: "Preliminary: One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
         source: ["m3s13.wav", "f3s13.wav"]
         azimuth: [20, 170]
         elevation: [0, 60]
         overlap: -1.0   
  
-    cat4_1:
-        name: "cat4/a4s01.wav"
+    d1: 
+        name: "a4s01"
         description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
         source: ["f1s07.wav", "m2s07.wav"]
         azimuth: [50, "180:1:120 + 360"]
-        elevation: [0, 60]
-        overlap: 1.0   
+        elevation: [0, 45]
+        overlap: 1.0 
         
-    cat4_2:
-        name: "cat4/a4s02.wav"
+    d2: 
+        name: "a3s02"
         description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
         source: ["m3s02.wav", "f3s02.wav"]
         azimuth: [300, "-70:-1:-10 - 360"]
-        elevation: [0, 60]
+        elevation: [0, 45]
         overlap: 1.0   
         
-    cat4_3:
-        name: "cat4/a4s03.wav"
+    d3: 
+        name: "a2s03"
         description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
         source: ["m2s03.wav", "f2s03.wav"]
         azimuth: [250, "-20:-1:-320"]
-        elevation: [0, 60]
+        elevation: [0, 45]
         overlap: 1.0          
  
-    cat4_4:
-        name: "cat4/a4s04.wav"
+    d4: 
+        name: "a1s04"
         description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
         source: ["m1s04.wav", "f1s04.wav"]
         azimuth: [200, "30:-1:-270"]
-        elevation: [0, 60]
+        elevation: [0, 45]
         overlap: 1.0  
  
-    cat4_5:
-        name: "cat4/a4s05.wav"
+    d5: 
+        name: "a6s05"
         description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
         source: ["f3s11.wav", "m1s11.wav"]
         azimuth: [150, "80:1:20 + 360"]
-        elevation: [0, 60]
+        elevation: [0, 45]
         overlap: 1.0   
  
-    cat4_6:
-        name: "cat4/a4s06.wav"
+    d6: 
+        name: "a5s06"
         description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
         source: ["f2s12.wav", "m3s12.wav"]
         azimuth: [100, "130:1:70 + 360"]
-        elevation: [0, 60]
+        elevation: [0, 45]
         overlap: 1.0   
  
-    cat4_7:
-        name: "cat4/a4s07.wav"
+    d7:
+        name: "a4s07"
         description: "Preliminary: One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
-        source: ["f1s14.wav", "m1s14.wav"]
+        source: ["f1s14.wav", "m2s14.wav"]
         azimuth: [200, "30:-1:-270"]
-        elevation: [0, 60]
+        elevation: [0, 45]
         overlap: 1.0  
  
-    cat5_1:
-        name: "cat5/a5s01.wav"
+    e1: 
+        name: "a5s01"
         description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
         source: ["f2s07.wav", "m3s07.wav"]
         azimuth: ["80:1:20 + 360", "80:1:20 + 360"]
-        elevation: [10, 60]
+        elevation: [45, 45]
         overlap: 1.0
  
-    cat5_2:
-        name: "cat5/a5s02.wav"
+    e2: 
+        name: "a4s02"
         description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
         source: ["f1s08.wav", "m2s08.wav"]
         azimuth: ["130:1:70 + 360", "130:1:70 + 360"]
-        elevation: [10, 60]
+        elevation: [45, 45]
         overlap: 1.0    
         
-    cat5_3:
-        name: "cat5/a5s03.wav"
+    e3: 
+        name: "a3s03"
         description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
         source: ["m3s03.wav", "f3s03.wav"]
         azimuth: ["180:1:120 + 360", "180:1:120 + 360"]
-        elevation: [10, 60]
+        elevation: [45, 45]
         overlap: 1.0            
   
-    cat5_4:
-        name: "cat5/a5s04.wav"
+    e4: 
+        name: "a2s04"
         description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
         source: ["m2s04.wav", "f2s04.wav"]
         azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"]
-        elevation: [10, 60]
+        elevation: [45, 45]
         overlap: 1.0    
   
-    cat5_5:
-        name: "cat5/a5s05.wav"
+    e5: 
+        name: "a1s05"
         description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
         source: ["m1s05.wav", "f1s05.wav"]
         azimuth: ["-20:-1:-320", "-20:-1:-320"]
-        elevation: [10, 60]
+        elevation: [45, 45]
         overlap: 1.0   
   
-    cat5_6:
-        name: "cat5/a5s06.wav"
+    e6: 
+        name: "a6s06"
         description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
         source: ["f3s12.wav", "m1s12.wav"]
         azimuth: ["30:-1:-270", "30:-1:-270"]
-        elevation: [10, 60]
+        elevation: [45, 45]
         overlap: 1.0     
  
-    cat5_7:
-        name: "cat5/a5s07.wav"
+    e7:
+        name: "a5s07"
         description: "Preliminary: Two talkers walking side-by-side around the table, ~30% overlapping utterances"
-        source: ["m2s14.wav", "f2s14.wav"]
+        source: ["f2s14.wav", "m3s14.wav"]
         azimuth: ["-20:-1:-320", "-20:-1:-320"]
-        elevation: [10, 60]
+        elevation: [45, 45]
         overlap: 1.0   
  
-    cat6_1:
-        name: "cat6/a6s01.wav"
+    f1: 
+        name: "a6s01"
         description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
         source: ["f3s07.wav", "m1s07.wav"]
         azimuth: ["60:1:0 + 360", "60:-1:120 - 360"]
-        elevation: [20, 50]
-        overlap: -1.0    
+        elevation: [30, 30]
+        overlap: -1.0   
  
-    cat6_2:
-        name: "cat6/a6s02.wav"
+    f2: 
+        name: "a5s02"
         description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
         source: ["f2s08.wav", "m3s08.wav"]
         azimuth: ["0:1:300", "0:-1:60 - 360"]
-        elevation: [20, 50]
+        elevation: [30, 30]
         overlap: -1.0   
   
-    cat6_3:
-        name: "cat6/a6s03.wav"
+    f3: 
+        name: "a4s03"
         description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
         source: ["f1s09.wav", "m2s09.wav"]
         azimuth: ["300:1:240 + 360", "300:-1:0"]
-        elevation: [20, 50]
+        elevation: [30, 30]
         overlap: -1.0     
   
-    cat6_4:
-        name: "cat6/a6s04.wav"
+    f4: 
+        name: "a3s04"
         description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
         source: ["m3s04.wav", "f3s04.wav"]
         azimuth: ["240:1:180 + 360", "240:-1:-60"]
-        elevation: [20, 50]
+        elevation: [30, 30]
         overlap: -1.0  
   
-    cat6_5:
-        name: "cat6/a6s05.wav"
+    f5: 
+        name: "a2s05"
         description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
         source: ["m2s05.wav", "f2s05.wav"]
         azimuth: ["180:1:120 + 360", "180:-1:-120"]
-        elevation: [20, 50]
+        elevation: [30, 30]
         overlap: -1.0    
   
-    cat6_6:
-        name: "cat6/a6s06.wav"
+    f6: 
+        name: "a1s06"
         description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
         source: ["m1s06.wav", "f1s06.wav"]
         azimuth: ["120:1:60 + 360", "120:-1:180 - 360"]
-        elevation: [20, 50]
+        elevation: [30, 30]
         overlap: -1.0      
   
-    cat6_7:
-        name: "cat6/a6s07.wav"
+    f7:
+        name: "a6s07"
         description: "Preliminary: Two talkers walking around the table in opposite directions, non-overlapping utterances."
-        source: ["f3s14.wav", "m3s14.wav"]
+        source: ["f3s14.wav", "m1s14.wav"]
         azimuth: ["120:1:60 + 360", "120:-1:180 - 360"]
-        elevation: [20, 50]
+        elevation: [30, 30]
         overlap: -1.0      
   
\ No newline at end of file
diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py
index f6d14b1c..55791a19 100644
--- a/ivas_processing_scripts/generation/process_ism1_items.py
+++ b/ivas_processing_scripts/generation/process_ism1_items.py
@@ -234,6 +234,33 @@ def generate_ism1_scene(
             y.audio.resize(x.audio.shape, refcheck=False)
             y.audio += x.audio
 
+    # append pre-amble and post-amble to all sources
+    if cfg.preamble != 0.0:
+        # ensure that pre-amble is a multiple of 20ms
+        N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)
+
+        # insert all-zero preamble to all sources
+        pre = np.zeros((N_pre, y.audio.shape[1]))
+        y.audio = np.concatenate([pre, y.audio])
+
+    if cfg.postamble != 0.0:
+        # ensure that post-amble is a multiple of 20ms
+        N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)
+
+        # append all-zero postamble to all sources
+        post = np.zeros((N_post, y.audio.shape[1]))
+        y.audio = np.concatenate([y.audio, post])
+
+    # add random noise
+    if cfg.add_low_level_random_noise:
+        # create uniformly distributed integer noise in the range -4 .. +4 (inclusive)
+        np.random.seed(SEED_RANDOM_NOISE)
+        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
+
+        # superimpose
+        y.audio += noise
+
+
     # process azimuth and elevation
     source_azi = scene["azimuth"]
     source_ele = scene["elevation"]
@@ -244,22 +271,18 @@ def generate_ism1_scene(
     if isinstance(source_azi, str):
         if ":" in source_azi:
             source_azi = source_azi.split(":")
-            azi = np.arange(
+            azi = np.linspace(
                 float(eval(source_azi[0])),
                 float(eval(source_azi[2])),
-                float(eval(source_azi[1])),
+                N_frames
             )
         else:
-            azi = np.array(float(eval(source_azi)), ndmin=1)[:N_frames]
+            azi = np.array(float(eval(source_azi)), ndmin=1)
     else:
         azi = np.array(source_azi, ndmin=1)[:N_frames]
-
-    # ensure that azimuth array has N_frames values
-    if len(azi) > N_frames:
-        # cut the array of azimuth values
-        azi = azi[:N_frames]
-    elif len(azi) < N_frames:
-        # replicate the last azimuth value
+    
+    if len(azi) < N_frames:
+        # replicate the last azimuth value
         azi = np.append(azi, np.full(N_frames - len(azi), azi[-1]))
 
     # convert azimuth from 0 .. 360 to -180 .. +180
@@ -275,21 +298,17 @@ def generate_ism1_scene(
     if isinstance(source_ele, str):
         if ":" in source_ele:
             source_ele = source_ele.split(":")
-            ele = np.arange(
+            ele = np.linspace(
                 float(eval(source_ele[0])),
                 float(eval(source_ele[2])),
-                float(eval(source_ele[1])),
+                N_frames
             )
         else:
-            ele = np.array(float(eval(source_ele)), ndmin=1)[:N_frames]
+            ele = np.array(float(eval(source_ele)), ndmin=1)
     else:
         ele = np.array(source_ele, ndmin=1)[:N_frames]
-
-    # ensure that elevation array has N_frames values
-    if len(ele) > N_frames:
-        # cut the array of elevation values
-        ele = ele[:N_frames]
-    elif len(ele) < N_frames:
+    
+    if len(ele) < N_frames:
         # replicate the last elevation
         ele = np.append(ele, np.full(N_frames - len(ele), ele[-1]))
 
@@ -298,57 +317,15 @@ def generate_ism1_scene(
         logger.error(
             f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}"
         )
-
-    # additional metadata (default values)
-    radius = np.ones(N_frames)
-    spread = np.zeros(N_frames)
-    gain = np.ones(N_frames)
-
     # arrange all metadata fields column-wise into a matrix
-    y_meta = np.column_stack((azi, ele, radius, spread, gain))
-
-    # append pre-amble and post-amble to all sources
-    if cfg.preamble != 0.0:
-        # ensure that pre-amble is a multiple of 20ms
-        N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)
-
-        # insert all-zero preamble to all sources
-        pre = np.zeros((N_pre, y.audio.shape[1]))
-        y.audio = np.concatenate([pre, y.audio])
-
-        # insert neutral position as a pre-amble to all sources
-        N_pre = int(N_pre / frame_len)
-        pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pre, 1))
-        y_meta = np.concatenate([pre, y_meta], axis=0)
-
-    if cfg.postamble != 0.0:
-        # ensure that post-amble is a multiple of 20ms
-        N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)
-
-        # append all-zero postamble to all sources
-        post = np.zeros((N_post, y.audio.shape[1]))
-        y.audio = np.concatenate([y.audio, post])
-
-        # append neutral position as a post-amble to all sources
-        N_post = int(N_post / frame_len)
-        post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_post, 1))
-        y_meta = np.concatenate([y_meta, post], axis=0)
-
-    # add random noise
-    if cfg.add_low_level_random_noise:
-        # create uniformly distributed noise between -4 and 4
-        np.random.seed(SEED_RANDOM_NOISE)
-        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
-
-        # superimpose
-        y.audio += noise
+    y_meta = np.column_stack((azi, ele))
 
     # write ISM audio stream to the output file
     audiofile.write(
         os.path.join(
             cfg.output_path,
-            os.path.dirname(scene["name"]),
-            cfg.use_output_prefix + os.path.basename(scene["name"]),
+            "cat"+scene["name"][1],
+            cfg.use_output_prefix + os.path.basename(scene["name"]+".wav"),
         ),
         y.audio,
         y.fs,
@@ -357,8 +334,8 @@ def generate_ism1_scene(
     # write ISM metadata to the output file in .0.csv format
     csv_filename = os.path.join(
         cfg.output_path,
-        os.path.dirname(scene["name"]),
-        cfg.use_output_prefix + os.path.basename(scene["name"]) + ".0.csv",
+        "cat"+scene["name"][1],
+        cfg.use_output_prefix + os.path.basename(scene["name"]) + ".wav.0.csv",
     )
 
     with open(
diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py
index c1c09645..4a1c100a 100644
--- a/ivas_processing_scripts/generation/process_ism2_items.py
+++ b/ivas_processing_scripts/generation/process_ism2_items.py
@@ -215,73 +215,6 @@ def generate_ism2_scene(
         _, scale_factor, _ = get_loudness(x, cfg.loudness, "MONO")
         x.audio *= scale_factor
 
-        # read azimuth information and create array
-        if isinstance(source_azi, str):
-            if ":" in source_azi:
-                source_azi = source_azi.split(":")
-                azi = np.arange(
-                    float(eval(source_azi[0])),
-                    float(eval(source_azi[2])),
-                    float(eval(source_azi[1])),
-                )
-            else:
-                azi = np.array(float(eval(source_azi)), ndmin=1)[:N_frames]
-        else:
-            azi = np.array(source_azi, ndmin=1)[:N_frames]
-
-        # ensure that azimuth array has N_frames values
-        if len(azi) > N_frames:
-            # cut the array of azimuth values
-            azi = azi[:N_frames]
-        elif len(azi) < N_frames:
-            # replicate the last azimuth
-            azi = np.append(azi, np.full(N_frames - len(azi), azi[-1]))
-
-        # convert azimuth from 0 .. 360 to -180 .. +180
-        azi = (azi + 180) % 360 - 180
-
-        # check if azimuth is from -180 .. +180
-        if any(azi > 180) or any(azi < -180):
-            logger.error(
-                f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}"
-            )
-
-        # read elevation information and create array
-        if isinstance(source_ele, str):
-            if ":" in source_ele:
-                source_ele = source_ele.split(":")
-                ele = np.arange(
-                    float(eval(source_ele[0])),
-                    float(eval(source_ele[2])),
-                    float(eval(source_ele[1])),
-                )
-            else:
-                ele = np.array(float(eval(source_ele)), ndmin=1)[:N_frames]
-        else:
-            ele = np.array(source_ele, ndmin=1)[:N_frames]
-
-        # ensure that elevation array has N_frames values
-        if len(ele) > N_frames:
-            # cut the array of elevation values
-            ele = ele[:N_frames]
-        elif len(ele) < N_frames:
-            # replicate the last elevation
-            ele = np.append(ele, np.full(N_frames - len(ele), ele[-1]))
-
-        # check if elevation is from -90 .. +90
-        if any(ele > 90) or any(ele < -90):
-            logger.error(
-                f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}"
-            )
-
-        # additional metadata (default values)
-        radius = np.ones(N_frames)
-        spread = np.zeros(N_frames)
-        gain = np.ones(N_frames)
-
-        # arrange all metadata fields column-wise into a matrix
-        x_meta = np.column_stack((azi, ele, radius, spread, gain))
-
         # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap)
         if i > 0:
             # get the length of the first source file
@@ -294,12 +227,6 @@ def generate_ism2_scene(
             pre = np.zeros((N_delay, x.audio.shape[1]))
             x.audio = np.concatenate([pre, x.audio])
 
-            # insert neutral position as a pre-amble
-            N_delay = int(N_delay / frame_len)
-            # use neutral position for padding
-            pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1))
-            x_meta = np.concatenate([pre, x_meta])
-
         # pad with zeros to ensure that the signal length is a multiple of 20ms
         if len(x.audio) % frame_len != 0:
             # pad the source signal
@@ -307,13 +234,6 @@ def generate_ism2_scene(
             post = np.zeros((N_pad, x.audio.shape[1]))
             x.audio = np.concatenate([x.audio, post])
 
-            # pad the metadata
-            N_pad = int(len(x.audio) / frame_len) - len(x_meta)
-            if N_pad > 0:
-                # use neutral position for padding
-                post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1))
-                x_meta = np.concatenate([x_meta, post])
-
         # add source signal to the array of all source signals
         y.fs = x.fs
         if y.audio is None:
@@ -340,35 +260,6 @@ def generate_ism2_scene(
                 )
             y.audio = np.hstack((y.audio, x.audio))
 
-        # add metadata to the array of all metadata
-        # make sure x_meta is a 3d array
-        x_meta = x_meta[np.newaxis, :]
-        if y_meta is None:
-            y_meta = x_meta
-        else:
-            N_srcs = y_meta.shape[0]
-            N_meta_features = y_meta.shape[2]
-
-            # append the last position of the metadata to have equal length of all metadata
-            if x_meta.shape[1] > y_meta.shape[1]:
-                N_delta = x_meta.shape[1] - y_meta.shape[1]
-                # reshape to 2d array
-                y_meta = y_meta.reshape(y_meta.shape[1], -1)
-                # repeat last row N_delta times and append to the array
-                y_meta = np.vstack((y_meta, np.tile(y_meta[-1, :], (N_delta, 1))))
-                # reshape back to 3d array
-                y_meta = y_meta.reshape(N_srcs, -1, N_meta_features)
-            elif y_meta.shape[1] > x_meta.shape[1]:
-                N_delta = y_meta.shape[1] - x_meta.shape[1]
-                # reshape to 2d array
-                x_meta = x_meta.reshape(x_meta.shape[1], -1)
-                # repeat last row N_delta times and append to the array
-                x_meta = np.vstack((x_meta, np.tile(x_meta[-1, :], (N_delta, 1))))
-                # reshape back to 3d array
-                x_meta = np.expand_dims(x_meta, axis=0)
-
-            y_meta = np.concatenate([y_meta, x_meta])
-
     # append pre-amble and post-amble to all sources
     if cfg.preamble != 0.0:
         # ensure that pre-amble is a multiple of 20ms
@@ -378,11 +269,6 @@ def generate_ism2_scene(
         pre = np.zeros((N_pre, y.audio.shape[1]))
         y.audio = np.concatenate([pre, y.audio])
 
-        # insert neutral position as a pre-amble to all sources
-        N_pre = int(N_pre / frame_len)
-        pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1))
-        y_meta = np.concatenate([pre, y_meta], axis=1)
-
     if cfg.postamble != 0.0:
         # ensure that post-mable is a multiple of 20ms
         N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)
@@ -391,11 +277,6 @@ def generate_ism2_scene(
         post = np.zeros((N_post, y.audio.shape[1]))
         y.audio = np.concatenate([y.audio, post])
 
-        # append neutral position as a post-amble to all sources
-        N_post = int(N_post / frame_len)
-        post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1))
-        y_meta = np.concatenate([y_meta, post], axis=1)
-
     # add random noise
     if cfg.add_low_level_random_noise:
         # create uniformly distributed noise between -4 and 4
@@ -405,12 +286,88 @@ def generate_ism2_scene(
         # superimpose
         y.audio += noise
 
+    # create metadata files
+    for i in range(N_sources):
+        # parse metadata parameters from the scene description
+        source_azi = (
+            scene["azimuth"][i]
+            if isinstance(scene["azimuth"], list)
+            else scene["azimuth"]
+        )
+        source_ele = (
+            scene["elevation"][i]
+            if isinstance(scene["elevation"], list)
+            else scene["elevation"]
+        )
+
+        N_frames = int(len(y.audio) / y.fs * 50)
+
+        # read azimuth information and create array
+        if isinstance(source_azi, str):
+            if ":" in source_azi:
+                source_azi = source_azi.split(":")
+                azi = np.linspace(
+                    float(eval(source_azi[0])),
+                    float(eval(source_azi[2])),
+                    N_frames
+                )
+            else:
+                azi = np.array(float(eval(source_azi)), ndmin=1)
+        else:
+            azi = np.array(source_azi, ndmin=1)[:N_frames]
+        
+        if len(azi) < N_frames:
+            # replicate the last azimuth
+            azi = np.append(azi, np.full(N_frames - len(azi), azi[-1]))
+            
+        # convert azimuth from 0 .. 360 to -180 .. +180
+        azi = (azi + 180) % 360 - 180
+
+        # check if azimuth is from -180 .. +180
+        if any(azi > 180) or any(azi < -180):
+            logger.error(
+                f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}"
+            )
+
+        # read elevation information and create array
+        if isinstance(source_ele, str):
+            if ":" in source_ele:
+                source_ele = source_ele.split(":")
+                ele = np.linspace(
+                    float(eval(source_ele[0])),
+                    float(eval(source_ele[2])),
+                    N_frames
+                )
+            else:
+                ele = np.array(float(eval(source_ele)), ndmin=1)
+        else:
+            ele = np.array(source_ele, ndmin=1)[:N_frames]
+        
+        if len(ele) < N_frames:
+            # replicate the last elevation
+            ele = np.append(ele, np.full(N_frames - len(ele), ele[-1]))
+
+        # check if elevation is from -90 .. +90
+        if any(ele > 90) or any(ele < -90):
+            logger.error(
+                f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}"
+            )
+
+        # arrange all metadata fields column-wise into a matrix
+        x_meta = np.column_stack((azi, ele))
+
+        x_meta = x_meta[np.newaxis, :]
+        if y_meta is None:
+            y_meta = x_meta
+        else:
+            y_meta = np.concatenate([y_meta, x_meta])
+
     # write individual ISM audio streams to the output file in an interleaved format
     audiofile.write(
         os.path.join(
             cfg.output_path,
-            os.path.dirname(scene["name"]),
-            cfg.use_output_prefix + os.path.basename(scene["name"]),
+            "cat"+scene["name"][1],
+            cfg.use_output_prefix + os.path.basename(scene["name"] + ".wav"),
         ),
         y.audio,
         y.fs,
@@ -421,8 +378,8 @@ def generate_ism2_scene(
         # generate .csv filename (should end with .0.csv, .1.csv, ...)
         csv_filename = os.path.join(
             cfg.output_path,
-            os.path.dirname(scene["name"]),
-            cfg.use_output_prefix + os.path.basename(scene["name"]) + f".{i}.csv",
+            "cat"+scene["name"][1],
+            cfg.use_output_prefix + os.path.basename(scene["name"]) + f".wav.{i}.csv",
         )
 
         with open(
-- 
GitLab


From 45f06fd00f879eb53cf4cf76a29c937dd8a1b112 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@usherbrooke.ca>
Date: Tue, 13 Jun 2023 11:01:02 +0200
Subject: [PATCH 2/9] correction of categories per talker/talker pairs in ISM1
 and ISM2 tests

---
 .../P800-6/config/item_gen_P800-6.yml         | 493 +++++++++--------
 .../P800-7/config/item_gen_P800-7.yml         | 520 +++++++++---------
 .../generation/process_ism1_items.py          |   4 +-
 .../generation/process_ism2_items.py          |   4 +-
 4 files changed, 510 insertions(+), 511 deletions(-)

diff --git a/experiments/selection/P800-6/config/item_gen_P800-6.yml b/experiments/selection/P800-6/config/item_gen_P800-6.yml
index 844032e5..101f07f6 100644
--- a/experiments/selection/P800-6/config/item_gen_P800-6.yml
+++ b/experiments/selection/P800-6/config/item_gen_P800-6.yml
@@ -86,339 +86,338 @@ use_output_prefix: "leee"
 
 
 scenes:
-    a1: 
-        name: "a1s01"
+    cat1_1: 
+        name: "cat1/a1s01.wav"
         description: "Talker sitting at a table"
         source: ["m1s01.wav", "m1s07.wav"]
         azimuth: 0 
         elevation: 0 
         overlap: -0.5
         
-    a2: 
-        name: "a6s02"
-        description: "Talker sitting at a table"
-        source: ["f3s02.wav", "f3s08.wav"]
-        azimuth: 60 
+    cat1_2: 
+        name: "cat1/a1s02.wav"
+        description: "Standing talker."
+        source: ["m1s02.wav", "m1s08.wav"]
+        azimuth: 180 
+        elevation: 35 
+        overlap: -0.5
+
+    cat1_3: 
+        name: "cat1/a1s03.wav"
+        description: "Smaller talker (child) walking around a table."
+        source: ["m1s03.wav", "m1s09.wav"]
+        azimuth: "120:1:120+360" 
         elevation: 0 
         overlap: -0.5
-        
-    a3: 
-        name: "a5s03"
-        description: "Talker sitting at a table"
-        source: ["m3s03.wav", "m3s09.wav"]
+
+    cat1_4: 
+        name: "cat1/a1s04.wav"
+        description: "Talker walking around the table."
+        source: ["m1s04.wav", "m1s10.wav"]
+        azimuth: "180:-1:180-360" 
+        elevation: 35 
+        overlap: -0.5
+
+    cat1_5: 
+        name: "cat1/a1s05.wav"
+        description: "Elevation displacement."
+        source: ["m1s05.wav", "m1s11.wav"]
         azimuth: 120 
-        elevation: 0 
+        elevation: "-90:0.5:90"  
         overlap: -0.5
 
-    a4: 
-        name: "a4s04"
-        description: "Talker sitting at a table"
-        source: ["f2s04.wav", "f2s10.wav"]
+    cat1_6: 
+        name: "cat1/a1s06.wav"
+        description: "Azimuth and elevation displacement."
+        source: ["m1s06.wav", "m1s12.wav"]
+        azimuth: "0:0.5:0+180" 
+        elevation: "35:-0.2:-35" 
+        overlap: -0.5
+        
+    cat1_7:
+        name: "cat1/a1s07.wav"
+        description: "Preliminary: Standing talker."
+        source: ["m1s13.wav", "m1s14.wav"]
         azimuth: 180 
-        elevation: 0 
+        elevation: 35 
+        overlap: -0.5        
+
+    cat2_1: 
+        name: "cat2/a2s01.wav"
+        description: "Standing talker."
+        source: ["f1s01.wav", "f1s07.wav"]
+        azimuth: 120 
+        elevation: 35 
         overlap: -0.5
 
-    a5: 
-        name: "a3s05"
-        description: "Talker sitting at a table"
-        source: ["m2s05.wav", "m2s11.wav"]
-        azimuth: 240 
+    cat2_2: 
+        name: "cat2/a2s02.wav"
+        description: "Smaller talker (child) walking around a table."
+        source: ["f1s02.wav", "f1s08.wav"]
+        azimuth: "60:1:60+360" 
         elevation: 0 
         overlap: -0.5
 
-    a6: 
-        name: "a2s06"
+    cat2_3: 
+        name: "cat2/a2s03.wav"
+        description: "Talker walking around the table."
+        source: ["f1s03.wav", "f1s09.wav"]
+        azimuth: "120:-1:120-360" 
+        elevation: 35 
+        overlap: -0.5
+
+    cat2_4: 
+        name: "cat2/a2s04.wav"
+        description: "Elevation displacement."
+        source: ["f1s04.wav", "f1s10.wav"]
+        azimuth: 60 
+        elevation: "-90:0.5:90"  
+        overlap: -0.5
+
+    cat2_5: 
+        name: "cat2/a2s05.wav"
+        description: "Azimuth and elevation displacement."
+        source: ["f1s05.wav", "f1s11.wav"]
+        azimuth: "300:0.5:300+180" 
+        elevation: "35:-0.2:-35" 
+        overlap: -0.5
+
+    cat2_6: 
+        name: "cat2/a2s06.wav"
         description: "Talker sitting at a table"
         source: ["f1s06.wav", "f1s12.wav"]
         azimuth: 300 
         elevation: 0 
         overlap: -0.5
-
-    a7: 
-        name: "a2s07"
+        
+    cat2_7: 
+        name: "cat2/a2s07.wav"
         description: "Preliminary: Talker sitting at a table"
         source: ["f1s13.wav", "f1s14.wav"]
         azimuth: 0 
         elevation: 0 
-        overlap: -0.5
+        overlap: -0.5        
 
-    b1: 
-        name: "a2s01"
-        description: "Standing talker."
-        source: ["f1s01.wav", "f1s07.wav"]
-        azimuth: 120 
-        elevation: 35 
+    cat3_1: 
+        name: "cat3/a3s01.wav"
+        description: "Smaller talker (child) walking around a table."
+        source: ["m2s01.wav", "m2s07.wav"]
+        azimuth: "0:1:360"
+        elevation: 0 
         overlap: -0.5
- 
-    b2: 
-        name: "a1s02"
-        description: "Standing talker."
-        source: ["m1s02.wav", "m1s08.wav"]
-        azimuth: 180 
+
+    cat3_2: 
+        name: "cat3/a3s02.wav"
+        description: "Talker walking around the table."
+        source: ["m2s02.wav", "m2s08.wav"]
+        azimuth: "60:-1:60-360" 
         elevation: 35 
         overlap: -0.5
- 
-    b3: 
-        name: "a6s03"
-        description: "Standing talker."
-        source: ["f3s03.wav", "f3s09.wav"]
-        azimuth: 240 
-        elevation: 35 
+
+    cat3_3: 
+        name: "cat3/a3s03.wav"
+        description: "Elevation displacement."
+        source: ["m2s03.wav", "m2s09.wav"]
+        azimuth: 0 
+        elevation: "-90:0.5:90"  
         overlap: -0.5
- 
-    b4: 
-        name: "a5s04"
-        description: "Standing talker."
-        source: ["m3s04.wav", "m3s10.wav"]
-        azimuth: 300 
-        elevation: 35 
+
+    cat3_4: 
+        name: "cat3/a3s04.wav"
+        description: "Azimuth and elevation displacement."
+        source: ["m2s04.wav", "m2s10.wav"]
+        azimuth: "240:0.5:240+180" 
+        elevation: "35:-0.2:-35"
         overlap: -0.5
 
-    b5: 
-        name: "a4s05"
-        description: "Standing talker."
-        source: ["f2s05.wav", "f2s11.wav"]
-        azimuth: 0 
-        elevation: 35 
+    cat3_5: 
+        name: "cat3/a3s05.wav"
+        description: "Talker sitting at a table"
+        source: ["m2s05.wav", "m2s11.wav"]
+        azimuth: 240 
+        elevation: 0 
         overlap: -0.5
 
-    b6: 
-        name: "a3s06"
+    cat3_6: 
+        name: "cat3/a3s06.wav"
         description: "Standing talker."
         source: ["m2s06.wav", "m2s12.wav"]
         azimuth: 60 
         elevation: 35 
         overlap: -0.5
-
-    b7:
-        name: "a1s07"
-        description: "Preliminary: Standing talker."
-        source: ["m1s13.wav", "m1s14.wav"]
-        azimuth: 180 
+        
+    cat3_7:
+        name: "cat3/a3s07.wav"
+        description: "Preliminary: Talker walking around the table."
+        source: ["m2s13.wav", "m2s14.wav"]
+        azimuth: "180:-1:180-360" 
         elevation: 35 
         overlap: -0.5
 
-    c1: 
-        name: "a3s01"
-        description: "Smaller talker (child) walking around a table."
-        source: ["m2s01.wav", "m2s07.wav"]
-        azimuth: "0:1:360"
-        elevation: 0 
+    cat4_1: 
+        name: "cat4/a4s01.wav"
+        description: "Talker walking around the table."
+        source: ["f2s01.wav", "f2s07.wav"]
+        azimuth: "0:-1:-360"
+        elevation: 35 
         overlap: -0.5
 
-    c2: 
-        name: "a2s02"
-        description: "Smaller talker (child) walking around a table."
-        source: ["f1s02.wav", "f1s08.wav"]
-        azimuth: "60:1:60+360" 
+    cat4_2: 
+        name: "cat4/a4s02.wav"
+        description: "Elevation displacement."
+        source: ["f2s02.wav", "f2s08.wav"]
+        azimuth: 300 
         elevation: 0 
         overlap: -0.5
-  
-    c3: 
-        name: "a1s03"
-        description: "Smaller talker (child) walking around a table."
-        source: ["m1s03.wav", "m1s09.wav"]
-        azimuth: "120:1:120+360" 
-        elevation: 0 
+
+    cat4_3: 
+        name: "cat4/a4s03.wav"
+        description: "Azimuth and elevation displacement."
+        source: ["f2s03.wav", "f2s09.wav"]
+        azimuth: "180:0.5:180+180" 
+        elevation: "35:-0.2:-35" 
         overlap: -0.5
-  
-    c4: 
-        name: "a6s04"
-        description: "Smaller talker (child) walking around a table."
-        source: ["f3s04.wav", "f3s10.wav"]
-        azimuth: "180:1:180+360" 
+
+    cat4_4: 
+        name: "cat4/a4s04.wav"
+        description: "Talker sitting at a table"
+        source: ["f2s04.wav", "f2s10.wav"]
+        azimuth: 180 
         elevation: 0 
         overlap: -0.5
-  
-    c5: 
-        name: "a5s05"
-        description: "Smaller talker (child) walking around a table."
-        source: ["m3s05.wav", "m3s11.wav"]
-        azimuth: "240:1:240+360"
-        elevation: 0 
+
+    cat4_5: 
+        name: "cat4/a4s05.wav"
+        description: "Standing talker."
+        source: ["f2s05.wav", "f2s11.wav"]
+        azimuth: 0 
+        elevation: 35 
         overlap: -0.5
-  
-    c6: 
-        name: "a4s06"
+
+    cat4_6: 
+        name: "cat4/a4s06.wav"
         description: "Smaller talker (child) walking around a table."
         source: ["f2s06.wav", "f2s12.wav"]
         azimuth: "300:1:300+360" 
         elevation: 0 
         overlap: -0.5
- 
-    c7:
-        name: "a4s07"
+        
+    cat4_7:
+        name: "cat4/a4s07.wav"
         description: "Preliminary: Smaller talker (child) walking around a table."
         source: ["f2s13.wav", "f2s14.wav"]
         azimuth: "120:1:120+360" 
         elevation: 0 
+        overlap: -0.5        
+
+    cat5_1: 
+        name: "cat5/a5s01.wav"
+        description: "Elevation displacement."
+        source: ["m3s01.wav", "m3s07.wav"]
+        azimuth: 240 
+        elevation: "-90:0.5:90" 
         overlap: -0.5
- 
-    d1: 
-        name: "a4s01"
-        description: "Talker walking around the table."
-        source: ["f2s01.wav", "f2s07.wav"]
-        azimuth: "0:-1:-360"
-        elevation: 35 
-        overlap: -0.5
-        
-    d2: 
-        name: "a3s02"
-        description: "Talker walking around the table."
-        source: ["m2s02.wav", "m2s08.wav"]
-        azimuth: "60:-1:60-360" 
-        elevation: 35 
+
+    cat5_2: 
+        name: "cat5/a5s02.wav"
+        description: "Azimuth and elevation displacement."
+        source: ["m3s02.wav", "m3s08.wav"]
+        azimuth: "120:0.5:120+180" 
+        elevation: "35:-0.2:-35" 
         overlap: -0.5
-        
-    d3: 
-        name: "a2s03"
-        description: "Talker walking around the table."
-        source: ["f1s03.wav", "f1s09.wav"]
-        azimuth: "120:-1:120-360" 
-        elevation: 35 
+
+    cat5_3: 
+        name: "cat5/a5s03.wav"
+        description: "Talker sitting at a table"
+        source: ["m3s03.wav", "m3s09.wav"]
+        azimuth: 120 
+        elevation: 0 
         overlap: -0.5
- 
-    d4: 
-        name: "a1s04"
-        description: "Talker walking around the table."
-        source: ["m1s04.wav", "m1s10.wav"]
-        azimuth: "180:-1:180-360" 
+
+    cat5_4: 
+        name: "cat5/a5s04.wav"
+        description: "Standing talker."
+        source: ["m3s04.wav", "m3s10.wav"]
+        azimuth: 300 
         elevation: 35 
         overlap: -0.5
- 
-    d5: 
-        name: "a6s05"
-        description: "Talker walking around the table."
-        source: ["f3s05.wav", "f3s11.wav"]
-        azimuth: "240:-1:240-360"
-        elevation: 35 
+
+    cat5_5: 
+        name: "cat5/a5s05.wav"
+        description: "Smaller talker (child) walking around a table."
+        source: ["m3s05.wav", "m3s11.wav"]
+        azimuth: "240:1:240+360"
+        elevation: 0 
         overlap: -0.5
- 
-    d6: 
-        name: "a5s06"
+
+    cat5_6: 
+        name: "cat5/a5s06.wav"
         description: "Talker walking around the table."
         source: ["m3s06.wav", "m3s12.wav"]
         azimuth: "300:-1:300-360" 
         elevation: 35
         overlap: -0.5
+        
+    cat5_7:
+        name: "cat5/a5s07.wav"
+        description: "Preliminary: Azimuth and elevation displacement."
+        source: ["m3s13.wav", "m3s14.wav"]
+        azimuth: "0:0.5:0+180" 
+        elevation: "35:-0.2:-35" 
+        overlap: -0.5        
 
-    d7:
-        name: "a3s07"
-        description: "Preliminary: Talker walking around the table."
-        source: ["m2s13.wav", "m2s14.wav"]
-        azimuth: "180:-1:180-360" 
-        elevation: 35 
-        overlap: -0.5
- 
-    e1: 
-        name: "a5s01"
-        description: "Elevation displacement."
-        source: ["m3s01.wav", "m3s07.wav"]
-        azimuth: 240 
-        elevation: "-90:0.5:90" 
+    cat6_1: 
+        name: "cat6/a6s01.wav"
+        description: "Azimuth and elevation displacement."
+        source: ["f3s01.wav", "f3s07.wav"]
+        azimuth: "60:0.5:60+180" 
+        elevation: "35:-0.2:-35"
         overlap: -0.5
- 
-    e2: 
-        name: "a4s02"
-        description: "Elevation displacement."
-        source: ["f2s02.wav", "f2s08.wav"]
-        azimuth: 300 
+        
+    cat6_2: 
+        name: "cat6/a6s02.wav"
+        description: "Talker sitting at a table"
+        source: ["f3s02.wav", "f3s08.wav"]
+        azimuth: 60 
         elevation: 0 
         overlap: -0.5
         
-    e3: 
-        name: "a3s03"
-        description: "Elevation displacement."
-        source: ["m2s03.wav", "m2s09.wav"]
-        azimuth: 0 
-        elevation: "-90:0.5:90"  
+    cat6_3: 
+        name: "cat6/a6s03.wav"
+        description: "Standing talker."
+        source: ["f3s03.wav", "f3s09.wav"]
+        azimuth: 240 
+        elevation: 35 
         overlap: -0.5
-  
-    e4: 
-        name: "a2s04"
-        description: "Elevation displacement."
-        source: ["f1s04.wav", "f1s10.wav"]
-        azimuth: 60 
-        elevation: "-90:0.5:90"  
+
+    cat6_4: 
+        name: "cat6/a6s04.wav"
+        description: "Smaller talker (child) walking around a table."
+        source: ["f3s04.wav", "f3s10.wav"]
+        azimuth: "180:1:180+360" 
+        elevation: 0 
         overlap: -0.5
-  
-    e5: 
-        name: "a1s05"
-        description: "Elevation displacement."
-        source: ["m1s05.wav", "m1s11.wav"]
-        azimuth: 120 
-        elevation: "-90:0.5:90"  
+
+    cat6_5: 
+        name: "cat6/a6s05.wav"
+        description: "Talker walking around the table."
+        source: ["f3s05.wav", "f3s11.wav"]
+        azimuth: "240:-1:240-360"
+        elevation: 35 
         overlap: -0.5
-  
-    e6: 
-        name: "a6s06"
+
+    cat6_6: 
+        name: "cat6/a6s06.wav"
         description: "Elevation displacement."
         source: ["f3s06.wav", "f3s12.wav"]
         azimuth: 180 
         elevation: "-90:0.5:90"  
         overlap: -0.5
- 
-    e7:
-        name: "a6s07"
+
+    cat6_7:
+        name: "cat6/a6s07.wav"
         description: "Preliminary: Elevation displacement."
         source: ["f3s13.wav", "f3s14.wav"]
         azimuth: 120 
         elevation: "-90:0.5:90"  
         overlap: -0.5
- 
-    f1: 
-        name: "a6s01"
-        description: "Azimuth and elevation displacement."
-        source: ["f3s01.wav", "f3s07.wav"]
-        azimuth: "60:0.5:60+180" 
-        elevation: "35:-0.2:-35"
-        overlap: -0.5
- 
-    f2: 
-        name: "a5s02"
-        description: "Azimuth and elevation displacement."
-        source: ["m3s02.wav", "m3s08.wav"]
-        azimuth: "120:0.5:120+180" 
-        elevation: "35:-0.2:-35" 
-        overlap: -0.5
-  
-    f3: 
-        name: "a4s03"
-        description: "Azimuth and elevation displacement."
-        source: ["f2s03.wav", "f2s09.wav"]
-        azimuth: "180:0.5:180+180" 
-        elevation: "35:-0.2:-35" 
-        overlap: -0.5
-  
-    f4: 
-        name: "a3s04"
-        description: "Azimuth and elevation displacement."
-        source: ["m2s04.wav", "m2s10.wav"]
-        azimuth: "240:0.5:240+180" 
-        elevation: "35:-0.2:-35"
-        overlap: -0.5
-  
-    f5: 
-        name: "a2s05"
-        description: "Azimuth and elevation displacement."
-        source: ["f1s05.wav", "f1s11.wav"]
-        azimuth: "300:0.5:300+180" 
-        elevation: "35:-0.2:-35" 
-        overlap: -0.5
-  
-    f6: 
-        name: "a1s06"
-        description: "Azimuth and elevation displacement."
-        source: ["m1s06.wav", "m1s12.wav"]
-        azimuth: "0:0.5:0+180" 
-        elevation: "35:-0.2:-35" 
-        overlap: -0.5
-
-    f7:
-        name: "a5s07"
-        description: "Preliminary: Azimuth and elevation displacement."
-        source: ["m3s13.wav", "m3s14.wav"]
-        azimuth: "0:0.5:0+180" 
-        elevation: "35:-0.2:-35" 
-        overlap: -0.5
-  
\ No newline at end of file
diff --git a/experiments/selection/P800-7/config/item_gen_P800-7.yml b/experiments/selection/P800-7/config/item_gen_P800-7.yml
index 54a8695e..ec79e31b 100644
--- a/experiments/selection/P800-7/config/item_gen_P800-7.yml
+++ b/experiments/selection/P800-7/config/item_gen_P800-7.yml
@@ -85,336 +85,336 @@ use_output_prefix: "leee"
 ###   o stands for the object number; 0, 1, 2, 3
 
 scenes:
-    a1: 
-        name: "a1s01"
+    cat1_1: 
+        name: "cat1/a1s01.wav"
         description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
         source: ["m1s01.wav", "f1s01.wav"]
         azimuth: [0, 50]
         elevation: [0, 0]
         overlap: -1.0
         
-    a2: 
-        name: "a6s02"
-        description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
-        source: ["f3s08.wav", "m1s08.wav"]
-        azimuth: [50, 350]
-        elevation: [0, 0]
-        overlap: -1.0
+    cat1_2:
+        name: "cat1/a1s02.wav"
+        description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
+        source: ["m1s02.wav", "f1s02.wav"]
+        azimuth: [10, 110]
+        elevation: [35, 35]
+        overlap: 1.0
         
-    a3: 
-        name: "a5s03"
-        description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
-        source: ["f2s09.wav", "m3s09.wav"]
-        azimuth: [40, 290]
-        elevation: [0, 0]
-        overlap: -1.0
-
-    a4: 
-        name: "a4s04"
-        description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
-        source: ["f1s10.wav", "m2s10.wav"]
-        azimuth: [30, 230]
-        elevation: [0, 0]
-        overlap: -1.0
-
-    a5: 
-        name: "a3s05"
-        description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
-        source: ["m3s05.wav", "f3s05.wav"]
+    cat1_3:
+        name: "cat1/a1s03.wav"
+        description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
+        source: ["m1s03.wav", "f1s03.wav"]
         azimuth: [20, 170]
-        elevation: [0, 0]
-        overlap: -1.0
-
-    a6: 
-        name: "a2s06"
-        description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
-        source: ["m2s06.wav", "f2s06.wav"]
-        azimuth: [10, 110]
-        elevation: [0, 0]
-        overlap: -1.0
-
-    a7: 
-        name: "a1s07"
+        elevation: [0, 45]
+        overlap: -1.0   
+        
+    cat1_4:
+        name: "cat1/a1s04.wav"
+        description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
+        source: ["m1s04.wav", "f1s04.wav"]
+        azimuth: [200, "30:-1:-270"]
+        elevation: [0, 45]
+        overlap: 1.0  
+        
+    cat1_5:
+        name: "cat1/a1s05.wav"
+        description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
+        source: ["m1s05.wav", "f1s05.wav"]
+        azimuth: ["-20:-1:-320", "-20:-1:-320"]
+        elevation: [45, 45]
+        overlap: 1.0   
+        
+    cat1_6:
+        name: "cat1/a1s06.wav"
+        description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
+        source: ["m1s06.wav", "f1s06.wav"]
+        azimuth: ["120:1:60 + 360", "120:-1:180 - 360"]
+        elevation: [30, 30]
+        overlap: -1.0      
+        
+    cat1_7:
+        name: "cat1/a1s07.wav"
         description: "Preliminary: Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
         source: ["m1s13.wav", "f1s13.wav"]
         azimuth: [0, 50]
         elevation: [0, 0]
         overlap: -1.0
-
-    b1: 
-        name: "a2s01"
+        
+    cat2_1:
+        name: "cat2/a2s01.wav"
         description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
         source: ["m2s01.wav", "f2s01.wav"]
         azimuth: [20, 170]
         elevation: [35, 35]
         overlap: 1.0
- 
-    b2: 
-        name: "a1s02"
-        description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
-        source: ["m1s02.wav", "f1s02.wav"]
-        azimuth: [10, 110]
-        elevation: [35, 35]
-        overlap: 1.0
- 
-    b3: 
-        name: "a6s03"
-        description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
-        source: ["f3s09.wav", "m1s09.wav"]
-        azimuth: [0, 50]
-        elevation: [35, 35]
-        overlap: 1.0
- 
-    b4: 
-        name: "a5s04"
-        description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
-        source: ["f2s10.wav", "m3s10.wav"]
-        azimuth: [50, 350]
-        elevation: [35, 35]
-        overlap: 1.0 
+        
+    cat2_2:
+        name: "cat2/a2s02.wav"
+        description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
+        source: ["m2s02.wav", "f2s02.wav"]
+        azimuth: [30, 230]
+        elevation: [0, 45]
+        overlap: -1.0 
 
-    b5: 
-        name: "a4s05"
-        description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
-        source: ["f1s11.wav", "m2s11.wav"]
-        azimuth: [40, 290]
-        elevation: [35, 35]
-        overlap: 1.0 
+    cat2_3:
+        name: "cat2/a2s03.wav"
+        description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
+        source: ["m2s03.wav", "f2s03.wav"]
+        azimuth: [250, "-20:-1:-320"]
+        elevation: [0, 45]
+        overlap: 1.0          
 
-    b6: 
-        name: "a3s06"
-        description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
-        source: ["m3s06.wav", "f3s06.wav"]
-        azimuth: [30, 230]
-        elevation: [35, 35]
-        overlap: 1.0 
+    cat2_4:
+        name: "cat2/a2s04.wav"
+        description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
+        source: ["m2s04.wav", "f2s04.wav"]
+        azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"]
+        elevation: [45, 45]
+        overlap: 1.0    
+
+    cat2_5:
+        name: "cat2/a2s05.wav"
+        description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
+        source: ["m2s05.wav", "f2s05.wav"]
+        azimuth: ["180:1:120 + 360", "180:-1:-120"]
+        elevation: [30, 30]
+        overlap: -1.0    
+
+    cat2_6:
+        name: "cat2/a2s06.wav"
+        description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
+        source: ["m2s06.wav", "f2s06.wav"]
+        azimuth: [10, 110]
+        elevation: [0, 0]
+        overlap: -1.0
 
-    b7:
-        name: "a2s07"
+    cat2_7:
+        name: "cat2/a2s07.wav"
         description: "Preliminary: Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
         source: ["m2s13.wav", "f2s13.wav"]
         azimuth: [10, 110]
         elevation: [35, 35]
         overlap: 1.0
 
-    c1: 
-        name: "a3s01"
+    cat3_1:
+        name: "cat3/a3s01.wav"
         description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
         source: ["m3s01.wav", "f3s01.wav"]
         azimuth: [40, 290]
         elevation: [0, 45]
         overlap: -1.0 
 
-    c2: 
-        name: "a2s02"
-        description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
-        source: ["m2s02.wav", "f2s02.wav"]
-        azimuth: [30, 230]
+    cat3_2:
+        name: "cat3/a3s02.wav"
+        description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
+        source: ["m3s02.wav", "f3s02.wav"]
+        azimuth: [300, "-70:-1:-10 - 360"]
         elevation: [0, 45]
-        overlap: -1.0 
-  
-    c3: 
-        name: "a1s03"
-        description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
-        source: ["m1s03.wav", "f1s03.wav"]
+        overlap: 1.0   
+
+    cat3_3:
+        name: "cat3/a3s03.wav"
+        description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
+        source: ["m3s03.wav", "f3s03.wav"]
+        azimuth: ["180:1:120 + 360", "180:1:120 + 360"]
+        elevation: [45, 45]
+        overlap: 1.0            
+
+    cat3_4:
+        name: "cat3/a3s04.wav"
+        description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
+        source: ["m3s04.wav", "f3s04.wav"]
+        azimuth: ["240:1:180 + 360", "240:-1:-60"]
+        elevation: [30, 30]
+        overlap: -1.0  
+
+    cat3_5:
+        name: "cat3/a3s05.wav"
+        description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
+        source: ["m3s05.wav", "f3s05.wav"]
         azimuth: [20, 170]
-        elevation: [0, 45]
-        overlap: -1.0   
-  
-    c4: 
-        name: "a6s04"
-        description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
-        source: ["f3s10.wav", "m1s10.wav"]
-        azimuth: [10, 110]
-        elevation: [0, 45]
-        overlap: -1.0     
-  
-    c5: 
-        name: "a5s05"
-        description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
-        source: ["f2s11.wav", "m3s11.wav"]
-        azimuth: [0, 50]
-        elevation: [0, 45]
-        overlap: -1.0     
-  
-    c6: 
-        name: "a4s06"
-        description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
-        source: ["f1s12.wav", "m2s12.wav"]
-        azimuth: [50, 350]
-        elevation: [0, 60]
-        overlap: -1.0      
- 
-    c7:
-        name: "a3s07"
+        elevation: [0, 0]
+        overlap: -1.0
+
+    cat3_6:
+        name: "cat3/a3s06.wav"
+        description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
+        source: ["m3s06.wav", "f3s06.wav"]
+        azimuth: [30, 230]
+        elevation: [35, 35]
+        overlap: 1.0 
+
+    cat3_7:
+        name: "cat3/a3s07.wav"
         description: "Preliminary: One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
         source: ["m3s13.wav", "f3s13.wav"]
         azimuth: [20, 170]
         elevation: [0, 60]
         overlap: -1.0   
- 
-    d1: 
-        name: "a4s01"
+
+    cat4_1:
+        name: "cat4/a4s01.wav"
         description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
         source: ["f1s07.wav", "m2s07.wav"]
         azimuth: [50, "180:1:120 + 360"]
         elevation: [0, 45]
         overlap: 1.0 
-        
-    d2: 
-        name: "a3s02"
-        description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
-        source: ["m3s02.wav", "f3s02.wav"]
-        azimuth: [300, "-70:-1:-10 - 360"]
-        elevation: [0, 45]
-        overlap: 1.0   
-        
-    d3: 
-        name: "a2s03"
-        description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
-        source: ["m2s03.wav", "f2s03.wav"]
-        azimuth: [250, "-20:-1:-320"]
-        elevation: [0, 45]
-        overlap: 1.0          
- 
-    d4: 
-        name: "a1s04"
-        description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
-        source: ["m1s04.wav", "f1s04.wav"]
-        azimuth: [200, "30:-1:-270"]
-        elevation: [0, 45]
-        overlap: 1.0  
- 
-    d5: 
-        name: "a6s05"
-        description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
-        source: ["f3s11.wav", "m1s11.wav"]
-        azimuth: [150, "80:1:20 + 360"]
-        elevation: [0, 45]
-        overlap: 1.0   
- 
-    d6: 
-        name: "a5s06"
-        description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
-        source: ["f2s12.wav", "m3s12.wav"]
-        azimuth: [100, "130:1:70 + 360"]
-        elevation: [0, 45]
-        overlap: 1.0   
- 
-    d7:
-        name: "a4s07"
+
+    cat4_2:
+        name: "cat4/a4s02.wav"
+        description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
+        source: ["f1s08.wav", "m2s08.wav"]
+        azimuth: ["130:1:70 + 360", "130:1:70 + 360"]
+        elevation: [45, 45]
+        overlap: 1.0    
+
+    cat4_3:
+        name: "cat4/a4s03.wav"
+        description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
+        source: ["f1s09.wav", "m2s09.wav"]
+        azimuth: ["300:1:240 + 360", "300:-1:0"]
+        elevation: [30, 30]
+        overlap: -1.0     
+
+    cat4_4:
+        name: "cat4/a4s04.wav"
+        description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
+        source: ["f1s10.wav", "m2s10.wav"]
+        azimuth: [30, 230]
+        elevation: [0, 0]
+        overlap: -1.0
+
+    cat4_5:
+        name: "cat4/a4s05.wav"
+        description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
+        source: ["f1s11.wav", "m2s11.wav"]
+        azimuth: [40, 290]
+        elevation: [35, 35]
+        overlap: 1.0 
+
+    cat4_6:
+        name: "cat4/a4s06.wav"
+        description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
+        source: ["f1s12.wav", "m2s12.wav"]
+        azimuth: [50, 350]
+        elevation: [0, 60]
+        overlap: -1.0      
+
+    cat4_7:
+        name: "cat4/a4s07.wav"
         description: "Preliminary: One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
         source: ["f1s14.wav", "m2s14.wav"]
         azimuth: [200, "30:-1:-270"]
         elevation: [0, 45]
         overlap: 1.0  
- 
-    e1: 
-        name: "a5s01"
+
+    cat5_1:
+        name: "cat5/a5s01.wav"
         description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
         source: ["f2s07.wav", "m3s07.wav"]
         azimuth: ["80:1:20 + 360", "80:1:20 + 360"]
         elevation: [45, 45]
         overlap: 1.0
- 
-    e2: 
-        name: "a4s02"
-        description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
-        source: ["f1s08.wav", "m2s08.wav"]
-        azimuth: ["130:1:70 + 360", "130:1:70 + 360"]
-        elevation: [45, 45]
-        overlap: 1.0    
-        
-    e3: 
-        name: "a3s03"
-        description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
-        source: ["m3s03.wav", "f3s03.wav"]
-        azimuth: ["180:1:120 + 360", "180:1:120 + 360"]
-        elevation: [45, 45]
-        overlap: 1.0            
-  
-    e4: 
-        name: "a2s04"
-        description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
-        source: ["m2s04.wav", "f2s04.wav"]
-        azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"]
-        elevation: [45, 45]
-        overlap: 1.0    
-  
-    e5: 
-        name: "a1s05"
-        description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
-        source: ["m1s05.wav", "f1s05.wav"]
-        azimuth: ["-20:-1:-320", "-20:-1:-320"]
-        elevation: [45, 45]
+
+    cat5_2:
+        name: "cat5/a5s02.wav"
+        description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
+        source: ["f2s08.wav", "m3s08.wav"]
+        azimuth: ["0:1:300", "0:-1:60 - 360"]
+        elevation: [30, 30]
+        overlap: -1.0   
+
+    cat5_3:
+        name: "cat5/a5s03.wav"
+        description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
+        source: ["f2s09.wav", "m3s09.wav"]
+        azimuth: [40, 290]
+        elevation: [0, 0]
+        overlap: -1.0
+
+    cat5_4:
+        name: "cat5/a5s04.wav"
+        description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
+        source: ["f2s10.wav", "m3s10.wav"]
+        azimuth: [50, 350]
+        elevation: [35, 35]
+        overlap: 1.0 
+
+    cat5_5:
+        name: "cat5/a5s05.wav"
+        description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
+        source: ["f2s11.wav", "m3s11.wav"]
+        azimuth: [0, 50]
+        elevation: [0, 45]
+        overlap: -1.0     
+
+    cat5_6:
+        name: "cat5/a5s06.wav"
+        description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
+        source: ["f2s12.wav", "m3s12.wav"]
+        azimuth: [100, "130:1:70 + 360"]
+        elevation: [0, 45]
         overlap: 1.0   
-  
-    e6: 
-        name: "a6s06"
-        description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
-        source: ["f3s12.wav", "m1s12.wav"]
-        azimuth: ["30:-1:-270", "30:-1:-270"]
-        elevation: [45, 45]
-        overlap: 1.0     
- 
-    e7:
-        name: "a5s07"
+
+    cat5_7:
+        name: "cat5/a5s07.wav"
         description: "Preliminary: Two talkers walking side-by-side around the table, ~30% overlapping utterances"
         source: ["f2s14.wav", "m3s14.wav"]
         azimuth: ["-20:-1:-320", "-20:-1:-320"]
         elevation: [45, 45]
         overlap: 1.0   
- 
-    f1: 
-        name: "a6s01"
+        
+    cat6_1:
+        name: "cat6/a6s01.wav"
         description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
         source: ["f3s07.wav", "m1s07.wav"]
         azimuth: ["60:1:0 + 360", "60:-1:120 - 360"]
         elevation: [30, 30]
         overlap: -1.0   
+        
+    cat6_2:
+        name: "cat6/a6s02.wav"
+        description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances."
+        source: ["f3s08.wav", "m1s08.wav"]
+        azimuth: [50, 350]
+        elevation: [0, 0]
+        overlap: -1.0
  
-    f2: 
-        name: "a5s02"
-        description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
-        source: ["f2s08.wav", "m3s08.wav"]
-        azimuth: ["0:1:300", "0:-1:60 - 360"]
-        elevation: [30, 30]
-        overlap: -1.0   
+    cat6_3:
+        name: "cat6/a6s03.wav"
+        description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances."
+        source: ["f3s09.wav", "m1s09.wav"]
+        azimuth: [0, 50]
+        elevation: [35, 35]
+        overlap: 1.0
   
-    f3: 
-        name: "a4s03"
-        description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
-        source: ["f1s09.wav", "m2s09.wav"]
-        azimuth: ["300:1:240 + 360", "300:-1:0"]
-        elevation: [30, 30]
+    cat6_4:
+        name: "cat6/a6s04.wav"
+        description: "One talker sitting at a table, second talker standing beside the table,  non-overlapping utterances."
+        source: ["f3s10.wav", "m1s10.wav"]
+        azimuth: [10, 110]
+        elevation: [0, 45]
         overlap: -1.0     
+ 
+    cat6_5:
+        name: "cat6/a6s05.wav"
+        description: "One talker sitting at a table, second talker walking around the table, ~30% overlapping utterances."
+        source: ["f3s11.wav", "m1s11.wav"]
+        azimuth: [150, "80:1:20 + 360"]
+        elevation: [0, 45]
+        overlap: 1.0   
+ 
+    cat6_6:
+        name: "cat6/a6s06.wav"
+        description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances"
+        source: ["f3s12.wav", "m1s12.wav"]
+        azimuth: ["30:-1:-270", "30:-1:-270"]
+        elevation: [45, 45]
+        overlap: 1.0     
   
-    f4: 
-        name: "a3s04"
-        description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
-        source: ["m3s04.wav", "f3s04.wav"]
-        azimuth: ["240:1:180 + 360", "240:-1:-60"]
-        elevation: [30, 30]
-        overlap: -1.0  
-  
-    f5: 
-        name: "a2s05"
-        description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
-        source: ["m2s05.wav", "f2s05.wav"]
-        azimuth: ["180:1:120 + 360", "180:-1:-120"]
-        elevation: [30, 30]
-        overlap: -1.0    
-  
-    f6: 
-        name: "a1s06"
-        description: "Two talkers walking around the table in opposite directions, non-overlapping utterances."
-        source: ["m1s06.wav", "f1s06.wav"]
-        azimuth: ["120:1:60 + 360", "120:-1:180 - 360"]
-        elevation: [30, 30]
-        overlap: -1.0      
-  
-    f7:
-        name: "a6s07"
+    cat6_7:
+        name: "cat6/a6s07.wav"
         description: "Preliminary: Two talkers walking around the table in opposite directions, non-overlapping utterances."
         source: ["f3s14.wav", "m1s14.wav"]
         azimuth: ["120:1:60 + 360", "120:-1:180 - 360"]
diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py
index 55791a19..89ce4d1f 100644
--- a/ivas_processing_scripts/generation/process_ism1_items.py
+++ b/ivas_processing_scripts/generation/process_ism1_items.py
@@ -334,8 +334,8 @@ def generate_ism1_scene(
     # write ISM metadata to the output file in .0.csv format
     csv_filename = os.path.join(
         cfg.output_path,
-        "cat"+scene["name"][1],
-        cfg.use_output_prefix + os.path.basename(scene["name"]) + ".wav.0.csv",
+        os.path.dirname(scene["name"]),
+        cfg.use_output_prefix + os.path.basename(scene["name"]) + ".0.csv",
     )
 
     with open(
diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py
index 4a1c100a..d50edbfb 100644
--- a/ivas_processing_scripts/generation/process_ism2_items.py
+++ b/ivas_processing_scripts/generation/process_ism2_items.py
@@ -378,8 +378,8 @@ def generate_ism2_scene(
         # generate .csv filename (should end with .0.csv, .1.csv, ...)
         csv_filename = os.path.join(
             cfg.output_path,
-            "cat"+scene["name"][1],
-            cfg.use_output_prefix + os.path.basename(scene["name"]) + f".wav.{i}.csv",
+            os.path.dirname(scene["name"]),
+            cfg.use_output_prefix + os.path.basename(scene["name"]) + f".{i}.csv",
         )
 
         with open(
-- 
GitLab


From 7e6fa6c2d094244c79c15f19ae266f6c303ae858 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@usherbrooke.ca>
Date: Tue, 13 Jun 2023 11:07:25 +0200
Subject: [PATCH 3/9] correction of categories per talker/talker pairs in ISM1
 and ISM2 tests

---
 .../generation/process_ism2_items.py                  | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py
index d50edbfb..af59b9c7 100644
--- a/ivas_processing_scripts/generation/process_ism2_items.py
+++ b/ivas_processing_scripts/generation/process_ism2_items.py
@@ -362,17 +362,6 @@ def generate_ism2_scene(
         else:
             y_meta = np.concatenate([y_meta, x_meta])
 
-    # write individual ISM audio streams to the output file in an interleaved format
-    audiofile.write(
-        os.path.join(
-            cfg.output_path,
-            "cat"+scene["name"][1],
-            cfg.use_output_prefix + os.path.basename(scene["name"] + ".wav"),
-        ),
-        y.audio,
-        y.fs,
-    )
-
     # write individual ISM metadata to output files in .csv format
     for i in range(N_sources):
         # generate .csv filename (should end with .0.csv, .1.csv, ...)
-- 
GitLab


From 87febe56f735a700a5c40511e5eb051e4bec4b40 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@usherbrooke.ca>
Date: Tue, 13 Jun 2023 11:13:04 +0200
Subject: [PATCH 4/9] correction of categories per talker/talker pairs in ISM1
 and ISM2 tests

---
 .../generation/process_ism1_items.py                  |  4 ++--
 .../generation/process_ism2_items.py                  | 11 +++++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py
index 89ce4d1f..221562d3 100644
--- a/ivas_processing_scripts/generation/process_ism1_items.py
+++ b/ivas_processing_scripts/generation/process_ism1_items.py
@@ -324,8 +324,8 @@ def generate_ism1_scene(
     audiofile.write(
         os.path.join(
             cfg.output_path,
-            "cat"+scene["name"][1],
-            cfg.use_output_prefix + os.path.basename(scene["name"]+".wav"),
+            os.path.dirname(scene["name"]),
+            cfg.use_output_prefix + os.path.basename(scene["name"]),
         ),
         y.audio,
         y.fs,
diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py
index af59b9c7..4ece488d 100644
--- a/ivas_processing_scripts/generation/process_ism2_items.py
+++ b/ivas_processing_scripts/generation/process_ism2_items.py
@@ -362,6 +362,17 @@ def generate_ism2_scene(
         else:
             y_meta = np.concatenate([y_meta, x_meta])
 
+    # write individual ISM audio streams to the output file in an interleaved format
+    audiofile.write(
+        os.path.join(
+            cfg.output_path,
+            os.path.dirname(scene["name"]),
+            cfg.use_output_prefix + os.path.basename(scene["name"]),
+        ),
+        y.audio,
+        y.fs,
+    )
+
     # write individual ISM metadata to output files in .csv format
     for i in range(N_sources):
         # generate .csv filename (should end with .0.csv, .1.csv, ...)
-- 
GitLab


From 25c01c513fe0060d9ad58003a2f95bcefa6f9105 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@usherbrooke.ca>
Date: Thu, 15 Jun 2023 12:36:21 +0200
Subject: [PATCH 5/9] correction of ranges of azimuth and elevation maintaining
 speed of talker movement

---
 .../generation/process_ism1_items.py          | 42 ++++++++++---------
 .../generation/process_ism2_items.py          | 39 +++++++++--------
 2 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py
index 221562d3..d6ffc6d7 100644
--- a/ivas_processing_scripts/generation/process_ism1_items.py
+++ b/ivas_processing_scripts/generation/process_ism1_items.py
@@ -267,9 +267,10 @@ def generate_ism1_scene(
 
     N_frames = int(len(y.audio) / y.fs * 50)
 
-    # read azimuth information and create array
+    # read azimuth information and convert to an array 
     if isinstance(source_azi, str):
         if ":" in source_azi:
+            # start with the initial azimuth value and apply step N_frames times
             source_azi = source_azi.split(":")
             azi = np.linspace(
                 float(eval(source_azi[0])),
@@ -277,46 +278,49 @@ def generate_ism1_scene(
                 N_frames
             )
         else:
-            azi = np.array(float(eval(source_azi)), ndmin=1)
+            # replicate static azimuth value N_frames times
+            azi = np.repeat(float(eval(source_azi)), N_frames)
     else:
-        azi = np.array(source_azi, ndmin=1)[:N_frames]
+        # replicate static azimuth value N_frames times
+        azi = np.repeat(float(source_azi), N_frames)
     
-    if len(azi) < N_frames:
-        # replicate the last elevation
-        azi = np.append(azi, np.full(N_frames - len(azi), azi[-1]))
-
     # convert azimuth from 0 .. 360 to -180 .. +180
     azi = (azi + 180) % 360 - 180
 
-    # check if azimuth is from -180 .. +180
+    # check, if azimuth is from -180 .. +180
     if any(azi > 180) or any(azi < -180):
         logger.error(
             f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}"
         )
 
-    # read elevation information and create array
+    # read elevation information and convert to an array
     if isinstance(source_ele, str):
         if ":" in source_ele:
+            # convert into array (initial_value:step:stop_value)
+            # note: the configured stop_value is ignored; the range stops at +-90 degrees, depending on the sign of the step
             source_ele = source_ele.split(":")
-            ele = np.linspace(
+            ele = np.arange(
                 float(eval(source_ele[0])),
-                float(eval(source_ele[2])),
-                N_frames
-            )
+                np.sign(float(eval(source_ele[1]))) * 90,
+                float(eval(source_ele[1]))
+            )[:N_frames]
+            
+            # repeat the last elevation value, if array is shorter than N_frames
+            if len(ele) < N_frames:
+                ele = np.append(ele, np.full(N_frames - len(ele), ele[-1]))
         else:
-            ele = np.array(float(eval(source_ele)), ndmin=1)
+            # replicate static elevation value N_frames times
+            ele = np.repeat(float(eval(source_ele)), N_frames)
     else:
-        ele = np.array(source_ele, ndmin=1)[:N_frames]
+        # replicate static elevation value N_frames times
+        ele = np.repeat(float(source_ele), N_frames)
     
-    if len(ele) < N_frames:
-        # replicate the last elevation
-        ele = np.append(ele, np.full(N_frames - len(ele), ele[-1]))
-
     # check if elevation is from -90 .. +90
     if any(ele > 90) or any(ele < -90):
         logger.error(
             f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}"
         )
+        
     # arrange all metadata fields column-wise into a matrix
     y_meta = np.column_stack((azi, ele))
 
diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py
index 4ece488d..9935bc20 100644
--- a/ivas_processing_scripts/generation/process_ism2_items.py
+++ b/ivas_processing_scripts/generation/process_ism2_items.py
@@ -302,9 +302,10 @@ def generate_ism2_scene(
 
         N_frames = int(len(y.audio) / y.fs * 50)
 
-        # read azimuth information and create array
+        # read azimuth information and convert to an array 
         if isinstance(source_azi, str):
             if ":" in source_azi:
+                # start with the initial azimuth value and apply step N_frames times
                 source_azi = source_azi.split(":")
                 azi = np.linspace(
                     float(eval(source_azi[0])),
@@ -312,13 +313,11 @@ def generate_ism2_scene(
                     N_frames
                 )
             else:
-                azi = np.array(float(eval(source_azi)), ndmin=1)
+                # replicate static azimuth value N_frames times
+                azi = np.repeat(float(eval(source_azi)), N_frames)
         else:
-            azi = np.array(source_azi, ndmin=1)[:N_frames]
-        
-        if len(azi) < N_frames:
-            # replicate the last elevation
-            azi = np.append(azi, np.full(N_frames - len(azi), azi[-1]))
+            # replicate static azimuth value N_frames times
+            azi = np.repeat(float(source_azi), N_frames)
             
         # convert azimuth from 0 .. 360 to -180 .. +180
         azi = (azi + 180) % 360 - 180
@@ -329,23 +328,27 @@ def generate_ism2_scene(
                 f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}"
             )
 
-        # read elevation information and create array
+        # read elevation information and convert to an array
         if isinstance(source_ele, str):
             if ":" in source_ele:
+                # convert into array (initial_value:step:stop_value)
+                # note: the stop_value value is +-90 degrees depending on the sign of the step
                 source_ele = source_ele.split(":")
-                ele = np.linspace(
+                ele = np.arange(
                     float(eval(source_ele[0])),
-                    float(eval(source_ele[2])),
-                    N_frames
-                )
+                    np.sign(float(eval(source_ele[1]))) * 90,
+                    float(eval(source_ele[1]))
+                )[:N_frames]
+
+                # repeat the last elevation value, if array is shorter than N_frames
+                if len(ele) < N_frames:
+                    ele = np.append(ele, np.full(N_frames - len(ele), ele[-1]))
             else:
-                ele = np.array(float(eval(source_ele)), ndmin=1)
+                # replicate static elevation value N_frames times
+                ele = np.repeat(float(eval(source_ele)), N_frames)
         else:
-            ele = np.array(source_ele, ndmin=1)[:N_frames]
-        
-        if len(ele) < N_frames:
-            # replicate the last elevation
-            ele = np.append(ele, np.full(N_frames - len(ele), ele[-1]))
+            # replicate static elevation value N_frames times
+            ele = np.repeat(float(source_ele), N_frames)
 
         # check if elevation is from -90 .. +90
         if any(ele > 90) or any(ele < -90):
-- 
GitLab


From 8a48abb14089de26a6ea20889ca64226ea63313e Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@usherbrooke.ca>
Date: Thu, 15 Jun 2023 12:37:30 +0200
Subject: [PATCH 6/9] reduce elevation displacement step from 0.5 to 0.3
 degrees in scene e

---
 .../P800-6/config/item_gen_P800-6.yml         | 26 ++++++++++++-------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/experiments/selection/P800-6/config/item_gen_P800-6.yml b/experiments/selection/P800-6/config/item_gen_P800-6.yml
index 101f07f6..1a7584a2 100644
--- a/experiments/selection/P800-6/config/item_gen_P800-6.yml
+++ b/experiments/selection/P800-6/config/item_gen_P800-6.yml
@@ -31,11 +31,17 @@ postamble: 1.0
 add_low_level_random_noise: true
 
 ### File designators, default is "l" for listening lab, "EN" for language, "p06" for exp and "g" for provider
-listening_lab: "a"
-language: "JP"
-exp: "p06"
+# listening_lab: "a"
+# language: "JP"
+# exp: "p06"
+# provider: "g"
+
+listening_lab: "l"
+language: "EN"
+exp: "p01"
 provider: "g"
 
+
 ### Use prefix for all input filenames (default: "")
 ### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) 
 use_input_prefix: "lLLeee"
@@ -123,7 +129,7 @@ scenes:
         description: "Elevation displacement."
         source: ["m1s05.wav", "m1s11.wav"]
         azimuth: 120 
-        elevation: "-90:0.5:90"  
+        elevation: "-90:0.3:90"  
         overlap: -0.5
 
     cat1_6: 
@@ -171,7 +177,7 @@ scenes:
         description: "Elevation displacement."
         source: ["f1s04.wav", "f1s10.wav"]
         azimuth: 60 
-        elevation: "-90:0.5:90"  
+        elevation: "-90:0.3:90"  
         overlap: -0.5
 
     cat2_5: 
@@ -219,7 +225,7 @@ scenes:
         description: "Elevation displacement."
         source: ["m2s03.wav", "m2s09.wav"]
         azimuth: 0 
-        elevation: "-90:0.5:90"  
+        elevation: "-90:0.3:90"  
         overlap: -0.5
 
     cat3_4: 
@@ -267,7 +273,7 @@ scenes:
         description: "Elevation displacement."
         source: ["f2s02.wav", "f2s08.wav"]
         azimuth: 300 
-        elevation: 0 
+        elevation: "-90:0.3:90" 
         overlap: -0.5
 
     cat4_3: 
@@ -315,7 +321,7 @@ scenes:
         description: "Elevation displacement."
         source: ["m3s01.wav", "m3s07.wav"]
         azimuth: 240 
-        elevation: "-90:0.5:90" 
+        elevation: "-90:0.3:90" 
         overlap: -0.5
 
     cat5_2: 
@@ -411,7 +417,7 @@ scenes:
         description: "Elevation displacement."
         source: ["f3s06.wav", "f3s12.wav"]
         azimuth: 180 
-        elevation: "-90:0.5:90"  
+        elevation: "-90:0.3:90"  
         overlap: -0.5
 
     cat6_7:
@@ -419,5 +425,5 @@ scenes:
         description: "Preliminary: Elevation displacement."
         source: ["f3s13.wav", "f3s14.wav"]
         azimuth: 120 
-        elevation: "-90:0.5:90"  
+        elevation: "-90:0.3:90"  
         overlap: -0.5
-- 
GitLab


From 0966754905ecda68a5d781586accd099c1b39b1f Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@usherbrooke.ca>
Date: Thu, 15 Jun 2023 12:39:03 +0200
Subject: [PATCH 7/9] remove debugging modifications

---
 .../selection/P800-6/config/item_gen_P800-6.yml       | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/experiments/selection/P800-6/config/item_gen_P800-6.yml b/experiments/selection/P800-6/config/item_gen_P800-6.yml
index 1a7584a2..c4321aba 100644
--- a/experiments/selection/P800-6/config/item_gen_P800-6.yml
+++ b/experiments/selection/P800-6/config/item_gen_P800-6.yml
@@ -31,14 +31,9 @@ postamble: 1.0
 add_low_level_random_noise: true
 
 ### File designators, default is "l" for listening lab, "EN" for language, "p06" for exp and "g" for provider
-# listening_lab: "a"
-# language: "JP"
-# exp: "p06"
-# provider: "g"
-
-listening_lab: "l"
-language: "EN"
-exp: "p01"
+listening_lab: "a"
+language: "JP"
+exp: "p06"
 provider: "g"
 
 
-- 
GitLab


From 3e9a392ce247d4d7d985520084fdc99ed7875001 Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@usherbrooke.ca>
Date: Thu, 15 Jun 2023 14:14:51 +0200
Subject: [PATCH 8/9] correction in azimuth range

---
 ivas_processing_scripts/generation/process_ism1_items.py | 6 +++---
 ivas_processing_scripts/generation/process_ism2_items.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py
index d6ffc6d7..ee1df7d8 100644
--- a/ivas_processing_scripts/generation/process_ism1_items.py
+++ b/ivas_processing_scripts/generation/process_ism1_items.py
@@ -272,10 +272,10 @@ def generate_ism1_scene(
         if ":" in source_azi:
             # start with the initial azimuth value and apply step N_frames times
             source_azi = source_azi.split(":")
-            azi = np.linspace(
+            azi = np.arange(
                 float(eval(source_azi[0])),
-                float(eval(source_azi[2])),
-                N_frames
+                float(eval(source_azi[0])) + N_frames * float(eval(source_azi[1])),
+                float(eval(source_azi[1]))
             )
         else:
             # replicate static azimuth value N_frames times
diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py
index 9935bc20..4afa730d 100644
--- a/ivas_processing_scripts/generation/process_ism2_items.py
+++ b/ivas_processing_scripts/generation/process_ism2_items.py
@@ -307,10 +307,10 @@ def generate_ism2_scene(
             if ":" in source_azi:
                 # start with the initial azimuth value and apply step N_frames times
                 source_azi = source_azi.split(":")
-                azi = np.linspace(
+                azi = np.arange(
                     float(eval(source_azi[0])),
-                    float(eval(source_azi[2])),
-                    N_frames
+                    float(eval(source_azi[0])) + N_frames * float(eval(source_azi[1])),
+                    float(eval(source_azi[1]))
                 )
             else:
                 # replicate static azimuth value N_frames times
-- 
GitLab


From 251368432f0d9976353050da3fae8f9ce00f097e Mon Sep 17 00:00:00 2001
From: Vladimir Malenovsky <vladimir.malenovsky@usherbrooke.ca>
Date: Fri, 16 Jun 2023 09:04:14 +0200
Subject: [PATCH 9/9] formatting

---
 .../generation/process_ism1_items.py              | 15 +++++++--------
 .../generation/process_ism2_items.py              |  8 ++++----
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py
index ee1df7d8..6c45ad65 100644
--- a/ivas_processing_scripts/generation/process_ism1_items.py
+++ b/ivas_processing_scripts/generation/process_ism1_items.py
@@ -260,14 +260,13 @@ def generate_ism1_scene(
         # superimpose
         y.audio += noise
 
-
     # process azimuth and elevation
     source_azi = scene["azimuth"]
     source_ele = scene["elevation"]
 
     N_frames = int(len(y.audio) / y.fs * 50)
 
-    # read azimuth information and convert to an array 
+    # read azimuth information and convert to an array
     if isinstance(source_azi, str):
         if ":" in source_azi:
             # start with the initial azimuth value and apply step N_frames times
@@ -275,7 +274,7 @@ def generate_ism1_scene(
             azi = np.arange(
                 float(eval(source_azi[0])),
                 float(eval(source_azi[0])) + N_frames * float(eval(source_azi[1])),
-                float(eval(source_azi[1]))
+                float(eval(source_azi[1])),
             )
         else:
             # replicate static azimuth value N_frames times
@@ -283,7 +282,7 @@ def generate_ism1_scene(
     else:
         # replicate static azimuth value N_frames times
         azi = np.repeat(float(source_azi), N_frames)
-    
+
     # convert azimuth from 0 .. 360 to -180 .. +180
     azi = (azi + 180) % 360 - 180
 
@@ -302,9 +301,9 @@ def generate_ism1_scene(
             ele = np.arange(
                 float(eval(source_ele[0])),
                 np.sign(float(eval(source_ele[1]))) * 90,
-                float(eval(source_ele[1]))
+                float(eval(source_ele[1])),
             )[:N_frames]
-            
+
             # repeat the last elevation value, if array is shorter than N_frames
             if len(ele) < N_frames:
                 ele = np.append(ele, np.full(N_frames - len(ele), ele[-1]))
@@ -314,13 +313,13 @@ def generate_ism1_scene(
     else:
         # replicate static elevation value N_frames times
         ele = np.repeat(float(source_ele), N_frames)
-    
+
     # check if elevation is from -90 .. +90
     if any(ele > 90) or any(ele < -90):
         logger.error(
             f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}"
         )
-        
+
     # arrange all metadata fields column-wise into a matrix
     y_meta = np.column_stack((azi, ele))
 
diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py
index 4afa730d..2f3fc0c0 100644
--- a/ivas_processing_scripts/generation/process_ism2_items.py
+++ b/ivas_processing_scripts/generation/process_ism2_items.py
@@ -302,7 +302,7 @@ def generate_ism2_scene(
 
         N_frames = int(len(y.audio) / y.fs * 50)
 
-        # read azimuth information and convert to an array 
+        # read azimuth information and convert to an array
         if isinstance(source_azi, str):
             if ":" in source_azi:
                 # start with the initial azimuth value and apply step N_frames times
@@ -310,7 +310,7 @@ def generate_ism2_scene(
                 azi = np.arange(
                     float(eval(source_azi[0])),
                     float(eval(source_azi[0])) + N_frames * float(eval(source_azi[1])),
-                    float(eval(source_azi[1]))
+                    float(eval(source_azi[1])),
                 )
             else:
                 # replicate static azimuth value N_frames times
@@ -318,7 +318,7 @@ def generate_ism2_scene(
         else:
             # replicate static azimuth value N_frames times
             azi = np.repeat(float(source_azi), N_frames)
-            
+
         # convert azimuth from 0 .. 360 to -180 .. +180
         azi = (azi + 180) % 360 - 180
 
@@ -337,7 +337,7 @@ def generate_ism2_scene(
                 ele = np.arange(
                     float(eval(source_ele[0])),
                     np.sign(float(eval(source_ele[1]))) * 90,
-                    float(eval(source_ele[1]))
+                    float(eval(source_ele[1])),
                 )[:N_frames]
 
                 # repeat the last elevation value, if array is shorter than N_frames
-- 
GitLab