From 18ca9aaf02fc0b46dfc6a69002dc680b88052228 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 6 Jun 2023 16:11:48 +0200 Subject: [PATCH 01/11] update of config file --- item_gen_configs/P800-1.yml | 229 ++++++++++++++++++++---------------- 1 file changed, 126 insertions(+), 103 deletions(-) diff --git a/item_gen_configs/P800-1.yml b/item_gen_configs/P800-1.yml index eeec0787..56da0194 100644 --- a/item_gen_configs/P800-1.yml +++ b/item_gen_configs/P800-1.yml @@ -6,16 +6,16 @@ ### Output format format: "STEREO" -### Output sampling rate in Hz needed for headerless audio files; default = 48000 +### Output sampling rate in Hz; default = 48000 fs: 48000 -### IR sampling rate in Hz needed for headerless audio files; default = 48000 -IR_fs: 32000 +### IR sampling rate in Hz (only for files in .pcm format); default = 48000 +# IR_fs: 48000 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. ### Do not use file names with dots "." in them! 
This is not supported, use "_" instead -### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions +### For Windows users: please use double back slash '\\' in paths ### Input path to mono files input_path: "./items_mono" @@ -40,297 +40,320 @@ add_low_level_random_noise: true listening_lab: "a" language: "JP" exp: "p01" +# provider: "Dolby" -################################################ -### Input files -################################################ +### Use prefix for all input filenames (default: None) +### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +use_input_prefix: "lLLeee" + +### Use prefix for all IR filenames (default: None) +### p stands for the 'provider', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +# use_IR_prefix: "IR_pp_eee_" + +### Use prefix for all output filenames (default: None) +### l stands for the 'listening_lab' designator, e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +use_output_prefix: "leee" -### s.wav -### -### With -### -### = a (Force Technology), b (HEAD acoustics), -### c (MQ University), d (Mesaqin.com) -### = JP, FR, GE, MA, DA, EN -### = p01, p02, p04, p05, p06, p07, p08, p09 -### = f1, f2, f3, m1, m2, m3 -### = 01, …, 14 ################################################ ### Scene description ################################################ -### Each scene must have a unique name -### Specify the mono source filenames (the program will search for it in the input_path folder) -### Specify the stereo IR source filenames (the program will search for it in the input_path_IR folder) -### Specify the overlap length in seconds for each input source (negative value creates a gap) -### Note 1: use brackets [val1, val2, ...] 
for multiple sources in a scene -### Note 2: use the Matlab notation "start:step:stop" to create moving sources (step will be applied in 20ms frames) +### Each scene must begin by specifying the category in the following format: catN_I where N is the category index and I is the scene index +### Each scene shall be described using the following parameters/properties: +### name: filename of the generated output item (the program will save the generated items in the output_path folder, note: it is possible to use subfolders, e.g. items_stereo/x1_s01.wav) +### description: textual description of the scene +### source: filename(s) of the mono input sources (the program will search for it in the input_path folder) +### IR: filename(s) of the input IRs (the program will search for it in the IR_path folder) +### overlap: overlap length between two input sources in seconds (negative value creates a gap) +### Note 1: use brackets [val1, val2, ...] when specifying multiple values + +### Naming convention for the input mono files +### The input filenames are represented by: +### lLLeeettszz.wav +### where: +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### LL stands for the language: JP, FR, GE, MA, DA, EN +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### tt stands for the talker ID: f1, f2, f3, m1, m2, m3 +### s stands for 'sample' and zz is the sample number; 01, ..., 14 + +### Naming convention for the input IR files +### The input IR filenames are represented by: +### IR_pp_eee_r_tt_mm_ffffff.wav +### where: +### pp stands for the provider: do (Dolby), no (Nokia), or (Orange), vo (VoiceAge), g (G.191) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### r stands for the room ID: a, b, c, ... +### tt stands for the talker position: 01, 02, ... +### mm stands for the microphone position: 00, 01, 02, ... 
+### ffffff stands for the format ID: stAB20, stABC20, stAB100, stAB150, stMS, stBin, FOA, HOA2 -### Naming convention for P.800 items -### The filenames of the input content samples are represented by: +### Naming convention for the generated output files +### The output filenames are represented by: ### leeeayszz.wav -### The filenames of the accompanying metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: +### The filenames of the accompanying output metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: ### leeeayszz.met for metadata-assisted spatial audio ### leeeayszz.wav.o.csv for object-based audio ### where: -### l stands for the listening lab designator (a through d according to Table 2) -### eee stands for the experiment designator, e.g. p01 (see Table 1) -### a stands audio, and y is the per experiment category according to IVAS-8a -### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07, where 07 is the preliminary -### o stands for object number; 0, 1, 2, 3 +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### a stands for 'audio' +### y is the per-experiment category according to IVAS-8a: 01, 02, 03, 04, 05, 06 +### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07 (07 is the preliminary sample) +### o stands for the object number; 0, 1, 2, 3 scenes: cat1_1: - name: "a1s01" + name: "cat1/a1s01.wav" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." 
source: ["f1s01.wav", "m1s01.wav"] - IR: ["SAABP01.wav", "SAABP07.wav"] + IR: ["stereo/IR_g_p01_a_01_00_stAB100.wav", "stereo/IR_g_p01_a_07_00_stAB100.wav"] overlap: 1.0 cat1_2: - name: "a1s02" + name: "cat1/a1s02" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["m2s01.wav", "f2s01.wav"] - IR: ["SAABP05.wav", "SAABP03.wav"] + IR: ["stereo/IR_g_p01_a_05_00_stAB100.wav", "stereo/IR_g_p01_a_03_00_stAB100.wav"] overlap: 1.0 cat1_3: - name: "a1s03" + name: "cat1/a1s03" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["f3s01.wav", "m3s01.wav"] - IR: ["SAABP02.wav", "SAABP06.wav"] + IR: ["stereo/IR_g_p01_a_02_00_stAB100.wav", "stereo/IR_g_p01_a_06_00_stAB100.wav"] overlap: 1.0 cat1_4: - name: "a1s04" + name: "cat1/a1s04" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["m1s02.wav", "f1s02.wav"] - IR: ["SAABP04.wav", "SAABP01.wav"] + IR: ["stereo/IR_g_p01_a_04_00_stAB100.wav", "stereo/IR_g_p01_a_01_00_stAB100.wav"] overlap: 1.0 cat1_5: - name: "a1s05" + name: "cat1/a1s05" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["f2s02.wav", "m2s02.wav"] - IR: ["SAABP03.wav", "SAABP04.wav"] + IR: ["stereo/IR_g_p01_a_03_00_stAB100.wav", "stereo/IR_g_p01_a_04_00_stAB100.wav"] overlap: 1.0 cat1_6: - name: "a1s06" + name: "cat1/a1s06" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["m3s02.wav", "f3s02.wav"] - IR: ["SAABP07.wav", "SAABP02.wav"] + IR: ["stereo/IR_g_p01_a_07_00_stAB100.wav", "stereo/IR_g_p01_a_02_00_stAB100.wav"] overlap: 1.0 cat2_1: - name: "a2s01" + name: "cat2/a2s01" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." 
source: ["m3s03.wav", "f3s03.wav"] - IR: ["LAABP05.wav", "LAABP11.wav"] + IR: ["stereo/IR_g_p01_b_05_00_stAB150.wav", "stereo/IR_g_p01_b_11_00_stAB150.wav"] overlap: -1.0 cat2_2: - name: "a2s02" + name: "cat2/a2s02" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["f1s03.wav", "m1s03.wav"] - IR: ["LAABP01.wav", "LAABP06.wav"] + IR: ["stereo/IR_g_p01_b_01_00_stAB150.wav", "stereo/IR_g_p01_b_06_00_stAB150.wav"] overlap: -1.0 cat2_3: - name: "a2s03" + name: "cat2/a2s03" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["m2s03.wav", "f2s03.wav"] - IR: ["LAABP03.wav", "LAABP07.wav"] + IR: ["stereo/IR_g_p01_b_03_00_stAB150.wav", "stereo/IR_g_p01_b_07_00_stAB150.wav"] overlap: -1.0 cat2_4: - name: "a2s04" + name: "cat2/a2s04" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["f3s04.wav", "m3s04.wav"] - IR: ["LAABP05.wav", "LAABP08.wav"] + IR: ["stereo/IR_g_p01_b_05_00_stAB150.wav", "stereo/IR_g_p01_b_08_00_stAB150.wav"] overlap: -1.0 cat2_5: - name: "a2s05" + name: "cat2/a2s05" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["m1s04.wav", "f1s04.wav"] - IR: ["LAABP09.wav", "LAABP07.wav"] + IR: ["stereo/IR_g_p01_b_09_00_stAB150.wav", "stereo/IR_g_p01_b_07_00_stAB150.wav"] overlap: -1.0 cat2_6: - name: "a2s06" + name: "cat2/a2s06" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["f2s04.wav", "m2s04.wav"] - IR: ["LAABP10.wav", "LAABP09.wav"] + IR: ["stereo/IR_g_p01_b_10_00_stAB150.wav", "stereo/IR_g_p01_b_09_00_stAB150.wav"] overlap: -1.0 cat3_1: - name: "a3s01" + name: "cat3/a3s01" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." 
source: ["f2s05.wav", "m2s05.wav"] - IR: ["SAMSP01.wav", "SAMSP07.wav"] + IR: ["stereo/IR_g_p01_a_01_00_stMS100.wav", "stereo/IR_g_p01_a_07_00_stMS100.wav"] overlap: -1.0 cat3_2: - name: "a3s02" + name: "cat3/a3s02" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["m3s05.wav", "f3s05.wav"] - IR: ["SAMSP05.wav", "SAMSP03.wav"] + IR: ["stereo/IR_g_p01_a_05_00_stMS100.wav", "stereo/IR_g_p01_a_03_00_stMS100.wav"] overlap: -1.0 cat3_3: - name: "a3s03" + name: "cat3/a3s03" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["f1s05.wav", "m1s05.wav"] - IR: ["SAMSP02.wav", "SAMSP06.wav"] + IR: ["stereo/IR_g_p01_a_02_00_stMS100.wav", "stereo/IR_g_p01_a_06_00_stMS100.wav"] overlap: -1.0 cat3_4: - name: "a3s04" + name: "cat3/a3s04" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["m2s06.wav", "f2s06.wav"] - IR: ["SAMSP04.wav", "SAMSP01.wav"] + IR: ["stereo/IR_g_p01_a_04_00_stMS100.wav", "stereo/IR_g_p01_a_01_00_stMS100.wav"] overlap: -1.0 cat3_5: - name: "a3s05" + name: "cat3/a3s05" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["f3s06.wav", "m3s06.wav"] - IR: ["SAMSP03.wav", "SAMSP04.wav"] + IR: ["stereo/IR_g_p01_a_03_00_stMS100.wav", "stereo/IR_g_p01_a_04_00_stMS100.wav"] overlap: -1.0 cat3_6: - name: "a3s06" + name: "cat3/a3s06" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["m1s06.wav", "f1s06.wav"] - IR: ["SAMSP07.wav", "SAMSP02.wav"] + IR: ["stereo/IR_g_p01_a_07_00_stMS100.wav", "stereo/IR_g_p01_a_02_00_stMS100.wav"] overlap: -1.0 cat4_1: - name: "a4s01" + name: "cat4/a4s01" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." 
source: ["m1s07.wav", "f1s07.wav"] - IR: ["SEABP01.wav", "SEABP07.wav"] + IR: ["stereo/IR_g_p01_b_01_00_stAB100.wav", "stereo/IR_g_p01_b_07_00_stAB100.wav"] overlap: 1.0 cat4_2: - name: "a4s02" + name: "cat4/a4s02" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f2s07.wav", "m2s07.wav"] - IR: ["SEABP05.wav", "SEABP03.wav"] + IR: ["stereo/IR_g_p01_b_07_00_stAB100.wav", "stereo/IR_g_p01_b_03_00_stAB100.wav"] overlap: 1.0 cat4_3: - name: "a4s03" + name: "cat4/a4s03" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m3s07.wav", "f3s07.wav"] - IR: ["SEABP02.wav", "SEABP06.wav"] + IR: ["stereo/IR_g_p01_b_02_00_stAB100.wav", "stereo/IR_g_p01_b_06_00_stAB100.wav"] overlap: 1.0 cat4_4: - name: "a4s04" + name: "cat4/a4s04" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f1s08.wav", "m1s08.wav"] - IR: ["SEABP04.wav", "SEABP01.wav"] + IR: ["stereo/IR_g_p01_b_04_00_stAB100.wav", "stereo/IR_g_p01_b_01_00_stAB100.wav"] overlap: 1.0 cat4_5: - name: "a4s05" + name: "cat4/a4s05" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m2s08.wav", "f2s08.wav"] - IR: ["SEABP03.wav", "SEABP04.wav"] + IR: ["stereo/IR_g_p01_b_03_00_stAB100.wav", "stereo/IR_g_p01_b_04_00_stAB100.wav"] overlap: 1.0 cat4_6: - name: "a4s06" + name: "cat4/a4s06" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f3s08.wav", "m3s08.wav"] - IR: ["SEABP07.wav", "SEABP02.wav"] + IR: ["stereo/IR_g_p01_b_07_00_stAB100.wav", "stereo/IR_g_p01_b_02_00_stAB100.wav"] overlap: 1.0 cat5_1: - name: "a5s01" + name: "cat5/a5s01" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." 
source: ["f3s09.wav", "m3s09.wav"] - IR: ["LEABP02.wav", "LEABP08.wav"] + IR: ["stereo/IR_g_p01_d_02_00_stAB150.wav", "stereo/IR_g_p01_d_08_00_stAB150.wav"] overlap: 1.0 cat5_2: - name: "a5s02" + name: "cat5/a5s02" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m1s09.wav", "f1s09.wav"] - IR: ["LEABP09.wav", "LEABP04.wav"] + IR: ["stereo/IR_g_p01_d_09_00_stAB150.wav", "stereo/IR_g_p01_d_04_00_stAB150.wav"] overlap: 1.0 cat5_3: - name: "a5s03" + name: "cat5/a5s03" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f2s09.wav", "m2s09.wav"] - IR: ["LEABP06.wav", "LEABP10.wav"] + IR: ["stereo/IR_g_p01_d_06_00_stAB150.wav", "stereo/IR_g_p01_d_10_00_stAB150.wav"] overlap: 1.0 cat5_4: - name: "a5s04" + name: "cat5/a5s04" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m3s10.wav", "f3s10.wav"] - IR: ["LEABP11.wav", "LEABP08.wav"] + IR: ["stereo/IR_g_p01_d_11_00_stAB150.wav", "stereo/IR_g_p01_d_08_00_stAB150.wav"] overlap: 1.0 cat5_5: - name: "a5s05" + name: "cat5/a5s05" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f1s10.wav", "m1s10.wav"] - IR: ["LEABP10.wav", "LEABP12.wav"] + IR: ["stereo/IR_g_p01_d_10_00_stAB150.wav", "stereo/IR_g_p01_d_12_00_stAB150.wav"] overlap: 1.0 cat5_6: - name: "a5s06" + name: "cat5/a5s06" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m2s10.wav", "f2s10.wav"] - IR: ["LEABP12.wav", "LEABP01.wav"] + IR: ["stereo/IR_g_p01_d_12_00_stAB150.wav", "stereo/IR_g_p01_d_01_00_stAB150.wav"] overlap: 1.0 cat6_1: - name: "a6s01" + name: "cat6/a6s01" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." 
source: ["m2s11.wav", "f2s11.wav"] - IR: ["SEABP01.wav", "SEABP07.wav"] + IR: ["stereo/IR_g_p01_b_01_00_stAB100.wav", "stereo/IR_g_p01_b_07_00_stAB100.wav"] overlap: -1.0 cat6_2: - name: "a6s02" + name: "cat6/a6s02" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["f3s11.wav", "m3s11.wav"] - IR: ["SEABP05.wav", "SEABP03.wav"] + IR: ["stereo/IR_g_p01_b_05_00_stAB100.wav", "stereo/IR_g_p01_b_03_00_stAB100.wav"] overlap: -1.0 cat6_3: - name: "a6s03" + name: "cat6/a6s03" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["m1s11.wav", "f1s11.wav"] - IR: ["SEABP02.wav", "SEABP06.wav"] + IR: ["stereo/IR_g_p01_b_02_00_stAB100.wav", "stereo/IR_g_p01_b_06_00_stAB100.wav"] overlap: -1.0 cat6_4: - name: "a6s04" + name: "cat6/a6s04" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["f2s12.wav", "m2s12.wav"] - IR: ["SEABP04.wav", "SEABP01.wav"] + IR: ["stereo/IR_g_p01_b_04_00_stAB100.wav", "stereo/IR_g_p01_b_01_00_stAB100.wav"] overlap: -1.0 cat6_5: - name: "a6s05" + name: "cat6/a6s05" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["m3s12.wav", "f3s12.wav"] - IR: ["SEABP03.wav", "SEABP04.wav"] + IR: ["stereo/IR_g_p01_b_03_00_stAB100.wav", "stereo/IR_g_p01_b_04_00_stAB100.wav"] overlap: -1.0 cat6_6: - name: "a6s06" + name: "cat6/a6s06" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." 
source: ["f1s12.wav", "m1s12.wav"] - IR: ["SEABP07.wav", "SEABP02.wav"] + IR: ["stereo/IR_g_p01_b_07_00_stAB100.wav", "stereo/IR_g_p01_b_02_00_stAB100.wav"] overlap: -1.0 -- GitLab From 005c31f6f03ea2a29e0406fd30b0da245a168b2a Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Tue, 6 Jun 2023 16:52:59 +0200 Subject: [PATCH 02/11] remove the IRs as they will be provided by the MCE --- .../generation/IR/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav | 3 --- .../generation/IR/Car_TalkPos2_Stereo_M5_SinSweep_2chn.wav | 3 --- .../generation/IR/Car_TalkPos3_Stereo_M5_SinSweep_2chn.wav | 3 --- .../generation/IR/Car_TalkPos4_Stereo_M5_SinSweep_2chn.wav | 3 --- .../generation/IR/Car_TalkPos5_Stereo_M5_SinSweep_2chn.wav | 3 --- .../generation/IR/FreeField_IR_Python_AB_20cm_Pos0.wav | 3 --- .../generation/IR/FreeField_IR_Python_AB_20cm_Pos1.wav | 3 --- .../generation/IR/FreeField_IR_Python_AB_20cm_Pos2.wav | 3 --- .../generation/IR/FreeField_IR_Python_AB_20cm_Pos3.wav | 3 --- .../generation/IR/FreeField_IR_Python_AB_20cm_Pos4.wav | 3 --- ivas_processing_scripts/generation/IR/LAABP01.wav | 3 --- ivas_processing_scripts/generation/IR/LAABP02.wav | 3 --- ivas_processing_scripts/generation/IR/LAABP03.wav | 3 --- ivas_processing_scripts/generation/IR/LAABP04.wav | 3 --- ivas_processing_scripts/generation/IR/LAABP05.wav | 3 --- ivas_processing_scripts/generation/IR/LAABP06.wav | 3 --- ivas_processing_scripts/generation/IR/LAABP07.wav | 3 --- ivas_processing_scripts/generation/IR/LAABP08.wav | 3 --- ivas_processing_scripts/generation/IR/LAABP09.wav | 3 --- ivas_processing_scripts/generation/IR/LAABP10.wav | 3 --- ivas_processing_scripts/generation/IR/LAABP11.wav | 3 --- ivas_processing_scripts/generation/IR/LAABP12.wav | 3 --- ivas_processing_scripts/generation/IR/LEABP01.wav | 3 --- ivas_processing_scripts/generation/IR/LEABP02.wav | 3 --- ivas_processing_scripts/generation/IR/LEABP03.wav | 3 --- ivas_processing_scripts/generation/IR/LEABP04.wav | 3 --- 
ivas_processing_scripts/generation/IR/LEABP05.wav | 3 --- ivas_processing_scripts/generation/IR/LEABP06.wav | 3 --- ivas_processing_scripts/generation/IR/LEABP07.wav | 3 --- ivas_processing_scripts/generation/IR/LEABP08.wav | 3 --- ivas_processing_scripts/generation/IR/LEABP09.wav | 3 --- ivas_processing_scripts/generation/IR/LEABP10.wav | 3 --- ivas_processing_scripts/generation/IR/LEABP11.wav | 3 --- ivas_processing_scripts/generation/IR/LEABP12.wav | 3 --- ivas_processing_scripts/generation/IR/SAABP01.wav | 3 --- ivas_processing_scripts/generation/IR/SAABP02.wav | 3 --- ivas_processing_scripts/generation/IR/SAABP03.wav | 3 --- ivas_processing_scripts/generation/IR/SAABP04.wav | 3 --- ivas_processing_scripts/generation/IR/SAABP05.wav | 3 --- ivas_processing_scripts/generation/IR/SAABP06.wav | 3 --- ivas_processing_scripts/generation/IR/SAABP07.wav | 3 --- ivas_processing_scripts/generation/IR/SAMSP01.wav | 3 --- ivas_processing_scripts/generation/IR/SAMSP02.wav | 3 --- ivas_processing_scripts/generation/IR/SAMSP03.wav | 3 --- ivas_processing_scripts/generation/IR/SAMSP04.wav | 3 --- ivas_processing_scripts/generation/IR/SAMSP05.wav | 3 --- ivas_processing_scripts/generation/IR/SAMSP06.wav | 3 --- ivas_processing_scripts/generation/IR/SAMSP07.wav | 3 --- ivas_processing_scripts/generation/IR/SEABP01.wav | 3 --- ivas_processing_scripts/generation/IR/SEABP02.wav | 3 --- ivas_processing_scripts/generation/IR/SEABP03.wav | 3 --- ivas_processing_scripts/generation/IR/SEABP04.wav | 3 --- ivas_processing_scripts/generation/IR/SEABP05.wav | 3 --- ivas_processing_scripts/generation/IR/SEABP06.wav | 3 --- ivas_processing_scripts/generation/IR/SEABP07.wav | 3 --- ivas_processing_scripts/generation/IR/SEBIP01.wav | 3 --- ivas_processing_scripts/generation/IR/SEBIP02.wav | 3 --- ivas_processing_scripts/generation/IR/SEBIP03.wav | 3 --- ivas_processing_scripts/generation/IR/SEBIP04.wav | 3 --- ivas_processing_scripts/generation/IR/SEBIP05.wav | 3 --- 
ivas_processing_scripts/generation/IR/SEBIP06.wav | 3 --- ivas_processing_scripts/generation/IR/SEBIP07.wav | 3 --- ivas_processing_scripts/generation/IR/SEMSP01.wav | 3 --- ivas_processing_scripts/generation/IR/SEMSP02.wav | 3 --- ivas_processing_scripts/generation/IR/SEMSP03.wav | 3 --- ivas_processing_scripts/generation/IR/SEMSP04.wav | 3 --- ivas_processing_scripts/generation/IR/SEMSP05.wav | 3 --- ivas_processing_scripts/generation/IR/SEMSP06.wav | 3 --- ivas_processing_scripts/generation/IR/SEMSP07.wav | 3 --- 69 files changed, 207 deletions(-) delete mode 100644 ivas_processing_scripts/generation/IR/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav delete mode 100644 ivas_processing_scripts/generation/IR/Car_TalkPos2_Stereo_M5_SinSweep_2chn.wav delete mode 100644 ivas_processing_scripts/generation/IR/Car_TalkPos3_Stereo_M5_SinSweep_2chn.wav delete mode 100644 ivas_processing_scripts/generation/IR/Car_TalkPos4_Stereo_M5_SinSweep_2chn.wav delete mode 100644 ivas_processing_scripts/generation/IR/Car_TalkPos5_Stereo_M5_SinSweep_2chn.wav delete mode 100644 ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos0.wav delete mode 100644 ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos1.wav delete mode 100644 ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos2.wav delete mode 100644 ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos3.wav delete mode 100644 ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos4.wav delete mode 100644 ivas_processing_scripts/generation/IR/LAABP01.wav delete mode 100644 ivas_processing_scripts/generation/IR/LAABP02.wav delete mode 100644 ivas_processing_scripts/generation/IR/LAABP03.wav delete mode 100644 ivas_processing_scripts/generation/IR/LAABP04.wav delete mode 100644 ivas_processing_scripts/generation/IR/LAABP05.wav delete mode 100644 ivas_processing_scripts/generation/IR/LAABP06.wav delete mode 100644 ivas_processing_scripts/generation/IR/LAABP07.wav 
delete mode 100644 ivas_processing_scripts/generation/IR/LAABP08.wav delete mode 100644 ivas_processing_scripts/generation/IR/LAABP09.wav delete mode 100644 ivas_processing_scripts/generation/IR/LAABP10.wav delete mode 100644 ivas_processing_scripts/generation/IR/LAABP11.wav delete mode 100644 ivas_processing_scripts/generation/IR/LAABP12.wav delete mode 100644 ivas_processing_scripts/generation/IR/LEABP01.wav delete mode 100644 ivas_processing_scripts/generation/IR/LEABP02.wav delete mode 100644 ivas_processing_scripts/generation/IR/LEABP03.wav delete mode 100644 ivas_processing_scripts/generation/IR/LEABP04.wav delete mode 100644 ivas_processing_scripts/generation/IR/LEABP05.wav delete mode 100644 ivas_processing_scripts/generation/IR/LEABP06.wav delete mode 100644 ivas_processing_scripts/generation/IR/LEABP07.wav delete mode 100644 ivas_processing_scripts/generation/IR/LEABP08.wav delete mode 100644 ivas_processing_scripts/generation/IR/LEABP09.wav delete mode 100644 ivas_processing_scripts/generation/IR/LEABP10.wav delete mode 100644 ivas_processing_scripts/generation/IR/LEABP11.wav delete mode 100644 ivas_processing_scripts/generation/IR/LEABP12.wav delete mode 100644 ivas_processing_scripts/generation/IR/SAABP01.wav delete mode 100644 ivas_processing_scripts/generation/IR/SAABP02.wav delete mode 100644 ivas_processing_scripts/generation/IR/SAABP03.wav delete mode 100644 ivas_processing_scripts/generation/IR/SAABP04.wav delete mode 100644 ivas_processing_scripts/generation/IR/SAABP05.wav delete mode 100644 ivas_processing_scripts/generation/IR/SAABP06.wav delete mode 100644 ivas_processing_scripts/generation/IR/SAABP07.wav delete mode 100644 ivas_processing_scripts/generation/IR/SAMSP01.wav delete mode 100644 ivas_processing_scripts/generation/IR/SAMSP02.wav delete mode 100644 ivas_processing_scripts/generation/IR/SAMSP03.wav delete mode 100644 ivas_processing_scripts/generation/IR/SAMSP04.wav delete mode 100644 
ivas_processing_scripts/generation/IR/SAMSP05.wav delete mode 100644 ivas_processing_scripts/generation/IR/SAMSP06.wav delete mode 100644 ivas_processing_scripts/generation/IR/SAMSP07.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEABP01.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEABP02.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEABP03.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEABP04.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEABP05.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEABP06.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEABP07.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEBIP01.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEBIP02.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEBIP03.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEBIP04.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEBIP05.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEBIP06.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEBIP07.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEMSP01.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEMSP02.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEMSP03.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEMSP04.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEMSP05.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEMSP06.wav delete mode 100644 ivas_processing_scripts/generation/IR/SEMSP07.wav diff --git a/ivas_processing_scripts/generation/IR/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav b/ivas_processing_scripts/generation/IR/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav deleted file mode 100644 index 897ed804..00000000 --- a/ivas_processing_scripts/generation/IR/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:3f7dba87cc6d72206ce41c451d76cc9c68bacc7cb4f2599630e2b57be7360427 -size 38444 diff --git a/ivas_processing_scripts/generation/IR/Car_TalkPos2_Stereo_M5_SinSweep_2chn.wav b/ivas_processing_scripts/generation/IR/Car_TalkPos2_Stereo_M5_SinSweep_2chn.wav deleted file mode 100644 index 54cbeac4..00000000 --- a/ivas_processing_scripts/generation/IR/Car_TalkPos2_Stereo_M5_SinSweep_2chn.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a9c32e8552f2ba45c060d76a9f5bd3e587b6d721892101a29ab69952310a1582 -size 38444 diff --git a/ivas_processing_scripts/generation/IR/Car_TalkPos3_Stereo_M5_SinSweep_2chn.wav b/ivas_processing_scripts/generation/IR/Car_TalkPos3_Stereo_M5_SinSweep_2chn.wav deleted file mode 100644 index d191e017..00000000 --- a/ivas_processing_scripts/generation/IR/Car_TalkPos3_Stereo_M5_SinSweep_2chn.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:228df054ac0a52b06ec33f39f5fffcf77fea7b4f1408ac5838947e689cc6d5ee -size 38444 diff --git a/ivas_processing_scripts/generation/IR/Car_TalkPos4_Stereo_M5_SinSweep_2chn.wav b/ivas_processing_scripts/generation/IR/Car_TalkPos4_Stereo_M5_SinSweep_2chn.wav deleted file mode 100644 index 66e86f22..00000000 --- a/ivas_processing_scripts/generation/IR/Car_TalkPos4_Stereo_M5_SinSweep_2chn.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:54123ac67fe98f920ee77cca20f2c37d902e36cf68ca8d676b16eda217c96899 -size 38444 diff --git a/ivas_processing_scripts/generation/IR/Car_TalkPos5_Stereo_M5_SinSweep_2chn.wav b/ivas_processing_scripts/generation/IR/Car_TalkPos5_Stereo_M5_SinSweep_2chn.wav deleted file mode 100644 index a81018a4..00000000 --- a/ivas_processing_scripts/generation/IR/Car_TalkPos5_Stereo_M5_SinSweep_2chn.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4f1e33ec20a287607ac31ecb54ca0285d383be9bbafaa3c176c73244554a1e7c 
-size 38444 diff --git a/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos0.wav b/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos0.wav deleted file mode 100644 index 36bf960c..00000000 --- a/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos0.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c58b79685b9a5b8896d3e78e8ccb0499c7a3da57693904cf358db0ec6a73fe6f -size 984 diff --git a/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos1.wav b/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos1.wav deleted file mode 100644 index b640c9f0..00000000 --- a/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos1.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:676f1d1d041094542652200252b54e7788277deea7e92cbfc13273b2b6a68f04 -size 984 diff --git a/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos2.wav b/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos2.wav deleted file mode 100644 index 70745113..00000000 --- a/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos2.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8a6eb953c2890beac150bac67250c3a95188d40a9b4e2971f219f8815b15cdbb -size 984 diff --git a/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos3.wav b/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos3.wav deleted file mode 100644 index 6f2e03b3..00000000 --- a/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos3.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e82f2f00ea52c79cc05898824d114ccdc9ea3d5e4e58ab6548da95fa93cc6341 -size 984 diff --git a/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos4.wav 
b/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos4.wav deleted file mode 100644 index 7bdb29fd..00000000 --- a/ivas_processing_scripts/generation/IR/FreeField_IR_Python_AB_20cm_Pos4.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a9c3bea97a2556492712f6edac3c2776497a82b9c9da193db42cfd2b7eb09ad -size 984 diff --git a/ivas_processing_scripts/generation/IR/LAABP01.wav b/ivas_processing_scripts/generation/IR/LAABP01.wav deleted file mode 100644 index aeaa9eeb..00000000 --- a/ivas_processing_scripts/generation/IR/LAABP01.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4a4e959d347d3f99468dbe75bce9853eb9d66af6cb22cf3ea9ad2dc4c9e84a2a -size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP02.wav b/ivas_processing_scripts/generation/IR/LAABP02.wav deleted file mode 100644 index 41586c2f..00000000 --- a/ivas_processing_scripts/generation/IR/LAABP02.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2658ddec94aa86e2fa0ed365686daded586a6a46436dff1c6d8dba6d17d0182c -size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP03.wav b/ivas_processing_scripts/generation/IR/LAABP03.wav deleted file mode 100644 index c4ec38f9..00000000 --- a/ivas_processing_scripts/generation/IR/LAABP03.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5616c8bcf3959aeee246a96a9f2ce6793d4087bfce3dfd1d97e313e3717b5bd6 -size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP04.wav b/ivas_processing_scripts/generation/IR/LAABP04.wav deleted file mode 100644 index 1c50022f..00000000 --- a/ivas_processing_scripts/generation/IR/LAABP04.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f433047f7fdba568183873d11c7f4423550a675b3e0677b6d846137227862bac -size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP05.wav 
b/ivas_processing_scripts/generation/IR/LAABP05.wav deleted file mode 100644 index e3bd1916..00000000 --- a/ivas_processing_scripts/generation/IR/LAABP05.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:791b69ca22d15226e5e2f6c5a39d3d40af04264523f3373d842a070ea4d40862 -size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP06.wav b/ivas_processing_scripts/generation/IR/LAABP06.wav deleted file mode 100644 index 1c50022f..00000000 --- a/ivas_processing_scripts/generation/IR/LAABP06.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f433047f7fdba568183873d11c7f4423550a675b3e0677b6d846137227862bac -size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP07.wav b/ivas_processing_scripts/generation/IR/LAABP07.wav deleted file mode 100644 index c4ec38f9..00000000 --- a/ivas_processing_scripts/generation/IR/LAABP07.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5616c8bcf3959aeee246a96a9f2ce6793d4087bfce3dfd1d97e313e3717b5bd6 -size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP08.wav b/ivas_processing_scripts/generation/IR/LAABP08.wav deleted file mode 100644 index 41586c2f..00000000 --- a/ivas_processing_scripts/generation/IR/LAABP08.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2658ddec94aa86e2fa0ed365686daded586a6a46436dff1c6d8dba6d17d0182c -size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP09.wav b/ivas_processing_scripts/generation/IR/LAABP09.wav deleted file mode 100644 index aeaa9eeb..00000000 --- a/ivas_processing_scripts/generation/IR/LAABP09.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4a4e959d347d3f99468dbe75bce9853eb9d66af6cb22cf3ea9ad2dc4c9e84a2a -size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP10.wav b/ivas_processing_scripts/generation/IR/LAABP10.wav deleted file mode 100644 
index 37693eb5..00000000 --- a/ivas_processing_scripts/generation/IR/LAABP10.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9271410ecad011fbcf22fb8f7af5b0f19f02510ef0f198ef6c6d9e33e64d38da -size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP11.wav b/ivas_processing_scripts/generation/IR/LAABP11.wav deleted file mode 100644 index 482a0e76..00000000 --- a/ivas_processing_scripts/generation/IR/LAABP11.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cda11409aae6b99f6ccb4d20db24b065b7b2bda004dddd7659607215568d90b6 -size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP12.wav b/ivas_processing_scripts/generation/IR/LAABP12.wav deleted file mode 100644 index 37693eb5..00000000 --- a/ivas_processing_scripts/generation/IR/LAABP12.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9271410ecad011fbcf22fb8f7af5b0f19f02510ef0f198ef6c6d9e33e64d38da -size 36804 diff --git a/ivas_processing_scripts/generation/IR/LEABP01.wav b/ivas_processing_scripts/generation/IR/LEABP01.wav deleted file mode 100644 index 424ddfb5..00000000 --- a/ivas_processing_scripts/generation/IR/LEABP01.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d810da26d72e818444c6ee16a3a59a77eabf74df3aaebd2b021696fa7fdd610f -size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP02.wav b/ivas_processing_scripts/generation/IR/LEABP02.wav deleted file mode 100644 index 784caa2d..00000000 --- a/ivas_processing_scripts/generation/IR/LEABP02.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4c21239ff8bbf0e465a175f7ea5125c03f02568a8dbc9b4b63e064955529c489 -size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP03.wav b/ivas_processing_scripts/generation/IR/LEABP03.wav deleted file mode 100644 index c81bce1f..00000000 --- 
a/ivas_processing_scripts/generation/IR/LEABP03.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:96e5b25de682dc8e0c1f036bbb0c193cfef574a48621069584d48cdd40f520ed -size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP04.wav b/ivas_processing_scripts/generation/IR/LEABP04.wav deleted file mode 100644 index 87d97879..00000000 --- a/ivas_processing_scripts/generation/IR/LEABP04.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bfd86b594612a319e30676e4e3c0d177f01ee5626379864610df9796532e7024 -size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP05.wav b/ivas_processing_scripts/generation/IR/LEABP05.wav deleted file mode 100644 index 5e01d3be..00000000 --- a/ivas_processing_scripts/generation/IR/LEABP05.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2e31f9bf16791af9b3e01e75316d5bfe32115a5dec8a4b820d253e78e0b84edb -size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP06.wav b/ivas_processing_scripts/generation/IR/LEABP06.wav deleted file mode 100644 index a1027066..00000000 --- a/ivas_processing_scripts/generation/IR/LEABP06.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:65be054317c4dfd5cb0f9bef1d9fc90f35df6ae841e223280946e435c7b6b0c7 -size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP07.wav b/ivas_processing_scripts/generation/IR/LEABP07.wav deleted file mode 100644 index 3bfe1b97..00000000 --- a/ivas_processing_scripts/generation/IR/LEABP07.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:78da36e2a0652cc9c7f77279ba1342d0f58b4a879ef4e3038da38580c9bfd07d -size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP08.wav b/ivas_processing_scripts/generation/IR/LEABP08.wav deleted file mode 100644 index 7ac86eb1..00000000 --- a/ivas_processing_scripts/generation/IR/LEABP08.wav +++ /dev/null @@ -1,3 +0,0 
@@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fa78fae31221631fd31d251ea6ad5f7369bbcc054c84e8b82dca7c8613f3867a -size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP09.wav b/ivas_processing_scripts/generation/IR/LEABP09.wav deleted file mode 100644 index 010be6fb..00000000 --- a/ivas_processing_scripts/generation/IR/LEABP09.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fd7a9ca0ff37a58455414d8e66efb9aa6d8f686af7459751e24f40eb3c2d6415 -size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP10.wav b/ivas_processing_scripts/generation/IR/LEABP10.wav deleted file mode 100644 index 4fbadb40..00000000 --- a/ivas_processing_scripts/generation/IR/LEABP10.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7149eb3558db62f34e4f476c85a57733e0ca153a297aa183ebeb550878a5ab40 -size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP11.wav b/ivas_processing_scripts/generation/IR/LEABP11.wav deleted file mode 100644 index 156d4156..00000000 --- a/ivas_processing_scripts/generation/IR/LEABP11.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2665ed857b1e3f095581c591e400e9ef532ff9e130a414bc2cc939c37b829c8a -size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP12.wav b/ivas_processing_scripts/generation/IR/LEABP12.wav deleted file mode 100644 index e84b30b8..00000000 --- a/ivas_processing_scripts/generation/IR/LEABP12.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ad942c2d19303a80ccadab2289172c514c83397096e8317476d6e8dd6463f0f4 -size 82068 diff --git a/ivas_processing_scripts/generation/IR/SAABP01.wav b/ivas_processing_scripts/generation/IR/SAABP01.wav deleted file mode 100644 index 180b682a..00000000 --- a/ivas_processing_scripts/generation/IR/SAABP01.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:cd68dd01200bbfd25bebec4dfc63b8f528a03c88d1307e75d7a6c91eeec8be6e -size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP02.wav b/ivas_processing_scripts/generation/IR/SAABP02.wav deleted file mode 100644 index f0acab78..00000000 --- a/ivas_processing_scripts/generation/IR/SAABP02.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3f145f6f8eb8324c7f3e18c5af5047641e82952603a787e0b7e069d26d5c4ca6 -size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP03.wav b/ivas_processing_scripts/generation/IR/SAABP03.wav deleted file mode 100644 index 1efea8d6..00000000 --- a/ivas_processing_scripts/generation/IR/SAABP03.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f8493653f497915b35377984c6d79e04aa344ccf44e0d5b8e286fbec492c9c31 -size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP04.wav b/ivas_processing_scripts/generation/IR/SAABP04.wav deleted file mode 100644 index ec788896..00000000 --- a/ivas_processing_scripts/generation/IR/SAABP04.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:36c04e66154b91979160d18faaf02dc226f6d2ed61f63d19227d777bb3459987 -size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP05.wav b/ivas_processing_scripts/generation/IR/SAABP05.wav deleted file mode 100644 index 3098f0b4..00000000 --- a/ivas_processing_scripts/generation/IR/SAABP05.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dae758e6b7b3fd8ef3a8d76fa3210f5f412f3286056085e68a1f9ca7a13e9bab -size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP06.wav b/ivas_processing_scripts/generation/IR/SAABP06.wav deleted file mode 100644 index a4553381..00000000 --- a/ivas_processing_scripts/generation/IR/SAABP06.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b5d52622ceb146c340c8a52689468c355c09bd3f71ef1f2f5dae9fb5d217b27e -size 
36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP07.wav b/ivas_processing_scripts/generation/IR/SAABP07.wav deleted file mode 100644 index 8e641a98..00000000 --- a/ivas_processing_scripts/generation/IR/SAABP07.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bf6c77ccfa239f5a0cb44a071dcb0d0ca92da0bbc858e4cc060af814ab3ffe3e -size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP01.wav b/ivas_processing_scripts/generation/IR/SAMSP01.wav deleted file mode 100644 index 7d59592a..00000000 --- a/ivas_processing_scripts/generation/IR/SAMSP01.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ba6e8d380d91e5492338ac98df45e444532b92ff84a71f569673610e59cde136 -size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP02.wav b/ivas_processing_scripts/generation/IR/SAMSP02.wav deleted file mode 100644 index b8b62cef..00000000 --- a/ivas_processing_scripts/generation/IR/SAMSP02.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dd991ef690a9c86fa00064c56ad3df3ef726d9b6232efaf256b33cbc1ad3ac32 -size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP03.wav b/ivas_processing_scripts/generation/IR/SAMSP03.wav deleted file mode 100644 index feab358d..00000000 --- a/ivas_processing_scripts/generation/IR/SAMSP03.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9a996729d0c2573d4f219d72db60273b986280fac7ae0f5fe0a35524b83a0d95 -size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP04.wav b/ivas_processing_scripts/generation/IR/SAMSP04.wav deleted file mode 100644 index 0f29ec53..00000000 --- a/ivas_processing_scripts/generation/IR/SAMSP04.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dad01524476b6f8a5fc2d4d31f8c1b7589a836d9b98cc4d27201e42481931962 -size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP05.wav 
b/ivas_processing_scripts/generation/IR/SAMSP05.wav deleted file mode 100644 index 71293903..00000000 --- a/ivas_processing_scripts/generation/IR/SAMSP05.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0f0b5f91b292924c4e1eb1e2d884059720ab5c3eaae05d22230d786f19de7879 -size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP06.wav b/ivas_processing_scripts/generation/IR/SAMSP06.wav deleted file mode 100644 index 0d51fc62..00000000 --- a/ivas_processing_scripts/generation/IR/SAMSP06.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:06e0d7f97b4ce56065d143d19a45ad8c757ed21cf0fe3f8ed05cbedbd966084e -size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP07.wav b/ivas_processing_scripts/generation/IR/SAMSP07.wav deleted file mode 100644 index a20ac5f9..00000000 --- a/ivas_processing_scripts/generation/IR/SAMSP07.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cb825349bec07813ea7ccb936948783aed31683805a3daae867568445820f8ea -size 36764 diff --git a/ivas_processing_scripts/generation/IR/SEABP01.wav b/ivas_processing_scripts/generation/IR/SEABP01.wav deleted file mode 100644 index 6120c6a0..00000000 --- a/ivas_processing_scripts/generation/IR/SEABP01.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a934da1fee82c8131c427680304c9102a3289179697318735b87536d2db6261e -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP02.wav b/ivas_processing_scripts/generation/IR/SEABP02.wav deleted file mode 100644 index 3dc413d8..00000000 --- a/ivas_processing_scripts/generation/IR/SEABP02.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:21bd1f242bf459bda18ea9e444eedbdf97db20e0956e3600c4e3c03870f1a877 -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP03.wav b/ivas_processing_scripts/generation/IR/SEABP03.wav deleted file mode 100644 
index 27d2af1c..00000000 --- a/ivas_processing_scripts/generation/IR/SEABP03.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6bd27e370e9fff391ef37a9e45e3f1583cdcef6ce23cef6135368fb6964674f2 -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP04.wav b/ivas_processing_scripts/generation/IR/SEABP04.wav deleted file mode 100644 index ed3c9918..00000000 --- a/ivas_processing_scripts/generation/IR/SEABP04.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c4399629b729b0ceb8b30f3c994b736557bd8b35a968cb80cba486833b7c54d1 -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP05.wav b/ivas_processing_scripts/generation/IR/SEABP05.wav deleted file mode 100644 index 2e990d65..00000000 --- a/ivas_processing_scripts/generation/IR/SEABP05.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a5c7af3d46eea2d738cb1c6e25a351489f9daff2976c365251595cec719b7ebe -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP06.wav b/ivas_processing_scripts/generation/IR/SEABP06.wav deleted file mode 100644 index 3d1397a0..00000000 --- a/ivas_processing_scripts/generation/IR/SEABP06.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fb284bd97e306b890b9ccdd2e7649c602f6fd78774c1b2140b29051126a1fece -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP07.wav b/ivas_processing_scripts/generation/IR/SEABP07.wav deleted file mode 100644 index 075da1a1..00000000 --- a/ivas_processing_scripts/generation/IR/SEABP07.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a4fce653f7d80f389f3114a1e07688c5ad292e1419a59c6c4630a3bb8f2bf74 -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP01.wav b/ivas_processing_scripts/generation/IR/SEBIP01.wav deleted file mode 100644 index a6068236..00000000 --- 
a/ivas_processing_scripts/generation/IR/SEBIP01.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:55a349cb20898415609ea49187f871ba2dc980d07a1fa36fb655efde96208b4c -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP02.wav b/ivas_processing_scripts/generation/IR/SEBIP02.wav deleted file mode 100644 index 10f8a62c..00000000 --- a/ivas_processing_scripts/generation/IR/SEBIP02.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b35e9171ceaeb3e4e00f1e73b337c39c6c933620c39394e3a5ff095535db657a -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP03.wav b/ivas_processing_scripts/generation/IR/SEBIP03.wav deleted file mode 100644 index fd0ec69f..00000000 --- a/ivas_processing_scripts/generation/IR/SEBIP03.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:524e0505f83bc579774e5e36f730b40fcb62b9b10f3a7767cec4389f4689d87b -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP04.wav b/ivas_processing_scripts/generation/IR/SEBIP04.wav deleted file mode 100644 index 30be4326..00000000 --- a/ivas_processing_scripts/generation/IR/SEBIP04.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aa2e8e18ef82a299d142fcbfc462b2370472fa202cbe361e1d661c20e21cd4c8 -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP05.wav b/ivas_processing_scripts/generation/IR/SEBIP05.wav deleted file mode 100644 index 91e57937..00000000 --- a/ivas_processing_scripts/generation/IR/SEBIP05.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:88d9cb43b175c2cf94eb861780e48fdb56da0bc4a2dd4f6034b179fa17dd09ab -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP06.wav b/ivas_processing_scripts/generation/IR/SEBIP06.wav deleted file mode 100644 index eb589f49..00000000 --- a/ivas_processing_scripts/generation/IR/SEBIP06.wav +++ /dev/null @@ -1,3 +0,0 
@@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca350c682655d3ba8b075e3744adde034783dc87036b8fa9aaf9ccb3500f9286 -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP07.wav b/ivas_processing_scripts/generation/IR/SEBIP07.wav deleted file mode 100644 index d8a20381..00000000 --- a/ivas_processing_scripts/generation/IR/SEBIP07.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:79e4520ada475e1c37b8707da8062bbbfb26e617261e0130b7344b2bc1a937c5 -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP01.wav b/ivas_processing_scripts/generation/IR/SEMSP01.wav deleted file mode 100644 index 4dab142a..00000000 --- a/ivas_processing_scripts/generation/IR/SEMSP01.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:66a37fc3855a0929cf4a4702301bf231fe346f1964b845b9cf464a5bfd3e29ad -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP02.wav b/ivas_processing_scripts/generation/IR/SEMSP02.wav deleted file mode 100644 index d59419c5..00000000 --- a/ivas_processing_scripts/generation/IR/SEMSP02.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1cf267a42add5770e08b756d5577e95459b3efc5e49076ac910bb00aabe879b1 -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP03.wav b/ivas_processing_scripts/generation/IR/SEMSP03.wav deleted file mode 100644 index 0e2e8205..00000000 --- a/ivas_processing_scripts/generation/IR/SEMSP03.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b4947d0762a6d653690d164c1a0dc09acc9c2bf38e8c28f33b9661d899094cd7 -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP04.wav b/ivas_processing_scripts/generation/IR/SEMSP04.wav deleted file mode 100644 index dc665c65..00000000 --- a/ivas_processing_scripts/generation/IR/SEMSP04.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:5f8a703057836541f8ca3e1e788d95302adcf983e12e1f6481e0743548559eeb -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP05.wav b/ivas_processing_scripts/generation/IR/SEMSP05.wav deleted file mode 100644 index aec9c66f..00000000 --- a/ivas_processing_scripts/generation/IR/SEMSP05.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:12613e8b6f43d6a8df2a4b78961fcacc2956d3b0bd8e3321fdea487ab00679ab -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP06.wav b/ivas_processing_scripts/generation/IR/SEMSP06.wav deleted file mode 100644 index 84f990ed..00000000 --- a/ivas_processing_scripts/generation/IR/SEMSP06.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b107956649319df472cfe311e278f73735708957b44f7af6a6e444a33b7cb9d0 -size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP07.wav b/ivas_processing_scripts/generation/IR/SEMSP07.wav deleted file mode 100644 index bf89445a..00000000 --- a/ivas_processing_scripts/generation/IR/SEMSP07.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ef5a76c1026510861b8cac697415e6e08810857252b9ce52e0157c6024400bf2 -size 42112 -- GitLab From e0df0208163952aad6ac71bb8614a8bc74e34252 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Wed, 7 Jun 2023 10:41:08 +0200 Subject: [PATCH 03/11] support for using prefixes for input_path, IR and output_path --- item_gen_configs/P800-1.yml | 172 +++++++++--------- .../generation/process_stereo_items.py | 87 +++++++-- 2 files changed, 160 insertions(+), 99 deletions(-) diff --git a/item_gen_configs/P800-1.yml b/item_gen_configs/P800-1.yml index 56da0194..0ca2f87f 100644 --- a/item_gen_configs/P800-1.yml +++ b/item_gen_configs/P800-1.yml @@ -3,6 +3,11 @@ # General configuration ################################################ +### Any relative paths will be interpreted relative to the working directory the script is called from! 
+### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths + ### Output format format: "STEREO" @@ -12,16 +17,11 @@ fs: 48000 ### IR sampling rate in Hz (only for files in .pcm format); default = 48000 # IR_fs: 48000 -### Any relative paths will be interpreted relative to the working directory the script is called from! -### Usage of absolute paths is recommended. -### Do not use file names with dots "." in them! This is not supported, use "_" instead -### For Windows users: please use double back slash '\\' in paths - ### Input path to mono files input_path: "./items_mono" -### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' -# IR_path: "./IR" +### Input path to stereo impulse response files, default = './IRs' +IR_path: "./IRs" ### Output path for generated test items and metadata files output_path: "experiments/selection/P800-1/proc_input" @@ -33,24 +33,24 @@ loudness: -26 preamble: 0.5 postamble: 1.0 -### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) +### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = False (silence) add_low_level_random_noise: true -### File designators -listening_lab: "a" -language: "JP" +### File designators, default is "l" for listening lab, "EN" for language, "p01" for exp and "g" for provider +listening_lab: "l" +language: "EN" exp: "p01" -# provider: "Dolby" +provider: "g" -### Use prefix for all input filenames (default: None) +### Use prefix for all input filenames (default: "") ### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) use_input_prefix: "lLLeee" -### Use prefix for all IR filenames (default: None) 
+### Use prefix for all IR filenames (default: "") ### p stands for the 'provider', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) # use_IR_prefix: "IR_pp_eee_" -### Use prefix for all output filenames (default: None) +### Use prefix for all output filenames (default: "") ### l stands for the 'listening_lab' designator, e stands for the 'exp' designator (the number of consecutive letters define the length of the field) use_output_prefix: "leee" @@ -109,251 +109,251 @@ scenes: name: "cat1/a1s01.wav" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["f1s01.wav", "m1s01.wav"] - IR: ["stereo/IR_g_p01_a_01_00_stAB100.wav", "stereo/IR_g_p01_a_07_00_stAB100.wav"] + IR: ["IR_g_p01_a_01_00_stAB100.wav", "IR_g_p01_a_07_00_stAB100.wav"] overlap: 1.0 cat1_2: - name: "cat1/a1s02" + name: "cat1/a1s02.wav" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["m2s01.wav", "f2s01.wav"] - IR: ["stereo/IR_g_p01_a_05_00_stAB100.wav", "stereo/IR_g_p01_a_03_00_stAB100.wav"] + IR: ["IR_g_p01_a_05_00_stAB100.wav", "IR_g_p01_a_03_00_stAB100.wav"] overlap: 1.0 cat1_3: - name: "cat1/a1s03" + name: "cat1/a1s03.wav" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["f3s01.wav", "m3s01.wav"] - IR: ["stereo/IR_g_p01_a_02_00_stAB100.wav", "stereo/IR_g_p01_a_06_00_stAB100.wav"] + IR: ["IR_g_p01_a_02_00_stAB100.wav", "IR_g_p01_a_06_00_stAB100.wav"] overlap: 1.0 cat1_4: - name: "cat1/a1s04" + name: "cat1/a1s04.wav" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." 
source: ["m1s02.wav", "f1s02.wav"] - IR: ["stereo/IR_g_p01_a_04_00_stAB100.wav", "stereo/IR_g_p01_a_01_00_stAB100.wav"] + IR: ["IR_g_p01_a_04_00_stAB100.wav", "IR_g_p01_a_01_00_stAB100.wav"] overlap: 1.0 cat1_5: - name: "cat1/a1s05" + name: "cat1/a1s05.wav" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["f2s02.wav", "m2s02.wav"] - IR: ["stereo/IR_g_p01_a_03_00_stAB100.wav", "stereo/IR_g_p01_a_04_00_stAB100.wav"] + IR: ["IR_g_p01_a_03_00_stAB100.wav", "IR_g_p01_a_04_00_stAB100.wav"] overlap: 1.0 cat1_6: - name: "cat1/a1s06" + name: "cat1/a1s06.wav" description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers." source: ["m3s02.wav", "f3s02.wav"] - IR: ["stereo/IR_g_p01_a_07_00_stAB100.wav", "stereo/IR_g_p01_a_02_00_stAB100.wav"] + IR: ["IR_g_p01_a_07_00_stAB100.wav", "IR_g_p01_a_02_00_stAB100.wav"] overlap: 1.0 cat2_1: - name: "cat2/a2s01" + name: "cat2/a2s01.wav" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["m3s03.wav", "f3s03.wav"] - IR: ["stereo/IR_g_p01_b_05_00_stAB150.wav", "stereo/IR_g_p01_b_11_00_stAB150.wav"] + IR: ["IR_g_p01_c_05_00_stAB150.wav", "IR_g_p01_c_11_00_stAB150.wav"] overlap: -1.0 cat2_2: - name: "cat2/a2s02" + name: "cat2/a2s02.wav" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["f1s03.wav", "m1s03.wav"] - IR: ["stereo/IR_g_p01_b_01_00_stAB150.wav", "stereo/IR_g_p01_b_06_00_stAB150.wav"] + IR: ["IR_g_p01_c_01_00_stAB150.wav", "IR_g_p01_c_06_00_stAB150.wav"] overlap: -1.0 cat2_3: - name: "cat2/a2s03" + name: "cat2/a2s03.wav" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." 
source: ["m2s03.wav", "f2s03.wav"] - IR: ["stereo/IR_g_p01_b_03_00_stAB150.wav", "stereo/IR_g_p01_b_07_00_stAB150.wav"] + IR: ["IR_g_p01_c_03_00_stAB150.wav", "IR_g_p01_c_07_00_stAB150.wav"] overlap: -1.0 cat2_4: - name: "cat2/a2s04" + name: "cat2/a2s04.wav" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["f3s04.wav", "m3s04.wav"] - IR: ["stereo/IR_g_p01_b_05_00_stAB150.wav", "stereo/IR_g_p01_b_08_00_stAB150.wav"] + IR: ["IR_g_p01_c_05_00_stAB150.wav", "IR_g_p01_c_08_00_stAB150.wav"] overlap: -1.0 cat2_5: - name: "cat2/a2s05" + name: "cat2/a2s05.wav" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["m1s04.wav", "f1s04.wav"] - IR: ["stereo/IR_g_p01_b_09_00_stAB150.wav", "stereo/IR_g_p01_b_07_00_stAB150.wav"] + IR: ["IR_g_p01_c_09_00_stAB150.wav", "IR_g_p01_c_07_00_stAB150.wav"] overlap: -1.0 cat2_6: - name: "cat2/a2s06" + name: "cat2/a2s06.wav" description: "Large anechoic room with AB microphone pickup, no overlap between the talkers." source: ["f2s04.wav", "m2s04.wav"] - IR: ["stereo/IR_g_p01_b_10_00_stAB150.wav", "stereo/IR_g_p01_b_09_00_stAB150.wav"] + IR: ["IR_g_p01_c_10_00_stAB150.wav", "IR_g_p01_c_09_00_stAB150.wav"] overlap: -1.0 cat3_1: - name: "cat3/a3s01" + name: "cat3/a3s01.wav" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["f2s05.wav", "m2s05.wav"] - IR: ["stereo/IR_g_p01_a_01_00_stMS100.wav", "stereo/IR_g_p01_a_07_00_stMS100.wav"] + IR: ["IR_g_p01_a_01_00_stMS100.wav", "IR_g_p01_a_07_00_stMS100.wav"] overlap: -1.0 cat3_2: - name: "cat3/a3s02" + name: "cat3/a3s02.wav" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." 
source: ["m3s05.wav", "f3s05.wav"] - IR: ["stereo/IR_g_p01_a_05_00_stMS100.wav", "stereo/IR_g_p01_a_03_00_stMS100.wav"] + IR: ["IR_g_p01_a_05_00_stMS100.wav", "IR_g_p01_a_03_00_stMS100.wav"] overlap: -1.0 cat3_3: - name: "cat3/a3s03" + name: "cat3/a3s03.wav" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["f1s05.wav", "m1s05.wav"] - IR: ["stereo/IR_g_p01_a_02_00_stMS100.wav", "stereo/IR_g_p01_a_06_00_stMS100.wav"] + IR: ["IR_g_p01_a_02_00_stMS100.wav", "IR_g_p01_a_06_00_stMS100.wav"] overlap: -1.0 cat3_4: - name: "cat3/a3s04" + name: "cat3/a3s04.wav" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["m2s06.wav", "f2s06.wav"] - IR: ["stereo/IR_g_p01_a_04_00_stMS100.wav", "stereo/IR_g_p01_a_01_00_stMS100.wav"] + IR: ["IR_g_p01_a_04_00_stMS100.wav", "IR_g_p01_a_01_00_stMS100.wav"] overlap: -1.0 cat3_5: - name: "cat3/a3s05" + name: "cat3/a3s05.wav" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["f3s06.wav", "m3s06.wav"] - IR: ["stereo/IR_g_p01_a_03_00_stMS100.wav", "stereo/IR_g_p01_a_04_00_stMS100.wav"] + IR: ["IR_g_p01_a_03_00_stMS100.wav", "IR_g_p01_a_04_00_stMS100.wav"] overlap: -1.0 cat3_6: - name: "cat3/a3s06" + name: "cat3/a3s06.wav" description: "Small anechoic room with MS microphone pickup, no overlap between the talkers." source: ["m1s06.wav", "f1s06.wav"] - IR: ["stereo/IR_g_p01_a_07_00_stMS100.wav", "stereo/IR_g_p01_a_02_00_stMS100.wav"] + IR: ["IR_g_p01_a_07_00_stMS100.wav", "IR_g_p01_a_02_00_stMS100.wav"] overlap: -1.0 cat4_1: - name: "cat4/a4s01" + name: "cat4/a4s01.wav" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." 
source: ["m1s07.wav", "f1s07.wav"] - IR: ["stereo/IR_g_p01_b_01_00_stAB100.wav", "stereo/IR_g_p01_b_07_00_stAB100.wav"] + IR: ["IR_g_p01_b_01_00_stAB100.wav", "IR_g_p01_b_07_00_stAB100.wav"] overlap: 1.0 cat4_2: - name: "cat4/a4s02" + name: "cat4/a4s02.wav" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f2s07.wav", "m2s07.wav"] - IR: ["stereo/IR_g_p01_b_07_00_stAB100.wav", "stereo/IR_g_p01_b_03_00_stAB100.wav"] + IR: ["IR_g_p01_b_07_00_stAB100.wav", "IR_g_p01_b_03_00_stAB100.wav"] overlap: 1.0 cat4_3: - name: "cat4/a4s03" + name: "cat4/a4s03.wav" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m3s07.wav", "f3s07.wav"] - IR: ["stereo/IR_g_p01_b_02_00_stAB100.wav", "stereo/IR_g_p01_b_06_00_stAB100.wav"] + IR: ["IR_g_p01_b_02_00_stAB100.wav", "IR_g_p01_b_06_00_stAB100.wav"] overlap: 1.0 cat4_4: - name: "cat4/a4s04" + name: "cat4/a4s04.wav" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f1s08.wav", "m1s08.wav"] - IR: ["stereo/IR_g_p01_b_04_00_stAB100.wav", "stereo/IR_g_p01_b_01_00_stAB100.wav"] + IR: ["IR_g_p01_b_04_00_stAB100.wav", "IR_g_p01_b_01_00_stAB100.wav"] overlap: 1.0 cat4_5: - name: "cat4/a4s05" + name: "cat4/a4s05.wav" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m2s08.wav", "f2s08.wav"] - IR: ["stereo/IR_g_p01_b_03_00_stAB100.wav", "stereo/IR_g_p01_b_04_00_stAB100.wav"] + IR: ["IR_g_p01_b_03_00_stAB100.wav", "IR_g_p01_b_04_00_stAB100.wav"] overlap: 1.0 cat4_6: - name: "cat4/a4s06" + name: "cat4/a4s06.wav" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers." 
source: ["f3s08.wav", "m3s08.wav"] - IR: ["stereo/IR_g_p01_b_07_00_stAB100.wav", "stereo/IR_g_p01_b_02_00_stAB100.wav"] + IR: ["IR_g_p01_b_07_00_stAB100.wav", "IR_g_p01_b_02_00_stAB100.wav"] overlap: 1.0 cat5_1: - name: "cat5/a5s01" + name: "cat5/a5s01.wav" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f3s09.wav", "m3s09.wav"] - IR: ["stereo/IR_g_p01_d_02_00_stAB150.wav", "stereo/IR_g_p01_d_08_00_stAB150.wav"] + IR: ["IR_g_p01_d_02_00_stAB150.wav", "IR_g_p01_d_08_00_stAB150.wav"] overlap: 1.0 cat5_2: - name: "cat5/a5s02" + name: "cat5/a5s02.wav" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m1s09.wav", "f1s09.wav"] - IR: ["stereo/IR_g_p01_d_09_00_stAB150.wav", "stereo/IR_g_p01_d_04_00_stAB150.wav"] + IR: ["IR_g_p01_d_09_00_stAB150.wav", "IR_g_p01_d_04_00_stAB150.wav"] overlap: 1.0 cat5_3: - name: "cat5/a5s03" + name: "cat5/a5s03.wav" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["f2s09.wav", "m2s09.wav"] - IR: ["stereo/IR_g_p01_d_06_00_stAB150.wav", "stereo/IR_g_p01_d_10_00_stAB150.wav"] + IR: ["IR_g_p01_d_06_00_stAB150.wav", "IR_g_p01_d_10_00_stAB150.wav"] overlap: 1.0 cat5_4: - name: "cat5/a5s04" + name: "cat5/a5s04.wav" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m3s10.wav", "f3s10.wav"] - IR: ["stereo/IR_g_p01_d_11_00_stAB150.wav", "stereo/IR_g_p01_d_08_00_stAB150.wav"] + IR: ["IR_g_p01_d_11_00_stAB150.wav", "IR_g_p01_d_08_00_stAB150.wav"] overlap: 1.0 cat5_5: - name: "cat5/a5s05" + name: "cat5/a5s05.wav" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." 
source: ["f1s10.wav", "m1s10.wav"] - IR: ["stereo/IR_g_p01_d_10_00_stAB150.wav", "stereo/IR_g_p01_d_12_00_stAB150.wav"] + IR: ["IR_g_p01_d_10_00_stAB150.wav", "IR_g_p01_d_12_00_stAB150.wav"] overlap: 1.0 cat5_6: - name: "cat5/a5s06" + name: "cat5/a5s06.wav" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers." source: ["m2s10.wav", "f2s10.wav"] - IR: ["stereo/IR_g_p01_d_12_00_stAB150.wav", "stereo/IR_g_p01_d_01_00_stAB150.wav"] + IR: ["IR_g_p01_d_12_00_stAB150.wav", "IR_g_p01_d_01_00_stAB150.wav"] overlap: 1.0 cat6_1: - name: "cat6/a6s01" + name: "cat6/a6s01.wav" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["m2s11.wav", "f2s11.wav"] - IR: ["stereo/IR_g_p01_b_01_00_stAB100.wav", "stereo/IR_g_p01_b_07_00_stAB100.wav"] + IR: ["IR_g_p01_b_01_00_stBI100.wav", "IR_g_p01_b_07_00_stBI100.wav"] overlap: -1.0 cat6_2: - name: "cat6/a6s02" + name: "cat6/a6s02.wav" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["f3s11.wav", "m3s11.wav"] - IR: ["stereo/IR_g_p01_b_05_00_stAB100.wav", "stereo/IR_g_p01_b_03_00_stAB100.wav"] + IR: ["IR_g_p01_b_05_00_stBI100.wav", "IR_g_p01_b_03_00_stBI100.wav"] overlap: -1.0 cat6_3: - name: "cat6/a6s03" + name: "cat6/a6s03.wav" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["m1s11.wav", "f1s11.wav"] - IR: ["stereo/IR_g_p01_b_02_00_stAB100.wav", "stereo/IR_g_p01_b_06_00_stAB100.wav"] + IR: ["IR_g_p01_b_02_00_stBI100.wav", "IR_g_p01_b_06_00_stBI100.wav"] overlap: -1.0 cat6_4: - name: "cat6/a6s04" + name: "cat6/a6s04.wav" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." 
source: ["f2s12.wav", "m2s12.wav"] - IR: ["stereo/IR_g_p01_b_04_00_stAB100.wav", "stereo/IR_g_p01_b_01_00_stAB100.wav"] + IR: ["IR_g_p01_b_04_00_stBI100.wav", "IR_g_p01_b_01_00_stBI100.wav"] overlap: -1.0 cat6_5: - name: "cat6/a6s05" + name: "cat6/a6s05.wav" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["m3s12.wav", "f3s12.wav"] - IR: ["stereo/IR_g_p01_b_03_00_stAB100.wav", "stereo/IR_g_p01_b_04_00_stAB100.wav"] + IR: ["IR_g_p01_b_03_00_stBI100.wav", "IR_g_p01_b_04_00_stBI100.wav"] overlap: -1.0 cat6_6: - name: "cat6/a6s06" + name: "cat6/a6s06.wav" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers." source: ["f1s12.wav", "m1s12.wav"] - IR: ["stereo/IR_g_p01_b_07_00_stAB100.wav", "stereo/IR_g_p01_b_02_00_stAB100.wav"] + IR: ["IR_g_p01_b_07_00_stBI100.wav", "IR_g_p01_b_02_00_stBI100.wav"] overlap: -1.0 diff --git a/ivas_processing_scripts/generation/process_stereo_items.py b/ivas_processing_scripts/generation/process_stereo_items.py index 7ddfecac..10ac66bf 100644 --- a/ivas_processing_scripts/generation/process_stereo_items.py +++ b/ivas_processing_scripts/generation/process_stereo_items.py @@ -29,10 +29,10 @@ # accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and # the United Nations Convention on Contracts on the International Sales of Goods. 
# - +import pdb import logging import os -from itertools import repeat +from itertools import repeat, groupby from math import floor import numpy as np @@ -51,6 +51,25 @@ def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] +# function for searching sequences of the same character and replacing them by another string +def replace_char_seq_with_string(str, char_seq, repl_str): + result = [] + + # find groups of consecutive letters + groups = ["".join(list(g)) for k, g in groupby(str)] + + # limit the length of the replacement string by the length of the character sequence + repl_str = repl_str[:len(char_seq)] + + # replace each occurrence of the sequence of characters + for g in groups: + if char_seq in g: + result.append(repl_str) + else: + result.append(g) + + return "".join(result) + def generate_stereo_items( cfg: config.TestConfig, @@ -79,12 +98,54 @@ def generate_stereo_items( # set the IR path if "IR_path" not in cfg.__dict__: - cfg.IR_path = os.path.join(os.path.dirname(__file__), "IR") + cfg.IR_path = os.path.join(os.path.dirname(__file__), "IRs") # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False + # set the listening lab designator + if "listening_lab" not in cfg.__dict__: + cfg.listening_lab = "l" + + # set the language designator + if "language" not in cfg.__dict__: + cfg.language = "EN" + + # set the experiment designator + if "exp" not in cfg.__dict__: + cfg.exp = "p01" + + # set the provider + if "provider" not in cfg.__dict__: + cfg.provider = "g" + + # set the prefix for all input filenames + if "use_input_prefix" not in cfg.__dict__: + cfg.use_input_prefix = "" + else: + # replace file designators + cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) + cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) + cfg.use_input_prefix =
replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) + + # set the prefix for all IR filenames + if "use_IR_prefix" not in cfg.__dict__: + cfg.use_IR_prefix = "" + else: + # replace file designators + cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "p", cfg.provider ) + cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "LL", cfg.language ) + cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "eee", cfg.exp ) + + # set the prefix for all output filenames + if "use_output_prefix" not in cfg.__dict__: + cfg.use_output_prefix = None + else: + # replace file designators + cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) + cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) + # set multiprocessing if "multiprocessing" not in cfg.__dict__: cfg.multiprocessing = True @@ -108,8 +169,8 @@ def generate_stereo_scene( # extract the number of audio sources N_sources = len(np.atleast_1d(scene["source"])) - # read the IR (check if stereo or two mono files were provided) - source_IR = np.atleast_1d(scene["IR"]) + # read the IR source file (check if stereo or two mono files were provided) + # source_IR = np.atleast_1d(scene["IR"]) # read the overlap length if "overlap" in scene.keys(): @@ -119,18 +180,18 @@ def generate_stereo_scene( y = audio.ChannelBasedAudio("STEREO") for i in range(N_sources): - source_prefix = cfg.listening_lab + cfg.language + cfg.exp + # parse parameters from the scene description - source_file = source_prefix + np.atleast_1d(scene["source"])[i] + source_file = np.atleast_1d(scene["source"])[i] IR_file = np.atleast_1d(scene["IR"])[i] - logger.info(f"Convolving {source_file} with {source_IR}") + logger.info(f"Convolving {source_file} with {IR_file}") # read source file - x = audio.fromfile("MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs) + x = audio.fromfile("MONO", 
os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs) # read the IR file - IR = audio.fromfile("STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs) + IR = audio.fromfile("STEREO", os.path.join(cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file)), fs=cfg.IR_fs) # convolve with stereo IR x = reverb_stereo(x, IR) @@ -221,9 +282,9 @@ def generate_stereo_scene( y.audio += noise # write the reverberated audio into output file - output_filename = cfg.listening_lab + cfg.exp + scene["name"] + ".wav" + # output_filename = cfg.listening_lab + cfg.exp + scene["name"] + ".wav" audiofile.write( - os.path.join(cfg.output_path, scene_name.split("_")[0], output_filename), + os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), y.audio, y.fs, - ) # !!!! TBD: replace all os.path.xxx operations with the Path object + ) -- GitLab From a8a80dc078fc2647e7a9ccab37267d427a4af86d Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Wed, 7 Jun 2023 10:55:57 +0200 Subject: [PATCH 04/11] naming convention in P800-2.yml --- item_gen_configs/P800-2.yml | 230 ++++++++++++++++++++---------------- 1 file changed, 127 insertions(+), 103 deletions(-) diff --git a/item_gen_configs/P800-2.yml b/item_gen_configs/P800-2.yml index 1f566421..d0b65b51 100644 --- a/item_gen_configs/P800-2.yml +++ b/item_gen_configs/P800-2.yml @@ -3,19 +3,19 @@ # General configuration ################################################ +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! 
This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + ### Output format format: "STEREO" -### Output sampling rate in Hz needed for headerless audio files; default = 48000 +### Output sampling rate in Hz; default = 48000 fs: 48000 -### IR sampling rate in Hz needed for headerless audio files; default = 48000 -IR_fs: 32000 - -### Any relative paths will be interpreted relative to the working directory the script is called from! -### Usage of absolute paths is recommended. -### Do not use file names with dots "." in them! This is not supported, use "_" instead -### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions +### IR sampling rate in Hz (only for files in .pcm format); default = 48000 +# IR_fs: 48000 ### Input path to mono files input_path: "./items_mono" @@ -36,300 +36,324 @@ postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true -### File designators -listening_lab: "b" -language: "GE" +### File designators, default is "l" for listening lab, "EN" for language, "p02" for exp and "g" for provider +listening_lab: "l" +language: "EN" exp: "p02" +provider: "g" + +### Use prefix for all input filenames (default: "") +### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +use_input_prefix: "lLLeee" + +### Use prefix for all IR filenames (default: "") +### p stands for the 'provider', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +# use_IR_prefix: "IR_pp_eee_" + +### Use prefix for all output filenames (default: "") +### l stands for the 'listening_lab' designator, e stands for the 'exp' designator (the number of consecutive letters define 
the length of the field) +use_output_prefix: "leee" -################################################ -### Input files -################################################ -### s.wav -### -### With -### -### = a (Force Technology), b (HEAD acoustics), -### c (MQ University), d (Mesaqin.com) -### = JP, FR, GE, MA, DA, EN -### = p01, p02, p04, p05, p06, p07, p08, p09 -### = f1, f2, f3, m1, m2, m3 -### = 01, …, 14 ################################################ ### Scene description ################################################ -### Each scene must have a unique name -### Specify the mono source filenames (the program will search for it in the input_path folder) -### Specify the stereo IR source filenames (the program will search for it in the input_path_IR folder) -### Specify the overlap length in seconds for each input source (negative value creates a gap) -### Note 1: use brackets [val1, val2, ...] for multiple sources in a scene -### Note 2: use the Matlab notation "start:step:stop" to create moving sources (step will be applied in 20ms frames) +### Each scene must begin by specifying the category in the following format: catN_I where N is the category index and I is the scene index +### Each scene shall be described using the following parameters/properties: +### name: filename of the generated output item (the program will save the generated items in the output_path folder, note: it is possible to use subfolders, e.g. items_stereo/x1_s01.wav) +### description: textual description of the scene +### source: filename(s) of the mono input sources (the program will search for it in the input_path folder) +### IR: filename(s) of the input IRs (the program will search for it in the IR_path folder) +### overlap: overlap length between two input sources in seconds (negative value creates a gap) +### Note 1: use brackets [val1, val2, ...]
when specifying multiple values + +### Naming convention for the input mono files +### The input filenames are represented by: +### lLLeeettszz.wav +### where: +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### LL stands for the language: JP, FR, GE, MA, DA, EN +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### tt stands for the talker ID: f1, f2, f3, m1, m2, m3 +### s stands for 'sample' and zz is the sample number; 01, ..., 14 + +### Naming convention for the input IR files +### The input IR filenames are represented by: +### IR_pp_eee_r_tt_mm_ffffff.wav +### where: +### pp stands for the provider: do (Dolby), no (Nokia), or (Orange), vo (VoiceAge), g (G.191) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### r stands for the room ID: a, b, c, ... +### tt stands for the talker position: 01, 02, ... +### mm stands for the microphone position: 00, 01, 02, ... +### ffffff stands for the format ID: stAB20, stABC20, stAB100, stAB150, stMS, stBin, FOA, HOA2 -### Naming convention for P.800 items -### The filenames of the input content samples are represented by: +### Naming convention for the generated output files +### The output filenames are represented by: ### leeeayszz.wav -### The filenames of the accompanying metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: +### The filenames of the accompanying output metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: ### leeeayszz.met for metadata-assisted spatial audio ### leeeayszz.wav.o.csv for object-based audio ### where: -### l stands for the listening lab designator (a through d according to Table 2) -### eee stands for the experiment designator, e.g. 
p01 (see Table 1) -### a stands audio, and y is the per experiment category according to IVAS-8a -### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07, where 07 is the preliminary -### o stands for object number; 0, 1, 2, 3 +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### a stands for 'audio' +### y is the per-experiment category according to IVAS-8a: 01, 02, 03, 04, 05, 06 +### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07 (07 is the preliminary sample) +### o stands for the object number; 0, 1, 2, 3 scenes: cat1_1: - name: "a1s01" + name: "cat1/a1s01" description: "Car with AB microphone pickup, no overlap between the talkers, car noise." source: ["f1s01.wav", "m1s01.wav"] IR: ["Car_TalkPos5_Stereo_M5_SinSweep_2chn.wav", "Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav"] overlap: -1.0 cat1_2: - name: "a1s02" + name: "cat1/a1s02.wav" description: "Car with AB microphone pickup, no overlap between the talkers, car noise." source: ["m2s01.wav", "f2s01.wav"] IR: ["Car_TalkPos2_Stereo_M5_SinSweep_2chn.wav", "Car_TalkPos5_Stereo_M5_SinSweep_2chn.wav"] overlap: -1.0 cat1_3: - name: "a1s03" + name: "cat1/a1s03.wav" description: "Car with AB microphone pickup, no overlap between the talkers, car noise." source: ["f3s01.wav", "m3s01.wav"] IR: ["Car_TalkPos5_Stereo_M5_SinSweep_2chn.wav", "Car_TalkPos3_Stereo_M5_SinSweep_2chn.wav"] overlap: -1.0 cat1_4: - name: "a1s04" + name: "cat1/a1s04.wav" description: "Car with AB microphone pickup, no overlap between the talkers, car noise." source: ["m1s02.wav", "f1s02.wav"] IR: ["Car_TalkPos4_Stereo_M5_SinSweep_2chn.wav", "Car_TalkPos5_Stereo_M5_SinSweep_2chn.wav"] overlap: -1.0 cat1_5: - name: "a1s05" + name: "cat1/a1s05.wav" description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
source: ["f2s02.wav", "m2s02.wav"] IR: ["Car_TalkPos2_Stereo_M5_SinSweep_2chn.wav", "Car_TalkPos4_Stereo_M5_SinSweep_2chn.wav"] overlap: -1.0 cat1_6: - name: "a1s06" + name: "cat1/a1s06.wav" description: "Car with AB microphone pickup, no overlap between the talkers, car noise." source: ["m3s02.wav", "f3s02.wav"] IR: ["Car_TalkPos3_Stereo_M5_SinSweep_2chn.wav", "Car_TalkPos2_Stereo_M5_SinSweep_2chn.wav"] overlap: -1.0 cat2_1: - name: "a2s01" + name: "cat2/a2s01.wav" description: "Car with AB microphone pickup, no overlap between the talkers, street noise." source: ["m3s03.wav", "f3s03.wav"] IR: ["FreeField_IR_Python_AB_20cm_Pos1.wav", "FreeField_IR_Python_AB_20cm_Pos4.wav"] overlap: -1.0 cat2_2: - name: "a2s02" - description: "Car with AB microphone pickup, no overlap between the talkers, street noise." + name: "cat2/a2s02.wav" + description: "Street conversation with AB microphone pickup, no overlap between the talkers, street noise." source: ["f1s03.wav", "m1s03.wav"] IR: ["FreeField_IR_Python_AB_20cm_Pos0.wav", "FreeField_IR_Python_AB_20cm_Pos1.wav"] overlap: -1.0 cat2_3: - name: "a2s03" - description: "Car with AB microphone pickup, no overlap between the talkers, street noise." + name: "cat2/a2s03.wav" + description: "Street conversation with AB microphone pickup, no overlap between the talkers, street noise." source: ["m2s03.wav", "f2s03.wav"] IR: ["FreeField_IR_Python_AB_20cm_Pos2.wav", "FreeField_IR_Python_AB_20cm_Pos0.wav"] overlap: -1.0 cat2_4: - name: "a2s04" - description: "Car with AB microphone pickup, no overlap between the talkers, street noise." + name: "cat2/a2s04.wav" + description: "Street conversation with AB microphone pickup, no overlap between the talkers, street noise." source: ["f3s04.wav", "m3s04.wav"] IR: ["FreeField_IR_Python_AB_20cm_Pos1.wav", "FreeField_IR_Python_AB_20cm_Pos3.wav"] overlap: -1.0 cat2_5: - name: "a2s05" - description: "Car with AB microphone pickup, no overlap between the talkers, street noise." 
+ name: "cat2/a2s05.wav" + description: "Street conversation with AB microphone pickup, no overlap between the talkers, street noise." source: ["m1s04.wav", "f1s04.wav"] IR: ["FreeField_IR_Python_AB_20cm_Pos4.wav", "FreeField_IR_Python_AB_20cm_Pos2.wav"] overlap: -1.0 cat2_6: - name: "a2s06" - description: "Car with AB microphone pickup, no overlap between the talkers, street noise." + name: "cat2/a2s06.wav" + description: "Street conversation with AB microphone pickup, no overlap between the talkers, street noise." source: ["f2s04.wav", "m2s04.wav"] IR: ["FreeField_IR_Python_AB_20cm_Pos3.wav", "FreeField_IR_Python_AB_20cm_Pos0.wav"] overlap: -1.0 cat3_1: - name: "a3s01" + name: "cat3/a3s01.wav" description: "Small echoic room with MS microphone pickup, no overlap between the talkers, office noise." source: ["f2s05.wav", "m2s05.wav"] - IR: ["SEMSP01.wav", "SEMSP07.wav"] + IR: ["IR_g_p02_b_01_00_stMS100.wav", "IR_g_p02_b_07_00_stMS100.wav"] overlap: -1.0 cat3_2: - name: "a3s02" + name: "cat3/a3s02.wav" description: "Small echoic room with MS microphone pickup, no overlap between the talkers, office noise." source: ["m3s05.wav", "f3s05.wav"] - IR: ["SEMSP05.wav", "SEMSP03.wav"] + IR: ["IR_g_p02_b_05_00_stMS100.wav", "IR_g_p02_b_03_00_stMS100.wav"] overlap: -1.0 cat3_3: - name: "a3s03" + name: "cat3/a3s03.wav" description: "Small echoic room with MS microphone pickup, no overlap between the talkers, office noise." source: ["f1s05.wav", "m1s05.wav"] - IR: ["SEMSP02.wav", "SEMSP06.wav"] + IR: ["IR_g_p02_b_02_00_stMS100.wav", "IR_g_p02_b_06_00_stMS100.wav"] overlap: -1.0 cat3_4: - name: "a3s04" + name: "cat3/a3s04.wav" description: "Small echoic room with MS microphone pickup, no overlap between the talkers, office noise." 
source: ["m2s06.wav", "f2s06.wav"] - IR: ["SEMSP04.wav", "SEMSP01.wav"] + IR: ["IR_g_p02_b_04_00_stMS100.wav", "IR_g_p02_b_01_00_stMS100.wav"] overlap: -1.0 cat3_5: - name: "a3s05" + name: "cat3/a3s05.wav" description: "Small echoic room with MS microphone pickup, no overlap between the talkers, office noise." source: ["f3s06.wav", "m3s06.wav"] - IR: ["SEMSP03.wav", "SEMSP04.wav"] + IR: ["IR_g_p02_b_03_00_stMS100.wav", "IR_g_p02_b_04_00_stMS100.wav"] overlap: -1.0 cat3_6: - name: "a3s06" + name: "cat3/a3s06.wav" description: "Small echoic room with MS microphone pickup, no overlap between the talkers, office noise." source: ["m1s06.wav", "f1s06.wav"] - IR: ["SEMSP07.wav", "SEMSP02.wav"] + IR: ["IR_g_p02_b_07_00_stMS100.wav", "IR_g_p02_b_02_00_stMS100.wav"] overlap: -1.0 cat4_1: - name: "a4s01" + name: "cat4/a4s01.wav" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers, office noise." source: ["m1s07.wav", "f1s07.wav"] - IR: ["SEABP01.wav", "SEABP07.wav"] + IR: ["IR_g_p02_b_01_00_stAB100.wav", "IR_g_p02_b_07_00_stAB100.wav"] overlap: -1.0 cat4_2: - name: "a4s02" + name: "cat4/a4s02.wav" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers, office noise." source: ["f2s07.wav", "m2s07.wav"] - IR: ["SEABP05.wav", "SEABP03.wav"] + IR: ["IR_g_p02_b_05_00_stAB100.wav", "IR_g_p02_b_03_00_stAB100.wav"] overlap: -1.0 cat4_3: - name: "a4s03" + name: "cat4/a4s03.wav" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers, office noise." source: ["m3s07.wav", "f3s07.wav"] - IR: ["SEABP02.wav", "SEABP06.wav"] + IR: ["IR_g_p02_b_02_00_stAB100.wav", "IR_g_p02_b_06_00_stAB100.wav"] overlap: -1.0 cat4_4: - name: "a4s04" + name: "cat4/a4s04.wav" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers, office noise." 
source: ["f1s08.wav", "m1s08.wav"] - IR: ["SEABP04.wav", "SEABP01.wav"] + IR: ["IR_g_p02_b_04_00_stAB100.wav", "IR_g_p02_b_01_00_stAB100.wav"] overlap: -1.0 cat4_5: - name: "a4s05" + name: "cat4/a4s05.wav" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers, office noise." source: ["m2s08.wav", "f2s08.wav"] - IR: ["SEABP03.wav", "SEABP04.wav"] + IR: ["IR_g_p02_b_03_00_stAB100.wav", "IR_g_p02_b_04_00_stAB100.wav"] overlap: -1.0 cat4_6: - name: "a4s06" + name: "cat4/a4s06.wav" description: "Small echoic room with AB microphone pickup, partial overlap between the talkers, office noise." source: ["f3s08.wav", "m3s08.wav"] - IR: ["SEABP07.wav", "SEABP02.wav"] + IR: ["IR_g_p02_b_07_00_stAB100.wav", "IR_g_p02_b_02_00_stAB100.wav"] overlap: -1.0 cat5_1: - name: "a5s01" + name: "cat5/a5s01.wav" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers, office noise." source: ["f3s09.wav", "m3s09.wav"] - IR: ["LEABP02.wav", "LEABP08.wav"] + IR: ["IR_g_p02_d_02_00_stAB150.wav", "IR_g_p02_d_08_00_stAB150.wav"] overlap: -1.0 cat5_2: - name: "a5s02" + name: "cat5/a5s02.wav" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers, office noise." source: ["m1s09.wav", "f1s09.wav"] - IR: ["LEABP09.wav", "LEABP04.wav"] + IR: ["IR_g_p02_d_09_00_stAB150.wav", "IR_g_p02_d_04_00_stAB150.wav"] overlap: -1.0 cat5_3: - name: "a5s03" + name: "cat5/a5s03.wav" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers, office noise." source: ["f2s09.wav", "m2s09.wav"] - IR: ["LEABP06.wav", "LEABP10.wav"] + IR: ["IR_g_p02_d_06_00_stAB150.wav", "IR_g_p02_d_10_00_stAB150.wav"] overlap: -1.0 cat5_4: - name: "a5s04" + name: "cat5/a5s04.wav" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers, office noise." 
source: ["m3s10.wav", "f3s10.wav"] - IR: ["LEABP11.wav", "LEABP08.wav"] + IR: ["IR_g_p02_d_11_00_stAB150.wav", "IR_g_p02_d_08_00_stAB150.wav"] overlap: -1.0 cat5_5: - name: "a5s05" + name: "cat5/a5s05.wav" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers, office noise." source: ["f1s10.wav", "m1s10.wav"] - IR: ["LEABP10.wav", "LEABP12.wav"] + IR: ["IR_g_p02_d_10_00_stAB150.wav", "IR_g_p02_d_12_00_stAB150.wav"] overlap: -1.0 cat5_6: - name: "a5s06" + name: "cat5/a5s06.wav" description: "Large echoic room with AB microphone pickup, partial overlap between the talkers, office noise." source: ["m2s10.wav", "f2s10.wav"] - IR: ["LEABP12.wav", "LEABP01.wav"] + IR: ["IR_g_p02_d_12_00_stAB150.wav", "IR_g_p02_d_01_00_stAB150.wav"] overlap: -1.0 cat6_1: - name: "a6s01" + name: "cat6/a6s01.wav" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers, office noise." source: ["m2s11.wav", "f2s11.wav"] - IR: ["SEBIP01.wav", "SEBIP07.wav"] + IR: ["IR_g_p02_b_01_00_stBI100.wav", "IR_g_p02_b_07_00_stBI100.wav"] overlap: -1.0 cat6_2: - name: "a6s02" + name: "cat6/a6s02.wav" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers, office noise." source: ["f3s11.wav", "m3s11.wav"] - IR: ["SEBIP05.wav", "SEBIP03.wav"] + IR: ["IR_g_p02_b_05_00_stBI100.wav", "IR_g_p02_b_03_00_stBI100.wav"] overlap: -1.0 cat6_3: - name: "a6s03" + name: "cat6/a6s03.wav" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers, office noise." source: ["m1s11.wav", "f1s11.wav"] - IR: ["SEBIP02.wav", "SEBIP06.wav"] + IR: ["IR_g_p02_b_02_00_stBI100.wav", "IR_g_p02_b_06_00_stBI100.wav"] overlap: -1.0 cat6_4: - name: "a6s04" + name: "cat6/a6s04.wav" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers, office noise." 
source: ["f2s12.wav", "m2s12.wav"] - IR: ["SEBIP04.wav", "SEBIP01.wav"] + IR: ["IR_g_p02_b_04_00_stBI100.wav", "IR_g_p02_b_01_00_stBI100.wav"] overlap: -1.0 cat6_5: - name: "a6s05" + name: "cat6/a6s05.wav" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers, office noise." source: ["m3s12.wav", "f3s12.wav"] - IR: ["SEBIP03.wav", "SEBIP04.wav"] + IR: ["IR_g_p02_b_03_00_stBI100.wav", "IR_g_p02_b_04_00_stBI100.wav"] overlap: -1.0 cat6_6: - name: "a6s06" + name: "cat6/a6s06.wav" description: "Small echoic room with binaural microphone pickup, no overlap between the talkers, office noise." source: ["f1s12.wav", "m1s12.wav"] - IR: ["SEBIP07.wav", "SEBIP02.wav"] + IR: ["IR_g_p02_b_07_00_stBI100.wav", "IR_g_p02_b_02_00_stBI100.wav"] overlap: -1.0 \ No newline at end of file -- GitLab From 4d3c1832cd3023eb4bd1c903401700d900027aac Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Wed, 7 Jun 2023 11:01:56 +0200 Subject: [PATCH 05/11] naming convention in P800-2.yml - update --- item_gen_configs/P800-2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/item_gen_configs/P800-2.yml b/item_gen_configs/P800-2.yml index d0b65b51..d2cb3775 100644 --- a/item_gen_configs/P800-2.yml +++ b/item_gen_configs/P800-2.yml @@ -21,7 +21,7 @@ fs: 48000 input_path: "./items_mono" ### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' -# IR_path: "./IR" +IR_path: "./IRs" ### Output path for generated test items and metadata files output_path: "experiments/selection/P800-2/proc_input" -- GitLab From 23ca49654883abb09a2fd6637df1d2a30bf83a3b Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Wed, 7 Jun 2023 16:43:19 +0200 Subject: [PATCH 06/11] correction of ISM1 and ISM2 generation scripts; further adjustments --- item_gen_configs/P800-1.yml | 2 +- item_gen_configs/P800-2.yml | 4 +- item_gen_configs/P800-4.yml | 411 ++++++++++------- item_gen_configs/P800-5.yml | 412 
++++++++++------- item_gen_configs/P800-6.yml | 325 +++++++------ item_gen_configs/P800-7.yml | 287 ++++++------ item_gen_configs/P800-8.yml | 44 +- item_gen_configs/P800-9.yml | 44 +- .../generation/__init__.py | 33 +- .../generation/process_ambi_items.py | 318 +++++++++++++ .../generation/process_foa_items.py | 315 +++++++------ .../generation/process_ism1_items.py | 357 +++++++++++++++ .../generation/process_ism2_items.py | 427 ++++++++++++++++++ .../generation/process_ism_items.py | 360 --------------- .../generation/process_stereo_items.py | 11 +- 15 files changed, 2244 insertions(+), 1106 deletions(-) create mode 100644 ivas_processing_scripts/generation/process_ambi_items.py create mode 100644 ivas_processing_scripts/generation/process_ism1_items.py create mode 100644 ivas_processing_scripts/generation/process_ism2_items.py delete mode 100644 ivas_processing_scripts/generation/process_ism_items.py diff --git a/item_gen_configs/P800-1.yml b/item_gen_configs/P800-1.yml index 0ca2f87f..b6e48e0e 100644 --- a/item_gen_configs/P800-1.yml +++ b/item_gen_configs/P800-1.yml @@ -20,7 +20,7 @@ fs: 48000 ### Input path to mono files input_path: "./items_mono" -### Input path to stereo impulse response files, default = './IRs' +### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' IR_path: "./IRs" ### Output path for generated test items and metadata files diff --git a/item_gen_configs/P800-2.yml b/item_gen_configs/P800-2.yml index d2cb3775..84b0e95d 100644 --- a/item_gen_configs/P800-2.yml +++ b/item_gen_configs/P800-2.yml @@ -36,7 +36,7 @@ postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true -### File designators, default is "l" for listening lab, "EN" for language, "p02" for exp and "g" for provider +### File designators, default is "l" for listening lab, "EN" for language, "p01" for exp and "g" for provider 
listening_lab: "l" language: "EN" exp: "p02" @@ -54,8 +54,6 @@ use_input_prefix: "lLLeee" ### l stands for the 'listening_lab' designator, e stands for the 'exp' designator (the number of consecutive letters define the length of the field) use_output_prefix: "leee" - - ################################################ ### Scene description ################################################ diff --git a/item_gen_configs/P800-4.yml b/item_gen_configs/P800-4.yml index db2597d9..540f7ffc 100644 --- a/item_gen_configs/P800-4.yml +++ b/item_gen_configs/P800-4.yml @@ -3,28 +3,28 @@ # General configuration ################################################ +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + ### Output format format: "FOA" -### Output sampling rate in Hz needed for headerless audio files; default = 48000 +### Output sampling rate in Hz; default = 48000 fs: 48000 -### IR sampling rate in Hz needed for headerless audio files; default = 48000 -IR_fs: 48000 - -### Any relative paths will be interpreted relative to the working directory the script is called from! -### Usage of absolute paths is recommended. -### Do not use file names with dots "." in them! 
This is not supported, use "_" instead -### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions +### IR sampling rate in Hz (only for files in .pcm format); default = 48000 +# IR_fs: 48000 ### Input path to mono files input_path: "./items_mono" -### Input path to impulse response files, default = './ivas_processing_scripts/generation/IR' +### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' IR_path: "./IRs" ### Output path for generated test items and metadata files -output_path: "./items_FOA" +output_path: "experiments/selection/P800-4/proc_input" ### (Optional) Output path for binauralized versions of the generated FOA items # binaural_path: "./items_FOA_bin" @@ -39,269 +39,360 @@ postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true +### File designators, default is "l" for listening lab, "EN" for language, "p04" for exp and "g" for provider +listening_lab: "l" +language: "EN" +exp: "p04" +provider: "g" + +### Use prefix for all input filenames (default: "") +### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +use_input_prefix: "lLLeee" + +### Use prefix for all IR filenames (default: "") +### p stands for the 'provider', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +# use_IR_prefix: "IR_pp_eee_" + +### Use prefix for all output filenames (default: "") +### l stands for the 'listening_lab' designator, e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +use_output_prefix: "leee" ################################################ ### Scene description ################################################ -### Each scene must start with 
the sceneN tag -### Specify the mono source filename (the program will search for it in the input_path folder) -### Specify the IR source filename (the program will search for it in the input_path_IR folder) -### Specify the overlap length in seconds for each input source (negative value creates a gap) -### Note 1: use [val1, val2, ...] for multiple sources in a scene -### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames +### Each scene must begin by specifying the category in the following format: catN_I where N is the category index and I is the scene index +### Each scene shall be described using the following parameters/properties: +### name: filename of the generated output item (the program will save the generated items in the output_path folder, note: it is possible to use subfolders, e.g. items_stereo/x1_s01.wav) +### description: textual description of the scene +### source: filename(s) of the mono input sources (the program will search for it in the input_path folder) +### IR: filename(s) of the input IRs (the program will search for it in the IR_path folder) +### overlap: overlap length between two input sources in seconds (negative value creates a gap) +### format: input file format ("FOA" or "HOA2") +### Note 1: use brackets [val1, val2, ...] 
when specifying multiple values + +### Naming convention for the input mono files +### The input filenames are represented by: +### lLLeeettszz.wav +### where: +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### LL stands for the language: JP, FR, GE, MA, DA, EN +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### tt stands for the talker ID: f1, f2, f3, m1, m2, m3 +### s stands for 'sample' and zz is the sample number; 01, ..., 14 + +### Naming convention for the input IR files +### The input IR filenames are represented by: +### IR_pp_eee_r_tt_mm_ffffff.wav +### where: +### pp stands for the provider: do (Dolby), no (Nokia), or (Orange), vo (VoiceAge), g (G.191) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### r stands for the room ID: a, b, c, ... +### tt stands for the talker position: 01, 02, ... +### mm stands for the microphone position: 00, 01, 02, ... 
+### ffffff stands for the format ID: stAB20, stABC20, stAB100, stAB150, stMS, stBin, FOA, HOA2 + +### Naming convention for the generated output files +### The output filenames are represented by: +### leeeayszz.wav +### The filenames of the accompanying output metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: +### leeeayszz.met for metadata-assisted spatial audio +### leeeayszz.wav.o.csv for object-based audio +### where: +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### a stands for 'audio' +### y is the per-experiment category according to IVAS-8a: 01, 02, 03, 04, 05, 06 +### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07 (07 is the preliminary sample) +### o stands for the object number; 0, 1, 2, 3 scenes: - cat1_s1: - name: "lp04a1s01" + cat1_1: + name: "cat1/a1s01.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s01.wav", "m1s01.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s2: - name: "lp04a1s02" + cat1_2: + name: "cat1/a1s02.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s01.wav", "f2s01.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s3: - name: "lp04a1s03" + cat1_3: + name: "cat1/a1s03.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s01.wav", "m3s01.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" 
- cat1_s4: - name: "lp04a1s04" + cat1_4: + name: "cat1/a1s04.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s02.wav", "f1s02.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s5: - name: "lp04a1s05" + cat1_5: + name: "cat1/a1s05.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s02.wav", "m2s02.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s6: - name: "lp04a1s06" + cat1_6: + name: "cat1/a1s06.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s02.wav", "f3s02.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s1: - name: "lp04a2s01" + cat2_1: + name: "cat2/a2s01.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s03.wav", "f3s03.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s2: - name: "lp04a2s02" + cat2_2: + name: "cat2/a2s02.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s03.wav", "m1s03.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s3: - name: "lp04a2s03" + cat2_3: + name: "cat2/a2s03.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s03.wav", "f2s03.wav"] + IR: 
["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s4: - name: "lp04a2s04" + cat2_4: + name: "cat2/a2s04.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s04.wav", "m3s04.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s5: - name: "lp04a2s05" + cat2_5: + name: "cat2/a2s05.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s04.wav", "f1s04.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s6: - name: "lp04a2s06" + cat2_6: + name: "cat2/a2s06.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s04.wav", "m2s04.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s1: - name: "lp04a3s01" + cat3_1: + name: "cat3/a3s01.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s05.wav", "m2s05.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s2: - name: "lp04a3s02" + cat3_2: + name: "cat3/a3s02.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s05.wav", "f3s05.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s3: - name: "lp04a3s03" + cat3_3: + name: "cat3/a3s03.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", 
"IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s05.wav", "m1s05.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s4: - name: "lp04a3s04" + cat3_4: + name: "cat3/a3s04.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s06.wav", "f2s06.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s5: - name: "lp04a3s05" + cat3_5: + name: "cat3/a3s05.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s06.wav", "m3s06.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s6: - name: "lp04a3s06" + cat3_6: + name: "cat3/a3s06.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s06.wav", "f1s06.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s1: - name: "lp04a4s01" + cat4_1: + name: "cat4/a4s01.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s07.wav", "f1s07.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s2: - name: "lp04a4s02" + cat4_2: + name: "cat4/a4s02.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s07.wav", "m2s07.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s3: - name: "lp04a4s03" + cat4_3: + name: "cat4/a4s03.wav" description: "" - source: 
["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s07.wav", "f3s07.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s4: - name: "lp04a4s04" + cat4_4: + name: "cat4/a4s04.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s08.wav", "m1s08.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s5: - name: "lp04a4s05" + cat4_5: + name: "cat4/a4s05.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s08.wav", "f2s08.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s6: - name: "lp04a4s06" + cat4_6: + name: "cat4/a4s06.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s08.wav", "m3s08.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s1: - name: "lp04a5s01" + cat5_1: + name: "cat5/a5s01.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s09.wav", "m3s09.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s2: - name: "lp04a5s02" + cat5_2: + name: "cat5/a5s02.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s09.wav", "f1s09.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s3: - name: 
"lp04a5s03" + cat5_3: + name: "cat5/a5s03.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s09.wav", "m2s09.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s4: - name: "lp04a5s04" + cat5_4: + name: "cat5/a5s04.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s10.wav", "f3s10.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s5: - name: "lp04a5s05" + cat5_5: + name: "cat5/a5s05.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s10.wav", "m1s10.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s6: - name: "lp04a5s06" + cat5_6: + name: "cat5/a5s06.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s10.wav", "f2s10.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s1: - name: "lp04a6s01" + cat6_1: + name: "cat6/a6s01.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s11.wav", "f2s11.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s2: - name: "lp04a6s02" + cat6_2: + name: "cat6/a6s02.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s11.wav", "m3s11.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", 
"IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s3: - name: "lp04a6s03" + cat6_3: + name: "cat6/a6s03.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s11.wav", "f1s11.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s4: - name: "lp04a6s04" + cat6_4: + name: "cat6/a6s04.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s12.wav", "m2s12.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s5: - name: "lp04a6s05" + cat6_5: + name: "cat6/a6s05.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s12.wav", "f3s12.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s6: - name: "lp04a6s06" + cat6_6: + name: "cat6/a6s06.wav" description: "" - source: ["aENp04Fa1.wav", "aENp04Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s12.wav", "m1s12.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" diff --git a/item_gen_configs/P800-5.yml b/item_gen_configs/P800-5.yml index ec2716a7..0fad95cb 100644 --- a/item_gen_configs/P800-5.yml +++ b/item_gen_configs/P800-5.yml @@ -3,28 +3,28 @@ # General configuration ################################################ +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! 
This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + ### Output format format: "FOA" -### Output sampling rate in Hz needed for headerless audio files; default = 48000 +### Output sampling rate in Hz; default = 48000 fs: 48000 -### IR sampling rate in Hz needed for headerless audio files; default = 48000 -IR_fs: 48000 - -### Any relative paths will be interpreted relative to the working directory the script is called from! -### Usage of absolute paths is recommended. -### Do not use file names with dots "." in them! This is not supported, use "_" instead -### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions +### IR sampling rate in Hz (only for files in .pcm format); default = 48000 +# IR_fs: 48000 ### Input path to mono files input_path: "./items_mono" -### Input path to impulse response files, default = './ivas_processing_scripts/generation/IR' +### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' IR_path: "./IRs" ### Output path for generated test items and metadata files -output_path: "./items_FOA" +output_path: "experiments/selection/P800-5/proc_input" ### (Optional) Output path for binauralized versions of the generated FOA items # binaural_path: "./items_FOA_bin" @@ -39,268 +39,360 @@ postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true +### File designators, default is "l" for listening lab, "EN" for language, "p04" for exp and "g" for provider +listening_lab: "l" +language: "EN" +exp: "p05" +provider: "g" + +### Use prefix for all input filenames (default: "") +### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) 
+use_input_prefix: "lLLeee" + +### Use prefix for all IR filenames (default: "") +### p stands for the 'provider', e stands for the 'exp' designator (the number of consecutive letters defines the length of the field) +# use_IR_prefix: "IR_pp_eee_" + +### Use prefix for all output filenames (default: "") +### l stands for the 'listening_lab' designator, e stands for the 'exp' designator (the number of consecutive letters defines the length of the field) +use_output_prefix: "leee" ################################################ ### Scene description ################################################ -### Each scene must start with the sceneN tag -### Specify the mono source filename (the program will search for it in the input_path folder) -### Specify the IR source filename (the program will search for it in the input_path_IR folder) -### Specify the overlap length in seconds for each input source (negative value creates a gap) -### Note 1: use [val1, val2, ...] for multiple sources in a scene -### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames +### Each scene must begin by specifying the category in the following format: catN_I where N is the category index and I is the scene index +### Each scene shall be described using the following parameters/properties: +### name: filename of the generated output item (the program will save the generated items in the output_path folder, note: it is possible to use subfolders, e.g. items_stereo/x1_s01.wav) +### description: textual description of the scene +### source: filename(s) of the mono input sources (the program will search for it in the input_path folder) +### IR: filename(s) of the input IRs (the program will search for it in the IR_path folder) +### overlap: overlap length between two input sources in seconds (negative value creates a gap) +### format: input file format ("FOA" or "HOA2") +### Note 1: use brackets [val1, val2, ...] 
when specifying multiple values + +### Naming convention for the input mono files +### The input filenames are represented by: +### lLLeeettszz.wav +### where: +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### LL stands for the language: JP, FR, GE, MA, DA, EN +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### tt stands for the talker ID: f1, f2, f3, m1, m2, m3 +### s stands for 'sample' and zz is the sample number; 01, ..., 14 + +### Naming convention for the input IR files +### The input IR filenames are represented by: +### IR_pp_eee_r_tt_mm_ffffff.wav +### where: +### pp stands for the provider: do (Dolby), no (Nokia), or (Orange), vo (VoiceAge), g (G.191) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### r stands for the room ID: a, b, c, ... +### tt stands for the talker position: 01, 02, ... +### mm stands for the microphone position: 00, 01, 02, ... 
+### ffffff stands for the format ID: stAB20, stABC20, stAB100, stAB150, stMS, stBin, FOA, HOA2 + +### Naming convention for the generated output files +### The output filenames are represented by: +### leeeayszz.wav +### The filenames of the accompanying output metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: +### leeeayszz.met for metadata-assisted spatial audio +### leeeayszz.wav.o.csv for object-based audio +### where: +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### a stands for 'audio' +### y is the per-experiment category according to IVAS-8a: 01, 02, 03, 04, 05, 06 +### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07 (07 is the preliminary sample) +### o stands for the object number; 0, 1, 2, 3 scenes: - cat1_s1: - name: "lp05a1s01" + cat1_1: + name: "cat1/a1s01.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s01.wav", "m1s01.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s2: - name: "lp05a1s02" + cat1_2: + name: "cat1/a1s02.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s01.wav", "f2s01.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s3: - name: "lp05a1s03" + cat1_3: + name: "cat1/a1s03.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s01.wav", "m3s01.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" 
- cat1_s4: - name: "lp05a1s04" + cat1_4: + name: "cat1/a1s04.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s02.wav", "f1s02.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s5: - name: "lp05a1s05" + cat1_5: + name: "cat1/a1s05.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s02.wav", "m2s02.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s6: - name: "lp05a1s06" + cat1_6: + name: "cat1/a1s06.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s02.wav", "f3s02.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s1: - name: "lp05a2s01" + cat2_1: + name: "cat2/a2s01.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s03.wav", "f3s03.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s2: - name: "lp05a2s02" + cat2_2: + name: "cat2/a2s02.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s03.wav", "m1s03.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s3: - name: "lp05a2s03" + cat2_3: + name: "cat2/a2s03.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s03.wav", "f2s03.wav"] + IR: 
["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s4: - name: "lp05a2s04" + cat2_4: + name: "cat2/a2s04.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s04.wav", "m3s04.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s5: - name: "lp05a2s05" + cat2_5: + name: "cat2/a2s05.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s04.wav", "f1s04.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s6: - name: "lp05a2s06" + cat2_6: + name: "cat2/a2s06.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s04.wav", "m2s04.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s1: - name: "lp05a3s01" + cat3_1: + name: "cat3/a3s01.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s05.wav", "m2s05.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s2: - name: "lp05a3s02" + cat3_2: + name: "cat3/a3s02.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s05.wav", "f3s05.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s3: - name: "lp05a3s03" + cat3_3: + name: "cat3/a3s03.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", 
"IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s05.wav", "m1s05.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s4: - name: "lp05a3s04" + cat3_4: + name: "cat3/a3s04.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s06.wav", "f2s06.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s5: - name: "lp05a3s05" + cat3_5: + name: "cat3/a3s05.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s06.wav", "m3s06.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s6: - name: "lp05a3s06" + cat3_6: + name: "cat3/a3s06.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s06.wav", "f1s06.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s1: - name: "lp05a4s01" + cat4_1: + name: "cat4/a4s01.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s07.wav", "f1s07.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s2: - name: "lp05a4s02" + cat4_2: + name: "cat4/a4s02.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s07.wav", "m2s07.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s3: - name: "lp05a4s03" + cat4_3: + name: "cat4/a4s03.wav" description: "" - source: 
["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s07.wav", "f3s07.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s4: - name: "lp05a4s04" + cat4_4: + name: "cat4/a4s04.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s08.wav", "m1s08.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s5: - name: "lp05a4s05" + cat4_5: + name: "cat4/a4s05.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s08.wav", "f2s08.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s6: - name: "lp05a4s06" + cat4_6: + name: "cat4/a4s06.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s08.wav", "m3s08.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s1: - name: "lp05a5s01" + cat5_1: + name: "cat5/a5s01.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s09.wav", "m3s09.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s2: - name: "lp05a5s02" + cat5_2: + name: "cat5/a5s02.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s09.wav", "f1s09.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s3: - name: 
"lp05a5s03" + cat5_3: + name: "cat5/a5s03.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s09.wav", "m2s09.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s4: - name: "lp05a5s04" + cat5_4: + name: "cat5/a5s04.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s10.wav", "f3s10.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s5: - name: "lp05a5s05" + cat5_5: + name: "cat5/a5s05.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s10.wav", "m1s10.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s6: - name: "lp05a5s06" + cat5_6: + name: "cat5/a5s06.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s10.wav", "f2s10.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s1: - name: "lp05a6s01" + cat6_1: + name: "cat6/a6s01.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s11.wav", "f2s11.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s2: - name: "lp05a6s02" + cat6_2: + name: "cat6/a6s02.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s11.wav", "m3s11.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", 
"IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s3: - name: "lp05a6s03" + cat6_3: + name: "cat6/a6s03.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s11.wav", "f1s11.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s4: - name: "lp05a6s04" + cat6_4: + name: "cat6/a6s04.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s12.wav", "m2s12.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s5: - name: "lp05a6s05" + cat6_5: + name: "cat6/a6s05.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s12.wav", "f3s12.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s6: - name: "lp05a6s06" + cat6_6: + name: "cat6/a6s06.wav" description: "" - source: ["aENp05Fa1.wav", "aENp05Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s12.wav", "m1s12.wav"] + IR: ["IR_do_p05_e_01_01_FOA.wav", "IR_do_p05_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" + diff --git a/item_gen_configs/P800-6.yml b/item_gen_configs/P800-6.yml index 277f6a2a..2885adf1 100644 --- a/item_gen_configs/P800-6.yml +++ b/item_gen_configs/P800-6.yml @@ -3,17 +3,17 @@ # General configuration ################################################ -### Output format -format: "ISM1" - -### Output sampling rate in Hz needed for headerless audio files; default = 48000 -fs: 48000 - ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. 
### Do not use file names with dots "." in them! This is not supported, use "_" instead ### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions +### Output format +format: "ISM1" + +### Output sampling rate in Hz; default = 48000 +fs: 48000 + ### Input path to mono files input_path: "./items_mono" @@ -24,292 +24,353 @@ output_path: "experiments/selection/P800-6/proc_input" loudness: -26 ### Pre-amble and Post-amble length in seconds (default = 0.0) -preamble: 1.0 +preamble: 0.5 postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true +### File designators, default is "l" for listening lab, "EN" for language, "p06" for exp and "g" for provider +listening_lab: "l" +language: "EN" +exp: "p06" +provider: "g" + +### Use prefix for all input filenames (default: "") +### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +use_input_prefix: "lLLeee" + +### Use prefix for all output filenames (default: "") +### l stands for the 'listening_lab' designator, e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +use_output_prefix: "leee" ################################################ ### Scene description ################################################ -### Each scene must start with a unique tag -### Specify the mono source filename (the program will search for it in the input_path folder) -### Specify azimuth and elevation for each input source -### Note 1: use [val1, val2, ...] 
for multiple sources in a scene +### Each scene must begin by specifying the category in the following format: catN_I where N is the category index and I is the scene index +### Each scene shall be described using the following parameters/properties: +### name: filename of the generated output item (the program will save the generated items in the output_path folder, note: it is possible to use subfolders, e.g. items_stereo/x1_s01.wav) +### description: textual description of the scene +### source: filename(s) of the mono input sources (the program will search for it in the input_path folder) +### azimuth: azimuth in the range [-180,180]; positive values point to the left +### elevation: elevation in the range [-90,90]; positive values indicate up +### overlap: overlap length between two input sources in seconds (negative value creates a gap) +### Note 1: use brackets [val1, val2, ...] when specifying multiple values ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames +### Note 3: we're using right-handed coordinate system with azimuth = 0 pointing from the nose to the screen -### Note 3: we're using right-handed coordinate system with azi = 0 pointing from the nose to the screen -### azimuth: float, [-180,180]; positive indicates left -### elevation: float, [-90,90]; positive indicates up +### Naming convention for the input mono files +### The input filenames are represented by: +### lLLeeettszz.wav +### where: +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### LL stands for the language: JP, FR, GE, MA, DA, EN +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### tt stands for the talker ID: f1, f2, f3, m1, m2, m3 +### s stands for 'sample' and zz is the sample number; 01, ..., 14 -### Naming convention for P.800 items -### The filenames of the input content samples are represented by: +### 
Naming convention for the generated output files +### The output filenames are represented by: ### leeeayszz.wav -### The filenames of the accompanying metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: +### The filenames of the accompanying output metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: ### leeeayszz.met for metadata-assisted spatial audio ### leeeayszz.wav.o.csv for object-based audio ### where: -### l stands for the listening lab designator (a through d according to Table 2) -### eee stands for the experiment designator, e.g. p01 (see Table 1) -### a stands audio, and y is the per experiment category according to IVAS-8a -### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07, where 07 is the preliminary -### o stands for object number; 0, 1, 2, 3 +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### a stands for 'audio' +### y is the per-experiment category according to IVAS-8a: 01, 02, 03, 04, 05, 06 +### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07 (07 is the preliminary sample) +### o stands for the object number; 0, 1, 2, 3 + scenes: - a1: - name: "lp06a1s01" + cat1_1: + name: "cat1/a1s01.wav" description: "Talker sitting at a table" - source: ["m1_s1.wav", "m1_s7.wav"] + source: ["m1s01.wav", "m1s07.wav"] azimuth: 0 elevation: 0 - overlap: -0.5 + overlap: -1.0 - a2: - name: "lp06a1s02" + cat1_2: + name: "cat1/a1s02.wav" description: "Talker sitting at a table" - source: ["f3_s2.wav", "f3_s8.wav"] + source: ["f3s02.wav", "f3s08.wav"] azimuth: 60 elevation: 0 - overlap: -0.5 + overlap: -1.0 - a3: - name: "lp06a1s03" + cat1_3: + name: "cat1/a1s03.wav" description: "Talker sitting at a table" - source: ["m3_s3.wav", "m3_s9.wav"] + 
source: ["m3s03.wav", "m3s09.wav"] azimuth: 120 elevation: 0 + overlap: -1.0 - a4: - name: "lp06a1s04" + cat1_4: + name: "cat1/a1s04.wav" description: "Talker sitting at a table" - source: ["f2_s4.wav", "f2_s10.wav"] + source: ["f2s04.wav", "f2s10.wav"] azimuth: 180 elevation: 0 + overlap: -1.0 - a5: - name: "lp06a1s05" + cat1_5: + name: "cat1/a1s05.wav" description: "Talker sitting at a table" - source: ["m2_s5.wav", "m2_s11.wav"] + source: ["m2s05.wav", "m2s11.wav"] azimuth: 240 elevation: 0 + overlap: -1.0 - a6: - name: "lp06a1s06" + cat1_6: + name: "cat1/a1s06.wav" description: "Talker sitting at a table" - source: ["f1_s6.wav", "f1_s12.wav"] + source: ["f1s06.wav", "f1s12.wav"] azimuth: 300 elevation: 0 + overlap: -1.0 - b1: - name: "lp06a2s01" + cat2_1: + name: "cat2/a2s01.wav" description: "standing talker." - source: ["f1_s1.wav", "f1_s7.wav"] + source: ["f1s01.wav", "f1s07.wav"] azimuth: 120 elevation: 35 + overlap: -1.0 - b2: - name: "lp06a2s02" + cat2_2: + name: "cat2/a2s02.wav" description: "standing talker." - source: ["m1_s2.wav", "m1_s8.wav"] + source: ["m1s02.wav", "m1s08.wav"] azimuth: 180 elevation: 35 + overlap: -1.0 - b3: - name: "lp06a2s03" + cat2_3: + name: "cat2/a2s03.wav" description: "standing talker." - source: ["f3_s3.wav", "f3_s9.wav"] + source: ["f3s03.wav", "f3s09.wav"] azimuth: 240 elevation: 35 + overlap: -1.0 - b4: - name: "lp06a2s04" + cat2_4: + name: "cat2/a2s04.wav" description: "standing talker." - source: ["m3_s4.wav", "m3_s10.wav"] + source: ["m3s04.wav", "m3s10.wav"] azimuth: 300 elevation: 35 + overlap: -1.0 - b5: - name: "lp06a2s05" + cat2_5: + name: "cat2/a2s05.wav" description: "standing talker." - source: ["f2_s5.wav", "f2_s11.wav"] + source: ["f2s05.wav", "f2s11.wav"] azimuth: 0 elevation: 35 + overlap: -1.0 - b6: - name: "lp06a2s06" + cat2_6: + name: "cat2/a2s06.wav" description: "standing talker." 
- source: ["m2_s6.wav", "m2_s12.wav"] + source: ["m2s06.wav", "m2s12.wav"] azimuth: 60 elevation: 35 + overlap: -1.0 - c1: - name: "lp06a3s01" + cat3_1: + name: "cat3/a3s01.wav" description: "Smaller talker (child) walking around a table." - source: ["m2_s1.wav", "m2_s7.wav"] + source: ["m2s01.wav", "m2s07.wav"] azimuth: "0:1:360" elevation: 0 + overlap: -1.0 - c2: - name: ""lp06a3s02" + cat3_2: + name: "cat3/a3s02.wav" description: "Smaller talker (child) walking around a table." - source: ["f1_s2.wav", "f1_s8.wav"] + source: ["f1s02.wav", "f1s08.wav"] azimuth: "60:1:60+360" elevation: 0 + overlap: -1.0 - c3: - name: "lp06a3s03" + cat3_3: + name: "cat3/a3s03.wav" description: "Smaller talker (child) walking around a table." - source: ["m1_s3.wav", "m1_s9.wav"] + source: ["m1s03.wav", "m1s09.wav"] azimuth: "120:1:120+360" elevation: 0 + overlap: -1.0 - c4: - name: "lp06a3s04" + cat3_4: + name: "cat3/a3s04.wav" description: "Smaller talker (child) walking around a table." - source: ["f3_s4.wav", "f3_s10.wav"] + source: ["f3s04.wav", "f3s10.wav"] azimuth: "180:1:180+360" elevation: 0 + overlap: -1.0 - c5: - name: "lp06a3s05" + cat3_5: + name: "cat3/a3s05.wav" description: "Smaller talker (child) walking around a table." - source: ["m3_s5.wav", "m3_s11.wav"] + source: ["m3s05.wav", "m3s11.wav"] azimuth: "240:1:240+360" elevation: 0 + overlap: -1.0 - c6: - name: "lp06a3s06" + cat3_6: + name: "cat3/a3s06.wav" description: "Smaller talker (child) walking around a table." - source: ["f2_s6.wav", "f2_s12.wav"] + source: ["f2s06.wav", "f2s12.wav"] azimuth: "300:1:300+360" elevation: 0 + overlap: -1.0 - d1: - name: "lp06a4s01" + cat4_1: + name: "cat4/a4s01.wav" description: "Talker walking around the table." - source: ["f2_s1.wav", "f2_s7.wav"] + source: ["f2s01.wav", "f2s07.wav"] azimuth: "0:-1:-360" elevation: 35 + overlap: -1.0 - d2: - name: "lp06a4s02" + cat4_2: + name: "cat4/a4s02.wav" description: "Talker walking around the table." 
- source: ["m2_s2.wav", "m2_s8.wav"] + source: ["m2s02.wav", "m2s08.wav"] azimuth: "60:-1:60-360" elevation: 35 + overlap: -1.0 - d3: - name: "lp06a4s03" + cat4_3: + name: "cat4/a4s03.wav" description: "Talker walking around the table." - source: ["f1_s3.wav", "f1_s9.wav"] + source: ["f1s03.wav", "f1s09.wav"] azimuth: "120:-1:120-360" elevation: 35 + overlap: -1.0 - d4: - name: "lp06a4s04" + cat4_4: + name: "cat4/a4s04.wav" description: "Talker walking around the table." - source: ["m1_s4.wav", "m1_s10.wav"] + source: ["m1s04.wav", "m1s10.wav"] azimuth: "180:-1:180-360" elevation: 35 + overlap: -1.0 - d5: - name: "lp06a4s05" + cat4_5: + name: "cat4/a4s05.wav" description: "Talker walking around the table." - source: ["f3_s5.wav", "f3_s11.wav"] + source: ["f3s05.wav", "f3s11.wav"] azimuth: "240:-1:240-360" elevation: 35 + overlap: -1.0 - d6: - name: "lp06a4s06" + cat4_6: + name: "cat4/a4s06.wav" description: "Talker walking around the table." - source: ["m3_s6.wav", "m3_s12.wav"] + source: ["m3s06.wav", "m3s12.wav"] azimuth: "300:-1:300-360" elevation: 35 + overlap: -1.0 - e1: - name: "lp06a5s01" + cat5_1: + name: "cat5/a5s01.wav" description: "Elevation displacement." - source: ["m3_s1.wav", "m3_s7.wav"] + source: ["m3s01.wav", "m3s07.wav"] azimuth: 240 elevation: "-90:0.5:90" + overlap: -1.0 - e2: - name: "lp06a5s02" + cat5_2: + name: "cat5/a5s02.wav" description: "Elevation displacement." - source: ["f2_s2.wav", "f2_s8.wav"] + source: ["f2s02.wav", "f2s08.wav"] azimuth: 300 elevation: 0 + overlap: -1.0 - e3: - name: "lp06a5s03" + cat5_3: + name: "cat5/a5s03.wav" description: "Elevation displacement." - source: ["m2_s3.wav", "m2_s9.wav"] + source: ["m2s03.wav", "m2s09.wav"] azimuth: 0 elevation: "-90:0.5:90" + overlap: -1.0 - e4: - name: "lp06a5s04" + cat5_4: + name: "cat5/a5s04.wav" description: "Elevation displacement." 
- source: ["f1_s4.wav", "f1_s10.wav"] + source: ["f1s04.wav", "f1s10.wav"] azimuth: 60 elevation: "-90:0.5:90" + overlap: -1.0 - e5: - name: "lp06a5s05" + cat5_5: + name: "cat5/a5s05.wav" description: "Elevation displacement." - source: ["m1_s5.wav", "m1_s11.wav"] + source: ["m1s05.wav", "m1s11.wav"] azimuth: 120 elevation: "-90:0.5:90" + overlap: -1.0 - e6: - name: "lp06a5s06" + cat5_6: + name: "cat5/a5s06.wav" description: "Elevation displacement." - source: ["f3_s6.wav", "f3_s12.wav"] + source: ["f3s06.wav", "f3s12.wav"] azimuth: 180 elevation: "-90:0.5:90" + overlap: -1.0 - f1: - name: "lp06a6s01" + cat6_1: + name: "cat6/a6s01.wav" description: "Azimuth and elevation displacement." - source: ["f3_s1.wav", "f3_s7.wav"] + source: ["f3s01.wav", "f3s07.wav"] azimuth: "60:0.5:60+180" elevation: "35:-0.2:-35" + overlap: -1.0 - f2: - name: "lp06a6s02" + cat6_2: + name: "cat6/a6s02.wav" description: "Azimuth and elevation displacement." - source: ["m3_s2.wav", "m3_s8.wav"] + source: ["m3s02.wav", "m3s08.wav"] azimuth: "120:0.5:120+180" elevation: "35:-0.2:-35" + overlap: -1.0 - f3: - name: "lp06a6s03" + cat6_3: + name: "cat6/a6s03.wav" description: "Azimuth and elevation displacement." - source: ["f2_s3.wav", "f2_s9.wav"] + source: ["f2s03.wav", "f2s09.wav"] azimuth: "180:0.5:180+180" elevation: "35:-0.2:-35" + overlap: -1.0 - f4: - name: "lp06a6s04" + cat6_4: + name: "cat6/a6s04.wav" description: "Azimuth and elevation displacement." - source: ["m2_s4.wav", "m2_s10.wav"] + source: ["m2s04.wav", "m2s10.wav"] azimuth: "240:0.5:240+180" elevation: "35:-0.2:-35" + overlap: -1.0 - f5: - name: "lp06a6s05" + cat6_5: + name: "cat6/a6s05.wav" description: "Azimuth and elevation displacement." - source: ["f1_s5.wav", "f1_s11.wav"] + source: ["f1s05.wav", "f1s11.wav"] azimuth: "300:0.5:300+180" elevation: "35:-0.2:-35" + overlap: -1.0 - f6: - name: "lp06a6s06" + cat6_6: + name: "cat6/a6s06.wav" description: "Azimuth and elevation displacement." 
- source: ["m1_s6.wav", "m1_s12.wav"] + source: ["m1s06.wav", "m1s12.wav"] azimuth: "0:0.5:0+180" elevation: "35:-0.2:-35" + overlap: -1.0 \ No newline at end of file diff --git a/item_gen_configs/P800-7.yml b/item_gen_configs/P800-7.yml index 7b8e1bba..f93ea386 100644 --- a/item_gen_configs/P800-7.yml +++ b/item_gen_configs/P800-7.yml @@ -3,22 +3,22 @@ # General configuration ################################################ -### Output format -format: "ISM2" - -### Output sampling rate in Hz needed for headerless audio files; default = 48000 -fs: 48000 - ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. ### Do not use file names with dots "." in them! This is not supported, use "_" instead ### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions +### Output format +format: "ISM2" + +### Output sampling rate in Hz; default = 48000 +fs: 48000 + ### Input path to mono files input_path: "./items_mono" ### Output path for generated test items and metadata files -output_path: "./items_ISM2" +output_path: "experiments/selection/P800-7/proc_input" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 @@ -30,318 +30,345 @@ postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true +### File designators, default is "l" for listening lab, "EN" for language, "p06" for exp and "g" for provider +listening_lab: "l" +language: "EN" +exp: "p01" +provider: "g" + +### Use prefix for all input filenames (default: "") +### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +use_input_prefix: "lLLeee" + +### Use prefix for all output filenames (default: "") +### l stands for 
the 'listening_lab' designator, e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +use_output_prefix: "leee" + ################################################ ### Scene description ################################################ -### Each scene must start with a unique tag -### Specify the mono source filename (the program will search for it in the input_path folder) -### Specify azimuth and elevation for each input source -### Note 1: use [val1, val2, ...] for multiple sources in a scene +### Each scene must begin by specifying the category in the following format: catN_I where N is the category index and I is the scene index +### Each scene shall be described using the following parameters/properties: +### name: filename of the generated output item (the program will save the generated items in the output_path folder, note: it is possible to use subfolders, e.g. items_stereo/x1_s01.wav) +### description: textual description of the scene +### source: filename(s) of the mono input sources (the program will search for it in the input_path folder) +### azimuth: azimuth in the range [-180,180]; positive values point to the left +### elevation: elevation in the range [-90,90]; positive values indicate up +### overlap: overlap length between two input sources in seconds (negative value creates a gap) +### Note 1: use brackets [val1, val2, ...] 
when specifying multiple values ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames +### Note 3: we're using right-handed coordinate system with azimuth = 0 pointing from the nose to the screen -### Note 3: we're using right-handed coordinate system with azi = 0 pointing from the nose to the screen -### azimuth: float, [-180,180]; positive indicates left -### elevation: float, [-90,90]; positive indicates up +### Naming convention for the input mono files +### The input filenames are represented by: +### lLLeeettszz.wav +### where: +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### LL stands for the language: JP, FR, GE, MA, DA, EN +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### tt stands for the talker ID: f1, f2, f3, m1, m2, m3 +### s stands for 'sample' and zz is the sample number; 01, ..., 14 -### Naming convention for P.800 items -### The filenames of the input content samples are represented by: +### Naming convention for the generated output files +### The output filenames are represented by: ### leeeayszz.wav -### The filenames of the accompanying metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: +### The filenames of the accompanying output metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: ### leeeayszz.met for metadata-assisted spatial audio ### leeeayszz.wav.o.csv for object-based audio ### where: -### l stands for the listening lab designator (a through d according to Table 2) -### eee stands for the experiment designator, e.g. 
p01 (see Table 1) -### a stands audio, and y is the per experiment category according to IVAS-8a -### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07, where 07 is the preliminary -### o stands for object number; 0, 1, 2, 3 +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### a stands for 'audio' +### y is the per-experiment category according to IVAS-8a: 01, 02, 03, 04, 05, 06 +### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07 (07 is the preliminary sample) +### o stands for the object number; 0, 1, 2, 3 scenes: - a1: - name: "lp07a1s01" + cat1_1: + name: "cat1/a1s01.wav" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["m1_s1.wav", "f1_s1.wav"] + source: ["m1s01.wav", "f1s01.wav"] azimuth: [0, 50] elevation: [0, 0] overlap: -1.0 - a2: - name: "lp07a1s02" + cat1_2: + name: "cat1/a1s02.wav" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["f3_s8.wav", "m1_s8.wav"] + source: ["f3s08.wav", "m1s08.wav"] azimuth: [50, 350] elevation: [0, 0] overlap: -1.0 - a3: - name: "lp07a1s03" + cat1_3: + name: "cat1/a1s03.wav" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["f2_s9.wav", "m3_s9.wav"] + source: ["f2s09.wav", "m3s09.wav"] azimuth: [40, 290] elevation: [0, 0] overlap: -1.0 - a4: - name: "lp07a1s04" + cat1_4: + name: "cat1/a1s04.wav" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." 
- source: ["f1_s10.wav", "m2_s10.wav"] + source: ["f1s10.wav", "m2s10.wav"] azimuth: [30, 230] elevation: [15, 15] overlap: -1.0 - a5: - name: "lp07a1s05" + cat1_5: + name: "cat1/a1s05.wav" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["m3_s5.wav", "f3_s5.wav"] + source: ["m3s05.wav", "f3s05.wav"] azimuth: [20, 170] elevation: [15, 15] overlap: -1.0 - a6: - name: "lp07a1s06" + cat1_6: + name: "cat1/a1s06.wav" description: "Two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["m2_s6.wav", "f2_s6.wav"] + source: ["m2s06.wav", "f2s06.wav"] azimuth: [10, 110] elevation: [15, 15] overlap: -1.0 - b1: - name: "lp07a2s01" + cat2_1: + name: "cat2/a2s01.wav" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["m2_s1.wav", "f2_s1.wav"] + source: ["m2s01.wav", "f2s01.wav"] azimuth: [20, 170] elevation: [30, 30] overlap: 1.0 - b2: - name: "lp07a2s02" + cat2_2: + name: "cat2/a2s02.wav" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["m1_s2.wav", "f1_s2.wav"] + source: ["m1s02.wav", "f1s02.wav"] azimuth: [10, 110] elevation: [30, 30] overlap: 1.0 - b3: - name: "lp07a2s03" + cat2_3: + name: "cat2/a2s03.wav" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["f3_s9.wav", "m1_s9.wav"] + source: ["f3s09.wav", "m1s09.wav"] azimuth: [0, 50] elevation: [30, 30] overlap: 1.0 - b4: - name: "lp07a2s04" + cat2_4: + name: "cat2/a2s04.wav" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." 
- source: ["f2_s10.wav", "m3_s10.wav"] + source: ["f2s10.wav", "m3s10.wav"] azimuth: [50, 350] elevation: [60, 60] overlap: 1.0 - b5: - name: "lp07a2s05" + cat2_5: + name: "cat2/a2s05.wav" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["f1_s11.wav", "m2_s11.wav"] + source: ["f1s11.wav", "m2s11.wav"] azimuth: [40, 290] elevation: [60, 60] overlap: 1.0 - b6: - name: "lp07a2s06" + cat2_6: + name: "cat2/a2s06.wav" description: "Two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["m3_s6.wav", "f3_s6.wav"] + source: ["m3s06.wav", "f3s06.wav"] azimuth: [30, 230] elevation: [60, 60] overlap: 1.0 - c1: - name: "lp07a3s01" + cat3_1: + name: "cat3/a3s01.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["m3_s1.wav", "f3_s1.wav"] + source: ["m3s01.wav", "f3s01.wav"] azimuth: [40, 290] elevation: [0, 60] overlap: -1.0 - c2: - name: "lp07a3s02" + cat3_2: + name: "cat3/a3s02.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["m2_s2.wav", "f2_s2.wav"] + source: ["m2s02.wav", "f2s02.wav"] azimuth: [30, 230] elevation: [0, 60] overlap: -1.0 - c3: - name: "lp07a3s03" + cat3_3: + name: "cat3/a3s03.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["m1_s3.wav", "f1_s3.wav"] + source: ["m1s03.wav", "f1s03.wav"] azimuth: [20, 170] elevation: [0, 60] overlap: -1.0 - c4: - name: "lp07a3s04" + cat3_4: + name: "cat3/a3s04.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." 
- source: ["f3_s10.wav", "m1_s10.wav"] + source: ["f3s10.wav", "m1s10.wav"] azimuth: [10, 110] elevation: [0, 60] overlap: -1.0 - c5: - name: "lp07a3s05" + cat3_5: + name: "cat3/a3s05.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["f2_s11.wav", "m3_s11.wav"] + source: ["f2s11.wav", "m3s11.wav"] azimuth: [0, 50] elevation: [0, 60] overlap: -1.0 - c6: - name: "lp07a3s06" + cat3_6: + name: "cat3/a3s06.wav" description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["f1_s12.wav", "m2_s12.wav"] + source: ["f1s12.wav", "m2s12.wav"] azimuth: [50, 350] elevation: [0, 60] overlap: -1.0 - d1: - name: "lp07a4s01" + cat4_1: + name: "cat4/a4s01.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["f1_s7.wav", "m2_s7.wav"] + source: ["f1s07.wav", "m2s07.wav"] azimuth: [50, "180:1:120 + 360"] elevation: [0, 60] overlap: 1.0 - d2: - name: "lp07a4s02" + cat4_2: + name: "cat4/a4s02.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["m3_s2.wav", "f3_s2.wav"] + source: ["m3s02.wav", "f3s02.wav"] azimuth: [300, "-70:-1:-10 - 360"] elevation: [0, 60] overlap: 1.0 - d3: - name: "lp07a4s03" + cat4_3: + name: "cat4/a4s03.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["m2_s3.wav", "f2_s3.wav"] + source: ["m2s03.wav", "f2s03.wav"] azimuth: [250, "-20:-1:-320"] elevation: [0, 60] overlap: 1.0 - d4: - name: "lp07a4s04" + cat4_4: + name: "cat4/a4s04.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
- source: ["m1_s4.wav", "f1_s4.wav"] + source: ["m1s04.wav", "f1s04.wav"] azimuth: [200, "30:-1:-270"] elevation: [0, 60] overlap: 1.0 - d5: - name: "lp07a4s05" + cat4_5: + name: "cat4/a4s05.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["f3_s11.wav", "m1_s11.wav"] + source: ["f3s11.wav", "m1s11.wav"] azimuth: [150, "80:1:20 + 360"] elevation: [0, 60] overlap: 1.0 - d6: - name: "lp07a4s06" + cat4_6: + name: "cat4/a4s06.wav" description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["f2_s12.wav", "m3_s12.wav"] + source: ["f2s12.wav", "m3s12.wav"] azimuth: [100, "130:1:70 + 360"] elevation: [0, 60] overlap: 1.0 - e1: - name: "lp07a5s01" + cat5_1: + name: "cat5/a5s01.wav" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["f2_s7.wav", "m3_s7.wav"] + source: ["f2s07.wav", "m3s07.wav"] azimuth: ["80:1:20 + 360", "80:1:20 + 360"] elevation: [10, 60] overlap: 1.0 - e2: - name: "lp07a5s02" + cat5_2: + name: "cat5/a5s02.wav" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["f1_s8.wav", "m2_s8.wav"] + source: ["f1s08.wav", "m2s08.wav"] azimuth: ["130:1:70 + 360", "130:1:70 + 360"] elevation: [10, 60] overlap: 1.0 - e3: - name: "lp07a5s03" + cat5_3: + name: "cat5/a5s03.wav" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["m3_s3.wav", "f3_s3.wav"] + source: ["m3s03.wav", "f3s03.wav"] azimuth: ["180:1:120 + 360", "180:1:120 + 360"] elevation: [10, 60] overlap: 1.0 - e4: - name: "lp07a5s04" + cat5_4: + name: "cat5/a5s04.wav" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["m2_s4.wav", "f2_s4.wav"] + source: ["m2s04.wav", "f2s04.wav"] azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] elevation: [10, 60] overlap: 
1.0 - e5: - name: "lp07a5s05" + cat5_5: + name: "cat5/a5s05.wav" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["m1_s5.wav", "f1_s5.wav"] + source: ["m1s05.wav", "f1s05.wav"] azimuth: ["-20:-1:-320", "-20:-1:-320"] elevation: [10, 60] overlap: 1.0 - e6: - name: "lp07a5s06" + cat5_6: + name: "cat5/a5s06.wav" description: "Two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["f3_s12.wav", "m1_s12.wav"] + source: ["f3s12.wav", "m1s12.wav"] azimuth: ["30:-1:-270", "30:-1:-270"] elevation: [10, 60] overlap: 1.0 - f1: - name: "lp07a6s01" + cat6_1: + name: "cat6/a6s01.wav" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["f3_s7.wav", "m1_s7.wav"] + source: ["f3s07.wav", "m1s07.wav"] azimuth: ["60:1:0 + 360", "60:-1:120 - 360"] elevation: [20, 50] overlap: -1.0 - f2: - name: "lp07a6s02" + cat6_2: + name: "cat6/a6s02.wav" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["f2_s8.wav", "m3_s8.wav"] + source: ["f2s08.wav", "m3s08.wav"] azimuth: ["0:1:300", "0:-1:60 - 360"] elevation: [20, 50] overlap: -1.0 - f3: - name: "lp07a6s03" + cat6_3: + name: "cat6/a6s03.wav" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["f1_s9.wav", "m2_s9.wav"] + source: ["f1s09.wav", "m2s09.wav"] azimuth: ["300:1:240 + 360", "300:-1:0"] elevation: [20, 50] overlap: -1.0 - f4: - name: "lp07a6s04" + cat6_4: + name: "cat6/a6s04.wav" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." 
- source: ["m3_s4.wav", "f3_s4.wav"] + source: ["m3s04.wav", "f3s04.wav"] azimuth: ["240:1:180 + 360", "240:-1:-60"] elevation: [20, 50] overlap: -1.0 - f5: - name: "lp07a6s05" + cat6_5: + name: "cat6/a6s05.wav" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["m2_s5.wav", "f2_s5.wav"] + source: ["m2s05.wav", "f2s05.wav"] azimuth: ["180:1:120 + 360", "180:-1:-120"] elevation: [20, 50] overlap: -1.0 - f6: - name: "lp07a6s06" + cat6_6: + name: "cat6/a6s06.wav" description: "Two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["m1_s6.wav", "f1_s6.wav"] + source: ["m1s06.wav", "f1s06.wav"] azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] elevation: [20, 50] overlap: -1.0 diff --git a/item_gen_configs/P800-8.yml b/item_gen_configs/P800-8.yml index c0df61fa..457ae07b 100644 --- a/item_gen_configs/P800-8.yml +++ b/item_gen_configs/P800-8.yml @@ -6,11 +6,11 @@ ### Output format format: "FOA" -### Output sampling rate in Hz needed for headerless audio files; default = 48000 +### Output sampling rate in Hz; default = 48000 fs: 48000 -### IR sampling rate in Hz needed for headerless audio files; default = 48000 -IR_fs: 48000 +### IR sampling rate in Hz (only for files in .pcm format); default = 48000 +# IR_fs: 48000 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. 
@@ -21,10 +21,10 @@ IR_fs: 48000 input_path: "./items_mono" ### Input path to impulse response files, default = './ivas_processing_scripts/generation/IR' -IR_path: "./IRs" +# IR_path: "./IR" ### Output path for generated test items and metadata files -output_path: "./items_FOA" +output_path: "experiments/selection/P800-8/proc_input" ### (Optional) Output path for binauralized versions of the generated FOA items # binaural_path: "./items_FOA_bin" @@ -51,6 +51,40 @@ add_low_level_random_noise: true ### Note 1: use [val1, val2, ...] for multiple sources in a scene ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames +### Naming convention for the input mono files +### The input filenames are represented by: +### lLLeeettszz.wav +### where: +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### LL stands for the language: JP, FR, GE, MA, DA, EN +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### tt stands for the talker ID: f1, f2, f3, m1, m2, m3 +### s stands for 'sample' and zz is the sample number; 01, ..., 14 + +### Naming convention for the input IR files +### The input IR filenames are represented by: +### IR_pp_eee_r_tt_mm_ffffff.wav +### where: +### pp stands for the provider: do (Dolby), no (Nokia), or (Orange), vo (VoiceAge), g (G.191) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### r stands for the room ID: a, b, c, ... +### tt stands for the talker position: 01, 02, ... +### mm stands for the microphone position: 01, 02, ... 
+### ffffff stands for the format ID: stAB20, stABC20, stAB100, stAB150, stMS, stBin, FOA, HOA2 + +### Naming convention for the generated output files +### The output filenames are represented by: +### leeeayszz.wav +### The filenames of the accompanying output metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: +### leeeayszz.met for metadata-assisted spatial audio +### leeeayszz.wav.o.csv for object-based audio +### where: +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### a stands for 'audio' +### y is the per-experiment category according to IVAS-8a: 01, 02, 03, 04, 05, 06 +### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07 (07 is the preliminary sample) +### o stands for the object number; 0, 1, 2, 3 scenes: cat1_s1: name: "lp08a1s01" diff --git a/item_gen_configs/P800-9.yml b/item_gen_configs/P800-9.yml index c0df61fa..cb22444e 100644 --- a/item_gen_configs/P800-9.yml +++ b/item_gen_configs/P800-9.yml @@ -6,11 +6,11 @@ ### Output format format: "FOA" -### Output sampling rate in Hz needed for headerless audio files; default = 48000 +### Output sampling rate in Hz; default = 48000 fs: 48000 -### IR sampling rate in Hz needed for headerless audio files; default = 48000 -IR_fs: 48000 +### IR sampling rate in Hz (only for files in .pcm format); default = 48000 +# IR_fs: 48000 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. 
@@ -21,10 +21,10 @@ IR_fs: 48000 input_path: "./items_mono" ### Input path to impulse response files, default = './ivas_processing_scripts/generation/IR' -IR_path: "./IRs" +# IR_path: "./IR" ### Output path for generated test items and metadata files -output_path: "./items_FOA" +output_path: "experiments/selection/P800-9/proc_input" ### (Optional) Output path for binauralized versions of the generated FOA items # binaural_path: "./items_FOA_bin" @@ -51,6 +51,40 @@ add_low_level_random_noise: true ### Note 1: use [val1, val2, ...] for multiple sources in a scene ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames +### Naming convention for the input mono files +### The input filenames are represented by: +### lLLeeettszz.wav +### where: +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### LL stands for the language: JP, FR, GE, MA, DA, EN +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### tt stands for the talker ID: f1, f2, f3, m1, m2, m3 +### s stands for 'sample' and zz is the sample number; 01, ..., 14 + +### Naming convention for the input IR files +### The input IR filenames are represented by: +### IR_pp_eee_r_tt_mm_ffffff.wav +### where: +### pp stands for the provider: do (Dolby), no (Nokia), or (Orange), vo (VoiceAge), g (G.191) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### r stands for the room ID: a, b, c, ... +### tt stands for the talker position: 01, 02, ... +### mm stands for the microphone position: 01, 02, ... 
+### ffffff stands for the format ID: stAB20, stABC20, stAB100, stAB150, stMS, stBin, FOA, HOA2 + +### Naming convention for the generated output files +### The output filenames are represented by: +### leeeayszz.wav +### The filenames of the accompanying output metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by: +### leeeayszz.met for metadata-assisted spatial audio +### leeeayszz.wav.o.csv for object-based audio +### where: +### l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) +### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 +### a stands for 'audio' +### y is the per-experiment category according to IVAS-8a: 01, 02, 03, 04, 05, 06 +### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07 (07 is the preliminary sample) +### o stands for the object number; 0, 1, 2, 3 scenes: cat1_s1: name: "lp08a1s01" diff --git a/ivas_processing_scripts/generation/__init__.py b/ivas_processing_scripts/generation/__init__.py index bbd572a3..192f1387 100755 --- a/ivas_processing_scripts/generation/__init__.py +++ b/ivas_processing_scripts/generation/__init__.py @@ -42,10 +42,10 @@ from ivas_processing_scripts.constants import ( ) from ivas_processing_scripts.generation import ( config, - process_foa_items, - process_hoa2_items, - process_ism_items, + process_ism1_items, + process_ism2_items, process_stereo_items, + process_ambi_items, ) from ivas_processing_scripts.utils import create_dir @@ -86,20 +86,25 @@ def main(args): # set up logging logger = logging_init(args, cfg) - - # generate input items - if cfg.format.startswith("ISM"): - # generate ISM items with metadata according to scene description - process_ism_items.generate_ism_items(cfg, logger) + + # generate ISM and STEREO items + if cfg.format == "ISM1": + # generate ISM1 items with metadata according to scene description + 
process_ism1_items.generate_ism1_items(cfg, logger) + elif cfg.format == "ISM2": + # generate ISM2 items with metadata according to scene description + process_ism2_items.generate_ism2_items(cfg, logger) elif cfg.format == "STEREO": # generate STEREO items according to scene description process_stereo_items.generate_stereo_items(cfg, logger) - elif cfg.format == "FOA": - # generate FOA items according to scene description - process_foa_items.generate_foa_items(cfg, logger) - elif cfg.format == "HOA2": - # generate HOA2 items according to scene description - process_hoa2_items.generate_hoa2_items(cfg, logger) + + # make format a list + if not isinstance(cfg.format, list): + cfg.format = [cfg.format] + + if "FOA" in cfg.format or "HOA2" in cfg.format: + # generate FOA/HOA2 items according to scene description + process_ambi_items.generate_ambi_items(cfg, logger) # copy configuration to output directory with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f: diff --git a/ivas_processing_scripts/generation/process_ambi_items.py b/ivas_processing_scripts/generation/process_ambi_items.py new file mode 100644 index 00000000..13f5fcdd --- /dev/null +++ b/ivas_processing_scripts/generation/process_ambi_items.py @@ -0,0 +1,318 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. 
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
+# + +import logging +import os +from math import floor +from itertools import repeat, groupby +import numpy as np + +from ivas_processing_scripts.audiotools import audio, audiofile, convert +from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness +from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_foa +from ivas_processing_scripts.generation import config +from ivas_processing_scripts.utils import apply_func_parallel + +SEED_RANDOM_NOISE = 0 + + +# function for converting nd numpy array to strings with 2 decimal digits +def csv_formatdata(data): + for row in data: + yield ["%0.2f" % v for v in row] + +# function for searching sequences of same the same character and replacing it by another string +def replace_char_seq_with_string(str, char_seq, repl_str): + result = [] + + # find groups of consecutive letters + groups = ["".join(list(g)) for k, g in groupby(str)] + + # limit the length of the replacement string by the length of the character sequence + repl_str = repl_str[:len(char_seq)] + + # replace each occurence of the sequence of characters + for g in groups: + if char_seq in g: + result.append(repl_str) + else: + result.append(g) + + return "".join(result) + + +def generate_ambi_items( + cfg: config.TestConfig, + logger: logging.Logger, +): + """Generate FOA/HOA2 items from mono items based on scene description""" + + # get the number of scenes + N_scenes = len(cfg.scenes) + + # set the target level + if "loudness" not in cfg.__dict__: + cfg.loudness = -26 + + # set the fs + if "fs" not in cfg.__dict__: + cfg.fs = 48000 + + # set the IR fs + if "IR_fs" not in cfg.__dict__: + cfg.IR_fs = 48000 + + # set the pre-amble and post-amble + if "preamble" not in cfg.__dict__: + cfg.preamble = 0.0 + + if "postamble" not in cfg.__dict__: + cfg.postamble = 0.0 + + # set the IR path + if "IR_path" not in cfg.__dict__: + cfg.IR_path = os.path.join(os.path.dirname(__file__), "IR") + + # set the pre-amble and post-amble + if 
"add_low_level_random_noise" not in cfg.__dict__: + cfg.add_low_level_random_noise = False + + # setup binaural rendering + if "binaural_path" not in cfg.__dict__: + cfg.binaural_path = "" + + # set the listening lab designator + if "listening_lab" not in cfg.__dict__: + cfg.listening_lab = "l" + + # set the language designator + if "language" not in cfg.__dict__: + cfg.language = "EN" + + # set the experiment designator + if "exp" not in cfg.__dict__: + cfg.exp = "p01" + + # set the provider + if "provider" not in cfg.__dict__: + cfg.provider = "g" + + # set the prefix for all input filenames + if "use_input_prefix" not in cfg.__dict__: + cfg.use_input_prefix = "" + else: + # replace file designators + cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) + cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) + cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) + + # set the prefix for all IR filenames + if "use_IR_prefix" not in cfg.__dict__: + cfg.use_IR_prefix = "" + else: + # replace file designators + cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "p", cfg.provider ) + cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "LL", cfg.language ) + cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "eee", cfg.exp ) + + # set the prefix for all output filenames + if "use_output_prefix" not in cfg.__dict__: + cfg.use_output_prefix = None + else: + # replace file designators + cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) + cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) + + # set multiprocessing + if "multiprocessing" not in cfg.__dict__: + cfg.multiprocessing = True + + apply_func_parallel( + generate_ambi_scene, + zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), + 
None, + # "mp" if cfg.multiprocessing else None, + None, + ) + + return + + +def generate_ambi_scene( + scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger +): + logger.info( + f"Processing scene: {scene_name} out of {len(cfg.scenes)} scenes, name: {scene['name']}" + ) + + # extract the number of audio sources + N_sources = len(np.atleast_1d(scene["source"])) + + # read the overlap length + if "overlap" in scene.keys(): + source_overlap = float(scene["overlap"]) + else: + source_overlap = 0.0 + + # read the ambi format + if "format" in scene.keys(): + ambi_format = scene["format"] + else: + ambi_format = "FOA" + + y = audio.SceneBasedAudio(ambi_format) + for i in range(N_sources): + + # parse parameters from the scene description + source_file = np.atleast_1d(scene["source"])[i] + IR_file = np.atleast_1d(scene["IR"])[i] + + logger.info(f"Convolving {source_file} with {IR_file}") + + # read source file + x = audio.fromfile("MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs) + + # read the IR file + IR = audio.fromfile(ambi_format, os.path.join(cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file)), fs=cfg.IR_fs) + + # convolve with the FOA/HOA2 IR + if ambi_format == "FOA": + x = reverb_foa(x, IR) + elif ambi_format == "HOA2": + x = reverb_hoa2(x, IR) + + # adjust the level of the foa signal + _, scale_factor, _ = get_loudness(x, cfg.loudness, "BINAURAL") + x.audio *= scale_factor + + # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) + if i > 0: + # get the length of the first source file + N_delay = len(y.audio[:, 0]) + + # add the shift + N_delay += int(-source_overlap * x.fs) + + # insert all-zero preamble + pre = np.zeros((N_delay, x.audio.shape[1])) + x.audio = np.concatenate([pre, x.audio]) + + # pad with zeros to ensure that the signal length is a multiple of 20ms 
+ N_frame = x.fs / 50 + if len(x.audio) % N_frame != 0: + N_pad = int(N_frame - len(x.audio) % N_frame) + + # insert all-zero preamble + pre = np.zeros((N_pad, x.audio.shape[1])) + x.audio = np.concatenate([pre, x.audio]) + + # add source signal to the array of source signals + y.fs = x.fs + if y.audio is None: + y.audio = x.audio.copy() + else: + # pad with zeros to have equal length of all source signals + if x.audio.shape[0] > y.audio.shape[0]: + y.audio = np.vstack( + ( + y.audio, + np.zeros( + ( + x.audio.shape[0] - y.audio.shape[0], + y.audio.shape[1], + ) + ), + ) + ) + elif y.audio.shape[0] > x.audio.shape[0]: + x.audio = np.vstack( + ( + x.audio, + np.zeros( + ( + y.audio.shape[0] - x.audio.shape[0], + x.audio.shape[1], + ) + ), + ) + ) + + # superimpose + y.audio += x.audio + + # append pre-amble and post-amble to all sources + if cfg.preamble != 0.0: + # ensure that pre-amble is a multiple of 20ms + N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) + + # insert all-zero preamble to all sources + pre = np.zeros((N_pre, y.audio.shape[1])) + y.audio = np.concatenate([pre, y.audio]) + + if cfg.postamble != 0.0: + # ensure that post-mable is a multiple of 20ms + N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) + + # append all-zero postamble to all sources + post = np.zeros((N_post, y.audio.shape[1])) + y.audio = np.concatenate([y.audio, post]) + + # add random noise + if cfg.add_low_level_random_noise: + # create uniformly distributed noise between -4 and 4 + np.random.seed(SEED_RANDOM_NOISE) + noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( + "float" + ) + + # superimpose + y.audio += noise + + # write the reverberated audio into output file + audiofile.write( + os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), + y.audio, + y.fs, + ) + + # convert to binaural if option chosen + if cfg.binaural_path != "": + binaudio = audio.fromtype("BINAURAL") + binaudio.fs = 
y.fs + convert.format_conversion(y, binaudio) + audiofile.write( + os.path.join(cfg.binaural_path, scene["name"]), + binaudio.audio, + binaudio.fs, + ) + + return diff --git a/ivas_processing_scripts/generation/process_foa_items.py b/ivas_processing_scripts/generation/process_foa_items.py index 31be77e4..56c7f332 100644 --- a/ivas_processing_scripts/generation/process_foa_items.py +++ b/ivas_processing_scripts/generation/process_foa_items.py @@ -89,143 +89,200 @@ def generate_foa_items( # setup binaural rendering if "binaural_path" not in cfg.__dict__: cfg.binaural_path = "" + + # set the listening lab designator + if "listening_lab" not in cfg.__dict__: + cfg.listening_lab = "l" + + # set the language designator + if "language" not in cfg.__dict__: + cfg.language = "EN" + + # set the experiment designator + if "exp" not in cfg.__dict__: + cfg.exp = "p01" + + # set the provider + if "provider" not in cfg.__dict__: + cfg.provider = "g" + + # set the prefix for all input filenames + if "use_input_prefix" not in cfg.__dict__: + cfg.use_input_prefix = "" + else: + # replace file designators + cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) + cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) + cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) + + # set the prefix for all IR filenames + if "use_IR_prefix" not in cfg.__dict__: + cfg.use_IR_prefix = "" + else: + # replace file designators + cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "p", cfg.provider ) + cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "LL", cfg.language ) + cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "eee", cfg.exp ) + + # set the prefix for all output filenames + if "use_output_prefix" not in cfg.__dict__: + cfg.use_output_prefix = None + else: + # replace file designators + cfg.use_output_prefix = 
replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) + cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) + + # set multiprocessing + if "multiprocessing" not in cfg.__dict__: + cfg.multiprocessing = True + + apply_func_parallel( + generate_foa_scene, + zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), + None, + "mp" if cfg.multiprocessing else None, + ) + + return - # repeat for all source files - for scene_name, scene in cfg.scenes.items(): - logger.info( - f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene['name']}" - ) - - # extract the number of audio sources - N_sources = len(np.atleast_1d(scene["source"])) - # read the overlap length - if "overlap" in scene.keys(): - source_overlap = float(scene["overlap"]) +def generate_foa_scene( + scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger +): + logger.info( + f"Processing scene: {scene_name} out of {len(cfg.scenes)} scenes, name: {scene['name']}" + ) + + # extract the number of audio sources + N_sources = len(np.atleast_1d(scene["source"])) + + # read the overlap length + if "overlap" in scene.keys(): + source_overlap = float(scene["overlap"]) + else: + source_overlap = 0.0 + + y = audio.SceneBasedAudio("FOA") + for i in range(N_sources): + + # parse parameters from the scene description + source_file = np.atleast_1d(scene["source"])[i] + IR_file = np.atleast_1d(scene["IR"])[i] + + logger.info(f"Convolving {source_file} with {IR_file}") + + # read source file + x = audio.fromfile("MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs) + + # read the IR file + IR = audio.fromfile("FOA", os.path.join(cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file)), fs=cfg.IR_fs) + + # convolve with FOA IR + x = reverb_foa(x, IR) + + # adjust the level of the foa signal + _, 
scale_factor, _ = get_loudness(x, cfg.loudness, "BINAURAL") + x.audio *= scale_factor + + # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) + if i > 0 and source_overlap != 0.0: + # get the length of the first source file + N_delay = len(y.audio[:, 0]) + + # add the shift + N_delay += int(-source_overlap * x.fs) + + # insert all-zero preamble + pre = np.zeros((N_delay, x.audio.shape[1])) + x.audio = np.concatenate([pre, x.audio]) + + # pad with zeros to ensure that the signal length is a multiple of 20ms + N_frame = x.fs / 50 + if len(x.audio) % N_frame != 0: + N_pad = int(N_frame - len(x.audio) % N_frame) + + # insert all-zero preamble + pre = np.zeros((N_pad, x.audio.shape[1])) + x.audio = np.concatenate([pre, x.audio]) + + # add source signal to the array of source signals + y.fs = x.fs + if y.audio is None: + y.audio = x.audio else: - source_overlap = 0.0 - - y = audio.SceneBasedAudio("FOA") - for i in range(N_sources): - # parse parameters from the scene description - source_file = np.atleast_1d(scene["source"])[i] - IR_file = np.atleast_1d(scene["IR"])[i] - - logger.info(f"Convolving {source_file} with {IR_file}") - - # read source file - x = audio.fromfile( - "MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs - ) - - # read the IR file - IR = audio.fromfile("FOA", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs) - - # convolve with FOA IR - x = reverb_foa(x, IR) - - # adjust the level of the foa signal - _, scale_factor, _ = get_loudness(x, cfg.loudness, "BINAURAL") - x.audio *= scale_factor - - # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) - if i > 0 and source_overlap != 0.0: - # get the length of the first source file - N_delay = len(y.audio[:, 0]) - - # add the shift - N_delay += int(-source_overlap * x.fs) - - # insert all-zero preamble - pre = np.zeros((N_delay, x.audio.shape[1])) - x.audio = np.concatenate([pre, 
x.audio]) - - # pad with zeros to ensure that the signal length is a multiple of 20ms - N_frame = x.fs / 50 - if len(x.audio) % N_frame != 0: - N_pad = int(N_frame - len(x.audio) % N_frame) - - # insert all-zero preamble - pre = np.zeros((N_pad, x.audio.shape[1])) - x.audio = np.concatenate([pre, x.audio]) - - # add source signal to the array of source signals - y.fs = x.fs - if y.audio is None: - y.audio = x.audio - else: - # pad with zeros to have equal length of all source signals - if x.audio.shape[0] > y.audio.shape[0]: - y.audio = np.vstack( - ( - y.audio, - np.zeros( - ( - x.audio.shape[0] - y.audio.shape[0], - y.audio.shape[1], - ) - ), - ) + # pad with zeros to have equal length of all source signals + if x.audio.shape[0] > y.audio.shape[0]: + y.audio = np.vstack( + ( + y.audio, + np.zeros( + ( + x.audio.shape[0] - y.audio.shape[0], + y.audio.shape[1], + ) + ), ) - elif y.audio.shape[0] > x.audio.shape[0]: - x.audio = np.vstack( - ( - x.audio, - np.zeros( - ( - y.audio.shape[0] - x.audio.shape[0], - x.audio.shape[1], - ) - ), - ) + ) + elif y.audio.shape[0] > x.audio.shape[0]: + x.audio = np.vstack( + ( + x.audio, + np.zeros( + ( + y.audio.shape[0] - x.audio.shape[0], + x.audio.shape[1], + ) + ), ) - - # superimpose - y.audio += x.audio - - # append pre-amble and post-amble to all sources - if cfg.preamble != 0.0: - # ensure that pre-amble is a multiple of 20ms - N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) - - # insert all-zero preamble to all sources - pre = np.zeros((N_pre, y.audio.shape[1])) - y.audio = np.concatenate([pre, y.audio]) - - if cfg.postamble != 0.0: - # ensure that post-mable is a multiple of 20ms - N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) - - # append all-zero postamble to all sources - post = np.zeros((N_post, y.audio.shape[1])) - y.audio = np.concatenate([y.audio, post]) - - # add random noise - if cfg.add_low_level_random_noise: - # create uniformly distributed noise between -4 and 4 - np.random.seed(SEED_RANDOM_NOISE) 
- noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( - "float" - ) + ) # superimpose - y.audio += noise + y.audio += x.audio + + # append pre-amble and post-amble to all sources + if cfg.preamble != 0.0: + # ensure that pre-amble is a multiple of 20ms + N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) + + # insert all-zero preamble to all sources + pre = np.zeros((N_pre, y.audio.shape[1])) + y.audio = np.concatenate([pre, y.audio]) + + if cfg.postamble != 0.0: + # ensure that post-mable is a multiple of 20ms + N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) + + # append all-zero postamble to all sources + post = np.zeros((N_post, y.audio.shape[1])) + y.audio = np.concatenate([y.audio, post]) + + # add random noise + if cfg.add_low_level_random_noise: + # create uniformly distributed noise between -4 and 4 + np.random.seed(SEED_RANDOM_NOISE) + noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( + "float" + ) - # write the reverberated audio into output file - output_filename = scene["name"] + # superimpose + y.audio += noise + + # write the reverberated audio into output file + audiofile.write( + os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), + y.audio, + y.fs, + ) + + # convert to binaural if option chosen + if cfg.binaural_path != "": + binaudio = audio.fromtype("BINAURAL") + binaudio.fs = y.fs + convert.format_conversion(y, binaudio) audiofile.write( - os.path.join(cfg.output_path, output_filename), y.audio, y.fs - ) # !!!! TBD: replace all os.path.xxx operations with the Path object - - # convert to binaural if option chosen - if cfg.binaural_path != "": - binaudio = audio.fromtype("BINAURAL") - binaudio.fs = y.fs - convert.format_conversion(y, binaudio) - audiofile.write( - os.path.join(cfg.binaural_path, output_filename), - binaudio.audio, - binaudio.fs, - ) # !!!! 
TBD: replace all os.path.xxx operations with the Path object + os.path.join(cfg.binaural_path, scene["name"]), + binaudio.audio, + binaudio.fs, + ) return diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py new file mode 100644 index 00000000..f8c056c8 --- /dev/null +++ b/ivas_processing_scripts/generation/process_ism1_items.py @@ -0,0 +1,357 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. 
It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. +# + +import csv +import logging +import os +from math import floor +from itertools import repeat, groupby + +import numpy as np + +from ivas_processing_scripts.audiotools import audio, audiofile +from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness +from ivas_processing_scripts.generation import config +from ivas_processing_scripts.utils import apply_func_parallel + +SEED_RANDOM_NOISE = 0 + + +# function for converting nd numpy array to strings with 2 decimal digits +def csv_formatdata(data): + for row in data: + yield ["%0.2f" % v for v in row] + +# function for searching sequences of same the same character and replacing it by another string +def replace_char_seq_with_string(str, char_seq, repl_str): + result = [] + + # find groups of consecutive letters + groups = ["".join(list(g)) for k, g in groupby(str)] + + # limit the length of the replacement string by the length of the character sequence + repl_str = repl_str[:len(char_seq)] + + # replace each occurence of the sequence of characters + for g in groups: + if char_seq in g: + result.append(repl_str) + else: + result.append(g) + + return "".join(result) + + +def generate_ism1_items( + cfg: config.TestConfig, + logger: logging.Logger, +): + """Generate ISM2 items with metadata from mono items based on scene description""" + + # set the target level + 
def _expand_positions(spec, n_frames):
    """Expand one azimuth/elevation entry of a scene to exactly `n_frames` values.

    Args:
        spec: a number, a sequence of numbers, a string holding a single
            expression, or a string "start:step:stop" describing a trajectory.
        n_frames: required number of values (one per 20 ms frame).

    Returns:
        1-d numpy array with `n_frames` values; too long inputs are cut, too
        short inputs are extended by repeating their last value.
    """
    if isinstance(spec, str):
        # NOTE(security): the strings of the scene description are passed to
        # eval(), as in the original code - only process trusted scene files.
        if ":" in spec:
            parts = spec.split(":")
            values = np.arange(
                float(eval(parts[0])),
                float(eval(parts[2])),
                float(eval(parts[1])),
            )
        else:
            values = np.array(float(eval(spec)), ndmin=1)
    else:
        values = np.array(spec, ndmin=1)

    if len(values) > n_frames:
        # cut the array
        values = values[:n_frames]
    elif len(values) < n_frames:
        # replicate the last value
        values = np.append(values, np.full(n_frames - len(values), values[-1]))

    return values


def generate_ism1_scene(
    scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger
):
    """Generate one ISM1 item (mono audio + one metadata file) from a scene.

    All source files of the scene are level-adjusted, shifted according to the
    scene's "overlap" and summed into a single mono signal. One row of
    metadata (azimuth, elevation, distance, spread, gain) is produced per
    20 ms frame and written to "<name>.0.csv" next to the audio file.

    Args:
        scene_name: key of the scene in cfg.scenes (used for logging only).
        scene: scene description; the keys "name", "source", "azimuth" and
            "elevation" are read, "overlap" is optional (default 0.0).
        cfg: test configuration; reads input_path, output_path, fs, loudness,
            preamble, postamble, add_low_level_random_noise, use_input_prefix
            and use_output_prefix.
        logger: logger for progress and error messages.
    """
    logger.info(
        f"Processing {scene_name} out of {len(cfg.scenes)} scenes, name: {scene['name']}"
    )

    # an unset output prefix is stored as None -> treat it as "no prefix"
    # (None + str would raise a TypeError when the filenames are built)
    output_prefix = cfg.use_output_prefix or ""

    # extract the number of audio sources
    N_sources = len(np.atleast_1d(scene["source"]))

    # initialize output array
    y = audio.ChannelBasedAudio("MONO")

    # read the overlap length
    source_overlap = float(scene["overlap"]) if "overlap" in scene else 0.0

    logger.info(
        f"Encoding {scene['source']} at position(s) {scene['azimuth']},{scene['elevation']}"
    )

    # repeat for all source files
    for i in range(N_sources):
        # parse parameters from the scene description
        source_file = (
            scene["source"][i]
            if isinstance(scene["source"], list)
            else scene["source"]
        )

        # read source file
        x = audio.fromfile(
            "MONO",
            os.path.join(
                cfg.input_path,
                os.path.dirname(source_file),
                cfg.use_input_prefix + os.path.basename(source_file),
            ),
            fs=cfg.fs,
        )

        # get the number of frames (multiple of 20ms)
        N_frames = int(len(x.audio) / x.fs * 50)
        frame_len = int(x.fs / 50)

        # trim the samples from the end to ensure that the signal length is a multiple of 20ms
        x.audio = x.audio[: N_frames * frame_len]

        # adjust the level of the source file
        _, scale_factor, _ = get_loudness(x, cfg.loudness, "MONO")
        x.audio *= scale_factor

        # shift the second (and all other) source files
        # (positive shift creates overlap, negative shift creates a gap)
        if i > 0:
            # start from the length of the signal accumulated so far
            N_delay = len(y.audio)

            # add the shift value (ensure that the shift is a multiple of 20ms)
            N_delay += int(floor(-source_overlap * 50) / 50 * x.fs)

            # insert all-zero signal
            pre = np.zeros((N_delay, x.audio.shape[1]))
            x.audio = np.concatenate([pre, x.audio])

        # pad with zeros to ensure that the signal length is a multiple of 20ms
        if len(x.audio) % frame_len != 0:
            N_pad = int(frame_len - len(x.audio) % frame_len)
            post = np.zeros((N_pad, x.audio.shape[1]))
            x.audio = np.concatenate([x.audio, post])

        # superimpose all source signals together
        y.fs = x.fs
        if y.audio is None:
            y.audio = x.audio.copy()
        else:
            # zero-pad the shorter of the two signals; the previous in-place
            # resize() of y.audio cut the accumulated mix whenever the new
            # source ended before it
            N_diff = len(x.audio) - len(y.audio)
            if N_diff > 0:
                y.audio = np.concatenate(
                    [y.audio, np.zeros((N_diff, y.audio.shape[1]))]
                )
            elif N_diff < 0:
                x.audio = np.concatenate(
                    [x.audio, np.zeros((-N_diff, x.audio.shape[1]))]
                )
            y.audio += x.audio

    # number of 20ms frames of the mixed signal
    N_frames = int(len(y.audio) / y.fs * 50)

    # read azimuth information and create array with N_frames values
    azi = _expand_positions(scene["azimuth"], N_frames)

    # convert azimuth from 0 .. 360 to -180 .. +180
    azi = (azi + 180) % 360 - 180

    # check if azimuth is from -180 .. +180
    if any(azi > 180) or any(azi < -180):
        logger.error(
            f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}"
        )

    # read elevation information and create array with N_frames values
    ele = _expand_positions(scene["elevation"], N_frames)

    # check if elevation is from -90 .. +90
    if any(ele > 90) or any(ele < -90):
        logger.error(
            f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}"
        )

    # additional metadata
    dist = np.ones(N_frames)  # !!!! TBD - check what to do with these metadata
    spread = np.zeros(N_frames)
    gain = np.ones(N_frames)

    # arrange all metadata fields column-wise into a matrix
    y_meta = np.column_stack((azi, ele, dist, spread, gain))

    # append pre-amble and post-amble
    if cfg.preamble != 0.0:
        # ensure that pre-amble is a multiple of 20ms
        N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)

        # insert all-zero preamble
        pre = np.zeros((N_pre, y.audio.shape[1]))
        y.audio = np.concatenate([pre, y.audio])

        # insert neutral position as a pre-amble
        N_pre = int(N_pre / frame_len)
        pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pre, 1))
        y_meta = np.concatenate([pre, y_meta], axis=0)

    if cfg.postamble != 0.0:
        # ensure that post-amble is a multiple of 20ms
        N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)

        # append all-zero postamble
        post = np.zeros((N_post, y.audio.shape[1]))
        y.audio = np.concatenate([y.audio, post])

        # append neutral position as a post-amble
        N_post = int(N_post / frame_len)
        post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_post, 1))
        y_meta = np.concatenate([y_meta, post], axis=0)

    # add random noise
    if cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        # (fixed seed -> identical noise for every run)
        np.random.seed(SEED_RANDOM_NOISE)
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype(
            "float"
        )

        # superimpose
        y.audio += noise

    # output folder and base name are shared by the audio and the metadata file
    output_dir = os.path.join(cfg.output_path, os.path.dirname(scene["name"]))
    output_basename = os.path.basename(scene["name"])

    # write ISM audio stream to the output file
    audiofile.write(
        os.path.join(output_dir, output_prefix + output_basename), y.audio, y.fs
    )

    # write ISM metadata to the output file in .0.csv format
    # (the extension of the audio file is replaced by ".0.csv")
    csv_filename = os.path.join(
        output_dir,
        output_prefix + os.path.splitext(output_basename)[0] + ".0.csv",
    )

    with open(
        csv_filename,
        "w",
        newline="",
        encoding="utf-8",
    ) as f:
        # create csv writer
        writer = csv.writer(f)

        # write all rows to the .csv file
        writer.writerows(csv_formatdata(y_meta))
# helper turning rows of numbers into rows of strings with 2 decimal digits
def csv_formatdata(data):
    """Lazily yield every row of `data` as a list of "%0.2f"-formatted strings."""
    fmt = "%0.2f"
    for record in data:
        formatted = []
        for value in record:
            formatted.append(fmt % value)
        yield formatted
def generate_ism2_items(
    cfg: config.TestConfig,
    logger: logging.Logger,
):
    """Generate ISM2 items with metadata from mono items based on scene description

    Missing configuration entries are filled with their default values, the
    filename prefixes are expanded and every scene in cfg.scenes is passed to
    generate_ism2_scene().

    Args:
        cfg: test configuration; it is modified in place (defaults are added,
            use_input_prefix / use_output_prefix are replaced by the expanded
            prefixes).
        logger: logger handed over to the scene processing.
    """
    settings = cfg.__dict__

    # default values of all optional configuration entries
    defaults = {
        "loudness": -26,  # target level
        "fs": 48000,  # output sampling rate
        "preamble": 0.0,
        "postamble": 0.0,
        "add_low_level_random_noise": False,
        "listening_lab": "l",  # listening lab designator
        "language": "EN",  # language designator
        "exp": "p07",  # experiment designator
        "provider": "g",
        "multiprocessing": True,
    }
    for key, value in defaults.items():
        if key not in settings:
            setattr(cfg, key, value)

    # set the prefix for all input filenames; a missing (or null) entry means
    # "no prefix", otherwise the file designators are replaced
    input_prefix = settings.get("use_input_prefix") or ""
    for char_seq, designator in (
        ("l", cfg.listening_lab),
        ("LL", cfg.language),
        ("eee", cfg.exp),
    ):
        input_prefix = replace_char_seq_with_string(
            input_prefix, char_seq, designator
        )
    cfg.use_input_prefix = input_prefix

    # set the prefix for all output filenames; the default must be an empty
    # string (not None) because the prefix is concatenated with the filename
    output_prefix = settings.get("use_output_prefix") or ""
    for char_seq, designator in (
        ("l", cfg.listening_lab),
        ("eee", cfg.exp),
    ):
        output_prefix = replace_char_seq_with_string(
            output_prefix, char_seq, designator
        )
    cfg.use_output_prefix = output_prefix

    # process all scenes
    # NOTE: the scenes are processed sequentially, cfg.multiprocessing is not
    # evaluated at the moment ("mp" if cfg.multiprocessing else None)
    apply_func_parallel(
        generate_ism2_scene,
        zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)),
        None,
        None,
    )

    return
def _expand_positions(spec, n_frames):
    """Expand one azimuth/elevation entry of a scene to exactly `n_frames` values.

    Args:
        spec: a number, a sequence of numbers, a string holding a single
            expression, or a string "start:step:stop" describing a trajectory.
        n_frames: required number of values (one per 20 ms frame).

    Returns:
        1-d numpy array with `n_frames` values; too long inputs are cut, too
        short inputs are extended by repeating their last value.
    """
    if isinstance(spec, str):
        # NOTE(security): the strings of the scene description are passed to
        # eval(), as in the original code - only process trusted scene files.
        if ":" in spec:
            parts = spec.split(":")
            values = np.arange(
                float(eval(parts[0])),
                float(eval(parts[2])),
                float(eval(parts[1])),
            )
        else:
            values = np.array(float(eval(spec)), ndmin=1)
    else:
        values = np.array(spec, ndmin=1)

    if len(values) > n_frames:
        # cut the array
        values = values[:n_frames]
    elif len(values) < n_frames:
        # replicate the last value
        values = np.append(values, np.full(n_frames - len(values), values[-1]))

    return values


def generate_ism2_scene(
    scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger
):
    """Generate one ISM2 item (multi-channel audio + one metadata file per source).

    Every source file of the scene is level-adjusted, shifted according to the
    scene's "overlap" and stored as a separate channel of the output file. For
    each source, one row of metadata (azimuth, elevation, distance, spread,
    gain) is produced per 20 ms frame and written to "<name>.<i>.csv".

    Args:
        scene_name: key of the scene in cfg.scenes (used for logging only).
        scene: scene description; the keys "name", "source", "azimuth" and
            "elevation" are read, "overlap" is optional (default 0.0).
        cfg: test configuration; reads input_path, output_path, fs, loudness,
            preamble, postamble, add_low_level_random_noise, use_input_prefix
            and use_output_prefix.
        logger: logger for progress and error messages.
    """
    logger.info(
        f"Processing {scene_name} out of {len(cfg.scenes)} scenes, name: {scene['name']}"
    )

    # an unset output prefix is stored as None -> treat it as "no prefix"
    # (None + str would raise a TypeError when the filenames are built)
    output_prefix = cfg.use_output_prefix or ""

    # metadata row describing the neutral position (used for all padding)
    neutral_position = [0.00, 0.00, 1.00, 0.00, 1.00]

    # extract the number of audio sources
    N_sources = len(np.atleast_1d(scene["source"]))

    # initialize output arrays
    y = audio.ChannelBasedAudio("STEREO")
    y_meta = None

    # read the overlap length
    source_overlap = float(scene["overlap"]) if "overlap" in scene else 0.0

    # repeat for all source files
    for i in range(N_sources):
        # parse parameters from the scene description
        source_file = (
            scene["source"][i]
            if isinstance(scene["source"], list)
            else scene["source"]
        )
        source_azi = (
            scene["azimuth"][i]
            if isinstance(scene["azimuth"], list)
            else scene["azimuth"]
        )
        source_ele = (
            scene["elevation"][i]
            if isinstance(scene["elevation"], list)
            else scene["elevation"]
        )

        logger.info(
            f"Encoding {source_file} at position(s) {source_azi},{source_ele}"
        )

        # read source file
        x = audio.fromfile(
            "MONO",
            os.path.join(
                cfg.input_path,
                os.path.dirname(source_file),
                cfg.use_input_prefix + os.path.basename(source_file),
            ),
            fs=cfg.fs,
        )

        # get the number of frames (multiple of 20ms)
        N_frames = int(len(x.audio) / x.fs * 50)
        frame_len = int(x.fs / 50)

        # trim the samples from the end to ensure that the signal length is a multiple of 20ms
        x.audio = x.audio[: N_frames * frame_len]

        # adjust the level of the source file
        _, scale_factor, _ = get_loudness(x, cfg.loudness, "MONO")
        x.audio *= scale_factor

        # read azimuth information and create array with N_frames values
        azi = _expand_positions(source_azi, N_frames)

        # convert azimuth from 0 .. 360 to -180 .. +180
        azi = (azi + 180) % 360 - 180

        # check if azimuth is from -180 .. +180
        if any(azi > 180) or any(azi < -180):
            logger.error(
                f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}"
            )

        # read elevation information and create array with N_frames values
        ele = _expand_positions(source_ele, N_frames)

        # check if elevation is from -90 .. +90
        if any(ele > 90) or any(ele < -90):
            logger.error(
                f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}"
            )

        # additional metadata
        dist = np.ones(N_frames)  # !!!! TBD - check what to do with these metadata
        spread = np.zeros(N_frames)
        gain = np.ones(N_frames)

        # arrange all metadata fields column-wise into a matrix
        x_meta = np.column_stack((azi, ele, dist, spread, gain))

        # shift the second (and all other) source files
        # (positive shift creates overlap, negative shift creates a gap)
        if i > 0:
            # start from the length of the signals accumulated so far
            N_delay = y.audio.shape[0]

            # add the shift value (ensure that the shift is a multiple of 20ms)
            N_delay += int(floor(-source_overlap * 50) / 50 * x.fs)

            # insert all-zero signal
            pre = np.zeros((N_delay, x.audio.shape[1]))
            x.audio = np.concatenate([pre, x.audio])

            # insert neutral position as a pre-amble
            N_delay = int(N_delay / frame_len)
            pre = np.tile(neutral_position, (N_delay, 1))
            x_meta = np.concatenate([pre, x_meta])

        # pad with zeros to ensure that the signal length is a multiple of 20ms
        if len(x.audio) % frame_len != 0:
            N_pad = int(frame_len - len(x.audio) % frame_len)
            post = np.zeros((N_pad, x.audio.shape[1]))
            x.audio = np.concatenate([x.audio, post])

        # pad the metadata with the neutral position to cover all audio frames
        N_pad = int(len(x.audio) / frame_len) - len(x_meta)
        if N_pad > 0:
            post = np.tile(neutral_position, (N_pad, 1))
            x_meta = np.concatenate([x_meta, post])

        # add source signal to the array of all source signals
        y.fs = x.fs
        if y.audio is None:
            y.audio = x.audio.copy()
        else:
            # pad with zeros to have the same length of all source signals
            N_diff = x.audio.shape[0] - y.audio.shape[0]
            if N_diff > 0:
                y.audio = np.vstack(
                    (y.audio, np.zeros((N_diff, y.audio.shape[1])))
                )
            elif N_diff < 0:
                x.audio = np.vstack(
                    (x.audio, np.zeros((-N_diff, x.audio.shape[1])))
                )
            # every source becomes a separate channel
            y.audio = np.hstack((y.audio, x.audio))

        # add metadata to the array of all metadata
        # make sure x_meta is a 3d array (source, frame, metadata field)
        x_meta = x_meta[np.newaxis, :]
        if y_meta is None:
            y_meta = x_meta
        else:
            # repeat the last position of the shorter metadata array to have
            # equal length of all metadata; the padding is done along the frame
            # axis, so it stays correct for any number of accumulated sources
            N_delta = x_meta.shape[1] - y_meta.shape[1]
            if N_delta > 0:
                y_meta = np.concatenate(
                    [y_meta, np.repeat(y_meta[:, -1:, :], N_delta, axis=1)],
                    axis=1,
                )
            elif N_delta < 0:
                x_meta = np.concatenate(
                    [x_meta, np.repeat(x_meta[:, -1:, :], -N_delta, axis=1)],
                    axis=1,
                )

            y_meta = np.concatenate([y_meta, x_meta])

    # append pre-amble and post-amble to all sources
    if cfg.preamble != 0.0:
        # ensure that pre-amble is a multiple of 20ms
        N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)

        # insert all-zero preamble to all sources
        pre = np.zeros((N_pre, y.audio.shape[1]))
        y.audio = np.concatenate([pre, y.audio])

        # insert neutral position as a pre-amble to all sources
        N_pre = int(N_pre / frame_len)
        pre = np.tile(neutral_position, (y_meta.shape[0], N_pre, 1))
        y_meta = np.concatenate([pre, y_meta], axis=1)

    if cfg.postamble != 0.0:
        # ensure that post-amble is a multiple of 20ms
        N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)

        # append all-zero postamble to all sources
        post = np.zeros((N_post, y.audio.shape[1]))
        y.audio = np.concatenate([y.audio, post])

        # append neutral position as a post-amble to all sources
        N_post = int(N_post / frame_len)
        post = np.tile(neutral_position, (y_meta.shape[0], N_post, 1))
        y_meta = np.concatenate([y_meta, post], axis=1)

    # add random noise
    if cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        # (fixed seed -> identical noise for every run)
        np.random.seed(SEED_RANDOM_NOISE)
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype(
            "float"
        )

        # superimpose
        y.audio += noise

    # output folder and base name are shared by the audio and the metadata files
    output_dir = os.path.join(cfg.output_path, os.path.dirname(scene["name"]))
    output_basename = os.path.basename(scene["name"])

    # write individual ISM audio streams to the output file in an interleaved format
    audiofile.write(
        os.path.join(output_dir, output_prefix + output_basename), y.audio, y.fs
    )

    # write individual ISM metadata to output files in .csv format
    for i in range(N_sources):
        # generate .csv filename (should end with .0.csv, .1.csv, ...)
        csv_filename = os.path.join(
            output_dir,
            output_prefix + os.path.splitext(output_basename)[0] + f".{i}.csv",
        )

        with open(
            csv_filename,
            "w",
            newline="",
            encoding="utf-8",
        ) as f:
            # create csv writer
            writer = csv.writer(f)

            # write all rows to the .csv file
            writer.writerows(csv_formatdata(y_meta[i]))
-# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
-# - -import csv -import logging -import os -from math import floor - -import numpy as np - -from ivas_processing_scripts.audiotools import audio, audiofile -from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness -from ivas_processing_scripts.generation import config - -SEED_RANDOM_NOISE = 0 - - -# function for converting nd numpy array to strings with 2 decimal digits -def csv_formatdata(data): - for row in data: - yield ["%0.2f" % v for v in row] - - -def generate_ism_items( - cfg: config.TestConfig, - logger: logging.Logger, -): - """Generate ISM items with metadata from mono items based on scene description""" - - # get the number of scenes - N_scenes = len(cfg.scenes) - - # set the target level - if "loudness" not in cfg.__dict__: - cfg.loudness = -26 - - # set the fs - if "fs" not in cfg.__dict__: - cfg.fs = 48000 - - # set the pre-amble and post-amble - if "preamble" not in cfg.__dict__: - cfg.preamble = 0.0 - - if "postamble" not in cfg.__dict__: - cfg.postamble = 0.0 - - # set the pre-amble and post-amble - if "add_low_level_random_noise" not in cfg.__dict__: - cfg.add_low_level_random_noise = False - - for scene_name, scene in cfg.scenes.items(): - logger.info( - f"Processing {scene_name} out of {N_scenes} scenes, name: {scene['name']}" - ) - - # extract the number of audio sources - N_sources = len(np.atleast_1d(scene["source"])) - - # initialize output variables - if format == "ISM2": - y = audio.ChannelBasedAudio("STEREO") - else: - y = audio.ChannelBasedAudio("MONO") - y_meta = None - - # read the overlap length - if "overlap" in scene.keys(): - source_overlap = float(scene["overlap"]) - else: - source_overlap = 0.0 - - # repeat for all source files - for i in range(N_sources): - # parse parameters from the scene description - source_file = ( - scene["source"][i] - if isinstance(scene["source"], list) - else scene["source"] - ) - source_azi = ( - scene["azimuth"][i] - if isinstance(scene["azimuth"], list) - else scene["azimuth"] - 
) - source_ele = ( - scene["elevation"][i] - if isinstance(scene["elevation"], list) - else scene["elevation"] - ) - - logger.info( - f"Encoding {source_file} at position(s) {source_azi},{source_ele}" - ) - - # read source file - x = audio.fromfile( - "MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs - ) - - # get the number of frames (multiple of 20ms) - N_frames = int(len(x.audio) / x.fs * 50) - frame_len = int(x.fs / 50) - - # trim the samples from the end to ensure that the signal length is a multiple of 20ms - x.audio = x.audio[: N_frames * frame_len] - - # adjust the level of the source file - _, scale_factor, _ = get_loudness(x, cfg.loudness, "MONO") - x.audio *= scale_factor - - # read azimuth information and create array - if isinstance(source_azi, str): - if ":" in source_azi: - source_azi = source_azi.split(":") - azi = np.arange( - float(eval(source_azi[0])), - float(eval(source_azi[2])), - float(eval(source_azi[1])), - ) - else: - azi = np.array(float(eval(source_azi)), ndmin=1)[:N_frames] - else: - azi = np.array(source_azi, ndmin=1)[:N_frames] - - # ensure that azimuth array has N_frames values - if len(azi) > N_frames: - # cut the array of azimuth values - azi = azi[:N_frames] - elif len(azi) < N_frames: - # replicate the last azimuth - azi = np.append(azi, np.full(N_frames - len(azi), azi[-1])) - - # convert azimuth from 0 .. 360 to -180 .. +180 - azi = (azi + 180) % 360 - 180 - - # check if azimuth is from -180 .. 
+180 - if any(azi > 180) or any(azi < -180): - logger.error( - f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}" - ) - - # read elevation information and create array - if isinstance(source_ele, str): - if ":" in source_ele: - source_ele = source_ele.split(":") - ele = np.arange( - float(eval(source_ele[0])), - float(eval(source_ele[2])), - float(eval(source_ele[1])), - ) - else: - ele = np.array(float(eval(source_ele)), ndmin=1)[:N_frames] - else: - ele = np.array(source_ele, ndmin=1)[:N_frames] - - # ensure that elevation array has N_frames values - if len(ele) > N_frames: - # cut the array of elevation values - ele = ele[:N_frames] - elif len(ele) < N_frames: - # replicate the last elevation - ele = np.append(ele, np.full(N_frames - len(ele), ele[-1])) - - # check if elevation is from -90 .. +90 - if any(ele > 90) or any(ele < -90): - logger.error( - f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}" - ) - - # additional metadata - dist = np.ones(N_frames) # !!!! TBD - check what to do with these metadata - spread = np.zeros(N_frames) - gain = np.ones(N_frames) - - # arrange all metadata fields column-wise into a matrix - x_meta = np.column_stack((azi, ele, dist, spread, gain)) - - # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) - if i > 0 and source_overlap != 0.0: - # get the length of the first source file - N_delay = len(y.audio[:, 0]) - - # add the shift value (ensure that the shift is a multiple of 20ms) - N_delay += int(floor(-source_overlap * 50) / 50 * x.fs) - - # insert all-zero signal - pre = np.zeros((N_delay, x.audio.shape[1])) - x.audio = np.concatenate([pre, x.audio]) - - # insert neutral position as a pre-amble - N_delay = int(N_delay / frame_len) - pre = np.tile( - [0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1) - ) # !!!! 
TBD - check if we should insert netrual position or the first position of the metadata - x_meta = np.concatenate([pre, x_meta]) - - # pad with zeros to ensure that the signal length is a multiple of 20ms - if len(x.audio) % frame_len != 0: - # pad the source signal - N_pad = int(frame_len - len(x.audio) % frame_len) - post = np.zeros((N_pad, x.audio.shape[1])) - x.audio = np.concatenate([x.audio, post]) - - # pad the metadata - N_pad = int(len(x.audio) / frame_len) - len(x_meta) - if N_pad > 0: - post = np.tile( - [0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1) - ) # !!!! TBD - check if we should insert netrual position or the first position of the metadata - x_meta = np.concatenate([x_meta, post]) - - # add source signal to the array of all source signals - y.fs = x.fs - if y.audio is None: - y.audio = x.audio - else: - # pad with zeros to have the same length of all source signals - if x.audio.shape[0] > y.audio.shape[0]: - y.audio = np.vstack( - ( - y.audio, - np.zeros( - (x.audio.shape[0] - y.audio.shape[0], y.audio.shape[1]) - ), - ) - ) - elif y.audio.shape[0] > x.audio.shape[0]: - x.audio = np.vstack( - ( - x.audio, - np.zeros( - (y.audio.shape[0] - x.audio.shape[0], x.audio.shape[1]) - ), - ) - ) - y.audio = np.hstack((y.audio, x.audio)) - - # add metadata to the array of all metadata - # make sure x_meta is a 3d array - x_meta = x_meta[np.newaxis, :] - if y_meta is None: - y_meta = x_meta - else: - N_srcs = y_meta.shape[0] - N_meta_features = y_meta.shape[2] - - # append the last position of the metadata to have equal length of all metadata - if x_meta.shape[1] > y_meta.shape[1]: - N_delta = x_meta.shape[1] - y_meta.shape[1] - # reshape to 2d array - y_meta = y_meta.reshape(y_meta.shape[1], -1) - # repeat last row N_delta times and append to the array - y_meta = np.vstack((y_meta, np.tile(y_meta[-1, :], (N_delta, 1)))) - # reshape back to 3d array - y_meta = y_meta.reshape(N_srcs, -1, N_meta_features) - elif y_meta.shape[1] > x_meta.shape[1]: - N_delta = 
y_meta.shape[1] - x_meta.shape[1] - # reshape to 2d array - x_meta = x_meta.reshape(x_meta.shape[1], -1) - # repeat last row N_delta times and append to the array - x_meta = np.vstack((x_meta, np.tile(x_meta[-1, :], (N_delta, 1)))) - # reshape back to 3d array - x_meta = np.expand_dims(x_meta, axis=0) - - y_meta = np.concatenate([y_meta, x_meta]) - - # append pre-amble and post-amble to all sources - if cfg.preamble != 0.0: - # ensure that pre-amble is a multiple of 20ms - N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) - - # insert all-zero preamble to all sources - pre = np.zeros((N_pre, y.audio.shape[1])) - y.audio = np.concatenate([pre, y.audio]) - - # insert neutral position as a pre-amble to all sources - N_pre = int(N_pre / frame_len) - pre = np.tile( - [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1) - ) # !!!! TBD - check if we should insert netrual position or the first position of the metadata - y_meta = np.concatenate([pre, y_meta], axis=1) - - if cfg.postamble != 0.0: - # ensure that post-mable is a multiple of 20ms - N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) - - # append all-zero postamble to all sources - post = np.zeros((N_post, y.audio.shape[1])) - y.audio = np.concatenate([y.audio, post]) - - # append neutral position as a post-amble to all sources - N_post = int(N_post / frame_len) - post = np.tile( - [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1) - ) # !!!! 
TBD - check if we should insert netrual position or the last position of the metadata - y_meta = np.concatenate([y_meta, post], axis=1) - - # add random noise - if cfg.add_low_level_random_noise: - # create uniformly distributed noise between -4 and 4 - np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( - "float" - ) - - # superimpose - y.audio += noise - - # write individual ISM audio streams to the output file in an interleaved format - output_filename = scene["name"] - audiofile.write( - os.path.join(cfg.output_path, output_filename), y.audio, y.fs - ) # !!!! TBD: replace all os.path.xxx operations with the Path object - - # write individual ISM metadata to output files in .csv format - for i in range(N_sources): - # generate .csv filename (should end with .0.csv, .1.csv, ...) - csv_filename = os.path.normpath(f"{output_filename}.{i}.csv") - - with open( - os.path.join(cfg.output_path, csv_filename), - "w", - newline="", - encoding="utf-8", - ) as f: - # create csv writer - writer = csv.writer(f) - - # write all rows to the .csv file - writer.writerows(csv_formatdata(y_meta[i])) diff --git a/ivas_processing_scripts/generation/process_stereo_items.py b/ivas_processing_scripts/generation/process_stereo_items.py index 10ac66bf..7f4ca115 100644 --- a/ivas_processing_scripts/generation/process_stereo_items.py +++ b/ivas_processing_scripts/generation/process_stereo_items.py @@ -29,7 +29,7 @@ # accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and # the United Nations Convention on Contracts on the International Sales of Goods. 
# -import pdb + import logging import os from itertools import repeat, groupby @@ -156,6 +156,7 @@ def generate_stereo_items( None, "mp" if cfg.multiprocessing else None, ) + return @@ -169,9 +170,6 @@ def generate_stereo_scene( # extract the number of audio sources N_sources = len(np.atleast_1d(scene["source"])) - # read the IR source file (check if stereo or two mono files were provided) - # source_IR = np.atleast_1d(scene["IR"]) - # read the overlap length if "overlap" in scene.keys(): source_overlap = float(scene["overlap"]) @@ -201,7 +199,7 @@ def generate_stereo_scene( x.audio *= scale_factor # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) - if i > 0 and source_overlap != 0.0: + if i > 0: # get the length of the first source file N_delay = len(y.audio[:, 0]) @@ -215,7 +213,7 @@ def generate_stereo_scene( # add source signal to the array of source signals y.fs = x.fs if y.audio is None: - y.audio = x.audio + y.audio = x.audio.copy() else: # pad with zeros to have equal length of all source signals if x.audio.shape[0] > y.audio.shape[0]: @@ -282,7 +280,6 @@ def generate_stereo_scene( y.audio += noise # write the reverberated audio into output file - # output_filename = cfg.listening_lab + cfg.exp + scene["name"] + ".wav" audiofile.write( os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), y.audio, -- GitLab From 0540c0f3fa6d60d82da84cdf06897d321aa85ddb Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 8 Jun 2023 10:14:24 +0200 Subject: [PATCH 07/11] updates to P800-7,8,9 --- item_gen_configs/P800-7.yml | 6 +- item_gen_configs/P800-8.yml | 376 ++++++++++-------- item_gen_configs/P800-9.yml | 376 ++++++++++-------- .../generation/process_ambi_items.py | 7 +- .../generation/process_foa_items.py | 288 -------------- .../generation/process_hoa2_items.py | 233 ----------- .../generation/process_ism1_items.py | 3 +- 
.../generation/process_ism2_items.py | 3 +- 8 files changed, 442 insertions(+), 850 deletions(-) delete mode 100644 ivas_processing_scripts/generation/process_foa_items.py delete mode 100644 ivas_processing_scripts/generation/process_hoa2_items.py diff --git a/item_gen_configs/P800-7.yml b/item_gen_configs/P800-7.yml index f93ea386..dc92d089 100644 --- a/item_gen_configs/P800-7.yml +++ b/item_gen_configs/P800-7.yml @@ -24,16 +24,16 @@ output_path: "experiments/selection/P800-7/proc_input" loudness: -26 ### Pre-amble and Post-amble length in seconds (default = 0.0) -preamble: 1.0 +preamble: 0.5 postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true -### File designators, default is "l" for listening lab, "EN" for language, "p06" for exp and "g" for provider +### File designators, default is "l" for listening lab, "EN" for language, "p07" for exp and "g" for provider listening_lab: "l" language: "EN" -exp: "p01" +exp: "p07" provider: "g" ### Use prefix for all input filenames (default: "") diff --git a/item_gen_configs/P800-8.yml b/item_gen_configs/P800-8.yml index 457ae07b..6f30a8eb 100644 --- a/item_gen_configs/P800-8.yml +++ b/item_gen_configs/P800-8.yml @@ -3,8 +3,13 @@ # General configuration ################################################ +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! 
This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + ### Output format -format: "FOA" +format: ["FOA", "HOA2"] ### Output sampling rate in Hz; default = 48000 fs: 48000 @@ -12,16 +17,11 @@ fs: 48000 ### IR sampling rate in Hz (only for files in .pcm format); default = 48000 # IR_fs: 48000 -### Any relative paths will be interpreted relative to the working directory the script is called from! -### Usage of absolute paths is recommended. -### Do not use file names with dots "." in them! This is not supported, use "_" instead -### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions - ### Input path to mono files input_path: "./items_mono" -### Input path to impulse response files, default = './ivas_processing_scripts/generation/IR' -# IR_path: "./IR" +### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' +IR_path: "./IRs" ### Output path for generated test items and metadata files output_path: "experiments/selection/P800-8/proc_input" @@ -39,17 +39,37 @@ postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true +### File designators, default is "l" for listening lab, "EN" for language, "p04" for exp and "g" for provider +listening_lab: "l" +language: "EN" +exp: "p01" +provider: "g" + +### Use prefix for all input filenames (default: "") +### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +use_input_prefix: "lLLeee" + +### Use prefix for all IR filenames (default: "") +### p stands for the 'provider', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +# use_IR_prefix: "IR_pp_eee_" + +### Use prefix 
for all output filenames (default: "") +### l stands for the 'listening_lab' designator, e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +use_output_prefix: "leee" ################################################ ### Scene description ################################################ -### Each scene must start with the sceneN tag -### Specify the mono source filename (the program will search for it in the input_path folder) -### Specify the IR source filename (the program will search for it in the input_path_IR folder) -### Specify the overlap length in seconds for each input source (negative value creates a gap) -### Note 1: use [val1, val2, ...] for multiple sources in a scene -### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames +### Each scene must begin by specifying the category in the following format: catN_I where N is the category index and I is the scene index +### Each scene shall be described using the following parameters/properties: +### name: filename of the generated output item (the program will save the generated items in the output_path folder, note: it is possible to use subfolders, e.g. items_stereo/x1_s01.wav) +### description: textual description of the scene +### source: filename(s) of the mono input sources (the program will search for it in the input_path folder) +### IR: filename(s) of the input IRs (the program will search for it in the IR_path folder) +### overlap: overlap length between two input sources in seconds (negative value creates a gap) +### format: input file format ("FOA" or "HOA2") +### Note 1: use brackets [val1, val2, ...]
when specifying multiple values ### Naming convention for the input mono files ### The input filenames are represented by: @@ -69,7 +89,7 @@ add_low_level_random_noise: true ### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 ### r stands for the room ID: a, b, c, ... ### tt stands for the talker position: 01, 02, ... -### mm stands for the microphone position: 01, 02, ... +### mm stands for the microphone position: 00, 01, 02, ... ### ffffff stands for the format ID: stAB20, stABC20, stAB100, stAB150, stMS, stBin, FOA, HOA2 ### Naming convention for the generated output files @@ -85,256 +105,294 @@ add_low_level_random_noise: true ### y is the per-experiment category according to IVAS-8a: 01, 02, 03, 04, 05, 06 ### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07 (07 is the preliminary sample) ### o stands for the object number; 0, 1, 2, 3 + scenes: - cat1_s1: - name: "lp08a1s01" + cat1_1: + name: "cat1/a1s01.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s01.wav", "m1s01.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s2: - name: "lp08a1s02" + cat1_2: + name: "cat1/a1s02.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s01.wav", "f2s01.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s3: - name: "lp08a1s03" + cat1_3: + name: "cat1/a1s03.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s01.wav", "m3s01.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s4: - name: "lp08a1s04" + cat1_4: 
+ name: "cat1/a1s04.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s02.wav", "f1s02.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s5: - name: "lp08a1s05" + cat1_5: + name: "cat1/a1s05.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s02.wav", "m2s02.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s6: - name: "lp08a1s06" + cat1_6: + name: "cat1/a1s06.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s02.wav", "f3s02.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s1: - name: "lp08a2s01" + cat2_1: + name: "cat2/a2s01.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s03.wav", "f3s03.wav"] + IR: ["Nokia_Small_Room3_TalkPos1_EigenOwnHoA2_SinSweep_9chn.wav", "Nokia_Small_Room3_TalkPos2_EigenOwnHoA2_SinSweep_9chn.wav"] overlap: 0.5 + format: "HOA2" - cat2_s2: - name: "lp08a2s02" + cat2_2: + name: "cat2/a2s02.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s03.wav", "m1s03.wav"] + IR: ["Nokia_Small_Room3_TalkPos1_EigenOwnHoA2_SinSweep_9chn.wav", "Nokia_Small_Room3_TalkPos2_EigenOwnHoA2_SinSweep_9chn.wav"] overlap: 0.5 + format: "HOA2" - cat2_s3: - name: "lp08a2s03" + cat2_3: + name: "cat2/a2s03.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", 
"IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s03.wav", "f2s03.wav"] + IR: ["Nokia_Small_Room3_TalkPos1_EigenOwnHoA2_SinSweep_9chn.wav", "Nokia_Small_Room3_TalkPos2_EigenOwnHoA2_SinSweep_9chn.wav"] overlap: 0.5 + format: "HOA2" - cat2_s4: - name: "lp08a2s04" + cat2_4: + name: "cat2/a2s04.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s04.wav", "m3s04.wav"] + IR: ["Nokia_Small_Room3_TalkPos1_EigenOwnHoA2_SinSweep_9chn.wav", "Nokia_Small_Room3_TalkPos2_EigenOwnHoA2_SinSweep_9chn.wav"] overlap: 0.5 + format: "HOA2" - cat2_s5: - name: "lp08a2s05" + cat2_5: + name: "cat2/a2s05.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s04.wav", "f1s04.wav"] + IR: ["Nokia_Small_Room3_TalkPos1_EigenOwnHoA2_SinSweep_9chn.wav", "Nokia_Small_Room3_TalkPos2_EigenOwnHoA2_SinSweep_9chn.wav"] overlap: 0.5 + format: "HOA2" - cat2_s6: - name: "lp08a2s06" + cat2_6: + name: "cat2/a2s06.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s04.wav", "m2s04.wav"] + IR: ["Nokia_Small_Room3_TalkPos1_EigenOwnHoA2_SinSweep_9chn.wav", "Nokia_Small_Room3_TalkPos2_EigenOwnHoA2_SinSweep_9chn.wav"] overlap: 0.5 + format: "HOA2" - cat3_s1: - name: "lp08a3s01" + cat3_1: + name: "cat3/a3s01.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s05.wav", "m2s05.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s2: - name: "lp08a3s02" + cat3_2: + name: "cat3/a3s02.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", 
"IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s05.wav", "f3s05.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s3: - name: "lp08a3s03" + cat3_3: + name: "cat3/a3s03.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s05.wav", "m1s05.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s4: - name: "lp08a3s04" + cat3_4: + name: "cat3/a3s04.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s06.wav", "f2s06.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s5: - name: "lp08a3s05" + cat3_5: + name: "cat3/a3s05.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s06.wav", "m3s06.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s6: - name: "lp08a3s06" + cat3_6: + name: "cat3/a3s06.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s06.wav", "f1s06.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s1: - name: "lp08a4s01" + cat4_1: + name: "cat4/a4s01.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s07.wav", "f1s07.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s2: - name: "lp08a4s02" + cat4_2: + name: "cat4/a4s02.wav" description: "" - source: 
["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s07.wav", "m2s07.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s3: - name: "lp08a4s03" + cat4_3: + name: "cat4/a4s03.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s07.wav", "f3s07.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s4: - name: "lp08a4s04" + cat4_4: + name: "cat4/a4s04.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s08.wav", "m1s08.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s5: - name: "lp08a4s05" + cat4_5: + name: "cat4/a4s05.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s08.wav", "f2s08.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s6: - name: "lp08a4s06" + cat4_6: + name: "cat4/a4s06.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s08.wav", "m3s08.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s1: - name: "lp08a5s01" + cat5_1: + name: "cat5/a5s01.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s09.wav", "m3s09.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s2: - name: 
"lp08a5s02" + cat5_2: + name: "cat5/a5s02.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s09.wav", "f1s09.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s3: - name: "lp08a5s03" + cat5_3: + name: "cat5/a5s03.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s09.wav", "m2s09.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s4: - name: "lp08a5s04" + cat5_4: + name: "cat5/a5s04.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s10.wav", "f3s10.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s5: - name: "lp08a5s05" + cat5_5: + name: "cat5/a5s05.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s10.wav", "m1s10.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s6: - name: "lp08a5s06" + cat5_6: + name: "cat5/a5s06.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s10.wav", "f2s10.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s1: - name: "lp08a6s01" + cat6_1: + name: "cat6/a6s01.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s11.wav", "f2s11.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", 
"IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s2: - name: "lp08a6s02" + cat6_2: + name: "cat6/a6s02.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s11.wav", "m3s11.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s3: - name: "lp08a6s03" + cat6_3: + name: "cat6/a6s03.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s11.wav", "f1s11.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s4: - name: "lp08a6s04" + cat6_4: + name: "cat6/a6s04.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s12.wav", "m2s12.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s5: - name: "lp08a6s05" + cat6_5: + name: "cat6/a6s05.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s12.wav", "f3s12.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s6: - name: "lp08a6s06" + cat6_6: + name: "cat6/a6s06.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s12.wav", "m1s12.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" + diff --git a/item_gen_configs/P800-9.yml b/item_gen_configs/P800-9.yml index cb22444e..dc812126 100644 --- a/item_gen_configs/P800-9.yml +++ b/item_gen_configs/P800-9.yml @@ -3,8 +3,13 @@ # General configuration 
################################################ +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + ### Output format -format: "FOA" +format: ["FOA", "HOA2"] ### Output sampling rate in Hz; default = 48000 fs: 48000 @@ -12,16 +17,11 @@ fs: 48000 ### IR sampling rate in Hz (only for files in .pcm format); default = 48000 # IR_fs: 48000 -### Any relative paths will be interpreted relative to the working directory the script is called from! -### Usage of absolute paths is recommended. -### Do not use file names with dots "." in them! This is not supported, use "_" instead -### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions - ### Input path to mono files input_path: "./items_mono" -### Input path to impulse response files, default = './ivas_processing_scripts/generation/IR' -# IR_path: "./IR" +### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' +IR_path: "./IRs" ### Output path for generated test items and metadata files output_path: "experiments/selection/P800-9/proc_input" @@ -39,17 +39,37 @@ postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true +### File designators, default is "l" for listening lab, "EN" for language, "p04" for exp and "g" for provider +listening_lab: "l" +language: "EN" +exp: "p09" +provider: "g" + +### Use prefix for all input filenames (default: "") +### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) 
+use_input_prefix: "lLLeee" + +### Use prefix for all IR filenames (default: "") +### p stands for the 'provider', e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +# use_IR_prefix: "IR_pp_eee_" + +### Use prefix for all output filenames (default: "") +### l stands for the 'listening_lab' designator, e stands for the 'exp' designator (the number of consecutive letters define the length of the field) +use_output_prefix: "leee" ################################################ ### Scene description ################################################ -### Each scene must start with the sceneN tag -### Specify the mono source filename (the program will search for it in the input_path folder) -### Specify the IR source filename (the program will search for it in the input_path_IR folder) -### Specify the overlap length in seconds for each input source (negative value creates a gap) -### Note 1: use [val1, val2, ...] for multiple sources in a scene -### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames +### Each scene must begin by specifying the category in the following format: catN_I where N is the category index and I is the scene index +### Each scene shall be described using the following parameters/properties: +### name: filename of the generated output item (the program will save the generated items in the output_path folder, note: it is possible to use subfolders, e.g. items_stereo/x1_s01.wav) +### description: textual description of the scene +### source: filename(s) of the mono input sources (the program will search for it in the input_path folder) +### IR: filename(s) of the input IRs (the program will search for it in the IR_path folder) +### overlap: overlap length between two input sources in seconds (negative value creates a gap) +### format: input file format ("FOA" or "HOA2") +### Note 1: use brackets [val1, val2, ...]
when specifying multiple values ### Naming convention for the input mono files ### The input filenames are represented by: @@ -69,7 +89,7 @@ add_low_level_random_noise: true ### eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09 ### r stands for the room ID: a, b, c, ... ### tt stands for the talker position: 01, 02, ... -### mm stands for the microphone position: 01, 02, ... +### mm stands for the microphone position: 00, 01, 02, ... ### ffffff stands for the format ID: stAB20, stABC20, stAB100, stAB150, stMS, stBin, FOA, HOA2 ### Naming convention for the generated output files @@ -85,256 +105,294 @@ add_low_level_random_noise: true ### y is the per-experiment category according to IVAS-8a: 01, 02, 03, 04, 05, 06 ### s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07 (07 is the preliminary sample) ### o stands for the object number; 0, 1, 2, 3 + scenes: - cat1_s1: - name: "lp08a1s01" + cat1_1: + name: "cat1/a1s01.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s01.wav", "m1s01.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s2: - name: "lp08a1s02" + cat1_2: + name: "cat1/a1s02.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s01.wav", "f2s01.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s3: - name: "lp08a1s03" + cat1_3: + name: "cat1/a1s03.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s01.wav", "m3s01.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s4: - name: "lp08a1s04" + cat1_4: 
+ name: "cat1/a1s04.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s02.wav", "f1s02.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s5: - name: "lp08a1s05" + cat1_5: + name: "cat1/a1s05.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s02.wav", "m2s02.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat1_s6: - name: "lp08a1s06" + cat1_6: + name: "cat1/a1s06.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env1/AmbiX_036x033y_0.01m.wav", "IR_FOA_env1/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s02.wav", "f3s02.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat2_s1: - name: "lp08a2s01" + cat2_1: + name: "cat2/a2s01.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s03.wav", "f3s03.wav"] + IR: ["Nokia_Small_Room3_TalkPos1_EigenOwnHoA2_SinSweep_9chn.wav", "Nokia_Small_Room3_TalkPos2_EigenOwnHoA2_SinSweep_9chn.wav"] overlap: 0.5 + format: "HOA2" - cat2_s2: - name: "lp08a2s02" + cat2_2: + name: "cat2/a2s02.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s03.wav", "m1s03.wav"] + IR: ["Nokia_Small_Room3_TalkPos1_EigenOwnHoA2_SinSweep_9chn.wav", "Nokia_Small_Room3_TalkPos2_EigenOwnHoA2_SinSweep_9chn.wav"] overlap: 0.5 + format: "HOA2" - cat2_s3: - name: "lp08a2s03" + cat2_3: + name: "cat2/a2s03.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", 
"IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s03.wav", "f2s03.wav"] + IR: ["Nokia_Small_Room3_TalkPos1_EigenOwnHoA2_SinSweep_9chn.wav", "Nokia_Small_Room3_TalkPos2_EigenOwnHoA2_SinSweep_9chn.wav"] overlap: 0.5 + format: "HOA2" - cat2_s4: - name: "lp08a2s04" + cat2_4: + name: "cat2/a2s04.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s04.wav", "m3s04.wav"] + IR: ["Nokia_Small_Room3_TalkPos1_EigenOwnHoA2_SinSweep_9chn.wav", "Nokia_Small_Room3_TalkPos2_EigenOwnHoA2_SinSweep_9chn.wav"] overlap: 0.5 + format: "HOA2" - cat2_s5: - name: "lp08a2s05" + cat2_5: + name: "cat2/a2s05.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s04.wav", "f1s04.wav"] + IR: ["Nokia_Small_Room3_TalkPos1_EigenOwnHoA2_SinSweep_9chn.wav", "Nokia_Small_Room3_TalkPos2_EigenOwnHoA2_SinSweep_9chn.wav"] overlap: 0.5 + format: "HOA2" - cat2_s6: - name: "lp08a2s06" + cat2_6: + name: "cat2/a2s06.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env2/AmbiX_036x033y_0.01m.wav", "IR_FOA_env2/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s04.wav", "m2s04.wav"] + IR: ["Nokia_Small_Room3_TalkPos1_EigenOwnHoA2_SinSweep_9chn.wav", "Nokia_Small_Room3_TalkPos2_EigenOwnHoA2_SinSweep_9chn.wav"] overlap: 0.5 + format: "HOA2" - cat3_s1: - name: "lp08a3s01" + cat3_1: + name: "cat3/a3s01.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s05.wav", "m2s05.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s2: - name: "lp08a3s02" + cat3_2: + name: "cat3/a3s02.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", 
"IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s05.wav", "f3s05.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s3: - name: "lp08a3s03" + cat3_3: + name: "cat3/a3s03.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s05.wav", "m1s05.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s4: - name: "lp08a3s04" + cat3_4: + name: "cat3/a3s04.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s06.wav", "f2s06.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s5: - name: "lp08a3s05" + cat3_5: + name: "cat3/a3s05.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s06.wav", "m3s06.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat3_s6: - name: "lp08a3s06" + cat3_6: + name: "cat3/a3s06.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env3/AmbiX_036x033y_0.01m.wav", "IR_FOA_env3/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s06.wav", "f1s06.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s1: - name: "lp08a4s01" + cat4_1: + name: "cat4/a4s01.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s07.wav", "f1s07.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s2: - name: "lp08a4s02" + cat4_2: + name: "cat4/a4s02.wav" description: "" - source: 
["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s07.wav", "m2s07.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s3: - name: "lp08a4s03" + cat4_3: + name: "cat4/a4s03.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s07.wav", "f3s07.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s4: - name: "lp08a4s04" + cat4_4: + name: "cat4/a4s04.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s08.wav", "m1s08.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s5: - name: "lp08a4s05" + cat4_5: + name: "cat4/a4s05.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s08.wav", "f2s08.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat4_s6: - name: "lp08a4s06" + cat4_6: + name: "cat4/a4s06.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env4/AmbiX_036x033y_0.01m.wav", "IR_FOA_env4/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s08.wav", "m3s08.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s1: - name: "lp08a5s01" + cat5_1: + name: "cat5/a5s01.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s09.wav", "m3s09.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s2: - name: 
"lp08a5s02" + cat5_2: + name: "cat5/a5s02.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s09.wav", "f1s09.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s3: - name: "lp08a5s03" + cat5_3: + name: "cat5/a5s03.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s09.wav", "m2s09.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s4: - name: "lp08a5s04" + cat5_4: + name: "cat5/a5s04.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s10.wav", "f3s10.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s5: - name: "lp08a5s05" + cat5_5: + name: "cat5/a5s05.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s10.wav", "m1s10.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat5_s6: - name: "lp08a5s06" + cat5_6: + name: "cat5/a5s06.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env5/AmbiX_036x033y_0.01m.wav", "IR_FOA_env5/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s10.wav", "f2s10.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s1: - name: "lp08a6s01" + cat6_1: + name: "cat6/a6s01.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["m2s11.wav", "f2s11.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", 
"IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s2: - name: "lp08a6s02" + cat6_2: + name: "cat6/a6s02.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["f3s11.wav", "m3s11.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s3: - name: "lp08a6s03" + cat6_3: + name: "cat6/a6s03.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["m1s11.wav", "f1s11.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s4: - name: "lp08a6s04" + cat6_4: + name: "cat6/a6s04.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["f2s12.wav", "m2s12.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s5: - name: "lp08a6s05" + cat6_5: + name: "cat6/a6s05.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["m3s12.wav", "f3s12.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" - cat6_s6: - name: "lp08a6s06" + cat6_6: + name: "cat6/a6s06.wav" description: "" - source: ["aENp01Fa1.wav", "aENp01Ma1.wav"] - IR: ["IR_FOA_env6/AmbiX_036x033y_0.01m.wav", "IR_FOA_env6/AmbiX_-70x035y_0.01m.wav"] + source: ["f1s12.wav", "m1s12.wav"] + IR: ["IR_do_p04_e_01_01_FOA.wav", "IR_do_p04_e_02_01_FOA.wav"] overlap: 0.5 + format: "FOA" + diff --git a/ivas_processing_scripts/generation/process_ambi_items.py b/ivas_processing_scripts/generation/process_ambi_items.py index 13f5fcdd..f0c0b420 100644 --- 
a/ivas_processing_scripts/generation/process_ambi_items.py +++ b/ivas_processing_scripts/generation/process_ambi_items.py @@ -38,7 +38,7 @@ import numpy as np from ivas_processing_scripts.audiotools import audio, audiofile, convert from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness -from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_foa +from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_foa, reverb_hoa2 from ivas_processing_scripts.generation import config from ivas_processing_scripts.utils import apply_func_parallel @@ -120,7 +120,7 @@ def generate_ambi_items( # set the experiment designator if "exp" not in cfg.__dict__: - cfg.exp = "p01" + cfg.exp = "p04" # set the provider if "provider" not in cfg.__dict__: @@ -160,8 +160,7 @@ def generate_ambi_items( generate_ambi_scene, zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), None, - # "mp" if cfg.multiprocessing else None, - None, + "mp" if cfg.multiprocessing else None, ) return diff --git a/ivas_processing_scripts/generation/process_foa_items.py b/ivas_processing_scripts/generation/process_foa_items.py deleted file mode 100644 index 56c7f332..00000000 --- a/ivas_processing_scripts/generation/process_foa_items.py +++ /dev/null @@ -1,288 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. 
-# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
-# - -import logging -import os -from math import floor - -import numpy as np - -from ivas_processing_scripts.audiotools import audio, audiofile, convert -from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness -from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_foa -from ivas_processing_scripts.generation import config - -SEED_RANDOM_NOISE = 0 - - -# function for converting nd numpy array to strings with 2 decimal digits -def csv_formatdata(data): - for row in data: - yield ["%0.2f" % v for v in row] - - -def generate_foa_items( - cfg: config.TestConfig, - logger: logging.Logger, -): - """Generate FOA items from mono items based on scene description""" - - # get the number of scenes - N_scenes = len(cfg.scenes) - - # set the target level - if "loudness" not in cfg.__dict__: - cfg.loudness = -26 - - # set the fs - if "fs" not in cfg.__dict__: - cfg.fs = 48000 - - # set the IR fs - if "IR_fs" not in cfg.__dict__: - cfg.IR_fs = 48000 - - # set the pre-amble and post-amble - if "preamble" not in cfg.__dict__: - cfg.preamble = 0.0 - - if "postamble" not in cfg.__dict__: - cfg.postamble = 0.0 - - # set the IR path - if "IR_path" not in cfg.__dict__: - cfg.IR_path = os.path.join(os.path.dirname(__file__), "IR") - - # set the pre-amble and post-amble - if "add_low_level_random_noise" not in cfg.__dict__: - cfg.add_low_level_random_noise = False - - # setup binaural rendering - if "binaural_path" not in cfg.__dict__: - cfg.binaural_path = "" - - # set the listening lab designator - if "listening_lab" not in cfg.__dict__: - cfg.listening_lab = "l" - - # set the language designator - if "language" not in cfg.__dict__: - cfg.language = "EN" - - # set the experiment designator - if "exp" not in cfg.__dict__: - cfg.exp = "p01" - - # set the provider - if "provider" not in cfg.__dict__: - cfg.provider = "g" - - # set the prefix for all input filenames - if "use_input_prefix" not in cfg.__dict__: - cfg.use_input_prefix = "" - else: - # 
replace file designators - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) - - # set the prefix for all IR filenames - if "use_IR_prefix" not in cfg.__dict__: - cfg.use_IR_prefix = "" - else: - # replace file designators - cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "p", cfg.provider ) - cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "LL", cfg.language ) - cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "eee", cfg.exp ) - - # set the prefix for all output filenames - if "use_output_prefix" not in cfg.__dict__: - cfg.use_output_prefix = None - else: - # replace file designators - cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) - cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) - - # set multiprocessing - if "multiprocessing" not in cfg.__dict__: - cfg.multiprocessing = True - - apply_func_parallel( - generate_foa_scene, - zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), - None, - "mp" if cfg.multiprocessing else None, - ) - - return - - -def generate_foa_scene( - scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger -): - logger.info( - f"Processing scene: {scene_name} out of {len(cfg.scenes)} scenes, name: {scene['name']}" - ) - - # extract the number of audio sources - N_sources = len(np.atleast_1d(scene["source"])) - - # read the overlap length - if "overlap" in scene.keys(): - source_overlap = float(scene["overlap"]) - else: - source_overlap = 0.0 - - y = audio.SceneBasedAudio("FOA") - for i in range(N_sources): - - # parse parameters from the scene description - source_file = np.atleast_1d(scene["source"])[i] - IR_file 
= np.atleast_1d(scene["IR"])[i] - - logger.info(f"Convolving {source_file} with {IR_file}") - - # read source file - x = audio.fromfile("MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs) - - # read the IR file - IR = audio.fromfile("FOA", os.path.join(cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file)), fs=cfg.IR_fs) - - # convolve with FOA IR - x = reverb_foa(x, IR) - - # adjust the level of the foa signal - _, scale_factor, _ = get_loudness(x, cfg.loudness, "BINAURAL") - x.audio *= scale_factor - - # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) - if i > 0 and source_overlap != 0.0: - # get the length of the first source file - N_delay = len(y.audio[:, 0]) - - # add the shift - N_delay += int(-source_overlap * x.fs) - - # insert all-zero preamble - pre = np.zeros((N_delay, x.audio.shape[1])) - x.audio = np.concatenate([pre, x.audio]) - - # pad with zeros to ensure that the signal length is a multiple of 20ms - N_frame = x.fs / 50 - if len(x.audio) % N_frame != 0: - N_pad = int(N_frame - len(x.audio) % N_frame) - - # insert all-zero preamble - pre = np.zeros((N_pad, x.audio.shape[1])) - x.audio = np.concatenate([pre, x.audio]) - - # add source signal to the array of source signals - y.fs = x.fs - if y.audio is None: - y.audio = x.audio - else: - # pad with zeros to have equal length of all source signals - if x.audio.shape[0] > y.audio.shape[0]: - y.audio = np.vstack( - ( - y.audio, - np.zeros( - ( - x.audio.shape[0] - y.audio.shape[0], - y.audio.shape[1], - ) - ), - ) - ) - elif y.audio.shape[0] > x.audio.shape[0]: - x.audio = np.vstack( - ( - x.audio, - np.zeros( - ( - y.audio.shape[0] - x.audio.shape[0], - x.audio.shape[1], - ) - ), - ) - ) - - # superimpose - y.audio += x.audio - - # append pre-amble and post-amble to all sources - if cfg.preamble != 0.0: - # ensure that pre-amble is 
a multiple of 20ms - N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) - - # insert all-zero preamble to all sources - pre = np.zeros((N_pre, y.audio.shape[1])) - y.audio = np.concatenate([pre, y.audio]) - - if cfg.postamble != 0.0: - # ensure that post-mable is a multiple of 20ms - N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) - - # append all-zero postamble to all sources - post = np.zeros((N_post, y.audio.shape[1])) - y.audio = np.concatenate([y.audio, post]) - - # add random noise - if cfg.add_low_level_random_noise: - # create uniformly distributed noise between -4 and 4 - np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( - "float" - ) - - # superimpose - y.audio += noise - - # write the reverberated audio into output file - audiofile.write( - os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), - y.audio, - y.fs, - ) - - # convert to binaural if option chosen - if cfg.binaural_path != "": - binaudio = audio.fromtype("BINAURAL") - binaudio.fs = y.fs - convert.format_conversion(y, binaudio) - audiofile.write( - os.path.join(cfg.binaural_path, scene["name"]), - binaudio.audio, - binaudio.fs, - ) - - return diff --git a/ivas_processing_scripts/generation/process_hoa2_items.py b/ivas_processing_scripts/generation/process_hoa2_items.py deleted file mode 100644 index b8a88adb..00000000 --- a/ivas_processing_scripts/generation/process_hoa2_items.py +++ /dev/null @@ -1,233 +0,0 @@ -#!/usr/bin/env python3 - -# -# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository. All Rights Reserved. -# -# This software is protected by copyright law and by international treaties. -# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, -# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., -# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, -# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other -# contributors to this repository retain full ownership rights in their respective contributions in -# the software. This notice grants no license of any kind, including but not limited to patent -# license, nor is any license granted by implication, estoppel or otherwise. -# -# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making -# contributions. -# -# This software is provided "AS IS", without any express or implied warranties. The software is in the -# development stage. It is intended exclusively for experts who have experience with such software and -# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability -# and fitness for a particular purpose are hereby disclaimed and excluded. -# -# Any dispute, controversy or claim arising under or in relation to providing this software shall be -# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in -# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and -# the United Nations Convention on Contracts on the International Sales of Goods. 
-# - -import logging -import os -from math import floor - -import numpy as np - -from ivas_processing_scripts.audiotools import audio, audiofile, convert -from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness -from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_hoa2 -from ivas_processing_scripts.generation import config - -SEED_RANDOM_NOISE = 0 - - -# function for converting nd numpy array to strings with 2 decimal digits -def csv_formatdata(data): - for row in data: - yield ["%0.2f" % v for v in row] - - -def generate_hoa2_items( - cfg: config.TestConfig, - logger: logging.Logger, -): - """Generate HOA2 items from mono items based on scene description""" - - # get the number of scenes - N_scenes = len(cfg.scenes) - - # set the target level - if "loudness" not in cfg.__dict__: - cfg.loudness = -26 - - # set the fs - if "fs" not in cfg.__dict__: - cfg.fs = 48000 - - # set the IR fs - if "IR_fs" not in cfg.__dict__: - cfg.IR_fs = 48000 - - # set the pre-amble and post-amble - if "preamble" not in cfg.__dict__: - cfg.preamble = 0.0 - - if "postamble" not in cfg.__dict__: - cfg.postamble = 0.0 - - # set the IR path - if "IR_path" not in cfg.__dict__: - cfg.IR_path = os.path.join(os.path.dirname(__file__), "IR") - - # set the pre-amble and post-amble - if "add_low_level_random_noise" not in cfg.__dict__: - cfg.add_low_level_random_noise = False - - # setup binaural rendering - if "binaural_path" not in cfg.__dict__: - cfg.binaural_path = "" - - # repeat for all source files - for scene_name, scene in cfg.scenes.items(): - logger.info( - f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene['name']}" - ) - - # extract the number of audio sources - N_sources = len(np.atleast_1d(scene["source"])) - - # read the overlap length - if "overlap" in scene.keys(): - source_overlap = float(scene["overlap"]) - else: - source_overlap = 0.0 - - y = audio.SceneBasedAudio("HOA2") - for i in range(N_sources): - # parse parameters 
from the scene description - source_file = np.atleast_1d(scene["source"])[i] - IR_file = np.atleast_1d(scene["IR"])[i] - - logger.info(f"Convolving {source_file} with {IR_file}") - - # read source file - x = audio.fromfile( - "MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs - ) - - # read the IR file - IR = audio.fromfile( - "HOA2", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs - ) - - # convolve with HOA2 IR - x = reverb_hoa2(x, IR) - - # adjust the level of the HOA2 signal - _, scale_factor, _ = get_loudness(x, cfg.loudness, "BINAURAL") - x.audio *= scale_factor - - # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap) - if i > 0 and source_overlap != 0.0: - # get the length of the first source file - N_delay = len(y.audio[:, 0]) - - # add the shift - N_delay += int(-source_overlap * x.fs) - - # insert all-zero preamble - pre = np.zeros((N_delay, x.audio.shape[1])) - x.audio = np.concatenate([pre, x.audio]) - - # pad with zeros to ensure that the signal length is a multiple of 20ms - N_frame = x.fs / 50 - if len(x.audio) % N_frame != 0: - N_pad = int(N_frame - len(x.audio) % N_frame) - - # insert all-zero preamble - pre = np.zeros((N_pad, x.audio.shape[1])) - x.audio = np.concatenate([pre, x.audio]) - - # add source signal to the array of source signals - y.fs = x.fs - if y.audio is None: - y.audio = x.audio - else: - # pad with zeros to have equal length of all source signals - if x.audio.shape[0] > y.audio.shape[0]: - y.audio = np.vstack( - ( - y.audio, - np.zeros( - ( - x.audio.shape[0] - y.audio.shape[0], - y.audio.shape[1], - ) - ), - ) - ) - elif y.audio.shape[0] > x.audio.shape[0]: - x.audio = np.vstack( - ( - x.audio, - np.zeros( - ( - y.audio.shape[0] - x.audio.shape[0], - x.audio.shape[1], - ) - ), - ) - ) - - # superimpose - y.audio += x.audio - - # append pre-amble and post-amble to all sources - if cfg.preamble != 0.0: - # ensure that pre-amble is a multiple of 20ms - N_pre = 
int(floor(cfg.preamble * 50) / 50 * y.fs) - - # insert all-zero preamble to all sources - pre = np.zeros((N_pre, y.audio.shape[1])) - y.audio = np.concatenate([pre, y.audio]) - - if cfg.postamble != 0.0: - # ensure that post-mable is a multiple of 20ms - N_post = int(floor(cfg.postamble * 50) / 50 * y.fs) - - # append all-zero postamble to all sources - post = np.zeros((N_post, y.audio.shape[1])) - y.audio = np.concatenate([y.audio, post]) - - # add random noise - if cfg.add_low_level_random_noise: - # create uniformly distributed noise between -4 and 4 - np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( - "float" - ) - - # superimpose - y.audio += noise - - # write the reverberated audio into output file - output_filename = scene["name"] - audiofile.write( - os.path.join(cfg.output_path, output_filename), y.audio, y.fs - ) # !!!! TBD: replace all os.path.xxx operations with the Path object - - # convert to binaural if option chosen - if cfg.binaural_path != "": - binaudio = audio.fromtype("BINAURAL") - binaudio.fs = y.fs - convert.format_conversion(y, binaudio) - audiofile.write( - os.path.join(cfg.binaural_path, output_filename), - binaudio.audio, - binaudio.fs, - ) # !!!! 
TBD: replace all os.path.xxx operations with the Path object - - return diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py index f8c056c8..88184a39 100644 --- a/ivas_processing_scripts/generation/process_ism1_items.py +++ b/ivas_processing_scripts/generation/process_ism1_items.py @@ -137,8 +137,7 @@ def generate_ism1_items( generate_ism1_scene, zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), None, - # "mp" if cfg.multiprocessing else None, - None, + "mp" if cfg.multiprocessing else None, ) return diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py index c514f8b6..fcf8d4b8 100644 --- a/ivas_processing_scripts/generation/process_ism2_items.py +++ b/ivas_processing_scripts/generation/process_ism2_items.py @@ -137,8 +137,7 @@ def generate_ism2_items( generate_ism2_scene, zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)), None, - # "mp" if cfg.multiprocessing else None, - None, + "mp" if cfg.multiprocessing else None, ) return -- GitLab From f9c4d821246e7ba69cfe4a2b878a1273ce3dc679 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 8 Jun 2023 10:24:48 +0200 Subject: [PATCH 08/11] fixing default experiment number --- item_gen_configs/P800-8.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/item_gen_configs/P800-8.yml b/item_gen_configs/P800-8.yml index 6f30a8eb..8916d0c2 100644 --- a/item_gen_configs/P800-8.yml +++ b/item_gen_configs/P800-8.yml @@ -42,7 +42,7 @@ add_low_level_random_noise: true ### File designators, default is "l" for listening lab, "EN" for language, "p04" for exp and "g" for provider listening_lab: "l" language: "EN" -exp: "p01" +exp: "p08" provider: "g" ### Use prefix for all input filenames (default: "") -- GitLab From d4665a98e708ed1628d64f60c2f19ca50651e7a0 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 
8 Jun 2023 10:56:32 +0200 Subject: [PATCH 09/11] formatting --- .../generation/__init__.py | 8 +- .../generation/process_ambi_items.py | 81 +++++++++++----- .../generation/process_ism1_items.py | 82 ++++++++++------ .../generation/process_ism2_items.py | 94 +++++++++++-------- .../generation/process_stereo_items.py | 74 +++++++++++---- .../processing/processing.py | 8 +- 6 files changed, 231 insertions(+), 116 deletions(-) diff --git a/ivas_processing_scripts/generation/__init__.py b/ivas_processing_scripts/generation/__init__.py index 192f1387..6269871a 100755 --- a/ivas_processing_scripts/generation/__init__.py +++ b/ivas_processing_scripts/generation/__init__.py @@ -42,10 +42,10 @@ from ivas_processing_scripts.constants import ( ) from ivas_processing_scripts.generation import ( config, + process_ambi_items, process_ism1_items, process_ism2_items, process_stereo_items, - process_ambi_items, ) from ivas_processing_scripts.utils import create_dir @@ -86,7 +86,7 @@ def main(args): # set up logging logger = logging_init(args, cfg) - + # generate ISM and STEREO items if cfg.format == "ISM1": # generate ISM1 items with metadata according to scene description @@ -97,11 +97,11 @@ def main(args): elif cfg.format == "STEREO": # generate STEREO items according to scene description process_stereo_items.generate_stereo_items(cfg, logger) - + # make format a list if not isinstance(cfg.format, list): cfg.format = [cfg.format] - + if "FOA" in cfg.format or "HOA2" in cfg.format: # generate FOA/HOA2 items according to scene description process_ambi_items.generate_ambi_items(cfg, logger) diff --git a/ivas_processing_scripts/generation/process_ambi_items.py b/ivas_processing_scripts/generation/process_ambi_items.py index f0c0b420..11dea1d2 100644 --- a/ivas_processing_scripts/generation/process_ambi_items.py +++ b/ivas_processing_scripts/generation/process_ambi_items.py @@ -32,8 +32,9 @@ import logging import os +from itertools import groupby, repeat from math import floor -from 
itertools import repeat, groupby + import numpy as np from ivas_processing_scripts.audiotools import audio, audiofile, convert @@ -50,6 +51,7 @@ def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] + # function for searching sequences of same the same character and replacing it by another string def replace_char_seq_with_string(str, char_seq, repl_str): result = [] @@ -58,7 +60,7 @@ def replace_char_seq_with_string(str, char_seq, repl_str): groups = ["".join(list(g)) for k, g in groupby(str)] # limit the length of the replacement string by the length of the character sequence - repl_str = repl_str[:len(char_seq)] + repl_str = repl_str[: len(char_seq)] # replace each occurence of the sequence of characters for g in groups: @@ -66,7 +68,7 @@ def replace_char_seq_with_string(str, char_seq, repl_str): result.append(repl_str) else: result.append(g) - + return "".join(result) @@ -109,7 +111,7 @@ def generate_ambi_items( # setup binaural rendering if "binaural_path" not in cfg.__dict__: cfg.binaural_path = "" - + # set the listening lab designator if "listening_lab" not in cfg.__dict__: cfg.listening_lab = "l" @@ -131,26 +133,42 @@ def generate_ambi_items( cfg.use_input_prefix = "" else: # replace file designators - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) + cfg.use_input_prefix = replace_char_seq_with_string( + cfg.use_input_prefix, "l", cfg.listening_lab + ) + cfg.use_input_prefix = replace_char_seq_with_string( + cfg.use_input_prefix, "LL", cfg.language + ) + cfg.use_input_prefix = replace_char_seq_with_string( + cfg.use_input_prefix, "eee", cfg.exp + ) # set the prefix for all IR filenames if "use_IR_prefix" not in cfg.__dict__: cfg.use_IR_prefix = "" else: # replace file designators - 
cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "p", cfg.provider ) - cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "LL", cfg.language ) - cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "eee", cfg.exp ) + cfg.use_IR_prefix = replace_char_seq_with_string( + cfg.use_IR_prefix, "p", cfg.provider + ) + cfg.use_IR_prefix = replace_char_seq_with_string( + cfg.use_IR_prefix, "LL", cfg.language + ) + cfg.use_IR_prefix = replace_char_seq_with_string( + cfg.use_IR_prefix, "eee", cfg.exp + ) # set the prefix for all output filenames if "use_output_prefix" not in cfg.__dict__: cfg.use_output_prefix = None else: # replace file designators - cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) - cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) + cfg.use_output_prefix = replace_char_seq_with_string( + cfg.use_output_prefix, "l", cfg.listening_lab + ) + cfg.use_output_prefix = replace_char_seq_with_string( + cfg.use_output_prefix, "eee", cfg.exp + ) # set multiprocessing if "multiprocessing" not in cfg.__dict__: @@ -162,7 +180,7 @@ def generate_ambi_items( None, "mp" if cfg.multiprocessing else None, ) - + return @@ -181,7 +199,7 @@ def generate_ambi_scene( source_overlap = float(scene["overlap"]) else: source_overlap = 0.0 - + # read the ambi format if "format" in scene.keys(): ambi_format = scene["format"] @@ -190,7 +208,6 @@ def generate_ambi_scene( y = audio.SceneBasedAudio(ambi_format) for i in range(N_sources): - # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] IR_file = np.atleast_1d(scene["IR"])[i] @@ -198,10 +215,26 @@ def generate_ambi_scene( logger.info(f"Convolving {source_file} with {IR_file}") # read source file - x = audio.fromfile("MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs) + x = 
audio.fromfile( + "MONO", + os.path.join( + cfg.input_path, + os.path.dirname(source_file), + cfg.use_input_prefix + os.path.basename(source_file), + ), + fs=cfg.fs, + ) # read the IR file - IR = audio.fromfile(ambi_format, os.path.join(cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file)), fs=cfg.IR_fs) + IR = audio.fromfile( + ambi_format, + os.path.join( + cfg.IR_path, + os.path.dirname(IR_file), + cfg.use_IR_prefix + os.path.basename(IR_file), + ), + fs=cfg.IR_fs, + ) # convolve with the FOA/HOA2 IR if ambi_format == "FOA": @@ -289,19 +322,21 @@ def generate_ambi_scene( if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( - "float" - ) + noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") # superimpose y.audio += noise # write the reverberated audio into output file audiofile.write( - os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), + os.path.join( + cfg.output_path, + os.path.dirname(scene["name"]), + cfg.use_output_prefix + os.path.basename(scene["name"]), + ), y.audio, y.fs, - ) + ) # convert to binaural if option chosen if cfg.binaural_path != "": @@ -312,6 +347,6 @@ def generate_ambi_scene( os.path.join(cfg.binaural_path, scene["name"]), binaudio.audio, binaudio.fs, - ) + ) return diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py index 88184a39..66ce92be 100644 --- a/ivas_processing_scripts/generation/process_ism1_items.py +++ b/ivas_processing_scripts/generation/process_ism1_items.py @@ -33,8 +33,8 @@ import csv import logging import os +from itertools import groupby, repeat from math import floor -from itertools import repeat, groupby import numpy as np @@ -51,6 +51,7 @@ def csv_formatdata(data): for row in 
data: yield ["%0.2f" % v for v in row] + # function for searching sequences of same the same character and replacing it by another string def replace_char_seq_with_string(str, char_seq, repl_str): result = [] @@ -59,7 +60,7 @@ def replace_char_seq_with_string(str, char_seq, repl_str): groups = ["".join(list(g)) for k, g in groupby(str)] # limit the length of the replacement string by the length of the character sequence - repl_str = repl_str[:len(char_seq)] + repl_str = repl_str[: len(char_seq)] # replace each occurence of the sequence of characters for g in groups: @@ -67,7 +68,7 @@ def replace_char_seq_with_string(str, char_seq, repl_str): result.append(repl_str) else: result.append(g) - + return "".join(result) @@ -95,7 +96,7 @@ def generate_ism1_items( # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False - + # set the listening lab designator if "listening_lab" not in cfg.__dict__: cfg.listening_lab = "l" @@ -117,18 +118,28 @@ def generate_ism1_items( cfg.use_input_prefix = "" else: # replace file designators - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) + cfg.use_input_prefix = replace_char_seq_with_string( + cfg.use_input_prefix, "l", cfg.listening_lab + ) + cfg.use_input_prefix = replace_char_seq_with_string( + cfg.use_input_prefix, "LL", cfg.language + ) + cfg.use_input_prefix = replace_char_seq_with_string( + cfg.use_input_prefix, "eee", cfg.exp + ) # set the prefix for all output filenames if "use_output_prefix" not in cfg.__dict__: cfg.use_output_prefix = None else: # replace file designators - cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) - cfg.use_output_prefix = 
replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) - + cfg.use_output_prefix = replace_char_seq_with_string( + cfg.use_output_prefix, "l", cfg.listening_lab + ) + cfg.use_output_prefix = replace_char_seq_with_string( + cfg.use_output_prefix, "eee", cfg.exp + ) + # set multiprocessing if "multiprocessing" not in cfg.__dict__: cfg.multiprocessing = True @@ -139,8 +150,9 @@ def generate_ism1_items( None, "mp" if cfg.multiprocessing else None, ) - - return + + return + def generate_ism1_scene( scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger @@ -170,14 +182,18 @@ def generate_ism1_scene( for i in range(N_sources): # parse parameters from the scene description source_file = ( - scene["source"][i] - if isinstance(scene["source"], list) - else scene["source"] + scene["source"][i] if isinstance(scene["source"], list) else scene["source"] ) # read source file x = audio.fromfile( - "MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs + "MONO", + os.path.join( + cfg.input_path, + os.path.dirname(source_file), + cfg.use_input_prefix + os.path.basename(source_file), + ), + fs=cfg.fs, ) # get the number of frames (multiple of 20ms) @@ -221,7 +237,7 @@ def generate_ism1_scene( # process azimuth and elevation source_azi = scene["azimuth"] source_ele = scene["elevation"] - + N_frames = int(len(y.audio) / y.fs * 50) # read azimuth information and create array @@ -302,9 +318,7 @@ def generate_ism1_scene( # insert neutral position as a pre-amble to all sources N_pre = int(N_pre / frame_len) - pre = np.tile( - [0.00, 0.00, 1.00, 0.00, 1.00], (N_pre, 1) - ) + pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pre, 1)) y_meta = np.concatenate([pre, y_meta], axis=0) if cfg.postamble != 0.0: @@ -317,29 +331,37 @@ def generate_ism1_scene( # append neutral position as a post-amble to all sources N_post = int(N_post / frame_len) - post = np.tile( - [0.00, 0.00, 1.00, 
0.00, 1.00], (N_post, 1) - ) + post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_post, 1)) y_meta = np.concatenate([y_meta, post], axis=0) # add random noise if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( - "float" - ) + noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") # superimpose y.audio += noise # write ISM audio stream to the output file audiofile.write( - os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), y.audio, y.fs - ) + os.path.join( + cfg.output_path, + os.path.dirname(scene["name"]), + cfg.use_output_prefix + os.path.basename(scene["name"]), + ), + y.audio, + y.fs, + ) # write ISM metadata to the output file in .0.csv format - csv_filename = os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.splitext(os.path.basename(scene["name"]))[0] + f".0.csv") + csv_filename = os.path.join( + cfg.output_path, + os.path.dirname(scene["name"]), + cfg.use_output_prefix + + os.path.splitext(os.path.basename(scene["name"]))[0] + + f".0.csv", + ) with open( csv_filename, @@ -353,4 +375,4 @@ def generate_ism1_scene( # write all rows to the .csv file writer.writerows(csv_formatdata(y_meta)) - return \ No newline at end of file + return diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py index fcf8d4b8..704c2230 100644 --- a/ivas_processing_scripts/generation/process_ism2_items.py +++ b/ivas_processing_scripts/generation/process_ism2_items.py @@ -29,12 +29,12 @@ # accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and # the United Nations Convention on Contracts on the International Sales of Goods. 
# -import pdb import csv import logging import os +import pdb +from itertools import groupby, repeat from math import floor -from itertools import repeat, groupby import numpy as np @@ -51,6 +51,7 @@ def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] + # function for searching sequences of same the same character and replacing it by another string def replace_char_seq_with_string(str, char_seq, repl_str): result = [] @@ -59,7 +60,7 @@ def replace_char_seq_with_string(str, char_seq, repl_str): groups = ["".join(list(g)) for k, g in groupby(str)] # limit the length of the replacement string by the length of the character sequence - repl_str = repl_str[:len(char_seq)] + repl_str = repl_str[: len(char_seq)] # replace each occurence of the sequence of characters for g in groups: @@ -67,7 +68,7 @@ def replace_char_seq_with_string(str, char_seq, repl_str): result.append(repl_str) else: result.append(g) - + return "".join(result) @@ -95,7 +96,7 @@ def generate_ism2_items( # set the pre-amble and post-amble if "add_low_level_random_noise" not in cfg.__dict__: cfg.add_low_level_random_noise = False - + # set the listening lab designator if "listening_lab" not in cfg.__dict__: cfg.listening_lab = "l" @@ -117,18 +118,28 @@ def generate_ism2_items( cfg.use_input_prefix = "" else: # replace file designators - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) + cfg.use_input_prefix = replace_char_seq_with_string( + cfg.use_input_prefix, "l", cfg.listening_lab + ) + cfg.use_input_prefix = replace_char_seq_with_string( + cfg.use_input_prefix, "LL", cfg.language + ) + cfg.use_input_prefix = replace_char_seq_with_string( + cfg.use_input_prefix, "eee", cfg.exp + ) # set the prefix for all output filenames if 
"use_output_prefix" not in cfg.__dict__: cfg.use_output_prefix = None else: # replace file designators - cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) - cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) - + cfg.use_output_prefix = replace_char_seq_with_string( + cfg.use_output_prefix, "l", cfg.listening_lab + ) + cfg.use_output_prefix = replace_char_seq_with_string( + cfg.use_output_prefix, "eee", cfg.exp + ) + # set multiprocessing if "multiprocessing" not in cfg.__dict__: cfg.multiprocessing = True @@ -139,8 +150,9 @@ def generate_ism2_items( None, "mp" if cfg.multiprocessing else None, ) - - return + + return + def generate_ism2_scene( scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger @@ -166,9 +178,7 @@ def generate_ism2_scene( for i in range(N_sources): # parse parameters from the scene description source_file = ( - scene["source"][i] - if isinstance(scene["source"], list) - else scene["source"] + scene["source"][i] if isinstance(scene["source"], list) else scene["source"] ) source_azi = ( scene["azimuth"][i] @@ -181,13 +191,17 @@ def generate_ism2_scene( else scene["elevation"] ) - logger.info( - f"Encoding {source_file} at position(s) {source_azi},{source_ele}" - ) + logger.info(f"Encoding {source_file} at position(s) {source_azi},{source_ele}") # read source file x = audio.fromfile( - "MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs + "MONO", + os.path.join( + cfg.input_path, + os.path.dirname(source_file), + cfg.use_input_prefix + os.path.basename(source_file), + ), + fs=cfg.fs, ) # get the number of frames (multiple of 20ms) @@ -283,9 +297,7 @@ def generate_ism2_scene( # insert neutral position as a pre-amble N_delay = int(N_delay / frame_len) # use neutral position for padding - pre = np.tile( - [0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1) - ) + pre 
= np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1)) x_meta = np.concatenate([pre, x_meta]) # pad with zeros to ensure that the signal length is a multiple of 20ms @@ -299,9 +311,7 @@ def generate_ism2_scene( N_pad = int(len(x.audio) / frame_len) - len(x_meta) if N_pad > 0: # use neutral position for padding - post = np.tile( - [0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1) - ) + post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1)) x_meta = np.concatenate([x_meta, post]) # add source signal to the array of all source signals @@ -370,9 +380,7 @@ def generate_ism2_scene( # insert neutral position as a pre-amble to all sources N_pre = int(N_pre / frame_len) - pre = np.tile( - [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1) - ) + pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1)) y_meta = np.concatenate([pre, y_meta], axis=1) if cfg.postamble != 0.0: @@ -385,31 +393,39 @@ def generate_ism2_scene( # append neutral position as a post-amble to all sources N_post = int(N_post / frame_len) - post = np.tile( - [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1) - ) + post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1)) y_meta = np.concatenate([y_meta, post], axis=1) # add random noise if cfg.add_low_level_random_noise: # create uniformly distributed noise between -4 and 4 np.random.seed(SEED_RANDOM_NOISE) - noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype( - "float" - ) + noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float") # superimpose y.audio += noise # write individual ISM audio streams to the output file in an interleaved format audiofile.write( - os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), y.audio, y.fs - ) + os.path.join( + cfg.output_path, + os.path.dirname(scene["name"]), + cfg.use_output_prefix + os.path.basename(scene["name"]), + ), + y.audio, + y.fs, + ) # write 
individual ISM metadata to output files in .csv format for i in range(N_sources): # generate .csv filename (should end with .0.csv, .1.csv, ...) - csv_filename = os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.splitext(os.path.basename(scene["name"]))[0] + f".{i}.csv") + csv_filename = os.path.join( + cfg.output_path, + os.path.dirname(scene["name"]), + cfg.use_output_prefix + + os.path.splitext(os.path.basename(scene["name"]))[0] + + f".{i}.csv", + ) with open( csv_filename, @@ -423,4 +439,4 @@ def generate_ism2_scene( # write all rows to the .csv file writer.writerows(csv_formatdata(y_meta[i])) - return \ No newline at end of file + return diff --git a/ivas_processing_scripts/generation/process_stereo_items.py b/ivas_processing_scripts/generation/process_stereo_items.py index 7f4ca115..4d58eb22 100644 --- a/ivas_processing_scripts/generation/process_stereo_items.py +++ b/ivas_processing_scripts/generation/process_stereo_items.py @@ -32,7 +32,7 @@ import logging import os -from itertools import repeat, groupby +from itertools import groupby, repeat from math import floor import numpy as np @@ -51,6 +51,7 @@ def csv_formatdata(data): for row in data: yield ["%0.2f" % v for v in row] + # function for searching sequences of same the same character and replacing it by another string def replace_char_seq_with_string(str, char_seq, repl_str): result = [] @@ -59,7 +60,7 @@ def replace_char_seq_with_string(str, char_seq, repl_str): groups = ["".join(list(g)) for k, g in groupby(str)] # limit the length of the replacement string by the length of the character sequence - repl_str = repl_str[:len(char_seq)] + repl_str = repl_str[: len(char_seq)] # replace each occurence of the sequence of characters for g in groups: @@ -67,9 +68,9 @@ def replace_char_seq_with_string(str, char_seq, repl_str): result.append(repl_str) else: result.append(g) - + return "".join(result) - + def generate_stereo_items( cfg: config.TestConfig, @@ -125,27 
+126,43 @@ def generate_stereo_items( cfg.use_input_prefix = "" else: # replace file designators - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "l", cfg.listening_lab ) - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "LL", cfg.language ) - cfg.use_input_prefix = replace_char_seq_with_string(cfg.use_input_prefix, "eee", cfg.exp ) + cfg.use_input_prefix = replace_char_seq_with_string( + cfg.use_input_prefix, "l", cfg.listening_lab + ) + cfg.use_input_prefix = replace_char_seq_with_string( + cfg.use_input_prefix, "LL", cfg.language + ) + cfg.use_input_prefix = replace_char_seq_with_string( + cfg.use_input_prefix, "eee", cfg.exp + ) # set the prefix for all IR filenames if "use_IR_prefix" not in cfg.__dict__: cfg.use_IR_prefix = "" else: # replace file designators - cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "p", cfg.provider ) - cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "LL", cfg.language ) - cfg.use_IR_prefix = replace_char_seq_with_string(cfg.use_IR_prefix, "eee", cfg.exp ) + cfg.use_IR_prefix = replace_char_seq_with_string( + cfg.use_IR_prefix, "p", cfg.provider + ) + cfg.use_IR_prefix = replace_char_seq_with_string( + cfg.use_IR_prefix, "LL", cfg.language + ) + cfg.use_IR_prefix = replace_char_seq_with_string( + cfg.use_IR_prefix, "eee", cfg.exp + ) # set the prefix for all output filenames if "use_output_prefix" not in cfg.__dict__: cfg.use_output_prefix = None else: # replace file designators - cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "l", cfg.listening_lab ) - cfg.use_output_prefix = replace_char_seq_with_string(cfg.use_output_prefix, "eee", cfg.exp ) - + cfg.use_output_prefix = replace_char_seq_with_string( + cfg.use_output_prefix, "l", cfg.listening_lab + ) + cfg.use_output_prefix = replace_char_seq_with_string( + cfg.use_output_prefix, "eee", cfg.exp + ) + # set multiprocessing if "multiprocessing" not in 
cfg.__dict__: cfg.multiprocessing = True @@ -156,7 +173,7 @@ def generate_stereo_items( None, "mp" if cfg.multiprocessing else None, ) - + return @@ -178,7 +195,6 @@ def generate_stereo_scene( y = audio.ChannelBasedAudio("STEREO") for i in range(N_sources): - # parse parameters from the scene description source_file = np.atleast_1d(scene["source"])[i] IR_file = np.atleast_1d(scene["IR"])[i] @@ -186,10 +202,26 @@ def generate_stereo_scene( logger.info(f"Convolving {source_file} with {IR_file}") # read source file - x = audio.fromfile("MONO", os.path.join(cfg.input_path, os.path.dirname(source_file), cfg.use_input_prefix + os.path.basename(source_file)), fs=cfg.fs) + x = audio.fromfile( + "MONO", + os.path.join( + cfg.input_path, + os.path.dirname(source_file), + cfg.use_input_prefix + os.path.basename(source_file), + ), + fs=cfg.fs, + ) # read the IR file - IR = audio.fromfile("STEREO", os.path.join(cfg.IR_path, os.path.dirname(IR_file), cfg.use_IR_prefix + os.path.basename(IR_file)), fs=cfg.IR_fs) + IR = audio.fromfile( + "STEREO", + os.path.join( + cfg.IR_path, + os.path.dirname(IR_file), + cfg.use_IR_prefix + os.path.basename(IR_file), + ), + fs=cfg.IR_fs, + ) # convolve with stereo IR x = reverb_stereo(x, IR) @@ -281,7 +313,11 @@ def generate_stereo_scene( # write the reverberated audio into output file audiofile.write( - os.path.join(cfg.output_path, os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.basename(scene["name"])), + os.path.join( + cfg.output_path, + os.path.dirname(scene["name"]), + cfg.use_output_prefix + os.path.basename(scene["name"]), + ), y.audio, y.fs, - ) + ) diff --git a/ivas_processing_scripts/processing/processing.py b/ivas_processing_scripts/processing/processing.py index 093fa91b..6a480b7f 100755 --- a/ivas_processing_scripts/processing/processing.py +++ b/ivas_processing_scripts/processing/processing.py @@ -527,7 +527,13 @@ def multiple_of_frame_size( ) # Create and append zeros - padded_data = trim(x, sampling_rate, (0, 
-padding_samples), pad_noise=True, samples=True) + padded_data = trim( + x, + sampling_rate, + (0, -padding_samples), + pad_noise=True, + samples=True, + ) # Write padded data to output directory write(output_dir / item.name, padded_data, fs) else: -- GitLab From 104bdf9573b29062d329bcfcb7590a304f36ad14 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 8 Jun 2023 11:00:26 +0200 Subject: [PATCH 10/11] removing obsolete .yml files --- item_gen_configs/FOA_CONFIG.yml | 79 ------- item_gen_configs/HOA2_CONFIG.yml | 61 ------ item_gen_configs/ISM1_CONFIG.yml | 303 -------------------------- item_gen_configs/ISM2_CONFIG.yml | 339 ----------------------------- item_gen_configs/STEREO_CONFIG.yml | 303 -------------------------- 5 files changed, 1085 deletions(-) delete mode 100644 item_gen_configs/FOA_CONFIG.yml delete mode 100644 item_gen_configs/HOA2_CONFIG.yml delete mode 100644 item_gen_configs/ISM1_CONFIG.yml delete mode 100644 item_gen_configs/ISM2_CONFIG.yml delete mode 100644 item_gen_configs/STEREO_CONFIG.yml diff --git a/item_gen_configs/FOA_CONFIG.yml b/item_gen_configs/FOA_CONFIG.yml deleted file mode 100644 index 88f77c4a..00000000 --- a/item_gen_configs/FOA_CONFIG.yml +++ /dev/null @@ -1,79 +0,0 @@ ---- -################################################ -# General configuration -################################################ - -### Output format -format: "FOA" - -### Output sampling rate in Hz needed for headerless audio files; default = 48000 -fs: 48000 - -### IR sampling rate in Hz needed for headerless audio files; default = 48000 -IR_fs: 48000 - -### Any relative paths will be interpreted relative to the working directory the script is called from! -### Usage of absolute paths is recommended. -### Do not use file names with dots "." in them! 
This is not supported, use "_" instead -### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions - -### Input path to mono files -input_path: "./items_mono" - -### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' -IR_path: "./IR_FOA" - -### Output path for generated test items and metadata files -output_path: "./items_FOA" - -### Target loudness in LKFS; default = null (no loudness normalization applied) -loudness: -26 - -### Pre-amble and Post-amble length in seconds (default = 0.0) -preamble: 1.0 -postamble: 1.0 - -### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) -add_low_level_random_noise: true - - -################################################ -### Scene description -################################################ - -### Each scene must start with the sceneN tag -### Specify the mono source filename (the program will search for it in the input_path folder) -### Specify the stereo IR source filename (the program will search for it in the input_path_IR folder) -### Specify the overlap length in seconds for each input source (negative value creates a gap) -### Note 1: use [val1, val2, ...] for multiple sources in a scene -### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames - -scenes: - a1: - name: "G1S1.wav" - description: "Two speakers sitting in a car." - source: ["fa1.wav", "ma1.wav"] - IR: ["AmbiX_036x033y_0.01m.wav", "AmbiX_-70x035y_0.01m.wav"] - overlap: 1.0 - - a2: - name: "G6S2.wav" - description: "Two speakers sitting in a car." - source: ["fa1.wav", "ma1.wav"] - IR: ["AmbiX_-70x035y_0.01m.wav", "AmbiX_036x033y_0.01m.wav"] - overlap: 1.0 - - a3: - name: "G2S1.wav" - description: "Two speakers sitting in a car." 
- source: ["fa1.wav", "ma1.wav"] - IR: ["AmbiX_000x005y_0.1m.wav", "AmbiX_022x-38y_0.01m.wav"] - overlap: -1.0 - - a4: - name: "G2S2.wav" - description: "Two speakers sitting in a car." - source: ["fa1.wav", "ma1.wav"] - IR: ["AmbiX_022x-38y_0.01m.wav", "AmbiX_000x005y_0.1m.wav"] - overlap: -1.0 - diff --git a/item_gen_configs/HOA2_CONFIG.yml b/item_gen_configs/HOA2_CONFIG.yml deleted file mode 100644 index 3be0c57d..00000000 --- a/item_gen_configs/HOA2_CONFIG.yml +++ /dev/null @@ -1,61 +0,0 @@ ---- -################################################ -# General configuration -################################################ - -### Output format -format: "HOA2" - -### Output sampling rate in Hz needed for headerless audio files; default = 48000 -fs: 48000 - -### IR sampling rate in Hz needed for headerless audio files; default = 48000 -IR_fs: 48000 - -### Any relative paths will be interpreted relative to the working directory the script is called from! -### Usage of absolute paths is recommended. -### Do not use file names with dots "." in them! 
This is not supported, use "_" instead -### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions - -### Input path to mono files -input_path: "./items_mono" - -### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' -IR_path: "./IRs" - -### Output path for generated test items and metadata files -output_path: "./items_HOA2" - -### (Optional) Output path for binauralized versions of the generated HOA2 items -# binaural_path: "./items_HOA2_bin" - -### Target loudness in LKFS; default = null (no loudness normalization applied) -loudness: -26 - -### Pre-amble and Post-amble length in seconds (default = 0.0) -preamble: 0.5 -postamble: 1.0 - -### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) -add_low_level_random_noise: true - - -################################################ -### Scene description -################################################ - -### Each scene must start with the sceneN tag -### Specify the mono source filename (the program will search for it in the input_path folder) -### Specify the stereo IR source filename (the program will search for it in the input_path_IR folder) -### Specify the overlap length in seconds for each input source (negative value creates a gap) -### Note 1: use [val1, val2, ...] 
for multiple sources in a scene -### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames - -scenes: - a1: - name: "out.wav" - description: "" - source: ["fa1.wav", "ma1.wav"] - IR: ["IR_HOA2_env1/FreefieldFloor_TalkPos1_EigenHoA2_SinSweep_9chn.wav", "IR_HOA2_env1/FreefieldFloor_TalkPos2_EigenHoA2_SinSweep_9chn.wav"] - overlap: -0.2 - diff --git a/item_gen_configs/ISM1_CONFIG.yml b/item_gen_configs/ISM1_CONFIG.yml deleted file mode 100644 index 0f26866a..00000000 --- a/item_gen_configs/ISM1_CONFIG.yml +++ /dev/null @@ -1,303 +0,0 @@ ---- -################################################ -# General configuration -################################################ - -### Output format -format: "ISM1" - -### Output sampling rate in Hz needed for headerless audio files; default = 48000 -fs: 48000 - -### Any relative paths will be interpreted relative to the working directory the script is called from! -### Usage of absolute paths is recommended. -### Do not use file names with dots "." in them! 
This is not supported, use "_" instead -### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions - -### Input path to mono files -input_path: "./items_mono" - -### Output path for generated test items and metadata files -output_path: "./items_ISM1" - -### Target loudness in LKFS; default = null (no loudness normalization applied) -loudness: -26 - -### Pre-amble and Post-amble length in seconds (default = 0.0) -preamble: 0.5 -postamble: 0.5 - -### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) -add_low_level_random_noise: true - - -################################################ -### Scene description -################################################ - -### Each scene must start with the sceneN tag -### Specify the mono source filename (the program will search for it in the input_path folder) -### Specify azimuth and elevation for each input source -### Note 1: use [val1, val2, ...] for multiple sources in a scene -### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames - -### Note 3: we're using right-handed coordinate system with azi = 0 pointing from the nose to the screen -### azimuth: float, [-180,180]; positive indicates left -### elevation: float, [-90,90]; positive indicates up -### distance: float, tbd: default: 1 -### spread: float, [0,360]; spread in angles from 0 ... 
360˚ -### gain: float, [0,1] - -scenes: - a1: - name: "G1S1.wav" - description: "Talker sitting at a table" - source: "test_single.wav" - azimuth: 0 - elevation: 0 - - a2: - name: "G6S2.wav" - description: "Talker sitting at a table" - source: "test_single.wav" - azimuth: 60 - elevation: 0 - - a3: - name: "G5S3.wav" - description: "Talker sitting at a table" - source: "test_single.wav" - azimuth: 120 - elevation: 0 - - a4: - name: "G4S4.wav" - description: "Talker sitting at a table" - source: "test_single.wav" - azimuth: 180 - elevation: 0 - - a5: - name: "G3S5.wav" - description: "Talker sitting at a table" - source: "test_single.wav" - azimuth: 240 - elevation: 0 - - a6: - name: "G2S6.wav" - description: "Talker sitting at a table" - source: "test_single.wav" - azimuth: 300 - elevation: 0 - - b1: - name: "G2S1.wav" - description: "standing talker." - source: "test_single.wav" - azimuth: 120 - elevation: 35 - - b2: - name: "G1S2.wav" - description: "standing talker." - source: "test_single.wav" - azimuth: 180 - elevation: 35 - - b3: - name: "G6S3.wav" - description: "standing talker." - source: "test_single.wav" - azimuth: 240 - elevation: 35 - - b4: - name: "G5S4.wav" - description: "standing talker." - source: "test_single.wav" - azimuth: 300 - elevation: 35 - - b5: - name: "G4S5.wav" - description: "standing talker." - source: "test_single.wav" - azimuth: 0 - elevation: 35 - - b6: - name: "G3S6.wav" - description: "standing talker." - source: "test_single.wav" - azimuth: 60 - elevation: 35 - - c1: - name: "G3S1.wav" - description: "Smaller talker (child) walking around a table." - source: "test_single.wav" - azimuth: "0:1:360" - elevation: 0 - - c2: - name: "G2S2.wav" - description: "Smaller talker (child) walking around a table." - source: "test_single.wav" - azimuth: "60:1:60+360" - elevation: 0 - - c3: - name: "G1S3.wav" - description: "Smaller talker (child) walking around a table." 
- source: "test_single.wav" - azimuth: "120:1:120+360" - elevation: 0 - - c4: - name: "G6S4.wav" - description: "Smaller talker (child) walking around a table." - source: "test_single.wav" - azimuth: "180:1:180+360" - elevation: 0 - - c5: - name: "G5S5.wav" - description: "Smaller talker (child) walking around a table." - source: "test_single.wav" - azimuth: "240:1:240+360" - elevation: 0 - - c6: - name: "G4S6.wav" - description: "Smaller talker (child) walking around a table." - source: "test_single.wav" - azimuth: "300:1:300+360" - elevation: 0 - - d1: - name: "G4S1.wav" - description: "Talker walking around the table." - source: "test_single.wav" - azimuth: "0:-1:-360" - elevation: 35 - - d2: - name: "G3S2.wav" - description: "Talker walking around the table." - source: "test_single.wav" - azimuth: "60:-1:60-360" - elevation: 35 - - d3: - name: "G2S3.wav" - description: "Talker walking around the table." - source: "test_single.wav" - azimuth: "120:-1:120-360" - elevation: 35 - - d4: - name: "G1S4.wav" - description: "Talker walking around the table." - source: "test_single.wav" - azimuth: "180:-1:180-360" - elevation: 35 - - d5: - name: "G6S5.wav" - description: "Talker walking around the table." - source: "test_single.wav" - azimuth: "240:-1:240-360" - elevation: 35 - - d6: - name: "G5S6.wav" - description: "Talker walking around the table." - source: "test_single.wav" - azimuth: "300:-1:300-360" - elevation: 35 - - e1: - name: "G5S1.wav" - description: "Elevation displacement." - source: "test_single.wav" - azimuth: 240 - elevation: "-90:0.5:90" - - e2: - name: "G4S2.wav" - description: "Elevation displacement." - source: "test_single.wav" - azimuth: 300 - elevation: 0 - - e3: - name: "G3S3.wav" - description: "Elevation displacement." - source: "test_single.wav" - azimuth: 0 - elevation: "-90:0.5:90" - - e4: - name: "G2S4.wav" - description: "Elevation displacement." 
- source: "test_single.wav" - azimuth: 60 - elevation: "-90:0.5:90" - - e5: - name: "G1S5.wav" - description: "Elevation displacement." - source: "test_single.wav" - azimuth: 120 - elevation: "-90:0.5:90" - - e6: - name: "G6S6.wav" - description: "Elevation displacement." - source: "test_single.wav" - azimuth: 180 - elevation: "-90:0.5:90" - - f1: - name: "G6S1.wav" - description: "Azimuth and elevation displacement." - source: "test_single.wav" - azimuth: "60:0.5:60+180" - elevation: "35:-0.2:-35" - - f2: - name: "G5S2.wav" - description: "Azimuth and elevation displacement." - source: "test_single.wav" - azimuth: "120:0.5:120+180" - elevation: "35:-0.2:-35" - - f3: - name: "G4S3.wav" - description: "Azimuth and elevation displacement." - source: "test_single.wav" - azimuth: "180:0.5:180+180" - elevation: "35:-0.2:-35" - - f4: - name: "G3S4.wav" - description: "Azimuth and elevation displacement." - source: "test_single.wav" - azimuth: "240:0.5:240+180" - elevation: "35:-0.2:-35" - - f5: - name: "G2S5.wav" - description: "Azimuth and elevation displacement." - source: "test_single.wav" - azimuth: "300:0.5:300+180" - elevation: "35:-0.2:-35" - - f6: - name: "G1S6.wav" - description: "Azimuth and elevation displacement." - source: "test_single.wav" - azimuth: "0:0.5:0+180" - elevation: "35:-0.2:-35" - \ No newline at end of file diff --git a/item_gen_configs/ISM2_CONFIG.yml b/item_gen_configs/ISM2_CONFIG.yml deleted file mode 100644 index c9b749a5..00000000 --- a/item_gen_configs/ISM2_CONFIG.yml +++ /dev/null @@ -1,339 +0,0 @@ ---- -################################################ -# General configuration -################################################ - -### Output format -format: "ISM2" - -### Output sampling rate in Hz needed for headerless audio files; default = 48000 -fs: 48000 - -### Any relative paths will be interpreted relative to the working directory the script is called from! -### Usage of absolute paths is recommended. 
-### Do not use file names with dots "." in them! This is not supported, use "_" instead -### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions - -### Input path to mono files -input_path: "./items_mono" - -### Output path for generated test items and metadata files -output_path: "./items_ISM2" - -### Target loudness in LKFS; default = null (no loudness normalization applied) -loudness: -26 - -### Pre-amble and Post-amble length in seconds (default = 0.0) -preamble: 1.0 -postamble: 1.0 - -### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) -add_low_level_random_noise: true - -################################################ -### Scene description -################################################ - -### Each scene must start with the sceneN tag -### Specify the mono source filename (the program will search for it in the input_path folder) -### Specify azimuth and elevation for each input source -### Specify the overlap length in seconds for each input source (negative value creates a gap) -### Note 1: use [val1, val2, ...] for multiple sources in a scene -### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames - -### Note 3: we're using right-handed coordinate system with azi = 0 pointing from the nose to the screen -### azimuth: float, [-180,180]; positive indicates left -### elevation: float, [-90,90]; positive indicates up -### distance: float, tbd: default: 1 -### spread: float, [0,360]; spread in angles from 0 ... 360˚ -### gain: float, [0,1] - -scenes: - a1: - name: "G1S1.wav" - description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." 
- source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [0, 50] - elevation: [0, 0] - overlap: -1.0 - - a2: - name: "G6S2.wav" - description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [50, 350] - elevation: [0, 0] - overlap: -1.0 - - a3: - name: "G5S3.wav" - description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [40, 290] - elevation: [0, 0] - overlap: -1.0 - - a4: - name: "G4S4.wav" - description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [30, 230] - elevation: [15, 15] - overlap: -1.0 - - a5: - name: "G3S5.wav" - description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [20, 170] - elevation: [15, 15] - overlap: -1.0 - - a6: - name: "G2S6.wav" - description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [10, 110] - elevation: [15, 15] - overlap: -1.0 - - b1: - name: "G2S1.wav" - description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." 
- source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [20, 170] - elevation: [30, 30] - overlap: 1.0 - - b2: - name: "G1S2.wav" - description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [10, 110] - elevation: [30, 30] - overlap: 1.0 - - b3: - name: "G6S3.wav" - description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [0, 50] - elevation: [30, 30] - overlap: 1.0 - - b4: - name: "G5S4.wav" - description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [50, 350] - elevation: [60, 60] - overlap: 1.0 - - b5: - name: "G4S5.wav" - description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [40, 290] - elevation: [60, 60] - overlap: 1.0 - - b6: - name: "G3S6.wav" - description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [30, 230] - elevation: [60, 60] - overlap: 1.0 - - c1: - name: "G3S1.wav" - description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [40, 290] - elevation: [0, 60] - overlap: -1.0 - - c2: - name: "G2S2.wav" - description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." 
- source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [30, 230] - elevation: [0, 60] - overlap: -1.0 - - c3: - name: "G1S3.wav" - description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [20, 170] - elevation: [0, 60] - overlap: -1.0 - - c4: - name: "G6S4.wav" - description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [10, 110] - elevation: [0, 60] - overlap: -1.0 - - c5: - name: "G5S5.wav" - description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [0, 50] - elevation: [0, 60] - overlap: -1.0 - - c6: - name: "G4S6.wav" - description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [50, 350] - elevation: [0, 60] - overlap: -1.0 - - d1: - name: "G4S1.wav" - description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [50, "180:1:120 + 360"] - elevation: [0, 60] - overlap: 1.0 - - d2: - name: "G3S2.wav" - description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [300, "-70:-1:-10 - 360"] - elevation: [0, 60] - overlap: 1.0 - - d3: - name: "G2S3.wav" - description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
- source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [250, "-20:-1:-320"] - elevation: [0, 60] - overlap: 1.0 - - d4: - name: "G1S4.wav" - description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [200, "30:-1:-270"] - elevation: [0, 60] - overlap: 1.0 - - d5: - name: "G6S5.wav" - description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [150, "80:1:20 + 360"] - elevation: [0, 60] - overlap: 1.0 - - d6: - name: "G5S6.wav" - description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: [100, "130:1:70 + 360"] - elevation: [0, 60] - overlap: 1.0 - - e1: - name: "G5S1.wav" - description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: ["80:1:20 + 360", "80:1:20 + 360"] - elevation: [10, 60] - overlap: 1.0 - - e2: - name: "G4S2.wav" - description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: ["130:1:70 + 360", "130:1:70 + 360"] - elevation: [10, 60] - overlap: 1.0 - - e3: - name: "G3S3.wav" - description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: ["180:1:120 + 360", "180:1:120 + 360"] - elevation: [10, 60] - overlap: 1.0 - - e4: - name: "G2S4.wav" - description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - 
azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] - elevation: [10, 60] - overlap: 1.0 - - e5: - name: "G1S5.wav" - description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: ["-20:-1:-320", "-20:-1:-320"] - elevation: [10, 60] - overlap: 1.0 - - e6: - name: "G6S6.wav" - description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: ["30:-1:-270", "30:-1:-270"] - elevation: [10, 60] - overlap: 1.0 - - f1: - name: "G6S1.wav" - description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: ["60:1:0 + 360", "60:-1:120 - 360"] - elevation: [20, 50] - overlap: -1.0 - - f2: - name: "G5S2.wav" - description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: ["0:1:300", "0:-1:60 - 360"] - elevation: [20, 50] - overlap: -1.0 - - f3: - name: "G4S3.wav" - description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: ["300:1:240 + 360", "300:-1:0"] - elevation: [20, 50] - overlap: -1.0 - - f4: - name: "G3S4.wav" - description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: ["240:1:180 + 360", "240:-1:-60"] - elevation: [20, 50] - overlap: -1.0 - - f5: - name: "G2S5.wav" - description: "two talkers walking around the table in opposite directions, non-overlapping utterances." 
- source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: ["180:1:120 + 360", "180:-1:-120"] - elevation: [20, 50] - overlap: -1.0 - - f6: - name: "G1S6.wav" - description: "two talkers walking around the table in opposite directions, non-overlapping utterances." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] - elevation: [20, 50] - overlap: -1.0 - \ No newline at end of file diff --git a/item_gen_configs/STEREO_CONFIG.yml b/item_gen_configs/STEREO_CONFIG.yml deleted file mode 100644 index 7dd1a956..00000000 --- a/item_gen_configs/STEREO_CONFIG.yml +++ /dev/null @@ -1,303 +0,0 @@ ---- -################################################ -# General configuration -################################################ - -### Output format -format: "STEREO" - -### Output sampling rate in Hz needed for headerless audio files; default = 48000 -fs: 48000 - -### IR sampling rate in Hz needed for headerless audio files; default = 48000 -IR_fs: 32000 - -### Any relative paths will be interpreted relative to the working directory the script is called from! -### Usage of absolute paths is recommended. -### Do not use file names with dots "." in them! 
This is not supported, use "_" instead -### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions - -### Input path to mono files -input_path: "./items_mono" - -### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' -# IR_path: "./IR" - -### Output path for generated test items and metadata files -output_path: "./items_STEREO" - -### Target loudness in LKFS; default = null (no loudness normalization applied) -loudness: -26 - -### Pre-amble and Post-amble length in seconds (default = 0.0) -preamble: 1.0 -postamble: 1.0 - -### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) -add_low_level_random_noise: true - - -################################################ -### Scene description -################################################ - -### Each scene must start with the sceneN tag -### Specify the mono source filename (the program will search for it in the input_path folder) -### Specify the stereo IR source filename (the program will search for it in the input_path_IR folder) -### Specify the overlap length in seconds for each input source (negative value creates a gap) -### Note 1: use [val1, val2, ...] for multiple sources in a scene -### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames - -scenes: - a1: - name: "G1S1.wav" - description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["LEABP04.wav", "LEABP11.wav"] - overlap: 1.0 - - a2: - name: "G6S2.wav" - description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." 
- source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["LEABP05.wav", "LEABP11.wav"] - overlap: 1.0 - - a3: - name: "G5S3.wav" - description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["LEABP06.wav", "LEABP11.wav"] - overlap: 1.0 - - a4: - name: "G4S4.wav" - description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["LEABP05.wav", "LEABP10.wav"] - overlap: -1.0 - - a5: - name: "G3S5.wav" - description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["LEABP05.wav", "LEABP11.wav"] - overlap: -1.0 - - a6: - name: "G2S6.wav" - description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["LEABP05.wav", "LEABP12.wav"] - overlap: -1.0 - - b1: - name: "G2S1.wav" - description: "Two speakers sitting at oval table side by side in a large anechoic conference room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["LAABP05.wav", "LAABP06.wav"] - overlap: -1.0 - - b2: - name: "G1S2.wav" - description: "Two speakers sitting at oval table side by side in a large anechoic conference room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["LAABP07.wav", "LAABP08.wav"] - overlap: 1.0 - - b3: - name: "G6S3.wav" - description: "Two speakers sitting at oval table side by side in a large anechoic conference room." 
- source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["LAABP09.wav", "LAABP10.wav"] - overlap: 1.0 - - b4: - name: "G5S4.wav" - description: "Two speakers sitting at oval table side by side in a large anechoic conference room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["LAABP11.wav", "LAABP12.wav"] - overlap: -1.0 - - b5: - name: "G4S5.wav" - description: "Two speakers sitting at oval table side by side in a large anechoic conference room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["LAABP01.wav", "LAABP02.wav"] - overlap: -1.0 - - b6: - name: "G3S6.wav" - description: "Two speakers sitting at oval table side by side in a large anechoic conference room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["LAABP03.wav", "LAABP04.wav"] - overlap: -1.0 - - c1: - name: "G3S1.wav" - description: "One talker sitting at table in a small anechoic conference room." - source: ["test_single.wav"] - IR: ["SAMSP01.wav"] - overlap: -1.0 - - c2: - name: "G2S2.wav" - description: "One talker sitting at table in a small anechoic conference room." - source: ["test_single.wav"] - IR: ["SAMSP04.wav"] - overlap: -1.0 - - c3: - name: "G1S3.wav" - description: "One talker sitting at table in a small anechoic conference room." - source: ["test_single.wav"] - IR: ["SAMSP07.wav"] - overlap: -1.0 - - c4: - name: "G6S4.wav" - description: "One talker sitting at table in a small echoic conference room." - source: ["test_single.wav"] - IR: ["SEABP01.wav"] - overlap: -1.0 - - c5: - name: "G5S5.wav" - description: "One talker sitting at table in a small echoic conference room." - source: ["test_single.wav"] - IR: ["SEABP03.wav"] - overlap: -1.0 - - c6: - name: "G4S6.wav" - description: "One talker sitting at table in a small echoic conference room." 
- source: ["test_single.wav"] - IR: ["SEABP06.wav"] - overlap: -1.0 - - d1: - name: "G4S1.wav" - description: "One talker sitting at table in a small anechoic conference room." - source: ["test_single.wav"] - IR: ["SEBIP01.wav"] - overlap: -1.0 - - d2: - name: "G3S2.wav" - description: "One talker sitting at table in a small anechoic conference room." - source: ["test_single.wav"] - IR: ["SEBIP04.wav"] - overlap: -1.0 - - d3: - name: "G3S2.wav" - description: "One talker sitting at table in a small anechoic conference room." - source: ["test_single.wav"] - IR: ["SEBIP07.wav"] - overlap: -1.0 - - d4: - name: "G1S4.wav" - description: "One talker sitting at table in a small echoic conference room." - source: ["test_single.wav"] - IR: ["SEBIP07.wav"] - overlap: -1.0 - - d5: - name: "G6S5.wav" - description: "One talker sitting at table in a small echoic conference room." - source: ["test_single.wav"] - IR: ["SEBIP07.wav"] - overlap: -1.0 - - d6: - name: "G5S6.wav" - description: "One talker sitting at table in a small echoic conference room." - source: ["test_single.wav"] - IR: ["SEBIP07.wav"] - overlap: -1.0 - - e1: - name: "G5S1.wav" - description: "Two talkers sitting in a room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["SEMSP01.wav", "SEMSP03.wav"] - overlap: 1.0 - - e2: - name: "G4S2.wav" - description: "Two talkers sitting in a room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["SEMSP01.wav", "SEMSP05.wav"] - overlap: 1.0 - - e3: - name: "G3S3.wav" - description: "Two talkers sitting in a room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["SEMSP01.wav", "SEMSP07.wav"] - overlap: 1.0 - - e4: - name: "G2S4.wav" - description: "Two talkers sitting in a room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["SEMSP03.wav", "SEMSP04.wav"] - overlap: -1.0 - - e5: - name: "G1S5.wav" - description: "Two talkers sitting in a room." 
- source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["SEMSP05.wav", "SEMSP07.wav"] - overlap: -1.0 - - e6: - name: "G6S6.wav" - description: "Two talkers sitting in a room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["SEMSP06.wav", "SEMSP02.wav"] - overlap: -1.0 - - f1: - name: "G6S1.wav" - description: "Two talkers sitting in a room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["SEBIP05.wav", "SEBIP01.wav"] - overlap: 1.0 - - f2: - name: "G5S2.wav" - description: "Two talkers sitting in a room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["SEBIP07.wav", "SEBIP01.wav"] - overlap: 1.0 - - f3: - name: "G4S3.wav" - description: "Two talkers sitting in a room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["SEBIP04.wav", "SEBIP01.wav"] - overlap: 1.0 - - f4: - name: "G3S4.wav" - description: "Two talkers sitting in a room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["SEBIP02.wav", "SEBIP06.wav"] - overlap: -1.0 - - f5: - name: "G2S5.wav" - description: "Two talkers sitting in a room." - source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["SEBIP02.wav", "SEBIP06.wav"] - overlap: -1.0 - - f6: - name: "G1S6.wav" - description: "Two talkers sitting in a room." 
- source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] - IR: ["SEBIP03.wav", "SEBIP04.wav"] - overlap: -1.0 - \ No newline at end of file -- GitLab From 212fcff7b17cb349f03dd8e1a8a5356b58628679 Mon Sep 17 00:00:00 2001 From: Vladimir Malenovsky Date: Thu, 8 Jun 2023 12:19:31 +0200 Subject: [PATCH 11/11] formatting --- ivas_processing_scripts/generation/process_ambi_items.py | 3 --- ivas_processing_scripts/generation/process_ism1_items.py | 2 +- ivas_processing_scripts/generation/process_ism2_items.py | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/ivas_processing_scripts/generation/process_ambi_items.py b/ivas_processing_scripts/generation/process_ambi_items.py index 11dea1d2..64176841 100644 --- a/ivas_processing_scripts/generation/process_ambi_items.py +++ b/ivas_processing_scripts/generation/process_ambi_items.py @@ -78,9 +78,6 @@ def generate_ambi_items( ): """Generate FOA/HOA2 items from mono items based on scene description""" - # get the number of scenes - N_scenes = len(cfg.scenes) - # set the target level if "loudness" not in cfg.__dict__: cfg.loudness = -26 diff --git a/ivas_processing_scripts/generation/process_ism1_items.py b/ivas_processing_scripts/generation/process_ism1_items.py index 66ce92be..e78c9a57 100644 --- a/ivas_processing_scripts/generation/process_ism1_items.py +++ b/ivas_processing_scripts/generation/process_ism1_items.py @@ -360,7 +360,7 @@ def generate_ism1_scene( os.path.dirname(scene["name"]), cfg.use_output_prefix + os.path.splitext(os.path.basename(scene["name"]))[0] - + f".0.csv", + + ".0.csv", ) with open( diff --git a/ivas_processing_scripts/generation/process_ism2_items.py b/ivas_processing_scripts/generation/process_ism2_items.py index 704c2230..ebb2725f 100644 --- a/ivas_processing_scripts/generation/process_ism2_items.py +++ b/ivas_processing_scripts/generation/process_ism2_items.py @@ -29,10 +29,10 @@ # accordance with the laws of the Federal Republic of Germany excluding its conflict of law 
rules and # the United Nations Convention on Contracts on the International Sales of Goods. # + import csv import logging import os -import pdb from itertools import groupby, repeat from math import floor -- GitLab