diff --git a/.gitignore b/.gitignore index 7855f81eeadf748ff2b38de86de256724be89df0..77abd26af5ba04e7ab60551a09a8bb92b723e034 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,6 @@ venv/ .vscode/ .idea/ .DS_Store -*.wav !tests/data/**/*.wav *.pcm *.bs diff --git a/item_gen_configs/ISM1_CONFIG.yml b/item_gen_configs/ISM1_CONFIG.yml new file mode 100644 index 0000000000000000000000000000000000000000..0f26866aa3c2d51210331e4626699d321d7e1ad7 --- /dev/null +++ b/item_gen_configs/ISM1_CONFIG.yml @@ -0,0 +1,303 @@ +--- +################################################ +# General configuration +################################################ + +### Output format +format: "ISM1" + +### Output sampling rate in Hz needed for headerless audio files; default = 48000 +fs: 48000 + +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + +### Input path to mono files +input_path: "./items_mono" + +### Output path for generated test items and metadata files +output_path: "./items_ISM1" + +### Target loudness in LKFS; default = null (no loudness normalization applied) +loudness: -26 + +### Pre-amble and Post-amble length in seconds (default = 0.0) +preamble: 0.5 +postamble: 0.5 + +### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) +add_low_level_random_noise: true + + +################################################ +### Scene description +################################################ + +### Each scene must start with the sceneN tag +### Specify the mono source filename (the program will search for it in the input_path folder) +### Specify azimuth and elevation for each input source +### Note 1: use [val1, val2, ...] 
for multiple sources in a scene +### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames + +### Note 3: we're using right-handed coordinate system with azi = 0 pointing from the nose to the screen +### azimuth: float, [-180,180]; positive indicates left +### elevation: float, [-90,90]; positive indicates up +### distance: float, tbd: default: 1 +### spread: float, [0,360]; spread in angles from 0 ... 360˚ +### gain: float, [0,1] + +scenes: + a1: + name: "G1S1.wav" + description: "Talker sitting at a table" + source: "test_single.wav" + azimuth: 0 + elevation: 0 + + a2: + name: "G6S2.wav" + description: "Talker sitting at a table" + source: "test_single.wav" + azimuth: 60 + elevation: 0 + + a3: + name: "G5S3.wav" + description: "Talker sitting at a table" + source: "test_single.wav" + azimuth: 120 + elevation: 0 + + a4: + name: "G4S4.wav" + description: "Talker sitting at a table" + source: "test_single.wav" + azimuth: 180 + elevation: 0 + + a5: + name: "G3S5.wav" + description: "Talker sitting at a table" + source: "test_single.wav" + azimuth: 240 + elevation: 0 + + a6: + name: "G2S6.wav" + description: "Talker sitting at a table" + source: "test_single.wav" + azimuth: 300 + elevation: 0 + + b1: + name: "G2S1.wav" + description: "standing talker." + source: "test_single.wav" + azimuth: 120 + elevation: 35 + + b2: + name: "G1S2.wav" + description: "standing talker." + source: "test_single.wav" + azimuth: 180 + elevation: 35 + + b3: + name: "G6S3.wav" + description: "standing talker." + source: "test_single.wav" + azimuth: 240 + elevation: 35 + + b4: + name: "G5S4.wav" + description: "standing talker." + source: "test_single.wav" + azimuth: 300 + elevation: 35 + + b5: + name: "G4S5.wav" + description: "standing talker." + source: "test_single.wav" + azimuth: 0 + elevation: 35 + + b6: + name: "G3S6.wav" + description: "standing talker." 
+ source: "test_single.wav" + azimuth: 60 + elevation: 35 + + c1: + name: "G3S1.wav" + description: "Smaller talker (child) walking around a table." + source: "test_single.wav" + azimuth: "0:1:360" + elevation: 0 + + c2: + name: "G2S2.wav" + description: "Smaller talker (child) walking around a table." + source: "test_single.wav" + azimuth: "60:1:60+360" + elevation: 0 + + c3: + name: "G1S3.wav" + description: "Smaller talker (child) walking around a table." + source: "test_single.wav" + azimuth: "120:1:120+360" + elevation: 0 + + c4: + name: "G6S4.wav" + description: "Smaller talker (child) walking around a table." + source: "test_single.wav" + azimuth: "180:1:180+360" + elevation: 0 + + c5: + name: "G5S5.wav" + description: "Smaller talker (child) walking around a table." + source: "test_single.wav" + azimuth: "240:1:240+360" + elevation: 0 + + c6: + name: "G4S6.wav" + description: "Smaller talker (child) walking around a table." + source: "test_single.wav" + azimuth: "300:1:300+360" + elevation: 0 + + d1: + name: "G4S1.wav" + description: "Talker walking around the table." + source: "test_single.wav" + azimuth: "0:-1:-360" + elevation: 35 + + d2: + name: "G3S2.wav" + description: "Talker walking around the table." + source: "test_single.wav" + azimuth: "60:-1:60-360" + elevation: 35 + + d3: + name: "G2S3.wav" + description: "Talker walking around the table." + source: "test_single.wav" + azimuth: "120:-1:120-360" + elevation: 35 + + d4: + name: "G1S4.wav" + description: "Talker walking around the table." + source: "test_single.wav" + azimuth: "180:-1:180-360" + elevation: 35 + + d5: + name: "G6S5.wav" + description: "Talker walking around the table." + source: "test_single.wav" + azimuth: "240:-1:240-360" + elevation: 35 + + d6: + name: "G5S6.wav" + description: "Talker walking around the table." + source: "test_single.wav" + azimuth: "300:-1:300-360" + elevation: 35 + + e1: + name: "G5S1.wav" + description: "Elevation displacement." 
+ source: "test_single.wav" + azimuth: 240 + elevation: "-90:0.5:90" + + e2: + name: "G4S2.wav" + description: "Elevation displacement." + source: "test_single.wav" + azimuth: 300 + elevation: "-90:0.5:90" + + e3: + name: "G3S3.wav" + description: "Elevation displacement." + source: "test_single.wav" + azimuth: 0 + elevation: "-90:0.5:90" + + e4: + name: "G2S4.wav" + description: "Elevation displacement." + source: "test_single.wav" + azimuth: 60 + elevation: "-90:0.5:90" + + e5: + name: "G1S5.wav" + description: "Elevation displacement." + source: "test_single.wav" + azimuth: 120 + elevation: "-90:0.5:90" + + e6: + name: "G6S6.wav" + description: "Elevation displacement." + source: "test_single.wav" + azimuth: 180 + elevation: "-90:0.5:90" + + f1: + name: "G6S1.wav" + description: "Azimuth and elevation displacement." + source: "test_single.wav" + azimuth: "60:0.5:60+180" + elevation: "35:-0.2:-35" + + f2: + name: "G5S2.wav" + description: "Azimuth and elevation displacement." + source: "test_single.wav" + azimuth: "120:0.5:120+180" + elevation: "35:-0.2:-35" + + f3: + name: "G4S3.wav" + description: "Azimuth and elevation displacement." + source: "test_single.wav" + azimuth: "180:0.5:180+180" + elevation: "35:-0.2:-35" + + f4: + name: "G3S4.wav" + description: "Azimuth and elevation displacement." + source: "test_single.wav" + azimuth: "240:0.5:240+180" + elevation: "35:-0.2:-35" + + f5: + name: "G2S5.wav" + description: "Azimuth and elevation displacement." + source: "test_single.wav" + azimuth: "300:0.5:300+180" + elevation: "35:-0.2:-35" + + f6: + name: "G1S6.wav" + description: "Azimuth and elevation displacement." 
+ source: "test_single.wav" + azimuth: "0:0.5:0+180" + elevation: "35:-0.2:-35" + \ No newline at end of file diff --git a/item_gen_configs/ISM2_CONFIG.yml b/item_gen_configs/ISM2_CONFIG.yml new file mode 100644 index 0000000000000000000000000000000000000000..c9b749a5063cf91de7dcf3cc248df6f38dd4d9a1 --- /dev/null +++ b/item_gen_configs/ISM2_CONFIG.yml @@ -0,0 +1,339 @@ +--- +################################################ +# General configuration +################################################ + +### Output format +format: "ISM2" + +### Output sampling rate in Hz needed for headerless audio files; default = 48000 +fs: 48000 + +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + +### Input path to mono files +input_path: "./items_mono" + +### Output path for generated test items and metadata files +output_path: "./items_ISM2" + +### Target loudness in LKFS; default = null (no loudness normalization applied) +loudness: -26 + +### Pre-amble and Post-amble length in seconds (default = 0.0) +preamble: 1.0 +postamble: 1.0 + +### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) +add_low_level_random_noise: true + +################################################ +### Scene description +################################################ + +### Each scene must start with the sceneN tag +### Specify the mono source filename (the program will search for it in the input_path folder) +### Specify azimuth and elevation for each input source +### Specify the overlap length in seconds for each input source (negative value creates a gap) +### Note 1: use [val1, val2, ...] 
for multiple sources in a scene +### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames + +### Note 3: we're using right-handed coordinate system with azi = 0 pointing from the nose to the screen +### azimuth: float, [-180,180]; positive indicates left +### elevation: float, [-90,90]; positive indicates up +### distance: float, tbd: default: 1 +### spread: float, [0,360]; spread in angles from 0 ... 360˚ +### gain: float, [0,1] + +scenes: + a1: + name: "G1S1.wav" + description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [0, 50] + elevation: [0, 0] + overlap: -1.0 + + a2: + name: "G6S2.wav" + description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [50, 350] + elevation: [0, 0] + overlap: -1.0 + + a3: + name: "G5S3.wav" + description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [40, 290] + elevation: [0, 0] + overlap: -1.0 + + a4: + name: "G4S4.wav" + description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [30, 230] + elevation: [15, 15] + overlap: -1.0 + + a5: + name: "G3S5.wav" + description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." 
+ source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [20, 170] + elevation: [15, 15] + overlap: -1.0 + + a6: + name: "G2S6.wav" + description: "two talkers sitting at a table, at different azimuth angles with respect to the microphone, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [10, 110] + elevation: [15, 15] + overlap: -1.0 + + b1: + name: "G2S1.wav" + description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [20, 170] + elevation: [30, 30] + overlap: 1.0 + + b2: + name: "G1S2.wav" + description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [10, 110] + elevation: [30, 30] + overlap: 1.0 + + b3: + name: "G6S3.wav" + description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [0, 50] + elevation: [30, 30] + overlap: 1.0 + + b4: + name: "G5S4.wav" + description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [50, 350] + elevation: [60, 60] + overlap: 1.0 + + b5: + name: "G4S5.wav" + description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [40, 290] + elevation: [60, 60] + overlap: 1.0 + + b6: + name: "G3S6.wav" + description: "two standing talkers, at different azimuth angles with respect to the microphone, ~30% overlapping utterances." 
+ source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [30, 230] + elevation: [60, 60] + overlap: 1.0 + + c1: + name: "G3S1.wav" + description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [40, 290] + elevation: [0, 60] + overlap: -1.0 + + c2: + name: "G2S2.wav" + description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [30, 230] + elevation: [0, 60] + overlap: -1.0 + + c3: + name: "G1S3.wav" + description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [20, 170] + elevation: [0, 60] + overlap: -1.0 + + c4: + name: "G6S4.wav" + description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [10, 110] + elevation: [0, 60] + overlap: -1.0 + + c5: + name: "G5S5.wav" + description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [0, 50] + elevation: [0, 60] + overlap: -1.0 + + c6: + name: "G4S6.wav" + description: "one talker sitting at a table, second talker standing beside the table, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [50, 350] + elevation: [0, 60] + overlap: -1.0 + + d1: + name: "G4S1.wav" + description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
+ source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [50, "180:1:120 + 360"] + elevation: [0, 60] + overlap: 1.0 + + d2: + name: "G3S2.wav" + description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [300, "-70:-1:-10 - 360"] + elevation: [0, 60] + overlap: 1.0 + + d3: + name: "G2S3.wav" + description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [250, "-20:-1:-320"] + elevation: [0, 60] + overlap: 1.0 + + d4: + name: "G1S4.wav" + description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [200, "30:-1:-270"] + elevation: [0, 60] + overlap: 1.0 + + d5: + name: "G6S5.wav" + description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [150, "80:1:20 + 360"] + elevation: [0, 60] + overlap: 1.0 + + d6: + name: "G5S6.wav" + description: "one talker sitting at a table, second talker walking around the table, ~30% overlapping utterances." 
+ source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: [100, "130:1:70 + 360"] + elevation: [0, 60] + overlap: 1.0 + + e1: + name: "G5S1.wav" + description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: ["80:1:20 + 360", "80:1:20 + 360"] + elevation: [10, 60] + overlap: 1.0 + + e2: + name: "G4S2.wav" + description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: ["130:1:70 + 360", "130:1:70 + 360"] + elevation: [10, 60] + overlap: 1.0 + + e3: + name: "G3S3.wav" + description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: ["180:1:120 + 360", "180:1:120 + 360"] + elevation: [10, 60] + overlap: 1.0 + + e4: + name: "G2S4.wav" + description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: ["-70:-1:-10 - 360", "-70:-1:-10 - 360"] + elevation: [10, 60] + overlap: 1.0 + + e5: + name: "G1S5.wav" + description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: ["-20:-1:-320", "-20:-1:-320"] + elevation: [10, 60] + overlap: 1.0 + + e6: + name: "G6S6.wav" + description: "two talkers walking side-by-side around the table, ~30% overlapping utterances" + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: ["30:-1:-270", "30:-1:-270"] + elevation: [10, 60] + overlap: 1.0 + + f1: + name: "G6S1.wav" + description: "two talkers walking around the table in opposite directions, non-overlapping utterances." 
+ source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: ["60:1:0 + 360", "60:-1:120 - 360"] + elevation: [20, 50] + overlap: -1.0 + + f2: + name: "G5S2.wav" + description: "two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: ["0:1:300", "0:-1:60 - 360"] + elevation: [20, 50] + overlap: -1.0 + + f3: + name: "G4S3.wav" + description: "two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: ["300:1:240 + 360", "300:-1:0"] + elevation: [20, 50] + overlap: -1.0 + + f4: + name: "G3S4.wav" + description: "two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: ["240:1:180 + 360", "240:-1:-60"] + elevation: [20, 50] + overlap: -1.0 + + f5: + name: "G2S5.wav" + description: "two talkers walking around the table in opposite directions, non-overlapping utterances." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: ["180:1:120 + 360", "180:-1:-120"] + elevation: [20, 50] + overlap: -1.0 + + f6: + name: "G1S6.wav" + description: "two talkers walking around the table in opposite directions, non-overlapping utterances." 
+ source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + azimuth: ["120:1:60 + 360", "120:-1:180 - 360"] + elevation: [20, 50] + overlap: -1.0 + \ No newline at end of file diff --git a/item_gen_configs/STEREO_CONFIG.yml b/item_gen_configs/STEREO_CONFIG.yml new file mode 100644 index 0000000000000000000000000000000000000000..7dd1a956e9f9e1d79f0e33cdb41cc6b862fa68a9 --- /dev/null +++ b/item_gen_configs/STEREO_CONFIG.yml @@ -0,0 +1,303 @@ +--- +################################################ +# General configuration +################################################ + +### Output format +format: "STEREO" + +### Output sampling rate in Hz needed for headerless audio files; default = 48000 +fs: 48000 + +### IR sampling rate in Hz needed for headerless audio files; default = 48000 +IR_fs: 32000 + +### Any relative paths will be interpreted relative to the working directory the script is called from! +### Usage of absolute paths is recommended. +### Do not use file names with dots "." in them! 
This is not supported, use "_" instead +### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions + +### Input path to mono files +input_path: "./items_mono" + +### Input path to stereo impulse response files, default = './ivas_processing_scripts/generation/IR' +# IR_path: "./IR" + +### Output path for generated test items and metadata files +output_path: "./items_STEREO" + +### Target loudness in LKFS; default = null (no loudness normalization applied) +loudness: -26 + +### Pre-amble and Post-amble length in seconds (default = 0.0) +preamble: 1.0 +postamble: 1.0 + +### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) +add_low_level_random_noise: true + + +################################################ +### Scene description +################################################ + +### Each scene must start with the sceneN tag +### Specify the mono source filename (the program will search for it in the input_path folder) +### Specify the stereo IR source filename (the program will search for it in the IR_path folder) +### Specify the overlap length in seconds for each input source (negative value creates a gap) +### Note 1: use [val1, val2, ...] for multiple sources in a scene +### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames + +scenes: + a1: + name: "G1S1.wav" + description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["LEABP04.wav", "LEABP11.wav"] + overlap: 1.0 + + a2: + name: "G6S2.wav" + description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." 
+ source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["LEABP05.wav", "LEABP11.wav"] + overlap: 1.0 + + a3: + name: "G5S3.wav" + description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["LEABP06.wav", "LEABP11.wav"] + overlap: 1.0 + + a4: + name: "G4S4.wav" + description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["LEABP05.wav", "LEABP10.wav"] + overlap: -1.0 + + a5: + name: "G3S5.wav" + description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["LEABP05.wav", "LEABP11.wav"] + overlap: -1.0 + + a6: + name: "G2S6.wav" + description: "Two speakers sitting at oval table in opposite corners in a large echoic conference room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["LEABP05.wav", "LEABP12.wav"] + overlap: -1.0 + + b1: + name: "G2S1.wav" + description: "Two speakers sitting at oval table side by side in a large anechoic conference room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["LAABP05.wav", "LAABP06.wav"] + overlap: -1.0 + + b2: + name: "G1S2.wav" + description: "Two speakers sitting at oval table side by side in a large anechoic conference room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["LAABP07.wav", "LAABP08.wav"] + overlap: 1.0 + + b3: + name: "G6S3.wav" + description: "Two speakers sitting at oval table side by side in a large anechoic conference room." 
+ source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["LAABP09.wav", "LAABP10.wav"] + overlap: 1.0 + + b4: + name: "G5S4.wav" + description: "Two speakers sitting at oval table side by side in a large anechoic conference room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["LAABP11.wav", "LAABP12.wav"] + overlap: -1.0 + + b5: + name: "G4S5.wav" + description: "Two speakers sitting at oval table side by side in a large anechoic conference room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["LAABP01.wav", "LAABP02.wav"] + overlap: -1.0 + + b6: + name: "G3S6.wav" + description: "Two speakers sitting at oval table side by side in a large anechoic conference room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["LAABP03.wav", "LAABP04.wav"] + overlap: -1.0 + + c1: + name: "G3S1.wav" + description: "One talker sitting at table in a small anechoic conference room." + source: ["test_single.wav"] + IR: ["SAMSP01.wav"] + overlap: -1.0 + + c2: + name: "G2S2.wav" + description: "One talker sitting at table in a small anechoic conference room." + source: ["test_single.wav"] + IR: ["SAMSP04.wav"] + overlap: -1.0 + + c3: + name: "G1S3.wav" + description: "One talker sitting at table in a small anechoic conference room." + source: ["test_single.wav"] + IR: ["SAMSP07.wav"] + overlap: -1.0 + + c4: + name: "G6S4.wav" + description: "One talker sitting at table in a small echoic conference room." + source: ["test_single.wav"] + IR: ["SEABP01.wav"] + overlap: -1.0 + + c5: + name: "G5S5.wav" + description: "One talker sitting at table in a small echoic conference room." + source: ["test_single.wav"] + IR: ["SEABP03.wav"] + overlap: -1.0 + + c6: + name: "G4S6.wav" + description: "One talker sitting at table in a small echoic conference room." 
+ source: ["test_single.wav"] + IR: ["SEABP06.wav"] + overlap: -1.0 + + d1: + name: "G4S1.wav" + description: "One talker sitting at table in a small anechoic conference room." + source: ["test_single.wav"] + IR: ["SEBIP01.wav"] + overlap: -1.0 + + d2: + name: "G3S2.wav" + description: "One talker sitting at table in a small anechoic conference room." + source: ["test_single.wav"] + IR: ["SEBIP04.wav"] + overlap: -1.0 + + d3: + name: "G2S3.wav" + description: "One talker sitting at table in a small anechoic conference room." + source: ["test_single.wav"] + IR: ["SEBIP07.wav"] + overlap: -1.0 + + d4: + name: "G1S4.wav" + description: "One talker sitting at table in a small echoic conference room." + source: ["test_single.wav"] + IR: ["SEBIP07.wav"] + overlap: -1.0 + + d5: + name: "G6S5.wav" + description: "One talker sitting at table in a small echoic conference room." + source: ["test_single.wav"] + IR: ["SEBIP07.wav"] + overlap: -1.0 + + d6: + name: "G5S6.wav" + description: "One talker sitting at table in a small echoic conference room." + source: ["test_single.wav"] + IR: ["SEBIP07.wav"] + overlap: -1.0 + + e1: + name: "G5S1.wav" + description: "Two talkers sitting in a room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["SEMSP01.wav", "SEMSP03.wav"] + overlap: 1.0 + + e2: + name: "G4S2.wav" + description: "Two talkers sitting in a room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["SEMSP01.wav", "SEMSP05.wav"] + overlap: 1.0 + + e3: + name: "G3S3.wav" + description: "Two talkers sitting in a room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["SEMSP01.wav", "SEMSP07.wav"] + overlap: 1.0 + + e4: + name: "G2S4.wav" + description: "Two talkers sitting in a room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["SEMSP03.wav", "SEMSP04.wav"] + overlap: -1.0 + + e5: + name: "G1S5.wav" + description: "Two talkers sitting in a room." 
+ source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["SEMSP05.wav", "SEMSP07.wav"] + overlap: -1.0 + + e6: + name: "G6S6.wav" + description: "Two talkers sitting in a room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["SEMSP06.wav", "SEMSP02.wav"] + overlap: -1.0 + + f1: + name: "G6S1.wav" + description: "Two talkers sitting in a room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["SEBIP05.wav", "SEBIP01.wav"] + overlap: 1.0 + + f2: + name: "G5S2.wav" + description: "Two talkers sitting in a room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["SEBIP07.wav", "SEBIP01.wav"] + overlap: 1.0 + + f3: + name: "G4S3.wav" + description: "Two talkers sitting in a room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["SEBIP04.wav", "SEBIP01.wav"] + overlap: 1.0 + + f4: + name: "G3S4.wav" + description: "Two talkers sitting in a room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["SEBIP02.wav", "SEBIP06.wav"] + overlap: -1.0 + + f5: + name: "G2S5.wav" + description: "Two talkers sitting in a room." + source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["SEBIP02.wav", "SEBIP06.wav"] + overlap: -1.0 + + f6: + name: "G1S6.wav" + description: "Two talkers sitting in a room." 
+ source: ["test_talker1_trimmed.wav", "test_talker2_trimmed.wav"] + IR: ["SEBIP03.wav", "SEBIP04.wav"] + overlap: -1.0 + \ No newline at end of file diff --git a/ivas_processing_scripts/audiotools/audiofile.py b/ivas_processing_scripts/audiotools/audiofile.py index 2d6ee25a63b8b933d14f3bedf9a542eee7c59dca..7eeb7aa01c75cc36a0dfdd675dc631ccb6d6145d 100755 --- a/ivas_processing_scripts/audiotools/audiofile.py +++ b/ivas_processing_scripts/audiotools/audiofile.py @@ -112,6 +112,7 @@ def write( filename: Union[str, Path], x: np.ndarray, fs: Optional[int] = 48000, + dtype: Optional[str] = "int16", ) -> None: """ Write audio file (.pcm, .wav or .raw) @@ -124,6 +125,8 @@ def write( Numpy 2D array of dimension: number of channels x number of samples fs: Optional[int] Sampling rate, required for .pcm or .raw input file, default = 48000 (Hz) + dtype: Optional[str] + Data type format required for .pcm or .raw input file, default = 'int16' Returns ------- @@ -140,10 +143,10 @@ def write( x = np.clip(x, np.iinfo(np.int16).min, np.iinfo(np.int16).max) if file_extension == ".wav": - x = x.astype(np.int16) + x = x.astype(dtype) wav.write(filename, fs, x) elif file_extension == ".pcm" or file_extension == ".raw": - x = x.astype("int16").reshape(-1, 1) + x = x.astype(dtype).reshape(-1, 1) x.tofile(filename) else: raise ValueError("Wrong input format. Use wav, pcm or raw") diff --git a/ivas_processing_scripts/audiotools/wrappers/reverb.py b/ivas_processing_scripts/audiotools/wrappers/reverb.py new file mode 100644 index 0000000000000000000000000000000000000000..d0f04677f0fd01b5de5500b5461037acb2b03b80 --- /dev/null +++ b/ivas_processing_scripts/audiotools/wrappers/reverb.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
from copy import copy
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Optional

import numpy as np
from scipy.fft import fft

from ivas_processing_scripts.audiotools.audio import Audio
from ivas_processing_scripts.audiotools.audiofile import read, write
from ivas_processing_scripts.audiotools.wrappers.filter import resample_itu
from ivas_processing_scripts.constants import DEFAULT_CONFIG_BINARIES
from ivas_processing_scripts.utils import find_binary, run


def reverb(
    input: Audio,
    IR: Audio,
    align: Optional[float] = None,
) -> Audio:
    """
    Wrapper for the ITU-T reverb binary to convolve mono audio signal with an impulse response
    Note: The 'reverb' binary tool expects that the IR file is written in the 32b IEEE Standard 754 floating-point representation.

    Parameters
    ----------
    input: Audio
        Input audio signal; it is not modified, a shallow copy is processed instead
    IR: Audio
        Impulse response; if its sampling rate differs from that of the input,
        the input is resampled to IR.fs before the convolution and the result
        is resampled back afterwards
    align: Optional[float]
        Multiplicative factor handed to the binary through its '-align' option
        (applied to the reverberated sound to align its energy level with a
        second file); default = None (option not passed)

    Returns
    -------
    output: Audio
        Convolved audio signal with IR, at the sampling rate of the input
    """

    # find binary
    if "reverb" in DEFAULT_CONFIG_BINARIES["binary_paths"]:
        binary = find_binary(
            DEFAULT_CONFIG_BINARIES["binary_paths"]["reverb"].name,
            binary_path=DEFAULT_CONFIG_BINARIES["binary_paths"]["reverb"].parent,
        )
    else:
        binary = find_binary("reverb")

    with TemporaryDirectory() as tmp_dir:
        tmp_dir = Path(tmp_dir)

        # resample input audio signal to that of the IR
        old_fs = None
        tmp_input = copy(input)
        if input.fs != IR.fs:
            old_fs = input.fs
            tmp_input.audio = resample_itu(tmp_input, IR.fs)
            tmp_input.fs = IR.fs

        # write input audio signal to temporary file in .pcm format
        tmp_input_file = tmp_dir.joinpath("tmp_reverbIn.pcm")
        write(tmp_input_file, tmp_input.audio, tmp_input.fs)

        # down-scale IR to prevent saturation
        # max_value = np.max(np.abs(IR.audio))
        # if max_value > 1.0:
        #     IR.audio = IR.audio / max_value

        # write IR to temporary file in .pcm format
        # note: the reverb tool expects 32b float format
        tmp_IR_file = tmp_dir.joinpath("tmp_IR.pcm")
        write(tmp_IR_file, IR.audio.astype(np.float32), IR.fs, dtype=np.float32)

        # set up the 'reverb' command line
        cmd = [
            str(binary),
        ]

        # append multiplicative factor, if provided
        # (explicit None test so that a factor of 0.0 is not silently dropped,
        # consistent with the check in reverb_stereo())
        if align is not None:
            cmd.extend(["-align", str(align)])

        # append temporary filenames (as plain strings, like the binary itself)
        tmp_output_file = tmp_dir.joinpath("tmp_reverbOut.pcm")
        cmd.extend([str(tmp_input_file), str(tmp_IR_file), str(tmp_output_file)])

        # run the 'reverb' command
        run(cmd)

        # read the reverberated output file
        output = copy(tmp_input)
        output.audio, _ = read(tmp_output_file, nchannels=1, fs=tmp_input.fs)

        # reverse the resampling
        if old_fs is not None:
            output.audio = resample_itu(output, old_fs)
            output.fs = old_fs

    return output


def reverb_stereo(
    input: Audio,
    stereo_IR: Audio,
    align: Optional[float] = None,
) -> Audio:
    """
    Wrapper for the ITU-T reverb binary to convolve mono audio signal with a stereo impulse response

    Parameters
    ----------
    input: Audio
        Input audio signal
    stereo_IR: Audio
        Stereo impulse response; column 0 of its audio array is used as the
        left IR and column 1 as the right IR. The object is not modified.
    align: Optional[float]
        Multiplicative factor forwarded to reverb() for both channels;
        default = None, in which case it is derived from the IR so that the
        maximum magnitude of its FFT (over both channels) is scaled to 1 (0dB)

    Returns
    -------
    output: Audio
        Convolved audio signal with stereo IR
    """

    # float32 view/copy of the IR samples; kept local so that the caller's
    # stereo_IR object does not get its audio array replaced as a side effect
    ir_audio = np.asarray(stereo_IR.audio, dtype=np.float32)

    # separate into left and right IR
    IR_left = copy(stereo_IR)
    IR_left.name = "MONO"
    IR_left.num_channels = 1
    IR_left.audio = np.reshape(ir_audio[:, 0], (-1, 1))

    IR_right = copy(stereo_IR)
    IR_right.name = "MONO"
    IR_right.num_channels = 1
    IR_right.audio = np.reshape(ir_audio[:, 1], (-1, 1))

    # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB
    if align is None:
        H = fft(ir_audio, axis=0)
        peak = np.max(np.abs(H))
        # an all-zero IR has no gain to normalize; leave align unset instead
        # of passing an infinite factor to the binary
        if peak > 0:
            align = 1.0 / peak

    # convolve mono input with left and right IR
    y_left = reverb(input, IR_left, align=align)
    y_right = reverb(input, IR_right, align=align)

    # combine into stereo output
    y = copy(input)
    y.name = "STEREO"
    y.num_channels = 2
    y.audio = np.column_stack([y_left.audio, y_right.audio])

    return y
https://git-lfs.github.com/spec/v1 +oid sha256:f433047f7fdba568183873d11c7f4423550a675b3e0677b6d846137227862bac +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP05.wav b/ivas_processing_scripts/generation/IR/LAABP05.wav new file mode 100644 index 0000000000000000000000000000000000000000..e3bd19165b290a1da2f1412c61936184a8cde394 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791b69ca22d15226e5e2f6c5a39d3d40af04264523f3373d842a070ea4d40862 +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP06.wav b/ivas_processing_scripts/generation/IR/LAABP06.wav new file mode 100644 index 0000000000000000000000000000000000000000..1c50022f1a63a68bb02b7bec9cb7cddbec91da62 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f433047f7fdba568183873d11c7f4423550a675b3e0677b6d846137227862bac +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP07.wav b/ivas_processing_scripts/generation/IR/LAABP07.wav new file mode 100644 index 0000000000000000000000000000000000000000..c4ec38f98576cf5e178e4ad2a84d866643f7dd4e --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5616c8bcf3959aeee246a96a9f2ce6793d4087bfce3dfd1d97e313e3717b5bd6 +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP08.wav b/ivas_processing_scripts/generation/IR/LAABP08.wav new file mode 100644 index 0000000000000000000000000000000000000000..41586c2f62f1c11b49fdbeec5f7e98e416246cbb --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP08.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2658ddec94aa86e2fa0ed365686daded586a6a46436dff1c6d8dba6d17d0182c +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP09.wav 
b/ivas_processing_scripts/generation/IR/LAABP09.wav new file mode 100644 index 0000000000000000000000000000000000000000..aeaa9eeb6f93b4b11140d18f481667be0aef7883 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP09.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a4e959d347d3f99468dbe75bce9853eb9d66af6cb22cf3ea9ad2dc4c9e84a2a +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP10.wav b/ivas_processing_scripts/generation/IR/LAABP10.wav new file mode 100644 index 0000000000000000000000000000000000000000..37693eb52683675a720dd03387e90122122b5334 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP10.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9271410ecad011fbcf22fb8f7af5b0f19f02510ef0f198ef6c6d9e33e64d38da +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP11.wav b/ivas_processing_scripts/generation/IR/LAABP11.wav new file mode 100644 index 0000000000000000000000000000000000000000..482a0e76c8c27f577fa879a73adff1f22d65e3d7 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP11.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cda11409aae6b99f6ccb4d20db24b065b7b2bda004dddd7659607215568d90b6 +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LAABP12.wav b/ivas_processing_scripts/generation/IR/LAABP12.wav new file mode 100644 index 0000000000000000000000000000000000000000..37693eb52683675a720dd03387e90122122b5334 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LAABP12.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9271410ecad011fbcf22fb8f7af5b0f19f02510ef0f198ef6c6d9e33e64d38da +size 36804 diff --git a/ivas_processing_scripts/generation/IR/LEABP01.wav b/ivas_processing_scripts/generation/IR/LEABP01.wav new file mode 100644 index 0000000000000000000000000000000000000000..424ddfb5c23bf2ad33ef946fe9c297672a2b35aa --- /dev/null +++ 
b/ivas_processing_scripts/generation/IR/LEABP01.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d810da26d72e818444c6ee16a3a59a77eabf74df3aaebd2b021696fa7fdd610f +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP02.wav b/ivas_processing_scripts/generation/IR/LEABP02.wav new file mode 100644 index 0000000000000000000000000000000000000000..784caa2db39b37058eaa019a4189df59bd0ded99 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP02.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c21239ff8bbf0e465a175f7ea5125c03f02568a8dbc9b4b63e064955529c489 +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP03.wav b/ivas_processing_scripts/generation/IR/LEABP03.wav new file mode 100644 index 0000000000000000000000000000000000000000..c81bce1fceb32ff72d6f661811fdbe9e5c9b7c27 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP03.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96e5b25de682dc8e0c1f036bbb0c193cfef574a48621069584d48cdd40f520ed +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP04.wav b/ivas_processing_scripts/generation/IR/LEABP04.wav new file mode 100644 index 0000000000000000000000000000000000000000..87d97879424523f8d73d6af6724b5604454d77cb --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP04.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd86b594612a319e30676e4e3c0d177f01ee5626379864610df9796532e7024 +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP05.wav b/ivas_processing_scripts/generation/IR/LEABP05.wav new file mode 100644 index 0000000000000000000000000000000000000000..5e01d3be95c7466221f7eac2d74fed0559077802 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e31f9bf16791af9b3e01e75316d5bfe32115a5dec8a4b820d253e78e0b84edb +size 82068 diff --git 
a/ivas_processing_scripts/generation/IR/LEABP06.wav b/ivas_processing_scripts/generation/IR/LEABP06.wav new file mode 100644 index 0000000000000000000000000000000000000000..a102706670abd0ac576c099c596571a79984c5a4 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65be054317c4dfd5cb0f9bef1d9fc90f35df6ae841e223280946e435c7b6b0c7 +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP07.wav b/ivas_processing_scripts/generation/IR/LEABP07.wav new file mode 100644 index 0000000000000000000000000000000000000000..3bfe1b979f45dd8be581b338d5ed0c8f0972122f --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78da36e2a0652cc9c7f77279ba1342d0f58b4a879ef4e3038da38580c9bfd07d +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP08.wav b/ivas_processing_scripts/generation/IR/LEABP08.wav new file mode 100644 index 0000000000000000000000000000000000000000..7ac86eb1145d0fbe117bbea51ad30eacb2b62f43 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP08.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa78fae31221631fd31d251ea6ad5f7369bbcc054c84e8b82dca7c8613f3867a +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP09.wav b/ivas_processing_scripts/generation/IR/LEABP09.wav new file mode 100644 index 0000000000000000000000000000000000000000..010be6fbc060e41105cbcaecc44f123ed12c1556 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP09.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd7a9ca0ff37a58455414d8e66efb9aa6d8f686af7459751e24f40eb3c2d6415 +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP10.wav b/ivas_processing_scripts/generation/IR/LEABP10.wav new file mode 100644 index 0000000000000000000000000000000000000000..4fbadb40d5590b493062892df70732f34c82cb4c --- 
/dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP10.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7149eb3558db62f34e4f476c85a57733e0ca153a297aa183ebeb550878a5ab40 +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP11.wav b/ivas_processing_scripts/generation/IR/LEABP11.wav new file mode 100644 index 0000000000000000000000000000000000000000..156d4156cfd36171b78ec5f1cc966e18db454a58 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP11.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2665ed857b1e3f095581c591e400e9ef532ff9e130a414bc2cc939c37b829c8a +size 82068 diff --git a/ivas_processing_scripts/generation/IR/LEABP12.wav b/ivas_processing_scripts/generation/IR/LEABP12.wav new file mode 100644 index 0000000000000000000000000000000000000000..e84b30b81a31e8d295d60dd5b194a1786cd2e527 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/LEABP12.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad942c2d19303a80ccadab2289172c514c83397096e8317476d6e8dd6463f0f4 +size 82068 diff --git a/ivas_processing_scripts/generation/IR/SAABP01.wav b/ivas_processing_scripts/generation/IR/SAABP01.wav new file mode 100644 index 0000000000000000000000000000000000000000..180b682aa0ec9cb6e3f334fc341e89397af0d2c8 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP01.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd68dd01200bbfd25bebec4dfc63b8f528a03c88d1307e75d7a6c91eeec8be6e +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP02.wav b/ivas_processing_scripts/generation/IR/SAABP02.wav new file mode 100644 index 0000000000000000000000000000000000000000..f0acab781d692bab0f3b3f2a5689f512e7b1d23b --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP02.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f145f6f8eb8324c7f3e18c5af5047641e82952603a787e0b7e069d26d5c4ca6 +size 36764 diff 
--git a/ivas_processing_scripts/generation/IR/SAABP03.wav b/ivas_processing_scripts/generation/IR/SAABP03.wav new file mode 100644 index 0000000000000000000000000000000000000000..1efea8d6133c71b844d55a10aa8fa92b40223509 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP03.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8493653f497915b35377984c6d79e04aa344ccf44e0d5b8e286fbec492c9c31 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP04.wav b/ivas_processing_scripts/generation/IR/SAABP04.wav new file mode 100644 index 0000000000000000000000000000000000000000..ec788896fe81e4de43e156f1af172caea3238869 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP04.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36c04e66154b91979160d18faaf02dc226f6d2ed61f63d19227d777bb3459987 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP05.wav b/ivas_processing_scripts/generation/IR/SAABP05.wav new file mode 100644 index 0000000000000000000000000000000000000000..3098f0b4af2216c70f13b9e1f0c95499fa57b9e6 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dae758e6b7b3fd8ef3a8d76fa3210f5f412f3286056085e68a1f9ca7a13e9bab +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP06.wav b/ivas_processing_scripts/generation/IR/SAABP06.wav new file mode 100644 index 0000000000000000000000000000000000000000..a45533815990edc7ddaf83a8382f5ecd6656192d --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d52622ceb146c340c8a52689468c355c09bd3f71ef1f2f5dae9fb5d217b27e +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAABP07.wav b/ivas_processing_scripts/generation/IR/SAABP07.wav new file mode 100644 index 0000000000000000000000000000000000000000..8e641a981edac69a5284ac6371d9c255478d8451 --- 
/dev/null +++ b/ivas_processing_scripts/generation/IR/SAABP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf6c77ccfa239f5a0cb44a071dcb0d0ca92da0bbc858e4cc060af814ab3ffe3e +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP01.wav b/ivas_processing_scripts/generation/IR/SAMSP01.wav new file mode 100644 index 0000000000000000000000000000000000000000..7d59592a65745f53103e82dcecbd22db6bedd959 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAMSP01.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba6e8d380d91e5492338ac98df45e444532b92ff84a71f569673610e59cde136 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP02.wav b/ivas_processing_scripts/generation/IR/SAMSP02.wav new file mode 100644 index 0000000000000000000000000000000000000000..b8b62cef531442500cad6f50d5da52baac026fea --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAMSP02.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd991ef690a9c86fa00064c56ad3df3ef726d9b6232efaf256b33cbc1ad3ac32 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP03.wav b/ivas_processing_scripts/generation/IR/SAMSP03.wav new file mode 100644 index 0000000000000000000000000000000000000000..feab358d8974c4af6554f831ab206642cd747cf6 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAMSP03.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a996729d0c2573d4f219d72db60273b986280fac7ae0f5fe0a35524b83a0d95 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP04.wav b/ivas_processing_scripts/generation/IR/SAMSP04.wav new file mode 100644 index 0000000000000000000000000000000000000000..0f29ec53654b80d37e9c70b1943e69f89ce048e4 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAMSP04.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad01524476b6f8a5fc2d4d31f8c1b7589a836d9b98cc4d27201e42481931962 +size 36764 diff 
--git a/ivas_processing_scripts/generation/IR/SAMSP05.wav b/ivas_processing_scripts/generation/IR/SAMSP05.wav new file mode 100644 index 0000000000000000000000000000000000000000..71293903c975e1574223448f3f983a31e449cb44 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAMSP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f0b5f91b292924c4e1eb1e2d884059720ab5c3eaae05d22230d786f19de7879 +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP06.wav b/ivas_processing_scripts/generation/IR/SAMSP06.wav new file mode 100644 index 0000000000000000000000000000000000000000..0d51fc62a314c745a81dded83f1cee42f1821c3f --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAMSP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06e0d7f97b4ce56065d143d19a45ad8c757ed21cf0fe3f8ed05cbedbd966084e +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SAMSP07.wav b/ivas_processing_scripts/generation/IR/SAMSP07.wav new file mode 100644 index 0000000000000000000000000000000000000000..a20ac5f94753a9028cf6942ac9f95d84df8f4fd5 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SAMSP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb825349bec07813ea7ccb936948783aed31683805a3daae867568445820f8ea +size 36764 diff --git a/ivas_processing_scripts/generation/IR/SEABP01.wav b/ivas_processing_scripts/generation/IR/SEABP01.wav new file mode 100644 index 0000000000000000000000000000000000000000..6120c6a09d3fea533e3ca2335c0e02080f7ead4c --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP01.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a934da1fee82c8131c427680304c9102a3289179697318735b87536d2db6261e +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP02.wav b/ivas_processing_scripts/generation/IR/SEABP02.wav new file mode 100644 index 0000000000000000000000000000000000000000..3dc413d83782f51a5e709adf3f403356168fcee9 --- 
/dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP02.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21bd1f242bf459bda18ea9e444eedbdf97db20e0956e3600c4e3c03870f1a877 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP03.wav b/ivas_processing_scripts/generation/IR/SEABP03.wav new file mode 100644 index 0000000000000000000000000000000000000000..27d2af1c86849c1fdd8f4ae19efbe48e8b391a8b --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP03.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bd27e370e9fff391ef37a9e45e3f1583cdcef6ce23cef6135368fb6964674f2 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP04.wav b/ivas_processing_scripts/generation/IR/SEABP04.wav new file mode 100644 index 0000000000000000000000000000000000000000..ed3c991881757175d48ba0996ca7060336339662 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP04.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4399629b729b0ceb8b30f3c994b736557bd8b35a968cb80cba486833b7c54d1 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP05.wav b/ivas_processing_scripts/generation/IR/SEABP05.wav new file mode 100644 index 0000000000000000000000000000000000000000..2e990d65b5b6f5e32dbf529d8d6912408cb86fcb --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c7af3d46eea2d738cb1c6e25a351489f9daff2976c365251595cec719b7ebe +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEABP06.wav b/ivas_processing_scripts/generation/IR/SEABP06.wav new file mode 100644 index 0000000000000000000000000000000000000000..3d1397a0df7a33cfe39dec042c99465e61a96a10 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb284bd97e306b890b9ccdd2e7649c602f6fd78774c1b2140b29051126a1fece +size 42112 diff 
--git a/ivas_processing_scripts/generation/IR/SEABP07.wav b/ivas_processing_scripts/generation/IR/SEABP07.wav new file mode 100644 index 0000000000000000000000000000000000000000..075da1a13ba604475ffe36d0694fc0f0c508c984 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEABP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a4fce653f7d80f389f3114a1e07688c5ad292e1419a59c6c4630a3bb8f2bf74 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP01.wav b/ivas_processing_scripts/generation/IR/SEBIP01.wav new file mode 100644 index 0000000000000000000000000000000000000000..a60682367f820b7e348125dae9b02838a8368983 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP01.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a349cb20898415609ea49187f871ba2dc980d07a1fa36fb655efde96208b4c +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP02.wav b/ivas_processing_scripts/generation/IR/SEBIP02.wav new file mode 100644 index 0000000000000000000000000000000000000000..10f8a62caa4d4a003b10b3fe6e8684d21e379cd0 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP02.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b35e9171ceaeb3e4e00f1e73b337c39c6c933620c39394e3a5ff095535db657a +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP03.wav b/ivas_processing_scripts/generation/IR/SEBIP03.wav new file mode 100644 index 0000000000000000000000000000000000000000..fd0ec69f5bb24a0b9593b3b37edf038942ee6019 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP03.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:524e0505f83bc579774e5e36f730b40fcb62b9b10f3a7767cec4389f4689d87b +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP04.wav b/ivas_processing_scripts/generation/IR/SEBIP04.wav new file mode 100644 index 0000000000000000000000000000000000000000..30be43262c297a8ebbd218716c7ed3950b36bd1d --- 
/dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP04.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa2e8e18ef82a299d142fcbfc462b2370472fa202cbe361e1d661c20e21cd4c8 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP05.wav b/ivas_processing_scripts/generation/IR/SEBIP05.wav new file mode 100644 index 0000000000000000000000000000000000000000..91e5793765fbb9b2868f3b156b2e581c65f3e1b4 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d9cb43b175c2cf94eb861780e48fdb56da0bc4a2dd4f6034b179fa17dd09ab +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP06.wav b/ivas_processing_scripts/generation/IR/SEBIP06.wav new file mode 100644 index 0000000000000000000000000000000000000000..eb589f4922ef64265165280ba856b2d0faa632a1 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca350c682655d3ba8b075e3744adde034783dc87036b8fa9aaf9ccb3500f9286 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEBIP07.wav b/ivas_processing_scripts/generation/IR/SEBIP07.wav new file mode 100644 index 0000000000000000000000000000000000000000..d8a20381c7426ad9dac5dab8a47dad9ec5124f99 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEBIP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79e4520ada475e1c37b8707da8062bbbfb26e617261e0130b7344b2bc1a937c5 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP01.wav b/ivas_processing_scripts/generation/IR/SEMSP01.wav new file mode 100644 index 0000000000000000000000000000000000000000..4dab142a325bd393a4f1ff3ecc5ecb86985ac7f5 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP01.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66a37fc3855a0929cf4a4702301bf231fe346f1964b845b9cf464a5bfd3e29ad +size 42112 diff 
--git a/ivas_processing_scripts/generation/IR/SEMSP02.wav b/ivas_processing_scripts/generation/IR/SEMSP02.wav new file mode 100644 index 0000000000000000000000000000000000000000..d59419c5bc4f5d1e9978508a75b777cd3f059b58 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP02.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cf267a42add5770e08b756d5577e95459b3efc5e49076ac910bb00aabe879b1 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP03.wav b/ivas_processing_scripts/generation/IR/SEMSP03.wav new file mode 100644 index 0000000000000000000000000000000000000000..0e2e8205f1b4a89c76b5cf1fb6df2fdd3c609925 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP03.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4947d0762a6d653690d164c1a0dc09acc9c2bf38e8c28f33b9661d899094cd7 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP04.wav b/ivas_processing_scripts/generation/IR/SEMSP04.wav new file mode 100644 index 0000000000000000000000000000000000000000..dc665c65740d967fc3dbcca151a4a2dcfcb22ad3 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP04.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f8a703057836541f8ca3e1e788d95302adcf983e12e1f6481e0743548559eeb +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP05.wav b/ivas_processing_scripts/generation/IR/SEMSP05.wav new file mode 100644 index 0000000000000000000000000000000000000000..aec9c66f098289fc869da94dfef49f8cab687b3d --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP05.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12613e8b6f43d6a8df2a4b78961fcacc2956d3b0bd8e3321fdea487ab00679ab +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP06.wav b/ivas_processing_scripts/generation/IR/SEMSP06.wav new file mode 100644 index 0000000000000000000000000000000000000000..84f990edd27537451066bfadeab5cb0e672507bb --- 
/dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP06.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b107956649319df472cfe311e278f73735708957b44f7af6a6e444a33b7cb9d0 +size 42112 diff --git a/ivas_processing_scripts/generation/IR/SEMSP07.wav b/ivas_processing_scripts/generation/IR/SEMSP07.wav new file mode 100644 index 0000000000000000000000000000000000000000..bf89445a717c41c5ffb42bf47503157e5dd71e66 --- /dev/null +++ b/ivas_processing_scripts/generation/IR/SEMSP07.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef5a76c1026510861b8cac697415e6e08810857252b9ce52e0157c6024400bf2 +size 42112 diff --git a/ivas_processing_scripts/generation/__init__.py b/ivas_processing_scripts/generation/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..27ff902175e423df13231376079afc718aaf6a12 --- /dev/null +++ b/ivas_processing_scripts/generation/__init__.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
def logging_init(args, cfg):
    """Set up console and file logging for one item-generation run.

    Args:
        args: parsed command-line namespace; ``args.debug`` selects DEBUG
            instead of INFO verbosity and ``args.config`` is echoed to the log.
        cfg: configuration object providing ``format``, ``input_path`` and
            ``output_path`` (``joinpath``/``absolute`` are called on the paths).

    Returns:
        The ``"__main__"`` logger with a console and a file handler attached.
    """
    # both handlers share the same verbosity; the logger itself lets everything through
    handler_level = logging.DEBUG if args.debug else logging.INFO

    logger = logging.getLogger("__main__")
    logger.setLevel(logging.DEBUG)

    # console handler: bare messages only
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(logging.Formatter("%(message)s"))
    console_handler.setLevel(handler_level)
    logger.addHandler(console_handler)

    # main log file, named after the output format and overwritten on every run
    log_file = cfg.output_path.joinpath(f"{cfg.format}{LOGGER_SUFFIX}")
    file_handler = logging.FileHandler(log_file, mode="w")
    file_handler.setFormatter(logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT))
    file_handler.setLevel(handler_level)
    logger.addHandler(file_handler)

    logger.info(f"Processing test configuration file {args.config}")
    logger.info(f"Input path: {cfg.input_path.absolute()}")
    logger.info(f"Output path: {cfg.output_path.absolute()}")

    return logger


def main(args):
    """Generate test items as described by the configuration file ``args.config``.

    Parses the configuration, creates the output directory if needed, sets up
    logging, dispatches to the ISM or STEREO generator depending on
    ``cfg.format`` and finally stores the merged configuration next to the
    generated items as ``<format>.yml``.

    Args:
        args: parsed command-line namespace with ``config`` and ``debug``.
    """
    # parse configuration
    cfg = config.TestConfig(args.config)

    # create output directory, if not existing (the log file is written there)
    if not os.path.exists(cfg.output_path):
        create_dir(cfg.output_path)

    # set up logging
    logger = logging_init(args, cfg)

    # generate input items
    if cfg.format.startswith("ISM"):
        # generate ISM items with metadata according to scene description
        process_ism_items.generate_ism_items(cfg, logger)
    elif cfg.format == "STEREO":
        # generate STEREO items according to scene description
        process_stereo_items.generate_stereo_items(cfg, logger)
    else:
        # previously an unknown format was skipped silently, leaving an empty output folder
        logger.error(
            f"Unsupported output format '{cfg.format}' - no items were generated"
        )

    # copy configuration to output directory
    with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f:
        yaml.safe_dump(cfg._yaml_dump, f)
a/ivas_processing_scripts/generation/__main__.py b/ivas_processing_scripts/generation/__main__.py new file mode 100755 index 0000000000000000000000000000000000000000..9ba00fd5d8394e621e86bb3e6064dd112db27cb2 --- /dev/null +++ b/ivas_processing_scripts/generation/__main__.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +# +# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository. All Rights Reserved. +# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. 
if __name__ == "__main__":
    # command-line interface: one positional configuration file and an optional debug switch
    cli = argparse.ArgumentParser(
        description="IVAS item generation scripts for listening tests. Please refer to README.md for usage."
    )
    cli.add_argument("config", help="YAML configuration file")
    cli.add_argument(
        "--debug",
        action="store_true",
        default=False,
        help="Set logging level to debug",
    )

    # hand the parsed arguments over to the package entry point
    main(cli.parse_args())
+# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
def merge_dicts(base: dict, other: dict) -> None:
    """Recursively merge ``other`` into ``base`` in place.

    Keys present in both dictionaries are overridden by ``other``; nested
    dictionaries are merged key by key instead of being replaced.  Values that
    are ``None`` in ``other`` are ignored so that a parent key given without
    sub-keys does not wipe out a default, while other falsy values
    (``False``, ``0``, ``""``) are still applied.

    Args:
        base: dictionary that is updated.
        other: dictionary providing the overrides; ``None`` or an empty
            dictionary leaves ``base`` untouched.
    """
    if not other:
        return

    for key, value in other.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            merge_dicts(base[key], value)
        # explicitly check for None here;
        # if the user accidentally specifies only the parent but no sub-keys we don't want to overwrite the default
        # however we do want to set non-truthy values e.g. False
        elif value is not None:
            base[key] = value


class TestConfig:
    """Item-generation configuration: defaults merged with a YAML file.

    Every key of the merged configuration becomes an instance attribute;
    ``input_path`` and ``output_path`` are converted to ``pathlib.Path``.
    """

    def __init__(self, filename: str):
        """Parse and validate the YAML configuration file ``filename``.

        Raises:
            KeyError: if a required key is missing from the file.
        """
        # init lists of conditions and associated dirs
        self.out_dirs = []
        self.tmp_dirs = []

        # start from a private copy of the defaults; merging into the shared
        # module-level dictionary would leak settings between instances
        cfg = deepcopy(DEFAULT_CONFIG)

        # parse configuration file
        file_cfg = self._parse_yaml(filename)

        # validate configuration from file
        self._validate(file_cfg)

        # merge dictionaries, overriding from config file
        merge_dicts(cfg, file_cfg)

        # set attributes from merged dictionary
        self.__dict__.update(cfg)

        # store the merged config for writing to file later
        self._yaml_dump = self._dump_yaml(cfg)

        # convert to Path
        self.input_path = Path(self.input_path)
        self.output_path = Path(self.output_path)

    def _parse_yaml(self, filename):
        """Load the configuration file; an empty file yields an empty dict."""
        with open(filename) as fp:
            loaded = yaml.safe_load(fp)
        # safe_load returns None for an empty document; normalise so that
        # validation can report the missing keys instead of crashing
        return {} if loaded is None else loaded

    def _dump_yaml(self, cfg: dict):
        """Return a deep copy of ``cfg`` with every leaf value converted to ``str``.

        This avoids YAML dumping arbitrary Python objects (e.g. paths) as
        tagged objects.  The input dictionary is not modified.
        """
        cfg = deepcopy(cfg)

        def stringify(d: dict):
            for k, v in d.items():
                if isinstance(v, dict):
                    stringify(v)
                else:
                    d[k] = str(v)

        stringify(cfg)

        return cfg

    def _validate(self, cfg: dict):
        """Ensure the configuration contains all required keys.

        An entry of ``REQUIRED_KEYS`` is either a plain key or a tuple
        ``(key, sub_keys)``; a key counts as missing when it is absent or its
        value is falsy.

        Raises:
            KeyError: listing every missing key.
        """
        missing_keys = []

        for required in REQUIRED_KEYS:
            # if there was a tuple, we have a list of subkeys to check
            if isinstance(required, tuple):
                req_key, req_values = required
                section = cfg.get(req_key)
                if not section:
                    missing_keys.append(req_key)
                else:
                    # check all required values
                    missing_keys.extend(
                        f"{req_key} : {v}" for v in req_values if not section.get(v)
                    )
            elif not cfg.get(required):
                missing_keys.append(required)

        # Report missing keys to the user
        if missing_keys:
            raise KeyError(f"The following key(s) must be specified : {missing_keys}")
This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
# log file naming and record layout
LOGGER_SUFFIX = ".log"
LOGGER_FORMAT = "%(levelname)-8s:%(processName)-10s | %(name)s | %(asctime)s | %(message)s"
LOGGER_DATEFMT = "%m-%d %H:%M:%S"

# output formats listed as supported by the item generation scripts
SUPPORTED_FORMATS = {"STEREO", "ISM1", "ISM2"}

# general options used when the configuration file does not set them;
# the date stamp is taken once, when this module is imported
DEFAULT_CONFIG = dict(
    date=datetime.now().strftime("%Y%m%d_%H.%M.%S"),
    delete_tmp=False,
)

# binary locations read from binary_paths.yml two directory levels above this file
DEFAULT_CONFIG_BINARIES = dict(
    binary_paths=get_binary_paths(Path(__file__).parent.parent / "binary_paths.yml"),
)

# top-level keys every configuration file has to provide
REQUIRED_KEYS = ["format", "input_path", "output_path", "scenes"]
LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
SEED_RANDOM_NOISE = 0

# neutral metadata row in column order: azimuth, elevation, distance, spread, gain
_NEUTRAL_META = [0.00, 0.00, 1.00, 0.00, 1.00]


# function for converting nd numpy array to strings with 2 decimal digits
def csv_formatdata(data):
    """Yield every row of ``data`` as a list of strings with two decimal digits."""
    for row in data:
        yield ["%0.2f" % v for v in row]


def _expand_positions(spec, n_frames):
    """Expand a position specification to exactly ``n_frames`` values.

    ``spec`` is either a number, a string holding a number/expression, or a
    string in ``"start:step:stop"`` notation (one step per 20 ms frame).  The
    result is cut to ``n_frames`` values or extended by repeating the last one.
    """
    if isinstance(spec, str):
        # NOTE(review): eval() executes arbitrary expressions from the configuration
        # file; acceptable only as long as configuration files are trusted input
        if ":" in spec:
            parts = spec.split(":")
            values = np.arange(
                float(eval(parts[0])),
                float(eval(parts[2])),
                float(eval(parts[1])),
            )
            if values.size == 0:
                raise ValueError(f"Position specification '{spec}' yields no values")
        else:
            values = np.array(float(eval(spec)), ndmin=1)
    else:
        values = np.array(spec, ndmin=1)

    # cut the array if too long
    values = values[:n_frames]

    # replicate the last value if too short
    if len(values) < n_frames:
        values = np.append(values, np.full(n_frames - len(values), values[-1]))

    return values


def _pad_rows(signal, n_rows):
    """Append all-zero rows to the 2-D array ``signal`` until it has ``n_rows`` rows."""
    n_missing = n_rows - signal.shape[0]
    if n_missing <= 0:
        return signal
    return np.vstack((signal, np.zeros((n_missing, signal.shape[1]))))


def _extend_meta(meta, n_frames):
    """Extend ``meta`` (sources x frames x features) to ``n_frames`` frames.

    The last frame of every source is repeated, i.e. each source keeps its
    final position.
    """
    n_missing = n_frames - meta.shape[1]
    if n_missing <= 0:
        return meta
    return np.concatenate(
        [meta, np.repeat(meta[:, -1:, :], n_missing, axis=1)], axis=1
    )


def generate_ism_items(
    cfg: config.TestConfig,
    logger: logging.Logger,
):
    """Generate ISM items with metadata from mono items based on scene description

    For every scene in ``cfg.scenes`` the mono sources are read from
    ``cfg.input_path``, trimmed to a multiple of 20 ms, scaled to
    ``cfg.loudness`` and stacked as separate channels.  One metadata row
    (azimuth, elevation, distance, spread, gain) is created per 20 ms frame and
    source.  The audio file and one ``<item>.<i>.csv`` metadata file per source
    are written to ``cfg.output_path``.

    Optional settings missing from ``cfg`` are set on it with their defaults
    (loudness -26, fs 48000, no pre-/post-amble, no low-level noise).

    Args:
        cfg: parsed test configuration.
        logger: logger receiving progress and error messages.
    """

    # get the number of scenes
    N_scenes = len(cfg.scenes)

    # set defaults for the optional settings: target level, sampling rate,
    # pre-amble, post-amble and low-level noise
    for key, default in (
        ("loudness", -26),
        ("fs", 48000),
        ("preamble", 0.0),
        ("postamble", 0.0),
        ("add_low_level_random_noise", False),
    ):
        if key not in cfg.__dict__:
            setattr(cfg, key, default)

    for scene_name, scene in cfg.scenes.items():
        logger.info(f"Processing {scene_name} out of {N_scenes} scenes")

        # extract the audio sources and their positions
        sources = np.atleast_1d(scene["source"])
        azimuths = np.atleast_1d(scene["azimuth"])
        elevations = np.atleast_1d(scene["elevation"])
        N_sources = len(sources)

        # initialize output variables
        # (the original compared the builtin `format`, so this branch was never taken)
        if cfg.format == "ISM2":
            y = audio.ChannelBasedAudio("STEREO")
        else:
            y = audio.ChannelBasedAudio("MONO")
        y_meta = None

        # read the overlap length
        source_overlap = float(scene.get("overlap", 0.0))

        # repeat for all source files
        for i in range(N_sources):
            # parse parameters from the scene description
            source_file = sources[i]
            source_azi = azimuths[i]
            source_ele = elevations[i]

            logger.info(
                f"Encoding {source_file} at position(s) {source_azi},{source_ele}"
            )

            # read source file
            x = audio.fromfile(
                "MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs
            )

            # get the number of whole 20ms frames; integer division avoids
            # losing a frame to floating-point rounding
            frame_len = int(x.fs / 50)
            N_frames = len(x.audio) // frame_len

            # trim the samples from the end to ensure that the signal length is a multiple of 20ms
            x.audio = x.audio[: N_frames * frame_len]

            # adjust the level of the source file
            _, scale_factor = get_loudness(x, cfg.loudness, "MONO")
            x.audio *= scale_factor

            # read azimuth information and create an array with N_frames values
            azi = _expand_positions(source_azi, N_frames)

            # convert azimuth from 0 .. 360 to -180 .. +180
            azi = (azi + 180) % 360 - 180

            # check if azimuth is from -180 .. +180
            if any(azi > 180) or any(azi < -180):
                logger.error(
                    f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}"
                )

            # read elevation information and create an array with N_frames values
            ele = _expand_positions(source_ele, N_frames)

            # check if elevation is from -90 .. +90
            if any(ele > 90) or any(ele < -90):
                logger.error(
                    f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}"
                )

            # additional metadata
            dist = np.ones(N_frames)  # !!!! TBD - check what to do with these metadata
            spread = np.zeros(N_frames)
            gain = np.ones(N_frames)

            # arrange all metadata fields column-wise into a matrix
            x_meta = np.column_stack((azi, ele, dist, spread, gain))

            # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap)
            if i > 0 and source_overlap != 0.0:
                # get the length of the first source file
                N_delay = len(y.audio[:, 0])

                # add the shift value (ensure that the shift is a multiple of 20ms)
                N_delay += int(floor(-source_overlap * 50) / 50 * x.fs)

                # insert all-zero signal
                pre = np.zeros((N_delay, x.audio.shape[1]))
                x.audio = np.concatenate([pre, x.audio])

                # insert neutral position as a pre-amble
                N_delay = int(N_delay / frame_len)
                pre = np.tile(
                    _NEUTRAL_META, (N_delay, 1)
                )  # !!!! TBD - check if we should insert neutral position or the first position of the metadata
                x_meta = np.concatenate([pre, x_meta])

            # pad with zeros to ensure that the signal length is a multiple of 20ms
            if len(x.audio) % frame_len != 0:
                # pad the source signal
                N_pad = int(frame_len - len(x.audio) % frame_len)
                post = np.zeros((N_pad, x.audio.shape[1]))
                x.audio = np.concatenate([x.audio, post])

                # pad the metadata
                N_pad = int(len(x.audio) / frame_len) - len(x_meta)
                if N_pad > 0:
                    post = np.tile(
                        _NEUTRAL_META, (N_pad, 1)
                    )  # !!!! TBD - check if we should insert neutral position or the first position of the metadata
                    x_meta = np.concatenate([x_meta, post])

            # add source signal to the array of all source signals
            y.fs = x.fs
            if y.audio is None:
                y.audio = x.audio
            else:
                # pad with zeros to have the same length of all source signals
                n_rows = max(x.audio.shape[0], y.audio.shape[0])
                y.audio = np.hstack(
                    (_pad_rows(y.audio, n_rows), _pad_rows(x.audio, n_rows))
                )

            # add metadata to the array of all metadata
            # make sure x_meta is a 3d array
            x_meta = x_meta[np.newaxis, :]
            if y_meta is None:
                y_meta = x_meta
            else:
                # append the last position of the metadata to have equal length of all metadata
                n_meta_frames = max(x_meta.shape[1], y_meta.shape[1])
                y_meta = np.concatenate(
                    [
                        _extend_meta(y_meta, n_meta_frames),
                        _extend_meta(x_meta, n_meta_frames),
                    ]
                )

        # frame length of the combined signal
        frame_len = int(y.fs / 50)

        # append pre-amble and post-amble to all sources
        if cfg.preamble != 0.0:
            # ensure that pre-amble is a multiple of 20ms
            N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)

            # insert all-zero preamble to all sources
            pre = np.zeros((N_pre, y.audio.shape[1]))
            y.audio = np.concatenate([pre, y.audio])

            # insert neutral position as a pre-amble to all sources
            N_pre = int(N_pre / frame_len)
            pre = np.tile(
                _NEUTRAL_META, (y_meta.shape[0], N_pre, 1)
            )  # !!!! TBD - check if we should insert neutral position or the first position of the metadata
            y_meta = np.concatenate([pre, y_meta], axis=1)

        if cfg.postamble != 0.0:
            # ensure that post-amble is a multiple of 20ms
            N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)

            # append all-zero postamble to all sources
            post = np.zeros((N_post, y.audio.shape[1]))
            y.audio = np.concatenate([y.audio, post])

            # append neutral position as a post-amble to all sources
            N_post = int(N_post / frame_len)
            post = np.tile(
                _NEUTRAL_META, (y_meta.shape[0], N_post, 1)
            )  # !!!! TBD - check if we should insert neutral position or the last position of the metadata
            y_meta = np.concatenate([y_meta, post], axis=1)

        # add random noise
        if cfg.add_low_level_random_noise:
            # create uniformly distributed integer noise between -4 and 4
            np.random.seed(SEED_RANDOM_NOISE)
            noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype(
                "float"
            )

            # superimpose
            y.audio += noise

        # write individual ISM audio streams to the output file in an interleaved format
        # use the item name from the scene description, fall back to the scene key
        output_filename = scene.get("name", scene_name)
        audiofile.write(
            os.path.join(cfg.output_path, output_filename), y.audio, y.fs
        )  # !!!! TBD: replace all os.path.xxx operations with the Path object

        # write individual ISM metadata to output files in .csv format
        for i in range(N_sources):
            # generate .csv filename (should end with .0.csv, .1.csv, ...)
            csv_filename = os.path.normpath(f"{output_filename}.{i}.csv")

            with open(
                os.path.join(cfg.output_path, csv_filename),
                "w",
                newline="",
                encoding="utf-8",
            ) as f:
                # create csv writer
                writer = csv.writer(f)

                # write all rows to the .csv file
                writer.writerows(csv_formatdata(y_meta[i]))
+# +# This software is protected by copyright law and by international treaties. +# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +# contributors to this repository retain full ownership rights in their respective contributions in +# the software. This notice grants no license of any kind, including but not limited to patent +# license, nor is any license granted by implication, estoppel or otherwise. +# +# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +# contributions. +# +# This software is provided "AS IS", without any express or implied warranties. The software is in the +# development stage. It is intended exclusively for experts who have experience with such software and +# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +# and fitness for a particular purpose are hereby disclaimed and excluded. +# +# Any dispute, controversy or claim arising under or in relation to providing this software shall be +# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +# the United Nations Convention on Contracts on the International Sales of Goods. 
SEED_RANDOM_NOISE = 0


# function for converting nd numpy array to strings with 2 decimal digits
def csv_formatdata(data):
    """Yield every row of ``data`` as a list of strings with two decimal digits."""
    for row in data:
        yield ["%0.2f" % v for v in row]


def _pad_rows(signal, n_rows):
    """Append all-zero rows to the 2-D array ``signal`` until it has ``n_rows`` rows."""
    n_missing = n_rows - signal.shape[0]
    if n_missing <= 0:
        return signal
    return np.vstack((signal, np.zeros((n_missing, signal.shape[1]))))


def generate_stereo_items(
    cfg: config.TestConfig,
    logger: logging.Logger,
):
    """Generate STEREO items from mono items based on scene description

    For every scene in ``cfg.scenes`` each mono source is read from
    ``cfg.input_path``, convolved with its stereo impulse response from
    ``cfg.IR_path``, scaled to ``cfg.loudness``, optionally shifted in time and
    mixed with the other sources.  The mix is written to ``cfg.output_path``.

    Optional settings missing from ``cfg`` are set on it with their defaults
    (loudness -26, fs 48000, IR_fs 48000, no pre-/post-amble, the ``IR``
    folder next to this module, no low-level noise).

    Args:
        cfg: parsed test configuration.
        logger: logger receiving progress messages.
    """

    # get the number of scenes
    N_scenes = len(cfg.scenes)

    # set defaults for the optional settings: target level, sampling rates,
    # pre-amble, post-amble, IR location and low-level noise
    for key, default in (
        ("loudness", -26),
        ("fs", 48000),
        ("IR_fs", 48000),
        ("preamble", 0.0),
        ("postamble", 0.0),
        ("IR_path", os.path.join(os.path.dirname(__file__), "IR")),
        ("add_low_level_random_noise", False),
    ):
        if key not in cfg.__dict__:
            setattr(cfg, key, default)

    # repeat for all scenes
    for scene_name, scene in cfg.scenes.items():
        # item name from the scene description, falling back to the scene key
        output_filename = scene.get("name", scene_name)

        logger.info(
            f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {output_filename}"
        )

        # extract the audio sources and the matching impulse responses
        sources = np.atleast_1d(scene["source"])
        source_IRs = np.atleast_1d(scene["IR"])
        N_sources = len(sources)

        # read the overlap length
        source_overlap = float(scene.get("overlap", 0.0))

        y = audio.ChannelBasedAudio("STEREO")
        for i in range(N_sources):
            # parse parameters from the scene description
            source_file = sources[i]
            IR_file = source_IRs[i]

            logger.info(f"Convolving {source_file} with {IR_file}")

            # read source file
            x = audio.fromfile(
                "MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs
            )

            # read the IR file
            IR = audio.fromfile(
                "STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs
            )

            # convolve with stereo IR
            x = reverb_stereo(x, IR)

            # adjust the level of the stereo signal
            _, scale_factor = get_loudness(x, cfg.loudness, "STEREO")
            x.audio *= scale_factor

            # shift the second (and all other) source files (positive shift creates overlap, negative shift creates a gap)
            if i > 0 and source_overlap != 0.0:
                # get the length of the first source file
                N_delay = len(y.audio[:, 0])

                # add the shift
                N_delay += int(-source_overlap * x.fs)

                # insert all-zero preamble
                pre = np.zeros((N_delay, x.audio.shape[1]))
                x.audio = np.concatenate([pre, x.audio])

            # pad with zeros to ensure that the signal length is a multiple of 20ms;
            # the zeros go to the end so that the start of the source is not delayed
            frame_len = int(x.fs / 50)
            if len(x.audio) % frame_len != 0:
                N_pad = int(frame_len - len(x.audio) % frame_len)
                post = np.zeros((N_pad, x.audio.shape[1]))
                x.audio = np.concatenate([x.audio, post])

            # add source signal to the mix of source signals
            y.fs = x.fs
            if y.audio is None:
                y.audio = x.audio
            else:
                # pad with zeros to have equal length of all source signals, then superimpose
                n_rows = max(x.audio.shape[0], y.audio.shape[0])
                y.audio = _pad_rows(y.audio, n_rows) + _pad_rows(x.audio, n_rows)

        # append pre-amble and post-amble to all sources
        if cfg.preamble != 0.0:
            # ensure that pre-amble is a multiple of 20ms
            N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)

            # insert all-zero preamble to all sources
            pre = np.zeros((N_pre, y.audio.shape[1]))
            y.audio = np.concatenate([pre, y.audio])

        if cfg.postamble != 0.0:
            # ensure that post-amble is a multiple of 20ms
            N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)

            # append all-zero postamble to all sources
            post = np.zeros((N_post, y.audio.shape[1]))
            y.audio = np.concatenate([y.audio, post])

        # add random noise
        if cfg.add_low_level_random_noise:
            # create uniformly distributed integer noise between -4 and 4
            np.random.seed(SEED_RANDOM_NOISE)
            noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype(
                "float"
            )

            # superimpose
            y.audio += noise

        # write the reverberated audio into output file
        audiofile.write(
            os.path.join(cfg.output_path, output_filename), y.audio, y.fs
        )  # !!!! TBD: replace all os.path.xxx operations with the Path object

    return
+ ---------------------------------------------------------------------------------------------- + +The naming of stereo impulse responses is defined as +[Room][Reverb][Mic]P[Position].WAV + +where: +Room is S=Small or L=Large, +Reverb is E=Echoic or A=Anechoic, +Mic is AB or MS or BI=binaural, +Position is a two-digit position number + + +---------------------------------------------------------------------------------------------------------- +|Scenario | Main Characteristics | Naming of Impulse response pair +| | | (with example positions): +---------------------------------------------------------------------------------------------------------- +|Scenario 1, Large conf. room, 12 positions, | Large, Anechoic, AB | LAABP12.WAV +|AB microphone, no reverb, anechoic. | | +---------------------------------------------------------------------------------------------------------- +|Scenario 1, Large conf. room, 12 positions, | Large, Echoic, AB | LEABP01.WAV +|AB microphone, including reverberation. | | +---------------------------------------------------------------------------------------------------------- +|Scenario 2, small conf. room, 7 positions, | Small, Anechoic, AB | SAABP01.WAV +|AB microphone, no reverb, anechoic. | | +---------------------------------------------------------------------------------------------------------- +|Scenario 2, small conf. room, 7 positions, | Small, Anechoic, MS | SAMSP05.WAV +|MS microphone, no reverb, anechoic. | | +---------------------------------------------------------------------------------------------------------- +|Scenario 2, small conf. room, 7 positions, | Small, Echoic, AB | SEABP02.WAV +|AB microphone, including reverberation.
| | +---------------------------------------------------------------------------------------------------------- +|Scenario 2, small conf. room, 7 positions, | Small, Echoic, Binaural | SEBIP04.WAV +|Binaural microphone, including reverberation.| | +---------------------------------------------------------------------------------------------------------- +|Scenario 2, small conf. room, 7 positions, | Small, Echoic, MS | SEMSP07.WAV +|MS microphone, including reverberation. | | +---------------------------------------------------------------------------------------------------------- + +Stereo impulse responses are stored in WAV format (16-bit integer, 32 kHz). +WARNING: All these impulse responses were measured with a sampling frequency of 32 kHz. +They are intended for use with speech files sampled at 32 kHz. + +References: +[1] original description, http://ties.itu.int/u/tsg16/sg16/xchange/wp3/0809-Geneva/q10/AC-0809-Q10-22-Ericsson_STL_updates.doc +[2] original IRs, https://www.itu.int/u/tsg16/sg16/xchange/wp3/q23/g729.1_g718_swbst_qualification/impulse_resp/stereo/FT/ \ No newline at end of file