Loading item_gen_configs/HOA2_CONFIG.yml 0 → 100644 +61 −0 Original line number Diff line number Diff line --- ################################################ # General configuration ################################################ ### Output format format: "HOA2" ### Output sampling rate in Hz needed for headerless audio files; default = 48000 fs: 48000 ### IR sampling rate in Hz needed for headerless audio files; default = 48000 IR_fs: 48000 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. ### Do not use file names with dots "." in them! This is not supported, use "_" instead ### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions ### Input path to mono files input_path: "./items_mono" ### Input path to HOA2 impulse response files, default = './ivas_processing_scripts/generation/IR' IR_path: "./IRs" ### Output path for generated test items and metadata files output_path: "./items_HOA2" ### (Optional) Output path for binauralized versions of the generated HOA2 items # binaural_path: "./items_HOA2_bin" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 ### Pre-amble and Post-amble length in seconds (default = 0.0) preamble: 0.5 postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true ################################################ ### Scene description ################################################ ### Each scene must start with the sceneN tag ### Specify the mono source filename (the program will search for it in the input_path folder) ### Specify the HOA2 IR source filename (the program will search for it in the IR_path folder) ### Specify the overlap length in seconds for each input source (negative value creates a gap) ### Note 1: use [val1, val2, ...] 
for multiple sources in a scene ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames scenes: a1: name: "out.wav" description: "" source: ["fa1.wav", "ma1.wav"] IR: ["IR_HOA2_env1/FreefieldFloor_TalkPos1_EigenHoA2_SinSweep_9chn.wav", "IR_HOA2_env1/FreefieldFloor_TalkPos2_EigenHoA2_SinSweep_9chn.wav"] overlap: -0.2 item_gen_configs/P800-4.yml +3 −0 Original line number Diff line number Diff line Loading @@ -26,6 +26,9 @@ IR_path: "./IRs" ### Output path for generated test items and metadata files output_path: "./items_FOA" ### (Optional) Output path for binauralized versions of the generated FOA items # binaural_path: "./items_FOA_bin" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 Loading item_gen_configs/P800-5.yml +3 −0 Original line number Diff line number Diff line Loading @@ -26,6 +26,9 @@ IR_path: "./IRs" ### Output path for generated test items and metadata files output_path: "./items_FOA" ### (Optional) Output path for binauralized versions of the generated FOA items # binaural_path: "./items_FOA_bin" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 Loading ivas_processing_scripts/audiotools/wrappers/reverb.py +65 −1 Original line number Diff line number Diff line Loading @@ -238,7 +238,7 @@ def reverb_foa( H = fft(foa_IR.audio, axis=0) align = 1.0 / np.max(np.abs(H)) # convolve mono input with left and right IR # convolve mono input with FOA IR y_w = reverb(input, IR_w, align=align) y_x = reverb(input, IR_x, align=align) y_y = reverb(input, IR_y, align=align) Loading @@ -251,3 +251,67 @@ def reverb_foa( y.audio = np.column_stack([y_w.audio, y_x.audio, y_y.audio, y_z.audio]) return y def reverb_hoa2( input: Audio, hoa2_IR: Audio, align: Optional[float] = None, ) -> Audio: """ Wrapper for the ITU-T reverb binary to convolve mono audio signal with an HOA2 impulse response Parameters ---------- input: Audio Input 
audio signal IR: Audio Impulse response align: float multiplicative factor to apply to the reverberated sound in order to align its energy level with the second file Returns ------- output: Audio Convolved audio signal with HOA2 IR """ # convert to float32 hoa2_IR.audio = np.float32(hoa2_IR.audio) numchannels = 9 # HOA2 by definition # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB if align is None: H = fft(hoa2_IR.audio, axis=0) align = 1.0 / np.max(np.abs(H)) IR = copy(hoa2_IR) IR.name = "MONO" IR.num_channels = 1 ych = [] for i in range(numchannels): # separate IR into each channel IR.audio = np.reshape(hoa2_IR.audio[:, i], (-1, 1)) # convolve mono input with channel IR ych.append(reverb(input, IR, align=align)) # combine into hoa2 output y = copy(input) y.name = "HOA2" y.num_channels = numchannels y.audio = np.column_stack( [ ych[0].audio, ych[1].audio, ych[2].audio, ych[3].audio, ych[4].audio, ych[5].audio, ych[6].audio, ych[7].audio, ych[8].audio, ] ) return y ivas_processing_scripts/generation/__init__.py +4 −0 Original line number Diff line number Diff line Loading @@ -43,6 +43,7 @@ from ivas_processing_scripts.constants import ( from ivas_processing_scripts.generation import ( config, process_foa_items, process_hoa2_items, process_ism_items, process_stereo_items, ) Loading Loading @@ -96,6 +97,9 @@ def main(args): elif cfg.format == "FOA": # generate FOA items according to scene description process_foa_items.generate_foa_items(cfg, logger) elif cfg.format == "HOA2": # generate HOA2 items according to scene description process_hoa2_items.generate_hoa2_items(cfg, logger) # copy configuration to output directory with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f: Loading Loading
item_gen_configs/HOA2_CONFIG.yml 0 → 100644 +61 −0 Original line number Diff line number Diff line --- ################################################ # General configuration ################################################ ### Output format format: "HOA2" ### Output sampling rate in Hz needed for headerless audio files; default = 48000 fs: 48000 ### IR sampling rate in Hz needed for headerless audio files; default = 48000 IR_fs: 48000 ### Any relative paths will be interpreted relative to the working directory the script is called from! ### Usage of absolute paths is recommended. ### Do not use file names with dots "." in them! This is not supported, use "_" instead ### For Windows users: please use double back slash '\\' in paths and add '.exe' to executable definitions ### Input path to mono files input_path: "./items_mono" ### Input path to HOA2 impulse response files, default = './ivas_processing_scripts/generation/IR' IR_path: "./IRs" ### Output path for generated test items and metadata files output_path: "./items_HOA2" ### (Optional) Output path for binauralized versions of the generated HOA2 items # binaural_path: "./items_HOA2_bin" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 ### Pre-amble and Post-amble length in seconds (default = 0.0) preamble: 0.5 postamble: 1.0 ### Flag for adding low-level random background noise (amplitude +-4) instead of silence; default = false (silence) add_low_level_random_noise: true ################################################ ### Scene description ################################################ ### Each scene must start with the sceneN tag ### Specify the mono source filename (the program will search for it in the input_path folder) ### Specify the HOA2 IR source filename (the program will search for it in the IR_path folder) ### Specify the overlap length in seconds for each input source (negative value creates a gap) ### Note 1: use [val1, val2, ...] 
for multiple sources in a scene ### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames scenes: a1: name: "out.wav" description: "" source: ["fa1.wav", "ma1.wav"] IR: ["IR_HOA2_env1/FreefieldFloor_TalkPos1_EigenHoA2_SinSweep_9chn.wav", "IR_HOA2_env1/FreefieldFloor_TalkPos2_EigenHoA2_SinSweep_9chn.wav"] overlap: -0.2
item_gen_configs/P800-4.yml +3 −0 Original line number Diff line number Diff line Loading @@ -26,6 +26,9 @@ IR_path: "./IRs" ### Output path for generated test items and metadata files output_path: "./items_FOA" ### (Optional) Output path for binauralized versions of the generated FOA items # binaural_path: "./items_FOA_bin" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 Loading
item_gen_configs/P800-5.yml +3 −0 Original line number Diff line number Diff line Loading @@ -26,6 +26,9 @@ IR_path: "./IRs" ### Output path for generated test items and metadata files output_path: "./items_FOA" ### (Optional) Output path for binauralized versions of the generated FOA items # binaural_path: "./items_FOA_bin" ### Target loudness in LKFS; default = null (no loudness normalization applied) loudness: -26 Loading
def reverb_hoa2(
    input: Audio,
    hoa2_IR: Audio,
    align: Optional[float] = None,
) -> Audio:
    """
    Wrapper for the ITU-T reverb binary to convolve mono audio signal with an HOA2 impulse response

    Parameters
    ----------
    input: Audio
        Input audio signal
    hoa2_IR: Audio
        HOA2 impulse response; its audio must provide at least 9 channels
        (one channel per column)
    align: Optional[float]
        multiplicative factor to apply to the reverberated sound in order to align its energy level with the second file;
        if None, it is computed from the IR such that the maximum gain of the
        IR filter across all channels and frequencies is 0dB

    Returns
    -------
    output: Audio
        Convolved audio signal with HOA2 IR

    Raises
    ------
    ValueError
        If the impulse response does not provide the 9 channels required for HOA2
    """

    numchannels = 9  # HOA2 by definition

    # convert to float32 on a local array so that the caller's IR object is not modified
    ir_audio = np.float32(hoa2_IR.audio)

    # fail early with a clear message instead of an IndexError inside the channel loop
    if ir_audio.ndim != 2 or ir_audio.shape[1] < numchannels:
        raise ValueError(
            f"HOA2 impulse response must have {numchannels} channels, "
            f"got audio with shape {ir_audio.shape}"
        )

    # calculate the scaling (multiplicative) factor such that the maximum gain of the IR filter across all frequencies is 0dB
    if align is None:
        H = fft(ir_audio, axis=0)
        align = 1.0 / np.max(np.abs(H))

    # single-channel IR container that is re-used for every HOA2 component
    IR = copy(hoa2_IR)
    IR.name = "MONO"
    IR.num_channels = 1

    ych = []
    for i in range(numchannels):
        # separate IR into each channel
        IR.audio = np.reshape(ir_audio[:, i], (-1, 1))

        # convolve mono input with channel IR
        # (the same align factor is used for all channels to keep their relative levels)
        ych.append(reverb(input, IR, align=align))

    # combine into hoa2 output
    y = copy(input)
    y.name = "HOA2"
    y.num_channels = numchannels
    y.audio = np.column_stack([ch.audio for ch in ych])

    return y
ivas_processing_scripts/generation/__init__.py +4 −0 Original line number Diff line number Diff line Loading @@ -43,6 +43,7 @@ from ivas_processing_scripts.constants import ( from ivas_processing_scripts.generation import ( config, process_foa_items, process_hoa2_items, process_ism_items, process_stereo_items, ) Loading Loading @@ -96,6 +97,9 @@ def main(args): elif cfg.format == "FOA": # generate FOA items according to scene description process_foa_items.generate_foa_items(cfg, logger) elif cfg.format == "HOA2": # generate HOA2 items according to scene description process_hoa2_items.generate_hoa2_items(cfg, logger) # copy configuration to output directory with open(cfg.output_path.joinpath(f"{cfg.format}.yml"), "w") as f: Loading