Loading scripts/binauralRenderer_interface/check_ivas_binaural_gains.m +31 −7 Original line number Diff line number Diff line Loading @@ -41,24 +41,48 @@ clc; %% Add path to routines needed scripts addpath('./matlab_hrir_generation_scripts/'); bitrates = [13200, 16400, 24400, 32000, 48000, 64000,80000, 96000, 128000, 160000, 192000, 256000, 384000, 512000]; inFilesPath = { {'5_1','../testv/6_Channel_ID_48khz.wav'},... {'5_1','../testv/voice_LS_5.1.wav'},... {'5_1','../testv/voice_L_5.1.wav'},... {'5_1','../testv/voice_C_5.1.wav'},... {'5_1','../testv/voices_5.1.wav'},... {'5_1','../testv/6_Channel_ID_48khz_pn.wav'},... {'5_1','../testv/6_Channel_ID_48khz_pn_duplicated.wav'},... {'5_1','../testv/6_Channel_ID_48khz_pn_different.wav'},... {'5_1','../testv/6_Channel_ID_48khz.wav'},... {'7_1_4','../testv/FL_FR_C_LFE_Lss_Rss_Ls_Rs_Lv_Rv_Lvr_Rvr_714_48k.wav'},... {'5_1', '../testv/6_Channel_ID_48khz_pn.wav'}, ... {'5_1', '../testv/stv51MC48c.wav'}, ... { '5_1_2', '../testv/stv512MC48c.wav'}, ... {'5_1_4', '../testv/stv514MC48c.wav'}, ... {'7_1', '../testv/stv71MC48c.wav'}, ... {'7_1_4', '../testv/stv714MC48c.wav'}}; level_hrir = zeros(size(inFilesPath,2),size(bitrates,2)); level_hrir_bin = zeros(size(inFilesPath,2),size(bitrates,2)); gain_hrir = zeros(size(inFilesPath,2),size(bitrates,2)); gain_hrir_bin = zeros(size(inFilesPath,2),size(bitrates,2)); for ind=1:size(inFilesPath,2) [gain_bin(ind), gain(ind)] = binauralizeAudioWithIvas(inFilesPath{ind}{2}, inFilesPath{ind}{1}); [level_hrir_bin(ind,:), level_hrir(ind,:), gain_hrir_bin(ind,:), gain_hrir(ind,:)] = levelsBinauralizeAudioWithIvas(inFilesPath{ind}{2}, inFilesPath{ind}{1},bitrates, 'BINAURAL'); disp("levels computed with binaural correction in bs1770 function : " + num2str(level_hrir_bin)) disp("levesl computed without binaural correction in bs1770 function : " + num2str(level_hrir)) disp("gains with binaural correction in bs1770 function : " + num2str(gain_hrir_bin(ind,:))) disp("gains without binaural correction in bs1770 function : " + num2str(gain_hrir(ind,:))) end disp("gains with binaural correction in bs1770 function : " + num2str(gain_bin)) disp("gains without binaural correction in bs1770 function : " + num2str(gain)) No newline at end of file level_brir = zeros(size(inFilesPath,2),size(bitrates,2)); level_brir_bin = zeros(size(inFilesPath,2),size(bitrates,2)); gain_brir = zeros(size(inFilesPath,2),size(bitrates,2)); gain_brir_bin = zeros(size(inFilesPath,2),size(bitrates,2)); for ind=1:size(inFilesPath,2) [level_brir_bin(ind,:), level_brir(ind,:), gain_brir_bin(ind,:), gain_brir(ind,:)] = levelsBinauralizeAudioWithIvas(inFilesPath{ind}{2}, inFilesPath{ind}{1},bitrates, 'BINAURAL_ROOM_IR'); disp("levels computed with binaural correction in bs1770 function : " + num2str(level_brir_bin)) disp("levesl computed without binaural correction in bs1770 function : " + num2str(level_brir)) disp("gains with binaural correction in bs1770 function : " + num2str(gain_brir_bin(ind,:))) disp("gains without binaural correction in bs1770 function : " + num2str(gain_brir(ind,:))) end scripts/binauralRenderer_interface/compute_HRIR_BRIR_gains.m +31 −5 Original line number Diff line number Diff line Loading @@ -39,7 +39,7 @@ close all; clc; hrir_file_name = 'HRIR_128_Meth5_IRC_53_Q10_symL_Itrp1_48000.sofa'; % fail for TD renderer brir_file_name = 'BRIR_Auditorium_S1_R1_CICP19.sofa'; brir_file_name = 'IIS_BRIR_officialMPEG_Combined.sofa'; hrir_path = fullfile ('.','HRIRs_sofa'); brir_path = fullfile ('.','BRIRs_sofa'); hrir_file = fullfile( hrir_path, hrir_file_name); Loading @@ -56,6 +56,9 @@ inFilesPath = { {'5_1','../testv/voice_L_5.1.wav'},... {'5_1','../testv/voice_C_5.1.wav'},... {'5_1','../testv/voices_5.1.wav'},... {'5_1','../testv/6_Channel_ID_48khz_pn.wav'},... {'5_1','../testv/6_Channel_ID_48khz_pn_duplicated.wav'},... {'5_1','../testv/6_Channel_ID_48khz_pn_different.wav'},... {'5_1','../testv/6_Channel_ID_48khz.wav'},... {'7_1_4','../testv/FL_FR_C_LFE_Lss_Rss_Ls_Rs_Lv_Rv_Lvr_Rvr_714_48k.wav'},... {'5_1', '../testv/6_Channel_ID_48khz_pn.wav'}, ... Loading @@ -67,11 +70,34 @@ inFilesPath = { % {'5_1_4', '../testv/stv514MC48c.wav'}, ... % {'7_1', '../testv/stv71MC48c.wav'}, ... level_hrir = zeros(size(inFilesPath,2),1); level_hrir_bin = zeros(size(inFilesPath,2),1); gain_hrir = zeros(size(inFilesPath,2),1); gain_hrir_bin = zeros(size(inFilesPath,2),1); for ind=1:size(inFilesPath,2) matFile = [erase(hrir_file,'.sofa'), '_', inFilesPath{ind}{1}, '.mat']; SOFA_save_to_mat(hrir_file,inFilesPath{ind}{1},0.707, matFile); [binauralSignal, gain_bin(ind), gain(ind)] = binauralizeAudio(inFilesPath{ind}{2}, matFile); SOFA_save_to_mat(hrir_file,inFilesPath{ind}{1},10^(-3/20), matFile); [level_hrir_bin(ind), level_hrir(ind), gain_hrir_bin(ind), gain_hrir(ind)] = levelsBinauralizeAudio(inFilesPath{ind}{2}, matFile); end disp("levels hrir computed with binaural correction in bs1770 function : " + num2str(level_hrir_bin)) disp("levesl hrir computed without binaural correction in bs1770 function : " + num2str(level_hrir)) disp("gains hrir with binaural correction in bs1770 function : " + num2str(gain_hrir_bin)) disp("gains hrir without binaural correction in bs1770 function : " + num2str(gain_hrir)) level_brir = zeros(size(inFilesPath,2),1); level_brir_bin = zeros(size(inFilesPath,2),1); gain_brir = zeros(size(inFilesPath,2),1); gain_brir_bin = zeros(size(inFilesPath,2),1); for ind=1:size(inFilesPath,2) matFile = [erase(brir_file,'.sofa'), '_', inFilesPath{ind}{1}, '.mat']; SOFA_save_to_mat(brir_file,inFilesPath{ind}{1},10^(6.5/20), matFile); [level_brir_bin(ind), level_brir(ind), gain_brir_bin(ind), gain_brir(ind)] = levelsBinauralizeAudio(inFilesPath{ind}{2}, matFile); end disp("gains with binaural correction in bs1770 function : " + num2str(gain_bin)) disp("gains without binaural correction in bs1770 function : " + num2str(gain)) No newline at end of file disp("levels brir computed with binaural correction in bs1770 function : " + num2str(level_brir_bin)) disp("levesl brir computed without binaural correction in bs1770 function : " + num2str(level_brir)) disp("gains brir with binaural correction in bs1770 function : " + num2str(gain_brir_bin)) disp("gains brir without binaural correction in bs1770 function : " + num2str(gain_brir)) scripts/binauralRenderer_interface/create_5.1_voice.py +35 −7 Original line number Diff line number Diff line Loading @@ -2,13 +2,14 @@ import soundfile as sf import numpy as np def create_5_1_audio(mono_files, output_file): def create_5_1_audio(mono_files, output_file, start_times=None): """ Combine 5 mono audio files into a 5.1 surround sound file. Args: mono_files: List of 5 paths to mono audio files output_file: Path to output 5.1 audio file start_times: List of 5 start times in seconds (default: all at 0) Channel layout: 0: Front Left (FL) Loading @@ -22,6 +23,11 @@ def create_5_1_audio(mono_files, output_file): if len(mono_files) != 5: raise ValueError("You need exactly 5 mono audio files") if start_times is None: start_times = [0] * 5 elif len(start_times) != 5: raise ValueError("You need exactly 5 start times") # Read all mono files audio_arrays = [] sample_rate = None Loading @@ -41,13 +47,19 @@ def create_5_1_audio(mono_files, output_file): data = data[:, 0] audio_arrays.append(data) max_length = max(max_length, len(data)) start_time_seconds = start_times[i] start_sample = int(start_time_seconds * sample_rate) max_length = max(max_length, start_sample + len(data)) # Pad all arrays to the same length padded_arrays = [] for audio in audio_arrays: if len(audio) < max_length: padded = np.pad(audio, (0, max_length - len(audio)), mode="constant") for i, audio in enumerate(audio_arrays): start_time_seconds = start_times[i] start_sample = int(start_time_seconds * sample_rate) if len(audio) + start_sample < max_length: padded = np.pad( audio, (0, max_length - (len(audio) + start_sample)), mode="constant" ) else: padded = audio padded_arrays.append(padded) Loading @@ -58,7 +70,11 @@ def create_5_1_audio(mono_files, output_file): # Place each mono file on its channel (skip LFE at index 3) channel_mapping = [0, 1, 2, 4, 5] # FL, FC, FR, SL, SR for i in range(5): surround_51[:, channel_mapping[i]] = padded_arrays[i] start_time_seconds = start_times[i] start_sample = int(start_time_seconds * sample_rate) surround_51[ start_sample : start_sample + len(padded_arrays[i]), channel_mapping[i] ] = padded_arrays[i] # Channel 3 (LFE) remains silent sf.write(output_file, surround_51, sample_rate) Loading @@ -74,5 +90,17 @@ if __name__ == "__main__": "/Users/marcemerit/datasets/VCTK/VCTK-Corpus-0.92/wav48_silence_trimmed/p228/p228_005_mic2.flac", "/Users/marcemerit/datasets/VCTK/VCTK-Corpus-0.92/wav48_silence_trimmed/p229/p229_006_mic2.flac", ] mono_files = [ "./scripts/testv/mono_pn1.wav", "./scripts/testv/mono_pn2.wav", "./scripts/testv/mono_pn3.wav", "./scripts/testv/mono_pn4.wav", "./scripts/testv/mono_pn5.wav", ] # Start times for each channel in seconds (FL, FC, FR, SL, SR) start_times = [0, 1, 2, 3, 4] create_5_1_audio(mono_files, "./scripts/testv/output_5.1.wav") create_5_1_audio( mono_files, "./scripts/testv/6_Channel_ID_48khz_pn_different.wav", start_times ) Loading
scripts/binauralRenderer_interface/check_ivas_binaural_gains.m +31 −7 Original line number Diff line number Diff line Loading @@ -41,24 +41,48 @@ clc; %% Add path to routines needed scripts addpath('./matlab_hrir_generation_scripts/'); bitrates = [13200, 16400, 24400, 32000, 48000, 64000,80000, 96000, 128000, 160000, 192000, 256000, 384000, 512000]; inFilesPath = { {'5_1','../testv/6_Channel_ID_48khz.wav'},... {'5_1','../testv/voice_LS_5.1.wav'},... {'5_1','../testv/voice_L_5.1.wav'},... {'5_1','../testv/voice_C_5.1.wav'},... {'5_1','../testv/voices_5.1.wav'},... {'5_1','../testv/6_Channel_ID_48khz_pn.wav'},... {'5_1','../testv/6_Channel_ID_48khz_pn_duplicated.wav'},... {'5_1','../testv/6_Channel_ID_48khz_pn_different.wav'},... {'5_1','../testv/6_Channel_ID_48khz.wav'},... {'7_1_4','../testv/FL_FR_C_LFE_Lss_Rss_Ls_Rs_Lv_Rv_Lvr_Rvr_714_48k.wav'},... {'5_1', '../testv/6_Channel_ID_48khz_pn.wav'}, ... {'5_1', '../testv/stv51MC48c.wav'}, ... { '5_1_2', '../testv/stv512MC48c.wav'}, ... {'5_1_4', '../testv/stv514MC48c.wav'}, ... {'7_1', '../testv/stv71MC48c.wav'}, ... {'7_1_4', '../testv/stv714MC48c.wav'}}; level_hrir = zeros(size(inFilesPath,2),size(bitrates,2)); level_hrir_bin = zeros(size(inFilesPath,2),size(bitrates,2)); gain_hrir = zeros(size(inFilesPath,2),size(bitrates,2)); gain_hrir_bin = zeros(size(inFilesPath,2),size(bitrates,2)); for ind=1:size(inFilesPath,2) [gain_bin(ind), gain(ind)] = binauralizeAudioWithIvas(inFilesPath{ind}{2}, inFilesPath{ind}{1}); [level_hrir_bin(ind,:), level_hrir(ind,:), gain_hrir_bin(ind,:), gain_hrir(ind,:)] = levelsBinauralizeAudioWithIvas(inFilesPath{ind}{2}, inFilesPath{ind}{1},bitrates, 'BINAURAL'); disp("levels computed with binaural correction in bs1770 function : " + num2str(level_hrir_bin)) disp("levesl computed without binaural correction in bs1770 function : " + num2str(level_hrir)) disp("gains with binaural correction in bs1770 function : " + num2str(gain_hrir_bin(ind,:))) disp("gains without binaural correction in bs1770 function : " + num2str(gain_hrir(ind,:))) end disp("gains with binaural correction in bs1770 function : " + num2str(gain_bin)) disp("gains without binaural correction in bs1770 function : " + num2str(gain)) No newline at end of file level_brir = zeros(size(inFilesPath,2),size(bitrates,2)); level_brir_bin = zeros(size(inFilesPath,2),size(bitrates,2)); gain_brir = zeros(size(inFilesPath,2),size(bitrates,2)); gain_brir_bin = zeros(size(inFilesPath,2),size(bitrates,2)); for ind=1:size(inFilesPath,2) [level_brir_bin(ind,:), level_brir(ind,:), gain_brir_bin(ind,:), gain_brir(ind,:)] = levelsBinauralizeAudioWithIvas(inFilesPath{ind}{2}, inFilesPath{ind}{1},bitrates, 'BINAURAL_ROOM_IR'); disp("levels computed with binaural correction in bs1770 function : " + num2str(level_brir_bin)) disp("levesl computed without binaural correction in bs1770 function : " + num2str(level_brir)) disp("gains with binaural correction in bs1770 function : " + num2str(gain_brir_bin(ind,:))) disp("gains without binaural correction in bs1770 function : " + num2str(gain_brir(ind,:))) end
scripts/binauralRenderer_interface/compute_HRIR_BRIR_gains.m +31 −5 Original line number Diff line number Diff line Loading @@ -39,7 +39,7 @@ close all; clc; hrir_file_name = 'HRIR_128_Meth5_IRC_53_Q10_symL_Itrp1_48000.sofa'; % fail for TD renderer brir_file_name = 'BRIR_Auditorium_S1_R1_CICP19.sofa'; brir_file_name = 'IIS_BRIR_officialMPEG_Combined.sofa'; hrir_path = fullfile ('.','HRIRs_sofa'); brir_path = fullfile ('.','BRIRs_sofa'); hrir_file = fullfile( hrir_path, hrir_file_name); Loading @@ -56,6 +56,9 @@ inFilesPath = { {'5_1','../testv/voice_L_5.1.wav'},... {'5_1','../testv/voice_C_5.1.wav'},... {'5_1','../testv/voices_5.1.wav'},... {'5_1','../testv/6_Channel_ID_48khz_pn.wav'},... {'5_1','../testv/6_Channel_ID_48khz_pn_duplicated.wav'},... {'5_1','../testv/6_Channel_ID_48khz_pn_different.wav'},... {'5_1','../testv/6_Channel_ID_48khz.wav'},... {'7_1_4','../testv/FL_FR_C_LFE_Lss_Rss_Ls_Rs_Lv_Rv_Lvr_Rvr_714_48k.wav'},... {'5_1', '../testv/6_Channel_ID_48khz_pn.wav'}, ... Loading @@ -67,11 +70,34 @@ inFilesPath = { % {'5_1_4', '../testv/stv514MC48c.wav'}, ... % {'7_1', '../testv/stv71MC48c.wav'}, ... level_hrir = zeros(size(inFilesPath,2),1); level_hrir_bin = zeros(size(inFilesPath,2),1); gain_hrir = zeros(size(inFilesPath,2),1); gain_hrir_bin = zeros(size(inFilesPath,2),1); for ind=1:size(inFilesPath,2) matFile = [erase(hrir_file,'.sofa'), '_', inFilesPath{ind}{1}, '.mat']; SOFA_save_to_mat(hrir_file,inFilesPath{ind}{1},0.707, matFile); [binauralSignal, gain_bin(ind), gain(ind)] = binauralizeAudio(inFilesPath{ind}{2}, matFile); SOFA_save_to_mat(hrir_file,inFilesPath{ind}{1},10^(-3/20), matFile); [level_hrir_bin(ind), level_hrir(ind), gain_hrir_bin(ind), gain_hrir(ind)] = levelsBinauralizeAudio(inFilesPath{ind}{2}, matFile); end disp("levels hrir computed with binaural correction in bs1770 function : " + num2str(level_hrir_bin)) disp("levesl hrir computed without binaural correction in bs1770 function : " + num2str(level_hrir)) disp("gains hrir with binaural correction in bs1770 function : " + num2str(gain_hrir_bin)) disp("gains hrir without binaural correction in bs1770 function : " + num2str(gain_hrir)) level_brir = zeros(size(inFilesPath,2),1); level_brir_bin = zeros(size(inFilesPath,2),1); gain_brir = zeros(size(inFilesPath,2),1); gain_brir_bin = zeros(size(inFilesPath,2),1); for ind=1:size(inFilesPath,2) matFile = [erase(brir_file,'.sofa'), '_', inFilesPath{ind}{1}, '.mat']; SOFA_save_to_mat(brir_file,inFilesPath{ind}{1},10^(6.5/20), matFile); [level_brir_bin(ind), level_brir(ind), gain_brir_bin(ind), gain_brir(ind)] = levelsBinauralizeAudio(inFilesPath{ind}{2}, matFile); end disp("gains with binaural correction in bs1770 function : " + num2str(gain_bin)) disp("gains without binaural correction in bs1770 function : " + num2str(gain)) No newline at end of file disp("levels brir computed with binaural correction in bs1770 function : " + num2str(level_brir_bin)) disp("levesl brir computed without binaural correction in bs1770 function : " + num2str(level_brir)) disp("gains brir with binaural correction in bs1770 function : " + num2str(gain_brir_bin)) disp("gains brir without binaural correction in bs1770 function : " + num2str(gain_brir))
scripts/binauralRenderer_interface/create_5.1_voice.py +35 −7 Original line number Diff line number Diff line Loading @@ -2,13 +2,14 @@ import soundfile as sf import numpy as np def create_5_1_audio(mono_files, output_file): def create_5_1_audio(mono_files, output_file, start_times=None): """ Combine 5 mono audio files into a 5.1 surround sound file. Args: mono_files: List of 5 paths to mono audio files output_file: Path to output 5.1 audio file start_times: List of 5 start times in seconds (default: all at 0) Channel layout: 0: Front Left (FL) Loading @@ -22,6 +23,11 @@ def create_5_1_audio(mono_files, output_file): if len(mono_files) != 5: raise ValueError("You need exactly 5 mono audio files") if start_times is None: start_times = [0] * 5 elif len(start_times) != 5: raise ValueError("You need exactly 5 start times") # Read all mono files audio_arrays = [] sample_rate = None Loading @@ -41,13 +47,19 @@ def create_5_1_audio(mono_files, output_file): data = data[:, 0] audio_arrays.append(data) max_length = max(max_length, len(data)) start_time_seconds = start_times[i] start_sample = int(start_time_seconds * sample_rate) max_length = max(max_length, start_sample + len(data)) # Pad all arrays to the same length padded_arrays = [] for audio in audio_arrays: if len(audio) < max_length: padded = np.pad(audio, (0, max_length - len(audio)), mode="constant") for i, audio in enumerate(audio_arrays): start_time_seconds = start_times[i] start_sample = int(start_time_seconds * sample_rate) if len(audio) + start_sample < max_length: padded = np.pad( audio, (0, max_length - (len(audio) + start_sample)), mode="constant" ) else: padded = audio padded_arrays.append(padded) Loading @@ -58,7 +70,11 @@ def create_5_1_audio(mono_files, output_file): # Place each mono file on its channel (skip LFE at index 3) channel_mapping = [0, 1, 2, 4, 5] # FL, FC, FR, SL, SR for i in range(5): surround_51[:, channel_mapping[i]] = padded_arrays[i] start_time_seconds = start_times[i] start_sample = int(start_time_seconds * sample_rate) surround_51[ start_sample : start_sample + len(padded_arrays[i]), channel_mapping[i] ] = padded_arrays[i] # Channel 3 (LFE) remains silent sf.write(output_file, surround_51, sample_rate) Loading @@ -74,5 +90,17 @@ if __name__ == "__main__": "/Users/marcemerit/datasets/VCTK/VCTK-Corpus-0.92/wav48_silence_trimmed/p228/p228_005_mic2.flac", "/Users/marcemerit/datasets/VCTK/VCTK-Corpus-0.92/wav48_silence_trimmed/p229/p229_006_mic2.flac", ] mono_files = [ "./scripts/testv/mono_pn1.wav", "./scripts/testv/mono_pn2.wav", "./scripts/testv/mono_pn3.wav", "./scripts/testv/mono_pn4.wav", "./scripts/testv/mono_pn5.wav", ] # Start times for each channel in seconds (FL, FC, FR, SL, SR) start_times = [0, 1, 2, 3, 4] create_5_1_audio(mono_files, "./scripts/testv/output_5.1.wav") create_5_1_audio( mono_files, "./scripts/testv/6_Channel_ID_48khz_pn_different.wav", start_times )