add brir (72d39bdc) · Commits · IVAS Codec Public Collaboration / IVAS Codec

scripts/binauralRenderer_interface/check_ivas_binaural_gains.m

+31 −7

Original line number	Diff line number	Diff line
		@@ -41,24 +41,48 @@ clc;
		%% Add path to the scripts containing the required routines
		addpath('./matlab_hrir_generation_scripts/');

		bitrates = [13200, 16400, 24400, 32000, 48000, 64000,80000, 96000, 128000, 160000, 192000, 256000, 384000, 512000];


		inFilesPath = {
		{'5_1','../testv/6_Channel_ID_48khz.wav'},...
		{'5_1','../testv/voice_LS_5.1.wav'},...
		{'5_1','../testv/voice_L_5.1.wav'},...
		{'5_1','../testv/voice_C_5.1.wav'},...
		{'5_1','../testv/voices_5.1.wav'},...
		{'5_1','../testv/6_Channel_ID_48khz_pn.wav'},...
		{'5_1','../testv/6_Channel_ID_48khz_pn_duplicated.wav'},...
		{'5_1','../testv/6_Channel_ID_48khz_pn_different.wav'},...
		{'5_1','../testv/6_Channel_ID_48khz.wav'},...
		{'7_1_4','../testv/FL_FR_C_LFE_Lss_Rss_Ls_Rs_Lv_Rv_Lvr_Rvr_714_48k.wav'},...
		{'5_1', '../testv/6_Channel_ID_48khz_pn.wav'}, ...
		{'5_1', '../testv/stv51MC48c.wav'}, ...
		{ '5_1_2', '../testv/stv512MC48c.wav'}, ...
		{'5_1_4', '../testv/stv514MC48c.wav'}, ...
		{'7_1', '../testv/stv71MC48c.wav'}, ...
		{'7_1_4', '../testv/stv714MC48c.wav'}};


		level_hrir = zeros(size(inFilesPath,2),size(bitrates,2));
		level_hrir_bin = zeros(size(inFilesPath,2),size(bitrates,2));
		gain_hrir = zeros(size(inFilesPath,2),size(bitrates,2));
		gain_hrir_bin = zeros(size(inFilesPath,2),size(bitrates,2));

		for ind=1:size(inFilesPath,2)
		[gain_bin(ind), gain(ind)] = binauralizeAudioWithIvas(inFilesPath{ind}{2}, inFilesPath{ind}{1});
		[level_hrir_bin(ind,:), level_hrir(ind,:), gain_hrir_bin(ind,:), gain_hrir(ind,:)] = levelsBinauralizeAudioWithIvas(inFilesPath{ind}{2}, inFilesPath{ind}{1},bitrates, 'BINAURAL');
		disp("levels computed with binaural correction in bs1770 function : " + num2str(level_hrir_bin))
		disp("levesl computed without binaural correction in bs1770 function : " + num2str(level_hrir))
		disp("gains with binaural correction in bs1770 function : " + num2str(gain_hrir_bin(ind,:)))
		disp("gains without binaural correction in bs1770 function : " + num2str(gain_hrir(ind,:)))
		end

		disp("gains with binaural correction in bs1770 function : " + num2str(gain_bin))
		disp("gains without binaural correction in bs1770 function : " + num2str(gain))
		No newline at end of file
		level_brir = zeros(size(inFilesPath,2),size(bitrates,2));
		level_brir_bin = zeros(size(inFilesPath,2),size(bitrates,2));
		gain_brir = zeros(size(inFilesPath,2),size(bitrates,2));
		gain_brir_bin = zeros(size(inFilesPath,2),size(bitrates,2));

		for ind=1:size(inFilesPath,2)
		[level_brir_bin(ind,:), level_brir(ind,:), gain_brir_bin(ind,:), gain_brir(ind,:)] = levelsBinauralizeAudioWithIvas(inFilesPath{ind}{2}, inFilesPath{ind}{1},bitrates, 'BINAURAL_ROOM_IR');
		disp("levels computed with binaural correction in bs1770 function : " + num2str(level_brir_bin))
		disp("levesl computed without binaural correction in bs1770 function : " + num2str(level_brir))
		disp("gains with binaural correction in bs1770 function : " + num2str(gain_brir_bin(ind,:)))
		disp("gains without binaural correction in bs1770 function : " + num2str(gain_brir(ind,:)))
		end

scripts/binauralRenderer_interface/compute_HRIR_BRIR_gains.m

+31 −5

Original line number	Diff line number	Diff line
		@@ -39,7 +39,7 @@ close all;
		clc;

		hrir_file_name = 'HRIR_128_Meth5_IRC_53_Q10_symL_Itrp1_48000.sofa'; % fail for TD renderer
		brir_file_name = 'BRIR_Auditorium_S1_R1_CICP19.sofa';
		brir_file_name = 'IIS_BRIR_officialMPEG_Combined.sofa';
		hrir_path = fullfile ('.','HRIRs_sofa');
		brir_path = fullfile ('.','BRIRs_sofa');
		hrir_file = fullfile( hrir_path, hrir_file_name);
		@@ -56,6 +56,9 @@ inFilesPath = {
		{'5_1','../testv/voice_L_5.1.wav'},...
		{'5_1','../testv/voice_C_5.1.wav'},...
		{'5_1','../testv/voices_5.1.wav'},...
		{'5_1','../testv/6_Channel_ID_48khz_pn.wav'},...
		{'5_1','../testv/6_Channel_ID_48khz_pn_duplicated.wav'},...
		{'5_1','../testv/6_Channel_ID_48khz_pn_different.wav'},...
		{'5_1','../testv/6_Channel_ID_48khz.wav'},...
		{'7_1_4','../testv/FL_FR_C_LFE_Lss_Rss_Ls_Rs_Lv_Rv_Lvr_Rvr_714_48k.wav'},...
		{'5_1', '../testv/6_Channel_ID_48khz_pn.wav'}, ...
		@@ -67,11 +70,34 @@ inFilesPath = {
		% {'5_1_4', '../testv/stv514MC48c.wav'}, ...
		% {'7_1', '../testv/stv71MC48c.wav'}, ...

		level_hrir = zeros(size(inFilesPath,2),1);
		level_hrir_bin = zeros(size(inFilesPath,2),1);
		gain_hrir = zeros(size(inFilesPath,2),1);
		gain_hrir_bin = zeros(size(inFilesPath,2),1);

		for ind=1:size(inFilesPath,2)
		matFile = [erase(hrir_file,'.sofa'), '_', inFilesPath{ind}{1}, '.mat'];
		SOFA_save_to_mat(hrir_file,inFilesPath{ind}{1},0.707, matFile);
		[binauralSignal, gain_bin(ind), gain(ind)] = binauralizeAudio(inFilesPath{ind}{2}, matFile);
		SOFA_save_to_mat(hrir_file,inFilesPath{ind}{1},10^(-3/20), matFile);
		[level_hrir_bin(ind), level_hrir(ind), gain_hrir_bin(ind), gain_hrir(ind)] = levelsBinauralizeAudio(inFilesPath{ind}{2}, matFile);
		end

		disp("levels hrir computed with binaural correction in bs1770 function : " + num2str(level_hrir_bin))
		disp("levesl hrir computed without binaural correction in bs1770 function : " + num2str(level_hrir))
		disp("gains hrir with binaural correction in bs1770 function : " + num2str(gain_hrir_bin))
		disp("gains hrir without binaural correction in bs1770 function : " + num2str(gain_hrir))

		level_brir = zeros(size(inFilesPath,2),1);
		level_brir_bin = zeros(size(inFilesPath,2),1);
		gain_brir = zeros(size(inFilesPath,2),1);
		gain_brir_bin = zeros(size(inFilesPath,2),1);

		for ind=1:size(inFilesPath,2)
		matFile = [erase(brir_file,'.sofa'), '_', inFilesPath{ind}{1}, '.mat'];
		SOFA_save_to_mat(brir_file,inFilesPath{ind}{1},10^(6.5/20), matFile);
		[level_brir_bin(ind), level_brir(ind), gain_brir_bin(ind), gain_brir(ind)] = levelsBinauralizeAudio(inFilesPath{ind}{2}, matFile);
		end

		disp("gains with binaural correction in bs1770 function : " + num2str(gain_bin))
		disp("gains without binaural correction in bs1770 function : " + num2str(gain))
		No newline at end of file
		disp("levels brir computed with binaural correction in bs1770 function : " + num2str(level_brir_bin))
		disp("levesl brir computed without binaural correction in bs1770 function : " + num2str(level_brir))
		disp("gains brir with binaural correction in bs1770 function : " + num2str(gain_brir_bin))
		disp("gains brir without binaural correction in bs1770 function : " + num2str(gain_brir))

scripts/binauralRenderer_interface/create_5.1_voice.py

+35 −7

Original line number	Diff line number	Diff line
		@@ -2,13 +2,14 @@ import soundfile as sf
		import numpy as np


		def create_5_1_audio(mono_files, output_file):
		def create_5_1_audio(mono_files, output_file, start_times=None):
		"""
		Combine 5 mono audio files into a 5.1 surround sound file.

		Args:
		mono_files: List of 5 paths to mono audio files
		output_file: Path to output 5.1 audio file
		start_times: List of 5 start times in seconds (default: all at 0)

		Channel layout:
		0: Front Left (FL)
		@@ -22,6 +23,11 @@ def create_5_1_audio(mono_files, output_file):
		if len(mono_files) != 5:
		raise ValueError("You need exactly 5 mono audio files")

		if start_times is None:
		start_times = [0] * 5
		elif len(start_times) != 5:
		raise ValueError("You need exactly 5 start times")

		# Read all mono files
		audio_arrays = []
		sample_rate = None
		@@ -41,13 +47,19 @@ def create_5_1_audio(mono_files, output_file):
		data = data[:, 0]

		audio_arrays.append(data)
		max_length = max(max_length, len(data))
		start_time_seconds = start_times[i]
		start_sample = int(start_time_seconds * sample_rate)
		max_length = max(max_length, start_sample + len(data))

		# Pad all arrays to the same length
		padded_arrays = []
		for audio in audio_arrays:
		if len(audio) < max_length:
		padded = np.pad(audio, (0, max_length - len(audio)), mode="constant")
		for i, audio in enumerate(audio_arrays):
		start_time_seconds = start_times[i]
		start_sample = int(start_time_seconds * sample_rate)
		if len(audio) + start_sample < max_length:
		padded = np.pad(
		audio, (0, max_length - (len(audio) + start_sample)), mode="constant"
		)
		else:
		padded = audio
		padded_arrays.append(padded)
		@@ -58,7 +70,11 @@ def create_5_1_audio(mono_files, output_file):
		# Place each mono file on its channel (skip LFE at index 3)
		channel_mapping = [0, 1, 2, 4, 5] # FL, FC, FR, SL, SR
		for i in range(5):
		surround_51[:, channel_mapping[i]] = padded_arrays[i]
		start_time_seconds = start_times[i]
		start_sample = int(start_time_seconds * sample_rate)
		surround_51[
		start_sample : start_sample + len(padded_arrays[i]), channel_mapping[i]
		] = padded_arrays[i]

		# Channel 3 (LFE) remains silent
		sf.write(output_file, surround_51, sample_rate)
		@@ -74,5 +90,17 @@ if __name__ == "__main__":
		"/Users/marcemerit/datasets/VCTK/VCTK-Corpus-0.92/wav48_silence_trimmed/p228/p228_005_mic2.flac",
		"/Users/marcemerit/datasets/VCTK/VCTK-Corpus-0.92/wav48_silence_trimmed/p229/p229_006_mic2.flac",
		]
		mono_files = [
		"./scripts/testv/mono_pn1.wav",
		"./scripts/testv/mono_pn2.wav",
		"./scripts/testv/mono_pn3.wav",
		"./scripts/testv/mono_pn4.wav",
		"./scripts/testv/mono_pn5.wav",
		]

		# Start times for each channel in seconds (FL, FC, FR, SL, SR)
		start_times = [0, 1, 2, 3, 4]

		create_5_1_audio(mono_files, "./scripts/testv/output_5.1.wav")
		create_5_1_audio(
		mono_files, "./scripts/testv/6_Channel_ID_48khz_pn_different.wav", start_times
		)