Loading scripts/binauralRenderer_interface/Table_Format_Converter/generate_tables_from_rom_to_bin.c +220 −14 Original line number Diff line number Diff line Loading @@ -42,6 +42,7 @@ #define FILE_HEADER #define PARABIN_FILE_312 /*------------------------------------------------------------------------------------------* * Constants Loading @@ -55,6 +56,9 @@ #define DEFAULT_INPUT_ROM_FILE "ivas_binaural" #define DEFAULT_INPUT_TD_BIN_FILE "hrfilter_model" #ifdef PARABIN_FILE_312 #define DEFAULT_INPUT_PARABIN_FILE "parabin_binary_rom" #endif #define DEFAULT_BIN_FILE_EXT ".bin" #define IVAS_NB_RENDERER_TYPE 7 Loading Loading @@ -161,6 +165,10 @@ void usage_tables_format_converter( void ) "-48 : Select 48 kHz sampling frequency (no multiple values, all frequencies by default).\n" ); fprintf( stdout, "-input_td_file_path : Path of binary files for time-domain renderer (with separator, used as flag).\n" ); fprintf( stdout, "-input_td_file_name : Name of input td file (without extension %s, default = '%s').\n", DEFAULT_BIN_FILE_EXT, DEFAULT_INPUT_TD_BIN_FILE ); #ifdef PARABIN_FILE_312 fprintf( stdout, "-input_parabin_file_path : Path of binary files for parabin renderer (with separator, used as flag).\n" ); fprintf( stdout, "-input_parabin_file_name : Name of input parabin file (without extension %s, default = '%s').\n", DEFAULT_BIN_FILE_EXT, DEFAULT_INPUT_PARABIN_FILE ); #endif fprintf( stdout, "\n" ); } Loading @@ -176,6 +184,12 @@ char *input_td_bin_path = NULL; char *input_td_bin_file_name = NULL; char *full_in_td_path = NULL; #ifdef PARABIN_FILE_312 char *input_parabin_path = NULL; char *input_parabin_file_name = NULL; char *full_in_parabin_path = NULL; #endif int16_t nb_freq = IVAS_NB_SAMPLERATE; const int32_t *freq_ptr = sample_rates; Loading Loading @@ -1175,7 +1189,18 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) uint32_t data_size_tmp; int16_t i, j; #ifdef PARABIN_FILE_312 uint8_t file_read_ok; float hrtfShCoeffsReFile[BINAURAL_CHANNELS][HRTF_SH_CHANNELS][HRTF_NUM_BINS]; float hrtfShCoeffsImFile[BINAURAL_CHANNELS][HRTF_SH_CHANNELS][HRTF_NUM_BINS]; float parametricReverberationTimesFile[CLDFB_NO_CHANNELS_MAX]; float parametricReverberationEneCorrectionsFile[CLDFB_NO_CHANNELS_MAX]; float parametricEarlyPartEneCorrectionFile[CLDFB_NO_CHANNELS_MAX]; FILE *input_parabin_file = NULL; #endif hrtf_data_size = 0; /* Binary file - block description : Loading @@ -1187,7 +1212,6 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) hrtfShCoeffsIm => float[BINAURAL_CHANNELS][HRTF_SH_CHANNELS][HRTF_NUM_BINS]; BRIR-based reverb CLDFB_NO_CHANNELS_MAX => uint16_t parametricReverberationTimes => float[CLDFB_NO_CHANNELS_MAX]; parametricReverberationEneCorrections => float[CLDFB_NO_CHANNELS_MAX]; Loading @@ -1206,10 +1230,73 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) hrtf_data_size += CLDFB_NO_CHANNELS_MAX * sizeof( float ); // parametricReverberationEneCorrections hrtf_data_size += CLDFB_NO_CHANNELS_MAX * sizeof( float ); // parametricEarlyPartEneCorrection #ifdef PARABIN_FILE_312 file_read_ok = 0; file_read_ok = 0; input_parabin_file = fopen( full_in_parabin_path, "rb" ); if ( input_parabin_file != NULL ) { uint16_t hrtf_sh_channels, hrtf_num_bins, cldfb_no_channels_max; fseek( input_parabin_file, 0, SEEK_END ); data_size_tmp = ftell( input_parabin_file ); fseek( input_parabin_file, 0, SEEK_SET ); if ( data_size_tmp != hrtf_data_size ) { fprintf( stderr, "Inconsistent binary data file size (expected: %d, found %d). Using built-in default values.\n\n", hrtf_data_size, data_size_tmp ); } else { fread( &hrtf_sh_channels, sizeof( uint16_t ), 1, input_parabin_file ); fread( &hrtf_num_bins, sizeof( uint16_t ), 1, input_parabin_file ); if ( hrtf_sh_channels != HRTF_SH_CHANNELS ) { fprintf( stderr, "Warning: Inconsistent HRTF_SH_CHANNELS (expected: %d, found: %d).\n\n", HRTF_SH_CHANNELS, hrtf_sh_channels ); } if ( hrtf_num_bins != HRTF_NUM_BINS ) { fprintf( stderr, "Warning: Inconsistent HRTF_NUM_BINS (expected: %d, found: %d).\n\n", HRTF_NUM_BINS, hrtf_num_bins ); } for ( size_t bin_chnl = 0; bin_chnl < BINAURAL_CHANNELS; bin_chnl++ ) { for ( size_t hrtf_chnl = 0; hrtf_chnl < HRTF_SH_CHANNELS; hrtf_chnl++ ) { fread( hrtfShCoeffsReFile[bin_chnl][hrtf_chnl], sizeof( float ), HRTF_NUM_BINS, input_parabin_file ); } } for ( size_t bin_chnl = 0; bin_chnl < BINAURAL_CHANNELS; bin_chnl++ ) { for ( size_t hrtf_chnl = 0; hrtf_chnl < HRTF_SH_CHANNELS; hrtf_chnl++ ) { fread( hrtfShCoeffsImFile[bin_chnl][hrtf_chnl], sizeof( float ), HRTF_NUM_BINS, input_parabin_file ); } } /* reverb */ fread( &cldfb_no_channels_max, sizeof( uint16_t ), 1, input_parabin_file ); if ( cldfb_no_channels_max != CLDFB_NO_CHANNELS_MAX ) { fprintf( stderr, "Warning: Inconsistent CLDFB_NO_CHANNELS_MAX (expected: %d, found: %d).\n\n", CLDFB_NO_CHANNELS_MAX, cldfb_no_channels_max ); } fread( parametricReverberationTimesFile, sizeof( float ), CLDFB_NO_CHANNELS_MAX, input_parabin_file ); fread( parametricReverberationEneCorrectionsFile, sizeof( float ), CLDFB_NO_CHANNELS_MAX, input_parabin_file ); fread( parametricEarlyPartEneCorrectionFile, sizeof( float ), CLDFB_NO_CHANNELS_MAX, input_parabin_file ); file_read_ok = 1; } } else { fprintf( stderr, "Opening of parabin file %s failed. Using built-in default values.\n\n", full_in_parabin_path ); } #endif // Allocate memory *hrtf_size = sizeof( ivas_hrtfs_header_t ) + hrtf_data_size; hrtf = (char *) malloc( *hrtf_size ); if ( hrtf == NULL ) { fprintf( stderr, "Memory allocation for the block failed!\n\n" ); Loading @@ -1217,6 +1304,7 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) return NULL; } // Write // Header [Declaration of the HRTF] Loading Loading @@ -1252,14 +1340,24 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) *( (uint16_t *) ( hrtf_wptr ) ) = HRTF_NUM_BINS; hrtf_wptr += sizeof( uint16_t ); // HRTF data_size_tmp = HRTF_NUM_BINS * sizeof( float ); for ( i = 0; i < BINAURAL_CHANNELS; i++ ) { for ( j = 0; j < HRTF_SH_CHANNELS; j++ ) { #ifdef PARABIN_FILE_312 if ( file_read_ok ) { memcpy( hrtf_wptr, &hrtfShCoeffsReFile[i][j], data_size_tmp ); } else { memcpy( hrtf_wptr, &hrtfShCoeffsRe[i][j], data_size_tmp ); } #else memcpy( hrtf_wptr, &hrtfShCoeffsRe[i][j], data_size_tmp ); #endif hrtf_wptr += data_size_tmp; } } Loading @@ -1267,7 +1365,18 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) { for ( j = 0; j < HRTF_SH_CHANNELS; j++ ) { #ifdef PARABIN_FILE_312 if ( file_read_ok ) { memcpy( hrtf_wptr, &hrtfShCoeffsImFile[i][j], data_size_tmp ); } else { memcpy( hrtf_wptr, &hrtfShCoeffsIm[i][j], data_size_tmp ); } #else memcpy( hrtf_wptr, &hrtfShCoeffsIm[i][j], data_size_tmp ); #endif hrtf_wptr += data_size_tmp; } } Loading @@ -1277,13 +1386,28 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) hrtf_wptr += sizeof( uint16_t ); data_size_tmp = CLDFB_NO_CHANNELS_MAX * sizeof( float ); #ifdef PARABIN_FILE_312 if ( file_read_ok ) { memcpy( hrtf_wptr, &( parametricReverberationTimesFile ), data_size_tmp ); // parametricReverberationTimes hrtf_wptr += data_size_tmp; memcpy( hrtf_wptr, &( parametricReverberationEneCorrectionsFile ), data_size_tmp ); // parametricReverberationEneCorrections hrtf_wptr += data_size_tmp; memcpy( hrtf_wptr, &( parametricEarlyPartEneCorrectionFile ), data_size_tmp ); // parametricEarlyPartEneCorrection hrtf_wptr += data_size_tmp; } else { #endif memcpy( hrtf_wptr, &( parametricReverberationTimes ), data_size_tmp ); // parametricReverberationTimes hrtf_wptr += data_size_tmp; memcpy( hrtf_wptr, &( parametricReverberationEneCorrections ), data_size_tmp ); // parametricReverberationEneCorrections hrtf_wptr += data_size_tmp; memcpy( hrtf_wptr, &( parametricEarlyPartEneCorrection ), data_size_tmp ); // parametricEarlyPartEneCorrection hrtf_wptr += data_size_tmp; #ifdef PARABIN_FILE_312 } #endif return hrtf; } Loading Loading @@ -2292,6 +2416,34 @@ int rom2bin_init( int argc, char *argv[] ) strcpy( input_td_bin_file_name, argv[i] ); i++; } #ifdef PARABIN_FILE_312 else if ( strcmp( to_upper( argv[i] ), "-INPUT_PARABIN_FILE_PATH" ) == 0 ) { i++; if ( strlen( argv[i] ) == 0 ) { fprintf( stderr, "Wrong input parabin file path: %s\n\n", argv[i] ); usage_tables_format_converter(); return -1; } input_parabin_path = malloc( strlen( argv[i] ) + 1 ); strcpy( input_parabin_path, argv[i] ); i++; } else if ( strcmp( to_upper( argv[i] ), "-INPUT_PARABIN_FILE_NAME" ) == 0 ) { i++; if ( strlen( argv[i] ) == 0 ) { fprintf( stderr, "Wrong input parabin file name: %s\n\n", argv[i] ); usage_tables_format_converter(); return -1; } input_parabin_file_name = malloc( strlen( argv[i] ) + 1 ); strcpy( input_parabin_file_name, argv[i] ); i++; } #endif else { fprintf( stderr, "Unknown option: %s\n\n", argv[i] ); Loading Loading @@ -2387,7 +2539,45 @@ int rom2bin_init( int argc, char *argv[] ) return -1; } } #ifdef PARABIN_FILE_312 if ( input_parabin_path == NULL ) { input_parabin_path = (char *) malloc( sizeof( char ) * ( strlen( DEFAULT_PATH ) + 1 ) ); if ( input_parabin_path ) { strcpy( input_parabin_path, DEFAULT_PATH ); } else { fprintf( stderr, "Memory issue for input parabin file path!\n\n" ); rom2bin_terminat(); return -1; } } if ( input_parabin_file_name == NULL ) { input_parabin_file_name = (char *) malloc( sizeof( char ) * ( strlen( DEFAULT_INPUT_PARABIN_FILE ) + 1 ) ); if ( input_parabin_file_name ) { strcpy( input_parabin_file_name, DEFAULT_INPUT_PARABIN_FILE ); } else { fprintf( stderr, "Memory issue for input parabin file name!\n\n" ); rom2bin_terminat(); return -1; } } full_in_parabin_path = (char *) malloc( sizeof( char ) * ( strlen( input_parabin_path ) + strlen( input_parabin_file_name ) + strlen( DEFAULT_BIN_FILE_EXT ) + 2 ) ); if ( full_in_parabin_path == NULL ) { fprintf( stderr, "Memory issue for full input parabin path!\n\n" ); rom2bin_terminat(); return -1; } sprintf( full_in_parabin_path, "%s/%s%s", input_parabin_path, input_parabin_file_name, DEFAULT_BIN_FILE_EXT ); #endif return 0; } Loading Loading @@ -2427,6 +2617,22 @@ void rom2bin_terminat( void ) { free( full_in_td_path ); } #ifdef PARABIN_FILE_312 if ( input_parabin_path != NULL ) { free( input_parabin_path ); } if ( input_parabin_file_name != NULL ) { free( input_parabin_file_name ); } if ( full_in_parabin_path != NULL ) { free( full_in_parabin_path ); } #endif } /*---------------------------------------------------------------------* Loading scripts/binauralRenderer_interface/param_bin/SH_GainComputation.m 0 → 100644 +40 −0 Original line number Diff line number Diff line function sh_gains = SH_GainComputation(dirs_deg, max_order) % Computation of real-valued spherical harmonic coefficients in ACN/orthonormal % normalization format num_sh = (max_order+1)^2; az_rad = dirs_deg(:,1)*pi/180; coel_rad = pi/2-dirs_deg(:,2)*pi/180; num_el = length(coel_rad); % Compute real-valued spherical harmonic coefficients sh_gains = zeros(num_sh,num_el); indx = 0; % l: SH-level (SH-order) for l = 0:max_order P = legendre(l,cos(coel_rad)).'; % m: SH-mode for m = -l:l indx = indx + 1; % N3D normalization term norm_term = sqrt((2*l+1)*factorial(l-abs(m))/(4*pi*factorial(l+abs(m)))); % trigonometric term if m > 0 trg_term = sqrt(2)*cos(m*az_rad); elseif m == 0 trg_term = 1; else trg_term = -sqrt(2)*sin(m*az_rad); end % associate Legendre function (with compensation of the Condon-Shortley phase term) Pnm = P(:,abs(m)+1)*(-1)^m; % final direct gain sh_gains(indx,:) = norm_term.*Pnm.*trg_term; end end end No newline at end of file scripts/binauralRenderer_interface/param_bin/cldfb_prototype.mat 0 → 100644LFS +3 −0 Original line number Diff line number Diff line version https://git-lfs.github.com/spec/v1 oid sha256:386bbd3a02c3b95280fff7bd8f5240ee60f8858889cad0f6147e930f5f2aa7e2 size 1246 scripts/binauralRenderer_interface/param_bin/equalDistributionUnitVectors.mat 0 → 100644LFS +3 −0 Original line number Diff line number Diff line version https://git-lfs.github.com/spec/v1 oid sha256:f47ff2387079ac315e2ea4bdf9f6f9c67d47173d9740eef86eec3a7d409f24d7 size 3844 scripts/binauralRenderer_interface/param_bin/generate_BRIR_in_SHD_CLDFB_PARAMETRIC.m 0 → 100644 +196 −0 Original line number Diff line number Diff line % % (C) 2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, % Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., % Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, % Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other % contributors to this repository. All Rights Reserved. % % This software is protected by copyright law and by international treaties. % The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, % Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., % Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, % Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other % contributors to this repository retain full ownership rights in their respective contributions in % the software. This notice grants no license of any kind, including but not limited to patent % license, nor is any license granted by implication, estoppel or otherwise. % % Contributors are required to enter into the IVAS codec Public Collaboration agreement before making % contributions. % % This software is provided "AS IS", without any express or implied warranties. The software is in the % development stage. It is intended exclusively for experts who have experience with such software and % solely for the purpose of inspection. All implied warranties of non-infringement, merchantability % and fitness for a particular purpose are hereby disclaimed and excluded. % % Any dispute, controversy or claim arising under or in relation to providing this software shall be % submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in % accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and % the United Nations Convention on Contracts on the International Sales of Goods. % function [T60, lateEnes, earlyEnes] = generate_BRIR_in_SHD_CLDFB_PARAMETRIC(brir_inputfile, SHhrtf) % % [T60, lateEnes, earlyEnes] = generate_BRIR_in_SHD_CLDFB_PARAMETRIC(brir_inputfile, SHhrtf) % % First parameter is the name with path to BRIR file in SOFA format % Second parameter is the HRIR set in SHD created before. % Return value contains the tables derived from BRIR set. % Note: it is assumed that SOFAstart has been called as prerequisite %% Load BRIR input file sofaData = SOFAload(brir_inputfile); %% Get data and format it for us % Input VRIR data from SOFA. After permuation, in order (response, ear, dirIndex) brirs = permute(sofaData.Data.IR(:, :, :), [3 2 1]); % Sampling rate of the BRIR set. Currently only checked. brirFS = sofaData.Data.SamplingRate; if brirFS ~= 48000 error('Only 48 kHz sampling rate BRIR is supported for now.') end % Measurement directions for responses. We discard the distance but there should % be solution that uses only measurements from same distance. BRIRanglesDeg = sofaData.SourcePosition(:, 1:2); % Determine nearest subset corresponding to 5.0 layout. % All BRIR related parameters are determined based on them brirAziRad = BRIRanglesDeg(:, 1)*pi/180; brirEleRad = BRIRanglesDeg(:, 2)*pi/180; refAnglesRad = [30 0 -30 110 -110]' * pi/180; refVectors = [cos(refAnglesRad) sin(refAnglesRad) zeros(size(refAnglesRad))]; sofaVectors = [cos(brirAziRad).*cos(brirEleRad) sin(brirAziRad).*cos(brirEleRad) sin(brirEleRad)]; for ch = 1:length(refAnglesRad) [~, maxIndex] = max(sofaVectors*refVectors(ch,:)'); chSelect(ch) = maxIndex; end brirs = brirs(:, :, chSelect); % Remove low frequencies which would cause T60 estimation errors [B,A] = butter(5, 80/24000, 'high'); brirs = filter(B, A, brirs); % Determine energies for the 5 BRIR pairs in 60 CLDFB frequency bins nFFT = 120; % 60 frequency bins window = hanning(nFFT); freqShiftWindow = window.*exp(-1i*[1:nFFT]'*pi/nFFT); % half bin offset as in CLDFB enes = []; for ch = 1:length(refAnglesRad) % Make low-pass energy enevelope and find its maximum energy position, % for each BRIR pair separately since the measurements may be not in sync brirEne = sum(abs(brirs(:, :, ch)).^2,2); maxEneSearchEnvelope = fftfilt(window, brirEne); [~, maxEnePos] = max(maxEneSearchEnvelope); % Set the first frame to match the maximum energy (i.e., catch the the direct component) timeIndices = maxEnePos - nFFT + [1:nFFT]'; % Determine the energy from that position onwards in 60 sample hops frameIndex = 1; while timeIndices(end) < length(brirs) brirFrame = brirs(timeIndices, :, ch).*repmat(freqShiftWindow, [1 2]); frameF = fft(brirFrame); enes(:, frameIndex, ch) = sum(abs(frameF(1:nFFT/2, :)).^2,2); frameIndex = frameIndex + 1; timeIndices = timeIndices + nFFT/2; end end % Combine energies across all directions enes = sum(enes, 3); % Determine search range for the T60 determination. In other words, find % where noise floor starts at each band for band = 1:nFFT/2 % This position is a maximum value after the early part [~, lateStart] = max(enes(band, 5:15)); % Ene envelope in dB for the band eneDb = 10*log10(enes(band, :)); index = 1; % Estimate a linear fit to the energy decay and determine RMSE:s with % respect to the actual energt decay for p = lateStart+5:length(eneDb) indices = [lateStart:p]'; P = polyfit(indices, eneDb(indices).', 1); est = polyval(P, indices); RMSE(index) = rms(est'-eneDb(indices)); index = index+1; end % Find minimum RMSE, but then find the longest sequence that has % smaller than 1dB larger RMSE rmseIndicesInRange = (RMSE < (min(RMSE) + 1)); maxIndexInRange(band) = 1; for index = 1:length(RMSE) if rmseIndicesInRange(index) maxIndexInRange(band) = index; end end % Truncate a little bit from the end, otherwise the start of the noise % floor slightly appears at the tail maxIndexInRange(band) = round(maxIndexInRange(band)*0.9); end % Smooth the length of the responses (starts of the noise floor) across % frequency. This clears any occasional errors at the estimates maxIndexInRangeSmooth = zeros(nFFT/2, 1); for band = 1:nFFT/2 range = band + [-2:2]; range=range(range > 0); range=range(range <= nFFT/2); maxIndexInRangeSmooth(band) = round(median(maxIndexInRange(range))); end % Determine T60, early energies, and late energies T60 = zeros(nFFT/2,1); for band = 1:nFFT/2 [~, lateStart] = max(enes(band, 5:15)); eneDb = 10*log10(enes(band, :)); indices = [lateStart:maxIndexInRangeSmooth(band)]; % Linear fitting a line, determining T60 based on it P = polyfit(indices, eneDb(indices), 1); dbDropPerFrame = P(1); frames60dBdropped = -60 / dbDropPerFrame; T60(band) = frames60dBdropped / 48000 * nFFT /2; % Formulate early and late part enes earlyEnes(band) = sum(enes(band, 1:25)); lateEnes(band) = sum(enes(band, 26:maxIndexInRangeSmooth(band))); end % Compensate earlyEnes with HRIR energy as it is used together with the % BRIR set in rendering. order = 3; SH = SH_GainComputation([refAnglesRad*180/pi zeros(size(refAnglesRad))], order)/0.2821; SH(2:4, :) = SH(2:4, :) / sqrt(3); SH(5:9, :) = SH(5:9, :) / sqrt(5); SH(10:16, :) = SH(10:16,: ) / sqrt(7); refHrtfEnes = zeros(60, 1); for band = 1:60 for ch = 1:length(refAnglesRad) hrtf = SHhrtf(:, :, band) * SH(:, ch); refHrtfEnes(band) = refHrtfEnes(band) + mean(abs(hrtf).^2); end end refHrtfEnes = refHrtfEnes / length(refAnglesRad); % Limit reference energies such that close to zero or zero HRTF energy would not generate high compensation % factors for earlyEnes. refHrtfEnes = max(0.2*median(refHrtfEnes), refHrtfEnes); earlyEnes = earlyEnes./refHrtfEnes'; earlyEnes(1) = earlyEnes(1)*10^(1/10); % 1dB LF boost at first bin to compensate for the high-pass filter % Smoothing of the high frequency T60s. Highest range near Nyquist are % prone for T60 estimation errors. Linearly expand from lower frequencies. indices = [30:50]'; P = polyfit(indices, T60(indices), 1); interp = min(1, [0:30]'/20); T60(30:60) = T60(30:60).*(1-interp) + polyval(P, [30:60]').*interp; % Late part energies are multiplied by sqrt(2), since parametric % expects such normalization. lateEnes = lateEnes * sqrt(2); Loading
scripts/binauralRenderer_interface/Table_Format_Converter/generate_tables_from_rom_to_bin.c +220 −14 Original line number Diff line number Diff line Loading @@ -42,6 +42,7 @@ #define FILE_HEADER #define PARABIN_FILE_312 /*------------------------------------------------------------------------------------------* * Constants Loading @@ -55,6 +56,9 @@ #define DEFAULT_INPUT_ROM_FILE "ivas_binaural" #define DEFAULT_INPUT_TD_BIN_FILE "hrfilter_model" #ifdef PARABIN_FILE_312 #define DEFAULT_INPUT_PARABIN_FILE "parabin_binary_rom" #endif #define DEFAULT_BIN_FILE_EXT ".bin" #define IVAS_NB_RENDERER_TYPE 7 Loading Loading @@ -161,6 +165,10 @@ void usage_tables_format_converter( void ) "-48 : Select 48 kHz sampling frequency (no multiple values, all frequencies by default).\n" ); fprintf( stdout, "-input_td_file_path : Path of binary files for time-domain renderer (with separator, used as flag).\n" ); fprintf( stdout, "-input_td_file_name : Name of input td file (without extension %s, default = '%s').\n", DEFAULT_BIN_FILE_EXT, DEFAULT_INPUT_TD_BIN_FILE ); #ifdef PARABIN_FILE_312 fprintf( stdout, "-input_parabin_file_path : Path of binary files for parabin renderer (with separator, used as flag).\n" ); fprintf( stdout, "-input_parabin_file_name : Name of input parabin file (without extension %s, default = '%s').\n", DEFAULT_BIN_FILE_EXT, DEFAULT_INPUT_PARABIN_FILE ); #endif fprintf( stdout, "\n" ); } Loading @@ -176,6 +184,12 @@ char *input_td_bin_path = NULL; char *input_td_bin_file_name = NULL; char *full_in_td_path = NULL; #ifdef PARABIN_FILE_312 char *input_parabin_path = NULL; char *input_parabin_file_name = NULL; char *full_in_parabin_path = NULL; #endif int16_t nb_freq = IVAS_NB_SAMPLERATE; const int32_t *freq_ptr = sample_rates; Loading Loading @@ -1175,7 +1189,18 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) uint32_t data_size_tmp; int16_t i, j; #ifdef PARABIN_FILE_312 uint8_t file_read_ok; float hrtfShCoeffsReFile[BINAURAL_CHANNELS][HRTF_SH_CHANNELS][HRTF_NUM_BINS]; float hrtfShCoeffsImFile[BINAURAL_CHANNELS][HRTF_SH_CHANNELS][HRTF_NUM_BINS]; float parametricReverberationTimesFile[CLDFB_NO_CHANNELS_MAX]; float parametricReverberationEneCorrectionsFile[CLDFB_NO_CHANNELS_MAX]; float parametricEarlyPartEneCorrectionFile[CLDFB_NO_CHANNELS_MAX]; FILE *input_parabin_file = NULL; #endif hrtf_data_size = 0; /* Binary file - block description : Loading @@ -1187,7 +1212,6 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) hrtfShCoeffsIm => float[BINAURAL_CHANNELS][HRTF_SH_CHANNELS][HRTF_NUM_BINS]; BRIR-based reverb CLDFB_NO_CHANNELS_MAX => uint16_t parametricReverberationTimes => float[CLDFB_NO_CHANNELS_MAX]; parametricReverberationEneCorrections => float[CLDFB_NO_CHANNELS_MAX]; Loading @@ -1206,10 +1230,73 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) hrtf_data_size += CLDFB_NO_CHANNELS_MAX * sizeof( float ); // parametricReverberationEneCorrections hrtf_data_size += CLDFB_NO_CHANNELS_MAX * sizeof( float ); // parametricEarlyPartEneCorrection #ifdef PARABIN_FILE_312 file_read_ok = 0; file_read_ok = 0; input_parabin_file = fopen( full_in_parabin_path, "rb" ); if ( input_parabin_file != NULL ) { uint16_t hrtf_sh_channels, hrtf_num_bins, cldfb_no_channels_max; fseek( input_parabin_file, 0, SEEK_END ); data_size_tmp = ftell( input_parabin_file ); fseek( input_parabin_file, 0, SEEK_SET ); if ( data_size_tmp != hrtf_data_size ) { fprintf( stderr, "Inconsistent binary data file size (expected: %d, found %d). Using built-in default values.\n\n", hrtf_data_size, data_size_tmp ); } else { fread( &hrtf_sh_channels, sizeof( uint16_t ), 1, input_parabin_file ); fread( &hrtf_num_bins, sizeof( uint16_t ), 1, input_parabin_file ); if ( hrtf_sh_channels != HRTF_SH_CHANNELS ) { fprintf( stderr, "Warning: Inconsistent HRTF_SH_CHANNELS (expected: %d, found: %d).\n\n", HRTF_SH_CHANNELS, hrtf_sh_channels ); } if ( hrtf_num_bins != HRTF_NUM_BINS ) { fprintf( stderr, "Warning: Inconsistent HRTF_NUM_BINS (expected: %d, found: %d).\n\n", HRTF_NUM_BINS, hrtf_num_bins ); } for ( size_t bin_chnl = 0; bin_chnl < BINAURAL_CHANNELS; bin_chnl++ ) { for ( size_t hrtf_chnl = 0; hrtf_chnl < HRTF_SH_CHANNELS; hrtf_chnl++ ) { fread( hrtfShCoeffsReFile[bin_chnl][hrtf_chnl], sizeof( float ), HRTF_NUM_BINS, input_parabin_file ); } } for ( size_t bin_chnl = 0; bin_chnl < BINAURAL_CHANNELS; bin_chnl++ ) { for ( size_t hrtf_chnl = 0; hrtf_chnl < HRTF_SH_CHANNELS; hrtf_chnl++ ) { fread( hrtfShCoeffsImFile[bin_chnl][hrtf_chnl], sizeof( float ), HRTF_NUM_BINS, input_parabin_file ); } } /* reverb */ fread( &cldfb_no_channels_max, sizeof( uint16_t ), 1, input_parabin_file ); if ( cldfb_no_channels_max != CLDFB_NO_CHANNELS_MAX ) { fprintf( stderr, "Warning: Inconsistent CLDFB_NO_CHANNELS_MAX (expected: %d, found: %d).\n\n", CLDFB_NO_CHANNELS_MAX, cldfb_no_channels_max ); } fread( parametricReverberationTimesFile, sizeof( float ), CLDFB_NO_CHANNELS_MAX, input_parabin_file ); fread( parametricReverberationEneCorrectionsFile, sizeof( float ), CLDFB_NO_CHANNELS_MAX, input_parabin_file ); fread( parametricEarlyPartEneCorrectionFile, sizeof( float ), CLDFB_NO_CHANNELS_MAX, input_parabin_file ); file_read_ok = 1; } } else { fprintf( stderr, "Opening of parabin file %s failed. Using built-in default values.\n\n", full_in_parabin_path ); } #endif // Allocate memory *hrtf_size = sizeof( ivas_hrtfs_header_t ) + hrtf_data_size; hrtf = (char *) malloc( *hrtf_size ); if ( hrtf == NULL ) { fprintf( stderr, "Memory allocation for the block failed!\n\n" ); Loading @@ -1217,6 +1304,7 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) return NULL; } // Write // Header [Declaration of the HRTF] Loading Loading @@ -1252,14 +1340,24 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) *( (uint16_t *) ( hrtf_wptr ) ) = HRTF_NUM_BINS; hrtf_wptr += sizeof( uint16_t ); // HRTF data_size_tmp = HRTF_NUM_BINS * sizeof( float ); for ( i = 0; i < BINAURAL_CHANNELS; i++ ) { for ( j = 0; j < HRTF_SH_CHANNELS; j++ ) { #ifdef PARABIN_FILE_312 if ( file_read_ok ) { memcpy( hrtf_wptr, &hrtfShCoeffsReFile[i][j], data_size_tmp ); } else { memcpy( hrtf_wptr, &hrtfShCoeffsRe[i][j], data_size_tmp ); } #else memcpy( hrtf_wptr, &hrtfShCoeffsRe[i][j], data_size_tmp ); #endif hrtf_wptr += data_size_tmp; } } Loading @@ -1267,7 +1365,18 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) { for ( j = 0; j < HRTF_SH_CHANNELS; j++ ) { #ifdef PARABIN_FILE_312 if ( file_read_ok ) { memcpy( hrtf_wptr, &hrtfShCoeffsImFile[i][j], data_size_tmp ); } else { memcpy( hrtf_wptr, &hrtfShCoeffsIm[i][j], data_size_tmp ); } #else memcpy( hrtf_wptr, &hrtfShCoeffsIm[i][j], data_size_tmp ); #endif hrtf_wptr += data_size_tmp; } } Loading @@ -1277,13 +1386,28 @@ char *create_hrtf_parametric( int32_t *hrtf_size ) hrtf_wptr += sizeof( uint16_t ); data_size_tmp = CLDFB_NO_CHANNELS_MAX * sizeof( float ); #ifdef PARABIN_FILE_312 if ( file_read_ok ) { memcpy( hrtf_wptr, &( parametricReverberationTimesFile ), data_size_tmp ); // parametricReverberationTimes hrtf_wptr += data_size_tmp; memcpy( hrtf_wptr, &( parametricReverberationEneCorrectionsFile ), data_size_tmp ); // parametricReverberationEneCorrections hrtf_wptr += data_size_tmp; memcpy( hrtf_wptr, &( parametricEarlyPartEneCorrectionFile ), data_size_tmp ); // parametricEarlyPartEneCorrection hrtf_wptr += data_size_tmp; } else { #endif memcpy( hrtf_wptr, &( parametricReverberationTimes ), data_size_tmp ); // parametricReverberationTimes hrtf_wptr += data_size_tmp; memcpy( hrtf_wptr, &( parametricReverberationEneCorrections ), data_size_tmp ); // parametricReverberationEneCorrections hrtf_wptr += data_size_tmp; memcpy( hrtf_wptr, &( parametricEarlyPartEneCorrection ), data_size_tmp ); // parametricEarlyPartEneCorrection hrtf_wptr += data_size_tmp; #ifdef PARABIN_FILE_312 } #endif return hrtf; } Loading Loading @@ -2292,6 +2416,34 @@ int rom2bin_init( int argc, char *argv[] ) strcpy( input_td_bin_file_name, argv[i] ); i++; } #ifdef PARABIN_FILE_312 else if ( strcmp( to_upper( argv[i] ), "-INPUT_PARABIN_FILE_PATH" ) == 0 ) { i++; if ( strlen( argv[i] ) == 0 ) { fprintf( stderr, "Wrong input parabin file path: %s\n\n", argv[i] ); usage_tables_format_converter(); return -1; } input_parabin_path = malloc( strlen( argv[i] ) + 1 ); strcpy( input_parabin_path, argv[i] ); i++; } else if ( strcmp( to_upper( argv[i] ), "-INPUT_PARABIN_FILE_NAME" ) == 0 ) { i++; if ( strlen( argv[i] ) == 0 ) { fprintf( stderr, "Wrong input parabin file name: %s\n\n", argv[i] ); usage_tables_format_converter(); return -1; } input_parabin_file_name = malloc( strlen( argv[i] ) + 1 ); strcpy( input_parabin_file_name, argv[i] ); i++; } #endif else { fprintf( stderr, "Unknown option: %s\n\n", argv[i] ); Loading Loading @@ -2387,7 +2539,45 @@ int rom2bin_init( int argc, char *argv[] ) return -1; } } #ifdef PARABIN_FILE_312 if ( input_parabin_path == NULL ) { input_parabin_path = (char *) malloc( sizeof( char ) * ( strlen( DEFAULT_PATH ) + 1 ) ); if ( input_parabin_path ) { strcpy( input_parabin_path, DEFAULT_PATH ); } else { fprintf( stderr, "Memory issue for input parabin file path!\n\n" ); rom2bin_terminat(); return -1; } } if ( input_parabin_file_name == NULL ) { input_parabin_file_name = (char *) malloc( sizeof( char ) * ( strlen( DEFAULT_INPUT_PARABIN_FILE ) + 1 ) ); if ( input_parabin_file_name ) { strcpy( input_parabin_file_name, DEFAULT_INPUT_PARABIN_FILE ); } else { fprintf( stderr, "Memory issue for input parabin file name!\n\n" ); rom2bin_terminat(); return -1; } } full_in_parabin_path = (char *) malloc( sizeof( char ) * ( strlen( input_parabin_path ) + strlen( input_parabin_file_name ) + strlen( DEFAULT_BIN_FILE_EXT ) + 2 ) ); if ( full_in_parabin_path == NULL ) { fprintf( stderr, "Memory issue for full input parabin path!\n\n" ); rom2bin_terminat(); return -1; } sprintf( full_in_parabin_path, "%s/%s%s", input_parabin_path, input_parabin_file_name, DEFAULT_BIN_FILE_EXT ); #endif return 0; } Loading Loading @@ -2427,6 +2617,22 @@ void rom2bin_terminat( void ) { free( full_in_td_path ); } #ifdef PARABIN_FILE_312 if ( input_parabin_path != NULL ) { free( input_parabin_path ); } if ( input_parabin_file_name != NULL ) { free( input_parabin_file_name ); } if ( full_in_parabin_path != NULL ) { free( full_in_parabin_path ); } #endif } /*---------------------------------------------------------------------* Loading
scripts/binauralRenderer_interface/param_bin/SH_GainComputation.m 0 → 100644 +40 −0 Original line number Diff line number Diff line function sh_gains = SH_GainComputation(dirs_deg, max_order) % Computation of real-valued spherical harmonic coefficients in ACN/orthonormal % normalization format num_sh = (max_order+1)^2; az_rad = dirs_deg(:,1)*pi/180; coel_rad = pi/2-dirs_deg(:,2)*pi/180; num_el = length(coel_rad); % Compute real-valued spherical harmonic coefficients sh_gains = zeros(num_sh,num_el); indx = 0; % l: SH-level (SH-order) for l = 0:max_order P = legendre(l,cos(coel_rad)).'; % m: SH-mode for m = -l:l indx = indx + 1; % N3D normalization term norm_term = sqrt((2*l+1)*factorial(l-abs(m))/(4*pi*factorial(l+abs(m)))); % trigonometric term if m > 0 trg_term = sqrt(2)*cos(m*az_rad); elseif m == 0 trg_term = 1; else trg_term = -sqrt(2)*sin(m*az_rad); end % associate Legendre function (with compensation of the Condon-Shortley phase term) Pnm = P(:,abs(m)+1)*(-1)^m; % final direct gain sh_gains(indx,:) = norm_term.*Pnm.*trg_term; end end end No newline at end of file
scripts/binauralRenderer_interface/param_bin/cldfb_prototype.mat 0 → 100644LFS +3 −0 Original line number Diff line number Diff line version https://git-lfs.github.com/spec/v1 oid sha256:386bbd3a02c3b95280fff7bd8f5240ee60f8858889cad0f6147e930f5f2aa7e2 size 1246
scripts/binauralRenderer_interface/param_bin/equalDistributionUnitVectors.mat 0 → 100644LFS +3 −0 Original line number Diff line number Diff line version https://git-lfs.github.com/spec/v1 oid sha256:f47ff2387079ac315e2ea4bdf9f6f9c67d47173d9740eef86eec3a7d409f24d7 size 3844
scripts/binauralRenderer_interface/param_bin/generate_BRIR_in_SHD_CLDFB_PARAMETRIC.m 0 → 100644 +196 −0 Original line number Diff line number Diff line % % (C) 2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, % Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., % Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, % Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other % contributors to this repository. All Rights Reserved. % % This software is protected by copyright law and by international treaties. % The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, % Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., % Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, % Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other % contributors to this repository retain full ownership rights in their respective contributions in % the software. This notice grants no license of any kind, including but not limited to patent % license, nor is any license granted by implication, estoppel or otherwise. % % Contributors are required to enter into the IVAS codec Public Collaboration agreement before making % contributions. % % This software is provided "AS IS", without any express or implied warranties. The software is in the % development stage. It is intended exclusively for experts who have experience with such software and % solely for the purpose of inspection. All implied warranties of non-infringement, merchantability % and fitness for a particular purpose are hereby disclaimed and excluded. % % Any dispute, controversy or claim arising under or in relation to providing this software shall be % submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in % accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and % the United Nations Convention on Contracts on the International Sales of Goods. % function [T60, lateEnes, earlyEnes] = generate_BRIR_in_SHD_CLDFB_PARAMETRIC(brir_inputfile, SHhrtf) % % [T60, lateEnes, earlyEnes] = generate_BRIR_in_SHD_CLDFB_PARAMETRIC(brir_inputfile, SHhrtf) % % First parameter is the name with path to BRIR file in SOFA format % Second parameter is the HRIR set in SHD created before. % Return value contains the tables derived from BRIR set. % Note: it is assumed that SOFAstart has been called as prerequisite %% Load BRIR input file sofaData = SOFAload(brir_inputfile); %% Get data and format it for us % Input VRIR data from SOFA. After permuation, in order (response, ear, dirIndex) brirs = permute(sofaData.Data.IR(:, :, :), [3 2 1]); % Sampling rate of the BRIR set. Currently only checked. brirFS = sofaData.Data.SamplingRate; if brirFS ~= 48000 error('Only 48 kHz sampling rate BRIR is supported for now.') end % Measurement directions for responses. We discard the distance but there should % be solution that uses only measurements from same distance. BRIRanglesDeg = sofaData.SourcePosition(:, 1:2); % Determine nearest subset corresponding to 5.0 layout. % All BRIR related parameters are determined based on them brirAziRad = BRIRanglesDeg(:, 1)*pi/180; brirEleRad = BRIRanglesDeg(:, 2)*pi/180; refAnglesRad = [30 0 -30 110 -110]' * pi/180; refVectors = [cos(refAnglesRad) sin(refAnglesRad) zeros(size(refAnglesRad))]; sofaVectors = [cos(brirAziRad).*cos(brirEleRad) sin(brirAziRad).*cos(brirEleRad) sin(brirEleRad)]; for ch = 1:length(refAnglesRad) [~, maxIndex] = max(sofaVectors*refVectors(ch,:)'); chSelect(ch) = maxIndex; end brirs = brirs(:, :, chSelect); % Remove low frequencies which would cause T60 estimation errors [B,A] = butter(5, 80/24000, 'high'); brirs = filter(B, A, brirs); % Determine energies for the 5 BRIR pairs in 60 CLDFB frequency bins nFFT = 120; % 60 frequency bins window = hanning(nFFT); freqShiftWindow = window.*exp(-1i*[1:nFFT]'*pi/nFFT); % half bin offset as in CLDFB enes = []; for ch = 1:length(refAnglesRad) % Make low-pass energy enevelope and find its maximum energy position, % for each BRIR pair separately since the measurements may be not in sync brirEne = sum(abs(brirs(:, :, ch)).^2,2); maxEneSearchEnvelope = fftfilt(window, brirEne); [~, maxEnePos] = max(maxEneSearchEnvelope); % Set the first frame to match the maximum energy (i.e., catch the the direct component) timeIndices = maxEnePos - nFFT + [1:nFFT]'; % Determine the energy from that position onwards in 60 sample hops frameIndex = 1; while timeIndices(end) < length(brirs) brirFrame = brirs(timeIndices, :, ch).*repmat(freqShiftWindow, [1 2]); frameF = fft(brirFrame); enes(:, frameIndex, ch) = sum(abs(frameF(1:nFFT/2, :)).^2,2); frameIndex = frameIndex + 1; timeIndices = timeIndices + nFFT/2; end end % Combine energies across all directions enes = sum(enes, 3); % Determine search range for the T60 determination. In other words, find % where noise floor starts at each band for band = 1:nFFT/2 % This position is a maximum value after the early part [~, lateStart] = max(enes(band, 5:15)); % Ene envelope in dB for the band eneDb = 10*log10(enes(band, :)); index = 1; % Estimate a linear fit to the energy decay and determine RMSE:s with % respect to the actual energt decay for p = lateStart+5:length(eneDb) indices = [lateStart:p]'; P = polyfit(indices, eneDb(indices).', 1); est = polyval(P, indices); RMSE(index) = rms(est'-eneDb(indices)); index = index+1; end % Find minimum RMSE, but then find the longest sequence that has % smaller than 1dB larger RMSE rmseIndicesInRange = (RMSE < (min(RMSE) + 1)); maxIndexInRange(band) = 1; for index = 1:length(RMSE) if rmseIndicesInRange(index) maxIndexInRange(band) = index; end end % Truncate a little bit from the end, otherwise the start of the noise % floor slightly appears at the tail maxIndexInRange(band) = round(maxIndexInRange(band)*0.9); end % Smooth the length of the responses (starts of the noise floor) across % frequency. This clears any occasional errors at the estimates maxIndexInRangeSmooth = zeros(nFFT/2, 1); for band = 1:nFFT/2 range = band + [-2:2]; range=range(range > 0); range=range(range <= nFFT/2); maxIndexInRangeSmooth(band) = round(median(maxIndexInRange(range))); end % Determine T60, early energies, and late energies T60 = zeros(nFFT/2,1); for band = 1:nFFT/2 [~, lateStart] = max(enes(band, 5:15)); eneDb = 10*log10(enes(band, :)); indices = [lateStart:maxIndexInRangeSmooth(band)]; % Linear fitting a line, determining T60 based on it P = polyfit(indices, eneDb(indices), 1); dbDropPerFrame = P(1); frames60dBdropped = -60 / dbDropPerFrame; T60(band) = frames60dBdropped / 48000 * nFFT /2; % Formulate early and late part enes earlyEnes(band) = sum(enes(band, 1:25)); lateEnes(band) = sum(enes(band, 26:maxIndexInRangeSmooth(band))); end % Compensate earlyEnes with HRIR energy as it is used together with the % BRIR set in rendering. order = 3; SH = SH_GainComputation([refAnglesRad*180/pi zeros(size(refAnglesRad))], order)/0.2821; SH(2:4, :) = SH(2:4, :) / sqrt(3); SH(5:9, :) = SH(5:9, :) / sqrt(5); SH(10:16, :) = SH(10:16,: ) / sqrt(7); refHrtfEnes = zeros(60, 1); for band = 1:60 for ch = 1:length(refAnglesRad) hrtf = SHhrtf(:, :, band) * SH(:, ch); refHrtfEnes(band) = refHrtfEnes(band) + mean(abs(hrtf).^2); end end refHrtfEnes = refHrtfEnes / length(refAnglesRad); % Limit reference energies such that close to zero or zero HRTF energy would not generate high compensation % factors for earlyEnes. refHrtfEnes = max(0.2*median(refHrtfEnes), refHrtfEnes); earlyEnes = earlyEnes./refHrtfEnes'; earlyEnes(1) = earlyEnes(1)*10^(1/10); % 1dB LF boost at first bin to compensate for the high-pass filter % Smoothing of the high frequency T60s. Highest range near Nyquist are % prone for T60 estimation errors. Linearly expand from lower frequencies. indices = [30:50]'; P = polyfit(indices, T60(indices), 1); interp = min(1, [0:30]'/20); T60(30:60) = T60(30:60).*(1-interp) + polyval(P, [30:60]').*interp; % Late part energies are multiplied by sqrt(2), since parametric % expects such normalization. lateEnes = lateEnes * sqrt(2);