Commit 1278a0b6 authored by Marek Szczerba's avatar Marek Szczerba
Browse files

Added BinauralSDM for IVAS

parent 5f64c1c9
Loading
Loading
Loading
Loading
Loading
+416 −0
Original line number Diff line number Diff line
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% (C) 2022-2025 IVAS codec Public Collaboration with portions copyright 
% Dolby International AB, Ericsson AB, Fraunhofer-Gesellschaft zur Foerderung 
% der angewandten Forschung e.V., Huawei Technologies Co. LTD., 
% Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, 
% Nokia Technologies Oy, Orange, Panasonic Holdings Corporation, 
% Qualcomm Technologies, Inc., VoiceAge Corporation, and other contributors 
% to this repository. All Rights Reserved.
%
% This software is protected by copyright law and by international treaties.
% The IVAS codec Public Collaboration consisting of Dolby International AB, 
% Ericsson AB, Fraunhofer-Gesellschaft zur Foerderung der angewandten 
% Forschung e.V., Huawei Technologies Co. LTD., Koninklijke Philips N.V., 
% Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, 
% Panasonic Holdings Corporation, Qualcomm Technologies, Inc., 
% VoiceAge Corporation, and other contributors to this repository retain full 
% ownership rights in their respective contributions in the software. 
% This notice grants no license of any kind, including but not limited to 
% patent license, nor is any license granted by implication, estoppel or 
% otherwise.
%
% Contributors are required to enter into the IVAS codec Public Collaboration 
% agreement before making contributions.
%
% This software is provided "AS IS", without any express or implied warranties. 
% The software is in the development stage. It is intended exclusively for 
% experts who have experience with such software and solely for the purpose 
% of inspection. All implied warranties of non-infringement, merchantability 
% and fitness for a particular purpose are hereby disclaimed and excluded.
%
% Any dispute, controversy or claim arising under or in relation to providing 
% this software shall be submitted to and settled by the final, binding 
% jurisdiction of the courts of Munich, Germany in accordance with the laws 
% of the Federal Republic of Germany excluding its conflict of law rules and 
% the United Nations Convention on Contracts on the International Sales of Goods.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Description

% This script is based on an example as provided in the BinauralSDM
% repository (https://github.com/facebookresearch/BinauralSDM).
% Copyright (c) Facebook, Inc. and its affiliates.
% The BRIRs generated by this script include the steps described in
% Amengual et al. 2020 - DOA Postprocessing (smoothing and quantization)
% and RTMod+AP equalization.

% The overall process is as follows:
% - Spatial data from a multichannel RIR is obtained using the SDM
% utilizing the functions from the SDM Toolbox (Tervo & Patynen).
% - The spatial information is smoothed and quantized as proposed in
% Amengual et al. 2020.
% - Preliminary BRIRs are synthesized by selecting the closest directions
% from an HRIR dataset. In this case, Orange HRIR data set #53 is used.
% 
% - The reverberation time of the preliminary BRIRs is corrected using the
% RTMod method introduced in Amengual et al. 2020.
% - After RTMod correction, a cascade of 3 all-pass filters is applied to
% both left and right channels to improve the diffuseness of the late
% reverberation tail. This process is also introduced in Amengual et al.
% 2020.
% - Responses for arbitrarily defined head orientations are generated
% using the previous steps by rotating the DOA vectors.
% - The data is saved in a user defined folder.
%
%
% References: 
% - (Tervo et al. 2013) - "Spatial Decomposition Method for Room Impulse
% Responses", JAES, 2013.
% - (SDM Toolbox) - "SDM Toolbox", S. Tervo and J. Patynen, Mathworks
% Exchange
% - (Amengual et al. 2020) - "Optimizations of the Spatial Decomposition 
% Method for Binaural Reproduction", JAES 2020.
% Author: Sebastia Amengual (samengual@fb.com)
%
% Adapted: Marek Szczerba (marek.szczerba@philips.com) (MS)
% Last modified: 08/25/2025
%
% HOW TO
% - Clone the BinauralSDM repository
%   (https://github.com/facebookresearch/BinauralSDM).
% - Place this file in the ./BinauralSDM/Src/Examples folder.
% - Update the settings on top of the BinauralSDM_for_IVAS function to
%   reflect the actual folder structure.
% - Run this script.
%
% NOTES (MS)
% Eigenmike geometry is supported out-of-the-box. Check create_MicGeometry
% in BinauralSDM/Src folder


function BinauralSDM_for_IVAS

    % BinauralSDM_for_IVAS,  Binaural SDM processing for IVAS
    %
    %  refactored into functional programming with local variables

    % Parameters and settings
    % RIR
    RIR_input_path  = '..\..\..\RIRs\';
    RIR_room_name   = 'Auditorium';
    RIR_source_pos  = 'S1';

    % HRIR
    HRIR_input_path = '..\..\..\ivas-codec\scripts\binauralRenderer_interface\HRIRs_sofa\';
    HRIR_Filename   = 'HRIR_128_Meth5_IRC_53_Q10_symL_Itrp1_48000.sofa';
    HRIR_Subject    = 'HRIR-53';  % Name of the HRIR subject (only used for naming purposes while saving).
    HRIR_Type       = 'SOFA';     % File format of the HRIR. Only SOFA is supported for now.
    
    % BRIR
    BRIR_output_path='.\';
    BRIR_angles_mode='9.1.6'; % 7.1.4 or 9.1.6 or All

    % settings is a struct that summarizes all releveant settings for processing files
    settings.RIR_database_input_path    = fullfile(RIR_input_path);
    settings.RIR_room                   = RIR_room_name;
    settings.RIR_SourcePos              = RIR_source_pos;

    settings.HRIR_input_path    = fullfile(HRIR_input_path);
    settings.HRIR_filename      = HRIR_Filename;
    settings.HRIR_subject       = HRIR_Subject;
    settings.HRIR_type          = HRIR_Type;

    settings.BRIR_output_path   = fullfile(BRIR_output_path);
    settings.BRIR_AnglesMode    = BRIR_angles_mode;

    % setup processing
    time_start  = tic();
    SRIR_data   = init_SRIR_data(settings);
    SDM_Struct  = init_SDM_Struct(SRIR_data);
    BRIR_data   = init_BRIR_data(settings, SRIR_data);
    Plot_data   = init_Plot_data(SRIR_data, BRIR_data);

    % do processing
    SRIR_data = analyze(SRIR_data, SDM_Struct, Plot_data);
    synthesize(SRIR_data, BRIR_data, Plot_data);

    % done
    time_exec = toc(time_start);
    fprintf('\n... completed in %.0fh %.0fm %.0fs.\n', time_exec/60/60, time_exec/60, mod(time_exec, 60));
end

function SRIR_data = init_SRIR_data(settings)
    % Analysis parameters
    MicArray        = 'Eigenmike';              % FRL Array is 7 mics, 10cm diameter, with central sensor. Supported geometries are FRL_10cm, FRL_5cm, 
                                                % Tetramic and Eigenmike. Modify the file create_MicGeometry to add other geometries (or contact us, we'd be happy to help).
    Room            = settings.RIR_room;        % Name of the room. RIR file name must follow the convention RoomName_SX_RX.wav
    SourcePos       = settings.RIR_SourcePos;   % Source Position. RIR file name must follow the convention RoomName_SX_RX.wav
    ReceiverPos     = 'R1';                     % Receiver Position. RIR file name must follow the convention RoomName_SX_RX.wav
    Database_Path   = settings.RIR_database_input_path;   % Relative path to folder containing the multichannel RIR
    fs              = 48e3;                 % Sampling Rate (in Hz). Only 48 kHz is recommended. Other sampling rates have not been tested.
    MixingTime      = 0.08;                 % Mixing time (in seconds) of the room for rendering. Data after the mixing time will be rendered 
                                            % as a single direction independent reverb tail and AP rendering will be applied.
    DOASmooth       = 16;                   % Window length (in samples) for smoothing of DOA information. 16 samples is a good compromise for noise 
                                            % reduction and time resolution.
    DOAOnsetLength  = 128;                  % Length (in samples) for assignment of a constant (averaged) DOA for the onset / direct sound.
    DenoiseFlag     = true;                 % Flag to perform noise floor compensation on the multichannel RIR. If set, it ensures that the RIR decays 
                                            % progressively and removes rendering artifacts due to high noise floor in the RIR.
    PlotDenoisedRIR = false;
    FilterRawFlag   = true;                 % Flag to perform band pass filtering on the multichannel RIR prior to DOA estimation. If set, only
                                            % information between 200 Hz and 8 kHz (by default) will be used for DOA estimation. This helps increasing 
                                            % robustness of the estimation. See create_BRIR_data.m for customization of the filtering.
    AlignDOAFlag    = true;                 % If this flag is set, the DOA data will be rotated so the direct sound is aligned to 0,0 (az, el).
    
    % Initialize SRIR data struct
    SRIR_data = create_SRIR_data('MicArray',MicArray,...
                                 'Room',Room,...
                                 'SourcePos',SourcePos,...
                                 'ReceiverPos',ReceiverPos,...
                                 'Database_Path',Database_Path,...
                                 'fs',fs,...
                                 'MixingTime',MixingTime,...
                                 'DOASmooth',DOASmooth,...
                                 'DOAOnsetLength',DOAOnsetLength,...
                                 'Denoise',DenoiseFlag,...
                                 'PlotDenoisedRIR',PlotDenoisedRIR,...
                                 'FilterRaw',FilterRawFlag,...
                                 'AlignDOA',AlignDOAFlag);
end


function SDM_Struct = init_SDM_Struct(SRIR_data)
    SpeedSound      = 345;                  % Speed of sound in m/s (for SDM Toolbox DOA analysis)
    WinLen          = 24; %62;              % Window Length (in samples) for SDM DOA analysis. For fs = 48kHz, sizes between 36 and 64 seem appropriate. 
                                            % The optimal size might be room dependent. See Tervo et al. 2013 and Amengual et al. 2020 for a discussion.
    
    % Initialize SDM analysis struct (from SDM Toolbox)
    SDM_Struct = createSDMStruct('c',SpeedSound,...
                                 'fs',SRIR_data.fs,...
                                 'micLocs',SRIR_data.ArrayGeometry,...
                                 'winLen',WinLen);
end


function BRIR_data = init_BRIR_data(settings, SRIR_data)
    %% Rendering parameters
    DOAAzOffset      = 0.0;             % Azimuth rotation in degrees aplied after DOA estmation and before DOA quantization / BRIR rendering.
    DOAElOffset      = 0.0;             % Elevation rotation in degrees aplied after DOA estmation and before DOA quantization / BRIR rendering. 
    QuantizeDOAFlag  = true;            % Flag to determine if DOA information must me quantized.
    DOADirections    = 50;              % Number of directions to which the spatial information will be quantized using a Lebedev grid.
    BandsPerOctave   = 1;               % Bands per octave used for RT60 equalization
    EqTxx            = 20;              % Txx used for RT60 equalization. For very small rooms or low SNR measurements, T20 is recommended. Otherwise, T30 is recommended.
    RTModRegFreq     = false;           % Regularization frequncy in Hz above which the RTmod modification of the late reverberation should be regularized.
    NamingCond       = sprintf('Quantized%dDOA', DOADirections); % String used for naming purposes, useful when rendering variations of the sasme RIR.
    BRIR_Length      = 0.0;             % Length of BRIR in seconds, will be chosen by analysis of the room reverberation time if unspecified.
    BRIR_Atten       = 24;              % Attenuation of the rendered BRIRs (in dB). Useful to avoid clipping. Use the same value when rendering various
                                        % positions in the same room to maintain relative level differences.
    AnglesMode       = settings.BRIR_AnglesMode;         % Angle processing mode: '7.1.4', '9.1.6' or 'All'
                                        
    % AzOrient         = (-180:2:180)';   % Render BRIRs every 2 degrees in azimuth.
    % ElOrient         = (-90:5:90)';     % Render BRIRs every 5 degrees in elevation.
    switch AnglesMode
        case 'All'
            AzOrient = (-180:2:180)';   % Render BRIRs every 2 degrees in azimuth.
            ElOrient = (-90:6:90)';     % Render BRIRs every 6 degrees in elevation.
        case '7.1.4' % 7.1+4 loudspeaker setup
            AzOrient = [  30.0,  -30.0,    0.0,  135.0, -135.0,   90.0,  -90.0,   30.0,  -30.0,  135.0, -135.0];
            ElOrient = [   0.0,    0.0,    0.0,    0.0,    0.0,    0.0,    0.0,   35.0,   35.0,   35.0,   35.0];
        case '9.1.6' % 9.1+6 loudspeaker setup
            AzOrient = [  30.0,  -30.0,    0.0,  135.0, -135.0,  110.0, -110.0,  90.0,   -90.0,   30.0,  -30.0,  110.0, -110.0,  135.0, -135.0];
            ElOrient = [   0.0,    0.0,    0.0,    0.0,    0.0,    0.0,    0.0,   0.0,     0.0,   35.0,   35.0,   35.0,   35.0,   35.0,   35.0];
    end
    
    ExportSofaFlag  = true;            % Flag to determine if BRIRs should be exported in a SOFA-file.
    ExportWavFlag   = false;           % Flag to determine if BRIRs should be exported in indivudal WAV-files.
    ExportDSERcFlag = true;            % Flag to determine if BRIRs should be exported with direct sound and early reflections combined (WAV-files only).
    ExportDSERsFlag = true;            % Flag to determine if BRIRs should be exported with direct sound and early reflections separated (WAV-files only).
    DestinationPath = settings.BRIR_output_path;            % Folder where the resulting BRIRs will be saved.
    
    [~,~] = mkdir(settings.HRIR_folder); % ignore warning if directory already exists
    HRIR_Path = fullfile(settings.HRIR_folder, settings.HRIR_filename);
    
    % Append configuration to destination path
    DestinationPath = fullfile(DestinationPath, strrep(settings.HRIR_subject, ' ', '_'), ...
        sprintf('%s_%s_%s', SRIR_data.Room, SRIR_data.SourcePos, SRIR_data.ReceiverPos), ...
        NamingCond);

    % Initialize re-synthesized BRIR struct
    BRIR_data = create_BRIR_data('MixingTime',SRIR_data.MixingTime,...
                                 'HRTF_Subject',settings.HRIR_Subject,...
                                 'HRTF_Type',settings.HRIR_Type,...
                                 'HRTF_Path',HRIR_Path,...
                                 'BandsPerOctave',BandsPerOctave,...
                                 'EqTxx',EqTxx,...
                                 'RTModRegFreq',RTModRegFreq,...
                                 'Length',BRIR_Length,...
                                 'RenderingCondition',NamingCond,...
                                 'Attenuation',BRIR_Atten,...
                                 'AzOrient',AzOrient,...
                                 'ElOrient',ElOrient,...
                                 'DOAAzOffset',DOAAzOffset,...
                                 'DOAElOffset',DOAElOffset,...
                                 'QuantizeDOAFlag',QuantizeDOAFlag,...
                                 'DOADirections',DOADirections,...
                                 'ExportSofaFlag',ExportSofaFlag,...
                                 'ExportWavFlag',ExportWavFlag,...
                                 'ExportDSERcFlag',ExportDSERcFlag,...
                                 'ExportDSERsFlag',ExportDSERsFlag,...
                                 'DestinationPath',DestinationPath,...
                                 'fs',SRIR_data.fs);

    if strcmp(AnglesMode, '7.1.4') || strcmp(AnglesMode, '9.1.6')
        BRIR_data.Directions = [AzOrient; ElOrient]';
    end

    BRIR_data.AnglesMode = AnglesMode;
end


function Plot_data = init_Plot_data(SRIR_data, BRIR_data)
    % Initialize visualization struct (from SDM Toolbox, with additions)
    Plot_data = createVisualizationStruct(...
        'fs', SRIR_data.fs, 'DefaultRoom', 'Small', ... % or 'VerySmall, 'Medium', 'Large'
        'name', sprintf('%s_%s_%s', SRIR_data.Room, SRIR_data.SourcePos, SRIR_data.ReceiverPos));
    
    Plot_data.PlotAnalysisFlag = false;            % Flag to determine if analysis results should be plotted.
    Plot_data.PlotExportFlag   = false;            % Flag to determine if analysis result plots should be exported.
    Plot_data.DestinationPath  = BRIR_data.DestinationPath;
end


function SRIR_data = analyze(SRIR_data, SDM_Struct, Plot_data)
    %% Normalize RIRs
    SRIR_data.P_RIR = SRIR_data.P_RIR ./ max(SRIR_data.P_RIR);
    SRIR_data.Raw_RIR = SRIR_data.Raw_RIR ./ max(SRIR_data.Raw_RIR);
    
    %% Analysis
    % Estimate directional information using SDM. This function is a wrapper of
    % the SDM Toolbox DOA estimation (using TDOA analysis) to include some 
    % post-processing. The actual DOA estimation is performed by the SDMPar.m 
    % function of the SDM Toolbox.
    SRIR_data = Analyze_SRIR(SRIR_data, SDM_Struct);
    
    if Plot_data.PlotAnalysisFlag
        % Plot analysis results
        plot_name = [Plot_data.name, '_spatio_temporal'];
        if SRIR_data.AlignDOA
            plot_name = [plot_name, '_aligned']; 
        end
        
        Plot_Spec(SRIR_data, Plot_data, [Plot_data.name, '_time_frequency']);
        Plot_DOA(SRIR_data, Plot_data, plot_name);
    end
end


function BRIR_data=synthesize(SRIR_data, BRIR_data, Plot_data)
    %% Synthesis
    % 1. Pre-processing operations (massage HRTF directions, resolve DOA NaNs).
    
    % Read HRTF dataset for re-synthesis
    HRIR_data = Read_HRTF(BRIR_data);
    
    [SRIR_data, BRIR_data, HRIR_data, HRIR] = PreProcess_Synthesize_SDM_Binaural(SRIR_data, BRIR_data, HRIR_data);
    
    % -----------------------------------------------------------------------
    % 2. Rotate DOA information, if required
    if BRIR_data.DOAAzOffset || BRIR_data.DOAElOffset
        SRIR_data = Rotate_DOA(SRIR_data, BRIR_data.DOAAzOffset, BRIR_data.DOAElOffset);
    
        if Plot_data.PlotAnalysisFlag
            Plot_DOA(SRIR_data, Plot_data, [Plot_data.name, '_spatio_temporal_rotated']);
        end
    end
    
    % -----------------------------------------------------------------------
    % 3. Quantize DOA information, if required
    if BRIR_data.QuantizeDOAFlag
        [SRIR_data, ~] = QuantizeDOA(SRIR_data, ...
            BRIR_data.DOADirections, SRIR_data.DOAOnsetLength);
        
        if Plot_data.PlotAnalysisFlag
            Plot_DOA(SRIR_data, Plot_data, [Plot_data.name, '_spatio_temporal_quantized']);
        end
    end
    
    % -----------------------------------------------------------------------
    % 4. Compute parameters for RTMod Compensation
    
    % Synthesize one direction to extract the reverb compensation - solving the
    % SDM synthesis spectral whitening
    BRIR_Pre = Synthesize_SDM_Binaural(SRIR_data, BRIR_data, HRIR, [0, 0], true);
    
    % Using the pressure RIR as a reference for the reverberation compensation
    BRIR_data.ReferenceBRIR = [SRIR_data.P_RIR, SRIR_data.P_RIR];
    
    % Get the desired T30 from the Pressure RIR and the actual T30 from one
    % rendered BRIR
    [BRIR_data.DesiredT30, BRIR_data.OriginalT30, BRIR_data.RTFreqVector] = GetReverbTime(SRIR_data, BRIR_Pre, BRIR_data.BandsPerOctave, BRIR_data.EqTxx);
    
    % -----------------------------------------------------------------------
    % 5. Render BRIRs with RTMod compensation for the specified directions
    
    nDirs = size(BRIR_data.Directions, 1);
    
    % Render early reflections
    hbar = parfor_progressbar(nDirs, 'Please wait, rendering (step 1/2) ...', 'Name', Plot_data.name);
    parfor iDir = 1 : nDirs
        hbar.iterate(); %#ok<PFBNS>
        BRIR_early_temp = Synthesize_SDM_Binaural( ...
            SRIR_data, BRIR_data, HRIR, BRIR_data.Directions(iDir, :), false);
        BRIR_early(:, :, iDir) = Modify_Reverb_Slope(BRIR_data, BRIR_early_temp);
    end
    close(hbar);
    
    % Render late reverb
    BRIR_late = Synthesize_SDM_Binaural(SRIR_data, BRIR_data, HRIR, [0, 0], true);
    BRIR_late = Modify_Reverb_Slope(BRIR_data, BRIR_late, Plot_data);
    
    % Remove leading zeros
    save('dbg_rem_dly.mat','BRIR_early', 'BRIR_late')
    [BRIR_early, BRIR_late] = Remove_BRIR_Delay(BRIR_early, BRIR_late, -20);
    
    % -----------------------------------------------------------------------
    % 6. Apply AP processing for the late reverb
    
    % AllPass filtering for the late reverb (increasing diffuseness and
    % smoothing out the EDC)
    BRIR_data.allpass_delays = [37, 113, 215];  % in samples
    BRIR_data.allpass_RT     = [0.1, 0.1, 0.1]; % in seconds
    
    BRIR_late = Apply_Allpass(BRIR_late, BRIR_data);


    % -----------------------------------------------------------------------
    % 7. Split the BRIRs into components by time windowing
    [BRIR_DSER, BRIR_LR, BRIR_DS, BRIR_ER] = Split_BRIR(BRIR_early, BRIR_late, BRIR_data.MixingTime, BRIR_data.fs, 256);
    if Plot_data.PlotAnalysisFlag
        Plot_BRIR(BRIR_data, BRIR_DS, BRIR_ER, BRIR_LR, Plot_data);
    end

    % -----------------------------------------------------------------------
    % 8. Save BRIRs
    nFiles = BRIR_data.ExportSofaFlag + (BRIR_data.ExportWavFlag * nDirs) + 1;
    hbar = parfor_progressbar(nFiles, 'Please wait, saving (step 2/2) ...', 'Name', Plot_data.name);

    if BRIR_data.ExportSofaFlag
        hbar.iterate();
        Save_BRIR_sofa(BRIR_data, BRIR_DSER, BRIR_LR, sprintf('BRIR_%s_%s_%s_%s.sofa', SRIR_data.Room, SRIR_data.SourcePos, SRIR_data.ReceiverPos, BRIR_data.AnglesMode));
    end
    
    if BRIR_data.ExportWavFlag
        for iDir = 1 : nDirs
            hbar.iterate();
            if iDir == 1 % export identical late reverberation only once
                BRIR_LR_export = BRIR_LR;
            else
                BRIR_LR_export = [];
            end
            Save_BRIR_wav(BRIR_data, BRIR_DS(:, :, iDir), BRIR_DSER(:, :, iDir), BRIR_ER(:, :, iDir), BRIR_LR_export, BRIR_data.Directions(iDir, :));
        end
    end
    hbar.iterate();
    SaveRenderingStructs(SRIR_data, BRIR_data);
    close(hbar);
end
+89 −0

File added.

Preview size limit exceeded, changes collapsed.