diff --git a/Workspace_msvc/lib_rend.vcxproj b/Workspace_msvc/lib_rend.vcxproj index 865652649a8a320ff0769bd83170528445da8ab3..d1cda3290d9efd2128280b8dab8ddbe4e72272ac 100644 --- a/Workspace_msvc/lib_rend.vcxproj +++ b/Workspace_msvc/lib_rend.vcxproj @@ -202,14 +202,18 @@ + + + + diff --git a/apps/renderer.c b/apps/renderer.c index 7bee92331fab3321ff1a15ecff32ea6248297a23..1844cb92493f156aa58b0c75482828a5fc35dbf2 100644 --- a/apps/renderer.c +++ b/apps/renderer.c @@ -51,6 +51,9 @@ #include "ism_file_reader.h" #include "ls_custom_file_reader.h" #include "masa_file_reader.h" +#ifdef MASA_PREREND +#include "masa_file_writer.h" +#endif #include "prot.h" #include "render_config_reader.h" #include "wmc_auto.h" @@ -443,6 +446,9 @@ static int16_t getTotalNumInChannels( fprintf( stderr, "Error: %s\n", ivas_error_to_string( error ) ); exit( -1 ); } +#ifdef MASA_PREREND + IVAS_REND_GetNumAllObjects( hIvasRend, &numInputChannels ); /* In case of MASA output, modify the numInputChannels to contain all objects. Otherwise, keep the original value. 
*/ +#endif totalNumInChannels += numInputChannels; } @@ -571,6 +577,9 @@ int main( #endif RenderConfigReader *renderConfigReader = NULL; MasaFileReader *masaReaders[RENDERER_MAX_MASA_INPUTS]; +#ifdef MASA_PREREND + MasaFileWriter *masaWriter = NULL; +#endif IVAS_MASA_METADATA_HANDLE hMasaMetadata[RENDERER_MAX_MASA_INPUTS]; char audioFilePath[FILENAME_MAX]; AudioFileReader *audioReader = NULL; @@ -721,6 +730,26 @@ int main( setupWithSingleFormatInput( args, audioFilePath, positionProvider, masaReaders ); } +#ifdef MASA_PREREND + /* Check that there is allowed configuration for MASA format output */ + if ( args.outConfig.audioConfig == IVAS_REND_AUDIO_CONFIG_MASA1 || args.outConfig.audioConfig == IVAS_REND_AUDIO_CONFIG_MASA2 ) + { + if ( args.inConfig.numMasaBuses == 0 ) + { + fprintf( stderr, "\nInvalid configuration - Merging to MASA output requires MASA input and at least one another input to be present\n" ); + fprintf( stderr, "\nMASA input is missing\n" ); + exit( -1 ); + } + + if ( args.inConfig.numAudioObjects == 0 && args.inConfig.numMultiChannelBuses == 0 && args.inConfig.numAmbisonicsBuses == 0 ) + { + fprintf( stderr, "\nInvalid configuration - Merging to MASA output requires MASA input and at least one another input to be present\n" ); + fprintf( stderr, "\nNo object, multi-channel, or Ambisonic input present.\n" ); + exit( -1 ); + } + } +#endif + if ( AudioFileReader_open( &audioReader, audioFilePath ) != IVAS_ERR_OK ) { fprintf( stderr, "Error opening file: %s\n", audioFilePath ); @@ -827,6 +856,25 @@ int main( } } +#ifdef MASA_PREREND + /* Set up MASA writer for MASA output */ + if ( args.outConfig.audioConfig == IVAS_REND_AUDIO_CONFIG_MASA1 || args.outConfig.audioConfig == IVAS_REND_AUDIO_CONFIG_MASA2 ) + { + MasaFileWriter_open( args.outputFilePath, true, &masaWriter ); /* No delay for audio in renderer, so calling metadata writer in delayCompensated mode, i.e., no delay applied to meta */ + if ( masaWriter == NULL ) + { + fprintf( stderr, "Could not 
open MASA metadata file %s\n", args.outputFilePath ); + exit( -1 ); + } + } + + /* Set the total number of objects */ + if ( args.inConfig.numAudioObjects > 0 ) + { + IVAS_REND_SetTotalNumberOfObjects( hIvasRend, args.inConfig.numAudioObjects ); + } +#endif + IVAS_REND_LfePanMtx lfePanMatrix; /* parse input LFE panning matrix */ @@ -937,6 +985,14 @@ int main( fprintf( stderr, "Error: %s\n", ivas_error_to_string( error ) ); exit( -1 ); } + +#ifdef MASA_PREREND + /* With MASA output, all objects are handled at once, so add only one input having all objects in it */ + if ( args.outConfig.audioConfig == IVAS_REND_AUDIO_CONFIG_MASA1 || args.outConfig.audioConfig == IVAS_REND_AUDIO_CONFIG_MASA2 ) + { + break; + } +#endif } for ( i = 0; i < args.inConfig.numAmbisonicsBuses; ++i ) @@ -1165,6 +1221,43 @@ int main( for ( i = 0; i < args.inConfig.numAudioObjects; ++i ) { +#ifdef MASA_PREREND + if ( args.outConfig.audioConfig == IVAS_REND_AUDIO_CONFIG_MASA1 || args.outConfig.audioConfig == IVAS_REND_AUDIO_CONFIG_MASA2 ) + { + if ( i == 0 ) + { + IVAS_REND_ReadOnlyAudioBuffer tmpBuffer = getReadOnlySubBuffer( inBuffer, (int16_t) args.inConfig.audioObjects[i].inputChannelIndex, args.inConfig.numAudioObjects ); + + if ( ( error = IVAS_REND_FeedInputAudio( hIvasRend, ismIds[i], tmpBuffer ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "Error: %s\n", ivas_error_to_string( error ) ); + exit( -1 ); + } + } + + if ( ( error = IVAS_REND_FeedInputObjectMetadataToOMasa( hIvasRend, i, mtdBuffer.positions[i] ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "Error: %s\n", ivas_error_to_string( error ) ); + exit( -1 ); + } + } + else + { + IVAS_REND_ReadOnlyAudioBuffer tmpBuffer = getReadOnlySubBuffer( inBuffer, (int16_t) args.inConfig.audioObjects[i].inputChannelIndex, 1 ); + + if ( ( error = IVAS_REND_FeedInputAudio( hIvasRend, ismIds[i], tmpBuffer ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "Error: %s\n", ivas_error_to_string( error ) ); + exit( -1 ); + } + + if ( ( error = 
IVAS_REND_FeedInputObjectMetadata( hIvasRend, ismIds[i], mtdBuffer.positions[i] ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "Error: %s\n", ivas_error_to_string( error ) ); + exit( -1 ); + } + } +#else IVAS_REND_ReadOnlyAudioBuffer tmpBuffer = getReadOnlySubBuffer( inBuffer, (int16_t) args.inConfig.audioObjects[i].inputChannelIndex, 1 ); if ( ( error = IVAS_REND_FeedInputAudio( hIvasRend, ismIds[i], tmpBuffer ) ) != IVAS_ERR_OK ) @@ -1178,6 +1271,7 @@ int main( fprintf( stderr, "Error: %s\n", ivas_error_to_string( error ) ); exit( -1 ); } +#endif } for ( i = 0; i < args.inConfig.numAmbisonicsBuses; ++i ) @@ -1269,6 +1363,82 @@ int main( delayNumSamples -= (int16_t) outBufferSize; } +#ifdef MASA_PREREND + /* Write MASA metadata for MASA outputs */ + if ( args.outConfig.audioConfig == IVAS_REND_AUDIO_CONFIG_MASA1 || args.outConfig.audioConfig == IVAS_REND_AUDIO_CONFIG_MASA2 ) + { + IVAS_REND_AudioConfigType inputType1; + IVAS_REND_AudioConfigType inputType2; + MASA_DECODER_EXT_OUT_META_HANDLE hMetaOutput; + int16_t numInputFormats; + + inputType1 = IVAS_REND_AUDIO_CONFIG_TYPE_UNKNOWN; + inputType2 = IVAS_REND_AUDIO_CONFIG_TYPE_UNKNOWN; + hMetaOutput = NULL; + + numInputFormats = 0; + if ( args.inConfig.numAmbisonicsBuses > 0 ) + { + numInputFormats++; + inputType1 = IVAS_REND_AUDIO_CONFIG_TYPE_AMBISONICS; + } + if ( args.inConfig.numMultiChannelBuses > 0 ) + { + numInputFormats++; + if ( inputType1 == IVAS_REND_AUDIO_CONFIG_TYPE_UNKNOWN ) + { + inputType1 = IVAS_REND_AUDIO_CONFIG_TYPE_CHANNEL_BASED; + } + } + if ( args.inConfig.numMasaBuses > 0 ) + { + numInputFormats++; + if ( inputType1 == IVAS_REND_AUDIO_CONFIG_TYPE_UNKNOWN ) + { + inputType1 = IVAS_REND_AUDIO_CONFIG_TYPE_MASA; + } + } + if ( args.inConfig.numAudioObjects > 0 ) + { + numInputFormats++; + if ( inputType1 == IVAS_REND_AUDIO_CONFIG_TYPE_UNKNOWN ) + { + inputType1 = IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED; + } + } + + if ( numInputFormats == 1 ) + { + IVAS_REND_GetMasaMetadata( hIvasRend, 
&hMetaOutput, inputType1 ); + } + else + { + if ( args.inConfig.numAmbisonicsBuses > 0 && args.inConfig.numMultiChannelBuses > 0 ) + { + inputType2 = IVAS_REND_AUDIO_CONFIG_TYPE_CHANNEL_BASED; + IVAS_REND_MergeMasaMetadata( hIvasRend, &hMetaOutput, inputType1, inputType2 ); + } + + if ( ( args.inConfig.numAmbisonicsBuses > 0 || args.inConfig.numMultiChannelBuses > 0 ) && args.inConfig.numMasaBuses > 0 ) + { + inputType2 = IVAS_REND_AUDIO_CONFIG_TYPE_MASA; + IVAS_REND_MergeMasaMetadata( hIvasRend, &hMetaOutput, inputType1, inputType2 ); + } + + if ( ( args.inConfig.numAmbisonicsBuses > 0 || args.inConfig.numMultiChannelBuses > 0 || args.inConfig.numMasaBuses > 0 ) && args.inConfig.numAudioObjects > 0 ) + { + inputType2 = IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED; + IVAS_REND_MergeMasaMetadata( hIvasRend, &hMetaOutput, inputType1, inputType2 ); + } + } + + if ( ( error = MasaFileWriter_writeFrame( masaWriter, hMetaOutput ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nError writing MASA metadata to file: %s\n", MasaFileWriter_getFilePath( masaWriter ) ); + } + } +#endif + frame++; if ( !args.quietModeEnabled ) { @@ -1322,6 +1492,9 @@ int main( { LfeRoutingConfig_close( lfeRoutingConfigs[i] ); } +#endif +#ifdef MASA_PREREND + MasaFileWriter_close( &masaWriter ); #endif AudioFileReader_close( &audioReader ); AudioFileWriter_close( &audioWriter ); diff --git a/lib_com/options.h b/lib_com/options.h index b04268f7ced758ba683b284281e983f3e9982dc7..0e7035a632b4d87c3470303a2b0b6a0c078aa386 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -172,6 +172,8 @@ #define EXTERNAL_ORIENTATIONS /* Nokia: Contribution 41: (external) orientation information handling */ +#define MASA_PREREND /* Nokia: Contribution 42: Support for IVAS_rend to merge MASA + other format to MASA */ + /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ diff --git a/lib_rend/ivas_dirac_ana.c b/lib_rend/ivas_dirac_ana.c new file mode 100644 index 
0000000000000000000000000000000000000000..3a749570cf5d1ec7e78432eb6ad0399eba0e1d25 --- /dev/null +++ b/lib_rend/ivas_dirac_ana.c @@ -0,0 +1,406 @@ +/****************************************************************************************************** + + (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository. All Rights Reserved. + + This software is protected by copyright law and by international treaties. + The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository retain full ownership rights in their respective contributions in + the software. This notice grants no license of any kind, including but not limited to patent + license, nor is any license granted by implication, estoppel or otherwise. + + Contributors are required to enter into the IVAS codec Public Collaboration agreement before making + contributions. + + This software is provided "AS IS", without any express or implied warranties. The software is in the + development stage. It is intended exclusively for experts who have experience with such software and + solely for the purpose of inspection. All implied warranties of non-infringement, merchantability + and fitness for a particular purpose are hereby disclaimed and excluded. 
+ + Any dispute, controversy or claim arising under or in relation to providing this software shall be + submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in + accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and + the United Nations Convention on Contracts on the International Sales of Goods. + +*******************************************************************************************************/ + +#include "options.h" +#include +#include "ivas_cnst.h" +#include "ivas_prot_rend.h" +#include "ivas_prot.h" +#include "prot.h" +#include "ivas_stat_rend.h" +#include "ivas_rom_com.h" +#ifdef DEBUGGING +#include "debug.h" +#endif +#include "wmc_auto.h" + +#ifdef MASA_PREREND + +/*------------------------------------------------------------------------- + * Local function prototypes + *------------------------------------------------------------------------*/ + +static void ivas_dirac_param_est_ana( DIRAC_ANA_HANDLE hDirAC, float data_f[][L_FRAME48k], float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], const int16_t input_frame ); + +static void ivas_dirac_dmx( float data_in_f[][L_FRAME48k], const int16_t input_frame, const int16_t nchan_transport ); + + +/*--------------------------------------------------------------------------* + * ivas_dirac_ana_open() + * + * Allocate and initialize DIRAC handle + *--------------------------------------------------------------------------*/ + +ivas_error ivas_dirac_ana_open( + DIRAC_ANA_HANDLE *hDirACPtr, /* i/o: DIRAC data handle pointer */ + int32_t input_Fs /* i: Sampling frequency */ +) +{ + int16_t i, j; + 
DIRAC_ANA_HANDLE hDirAC; + int16_t numAnalysisChannels; + int16_t maxBin; + ivas_error error; + + error = IVAS_ERR_OK; + + if ( ( hDirAC = (DIRAC_ANA_HANDLE) malloc( sizeof( DIRAC_ANA_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DIRAC\n" ) ); + } + + numAnalysisChannels = FOA_CHANNELS; + + /* Determine the number of bands */ + hDirAC->nbands = MASA_FREQUENCY_BANDS; + + /* Determine band grouping */ + mvs2s( MASA_band_grouping_24, hDirAC->band_grouping, 24 + 1 ); + + maxBin = (int16_t) ( input_Fs * INV_CLDFB_BANDWIDTH + 0.5f ); + for ( i = 1; i < hDirAC->nbands + 1; i++ ) + { + if ( hDirAC->band_grouping[i] >= maxBin ) + { + hDirAC->band_grouping[i] = maxBin; + hDirAC->nbands = i; + break; + } + } + + /* Determine block grouping */ + mvs2s( DirAC_block_grouping, hDirAC->block_grouping, MAX_PARAM_SPATIAL_SUBFRAMES + 1 ); + + /* open/initialize CLDFB */ + hDirAC->num_Cldfb_instances = numAnalysisChannels; + for ( i = 0; i < hDirAC->num_Cldfb_instances; i++ ) + { + openCldfb( &( hDirAC->cldfbAnaEnc[i] ), CLDFB_ANALYSIS, input_Fs, CLDFB_PROTOTYPE_5_00MS ); + } + + /* intensity 3-dim */ + for ( i = 0; i < DIRAC_NUM_DIMS; i++ ) + { + hDirAC->direction_vector_m[i] = (float **) malloc( MAX_PARAM_SPATIAL_SUBFRAMES * sizeof( float * ) ); + + for ( j = 0; j < MAX_PARAM_SPATIAL_SUBFRAMES; j++ ) + { + hDirAC->direction_vector_m[i][j] = (float *) malloc( MASA_FREQUENCY_BANDS * sizeof( float ) ); + set_zero( hDirAC->direction_vector_m[i][j], MASA_FREQUENCY_BANDS ); + } + } + + for ( i = 0; i < DIRAC_NUM_DIMS; i++ ) + { + for ( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ ) + { + hDirAC->buffer_intensity_real[i][j] = (float *) malloc( MASA_FREQUENCY_BANDS * sizeof( float ) ); + set_zero( hDirAC->buffer_intensity_real[i][j], MASA_FREQUENCY_BANDS ); + } + } + + set_zero( hDirAC->buffer_energy, DIRAC_NO_COL_AVG_DIFF * MASA_FREQUENCY_BANDS ); + + hDirAC->index_buffer_intensity = 0; + + if ( ( hDirAC->hMasaOut = 
(MASA_DECODER_EXT_OUT_META_HANDLE) malloc( sizeof( MASA_DECODER_EXT_OUT_META ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA decoder\n" ) ); + } + + if ( ( hDirAC->sph_grid16 = (SPHERICAL_GRID_DATA *) malloc( sizeof( SPHERICAL_GRID_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA decoder\n" ) ); + } + generate_gridEq( hDirAC->sph_grid16 ); + + ( *hDirACPtr ) = hDirAC; + + return error; +} + + +/*--------------------------------------------------------------------------* + * ivas_dirac_ana_close() + * + * Close DIRAC handle + *--------------------------------------------------------------------------*/ + +void ivas_dirac_ana_close( + DIRAC_ANA_HANDLE( *hDirAC ) /* i/o: analysis DIRAC handle */ +) +{ + int16_t i, j; + + if ( hDirAC == NULL || *hDirAC == NULL ) + { + return; + } + + for ( i = 0; i < ( *hDirAC )->num_Cldfb_instances; i++ ) + { + deleteCldfb( &( ( *hDirAC )->cldfbAnaEnc[i] ) ); + } + + for ( i = 0; i < DIRAC_NUM_DIMS; i++ ) + { + for ( j = 0; j < MAX_PARAM_SPATIAL_SUBFRAMES; j++ ) + { + free( ( *hDirAC )->direction_vector_m[i][j] ); + ( *hDirAC )->direction_vector_m[i][j] = NULL; + } + + for ( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ ) + { + free( ( *hDirAC )->buffer_intensity_real[i][j] ); + ( *hDirAC )->buffer_intensity_real[i][j] = NULL; + } + + free( ( *hDirAC )->direction_vector_m[i] ); + ( *hDirAC )->direction_vector_m[i] = NULL; + } + + free( ( *hDirAC )->hMasaOut ); + ( *hDirAC )->hMasaOut = NULL; + free( ( *hDirAC )->sph_grid16 ); + ( *hDirAC )->sph_grid16 = NULL; + + free( ( *hDirAC ) ); + ( *hDirAC ) = NULL; + + return; +} + + +/*--------------------------------------------------------------------------* + * ivas_dirac_ana() + * + * DIRAC analysis function + *--------------------------------------------------------------------------*/ + +void ivas_dirac_ana( + DIRAC_ANA_HANDLE hDirAC, /* i/o: DIRAC analysis handle */ + float 
data_in_f[][L_FRAME48k], /* i/o: Input / transport audio signals */ + const int16_t input_frame, /* i : Input frame size */ + const int16_t nchan_transport /* i : Number of transport channels */ +) +{ + float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + + + /* Estimate MASA parameters from the SBA signals */ + ivas_dirac_param_est_ana( hDirAC, data_in_f, elevation_m_values, azimuth_m_values, energyRatio, spreadCoherence, surroundingCoherence, input_frame ); + + /* Create MASA metadata buffer from the estimated values */ + ivas_create_masa_out_meta( hDirAC->hMasaOut, hDirAC->sph_grid16, nchan_transport, elevation_m_values, azimuth_m_values, energyRatio, spreadCoherence, surroundingCoherence ); + + /* Downmix */ + ivas_dirac_dmx( data_in_f, input_frame, nchan_transport ); + + return; +} + + +/*--------------------------------------------------------------------------* + * Local functions + *--------------------------------------------------------------------------*/ + +/* Estimate MASA parameters from the SBA signals */ +static void ivas_dirac_param_est_ana( + DIRAC_ANA_HANDLE hDirAC, + float data_f[][L_FRAME48k], + float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], + float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], + float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], + float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], + float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], + const int16_t input_frame ) +{ + float reference_power[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX]; + int16_t ts, i, d, j; + int16_t num_freq_bands, 
index; + float dir_v[DIRAC_NUM_DIMS]; + int16_t l_ts; + float Foa_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + float Foa_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + float intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS]; + float direction_vector[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS]; + float diffuseness_vector[MASA_FREQUENCY_BANDS]; + float diffuseness_m[MASA_FREQUENCY_BANDS]; + + int16_t band_m_idx, block_m_idx; + float renormalization_factor_diff[MASA_FREQUENCY_BANDS]; + float norm_tmp; + int16_t mrange[2]; + int16_t brange[2]; + int16_t numAnalysisChannels; + + num_freq_bands = hDirAC->nbands; + l_ts = input_frame / CLDFB_NO_COL_MAX; + numAnalysisChannels = FOA_CHANNELS; + + + /* do processing over all CLDFB time slots */ + for ( block_m_idx = 0; block_m_idx < MAX_PARAM_SPATIAL_SUBFRAMES; block_m_idx++ ) + { + mrange[0] = hDirAC->block_grouping[block_m_idx]; + mrange[1] = hDirAC->block_grouping[block_m_idx + 1]; + + for ( band_m_idx = 0; band_m_idx < hDirAC->nbands; band_m_idx++ ) + { + hDirAC->direction_vector_m[0][block_m_idx][band_m_idx] = 0.0f; + hDirAC->direction_vector_m[1][block_m_idx][band_m_idx] = 0.0f; + hDirAC->direction_vector_m[2][block_m_idx][band_m_idx] = 0.0f; + } + + /* Need to initialize renormalization_factors, and variables to be normalized */ + set_zero( renormalization_factor_diff, hDirAC->nbands ); + set_zero( diffuseness_m, hDirAC->nbands ); + set_zero( hDirAC->energy[block_m_idx], MASA_FREQUENCY_BANDS ); + + for ( ts = mrange[0]; ts < mrange[1]; ts++ ) + { + for ( i = 0; i < numAnalysisChannels; i++ ) + { + cldfbAnalysis_ts( &( data_f[i][l_ts * ts] ), Foa_RealBuffer[i], Foa_ImagBuffer[i], l_ts, hDirAC->cldfbAnaEnc[i] ); + } + + /* Compute omni energy for metadata processing */ + for ( band_m_idx = 0; band_m_idx < num_freq_bands; band_m_idx++ ) + { + brange[0] = hDirAC->band_grouping[band_m_idx]; + brange[1] = hDirAC->band_grouping[band_m_idx + 1]; + for ( j = brange[0]; j < brange[1]; j++ ) + { + 
hDirAC->energy[block_m_idx][band_m_idx] += Foa_RealBuffer[0][j] * Foa_RealBuffer[0][j] + Foa_ImagBuffer[0][j] * Foa_ImagBuffer[0][j]; + } + } + + /* Direction estimation */ + computeIntensityVector_ana( hDirAC->band_grouping, Foa_RealBuffer, Foa_ImagBuffer, num_freq_bands, intensity_real ); + computeDirectionVectors( intensity_real[0], intensity_real[1], intensity_real[2], 0, num_freq_bands, direction_vector[0], direction_vector[1], direction_vector[2] ); + + /* Power estimation for diffuseness */ + computeReferencePower_ana( hDirAC->band_grouping, Foa_RealBuffer, Foa_ImagBuffer, reference_power[ts], num_freq_bands ); + + /* Fill buffers of length "averaging_length" time slots for intensity and energy */ + hDirAC->index_buffer_intensity = ( hDirAC->index_buffer_intensity % DIRAC_NO_COL_AVG_DIFF ) + 1; /* averaging_length = 32 */ + index = hDirAC->index_buffer_intensity; + for ( i = 0; i < DIRAC_NUM_DIMS; i++ ) + { + /* only real part needed */ + mvr2r( intensity_real[i], &( hDirAC->buffer_intensity_real[i][index - 1][0] ), num_freq_bands ); + } + mvr2r( reference_power[ts], &( hDirAC->buffer_energy[( index - 1 ) * num_freq_bands] ), num_freq_bands ); + + computeDiffuseness( hDirAC->buffer_intensity_real, hDirAC->buffer_energy, num_freq_bands, diffuseness_vector ); + + for ( band_m_idx = 0; band_m_idx < hDirAC->nbands; band_m_idx++ ) + { + norm_tmp = reference_power[ts][band_m_idx] * ( 1 - diffuseness_vector[band_m_idx] ); + + hDirAC->direction_vector_m[0][block_m_idx][band_m_idx] += norm_tmp * direction_vector[0][band_m_idx]; + hDirAC->direction_vector_m[1][block_m_idx][band_m_idx] += norm_tmp * direction_vector[1][band_m_idx]; + hDirAC->direction_vector_m[2][block_m_idx][band_m_idx] += norm_tmp * direction_vector[2][band_m_idx]; + + diffuseness_m[band_m_idx] += reference_power[ts][band_m_idx] * diffuseness_vector[band_m_idx]; + renormalization_factor_diff[band_m_idx] += reference_power[ts][band_m_idx]; + } + } + + for ( band_m_idx = 0; band_m_idx < hDirAC->nbands; 
band_m_idx++ ) + { + for ( d = 0; d < DIRAC_NUM_DIMS; d++ ) + { + dir_v[d] = hDirAC->direction_vector_m[d][block_m_idx][band_m_idx]; + } + ivas_qmetadata_direction_vector_to_azimuth_elevation( dir_v, &azimuth_m_values[block_m_idx][band_m_idx], &elevation_m_values[block_m_idx][band_m_idx] ); + } + + /* Determine energy ratios */ + for ( band_m_idx = 0; band_m_idx < hDirAC->nbands; band_m_idx++ ) + { + if ( renormalization_factor_diff[band_m_idx] > EPSILON ) + { + diffuseness_m[band_m_idx] /= renormalization_factor_diff[band_m_idx]; + } + else + { + diffuseness_m[band_m_idx] = 0.0f; + } + + energyRatio[block_m_idx][band_m_idx] = 1.0f - diffuseness_m[band_m_idx]; + } + + /* Todo Nokia: Implement coherence analysis */ + for ( band_m_idx = 0; band_m_idx < hDirAC->nbands; band_m_idx++ ) + { + spreadCoherence[block_m_idx][band_m_idx] = 0.0f; + surroundingCoherence[block_m_idx][band_m_idx] = 0.0f; + } + } + + return; +} + + +/* Compute downmix */ +static void ivas_dirac_dmx( + float data_in_f[][L_FRAME48k], + const int16_t input_frame, + const int16_t nchan_transport ) +{ + int16_t i; + float data_out_f[MASA_MAX_TRANSPORT_CHANNELS][L_FRAME48k]; + + if ( nchan_transport == 2 ) + { + v_add( data_in_f[0], data_in_f[1], data_out_f[0], input_frame ); + v_multc( data_out_f[0], 0.5f, data_out_f[0], input_frame ); + + v_sub( data_in_f[0], data_in_f[1], data_out_f[1], input_frame ); + v_multc( data_out_f[1], 0.5f, data_out_f[1], input_frame ); + + for ( i = 0; i < nchan_transport; i++ ) + { + mvr2r( data_out_f[i], data_in_f[i], input_frame ); + } + } + + return; +} + +#endif /* MASA_PREREND */ diff --git a/lib_rend/ivas_masa_merge.c b/lib_rend/ivas_masa_merge.c new file mode 100644 index 0000000000000000000000000000000000000000..68372746940179d59b9b53e331516aad316e21e9 --- /dev/null +++ b/lib_rend/ivas_masa_merge.c @@ -0,0 +1,362 @@ +/****************************************************************************************************** + +(C) 2022-2023 IVAS codec Public 
Collaboration with portions copyright Dolby International AB, Ericsson AB, +Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +contributors to this repository. All Rights Reserved. + +This software is protected by copyright law and by international treaties. +The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, +Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., +Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, +Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other +contributors to this repository retain full ownership rights in their respective contributions in +the software. This notice grants no license of any kind, including but not limited to patent +license, nor is any license granted by implication, estoppel or otherwise. + +Contributors are required to enter into the IVAS codec Public Collaboration agreement before making +contributions. + +This software is provided "AS IS", without any express or implied warranties. The software is in the +development stage. It is intended exclusively for experts who have experience with such software and +solely for the purpose of inspection. All implied warranties of non-infringement, merchantability +and fitness for a particular purpose are hereby disclaimed and excluded. 
+ +Any dispute, controversy or claim arising under or in relation to providing this software shall be +submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in +accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and +the United Nations Convention on Contracts on the International Sales of Goods. + +*******************************************************************************************************/ + +#include +#include "options.h" +#include "lib_rend.h" +#include "ivas_prot_rend.h" +#include "ivas_prot.h" +#include "ivas_cnst.h" +#include "prot.h" +#include "wmc_auto.h" + +#ifdef MASA_PREREND + + +static void copy_masa_meta_tile( + MASA_DECODER_EXT_OUT_META_HANDLE outMeta, /* o: metadata to be written */ + MASA_DECODER_EXT_OUT_META_HANDLE inMeta, /* i: input metadata */ + const uint8_t sf, /* i: sub-frame index */ + const uint8_t band /* i: band index */ +); + +static void full_stream_merge( + MASA_DECODER_EXT_OUT_META_HANDLE outMeta, /* o: Merged metadata output */ + MASA_DECODER_EXT_OUT_META_HANDLE inMeta1, /* i: Input metadata 1 */ + float inEne1[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* i/o: TF-energy of input 1. after merge, contains the energy of the merged signal */ + MASA_DECODER_EXT_OUT_META_HANDLE inMeta2, /* i: Input metadata 2 */ + float inEne2[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS] /* i: TF-energy of input 2 */ +); + +static void diffuse_meta_merge_1x1( + MASA_DECODER_EXT_OUT_META_HANDLE outMeta, /* o: Merged metadata output */ + MASA_DECODER_EXT_OUT_META_HANDLE inMeta, /* i: Input metadata 1 */ + float inEne[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* i/o: TF-energy of input 1. 
after merge, contains the energy of the merged signal */ + MASA_DECODER_EXT_OUT_META_HANDLE inMetaISM, /* i: Input metadata 2 */ + float inEneISM[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS] /* i: TF-energy of input 2 */ +); + +void copy_masa_meta_tile( + MASA_DECODER_EXT_OUT_META_HANDLE outMeta, /* o: metadata to be written */ + MASA_DECODER_EXT_OUT_META_HANDLE inMeta, /* i: input metadata */ + const uint8_t sf, /* i: sub-frame index */ + const uint8_t band /* i: band index */ +) +{ + outMeta->directionIndex[0][sf][band] = inMeta->directionIndex[0][sf][band]; + outMeta->directToTotalRatio[0][sf][band] = inMeta->directToTotalRatio[0][sf][band]; + outMeta->spreadCoherence[0][sf][band] = inMeta->spreadCoherence[0][sf][band]; + + outMeta->surroundCoherence[sf][band] = inMeta->surroundCoherence[sf][band]; + outMeta->diffuseToTotalRatio[sf][band] = inMeta->diffuseToTotalRatio[sf][band]; + + if ( inMeta->descriptiveMeta.numberOfDirections == 1 ) + { + outMeta->directionIndex[1][sf][band] = inMeta->directionIndex[1][sf][band]; + outMeta->directToTotalRatio[1][sf][band] = inMeta->directToTotalRatio[1][sf][band]; + outMeta->spreadCoherence[1][sf][band] = inMeta->spreadCoherence[1][sf][band]; + } + else + { + /* make sure the output has zeroed data in the second direction */ + outMeta->directionIndex[1][sf][band] = SPH_IDX_FRONT; + outMeta->directToTotalRatio[1][sf][band] = 0u; + outMeta->spreadCoherence[1][sf][band] = 0u; + } + + return; +} + +void copy_masa_descriptive_meta( + MASA_DECRIPTIVE_META *outMeta, /* o: metadata to be written */ + MASA_DECRIPTIVE_META *inMeta /* i: input metadata */ +) +{ + uint8_t char_idx; + for ( char_idx = 0; char_idx < 8; char_idx++ ) + { + outMeta->formatDescriptor[char_idx] = inMeta->formatDescriptor[char_idx]; + } + outMeta->numberOfDirections = inMeta->numberOfDirections; + outMeta->numberOfChannels = inMeta->numberOfChannels; + outMeta->sourceFormat = inMeta->sourceFormat; + outMeta->transportDefinition = 
inMeta->transportDefinition;
+    outMeta->channelAngle = inMeta->channelAngle;
+    outMeta->channelDistance = inMeta->channelDistance;
+    outMeta->channelLayout = inMeta->channelLayout;
+}
+
+/*--------------------------------------------------------------------------*
+ * diffuse_meta_merge_1x1()
+ *
+ * Tile-wise merge of two metadata streams, where the second one (inMetaISM)
+ * is the object-based input. For every (subframe, band) tile the direct
+ * energy of input 1 is compared against a criterion formed from the ISM
+ * energy; the winner supplies direction and coherences of direction 0.
+ * When the ISM tile wins, the direct-to-total ratio is recomputed so that
+ * the diffuse energy of input 1 is kept as the target and the result is
+ * clipped by the ISM ratio. Direction 1 of the output is always reset.
+ * inEne is updated in place to hold the summed energy of both inputs.
+ *--------------------------------------------------------------------------*/
+
+void diffuse_meta_merge_1x1(
+    MASA_DECODER_EXT_OUT_META_HANDLE outMeta,                        /* o: Merged metadata output */
+    MASA_DECODER_EXT_OUT_META_HANDLE inMeta,                         /* i: Input metadata 1 */
+    float inEne[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],  /* i/o: TF-energy of input 1. after merge, contains the energy of the merged signal */
+    MASA_DECODER_EXT_OUT_META_HANDLE inMetaISM,                      /* i: Input metadata 2 */
+    float inEneISM[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS] /* i: TF-energy of input 2 */
+)
+{
+    int8_t sf, band;
+
+    for ( sf = 0; sf < MAX_PARAM_SPATIAL_SUBFRAMES; sf++ )
+    {
+        for ( band = 0; band < MASA_FREQUENCY_BANDS; band++ )
+        {
+            float energyTimesRatio, energyTimesRatioISM, total_diff_nrg, dir_nrg_ratio, total_nrg;
+            float dir_ratio_ism;
+
+            /* All input values of this tile are read before the output tile is written */
+            energyTimesRatio = (float) ( inMeta->directToTotalRatio[0][sf][band] ) / UINT8_MAX * inEne[sf][band];
+
+            total_nrg = inEne[sf][band] + inEneISM[sf][band];
+
+            /* target is original MASA diffuseness */
+            total_diff_nrg = (float) ( inMeta->diffuseToTotalRatio[sf][band] ) / UINT8_MAX * inEne[sf][band];
+            /* criterion is mean of ISM ratio and new ratio */
+            dir_ratio_ism = (float) ( inMetaISM->directToTotalRatio[0][sf][band] ) / UINT8_MAX;
+
+            energyTimesRatioISM = ( dir_ratio_ism + ( 1.0f - total_diff_nrg / ( EPSILON + total_nrg ) ) ) / 2.0f * inEneISM[sf][band];
+
+            if ( energyTimesRatioISM > energyTimesRatio )
+            {
+                float new_dir_ratio, new_diff_ratio;
+                outMeta->directionIndex[0][sf][band] = inMetaISM->directionIndex[0][sf][band];
+                outMeta->directToTotalRatio[0][sf][band] = inMetaISM->directToTotalRatio[0][sf][band];
+                outMeta->spreadCoherence[0][sf][band] = inMetaISM->spreadCoherence[0][sf][band];
+
+                outMeta->surroundCoherence[sf][band] = inMetaISM->surroundCoherence[sf][band];
+
+                dir_nrg_ratio = 1.0f - total_diff_nrg / ( EPSILON + total_nrg ); /* new dir ratio */
+                new_dir_ratio = min( dir_nrg_ratio, dir_ratio_ism );             /* clip with original ISM dir */
+                outMeta->directToTotalRatio[0][sf][band] = (uint8_t) floorf( new_dir_ratio * UINT8_MAX );
+                new_diff_ratio = 1.0f - new_dir_ratio;
+                outMeta->diffuseToTotalRatio[sf][band] = (uint8_t) floorf( new_diff_ratio * UINT8_MAX );
+            }
+            else
+            {
+                /* use the plain original meta for this tile */
+                outMeta->directionIndex[0][sf][band] = inMeta->directionIndex[0][sf][band];
+                outMeta->directToTotalRatio[0][sf][band] = inMeta->directToTotalRatio[0][sf][band];
+                outMeta->spreadCoherence[0][sf][band] = inMeta->spreadCoherence[0][sf][band];
+
+                outMeta->surroundCoherence[sf][band] = inMeta->surroundCoherence[sf][band];
+                outMeta->diffuseToTotalRatio[sf][band] = inMeta->diffuseToTotalRatio[sf][band];
+            }
+            /* The merged stream carries one direction only: neutralize direction 1 */
+            outMeta->directionIndex[1][sf][band] = SPH_IDX_FRONT;
+            outMeta->directToTotalRatio[1][sf][band] = 0u;
+            outMeta->spreadCoherence[1][sf][band] = 0u;
+
+            inEne[sf][band] += inEneISM[sf][band]; /* Update energy for subsequent mergings */
+        }
+    }
+
+    /* Set descriptive meta for mixed format */
+    outMeta->descriptiveMeta.sourceFormat = 0u;
+    outMeta->descriptiveMeta.transportDefinition = 0u;
+    outMeta->descriptiveMeta.channelAngle = 0u;
+    outMeta->descriptiveMeta.channelDistance = 0u;
+    outMeta->descriptiveMeta.channelLayout = 0u;
+    outMeta->descriptiveMeta.numberOfDirections = 0u;
+    /* Number of transports should be set outside. */
+
+    return;
+}
+
+/*--------------------------------------------------------------------------*
+ * full_stream_merge()
+ *
+ * Tile-wise selection between two metadata streams: for every
+ * (subframe, band) tile the stream with the larger total direct energy
+ * (summed over its directions) is copied to the output as a whole.
+ * inEne1 is updated in place to hold the summed energy of both inputs.
+ *--------------------------------------------------------------------------*/
+
+void full_stream_merge(
+    MASA_DECODER_EXT_OUT_META_HANDLE outMeta,                       /* o: Merged metadata output */
+    MASA_DECODER_EXT_OUT_META_HANDLE inMeta1,                       /* i: Input metadata 1 */
+    float inEne1[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* i/o: TF-energy of input 1. after merge, contains the energy of the merged signal */
+    MASA_DECODER_EXT_OUT_META_HANDLE inMeta2,                       /* i: Input metadata 2 */
+    float inEne2[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]  /* i: TF-energy of input 2 */
+)
+{
+    float dir_nrg_1, dir_nrg_2;
+    uint8_t n_dirs_1, n_dirs_2;
+    uint8_t sf, band;
+
+    /* full stream select based on total direct energy */
+    /* The direction counts are latched here, before any output field is written */
+    n_dirs_1 = inMeta1->descriptiveMeta.numberOfDirections + 1u; /* to 1-based */
+    n_dirs_2 = inMeta2->descriptiveMeta.numberOfDirections + 1u;
+
+    for ( sf = 0; sf < MAX_PARAM_SPATIAL_SUBFRAMES; sf++ )
+    {
+        for ( band = 0; band < MASA_FREQUENCY_BANDS; band++ )
+        {
+            dir_nrg_1 = (float) ( inMeta1->directToTotalRatio[0][sf][band] ) / UINT8_MAX * inEne1[sf][band];
+            dir_nrg_2 = (float) ( inMeta2->directToTotalRatio[0][sf][band] ) / UINT8_MAX * inEne2[sf][band];
+
+            if ( n_dirs_1 == 2 )
+            {
+                dir_nrg_1 += (float) ( inMeta1->directToTotalRatio[1][sf][band] ) / UINT8_MAX * inEne1[sf][band];
+            }
+
+            if ( n_dirs_2 == 2 )
+            {
+                dir_nrg_2 += (float) ( inMeta2->directToTotalRatio[1][sf][band] ) / UINT8_MAX * inEne2[sf][band];
+            }
+
+            /* On equal direct energy, input 2 is selected */
+            if ( dir_nrg_1 > dir_nrg_2 )
+            {
+                copy_masa_meta_tile( outMeta, inMeta1, sf, band );
+            }
+            else
+            {
+                copy_masa_meta_tile( outMeta, inMeta2, sf, band );
+            }
+
+            inEne1[sf][band] += inEne2[sf][band]; /* Update energy for subsequent mergings */
+        }
+    }
+
+    /* Set descriptive meta for mixed format */
+    outMeta->descriptiveMeta.sourceFormat = 0u;
+    outMeta->descriptiveMeta.transportDefinition = 0u;
+    outMeta->descriptiveMeta.channelAngle = 0u;
+    outMeta->descriptiveMeta.channelDistance = 0u;
+    outMeta->descriptiveMeta.channelLayout = 0u;
+    if ( n_dirs_1 == 2 || n_dirs_2 == 2 )
+    {
+        outMeta->descriptiveMeta.numberOfDirections = 1u;
+    }
+    else
+    {
+        outMeta->descriptiveMeta.numberOfDirections = 0u;
+    }
+    /* Number of transports should be set outside. */
+
+    return;
+}
+
+/*--------------------------------------------------------------------------*
+ * ivas_prerend_merge_masa_metadata()
+ *
+ * Merges two metadata streams into outMeta. When exactly one of the inputs
+ * is object-based and both streams have one direction, the diffuseness
+ * preserving merge is used (with the object-based stream passed as the
+ * second stream); in every other case the full stream merge is used.
+ * After the call inEne1 holds the energy of the merged signal.
+ *--------------------------------------------------------------------------*/
+
+void ivas_prerend_merge_masa_metadata(
+    MASA_DECODER_EXT_OUT_META_HANDLE outMeta,                       /* o: Merged metadata output */
+    MASA_DECODER_EXT_OUT_META_HANDLE inMeta1,                       /* i: Input metadata 1 */
+    IVAS_REND_AudioConfigType inType1,                              /* i: Type of input 1 */
+    float inEne1[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* i/o: TF-energy of input 1. after merge, contains the energy of the merged signal */
+    MASA_DECODER_EXT_OUT_META_HANDLE inMeta2,                       /* i: Input metadata 2 */
+    IVAS_REND_AudioConfigType inType2,                              /* i: Type of input 2 */
+    float inEne2[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]  /* i: TF-energy of input 2 */
+)
+{
+    /* mixing ISMs with non-ISM use different merge */
+    if ( inType1 == IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED && inType2 != IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED && ( inMeta1->descriptiveMeta.numberOfDirections == 0u && inMeta2->descriptiveMeta.numberOfDirections == 0u ) )
+    {
+        int16_t sf, band;
+
+        /* meta_1 is ISM and both are 1dir */
+        diffuse_meta_merge_1x1( outMeta, inMeta2, inEne2, inMeta1, inEne1 );
+
+        /* The call above accumulates the merged energy into its first energy argument, which is
+           inEne2 in this swapped call (as before). Mirror it into inEne1 so that inEne1 holds the
+           merged energy in this branch as well, as stated in the parameter description. */
+        for ( sf = 0; sf < MAX_PARAM_SPATIAL_SUBFRAMES; sf++ )
+        {
+            for ( band = 0; band < MASA_FREQUENCY_BANDS; band++ )
+            {
+                inEne1[sf][band] = inEne2[sf][band];
+            }
+        }
+    }
+    else if ( inType2 == IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED && inType1 != IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED && ( inMeta1->descriptiveMeta.numberOfDirections == 0u && inMeta2->descriptiveMeta.numberOfDirections == 0u ) )
+    {
+        /* meta_2 is ISM and both are 1dir */
+        diffuse_meta_merge_1x1( outMeta, inMeta1, inEne1, inMeta2, inEne2 );
+    }
+    else
+    {
+        full_stream_merge( outMeta, inMeta1, inEne1, inMeta2, inEne2 );
+    }
+
+    return;
+}
+
+/*--------------------------------------------------------------------------*
+ * masa_prerend_release()
+ *
+ * Releases everything owned by a prerenderer handle, including the handle
+ * itself. Only the first num_Cldfb_instances CLDFB instances are deleted,
+ * and free() accepts NULL, so a partially initialized handle is fine as
+ * long as its counter and pointers have been initialized.
+ *--------------------------------------------------------------------------*/
+
+static void masa_prerend_release(
+    MASA_PREREND_HANDLE hMasaPrerend /* i/o: handle to release, must not be NULL */
+)
+{
+    int16_t i;
+
+    for ( i = 0; i < hMasaPrerend->num_Cldfb_instances; i++ )
+    {
+        deleteCldfb( &( hMasaPrerend->cldfbAnaEnc[i] ) );
+    }
+
+    free( hMasaPrerend->hMasaOut );
+    hMasaPrerend->hMasaOut = NULL;
+    free( hMasaPrerend->sph_grid16 );
+    hMasaPrerend->sph_grid16 = NULL;
+
+    free( hMasaPrerend );
+
+    return;
+}
+
+/*--------------------------------------------------------------------------*
+ * masaPrerendOpen()
+ *
+ * Allocates a MASA prerenderer with one CLDFB analysis instance per
+ * transport channel, a metadata output buffer and a spherical grid.
+ * On success the new handle is stored to *hMasaPrerendPtr. On failure an
+ * error is returned, everything acquired so far is released again and
+ * *hMasaPrerendPtr is left untouched.
+ *--------------------------------------------------------------------------*/
+
+ivas_error masaPrerendOpen(
+    MASA_PREREND_HANDLE *hMasaPrerendPtr, /* o: handle to the opened prerenderer */
+    int16_t numTransports,                /* i: number of transport channels */
+    int32_t input_Fs                      /* i: signal sampling rate */
+)
+{
+    MASA_PREREND_HANDLE hMasaPrerend;
+    int16_t i;
+    ivas_error error;
+
+    error = IVAS_ERR_OK;
+
+    hMasaPrerend = (MASA_PREREND_HANDLE) malloc( sizeof( MASA_PREREND_DATA ) );
+    if ( hMasaPrerend == NULL )
+    {
+        return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA prerenderer\n" ) );
+    }
+
+    /* Bring the handle into a state that can be released safely at any later point */
+    hMasaPrerend->num_Cldfb_instances = 0;
+    hMasaPrerend->hMasaOut = NULL;
+    hMasaPrerend->sph_grid16 = NULL;
+    for ( i = 0; i < MASA_MAX_TRANSPORT_CHANNELS; i++ )
+    {
+        hMasaPrerend->cldfbAnaEnc[i] = NULL;
+    }
+
+    for ( i = 0; i < numTransports; i++ )
+    {
+        if ( ( error = openCldfb( &( hMasaPrerend->cldfbAnaEnc[i] ), CLDFB_ANALYSIS, input_Fs, CLDFB_PROTOTYPE_5_00MS ) ) != IVAS_ERR_OK )
+        {
+            masa_prerend_release( hMasaPrerend );
+            return error;
+        }
+
+        /* Count only successfully opened instances so that exactly those get deleted */
+        hMasaPrerend->num_Cldfb_instances = (int16_t) ( i + 1 );
+    }
+
+    if ( ( hMasaPrerend->hMasaOut = (MASA_DECODER_EXT_OUT_META_HANDLE) malloc( sizeof( MASA_DECODER_EXT_OUT_META ) ) ) == NULL )
+    {
+        masa_prerend_release( hMasaPrerend );
+        return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA prerenderer\n" ) );
+    }
+
+    if ( ( hMasaPrerend->sph_grid16 = (SPHERICAL_GRID_DATA *) malloc( sizeof( SPHERICAL_GRID_DATA ) ) ) == NULL )
+    {
+        masa_prerend_release( hMasaPrerend );
+        return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA prerenderer\n" ) );
+    }
+    generate_gridEq( hMasaPrerend->sph_grid16 );
+
+    *hMasaPrerendPtr = hMasaPrerend;
+
+    return error;
+}
+
+/*--------------------------------------------------------------------------*
+ * masaPrerendClose()
+ *
+ * Releases a prerenderer opened with masaPrerendOpen() and sets the
+ * caller's handle to NULL. NULL arguments are accepted and ignored.
+ *--------------------------------------------------------------------------*/
+
+void masaPrerendClose(
+    MASA_PREREND_HANDLE *hMasaPrerendPtr /* i/o: prerenderer handle to be closed */
+)
+{
+    if ( hMasaPrerendPtr == NULL || *hMasaPrerendPtr == NULL )
+    {
+        return;
+    }
+
+    masa_prerend_release( *hMasaPrerendPtr );
+    ( *hMasaPrerendPtr ) = NULL;
+
+    return;
+}
+
+#endif /* MASA_PREREND */
diff --git a/lib_rend/ivas_mcmasa_ana.c b/lib_rend/ivas_mcmasa_ana.c
new file mode 100644
index 0000000000000000000000000000000000000000..bc99d810f9bab2f8157f7c44ac291211351c90f8
--- /dev/null
+++ b/lib_rend/ivas_mcmasa_ana.c
@@ -0,0 +1,1116 @@
+/****************************************************************************************************** + + (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository. All Rights Reserved. + + This software is protected by copyright law and by international treaties. + The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository retain full ownership rights in their respective contributions in + the software. This notice grants no license of any kind, including but not limited to patent + license, nor is any license granted by implication, estoppel or otherwise. + + Contributors are required to enter into the IVAS codec Public Collaboration agreement before making + contributions. + + This software is provided "AS IS", without any express or implied warranties. The software is in the + development stage. It is intended exclusively for experts who have experience with such software and + solely for the purpose of inspection. All implied warranties of non-infringement, merchantability + and fitness for a particular purpose are hereby disclaimed and excluded. 
+
+   Any dispute, controversy or claim arising under or in relation to providing this software shall be
+   submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+   accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+   the United Nations Convention on Contracts on the International Sales of Goods.
+
+*******************************************************************************************************/
+
+#include
+#include
+#include
+#include "ivas_cnst.h"
+#include "options.h"
+#include "ivas_prot_rend.h"
+#include "ivas_prot.h"
+#include "prot.h"
+#include "ivas_stat_rend.h"
+#include "ivas_rom_com.h"
+#ifdef DEBUGGING
+#include "debug.h"
+#endif
+#include "wmc_auto.h"
+
+#ifdef MASA_PREREND
+
+
+/*-------------------------------------------------------------------------
+ * Local constants
+ *------------------------------------------------------------------------*/
+
+#define NEAR_HORIZONTAL_PLANE_ELEVATION 17.5f /* estimated elevations below this value get a spread coherence estimate; presumably in degrees - TODO confirm */
+#define VERTICAL_ENERGY_RATIO_OFFSET    0.15f
+
+
+/*-------------------------------------------------------------------------
+ * Local function prototypes
+ *------------------------------------------------------------------------*/
+
+/* Structure for covariance matrix
+ * One instance is kept per frequency band by the parameter estimation; entry [i][j] relates
+ * analysis channels i and j, and its absolute value is taken as sqrt( xr^2 + xi^2 ). */
+typedef struct
+{
+    float xr[MCMASA_MAX_ANA_CHANS][MCMASA_MAX_ANA_CHANS]; /* presumably the real part - confirm against compute_cov_mtx() */
+    float xi[MCMASA_MAX_ANA_CHANS][MCMASA_MAX_ANA_CHANS]; /* presumably the imaginary part - confirm against compute_cov_mtx() */
+} CovarianceMatrix;
+
+void ivas_mcmasa_param_est_ana( MCMASA_ANA_HANDLE hMcMasa, float data_f[][L_FRAME48k], float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], const int16_t input_frame, const int16_t nchan_inp );
+
+static void ivas_mcmasa_dmx(
MCMASA_ANA_HANDLE hMcMasa, float data_f[][L_FRAME48k], const int16_t input_frame, const int16_t nchan_transport, const int16_t nchan_inp );
+
+static void compute_cov_mtx( float sr[MCMASA_MAX_ANA_CHANS][CLDFB_NO_CHANNELS_MAX], float si[MCMASA_MAX_ANA_CHANS][CLDFB_NO_CHANNELS_MAX], const int16_t freq, const int16_t N, CovarianceMatrix *COVls );
+
+static void computeVerticalDiffuseness( float **buffer_intensity, const float *buffer_energy, const int16_t num_freq_bands, float *diffuseness );
+
+static void computeEvenLayout( const float *ls_azimuth, float *ls_azimuth_even, const int16_t numChannels );
+
+
+/*--------------------------------------------------------------------------*
+ * mcmasa_ana_release()
+ *
+ * Releases everything owned by a McMASA analysis handle, including the
+ * handle itself. Works on partially initialized handles as long as the
+ * CLDFB counter and all buffer pointers have been initialized: only the
+ * first num_Cldfb_instances CLDFB instances are deleted, a NULL
+ * direction_vector_m[i] is not dereferenced and free() accepts NULL.
+ *--------------------------------------------------------------------------*/
+
+static void mcmasa_ana_release(
+    MCMASA_ANA_HANDLE hMcMasa /* i/o: handle to release, must not be NULL */
+)
+{
+    int16_t i, j;
+
+    for ( i = 0; i < hMcMasa->num_Cldfb_instances; i++ )
+    {
+        deleteCldfb( &( hMcMasa->cldfbAnaEnc[i] ) );
+    }
+
+    /* intensity 3-dim */
+    for ( i = 0; i < DIRAC_NUM_DIMS; i++ )
+    {
+        if ( hMcMasa->direction_vector_m[i] != NULL )
+        {
+            for ( j = 0; j < MAX_PARAM_SPATIAL_SUBFRAMES; j++ )
+            {
+                free( hMcMasa->direction_vector_m[i][j] );
+                hMcMasa->direction_vector_m[i][j] = NULL;
+            }
+        }
+
+        for ( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ )
+        {
+            free( hMcMasa->buffer_intensity_real[i][j] );
+            hMcMasa->buffer_intensity_real[i][j] = NULL;
+        }
+
+        free( hMcMasa->direction_vector_m[i] );
+        hMcMasa->direction_vector_m[i] = NULL;
+    }
+
+    for ( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ )
+    {
+        free( hMcMasa->buffer_intensity_real_vert[j] );
+        hMcMasa->buffer_intensity_real_vert[j] = NULL;
+    }
+
+    free( hMcMasa->hMasaOut );
+    hMcMasa->hMasaOut = NULL;
+    free( hMcMasa->sph_grid16 );
+    hMcMasa->sph_grid16 = NULL;
+
+    free( hMcMasa );
+
+    return;
+}
+
+
+/*--------------------------------------------------------------------------*
+ * ivas_mcmasa_ana_open()
+ *
+ * Allocates and initializes the McMASA analysis handle for the given
+ * multichannel input configuration: loudspeaker layout tables, band and
+ * block grouping, CLDFB analysis instances, intensity/energy buffers,
+ * channel-to-FOA conversion matrices, nearest-neighbour tables, the
+ * interpolator ramp, the metadata output buffer and the spherical grid.
+ * On success the new handle is stored to *hMcMasaPtr. On failure an error
+ * is returned, everything acquired so far is released again and
+ * *hMcMasaPtr is left untouched.
+ *--------------------------------------------------------------------------*/
+
+ivas_error ivas_mcmasa_ana_open(
+    MCMASA_ANA_HANDLE *hMcMasaPtr,        /* i/o: McMASA data handle pointer */
+    const IVAS_REND_AudioConfig inConfig, /* i: Input config */
+    int32_t input_Fs                      /* i: Sampling frequency */
+)
+{
+    int16_t i, j;
+    MCMASA_ANA_HANDLE hMcMasa;
+    float ls_azimuth[MCMASA_MAX_ANA_CHANS];
+    float ls_elevation[MCMASA_MAX_ANA_CHANS];
+    float ls_azimuth_even[MCMASA_MAX_ANA_CHANS];
+    int16_t nchan_inp;
+    int16_t numAnalysisChannels;
+    float left_min, right_min, azi_diff;
+    int16_t maxBin, input_frame;
+    ivas_error error;
+
+    error = IVAS_ERR_OK;
+
+    if ( ( hMcMasa = (MCMASA_ANA_HANDLE) malloc( sizeof( MCMASA_ANA_DATA ) ) ) == NULL )
+    {
+        return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for McMasa\n" ) );
+    }
+
+    /* Bring the handle into a state that can be released safely at any later point */
+    hMcMasa->num_Cldfb_instances = 0;
+    hMcMasa->hMasaOut = NULL;
+    hMcMasa->sph_grid16 = NULL;
+    for ( i = 0; i < DIRAC_NUM_DIMS; i++ )
+    {
+        hMcMasa->direction_vector_m[i] = NULL;
+        for ( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ )
+        {
+            hMcMasa->buffer_intensity_real[i][j] = NULL;
+        }
+    }
+    for ( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ )
+    {
+        hMcMasa->buffer_intensity_real_vert[j] = NULL;
+    }
+
+    /* Select the loudspeaker layout; nchan_inp - 1 entries are copied from the layout tables */
+    if ( inConfig == IVAS_REND_AUDIO_CONFIG_5_1 )
+    {
+        nchan_inp = 6;
+        mvr2r( ls_azimuth_CICP6, ls_azimuth, nchan_inp - 1 );
+        mvr2r( ls_elevation_CICP6, ls_elevation, nchan_inp - 1 );
+        hMcMasa->numHorizontalChannels = 5;
+        hMcMasa->isHorizontalSetup = 1;
+    }
+    else if ( inConfig == IVAS_REND_AUDIO_CONFIG_7_1 )
+    {
+        nchan_inp = 8;
+        mvr2r( ls_azimuth_CICP12, ls_azimuth, nchan_inp - 1 );
+        mvr2r( ls_elevation_CICP12, ls_elevation, nchan_inp - 1 );
+        hMcMasa->numHorizontalChannels = 7;
+        hMcMasa->isHorizontalSetup = 1;
+    }
+    else if ( inConfig == IVAS_REND_AUDIO_CONFIG_5_1_2 )
+    {
+        nchan_inp = 8;
+        mvr2r( ls_azimuth_CICP14, ls_azimuth, nchan_inp - 1 );
+        mvr2r( ls_elevation_CICP14, ls_elevation, nchan_inp - 1 );
+        hMcMasa->numHorizontalChannels = 5;
+        hMcMasa->isHorizontalSetup = 0;
+    }
+    else if ( inConfig == IVAS_REND_AUDIO_CONFIG_5_1_4 )
+    {
+        nchan_inp = 10;
+        mvr2r( ls_azimuth_CICP16, ls_azimuth, nchan_inp - 1 );
+        mvr2r( ls_elevation_CICP16, ls_elevation, nchan_inp - 1 );
+        hMcMasa->numHorizontalChannels = 5;
+        hMcMasa->isHorizontalSetup = 0;
+    }
+    else
+    {
+        /* Every other configuration is handled with the CICP19 tables */
+        nchan_inp = 12;
+        mvr2r( ls_azimuth_CICP19, ls_azimuth, nchan_inp - 1 );
+        mvr2r( ls_elevation_CICP19, ls_elevation, nchan_inp - 1 );
+        hMcMasa->numHorizontalChannels = 7;
+        hMcMasa->isHorizontalSetup = 0;
+    }
+
+    numAnalysisChannels = nchan_inp - 1;
+
+    /* Determine the number of bands */
+    hMcMasa->nbands = MASA_FREQUENCY_BANDS;
+
+    /* Determine band grouping */
+    mvs2s( MASA_band_grouping_24, hMcMasa->band_grouping, 24 + 1 );
+
+    /* Clip the band grouping at the highest bin available for this sampling rate */
+    maxBin = (int16_t) ( input_Fs * INV_CLDFB_BANDWIDTH + 0.5f );
+    for ( i = 1; i < hMcMasa->nbands + 1; i++ )
+    {
+        if ( hMcMasa->band_grouping[i] >= maxBin )
+        {
+            hMcMasa->band_grouping[i] = maxBin;
+            hMcMasa->nbands = i;
+            break;
+        }
+    }
+
+    /* Determine block grouping */
+    mvs2s( DirAC_block_grouping, hMcMasa->block_grouping, MAX_PARAM_SPATIAL_SUBFRAMES + 1 );
+
+    /* open/initialize CLDFB */
+    for ( i = 0; i < numAnalysisChannels; i++ )
+    {
+        if ( ( error = openCldfb( &( hMcMasa->cldfbAnaEnc[i] ), CLDFB_ANALYSIS, input_Fs, CLDFB_PROTOTYPE_5_00MS ) ) != IVAS_ERR_OK )
+        {
+            mcmasa_ana_release( hMcMasa );
+            return error;
+        }
+
+        /* Count only successfully opened instances so that exactly those get deleted */
+        hMcMasa->num_Cldfb_instances = (int16_t) ( i + 1 );
+    }
+
+    /* intensity 3-dim */
+    for ( i = 0; i < DIRAC_NUM_DIMS; i++ )
+    {
+        if ( ( hMcMasa->direction_vector_m[i] = (float **) malloc( MAX_PARAM_SPATIAL_SUBFRAMES * sizeof( float * ) ) ) == NULL )
+        {
+            mcmasa_ana_release( hMcMasa );
+            return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for McMasa\n" ) );
+        }
+
+        /* Clear the row first so that a failure below releases only what was allocated */
+        for ( j = 0; j < MAX_PARAM_SPATIAL_SUBFRAMES; j++ )
+        {
+            hMcMasa->direction_vector_m[i][j] = NULL;
+        }
+
+        for ( j = 0; j < MAX_PARAM_SPATIAL_SUBFRAMES; j++ )
+        {
+            if ( ( hMcMasa->direction_vector_m[i][j] = (float *) malloc( MASA_FREQUENCY_BANDS * sizeof( float ) ) ) == NULL )
+            {
+                mcmasa_ana_release( hMcMasa );
+                return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for McMasa\n" ) );
+            }
+            set_zero( hMcMasa->direction_vector_m[i][j], MASA_FREQUENCY_BANDS );
+        }
+    }
+
+    for ( i = 0; i < DIRAC_NUM_DIMS; i++ )
+    {
+        for ( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ )
+        {
+            if ( ( hMcMasa->buffer_intensity_real[i][j] = (float *) malloc( MASA_FREQUENCY_BANDS * sizeof( float ) ) ) == NULL )
+            {
+                mcmasa_ana_release( hMcMasa );
+                return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for McMasa\n" ) );
+            }
+            set_zero( hMcMasa->buffer_intensity_real[i][j], MASA_FREQUENCY_BANDS );
+        }
+    }
+
+    for ( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ )
+    {
+        if ( ( hMcMasa->buffer_intensity_real_vert[j] = (float *) malloc( MASA_FREQUENCY_BANDS * sizeof( float ) ) ) == NULL )
+        {
+            mcmasa_ana_release( hMcMasa );
+            return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for McMasa\n" ) );
+        }
+        set_zero( hMcMasa->buffer_intensity_real_vert[j], MASA_FREQUENCY_BANDS );
+    }
+
+    set_zero( hMcMasa->buffer_energy, DIRAC_NO_COL_AVG_DIFF * MASA_FREQUENCY_BANDS );
+
+    /* Even layout for the horizontal channels and, for non-horizontal setups, separately for the remaining channels */
+    computeEvenLayout( ls_azimuth, ls_azimuth_even, hMcMasa->numHorizontalChannels );
+    if ( !hMcMasa->isHorizontalSetup )
+    {
+        computeEvenLayout( &ls_azimuth[hMcMasa->numHorizontalChannels], &ls_azimuth_even[hMcMasa->numHorizontalChannels], numAnalysisChannels - hMcMasa->numHorizontalChannels );
+    }
+
+    /* Channel-to-FOA gains; rows 0..3 are used as W, Y, Z, X by the parameter estimation */
+    for ( i = 0; i < numAnalysisChannels; i++ )
+    {
+        hMcMasa->chnlToFoaMtx[0][i] = 1.0f;
+        hMcMasa->chnlToFoaMtx[1][i] = sinf( ls_azimuth[i] * PI_OVER_180 ) * cosf( ls_elevation[i] * PI_OVER_180 );
+        hMcMasa->chnlToFoaMtx[2][i] = sinf( ls_elevation[i] * PI_OVER_180 );
+        hMcMasa->chnlToFoaMtx[3][i] = cosf( ls_azimuth[i] * PI_OVER_180 ) * cosf( ls_elevation[i] * PI_OVER_180 );
+
+        hMcMasa->chnlToFoaEvenMtx[0][i] = 1.0f;
+        hMcMasa->chnlToFoaEvenMtx[1][i] = sinf( ls_azimuth_even[i] * PI_OVER_180 );
+        hMcMasa->chnlToFoaEvenMtx[2][i] = 0.0f;
+        hMcMasa->chnlToFoaEvenMtx[3][i] = cosf( ls_azimuth_even[i] * PI_OVER_180 );
+    }
+
+    mvr2r( ls_azimuth, hMcMasa->ls_azimuth, numAnalysisChannels );
+
+    /* For every horizontal channel find the closest horizontal neighbour on either side */
+    for ( i = 0; i < hMcMasa->numHorizontalChannels; i++ )
+    {
+        left_min = 360.0f;
+        right_min = -360.0f;
+
+        for ( j = 0; j < hMcMasa->numHorizontalChannels; j++ )
+        {
+            azi_diff = ls_azimuth[j] - ls_azimuth[i];
+
+            /* wrap the difference to [-180, 180] */
+            if ( azi_diff > 180.0f )
+            {
+                azi_diff -= 360.0f;
+            }
+            else if ( azi_diff < -180.0f )
+            {
+                azi_diff += 360.0f;
+            }
+
+            if ( azi_diff < left_min && azi_diff > 0.0f )
+            {
+                hMcMasa->leftNearest[i] = j;
+                left_min = azi_diff;
+            }
+
+            if ( azi_diff > right_min && azi_diff < 0.0f )
+            {
+                hMcMasa->rightNearest[i] = j;
+                right_min = azi_diff;
+            }
+        }
+    }
+
+    hMcMasa->prevMultiChEne = 0.0f;
+    hMcMasa->prevDownmixEne = 0.0f;
+    hMcMasa->prevEQ = 1.0f;
+    input_frame = (int16_t) ( input_Fs / FRAMES_PER_SEC );
+    for ( i = 0; i < input_frame; i++ )
+    {
+        hMcMasa->interpolator[i] = ( (float) i ) / ( (float) input_frame );
+    }
+
+    hMcMasa->index_buffer_intensity = 0;
+
+    if ( ( hMcMasa->hMasaOut = (MASA_DECODER_EXT_OUT_META_HANDLE) malloc( sizeof( MASA_DECODER_EXT_OUT_META ) ) ) == NULL )
+    {
+        mcmasa_ana_release( hMcMasa );
+        return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA decoder\n" ) );
+    }
+
+    if ( ( hMcMasa->sph_grid16 = (SPHERICAL_GRID_DATA *) malloc( sizeof( SPHERICAL_GRID_DATA ) ) ) == NULL )
+    {
+        mcmasa_ana_release( hMcMasa );
+        return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA decoder\n" ) );
+    }
+    generate_gridEq( hMcMasa->sph_grid16 );
+
+    ( *hMcMasaPtr ) = hMcMasa;
+
+    return error;
+}
+
+
+/*--------------------------------------------------------------------------*
+ * ivas_mcmasa_ana_close()
+ *
+ * Releases a handle opened with ivas_mcmasa_ana_open() and sets the
+ * caller's handle to NULL. NULL arguments are accepted and ignored.
+ *--------------------------------------------------------------------------*/
+
+void ivas_mcmasa_ana_close(
+    MCMASA_ANA_HANDLE *hMcMasa /* i/o: analysis McMASA handle */
+)
+{
+    if ( hMcMasa == NULL || *hMcMasa == NULL )
+    {
+        return;
+    }
+
+    mcmasa_ana_release( *hMcMasa );
+    ( *hMcMasa ) = NULL;
+
+    return;
+}
+
+
+/*--------------------------------------------------------------------------*
+ * ivas_mcmasa_ana()
+ *
+ * Multichannel MASA analysis
+ *
+ * Works in place on data_f: channel 3 is summed into channel 2 and the
+ * channels from index 4 onwards are moved down by one, then the spatial
+ * parameters are estimated, written to hMcMasa->hMasaOut and the signals
+ * are downmixed.
+ *--------------------------------------------------------------------------*/
+
+void ivas_mcmasa_ana(
+    MCMASA_ANA_HANDLE hMcMasa,     /* i/o: McMASA encoder handle */
+    float data_f[][L_FRAME48k],    /* i/o: Input / transport audio signals */
+    const int16_t input_frame,     /* i : Input frame size */
+    const int16_t nchan_transport, /* i : Number of transport channels */
+    const int16_t nchan_inp        /* i : Number of input channels */
+)
+{
+    int16_t i;
+    float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
+    float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
+    float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
+    float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
+    float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
+
+
+    /* Sum center and LFE, move surround channels */
+    v_add( data_f[2], data_f[3], data_f[2], input_frame );
+    for ( i = 4; i < nchan_inp; i++ )
+    {
+        mvr2r( data_f[i], data_f[i - 1], input_frame );
+    }
+
+    /* Analysis */
+    ivas_mcmasa_param_est_ana( hMcMasa, data_f, elevation_m_values, azimuth_m_values, energyRatio, spreadCoherence, surroundingCoherence, input_frame, nchan_inp );
+
+    /* Create MASA metadata buffer from the estimated values */
+    ivas_create_masa_out_meta( hMcMasa->hMasaOut, hMcMasa->sph_grid16, nchan_transport, elevation_m_values, azimuth_m_values, energyRatio, spreadCoherence, surroundingCoherence );
+
+    /* Downmix */
+    ivas_mcmasa_dmx( hMcMasa, data_f, input_frame, nchan_transport, nchan_inp );
+
+    return;
+}
+
+
+/*--------------------------------------------------------------------------*
+ * Local functions
+ *--------------------------------------------------------------------------*/
+
+/* Estimate metadata parameters for McMASA */
+void ivas_mcmasa_param_est_ana(
+    MCMASA_ANA_HANDLE hMcMasa, /* i : McMASA analyzer structure */
+    float data_f[][L_FRAME48k], /* i : Audio frame in MC-format */
+    float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* o : Estimated elevation */
+    float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* o : Estimated azimuth */
+    float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* o : Estimated direct-to-total ratio */
+    float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* o : Estimated spread coherence */
+    float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* o : Estimated surround coherence */
+    const int16_t input_frame, /* i : Input frame size */
+    const int16_t nchan_inp /* i : Number of input channels */
+)
+{
+    float reference_power[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
+    int16_t ts, i, j, d;
+    int16_t num_freq_bins, num_freq_bands, index;
+    float dir_v[DIRAC_NUM_DIMS];
+    int16_t l_ts;
+    float Chnl_RealBuffer[MCMASA_MAX_ANA_CHANS][CLDFB_NO_CHANNELS_MAX];
+    float Chnl_ImagBuffer[MCMASA_MAX_ANA_CHANS][CLDFB_NO_CHANNELS_MAX];
+    float Foa_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX];
+    float Foa_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX];
+    float FoaEven_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX];
+    float FoaEven_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX];
+    float intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS];
+    float
intensity_even_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS]; + float direction_vector[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS]; + float diffuseness_vector[MASA_FREQUENCY_BANDS]; + float vertical_diffuseness_vector[MASA_FREQUENCY_BANDS]; + float diffuseness_m[MASA_FREQUENCY_BANDS]; + float coherentEnergyRatio[MASA_FREQUENCY_BANDS]; + int16_t band_m_idx, block_m_idx; + float renormalization_factor_diff[MASA_FREQUENCY_BANDS]; + float norm_tmp; + int16_t mrange[2], brange[2]; + CovarianceMatrix COVls[MASA_FREQUENCY_BANDS]; + float absCOVls[MCMASA_MAX_ANA_CHANS][MCMASA_MAX_ANA_CHANS]; + float lsEnergy[MCMASA_MAX_ANA_CHANS]; + float lsEnergySum, maxEne; + int16_t loudestCh; + float surrCoh, tempCoh, tempCoh2; + int16_t i1, i2, i3; + float angleDist, minAngleDist; + float currentAzi; + float lsEnergyRelation; + float tempLsEnergyRelation; + float stereoness, cohwideness, spreadCoh; + float stereoRatio, cohPanRatio; + float stereoCoh, cohPanCoh, cohRatio; + int16_t numAnalysisChannels; + + num_freq_bins = hMcMasa->cldfbAnaEnc[0]->no_channels; + num_freq_bands = hMcMasa->nbands; + l_ts = input_frame / CLDFB_NO_COL_MAX; + numAnalysisChannels = nchan_inp - 1; + + /* do processing over all CLDFB time slots */ + for ( block_m_idx = 0; block_m_idx < MAX_PARAM_SPATIAL_SUBFRAMES; block_m_idx++ ) + { + mrange[0] = hMcMasa->block_grouping[block_m_idx]; + mrange[1] = hMcMasa->block_grouping[block_m_idx + 1]; + + for ( band_m_idx = 0; band_m_idx < hMcMasa->nbands; band_m_idx++ ) + { + hMcMasa->direction_vector_m[0][block_m_idx][band_m_idx] = 0; + hMcMasa->direction_vector_m[1][block_m_idx][band_m_idx] = 0; + hMcMasa->direction_vector_m[2][block_m_idx][band_m_idx] = 0; + } + + /* Need to initialize renormalization_factors, and variables to be normalized */ + set_zero( renormalization_factor_diff, hMcMasa->nbands ); + set_zero( diffuseness_m, hMcMasa->nbands ); + set_zero( hMcMasa->energy[block_m_idx], MASA_FREQUENCY_BANDS ); + + /* Reset variable */ + for ( i = 0; i < hMcMasa->nbands; i++ ) 
+ { + for ( j = 0; j < numAnalysisChannels; j++ ) + { + set_zero( COVls[i].xr[j], numAnalysisChannels ); + set_zero( COVls[i].xi[j], numAnalysisChannels ); + } + } + + for ( ts = mrange[0]; ts < mrange[1]; ts++ ) + { + for ( i = 0; i < numAnalysisChannels; i++ ) + { + cldfbAnalysis_ts( &( data_f[i][l_ts * ts] ), Chnl_RealBuffer[i], Chnl_ImagBuffer[i], l_ts, hMcMasa->cldfbAnaEnc[i] ); + } + + /* Compute channel-based energy for metadata processing */ + for ( band_m_idx = 0; band_m_idx < num_freq_bands; band_m_idx++ ) + { + brange[0] = hMcMasa->band_grouping[band_m_idx]; + brange[1] = hMcMasa->band_grouping[band_m_idx + 1]; + for ( j = brange[0]; j < brange[1]; j++ ) + { + for ( i = 0; i < numAnalysisChannels; i++ ) + { + hMcMasa->energy[block_m_idx][band_m_idx] += Chnl_RealBuffer[i][j] * Chnl_RealBuffer[i][j] + Chnl_ImagBuffer[i][j] * Chnl_ImagBuffer[i][j]; + } + } + } + + /* Compute covariance matrix */ + for ( i = 0; i < num_freq_bands; i++ ) + { + brange[0] = hMcMasa->band_grouping[i]; + brange[1] = hMcMasa->band_grouping[i + 1]; + for ( j = brange[0]; j < brange[1]; j++ ) + { + compute_cov_mtx( Chnl_RealBuffer, Chnl_ImagBuffer, j, numAnalysisChannels, &( COVls[i] ) ); + } + } + + /* Compute standard FOA */ + /* W */ + v_add( Chnl_RealBuffer[0], Chnl_RealBuffer[1], Foa_RealBuffer[0], num_freq_bins ); + v_add( Chnl_ImagBuffer[0], Chnl_ImagBuffer[1], Foa_ImagBuffer[0], num_freq_bins ); + for ( i = 2; i < numAnalysisChannels; i++ ) + { + v_add( Chnl_RealBuffer[i], Foa_RealBuffer[0], Foa_RealBuffer[0], num_freq_bins ); + v_add( Chnl_ImagBuffer[i], Foa_ImagBuffer[0], Foa_ImagBuffer[0], num_freq_bins ); + } + + /* Y */ + v_multc( Chnl_RealBuffer[0], hMcMasa->chnlToFoaMtx[1][0], Foa_RealBuffer[1], num_freq_bins ); + v_multc( Chnl_ImagBuffer[0], hMcMasa->chnlToFoaMtx[1][0], Foa_ImagBuffer[1], num_freq_bins ); + for ( i = 1; i < numAnalysisChannels; i++ ) + { + v_multc_acc( Chnl_RealBuffer[i], hMcMasa->chnlToFoaMtx[1][i], Foa_RealBuffer[1], num_freq_bins ); + v_multc_acc( 
Chnl_ImagBuffer[i], hMcMasa->chnlToFoaMtx[1][i], Foa_ImagBuffer[1], num_freq_bins ); + } + + /* Z */ + if ( hMcMasa->isHorizontalSetup ) + { + /* Set zero for horizontal setups */ + set_zero( Foa_RealBuffer[2], num_freq_bins ); + set_zero( Foa_ImagBuffer[2], num_freq_bins ); + } + else + { + v_multc( Chnl_RealBuffer[0], hMcMasa->chnlToFoaMtx[2][0], Foa_RealBuffer[2], num_freq_bins ); + v_multc( Chnl_ImagBuffer[0], hMcMasa->chnlToFoaMtx[2][0], Foa_ImagBuffer[2], num_freq_bins ); + for ( i = 1; i < numAnalysisChannels; i++ ) + { + v_multc_acc( Chnl_RealBuffer[i], hMcMasa->chnlToFoaMtx[2][i], Foa_RealBuffer[2], num_freq_bins ); + v_multc_acc( Chnl_ImagBuffer[i], hMcMasa->chnlToFoaMtx[2][i], Foa_ImagBuffer[2], num_freq_bins ); + } + } + + /* X */ + v_multc( Chnl_RealBuffer[0], hMcMasa->chnlToFoaMtx[3][0], Foa_RealBuffer[3], num_freq_bins ); + v_multc( Chnl_ImagBuffer[0], hMcMasa->chnlToFoaMtx[3][0], Foa_ImagBuffer[3], num_freq_bins ); + for ( i = 1; i < numAnalysisChannels; i++ ) + { + v_multc_acc( Chnl_RealBuffer[i], hMcMasa->chnlToFoaMtx[3][i], Foa_RealBuffer[3], num_freq_bins ); + v_multc_acc( Chnl_ImagBuffer[i], hMcMasa->chnlToFoaMtx[3][i], Foa_ImagBuffer[3], num_freq_bins ); + } + + /* Compute even FOA */ + /* W */ + mvr2r( Foa_RealBuffer[0], FoaEven_RealBuffer[0], num_freq_bins ); + mvr2r( Foa_ImagBuffer[0], FoaEven_ImagBuffer[0], num_freq_bins ); + + /* Y */ + v_multc( Chnl_RealBuffer[0], hMcMasa->chnlToFoaEvenMtx[1][0], FoaEven_RealBuffer[1], num_freq_bins ); + v_multc( Chnl_ImagBuffer[0], hMcMasa->chnlToFoaEvenMtx[1][0], FoaEven_ImagBuffer[1], num_freq_bins ); + for ( i = 1; i < numAnalysisChannels; i++ ) + { + v_multc_acc( Chnl_RealBuffer[i], hMcMasa->chnlToFoaEvenMtx[1][i], FoaEven_RealBuffer[1], num_freq_bins ); + v_multc_acc( Chnl_ImagBuffer[i], hMcMasa->chnlToFoaEvenMtx[1][i], FoaEven_ImagBuffer[1], num_freq_bins ); + } + + /* Z (even setups are handled as horizontal) */ + set_zero( FoaEven_RealBuffer[2], num_freq_bins ); + set_zero( 
FoaEven_ImagBuffer[2], num_freq_bins ); + + /* X */ + v_multc( Chnl_RealBuffer[0], hMcMasa->chnlToFoaEvenMtx[3][0], FoaEven_RealBuffer[3], num_freq_bins ); + v_multc( Chnl_ImagBuffer[0], hMcMasa->chnlToFoaEvenMtx[3][0], FoaEven_ImagBuffer[3], num_freq_bins ); + for ( i = 1; i < numAnalysisChannels; i++ ) + { + v_multc_acc( Chnl_RealBuffer[i], hMcMasa->chnlToFoaEvenMtx[3][i], FoaEven_RealBuffer[3], num_freq_bins ); + v_multc_acc( Chnl_ImagBuffer[i], hMcMasa->chnlToFoaEvenMtx[3][i], FoaEven_ImagBuffer[3], num_freq_bins ); + } + + /* Direction estimation */ + computeIntensityVector_ana( hMcMasa->band_grouping, Foa_RealBuffer, Foa_ImagBuffer, num_freq_bands, intensity_real ); + computeDirectionVectors( intensity_real[0], intensity_real[1], intensity_real[2], 0, num_freq_bands, direction_vector[0], direction_vector[1], direction_vector[2] ); + + /* Power and intensity estimation for diffuseness */ + computeIntensityVector_ana( hMcMasa->band_grouping, FoaEven_RealBuffer, FoaEven_ImagBuffer, num_freq_bands, intensity_even_real ); + computeReferencePower_ana( hMcMasa->band_grouping, FoaEven_RealBuffer, FoaEven_ImagBuffer, reference_power[ts], num_freq_bands ); + + /* Fill buffers of length "averaging_length" time slots for intensity and energy */ + hMcMasa->index_buffer_intensity = ( hMcMasa->index_buffer_intensity % DIRAC_NO_COL_AVG_DIFF ) + 1; /* averaging_length = 32 */ + index = hMcMasa->index_buffer_intensity; + for ( i = 0; i < DIRAC_NUM_DIMS; i++ ) + { + /* only real part needed */ + mvr2r( intensity_even_real[i], &( hMcMasa->buffer_intensity_real[i][index - 1][0] ), num_freq_bands ); + } + mvr2r( reference_power[ts], &( hMcMasa->buffer_energy[( index - 1 ) * num_freq_bands] ), num_freq_bands ); + + computeDiffuseness( hMcMasa->buffer_intensity_real, hMcMasa->buffer_energy, num_freq_bands, diffuseness_vector ); + + /* Compute vertical diffuseness, and tune original diffuseness if needed */ + if ( !hMcMasa->isHorizontalSetup ) + { + mvr2r( intensity_real[2], &( 
hMcMasa->buffer_intensity_real_vert[index - 1][0] ), num_freq_bands ); + computeVerticalDiffuseness( hMcMasa->buffer_intensity_real_vert, hMcMasa->buffer_energy, num_freq_bands, vertical_diffuseness_vector ); + v_min( diffuseness_vector, vertical_diffuseness_vector, diffuseness_vector, num_freq_bands ); + } + + for ( band_m_idx = 0; band_m_idx < hMcMasa->nbands; band_m_idx++ ) + { + norm_tmp = reference_power[ts][band_m_idx] * ( 1 - diffuseness_vector[band_m_idx] ); + + hMcMasa->direction_vector_m[0][block_m_idx][band_m_idx] += norm_tmp * direction_vector[0][band_m_idx]; + hMcMasa->direction_vector_m[1][block_m_idx][band_m_idx] += norm_tmp * direction_vector[1][band_m_idx]; + hMcMasa->direction_vector_m[2][block_m_idx][band_m_idx] += norm_tmp * direction_vector[2][band_m_idx]; + + diffuseness_m[band_m_idx] += reference_power[ts][band_m_idx] * diffuseness_vector[band_m_idx]; + renormalization_factor_diff[band_m_idx] += reference_power[ts][band_m_idx]; + } + } + + for ( band_m_idx = 0; band_m_idx < hMcMasa->nbands; band_m_idx++ ) + { + for ( d = 0; d < DIRAC_NUM_DIMS; d++ ) + { + dir_v[d] = hMcMasa->direction_vector_m[d][block_m_idx][band_m_idx]; + } + ivas_qmetadata_direction_vector_to_azimuth_elevation( dir_v, &azimuth_m_values[block_m_idx][band_m_idx], &elevation_m_values[block_m_idx][band_m_idx] ); + } + + /* Coherence processing */ + for ( band_m_idx = 0; band_m_idx < hMcMasa->nbands; band_m_idx++ ) + { + /* Compute absolute values */ + for ( i = 0; i < numAnalysisChannels; i++ ) + { + for ( j = i; j < numAnalysisChannels; j++ ) + { + absCOVls[i][j] = sqrtf( ( COVls[band_m_idx].xr[i][j] * COVls[band_m_idx].xr[i][j] + COVls[band_m_idx].xi[i][j] * COVls[band_m_idx].xi[i][j] ) ); + } + lsEnergy[i] = absCOVls[i][i]; + } + + /* Find loudest channel */ + maxEne = lsEnergy[0]; + loudestCh = 0; + for ( i = 1; i < numAnalysisChannels; i++ ) + { + if ( lsEnergy[i] > maxEne ) + { + maxEne = lsEnergy[i]; + loudestCh = i; + } + } + + /* Compute surrounding coherence */ + 
surrCoh = 1.0f; + for ( i = 0; i < numAnalysisChannels; i++ ) + { + if ( i != loudestCh ) + { + if ( i < loudestCh ) + { + i1 = i; + i2 = loudestCh; + } + else + { + i1 = loudestCh; + i2 = i; + } + tempCoh = absCOVls[i1][i2] / ( sqrtf( ( lsEnergy[i1] * lsEnergy[i2] + EPSILON ) ) ); + surrCoh = ( surrCoh < tempCoh ) ? surrCoh : tempCoh; + } + } + surrCoh = surrCoh * surrCoh; + surrCoh = ( surrCoh < 1.0f ) ? surrCoh : 1.0f; + surrCoh = ( surrCoh > 0.0f ) ? surrCoh : 0.0f; + + /* Compute spread coherence */ + if ( elevation_m_values[block_m_idx][band_m_idx] < NEAR_HORIZONTAL_PLANE_ELEVATION ) /* Computed only near horizontal plane */ + { + minAngleDist = 180.0f; + i1 = 0; + currentAzi = azimuth_m_values[block_m_idx][band_m_idx]; + for ( i = 0; i < hMcMasa->numHorizontalChannels; i++ ) + { + angleDist = fabsf( currentAzi - hMcMasa->ls_azimuth[i] ); + if ( angleDist > 180.0f ) + { + angleDist = fabsf( angleDist - 360.0f ); + } + if ( angleDist < minAngleDist ) + { + minAngleDist = angleDist; + i1 = i; + } + } + i2 = hMcMasa->leftNearest[i1]; + i3 = hMcMasa->rightNearest[i1]; + + if ( i2 < i3 ) + { + stereoCoh = absCOVls[i2][i3] / ( sqrtf( lsEnergy[i2] * lsEnergy[i3] + EPSILON ) ); + } + else + { + stereoCoh = absCOVls[i3][i2] / ( sqrtf( lsEnergy[i2] * lsEnergy[i3] + EPSILON ) ); + } + lsEnergyRelation = ( lsEnergy[i2] + lsEnergy[i3] ) / ( lsEnergy[i1] + lsEnergy[i2] + lsEnergy[i3] + EPSILON ); + stereoness = stereoCoh * lsEnergyRelation; + + if ( i1 < i2 ) + { + tempCoh = absCOVls[i1][i2] / ( sqrtf( lsEnergy[i1] * lsEnergy[i2] + EPSILON ) ); + } + else + { + tempCoh = absCOVls[i2][i1] / ( sqrtf( lsEnergy[i1] * lsEnergy[i2] + EPSILON ) ); + } + if ( i1 < i3 ) + { + tempCoh2 = absCOVls[i1][i3] / ( sqrtf( lsEnergy[i1] * lsEnergy[i3] + EPSILON ) ); + } + else + { + tempCoh2 = absCOVls[i3][i1] / ( sqrtf( lsEnergy[i1] * lsEnergy[i3] + EPSILON ) ); + } + cohPanCoh = ( tempCoh < tempCoh2 ) ? 
tempCoh : tempCoh2; + lsEnergyRelation = lsEnergy[i2] / ( lsEnergy[i1] + EPSILON ); + tempLsEnergyRelation = lsEnergy[i1] / ( lsEnergy[i2] + EPSILON ); + lsEnergyRelation = ( lsEnergyRelation < tempLsEnergyRelation ) ? lsEnergyRelation : tempLsEnergyRelation; + tempLsEnergyRelation = lsEnergy[i3] / ( lsEnergy[i1] + EPSILON ); + lsEnergyRelation = ( lsEnergyRelation < tempLsEnergyRelation ) ? lsEnergyRelation : tempLsEnergyRelation; + tempLsEnergyRelation = lsEnergy[i1] / ( lsEnergy[i3] + EPSILON ); + lsEnergyRelation = ( lsEnergyRelation < tempLsEnergyRelation ) ? lsEnergyRelation : tempLsEnergyRelation; + cohwideness = cohPanCoh * lsEnergyRelation; + + spreadCoh = ( cohwideness > stereoness ) ? cohwideness : stereoness; + if ( spreadCoh > 0.5f ) + { + if ( cohwideness > stereoness ) + { + tempCoh = stereoness - ( cohwideness - 0.5f ); + spreadCoh = ( tempCoh > 0.5f ) ? tempCoh : 0.5f; + } + } + spreadCoh = ( spreadCoh < 1.0f ) ? spreadCoh : 1.0f; + spreadCoh = ( spreadCoh > 0.0f ) ? spreadCoh : 0.0f; + + /* Compute energy ratio tuning parameter */ + lsEnergySum = sum_f( lsEnergy, numAnalysisChannels ) + EPSILON; + lsEnergyRelation = ( lsEnergy[i2] + lsEnergy[i3] ) / lsEnergySum; + stereoRatio = stereoCoh * lsEnergyRelation - surrCoh; + + lsEnergyRelation = ( lsEnergy[i1] + lsEnergy[i2] + lsEnergy[i3] ) / lsEnergySum; + cohPanRatio = cohPanCoh * lsEnergyRelation - surrCoh; + + cohRatio = ( stereoRatio > cohPanRatio ) ? stereoRatio : cohPanRatio; + cohRatio = ( cohRatio < 1.0f ) ? cohRatio : 1.0f; + cohRatio = ( cohRatio > 0.0f ) ? 
cohRatio : 0.0f;
+            }
+            else /* Otherwise, set spread coherence to zero */
+            {
+                spreadCoh = 0.0f;
+                cohRatio = 0.0f;
+                lsEnergySum = sum_f( lsEnergy, numAnalysisChannels );
+            }
+
+            /* Store values */
+            spreadCoherence[block_m_idx][band_m_idx] = spreadCoh;
+            surroundingCoherence[block_m_idx][band_m_idx] = surrCoh;
+            coherentEnergyRatio[band_m_idx] = cohRatio;
+        }
+
+        /* Determine energy ratios: diffuseness_m holds the reference-power weighted sum of the
+         * per-slot diffuseness, so dividing by the accumulated power gives the weighted mean.
+         * The direct-to-total ratio is its complement, but never lower than the
+         * coherence-based ratio computed above. */
+        for ( band_m_idx = 0; band_m_idx < hMcMasa->nbands; band_m_idx++ )
+        {
+            if ( renormalization_factor_diff[band_m_idx] > EPSILON )
+            {
+                diffuseness_m[band_m_idx] /= renormalization_factor_diff[band_m_idx];
+            }
+            else
+            {
+                diffuseness_m[band_m_idx] = 0.0f;
+            }
+
+            energyRatio[block_m_idx][band_m_idx] = 1.0f - diffuseness_m[band_m_idx];
+            energyRatio[block_m_idx][band_m_idx] = ( energyRatio[block_m_idx][band_m_idx] > coherentEnergyRatio[band_m_idx] ) ? energyRatio[block_m_idx][band_m_idx] : coherentEnergyRatio[band_m_idx];
+        }
+    }
+
+    return;
+}
+
+
+/* Compute downmix
+ *
+ * Mixes the first ( nchan_inp - 1 ) channels of data_f in place into the first
+ * nchan_transport channels and equalizes the result so that its smoothed energy
+ * follows the smoothed energy of the channels that were mixed.
+ *
+ *   nchan_transport == 2 : channels 3, 5, ... are added to channel 0 and channels
+ *                          4, 6, ... to channel 1; channel 2 is then added to both,
+ *                          scaled by INV_SQRT2.
+ *   nchan_transport == 1 : channels 1 .. ( nchan_inp - 2 ) are added to channel 0.
+ *   any other value      : no mixing is done; only the equalization is applied.
+ *
+ * hMcMasa          i/o: prevMultiChEne, prevDownmixEne and prevEQ are updated;
+ *                       interpolator[] is read
+ * data_f           i/o: input channels / transport channels (overwritten in place)
+ * input_frame      i  : number of samples processed per channel
+ * nchan_transport  i  : number of transport channels
+ * nchan_inp        i  : number of input channels
+ *
+ * NOTE(review): the last input channel is left out of the mix, presumably because
+ * it carries the LFE - confirm against the caller. */
+static void ivas_mcmasa_dmx(
+    MCMASA_ANA_HANDLE hMcMasa,
+    float data_f[][L_FRAME48k],
+    const int16_t input_frame,
+    const int16_t nchan_transport,
+    const int16_t nchan_inp )
+{
+    int16_t i, j;
+    int16_t numAnalysisChannels;
+    float dmx_c;
+    float multiChEne, downmixEne;
+    float prevEQ, currEQ, instEQ;
+    float alpha;
+
+    numAnalysisChannels = nchan_inp - 1;
+
+    /* Energy of the analysis channels, measured before they are overwritten by the mix */
+    multiChEne = 0.0f;
+    for ( j = 0; j < numAnalysisChannels; j++ )
+    {
+        for ( i = 0; i < input_frame; i++ )
+        {
+            multiChEne += data_f[j][i] * data_f[j][i];
+        }
+    }
+
+    if ( nchan_transport == 2 )
+    {
+        int16_t numSideChannels; /* Channels other than left, right, center */
+        int16_t leftIndex, rightIndex;
+
+        numSideChannels = numAnalysisChannels / 2 - 1;
+        for ( j = 0; j < numSideChannels; j++ )
+        {
+            leftIndex = j * 2 + 3;
+            rightIndex = j * 2 + 4;
+
+            for ( i = 0; i < input_frame; i++ )
+            {
+                data_f[0][i] += data_f[leftIndex][i];
+                data_f[1][i] += data_f[rightIndex][i];
+            }
+        }
+
+        for ( i = 0; i < input_frame; i++ )
+        {
+            dmx_c = INV_SQRT2 * data_f[2][i];
+            data_f[0][i] += dmx_c;
+            data_f[1][i] += dmx_c;
+        }
+    }
+    else if ( nchan_transport == 1 )
+    {
+        for ( i = 0; i < input_frame; i++ )
+        {
+            for ( j = 1; j < numAnalysisChannels; j++ )
+            {
+                data_f[0][i] += data_f[j][i];
+            }
+        }
+    }
+
+    /* Energy of the transport channels after mixing */
+    downmixEne = 0.0f;
+    for ( j = 0; j < nchan_transport; j++ )
+    {
+        for ( i = 0; i < input_frame; i++ )
+        {
+            downmixEne += data_f[j][i] * data_f[j][i];
+        }
+    }
+
+    alpha = 0.1f; /* weight of the current frame in the recursive energy averages below */
+    hMcMasa->prevMultiChEne = alpha * multiChEne + ( 1.0f - alpha ) * hMcMasa->prevMultiChEne;
+    hMcMasa->prevDownmixEne = alpha * downmixEne + ( 1.0f - alpha ) * hMcMasa->prevDownmixEne;
+
+    prevEQ = hMcMasa->prevEQ;
+    currEQ = sqrtf( hMcMasa->prevMultiChEne / ( hMcMasa->prevDownmixEne + EPSILON ) ); /* amplitude gain matching the two smoothed energies */
+    hMcMasa->prevEQ = currEQ;
+
+    for ( i = 0; i < input_frame; i++ )
+    {
+        instEQ = hMcMasa->interpolator[i] * currEQ + ( 1.0f - hMcMasa->interpolator[i] ) * prevEQ; /* interpolator[i] weights the new gain, its complement the previous one */
+        for ( j = 0; j < nchan_transport; j++ )
+        {
+            data_f[j][i] *= instEQ;
+        }
+    }
+
+    return;
+}
+
+
+/* Compute covariance matrix, i.e., xT * conj(x), and accumulate to the output
+ *
+ * For every channel pair ( i, j ) with i <= j, the product s[i] * conj( s[j] ) at bin
+ * "freq" is added to COVls: with s[i] = a + ib and s[j] = c + id the real part is
+ * ac + bd and the imaginary part is bc - ad. Entries below the diagonal are never
+ * written, and nothing is cleared here, so the caller owns the reset of COVls. */
+static void compute_cov_mtx(
+    float sr[MCMASA_MAX_ANA_CHANS][CLDFB_NO_CHANNELS_MAX], /* i : Input matrix, real, s[ch][freq] */
+    float si[MCMASA_MAX_ANA_CHANS][CLDFB_NO_CHANNELS_MAX], /* i : Input matrix, imag, s[ch][freq] */
+    const int16_t freq,                                    /* i : Freq to process */
+    const int16_t N,                                       /* i : Number of channels */
+    CovarianceMatrix *COVls                                /* o : Output matrix, contains upper part of cov mtx */
+)
+{
+    int16_t i, j;
+    float a, b, c, d;
+
+    for ( i = 0; i < N; i++ )
+    {
+        a = sr[i][freq];
+        b = si[i][freq];
+        for ( j = i; j < N; j++ )
+        {
+            c = sr[j][freq];
+            d = si[j][freq];
+            COVls->xr[i][j] += a * c + b * d;
+            COVls->xi[i][j] += b * c - a * d;
+        }
+    }
+
+    return;
+}
+
+/*-------------------------------------------------------------------------
+ * computeVerticalDiffuseness()
+ *
+ * Sums the buffered intensity and energy values over DIRAC_NO_COL_AVG_DIFF
+ * slots for each band and maps |summed intensity| / summed energy, after an
+ * offset and rescaling by VERTICAL_ENERGY_RATIO_OFFSET, to a diffuseness
+ * value limited to the range [0, 1]
+ *------------------------------------------------------------------------*/
+
+static void
computeVerticalDiffuseness(
+    float **buffer_intensity,     /* i : Intensity vectors */
+    const float *buffer_energy,   /* i : Energy */
+    const int16_t num_freq_bands, /* i : Number of frequency bands */
+    float *diffuseness            /* o : Estimated diffuseness */
+)
+{
+    float intensity_slow[MASA_FREQUENCY_BANDS];
+    float intensity_slow_abs[MASA_FREQUENCY_BANDS];
+    float energy_slow[MASA_FREQUENCY_BANDS];
+    int16_t i, k;
+    float tmp = 0;
+    const float *p_tmp_c;
+
+    /* Set variables to zero */
+    set_f( intensity_slow, 0.0f, MASA_FREQUENCY_BANDS );
+    set_f( energy_slow, 0.0f, MASA_FREQUENCY_BANDS );
+
+    /* Sum the DIRAC_NO_COL_AVG_DIFF buffered slots per band */
+    for ( i = 0; i < DIRAC_NO_COL_AVG_DIFF; ++i )
+    {
+        /* Energy slow: buffer_energy is a flat array with num_freq_bands values per slot */
+        p_tmp_c = buffer_energy + i * num_freq_bands;
+        for ( k = 0; k < num_freq_bands; k++ )
+        {
+            energy_slow[k] += *( p_tmp_c++ );
+        }
+
+        /* Intensity slow: buffer_intensity[i] is the band vector of slot i */
+        for ( k = 0; k < num_freq_bands; k++ )
+        {
+            intensity_slow[k] += buffer_intensity[i][k];
+        }
+    }
+
+    /* Compute absolute value */
+    for ( k = 0; k < num_freq_bands; k++ )
+    {
+        intensity_slow_abs[k] = fabsf( intensity_slow[k] );
+    }
+
+    /* Compute Diffuseness: one minus the offset-corrected intensity-to-energy ratio, limited to [0, 1] */
+    for ( i = 0; i < num_freq_bands; ++i )
+    {
+        tmp = intensity_slow_abs[i] / ( energy_slow[i] + EPSILON );
+        tmp = ( tmp - VERTICAL_ENERGY_RATIO_OFFSET ) / ( 1.0f - VERTICAL_ENERGY_RATIO_OFFSET ); /* Tuned to avoid effect due to ambience of vertically un-even setups */
+        tmp = 1.0f - tmp;
+        diffuseness[i] = ( ( tmp < 1.0f ) ? ( ( tmp < 0.0f ) ? 0.f : tmp ) : 1.0f );
+    }
+
+    return;
+}
+
+/* Compute an evenly spaced azimuth layout that keeps the angular order of the input.
+ *
+ * The input azimuths are ranked in ascending order, numChannels azimuths spaced by
+ * 360 / numChannels and placed symmetrically around 0 are generated, and the i-th
+ * smallest generated azimuth (rounded with roundf) is written to the position of the
+ * channel holding the i-th smallest input azimuth.
+ *
+ * ls_azimuth       i : input azimuths, numChannels values
+ * ls_azimuth_even  o : evenly spaced azimuths, numChannels values
+ * numChannels      i : number of channels; the local buffers hold MCMASA_MAX_ANA_CHANS entries
+ */
+static void computeEvenLayout(
+    const float *ls_azimuth,
+    float *ls_azimuth_even,
+    const int16_t numChannels )
+{
+    int16_t i;
+    int16_t j;
+    float ls_azimuth_temp[MCMASA_MAX_ANA_CHANS];
+    float ls_azimuth_even_ordered[MCMASA_MAX_ANA_CHANS];
+    int16_t ls_azimuth_order[MCMASA_MAX_ANA_CHANS];
+    float smallestAzimuth;
+    int16_t smallestAzimuthIndex;
+    float lsSpacing;
+    uint8_t oddLayout;
+    float startAzimuth;
+    int16_t numChannelsHalf;
+
+    lsSpacing = 360.0f / (float) numChannels;
+    oddLayout = numChannels % 2;
+    numChannelsHalf = numChannels / 2;
+
+    /* Rank the channels by ascending azimuth: repeatedly pick the smallest remaining value */
+    mvr2r( ls_azimuth, ls_azimuth_temp, numChannels );
+    for ( i = 0; i < numChannels; i++ )
+    {
+        smallestAzimuth = 1000.0f; /* sentinel; an azimuth of 1000 or more would never be selected */
+        smallestAzimuthIndex = 0;
+        for ( j = 0; j < numChannels; j++ )
+        {
+            if ( ls_azimuth_temp[j] < smallestAzimuth )
+            {
+                smallestAzimuth = ls_azimuth_temp[j];
+                smallestAzimuthIndex = j;
+            }
+        }
+        ls_azimuth_order[i] = smallestAzimuthIndex;
+        ls_azimuth_temp[smallestAzimuthIndex] = 1000.0f; /* mark as used */
+    }
+
+    /* Both branches make the generated azimuths symmetric around 0; an odd count includes 0 itself */
+    if ( oddLayout )
+    {
+        startAzimuth = -lsSpacing * ( (float) numChannelsHalf );
+    }
+    else
+    {
+        startAzimuth = -lsSpacing * ( (float) numChannelsHalf - 0.5f );
+    }
+
+    for ( i = 0; i < numChannels; i++ )
+    {
+        ls_azimuth_even_ordered[i] = (float) i * lsSpacing + startAzimuth;
+    }
+
+    for ( i = 0; i < numChannels; i++ )
+    {
+        ls_azimuth_even[ls_azimuth_order[i]] = roundf( ls_azimuth_even_ordered[i] );
+    }
+
+    return;
+}
+/* Fill a MASA metadata structure from estimated spatial parameters (one direction). */
+void ivas_create_masa_out_meta(
+    MASA_DECODER_EXT_OUT_META_HANDLE extOutMeta,                                 /* i/o: MASA metadata handle */
+    SPHERICAL_GRID_DATA *Sph_Grid16,                                             /* i: Spherical grid */
+    const int16_t nchan_transport,                                               /* i: Number of transport channels */
+    float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* i: Estimated elevation */
+    float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],   /* i: Estimated azimuth */
+    float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],        /* i: Estimated 
direct-to-total ratio */
+    float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],     /* i: Estimated spread coherence */
+    float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS] /* i: Estimated surround coherence */
+)
+{
+    const uint8_t ivasmasaFormatDescriptor[8] = { 0x49, 0x56, 0x41, 0x53, 0x4D, 0x41, 0x53, 0x41 }; /* "IVASMASA" */
+    int16_t i, sf, band;
+    uint8_t numFrequencyBands;
+    uint8_t numDirections;
+    uint16_t spherical_index;
+
+
+    numDirections = 1;
+    numFrequencyBands = MASA_FREQUENCY_BANDS; /* every band is written, so the input arrays must be valid for all of them */
+
+    /* Construct descriptive meta */
+    for ( i = 0; i < 8; i++ )
+    {
+        extOutMeta->descriptiveMeta.formatDescriptor[i] = ivasmasaFormatDescriptor[i];
+    }
+    extOutMeta->descriptiveMeta.numberOfDirections = numDirections - 1;                /* stored as count minus one */
+    extOutMeta->descriptiveMeta.numberOfChannels = (uint8_t) ( nchan_transport - 1 ); /* stored as count minus one */
+    /* Following correspond to "unknown" values */
+    extOutMeta->descriptiveMeta.sourceFormat = 0x0u;
+    extOutMeta->descriptiveMeta.transportDefinition = 0x0u;
+    extOutMeta->descriptiveMeta.channelAngle = 0x0u;
+    extOutMeta->descriptiveMeta.channelDistance = 0x0u;
+    extOutMeta->descriptiveMeta.channelLayout = 0x0u;
+
+    /* Construct spatial metadata from estimated values.
+     * NOTE(review): the ratio and coherence inputs are scaled by UINT8_MAX and cast to uint8_t
+     * without clamping, so they are assumed to lie within [0, 1] - confirm that every caller
+     * guarantees this for all MASA_FREQUENCY_BANDS bands. */
+    for ( sf = 0; sf < MAX_PARAM_SPATIAL_SUBFRAMES; sf++ )
+    {
+        /* Spherical index; the second direction is not used and is set to SPH_IDX_FRONT.
+         * The angles are passed by address - presumably index_theta_phi_16() may adjust them; verify. */
+        for ( band = 0; band < numFrequencyBands; band++ )
+        {
+            spherical_index = index_theta_phi_16( &elevation_m_values[sf][band], &azimuth_m_values[sf][band], Sph_Grid16 );
+            extOutMeta->directionIndex[0][sf][band] = spherical_index;
+            extOutMeta->directionIndex[1][sf][band] = SPH_IDX_FRONT;
+        }
+
+        /* Direct-to-total ratio, scaled to 0..UINT8_MAX; zero for the unused second direction */
+        for ( band = 0; band < numFrequencyBands; band++ )
+        {
+            extOutMeta->directToTotalRatio[0][sf][band] = (uint8_t) floorf( energyRatio[sf][band] * UINT8_MAX );
+            extOutMeta->directToTotalRatio[1][sf][band] = 0;
+        }
+
+        /* Spread coherence, scaled to 0..UINT8_MAX; zero for the unused second direction */
+        for ( band = 0; band < numFrequencyBands; band++ )
+        {
+            extOutMeta->spreadCoherence[0][sf][band] = (uint8_t) floorf( spreadCoherence[sf][band] * UINT8_MAX );
+            extOutMeta->spreadCoherence[1][sf][band] = 0;
+        }
+
+        /* Diffuse-to-total ratio = 1 - sum(direct-to-total ratios); computed from the quantized
+         * direct-to-total value so that the two stored values add up to UINT8_MAX */
+        for ( band = 0; band < numFrequencyBands; band++ )
+        {
+            extOutMeta->diffuseToTotalRatio[sf][band] = UINT8_MAX - (uint8_t) floorf( energyRatio[sf][band] * UINT8_MAX );
+        }
+
+        /* Surround coherence, scaled to 0..UINT8_MAX */
+        for ( band = 0; band < numFrequencyBands; band++ )
+        {
+            extOutMeta->surroundCoherence[sf][band] = (uint8_t) floorf( surroundingCoherence[sf][band] * UINT8_MAX );
+        }
+    }
+
+    return;
+}
+
+#endif /* MASA_PREREND */
diff --git a/lib_rend/ivas_omasa_ana.c b/lib_rend/ivas_omasa_ana.c
new file mode 100644
index 0000000000000000000000000000000000000000..28a3af9133ddb83201365dc37dcf936577829a5a
--- /dev/null
+++ b/lib_rend/ivas_omasa_ana.c
@@ -0,0 +1,586 @@
+/******************************************************************************************************
+
+   (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+   Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+   Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+   Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+   contributors to this repository. All Rights Reserved.
+
+   This software is protected by copyright law and by international treaties.
+   The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+   Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+   Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+   Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+   contributors to this repository retain full ownership rights in their respective contributions in
+   the software.
This notice grants no license of any kind, including but not limited to patent + license, nor is any license granted by implication, estoppel or otherwise. + + Contributors are required to enter into the IVAS codec Public Collaboration agreement before making + contributions. + + This software is provided "AS IS", without any express or implied warranties. The software is in the + development stage. It is intended exclusively for experts who have experience with such software and + solely for the purpose of inspection. All implied warranties of non-infringement, merchantability + and fitness for a particular purpose are hereby disclaimed and excluded. + + Any dispute, controversy or claim arising under or in relation to providing this software shall be + submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in + accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and + the United Nations Convention on Contracts on the International Sales of Goods. 
+
+*******************************************************************************************************/
+
+#include "options.h"
+#include <math.h>
+#include <stdlib.h>
+#include "ivas_cnst.h"
+#include "ivas_prot_rend.h"
+#include "ivas_prot.h"
+#include "prot.h"
+#include "ivas_stat_rend.h"
+#include "ivas_rom_com.h"
+#ifdef DEBUGGING
+#include "debug.h"
+#endif
+#include "wmc_auto.h"
+
+#ifdef MASA_PREREND
+
+
+/*-------------------------------------------------------------------------
+ * Local function prototypes
+ *------------------------------------------------------------------------*/
+
+static void ivas_omasa_param_est_ana( OMASA_ANA_HANDLE hOMasa, float data_f[][L_FRAME48k], float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], const int16_t input_frame, const int16_t nchan_ism );
+
+static void ivas_omasa_dmx( float data_in_f[][L_FRAME48k], const int16_t input_frame, const int16_t nchan_transport, const int16_t nchan_ism, const float ism_azimuth[MAX_NUM_OBJECTS], const float ism_elevation[MAX_NUM_OBJECTS], float prev_gains[][MASA_MAX_TRANSPORT_CHANNELS], const float interpolator[L_FRAME48k] );
+
+
+/*--------------------------------------------------------------------------*
+ * ivas_omasa_ana_open()
+ *
+ * Allocate and initialize OMASA handle
+ *
+ * On success the new handle is stored in *hOMasaPtr and IVAS_ERR_OK is
+ * returned. If any allocation fails, everything allocated so far is released
+ * again, *hOMasaPtr is left untouched and IVAS_ERR_FAILED_ALLOC is returned.
+ *--------------------------------------------------------------------------*/
+
+ivas_error ivas_omasa_ana_open(
+    OMASA_ANA_HANDLE *hOMasaPtr, /* i/o: OMASA data handle pointer */
+    int32_t input_Fs,            /* i: Sampling frequency */
+    uint16_t total_num_objects   /* i: Number of objects */
+)
+{
+    int16_t i, j;
+    OMASA_ANA_HANDLE hOMasa;
+    int16_t numAnalysisChannels;
+    int16_t maxBin, input_frame;
+    ivas_error error;
+
+    error = IVAS_ERR_OK;
+
+    if ( ( hOMasa = (OMASA_ANA_HANDLE) malloc( sizeof( OMASA_ANA_DATA ) ) ) == NULL )
+    {
+        return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for OMASA\n" ) );
+    }
+
+    /* Mark every buffer owned by the handle as "not allocated" so that
+     * ivas_omasa_ana_close() can release a partially built handle on failure */
+    for ( i = 0; i < DIRAC_NUM_DIMS; i++ )
+    {
+        hOMasa->direction_vector_m[i] = NULL;
+        for ( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ )
+        {
+            hOMasa->buffer_intensity_real[i][j] = NULL;
+        }
+    }
+    hOMasa->hMasaOut = NULL;
+    hOMasa->sph_grid16 = NULL;
+
+    /* NOTE(review): total_num_objects is not checked against MAX_NUM_OBJECTS, the size
+     * implied for cldfbAnaEnc[] by the loop below - confirm that callers guarantee it */
+    numAnalysisChannels = (int16_t) total_num_objects;
+
+    /* Determine the number of bands */
+    hOMasa->nbands = MASA_FREQUENCY_BANDS;
+
+    /* Determine band grouping */
+    mvs2s( MASA_band_grouping_24, hOMasa->band_grouping, 24 + 1 );
+
+    /* Cut the band grouping at the highest CLDFB bin available at this sampling rate;
+     * this can leave nbands smaller than MASA_FREQUENCY_BANDS */
+    maxBin = (int16_t) ( input_Fs * INV_CLDFB_BANDWIDTH + 0.5f );
+    for ( i = 1; i < hOMasa->nbands + 1; i++ )
+    {
+        if ( hOMasa->band_grouping[i] >= maxBin )
+        {
+            hOMasa->band_grouping[i] = maxBin;
+            hOMasa->nbands = i;
+            break;
+        }
+    }
+
+    /* Determine block grouping */
+    mvs2s( DirAC_block_grouping, hOMasa->block_grouping, MAX_PARAM_SPATIAL_SUBFRAMES + 1 );
+
+    /* open/initialize CLDFB */
+    /* NOTE(review): the result of openCldfb() is not checked here, as in the original code;
+     * if it reports errors they should be propagated - verify against its declaration */
+    hOMasa->num_Cldfb_instances = numAnalysisChannels;
+    for ( i = 0; i < hOMasa->num_Cldfb_instances; i++ )
+    {
+        openCldfb( &( hOMasa->cldfbAnaEnc[i] ), CLDFB_ANALYSIS, input_Fs, CLDFB_PROTOTYPE_5_00MS );
+    }
+
+    for ( ; i < MAX_NUM_OBJECTS; i++ )
+    {
+        hOMasa->cldfbAnaEnc[i] = NULL;
+    }
+
+    /* intensity 3-dim */
+    for ( i = 0; i < DIRAC_NUM_DIMS; i++ )
+    {
+        if ( ( hOMasa->direction_vector_m[i] = (float **) malloc( MAX_PARAM_SPATIAL_SUBFRAMES * sizeof( float * ) ) ) == NULL )
+        {
+            error = IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for OMASA\n" );
+            ivas_omasa_ana_close( &hOMasa );
+            return error;
+        }
+
+        /* malloc() does not clear the pointer table; NULL it before filling so cleanup stays safe */
+        for ( j = 0; j < MAX_PARAM_SPATIAL_SUBFRAMES; j++ )
+        {
+            hOMasa->direction_vector_m[i][j] = NULL;
+        }
+
+        for ( j = 0; j < MAX_PARAM_SPATIAL_SUBFRAMES; j++ )
+        {
+            if ( ( hOMasa->direction_vector_m[i][j] = (float *) malloc( MASA_FREQUENCY_BANDS * sizeof( float ) ) ) == NULL )
+            {
+                error = IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for OMASA\n" );
+                ivas_omasa_ana_close( &hOMasa );
+                return error;
+            }
+            set_zero( hOMasa->direction_vector_m[i][j], MASA_FREQUENCY_BANDS );
+        }
+    }
+
+    for ( i = 0; i < DIRAC_NUM_DIMS; i++ )
+    {
+        for ( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ )
+        {
+            if ( ( hOMasa->buffer_intensity_real[i][j] = (float *) malloc( MASA_FREQUENCY_BANDS * sizeof( float ) ) ) == NULL )
+            {
+                error = IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for OMASA\n" );
+                ivas_omasa_ana_close( &hOMasa );
+                return error;
+            }
+            set_zero( hOMasa->buffer_intensity_real[i][j], MASA_FREQUENCY_BANDS );
+        }
+    }
+
+    set_zero( hOMasa->buffer_energy, DIRAC_NO_COL_AVG_DIFF * MASA_FREQUENCY_BANDS );
+
+    for ( i = 0; i < MAX_NUM_OBJECTS; i++ )
+    {
+        set_f( hOMasa->prev_object_dm_gains[i], (float) sqrt( 0.5 ), MASA_MAX_TRANSPORT_CHANNELS );
+    }
+
+    /* Linear ramp from 0 towards 1 over one frame, used to cross-fade downmix gains */
+    input_frame = (int16_t) ( input_Fs / FRAMES_PER_SEC );
+    for ( i = 0; i < input_frame; i++ )
+    {
+        hOMasa->interpolator[i] = ( (float) i ) / ( (float) input_frame );
+    }
+
+    hOMasa->index_buffer_intensity = 0;
+
+    if ( ( hOMasa->hMasaOut = (MASA_DECODER_EXT_OUT_META_HANDLE) malloc( sizeof( MASA_DECODER_EXT_OUT_META ) ) ) == NULL )
+    {
+        error = IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA decoder\n" );
+        ivas_omasa_ana_close( &hOMasa );
+        return error;
+    }
+
+    if ( ( hOMasa->sph_grid16 = (SPHERICAL_GRID_DATA *) malloc( sizeof( SPHERICAL_GRID_DATA ) ) ) == NULL )
+    {
+        error = IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA decoder\n" );
+        ivas_omasa_ana_close( &hOMasa );
+        return error;
+    }
+    generate_gridEq( hOMasa->sph_grid16 );
+
+    for ( i = 0; i < MAX_PARAM_SPATIAL_SUBFRAMES; i++ )
+    {
+        set_zero( hOMasa->energy[i], MASA_FREQUENCY_BANDS );
+    }
+
+    set_zero( hOMasa->ism_azimuth, MAX_NUM_OBJECTS );
+    set_zero( hOMasa->ism_elevation, MAX_NUM_OBJECTS );
+
+    ( *hOMasaPtr ) = hOMasa;
+
+    return error;
+}
+
+
+/*--------------------------------------------------------------------------*
+ * ivas_omasa_ana_close()
+ *
+ * Close OMASA handle
+ *
+ * Releases the CLDFB instances and all buffers owned by the handle, frees
+ * the handle itself and sets *hOMasa to NULL. A NULL argument or an already
+ * closed handle is ignored.
+ *--------------------------------------------------------------------------*/
+
+void ivas_omasa_ana_close(
+    OMASA_ANA_HANDLE *hOMasa /* i/o: analysis OMASA handle */
+)
+{
+    int16_t i, j;
+
+    if ( hOMasa == NULL || *hOMasa == NULL )
+    {
+        return;
+    }
+
+    for ( i = 0; i < ( *hOMasa )->num_Cldfb_instances; i++ )
+    {
+        deleteCldfb( &( ( *hOMasa )->cldfbAnaEnc[i] ) );
+    }
+
+    for ( i = 0; i < DIRAC_NUM_DIMS; i++ )
+    {
+        /* The pointer table is NULL if ivas_omasa_ana_open() failed before allocating it */
+        if ( ( *hOMasa )->direction_vector_m[i] != NULL )
+        {
+            for ( j = 0; j < MAX_PARAM_SPATIAL_SUBFRAMES; j++ )
+            {
+                free( ( *hOMasa )->direction_vector_m[i][j] );
+                ( *hOMasa )->direction_vector_m[i][j] = NULL;
+            }
+        }
+
+        for ( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ )
+        {
+            free( ( *hOMasa )->buffer_intensity_real[i][j] );
+            ( *hOMasa )->buffer_intensity_real[i][j] = NULL;
+        }
+
+        free( ( *hOMasa )->direction_vector_m[i] );
+        ( *hOMasa )->direction_vector_m[i] = NULL;
+ } + + free( ( *hOMasa )->hMasaOut ); + ( *hOMasa )->hMasaOut = NULL; + free( ( *hOMasa )->sph_grid16 ); + ( *hOMasa )->sph_grid16 = NULL; + + free( ( *hOMasa ) ); + ( *hOMasa ) = NULL; + + return; +} + + +/*--------------------------------------------------------------------------* + * ivas_omasa_ana() + * + * OMASA analysis function + *--------------------------------------------------------------------------*/ + +void ivas_omasa_ana( + OMASA_ANA_HANDLE hOMasa, /* i/o: OMASA analysis handle */ + float data_in_f[][L_FRAME48k], /* i/o: Input / transport audio signals */ + const int16_t input_frame, /* i : Input frame size */ + const int16_t nchan_transport, /* i : Number of transport channels */ + const int16_t nchan_ism /* i : Number of objects for parameter analysis */ +) +{ + float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + + + /* Estimate MASA parameters from the objects */ + ivas_omasa_param_est_ana( hOMasa, data_in_f, elevation_m_values, azimuth_m_values, energyRatio, spreadCoherence, surroundingCoherence, input_frame, nchan_ism ); + + /* Create MASA metadata buffer from the estimated values */ + ivas_create_masa_out_meta( hOMasa->hMasaOut, hOMasa->sph_grid16, nchan_transport, elevation_m_values, azimuth_m_values, energyRatio, spreadCoherence, surroundingCoherence ); + + /* Downmix */ + ivas_omasa_dmx( data_in_f, input_frame, nchan_transport, nchan_ism, hOMasa->ism_azimuth, hOMasa->ism_elevation, hOMasa->prev_object_dm_gains, hOMasa->interpolator ); + + return; +} + + +/*--------------------------------------------------------------------------* + * Local functions + 
*--------------------------------------------------------------------------*/
+
+/* Estimate MASA parameters from the objects
+ *
+ * For every parameter subframe the object signals are transformed with the CLDFB
+ * analysis, mixed to a first-order Ambisonics signal using the object directions
+ * stored in the handle, and analysed for direction and diffuseness.
+ *
+ * hOMasa               i/o: analysis handle; chnlToFoaMtx, energy, direction_vector_m,
+ *                           the intensity/energy averaging buffers and
+ *                           index_buffer_intensity are updated
+ * data_f               i  : object signals, nchan_ism channels
+ * elevation_m_values   o  : estimated elevation per subframe and band
+ * azimuth_m_values     o  : estimated azimuth per subframe and band
+ * energyRatio          o  : estimated direct-to-total ratio per subframe and band
+ * spreadCoherence      o  : always set to zero
+ * surroundingCoherence o  : always set to zero
+ * input_frame          i  : frame length in samples
+ * nchan_ism            i  : number of objects
+ *
+ * Only bands below hOMasa->nbands are written to the output arrays; bands from
+ * hOMasa->nbands up to MASA_FREQUENCY_BANDS are left as the caller provided them. */
+static void ivas_omasa_param_est_ana(
+    OMASA_ANA_HANDLE hOMasa,
+    float data_f[][L_FRAME48k],
+    float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],
+    float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],
+    float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],
+    float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],
+    float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],
+    const int16_t input_frame,
+    const int16_t nchan_ism )
+{
+    float reference_power[MASA_FREQUENCY_BANDS];
+    int16_t ts, i, d, j;
+    int16_t num_freq_bins, num_freq_bands, index;
+    float dir_v[DIRAC_NUM_DIMS];
+    int16_t l_ts;
+    float Chnl_RealBuffer[MAX_NUM_OBJECTS][CLDFB_NO_CHANNELS_MAX];
+    float Chnl_ImagBuffer[MAX_NUM_OBJECTS][CLDFB_NO_CHANNELS_MAX];
+    float Foa_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX];
+    float Foa_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX];
+    float intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS];
+    float direction_vector[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS];
+    float diffuseness_vector[MASA_FREQUENCY_BANDS];
+    float diffuseness_m[MASA_FREQUENCY_BANDS];
+
+    int16_t band_m_idx, block_m_idx;
+    float renormalization_factor_diff[MASA_FREQUENCY_BANDS];
+    float norm_tmp;
+    int16_t mrange[2];
+    int16_t brange[2];
+
+    num_freq_bins = hOMasa->cldfbAnaEnc[0]->no_channels; /* reads instance 0, so at least one CLDFB instance must be open */
+    num_freq_bands = hOMasa->nbands;
+    l_ts = input_frame / CLDFB_NO_COL_MAX; /* samples per CLDFB time slot */
+
+
+    /* Compute ISM to FOA matrices: rows are W (unity), Y, Z and X gains for the object
+     * direction; the stored angles are scaled by EVS_PI / 180 before sinf / cosf */
+    for ( i = 0; i < nchan_ism; i++ )
+    {
+        hOMasa->chnlToFoaMtx[0][i] = 1.0f;
+        hOMasa->chnlToFoaMtx[1][i] = sinf( ( hOMasa->ism_azimuth[i] / 180.0f * EVS_PI ) ) * cosf( ( hOMasa->ism_elevation[i] / 180.0f * EVS_PI ) );
+        hOMasa->chnlToFoaMtx[2][i] = sinf( ( hOMasa->ism_elevation[i] / 180.0f * EVS_PI ) );
+        hOMasa->chnlToFoaMtx[3][i] = cosf( ( hOMasa->ism_azimuth[i] / 180.0f * EVS_PI ) ) * cosf( ( hOMasa->ism_elevation[i] / 180.0f * EVS_PI ) );
+    }
+
+    /* do processing over all CLDFB time slots */
+    for ( block_m_idx = 0; block_m_idx < MAX_PARAM_SPATIAL_SUBFRAMES; block_m_idx++ )
+    {
+        /* time slots [ mrange[0], mrange[1] ) belong to this subframe */
+        mrange[0] = hOMasa->block_grouping[block_m_idx];
+        mrange[1] = hOMasa->block_grouping[block_m_idx + 1];
+
+        for ( band_m_idx = 0; band_m_idx < hOMasa->nbands; band_m_idx++ )
+        {
+            hOMasa->direction_vector_m[0][block_m_idx][band_m_idx] = 0.0f;
+            hOMasa->direction_vector_m[1][block_m_idx][band_m_idx] = 0.0f;
+            hOMasa->direction_vector_m[2][block_m_idx][band_m_idx] = 0.0f;
+        }
+
+        /* Need to initialize renormalization_factors, and variables to be normalized */
+        set_zero( renormalization_factor_diff, hOMasa->nbands );
+        set_zero( diffuseness_m, hOMasa->nbands );
+        set_zero( hOMasa->energy[block_m_idx], MASA_FREQUENCY_BANDS );
+
+        for ( ts = mrange[0]; ts < mrange[1]; ts++ )
+        {
+            for ( i = 0; i < nchan_ism; i++ )
+            {
+                cldfbAnalysis_ts( &( data_f[i][l_ts * ts] ), Chnl_RealBuffer[i], Chnl_ImagBuffer[i], l_ts, hOMasa->cldfbAnaEnc[i] );
+            }
+
+            /* Compute channel-based energy for metadata processing */
+            for ( band_m_idx = 0; band_m_idx < num_freq_bands; band_m_idx++ )
+            {
+                brange[0] = hOMasa->band_grouping[band_m_idx];
+                brange[1] = hOMasa->band_grouping[band_m_idx + 1];
+                for ( j = brange[0]; j < brange[1]; j++ )
+                {
+                    for ( i = 0; i < nchan_ism; i++ )
+                    {
+                        hOMasa->energy[block_m_idx][band_m_idx] += Chnl_RealBuffer[i][j] * Chnl_RealBuffer[i][j] + Chnl_ImagBuffer[i][j] * Chnl_ImagBuffer[i][j];
+                    }
+                }
+            }
+
+            /* Compute FOA: each component is the sum of all objects weighted by the matching matrix row */
+            /* W */
+            mvr2r( Chnl_RealBuffer[0], Foa_RealBuffer[0], num_freq_bins );
+            mvr2r( Chnl_ImagBuffer[0], Foa_ImagBuffer[0], num_freq_bins );
+            for ( i = 1; i < nchan_ism; i++ )
+            {
+                v_add( Chnl_RealBuffer[i], Foa_RealBuffer[0], Foa_RealBuffer[0], num_freq_bins );
+                v_add( Chnl_ImagBuffer[i], Foa_ImagBuffer[0], Foa_ImagBuffer[0], num_freq_bins );
+            }
+
+            /* Y */
+            v_multc( Chnl_RealBuffer[0], hOMasa->chnlToFoaMtx[1][0], Foa_RealBuffer[1], num_freq_bins );
+            v_multc( Chnl_ImagBuffer[0], hOMasa->chnlToFoaMtx[1][0], Foa_ImagBuffer[1], num_freq_bins );
+            for ( i = 1; i < nchan_ism; i++ )
+            {
+                v_multc_acc( Chnl_RealBuffer[i], hOMasa->chnlToFoaMtx[1][i], Foa_RealBuffer[1], num_freq_bins );
+                v_multc_acc( Chnl_ImagBuffer[i], hOMasa->chnlToFoaMtx[1][i], Foa_ImagBuffer[1], num_freq_bins );
+            }
+
+            /* Z */
+            v_multc( Chnl_RealBuffer[0], hOMasa->chnlToFoaMtx[2][0], Foa_RealBuffer[2], num_freq_bins );
+            v_multc( Chnl_ImagBuffer[0], hOMasa->chnlToFoaMtx[2][0], Foa_ImagBuffer[2], num_freq_bins );
+            for ( i = 1; i < nchan_ism; i++ )
+            {
+                v_multc_acc( Chnl_RealBuffer[i], hOMasa->chnlToFoaMtx[2][i], Foa_RealBuffer[2], num_freq_bins );
+                v_multc_acc( Chnl_ImagBuffer[i], hOMasa->chnlToFoaMtx[2][i], Foa_ImagBuffer[2], num_freq_bins );
+            }
+
+            /* X */
+            v_multc( Chnl_RealBuffer[0], hOMasa->chnlToFoaMtx[3][0], Foa_RealBuffer[3], num_freq_bins );
+            v_multc( Chnl_ImagBuffer[0], hOMasa->chnlToFoaMtx[3][0], Foa_ImagBuffer[3], num_freq_bins );
+            for ( i = 1; i < nchan_ism; i++ )
+            {
+                v_multc_acc( Chnl_RealBuffer[i], hOMasa->chnlToFoaMtx[3][i], Foa_RealBuffer[3], num_freq_bins );
+                v_multc_acc( Chnl_ImagBuffer[i], hOMasa->chnlToFoaMtx[3][i], Foa_ImagBuffer[3], num_freq_bins );
+            }
+
+            /* Direction estimation */
+            computeIntensityVector_ana( hOMasa->band_grouping, Foa_RealBuffer, Foa_ImagBuffer, num_freq_bands, intensity_real );
+            computeDirectionVectors( intensity_real[0], intensity_real[1], intensity_real[2], 0, num_freq_bands, direction_vector[0], direction_vector[1], direction_vector[2] );
+
+            /* Power estimation for diffuseness */
+            computeReferencePower_ana( hOMasa->band_grouping, Foa_RealBuffer, Foa_ImagBuffer, reference_power, num_freq_bands );
+
+            /* Fill buffers of length "averaging_length" time slots for intensity and energy;
+             * the index cycles through 1..DIRAC_NO_COL_AVG_DIFF and is used as ( index - 1 ) */
+            hOMasa->index_buffer_intensity = ( hOMasa->index_buffer_intensity % DIRAC_NO_COL_AVG_DIFF ) + 1; /* averaging_length = 32 */
+            index = hOMasa->index_buffer_intensity;
+            for ( i = 0; i < DIRAC_NUM_DIMS; i++ )
+            {
+                /* only real part needed */
+                mvr2r( intensity_real[i], &( hOMasa->buffer_intensity_real[i][index - 1][0] ), num_freq_bands );
+            }
+            mvr2r( reference_power, &( hOMasa->buffer_energy[( index - 1 ) * num_freq_bands] ), num_freq_bands );
+
+            computeDiffuseness( hOMasa->buffer_intensity_real, hOMasa->buffer_energy, num_freq_bands, diffuseness_vector );
+
+            /* Accumulate over the slots of the subframe: directions weighted by the non-diffuse
+             * power, diffuseness weighted by the reference power */
+            for ( band_m_idx = 0; band_m_idx < hOMasa->nbands; band_m_idx++ )
+            {
+                norm_tmp = reference_power[band_m_idx] * ( 1 - diffuseness_vector[band_m_idx] );
+
+                hOMasa->direction_vector_m[0][block_m_idx][band_m_idx] += norm_tmp * direction_vector[0][band_m_idx];
+                hOMasa->direction_vector_m[1][block_m_idx][band_m_idx] += norm_tmp * direction_vector[1][band_m_idx];
+                hOMasa->direction_vector_m[2][block_m_idx][band_m_idx] += norm_tmp * direction_vector[2][band_m_idx];
+
+                diffuseness_m[band_m_idx] += reference_power[band_m_idx] * diffuseness_vector[band_m_idx];
+                renormalization_factor_diff[band_m_idx] += reference_power[band_m_idx];
+            }
+        }
+
+        for ( band_m_idx = 0; band_m_idx < hOMasa->nbands; band_m_idx++ )
+        {
+            for ( d = 0; d < DIRAC_NUM_DIMS; d++ )
+            {
+                dir_v[d] = hOMasa->direction_vector_m[d][block_m_idx][band_m_idx];
+            }
+            ivas_qmetadata_direction_vector_to_azimuth_elevation( dir_v, &azimuth_m_values[block_m_idx][band_m_idx], &elevation_m_values[block_m_idx][band_m_idx] );
+        }
+
+        /* Determine energy ratios: power-weighted mean diffuseness, then its complement */
+        for ( band_m_idx = 0; band_m_idx < hOMasa->nbands; band_m_idx++ )
+        {
+            if ( renormalization_factor_diff[band_m_idx] > EPSILON )
+            {
+                diffuseness_m[band_m_idx] /= renormalization_factor_diff[band_m_idx];
+            }
+            else
+            {
+                diffuseness_m[band_m_idx] = 0.0f;
+            }
+
+            energyRatio[block_m_idx][band_m_idx] = 1.0f - diffuseness_m[band_m_idx];
+        }
+
+        /* Set coherences to zero, as this mode is used at lowest bit rates where the coherences are not transmitted */
+        for ( band_m_idx = 0; band_m_idx < hOMasa->nbands; band_m_idx++ )
+        {
+            spreadCoherence[block_m_idx][band_m_idx] = 0.0f;
+            surroundingCoherence[block_m_idx][band_m_idx] = 0.0f;
+        }
+    }
+
+    return;
+}
+
+
+/* Compute downmix
+ *
+ * Pans each of the nchan_ism objects into the transport channels and replaces the
+ * first nchan_transport channels of data_in_f with the result.
+ *
+ * The gains of an object come from ivas_ism_get_stereo_gains() for its azimuth and
+ * elevation. Within the frame the applied gain moves from the previous gain to the
+ * new one: interpolator[k] weights the new gain and ( 1 - interpolator[k] ) the
+ * previous one. prev_gains is updated with the new gains for the next call.
+ *
+ * The mix is built in a temporary buffer because the object signals share data_in_f
+ * with the output.
+ *
+ * NOTE(review): only gains[0] and gains[1] are filled, so nchan_transport is assumed
+ * not to exceed 2 - confirm against MASA_MAX_TRANSPORT_CHANNELS and the callers. */
+static void ivas_omasa_dmx(
+    float data_in_f[][L_FRAME48k],
+    const int16_t input_frame,
+    const int16_t nchan_transport,
+    const int16_t nchan_ism,
+    const float ism_azimuth[MAX_NUM_OBJECTS],
+    const float ism_elevation[MAX_NUM_OBJECTS],
+    float prev_gains[][MASA_MAX_TRANSPORT_CHANNELS],
+    const float interpolator[L_FRAME48k] )
+{
+    int16_t i, j, k;
+    float azimuth, elevation;
+    float gains[MASA_MAX_TRANSPORT_CHANNELS];
+    float g1, g2;
+    float data_out_f[MASA_MAX_TRANSPORT_CHANNELS][L_FRAME48k];
+
+
+    for ( i = 0; i < nchan_transport; i++ )
+    {
+        set_zero( data_out_f[i], input_frame );
+    }
+
+    for ( i = 0; i < nchan_ism; i++ )
+    {
+        azimuth = ism_azimuth[i];
+        elevation = ism_elevation[i];
+
+        ivas_ism_get_stereo_gains( azimuth, elevation, &gains[0], &gains[1] );
+
+        /* Downmix using the panning gains */
+        for ( j = 0; j < nchan_transport; j++ )
+        {
+            if ( fabsf( gains[j] ) > 0.0 || fabsf( prev_gains[i][j] ) > 0.0f ) /* skip objects that contribute nothing to this channel */
+            {
+                for ( k = 0; k < input_frame; k++ )
+                {
+                    g1 = interpolator[k];
+                    g2 = 1.0f - g1;
+                    data_out_f[j][k] += ( g1 * gains[j] + g2 * prev_gains[i][j] ) * data_in_f[i][k];
+                }
+            }
+            prev_gains[i][j] = gains[j];
+        }
+    }
+
+    for ( i = 0; i < nchan_transport; i++ )
+    {
+        mvr2r( data_out_f[i], data_in_f[i], input_frame );
+    }
+
+
+    return;
+}
+
+/* Compute the real part of the per-band intensity vector of a first-order signal:
+ * for each band, the X, Y and Z components multiplied by the conjugate of W are summed
+ * over the CLDFB bins [ band_grouping[i], band_grouping[i + 1] ). */
+void computeIntensityVector_ana(
+    const int16_t *band_grouping,                                /* i : Band grouping for estimation */
+    float Cldfb_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /* i : Real part of input signal */
+    float Cldfb_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /* i : Imag part of input signal */
+    const int16_t num_frequency_bands,                           /* i : Number of frequency bands */
+    float intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS]   /* o : Intensity vector */
+)
+{
+    /* Reminder
+     * X = a + ib; Y = c + id
+     * X*Y = ac - bd + i(ad +bc)
+     */
+    int16_t i, j;
+    
float real, img; + int16_t brange[2]; + + for ( i = 0; i < num_frequency_bands; i++ ) + { + brange[0] = band_grouping[i]; + brange[1] = band_grouping[i + 1]; + + intensity_real[0][i] = 0; + intensity_real[1][i] = 0; + intensity_real[2][i] = 0; + + for ( j = brange[0]; j < brange[1]; j++ ) + { + real = Cldfb_RealBuffer[0][j]; + img = Cldfb_ImagBuffer[0][j]; + intensity_real[0][i] += Cldfb_RealBuffer[3][j] * real + Cldfb_ImagBuffer[3][j] * img; /* Intensity is XYZ order, audio is WYZX order. */ + intensity_real[1][i] += Cldfb_RealBuffer[1][j] * real + Cldfb_ImagBuffer[1][j] * img; + intensity_real[2][i] += Cldfb_RealBuffer[2][j] * real + Cldfb_ImagBuffer[2][j] * img; + } + } + + return; +} + + +void computeReferencePower_ana( + const int16_t *band_grouping, /* i : Band grouping for estimation */ + float Cldfb_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /* i : Real part of input signal */ + float Cldfb_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /* i : Imag part of input signal */ + float *reference_power, /* o : Estimated power */ + const int16_t num_freq_bands /* i : Number of frequency bands */ +) +{ + int16_t brange[2]; + int16_t ch_idx, i, j; + + for ( i = 0; i < num_freq_bands; i++ ) + { + brange[0] = band_grouping[i]; + brange[1] = band_grouping[i + 1]; + reference_power[i] = 0; + + for ( ch_idx = 0; ch_idx < FOA_CHANNELS; ch_idx++ ) + { + /* abs()^2 */ + for ( j = brange[0]; j < brange[1]; j++ ) + { + reference_power[i] += ( Cldfb_RealBuffer[ch_idx][j] * Cldfb_RealBuffer[ch_idx][j] ) + ( Cldfb_ImagBuffer[ch_idx][j] * Cldfb_ImagBuffer[ch_idx][j] ); + } + } + } + + v_multc( reference_power, 0.5f, reference_power, num_freq_bands ); + + return; +} + +#endif /* MASA_PREREND */ diff --git a/lib_rend/ivas_prot_rend.h b/lib_rend/ivas_prot_rend.h index f34cee66d013cccf1e80f059c106d49829c7fa97..8b0bf1f471c5680bc04b582baf1cb15492ad5416 100644 --- a/lib_rend/ivas_prot_rend.h +++ b/lib_rend/ivas_prot_rend.h @@ -990,6 +990,117 @@ ivas_error 
ivas_orient_trk_Process( IVAS_QUATERNION *pTrkRot /* o : tracked rotation */ ); +#ifdef MASA_PREREND +/*----------------------------------------------------------------------------------* + * Rendering & merging to MASA format + *----------------------------------------------------------------------------------*/ + +ivas_error ivas_mcmasa_ana_open( + MCMASA_ANA_HANDLE *hMcMasaPtr, /* i/o: McMASA data handle pointer */ + const IVAS_REND_AudioConfig inConfig, /* i: Input config */ + int32_t input_Fs /* i: Sampling frequency */ +); + +void ivas_mcmasa_ana( + MCMASA_ANA_HANDLE hMcMasa, /* i/o: McMASA encoder handle */ + float data_f[][L_FRAME48k], /* i/o: Input / transport audio signals */ + const int16_t input_frame, /* i : Input frame size */ + const int16_t nchan_transport, /* i : Number of transport channels */ + const int16_t nchan_inp /* i : Number of input channels */ +); + +void ivas_mcmasa_ana_close( + MCMASA_ANA_HANDLE *hMcMasa /* i/o: analysis McMASA handle */ +); + +ivas_error ivas_omasa_ana_open( + OMASA_ANA_HANDLE *hOMasaPtr, /* i/o: OMASA data handle pointer */ + int32_t input_Fs, /* i: Sampling frequency */ + uint16_t total_num_objects /* i: Number of objects */ +); + +void ivas_omasa_ana( + OMASA_ANA_HANDLE hOMasa, /* i/o: OMASA analysis handle */ + float data_in_f[][L_FRAME48k], /* i/o: Input / transport audio signals */ + const int16_t input_frame, /* i : Input frame size */ + const int16_t nchan_transport, /* i : Number of transport channels */ + const int16_t nchan_ism /* i : Number of objects for parameter analysis */ +); + +void ivas_omasa_ana_close( + OMASA_ANA_HANDLE *hOMasa /* i/o: analysis OMASA handle */ +); + +void computeIntensityVector_ana( + const int16_t *band_grouping, /* i : Band grouping for estimation */ + float Cldfb_RealBuffer[DIRAC_MAX_ANA_CHANS][CLDFB_NO_CHANNELS_MAX], /* i : Real part of input signal */ + float Cldfb_ImagBuffer[DIRAC_MAX_ANA_CHANS][CLDFB_NO_CHANNELS_MAX], /* i : Imag part of input signal */ + const int16_t 
num_frequency_bands, /* i : Number of frequency bands */ + float intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS] /* o : Intensity vector */ +); + +void computeReferencePower_ana( + const int16_t *band_grouping, /* i : Band grouping for estimation */ + float Cldfb_RealBuffer[DIRAC_MAX_ANA_CHANS][CLDFB_NO_CHANNELS_MAX], /* i : Real part of input signal */ + float Cldfb_ImagBuffer[DIRAC_MAX_ANA_CHANS][CLDFB_NO_CHANNELS_MAX], /* i : Imag part of input signal */ + float *reference_power, /* o : Estimated power */ + const int16_t num_freq_bands /* i : Number of frequency bands */ +); + +void ivas_create_masa_out_meta( + MASA_DECODER_EXT_OUT_META_HANDLE extOutMeta, /* i/o: MASA metadata handle */ + SPHERICAL_GRID_DATA *Sph_Grid16, /* i: Spherical grid */ + const int16_t nchan_transport, /* i: Number of transport channels */ + float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* i: Estimated elevation */ + float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* i: Estimated azimuth */ + float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* i: Estimated direct-to-total ratio */ + float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* i: Estimated spread coherence */ + float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS] /* i: Estimated surround coherence */ +); + +ivas_error ivas_dirac_ana_open( + DIRAC_ANA_HANDLE *hDirACPtr, /* i/o: DIRAC data handle pointer */ + int32_t input_Fs +); + +void ivas_dirac_ana( + DIRAC_ANA_HANDLE hDirAC, /* i/o: DIRAC analysis handle */ + float data_in_f[][L_FRAME48k], /* i/o: Input / transport audio signals */ + const int16_t input_frame, /* i : Input frame size */ + const int16_t nchan_transport /* i : Number of transport channels */ +); + +void ivas_dirac_ana_close( + DIRAC_ANA_HANDLE ( *hDirAC ) /* i/o: analysis DIRAC handle */ +); + +void ivas_prerend_merge_masa_metadata( + MASA_DECODER_EXT_OUT_META_HANDLE outMeta, /* o: 
Merged metadata output */ + MASA_DECODER_EXT_OUT_META_HANDLE inMeta1, /* i: Input metadata 1 */ + IVAS_REND_AudioConfigType inType1, /* i: Type of input 1 */ + float inEne1[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* i/o: TF-energy of input 1. after merge, contains the energy of the merged signal */ + MASA_DECODER_EXT_OUT_META_HANDLE inMeta2, /* i: Input metadata 2 */ + IVAS_REND_AudioConfigType inType2, /* i: Type of input 2 */ + float inEne2[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS] /* i: TF-energy of input 2 */ +); + +void copy_masa_descriptive_meta( + MASA_DECRIPTIVE_META *outMeta, /* o: metadata to be written */ + MASA_DECRIPTIVE_META *inMeta /* i: input metadata */ +); + +ivas_error masaPrerendOpen( + MASA_PREREND_HANDLE *hMasaPrerendPtr, /* o: handle to the opened prerenderer */ + int16_t numTransports, /* i: number of transport channels */ + int32_t input_Fs /* i: signal sampling rate */ +); + +void masaPrerendClose( + MASA_PREREND_HANDLE *hMasaPrerendPtr /* i/o: prerenderer handle to be closed */ +); +#endif + + /* clang-format on */ #endif /* IVAS_PROT_REND_H */ diff --git a/lib_rend/ivas_stat_rend.h b/lib_rend/ivas_stat_rend.h index 2d6419ed8c4957c41d07cb2947cd7881de758e9b..bec505c4eebe1f10f4c65847bbc4c119f25f0d6a 100644 --- a/lib_rend/ivas_stat_rend.h +++ b/lib_rend/ivas_stat_rend.h @@ -37,6 +37,9 @@ #include "options.h" #include "ivas_cnst.h" #include "ivas_stat_com.h" // note: needed for DIRAC_DEC_BIN_HANDLE until #156 is solved +#ifdef MASA_PREREND +#include "stat_com.h" /* Note: Currently needed for CLDFB. 
*/ +#endif #include "common_api_types.h" @@ -896,4 +899,132 @@ typedef enum CHANNEL_TYPE_LFE } ChannelType; +#ifdef MASA_PREREND +/*----------------------------------------------------------------------------------* + * Multichannel MASA (McMASA) analysis structure + *----------------------------------------------------------------------------------*/ + +typedef struct ivas_mcmasa_ana_data_structure +{ + int16_t nbands; + + /* CLDFB analysis */ + int16_t num_Cldfb_instances; + HANDLE_CLDFB_FILTER_BANK cldfbAnaEnc[MCMASA_MAX_ANA_CHANS]; + + /* DirAC parameter estimation */ + float **direction_vector_m[DIRAC_NUM_DIMS]; /* Average direction vector */ + int16_t band_grouping[MASA_FREQUENCY_BANDS + 1]; + int16_t block_grouping[5]; + + /* diffuseness */ + int16_t index_buffer_intensity; + float *buffer_intensity_real[DIRAC_NUM_DIMS][DIRAC_NO_COL_AVG_DIFF]; + float *buffer_intensity_real_vert[DIRAC_NO_COL_AVG_DIFF]; + float buffer_energy[DIRAC_NO_COL_AVG_DIFF * MASA_FREQUENCY_BANDS]; + + float chnlToFoaMtx[FOA_CHANNELS][MCMASA_MAX_ANA_CHANS]; + float chnlToFoaEvenMtx[FOA_CHANNELS][MCMASA_MAX_ANA_CHANS]; + float ls_azimuth[MCMASA_MAX_ANA_CHANS]; + int16_t leftNearest[MCMASA_MAX_ANA_CHANS]; + int16_t rightNearest[MCMASA_MAX_ANA_CHANS]; + int16_t numHorizontalChannels; + uint8_t isHorizontalSetup; + + float prevMultiChEne; + float prevDownmixEne; + float prevEQ; + float interpolator[L_FRAME48k]; + + MASA_DECODER_EXT_OUT_META_HANDLE hMasaOut; + SPHERICAL_GRID_DATA *sph_grid16; + + float energy[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + +} MCMASA_ANA_DATA, *MCMASA_ANA_HANDLE; + +/*----------------------------------------------------------------------------------* + * Object MASA (OMASA) analysis structure + *----------------------------------------------------------------------------------*/ + +typedef struct ivas_omasa_ana_data_structure +{ + int16_t nbands; + + /* CLDFB analysis */ + int16_t num_Cldfb_instances; + HANDLE_CLDFB_FILTER_BANK 
cldfbAnaEnc[MAX_NUM_OBJECTS]; + + /* DirAC parameter estimation */ + float **direction_vector_m[DIRAC_NUM_DIMS]; /* Average direction vector */ + int16_t band_grouping[MASA_FREQUENCY_BANDS + 1]; + int16_t block_grouping[5]; + + /* diffuseness */ + int16_t index_buffer_intensity; + float *buffer_intensity_real[DIRAC_NUM_DIMS][DIRAC_NO_COL_AVG_DIFF]; + float buffer_energy[DIRAC_NO_COL_AVG_DIFF * MASA_FREQUENCY_BANDS]; + + float chnlToFoaMtx[FOA_CHANNELS][MCMASA_MAX_ANA_CHANS]; + + float interpolator[L_FRAME48k]; + + float prev_object_dm_gains[MAX_NUM_OBJECTS][MASA_MAX_TRANSPORT_CHANNELS]; + + MASA_DECODER_EXT_OUT_META_HANDLE hMasaOut; + SPHERICAL_GRID_DATA *sph_grid16; + float ism_azimuth[MAX_NUM_OBJECTS]; + float ism_elevation[MAX_NUM_OBJECTS]; + + float energy[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + +} OMASA_ANA_DATA, *OMASA_ANA_HANDLE; + +/*----------------------------------------------------------------------------------* + * DirAC analysis structure + *----------------------------------------------------------------------------------*/ + +typedef struct ivas_dirac_ana_data_structure +{ + int16_t nbands; + + /* CLDFB analysis */ + int16_t num_Cldfb_instances; + HANDLE_CLDFB_FILTER_BANK cldfbAnaEnc[DIRAC_MAX_ANA_CHANS]; + + /* DirAC parameter estimation */ + float **direction_vector_m[DIRAC_NUM_DIMS]; /* Average direction vector */ + int16_t band_grouping[MASA_FREQUENCY_BANDS + 1]; + int16_t block_grouping[5]; + + /* diffuseness */ + int16_t index_buffer_intensity; + float *buffer_intensity_real[DIRAC_NUM_DIMS][DIRAC_NO_COL_AVG_DIFF]; + float buffer_energy[DIRAC_NO_COL_AVG_DIFF * MASA_FREQUENCY_BANDS]; + + MASA_DECODER_EXT_OUT_META_HANDLE hMasaOut; + SPHERICAL_GRID_DATA *sph_grid16; + + float energy[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + +} DIRAC_ANA_DATA, *DIRAC_ANA_HANDLE; + +/*----------------------------------------------------------------------------------* + * MASA prerend structure + 
*----------------------------------------------------------------------------------*/ + +typedef struct ivas_masa_prerend_data_structure +{ + /* CLDFB analysis */ + int16_t num_Cldfb_instances; + HANDLE_CLDFB_FILTER_BANK cldfbAnaEnc[MASA_MAX_TRANSPORT_CHANNELS]; + + MASA_DECODER_EXT_OUT_META_HANDLE hMasaOut; + SPHERICAL_GRID_DATA *sph_grid16; + + float energy[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + +} MASA_PREREND_DATA, *MASA_PREREND_HANDLE; +#endif + #endif /* IVAS_STAT_REND_H */ diff --git a/lib_rend/lib_rend.c b/lib_rend/lib_rend.c index f4a51d44c9e8b3c4f93ceb9975dd767453d4f335..71c28fe75b30d23121478564b3fcae11ac3ecd10 100644 --- a/lib_rend/lib_rend.c +++ b/lib_rend/lib_rend.c @@ -123,6 +123,10 @@ typedef struct rotation_matrix rot_mat_prev; int16_t nonDiegeticPan; float nonDiegeticPanGain; +#ifdef MASA_PREREND + OMASA_ANA_HANDLE hOMasa; + uint16_t total_num_objects; +#endif } input_ism; typedef struct @@ -152,6 +156,9 @@ typedef struct int16_t nonDiegeticPan; float nonDiegeticPanGain; lfe_routing lfeRouting; +#ifdef MASA_PREREND + MCMASA_ANA_HANDLE hMcMasa; +#endif } input_mc; typedef struct @@ -160,6 +167,9 @@ typedef struct pan_matrix hoaDecMtx; CREND_WRAPPER_HANDLE crendWrapper; rotation_gains rot_gains_prev; +#ifdef MASA_PREREND + DIRAC_ANA_HANDLE hDirAC; +#endif } input_sba; /* Due to API of some rendering methods, the renderer has to use the decoder struct. 
@@ -172,6 +182,9 @@ typedef struct DecoderDummy *decDummy; MASA_METADATA_FRAME masaMetadata; bool metadataHasBeenFed; +#ifdef MASA_PREREND + MASA_PREREND_HANDLE hMasaPrerend; +#endif } input_masa; struct IVAS_REND @@ -434,6 +447,10 @@ static ivas_error validateOutputAudioConfig( case IVAS_REND_AUDIO_CONFIG_HOA3: case IVAS_REND_AUDIO_CONFIG_BINAURAL: case IVAS_REND_AUDIO_CONFIG_BINAURAL_ROOM: +#ifdef MASA_PREREND + case IVAS_REND_AUDIO_CONFIG_MASA1: + case IVAS_REND_AUDIO_CONFIG_MASA2: +#endif return IVAS_ERR_OK; default: break; @@ -1087,6 +1104,30 @@ static bool isIoConfigPairSupported( return true; } +#ifdef MASA_PREREND +static ivas_error initIsmMasaRendering( + input_ism *inputIsm, + int32_t inSampleRate ) +{ + ivas_error error; + + if ( inputIsm->tdRendWrapper.hBinRendererTd != NULL ) + { + ivas_td_binaural_close( &inputIsm->tdRendWrapper.hBinRendererTd ); + inputIsm->tdRendWrapper.hHrtfTD = NULL; + } + ivas_rend_closeCrend( &inputIsm->crendWrapper ); + ivas_reverb_close( &inputIsm->hReverb ); + + if ( ( error = ivas_omasa_ana_open( &inputIsm->hOMasa, inSampleRate, inputIsm->total_num_objects ) ) != IVAS_ERR_OK ) + { + return error; + } + + return IVAS_ERR_OK; +} +#endif + static ivas_error setRendInputActiveIsm( void *input, const IVAS_REND_AudioConfig inConfig, @@ -1115,6 +1156,9 @@ static ivas_error setRendInputActiveIsm( inputIsm->hReverb = NULL; inputIsm->tdRendWrapper = defaultTdRendWrapper(); initRotMatrix( inputIsm->rot_mat_prev ); +#ifdef MASA_PREREND + inputIsm->hOMasa = NULL; +#endif error = IVAS_ERR_OK; if ( outConfig == IVAS_REND_AUDIO_CONFIG_BINAURAL ) @@ -1147,6 +1191,15 @@ static ivas_error setRendInputActiveIsm( } } } +#ifdef MASA_PREREND + else if ( outConfig == IVAS_REND_AUDIO_CONFIG_MASA1 || outConfig == IVAS_REND_AUDIO_CONFIG_MASA2 ) + { + if ( ( error = initIsmMasaRendering( inputIsm, *rendCtx.pOutSampleRate ) ) != IVAS_ERR_OK ) + { + return error; + } + } +#endif return IVAS_ERR_OK; } @@ -1172,6 +1225,10 @@ static void clearInputIsm( 
inputIsm->tdRendWrapper.hHrtfTD = NULL; } +#ifdef MASA_PREREND + ivas_omasa_ana_close( &( inputIsm->hOMasa ) ); +#endif + return; } @@ -1770,6 +1827,10 @@ static ivas_error updateMcPanGains( return IVAS_ERR_INVALID_OUTPUT_FORMAT; } break; +#ifdef MASA_PREREND + case IVAS_REND_AUDIO_CONFIG_TYPE_MASA: + break; /* Do nothing */ +#endif default: return IVAS_ERR_INVALID_OUTPUT_FORMAT; } @@ -1876,6 +1937,35 @@ static ivas_error initMcBinauralRendering( return IVAS_ERR_OK; } +#ifdef MASA_PREREND +static ivas_error initMcMasaRendering( + input_mc *inputMc, + const IVAS_REND_AudioConfig inConfig, + int32_t inSampleRate ) +{ + ivas_error error; + + if ( inputMc->tdRendWrapper.hBinRendererTd != NULL ) + { + ivas_td_binaural_close( &inputMc->tdRendWrapper.hBinRendererTd ); + inputMc->tdRendWrapper.hHrtfTD = NULL; + } + ivas_rend_closeCrend( &inputMc->crendWrapper ); + ivas_reverb_close( &inputMc->hReverb ); + if ( inputMc->efapInWrapper.hEfap != NULL ) + { + efap_free_data( &inputMc->efapInWrapper.hEfap ); + } + + if ( ( error = ivas_mcmasa_ana_open( &inputMc->hMcMasa, inConfig, inSampleRate ) ) != IVAS_ERR_OK ) + { + return error; + } + + return IVAS_ERR_OK; +} +#endif + static lfe_routing defaultLfeRouting( const IVAS_REND_AudioConfig inConfig, const LSSETUP_CUSTOM_STRUCT customLsIn, @@ -1960,6 +2050,9 @@ static ivas_error setRendInputActiveMc( inputMc->tdRendWrapper = defaultTdRendWrapper(); inputMc->crendWrapper = NULL; inputMc->hReverb = NULL; +#ifdef MASA_PREREND + inputMc->hMcMasa = NULL; +#endif initRotGains( inputMc->rot_gains_prev ); inputMc->lfeRouting = defaultLfeRouting( inConfig, inputMc->customLsInput, outConfig, *inputMc->base.ctx.pCustomLsOut ); @@ -1971,6 +2064,16 @@ static ivas_error setRendInputActiveMc( } } +#ifdef MASA_PREREND + if ( outConfig == IVAS_REND_AUDIO_CONFIG_MASA1 || outConfig == IVAS_REND_AUDIO_CONFIG_MASA2 ) + { + if ( ( error = initMcMasaRendering( inputMc, inConfig, *rendCtx.pOutSampleRate ) ) != IVAS_ERR_OK ) + { + return error; + } + } 
+#endif + if ( ( error = updateMcPanGains( inputMc, outConfig ) ) != IVAS_ERR_OK ) { return error; @@ -2004,6 +2107,10 @@ static void clearInputMc( inputMc->tdRendWrapper.hHrtfTD = NULL; } +#ifdef MASA_PREREND + ivas_mcmasa_ana_close( &( inputMc->hMcMasa ) ); +#endif + return; } @@ -2146,6 +2253,11 @@ static ivas_error updateSbaPanGains( return IVAS_ERR_INVALID_OUTPUT_FORMAT; } break; +#ifdef MASA_PREREND + case IVAS_REND_AUDIO_CONFIG_TYPE_MASA: + error = IVAS_ERR_OK; + break; /* Do nothing */ +#endif default: return IVAS_ERR_INVALID_OUTPUT_FORMAT; } @@ -2158,6 +2270,24 @@ static ivas_error updateSbaPanGains( return IVAS_ERR_OK; } +#ifdef MASA_PREREND +static ivas_error initSbaMasaRendering( + input_sba *inputSba, + int32_t inSampleRate ) +{ + ivas_error error; + + ivas_rend_closeCrend( &inputSba->crendWrapper ); + + if ( ( error = ivas_dirac_ana_open( &inputSba->hDirAC, inSampleRate ) ) != IVAS_ERR_OK ) + { + return error; + } + + return IVAS_ERR_OK; +} +#endif + static ivas_error setRendInputActiveSba( void *input, const IVAS_REND_AudioConfig inConfig, @@ -2181,8 +2311,21 @@ static ivas_error setRendInputActiveSba( initRendInputBase( &inputSba->base, inConfig, id, rendCtx ); setZeroPanMatrix( inputSba->hoaDecMtx ); inputSba->crendWrapper = NULL; +#ifdef MASA_PREREND + inputSba->hDirAC = NULL; +#endif initRotGains( inputSba->rot_gains_prev ); +#ifdef MASA_PREREND + if ( outConfig == IVAS_REND_AUDIO_CONFIG_MASA1 || outConfig == IVAS_REND_AUDIO_CONFIG_MASA2 ) + { + if ( ( error = initSbaMasaRendering( inputSba, *rendCtx.pOutSampleRate ) ) != IVAS_ERR_OK ) + { + return error; + } + } +#endif + if ( ( error = updateSbaPanGains( inputSba, outConfig, hRendCfg ) ) != IVAS_ERR_OK ) { return error; @@ -2203,6 +2346,10 @@ static void clearInputSba( /* Free input's internal handles */ ivas_rend_closeCrend( &inputSba->crendWrapper ); +#ifdef MASA_PREREND + ivas_dirac_ana_close( &( inputSba->hDirAC ) ); +#endif + return; } @@ -2593,6 +2740,26 @@ static ivas_error 
setRendInputActiveMasa( { return error; } +#ifdef MASA_PREREND + if ( getAudioConfigType( outConfig ) == IVAS_REND_AUDIO_CONFIG_TYPE_MASA ) + { + inputMasa->metadataHasBeenFed = false; + if ( ( error = masaPrerendOpen( &inputMasa->hMasaPrerend, inputMasa->base.inConfig == IVAS_REND_AUDIO_CONFIG_MASA1 ? 1 : 2, *( inputMasa->base.ctx.pOutSampleRate ) ) ) != IVAS_ERR_OK ) + { + return error; + } + } + else + { + inputMasa->decDummy = initDecoderDummy( *rendCtx.pOutSampleRate, numInChannels, outConfig, 0 ); + inputMasa->metadataHasBeenFed = false; + + if ( ( error = updateMasaDummyDec( inputMasa, outConfig ) ) != IVAS_ERR_OK ) + { + return error; + } + } +#else inputMasa->decDummy = initDecoderDummy( *rendCtx.pOutSampleRate, numInChannels, outConfig, 0 ); inputMasa->metadataHasBeenFed = false; @@ -2600,6 +2767,7 @@ static ivas_error setRendInputActiveMasa( { return error; } +#endif return IVAS_ERR_OK; } @@ -2694,6 +2862,9 @@ static void clearInputMasa( rendCtx = inputMasa->base.ctx; +#ifdef MASA_PREREND + masaPrerendClose( &inputMasa->hMasaPrerend ); +#endif initRendInputBase( &inputMasa->base, IVAS_REND_AUDIO_CONFIG_UNKNOWN, 0, rendCtx ); freeDecoderDummy( &inputMasa->decDummy ); @@ -2791,6 +2962,9 @@ ivas_error IVAS_REND_Open( hIvasRend->inputsIsm[i].tdRendWrapper.hBinRendererTd = NULL; hIvasRend->inputsIsm[i].nonDiegeticPan = nonDiegeticPan; hIvasRend->inputsIsm[i].nonDiegeticPanGain = nonDiegeticPanGain; +#ifdef MASA_PREREND + hIvasRend->inputsIsm[i].hOMasa = NULL; +#endif } for ( i = 0; i < RENDERER_MAX_MC_INPUTS; ++i ) @@ -2802,12 +2976,18 @@ ivas_error IVAS_REND_Open( hIvasRend->inputsMc[i].tdRendWrapper.hBinRendererTd = NULL; hIvasRend->inputsMc[i].nonDiegeticPan = nonDiegeticPan; hIvasRend->inputsMc[i].nonDiegeticPanGain = nonDiegeticPanGain; +#ifdef MASA_PREREND + hIvasRend->inputsMc[i].hMcMasa = NULL; +#endif } for ( i = 0; i < RENDERER_MAX_SBA_INPUTS; ++i ) { initRendInputBase( &hIvasRend->inputsSba[i].base, IVAS_REND_AUDIO_CONFIG_UNKNOWN, 0, getRendCtx( 
hIvasRend ) ); hIvasRend->inputsSba[i].crendWrapper = NULL; +#ifdef MASA_PREREND + hIvasRend->inputsSba[i].hDirAC = NULL; +#endif } for ( i = 0; i < RENDERER_MAX_MASA_INPUTS; ++i ) @@ -2815,6 +2995,9 @@ ivas_error IVAS_REND_Open( initRendInputBase( &hIvasRend->inputsMasa[i].base, IVAS_REND_AUDIO_CONFIG_UNKNOWN, 0, getRendCtx( hIvasRend ) ); hIvasRend->inputsMasa[i].decDummy = NULL; hIvasRend->inputsMasa[i].metadataHasBeenFed = false; +#ifdef MASA_PREREND + hIvasRend->inputsMasa[i].hMasaPrerend = NULL; +#endif } return IVAS_ERR_OK; @@ -3524,6 +3707,33 @@ ivas_error IVAS_REND_GetInputNumChannels( } +#ifdef MASA_PREREND +/*-------------------------------------------------------------------* + * IVAS_REND_GetNumAllObjects() + * + * + *-------------------------------------------------------------------*/ + +ivas_error IVAS_REND_GetNumAllObjects( + IVAS_REND_CONST_HANDLE hIvasRend, /* i : Renderer handle */ + int16_t *numChannels /* o : number of all objects */ +) +{ + if ( hIvasRend == NULL || numChannels == NULL ) + { + return IVAS_ERR_UNEXPECTED_NULL_POINTER; + } + + if ( hIvasRend->outputConfig == IVAS_REND_AUDIO_CONFIG_MASA1 || hIvasRend->outputConfig == IVAS_REND_AUDIO_CONFIG_MASA2 ) + { + *numChannels = (int16_t) hIvasRend->inputsIsm[0].total_num_objects; + } + + return IVAS_ERR_OK; +} +#endif + + /*-------------------------------------------------------------------* * IVAS_REND_GetDelay() * @@ -3646,6 +3856,12 @@ ivas_error IVAS_REND_FeedInputAudio( { return error; } +#ifdef MASA_PREREND + if ( ( hIvasRend->outputConfig == IVAS_REND_AUDIO_CONFIG_MASA1 || hIvasRend->outputConfig == IVAS_REND_AUDIO_CONFIG_MASA2 ) && inputBase->inConfig == IVAS_REND_AUDIO_CONFIG_OBJECT ) + { + numInputChannels = (int16_t) hIvasRend->inputsIsm[0].total_num_objects; + } +#endif if ( numInputChannels != inputAudio.config.numChannels ) { @@ -3703,6 +3919,34 @@ ivas_error IVAS_REND_FeedInputObjectMetadata( } +#ifdef MASA_PREREND 
+/*-------------------------------------------------------------------*
+ * IVAS_REND_FeedInputObjectMetadataToOMasa()
+ *
+ * Store one object's azimuth/elevation for the OMASA analysis; inputIndex must be < MAX_NUM_OBJECTS
+ *-------------------------------------------------------------------*/
+
+ivas_error IVAS_REND_FeedInputObjectMetadataToOMasa(
+    IVAS_REND_HANDLE hIvasRend,                        /* i/o: Renderer handle                                   */
+    const int16_t inputIndex,                          /* i  : Index into the ism_azimuth/ism_elevation arrays   */
+    const IVAS_REND_AudioObjectPosition objectPosition /* i  : object position struct (azimuth, elevation used)  */
+)
+{
+    /* Validate function arguments; hOMasa is only opened for MASA1/MASA2 output and is NULL otherwise */
+    if ( hIvasRend == NULL || hIvasRend->inputsIsm->hOMasa == NULL )
+    {
+        return IVAS_ERR_UNEXPECTED_NULL_POINTER;
+    }
+
+    /* Set position to OMasa struct (always the analysis handle of the first ISM input) */
+    hIvasRend->inputsIsm->hOMasa->ism_azimuth[inputIndex] = objectPosition.azimuth;
+    hIvasRend->inputsIsm->hOMasa->ism_elevation[inputIndex] = objectPosition.elevation;
+
+    return IVAS_ERR_OK;
+}
+#endif
+
+
/*-------------------------------------------------------------------* * IVAS_REND_FeedInputMasaMetadata() * @@ -4872,6 +5116,25 @@ static ivas_error renderIsmToSba( return error; } +#ifdef MASA_PREREND +static ivas_error renderIsmToMasa( + input_ism *ismInput, + IVAS_REND_AudioBuffer outAudio ) +{ + float tmpRendBuffer[MAX_NUM_OBJECTS][L_FRAME48k]; + + push_wmops( "renderIsmToMasa" ); + + copyBufferTo2dArray( ismInput->base.inputBuffer, tmpRendBuffer ); + ivas_omasa_ana( ismInput->hOMasa, tmpRendBuffer, ismInput->base.inputBuffer.config.numSamplesPerChannel, outAudio.config.numChannels, ismInput->base.inputBuffer.config.numChannels ); + accumulate2dArrayToBuffer( tmpRendBuffer, &outAudio ); + + pop_wmops(); + + return IVAS_ERR_OK; +} +#endif + static ivas_error renderInputIsm( input_ism *ismInput, const IVAS_REND_AudioConfig outConfig, @@ -4913,6 +5176,11 @@ static ivas_error renderInputIsm( return IVAS_ERR_INVALID_OUTPUT_FORMAT; } break; +#ifdef MASA_PREREND + case IVAS_REND_AUDIO_CONFIG_TYPE_MASA: + error = renderIsmToMasa( ismInput, outAudio ); + break; +#endif default: return IVAS_ERR_INVALID_OUTPUT_FORMAT; } @@ -5394,6 +5662,25 @@ static void renderMcToSba( 
return; } +#ifdef MASA_PREREND +static ivas_error renderMcToMasa( + input_mc *mcInput, + IVAS_REND_AudioBuffer outAudio ) +{ + float tmpRendBuffer[MAX_OUTPUT_CHANNELS][L_FRAME48k]; + + push_wmops( "renderMcToMasa" ); + + copyBufferTo2dArray( mcInput->base.inputBuffer, tmpRendBuffer ); + ivas_mcmasa_ana( mcInput->hMcMasa, tmpRendBuffer, mcInput->base.inputBuffer.config.numSamplesPerChannel, outAudio.config.numChannels, mcInput->base.inputBuffer.config.numChannels ); + accumulate2dArrayToBuffer( tmpRendBuffer, &outAudio ); + + pop_wmops(); + + return IVAS_ERR_OK; +} +#endif + static ivas_error renderInputMc( input_mc *mcInput, IVAS_REND_AudioConfig outConfig, @@ -5447,6 +5734,11 @@ static ivas_error renderInputMc( return IVAS_ERR_INVALID_OUTPUT_FORMAT; } break; +#ifdef MASA_PREREND + case IVAS_REND_AUDIO_CONFIG_TYPE_MASA: + renderMcToMasa( mcInput, outAudio ); + break; +#endif default: return IVAS_ERR_INVALID_OUTPUT_FORMAT; } @@ -5723,6 +6015,25 @@ static ivas_error renderSbaToBinauralRoom( return IVAS_ERR_OK; }
+#ifdef MASA_PREREND
+static ivas_error renderSbaToMasa( /* Analyse an SBA input with the DirAC analysis handle and accumulate the result into the MASA output; always returns IVAS_ERR_OK */
+    input_sba *sbaInput,            /* i/o: SBA input; its hDirAC analysis handle is used */
+    IVAS_REND_AudioBuffer outAudio ) /* i/o: output buffer the analysed audio is accumulated into */
+{
+    float tmpRendBuffer[MAX_OUTPUT_CHANNELS][L_FRAME48k];
+
+    push_wmops( "renderSbaToMasa" ); /* own label so the complexity is not booked under renderMcToMasa */
+
+    copyBufferTo2dArray( sbaInput->base.inputBuffer, tmpRendBuffer );
+    ivas_dirac_ana( sbaInput->hDirAC, tmpRendBuffer, sbaInput->base.inputBuffer.config.numSamplesPerChannel, outAudio.config.numChannels ); /* output channel count is passed as nchan_transport */
+    accumulate2dArrayToBuffer( tmpRendBuffer, &outAudio );
+
+    pop_wmops();
+
+    return IVAS_ERR_OK;
+}
+#endif
+
static ivas_error renderInputSba( input_sba *sbaInput, const IVAS_REND_AudioConfig outConfig, @@ -5769,6 +6080,11 @@ static ivas_error renderInputSba( return IVAS_ERR_INVALID_OUTPUT_FORMAT; } break; +#ifdef MASA_PREREND + case IVAS_REND_AUDIO_CONFIG_TYPE_MASA: + renderSbaToMasa( sbaInput, outAudio ); + break; +#endif default: return IVAS_ERR_INVALID_OUTPUT_FORMAT; } @@ -5905,6 +6221,136 @@ static void renderMasaToBinaural( return; } +#ifdef
MASA_PREREND
+static void renderMasaToMasa( /* Pass a MASA input through to MASA output: measure its TF energy for later metadata merging, adapt the transport count, convert the metadata */
+    input_masa *masaInput,           /* i/o: MASA input; the energy ratios of its metadata are renormalised in place */
+    IVAS_REND_AudioBuffer outAudio ) /* i/o: output buffer the transport audio is accumulated into                  */
+{
+    int16_t sf, band, dir, numDirs;
+    float ratioSum;
+    MASA_DECODER_EXT_OUT_META_HANDLE outMeta;
+    MASA_METADATA_FRAME *inMeta;
+    float tmpBuffer[MAX_OUTPUT_CHANNELS][L_FRAME48k];
+    int16_t ts, i, j, l_ts;
+    float Chan_RealBuffer[MASA_MAX_TRANSPORT_CHANNELS][CLDFB_NO_CHANNELS_MAX];
+    float Chan_ImagBuffer[MASA_MAX_TRANSPORT_CHANNELS][CLDFB_NO_CHANNELS_MAX];
+
+    int16_t band_m_idx, block_m_idx;
+    int16_t mrange[2];
+    int16_t brange[2];
+    int16_t numAnalysisChannels;
+
+    copyBufferTo2dArray( masaInput->base.inputBuffer, tmpBuffer );
+
+    /* Calculate energy */
+    l_ts = masaInput->base.inputBuffer.config.numSamplesPerChannel / CLDFB_NO_COL_MAX; /* samples per CLDFB time slot */
+    numAnalysisChannels = masaInput->hMasaPrerend->num_Cldfb_instances;
+
+
+    /* do processing over all CLDFB time slots */
+    for ( block_m_idx = 0; block_m_idx < MAX_PARAM_SPATIAL_SUBFRAMES; block_m_idx++ )
+    {
+        mrange[0] = DirAC_block_grouping[block_m_idx];
+        mrange[1] = DirAC_block_grouping[block_m_idx + 1];
+
+        set_zero( masaInput->hMasaPrerend->energy[block_m_idx], MASA_FREQUENCY_BANDS );
+
+        for ( ts = mrange[0]; ts < mrange[1]; ts++ )
+        {
+            for ( i = 0; i < numAnalysisChannels; i++ )
+            {
+                cldfbAnalysis_ts( &( tmpBuffer[i][l_ts * ts] ), Chan_RealBuffer[i], Chan_ImagBuffer[i], l_ts, masaInput->hMasaPrerend->cldfbAnaEnc[i] );
+            }
+
+            /* Compute channel energy for metadata processing */
+            for ( band_m_idx = 0; band_m_idx < MASA_FREQUENCY_BANDS; band_m_idx++ )
+            {
+                brange[0] = MASA_band_grouping_24[band_m_idx];
+                brange[1] = MASA_band_grouping_24[band_m_idx + 1];
+                for ( j = brange[0]; j < brange[1]; j++ )
+                {
+                    for ( i = 0; i < numAnalysisChannels; i++ )
+                    {
+                        masaInput->hMasaPrerend->energy[block_m_idx][band_m_idx] += Chan_RealBuffer[i][j] * Chan_RealBuffer[i][j] + Chan_ImagBuffer[i][j] * Chan_ImagBuffer[i][j]; /* |X_i|^2 of every analysed channel, not only channel 0 */
+                    }
+                }
+            }
+        }
+    }
+
+    /* Copy audio channels if mismatch in number of transports */
+    if ( masaInput->base.inputBuffer.config.numChannels == 1 && outAudio.config.numChannels == 2 )
+    {
+        mvr2r( tmpBuffer[0], tmpBuffer[1], masaInput->base.inputBuffer.config.numSamplesPerChannel ); /* mono -> duplicate into second transport */
+    }
+    else if ( masaInput->base.inputBuffer.config.numChannels == 2 && outAudio.config.numChannels == 1 )
+    {
+        v_add( tmpBuffer[0], tmpBuffer[1], tmpBuffer[0], masaInput->base.inputBuffer.config.numSamplesPerChannel ); /* stereo -> sum into single transport */
+    }
+
+    /* Copy metadata */
+    outMeta = masaInput->hMasaPrerend->hMasaOut;
+    inMeta = &masaInput->masaMetadata;
+    numDirs = inMeta->descriptive_meta.numberOfDirections + 1;
+
+    for ( sf = 0; sf < MAX_PARAM_SPATIAL_SUBFRAMES; sf++ )
+    {
+        for ( band = 0; band < MASA_FREQUENCY_BANDS; band++ )
+        {
+            /* Remainder is always set to zero and energy removal is compensated in following steps
+             * to other ratios. */
+            inMeta->common_meta.remainder_to_total_ratio[sf][band] = 0.0f;
+
+            ratioSum = 0;
+            for ( dir = 0; dir < numDirs; dir++ )
+            {
+                ratioSum += inMeta->directional_meta[dir].energy_ratio[sf][band];
+            }
+            ratioSum += inMeta->common_meta.diffuse_to_total_ratio[sf][band];
+
+            if ( ratioSum == 0.0f ) /* nothing to normalise: treat the tile as fully diffuse */
+            {
+                for ( dir = 0; dir < numDirs; dir++ )
+                {
+                    inMeta->directional_meta[dir].energy_ratio[sf][band] = 0.0f;
+                }
+                inMeta->common_meta.diffuse_to_total_ratio[sf][band] = 1.0f;
+            }
+            else if ( ratioSum != 1.0f ) /* rescale so direct + diffuse ratios sum to one */
+            {
+                for ( dir = 0; dir < numDirs; dir++ )
+                {
+                    inMeta->directional_meta[dir].energy_ratio[sf][band] /= ratioSum;
+                }
+                inMeta->common_meta.diffuse_to_total_ratio[sf][band] /= ratioSum;
+            }
+        }
+    }
+
+    for ( sf = 0; sf < MAX_PARAM_SPATIAL_SUBFRAMES; sf++ )
+    {
+        for ( band = 0; band < MASA_FREQUENCY_BANDS; band++ )
+        {
+            outMeta->diffuseToTotalRatio[sf][band] = UINT8_MAX; /* start from full scale, subtract each quantised direct ratio below */
+            for ( dir = 0; dir < numDirs; dir++ )
+            {
+                outMeta->directionIndex[dir][sf][band] = index_theta_phi_16( &inMeta->directional_meta[dir].elevation[sf][band], &inMeta->directional_meta[dir].azimuth[sf][band], masaInput->hMasaPrerend->sph_grid16 );
+                outMeta->directToTotalRatio[dir][sf][band] = (uint8_t) floorf( inMeta->directional_meta[dir].energy_ratio[sf][band] * UINT8_MAX );
+                outMeta->diffuseToTotalRatio[sf][band] -= outMeta->directToTotalRatio[dir][sf][band];
+                outMeta->spreadCoherence[dir][sf][band] = (uint8_t) floorf( inMeta->directional_meta[dir].spread_coherence[sf][band] * UINT8_MAX );
+            }
+            outMeta->surroundCoherence[sf][band] = (uint8_t) floorf( inMeta->common_meta.surround_coherence[sf][band] * UINT8_MAX );
+        }
+    }
+
+    copy_masa_descriptive_meta( &( outMeta->descriptiveMeta ), &( inMeta->descriptive_meta ) );
+
+    accumulate2dArrayToBuffer( tmpBuffer, &outAudio );
+
+    return;
+}
+#endif
+
static ivas_error renderInputMasa( input_masa *masaInput, const IVAS_REND_AudioConfig outConfig, @@ -5954,6 +6400,11 @@ static ivas_error renderInputMasa( return IVAS_ERR_INVALID_OUTPUT_FORMAT; } break; +#ifdef MASA_PREREND + case IVAS_REND_AUDIO_CONFIG_TYPE_MASA: + renderMasaToMasa( masaInput, outAudio ); + break; +#endif default: return IVAS_ERR_INVALID_OUTPUT_FORMAT; } @@ -5987,6 +6438,152 @@ static ivas_error renderActiveInputsMasa( } +#ifdef MASA_PREREND +/*---------------------------------------------------------------------* + * IVAS_REND_GetMasaMetadata( ) + * + * Get metadata of the estimated MASA frame + *---------------------------------------------------------------------*/ + +ivas_error IVAS_REND_GetMasaMetadata( + IVAS_REND_HANDLE hIvasRend, /* i/o: IVAS renderer handle */ + MASA_DECODER_EXT_OUT_META_HANDLE *hMasaExtOutMeta, /* o : pointer to handle, which will be set to point to analyzed MASA metadata */ + IVAS_REND_AudioConfigType inputType /* i : Input type */ +) +{ + if ( hIvasRend == NULL ) + { + return IVAS_ERR_UNEXPECTED_NULL_POINTER; + } + + /* Get the metadata handle */ + if ( inputType == IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED ) + { + *hMasaExtOutMeta = hIvasRend->inputsIsm->hOMasa->hMasaOut; + } + else if ( inputType == IVAS_REND_AUDIO_CONFIG_TYPE_CHANNEL_BASED ) + { + *hMasaExtOutMeta = hIvasRend->inputsMc->hMcMasa->hMasaOut; + }
+ else if ( inputType == IVAS_REND_AUDIO_CONFIG_TYPE_AMBISONICS ) + { + *hMasaExtOutMeta = hIvasRend->inputsSba->hDirAC->hMasaOut; + } + else + { + return IVAS_ERR_NOT_SUPPORTED_OPTION; + } + + return IVAS_ERR_OK; +} + + +/*---------------------------------------------------------------------* + * IVAS_REND_MergeMasaMetadata( ) + * + * Merge MASA metadata from two formats + *---------------------------------------------------------------------*/ + +ivas_error IVAS_REND_MergeMasaMetadata( + IVAS_REND_HANDLE hIvasRend, /* i/o: IVAS renderer handle */ + MASA_DECODER_EXT_OUT_META_HANDLE *hMasaExtOutMeta, /* o : pointer to handle, which will be set to point to merged metadata */ + IVAS_REND_AudioConfigType inputType1, /* i : Input type 1 */ + IVAS_REND_AudioConfigType inputType2 /* i : Input type 2 */ +) +{ + MASA_DECODER_EXT_OUT_META_HANDLE inMeta2; + float( *inEne1 )[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + float( *inEne2 )[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS]; + + if ( hIvasRend == NULL ) + { + return IVAS_ERR_UNEXPECTED_NULL_POINTER; + } + + /* Input1 metadata and energy */ + if ( inputType1 == IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED ) + { + *hMasaExtOutMeta = hIvasRend->inputsIsm->hOMasa->hMasaOut; + inEne1 = &( hIvasRend->inputsIsm->hOMasa->energy ); + } + else if ( inputType1 == IVAS_REND_AUDIO_CONFIG_TYPE_CHANNEL_BASED ) + { + *hMasaExtOutMeta = hIvasRend->inputsMc->hMcMasa->hMasaOut; + inEne1 = &( hIvasRend->inputsMc->hMcMasa->energy ); + } + else if ( inputType1 == IVAS_REND_AUDIO_CONFIG_TYPE_AMBISONICS ) + { + *hMasaExtOutMeta = hIvasRend->inputsSba->hDirAC->hMasaOut; + inEne1 = &( hIvasRend->inputsSba->hDirAC->energy ); + } + else if ( inputType1 == IVAS_REND_AUDIO_CONFIG_TYPE_MASA ) + { + *hMasaExtOutMeta = hIvasRend->inputsMasa->hMasaPrerend->hMasaOut; + inEne1 = &( hIvasRend->inputsMasa->hMasaPrerend->energy ); + } + else + { + return IVAS_ERR_NOT_SUPPORTED_OPTION; + } + + /* Input2 metadata and energy */ + if ( inputType2 == 
IVAS_REND_AUDIO_CONFIG_TYPE_OBJECT_BASED ) + { + inMeta2 = hIvasRend->inputsIsm->hOMasa->hMasaOut; + inEne2 = &( hIvasRend->inputsIsm->hOMasa->energy ); + } + else if ( inputType2 == IVAS_REND_AUDIO_CONFIG_TYPE_CHANNEL_BASED ) + { + inMeta2 = hIvasRend->inputsMc->hMcMasa->hMasaOut; + inEne2 = &( hIvasRend->inputsMc->hMcMasa->energy ); + } + else if ( inputType2 == IVAS_REND_AUDIO_CONFIG_TYPE_AMBISONICS ) + { + inMeta2 = hIvasRend->inputsSba->hDirAC->hMasaOut; + inEne2 = &( hIvasRend->inputsSba->hDirAC->energy ); + } + else if ( inputType2 == IVAS_REND_AUDIO_CONFIG_TYPE_MASA ) + { + inMeta2 = hIvasRend->inputsMasa->hMasaPrerend->hMasaOut; + inEne2 = &( hIvasRend->inputsMasa->hMasaPrerend->energy ); + } + else + { + return IVAS_ERR_NOT_SUPPORTED_OPTION; + } + + /* Merge metadata */ + ivas_prerend_merge_masa_metadata( *hMasaExtOutMeta, *hMasaExtOutMeta, inputType1, *inEne1, inMeta2, inputType2, *inEne2 ); + ( *hMasaExtOutMeta )->descriptiveMeta.numberOfChannels = hIvasRend->outputConfig == IVAS_REND_AUDIO_CONFIG_MASA1 ? 
0u : 1u; + + + return IVAS_ERR_OK; +} + + +/*---------------------------------------------------------------------* + * IVAS_REND_SetTotalNumberOfObjects( ) + * + * Set the total number of objects to the first object data + *---------------------------------------------------------------------*/ + +ivas_error IVAS_REND_SetTotalNumberOfObjects( + IVAS_REND_HANDLE hIvasRend, /* i/o: IVAS renderer handle */ + const uint16_t total_num_objects /* i: total number of objects */ +) +{ + if ( hIvasRend == NULL ) + { + return IVAS_ERR_UNEXPECTED_NULL_POINTER; + } + + hIvasRend->inputsIsm[0].total_num_objects = total_num_objects; + + return IVAS_ERR_OK; +} +#endif + + /*-------------------------------------------------------------------* * IVAS_REND_GetSamples() * @@ -6024,6 +6621,39 @@ ivas_error IVAS_REND_GetSamples( return IVAS_ERR_INVALID_BUFFER_SIZE; } +#ifdef MASA_PREREND + /* Check that there is allowed configuration for MASA format output */ + if ( getAudioConfigType( hIvasRend->outputConfig ) == IVAS_REND_AUDIO_CONFIG_TYPE_MASA ) + { + int16_t i; + int16_t numMasaInputs = 0; + int16_t numOtherInputs = 0; + + for ( i = 0; i < RENDERER_MAX_MASA_INPUTS; i++ ) + { + numMasaInputs += hIvasRend->inputsMasa[i].base.inConfig == IVAS_REND_AUDIO_CONFIG_UNKNOWN ? 0 : 1; + } + + for ( i = 0; i < RENDERER_MAX_MC_INPUTS; i++ ) + { + numOtherInputs += hIvasRend->inputsMc[i].base.inConfig == IVAS_REND_AUDIO_CONFIG_UNKNOWN ? 0 : 1; + } + + for ( i = 0; i < RENDERER_MAX_SBA_INPUTS; i++ ) + { + numOtherInputs += hIvasRend->inputsSba[i].base.inConfig == IVAS_REND_AUDIO_CONFIG_UNKNOWN ? 0 : 1; + } + + /* For ISM, we check only first as all ISMs are handled together via OMASA when merging to MASA. */ + numOtherInputs += hIvasRend->inputsIsm[0].base.inConfig == IVAS_REND_AUDIO_CONFIG_UNKNOWN ? 
0 : 1; + + if ( numMasaInputs == 0 || numOtherInputs == 0 ) + { + return IVAS_ERR_IO_CONFIG_PAIR_NOT_SUPPORTED; + } + } +#endif + if ( ( error = IVAS_REND_NumOutChannels( hIvasRend, &numOutChannels ) ) != IVAS_ERR_OK ) { return error; diff --git a/lib_rend/lib_rend.h b/lib_rend/lib_rend.h index 406fa83e8744c59875aa19743944c0d6172ede27..c8873ecb6f776adbeafd18306f760ca846fe7ef8 100644 --- a/lib_rend/lib_rend.h +++ b/lib_rend/lib_rend.h @@ -222,6 +222,14 @@ ivas_error IVAS_REND_FeedInputObjectMetadata( const IVAS_REND_AudioObjectPosition objectPosition /* i : object position struct */ ); +#ifdef MASA_PREREND +ivas_error IVAS_REND_FeedInputObjectMetadataToOMasa( + IVAS_REND_HANDLE hIvasRend, /* i/o: Renderer handle */ + const int16_t inputIndex, /* i : Index of the input */ + const IVAS_REND_AudioObjectPosition objectPosition /* i : object position struct */ +); +#endif + ivas_error IVAS_REND_FeedInputMasaMetadata( IVAS_REND_HANDLE hIvasRend, /* i/o: Renderer handle */ const IVAS_REND_InputId inputId, /* i : ID of the input */ @@ -295,6 +303,31 @@ ivas_error IVAS_REND_GetCombinedOrientation( ); #endif +#ifdef MASA_PREREND +ivas_error IVAS_REND_GetMasaMetadata( + IVAS_REND_HANDLE hIvasRend, /* i/o: IVAS renderer handle */ + MASA_DECODER_EXT_OUT_META_HANDLE *hMasaExtOutMeta, /* o : pointer to handle, which will be set to point to analyzed MASA metadata */ + IVAS_REND_AudioConfigType inputType /* i : Input type */ +); + +ivas_error IVAS_REND_MergeMasaMetadata( + IVAS_REND_HANDLE hIvasRend, /* i/o: IVAS renderer handle */ + MASA_DECODER_EXT_OUT_META_HANDLE *hMasaExtOutMeta, /* o : pointer to handle, which will be set to point to merged metadata */ + IVAS_REND_AudioConfigType inputType1, /* i : Input type 1 */ + IVAS_REND_AudioConfigType inputType2 /* i : Input type 2 */ +); + +ivas_error IVAS_REND_SetTotalNumberOfObjects( + IVAS_REND_HANDLE hIvasRend, /* i/o: IVAS renderer handle */ + const uint16_t total_num_objects /* i: total number of objects */ +); + +ivas_error 
IVAS_REND_GetNumAllObjects( + IVAS_REND_CONST_HANDLE hIvasRend, /* i : Renderer handle */ + int16_t *numChannels /* o : number of all objects */ +); +#endif + ivas_error IVAS_REND_GetSamples( IVAS_REND_HANDLE hIvasRend, /* i/o: Renderer handle */ IVAS_REND_AudioBuffer outAudio /* i/o: buffer for output audio */