From ce428ca7eb8f2b74ea689fbbbd761fcf6cccf9e7 Mon Sep 17 00:00:00 2001 From: azmill Date: Thu, 27 Jul 2023 15:23:35 +1000 Subject: [PATCH 1/7] Adding in mono detection and processing to stop leakage --- lib_com/ivas_prot.h | 18 ++++ lib_com/ivas_stat_com.h | 3 + lib_com/options.h | 3 +- lib_dec/ivas_spar_md_dec.c | 120 ++++++++++++++++++++++++ lib_enc/ivas_dirac_enc.c | 176 +++++++++++++++++++++++++++++++++++- lib_enc/ivas_mcmasa_enc.c | 8 +- lib_enc/ivas_spar_encoder.c | 45 +++++++-- lib_enc/ivas_spar_md_enc.c | 51 +++++++++++ lib_enc/ivas_stat_enc.h | 2 + 9 files changed, 413 insertions(+), 13 deletions(-) diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index 885de08281..56dccb18b8 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -3375,6 +3375,11 @@ void ivas_dirac_param_est_enc( const IVAS_FORMAT ivas_format , const int16_t hodirac_flag, const int16_t nchan_fb_in +#ifdef FIX_527_SBA_MONO_INPUT + , + int16_t *mono_frame_count, + int16_t *dirac_mono_flag +#endif ); @@ -4327,6 +4332,9 @@ ivas_error ivas_spar_md_enc_process( const int16_t nchan_inp, const int16_t sba_order, /* i : Ambisonic (SBA) order */ float *prior_mixer[IVAS_MAX_FB_MIXER_OUT_CH][IVAS_MAX_SPAR_FB_MIXER_IN_CH] /* i : prior mixer_matrix */ +#ifdef FIX_527_SBA_MONO_INPUT + ,const int16_t dirac_mono_flag +#endif ); void ivas_compute_spar_params( @@ -4466,6 +4474,11 @@ void ivas_spar_update_md_hist( ivas_spar_md_dec_state_t *hMdDec /* i/o: SPAR MD decoder handle */ ); +int16_t ivas_spar_chk_zero_coefs( + Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ + const int16_t sba_order /* i : Ambisonic (SBA) order */ +); + void ivas_spar_smooth_md_dtx( ivas_spar_md_dec_state_t *hMdDec, /* i/o: SPAR MD decoder handle */ const int16_t num_bands_out, /* i : number of output bands */ @@ -5345,6 +5358,11 @@ void computeReferencePower_enc( const IVAS_FORMAT ivas_format, /* i : ivas_format */ int16_t ref_power_w, /* i : use 0 if hodirac is enabled */ const int16_t nchan_ana /* i : number 
of analysis channels */ +#ifdef FIX_527_SBA_MONO_INPUT + , + int16_t *mono_frame_count, + int16_t *dirac_mono_flag +#endif ); ivas_error ivas_mono_dmx_renderer_open( diff --git a/lib_com/ivas_stat_com.h b/lib_com/ivas_stat_com.h index 6d2ed4c4e9..e239426f84 100644 --- a/lib_com/ivas_stat_com.h +++ b/lib_com/ivas_stat_com.h @@ -540,6 +540,9 @@ typedef struct ivas_masa_qmetadata_frame_struct int16_t metadata_max_bits; /* maximum allowed number of bits for metadata per frame */ uint8_t useLowerRes; uint8_t useLowerBandRes; +#ifdef FIX_527_SBA_MONO_INPUT + int16_t dirac_mono_flag; +#endif IVAS_SURROUND_COHERENCE_BAND_DATA *surcoh_band_data; /* Additional helper values to include all data required for writing to output file */ diff --git a/lib_com/options.h b/lib_com/options.h index c2bc0b866b..359aea915d 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -210,8 +210,9 @@ #define FIX_626_VARIABLE_TYPE_MDCT_CONC /* FhG: trivial fix to fix USAN error */ #define FIX_616_DIV_ZERO_MCT /*FhG : Fix UBSAN division by zero error of issue 616*/ -/* ################## End BE DEVELOPMENT switches ######################### */ +#define FIX_527_SBA_MONO_INPUT /*Dlb : Fix for mono content in a HOA input format */ +/* ################## End BE DEVELOPMENT switches ######################### */ /* #################### Start NON-BE CR switches ########################## */ /* any switch which is non-be wrt operation points tested in selection */ diff --git a/lib_dec/ivas_spar_md_dec.c b/lib_dec/ivas_spar_md_dec.c index c61effd673..9e71c35314 100644 --- a/lib_dec/ivas_spar_md_dec.c +++ b/lib_dec/ivas_spar_md_dec.c @@ -725,6 +725,63 @@ static ivas_error ivas_spar_set_dec_config( return IVAS_ERR_OK; } +/*-----------------------------------------------------------------------------------------* + * Function ivas_dec_mono_sba_handling() + * + * + *-----------------------------------------------------------------------------------------*/ + +static void ivas_dec_mono_sba_handling( + 
Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ + const int16_t sba_order /* i : Ambisonic (SBA) order */ +) +{ + int16_t mono_flag, b, block; + + mono_flag = 1; + + for ( b = 0; b < st_ivas->hQMetaData->q_direction[0].cfg.nbands; b++ ) + { + for ( block = 0; block < MAX_PARAM_SPATIAL_SUBFRAMES; ++block ) + { + float azimuth = st_ivas->hQMetaData->q_direction[0].band_data[b].azimuth[block]; + float elevation = st_ivas->hQMetaData->q_direction[0].band_data[b].elevation[block]; + float energy_ratio = st_ivas->hQMetaData->q_direction[0].band_data[b].energy_ratio[block]; + if ( + ( azimuth != 0.0f ) || + ( elevation != 0.0f ) || + ( energy_ratio > 0.15f ) ) /* 0.15f is just above the lowest quantised value. */ + { + mono_flag = 0; + } + } + } + /* Combine the SPAR prediction coefs flag with the azimuth, elevation and energy ratio flag.*/ + mono_flag = mono_flag && ivas_spar_chk_zero_coefs( st_ivas, sba_order ); + + if ( mono_flag ) + { + /* Set Energy Ratio values to be zero */ + for ( b = 0; b < st_ivas->hQMetaData->q_direction[0].cfg.nbands; b++ ) + { + set_zero( st_ivas->hQMetaData->q_direction[0].band_data[b].energy_ratio, MAX_PARAM_SPATIAL_SUBFRAMES ); + } + if ( st_ivas->hDirAC != NULL ) + { + for ( block = 0; block < st_ivas->hSpatParamRendCom->dirac_md_buffer_length; ++block ) + { + /* Set directional Energy Ratio values to be zero */ + set_zero( st_ivas->hSpatParamRendCom->energy_ratio1[block], st_ivas->hSpatParamRendCom->num_freq_bands ); + if ( st_ivas->hQMetaData->no_directions == 2 ) + { + set_zero( st_ivas->hSpatParamRendCom->energy_ratio2[block], st_ivas->hSpatParamRendCom->num_freq_bands ); + } + /* Set Diffuseness values to be 1.0 */ + set_f( st_ivas->hSpatParamRendCom->diffuseness_vector[block], 1.0f, st_ivas->hSpatParamRendCom->num_freq_bands ); + } + } + } +} /*-----------------------------------------------------------------------------------------* * Function ivas_spar_md_dec_process() @@ -764,6 +821,10 @@ void ivas_spar_md_dec_process(
#endif st_ivas->hQMetaData->sba_inactive_mode, st_ivas->last_active_ivas_total_brate ); +#ifdef FIX_527_SBA_MONO_INPUT + ivas_dec_mono_sba_handling( st_ivas, sba_order ); +#endif + #if 0 { char f_name[100]; @@ -1090,7 +1151,66 @@ void ivas_spar_md_dec_process( return; } +/*-----------------------------------------------------------------------------------------* + * Function ivas_spar_chk_zero_coefs() + * + * Check for zeroed SPAR coefficients + *-----------------------------------------------------------------------------------------*/ +int16_t ivas_spar_chk_zero_coefs( + Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ + const int16_t sba_order /* i : Ambisonic (SBA) order */ +) +{ + int16_t j, k, b, i_ts; + ivas_spar_md_dec_state_t *hMdDec; + int16_t num_md_sub_frames; + int16_t mono = 1; + + hMdDec = st_ivas->hSpar->hMdDec; + num_md_sub_frames = ivas_get_spar_dec_md_num_subframes( sba_order, st_ivas->hDecoderConfig->ivas_total_brate +#ifdef VLBR_20MS_MD + , + st_ivas->last_active_ivas_total_brate +#endif + ); + + int16_t ndec = hMdDec->spar_md_cfg.num_decorr_per_band[0]; + int16_t ndm = hMdDec->spar_md_cfg.num_dmx_chans_per_band[0]; + + for ( i_ts = 0; i_ts < num_md_sub_frames; i_ts++ ) + { + for ( b = 0; b < min( hMdDec->spar_md.num_bands, SPAR_DIRAC_SPLIT_START_BAND ); b++ ) + { + for ( j = 0; j < ndm + ndec - 1; j++ ) + { + // printf("%e, ", hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].pred_re[j]); + if ( hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].pred_re[j] != 0.0f ) + { + mono = 0; + } + } + for ( j = 0; j < ndec; j++ ) + { + for ( k = 0; k < ndm - 1; k++ ) + { + if ( hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].C_re[j][k] != 0.0f ) + { + mono = 0; + } + } + } + for ( j = 0; j < ndec; j++ ) + { + if ( hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].P_re[j] != 0.0f ) + { + mono = 0; + } + } + } + } + return mono; +} 
/*-----------------------------------------------------------------------------------------* * Function ivas_spar_smooth_md_dtx() diff --git a/lib_enc/ivas_dirac_enc.c b/lib_enc/ivas_dirac_enc.c index 1a25541d31..29c6642db7 100644 --- a/lib_enc/ivas_dirac_enc.c +++ b/lib_enc/ivas_dirac_enc.c @@ -159,6 +159,7 @@ ivas_error ivas_dirac_enc_open( } hDirAC->index_buffer_intensity = 0; + hDirAC->mono_frame_count = 0; st_ivas->hDirAC = hDirAC; st_ivas->hSpar->enc_param_start_band = st_ivas->hDirAC->hConfig->enc_param_start_band; @@ -299,7 +300,13 @@ void ivas_dirac_enc( int16_t i, j, b, i_ts; push_wmops( "ivas_dirac_enc" ); - ivas_dirac_param_est_enc( hDirAC, hQMetaData->q_direction, hQMetaData->useLowerRes, data_f, ppIn_FR_real, ppIn_FR_imag, input_frame, ivas_format, hodirac_flag, hodirac_flag ? HOA2_CHANNELS : FOA_CHANNELS ); + ivas_dirac_param_est_enc( hDirAC, hQMetaData->q_direction, hQMetaData->useLowerRes, data_f, ppIn_FR_real, ppIn_FR_imag, input_frame, ivas_format, hodirac_flag, hodirac_flag ? HOA2_CHANNELS : FOA_CHANNELS +#ifdef FIX_527_SBA_MONO_INPUT + , + &( hDirAC->mono_frame_count ), + &( hQMetaData->dirac_mono_flag ) +#endif + ); if ( hQMetaData->q_direction->cfg.nbands > 0 ) { @@ -310,6 +317,22 @@ void ivas_dirac_enc( /* WB 4TC mode bit : disable for now*/ push_next_indice( hMetaData, 0, 1 ); +#ifdef FIX_527_SBA_MONO_INPUT + /* Set Energy Ratio to 0.0 if the mono flag is set, before the metadata is encoded */ + if ( hQMetaData->dirac_mono_flag ) + { + for ( b = hQMetaData->q_direction->cfg.start_band; b < hQMetaData->q_direction->cfg.nbands; b++ ) + { + for ( i_ts = 0; i_ts < ( ( dtx_vad == 1 ) ? 
hQMetaData->q_direction[0].cfg.nblocks : 1 ); i_ts++ ) + { + hQMetaData->q_direction[0].band_data[b].energy_ratio[i_ts] = 0.0f; + hQMetaData->q_direction[0].band_data[b].azimuth[i_ts] = 0.0f; + hQMetaData->q_direction[0].band_data[b].elevation[i_ts] = 0.0f; + } + } + } +#endif + ivas_qmetadata_enc_encode( hMetaData, hQMetaData, hodirac_flag ); } else @@ -388,6 +411,125 @@ void ivas_dirac_enc( return; } +/*------------------------------------------------------------------------- + * ivas_dirac_get_mono_flag() + * + * + *-------------------------------------------------------------------------*/ + +static int16_t ivas_dirac_get_mono_flag( + const int16_t *band_grouping, /* i : Band grouping for estimation */ + float Cldfb_RealBuffer[DIRAC_MAX_ANA_CHANS][DIRAC_NO_FB_BANDS_MAX], /* i : Real part of input signal */ + float Cldfb_ImagBuffer[DIRAC_MAX_ANA_CHANS][DIRAC_NO_FB_BANDS_MAX], /* i : Imag part of input signal */ + const int16_t nchan_ana, /* i : number of analysis channels */ + int16_t *mono_frame_count ) /* i/o : current number of mono frames count */ +{ + int16_t brange[2]; + int16_t i, j, ch_idx; + float other_ch_band_power; + float W_band_power; + int16_t any_mc_band = 0; + int16_t any_mono_band = 0; + int16_t local_mono_flag = 0; + float W_band_power_norm; + float threshold = 0; + + static const float THRESH_SILENCE = 3e4f; + static const float NORM_FACTOR = 1e13f; + static const float ONE_ON_NORM_FACTOR = 1 / 1e13f; + static const float MAX_THRESH = 1e6f; + static const float MIN_THRESH = 1e2f; + static const float MONO_FRAME_THRESH = ( 15 ); /* 30ms */ + + /* Banded Power Calculations */ + for ( i = 0; i < DIRAC_MAX_NBANDS; i++ ) + { + W_band_power = 0; + other_ch_band_power = 0; + + brange[0] = band_grouping[i]; + brange[1] = band_grouping[i + 1]; + + /* Loop over the W channel bins to calculate the power in the band */ + for ( j = brange[0]; j < brange[1]; j++ ) + { + W_band_power += ( Cldfb_RealBuffer[0][j] * Cldfb_RealBuffer[0][j] ) + ( 
Cldfb_ImagBuffer[0][j] * Cldfb_ImagBuffer[0][j] ); + } + + /* Loop over the other channels and bins to calculate the power in the band */ + for ( ch_idx = 1; ch_idx < nchan_ana; ch_idx++ ) + { + /* abs()^2 */ + for ( j = brange[0]; j < brange[1]; j++ ) + { + other_ch_band_power += ( Cldfb_RealBuffer[ch_idx][j] * Cldfb_RealBuffer[ch_idx][j] ) + ( Cldfb_ImagBuffer[ch_idx][j] * Cldfb_ImagBuffer[ch_idx][j] ); + } + } + if ( other_ch_band_power < EPSILON ) + { + if ( W_band_power > THRESH_SILENCE ) + { + any_mono_band = 1; + } + } + else + { + if ( ( W_band_power > THRESH_SILENCE ) || ( other_ch_band_power > THRESH_SILENCE ) ) + { + W_band_power_norm = min( W_band_power, NORM_FACTOR ) * ONE_ON_NORM_FACTOR; + threshold = max( W_band_power_norm * MAX_THRESH, MIN_THRESH ); + if ( W_band_power / other_ch_band_power > threshold ) + { + any_mono_band = 1; + } + else + { + any_mc_band = 1; + } + } + } + } + /* If any band contains multi-channel content it's not mono */ + if ( any_mc_band ) + { + local_mono_flag = 0; + } + else + { + /* If any band contains mono content the frame is mono. 
*/ + if ( any_mono_band ) + { + local_mono_flag = 1; + } + } + /* Hysteresis - only after MONO_FRAME_THRESH frames on mono will the actual mono flag be set */ + if ( local_mono_flag ) + { + if ( *mono_frame_count < MONO_FRAME_THRESH ) + { + ( *mono_frame_count )++; + } + } + else + { + /* Instantaneously disable actual mono flag if multi-channel content is observed */ + if ( any_mc_band ) + { + *mono_frame_count = 0; + } + } + + /* Final check if there has been mono for MONO_FRAME_THRESH number of frames than the content is declared mono */ + if ( *mono_frame_count == MONO_FRAME_THRESH ) + { + return 1; + } + else + { + return 0; + } +} + /*------------------------------------------------------------------------- * computeReferencePower_enc() * @@ -401,9 +543,14 @@ void computeReferencePower_enc( float *reference_power, /* o : Estimated power */ const int16_t enc_param_start_band, /* i : first band to process */ const int16_t num_freq_bands, /* i : Number of frequency bands */ - const IVAS_FORMAT ivas_format, /* i : ivas_format */ + const IVAS_FORMAT ivas_format, /* i : ivas_format */ int16_t ref_power_w, /* i : use 0 if hodirac is enabled */ const int16_t nchan_ana /* i : number of analysis channels */ +#ifdef FIX_527_SBA_MONO_INPUT + , + int16_t *mono_frame_count, /* i/o: Mono Frame Count */ + int16_t *dirac_mono_flag /* i/o: Mono Flag */ +#endif ) { int16_t brange[2]; @@ -411,10 +558,18 @@ void computeReferencePower_enc( float reference_power_W[DIRAC_MAX_NBANDS]; +#ifdef FIX_527_SBA_MONO_INPUT + if ( dirac_mono_flag != NULL ) + { + *dirac_mono_flag = ivas_dirac_get_mono_flag( band_grouping, Cldfb_RealBuffer, Cldfb_ImagBuffer, nchan_ana, mono_frame_count ); + } +#endif + for ( i = 0; i < num_freq_bands; i++ ) { brange[0] = band_grouping[i + enc_param_start_band]; brange[1] = band_grouping[i + enc_param_start_band + 1]; + reference_power[i] = 0; reference_power_W[i] = 0; @@ -446,7 +601,6 @@ void computeReferencePower_enc( return; } - 
/*------------------------------------------------------------------------- * ivas_dirac_param_est_enc() * @@ -463,7 +617,13 @@ void ivas_dirac_param_est_enc( const int16_t input_frame, const IVAS_FORMAT ivas_format, const int16_t hodirac_flag, - const int16_t nchan_fb_in ) + const int16_t nchan_fb_in +#ifdef FIX_527_SBA_MONO_INPUT + , + int16_t *mono_frame_count, + int16_t *dirac_mono_flag +#endif +) { int16_t i, d, ts, index, l_ts, num_freq_bands; int16_t band_m_idx, block_m_idx; @@ -571,7 +731,13 @@ void ivas_dirac_param_est_enc( num_freq_bands, ivas_format, hodirac_flag ? 0 : 1, - FOA_CHANNELS ); + FOA_CHANNELS +#ifdef FIX_527_SBA_MONO_INPUT + , + mono_frame_count, + dirac_mono_flag +#endif + ); computeIntensityVector_enc( hDirAC, diff --git a/lib_enc/ivas_mcmasa_enc.c b/lib_enc/ivas_mcmasa_enc.c index 9dbdbcab15..2088fa7dce 100644 --- a/lib_enc/ivas_mcmasa_enc.c +++ b/lib_enc/ivas_mcmasa_enc.c @@ -979,7 +979,13 @@ void ivas_mcmasa_param_est_enc( num_freq_bands, MC_FORMAT, 0, - FOA_CHANNELS ); + FOA_CHANNELS +#ifdef FIX_527_SBA_MONO_INPUT + , + NULL, + NULL +#endif + ); /* Fill buffers of length "averaging_length" time slots for intensity and energy */ hMcMasa->index_buffer_intensity = ( hMcMasa->index_buffer_intensity % hMcMasa->no_col_avg_diff ) + 1; /* averaging_length = 32 */ diff --git a/lib_enc/ivas_spar_encoder.c b/lib_enc/ivas_spar_encoder.c index 7233234f49..3c62084138 100644 --- a/lib_enc/ivas_spar_encoder.c +++ b/lib_enc/ivas_spar_encoder.c @@ -444,7 +444,12 @@ static ivas_error ivas_spar_cov_md_process( if ( hSpar->hMdEnc->spar_hoa_md_flag == 0 ) { - ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer ); + ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer +#ifdef FIX_527_SBA_MONO_INPUT + , + hQMetaData->dirac_mono_flag +#endif + ); } if ( 
hSpar->hMdEnc->spar_hoa_dirac2spar_md_flag ) @@ -492,7 +497,12 @@ static ivas_error ivas_spar_cov_md_process( if ( hSpar->hMdEnc->spar_hoa_md_flag ) { - ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer ); + ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer +#ifdef FIX_527_SBA_MONO_INPUT + , + hQMetaData->dirac_mono_flag +#endif + ); } return error; @@ -535,7 +545,7 @@ static ivas_error ivas_spar_enc_process( const int16_t *order; SPAR_ENC_HANDLE hSpar = st_ivas->hSpar; IVAS_QMETADATA_HANDLE hQMetaData = st_ivas->hQMetaData; - int16_t ts, l_ts, num_del_samples; + int16_t ts, l_ts, num_del_samples, b, i_ts; float *ppIn_FR_real[IVAS_SPAR_MAX_CH], *ppIn_FR_imag[IVAS_SPAR_MAX_CH]; float wyzx_del_buf[FOA_CHANNELS][IVAS_FB_1MS_48K_SAMP]; @@ -594,7 +604,7 @@ static ivas_error ivas_spar_enc_process( /* fill delay (1 ms) buffer for all Transport channels */ for ( i = 0; i < hSpar->hFbMixer->fb_cfg->num_out_chans; i++ ) { - int idx = hSpar->hFbMixer->fb_cfg->remix_order[i]; + int16_t idx = hSpar->hFbMixer->fb_cfg->remix_order[i]; mvr2r( &hSpar->hFbMixer->ppFilterbank_prior_input[idx][hSpar->hFbMixer->fb_cfg->prior_input_length - num_del_samples], wyzx_del_buf[idx], num_del_samples ); } } @@ -653,6 +663,19 @@ static ivas_error ivas_spar_enc_process( ivas_dirac_enc( st_ivas->hDirAC, hQMetaData, hMetaData, data_f, ppIn_FR_real, ppIn_FR_imag, input_frame, dtx_vad, hEncoderConfig->ivas_format, hodirac_flag ); +#ifdef FIX_527_SBA_MONO_INPUT + /* Set Energy Ratio to 0.0 if the mono flag has been set */ + if ( hQMetaData->dirac_mono_flag ) + { + for ( b = hQMetaData->q_direction->cfg.start_band; b < hQMetaData->q_direction->cfg.nbands; b++ ) + { + for ( i_ts = 0; i_ts < ( ( dtx_vad == 1 ) ? 
hQMetaData->q_direction[0].cfg.nblocks : 1 ); i_ts++ ) + { + hQMetaData->q_direction[0].band_data[b].energy_ratio[i_ts] = 0.0f; + } + } + } +#endif #ifdef COVARIANCE_MEMORY_OPT /*-----------------------------------------------------------------------------------------* @@ -709,7 +732,12 @@ static ivas_error ivas_spar_enc_process( if ( hSpar->hMdEnc->spar_hoa_md_flag == 0 ) { - ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer ); + ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer +#ifdef FIX_527_SBA_MONO_INPUT + , + hQMetaData->dirac_mono_flag +#endif + ); } if ( hSpar->hMdEnc->spar_hoa_dirac2spar_md_flag ) @@ -757,7 +785,12 @@ static ivas_error ivas_spar_enc_process( if ( hSpar->hMdEnc->spar_hoa_md_flag ) { - ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer ); + ivas_spar_md_enc_process( hSpar->hMdEnc, hEncoderConfig, cov_real, cov_dtx_real, hMetaData, dtx_vad, nchan_inp, sba_order, hSpar->hFbMixer->prior_mixer +#ifdef FIX_527_SBA_MONO_INPUT + , + hQMetaData->dirac_mono_flag +#endif + ); } #endif diff --git a/lib_enc/ivas_spar_md_enc.c b/lib_enc/ivas_spar_md_enc.c index 757f772b13..6d6d5abd9a 100644 --- a/lib_enc/ivas_spar_md_enc.c +++ b/lib_enc/ivas_spar_md_enc.c @@ -567,6 +567,10 @@ ivas_error ivas_spar_md_enc_process( const int16_t nchan_inp, const int16_t sba_order, /* i : Ambisonic (SBA) order */ float *prior_mixer[IVAS_MAX_FB_MIXER_OUT_CH][IVAS_MAX_SPAR_FB_MIXER_IN_CH] /* i : prior mixer_matrix */ +#ifdef FIX_527_SBA_MONO_INPUT + , + const int16_t dirac_mono_flag +#endif ) { float pred_coeffs_re[IVAS_SPAR_MAX_CH - 1][IVAS_MAX_NUM_BANDS]; @@ -690,6 +694,38 @@ ivas_error ivas_spar_md_enc_process( active_w_vlbr, &hMdEnc->spar_md_cfg, &hMdEnc->spar_md, Wscale, 
0 ); +#ifdef FIX_527_SBA_MONO_INPUT + if ( dirac_mono_flag ) + { + int16_t i_ts, k; + int16_t num_md_sub_frames = 1; + int16_t ndec = hMdEnc->spar_md_cfg.num_decorr_per_band[0]; + int16_t ndm = hMdEnc->spar_md_cfg.num_dmx_chans_per_band[0]; + for ( i_ts = 0; i_ts < num_md_sub_frames; i_ts++ ) + { + for ( b = 0; b < IVAS_MAX_NUM_BANDS; b++ ) + { + for ( j = 0; j < ndm + ndec - 1; j++ ) + { + hMdEnc->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].pred_re[j] = 0.0f; + } + for ( j = 0; j < ndec; j++ ) + { + for ( k = 0; k < ndm - 1; k++ ) + { + hMdEnc->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].C_re[j][k] = 0.0f; + } + } + + for ( j = 0; j < ndec; j++ ) + { + hMdEnc->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].P_re[j] = 0.0f; + } + } + } + } +#endif + for ( i = 0; i < num_ch; i++ ) { for ( j = 0; j < num_ch; j++ ) @@ -891,6 +927,21 @@ ivas_error ivas_spar_md_enc_process( planarCP #endif ); +#ifdef FIX_527_SBA_MONO_INPUT + if ( dirac_mono_flag ) + { + int16_t k; + int16_t ndec = hMdEnc->spar_md_cfg.num_decorr_per_band[0]; + int16_t ndm = hMdEnc->spar_md_cfg.num_dmx_chans_per_band[0]; + for ( j = 0; j < ndec; j++ ) + { + for ( k = 0; k < ndm - 1; k++ ) + { + hMdEnc->spar_md.band_coeffs[b].C_re[j][k] = 0.0f; + } + } + } +#endif #ifdef SPAR_HOA_DBG /*fprintf(stderr, "\n\n C coefficients: band %d\n", b); diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index 59abb33a0a..9eb6dd75c7 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -612,6 +612,8 @@ typedef struct ivas_dirac_enc_data_structure float **buffer_intensity_real[DIRAC_NUM_DIMS]; float *buffer_energy; + /* Frame count for detecting mono */ + int16_t mono_frame_count; } DIRAC_ENC_DATA, *DIRAC_ENC_HANDLE; -- GitLab From ad902c1f9e2c91c73c75e4f3095d730df65ec5a3 Mon Sep 17 00:00:00 2001 From: azmill Date: Thu, 27 Jul 2023 19:34:03 +1000 Subject: [PATCH 2/7] Removing a stray printf --- lib_dec/ivas_spar_md_dec.c | 1 - 1 file changed, 1 deletion(-) diff --git 
a/lib_dec/ivas_spar_md_dec.c b/lib_dec/ivas_spar_md_dec.c index 9e71c35314..61844a8f01 100644 --- a/lib_dec/ivas_spar_md_dec.c +++ b/lib_dec/ivas_spar_md_dec.c @@ -1184,7 +1184,6 @@ int16_t ivas_spar_chk_zero_coefs( { for ( j = 0; j < ndm + ndec - 1; j++ ) { - // printf("%e, ", hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].pred_re[j]); if ( hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].pred_re[j] != 0.0f ) { mono = 0; -- GitLab From fe2a5c817b3e4689d93a27ec55b731c5bd45941f Mon Sep 17 00:00:00 2001 From: azmill Date: Fri, 28 Jul 2023 09:59:17 +1000 Subject: [PATCH 3/7] Fixing variable declaration --- lib_dec/ivas_spar_md_dec.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib_dec/ivas_spar_md_dec.c b/lib_dec/ivas_spar_md_dec.c index 61844a8f01..5a4960cd11 100644 --- a/lib_dec/ivas_spar_md_dec.c +++ b/lib_dec/ivas_spar_md_dec.c @@ -1165,6 +1165,7 @@ int16_t ivas_spar_chk_zero_coefs( ivas_spar_md_dec_state_t *hMdDec; int16_t num_md_sub_frames; int16_t mono = 1; + int16_t ndec, ndm; hMdDec = st_ivas->hSpar->hMdDec; @@ -1175,8 +1176,8 @@ int16_t ivas_spar_chk_zero_coefs( #endif ); - int16_t ndec = hMdDec->spar_md_cfg.num_decorr_per_band[0]; - int16_t ndm = hMdDec->spar_md_cfg.num_dmx_chans_per_band[0]; + ndec = hMdDec->spar_md_cfg.num_decorr_per_band[0]; + ndm = hMdDec->spar_md_cfg.num_dmx_chans_per_band[0]; for ( i_ts = 0; i_ts < num_md_sub_frames; i_ts++ ) { -- GitLab From 14ef25b0d14731d62eccae7c61540086ceabedfa Mon Sep 17 00:00:00 2001 From: azmill Date: Mon, 31 Jul 2023 11:00:20 +1000 Subject: [PATCH 4/7] Fixing failing msan tests --- lib_com/ivas_qmetadata_com.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib_com/ivas_qmetadata_com.c b/lib_com/ivas_qmetadata_com.c index 32b4653d2f..022f4efdf4 100644 --- a/lib_com/ivas_qmetadata_com.c +++ b/lib_com/ivas_qmetadata_com.c @@ -149,6 +149,9 @@ ivas_error ivas_qmetadata_allocate_memory( { set_zero( hQMetaData->q_direction[dir].band_data[j].elevation, 
MAX_PARAM_SPATIAL_SUBFRAMES ); set_zero( hQMetaData->q_direction[dir].band_data[j].azimuth, MAX_PARAM_SPATIAL_SUBFRAMES ); +#ifdef FIX_527_SBA_MONO_INPUT + set_zero( hQMetaData->q_direction[dir].band_data[j].energy_ratio, MAX_PARAM_SPATIAL_SUBFRAMES ); +#endif } } -- GitLab From cace261c2d4f8d4955398a2041a4c2d1a7d75d17 Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Mon, 7 Aug 2023 13:32:51 +0200 Subject: [PATCH 5/7] move constants for mono detection to ivas_cnst.h --- lib_com/ivas_cnst.h | 7 +++++++ lib_enc/ivas_dirac_enc.c | 23 ++++++++--------------- lib_rend/ivas_splitRendererPre.c | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/lib_com/ivas_cnst.h b/lib_com/ivas_cnst.h index 6d89e222ec..42c563b9a9 100644 --- a/lib_com/ivas_cnst.h +++ b/lib_com/ivas_cnst.h @@ -1018,6 +1018,13 @@ typedef enum #define DIRAC_SLOT_NS 1250000L /* time duration of a time slot, 1.25ms (==DELAY_RENERER_NS/MAX_PARAM_SPATIAL_SUBFRAMES) */ #define DIRAC_SLOT_ENC_NS 5000000L +#define DIRAC_MONO_THRESH_SILENCE 3e4f +#define DIRAC_MONO_NORM_FACTOR 1e13f +#define DIRAC_MONO_ONE_ON_NORM_FACTOR (1.f / 1e13f) +#define DIRAC_MONO_MAX_THRESH 1e6f +#define DIRAC_MONO_MIN_THRESH 1e2f +#define DIRAC_MONO_FRAME_THRESH 15 /* 30ms */ + typedef enum { DIRAC_OPEN, /* initialize to default value */ diff --git a/lib_enc/ivas_dirac_enc.c b/lib_enc/ivas_dirac_enc.c index 29c6642db7..9f4b3b7694 100644 --- a/lib_enc/ivas_dirac_enc.c +++ b/lib_enc/ivas_dirac_enc.c @@ -434,13 +434,6 @@ static int16_t ivas_dirac_get_mono_flag( float W_band_power_norm; float threshold = 0; - static const float THRESH_SILENCE = 3e4f; - static const float NORM_FACTOR = 1e13f; - static const float ONE_ON_NORM_FACTOR = 1 / 1e13f; - static const float MAX_THRESH = 1e6f; - static const float MIN_THRESH = 1e2f; - static const float MONO_FRAME_THRESH = ( 15 ); /* 30ms */ - /* Banded Power Calculations */ for ( i = 0; i < DIRAC_MAX_NBANDS; i++ ) { @@ -467,17 +460,17 @@ static int16_t ivas_dirac_get_mono_flag( 
} if ( other_ch_band_power < EPSILON ) { - if ( W_band_power > THRESH_SILENCE ) + if ( W_band_power > DIRAC_MONO_THRESH_SILENCE ) { any_mono_band = 1; } } else { - if ( ( W_band_power > THRESH_SILENCE ) || ( other_ch_band_power > THRESH_SILENCE ) ) + if ( ( W_band_power > DIRAC_MONO_THRESH_SILENCE ) || ( other_ch_band_power > DIRAC_MONO_THRESH_SILENCE ) ) { - W_band_power_norm = min( W_band_power, NORM_FACTOR ) * ONE_ON_NORM_FACTOR; - threshold = max( W_band_power_norm * MAX_THRESH, MIN_THRESH ); + W_band_power_norm = min( W_band_power, DIRAC_MONO_NORM_FACTOR ) * DIRAC_MONO_ONE_ON_NORM_FACTOR; + threshold = max( W_band_power_norm * DIRAC_MONO_MAX_THRESH, DIRAC_MONO_MIN_THRESH ); if ( W_band_power / other_ch_band_power > threshold ) { any_mono_band = 1; @@ -502,10 +495,10 @@ static int16_t ivas_dirac_get_mono_flag( local_mono_flag = 1; } } - /* Hysteresis - only after MONO_FRAME_THRESH frames on mono will the actual mono flag be set */ + /* Hysteresis - only after DIRAC_MONO_FRAME_THRESH frames on mono will the actual mono flag be set */ if ( local_mono_flag ) { - if ( *mono_frame_count < MONO_FRAME_THRESH ) + if ( *mono_frame_count < DIRAC_MONO_FRAME_THRESH ) { ( *mono_frame_count )++; } @@ -519,8 +512,8 @@ static int16_t ivas_dirac_get_mono_flag( } } - /* Final check if there has been mono for MONO_FRAME_THRESH number of frames than the content is declared mono */ - if ( *mono_frame_count == MONO_FRAME_THRESH ) + /* Final check if there has been mono for DIRAC_MONO_FRAME_THRESH number of frames than the content is declared mono */ + if ( *mono_frame_count == DIRAC_MONO_FRAME_THRESH ) { return 1; } diff --git a/lib_rend/ivas_splitRendererPre.c b/lib_rend/ivas_splitRendererPre.c index 0d7e1ed19e..6e3c05324f 100644 --- a/lib_rend/ivas_splitRendererPre.c +++ b/lib_rend/ivas_splitRendererPre.c @@ -60,7 +60,7 @@ #define MAX_BAND_SMOOTH ( 1 ) -#define SMOOTH_NORM_FACTOR ( 5.0f ) +#define SMOOTH_DIRAC_MONO_NORM_FACTOR ( 5.0f ) static void ivas_calc_mat_det_2by2_complex( 
float in_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], float in_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS], -- GitLab From e99e6ab0ce30c29e4deae6c1afd75df64d9da86b Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Mon, 7 Aug 2023 13:42:46 +0200 Subject: [PATCH 6/7] fix compiler warnings --- lib_enc/ivas_spar_md_enc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib_enc/ivas_spar_md_enc.c b/lib_enc/ivas_spar_md_enc.c index 6dca3a5a36..9e6fb8cfe8 100644 --- a/lib_enc/ivas_spar_md_enc.c +++ b/lib_enc/ivas_spar_md_enc.c @@ -697,11 +697,13 @@ ivas_error ivas_spar_md_enc_process( #ifdef FIX_527_SBA_MONO_INPUT if ( dirac_mono_flag ) { - int16_t i_ts, k; + int16_t i_ts; int16_t num_md_sub_frames = 1; - int16_t ndec = hMdEnc->spar_md_cfg.num_decorr_per_band[0]; - int16_t ndm = hMdEnc->spar_md_cfg.num_dmx_chans_per_band[0]; - for ( i_ts = 0; i_ts < num_md_sub_frames; i_ts++ ) + + ndec = hMdEnc->spar_md_cfg.num_decorr_per_band[0]; + ndm = hMdEnc->spar_md_cfg.num_dmx_chans_per_band[0]; + + for ( i_ts = 0; i_ts < num_md_sub_frames; i_ts++ ) { for ( b = 0; b < IVAS_MAX_NUM_BANDS; b++ ) { @@ -930,9 +932,8 @@ ivas_error ivas_spar_md_enc_process( #ifdef FIX_527_SBA_MONO_INPUT if ( dirac_mono_flag ) { - int16_t k; - int16_t ndec = hMdEnc->spar_md_cfg.num_decorr_per_band[0]; - int16_t ndm = hMdEnc->spar_md_cfg.num_dmx_chans_per_band[0]; + ndec = hMdEnc->spar_md_cfg.num_decorr_per_band[0]; + ndm = hMdEnc->spar_md_cfg.num_dmx_chans_per_band[0]; for ( j = 0; j < ndec; j++ ) { for ( k = 0; k < ndm - 1; k++ ) -- GitLab From 676813e2eda27c4731cfdd1b1c9eb33b2c566130 Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Mon, 7 Aug 2023 13:53:01 +0200 Subject: [PATCH 7/7] fix formatting --- lib_enc/ivas_spar_md_enc.c | 8 ++++---- lib_rend/ivas_splitRendererPre.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib_enc/ivas_spar_md_enc.c b/lib_enc/ivas_spar_md_enc.c index 9e6fb8cfe8..4e8556cd37 100644 --- a/lib_enc/ivas_spar_md_enc.c 
+++ b/lib_enc/ivas_spar_md_enc.c @@ -699,11 +699,11 @@ ivas_error ivas_spar_md_enc_process( { int16_t i_ts; int16_t num_md_sub_frames = 1; - - ndec = hMdEnc->spar_md_cfg.num_decorr_per_band[0]; + + ndec = hMdEnc->spar_md_cfg.num_decorr_per_band[0]; ndm = hMdEnc->spar_md_cfg.num_dmx_chans_per_band[0]; - - for ( i_ts = 0; i_ts < num_md_sub_frames; i_ts++ ) + + for ( i_ts = 0; i_ts < num_md_sub_frames; i_ts++ ) { for ( b = 0; b < IVAS_MAX_NUM_BANDS; b++ ) { diff --git a/lib_rend/ivas_splitRendererPre.c b/lib_rend/ivas_splitRendererPre.c index 6e3c05324f..efbb188f8b 100644 --- a/lib_rend/ivas_splitRendererPre.c +++ b/lib_rend/ivas_splitRendererPre.c @@ -59,7 +59,7 @@ #endif -#define MAX_BAND_SMOOTH ( 1 ) +#define MAX_BAND_SMOOTH ( 1 ) #define SMOOTH_DIRAC_MONO_NORM_FACTOR ( 5.0f ) static void ivas_calc_mat_det_2by2_complex( float in_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], -- GitLab