From f2d5e62ed9989c2119241bec4cc1113211fb3cdf Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Wed, 23 Apr 2025 17:21:44 +0530 Subject: [PATCH] Fix for 3GPP issue 1499: Spike in BASOP encoded signal of MASA selection material Link #1499 --- lib_enc/analy_sp_fx.c | 10 ++--- lib_enc/find_uv_fx.c | 2 +- lib_enc/ivas_core_pre_proc_front_fx.c | 32 ++++++++-------- lib_enc/ivas_cpe_enc_fx.c | 4 +- lib_enc/ivas_front_vad_fx.c | 12 +++--- lib_enc/ivas_stereo_td_analysis_fx.c | 8 ++-- lib_enc/long_enr_fx.c | 4 +- lib_enc/nois_est_fx.c | 55 ++++++++++++++------------- lib_enc/prot_fx_enc.h | 6 +-- lib_enc/stat_enc.h | 6 ++- lib_enc/updt_enc_fx.c | 9 ++++- 11 files changed, 80 insertions(+), 68 deletions(-) diff --git a/lib_enc/analy_sp_fx.c b/lib_enc/analy_sp_fx.c index 57dbb369c..49bb03f57 100644 --- a/lib_enc/analy_sp_fx.c +++ b/lib_enc/analy_sp_fx.c @@ -438,7 +438,7 @@ void ivas_analy_sp_fx( Word16 *q_fr_bands, /* o : energy in critical frequency bands Q0 */ Word32 *lf_E, /* o : per bin E for first... q_lf_E */ Word16 *q_lf_E, /* o : per bin E for first... Q0 */ - Word16 *Etot, /* o : total input energy Q8 */ + Word32 *Etot, /* o : total input energy Q24 */ const Word16 min_band, /* i : minimum critical band Q0 */ const Word16 max_band, /* i : maximum critical band Q0 */ Word32 *Bin_E, /* o : per-bin energy spectrum q_Bin_E */ @@ -557,7 +557,7 @@ void ivas_analy_sp_fx( /* Average total log energy over both half-frames */ /* *Etot = 10.0f * (float)log10(0.5f * *Etot); */ - *Etot = -12800 /* 10.f * logf(0.00001f) in Q8 */; + *Etot = -838860800 /* 10.f * log10f(0.00001f) in Q24 : This is when LEtot is 0*/; move16(); IF( LEtot != 0 ) { @@ -566,7 +566,7 @@ void ivas_analy_sp_fx( LEtot = W_shl( LEtot, exp ); // q_fr_bands+2+exp Ltmp = BASOP_Util_Log10( W_extract_h( LEtot ), sub( 61, add( *q_fr_bands, exp ) ) /* 31-(q_fr_bands+2+exp-32) */ ); // Q25 Ltmp = Mpy_32_32( Ltmp, 1342177280 /* 10.f in Q27 */ ); // (Q25, Q27) -> Q21 - *Etot = extract_h( L_shl( Ltmp, Q24 - Q21 ) ); // Q8 + *Etot = L_shl( Ltmp, Q24 - Q21 ); // Q24 move16(); } } @@ -599,7 +599,7 @@ void ivas_analy_sp_fx( } /* Average total log energy over both half-frames */ - *Etot = -12800 /* 10.f * logf(0.00001f) in Q8 */; + *Etot = -838860800 /* 10.f * log10f(0.00001f) in Q24 : This is when LEtot is 0*/; move16(); IF( LEtot != 0 ) { @@ -607,7 +607,7 @@ void ivas_analy_sp_fx( LEtot = W_shl( LEtot, exp ); // q_fr_bands+exp Ltmp = BASOP_Util_Log10( W_extract_h( LEtot ), sub( 62, add( *q_fr_bands, exp ) ) /* 31-(q_fr_bands+1+exp-32) */ ); // Q25 Ltmp = Mpy_32_32( Ltmp, 1342177280 /* 10.f in Q27 */ ); // (Q25, Q27) -> Q21 - *Etot = extract_h( L_shl( Ltmp, Q24 - Q21 ) ); // Q8 + *Etot = L_shl( Ltmp, Q24 - Q21 ); // Q24 move16(); } } diff --git a/lib_enc/find_uv_fx.c b/lib_enc/find_uv_fx.c index 79dc78c6d..c4fe2e895 100644 --- a/lib_enc/find_uv_fx.c +++ b/lib_enc/find_uv_fx.c @@ -729,7 +729,7 @@ Word16 find_uv_ivas_fx( /* o : coding typ * Total frame energy difference (dE3) *-----------------------------------------------------------------*/ - dE3 = sub( Etot, hNoiseEst->Etot_last_fx ); /*Q8*/ + dE3 = sub( Etot, extract_h( hNoiseEst->Etot_last_32fx ) ); /*Q8*/ /*-----------------------------------------------------------------* * Energy decrease after spike (dE2) diff --git a/lib_enc/ivas_core_pre_proc_front_fx.c b/lib_enc/ivas_core_pre_proc_front_fx.c index 67c4f256f..b9a15cd35 100644 --- a/lib_enc/ivas_core_pre_proc_front_fx.c +++ b/lib_enc/ivas_core_pre_proc_front_fx.c @@ -205,7 +205,7 @@ ivas_error pre_proc_front_ivas_fx( Word16 S_map_fx[L_FFT / 2]; Word16 cor_map_sum_LR_fx[CPE_CHANNELS]; /* speech/music clasif. parameter */ Word16 S_map_LR_fx[L_FFT / 2]; /* short-term correlation map */ - Word16 Etot_fx; /* total energy Q8 */ + Word32 Etot_fx; /* total energy Q8 */ Word32 tmpN_fx[NB_BANDS]; /* Temporary noise update */ Word32 tmpE_fx[NB_BANDS]; /* Temporary averaged energy of 2 sf. */ Word32 tmpN_LR_fx[CPE_CHANNELS][NB_BANDS]; /* Temporary noise update */ @@ -739,7 +739,7 @@ ivas_error pre_proc_front_ivas_fx( IF( hStereoClassif != NULL ) { - IF( GT_32( sub( st->lp_speech_fx, Etot_fx ), 25 << Q8 ) ) /*Q8*/ + IF( GT_32( sub( st->lp_speech_fx, extract_h( Etot_fx ) ), 25 << Q8 ) ) /*Q8*/ { hStereoClassif->silence_flag = 2; move16(); @@ -759,11 +759,11 @@ ivas_error pre_proc_front_ivas_fx( IF( hCPE != NULL ) { - noise_est_pre_32fx( L_deposit_h( Etot_fx ), st->ini_frame, st->hNoiseEst, st->idchan, element_mode, hCPE->last_element_mode ); + noise_est_pre_32fx( ( Etot_fx ), st->ini_frame, st->hNoiseEst, st->idchan, element_mode, hCPE->last_element_mode ); } ELSE { - noise_est_pre_32fx( L_deposit_h( Etot_fx ), st->ini_frame, st->hNoiseEst, st->idchan, element_mode, element_mode ); + noise_est_pre_32fx( ( Etot_fx ), st->ini_frame, st->hNoiseEst, st->idchan, element_mode, element_mode ); } test(); @@ -913,18 +913,18 @@ ivas_error pre_proc_front_ivas_fx( *----------------------------------------------------------------*/ noise_est_down_ivas_fx( fr_bands_fx, fr_bands_fx_q, st->hNoiseEst->bckr_fx, &st->hNoiseEst->q_bckr, tmpN_fx, &q_tmpN, tmpE_fx, &q_tmpE, st->min_band, st->max_band, - &st->hNoiseEst->totalNoise_fx, Etot_fx, &st->hNoiseEst->Etot_last_fx, &st->hNoiseEst->Etot_v_h2_fx ); + &st->hNoiseEst->totalNoise_fx, Etot_fx, &st->hNoiseEst->Etot_last_32fx, &st->hNoiseEst->Etot_v_h2_fx ); test(); IF( lr_vad_enabled && st->idchan == 0 ) { - noise_est_down_ivas_fx( fr_bands_LR_fx[0], fr_bands_LR_fx_q[0], hCPE->hFrontVad[0]->hNoiseEst->bckr_fx, &hCPE->hFrontVad[0]->hNoiseEst->q_bckr, tmpN_LR_fx[0], &q_tmpN_LR[0], tmpE_LR_fx[0], &q_tmpE_LR[0], st->min_band, st->max_band, &hCPE->hFrontVad[0]->hNoiseEst->totalNoise_fx, Etot_LR_fx[0], &hCPE->hFrontVad[0]->hNoiseEst->Etot_last_fx, &hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2_fx ); - noise_est_down_ivas_fx( fr_bands_LR_fx[1], fr_bands_LR_fx_q[1], hCPE->hFrontVad[1]->hNoiseEst->bckr_fx, &hCPE->hFrontVad[1]->hNoiseEst->q_bckr, tmpN_LR_fx[1], &q_tmpN_LR[1], tmpE_LR_fx[1], &q_tmpE_LR[1], st->min_band, st->max_band, &hCPE->hFrontVad[1]->hNoiseEst->totalNoise_fx, Etot_LR_fx[1], &hCPE->hFrontVad[1]->hNoiseEst->Etot_last_fx, &hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2_fx ); + noise_est_down_ivas_fx( fr_bands_LR_fx[0], fr_bands_LR_fx_q[0], hCPE->hFrontVad[0]->hNoiseEst->bckr_fx, &hCPE->hFrontVad[0]->hNoiseEst->q_bckr, tmpN_LR_fx[0], &q_tmpN_LR[0], tmpE_LR_fx[0], &q_tmpE_LR[0], st->min_band, st->max_band, &hCPE->hFrontVad[0]->hNoiseEst->totalNoise_fx, L_deposit_h( Etot_LR_fx[0] ), &hCPE->hFrontVad[0]->hNoiseEst->Etot_last_32fx, &hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2_fx ); + noise_est_down_ivas_fx( fr_bands_LR_fx[1], fr_bands_LR_fx_q[1], hCPE->hFrontVad[1]->hNoiseEst->bckr_fx, &hCPE->hFrontVad[1]->hNoiseEst->q_bckr, tmpN_LR_fx[1], &q_tmpN_LR[1], tmpE_LR_fx[1], &q_tmpE_LR[1], st->min_band, st->max_band, &hCPE->hFrontVad[1]->hNoiseEst->totalNoise_fx, L_deposit_h( Etot_LR_fx[1] ), &hCPE->hFrontVad[1]->hNoiseEst->Etot_last_32fx, &hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2_fx ); corr_shiftL_fx = correlation_shift_fx( hCPE->hFrontVad[0]->hNoiseEst->totalNoise_fx ); // Q15 corr_shiftR_fx = correlation_shift_fx( hCPE->hFrontVad[1]->hNoiseEst->totalNoise_fx ); // Q15 } - *relE_fx = sub( Etot_fx, st->lp_speech_fx ); // Q8 + *relE_fx = sub( extract_h( Etot_fx ), st->lp_speech_fx ); // Q8 move16(); corr_shift_fx = correlation_shift_fx( st->hNoiseEst->totalNoise_fx ); /* Q15 */ @@ -1307,8 +1307,8 @@ ivas_error pre_proc_front_ivas_fx( move16(); move16(); - noise_est_ivas_fx( st, old_pitch1, tmpN_fx, epsP_fx, Etot_fx, *relE_fx, corr_shift_fx, tmpE_fx, q_tmpE, fr_bands_fx, fr_bands_fx_q, cor_map_sum_fx, - &ncharX_fx, &sp_div_fx, &q_sp_div, &non_staX_fx, loc_harm, lf_E_fx, q_lf_E_fx, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp_fx, + noise_est_ivas_fx( st, old_pitch1, tmpN_fx, epsP_fx, extract_h( Etot_fx ), *relE_fx, corr_shift_fx, tmpE_fx, q_tmpE, fr_bands_fx, fr_bands_fx_q, cor_map_sum_fx, + &ncharX_fx, &sp_div_fx, &q_sp_div, &non_staX_fx, loc_harm, lf_E_fx, q_lf_E_fx, &st->hNoiseEst->harm_cor_cnt, extract_h( st->hNoiseEst->Etot_l_lp_32fx ), st->hNoiseEst->Etot_v_h2_fx, &st->hNoiseEst->bg_cnt, st->lgBin_E_fx, &dummy_fx, S_map_fx, hStereoClassif, NULL, st->ini_frame ); @@ -1352,14 +1352,14 @@ ivas_error pre_proc_front_ivas_fx( noise_est_ivas_fx( st, old_pitch1, tmpN_LR_fx[0], epsP_fx, Etot_LR_fx[0], sub( Etot_LR_fx[0], hCPE->hFrontVad[0]->lp_speech_fx ), corr_shiftL_fx, tmpE_LR_fx[0], q_tmpE_LR[0], fr_bands_LR_fx[0], fr_bands_LR_fx_q[0], &cor_map_sum_LR_fx[0], &ncharX_LR_fx, &sp_div_LR_fx, &q_sp_div_LR, &non_staX_LR_fx, loc_harmLR_fx, lf_E_LR_fx[0], lf_E_LR_fx_q, &hCPE->hFrontVad[0]->hNoiseEst->harm_cor_cnt, - hCPE->hFrontVad[0]->hNoiseEst->Etot_l_lp_fx, hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2_fx, &hCPE->hFrontVad[0]->hNoiseEst->bg_cnt, + extract_h( hCPE->hFrontVad[0]->hNoiseEst->Etot_l_lp_32fx ), hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2_fx, &hCPE->hFrontVad[0]->hNoiseEst->bg_cnt, st->lgBin_E_fx, &dummy_fx, S_map_LR_fx, NULL, hCPE->hFrontVad[0], hCPE->hFrontVad[0]->ini_frame ); /* Note: the index [0] in the last argument is intended, the ini_frame counter is only maintained in the zero-th channel's VAD handle */ noise_est_ivas_fx( st, old_pitch1, tmpN_LR_fx[1], epsP_fx, Etot_LR_fx[1], sub( Etot_LR_fx[1], hCPE->hFrontVad[1]->lp_speech_fx ), corr_shiftR_fx, tmpE_LR_fx[1], q_tmpE_LR[1], fr_bands_LR_fx[1], fr_bands_LR_fx_q[1], &cor_map_sum_LR_fx[1], &ncharX_LR_fx, &sp_div_LR_fx, &q_sp_div_LR, &non_staX_LR_fx, loc_harmLR_fx, lf_E_LR_fx[1], lf_E_LR_fx_q, &hCPE->hFrontVad[1]->hNoiseEst->harm_cor_cnt, - hCPE->hFrontVad[1]->hNoiseEst->Etot_l_lp_fx, hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2_fx, &hCPE->hFrontVad[1]->hNoiseEst->bg_cnt, + extract_h( hCPE->hFrontVad[1]->hNoiseEst->Etot_l_lp_32fx ), hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2_fx, &hCPE->hFrontVad[1]->hNoiseEst->bg_cnt, st->lgBin_E_fx, &dummy_fx, S_map_LR_fx, NULL, hCPE->hFrontVad[1], hCPE->hFrontVad[0]->ini_frame ); } @@ -1387,7 +1387,7 @@ ivas_error pre_proc_front_ivas_fx( find_tilt_ivas_fx( fr_bands_fx, fr_bands_fx_q, st->hNoiseEst->bckr_fx, st->hNoiseEst->q_bckr, ee_fx, st->pitch, st->voicing_fx, lf_E_fx, q_lf_E_fx, corr_shift_fx, st->input_bwidth, st->max_band, hp_E_fx, MODE1, &( st->bckr_tilt_lt ), st->Opt_SC_VBR ); - st->coder_type = find_uv_ivas_fx( st, pitch_fr_fx, voicing_fr_fx, inp_12k8_fx, ee_fx, &dE1X_fx, corr_shift_fx, *relE_fx, Etot_fx, hp_E_fx, + st->coder_type = find_uv_ivas_fx( st, pitch_fr_fx, voicing_fr_fx, inp_12k8_fx, ee_fx, &dE1X_fx, corr_shift_fx, *relE_fx, extract_h( Etot_fx ), hp_E_fx, &flag_spitch, last_core_orig, hStereoClassif, *Q_new /*q_inp_12k8*/, fr_bands_fx_q ); // Q0 Copy_Scale_sig_16_32_no_sat( st->lgBin_E_fx, st->Bin_E_fx, L_FFT / 2, sub( st->q_Bin_E, Q7 ) ); @@ -1461,7 +1461,7 @@ ivas_error pre_proc_front_ivas_fx( scale_sig32( PS_fx, 128, shift ); Qfact_PS = add( Qfact_PS, shift ); - smc_dec = ivas_smc_gmm_fx( st, hStereoClassif, localVAD_HE_SAD, Etot_fx, lsp_new_fx, *cor_map_sum_fx /*Q8*/, epsP_fx, PS_fx, non_staX_fx, *relE_fx, &high_lpn_flag, flag_spitch, Qfact_PS, *epsP_fx_q, hSpMusClas->past_PS_Q ); /* Q0 */ + smc_dec = ivas_smc_gmm_fx( st, hStereoClassif, localVAD_HE_SAD, extract_h( Etot_fx ), lsp_new_fx, *cor_map_sum_fx /*Q8*/, epsP_fx, PS_fx, non_staX_fx, *relE_fx, &high_lpn_flag, flag_spitch, Qfact_PS, *epsP_fx_q, hSpMusClas->past_PS_Q ); /* Q0 */ #ifdef DEBUG_FORCE_DIR if ( st->force_dir[0] != '\0' ) @@ -1495,7 +1495,7 @@ ivas_error pre_proc_front_ivas_fx( * Update of old per-band energy spectrum *----------------------------------------------------------------*/ - ivas_long_enr_fx( st, Etot_fx, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); + ivas_long_enr_fx( st, extract_h( Etot_fx ), localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); Copy32( fr_bands_fx + NB_BANDS, st->hNoiseEst->enrO_fx, NB_BANDS ); /* fr_bands_fx_q */ st->hNoiseEst->q_enrO = fr_bands_fx_q; @@ -1676,7 +1676,7 @@ ivas_error pre_proc_front_ivas_fx( st->q_Bin_E = add( *Q_new, Q_SCALE - 2 ); move16(); /* 2nd stage speech/music classification (ACELP/GSC/TCX core selection) */ - ivas_smc_mode_selection_fx( st, element_brate, smc_dec, *relE_fx, Etot_fx, attack_flag, inp_12k8_fx, *Q_new, S_map_fx, flag_spitch ); + ivas_smc_mode_selection_fx( st, element_brate, smc_dec, *relE_fx, extract_h( Etot_fx ), attack_flag, inp_12k8_fx, *Q_new, S_map_fx, flag_spitch ); } #ifdef DEBUG_FORCE_DIR diff --git a/lib_enc/ivas_cpe_enc_fx.c b/lib_enc/ivas_cpe_enc_fx.c index b5ed69560..a05493aa4 100644 --- a/lib_enc/ivas_cpe_enc_fx.c +++ b/lib_enc/ivas_cpe_enc_fx.c @@ -735,8 +735,8 @@ ivas_error ivas_cpe_enc_fx( move16(); } - Etot_last_fx[0] = sts[0]->hNoiseEst->Etot_last_fx; /* Q8 */ - Etot_last_fx[1] = sts[1]->hNoiseEst->Etot_last_fx; /* Q8 */ + Etot_last_fx[0] = extract_h( sts[0]->hNoiseEst->Etot_last_32fx ); /* Q8 */ + Etot_last_fx[1] = extract_h( sts[1]->hNoiseEst->Etot_last_32fx ); /* Q8 */ move16(); move16(); } diff --git a/lib_enc/ivas_front_vad_fx.c b/lib_enc/ivas_front_vad_fx.c index f3cf05e63..124ab2996 100644 --- a/lib_enc/ivas_front_vad_fx.c +++ b/lib_enc/ivas_front_vad_fx.c @@ -90,6 +90,7 @@ ivas_error front_vad_fx( ivas_error error; Word16 Q_new; Word16 Qband, mem_decim_size; + Word32 Etot_LR_32fx; error = IVAS_ERR_OK; push_wmops( "front_vad" ); move16(); @@ -248,7 +249,7 @@ ivas_error front_vad_fx( move16(); ivas_analy_sp_fx( IVAS_CPE_TD, hCPE, sts[0]->input_Fs, hFrontVad->buffer_12k8_fx + L_FFT / 2 - 3 * ( L_SUBFR / 2 ), Q_new, fr_bands_fx[n], - &q_fr_bands[n], lf_E_fx[n], &q_lf_E[n], &Etot_LR_fx[n], sts[0]->min_band, sts[0]->max_band, Bin_E_fx, &q_Bin_E, Bin_E_old_fx, + &q_fr_bands[n], lf_E_fx[n], &q_lf_E[n], &Etot_LR_32fx, sts[0]->min_band, sts[0]->max_band, Bin_E_fx, &q_Bin_E, Bin_E_old_fx, &q_Bin_E_old, PS_fx, q_PS_out, lgBin_E_fx, band_energies_fx, &q_band_energies, fft_buffLR_fx, &q_fft_buffLR ); if ( n == 0 ) { @@ -275,9 +276,10 @@ ivas_error front_vad_fx( /* add up energies for later calculating average of channel energies */ - Word32 Etot_fx = L_deposit_h( Etot_LR_fx[n] ); /* Q24 */ + noise_est_pre_32fx( Etot_LR_32fx, hFrontVads[0]->ini_frame, hFrontVad->hNoiseEst, 0, 0, 0 ); - noise_est_pre_32fx( Etot_fx, hFrontVads[0]->ini_frame, hFrontVad->hNoiseEst, 0, 0, 0 ); + Etot_LR_fx[n] = extract_h( Etot_LR_32fx ); + move16(); /* wb_vad */ Word16 scale = s_min( q_fr_bands[n], add( hFrontVads[n]->hNoiseEst->q_enrO, L_norm_arr( hFrontVads[n]->hNoiseEst->enrO_fx, NB_BANDS ) ) ); @@ -611,7 +613,7 @@ ivas_error front_vad_spar_fx( Word16 q_tmpN, q_tmpE; noise_est_down_ivas_fx( fr_bands_fx[0], q_fr_bands[0], hFrontVad->hNoiseEst->bckr_fx, &hFrontVad->hNoiseEst->q_bckr, tmpN_fx, &q_tmpN, tmpE_fx, &q_tmpE, st->min_band, st->max_band, - &hFrontVad->hNoiseEst->totalNoise_fx, Etot_fx[0], &hFrontVad->hNoiseEst->Etot_last_fx, &hFrontVad->hNoiseEst->Etot_v_h2_fx ); + &hFrontVad->hNoiseEst->totalNoise_fx, Etot_fx[0], &hFrontVad->hNoiseEst->Etot_last_32fx, &hFrontVad->hNoiseEst->Etot_v_h2_fx ); corr_shift_fx = correlation_shift_fx( hFrontVad->hNoiseEst->totalNoise_fx ); /* Q15 */ @@ -753,7 +755,7 @@ ivas_error front_vad_spar_fx( noise_est_ivas_fx( st, old_pitch, tmpN_fx, epsP_fx, Etot_fx[0], sub( Etot_fx[0], hFrontVad->lp_speech_fx ), corr_shift_fx, tmpE_fx, hFrontVad->hNoiseEst->ave_enr_q, fr_bands_fx[0], q_fr_bands[0], &cor_map_sum_fx, NULL, &sp_div_fx, &Q_sp_div, &non_staX_fx, &loc_harm, - lf_E_fx[0], q_lf_E[0], &hFrontVad->hNoiseEst->harm_cor_cnt, hFrontVad->hNoiseEst->Etot_l_lp_fx, hFrontVad->hNoiseEst->Etot_v_h2_fx, + lf_E_fx[0], q_lf_E[0], &hFrontVad->hNoiseEst->harm_cor_cnt, extract_h( hFrontVad->hNoiseEst->Etot_l_lp_32fx ), hFrontVad->hNoiseEst->Etot_v_h2_fx, &hFrontVad->hNoiseEst->bg_cnt, st->lgBin_E_fx, &sp_floor, S_map_fx, NULL, hFrontVad, hFrontVad->ini_frame ); MVR2R_WORD16( st->pitch, st->pitch, 3 ); diff --git a/lib_enc/ivas_stereo_td_analysis_fx.c b/lib_enc/ivas_stereo_td_analysis_fx.c index 1856370f6..909d0c206 100644 --- a/lib_enc/ivas_stereo_td_analysis_fx.c +++ b/lib_enc/ivas_stereo_td_analysis_fx.c @@ -340,8 +340,8 @@ Word16 stereo_tdm_ener_analysis_fx( test(); IF( ( LE_32( hStereoTD->tdm_lt_rms_L_fx, rms_thd_fx ) && LE_32( hStereoTD->tdm_lt_rms_R_fx, L_shl( rms_thd_fx, 1 ) ) ) || ( LE_32( hStereoTD->tdm_lt_rms_R_fx, rms_thd_fx ) && LE_32( hStereoTD->tdm_lt_rms_L_fx, L_shl( rms_thd_fx, 1 ) ) ) || - ( sts[0]->hVAD->hangover_cnt != 0 && LT_16( sts[1]->hNoiseEst->Etot_last_fx, 3072 /*12 in Q8*/ ) ) || - ( sts[1]->hVAD->hangover_cnt != 0 && LT_16( sts[0]->hNoiseEst->Etot_last_fx, 3072 /*12 in Q8*/ ) ) || + ( sts[0]->hVAD->hangover_cnt != 0 && LT_32( sts[1]->hNoiseEst->Etot_last_32fx, 201326592 /*12 in Q24*/ ) ) || + ( sts[1]->hVAD->hangover_cnt != 0 && LT_32( sts[0]->hNoiseEst->Etot_last_32fx, 201326592 /*12 in Q24*/ ) ) || ( NE_16( sts[0]->hSpMusClas->past_dec[0], sts[1]->hSpMusClas->past_dec[0] ) ) ) { test(); @@ -873,8 +873,8 @@ static Word16 Get_dt_lt_ener_fx( move32(); *tdm_lt_rms_R = L_shl( Mpy_32_32( 1932735282 /* 0.9f in Q31 */, rms_R ), sub( Q16, q_rms_R ) ); // (Q31, q_rms_R) -> q_rms_R -> Q16 move32(); - L_tmp = Mpy_32_16_1( 1932735282 /* 0.9f in Q31 */, sts[0]->hNoiseEst->Etot_last_fx ); //(Q31, Q8) -> Q24 - sts[1]->hNoiseEst->Etot_last_fx = extract_h( L_tmp ); // (Q24 >> Q16) -> Q8 + L_tmp = Mpy_32_32( 1932735282 /* 0.9f in Q31 */, sts[0]->hNoiseEst->Etot_last_32fx ); //(Q31, Q24) -> Q24 + sts[1]->hNoiseEst->Etot_last_32fx = L_tmp; // (Q24 >> Q16) -> Q8 sts[1]->hVAD->hangover_cnt = 0; move16(); } diff --git a/lib_enc/long_enr_fx.c b/lib_enc/long_enr_fx.c index af57edd3b..60c40b3fe 100644 --- a/lib_enc/long_enr_fx.c +++ b/lib_enc/long_enr_fx.c @@ -94,7 +94,7 @@ void ivas_long_enr_fx( } FOR( n = 0; n < n_chan; n++ ) { - hFrontVad[n]->hNoiseEst->Etot_last_fx = Etot_LR[n]; /* Q8 */ + hFrontVad[n]->hNoiseEst->Etot_last_32fx = L_deposit_h( Etot_LR[n] ); /* Q24 */ move16(); } } @@ -141,7 +141,7 @@ void ivas_long_enr_fx( } } /* Update */ - st_fx->hNoiseEst->Etot_last_fx = Etot; /* Q8 */ + st_fx->hNoiseEst->Etot_last_32fx = L_deposit_h( Etot ); /* Q24 */ move16(); } diff --git a/lib_enc/nois_est_fx.c b/lib_enc/nois_est_fx.c index c8f698da9..d8131f10e 100644 --- a/lib_enc/nois_est_fx.c +++ b/lib_enc/nois_est_fx.c @@ -359,8 +359,8 @@ void noise_est_init_ivas_fx( hNoiseEst->Etot_lp_fx = 0; hNoiseEst->Etot_h_fx = 0; hNoiseEst->Etot_l_fx = 0; - hNoiseEst->Etot_l_lp_fx = 0; - hNoiseEst->Etot_last_fx = 0; + hNoiseEst->Etot_l_lp_32fx = 0; + hNoiseEst->Etot_last_32fx = 0; hNoiseEst->Etot_v_h2_fx = 0; hNoiseEst->sign_dyn_lp_fx = 0; move16(); @@ -368,8 +368,8 @@ void noise_est_init_ivas_fx( move16(); move16(); move16(); - move16(); - move16(); + move32(); + move32(); return; } @@ -510,10 +510,10 @@ void noise_est_pre_32fx( move32(); Etot_l_32fx = Etot; move32(); - hNoiseEst->Etot_l_lp_fx = extract_h( Etot ); // Q8 - move16(); - hNoiseEst->Etot_last_fx = extract_h( Etot ); // Q8 - move16(); + hNoiseEst->Etot_l_lp_32fx = Etot; // Q24 + move32(); + hNoiseEst->Etot_last_32fx = Etot; // Q24 + move32(); hNoiseEst->Etot_v_h2_fx = 0; move16(); Etot_lp_32fx = Etot; @@ -538,20 +538,20 @@ void noise_est_pre_32fx( ) { /* *Etot_l += min(2,(*Etot_last-*Etot_l)*0.1f); */ - tmp = Mpy_32_32( L_sub( L_deposit_h( hNoiseEst->Etot_last_fx ), Etot_l_32fx ), 214748365 ); /* 0.1f factor in Q31 */ - tmp = L_min( 33554432, tmp ); /* 2.0 in Q24 is 33554432 */ - Etot_l_32fx = L_add( Etot_l_32fx, tmp ); /* Q24 */ + tmp = Mpy_32_32( L_sub( ( hNoiseEst->Etot_last_32fx ), Etot_l_32fx ), 214748365 ); /* 0.1f factor in Q31 */ + tmp = L_min( 33554432, tmp ); /* 2.0 in Q24 is 33554432 */ + Etot_l_32fx = L_add( Etot_l_32fx, tmp ); /* Q24 */ } /* Avoids large steps in short active segments */ test(); - IF( ( GT_32( L_sub( L_deposit_h( hNoiseEst->Etot_last_fx ), Etot_l_32fx ), 503316480 ) ) /* 30.0f*Q24 */ + IF( ( GT_32( L_sub( hNoiseEst->Etot_last_32fx, Etot_l_32fx ), 503316480 ) ) /* 30.0f*Q24 */ && ( GT_16( hNoiseEst->harm_cor_cnt, HE_LT_CNT_PRE_SHORT_FX ) ) ) { /* *Etot_l += (*Etot_last-*Etot_l)*0.02f; */ - Etot_l_32fx = L_add( Etot_l_32fx, Mpy_32_32( L_sub( L_deposit_h( hNoiseEst->Etot_last_fx ), Etot_l_32fx ), 42949673 ) ); /* 0.02 in Q24*/ + Etot_l_32fx = L_add( Etot_l_32fx, Mpy_32_32( L_sub( hNoiseEst->Etot_last_32fx, Etot_l_32fx ), 42949673 ) ); /* 0.02 in Q24*/ } - ELSE IF( GT_32( L_sub( L_deposit_h( hNoiseEst->Etot_last_fx ), Etot_l_32fx ), 167772160 ) ) /* 10.0 in Q24*/ + ELSE IF( GT_32( L_sub( hNoiseEst->Etot_last_32fx, Etot_l_32fx ), 167772160 ) ) /* 10.0 in Q24*/ { Etot_l_32fx = L_add( Etot_l_32fx, 1342177 ); /* 0.08 in Q24*/ } @@ -560,10 +560,10 @@ void noise_est_pre_32fx( Etot_l_32fx = L_min( Etot_l_32fx, Etot ); // Q24 test(); - IF( LT_16( ini_frame_fx, 100 ) && LT_32( Etot_l_32fx, L_deposit_h( hNoiseEst->Etot_l_lp_fx ) ) ) + IF( LT_16( ini_frame_fx, 100 ) && LT_32( Etot_l_32fx, ( hNoiseEst->Etot_l_lp_32fx ) ) ) { /**Etot_l_lp = 0.1f * *Etot_l + (1.0f - 0.1) * *Etot_l_lp; */ - hNoiseEst->Etot_l_lp_fx = extract_h( L_add( Mpy_32_32( 214748364 /* 0.1f in Q31*/, Etot_l_32fx ), Mpy_32_32( 1932735283 /* 0.9f in Q31*/, L_deposit_h( hNoiseEst->Etot_l_lp_fx ) ) ) ); // Q8 + hNoiseEst->Etot_l_lp_32fx = W_round64_L( W_add( W_mult_32_32( 214748364 /* 0.1f in Q31*/, Etot_l_32fx ), W_mult_32_32( 1932735283 /* 0.9f in Q31*/, hNoiseEst->Etot_l_lp_32fx ) ) ); // Q8 move16(); } ELSE @@ -572,16 +572,16 @@ void noise_est_pre_32fx( test(); test(); test(); - IF( ( ( GT_16( hNoiseEst->harm_cor_cnt, HE_LT_CNT_FX ) ) && ( GT_32( L_sub_sat( L_deposit_h( hNoiseEst->Etot_last_fx ), Etot_l_32fx ), HE_LT_THR2_Q24 ) ) ) || ( ( sub( hNoiseEst->harm_cor_cnt, HE_LT_CNT_FX ) > 0 ) && ( LT_16( ini_frame_fx, HE_LT_CNT_INIT_FX ) ) ) || ( GT_32( L_sub_sat( L_deposit_h( hNoiseEst->Etot_l_lp_fx ), Etot_l_32fx ), HE_LT_THR2_Q24 ) ) ) + IF( ( ( GT_16( hNoiseEst->harm_cor_cnt, HE_LT_CNT_FX ) ) && ( GT_32( L_sub_sat( hNoiseEst->Etot_last_32fx, Etot_l_32fx ), HE_LT_THR2_Q24 ) ) ) || ( ( sub( hNoiseEst->harm_cor_cnt, HE_LT_CNT_FX ) > 0 ) && ( LT_16( ini_frame_fx, HE_LT_CNT_INIT_FX ) ) ) || ( GT_32( L_sub_sat( hNoiseEst->Etot_l_lp_32fx, Etot_l_32fx ), HE_LT_THR2_Q24 ) ) ) { /**Etot_l_lp = 0.03f * *Etot_l + (1.0f - 0.03f) * *Etot_l_lp; */ - hNoiseEst->Etot_l_lp_fx = extract_h( L_add( Mpy_32_32( 64424509 /* 0.03f in Q31*/, Etot_l_32fx ), Mpy_32_32( 2083059139 /* 0.97f in Q31*/, L_deposit_h( hNoiseEst->Etot_l_lp_fx ) ) ) ); // Q8 + hNoiseEst->Etot_l_lp_32fx = W_extract_h( W_add( W_mult_32_32( 64424509 /* 0.03f in Q31*/, Etot_l_32fx ), W_mult_32_32( 2083059139 /* 0.97f in Q31*/, hNoiseEst->Etot_l_lp_32fx ) ) ); // Q24 move16(); } ELSE { /* *Etot_l_lp = 0.02f * *Etot_l + (1.0f - 0.02f) * *Etot_l_lp; */ - hNoiseEst->Etot_l_lp_fx = extract_h( L_add( Mpy_32_32( 42949673 /* 0.02f in Q31*/, Etot_l_32fx ), Mpy_32_32( 2104533975 /* 0.98f in Q31*/, L_deposit_h( hNoiseEst->Etot_l_lp_fx ) ) ) ); // Q8 + hNoiseEst->Etot_l_lp_32fx = W_extract_h( W_add( W_mult_32_32( 42949673 /* 0.02f in Q31*/, Etot_l_32fx ), W_mult_32_32( 2104533975 /* 0.98f in Q31*/, hNoiseEst->Etot_l_lp_32fx ) ) ); // Q24 move16(); } } @@ -748,8 +748,8 @@ void noise_est_down_ivas_fx( const Word16 min_band, /* i : minimum critical band */ const Word16 max_band, /* i : maximum critical band */ Word16 *totalNoise, /* o : noise estimate over all critical bands */ - Word16 Etot, /* i : Energy of current frame */ - Word16 *Etot_last, /* i/o: Energy of last frame Q8 */ + Word32 Etot, /* i : Energy of current frame */ + Word32 *Etot_last, /* i/o: Energy of last frame Q8 */ Word16 *Etot_v_h2 /* i/o: Energy variations of noise frames Q8 */ ) @@ -770,8 +770,9 @@ void noise_est_down_ivas_fx( Copy32( bckr, bckr32, NB_BANDS ); set16_fx( bckr_q, *q_bckr, NB_BANDS ); - L_Etot = L_shl( Etot, 16 ); /*Q24 for later AR1 computations*/ - L_Etot_last = L_shl( *Etot_last, 16 ); + L_Etot = Etot; /*Q24 for later AR1 computations*/ + move32(); + L_Etot_last = *Etot_last; L_Etot_v_h2 = L_shl( *Etot_v_h2, 16 ); /*-----------------------------------------------------------------* @@ -2989,7 +2990,7 @@ void noise_est_ivas_fx( move16(); /* st->lt_Ellp_dist = 0.03f* (Etot - st->Etot_l_lp) + 0.97f*st->lt_Ellp_dist;*/ - tmp = sub( Etot, hNoiseEst->Etot_l_lp_fx ); /* Q8 */ + tmp = sub( Etot, extract_h( hNoiseEst->Etot_l_lp_32fx ) ); /* Q8 */ hNoiseEst->lt_Ellp_dist_fx = mac_r( L_mult( tmp, 983 /* 0.03 in Q15*/ ), hNoiseEst->lt_Ellp_dist_fx, 31785 /* 0.97 in Q15*/ ); // Q8 move16(); @@ -3180,7 +3181,7 @@ void noise_est_ivas_fx( { tmp = shl( tmp, 1 ); /*1.5 + 1.5 Q13 */ } - Ltmp = L_deposit_h( hNoiseEst->Etot_l_lp_fx ); + Ltmp = hNoiseEst->Etot_l_lp_32fx; Etot_l_lp_thr = round_fx( L_add( Ltmp, L_shl( L_mult( tmp, Etot_v_h2 ), 2 ) ) ); /* Q13+Q8+1 +2 = Q24 -> Q8*/ /* enr_bgd = Etot < Etot_l_lp_thr; */ @@ -3365,8 +3366,8 @@ void noise_est_ivas_fx( move16(); test(); test(); - if ( ( GT_16( hNoiseEst->sign_dyn_lp_fx, 15 * 256 ) ) /* 15 in Q8 */ - && ( LT_16( sub( Etot, hNoiseEst->Etot_l_lp_fx ), shl( Etot_v_h2, 1 ) ) ) /* Q8 , Etot_v_h2 has limited dynmics can be upscaled*/ + if ( ( GT_16( hNoiseEst->sign_dyn_lp_fx, 15 * 256 ) ) /* 15 in Q8 */ + && ( LT_16( sub( Etot, extract_h( hNoiseEst->Etot_l_lp_32fx ) ), shl( Etot_v_h2, 1 ) ) ) /* Q8 , Etot_v_h2 has limited dynmics can be upscaled*/ && ( GT_16( *st_harm_cor_cnt, 20 ) ) ) { sd1_bgd = 1; diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index 120dd42b5..4237aa9c3 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -369,8 +369,8 @@ void noise_est_down_ivas_fx( const Word16 min_band, /* i : minimum critical band */ const Word16 max_band, /* i : maximum critical band */ Word16 *totalNoise, /* o : noise estimate over all critical bands */ - Word16 Etot, /* i : Energy of current frame */ - Word16 *Etot_last, /* i/o: Energy of last frame Q8 */ + Word32 Etot, /* i : Energy of current frame */ + Word32 *Etot_last, /* i/o: Energy of last frame Q8 */ Word16 *Etot_v_h2 /* i/o: Energy variations of noise frames Q8 */ ); @@ -2230,7 +2230,7 @@ void ivas_analy_sp_fx( Word16 *q_fr_bands, /* o : energy in critical frequency bands Q0 */ Word32 *lf_E, /* o : per bin E for first... q_lf_E */ Word16 *q_lf_E, /* o : per bin E for first... Q0 */ - Word16 *Etot, /* o : total input energy Q8 */ + Word32 *Etot, /* o : total input energy Q24 */ const Word16 min_band, /* i : minimum critical band Q0 */ const Word16 max_band, /* i : maximum critical band Q0 */ Word32 *Bin_E, /* o : per-bin energy spectrum q_Bin_E */ diff --git a/lib_enc/stat_enc.h b/lib_enc/stat_enc.h index c6176709b..4fe5bb8a4 100644 --- a/lib_enc/stat_enc.h +++ b/lib_enc/stat_enc.h @@ -545,8 +545,10 @@ typedef struct noise_estimation_structure Word16 bg_cnt; /* Noise estimator - pause length counter */ Word16 Etot_l_fx; /* Q8 Noise estimator - Track energy from below */ Word16 Etot_h_fx; /* Q8 Noise estimator - Track energy from above */ - Word16 Etot_l_lp_fx; /* Q8 Noise estimator - Smoothed low energy */ - Word16 Etot_last_fx; /*Q8*/ + Word16 Etot_l_lp_fx; /* EVS- Q8 Noise estimator - Smoothed low energy */ + Word32 Etot_l_lp_32fx; /* IVAS-Q24 Noise estimator - Smoothed low energy */ + Word16 Etot_last_fx; /*EVS- Q8*/ + Word32 Etot_last_32fx; /* IVAS - Q24*/ Word16 Etot_lp_fx; /* Q8 Noise estimator - Filtered input energy */ Word16 lt_tn_track_fx; /* Q15 */ diff --git a/lib_enc/updt_enc_fx.c b/lib_enc/updt_enc_fx.c index fb2a6215b..1210bfd04 100644 --- a/lib_enc/updt_enc_fx.c +++ b/lib_enc/updt_enc_fx.c @@ -362,7 +362,14 @@ void updt_enc_common_fx( move16(); st->last_bwidth = st->bwidth; move16(); - st->hNoiseEst->Etot_last_fx = Etot; + IF( GT_16( st->element_mode, EVS_MONO ) ) + { + st->hNoiseEst->Etot_last_32fx = L_deposit_h( Etot ); + } + ELSE + { + st->hNoiseEst->Etot_last_fx = Etot; + } move16(); st->last_coder_type_raw = st->coder_type_raw; move16(); -- GitLab