From 3f865db7ccf62c5f370c6f27d1e9816121f051e6 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Tue, 27 May 2025 22:42:16 +0530 Subject: [PATCH] Precision improvement changes to improve the vad flag selection --- lib_enc/analy_sp_fx.c | 33 ++++++++++++++------------- lib_enc/ivas_core_pre_proc_front_fx.c | 7 +++--- lib_enc/ivas_front_vad_fx.c | 2 +- lib_enc/nois_est_fx.c | 26 ++++++++++----------- lib_enc/prot_fx_enc.h | 2 +- lib_enc/stat_enc.h | 1 + 6 files changed, 36 insertions(+), 35 deletions(-) diff --git a/lib_enc/analy_sp_fx.c b/lib_enc/analy_sp_fx.c index 7062a3090..dbd656dec 100644 --- a/lib_enc/analy_sp_fx.c +++ b/lib_enc/analy_sp_fx.c @@ -569,6 +569,8 @@ void ivas_analy_sp_fx( *Etot = L_shl( Ltmp, Q24 - Q21 ); // Q24 move16(); } + *q_band_energies = *q_fr_bands; + move16(); } ELSE { @@ -578,10 +580,10 @@ void ivas_analy_sp_fx( set32_fx( lf_E, 0, 2 * VOIC_BINS ); set32_fx( band_energies, 0, 2 * NB_BANDS ); set32_fx( fr_bands, E_MIN_FXQ31, 2 * NB_BANDS ); // Q31 (*q_fr_bands) - - LEtot = W_shl( W_mult_32_16( E_MIN_FXQ31, add( sub( max_band, min_band ), 1 ) ), 1 ); // Q32 (*q_fr_bands+1) + *Etot = -193760400; // Q24 *q_fr_bands = Q31; *q_lf_E = *q_fr_bands; + move32(); move16(); move16(); } @@ -596,25 +598,24 @@ void ivas_analy_sp_fx( MVR2R_WORD32( Bin_E, Bin_E + ( L_FFT / 2 ), L_FFT / 2 ); MVR2R_WORD32( band_energies, band_energies + NB_BANDS, NB_BANDS ); MVR2R_WORD32( pt_bands, pt_bands + NB_BANDS, NB_BANDS ); - } - /* Average total log energy over both half-frames */ - *Etot = -838860800 /* 10.f * log10f(0.00001f) in Q24 : This is when LEtot is 0*/; - move16(); - IF( LEtot != 0 ) - { - exp = W_norm( LEtot ); - LEtot = W_shl( LEtot, exp ); // q_fr_bands+exp - Ltmp = BASOP_Util_Log10( W_extract_h( LEtot ), sub( 62, add( *q_fr_bands, exp ) ) /* 31-(q_fr_bands+1+exp-32) */ ); // Q25 - Ltmp = Mpy_32_32( Ltmp, 1342177280 /* 10.f in Q27 */ ); // (Q25, Q27) -> Q21 - *Etot = L_shl( Ltmp, Q24 - Q21 ); // Q24 + /* Average total log energy over both half-frames */ + *Etot = -838860800 /* 10.f * log10f(0.00001f) in Q24 : This is when LEtot is 0*/; move16(); + IF( LEtot != 0 ) + { + exp = W_norm( LEtot ); + LEtot = W_shl( LEtot, exp ); // q_fr_bands+exp + Ltmp = BASOP_Util_Log10( W_extract_h( LEtot ), sub( 62, add( *q_fr_bands, exp ) ) /* 31-(q_fr_bands+1+exp-32) */ ); // Q25 + Ltmp = Mpy_32_32( Ltmp, 1342177280 /* 10.f in Q27 */ ); // (Q25, Q27) -> Q21 + *Etot = L_shl( Ltmp, Q24 - Q21 ); // Q24 + move16(); + } } + *q_band_energies = *q_fr_bands; + move16(); } - *q_band_energies = *q_fr_bands; - move16(); - exp = sub( getScaleFactor32( fr_bands, 2 * NB_BANDS ), 1 ); scale_sig32( fr_bands, 2 * NB_BANDS, exp ); /* q_fr_bands + exp */ *q_fr_bands = add( *q_fr_bands, exp ); diff --git a/lib_enc/ivas_core_pre_proc_front_fx.c b/lib_enc/ivas_core_pre_proc_front_fx.c index be208296d..106d7c387 100644 --- a/lib_enc/ivas_core_pre_proc_front_fx.c +++ b/lib_enc/ivas_core_pre_proc_front_fx.c @@ -1258,8 +1258,7 @@ ivas_error pre_proc_front_ivas_fx( noise_est_ivas_fx( st, old_pitch1, tmpN_fx, epsP_fx, extract_h( Etot_fx ), *relE_fx, corr_shift_fx, tmpE_fx, q_tmpE, fr_bands_fx, fr_bands_fx_q, cor_map_sum_fx, &ncharX_fx, &sp_div_fx, &q_sp_div, &non_staX_fx, loc_harm, lf_E_fx, q_lf_E_fx, &st->hNoiseEst->harm_cor_cnt, extract_h( st->hNoiseEst->Etot_l_lp_32fx ), - extract_h( st->hNoiseEst->Etot_v_h2_32fx ), &st->hNoiseEst->bg_cnt, st->lgBin_E_fx, &dummy_fx, S_map_fx, - hStereoClassif, NULL, st->ini_frame ); + st->hNoiseEst->Etot_v_h2_32fx, &st->hNoiseEst->bg_cnt, st->lgBin_E_fx, &dummy_fx, S_map_fx, hStereoClassif, NULL, st->ini_frame ); test(); IF( lr_vad_enabled && st->idchan == 0 ) @@ -1301,14 +1300,14 @@ ivas_error pre_proc_front_ivas_fx( noise_est_ivas_fx( st, old_pitch1, tmpN_LR_fx[0], epsP_fx, Etot_LR_fx[0], sub( Etot_LR_fx[0], hCPE->hFrontVad[0]->lp_speech_fx ), corr_shiftL_fx, tmpE_LR_fx[0], q_tmpE_LR[0], fr_bands_LR_fx[0], fr_bands_LR_fx_q[0], &cor_map_sum_LR_fx[0], &ncharX_LR_fx, &sp_div_LR_fx, &q_sp_div_LR, &non_staX_LR_fx, loc_harmLR_fx, lf_E_LR_fx[0], lf_E_LR_fx_q, &hCPE->hFrontVad[0]->hNoiseEst->harm_cor_cnt, - extract_h( hCPE->hFrontVad[0]->hNoiseEst->Etot_l_lp_32fx ), extract_h( hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2_32fx ), &hCPE->hFrontVad[0]->hNoiseEst->bg_cnt, + extract_h( hCPE->hFrontVad[0]->hNoiseEst->Etot_l_lp_32fx ), hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2_32fx, &hCPE->hFrontVad[0]->hNoiseEst->bg_cnt, st->lgBin_E_fx, &dummy_fx, S_map_LR_fx, NULL, hCPE->hFrontVad[0], hCPE->hFrontVad[0]->ini_frame ); /* Note: the index [0] in the last argument is intended, the ini_frame counter is only maintained in the zero-th channel's VAD handle */ noise_est_ivas_fx( st, old_pitch1, tmpN_LR_fx[1], epsP_fx, Etot_LR_fx[1], sub( Etot_LR_fx[1], hCPE->hFrontVad[1]->lp_speech_fx ), corr_shiftR_fx, tmpE_LR_fx[1], q_tmpE_LR[1], fr_bands_LR_fx[1], fr_bands_LR_fx_q[1], &cor_map_sum_LR_fx[1], &ncharX_LR_fx, &sp_div_LR_fx, &q_sp_div_LR, &non_staX_LR_fx, loc_harmLR_fx, lf_E_LR_fx[1], lf_E_LR_fx_q, &hCPE->hFrontVad[1]->hNoiseEst->harm_cor_cnt, - extract_h( hCPE->hFrontVad[1]->hNoiseEst->Etot_l_lp_32fx ), extract_h( hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2_32fx ), &hCPE->hFrontVad[1]->hNoiseEst->bg_cnt, + extract_h( hCPE->hFrontVad[1]->hNoiseEst->Etot_l_lp_32fx ), hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2_32fx, &hCPE->hFrontVad[1]->hNoiseEst->bg_cnt, st->lgBin_E_fx, &dummy_fx, S_map_LR_fx, NULL, hCPE->hFrontVad[1], hCPE->hFrontVad[0]->ini_frame ); } diff --git a/lib_enc/ivas_front_vad_fx.c b/lib_enc/ivas_front_vad_fx.c index a4baf6a00..a6054a841 100644 --- a/lib_enc/ivas_front_vad_fx.c +++ b/lib_enc/ivas_front_vad_fx.c @@ -741,7 +741,7 @@ ivas_error front_vad_spar_fx( noise_est_ivas_fx( st, old_pitch, tmpN_fx, epsP_fx, Etot_fx[0], sub( Etot_fx[0], hFrontVad->lp_speech_fx ), corr_shift_fx, tmpE_fx, hFrontVad->hNoiseEst->ave_enr_q, fr_bands_fx[0], q_fr_bands[0], &cor_map_sum_fx, NULL, &sp_div_fx, &Q_sp_div, &non_staX_fx, &loc_harm, - lf_E_fx[0], q_lf_E[0], &hFrontVad->hNoiseEst->harm_cor_cnt, extract_h( hFrontVad->hNoiseEst->Etot_l_lp_32fx ), extract_h( hFrontVad->hNoiseEst->Etot_v_h2_32fx ), + lf_E_fx[0], q_lf_E[0], &hFrontVad->hNoiseEst->harm_cor_cnt, extract_h( hFrontVad->hNoiseEst->Etot_l_lp_32fx ), hFrontVad->hNoiseEst->Etot_v_h2_32fx, &hFrontVad->hNoiseEst->bg_cnt, st->lgBin_E_fx, &sp_floor, S_map_fx, NULL, hFrontVad, hFrontVad->ini_frame ); MVR2R_WORD16( st->pitch, st->pitch, 3 ); diff --git a/lib_enc/nois_est_fx.c b/lib_enc/nois_est_fx.c index 4f178214f..7534a5159 100644 --- a/lib_enc/nois_est_fx.c +++ b/lib_enc/nois_est_fx.c @@ -358,14 +358,14 @@ void noise_est_init_ivas_fx( move16(); hNoiseEst->Etot_lp_fx = 0; hNoiseEst->Etot_h_fx = 0; - hNoiseEst->Etot_l_fx = 0; + hNoiseEst->Etot_l_32fx = 0; hNoiseEst->Etot_l_lp_32fx = 0; hNoiseEst->Etot_last_32fx = 0; hNoiseEst->Etot_v_h2_32fx = 0; hNoiseEst->sign_dyn_lp_fx = 0; move16(); move16(); - move16(); + move32(); move16(); move32(); move32(); @@ -528,7 +528,7 @@ void noise_est_pre_32fx( Etot_h_32fx = L_sub( L_deposit_h( hNoiseEst->Etot_h_fx ), 671089 ); /* 671089=0.04 in Q24 */ Etot_h_32fx = L_max( Etot_h_32fx, Etot ); - Etot_l_32fx = L_add( L_deposit_h( hNoiseEst->Etot_l_fx ), 1342177 ); /* 1342177 = .08 in Q24 */ + Etot_l_32fx = L_add( hNoiseEst->Etot_l_32fx, 1342177 ); /* 1342177 = .08 in Q24 */ /* Could even be higher but it also delays first entry to DTX */ IF( GT_16( hNoiseEst->harm_cor_cnt, HE_LT_CNT_PRE_FX ) ) @@ -590,10 +590,10 @@ void noise_est_pre_32fx( move16(); } - hNoiseEst->Etot_l_fx = extract_h( Etot_l_32fx ); // Q8 + hNoiseEst->Etot_l_32fx = Etot_l_32fx; // Q24 hNoiseEst->Etot_h_fx = extract_h( Etot_h_32fx ); // Q8 hNoiseEst->Etot_lp_fx = extract_h( Etot_lp_32fx ); // Q8 - move16(); + move32(); move16(); move16(); @@ -2213,7 +2213,7 @@ void noise_est_ivas_fx( const Word16 q_lf_E, /* i : Q of lf_E Q0 */ Word16 *st_harm_cor_cnt, /* i/o : 1st harm correlation timer Q0 */ const Word16 Etot_l_lp, /* i : Smoothed low energy Q8 */ - const Word16 Etot_v_h2, /* i : Energy variations Q8 */ + const Word32 Etot_v_h2, /* i : Energy variations Q24 */ Word16 *bg_cnt, /* i : Background burst length timer Q0 */ Word16 EspecdB[], /* i/o: log E spectrum (with f=0) of the current frame Q7 for multi harm */ Word16 *sp_floor, /* o : noise floor estimate Q7 */ @@ -3176,14 +3176,14 @@ void noise_est_ivas_fx( haco_ev_max = s_max( tmp, hNoiseEst->lt_haco_ev_fx ); /* Q15 */ /* Etot_l_lp_thr = st->Etot_l_lp + (1.5f + 1.5f * (st->Etot_lp<50.0f))*st->Etot_v_h2; */ - tmp = 12288; - move16(); /* 1.5 Q13 */ + L_tmp = 49152; /* 1.5 Q15 */ + move32(); if ( LT_16( hNoiseEst->Etot_lp_fx, 12800 ) ) /* 50.0 in Q8 */ { - tmp = shl( tmp, 1 ); /*1.5 + 1.5 Q13 */ + L_tmp = L_shl( L_tmp, 1 ); /*1.5 + 1.5 Q15 */ } - Ltmp = hNoiseEst->Etot_l_lp_32fx; - Etot_l_lp_thr = round_fx( L_add( Ltmp, L_shl( L_mult( tmp, Etot_v_h2 ), 2 ) ) ); /* Q13+Q8+1 +2 = Q24 -> Q8*/ + temp = W_mac_32_32( W_mult_32_32( hNoiseEst->Etot_l_lp_32fx, ONE_IN_Q15 ), Etot_v_h2, L_tmp ); // Q40(24+15+1) + Etot_l_lp_thr = W_round32_s( temp ); // Q8 /* enr_bgd = Etot < Etot_l_lp_thr; */ enr_bgd = 0; @@ -3367,8 +3367,8 @@ void noise_est_ivas_fx( move16(); test(); test(); - if ( ( GT_16( hNoiseEst->sign_dyn_lp_fx, 15 * 256 ) ) /* 15 in Q8 */ - && ( LT_16( sub( Etot, extract_h( hNoiseEst->Etot_l_lp_32fx ) ), shl( Etot_v_h2, 1 ) ) ) /* Q8 , Etot_v_h2 has limited dynmics can be upscaled*/ + if ( ( GT_16( hNoiseEst->sign_dyn_lp_fx, 15 * 256 ) ) /* 15 in Q8 */ + && ( LT_32( L_sub( L_deposit_h( Etot ), hNoiseEst->Etot_l_lp_32fx ), L_shl( Etot_v_h2, 1 ) ) ) /* Q24 , Etot_v_h2 has limited dynmics can be upscaled*/ && ( GT_16( *st_harm_cor_cnt, 20 ) ) ) { sd1_bgd = 1; diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index 8e323af82..59d2fdf9a 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -404,7 +404,7 @@ void noise_est_ivas_fx( const Word16 q_lf_E, /* i : Q of lf_E Q0 */ Word16 *st_harm_cor_cnt, /* i/o : 1st harm correlation timer Q0 */ const Word16 Etot_l_lp, /* i : Smoothed low energy Q8 */ - const Word16 Etot_v_h2, /* i : Energy variations Q8 */ + const Word32 Etot_v_h2, /* i : Energy variations Q24 */ Word16 *bg_cnt, /* i : Background burst length timer Q0 */ Word16 EspecdB[], /* i/o: log E spectrum (with f=0) of the current frame Q7 for multi harm */ Word16 *sp_floor, /* o : noise floor estimate Q7 */ diff --git a/lib_enc/stat_enc.h b/lib_enc/stat_enc.h index 41383f109..b9c515eea 100644 --- a/lib_enc/stat_enc.h +++ b/lib_enc/stat_enc.h @@ -549,6 +549,7 @@ typedef struct noise_estimation_structure Word16 bg_cnt; /* Noise estimator - pause length counter */ Word16 Etot_l_fx; /* Q8 Noise estimator - Track energy from below */ Word16 Etot_h_fx; /* Q8 Noise estimator - Track energy from above */ + Word32 Etot_l_32fx; /* Q24 Noise estimator - Track energy from below */ Word16 Etot_l_lp_fx; /* EVS- Q8 Noise estimator - Smoothed low energy */ Word32 Etot_l_lp_32fx; /* IVAS-Q24 Noise estimator - Smoothed low energy */ Word16 Etot_last_fx; /*EVS- Q8*/ -- GitLab