From f602e97871c026176fa68e63304c08a248a1de68 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Fri, 30 May 2025 15:42:13 +0530 Subject: [PATCH] Precision improvements for lp_noise, totalNoise; saturation resolution for non_staX --- lib_com/cnst.h | 1 + lib_com/ivas_prot_fx.h | 4 +-- lib_enc/bw_detect_fx.c | 13 +++++++- lib_enc/dtx_fx.c | 8 ++--- lib_enc/fd_cng_enc_fx.c | 12 +++++-- lib_enc/find_uv_fx.c | 6 ++-- lib_enc/init_enc_fx.c | 6 ++-- lib_enc/ivas_core_pre_proc_front_fx.c | 28 ++++++++-------- lib_enc/ivas_core_pre_proc_fx.c | 2 +- lib_enc/ivas_front_vad_fx.c | 11 +++---- lib_enc/ivas_ism_dtx_enc_fx.c | 2 +- lib_enc/ivas_ism_metadata_enc_fx.c | 2 +- lib_enc/ivas_masa_enc_fx.c | 4 +-- lib_enc/ivas_omasa_enc_fx.c | 2 +- lib_enc/ivas_stereo_classifier_fx.c | 2 +- lib_enc/ivas_stereo_td_analysis_fx.c | 2 +- lib_enc/ivas_tcx_core_enc_fx.c | 4 +-- lib_enc/long_enr_fx.c | 44 ++++++++++++------------- lib_enc/nois_est_fx.c | 47 ++++++++++++++++++--------- lib_enc/prot_fx_enc.h | 13 ++++++-- lib_enc/speech_music_classif_fx.c | 6 ++-- lib_enc/stat_enc.h | 13 +++++--- lib_enc/vad_fx.c | 4 +-- 23 files changed, 140 insertions(+), 96 deletions(-) diff --git a/lib_com/cnst.h b/lib_com/cnst.h index 7b204a8e2..a2d051d87 100644 --- a/lib_com/cnst.h +++ b/lib_com/cnst.h @@ -759,6 +759,7 @@ enum #define PIT_FIR_SIZE6_2 ( PIT_UP_SAMP6 * PIT_L_INTERPOL6_2 + 1 ) #define E_MIN 0.0035f /* minimum allowable energy */ #define E_MIN_Q11_FX 7 /* minimum allowable energy in Q11*/ +#define E_MIN_Q27_FX 469762 /* minimum allowable energy in Q27*/ #define STEP_DELTA 0.0625f /* quantization step for tilt compensation of gaussian cb. 
excitation */ #define GAMMA_EV 0.92f /* weighting factor for core synthesis error weighting */ #define FORMANT_SHARPENING_NOISE_THRESHOLD 21.0f /* lp_noise level above which formant sharpening is deactivated */ diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 928736427..d9ca855be 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -2960,7 +2960,7 @@ Word16 ivas_acelp_tcx20_switching_fx( Word16 *inp_fx, /* i : new input signal */ Word16 q_inp, /* i : i/p Q */ Word16 *wsp, /* i : input weighted signal */ - Word16 non_staX, /* i : unbound non-stationarity for sp/mu clas */ + Word32 non_staX, /* i : unbound non-stationarity for sp/mu clas */ Word16 *pitch_fr, /* i : fraction pitch values */ Word16 *voicing_fr, /* i : fractional voicing values */ Word32 currFlatness, /* i : flatness */ @@ -3984,7 +3984,7 @@ Word16 ivas_smc_gmm_fx( const Word16 cor_map_sum_fx, /* i : correlation map sum (from multi-harmonic anal.) Q8 */ const Word32 epsP_fx[M + 1], /* i : LP prediciton error */ const Word32 PS_fx[], /* i : energy spectrum */ - const Word16 non_sta_fx, /* i : unbound non-stationarity Q8 */ + const Word32 non_sta_fx, /* i : unbound non-stationarity Q20 */ const Word16 relE_fx, /* i : relative frame energy Q8 */ Word16 *high_lpn_flag, /* i/o: sp/mus LPN flag */ const Word16 flag_spitch, /* i : flag to indicate very short stable pitch */ diff --git a/lib_enc/bw_detect_fx.c b/lib_enc/bw_detect_fx.c index 050adbf01..94ed49129 100644 --- a/lib_enc/bw_detect_fx.c +++ b/lib_enc/bw_detect_fx.c @@ -71,6 +71,7 @@ void bw_detect_fx( Flag Overflow = 0; move32(); #endif + Word16 lp_noise_fx; bwd_count_wider_bw = BWD_COUNT_WIDER_BW; move16(); @@ -532,7 +533,17 @@ void bw_detect_fx( /*if( localVAD || st->lp_noise > 30 )*/ test(); - IF( st->localVAD || GT_16( st->lp_noise_fx, 7680 /*30 in Q8*/ ) ) + IF( EQ_16( st->element_mode, EVS_MONO ) ) + { + lp_noise_fx = st->lp_noise_fx; + move16(); + } + ELSE + { + lp_noise_fx = extract_h( st->lp_noise_32fx ); + } 
+ + IF( st->localVAD || GT_32( ( lp_noise_fx ), 7680 /*30 in Q8*/ ) ) { /*st->lt_mean_NB_fx = ALPHA_BWD * st->lt_mean_NB_fx + (1-ALPHA_BWD) * mean_NB;*/ L_tmp = L_mult( ALPHA_BWD_FX, st->lt_mean_NB_fx ); /* Q15 * Q11 -> Q27 */ diff --git a/lib_enc/dtx_fx.c b/lib_enc/dtx_fx.c index 6d8a29795..7966fc7d8 100644 --- a/lib_enc/dtx_fx.c +++ b/lib_enc/dtx_fx.c @@ -95,7 +95,7 @@ void dtx_ivas_fx( test(); test(); test(); - last_br_cng_flag = LE_32( st_fx->last_total_brate_cng, MAX_BRATE_DTX_EVS ) || LT_16( st_fx->lp_noise_fx, DTX_THR * 256 ) || ( EQ_16( st_fx->element_mode, IVAS_SCE ) && LE_32( st_fx->last_total_brate_cng, MAX_BRATE_DTX_IVAS ) ); + last_br_cng_flag = LE_32( st_fx->last_total_brate_cng, MAX_BRATE_DTX_EVS ) || LT_32( ( st_fx->lp_noise_32fx ), DTX_THR * 16777216 ) || ( EQ_16( st_fx->element_mode, IVAS_SCE ) && LE_32( st_fx->last_total_brate_cng, MAX_BRATE_DTX_IVAS ) ); test(); test(); @@ -103,7 +103,7 @@ void dtx_ivas_fx( last_br_flag = ( st_fx->element_mode == EVS_MONO && LE_32( st_fx->last_total_brate, MAX_BRATE_DTX_EVS ) ) || ( st_fx->element_mode != EVS_MONO && LE_32( last_ivas_total_brate, MAX_BRATE_DTX_IVAS ) ) || - LT_16( st_fx->lp_noise_fx, DTX_THR * 256 ); + LT_32( ( st_fx->lp_noise_32fx ), DTX_THR * 16777216 ); } /* Initialization */ @@ -211,7 +211,7 @@ void dtx_ivas_fx( test(); br_dtx_flag = ( ( st_fx->element_mode == EVS_MONO ) && LE_32( st_fx->total_brate, MAX_BRATE_DTX_EVS ) ) || ( ( st_fx->element_mode != EVS_MONO ) && LE_32( ivas_total_brate, MAX_BRATE_DTX_IVAS ) ) || - LT_16( st_fx->lp_noise_fx, DTX_THR * 256 ); + LT_16( extract_h( st_fx->lp_noise_32fx ), DTX_THR * 256 ); } test(); test(); @@ -403,7 +403,7 @@ void dtx_ivas_fx( st_fx->cng_type = FD_CNG; move16(); } - ELSE IF( EQ_16( st_fx->cng_type, FD_CNG ) && ( LT_32( st_fx->bckr_tilt_lt, fd_thresh ) ) && ( GT_16( st_fx->lp_noise_fx, 512 /* 2 in Q8 */ ) ) ) + ELSE IF( EQ_16( st_fx->cng_type, FD_CNG ) && ( LT_32( st_fx->bckr_tilt_lt, fd_thresh ) ) && ( GT_32( st_fx->lp_noise_32fx, 33554432 /*
2 in Q24 */ ) ) ) { st_fx->cng_type = LP_CNG; move16(); diff --git a/lib_enc/fd_cng_enc_fx.c b/lib_enc/fd_cng_enc_fx.c index fe5839565..9298c8634 100644 --- a/lib_enc/fd_cng_enc_fx.c +++ b/lib_enc/fd_cng_enc_fx.c @@ -447,8 +447,16 @@ void resetFdCngEnc_fx( /* st->totalNoise_fx; Q8 Noise estimator - total noise energy */ /* Detect fast increase of totalNoise */ - totalNoiseIncrease = sub( hNoiseEst->totalNoise_fx, st->last_totalNoise_fx ); // Q8 - st->last_totalNoise_fx = hNoiseEst->totalNoise_fx; // Q8 + IF( EQ_16( st->element_mode, EVS_MONO ) ) + { + totalNoiseIncrease = sub( hNoiseEst->totalNoise_fx, st->last_totalNoise_fx ); // Q8 + st->last_totalNoise_fx = hNoiseEst->totalNoise_fx; // Q8 + } + ELSE + { + totalNoiseIncrease = sub( extract_h( hNoiseEst->totalNoise_32fx ), st->last_totalNoise_fx ); // Q8 + st->last_totalNoise_fx = extract_h( hNoiseEst->totalNoise_32fx ); // Q8 + } move16(); IF( totalNoiseIncrease > 0 ) { diff --git a/lib_enc/find_uv_fx.c b/lib_enc/find_uv_fx.c index c4fe2e895..61de9ca7d 100644 --- a/lib_enc/find_uv_fx.c +++ b/lib_enc/find_uv_fx.c @@ -853,11 +853,11 @@ Word16 find_uv_ivas_fx( /* o : coding typ IF( st_fx->input_bwidth != NB ) { /*relE_thres = 0.700f * st->lp_noise - 33.5f; (lp_noise in Q8, constant Q8<<16) */ - L_tmp = L_mac( -562036736 /* 33.5f in Q24 */, 22938 /* 0.7 in Q15 */, st_fx->lp_noise_fx ); // Q24 + L_tmp = L_mac( -562036736 /* 33.5f in Q24 */, 22938 /* 0.7 in Q15 */, extract_h( st_fx->lp_noise_32fx ) ); // Q24 IF( Last_Resort == 0 ) { /*relE_thres = 0.650f * st->lp_noise - 33.5f; (lp_noise in Q8, constant Q8<<16)*/ - L_tmp = L_mac( -562036736 /* 33.5f in Q24 */, 21299 /* 0.650f in Q15 */, st_fx->lp_noise_fx ); // Q24 + L_tmp = L_mac( -562036736 /* 33.5f in Q24 */, 21299 /* 0.650f in Q15 */, extract_h( st_fx->lp_noise_32fx ) ); // Q24 } relE_thres = round_fx( L_tmp ); } @@ -865,7 +865,7 @@ Word16 find_uv_ivas_fx( /* o : coding typ { /*relE_thres = 0.60f * st->lp_noise - 28.2f; (lp_noise in Q8, constant Q8<<16)*/ - L_tmp = 
L_mac( -473117491 /* 28.2f in Q24 */, 19661 /* 0.6f in Q15 */, st_fx->lp_noise_fx ); // Q24 + L_tmp = L_mac( -473117491 /* 28.2f in Q24 */, 19661 /* 0.6f in Q15 */, extract_h( st_fx->lp_noise_32fx ) ); // Q24 relE_thres = round_fx( L_tmp ); } relE_thres = s_max( relE_thres, -6400 /* -25.0f in Q8 */ ); /* Q8 */ diff --git a/lib_enc/init_enc_fx.c b/lib_enc/init_enc_fx.c index 543a6fab9..ba1d47f79 100644 --- a/lib_enc/init_enc_fx.c +++ b/lib_enc/init_enc_fx.c @@ -1572,9 +1572,9 @@ ivas_error init_encoder_ivas_fx( * DTX *-----------------------------------------------------------------*/ - st->lp_speech_fx = 11520; /*Q8 (45.0) */ /* Initialize the long-term active speech level in dB */ - move16(); - st->lp_noise_fx = 0; + st->lp_speech_32fx = 754974720; /*Q24 (45.0) */ /* Initialize the long-term active speech level in dB */ + move32(); + st->lp_noise_32fx = 0; move16(); st->flag_noisy_speech_snr = 0; move16(); diff --git a/lib_enc/ivas_core_pre_proc_front_fx.c b/lib_enc/ivas_core_pre_proc_front_fx.c index 3e7bf2464..fbd207e26 100644 --- a/lib_enc/ivas_core_pre_proc_front_fx.c +++ b/lib_enc/ivas_core_pre_proc_front_fx.c @@ -158,7 +158,7 @@ ivas_error pre_proc_front_ivas_fx( Word16 high_lpn_flag; Word16 lsf_new_fx[M]; // Q2.56 Word16 localVAD_HE_SAD; - Word16 non_staX_fx; + Word32 non_staX_fx; Word16 alw_pitch_lag_12k8[2]; Word16 alw_voicing_fx[2]; Word16 last_core_orig; @@ -191,7 +191,7 @@ ivas_error pre_proc_front_ivas_fx( Word16 ncharX_fx; Word16 ncharX_LR_fx; /* noise character for sp/mus classifier */ Word16 loc_harmLR_fx[CPE_CHANNELS]; /* harmonicity flag */ - Word16 non_staX_LR_fx; /* non-stationarity for sp/mus classifier */ + Word32 non_staX_LR_fx; /* non-stationarity for sp/mus classifier */ Word16 sp_div_fx; Word16 q_sp_div; Word16 sp_div_LR_fx; @@ -742,7 +742,7 @@ ivas_error pre_proc_front_ivas_fx( IF( hStereoClassif != NULL ) { - IF( GT_32( sub( st->lp_speech_fx, extract_h( Etot_fx ) ), 25 << Q8 ) ) /*Q8*/ + IF( GT_32( L_sub( st->lp_speech_32fx, Etot_fx ), 
25 << Q24 ) ) /*Q24*/ { hStereoClassif->silence_flag = 2; move16(); @@ -806,7 +806,7 @@ ivas_error pre_proc_front_ivas_fx( test(); IF( ( hCPE != NULL && !( lr_vad_enabled && st->idchan == 0 ) ) || hSCE != NULL ) { - *vad_flag_dtx = ivas_dtx_hangover_addition_fx( st, st->vad_flag, sub( st->lp_speech_fx, st->lp_noise_fx ), 0, vad_hover_flag, NULL, NULL, NULL ); /* Q0 */ + *vad_flag_dtx = ivas_dtx_hangover_addition_fx( st, st->vad_flag, extract_h( L_sub( st->lp_speech_32fx, st->lp_noise_32fx ) ), 0, vad_hover_flag, NULL, NULL, NULL ); /* Q0 */ move16(); } ELSE @@ -904,21 +904,21 @@ ivas_error pre_proc_front_ivas_fx( *----------------------------------------------------------------*/ noise_est_down_ivas_fx( fr_bands_fx, fr_bands_fx_q, st->hNoiseEst->bckr_fx, &st->hNoiseEst->q_bckr, tmpN_fx, &q_tmpN, tmpE_fx, &q_tmpE, st->min_band, st->max_band, - &st->hNoiseEst->totalNoise_fx, Etot_fx, &st->hNoiseEst->Etot_last_32fx, &st->hNoiseEst->Etot_v_h2_32fx ); + &st->hNoiseEst->totalNoise_32fx, Etot_fx, &st->hNoiseEst->Etot_last_32fx, &st->hNoiseEst->Etot_v_h2_32fx ); test(); IF( lr_vad_enabled && st->idchan == 0 ) { - noise_est_down_ivas_fx( fr_bands_LR_fx[0], fr_bands_LR_fx_q[0], hCPE->hFrontVad[0]->hNoiseEst->bckr_fx, &hCPE->hFrontVad[0]->hNoiseEst->q_bckr, tmpN_LR_fx[0], &q_tmpN_LR[0], tmpE_LR_fx[0], &q_tmpE_LR[0], st->min_band, st->max_band, &hCPE->hFrontVad[0]->hNoiseEst->totalNoise_fx, L_deposit_h( Etot_LR_fx[0] ), &hCPE->hFrontVad[0]->hNoiseEst->Etot_last_32fx, &hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2_32fx ); - noise_est_down_ivas_fx( fr_bands_LR_fx[1], fr_bands_LR_fx_q[1], hCPE->hFrontVad[1]->hNoiseEst->bckr_fx, &hCPE->hFrontVad[1]->hNoiseEst->q_bckr, tmpN_LR_fx[1], &q_tmpN_LR[1], tmpE_LR_fx[1], &q_tmpE_LR[1], st->min_band, st->max_band, &hCPE->hFrontVad[1]->hNoiseEst->totalNoise_fx, L_deposit_h( Etot_LR_fx[1] ), &hCPE->hFrontVad[1]->hNoiseEst->Etot_last_32fx, &hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2_32fx ); + noise_est_down_ivas_fx( fr_bands_LR_fx[0],
fr_bands_LR_fx_q[0], hCPE->hFrontVad[0]->hNoiseEst->bckr_fx, &hCPE->hFrontVad[0]->hNoiseEst->q_bckr, tmpN_LR_fx[0], &q_tmpN_LR[0], tmpE_LR_fx[0], &q_tmpE_LR[0], st->min_band, st->max_band, &hCPE->hFrontVad[0]->hNoiseEst->totalNoise_32fx, L_deposit_h( Etot_LR_fx[0] ), &hCPE->hFrontVad[0]->hNoiseEst->Etot_last_32fx, &hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2_32fx ); + noise_est_down_ivas_fx( fr_bands_LR_fx[1], fr_bands_LR_fx_q[1], hCPE->hFrontVad[1]->hNoiseEst->bckr_fx, &hCPE->hFrontVad[1]->hNoiseEst->q_bckr, tmpN_LR_fx[1], &q_tmpN_LR[1], tmpE_LR_fx[1], &q_tmpE_LR[1], st->min_band, st->max_band, &hCPE->hFrontVad[1]->hNoiseEst->totalNoise_32fx, L_deposit_h( Etot_LR_fx[1] ), &hCPE->hFrontVad[1]->hNoiseEst->Etot_last_32fx, &hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2_32fx ); - corr_shiftL_fx = correlation_shift_fx( hCPE->hFrontVad[0]->hNoiseEst->totalNoise_fx ); // Q15 - corr_shiftR_fx = correlation_shift_fx( hCPE->hFrontVad[1]->hNoiseEst->totalNoise_fx ); // Q15 + corr_shiftL_fx = correlation_shift_fx( extract_h( hCPE->hFrontVad[0]->hNoiseEst->totalNoise_32fx ) ); // Q15 + corr_shiftR_fx = correlation_shift_fx( extract_h( hCPE->hFrontVad[1]->hNoiseEst->totalNoise_32fx ) ); // Q15 } - *relE_fx = sub( extract_h( Etot_fx ), st->lp_speech_fx ); // Q8 + *relE_fx = sub( extract_h( Etot_fx ), extract_h( st->lp_speech_32fx ) ); // Q8 move16(); - corr_shift_fx = correlation_shift_fx( st->hNoiseEst->totalNoise_fx ); /* Q15 */ + corr_shift_fx = correlation_shift_fx( extract_h( st->hNoiseEst->totalNoise_32fx ) ); /* Q15 */ /*----------------------------------------------------------------* * FD-CNG Noise Estimator @@ -1397,7 +1397,7 @@ ivas_error pre_proc_front_ivas_fx( move16(); stereo_classifier_features_ivas_fx( hStereoClassif, st->idchan, element_mode, localVAD_HE_SAD, lsf_new_fx, epsP_fx, st->pitch, st->voicing_fx, *cor_map_sum_fx, non_staX_fx, sp_div_fx, - st->clas, sub( 31, *epsP_fx_q ), ( 31 - Q8 ) /* exp of cor_map_sum */, ( 31 - Q8 ) /* exp of non_staX_fx */, sub( 15, 
q_sp_div ) ); + st->clas, sub( 31, *epsP_fx_q ), ( 31 - Q8 ) /* exp of cor_map_sum */, ( 31 - Q20 ) /* exp of non_staX_fx */, sub( 15, q_sp_div ) ); } /*----------------------------------------------------------------* @@ -1443,7 +1443,7 @@ ivas_error pre_proc_front_ivas_fx( * Update of old per-band energy spectrum *----------------------------------------------------------------*/ - ivas_long_enr_fx( st, extract_h( Etot_fx ), localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); + ivas_long_enr_fx( st, Etot_fx, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); Copy32( fr_bands_fx + NB_BANDS, st->hNoiseEst->enrO_fx, NB_BANDS ); /* fr_bands_fx_q */ st->hNoiseEst->q_enrO = fr_bands_fx_q; @@ -1452,7 +1452,7 @@ ivas_error pre_proc_front_ivas_fx( test(); IF( lr_vad_enabled && st->idchan == 0 ) { - ivas_long_enr_fx( st, -256 /*-1 q8*/, localVAD_HE_SAD, high_lpn_flag, hCPE->hFrontVad, CPE_CHANNELS, localVAD_HE_SAD_LR, Etot_LR_fx ); + ivas_long_enr_fx( st, -16777216 /*-1 << 24*/, localVAD_HE_SAD, high_lpn_flag, hCPE->hFrontVad, CPE_CHANNELS, localVAD_HE_SAD_LR, Etot_LR_fx ); Copy32( fr_bands_LR_fx[0] + NB_BANDS, hCPE->hFrontVad[0]->hNoiseEst->enrO_fx, NB_BANDS ); // fr_bands_LR_fx_q hCPE->hFrontVad[0]->hNoiseEst->q_enrO = fr_bands_LR_fx_q[0]; diff --git a/lib_enc/ivas_core_pre_proc_fx.c b/lib_enc/ivas_core_pre_proc_fx.c index 938ab9964..623b38e68 100644 --- a/lib_enc/ivas_core_pre_proc_fx.c +++ b/lib_enc/ivas_core_pre_proc_fx.c @@ -462,7 +462,7 @@ ivas_error pre_proc_ivas_fx( IF( EQ_16( st->coder_type, GENERIC ) || EQ_16( st->coder_type, VOICED ) || EQ_16( st->coder_type, TRANSITION ) ) { test(); - IF( GE_32( element_brate, FRMT_SHP_MIN_BRATE_IVAS ) && GT_16( st->lp_noise_fx, FORMANT_SHARPENING_NOISE_THRESHOLD_FX ) ) + IF( GE_32( element_brate, FRMT_SHP_MIN_BRATE_IVAS ) && GT_32( st->lp_noise_32fx, FORMANT_SHARPENING_NOISE_THRESHOLD_FX << 16 ) ) { st->sharpFlag = 0; move16(); diff --git a/lib_enc/ivas_front_vad_fx.c b/lib_enc/ivas_front_vad_fx.c index 
a6054a841..963e363b2 100644 --- a/lib_enc/ivas_front_vad_fx.c +++ b/lib_enc/ivas_front_vad_fx.c @@ -492,7 +492,7 @@ ivas_error front_vad_spar_fx( Word16 sp_div_fx; Word16 Q_sp_div; - Word16 non_staX_fx; + Word32 non_staX_fx; Word16 sp_floor; Word16 cor_map_sum_fx; @@ -603,9 +603,9 @@ ivas_error front_vad_spar_fx( Word16 q_tmpN, q_tmpE; noise_est_down_ivas_fx( fr_bands_fx[0], q_fr_bands[0], hFrontVad->hNoiseEst->bckr_fx, &hFrontVad->hNoiseEst->q_bckr, tmpN_fx, &q_tmpN, tmpE_fx, &q_tmpE, st->min_band, st->max_band, - &hFrontVad->hNoiseEst->totalNoise_fx, L_deposit_h( Etot_fx[0] ) /*q8->q24*/, &hFrontVad->hNoiseEst->Etot_last_32fx, &hFrontVad->hNoiseEst->Etot_v_h2_32fx ); + &hFrontVad->hNoiseEst->totalNoise_32fx, L_deposit_h( Etot_fx[0] ) /*q8->q24*/, &hFrontVad->hNoiseEst->Etot_last_32fx, &hFrontVad->hNoiseEst->Etot_v_h2_32fx ); - corr_shift_fx = correlation_shift_fx( hFrontVad->hNoiseEst->totalNoise_fx ); /* Q15 */ + corr_shift_fx = correlation_shift_fx( extract_h( hFrontVad->hNoiseEst->totalNoise_32fx ) ); /* Q15 */ dtx_ivas_fx( st, hEncoderConfig->last_ivas_total_brate, hEncoderConfig->ivas_total_brate, vad_flag_dtx[0], inp_12k8_fx, Q_inp_12k8 ); @@ -637,7 +637,7 @@ ivas_error front_vad_spar_fx( move32(); } - relE_fx = sub( Etot_fx[0], st->lp_speech_fx ); + relE_fx = sub( Etot_fx[0], extract_h( st->lp_speech_32fx ) ); Scale_sig( A_fx, ( L_FRAME / L_SUBFR ) * ( M + 1 ), -2 ); // Q12 st->mem_wsp_fx = (Word16) shl_sat( st->mem_wsp_fx, Q_inp_12k8 - st->mem_wsp_q ); /* Q_inp_12k8 */ @@ -749,13 +749,12 @@ ivas_error front_vad_spar_fx( /* 1st stage speech/music classification (GMM model) */ /* run only to get 'high_lpn_flag' parameter */ SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas; - Word16 non_sta_fx = shr( non_staX_fx, Q2 ); // Q8->Q6 Word16 Etot_fx_0 = Etot_fx[0]; move16(); scale = getScaleFactor32( PS_fx, 128 ); Qfact_PS = add( Qfact_PS, scale ); Scale_sig32( PS_fx, 128, scale ); - ivas_smc_gmm_fx( st, NULL, localVAD_HE_SAD[0], Etot_fx_0, lsp_new_fx, 
cor_map_sum_fx, epsP_fx, PS_fx, non_sta_fx, relE_fx, &high_lpn_flag, flag_spitch, Qfact_PS, Q_esp, hSpMusClas->past_PS_Q ); + ivas_smc_gmm_fx( st, NULL, localVAD_HE_SAD[0], Etot_fx_0, lsp_new_fx, cor_map_sum_fx, epsP_fx, PS_fx, non_staX_fx, relE_fx, &high_lpn_flag, flag_spitch, Qfact_PS, Q_esp, hSpMusClas->past_PS_Q ); /* long-term energy update */ ivas_long_enr_fx( st, -256 /*-1 q8*/, localVAD_HE_SAD[0], high_lpn_flag, &hFrontVad, 1, localVAD_HE_SAD, Etot_fx ); diff --git a/lib_enc/ivas_ism_dtx_enc_fx.c b/lib_enc/ivas_ism_dtx_enc_fx.c index b06acfbac..bc2c83de1 100644 --- a/lib_enc/ivas_ism_dtx_enc_fx.c +++ b/lib_enc/ivas_ism_dtx_enc_fx.c @@ -143,7 +143,7 @@ Word16 ivas_ism_dtx_enc_fx( /* one of the channels is active -> no DTX */ FOR( ch = 0; ch < nchan_transport; ch++ ) { - lp_noise_fx[ch] = hSCE[ch]->hCoreCoder[0]->lp_noise_fx; /*Q8*/ + lp_noise_fx[ch] = extract_h( hSCE[ch]->hCoreCoder[0]->lp_noise_32fx ); /*Q8*/ move16(); } diff --git a/lib_enc/ivas_ism_metadata_enc_fx.c b/lib_enc/ivas_ism_metadata_enc_fx.c index 1efbb2123..594871b29 100644 --- a/lib_enc/ivas_ism_metadata_enc_fx.c +++ b/lib_enc/ivas_ism_metadata_enc_fx.c @@ -277,7 +277,7 @@ ivas_error ivas_ism_metadata_enc_fx( /* In case of low level noise for low bitrate inactive frames, do not sent metadata */ test(); test(); - hIsmMeta[ch]->ism_metadata_flag = vad_flag[ch] || GT_16( hSCE[ch]->hCoreCoder[0]->lp_noise_fx, 2560 /*10 Q8*/ ) || hSCE[ch]->hCoreCoder[0]->tcxonly; + hIsmMeta[ch]->ism_metadata_flag = vad_flag[ch] || GT_32( hSCE[ch]->hCoreCoder[0]->lp_noise_32fx, 167772160 /*10 Q24*/ ) || hSCE[ch]->hCoreCoder[0]->tcxonly; move16(); } diff --git a/lib_enc/ivas_masa_enc_fx.c b/lib_enc/ivas_masa_enc_fx.c index 9d0579d12..303d11698 100644 --- a/lib_enc/ivas_masa_enc_fx.c +++ b/lib_enc/ivas_masa_enc_fx.c @@ -1195,12 +1195,12 @@ ivas_error ivas_masa_enc_config_fx( test(); IF( EQ_16( st_ivas->hCPE[0]->element_mode, IVAS_CPE_DFT ) || LT_16( st_ivas->hMasa->data.hOmasaData->omasa_stereo_sw_cnt, 
OMASA_STEREO_SW_CNT_MAX ) ) { - st_ivas->hMasa->data.hOmasaData->lp_noise_CPE_fx = st_ivas->hCPE[0]->hCoreCoder[0]->lp_noise_fx; /*Q8*/ + st_ivas->hMasa->data.hOmasaData->lp_noise_CPE_fx = extract_h( st_ivas->hCPE[0]->hCoreCoder[0]->lp_noise_32fx ); /*Q8*/ } ELSE { /* ( st_ivas->hCPE[0]->hCoreCoder[0]->lp_noise + st_ivas->hCPE[0]->hCoreCoder[1]->lp_noise ) / CPE_CHANNELS; */ - st_ivas->hMasa->data.hOmasaData->lp_noise_CPE_fx = extract_h( L_mac( L_mult( st_ivas->hCPE[0]->hCoreCoder[0]->lp_noise_fx, ONE_IN_Q14 ), st_ivas->hCPE[0]->hCoreCoder[1]->lp_noise_fx, ONE_IN_Q14 ) ); /*Q8*/ + st_ivas->hMasa->data.hOmasaData->lp_noise_CPE_fx = extract_h( L_mac( L_mult( extract_h( st_ivas->hCPE[0]->hCoreCoder[0]->lp_noise_32fx ), ONE_IN_Q14 ), extract_h( st_ivas->hCPE[0]->hCoreCoder[1]->lp_noise_32fx ), ONE_IN_Q14 ) ); /*Q8*/ } move16(); } diff --git a/lib_enc/ivas_omasa_enc_fx.c b/lib_enc/ivas_omasa_enc_fx.c index b5b592582..b88bb69a1 100644 --- a/lib_enc/ivas_omasa_enc_fx.c +++ b/lib_enc/ivas_omasa_enc_fx.c @@ -795,7 +795,7 @@ void ivas_set_ism_importance_interformat_fx( IF( active_flag == 0 ) { test(); - if ( GT_16( st->lp_noise_fx, 3840 /* 15 in Q8 */ ) || LT_16( sub( lp_noise_CPE_fx, st->lp_noise_fx ), 7680 /* 30 in Q8 */ ) ) + if ( GT_32( st->lp_noise_32fx, 251658240 /* 15 in Q24 */ ) || LT_16( sub( lp_noise_CPE_fx, extract_h( st->lp_noise_32fx ) ), 7680 /* 30 in Q8 */ ) ) { active_flag = 1; move16(); diff --git a/lib_enc/ivas_stereo_classifier_fx.c b/lib_enc/ivas_stereo_classifier_fx.c index 3a040517a..2c1bc0317 100644 --- a/lib_enc/ivas_stereo_classifier_fx.c +++ b/lib_enc/ivas_stereo_classifier_fx.c @@ -1507,7 +1507,7 @@ void xtalk_classifier_dft_fx( hStereoClassif->xtalk_decision = 1; move16(); } - ELSE IF( GE_32( hCPE->element_brate, IVAS_16k4 ) && hStereoClassif->xtalk_decision == 0 && GT_16( abs_s( itd ), STEREO_DFT_ITD_MAX ) && GT_16( sub( hCPE->hCoreCoder[0]->lp_speech_fx, hCPE->hCoreCoder[0]->lp_noise_fx ), 25 << 8 ) ) + ELSE IF( GE_32( hCPE->element_brate, 
IVAS_16k4 ) && hStereoClassif->xtalk_decision == 0 && GT_16( abs_s( itd ), STEREO_DFT_ITD_MAX ) && GT_32( L_sub( hCPE->hCoreCoder[0]->lp_speech_32fx, hCPE->hCoreCoder[0]->lp_noise_32fx ), 25 << 24 ) ) { hStereoClassif->xtalk_decision = 1; move16(); diff --git a/lib_enc/ivas_stereo_td_analysis_fx.c b/lib_enc/ivas_stereo_td_analysis_fx.c index 909d0c206..de47ea8e2 100644 --- a/lib_enc/ivas_stereo_td_analysis_fx.c +++ b/lib_enc/ivas_stereo_td_analysis_fx.c @@ -564,7 +564,7 @@ Word16 stereo_tdm_ener_analysis_fx( } } - IF( LT_16( sub( sts[1]->lp_speech_fx, sts[1]->lp_noise_fx ), 12800 /*50.0f in Q8*/ ) ) /* likely presence of noisy content */ + IF( LT_32( L_sub( sts[1]->lp_speech_32fx, sts[1]->lp_noise_32fx ), 838860800 /*50.0f in Q24*/ ) ) /* likely presence of noisy content */ { /* pointing in the right direction, inverse it else do nothing */ test(); diff --git a/lib_enc/ivas_tcx_core_enc_fx.c b/lib_enc/ivas_tcx_core_enc_fx.c index 5f2e9a8e0..da141f473 100644 --- a/lib_enc/ivas_tcx_core_enc_fx.c +++ b/lib_enc/ivas_tcx_core_enc_fx.c @@ -715,7 +715,7 @@ Word16 ivas_acelp_tcx20_switching_fx( Word16 *inp_fx, Word16 q_inp, Word16 *wsp, /*q_inp i : input weighted signal */ - Word16 non_staX, /*Q8 i : unbound non-stationarity for sp/mu clas*/ + Word32 non_staX, /*Q20 i : unbound non-stationarity for sp/mu clas*/ Word16 *pitch_fr, /*Q6 i : fraction pitch values */ Word16 *voicing_fr, /*Q15 i : fractional voicing values */ Word32 currFlatness, /*Q21 i : flatness */ @@ -1276,7 +1276,7 @@ Word16 ivas_acelp_tcx20_switching_fx( test(); test(); test(); - if ( ( !flag_16k_smc ) && ( LT_32( offset_tcx, 0x18950F ) ) && GT_16( non_staX, 1280 /*5.0f Q8*/ ) && ( GE_32( snr_acelp, L_sub( tcx_snr, 262144 /*4.0f in Q16*/ ) ) ) && GE_16( st->Nb_ACELP_frames, 1 ) && ( ( GT_16( st->hSpMusClas->lps_fx, st->hSpMusClas->lpm_fx ) && GE_32( mean_voicing_fr, /* 0.3 in Q30 */ 322122547 ) ) || ( GE_16( st->Nb_ACELP_frames, 6 ) && GT_16( st->hSpMusClas->lps_fx, sub( st->hSpMusClas->lpm_fx, 192 /*1.5in 
Q7*/ ) ) ) ) && ( st->sp_aud_decision0 == 0 ) && st->vad_flag ) + if ( ( !flag_16k_smc ) && ( LT_32( offset_tcx, 0x18950F ) ) && GT_32( non_staX, 5242880 /*5.0f Q20*/ ) && ( GE_32( snr_acelp, L_sub( tcx_snr, 262144 /*4.0f in Q16*/ ) ) ) && GE_16( st->Nb_ACELP_frames, 1 ) && ( ( GT_16( st->hSpMusClas->lps_fx, st->hSpMusClas->lpm_fx ) && GE_32( mean_voicing_fr, /* 0.3 in Q30 */ 322122547 ) ) || ( GE_16( st->Nb_ACELP_frames, 6 ) && GT_16( st->hSpMusClas->lps_fx, sub( st->hSpMusClas->lpm_fx, 192 /*1.5in Q7*/ ) ) ) ) && ( st->sp_aud_decision0 == 0 ) && st->vad_flag ) { /* Fine tuned across various databases based on various metrics to detect TCX frames in speech.*/ dsnr = 262144; /*4.0f Q16*/ diff --git a/lib_enc/long_enr_fx.c b/lib_enc/long_enr_fx.c index 60c40b3fe..93918b81e 100644 --- a/lib_enc/long_enr_fx.c +++ b/lib_enc/long_enr_fx.c @@ -17,7 +17,7 @@ *-------------------------------------------------------------------*/ void ivas_long_enr_fx( Encoder_State *st_fx, /* i/o: state structure */ - const Word16 Etot, /* i : total channel E (see lib_enc\analy_sp.c) Q8 */ + const Word32 Etot, /* i : total channel E (see lib_enc\analy_sp.c) Q24 */ const Word16 localVAD_HE_SAD, /* i : HE-SAD flag without hangover Q0*/ Word16 high_lpn_flag, /* i : sp/mus LPN flag Q0*/ FRONT_VAD_ENC_HANDLE hFrontVad[], /* i/o: front-VAD handles */ @@ -28,7 +28,7 @@ void ivas_long_enr_fx( ) { Word16 tmp; - Word16 alpha; + Word32 alpha; NOISE_EST_HANDLE hNoiseEst = st_fx->hNoiseEst; /*-----------------------------------------------------------------* @@ -42,7 +42,7 @@ void ivas_long_enr_fx( { FOR( n = 0; n < n_chan; n++ ) { - hFrontVad[n]->lp_noise_fx = hFrontVad[n]->hNoiseEst->totalNoise_fx; /* Q8 */ + hFrontVad[n]->lp_noise_fx = extract_h( hFrontVad[n]->hNoiseEst->totalNoise_32fx ); /* Q8 */ move16(); tmp = add( hFrontVad[n]->lp_noise_fx, 2560 ); /* Q8 */ @@ -74,7 +74,7 @@ void ivas_long_enr_fx( FOR( n = 0; n < n_chan; n++ ) { - hFrontVad[n]->lp_noise_fx = add( mult_r( smooth_prev, 
hFrontVad[n]->lp_noise_fx ), mult_r( smooth_curr, hFrontVad[n]->hNoiseEst->totalNoise_fx ) ); /* Q8 */ + hFrontVad[n]->lp_noise_fx = add( mult_r( smooth_prev, hFrontVad[n]->lp_noise_fx ), mult_r( smooth_curr, extract_h( hFrontVad[n]->hNoiseEst->totalNoise_32fx ) ) ); /* Q8 */ move16(); test(); IF( localVAD_HE_SAD_LR[n] && !high_lpn_flag ) @@ -102,11 +102,11 @@ void ivas_long_enr_fx( { IF( LT_16( st_fx->ini_frame, 4 ) ) { - st_fx->lp_noise_fx = hNoiseEst->totalNoise_fx; /* Q8 */ - move16(); - tmp = add( st_fx->lp_noise_fx, 2560 ); /*10.0 in Q8*/ - st_fx->lp_speech_fx = s_max( st_fx->lp_speech_fx, tmp ); - move16(); + st_fx->lp_noise_32fx = hNoiseEst->totalNoise_32fx; /* Q24 */ + move32(); + Word32 tmp1 = L_add( st_fx->lp_noise_32fx, 167772160 ); /*10.0 in Q24*/ + st_fx->lp_speech_32fx = L_max( st_fx->lp_speech_32fx, tmp1 ); + move32(); } ELSE { @@ -115,34 +115,34 @@ void ivas_long_enr_fx( } else { st->lp_noise = 0.98f * st->lp_noise + 0.02f * st->totalNoise; } */ - alpha = 655; /* 0.02 Q15 */ - move16(); + alpha = 42949673; /* 0.02 Q31 */ + move32(); if ( LT_16( st_fx->ini_frame, 150 ) ) /* should match HE_LT_CNT_INIT_FX */ { - alpha = 1638; - move16(); /* 0.05 Q15 */ + alpha = 107374182; + move32(); /* 0.05 Q31 */ } - st_fx->lp_noise_fx = noise_est_AR1_Qx( hNoiseEst->totalNoise_fx, st_fx->lp_noise_fx, alpha ); /* Q8 state, alpha in Q15 */ - move16(); + st_fx->lp_noise_32fx = noise_est_AR1_Qx_32( hNoiseEst->totalNoise_32fx, st_fx->lp_noise_32fx, alpha ); /* Q24 state, alpha in Q31 */ + move32(); test(); IF( ( localVAD_HE_SAD != 0 ) && ( high_lpn_flag == 0 ) ) { - IF( LT_16( sub( st_fx->lp_speech_fx, Etot ), 2560 ) ) /* 10.0 in Q8 */ + IF( LT_32( L_sub( st_fx->lp_speech_32fx, Etot ), 167772160 ) ) /* 10.0 in Q24 */ { /* st->lp_speech = 0.98f * st->lp_speech + 0.02f * Etot; */ - st_fx->lp_speech_fx = noise_est_AR1_Qx( Etot, st_fx->lp_speech_fx, 655 ); /* Q8 state, 0.02 in Q15 */ - move16(); + st_fx->lp_speech_32fx = noise_est_AR1_Qx_32( Etot, st_fx->lp_speech_32fx, 
42949673 ); /* Q24 state, 0.02 in Q31 */ + move32(); } ELSE { - st_fx->lp_speech_fx = sub( st_fx->lp_speech_fx, 13 ); /* st->lp_speech = st->lp_speech - 0.05f; linear decay*/ - move16(); + st_fx->lp_speech_32fx = L_sub( st_fx->lp_speech_32fx, 838861 ); /* st->lp_speech = st->lp_speech - 0.05f; linear decay*/ + move32(); } } } /* Update */ - st_fx->hNoiseEst->Etot_last_32fx = L_deposit_h( Etot ); /* Q24 */ - move16(); + st_fx->hNoiseEst->Etot_last_32fx = Etot; /* Q24 */ + move32(); } /*-----------------------------------------------------------------* diff --git a/lib_enc/nois_est_fx.c b/lib_enc/nois_est_fx.c index 7534a5159..676696fd6 100644 --- a/lib_enc/nois_est_fx.c +++ b/lib_enc/nois_est_fx.c @@ -75,6 +75,20 @@ Word16 noise_est_AR1_Qx( /* o : Qx y(n) */ return mac_r( L_mult( y, alpham1 ), x, alpha ); } +Word32 noise_est_AR1_Qx_32( /* o : Qx y(n) */ + Word32 x, /* i : Qx x(n) */ + Word32 y, /* i : Qx y(n-1) */ + Word32 alpha /*i : Q31 scaling of driving x(n) */ +) +{ + Word32 alpham1; + /* alpham1 = 1.0 - alpha in Q31 (assumes MAX_32 == 2^31 - 1 and alpha > 0) */ + alpham1 = L_sub( MAX_32, alpha ); /* one cycle less */ + alpham1++; + + return Madd_32_32( Mpy_32_32( y, alpham1 ), x, alpha ); +} + /*-----------------------------------------------------------------* * noise_est_ln_q8_fx() * @@ -280,7 +294,7 @@ void noise_est_init_ivas_fx( move32(); /*Q7//E_MIN; */ hNoiseEst->enrO_fx[i] = E_MIN_Q11_FX; move32(); - hNoiseEst->bckr_fx[i] = E_MIN_Q11_FX; + hNoiseEst->bckr_fx[i] = E_MIN_Q27_FX; move32(); hNoiseEst->ave_enr_fx[i] = E_MIN_Q11_FX; move32(); @@ -289,14 +303,14 @@ void noise_est_init_ivas_fx( move16(); /*1e-5f; */ hNoiseEst->q_enrO = Q11; move16(); - hNoiseEst->q_bckr = Q11; + hNoiseEst->q_bckr = Q27; move16(); hNoiseEst->ave_enr_q = Q11; move16(); move16(); - hNoiseEst->totalNoise_fx = 0; - move16(); + hNoiseEst->totalNoise_32fx = 0; + move32(); hNoiseEst->first_noise_updt = 0; move16(); hNoiseEst->first_noise_updt_cnt = 0; @@ -747,7 +761,7 @@ void noise_est_down_ivas_fx( Word16
*q_enr, const Word16 min_band, /* i : minimum critical band */ const Word16 max_band, /* i : maximum critical band */ - Word16 *totalNoise, /* o : noise estimate over all critical bands */ + Word32 *totalNoise, /* o : noise estimate over all critical bands */ Word32 Etot, /* i : Energy of current frame Q24*/ Word32 *Etot_last, /* i/o: Energy of last frame Q24 */ Word32 *Etot_v_h2 /* i/o: Energy variations of noise frames Q24 */ @@ -803,8 +817,8 @@ void noise_est_down_ivas_fx( Ltmp = L_mac( L_deposit_h( e_Noise ), f_Noise, 1 ); // Q16 Ltmp = Mpy_32_16_1( Ltmp, LG10 ); // Q14 (16+13-15) Ltmp = L_shl( Ltmp, 10 ); // Q26 - *totalNoise = round_fx( Ltmp ); /*Q8*/ - move16(); + *totalNoise = ( Ltmp ); /*Q24*/ + move32(); /*-----------------------------------------------------------------* * Average energy per frame for each frequency band @@ -2207,7 +2221,7 @@ void noise_est_ivas_fx( Word16 *ncharX, /* o : Q11 */ Word16 *sp_div, /* o : Q_sp_div */ Word16 *Q_sp_div, /* o : Q factor for sp_div */ - Word16 *non_staX, /* o : non-stationarity for sp/mus classifier Q8 */ + Word32 *non_staX, /* o : non-stationarity for sp/mus classifier Q20 */ Word16 *loc_harm, /* o : multi-harmonicity flag for UV classifier */ const Word32 *lf_E, /* i : per bin energy for low frequencies q_lf_E */ const Word16 q_lf_E, /* i : Q of lf_E Q0 */ @@ -2728,7 +2742,7 @@ void noise_est_ivas_fx( Ltmp1 = Mpy_32_16_1( Ltmp1, 22713 ); // Q15 log_enr16 = round_fx( L_shl( Ltmp1, 9 ) ); /* Q8 */ wtmp = abs_s( sub( log_enr16, hSpMusClas->past_log_enr_fx[i - START_BAND_SPMUS] ) ); - *non_staX = add_o( *non_staX, wtmp, &Overflow ); + *non_staX = L_add( *non_staX, wtmp ); move16(); /* Q8 */ hSpMusClas->past_log_enr_fx[i - START_BAND_SPMUS] = log_enr16; move16(); @@ -2773,8 +2787,9 @@ void noise_est_ivas_fx( } } - } /* end of band loop FOR( i = st_fx->min_band; i <= st_fx->max_band; i++ ) */ - + } /* end of band loop FOR( i = st_fx->min_band; i <= st_fx->max_band; i++ ) */ + *non_staX = L_shl( *non_staX, 12 ); // 
Q20 + move32(); IF( LT_16( Etot, -1280 /* -5.0f in Q8 */ ) ) { non_sta = L_deposit_l( 1024 ); /* 1.0 in Q10 */ @@ -2974,7 +2989,7 @@ void noise_est_ivas_fx( * long term extensions of frame features *-----------------------------------------------------------------*/ - tmp = sub( Etot, hNoiseEst->totalNoise_fx ); /* Q8 */ + tmp = sub( Etot, extract_h( hNoiseEst->totalNoise_32fx ) ); /* Q8 */ /* st->lt_tn_track = 0.03f* (Etot - st->totalNoise < 10) + 0.97f*st->lt_tn_track; */ tmp2 = 0; move16(); @@ -3471,7 +3486,7 @@ void noise_est_ivas_fx( test(); test(); IF( ( LT_16( hNoiseEst->act_pred_fx, 27853 /* 0.85 in Q15 */ ) && ( aE_bgd != 0 ) && ( LT_16( hNoiseEst->lt_Ellp_dist_fx, 10 * 256 /* 10 in Q8*/ ) || ( sd1_bgd != 0 ) ) && ( LT_16( hNoiseEst->lt_tn_dist_fx, 40 * 256 ) ) /* 40.0 in Q8*/ - && LT_16( sub( Etot, hNoiseEst->totalNoise_fx ), 10 * 256 /* 10 in Q8 */ ) /* 10.0 in Q8*/ ) || + && LT_16( sub( Etot, extract_h( hNoiseEst->totalNoise_32fx ) ), 10 * 256 /* 10 in Q8 */ ) /* 10.0 in Q8*/ ) || ( ( hNoiseEst->first_noise_updt == 0 ) && GT_16( hNoiseEst->harm_cor_cnt, 80 ) && ( aE_bgd != 0 ) && GT_16( hNoiseEst->lt_aEn_zero_fx, 16384 /* 0.5 in Q15 */ ) ) || ( ( tn_ini != 0 ) && ( ( aE_bgd != 0 ) || LT_16( non_staB, 10 * 256 /* 10 in Q8*/ ) || GT_16( hNoiseEst->harm_cor_cnt, 80 ) ) ) ) { @@ -3494,7 +3509,7 @@ void noise_est_ivas_fx( ) */ ELSE IF( ( LT_16( hNoiseEst->act_pred_fx, 26214 /* 0.8 in Q15*/ ) && ( ( aE_bgd != 0 ) || ( PAU != 0 ) ) && ( LT_16( hNoiseEst->lt_haco_ev_fx, 3277 /* 0.1 in q15*/ ) ) ) || ( ( LT_16( hNoiseEst->act_pred_fx, 22938 /* 0.70 in Q15 */ ) ) && ( ( aE_bgd != 0 ) || ( LT_16( non_staB, 17 * 256 /* 17.0 in Q8 */ ) ) ) && ( PAU != 0 ) && ( LT_16( hNoiseEst->lt_haco_ev_fx, 4915 /* 0.15 in Q15 */ ) ) ) || - ( GT_16( hNoiseEst->harm_cor_cnt, 80 ) && GT_16( hNoiseEst->totalNoise_fx, 5 * 256 /* 5.0 in Q8 */ ) && LT_16( Etot, s_max( 1 * 256, add( Etot_l_lp, extract_h( L_add( hNoiseEst->Etot_v_h2_32fx, L_shr( hNoiseEst->Etot_v_h2_32fx, 1 ) ) ) /* 1.5= 
1.0+.5 */ ) ) ) ) || + ( GT_16( hNoiseEst->harm_cor_cnt, 80 ) && GT_16( extract_h( hNoiseEst->totalNoise_32fx ), 5 * 256 /* 5.0 in Q8 */ ) && LT_16( Etot, s_max( 1 * 256, add( Etot_l_lp, extract_h( L_add( hNoiseEst->Etot_v_h2_32fx, L_shr( hNoiseEst->Etot_v_h2_32fx, 1 ) ) ) /* 1.5= 1.0+.5 */ ) ) ) ) || ( GT_16( hNoiseEst->harm_cor_cnt, 50 ) && GT_16( hNoiseEst->first_noise_updt, 30 ) && ( aE_bgd != 0 ) && GT_16( hNoiseEst->lt_aEn_zero_fx, 16384 /*.5 in Q15*/ ) ) || ( tn_ini != 0 ) ) { updt_step = 3277; @@ -3513,7 +3528,7 @@ void noise_est_ivas_fx( test(); if ( ( aE_bgd == 0 ) && LT_16( hNoiseEst->harm_cor_cnt, 50 ) && ( GT_16( hNoiseEst->act_pred_fx, 19661 /* 0.6 in Q15*/ ) || - ( ( tn_ini == 0 ) && LT_16( sub( Etot_l_lp, hNoiseEst->totalNoise_fx ), 10 * 256 /* 10.0 in Q8 */ ) && GT_16( non_staB, 8 * 256 /* 8.0 in in Q8*/ ) ) ) ) + ( ( tn_ini == 0 ) && LT_16( sub( Etot_l_lp, extract_h( hNoiseEst->totalNoise_32fx ) ), 10 * 256 /* 10.0 in Q8 */ ) && GT_16( non_staB, 8 * 256 /* 8.0 in in Q8*/ ) ) ) ) { updt_step = 328; move16(); /* 0.01 Q15 */ @@ -3543,7 +3558,7 @@ void noise_est_ivas_fx( /* If in music lower bckr to drop further */ test(); test(); - IF( GT_16( hNoiseEst->low_tn_track_cnt, 300 ) && GT_16( hNoiseEst->lt_haco_ev_fx, 29491 /* 0.9 in Q15 */ ) && ( hNoiseEst->totalNoise_fx > 0 ) ) + IF( GT_16( hNoiseEst->low_tn_track_cnt, 300 ) && GT_16( hNoiseEst->lt_haco_ev_fx, 29491 /* 0.9 in Q15 */ ) && ( hNoiseEst->totalNoise_32fx > 0 ) ) { updt_step = -655; move16(); /* for debug purposes */ diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index 59d2fdf9a..c433dcb2c 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -304,7 +304,7 @@ void long_enr_fx( Word16 high_lpn_flag ); void ivas_long_enr_fx( Encoder_State *st_fx, /* i/o: state structure */ - const Word16 Etot, /* i : total channel E (see lib_enc\analy_sp.c) Q=8*/ + const Word32 Etot, /* i : total channel E (see lib_enc\analy_sp.c) Q=24*/ const Word16 localVAD_HE_SAD, /* i : HE-SAD flag 
without hangover */ Word16 high_lpn_flag, /* i : sp/mus LPN flag */ FRONT_VAD_ENC_HANDLE hFrontVad[], /* i/o: front-VAD handles */ @@ -347,7 +347,7 @@ void noise_est_down_ivas_fx( Word16 *q_enr, const Word16 min_band, /* i : minimum critical band */ const Word16 max_band, /* i : maximum critical band */ - Word16 *totalNoise, /* o : noise estimate over all critical bands */ + Word32 *totalNoise, /* o : noise estimate over all critical bands */ Word32 Etot, /* i : Energy of current frame Q24*/ Word32 *Etot_last, /* i/o: Energy of last frame Q24 */ Word32 *Etot_v_h2 /* i/o: Energy variations of noise frames Q24 */ @@ -398,7 +398,7 @@ void noise_est_ivas_fx( Word16 *ncharX, /* o : Q11 */ Word16 *sp_div, /* o : Q_sp_div */ Word16 *Q_sp_div, /* o : Q factor for sp_div */ - Word16 *non_staX, /* o : non-stationarity for sp/mus classifier */ + Word32 *non_staX, /* o : non-stationarity for sp/mus classifier */ Word16 *loc_harm, /* o : multi-harmonicity flag for UV classifier */ const Word32 *lf_E, /* i : per bin energy for low frequencies q_lf_E */ const Word16 q_lf_E, /* i : Q of lf_E Q0 */ @@ -4465,6 +4465,13 @@ Word16 noise_est_AR1_Qx( /* o: Qx y(n) */ Word16 y, /* i : Qx y(n-1) */ Word16 alpha /*i : Q15 scaling of driving x(n) */ ); + +Word32 noise_est_AR1_Qx_32( /* o: Qx y(n) */ + Word32 x, /* i : Qx x(n) */ + Word32 y, /* i : Qx y(n-1) */ + Word32 alpha /*i : Q15 scaling of driving x(n) */ +); + void FEC_lsf_estim_enc_fx( Encoder_State *st_fx, /* i : Encoder static memory */ Word16 *lsf /* o : estimated LSF vector Qlog2(2.56)*/ diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c index 817e5fb70..fd983741b 100644 --- a/lib_enc/speech_music_classif_fx.c +++ b/lib_enc/speech_music_classif_fx.c @@ -1588,7 +1588,7 @@ Word16 ivas_smc_gmm_fx( const Word16 cor_map_sum_fx, /* i : correlation map sum (from multi-harmonic anal.) 
Q8 */ const Word32 epsP_fx[M + 1], /* i : LP prediciton error */ const Word32 PS_fx[], /* i : energy spectrum */ - const Word16 non_sta_fx, /* i : unbound non-stationarity Q8 */ + const Word32 non_sta_fx, /* i : unbound non-stationarity Q20 */ const Word16 relE_fx, /* i : relative frame energy Q8 */ Word16 *high_lpn_flag, /* i/o: sp/mus LPN flag */ const Word16 flag_spitch, /* i : flag to indicate very short stable pitch */ @@ -1879,7 +1879,7 @@ Word16 ivas_smc_gmm_fx( move32(); /* [8] non_sta */ - *pFV_fx++ = L_shl( non_sta_fx, Q12 ); /*scaling from Q8 to Q20*/ + *pFV_fx++ = non_sta_fx; /*Q20*/ move32(); /* [9] epsP */ @@ -3161,7 +3161,7 @@ void ivas_smc_mode_selection_fx( test(); test(); test(); - if ( EQ_16( st->vad_flag, 1 ) && LE_32( element_brate, IVAS_16k4 ) && GT_16( st->lp_noise_fx, 7680 /* 30.0f in Q8 */ ) && st->sp_aud_decision1 == 0 && GE_16( st->bwidth, SWB ) && EQ_16( st->coder_type_raw, UNVOICED ) ) + if ( EQ_16( st->vad_flag, 1 ) && LE_32( element_brate, IVAS_16k4 ) && GT_32( st->lp_noise_32fx, 503316480 /* 30.0f in Q24 */ ) && st->sp_aud_decision1 == 0 && GE_16( st->bwidth, SWB ) && EQ_16( st->coder_type_raw, UNVOICED ) ) { st->GSC_noisy_speech = 1; move16(); diff --git a/lib_enc/stat_enc.h b/lib_enc/stat_enc.h index 2fbec12bd..0647b6ebc 100644 --- a/lib_enc/stat_enc.h +++ b/lib_enc/stat_enc.h @@ -542,8 +542,9 @@ typedef struct noise_estimation_structure Word32 ave_enr_fx[NB_BANDS]; /* Q_new + Q_SCALE Noise estimator - long-term average energy per critical band ave_enr_q */ Word16 ave_enr_q; - Word16 aEn; /* Noise estimator - noise estimator adaptation flag */ - Word16 totalNoise_fx; /* Q8 Noise estimator - total noise energy */ + Word16 aEn; /* Noise estimator - noise estimator adaptation flag */ + Word16 totalNoise_fx; /* Q8 Noise estimator - total noise energy */ + Word32 totalNoise_32fx; /* Q24 Noise estimator - total noise energy */ Word16 first_noise_updt; /* Noise estimator - flag used to determine if the first noise update frame */ Word16 
first_noise_updt_cnt; /* Noise estimator - counter of frame after first noise update */ @@ -1522,9 +1523,11 @@ typedef struct enc_core_structure Word16 sharpFlag; Word16 localVAD; /* i : local VAD flag */ - Word32 bckr_tilt_lt; /* Q16 */ - Word16 lp_speech_fx; /* Q8 */ - Word16 lp_noise_fx; /* CNG and DTX - LP filtered total noise estimation Q8 */ + Word32 bckr_tilt_lt; /* Q16 */ + Word16 lp_speech_fx; /* Q8 */ + Word16 lp_noise_fx; /* CNG and DTX - LP filtered total noise estimation Q8 */ + Word32 lp_speech_32fx; /* Q24 */ + Word32 lp_noise_32fx; /* CNG and DTX - LP filtered total noise estimation Q24 */ Word16 Opt_HE_SAD_ON_fx; Word16 nb_active_frames_HE_SAD_fx; Word16 voicing_old_fx; diff --git a/lib_enc/vad_fx.c b/lib_enc/vad_fx.c index 721a6e348..1cf7a5de8 100644 --- a/lib_enc/vad_fx.c +++ b/lib_enc/vad_fx.c @@ -1997,13 +1997,13 @@ Word16 wb_vad_ivas_fx( } if ( LT_16( lp_speech_fx, -100 * 256 ) ) { - lp_speech_fx = st_fx->lp_speech_fx; /*Q8*/ + lp_speech_fx = extract_h( st_fx->lp_speech_32fx ); /*Q8*/ move16(); } if ( LT_16( lp_noise_fx, -100 * 256 ) ) { - lp_noise_fx = st_fx->lp_noise_fx; /*Q8*/ + lp_noise_fx = extract_h( st_fx->lp_noise_32fx ); /*Q8*/ move16(); } -- GitLab