From f0002fbd4198e1d91bf29dbc7321f663bb90e742 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Fri, 10 Jan 2025 16:41:01 +0530 Subject: [PATCH] Encoder bug fixes, quality improvements [x] Shift correction in prep_tbe_exc_ivas_fx [x] Fixed Q-mismatch for mem_shb_speech_ref_fx [x] Removed saturation in attack_det and fixed q mismatch [x] Fix for saturation in tcx_ltp_encode --- lib_com/swb_tbe_com_fx.c | 7 +------ lib_enc/ivas_stereo_icbwe_enc.c | 4 +++- lib_enc/speech_music_classif_fx.c | 10 ++++++++-- lib_enc/swb_pre_proc.c | 4 +++- lib_enc/tcx_ltp_enc_fx.c | 3 +-- 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/lib_com/swb_tbe_com_fx.c b/lib_com/swb_tbe_com_fx.c index b7c650e9e..7b64a2923 100644 --- a/lib_com/swb_tbe_com_fx.c +++ b/lib_com/swb_tbe_com_fx.c @@ -7847,13 +7847,8 @@ void prep_tbe_exc_ivas_fx( } ELSE { - Word16 shift = 8; + Word16 shift = 4; move16(); - if ( element_mode == EVS_MONO ) - { - shift = 4; - move16(); - } IF( gain_preQ_fx != 0 ) { FOR( i = 0; i < L_subfr; i++ ) diff --git a/lib_enc/ivas_stereo_icbwe_enc.c b/lib_enc/ivas_stereo_icbwe_enc.c index aed080b12..f21cf3531 100644 --- a/lib_enc/ivas_stereo_icbwe_enc.c +++ b/lib_enc/ivas_stereo_icbwe_enc.c @@ -664,7 +664,8 @@ void stereo_icBWE_enc_ivas_fx( Copy_Scale_sig( hStereoICBWE->mem_shb_speech_ref_fx, hStereoICBWE->mem_shb_speech_ref_fx, L_LOOK_16k, negate( sub( max_e, hStereoICBWE->mem_shb_speech_ref_e ) ) ); // mem_shb_speech_ref_e set32_fx( shb_frame_ref_fx, 0, L_LOOK_16k + L_FRAME16k ); - Copy_Scale_sig_16_32( hStereoICBWE->mem_shb_speech_ref_fx, shb_frame_ref_fx, L_LOOK_16k, negate( sub( max_e, hStereoICBWE->mem_shb_speech_ref_e ) ) + Q16 ); // mem_shb_speech_ref_e + Copy_Scale_sig_16_32( hStereoICBWE->mem_shb_speech_ref_fx, shb_frame_ref_fx, L_LOOK_16k, 0 ); // Q = 15 - mem_shb_speech_ref_e + scale_sig32( shb_frame_ref_fx, L_LOOK_16k, add( negate( sub( max_e, hStereoICBWE->mem_shb_speech_ref_e ) ), Q16 ) ); // Q = 31 - mem_shb_speech_ref_e hStereoICBWE->mem_shb_speech_ref_e = max_e; shb_frame_ref_e = max_e; @@ -678,6 +679,7 @@ void stereo_icBWE_enc_ivas_fx( Copy_Scale_sig32( shb_speech_ref_fx, shb_frame_ref_fx + L_LOOK_16k - refMemLen, L_FRAME16k, negate( sub( max_e, shb_speech_ref_e ) ) ); /* shb_speech_ref_e */ Copy_Scale_sig_32_16( shb_frame_ref_fx + L_FRAME16k, hStereoICBWE->mem_shb_speech_ref_fx, L_LOOK_16k, negate( sub( max_e, hStereoICBWE->mem_shb_speech_ref_e ) ) - Q16 ); // mem_shb_speech_ref_e + test(); test(); IF( ( EQ_16( st->extl, SWB_TBE ) || EQ_16( st->extl, FB_TBE ) ) && EQ_16( st->flag_ACELP16k, 1 ) ) diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c index 319db976e..c5a131ac3 100644 --- a/lib_enc/speech_music_classif_fx.c +++ b/lib_enc/speech_music_classif_fx.c @@ -2846,13 +2846,18 @@ static Word16 attack_det_ivas_fx( /* o : attack flag Copy32( finc, finc_prev, attack1 ); /* compute mean energy before the attack */ - etmp = L_shr( sum32_fx( finc_prev, ATT_NSEG ), 5 ); /*ATT_NSEG == 32*/ + Word64 W_etmp = W_deposit32_l( finc_prev[0] ); + FOR( Word16 idx = 1; idx < ATT_NSEG; idx++ ) + { + W_etmp = W_add( W_etmp, W_deposit32_l( finc_prev[idx] ) ); + } + W_etmp = W_shr( W_etmp, 5 ); /*ATT_NSEG == 32*/ etmp2 = finc[attack1]; move32(); test(); test(); - if ( ( LT_32( L_shl( etmp, 4 ), etmp2 ) ) || ( LT_32( L_add( L_shl( etmp, 3 ), L_shl( etmp, 2 ) ), etmp2 ) && EQ_16( last_clas, UNVOICED_CLAS ) ) ) + if ( ( LT_64( W_shl( W_etmp, 4 ), W_deposit32_l( etmp2 ) ) ) || ( LT_64( W_add( W_shl( W_etmp, 3 ), W_shl( W_etmp, 2 ) ), W_deposit32_l( etmp2 ) ) && EQ_16( last_clas, UNVOICED_CLAS ) ) ) { attack = attack1; move16(); @@ -3045,6 +3050,7 @@ void ivas_smc_mode_selection_fx( IF( NE_16( shl( Q_new, 1 ), hSpMusClas->Q_lt_finc ) ) { Scale_sig32( &hSpMusClas->lt_finc_fx, 1, sub( shl( Q_new, 1 ), hSpMusClas->Q_lt_finc ) ); + Scale_sig32( hSpMusClas->finc_prev_fx, ATT_NSEG, sub( shl( Q_new, 1 ), hSpMusClas->Q_lt_finc ) ); hSpMusClas->Q_lt_finc = shl( Q_new, 1 ); move16(); } diff --git a/lib_enc/swb_pre_proc.c b/lib_enc/swb_pre_proc.c index f4f2eeaf4..2f76fa5f4 100644 --- a/lib_enc/swb_pre_proc.c +++ b/lib_enc/swb_pre_proc.c @@ -393,7 +393,9 @@ void swb_pre_proc_ivas_fx( /* delay corresponding to CLDFB delay */ Copy( old_input_fx + STEREO_DFT_OVL_16k - 20, shb_speech, L_FRAME16k ); Copy( old_input_fx, hBWE_TD->old_speech_shb_fx + L_LOOK_16k + L_SUBFR16k - ( STEREO_DFT_OVL_16k - 20 ), STEREO_DFT_OVL_16k - 20 ); - Copy( old_input_fx, hCPE->hStereoICBWE->mem_shb_speech_ref_fx, STEREO_DFT_OVL_16k - 20 ); + Copy( old_input_fx, hCPE->hStereoICBWE->mem_shb_speech_ref_fx, STEREO_DFT_OVL_16k - 20 ); // Q0 + hCPE->hStereoICBWE->mem_shb_speech_ref_e = 15; + move16(); IF( LE_32( CldfbHB_fx, 0 ) ) { diff --git a/lib_enc/tcx_ltp_enc_fx.c b/lib_enc/tcx_ltp_enc_fx.c index fbc8bf263..a37e8ec77 100644 --- a/lib_enc/tcx_ltp_enc_fx.c +++ b/lib_enc/tcx_ltp_enc_fx.c @@ -1154,8 +1154,7 @@ void tcx_ltp_encode_ivas_fx( move16(); sqr = Sqrt16( hTcxEnc->tcxltp_on_mem, &exponent ); move16(); - sqr = shl_sat( sqr, exponent ); - IF( ( EQ_16( tcxOnly, 1 ) && EQ_16( st->element_mode, IVAS_CPE_MDCT ) && ( GT_16( ( mult( sqr, mult( ( norm_corr_fx ), 29492 ) ) ), 14419 ) && ( LT_16( tempFlatness_fx, 768 ) || ( LT_16( tempFlatness_fx, 896 ) && LT_16( maxEnergyChange_fx, 176 ) ) ) ) ) ) // 0.9f Q15, 6.0f Q7, 7.0f Q7, 22.0f Q3 + IF( ( EQ_16( tcxOnly, 1 ) && EQ_16( st->element_mode, IVAS_CPE_MDCT ) && ( GT_16( ( mult( sqr, mult( ( norm_corr_fx ), 29492 ) ) ), shr( 14419, exponent ) ) && ( LT_16( tempFlatness_fx, 768 ) || ( LT_16( tempFlatness_fx, 896 ) && LT_16( maxEnergyChange_fx, 176 ) ) ) ) ) ) // 0.9f Q15, 6.0f Q7, 7.0f Q7, 22.0f Q3 { ltp_on = 1; move16(); -- GitLab