From bd8637d94fda7573c15f0e07b714299f3bb4ac7e Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Fri, 25 Apr 2025 14:55:08 +0530 Subject: [PATCH 1/2] Fix for 3GPP issue 1427: Basop Encoder Spectral Gaps in Stereo DTX 13.2 kbps Noisy Signal Link #1427 --- lib_enc/ivas_core_enc_fx.c | 19 ++++++------------- lib_enc/ivas_core_pre_proc_fx.c | 14 ++++++++++---- lib_enc/prot_fx_enc.h | 1 + lib_enc/stat_enc.h | 2 +- lib_enc/swb_bwe_enc_fx.c | 17 +++++++++-------- lib_enc/swb_pre_proc_fx.c | 15 ++++++++++++++- 6 files changed, 41 insertions(+), 27 deletions(-) diff --git a/lib_enc/ivas_core_enc_fx.c b/lib_enc/ivas_core_enc_fx.c index f5d3c4d73..6fcb8abd0 100644 --- a/lib_enc/ivas_core_enc_fx.c +++ b/lib_enc/ivas_core_enc_fx.c @@ -675,12 +675,6 @@ ivas_error ivas_core_enc_fx( wb_bwe_enc_ivas_fx( st, new_inp_resamp16k_fx[n] ); } - IF( st->hBWE_FD != NULL ) - { - Scale_sig( st->hBWE_FD->L_old_wtda_swb_fx, L_FRAME48k, Q1 ); // Q-1 -> Q0 - st->Q_old_wtda = add( st->Q_old_wtda, Q1 ); - move16(); - } /*---------------------------------------------------------------------* * SWB(FB) TBE encoding @@ -725,8 +719,6 @@ ivas_error ivas_core_enc_fx( Scale_sig( st->hBWE_FD->L_old_wtda_swb_fx, L_FRAME48k, shift ); // st->Q_old_wtda } - Word16 q_new_swb_speech_buffer = getScaleFactor16( new_swb_speech_buffer_fx_16, L_FRAME48k + STEREO_DFT_OVL_MAX ); - Scale_sig( new_swb_speech_buffer_fx_16, L_FRAME48k + STEREO_DFT_OVL_MAX, q_new_swb_speech_buffer ); // Q0->q_new_swb_speech_buffer /* SWB TBE encoder */ test(); @@ -750,11 +742,9 @@ ivas_error ivas_core_enc_fx( } ELSE IF( EQ_16( st->extl, SWB_BWE ) || EQ_16( st->extl, FB_BWE ) ) { - Copy_Scale_sig_32_16( shb_speech_fx32, shb_speech_fx, L_FRAME16k, -Q16 ); // Q_shb_spch - 16 - Scale_sig( new_swb_speech_buffer_fx_16, L_FRAME48k + STEREO_DFT_OVL_MAX, negate( q_new_swb_speech_buffer ) ); // q_new_swb_speech_buffer -> Q0 + Copy_Scale_sig_32_16( shb_speech_fx32, shb_speech_fx, L_FRAME16k, -Q16 ); // Q_shb_spch - 16 /* SWB(FB) BWE encoder */ - swb_bwe_enc_ivas_fx( st, last_element_mode, old_inp_12k8_fx[n], old_inp_16k_fx[n], old_syn_12k8_16k_fx[n], new_swb_speech_fx_16, shb_speech_fx, sub( Q_shb_spch, Q16 ), sub( Q_new[n], 1 ) ); - Scale_sig( new_swb_speech_buffer_fx_16, L_FRAME48k + STEREO_DFT_OVL_MAX, q_new_swb_speech_buffer ); // Q0 -> q_new_swb_speech_buffer + swb_bwe_enc_ivas_fx( st, last_element_mode, old_inp_12k8_fx[n], old_inp_16k_fx[n], old_syn_12k8_16k_fx[n], new_swb_speech_fx_16, st->q_inp, shb_speech_fx, sub( Q_shb_spch, Q16 ), sub( Q_new[n], 1 ) ); } Scale_sig( old_syn_12k8_16k_fx[n], L_FRAME16k, sub( Q1, Q_new[n] ) ); // Q0 @@ -776,6 +766,9 @@ ivas_error ivas_core_enc_fx( * Inter-channel BWE encoding *-------------------------------------------------------------------*/ + Word16 q_new_swb_speech_buffer = getScaleFactor16( new_swb_speech_buffer_fx_16, L_FRAME48k + STEREO_DFT_OVL_MAX ); + Scale_sig( new_swb_speech_buffer_fx_16, L_FRAME48k + STEREO_DFT_OVL_MAX, q_new_swb_speech_buffer ); // st->q_inp+q_new_swb_speech_buffer + q_new_swb_speech_buffer = add( st->q_inp, q_new_swb_speech_buffer ); test(); test(); IF( n == 0 && GE_32( input_Fs, 32000 ) && hStereoICBWE != NULL ) @@ -784,7 +777,7 @@ ivas_error ivas_core_enc_fx( stereo_icBWE_preproc_fx( hCPE, input_frame, new_swb_speech_buffer_fx_16 /*tmp buffer*/, q_new_swb_speech_buffer ); q_new_swb_speech_buffer = add( q_new_swb_speech_buffer, 16 ); - Copy_Scale_sig_16_32_no_sat( new_swb_speech_buffer_fx_16, new_swb_speech_buffer_fx, L_FRAME48k + STEREO_DFT_OVL_MAX, Q16 ); // q_new_swb_speech_buffer - 16 - > q_new_swb_speech_buffer + Copy_Scale_sig_16_32_no_sat( new_swb_speech_buffer_fx_16, new_swb_speech_buffer_fx, L_FRAME48k + STEREO_DFT_OVL_MAX, Q16 ); // q_new_swb_speech_buffer+st->q_inp - 16 - > q_new_swb_speech_buffer+st->q_inp Copy_Scale_sig_16_32_no_sat( voice_factors_fx[0], voice_factors_fx32[0], NB_SUBFR16k, Q16 ); // Q31 stereo_icBWE_enc_ivas_fx( hCPE, shb_speech_fx32, sub( Q31, Q_shb_spch ), new_swb_speech_buffer_fx, sub( Q31, q_new_swb_speech_buffer ), voice_factors_fx32[0] ); diff --git a/lib_enc/ivas_core_pre_proc_fx.c b/lib_enc/ivas_core_pre_proc_fx.c index 6fe6b6720..48253aed6 100644 --- a/lib_enc/ivas_core_pre_proc_fx.c +++ b/lib_enc/ivas_core_pre_proc_fx.c @@ -886,8 +886,11 @@ ivas_error ivas_compute_core_buffers_fx( IF( EQ_16( st->bwidth, WB ) ) { - Copy_Scale_sig( new_inp_16k_fx - delay, st->hBWE_FD->old_input_wb_fx, delay, negate( add( Q_old_inp_16k, 1 ) ) ); /* Scaling to Q(-1) */ - Copy( new_inp_16k_fx - STEREO_DFT_OVL_16k, st->hBWE_FD->L_old_wtda_swb_fx + L_FRAME16k - STEREO_DFT_OVL_16k + delay, sub( STEREO_DFT_OVL_16k, delay ) ); /* Check Q here once. Q should be Q_old_wtda */ + Copy_Scale_sig( new_inp_16k_fx - delay, st->hBWE_FD->old_input_wb_fx, delay, negate( add( Q_old_inp_16k, 1 ) ) ); /* Scaling to Q(-1) */ + scale_sig( st->hBWE_FD->L_old_wtda_swb_fx, L_FRAME48k, sub( Q_old_inp_16k, st->Q_old_wtda ) ); // st->Q_old_wtda->Q_old_inp_16k + Copy( new_inp_16k_fx - STEREO_DFT_OVL_16k, st->hBWE_FD->L_old_wtda_swb_fx + L_FRAME16k - STEREO_DFT_OVL_16k + delay, sub( STEREO_DFT_OVL_16k, delay ) ); + st->Q_old_wtda = Q_old_inp_16k; + move16(); } } ELSE IF( EQ_16( element_mode, IVAS_CPE_TD ) ) @@ -898,8 +901,11 @@ ivas_error ivas_compute_core_buffers_fx( test(); IF( EQ_16( st->bwidth, WB ) && st->hBWE_FD != NULL ) { - Copy_Scale_sig( new_inp_16k_fx + L_FILT16k - delay, st->hBWE_FD->old_input_wb_fx, delay, negate( add( Q_old_inp_16k, 1 ) ) ); /* Scaling to Q(-1) */ - Copy( new_inp_16k_fx - L_MEM_RECALC_16K, st->hBWE_FD->L_old_wtda_swb_fx + L_FRAME16k - L_MEM_RECALC_16K - L_FILT16k + delay, sub( L_MEM_RECALC_16K + L_FILT16k, delay ) ); /* Check Q here once. Q should be Q_old_wtda */ + Copy_Scale_sig( new_inp_16k_fx + L_FILT16k - delay, st->hBWE_FD->old_input_wb_fx, delay, negate( add( Q_old_inp_16k, 1 ) ) ); /* Scaling to Q(-1) */ + scale_sig( st->hBWE_FD->L_old_wtda_swb_fx, L_FRAME48k, sub( Q_old_inp_16k, st->Q_old_wtda ) ); // st->Q_old_wtda->Q_old_inp_16k + Copy( new_inp_16k_fx - L_MEM_RECALC_16K, st->hBWE_FD->L_old_wtda_swb_fx + L_FRAME16k - L_MEM_RECALC_16K - L_FILT16k + delay, sub( L_MEM_RECALC_16K + L_FILT16k, delay ) ); + st->Q_old_wtda = Q_old_inp_16k; + move16(); } } ELSE IF( element_mode == IVAS_SCE ) diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index 4237aa9c3..11d6404b7 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -639,6 +639,7 @@ void swb_bwe_enc_ivas_fx( Word16 *old_input_16k_fx, /* i : input signal @16kHz for SWB BWE */ const Word16 *old_syn_12k8_16k_fx, /* i : ACELP core synthesis at 12.8kHz or 16kHz */ const Word16 *new_swb_speech_fx, /* i : original input signal at 32kHz */ + const Word16 Q_new_swb_speech, /* i : Q for new_swb_speech_fx */ Word16 *shb_speech_fx, /* i : SHB target signal (6-14kHz) at 16kHz */ Word16 Q_shb_speech, Word16 Q_slb_speech ); diff --git a/lib_enc/stat_enc.h b/lib_enc/stat_enc.h index 45ec01b31..4da0bff61 100644 --- a/lib_enc/stat_enc.h +++ b/lib_enc/stat_enc.h @@ -1013,7 +1013,7 @@ typedef struct fd_bwe_enc_structure { Word16 new_input_hp_fx[NS2SA( 16000, ACELP_LOOK_NS + DELAY_FD_BWE_ENC_NS + DELAY_FIR_RESAMPL_NS - DELAY_CLDFB_NS )]; // Q_new_input_hp Word16 Q_new_input_hp; - Word16 old_input_fx[NS2SA( 48000, DELAY_FD_BWE_ENC_NS + DELAY_FIR_RESAMPL_NS )]; // q0 + Word16 old_input_fx[NS2SA( 48000, DELAY_FD_BWE_ENC_NS + DELAY_FIR_RESAMPL_NS )]; // st->q_inp Word16 old_input_wb_fx[NS2SA( 16000, DELAY_FD_BWE_ENC_NS )]; /* Q(-1) */ Word16 old_input_lp_fx[NS2SA( 16000, ACELP_LOOK_NS + DELAY_FD_BWE_ENC_NS )]; // st->hBWE_FD->prev_Q_input_lp Word16 old_syn_12k8_16k_fx[NS2SA( 16000, DELAY_FD_BWE_ENC_NS )]; // st->Q_syn diff --git a/lib_enc/swb_bwe_enc_fx.c b/lib_enc/swb_bwe_enc_fx.c index 1ebef7404..6b309c334 100644 --- a/lib_enc/swb_bwe_enc_fx.c +++ b/lib_enc/swb_bwe_enc_fx.c @@ -270,6 +270,7 @@ void swb_bwe_enc_ivas_fx( Word16 *old_input_16k_fx, /* i : input signal @16kHz for SWB BWE */ const Word16 *old_syn_12k8_16k_fx, /* i : ACELP core synthesis at 12.8kHz or 16kHz */ const Word16 *new_swb_speech_fx, /* i : original input signal at 32kHz */ + const Word16 Q_new_swb_speech, /* i : Q for new_swb_speech_fx */ Word16 *shb_speech_fx, /* i : SHB target signal (6-14kHz) at 16kHz */ Word16 Q_shb_speech, Word16 Q_slb_speech ) @@ -287,7 +288,7 @@ void swb_bwe_enc_ivas_fx( Word16 old_input_lp_fx[L_FRAME16k]; Word16 new_input_hp_fx[L_FRAME16k]; Word16 yorig_fx[L_FRAME48k]; - Word16 scl, new_input_fx_exp; + Word16 scl, new_input_fx_q; Word16 max; Word16 Sample_Delay_SWB_BWE; Word16 Sample_Delay_HP; @@ -412,7 +413,7 @@ void swb_bwe_enc_ivas_fx( * SWB BWE encoding * FB BWE encoding *---------------------------------------------------------------------*/ - new_input_fx_exp = 0; + new_input_fx_q = Q_new_swb_speech; move16(); test(); IF( ( EQ_16( st_fx->idchan, 1 ) ) && ( EQ_16( last_element_mode, IVAS_CPE_DFT ) ) ) @@ -424,12 +425,12 @@ void swb_bwe_enc_ivas_fx( } } /* MDCT of the core synthesis signal */ - wtda_fx( old_input_fx, &new_input_fx_exp, L_old_input_fx, hBWE_FD->L_old_wtda_swb_fx, + wtda_fx( old_input_fx, &new_input_fx_q, L_old_input_fx, hBWE_FD->L_old_wtda_swb_fx, &st_fx->Q_old_wtda, ALDO_WINDOW, ALDO_WINDOW, /* window overlap of current frame (0: full, 2: none, or 3: half) */ inner_frame ); /* DCT of the ACELP core synthesis */ - direct_transform_fx( L_old_input_fx, yorig_32, 0, inner_frame, &new_input_fx_exp, st_fx->element_mode ); + direct_transform_fx( L_old_input_fx, yorig_32, 0, inner_frame, &new_input_fx_q, st_fx->element_mode ); /* high-band gain control in case of BWS */ IF( st_fx->bwidth_sw_cnt > 0 ) @@ -438,7 +439,7 @@ void swb_bwe_enc_ivas_fx( } /* Convert to 16 Bits (Calc Shift Required to Stay within MAX_Q_NEW_INPUT) */ - scl = sub( 16 + 8, new_input_fx_exp ); + scl = sub( 16 + 8, new_input_fx_q ); /* Possible to Upscale? */ IF( scl > 0 ) { @@ -449,7 +450,7 @@ void swb_bwe_enc_ivas_fx( scl = s_min( Q_synth, scl ); } Copy_Scale_sig32_16( yorig_32, yorig_fx, inner_frame, scl ); - Q_synth = add( sub( new_input_fx_exp, 16 ), scl ); + Q_synth = add( sub( new_input_fx_q, 16 ), scl ); max = 0; move16(); Q_synth_hf = 0; @@ -558,12 +559,12 @@ void swb_bwe_enc_ivas_fx( IF( EQ_16( st_fx->L_frame, L_FRAME16k ) ) { SWB_BWE_encoding_ivas_fx( st_fx, old_input_fx, old_input_lp_fx, new_input_hp_fx, old_syn_12k8_16k_fx, yorig_32, - SWB_fenv_fx, tilt_nb_fx, 80, Q_slb_speech, Q_shb, new_input_fx_exp, new_input_fx_exp ); + SWB_fenv_fx, tilt_nb_fx, 80, Q_slb_speech, Q_shb, new_input_fx_q, new_input_fx_q ); } ELSE { SWB_BWE_encoding_ivas_fx( st_fx, old_input_fx, old_input_lp_fx, new_input_hp_fx, old_syn_12k8_16k_fx, yorig_32, - SWB_fenv_fx, tilt_nb_fx, 6, Q_slb_speech, Q_shb, new_input_fx_exp, new_input_fx_exp ); + SWB_fenv_fx, tilt_nb_fx, 6, Q_slb_speech, Q_shb, new_input_fx_q, new_input_fx_q ); } diff --git a/lib_enc/swb_pre_proc_fx.c b/lib_enc/swb_pre_proc_fx.c index ba165cba4..30cfb4920 100644 --- a/lib_enc/swb_pre_proc_fx.c +++ b/lib_enc/swb_pre_proc_fx.c @@ -166,6 +166,8 @@ void wb_pre_proc_fx( Copy( hBWE_FD->old_input_wb_fx, old_input, Sample_Delay_WB_BWE ); Copy( new_inp_resamp16k + L_FRAME16k - Sample_Delay_WB_BWE, hBWE_FD->old_input_wb_fx, Sample_Delay_WB_BWE ); Copy( old_input, hBWE_FD->L_old_wtda_swb_fx, L_FRAME16k ); + st_fx->Q_old_wtda = -1; + move16(); } return; } @@ -778,7 +780,7 @@ void swb_pre_proc_fx( /*full implementation pending*/ void swb_pre_proc_ivas_fx( Encoder_State *st, /* i/o: encoder state structure */ - Word16 *new_swb_speech, /* o : original input signal at 32kHz - Q0 */ + Word16 *new_swb_speech, /* o : original input signal at 32kHz - st->q_inp */ Word32 *new_swb_speech_fx, /* o : original input signal at 32kHz - Q - q_reImBuffer */ Word16 *shb_speech, /* o : SHB target signal (6-14kHz) at 16kHz- Q(Q_shb_spch) */ Word16 *Q_shb_spch, @@ -846,6 +848,9 @@ void swb_pre_proc_ivas_fx( test(); test(); + scale_sig( hBWE_FD->L_old_wtda_swb_fx, L_FRAME48k, sub( st->q_inp, st->Q_old_wtda ) ); // st->Q_old_wtda -> st->q_inp + st->Q_old_wtda = st->q_inp; + move16(); IF( EQ_16( st->element_mode, IVAS_CPE_TD ) && GE_16( st->bwidth, SWB ) ) { Copy( st->input_fx - hCPE->hStereoTCA->lMemRecalc, hBWE_FD->L_old_wtda_swb_fx + L_FRAME32k - sub( hCPE->hStereoTCA->lMemRecalc, Sample_Delay_SWB_BWE ), sub( hCPE->hStereoTCA->lMemRecalc, Sample_Delay_SWB_BWE ) ); @@ -880,6 +885,8 @@ void swb_pre_proc_ivas_fx( IF( NE_16( st->extl, WB_BWE ) ) { Copy( old_input_fx, hBWE_FD->L_old_wtda_swb_fx, L_FRAME32k ); + st->Q_old_wtda = st->q_inp; + move16(); } } @@ -929,6 +936,10 @@ void swb_pre_proc_ivas_fx( test(); test(); + scale_sig( hBWE_FD->L_old_wtda_swb_fx, L_FRAME48k, sub( st->q_inp, st->Q_old_wtda ) ); // st->Q_old_wtda -> st->q_inp + st->Q_old_wtda = st->q_inp; + move16(); + IF( EQ_16( st->element_mode, IVAS_CPE_TD ) && GE_16( st->bwidth, SWB ) ) { IF( EQ_16( st->bwidth, SWB ) ) @@ -1029,6 +1040,8 @@ void swb_pre_proc_ivas_fx( set16_fx( old_input_fx, 0, Sample_Delay_SWB_BWE ); Copy( new_swb_speech + inner_frame - Sample_Delay_SWB_BWE, hBWE_FD->old_input_fx, Sample_Delay_SWB_BWE ); Copy( old_input_fx, hBWE_FD->L_old_wtda_swb_fx, inner_frame ); + st->Q_old_wtda = st->q_inp; + move16(); } /* resample 48 kHz to 32kHz */ -- GitLab From 6e4399ee5756cb057a36cfd0e8a8b99367f53b03 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Mon, 28 Apr 2025 08:46:47 +0530 Subject: [PATCH 2/2] EVS bitexactness fix --- lib_enc/swb_pre_proc_fx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib_enc/swb_pre_proc_fx.c b/lib_enc/swb_pre_proc_fx.c index 30cfb4920..8991696e6 100644 --- a/lib_enc/swb_pre_proc_fx.c +++ b/lib_enc/swb_pre_proc_fx.c @@ -166,8 +166,6 @@ void wb_pre_proc_fx( Copy( hBWE_FD->old_input_wb_fx, old_input, Sample_Delay_WB_BWE ); Copy( new_inp_resamp16k + L_FRAME16k - Sample_Delay_WB_BWE, hBWE_FD->old_input_wb_fx, Sample_Delay_WB_BWE ); Copy( old_input, hBWE_FD->L_old_wtda_swb_fx, L_FRAME16k ); - st_fx->Q_old_wtda = -1; - move16(); } return; } -- GitLab