diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 9e9e65bb7ae0cfaa5f014645b5c91dcc3a9f3985..bb796d8795dce77716937d471653e13f7e9f2d66 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -1595,7 +1595,12 @@ void decoder_tcx_imdct_fx( Word32 x_fx[N_MAX], Word16 q_x, Word16 xn_buf_fx[], +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Word16 *q_win, + Word16 *q_winFB, +#else Word16 q_win, +#endif const UWord16 kernelType, /* i : TCX transform kernel type */ const Word16 fUseTns, /* i : flag that is set if TNS data is present */ Word16 synth_fx[], /* i/o: synth[-M..L_frame] */ diff --git a/lib_com/options.h b/lib_com/options.h index d1b3cb63e4f0e20bafe9f31d1fd8e41adc6fc06b..bfa02eacf4e6e613ac4f68a3606a6f46706edf6d 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -98,6 +98,10 @@ #define MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of dmx calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Requires MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE. */ +#define FIX_1348_BIT_PRECISION_IMPROVEMENT +#define FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN +#define FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + #define FIX_USAN_BASOP_UTIL_DIVIDE3232 /* Eri: Fix USAN error in BASOP_Util_Divide3232_Scale_newton by adding explicit type cast for -1 in hex */ #define FIX_1740_MISING_POP_WMOPS /* VA: fix issue 1740: missing pop_wmops() */ diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 3144c16dd4c4f000ce1764abe74749ebb4eff6c1..d0aa2baf3e3d26272b1ad4ff8ba2a2a9a91f84c4 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -9447,9 +9447,19 @@ void IMDCT_fx( Word32 *x, Word16 x_e, Word16 *old_syn_overl, Word16 *syn_Overl_T void IMDCT_ivas_fx( Word32 *x_fx, Word16 q_x, +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN Word16 *old_syn_overl_fx, + Word16 *Q_old_syn_overl_fx, Word16 *syn_Overl_TDAC_fx, + Word16 *Q_syn_Overl_TDAC_fx, +#else + Word16 *old_syn_overl_fx, + Word16 *syn_Overl_TDAC_fx, +#endif Word16 *xn_buf_fx, +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Word16 q_xn_buf_fx, +#endif const Word16 *tcx_aldo_window_1_fx, const PWord16 *tcx_aldo_window_1_trunc_fx, const PWord16 *tcx_aldo_window_2_fx, @@ -9470,11 +9480,19 @@ void IMDCT_ivas_fx( const Word16 frame_cnt, const Word16 bfi, Word16 *old_out_fx, +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + Word16 *q_old_out_fx, +#endif const Word16 FB_flag, Decoder_State *st, const Word16 fullbandScale, Word16 *acelp_zir_fx, +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Word16 *q_acelp_zir_fx, + Word16 *pq_win ); +#else Word16 q_win ); +#endif void v_mult16_fixed( const Word16 x1[], /* i : Input vector 1 */ diff --git a/lib_dec/acelp_core_dec_fx.c b/lib_dec/acelp_core_dec_fx.c index 76bf9d622fb81e5a9e917788387887687c0032bc..a05304d8a7e19d71dea45252b3488f44a7a87970 100644 --- a/lib_dec/acelp_core_dec_fx.c +++ b/lib_dec/acelp_core_dec_fx.c @@ -62,18 +62,18 @@ ivas_error acelp_core_dec_fx( Word16 old_syn_12k8_16k_fx[], /* o : intermediate ACELP synthesis at 12.8kHz or 16kHz to be used by SWB BWE Q_syn2-1*/ const Word16 sharpFlag, /* i : formant sharpening flag */ Word16 pitch_buf_fx[NB_SUBFR16k], /* o : Word16 pitch for each subframe Q6*/ - Word16 *unbits, /* o : number of unused bits */ - Word16 *sid_bw, /* o : 0-NB/WB, 1-SWB SID */ + Word16 *unbits, /* o : number of unused bits */ + Word16 *sid_bw, /* o : 0-NB/WB, 1-SWB SID */ STEREO_TD_DEC_DATA_HANDLE hStereoTD, /* i/o: TD stereo decoder handle Qlog2(2.56)*/ const Word16 tdm_lspQ_PCh_fx[M], /* i : Q LSPs for primary channel Q15*/ const Word16 tdm_lsfQ_PCh_fx[M], /* i : Q LSFs for primary channel */ - const Word16 use_cldfb_for_dft, /* i : flag to use of CLDFB for DFT Stereo */ - const Word16 last_element_mode, /* i : last element mode */ - const Word32 last_element_brate, /* i : last element bitrate */ - const Word16 flag_sec_CNA, /* i : CNA flag for secondary channel */ - const Word16 nchan_out, /* i : number of output channels */ - STEREO_CNG_DEC_HANDLE hStereoCng, /* i : stereo CNG handle */ - const Word16 read_sid_info /* i : read SID info flag */ + const Word16 use_cldfb_for_dft, /* i : flag to use of CLDFB for DFT Stereo */ + const Word16 last_element_mode, /* i : last element mode */ + const Word32 last_element_brate, /* i : last element bitrate */ + const Word16 flag_sec_CNA, /* i : CNA flag for secondary channel */ + const Word16 nchan_out, /* i : number of output channels */ + STEREO_CNG_DEC_HANDLE hStereoCng, /* i : stereo CNG handle */ + const Word16 read_sid_info /* i : read SID info flag */ ) { Word32 synth_fx[960], save_hb_synth_fx[960] /*, bwe_exc_extended_fx[L_FRAME32k + NL_BUFF_OFFSET]*/; diff --git a/lib_dec/core_dec_init_fx.c b/lib_dec/core_dec_init_fx.c index eb34e1bc1e4ad06bdf2fa06d34dfe06806550b3e..d5a1dfeb067483b3dcb6849dd94d8453386bad3e 100644 --- a/lib_dec/core_dec_init_fx.c +++ b/lib_dec/core_dec_init_fx.c @@ -396,8 +396,15 @@ void open_decoder_LPD_fx( move16(); Copy( hHQ_core->fer_samples_fx + delay_comp, hTcxDec->syn_OverlFB, shr( hTcxDec->L_frameTCX, 1 ) ); /* hHQ_core->Q_fer_samples*/ - +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hTcxDec->Q_syn_OverlFB = hHQ_core->Q_fer_samples; + move16(); +#endif lerp( hHQ_core->fer_samples_fx + delay_comp, hTcxDec->syn_Overl, shr( st->L_frame, 1 ), shr( hTcxDec->L_frameTCX, 1 ) ); /*Q0: ACELP(bfi)->TCX(rect)*/ +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hTcxDec->Q_syn_Overl = hHQ_core->Q_fer_samples; + move16(); +#endif /*old_out needed for MODE1 routine and syn_Overl_TDAC for MODE2 routine*/ hHQ_core->Q_old_wtda = -1; @@ -1069,8 +1076,16 @@ void acelp_plc_mdct_transition_fx( delay_comp = NS2SA_FX2( st->output_Fs, DELAY_CLDFB_NS ); move16(); /*CLDFB delay*/ - Copy( st->hHQ_core->fer_samples_fx + delay_comp, st->hTcxDec->syn_OverlFB, shr( st->hTcxDec->L_frameTCX, 1 ) ); /* Q_fer_samples */ + Copy( st->hHQ_core->fer_samples_fx + delay_comp, st->hTcxDec->syn_OverlFB, shr( st->hTcxDec->L_frameTCX, 1 ) ); /* Q_fer_samples */ +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + st->hTcxDec->Q_syn_OverlFB = st->hHQ_core->Q_fer_samples; + move16(); +#endif lerp( st->hHQ_core->fer_samples_fx + delay_comp, st->hTcxDec->syn_Overl, shr( st->L_frame, 1 ), shr( st->hTcxDec->L_frameTCX, 1 ) ); /*ACELP(bfi)->TCX(rect)*/ +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + st->hTcxDec->Q_syn_Overl = st->hHQ_core->Q_fer_samples; + move16(); +#endif /*old_out needed for MODE1 routine and syn_Overl_TDAC for MODE2 routine*/ st->hHQ_core->Q_old_wtda = -1; move16(); @@ -2090,6 +2105,11 @@ void reset_tcx_overl_buf_fx( set16_fx( hTcxDec->syn_Overl, 0, L_FRAME32k / 2 ); /*HQ-CORE(bfi)->TCX don't need it*/ hTcxDec->Q_syn_Overl = 0; move16(); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + set16_fx( hTcxDec->syn_OverlFB, 0, L_FRAME48k / 2 ); /*HQ-CORE(bfi)->TCX don't need it*/ + hTcxDec->Q_syn_OverlFB = 0; + move16(); +#endif set16_fx( hTcxDec->syn_Overl_TDACFB, 0, L_FRAME_MAX / 2 ); /*HQ-CORE(bfi)->TCX don't need it*/ hTcxDec->Q_syn_Overl_TDACFB = 0; move16(); diff --git a/lib_dec/dec_ace_fx.c b/lib_dec/dec_ace_fx.c index 9c366e3f3190c2138e9940a69e30a447609a0d1f..edc429459e1a613dacdfefa6a0bf8b55c16b276f 100644 --- a/lib_dec/dec_ace_fx.c +++ b/lib_dec/dec_ace_fx.c @@ -690,6 +690,10 @@ void decoder_acelp_fx( E_UTIL_deemph2( st->Q_syn, syn, st->preemph_fac, st->L_frame, &tmp_deemph ); /* tmp_deemph and syn in Q0 starting from here*/ bufferCopyFx( syn + shr( st->L_frame, 1 ), hTcxDec->old_syn_Overl, shr( st->L_frame, 1 ), 0 /*Qf_syn*/, -1 /*Qf_old_xnq*/, 0, 0 /*Q_old_xnq*/ ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + st->hTcxDec->Q_old_syn_Overl = sub( st->Q_syn, 1 ); + move16(); +#endif Copy( syn + sub( st->L_frame, M + 1 ), st->syn, 1 + M ); /*Q0*/ diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c index 6590d4b4322a532cc9c0500568d5243b21f44ee6..6206577c104b4fa833b3a00f1aa3cc10a198addb 100644 --- a/lib_dec/dec_tcx_fx.c +++ b/lib_dec/dec_tcx_fx.c @@ -2572,12 +2572,394 @@ void IMDCT_fx( Word32 *x, Word16 x_e, Word16 *old_syn_overl, Word16 *syn_Overl_T } } +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + +static Word16 IMDCT_ivas_fx_calc_qwin( + Decoder_State *st, + Word16 *syn_Overl_TDAC, + Word16 Q_syn_Overl_TDAC, + Word16 *syn_Overl, + Word16 Q_syn_Overl, + Word16 *old_syn_Overl, + Word16 Q_old_syn_Overl, + Word16 *old_out_fx, + Word16 Q_old_out_fx, + Word16 q_win, + const Word16 FB_flag ) +{ + Word16 t, old_syn_Overl_len, syn_Overl_TDAC_len; + + t = L_FRAME32k; + move16(); + if ( FB_flag ) + { + t = L_FRAME48k; + move16(); + } + + old_syn_Overl_len = st->hTcxCfg->tcx_mdct_window_length; + syn_Overl_TDAC_len = s_max( st->hTcxCfg->tcx_mdct_window_length_old, 0 ); + + IF( ( st->prev_bfi && EQ_16( st->last_core_bfi, ACELP_CORE ) ) || EQ_16( st->last_core, ACELP_CORE ) ) + { + old_syn_Overl_len = shr( st->L_frame, 1 ); + syn_Overl_TDAC_len = shr( st->last_L_frame, 1 ); + } + + IF( st->prev_bfi && ( st->last_core_bfi == ACELP_CORE ) ) + { + syn_Overl_TDAC_len = old_syn_Overl_len; + } + + q_win = 6; + move16(); + + // q_win == norm + Q_syn_Overl_TDAC + q_win = s_min( q_win, norm_arr( syn_Overl_TDAC, syn_Overl_TDAC_len ) + Q_syn_Overl_TDAC ); + + // q_win = s_min( q_win, norm_arr( syn_Overl, oldLength / 2 ) + Q_syn_Overl ); + q_win = s_min( q_win, norm_arr( syn_Overl, old_syn_Overl_len ) + Q_syn_Overl ); + + q_win = s_min( q_win, norm_arr( old_syn_Overl, old_syn_Overl_len ) + Q_old_syn_Overl ); + + // q_win = s_min( q_win, norm_arr( old_out_fx, oldLength ) + Q_old_out_fx ); + q_win = s_min( q_win, norm_arr( old_out_fx, t ) + Q_old_out_fx ); + +#if 0 + set16_zero_fx(syn_Overl_TDAC + oldLength / 2, (t-oldLength)/2 ); + //set16_zero_fx(syn_Overl + oldLength / 2, (t-oldLength)/2 ); + set16_zero_fx(old_syn_Overl + oldLength / 2, (t-oldLength)/2 ); + //set16_zero_fx(old_out_fx + oldLength, (t-oldLength) ); +#endif + q_win = s_max( -3, sub( q_win, 2 ) ); + + return q_win; +} + +static void IMDCT_ivas_fx_rescale( + Word16 *xn_buf_fx, + Word16 *q_xn_buf_fx, + Word16 *syn_Overl_TDAC, + Word16 *Q_syn_Overl_TDAC, + Word16 *syn_Overl, + Word16 *Q_syn_Overl, + Word16 *old_syn_Overl, + Word16 *Q_old_syn_Overl, + Word16 *old_out_fx, + Word16 *Q_old_out_fx, + Word16 q_win, + const Word16 FB_flag ) +{ + Word16 oldLength; + + oldLength = L_FRAME32k; + move16(); + if ( FB_flag ) + { + oldLength = L_FRAME48k; + move16(); + } + +#if 1 + IF( xn_buf_fx != NULL ) + { + Scale_sig( xn_buf_fx, L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX, sub( q_win, *q_xn_buf_fx ) ); + *q_xn_buf_fx = q_win; + move16(); + } + Scale_sig( syn_Overl_TDAC, oldLength / 2, sub( q_win, *Q_syn_Overl_TDAC ) ); // st->hTcxDec->Q_syn_Overl_TDAC -> q_win + *Q_syn_Overl_TDAC = q_win; + move16(); + Scale_sig( syn_Overl, oldLength / 2, sub( q_win, *Q_syn_Overl ) ); // st->hTcxDec->Q_syn_Overl -> q_win + *Q_syn_Overl = q_win; + move16(); + if ( FB_flag == 0 ) + { + Scale_sig( old_syn_Overl, oldLength / 2, sub( q_win, *Q_old_syn_Overl ) ); // Q(-1 - st->Q_syn) -> q_win + *Q_old_syn_Overl = q_win; + move16(); + } + Scale_sig( old_out_fx, oldLength, sub( q_win, *Q_old_out_fx ) ); // Q(st->hHQ_core->Q_old_wtda) -> q_win + *Q_old_out_fx = q_win; + move16(); +#endif +} + +static Word16 TCX_MDCT_Inverse_GetScaleFactor( + const Word16 L, /* Q0 */ + Word16 *factor_e /* Q0 */ +) +{ + + Word16 factor; + + IF( EQ_16( L, NORM_MDCT_FACTOR ) ) + { + factor = 32767; + move16(); + *factor_e = 0; + move16(); + } + ELSE IF( EQ_16( L, 2 * NORM_MDCT_FACTOR ) ) + { + factor = 23170; + move16(); + *factor_e = 1; + move16(); + } + ELSE IF( EQ_16( L, 4 * NORM_MDCT_FACTOR ) ) + { + factor = 32767; + move16(); + *factor_e = 1; + move16(); + } + ELSE + { + factor = mult_r( shl( L, 4 ), 26214 /*128.f / NORM_MDCT_FACTOR*/ ); /* 4Q11 */ + *factor_e = 4; + move16(); + + factor = Sqrt16( factor, factor_e ); + } + + return factor; +} + +static void TCX_MDCT_Inverse_qwin_fx( + Word32 *x, // Q( 31 - x_e ) + Word16 x_e, + Word16 *y, /* Qy */ + const Word16 l, /* Q0 */ + const Word16 m, /* Q0 */ + const Word16 r, /* Q0 */ + const Word16 element_mode, /* Q0 */ + Word16 *q_win, + Word16 allow_qwin_change ) +{ + + Word16 i, fac, negfac, s; + Word16 L2 = l, R2 = r; + Word32 tmp_buf[N_MAX + L_MDCT_OVLP_MAX / 2]; + Word16 fac_e; + (void) element_mode; + L2 = shr( l, 1 ); + R2 = shr( r, 1 ); + + x_e = sub( 15, x_e ); + edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e ); + x_e = sub( 15, x_e ); + + fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); /* exp(fac_e) */ + x_e = add( x_e, fac_e ); + + negfac = negate( fac ); + + IF( allow_qwin_change ) + { + // x_e + q_win == L_norm_arr(tmp_buf, , *q_win ); + // q_win = L_norm_arr(tmp_buf, , *q_win ) - x_e; + s = L_norm_arr( tmp_buf + L2, m + R2 + L2 ); + *q_win = s_min( sub( s, x_e ), *q_win ); + } + + s = add( x_e, *q_win ); + move16(); + + FOR( i = 0; i < R2; i++ ) + { + y[l + m + R2 + i] = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[L2 + i], negfac ), s ) ); /* fold out right end of DCT exp(fac_e)*/ + + move16(); + } + + FOR( i = 0; i < L2; i++ ) + { + y[i] = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[L2 + m + R2 + i], fac ), s ) ); /* negate, fold out left end of DCT exp(fac_e)*/ + move16(); + } + + FOR( i = 0; i < ( ( L2 + m + R2 ) >> 1 ); i++ ) + { + Word16 f; + + f = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[L2 + i], negfac ), s ) ); + y[L2 + i] = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[l + m + R2 - 1 - i], negfac ), s ) ); /* time-reverse mid of DCT exp(fac_e)*/ + move16(); + y[l + m + R2 - 1 - i] = f; + move16(); + } +} + +static void TCX_MDST_Inverse_qwin_fx( + Word32 *x, /* exp(x_e) */ + Word16 x_e, + Word16 *y, /* Qx */ + const Word16 l, /* Q0 */ + const Word16 m, /* Q0 */ + const Word16 r, /* Q0 */ + Word16 *q_win, + Word16 allow_qwin_change ) +{ + + Word16 i, fac, negfac, s; + Word16 L2 = l, R2 = r; + move16(); + move16(); + Word32 tmp_buf[N_MAX + L_MDCT_OVLP_MAX / 2]; + Word16 fac_e; + + L2 = shr( l, 1 ); + R2 = shr( r, 1 ); + + x_e = sub( 15, x_e ); + edst_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e ); + x_e = sub( 15, x_e ); + + fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); + x_e = add( x_e, fac_e ); + + negfac = negate( fac ); + + IF( allow_qwin_change ) + { + // x_e + q_win == L_norm_arr(tmp_buf, , *q_win ); + // q_win = L_norm_arr(tmp_buf, , *q_win ) - x_e; + s = L_norm_arr( tmp_buf + L2, m + R2 + L2 ); + *q_win = s_min( sub( s, x_e ), *q_win ); + } + + s = add( x_e, *q_win ); + move16(); + + FOR( i = 0; i < R2; i++ ) + { + y[l + m + R2 + i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + i], fac ), s ) ); /* fold out right end of DCT exp(fac_e)*/ + move16(); + } + + FOR( i = 0; i < L2; i++ ) + { + y[i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + m + R2 + i], negfac ), s ) ); /* negate, fold out left end of DCT exp(fac_e)*/ + move16(); + } + + FOR( i = 0; i < ( ( L2 + m + R2 ) >> 1 ); i++ ) + { + Word16 f; + f = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + i], fac ), s ) ); + + y[L2 + i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[l + m + R2 - 1 - i], negfac ), s ) ); /* time-reverse mid of DCT exp(fac_e)*/ + move16(); + + y[l + m + R2 - 1 - i] = negate( f ); + move16(); + } +} + +/*-------------------------------------------------------------------* + * TCX_MDXT_Inverse_fx() + * + * + *-------------------------------------------------------------------*/ +static void TCX_MDXT_Inverse_qwin_fx( + const Word32 *x, /* exp(x_e) */ + Word16 x_e, + Word16 *y, /* Qx */ + const Word16 l, /* Q0 */ + const Word16 m, /* Q0 */ + const Word16 r, /* Q0 */ + const UWord16 kernel_type, /* Q0 */ + Word16 *q_win, + Word16 allow_qwin_change ) +{ + Word16 signLeft; + Word16 signRight; + Word16 i, fac, negfac, s, fac_e; + const Word16 L2 = shr( l, 1 ), R2 = shr( r, 1 ); + Word32 tmp_buf[N_MAX + L_MDCT_OVLP_MAX / 2]; + Word16 f; + + set32_fx( tmp_buf, 0, N_MAX + L_MDCT_OVLP_MAX / 2 ); + + edxt_fx( x, tmp_buf + L2, add( add( L2, m ), R2 ), kernel_type, TRUE ); + + fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); + x_e = add( x_e, fac_e ); + + negfac = negate( fac ); + IF( GE_16( kernel_type, MDCT_II ) ) + { + signLeft = negfac; + } + ELSE + { + signLeft = fac; + } + // signRight = ( kernel_type & 1 ? fac : negfac ); + IF( L_and( kernel_type, 1 ) ) + { + signRight = fac; + } + ELSE + { + signRight = negfac; + } + + IF( allow_qwin_change ) + { + // x_e + q_win == L_norm_arr(tmp_buf, , *q_win ); + // q_win = L_norm_arr(tmp_buf, , *q_win ) - x_e; + s = L_norm_arr( tmp_buf + L2, m + R2 + L2 ); + *q_win = s_min( sub( s, x_e ), *q_win ); + } + + s = add( x_e, *q_win ); + move16(); + + FOR( i = 0; i < L2; i++ ) + { + y[i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + m + R2 + i], signLeft ), s ) ); /* fold out the left end exp(fac_e)*/ + } + + FOR( i = 0; i < R2; i++ ) + { + y[l + m + R2 + i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + i], signRight ), s ) ); /* ...and right end exp(fac_e)*/ + move16(); + } + + FOR( i = 0; i < ( ( L2 + m + R2 ) >> 1 ); i++ ) + { + + f = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + i], negfac ), s ) ); + + y[L2 + i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[l + m + R2 - 1 - i], negfac ), s ) ); /* time-reverse mid of DCT exp(fac_e)*/ + move16(); + + y[l + m + R2 - 1 - i] = f; + move16(); + } + + return; +} +#endif + void IMDCT_ivas_fx( Word32 *x_fx, // Q(q_x) Word16 q_x, - Word16 *old_syn_overl_fx, // Q(-2) - Word16 *syn_Overl_TDAC_fx, // Q(-2) - Word16 *xn_buf_fx, // Q(-2) +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Word16 *old_syn_overl_fx, // *Q_old_syn_overl_fx + Word16 *Q_old_syn_overl_fx, + Word16 *syn_Overl_TDAC_fx, // *Q_syn_Overl_TDAC_fx + Word16 *Q_syn_Overl_TDAC_fx, +#else + Word16 *old_syn_overl_fx, // Q(-2) + Word16 *syn_Overl_TDAC_fx, // Q(-2) +#endif + Word16 *xn_buf_fx, // Q(-2) +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Word16 q_xn_buf_fx, +#endif const Word16 *tcx_aldo_window_1_fx, // Q(15) const PWord16 *tcx_aldo_window_1_trunc_fx, // Q(15) const PWord16 *tcx_aldo_window_2_fx, // Q(15) @@ -2598,11 +2980,19 @@ void IMDCT_ivas_fx( const Word16 frame_cnt, const Word16 bfi, Word16 *old_out_fx, // Q(-2) +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + Word16 *q_old_out_fx, +#endif const Word16 FB_flag, Decoder_State *st, const Word16 fullbandScale, Word16 *acelp_zir_fx, - Word16 q_win ) // Q(-2) +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Word16 *q_acelp_zir_fx, + Word16 *pq_win ) +#else + Word16 q_win ) // Q(-2) +#endif { Word16 i, nz, aldo, w, L_win, L_ola; Word16 win_fx[( L_FRAME_PLUS + L_MDCT_OVLP_MAX ) / 2]; @@ -2611,7 +3001,27 @@ void IMDCT_ivas_fx( Word16 x_e_hdrm; Word32 c; Word16 exp; +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Word16 q_win = *pq_win; + Word16 allow_qwin_change = 1; + move16(); + move16(); + x_e_hdrm = sub( Q16, q_x ); +#if 0 + IF( *pq_win == 0 ) + { + allow_qwin_change = 0; + } +#endif + IF( allow_qwin_change ) + { + // q_win = IMDCT_ivas_fx_adjust_qwin( *Q_syn_Overl_TDAC_fx, *Q_old_syn_overl_fx, hTcxDec->Q_old_syn_Overl, *q_old_out_fx, q_win ); + q_win = IMDCT_ivas_fx_calc_qwin( st, syn_Overl_TDAC_fx, *Q_syn_Overl_TDAC_fx, old_syn_overl_fx, *Q_old_syn_overl_fx, + hTcxDec->old_syn_Overl, hTcxDec->Q_old_syn_Overl, old_out_fx, *q_old_out_fx, q_win, FB_flag ); + } +#else x_e_hdrm = add( q_win, sub( Q16, q_x ) ); +#endif aldo = 0; move16(); @@ -2692,11 +3102,41 @@ void IMDCT_ivas_fx( Word16 L_spec_TCX5_tmp = 0; move16(); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + IF( allow_qwin_change ) + { + /* Use fixed q_win to avoid the need to adapt scaling of two TCX5 blocks (less effort with maybe not ideal scaling) */ + q_win = -2; + move16(); + allow_qwin_change = 0; + move16(); + } +#endif + FOR( w = 0; w < 2; w++ ) { test(); test(); L_spec_TCX5_tmp = imult1616( w, L_spec_TCX5 ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + // Assume that xn_buf_fx has no headroom. + q_win = s_min( q_xn_buf_fx, q_win ); + + IF( EQ_16( kernel_type, MDST_IV ) || s_and( kernel_type, w ) ) + { + TCX_MDST_Inverse_qwin_fx( x_fx + L_spec_TCX5_tmp, x_e_hdrm, win_fx, L_ola, sub( L_win, L_ola ), L_ola, &q_win, allow_qwin_change ); + } + ELSE IF( ( kernel_type != 0 ) && ( w == 0 ) ) /* type 1 or 2 */ + { + TCX_MDXT_Inverse_qwin_fx( x_fx + L_spec_TCX5_tmp, x_e_hdrm, win_fx, L_ola, sub( L_win, L_ola ), L_ola, kernel_type, &q_win, allow_qwin_change ); + } + ELSE + { + TCX_MDCT_Inverse_qwin_fx( x_fx + L_spec_TCX5_tmp, x_e_hdrm, win_fx, L_ola, sub( L_win, L_ola ), L_ola, st->element_mode, &q_win, allow_qwin_change ); + } + + IMDCT_ivas_fx_rescale( xn_buf_fx, &q_xn_buf_fx, syn_Overl_TDAC_fx, Q_syn_Overl_TDAC_fx, old_syn_overl_fx, Q_old_syn_overl_fx, hTcxDec->old_syn_Overl, &hTcxDec->Q_old_syn_Overl, old_out_fx, q_old_out_fx, q_win, FB_flag ); +#else IF( EQ_16( kernel_type, MDST_IV ) || s_and( kernel_type, w ) ) { TCX_MDST_Inverse_fx( x_fx + L_spec_TCX5_tmp, x_e_hdrm, win_fx, L_ola, sub( L_win, L_ola ), L_ola ); @@ -2709,6 +3149,7 @@ void IMDCT_ivas_fx( { TCX_MDCT_Inverse( x_fx + L_spec_TCX5_tmp, x_e_hdrm, win_fx, L_ola, sub( L_win, L_ola ), L_ola, st->element_mode ); } +#endif tcx_windowing_synthesis_current_frame( win_fx, tcx_aldo_window_2_fx, tcx_mdct_window_half_fx, tcx_mdct_window_minimum_fx, L_ola, tcx_mdct_window_half_length, tcx_mdct_window_min_length, ( w > 0 ) ? 0 : left_rect, ( w > 0 ) || ( w == 0 && index == 2 ) ? MIN_OVERLAP : hTcxCfg->tcx_last_overlap_mode, acelp_zir_fx, hTcxDec->old_syn_Overl, syn_Overl_TDAC_fx, st->old_Aq_12_8_fx, tcx_mdct_window_trans_fx, L_win, tcx_offset < 0 ? -tcx_offset : 0, ( w > 0 ) || ( frame_cnt > 0 ) ? 1 : st->last_core, ( w > 0 ) || ( frame_cnt > 0 ) ? 0 : (Word8) st->last_is_cng, fullbandScale ); @@ -2747,6 +3188,38 @@ void IMDCT_ivas_fx( set16_fx( win_fx, 0, shr( add( L_FRAME_PLUS, L_MDCT_OVLP_MAX ), 1 ) ); /* 1st TCX-5 window, special MDCT with minimum overlap on right side */ +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Word16 q_win_prev; + +#if 0 + // Assume that xn_buf_fx has no headroom. + q_win = s_min( q_xn_buf_fx, q_win ); +#endif +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + IF( allow_qwin_change ) + { + /* Use fixed q_win to avoid the need to adapt scaling of two TCX5 blocks (less effort with maybe not ideal scaling) */ + q_win = -2; + move16(); + allow_qwin_change = 0; + move16(); + } +#endif + + IF( EQ_16( kernel_type, MDST_IV ) ) + { + TCX_MDST_Inverse_qwin_fx( x_fx, x_e_hdrm, win_fx + L_win, 0, sub( L_win, shr( L_ola, 1 ) ), L_ola, &q_win, allow_qwin_change ); + } + ELSE IF( NE_16( kernel_type, MDCT_IV ) ) /* type 1 or 2 */ + { + TCX_MDXT_Inverse_qwin_fx( x_fx, x_e_hdrm, win_fx + L_win, 0, sub( L_win, shr( L_ola, 1 ) ), L_ola, kernel_type, &q_win, allow_qwin_change ); + } + ELSE + { + TCX_MDCT_Inverse_qwin_fx( x_fx, x_e_hdrm, win_fx + L_win, 0, sub( L_win, shr( L_ola, 1 ) ), L_ola, st->element_mode, &q_win, allow_qwin_change ); + } + q_win_prev = q_win; +#else IF( EQ_16( kernel_type, MDST_IV ) ) { TCX_MDST_Inverse_fx( x_fx, x_e_hdrm, win_fx + L_win, 0, sub( L_win, shr( L_ola, 1 ) ), L_ola ); @@ -2759,12 +3232,31 @@ void IMDCT_ivas_fx( { TCX_MDCT_Inverse( x_fx, x_e_hdrm, win_fx + L_win, 0, sub( L_win, shr( L_ola, 1 ) ), L_ola, st->element_mode ); } +#endif set16_fx( xn_buf_fx, 0, shr( overlap, 1 ) ); /* copy new sub-window region not overlapping with previous window */ Copy( win_fx + L_win, xn_buf_fx + shr( overlap, 1 ), add( L_win, shr( L_ola, 1 ) ) ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + q_xn_buf_fx = q_win; + move16(); + + /* 2nd TCX-5 window, regular MDCT with minimum overlap on both sides */ + IF( s_and( kernel_type, 1 ) ) + { + TCX_MDST_Inverse_qwin_fx( x_fx + L_spec_TCX5, x_e_hdrm, win_fx, L_ola, sub( L_win, L_ola ), L_ola, &q_win, allow_qwin_change ); + } + ELSE + { + TCX_MDCT_Inverse_qwin_fx( x_fx + L_spec_TCX5, x_e_hdrm, win_fx, L_ola, sub( L_win, L_ola ), L_ola, st->element_mode, &q_win, allow_qwin_change ); + } + + assert( q_win_prev == q_win ); + + IMDCT_ivas_fx_rescale( xn_buf_fx, &q_xn_buf_fx, syn_Overl_TDAC_fx, Q_syn_Overl_TDAC_fx, old_syn_overl_fx, Q_old_syn_overl_fx, hTcxDec->old_syn_Overl, &hTcxDec->Q_old_syn_Overl, old_out_fx, q_old_out_fx, q_win, FB_flag ); +#else /* 2nd TCX-5 window, regular MDCT with minimum overlap on both sides */ IF( s_and( kernel_type, 1 ) ) { @@ -2774,6 +3266,7 @@ void IMDCT_ivas_fx( { TCX_MDCT_Inverse( x_fx + L_spec_TCX5, x_e_hdrm, win_fx, L_ola, sub( L_win, L_ola ), L_ola, st->element_mode ); } +#endif tcx_windowing_synthesis_current_frame( win_fx, tcx_aldo_window_2_fx, tcx_mdct_window_half_fx, tcx_mdct_window_minimum_fx, L_ola, tcx_mdct_window_half_length, tcx_mdct_window_min_length, 0, /* left_rect */ MIN_OVERLAP, /* left_mode */ acelp_zir_fx, hTcxDec->old_syn_Overl, syn_Overl_TDAC_fx, st->old_Aq_12_8_fx, tcx_mdct_window_trans_fx, L_win, ( tcx_offset < 0 ) ? -tcx_offset : 0, 1, /* st->last_mode_bfi */ 0, /* st->last_is_cng */ fullbandScale ); @@ -2808,6 +3301,30 @@ void IMDCT_ivas_fx( ELSE { /* default, i.e. maximum overlap, single transform, no grouping */ +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + IF( allow_qwin_change && fullbandScale ) + { + q_win = s_min( q_win, norm_arr( acelp_zir_fx, shr( L_frame_glob, 1 ) ) + *q_acelp_zir_fx ); + } + + IF( EQ_16( kernel_type, MDST_IV ) ) + { + TCX_MDST_Inverse_qwin_fx( x_fx, x_e_hdrm, xn_buf_fx, overlap, sub( L_frame, overlap ), overlap, &q_win, allow_qwin_change ); + } + ELSE IF( NE_16( kernel_type, MDCT_IV ) ) /* type 1 or 2 */ + { + TCX_MDXT_Inverse_qwin_fx( x_fx, x_e_hdrm, xn_buf_fx, overlap, sub( L_frame, overlap ), overlap, kernel_type, &q_win, allow_qwin_change ); + } + ELSE + { + TCX_MDCT_Inverse_qwin_fx( x_fx, x_e_hdrm, xn_buf_fx, overlap, sub( L_frame, overlap ), overlap, st->element_mode, &q_win, allow_qwin_change ); + } + // Because xn_buf_fx is overwritten above. + q_xn_buf_fx = q_win; + move16(); + + IMDCT_ivas_fx_rescale( xn_buf_fx, &q_xn_buf_fx, syn_Overl_TDAC_fx, Q_syn_Overl_TDAC_fx, old_syn_overl_fx, Q_old_syn_overl_fx, hTcxDec->old_syn_Overl, &hTcxDec->Q_old_syn_Overl, old_out_fx, q_old_out_fx, q_win, FB_flag ); +#else IF( EQ_16( kernel_type, MDST_IV ) ) { TCX_MDST_Inverse_fx( x_fx, x_e_hdrm, xn_buf_fx, overlap, sub( L_frame, overlap ), overlap ); @@ -2820,7 +3337,18 @@ void IMDCT_ivas_fx( { TCX_MDCT_Inverse( x_fx, x_e_hdrm, xn_buf_fx, overlap, sub( L_frame, overlap ), overlap, st->element_mode ); } +#endif +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + IF( !fullbandScale ) + { + *q_acelp_zir_fx = q_xn_buf_fx; + } + ELSE + { + scale_sig( acelp_zir_fx, shr( L_frame_glob, 1 ), sub( q_xn_buf_fx, *q_acelp_zir_fx ) ); + } +#endif tcx_windowing_synthesis_current_frame( xn_buf_fx, tcx_aldo_window_2_fx, tcx_mdct_window_half_fx, tcx_mdct_window_minimum_fx, overlap, tcx_mdct_window_half_length, tcx_mdct_window_min_length, left_rect, !bfi && ( frame_cnt > 0 ) && ( index == 0 ) && NE_16( st->last_core, ACELP_CORE ) ? MIN_OVERLAP : index, acelp_zir_fx, hTcxDec->old_syn_Overl, syn_Overl_TDAC_fx, st->old_Aq_12_8_fx, tcx_mdct_window_trans_fx, shr( L_frame_glob, 1 ), ( tcx_offset < 0 ) ? -tcx_offset : 0, ( frame_cnt > 0 /*|| (st->last_con_tcx )*/ ) ? 1 : st->last_core_bfi, ( frame_cnt > 0 ) ? 0 : (Word8) st->last_is_cng, fullbandScale ); } /* tcx_last_overlap_mode != FULL_OVERLAP */ @@ -2865,10 +3393,30 @@ void IMDCT_ivas_fx( q_tmp_fx_32 = q_xn_buf_fx_32; move16(); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + // q_win < norm + q_tmp_fx_32 - 16 + q_win = s_min( q_win, L_norm_arr( tmp_fx_32, L_frame ) + q_tmp_fx_32 - 16 ); + IMDCT_ivas_fx_rescale( NULL, NULL, syn_Overl_TDAC_fx, Q_syn_Overl_TDAC_fx, old_syn_overl_fx, Q_old_syn_overl_fx, hTcxDec->old_syn_Overl, &hTcxDec->Q_old_syn_Overl, old_out_fx, q_old_out_fx, q_win, FB_flag ); +#endif + Word16 diff = sub( q_tmp_fx_32, q_win ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + Word16 q_old_out_diff = sub( q_tmp_fx_32, *q_old_out_fx ); + IF( q_old_out_diff < 0 ) + { + Scale_sig( old_out_fx, L_frame, q_old_out_diff ); + *q_old_out_fx = add( *q_old_out_fx, q_old_out_diff ); + q_old_out_diff = 0; + } +#endif FOR( Word16 ind = 0; ind < L_frame; ind++ ) { +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + assert( L_shr( L_shl( old_out_fx[ind], q_old_out_diff ), q_old_out_diff ) == old_out_fx[ind] ); + old_out_fx_32[ind] = L_shl( L_deposit_l( old_out_fx[ind] ), q_old_out_diff ); +#else old_out_fx_32[ind] = L_shl( old_out_fx[ind], diff ); +#endif move32(); } @@ -2876,7 +3424,14 @@ void IMDCT_ivas_fx( FOR( Word16 ind = 0; ind < L_frame; ind++ ) { +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + assert( extract_h( L_shr( old_out_fx_32[ind], q_old_out_diff ) ) == 0 || extract_h( L_shr( old_out_fx_32[ind], q_old_out_diff ) ) == -1 ); + old_out_fx[ind] = extract_l( L_shr( old_out_fx_32[ind], q_old_out_diff ) ); +#else + assert( extract_h( L_shr( old_out_fx_32[ind], diff ) ) == 0 || extract_h( L_shr( old_out_fx_32[ind], diff ) ) == -1 ); old_out_fx[ind] = extract_l( L_shr( old_out_fx_32[ind], diff ) ); +#endif + assert( extract_h( L_shr( xn_buf_fx_32[ind], diff ) ) == 0 || extract_h( L_shr( xn_buf_fx_32[ind], diff ) ) == -1 ); xn_buf_fx[ind] = extract_l( L_shr( xn_buf_fx_32[ind], diff ) ); move16(); move16(); @@ -2884,8 +3439,10 @@ void IMDCT_ivas_fx( } ELSE { +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD Word16 q_old_out = q_win; move16(); +#endif edct_ivas_fx( x_fx, xn_buf_fx_32 + add( shr( overlap, 1 ), nz ), L_frame, &q_xn_buf_fx_32 ); Word16 res_m, res_e; res_e = 0; @@ -2901,22 +3458,49 @@ void IMDCT_ivas_fx( q_tmp_fx_32 = sub( q_xn_buf_fx_32, res_e ); // v_multc_fixed( xn_buf_fx_32 + overlap / 2 + nz, (float) sqrt( (float) L_frame / NORM_MDCT_FACTOR ), tmp_fx_32, L_frame ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + if ( allow_qwin_change ) + { + // sub( q_xn_buf_fx_32, q_win ) == 16 - L_norm_arr( xn_buf_fx_32, L_frame ) + // q_xn_buf_fx_32 - q_win == 16 - L_norm_arr( xn_buf_fx_32, L_frame ) + // q_win == - 16 + L_norm_arr( xn_buf_fx_32, L_frame ) + q_xn_buf_fx_32 + q_win = s_min( q_win, add( sub( q_xn_buf_fx_32, 16 ), L_norm_arr( xn_buf_fx_32, L_frame ) ) - 2 ); + } + IMDCT_ivas_fx_rescale( xn_buf_fx, &q_xn_buf_fx, syn_Overl_TDAC_fx, Q_syn_Overl_TDAC_fx, old_syn_overl_fx, Q_old_syn_overl_fx, hTcxDec->old_syn_Overl, &hTcxDec->Q_old_syn_Overl, old_out_fx, q_old_out_fx, q_win, FB_flag ); +#endif + Word16 q_diff = sub( q_xn_buf_fx_32, q_win ); FOR( Word16 ind = 0; ind < L_frame; ind++ ) { + assert( extract_h( L_shr( xn_buf_fx_32[( ind + ( overlap / 2 ) ) + nz], q_diff ) ) == 0 || extract_h( L_shr( xn_buf_fx_32[( ind + ( overlap / 2 ) ) + nz], q_diff ) ) == -1 ); xn_buf_fx[( ind + ( overlap / 2 ) ) + nz] = extract_l( L_shr( xn_buf_fx_32[( ind + ( overlap / 2 ) ) + nz], q_diff ) ); move16(); } +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + window_ola_fx( tmp_fx_32, xn_buf_fx, &q_tmp_fx_32, old_out_fx, q_old_out_fx, L_frame, hTcxCfg->tcx_last_overlap_mode, hTcxCfg->tcx_curr_overlap_mode, 0, 0, NULL ); +#else window_ola_fx( tmp_fx_32, xn_buf_fx, &q_tmp_fx_32, old_out_fx, &q_old_out, L_frame, hTcxCfg->tcx_last_overlap_mode, hTcxCfg->tcx_curr_overlap_mode, 0, 0, NULL ); q_diff = sub( q_old_out, q_win ); - +#endif +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + if ( allow_qwin_change ) + { + // sub( q_tmp_fx_32, q_win ) == -norm_arr( xn_buf_fx, L_frame ) + // q_tmp_fx_32 - q_win == -norm_arr( xn_buf_fx, L_frame ) + // q_win == q_tmp_fx_32 + norm_arr( xn_buf_fx, L_frame ) + q_win = s_min( q_win, add( q_tmp_fx_32, norm_arr( xn_buf_fx, L_frame ) ) ); + } +#endif Word16 diff = sub( q_tmp_fx_32, q_win ); FOR( Word16 ind = 0; ind < L_frame; ind++ ) { +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + old_out_fx[ind] = shr_sat( old_out_fx[ind], q_diff ); move16(); +#endif xn_buf_fx[ind] = shr_sat( xn_buf_fx[ind], diff ); move16(); } @@ -2939,6 +3523,40 @@ void IMDCT_ivas_fx( move16(); } +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + IF( allow_qwin_change && fullbandScale ) + { + q_win = s_min( q_win, norm_arr( acelp_zir_fx, shr( L_frame_glob, 1 ) ) + *q_acelp_zir_fx ); + } + IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) && ( LE_32( st->last_core_brate, SID_2k40 ) || st->last_core == ACELP_CORE ) && ( fullbandScale == 0 ) ) + { + /* Increase headroom because if the ACELP ZIR is used below, the synthesis filter gain is unknown. */ + IF( allow_qwin_change ) + { + allow_qwin_change = 0; + move16(); + q_win = s_max( -2, sub( q_win, 1 ) ); + } + } + + IF( EQ_16( kernel_type, MDST_IV ) ) + { + TCX_MDST_Inverse_qwin_fx( x_fx, x_e_hdrm, xn_buf_fx, overlap, L_frame - overlap, overlap, &q_win, allow_qwin_change ); + } + ELSE IF( NE_16( kernel_type, MDCT_IV ) ) /* type 1 or 2 */ + { + TCX_MDXT_Inverse_qwin_fx( x_fx, x_e_hdrm, xn_buf_fx, overlap, L_frame - overlap, overlap, kernel_type, &q_win, allow_qwin_change ); + } + ELSE + { + TCX_MDCT_Inverse_qwin_fx( x_fx, x_e_hdrm, xn_buf_fx, overlap, L_frame - overlap, overlap, st->element_mode, &q_win, allow_qwin_change ); + } + // Because xn_buf_fx is overwritten above. + q_xn_buf_fx = q_win; + move16(); + + IMDCT_ivas_fx_rescale( xn_buf_fx, &q_xn_buf_fx, syn_Overl_TDAC_fx, Q_syn_Overl_TDAC_fx, old_syn_overl_fx, Q_old_syn_overl_fx, hTcxDec->old_syn_Overl, &hTcxDec->Q_old_syn_Overl, old_out_fx, q_old_out_fx, q_win, FB_flag ); +#else IF( EQ_16( kernel_type, MDST_IV ) ) { TCX_MDST_Inverse_fx( x_fx, x_e_hdrm, xn_buf_fx, overlap, L_frame - overlap, overlap ); @@ -2951,7 +3569,7 @@ void IMDCT_ivas_fx( { TCX_MDCT_Inverse( x_fx, x_e_hdrm, xn_buf_fx, overlap, L_frame - overlap, overlap, st->element_mode ); } - +#endif /*-----------------------------------------------------------* * Windowing, overlap and add * *-----------------------------------------------------------*/ @@ -2983,7 +3601,16 @@ void IMDCT_ivas_fx( move16(); } } - +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + IF( !fullbandScale ) + { + *q_acelp_zir_fx = q_xn_buf_fx; + } + ELSE + { + scale_sig( acelp_zir_fx, shr( L_frame_glob, 1 ), sub( q_xn_buf_fx, *q_acelp_zir_fx ) ); + } +#endif /* Window current frame */ tcx_windowing_synthesis_current_frame( xn_buf_fx, tcx_aldo_window_2_fx, tcx_mdct_window_half_fx, tcx_mdct_window_minimum_fx, overlap, tcx_mdct_window_half_length, tcx_mdct_window_min_length, left_rect, hTcxCfg->tcx_last_overlap_mode, acelp_zir_fx, hTcxDec->old_syn_Overl, syn_Overl_TDAC_fx, st->old_Aq_12_8_fx, tcx_mdct_window_trans_fx, shr( L_frame_glob, 1 ), acelp_mem_len, st->last_core_bfi, (Word8) st->last_is_cng, fullbandScale ); } @@ -3008,9 +3635,18 @@ void IMDCT_ivas_fx( IF( hTcxCfg->last_aldo != 0 ) { +#if defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD ) && !defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN ) + Word16 tmp_old_out; + Word16 q_diff = sub( *q_old_out_fx, q_win ); +#endif FOR( i = 0; i < sub( overlap, tcx_mdct_window_min_length ); i++ ) { - xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )] = add_sat( xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )], old_out_fx[( i + nz )] ); // Q(-2) +#if defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD ) && !defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN ) + tmp_old_out = shr_sat( old_out_fx[( i + nz )], q_diff ); + xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )] = add_sat( xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )], tmp_old_out ); // q_win +#else + xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )] = add_sat( xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )], old_out_fx[( i + nz )] ); // Q(-2) +#endif move16(); } @@ -3023,17 +3659,32 @@ void IMDCT_ivas_fx( // tested FOR( ; i < overlap; i++ ) /* perfectly reconstructing ALDO shortening */ { - xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], old_out_fx[( i + nz )] ); // Q(-2) +#if defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD ) && !defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN ) + tmp_old_out = shr_sat( old_out_fx[( i + nz )], q_diff ); + xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], tmp_old_out ); // q_win +#else + xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], old_out_fx[( i + nz )] ); // q_win +#endif move16(); } FOR( i = 0; i < ( tcx_mdct_window_min_length / 2 ); i++ ) { - xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )] = add_sat( xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )], mult_r( old_out_fx[( ( i + nz ) + overlap )], tcx_mdct_window_minimum_fx[i].v.re ) ); // Q(-2) +#if defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD ) && !defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN ) + tmp_old_out = shr_sat( old_out_fx[( ( i + nz ) + overlap )], q_diff ); + xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )] = add_sat( xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )], mult_r( tmp_old_out, tcx_mdct_window_minimum_fx[i].v.re ) ); // q_win +#else + xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )] = add_sat( xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )], mult_r( old_out_fx[( ( i + nz ) + overlap )], tcx_mdct_window_minimum_fx[i].v.re ) ); // q_win +#endif move16(); } FOR( ; i < tcx_mdct_window_min_length; i++ ) { - xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )] = add_sat( xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )], mult_r( old_out_fx[( ( i + nz ) + overlap )], tcx_mdct_window_minimum_fx[( tcx_mdct_window_min_length - ( 1 + i ) )].v.im ) ); // Q(-2) +#if defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD ) && !defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN ) + tmp_old_out = shr_sat( old_out_fx[( ( i + nz ) + overlap )], q_diff ); + xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )] = add_sat( xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )], mult_r( tmp_old_out, tcx_mdct_window_minimum_fx[( tcx_mdct_window_min_length - ( 1 + i ) )].v.im ) ); // q_win +#else + xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )] = add_sat( xn_buf_fx[( ( ( i + ( overlap / 2 ) ) - tcx_offset ) + overlap )], mult_r( old_out_fx[( ( i + nz ) + overlap )], tcx_mdct_window_minimum_fx[( tcx_mdct_window_min_length - ( 1 + i ) )].v.im ) ); // q_win +#endif move16(); } } @@ -3041,12 +3692,22 @@ void IMDCT_ivas_fx( { FOR( ; i < ( overlap - ( tcx_mdct_window_min_length / 2 ) ); i++ ) { - xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], mult_r( old_out_fx[( i + nz )], tcx_mdct_window_minimum_fx[( ( tcx_mdct_window_min_length - overlap ) + i )].v.re ) ); // Q(-2) +#if defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD ) && !defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN ) + tmp_old_out = shr_sat( old_out_fx[( i + nz )], q_diff ); + xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], mult_r( tmp_old_out, tcx_mdct_window_minimum_fx[( ( tcx_mdct_window_min_length - overlap ) + i )].v.re ) ); // Q(-2) +#else + xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], mult_r( old_out_fx[( i + nz )], tcx_mdct_window_minimum_fx[( ( tcx_mdct_window_min_length - overlap ) + i )].v.re ) ); // Q(-2) +#endif move16(); } FOR( ; i < overlap; i++ ) { - xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], mult_r( old_out_fx[( i + nz )], tcx_mdct_window_minimum_fx[( overlap - ( 1 + i ) )].v.im ) ); // Q(-2) +#if defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD ) && !defined( FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN ) + tmp_old_out = shr_sat( old_out_fx[( i + nz )], q_diff ); + xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], mult_r( tmp_old_out, tcx_mdct_window_minimum_fx[( overlap - ( 1 + i ) )].v.im ) ); // Q(-2) +#else + xn_buf_fx[( i + ( ( overlap / 2 ) - tcx_offset ) )] = add_sat( xn_buf_fx[( ( i + ( overlap / 2 ) ) - tcx_offset )], mult_r( old_out_fx[( i + nz )], tcx_mdct_window_minimum_fx[( overlap - ( 1 + i ) )].v.im ) ); // Q(-2) +#endif move16(); } } @@ -3095,7 +3756,9 @@ void IMDCT_ivas_fx( /* Compute windowed synthesis in case of switching to ALDO windows in next frame */ Copy( xn_buf_fx + sub( L_frame, nz ), old_out_fx, add( nz, overlap ) ); set16_fx( old_out_fx + add( nz, overlap ), 0, nz ); - +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + *q_old_out_fx = q_win; +#endif tcx_windowing_synthesis_past_frame( old_out_fx + nz, tcx_aldo_window_1_trunc_fx, tcx_mdct_window_half_fx, tcx_mdct_window_minimum_fx, overlap, tcx_mdct_window_half_length, tcx_mdct_window_min_length, hTcxCfg->tcx_curr_overlap_mode ); /* If current overlap mode = FULL_OVERLAP -> ALDO_WINDOW */ @@ -3169,6 +3832,10 @@ void IMDCT_ivas_fx( } } } +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + *pq_win = q_win; + move16(); +#endif return; } @@ -3588,6 +4255,7 @@ void decoder_tcx_ivas_fx( decoder_tcx_tns_fx( st, L_frame_glob, L_spec, L_frame, L_frameTCX, x_fx, fUseTns, &tnsData, bfi, frame_cnt, 0, NULL ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN Scale_sig32( x_fx, N_MAX, sub( x_e, 20 ) ); // Scaling x_fx to Q11 Scale_sig( xn_buf_fx, L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX, sub( st->Q_syn, 14 ) ); // Scaling xn_buf_fx to Q_syn x_e = sub( 31, 11 ); @@ -3599,6 +4267,43 @@ void decoder_tcx_ivas_fx( move16(); } + Copy_Scale_sig_16_32_no_sat( st->old_Aq_12_8_fx, st->old_Aq_12_8_fx_32, M + 1, ( sub( 28, ( sub( 15, norm_s( sub( st->old_Aq_12_8_fx[0], 1 ) ) ) ) ) ) ); + + Word16 q_win, q_winFB; + + q_win = st->Q_syn; + move16(); + q_winFB = st->Q_syn; + move16(); + + assert( q_win == 0 ); + + Scale_sig( synth_fx, L_frame_glob, sub( q_win, st->Q_syn ) ); // Scaling to Q_syn + Scale_sig( synthFB_fx, L_frameTCX_glob, sub( q_winFB, st->Q_syn ) ); // Scaling to Q_syn + + decoder_tcx_imdct_fx( st, L_frame_glob, L_frameTCX_glob, L_spec, tcx_offset, tcx_offsetFB, L_frame, L_frameTCX, left_rect, &x_fx[0], q_x, xn_buf_fx, &q_win, &q_winFB, MDCT_IV, + fUseTns, &synth_fx[0], &synthFB_fx[0], bfi, frame_cnt, sba_dirac_stereo_flag ); + + /* Scaling up again */ + Scale_sig( synth_fx, L_frame_glob, sub( st->Q_syn, q_win ) ); + Scale_sig( synthFB_fx, L_frameTCX_glob, sub( st->Q_syn, q_winFB ) ); + // Scale_sig( st->hTcxDec->syn_Overl, L_FRAME32k / 2, 1 ); + + Scale_sig( st->hTcxDec->old_syn_Overl, 320, ( -2 - st->hTcxDec->Q_old_syn_Overl ) ); // Scaling to Q-2 + st->hTcxDec->Q_old_syn_Overl = -2; + +#else + Scale_sig32( x_fx, N_MAX, sub( x_e, 20 ) ); // Scaling x_fx to Q11 + Scale_sig( xn_buf_fx, L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX, sub( st->Q_syn, 14 ) ); // Scaling xn_buf_fx to Q_syn + x_e = sub( 31, 11 ); + + IF( st->igf != 0 ) + { + Scale_sig32( st->hIGFDec->virtualSpec, ( N_MAX_TCX - IGF_START_MN ), ( sub( st->hIGFDec->virtualSpec_e, x_e ) ) ); + st->hIGFDec->virtualSpec_e = x_e; + move16(); + } + /* Scaling down buffers for decoder_tcx_imdct_fx*/ Scale_sig( st->hTcxDec->syn_Overl_TDACFB, L_FRAME_MAX / 2, sub( st->Q_syn, st->hTcxDec->Q_syn_Overl_TDACFB ) ); // Scaling to Q_syn st->hTcxDec->Q_syn_Overl_TDACFB = st->Q_syn; @@ -3606,12 +4311,14 @@ void decoder_tcx_ivas_fx( Scale_sig( st->hTcxDec->syn_Overl_TDAC, L_FRAME32k / 2, sub( st->Q_syn, st->hTcxDec->Q_syn_Overl_TDAC ) ); // Scaling to Q_syn st->hTcxDec->Q_syn_Overl_TDAC = st->Q_syn; move16(); - Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, negate( st->hHQ_core->Q_old_wtda ) ); // Scaling to Q_syn - Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, negate( st->hHQ_core->Q_old_wtda_LB ) ); // Scaling to Q_syn +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, negate( st->hHQ_core->Q_old_wtda ) ); // Scaling to Q0 + Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, negate( st->hHQ_core->Q_old_wtda_LB ) ); // Scaling to Q0 st->hHQ_core->Q_old_wtda = 0; move16(); st->hHQ_core->Q_old_wtda_LB = 0; move16(); +#endif Scale_sig( st->hTcxDec->old_syn_Overl, 320, st->Q_syn - st->hTcxDec->Q_old_syn_Overl ); // Scaling to Q_syn st->hTcxDec->Q_old_syn_Overl = st->Q_syn; @@ -3627,12 +4334,15 @@ void decoder_tcx_ivas_fx( Scale_sig( synth_fx, L_frame_glob, negate( st->Q_syn ) ); Scale_sig( synthFB_fx, L_frameTCX_glob, negate( st->Q_syn ) ); // Scale_sig( st->hTcxDec->syn_Overl, L_FRAME32k / 2, 1 ); +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD st->hHQ_core->Q_old_wtda = st->Q_syn; // Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, ( sub( st->hHQ_core->Q_old_wtda, st->Q_syn ) ) ); st->hHQ_core->Q_old_wtda_LB = st->Q_syn; // Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, ( sub( st->hHQ_core->Q_old_wtda, st->Q_syn ) ) ); +#endif Scale_sig( st->hTcxDec->old_syn_Overl, 320, ( -2 - st->hTcxDec->Q_old_syn_Overl ) ); // Scaling to Q-2 st->hTcxDec->Q_old_syn_Overl = -2; +#endif } /*-------------------------------------------------------------------* @@ -5039,7 +5749,12 @@ void decoder_tcx_imdct_fx( Word32 x_fx[N_MAX], // Q(11) Word16 q_x, Word16 xn_buf_fx[], // Q(-2) +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Word16 *q_win, + Word16 *q_winFB, +#else Word16 q_win, +#endif const UWord16 kernelType, /* i : TCX transform kernel type */ const Word16 fUseTns, /* i : flag that is set if TNS data is present */ Word16 synth_fx[], // Q(-2) /* i/o: synth[-M..L_frame] */ @@ -5065,7 +5780,11 @@ void decoder_tcx_imdct_fx( Word16 q_a_itf = 15; Word16 x_e = sub( 31, q_x ); move16(); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Word16 q_acelp_zir_fx = 0; +#else Word16 shift_q = sub( q_x, q_win ); +#endif /*-----------------------------------------------------------------* * Initializations @@ -5218,9 +5937,17 @@ void decoder_tcx_imdct_fx( IF( NE_16( st->element_mode, IVAS_CPE_DFT ) && !sba_dirac_stereo_flag ) { +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + IMDCT_ivas_fx( xn_bufFB_fx, q_x, hTcxDec->syn_Overl, &hTcxDec->Q_syn_Overl, hTcxDec->syn_Overl_TDAC, &hTcxDec->Q_syn_Overl_TDAC, xn_buf_fx, *q_win, hTcxCfg->tcx_aldo_window_1, hTcxCfg->tcx_aldo_window_1_trunc, hTcxCfg->tcx_aldo_window_2, +#else IMDCT_ivas_fx( xn_bufFB_fx, q_x, hTcxDec->syn_Overl, hTcxDec->syn_Overl_TDAC, xn_buf_fx, hTcxCfg->tcx_aldo_window_1, hTcxCfg->tcx_aldo_window_1_trunc, hTcxCfg->tcx_aldo_window_2, +#endif hTcxCfg->tcx_mdct_window_half, hTcxCfg->tcx_mdct_window_minimum, hTcxCfg->tcx_mdct_window_trans, hTcxCfg->tcx_mdct_window_half_length, hTcxCfg->tcx_mdct_window_min_length, index, +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + kernelType, left_rect, tcx_offset, overlap, L_frame, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frame_glob, frame_cnt, bfi, st->hHQ_core->old_out_LB_fx, &st->hHQ_core->Q_old_wtda_LB, 0, st, 0, acelp_zir_fx, &q_acelp_zir_fx, q_win ); +#else kernelType, left_rect, tcx_offset, overlap, L_frame, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frame_glob, frame_cnt, bfi, st->hHQ_core->old_out_LB_fx, 0, st, 0, acelp_zir_fx, q_win ); +#endif } /* Generate additional comfort noise to mask potential coding artefacts */ @@ -5240,14 +5967,32 @@ void decoder_tcx_imdct_fx( IF( EQ_16( st->element_mode, IVAS_CPE_DFT ) || ( sba_dirac_stereo_flag != 0 ) ) { Copy32( x_fx, xn_bufFB_fx, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ); // q_x - +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + IMDCT_ivas_fx( xn_bufFB_fx, q_x, hTcxDec->syn_Overl, &hTcxDec->Q_syn_Overl, hTcxDec->syn_Overl_TDAC, &hTcxDec->Q_syn_Overl_TDAC, xn_buf_fx, *q_win, hTcxCfg->tcx_aldo_window_1, hTcxCfg->tcx_aldo_window_1_trunc, hTcxCfg->tcx_aldo_window_2, hTcxCfg->tcx_mdct_window_half, hTcxCfg->tcx_mdct_window_minimum, hTcxCfg->tcx_mdct_window_trans, hTcxCfg->tcx_mdct_window_half_length, hTcxCfg->tcx_mdct_window_min_length, index, +#else IMDCT_ivas_fx( xn_bufFB_fx, q_x, hTcxDec->syn_Overl, hTcxDec->syn_Overl_TDAC, xn_buf_fx, hTcxCfg->tcx_aldo_window_1, hTcxCfg->tcx_aldo_window_1_trunc, hTcxCfg->tcx_aldo_window_2, hTcxCfg->tcx_mdct_window_half, hTcxCfg->tcx_mdct_window_minimum, hTcxCfg->tcx_mdct_window_trans, hTcxCfg->tcx_mdct_window_half_length, hTcxCfg->tcx_mdct_window_min_length, index, +#endif +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + kernelType, left_rect, tcx_offset, overlap, L_frame, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frame_glob, frame_cnt, bfi, st->hHQ_core->old_out_LB_fx, &st->hHQ_core->Q_old_wtda_LB, 0, st, 0, acelp_zir_fx, &q_acelp_zir_fx, q_win ); +#else kernelType, left_rect, tcx_offset, overlap, L_frame, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frame_glob, frame_cnt, bfi, st->hHQ_core->old_out_LB_fx, 0, st, 0, acelp_zir_fx, q_win ); +#endif } +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Word16 shift_q, q_x16; + + shift_q = L_norm_arr( xn_bufFB_fx, L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX ); + shift_q = sub( 16, shift_q ); + q_x16 = sub( q_x, shift_q ); +#endif + FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ ) { - xn_bufFB_fx_16[ind] = extract_l( L_shr( xn_bufFB_fx[ind], shift_q ) ); // q_x +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + assert( extract_h( L_shr( xn_bufFB_fx[ind], shift_q ) ) == 0 || extract_h( L_shr( xn_bufFB_fx[ind], shift_q ) ) == -1 ); +#endif + xn_bufFB_fx_16[ind] = extract_l( L_shr( xn_bufFB_fx[ind], shift_q ) ); // q_x16 move16(); } @@ -5257,19 +6002,40 @@ void decoder_tcx_imdct_fx( IF( st->element_mode != EVS_MONO ) { +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + IMDCT_ivas_fx( x_tmp_fx, q_x, hTcxDec->syn_OverlFB, &hTcxDec->Q_syn_OverlFB, hTcxDec->syn_Overl_TDACFB, &hTcxDec->Q_syn_Overl_TDACFB, xn_bufFB_fx_16, q_x16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB, +#else IMDCT_ivas_fx( x_tmp_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB, +#endif hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index, +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, &st->hHQ_core->Q_old_wtda, 1, st, ratio, acelp_zir_fx, &q_acelp_zir_fx, q_winFB ); +#else kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win ); +#endif } ELSE { - +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + IMDCT_ivas_fx( x_fx, q_x, hTcxDec->syn_OverlFB, &hTcxDec->Q_syn_OverlFB, hTcxDec->syn_Overl_TDACFB, &hTcxDec->Q_syn_Overl_TDACFB, xn_bufFB_fx_16, q_x16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB, hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index, +#else IMDCT_ivas_fx( x_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB, hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index, +#endif +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, &st->hHQ_core->Q_old_wtda, 1, st, ratio, acelp_zir_fx, &q_acelp_zir_fx, q_winFB ); +#else kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win ); +#endif } + +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + shift_q = 16; + move16(); +#endif + FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ ) { - xn_bufFB_fx[ind] = L_shl( L_deposit_l( xn_bufFB_fx_16[ind] ), shift_q ); // Q_x + xn_bufFB_fx[ind] = L_shl( L_deposit_l( xn_bufFB_fx_16[ind] ), shift_q ); // q_winFB } IF( ( bfi == 0 ) ) @@ -5311,18 +6077,21 @@ void decoder_tcx_imdct_fx( /* Update old_syn_overl */ IF( hTcxCfg->last_aldo == 0 ) { - Copy( xn_buf_fx + L_frame, hTcxDec->syn_Overl, overlap ); // Q(-2) + Copy( xn_buf_fx + L_frame, hTcxDec->syn_Overl, overlap ); // q_win FOR( Word16 ind = 0; ind < overlapFB; ind++ ) { - hTcxDec->syn_OverlFB[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + L_frameTCX )], shift_q ) ); // q_x + assert( extract_h( L_shr( xn_bufFB_fx[( ind + L_frameTCX )], shift_q ) ) == 0 || extract_h( L_shr( xn_bufFB_fx[( ind + L_frameTCX )], shift_q ) ) == -1 ); + hTcxDec->syn_OverlFB[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + L_frameTCX )], shift_q ) ); // q_winFB } } /* Output */ - Copy( xn_buf_fx + sub( shr( overlap, 1 ), tcx_offset ), synth_fx, L_frame_glob ); // Q(-2) + Copy( xn_buf_fx + sub( shr( overlap, 1 ), tcx_offset ), synth_fx, L_frame_glob ); // q_win FOR( Word16 ind = 0; ind < L_frameTCX_glob; ind++ ) { - synthFB_fx[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], shift_q ) ); // q_x + assert( extract_h( L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], shift_q ) ) == 0 || + extract_h( L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], shift_q ) ) == -1 ); + synthFB_fx[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], shift_q ) ); // q_winFB } diff --git a/lib_dec/er_dec_acelp_fx.c b/lib_dec/er_dec_acelp_fx.c index b6e5f31711327a47d27ed1fcb85c40249c4addf1..470053c6a063550c32b07461961700494f8c4f44 100644 --- a/lib_dec/er_dec_acelp_fx.c +++ b/lib_dec/er_dec_acelp_fx.c @@ -1018,6 +1018,10 @@ void con_acelp_fx( Copy( syn, synth, st->L_frame ); bufferCopyFx( syn + sub( st->L_frame, shr( st->L_frame, 1 ) ), hTcxDec->old_syn_Overl, shr( st->L_frame, 1 ), 0 /*Qf_syn*/, -1 /*Qf_old_xnq*/, 0, 0 /*Q_old_xnq*/ ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + st->hTcxDec->Q_old_syn_Overl = sub( Qf_syn, 1 ); + move16(); +#endif /* save last half frame if next frame is TCX */ bufferCopyFx( syn + st->L_frame, hTcxDec->syn_Overl_TDAC, shr( st->L_frame, 1 ), 0 /*Qf_syn*/, -1 /*Qf_old_xnq*/, 0, 0 /*Q_old_xnq*/ ); @@ -1057,11 +1061,9 @@ void con_acelp_fx( move16(); } set16_fx( &hHQ_core->old_out_LB_fx[( W1 + n )], 0, n ); - hHQ_core->Q_old_wtda = hHQ_core->Q_old_wtda_LB; move16(); - FOR( i = 0; i < W2; i++ ) { buf[i] = mult_r( hTcxDec->syn_Overl_TDAC[i], w[i].v.re ); /*hTcxDec->Q_syn_Overl_TDAC*/ @@ -1104,7 +1106,15 @@ void con_acelp_fx( st->hTcxDec->Q_syn_Overl_TDACFB = st->hTcxDec->Q_syn_Overl_TDAC; move16(); lerp( hTcxDec->syn_Overl, hTcxDec->syn_OverlFB, shr( hTcxDec->L_frameTCX, 1 ), shr( st->L_frame, 1 ) ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hTcxDec->Q_syn_OverlFB = hTcxDec->Q_syn_Overl; + move16(); +#endif lerp( hHQ_core->old_out_LB_fx, hHQ_core->old_out_fx, hTcxDec->L_frameTCX, st->L_frame ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hHQ_core->Q_old_wtda = hHQ_core->Q_old_wtda_LB; + move16(); +#endif /* copy total excitation exc2 as 16kHz for acelp mode1 decoding */ IF( st->hWIDec != NULL ) diff --git a/lib_dec/er_dec_tcx_fx.c b/lib_dec/er_dec_tcx_fx.c index 3d0eab8153845c06cda2e6562038b4da91a1b2c2..37465bc294ad33a91ad7a5f3ced6dfbe86d80d57 100644 --- a/lib_dec/er_dec_tcx_fx.c +++ b/lib_dec/er_dec_tcx_fx.c @@ -843,6 +843,10 @@ void con_tcx_fx( lerp( syn + sub( L_frame, shr( L_frame, 1 ) ), hTcxDec->old_syn_Overl, shr( st->L_frame, 1 ), shr( L_frame, 1 ) ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + st->hTcxDec->Q_old_syn_Overl = Q_syn; + move16(); +#endif Copy( syn + sub( L_frame, n ), hHQ_core->old_out_fx, sub( L_frame, n ) ); /*Q_syn*/ FOR( i = 0; i < W12; i++ ) @@ -867,7 +871,13 @@ void con_tcx_fx( bufferCopyFx( syn, synth, L_frame, Q_syn, 0, 0, 0 ); /*Q_syn*/ BASOP_SATURATE_WARNING_ON_EVS +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + Copy_Scale_sig( syn + L_frame, hTcxDec->syn_OverlFB, shr( L_frame, 1 ), sub( 0, Q_syn ) ); /*Q0*/ + hTcxDec->Q_syn_OverlFB = 0; + move16(); +#else Copy_Scale_sig( syn + L_frame, hTcxDec->syn_OverlFB, shr( L_frame, 1 ), negate( Q_syn ) ); /*Q0*/ +#endif /* copy total excitation exc2 as 16kHz for acelp mode1 decoding */ IF( st->hWIDec != NULL ) @@ -929,11 +939,23 @@ void con_tcx_fx( /* update memory for low band */ Scale_sig( hTcxDec->old_syn_Overl, shr( st->L_frame, 1 ), sub( -1, Q_syn ) ); /*Q_syn*/ +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hTcxDec->Q_old_syn_Overl = -1; + move16(); +#endif lerp( hTcxDec->syn_OverlFB, hTcxDec->syn_Overl, shr( st->L_frame, 1 ), shr( L_frame, 1 ) ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hTcxDec->Q_syn_Overl = hTcxDec->Q_syn_OverlFB; + move16(); +#endif lerp( hTcxDec->syn_Overl_TDACFB, hTcxDec->syn_Overl_TDAC, shr( st->L_frame, 1 ), shr( L_frame, 1 ) ); hTcxDec->Q_syn_Overl_TDAC = hTcxDec->Q_syn_Overl_TDACFB; move16(); lerp( st->hHQ_core->old_out_fx, st->hHQ_core->old_out_LB_fx, st->L_frame, L_frame ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_NO + hHQ_core->Q_old_wtda_LB = hHQ_core->Q_old_wtda; + move16(); +#endif st->old_enr_LP = Enr_1_Az_fx( A_local, L_SUBFR ); /*Q3*/ move16(); @@ -1789,6 +1811,10 @@ void con_tcx_ivas_fx( lerp( syn + sub( L_frame, shr( L_frame, 1 ) ), hTcxDec->old_syn_Overl, shr( st->L_frame, 1 ), shr( L_frame, 1 ) ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hTcxDec->Q_old_syn_Overl = Q_syn; + move16(); +#endif Copy( syn + sub( L_frame, n ), hHQ_core->old_out_fx, sub( L_frame, n ) ); /*Q_syn*/ FOR( i = 0; i < W12; i++ ) @@ -1813,7 +1839,13 @@ void con_tcx_ivas_fx( bufferCopyFx( syn, synth, L_frame, Q_syn, 0, 0, 0 ); /*Q_syn*/ BASOP_SATURATE_WARNING_ON_EVS +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + Copy_Scale_sig( syn + L_frame, hTcxDec->syn_OverlFB, shr( L_frame, 1 ), sub( 0, Q_syn ) ); /*Q0*/ + hTcxDec->Q_syn_OverlFB = 0; + move16(); +#else Copy_Scale_sig( syn + L_frame, hTcxDec->syn_OverlFB, shr( L_frame, 1 ), negate( Q_syn ) ); /*Q0*/ +#endif /* copy total excitation exc2 as 16kHz for acelp mode1 decoding */ IF( st->hWIDec != NULL ) @@ -1828,9 +1860,14 @@ void con_tcx_ivas_fx( move16(); /* create aliasing and windowing need for transition to TCX10/5 */ +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + Copy( syn + L_frame, hTcxDec->syn_Overl_TDACFB, shr( L_frame, 1 ) ); + hTcxDec->Q_syn_Overl_TDACFB = Q_syn; +#else // bufferCopyFx( syn + L_frame, hTcxDec->syn_Overl_TDACFB, shr( L_frame, 1 ), Q_syn, 0, -1, 0 ); Copy_Scale_sig( syn + L_frame, hTcxDec->syn_Overl_TDACFB, shr( L_frame, 1 ), sub( Q_syn, 1 ) ); hTcxDec->Q_syn_Overl_TDACFB = sub( Q_syn, 1 ); +#endif move16(); FOR( i = 0; i < W12; i++ ) @@ -1877,14 +1914,20 @@ void con_tcx_ivas_fx( /* update memory for low band */ st->Q_syn = Q_syn; move16(); +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT Scale_sig( hTcxDec->old_syn_Overl, shr( st->L_frame, 1 ), sub( -1, Q_syn ) ); /*Q_syn*/ st->hTcxDec->Q_old_syn_Overl = -1; +#endif lerp( hTcxDec->syn_OverlFB, hTcxDec->syn_Overl, shr( st->L_frame, 1 ), shr( L_frame, 1 ) ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hTcxDec->Q_syn_Overl = hTcxDec->Q_syn_OverlFB; + move16(); +#endif lerp( hTcxDec->syn_Overl_TDACFB, hTcxDec->syn_Overl_TDAC, shr( st->L_frame, 1 ), shr( L_frame, 1 ) ); hTcxDec->Q_syn_Overl_TDAC = hTcxDec->Q_syn_Overl_TDACFB; move16(); lerp( st->hHQ_core->old_out_fx, st->hHQ_core->old_out_LB_fx, st->L_frame, L_frame ); - st->hHQ_core->Q_old_wtda_LB = Q_syn; + st->hHQ_core->Q_old_wtda_LB = Q_syn; // st->hHQ_core->Q_old_wtda st->old_enr_LP = Enr_1_Az_fx( A_local, L_SUBFR ); /*Q3*/ move16(); diff --git a/lib_dec/evs_dec_fx.c b/lib_dec/evs_dec_fx.c index 9c81fddf347529f42c78a49bbaae321ffc589306..6411eb477cb3508a4570f8b64a6a850e7a172d6e 100644 --- a/lib_dec/evs_dec_fx.c +++ b/lib_dec/evs_dec_fx.c @@ -167,7 +167,7 @@ ivas_error evs_dec_fx( tmp1 = extract_l( L_shr_r( f, s ) ); /*Q14 - s*/ FOR( i = 0; i < st_fx->hTcxCfg->tcx_mdct_window_lengthFB; i++ ) { - hTcxDec->syn_OverlFB[i] = shl_sat( mult_sat( tmp1, hTcxDec->syn_OverlFB[i] ), add( s, 1 ) ); /*hTcxDec->Q_syn_Overl*/ + hTcxDec->syn_OverlFB[i] = shl_sat( mult_sat( tmp1, hTcxDec->syn_OverlFB[i] ), add( s, 1 ) ); /*hTcxDec->Q_syn_OverlFB*/ move16(); } s = norm_l( f ); diff --git a/lib_dec/hq_core_dec_fx.c b/lib_dec/hq_core_dec_fx.c index 3c2cd5a8092c7c385a6916eae760689d1996e8b8..0161cbac0e1ad5b6f95c233ca22f1ba665229c56 100644 --- a/lib_dec/hq_core_dec_fx.c +++ b/lib_dec/hq_core_dec_fx.c @@ -1094,6 +1094,10 @@ void HQ_core_dec_init_fx( move16(); move16(); move16(); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hHQ_core->Q_old_out_fx32 = Q11; + move16(); +#endif hHQ_core->last_hq_core_type = -1; /* Q0 */ move16(); diff --git a/lib_dec/ivas_core_dec_fx.c b/lib_dec/ivas_core_dec_fx.c index 40012175640da7963da6eb8fbda62bc83507ad23..4def2242a8154a77d5f207239121970b24973354 100644 --- a/lib_dec/ivas_core_dec_fx.c +++ b/lib_dec/ivas_core_dec_fx.c @@ -337,6 +337,10 @@ ivas_error ivas_core_dec_fx( Copy_Scale_sig_16_32_no_sat( sts[0]->hHQ_core->old_out_LB_fx, sts[0]->hHQ_core->old_out_LB_fx32, L_FRAME32k, sub( Q11, sts[0]->hHQ_core->Q_old_wtda_LB ) ); // Q11 L_lerp_fx_q11( sts[0]->hHQ_core->old_out_LB_fx32, sts[0]->hHQ_core->old_out_LB_fx32, sts[0]->L_frame, sts[0]->last_L_frame ); Copy_Scale_sig_32_16( sts[0]->hHQ_core->old_out_LB_fx32, sts[0]->hHQ_core->old_out_LB_fx, L_FRAME32k, sub( sts[0]->hHQ_core->Q_old_wtda_LB, Q11 ) ); // Q_old_wtda_LB +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + sts[0]->hHQ_core->Q_old_out_fx32 = Q11; + move16(); +#endif } IF( NE_16( sts[0]->L_frame, L_FRAME16k ) ) { @@ -576,14 +580,16 @@ ivas_error ivas_core_dec_fx( move32(); /* TCX decoder */ - Scale_sig( st->hPFstat->mem_stp, L_SUBFR, -Qsyn_temp ); // Q0 - Scale_sig( st->hPFstat->mem_pf_in, L_SUBFR, -Qsyn_temp ); // Q0 + Scale_sig( st->hPFstat->mem_stp, L_SUBFR, -Qsyn_temp ); // Q0 + Scale_sig( st->hPFstat->mem_pf_in, L_SUBFR, -Qsyn_temp ); // Q0 +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, -st->hHQ_core->Q_old_wtda_LB ); // Q0 Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, -st->hHQ_core->Q_old_wtda ); // Q0 st->hHQ_core->Q_old_wtda_LB = 0; move16(); st->hHQ_core->Q_old_wtda = 0; move16(); +#endif IF( st_ivas == NULL ) { @@ -598,8 +604,10 @@ ivas_error ivas_core_dec_fx( stereo_tcx_core_dec_fx( st, frameMode[n], output_16_fx[n], synth_16_fx[n], pitch_buf_fx[n], sba_dirac_stereo_flag, hStereoTD, last_element_mode, flag_sec_CNA, hStereoCng, nchan_out, ivas_format ); +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN st->hHQ_core->Q_old_wtda_LB = st->hHQ_core->Q_old_wtda; move16(); +#endif Copy_Scale_sig_16_32_DEPREC( output_16_fx[n], output_32_fx[n], output_frame, Q11 ); // Q11 IF( st->hTcxDec ) @@ -701,6 +709,7 @@ ivas_error ivas_core_dec_fx( st->hHQ_core->Q_fer_samples = 0; move16(); } +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT IF( NE_16( st->core, st->last_core ) ) { IF( st->hTcxDec ) @@ -710,7 +719,7 @@ ivas_error ivas_core_dec_fx( st->Q_syn = 0; move16(); } - +#endif st->prev_Q_syn = st->Q_syn; move16(); @@ -1401,6 +1410,10 @@ ivas_error ivas_core_dec_fx( { Copy_Scale_sig_16_32_no_sat( st->hHQ_core->old_out_LB_fx, st->hHQ_core->old_out_LB_fx32, L_FRAME32k, sub( Q11, st->hHQ_core->Q_old_wtda_LB ) ); // Q11 Copy_Scale_sig_16_32_no_sat( st->hHQ_core->old_out_fx, st->hHQ_core->old_out_fx32, L_FRAME48k, sub( Q11, st->hHQ_core->Q_old_wtda ) ); // Q11 +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + st->hHQ_core->Q_old_out_fx32 = Q11; + move16(); +#endif } IF( NE_16( st->element_mode, IVAS_CPE_DFT ) ) diff --git a/lib_dec/ivas_cpe_dec_fx.c b/lib_dec/ivas_cpe_dec_fx.c index e26a56c1bb44b72f62e6c4bd06cc37c0619618f3..d4c041e971ba79f633dd679cad89f7dbcece79e2 100644 --- a/lib_dec/ivas_cpe_dec_fx.c +++ b/lib_dec/ivas_cpe_dec_fx.c @@ -132,7 +132,11 @@ ivas_error ivas_cpe_dec_fx( IF( hCPE->hCoreCoder[ind1]->hHQ_core ) { Copy_Scale_sig_16_32_no_sat( hCPE->hCoreCoder[ind1]->hHQ_core->old_out_LB_fx, hCPE->hCoreCoder[ind1]->hHQ_core->old_out_LB_fx32, L_FRAME32k, sub( Q11, hCPE->hCoreCoder[ind1]->hHQ_core->Q_old_wtda_LB ) ); // Q11 +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hCPE->hCoreCoder[ind1]->hHQ_core->Q_old_out_fx32 = Q11; +#else hCPE->hCoreCoder[ind1]->hHQ_core->q_old_outLB_fx = Q11; +#endif move16(); } } @@ -155,7 +159,7 @@ ivas_error ivas_cpe_dec_fx( test(); IF( hCPE->hCoreCoder[ind1] && hCPE->hCoreCoder[ind1]->hHQ_core ) { - Copy_Scale_sig_32_16( hCPE->hCoreCoder[ind1]->hHQ_core->old_out_LB_fx32, hCPE->hCoreCoder[ind1]->hHQ_core->old_out_LB_fx, L_FRAME32k, sub( hCPE->hCoreCoder[ind1]->hHQ_core->Q_old_wtda_LB, Q11 ) ); // Q_old_wtda + Copy_Scale_sig_32_16( hCPE->hCoreCoder[ind1]->hHQ_core->old_out_LB_fx32, hCPE->hCoreCoder[ind1]->hHQ_core->old_out_LB_fx, L_FRAME32k, sub( hCPE->hCoreCoder[ind1]->hHQ_core->Q_old_wtda_LB, Q11 ) ); // Q_old_wtda_LB } } diff --git a/lib_dec/ivas_jbm_dec_fx.c b/lib_dec/ivas_jbm_dec_fx.c index 008093a89ea195b5aaf697730e2bffb97d7b35dc..92ed871a1e5005e50247237972eedcbb51ab24c7 100644 --- a/lib_dec/ivas_jbm_dec_fx.c +++ b/lib_dec/ivas_jbm_dec_fx.c @@ -434,7 +434,11 @@ ivas_error ivas_jbm_dec_tc_fx( { Copy_Scale_sig_16_32_no_sat( hCPE->hCoreCoder[0]->hHQ_core->old_out_LB_fx, hCPE->hCoreCoder[0]->hHQ_core->old_out_LB_fx32, L_FRAME32k, q - hCPE->hCoreCoder[0]->hHQ_core->Q_old_wtda_LB ); // q Copy_Scale_sig_16_32_no_sat( hCPE->hCoreCoder[0]->hHQ_core->old_out_fx, hCPE->hCoreCoder[0]->hHQ_core->old_out_fx32, L_FRAME48k, q - hCPE->hCoreCoder[0]->hHQ_core->Q_old_wtda ); // q +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hCPE->hCoreCoder[0]->hHQ_core->Q_old_out_fx32 = q; +#else hCPE->hCoreCoder[0]->hHQ_core->q_old_outLB_fx = q; +#endif move16(); } IF( hCPE->hStereoDft != NULL ) @@ -950,7 +954,11 @@ ivas_error ivas_jbm_dec_tc_fx( { Copy_Scale_sig_16_32_no_sat( hCPE->hCoreCoder[0]->hHQ_core->old_out_LB_fx, hCPE->hCoreCoder[0]->hHQ_core->old_out_LB_fx32, L_FRAME32k, sub( q, hCPE->hCoreCoder[0]->hHQ_core->Q_old_wtda_LB ) ); // q Copy_Scale_sig_16_32_no_sat( hCPE->hCoreCoder[0]->hHQ_core->old_out_fx, hCPE->hCoreCoder[0]->hHQ_core->old_out_fx32, L_FRAME48k, sub( q, hCPE->hCoreCoder[0]->hHQ_core->Q_old_wtda ) ); // q +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hCPE->hCoreCoder[0]->hHQ_core->Q_old_out_fx32 = q; +#else hCPE->hCoreCoder[0]->hHQ_core->q_old_outLB_fx = q; +#endif move16(); } IF( hCPE->hStereoDft != NULL ) @@ -996,7 +1004,6 @@ ivas_error ivas_jbm_dec_tc_fx( IF( hCPE->hCoreCoder[0] != NULL ) { - Copy_Scale_sig_32_16( hCPE->hCoreCoder[0]->hHQ_core->old_out_LB_fx32, hCPE->hCoreCoder[0]->hHQ_core->old_out_LB_fx, L_FRAME32k, sub( hCPE->hCoreCoder[0]->hHQ_core->Q_old_wtda_LB, q ) ); // Q_old_wtda_LB Copy_Scale_sig_32_16( hCPE->hCoreCoder[0]->hHQ_core->old_out_fx32, hCPE->hCoreCoder[0]->hHQ_core->old_out_fx, L_FRAME48k, sub( hCPE->hCoreCoder[0]->hHQ_core->Q_old_wtda, q ) ); // Q_old_wtda } @@ -1398,7 +1405,11 @@ ivas_error ivas_jbm_dec_tc_fx( { Copy_Scale_sig_16_32_no_sat( hCPE->hCoreCoder[0]->hHQ_core->old_out_LB_fx, hCPE->hCoreCoder[0]->hHQ_core->old_out_LB_fx32, L_FRAME32k, q - hCPE->hCoreCoder[0]->hHQ_core->Q_old_wtda_LB ); // q Copy_Scale_sig_16_32_no_sat( hCPE->hCoreCoder[0]->hHQ_core->old_out_fx, hCPE->hCoreCoder[0]->hHQ_core->old_out_fx32, L_FRAME48k, q - hCPE->hCoreCoder[0]->hHQ_core->Q_old_wtda ); // q +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hCPE->hCoreCoder[0]->hHQ_core->Q_old_out_fx32 = q; +#else hCPE->hCoreCoder[0]->hHQ_core->q_old_outLB_fx = q; +#endif move16(); } IF( hCPE->hStereoDft != NULL ) diff --git a/lib_dec/ivas_mdct_core_dec_fx.c b/lib_dec/ivas_mdct_core_dec_fx.c index 5f529be5557c15e4423a1633bb74612e675e6d9d..92511a62dc71e7b6f00c5025d9d9f621e0d3f5aa 100644 --- a/lib_dec/ivas_mdct_core_dec_fx.c +++ b/lib_dec/ivas_mdct_core_dec_fx.c @@ -1151,8 +1151,17 @@ void ivas_mdct_core_reconstruct_fx( Word16 *synthFB_fx; Word16 q_syn = 0; move16(); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Word16 q_win = 0; + Word16 q_winFB = 0; +#else +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + Word16 q_win = -1; +#else Word16 q_win = -2; +#endif move16(); +#endif /* TCX */ Word16 xn_buf_fx[L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX]; // Q(-2) Word16 tcx_offset[CPE_CHANNELS]; @@ -1213,32 +1222,76 @@ void ivas_mdct_core_reconstruct_fx( st->hTcxDec->q_old_synth = 0; move16(); } +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN Copy( st->hTcxDec->old_synth, synth_buf_fx, st->hTcxDec->old_synth_len ); // Q = st->hTcxDec->q_old_synth Copy_Scale_sig( st->hTcxDec->old_synthFB_fx, synth_bufFB_fx, st->hTcxDec->old_synth_lenFB, sub( st->hTcxDec->q_old_synth, st->Q_syn ) ); // Q = st->hTcxDec->q_old_synth +#endif q_syn = st->hTcxDec->q_old_synth; move16(); set16_fx( synth_fx, 0, L_FRAME_PLUS + M ); set16_fx( synthFB_fx, 0, L_FRAME_PLUS + M ); IF( st->core != ACELP_CORE ) { +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Word16 q_win0, q_winFB0; +#else Scale_sig( st->hTcxDec->syn_Overl_TDACFB, L_FRAME_MAX / 2, sub( q_win, st->hTcxDec->Q_syn_Overl_TDACFB ) ); // st->hTcxDec->Q_syn_Overl_TDACFB -> q_win - Scale_sig( st->hTcxDec->syn_Overl_TDAC, L_FRAME32k / 2, sub( q_win, st->hTcxDec->Q_syn_Overl_TDAC ) ); // st->hTcxDec->Q_syn_Overl_TDAC -> q_win - Scale_sig( st->hTcxDec->old_syn_Overl, L_FRAME32k / 2, sub( q_win, st->hTcxDec->Q_old_syn_Overl ) ); // Q(-1 - st->Q_syn) -> q_win + st->hTcxDec->Q_syn_Overl_TDACFB = q_win; + move16(); + Scale_sig( st->hTcxDec->syn_Overl_TDAC, L_FRAME32k / 2, sub( q_win, st->hTcxDec->Q_syn_Overl_TDAC ) ); // st->hTcxDec->Q_syn_Overl_TDAC -> q_win + st->hTcxDec->Q_syn_Overl_TDAC = q_win; + move16(); + Scale_sig( st->hTcxDec->old_syn_Overl, L_FRAME32k / 2, sub( q_win, st->hTcxDec->Q_old_syn_Overl ) ); // Q(-1 - st->Q_syn) -> q_win st->hTcxDec->Q_old_syn_Overl = q_win; - Scale_sig( st->hTcxDec->syn_Overl, L_FRAME32k / 2, sub( q_win, st->Q_syn ) ); // Q(st->Q_syn) -> q_win - Scale_sig( st->hTcxDec->syn_OverlFB, L_FRAME_MAX / 2, sub( q_win, st->Q_syn ) ); // Q(st->Q_syn) -> q_win - Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, sub( q_win, st->hHQ_core->Q_old_wtda_LB ) ); // Q(st->hHQ_core->Q_old_wtda_LB) -> q_win - Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, sub( q_win, st->hHQ_core->Q_old_wtda ) ); // Q(st->hHQ_core->Q_old_wtda) -> q_win + move16(); + Scale_sig( st->hTcxDec->syn_Overl, L_FRAME32k / 2, sub( q_win, st->Q_syn ) ); // Q(st->Q_syn) -> q_win + Scale_sig( st->hTcxDec->syn_OverlFB, L_FRAME_MAX / 2, sub( q_win, st->Q_syn ) ); // Q(st->Q_syn) -> q_win + Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, sub( q_win, st->hHQ_core->Q_old_wtda_LB ) ); // Q(st->hHQ_core->Q_old_wtda_LB) -> q_win + Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, sub( q_win, st->hHQ_core->Q_old_wtda ) ); // Q(st->hHQ_core->Q_old_wtda) -> q_win +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + st->hHQ_core->Q_old_wtda = q_win; + move16(); + st->hHQ_core->Q_old_wtda_LB = q_win; + move16(); +#endif Scale_sig( synth_buf_fx, add( add( st->hTcxDec->old_synth_len, L_FRAME_PLUS ), M ), sub( q_win, q_syn ) ); // q_syn -> q_win Scale_sig( synth_bufFB_fx, add( add( st->hTcxDec->old_synth_lenFB, L_FRAME_PLUS ), M ), sub( q_win, q_syn ) ); // q_syn -> q_win Scale_sig( st->syn, M + 1, sub( q_win, st->Q_syn ) ); // st->Q_syn -> q_win +#endif + FOR( k = 0; k < nSubframes[ch]; k++ ) { init_tcx_info_fx( st, L_frame_global[ch], L_frame_globalTCX[ch], k, bfi, &tcx_offset[ch], &tcx_offsetFB[ch], &L_frame[ch], &L_frameTCX[ch], &left_rect[ch], &L_spec[ch] ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + assert( nSubframes[ch] <= 2 ); + q_win0 = q_win; + move16(); + q_winFB0 = q_winFB; + move16(); + q_win = -2; + move16(); + q_winFB = -2; + move16(); +#endif + IF( !skip_decoding ) { +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + test(); + test(); + IF( ( EQ_16( hCPE->nchan_out, 1 ) && EQ_16( st->hTcxDec->kernel_type[k], MDST_IV ) ) || EQ_16( st->hTcxCfg->tcx_last_overlap_mode, TRANSITION_OVERLAP ) ) + { + decoder_tcx_imdct_fx( st, L_frame_global[ch], L_frame_globalTCX[ch], L_spec[ch], tcx_offset[ch], tcx_offsetFB[ch], L_frame[ch], L_frameTCX[ch], left_rect[ch], x_fx[ch][k], q_x, xn_buf_fx, &q_win, &q_winFB, + MDCT_IV, fUseTns[ch][k], &synth_fx[k * L_frame[ch]], &synthFB_fx[k * L_frameTCX[ch]], bfi, k, 0 ); + } + ELSE + { + decoder_tcx_imdct_fx( st, L_frame_global[ch], L_frame_globalTCX[ch], L_spec[ch], tcx_offset[ch], tcx_offsetFB[ch], L_frame[ch], L_frameTCX[ch], left_rect[ch], x_fx[ch][k], q_x, xn_buf_fx, &q_win, &q_winFB, + st->hTcxDec->kernel_type[k], fUseTns[ch][k], &synth_fx[k * L_frame[ch]], &synthFB_fx[k * L_frameTCX[ch]], bfi, k, 0 ); + } +#else test(); test(); IF( ( EQ_16( hCPE->nchan_out, 1 ) && EQ_16( st->hTcxDec->kernel_type[k], MDST_IV ) ) || EQ_16( st->hTcxCfg->tcx_last_overlap_mode, TRANSITION_OVERLAP ) ) @@ -1251,21 +1304,86 @@ void ivas_mdct_core_reconstruct_fx( decoder_tcx_imdct_fx( st, L_frame_global[ch], L_frame_globalTCX[ch], L_spec[ch], tcx_offset[ch], tcx_offsetFB[ch], L_frame[ch], L_frameTCX[ch], left_rect[ch], x_fx[ch][k], q_x, xn_buf_fx, q_win, st->hTcxDec->kernel_type[k], fUseTns[ch][k], &synth_fx[k * L_frame[ch]], &synthFB_fx[k * L_frameTCX[ch]], bfi, k, 0 ); } +#endif } ELSE { +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + q_win = Q15; + move16(); + q_winFB = Q15; + move16(); +#endif set16_fx( &synth_fx[k * L_frame[ch]], 0, L_frame[ch] ); set16_fx( &synthFB_fx[k * L_frame[ch]], 0, L_frameTCX[ch] ); /* Note: these buffers are not subframe-based, hence no indexing with k */ set16_fx( &st->hHQ_core->old_out_LB_fx[0], 0, L_frame[ch] ); set16_fx( &st->hHQ_core->old_out_fx[0], 0, L_frameTCX[ch] ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + st->hHQ_core->Q_old_wtda = Q15; + st->hHQ_core->Q_old_wtda_LB = Q15; +#endif set16_fx( &st->hTcxDec->syn_Overl[0], 0, shr( L_frame[ch], 1 ) ); set16_fx( &st->hTcxDec->syn_OverlFB[0], 0, shr( L_frameTCX[ch], 1 ) ); set16_fx( &st->hTcxDec->syn_Overl_TDAC[0], 0, shr( L_frame[ch], 1 ) ); set16_fx( &st->hTcxDec->syn_Overl_TDACFB[0], 0, shr( L_frameTCX[ch], 1 ) ); } } +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + + IF( nSubframes[ch] > 1 ) + { + Word16 q_winN, q_winFBN; + + q_winN = s_min( q_win, q_win0 ); + q_winFBN = s_min( q_winFB, q_winFB0 ); + // Need to rescale for each subframe, because q_win/q_winFB might had changed for the next subframe. Other OLA buffers are rescaled each time. + Scale_sig( synth_fx, L_frame[ch], sub( q_winN, q_win0 ) ); // q_win -> st->hTcxDec->q_old_synth + Scale_sig( synthFB_fx, L_frameTCX[ch], sub( q_winFBN, q_winFB0 ) ); // q_winFB -> st->hTcxDec->q_old_synth + Scale_sig( synth_fx + L_frame[ch], add( L_frame[ch], M ), sub( q_winN, q_win ) ); // q_win -> st->hTcxDec->q_old_synth + Scale_sig( synthFB_fx + L_frameTCX[ch], add( L_frameTCX[ch], M ), sub( q_winFBN, q_winFB ) ); // q_winFB -> st->hTcxDec->q_old_synth + q_win = q_winN; + move16(); + q_winFB = q_winFBN; + move16(); + } + + Copy_Scale_sig( st->hTcxDec->old_synth, synth_buf_fx, st->hTcxDec->old_synth_len, sub( q_win, st->hTcxDec->q_old_synth ) ); // q_win + Copy_Scale_sig( st->hTcxDec->old_synthFB_fx, synth_bufFB_fx, st->hTcxDec->old_synth_lenFB, sub( q_winFB, st->Q_syn ) ); // q_winFB + + test(); + IF( ( bfi == 0 ) && st->hTonalMDCTConc != NULL ) + { + TonalMDCTConceal_SaveTimeSignal_ivas_fx( st->hTonalMDCTConc, synthFB_fx, q_winFB, L_frameTCX[ch] ); + } + decoder_tcx_post_ivas_fx( st, synth_fx, synthFB_fx, q_winFB, NULL, bfi, MCT_flag ); + + // norm(synth_buf) >= q_syn - q_win + // norm(synth_buf) + q_win >= q_syn + sf = s_min( getScaleFactor16( synth_buf_fx, add( add( st->hTcxDec->old_synth_len, L_FRAME_PLUS ), M ) ), getScaleFactor16( synth_bufFB_fx, add( add( st->hTcxDec->old_synth_lenFB, L_FRAME_PLUS ), M ) ) ); + q_syn = add( sub( sf, 1 ), s_min( q_win, q_winFB ) ); + st->Q_syn = q_syn; + move16(); + + // norm(old_out) >= st->Q_syn - q_win + // norm(old_out) + q_win >= st->Q_syn + sf = s_min( getScaleFactor16( st->hHQ_core->old_out_fx, L_FRAME48k ), getScaleFactor16( st->hHQ_core->old_out_LB_fx, L_FRAME32k ) ); + st->Q_syn = add( sf, s_min( q_win, q_winFB ) ); + + Scale_sig( synth_buf_fx, add( add( st->hTcxDec->old_synth_len, L_FRAME_PLUS ), M ), sub( q_syn, q_win ) ); // st->hTcxDec->q_old_synth -> q_syn + Scale_sig( synth_bufFB_fx, add( add( st->hTcxDec->old_synth_lenFB, L_FRAME_PLUS ), M ), sub( q_syn, q_winFB ) ); // st->hTcxDec->q_old_synth -> q_syn + // Scale_sig( st->syn, M + 1, add( st->Q_syn, 2 ) ); + Scale_sig( st->syn, M + 1, sub( st->Q_syn, q_win ) ); + IF( NE_16( st->element_mode, IVAS_CPE_MDCT ) ) + { + Scale_sig( st->mem_syn2_fx, M, sub( st->Q_syn, st->prev_Q_syn ) ); // st->Q_syn gets a new value, but st->mem_syn2_fx got its value from E_UTIL_f_preemph3 with st->Q_syn = st->prev_Q_syn + } + ELSE + { + Scale_sig( st->mem_syn2_fx, M, sub( st->Q_syn, add( q_win, 2 ) ) ); // q_win+2 -> st->Q_syn: in this case, E_UTIL_f_preemph2 shifts st->mem_syn2_fx by 2 bits + } +#else test(); IF( ( bfi == 0 ) && st->hTonalMDCTConc != NULL ) { @@ -1298,16 +1416,33 @@ void ivas_mdct_core_reconstruct_fx( Scale_sig( synth_bufFB_fx, add( add( st->hTcxDec->old_synth_lenFB, L_FRAME_PLUS ), M ), sub( q_syn, q_win ) ); // q_win -> q_syn Scale_sig( st->syn, M + 1, add( st->Q_syn, 2 ) ); Scale_sig( st->hTcxDec->syn_OverlFB, L_FRAME_MAX / 2, sub( st->Q_syn, q_win ) ); // q_win -> st->Q_syn - Scale_sig( st->hTcxDec->syn_Overl, L_FRAME32k / 2, sub( st->Q_syn, q_win ) ); // q_win -> st->Q_syn - Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, sub( st->Q_syn, q_win ) ); // q_win -> st->Q_syn - Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, sub( st->Q_syn, q_win ) ); // q_win -> st->Q_syn + st->hTcxDec->Q_syn_OverlFB = st->Q_syn; + Scale_sig( st->hTcxDec->syn_Overl, L_FRAME32k / 2, sub( st->Q_syn, q_win ) ); // q_win -> st->Q_syn + st->hTcxDec->Q_syn_Overl = st->Q_syn; +#ifndef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD + Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, sub( st->Q_syn, q_win ) ); // q_win -> st->Q_syn + Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, sub( st->Q_syn, q_win ) ); // q_win -> st->Q_syn st->hHQ_core->Q_old_wtda = st->Q_syn; move16(); st->hHQ_core->Q_old_wtda_LB = st->Q_syn; move16(); +#endif +#endif +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_DYNAMIC_QOLD_NO + Scale_sig( st->mem_syn2_fx, M, sub( st->Q_syn, q_win ) ); // q_win -> Q_syn +#endif } ELSE /*ACELP core for ACELP-PLC */ { +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + Copy( st->hTcxDec->old_synth, synth_buf_fx, st->hTcxDec->old_synth_len ); // Q = st->hTcxDec->q_old_synth + Copy_Scale_sig( st->hTcxDec->old_synthFB_fx, synth_bufFB_fx, st->hTcxDec->old_synth_lenFB, sub( st->hTcxDec->q_old_synth, st->Q_syn ) ); // Q = st->hTcxDec->q_old_synth + + // Temporary workaround: con_tcx_ivas_fx() should be analyzed for potential issues. + // Scale_sig( synth_bufFB_fx, st->hTcxDec->old_synth_lenFB, -2 ); + // Scale_sig( synth_bufFB_fx, st->hTcxDec->old_synth_lenFB, +2 ); + /////////////////////////////////////////////////////////////////////////////////// +#endif assert( EQ_16( st->bfi, 1 ) ); /* PLC: [TCX: TD PLC] */ IF( MCT_flag != 0 ) @@ -1336,6 +1471,13 @@ void ivas_mdct_core_reconstruct_fx( st->Q_syn = q_syn; move16(); st->hTcxDec->Q_old_syn_Overl = add( st->hTcxDec->Q_old_syn_Overl, q_syn ); + move16(); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT_QWIN + st->hTcxDec->Q_syn_OverlFB = add( st->hTcxDec->Q_syn_OverlFB, q_syn ); + move16(); + // st->hTcxDec->Q_syn_Overl = add( st->hTcxDec->Q_syn_Overl, q_syn ); // It is disabled because it increased the number of regressions. + // move16(); +#endif IF( ( EQ_16( st->nbLostCmpt, 1 ) ) || ( st->hTcxDec->tcxConceal_recalc_exc != 0 ) ) { diff --git a/lib_dec/ivas_post_proc_fx.c b/lib_dec/ivas_post_proc_fx.c index c0e69b91663e2275b0398e4dec3439df3baf5c1b..1b02b2b17fa81322d0581b49a56ebe28bc1532af 100644 --- a/lib_dec/ivas_post_proc_fx.c +++ b/lib_dec/ivas_post_proc_fx.c @@ -112,6 +112,9 @@ void ivas_post_proc_fx( { Word16 numZeros = (Word16) ( NS2SA_FX2( output_Fs, N_ZERO_MDCT_NS ) ); /*Q0*/ move16(); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + assert( sts[n]->hHQ_core->Q_old_out_fx32 == Q11 ); +#endif Copy32( sts[n]->hHQ_core->old_out_fx32 + numZeros, sts[n]->hTcxDec->FBTCXdelayBuf_32, delay_comp ); /*Q11*/ } @@ -588,7 +591,11 @@ void stereo_dft_dec_core_switching_fx( Word32 tmp_fade_fx[max( STEREO_DFT_ALLPASS_FADELEN_12k8, STEREO_DFT_ALLPASS_FADELEN_16k )]; Copy32( st->hHQ_core->old_out_LB_fx32 + numZeros, hCPE->hStereoDft->ap_fade_mem_fx, ap_fade_len ); /*st->hHQ_core->q_old_outLB_fx*/ +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + hCPE->hStereoDft->q_ap_fade_mem_fx = st->hHQ_core->Q_old_out_fx32; +#else hCPE->hStereoDft->q_ap_fade_mem_fx = st->hHQ_core->q_old_outLB_fx; +#endif move16(); test(); diff --git a/lib_dec/ivas_stereo_switching_dec_fx.c b/lib_dec/ivas_stereo_switching_dec_fx.c index b72a23f6369451f5dfeaf9dd1703804eff373ff5..57f631204777984c44b613c1b08fdffed3590fcf 100644 --- a/lib_dec/ivas_stereo_switching_dec_fx.c +++ b/lib_dec/ivas_stereo_switching_dec_fx.c @@ -68,6 +68,10 @@ static ivas_error allocate_CoreCoder_TCX_fx( reset_tcx_overl_buf_fx( st->hTcxDec ); // st->hTcxDec->CngLevelBackgroundTrace_bfi = 0; //initializing to avoid garbage overflow; set16_fx( st->hTcxDec->syn_OverlFB, 0, L_FRAME48k / 2 ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + st->hTcxDec->Q_syn_OverlFB = 0; + move16(); +#endif set16_fx( st->hTcxDec->old_synth, 0, OLD_SYNTH_INTERNAL_DEC ); st->hTcxDec->q_old_synth = 0; move16(); @@ -418,6 +422,9 @@ ivas_error stereo_memory_dec_fx( test(); IF( EQ_16( hCPE->last_element_mode, IVAS_CPE_MDCT ) && EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) ) { +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + assert( hCPE->hCoreCoder[0]->hHQ_core->Q_old_out_fx32 == hCPE->hCoreCoder[1]->hHQ_core->Q_old_out_fx32 ); +#endif v_add_32( hCPE->hCoreCoder[0]->hHQ_core->old_out_fx32, hCPE->hCoreCoder[1]->hHQ_core->old_out_fx32, hCPE->hCoreCoder[0]->hHQ_core->old_out_fx32, extract_l( Mpy_32_16_1( output_Fs, INV_FRAME_PER_SEC_Q15 ) ) ); /* exp(exp_old_out) */ v_multc_fixed_16( hCPE->hCoreCoder[0]->hHQ_core->old_out_fx32, 16384 /* 0.5 in Q15 */, hCPE->hCoreCoder[0]->hHQ_core->old_out_fx32, extract_l( Mpy_32_16_1( output_Fs, INV_FRAME_PER_SEC_Q15 ) ) ); /* exp(exp_old_out) */ @@ -2149,6 +2156,10 @@ void stereo_td2dft_update_fx( move16(); /* update buffers used for fading when switching to DFT Stereo */ +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + assert( sts[0]->hHQ_core->Q_old_out_fx32 == sts[1]->hHQ_core->Q_old_out_fx32 ); +#endif + v_add_fx( sts[0]->hHQ_core->old_out_LB_fx32 + nsLB, sts[1]->hHQ_core->old_out_LB_fx32 + nsLB, hCPE->old_outLB_mdct_fx, old_outLB_len ); L_lerp_fx_q11( hCPE->old_outLB_mdct_fx, hCPE->old_outLB_mdct_fx, STEREO_MDCT2DFT_FADE_LEN_48k, old_outLB_len ); FOR( i = 0; i < old_outLB_len; i++ ) diff --git a/lib_dec/stat_dec.h b/lib_dec/stat_dec.h index 28a97f072fa86cd9ea94c4c884a0a0c5740c1459..3015b85e4a9b2386a20501e468a11d2c6597ccc9 100644 --- a/lib_dec/stat_dec.h +++ b/lib_dec/stat_dec.h @@ -700,11 +700,15 @@ typedef struct hq_nbfec_structure typedef struct hq_dec_structure { Word32 old_out_fx32[L_FRAME48k]; /* HQ core - previous synthesis for OLA */ - Word16 old_out_fx[L_FRAME48k]; /* HQ core - previous synthesis for OLA */ + Word16 old_out_fx[L_FRAME48k]; /* HQ core - previous synthesis for OLA. Q_old_wtda */ Word16 exp_old_out; - Word16 old_out_LB_fx[L_FRAME32k]; /* HQ core - previous synthesis for OLA for Low Band */ + Word16 old_out_LB_fx[L_FRAME32k]; /* HQ core - previous synthesis for OLA for Low Band. Q_old_wtda_LB */ Word32 old_out_LB_fx32[L_FRAME32k]; +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + Word16 Q_old_out_fx32; /* Q for both old_out_fx32 and old_out_LB_fx32 */ +#else Word16 q_old_outLB_fx; +#endif Word16 Q_old_wtda_LB; Word16 Q_old_wtda; Word16 Q_old_postdec; /*scaling of the output of core_switching_post_dec_fx() */ diff --git a/lib_dec/updt_dec_fx.c b/lib_dec/updt_dec_fx.c index 81046fb19c71b0ceef898d08f1e36163f092644a..d1fe01b4290a9243e01b799fd041880386c4fb30 100644 --- a/lib_dec/updt_dec_fx.c +++ b/lib_dec/updt_dec_fx.c @@ -1207,6 +1207,10 @@ void update_decoder_LPD_cng( Copy( hTcxDec->old_synth + st->L_frame, hTcxDec->old_synth, sub( hTcxDec->old_synth_len, st->L_frame ) ); Copy( synth, hTcxDec->old_synth + sub( hTcxDec->old_synth_len, st->L_frame ), st->L_frame ); bufferCopyFx( synth + sub( st->L_frame, ( st->L_frame / 2 ) ), hTcxDec->old_syn_Overl, st->L_frame / 2, 0 /*Qf_syn*/, -1 /*Qf_old_xnq*/, 0, 0 /*Q_old_xnq*/ ); +#ifdef FIX_1348_BIT_PRECISION_IMPROVEMENT + st->hTcxDec->Q_old_syn_Overl = sub( st->Q_syn, 1 ); + move16(); +#endif hTcxDec->tcxltp_last_gain_unmodified = 0; move16();