diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index ba65afec02816a7ff13a61885e29a07ccc4bc2bf..6b073e87ffbbbd9152610ea6b868bd2449f09b49 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -3026,17 +3026,34 @@ void InternalTCXDecoder_fx( void stereo_tcx_core_enc( Encoder_State *st, /* i/o: encoder state structure */ +#ifdef SIMPLIFY_CORE_ENC + Word16 new_samples_12k8[], /* i : buffer of input signal @12.8 kHz */ + Word16 new_samples_16k[], /* i : buffer of input signal @16 kHz */ +#else const Word16 new_samples_12k8[], /* i : buffer of input signal @12.8 kHz */ const Word16 new_samples_16k[], /* i : buffer of input signal @16 kHz */ +#endif const Word16 Aw_fx[], /* i : weighted A(z) unquant. for subframes,Q12*/ Word16 lsp_new_fx[], /* i : LSPs at the end of the frame, Q15 */ Word16 lsp_mid_fx[], /* i : LSPs in the middle of the frame, Q15 */ Word16 pitch_buf_fx[NB_SUBFR16k], /* o : pitch for each subframe, Q6 */ const Word16 last_element_mode, /* i : last element mode, Q0 */ const Word16 vad_hover_flag, /* i : VAD hangover flag, Q0 */ - Word16 Q_new +#ifdef SIMPLIFY_CORE_ENC + const Word16 Q_new_orig /* i : Scaling factor of new_samples_xx[] */ +#else + Word16 Q_new +#endif +); + +#ifdef SIMPLIFY_CORE_ENC +void stereo_tcx_enc_scale_buffers( + Encoder_State *st, /* i/o: encoder state structure */ + const Word16 n_channels, /* i : number of core channels */ + const Word16 Q_spec_old /* i : Q of old spectrum */ ); +#endif Word16 transient_analysis_ivas_fx( TRAN_DET_HANDLE hTranDet, /* i : handle transient detection */ const Word16 cor_map_LT[], /* i : LT correlation map Q_cor_map = Qx */ @@ -5966,7 +5983,11 @@ void pre_proc_ivas_fx( const Word16 vad_hover_flag, /* i : VAD hangover flag Q0*/ const Word16 flag_16k_smc, /* i : flag to indicate if the OL SMC is run at 16 kHz Q0*/ Word32 enerBuffer_fx[CLDFB_NO_CHANNELS_MAX], /* i : energy buffer e_enerBuffer */ +#ifdef SIMPLIFY_CORE_ENC + Word16 e_enerBuffer, /* i : Q value of energy buffer */ 
+#else const Word16 e_enerBuffer, /* i : Q value of energy buffer */ +#endif Word16 fft_buff_fx[2 * L_FFT], /* i : FFT buffer Qx*/ const Word16 cor_map_sum_fx, /* i : speech/music clasif. parameter Q8*/ Word16 *Q_new /* i/o: Q factor of speech buffers */ @@ -6035,8 +6056,8 @@ ivas_error ivas_core_enc_fx( const Word16 loc_harm[], /* i : harmonicity flag Q0*/ const Word16 cor_map_sum_fx[], /* i : speech/music clasif. parameter Q8*/ const Word16 vad_flag_dtx[], /* i : HE-SAD flag with additional DTX HO Q0*/ - Word32 enerBuffer_fx[][CLDFB_NO_CHANNELS_MAX], /* o : energy buffer enerBuffer_fx_exp*/ - Word16 enerBuffer_fx_exp[], /* o : energy buffer */ + Word32 enerBuffer_fx[][CLDFB_NO_CHANNELS_MAX], /* i : energy buffer enerBuffer_fx_exp*/ + Word16 enerBuffer_fx_exp[], /* i : energy buffer */ Word16 fft_buff_fx[][2 * L_FFT], /* i : FFT buffer Qx*/ const Word16 tdm_SM_or_LRTD_Pri, /* i : channel combination scheme flag Q0*/ const Word16 ivas_format, /* i : IVAS format Q0*/ @@ -6331,7 +6352,12 @@ UWord16 get_indice_st( void stereo_mdct_core_enc_fx( CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ +#ifdef SIMPLIFY_CORE_ENC + Word16 new_samples[CPE_CHANNELS][L_INP], /* i : new samples Q_new[] - 1*/ + const Word16 Q_new[CPE_CHANNELS], /* i : Scaling factor of new_samples */ +#else Word16 new_samples[CPE_CHANNELS][L_INP], /* i : new samples Qnew-1*/ +#endif Word16 old_wsp[CPE_CHANNELS][L_WSP], /* i : 12.8kHz weighted speech (for LTP Qx*/ Word16 pitch_buf_fx[CPE_CHANNELS][NB_SUBFR16k] /* o : floating pitch for each subframe Q6*/ ); @@ -6378,7 +6404,12 @@ void stereo_switching_dec( void ivas_mdct_core_whitening_enc_fx( CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ +#ifdef SIMPLIFY_CORE_ENC + Word16 new_samples_fx[CPE_CHANNELS][L_INP], /* i : new samples Q_new[]-1*/ + const Word16 Q_new_orig[CPE_CHANNELS], /* i : Scaling factor of new_samples */ +#else Word16 new_samples_fx[CPE_CHANNELS][L_INP], /* i : new samples */ +#endif Word16 old_wsp_fx[CPE_CHANNELS][L_WSP], /* 
i : 12.8kHz weighted speech (for LTP */ Word16 pitch_buf[CPE_CHANNELS][NB_SUBFR16k], /* o : floating pitch for each subframe */ Word32 *mdst_spectrum_long[CPE_CHANNELS], /* o : buffer for MDST spectrum */ diff --git a/lib_com/options.h b/lib_com/options.h index 6f0d8e55eecabd7851829ca66e771ef0e0d4ba91..b4ef10a351d4328b1b68bf47eb3cab325bb8f0a7 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -92,6 +92,7 @@ #define HARM_FD_BWE /* VA: harmonize core-coder FD BWE function duplications */ #define FIX_2411_Harmonize_TNSANALYSIS_DETECTTNSFILT /* FhG: basop issue 2411: harmonize TNSAnalysis*_fx(), DetectTnsFilt*_fx()*/ #define HARMONIZE_READ_DECODE_TNS /* FhG basop 2389: Harm between two pairs of functions. */ +#define SIMPLIFY_CORE_ENC /* VA: basop issue 2430: Simplify ivas_core_enc_fx() */ /* #################### End BE switches ################################## */ diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 49e042e2db19e255f9c8204595d6a34082816bba..13bff9e0e6fc8f629ffe368bec81480a1457bdb8 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -7067,8 +7067,12 @@ void fd_bwe_dec_init( ); void hq_core_enc_ivas_fx( - Encoder_State *st, /* i/o: encoder state structure */ - const Word16 *audio_fx, /* i : input audio signal Q0 */ + Encoder_State *st, /* i/o: encoder state structure */ +#ifdef SIMPLIFY_CORE_ENC + Word16 *audio_fx, /* i : input audio signal q_inp */ +#else + const Word16 *audio_fx, /* i : input audio signal Q0 */ +#endif const Word16 input_frame, /* i : frame length Q0*/ const Word16 hq_core_type, /* i : HQ core type Q0*/ const Word16 Voicing_flag, /* i : Voicing flag for FER method selection Q0*/ diff --git a/lib_enc/hq_core_enc_fx.c b/lib_enc/hq_core_enc_fx.c index 6c4aa6bf4ab9bd8f7398d3d8e6d7807be77d7bf3..fb40edd5a05e79aa121a0e53f81664b9af17f6cb 100644 --- a/lib_enc/hq_core_enc_fx.c +++ b/lib_enc/hq_core_enc_fx.c @@ -271,8 +271,12 @@ void HQ_core_enc_init_fx( } void hq_core_enc_ivas_fx( - Encoder_State *st, /* i/o: encoder state 
structure */ - const Word16 *audio_fx, /* i : input audio signal Q0 */ + Encoder_State *st, /* i/o: encoder state structure */ +#ifdef SIMPLIFY_CORE_ENC + Word16 *audio_fx, /* i : input audio signal q_inp */ +#else + const Word16 *audio_fx, /* i : input audio signal Q0 */ +#endif const Word16 input_frame_orig, /* i : frame length Q0*/ const Word16 hq_core_type, /* i : HQ core type Q0*/ const Word16 Voicing_flag, /* i : Voicing flag for FER method selection Q0*/ @@ -325,6 +329,22 @@ void hq_core_enc_ivas_fx( st->hTcxCfg->tcx_curr_overlap_mode = ALDO_WINDOW; move16(); +#ifdef SIMPLIFY_CORE_ENC + /*-------------------------------------------------------------------------- + * Scaling of buffers + *--------------------------------------------------------------------------*/ + + Scale_sig( audio_fx, input_frame, negate( st->q_inp ) ); // Q0 + st->q_inp = 0; + move16(); + Scale_sig( st->old_input_signal_fx, input_frame, negate( st->q_old_inp ) ); // Q0 + st->q_old_inp = 0; + move16(); + Scale_sig( st->hTcxEnc->Txnq, L_FRAME32k / 2 + 64, sub( 0, st->hTcxEnc->q_Txnq ) ); // Q0 + st->hTcxEnc->q_Txnq = 0; + move16(); + +#endif /*-------------------------------------------------------------------------- * Preprocessing in the first HQ frame after ACELP frame * Find the number of bits for PVQ coding diff --git a/lib_enc/ivas_core_enc_fx.c b/lib_enc/ivas_core_enc_fx.c index 55956600df3ebf63cd1bf0c056022182efa9ec35..ec7cf080e88defafc82ed62949ed0d9fc0f0f395 100644 --- a/lib_enc/ivas_core_enc_fx.c +++ b/lib_enc/ivas_core_enc_fx.c @@ -73,8 +73,8 @@ ivas_error ivas_core_enc_fx( const Word16 loc_harm[], /* i : harmonicity flag Q0*/ const Word16 cor_map_sum_fx[], /* i : speech/music clasif. 
parameter Q8*/ const Word16 vad_flag_dtx[], /* i : HE-SAD flag with additional DTX HO Q0*/ - Word32 enerBuffer_fx[][CLDFB_NO_CHANNELS_MAX], /* o : energy buffer enerBuffer_fx_exp*/ - Word16 enerBuffer_fx_exp[], /* o : energy buffer */ + Word32 enerBuffer_fx[][CLDFB_NO_CHANNELS_MAX], /* i : energy buffer enerBuffer_fx_exp*/ + Word16 enerBuffer_fx_exp[], /* i : energy buffer */ Word16 fft_buff_fx[][2 * L_FFT], /* i : FFT buffer Qx*/ const Word16 tdm_SM_or_LRTD_Pri, /* i : channel combination scheme flag Q0*/ const Word16 ivas_format, /* i : IVAS format Q0*/ @@ -104,7 +104,11 @@ ivas_error ivas_core_enc_fx( Word16 diff_nBits; ivas_error error; Word16 max_num_indices_BWE; +#ifdef SIMPLIFY_CORE_ENC + Word16 i, shift; +#else Word16 i, shift, Q_min; +#endif FOR( i = 0; i < CPE_CHANNELS; i++ ) { @@ -198,6 +202,7 @@ ivas_error ivas_core_enc_fx( * Pre-processing, incl. Decision matrix *---------------------------------------------------------------------*/ +#ifndef SIMPLIFY_CORE_ENC IF( st->cldfbAnaEnc ) { Word16 tmp_shift = L_norm_arr( enerBuffer_fx[n], st->cldfbAnaEnc->no_channels ); @@ -212,6 +217,7 @@ ivas_error ivas_core_enc_fx( Scale_sig( fft_buff_fx[n], ( 2 * L_FFT ), -1 ); // To create 1 headroom for addition of magnitude square spectrum // fft_buff_fx_exp = add(fft_buff_fx_exp,1); +#endif FOR( i = 0; i < st->nb_subfr; i++ ) { Scale_sig( &Aw_fx[n][i * ( M + 1 )], M + 1, sub( norm_s( Aw_fx[n][i * ( M + 1 )] ), 2 ) ); // scaling to Q12 @@ -311,16 +317,19 @@ ivas_error ivas_core_enc_fx( test(); IF( ( EQ_16( st->core, TCX_20_CORE ) || EQ_16( st->core, TCX_10_CORE ) ) && NE_16( st->element_mode, IVAS_CPE_MDCT ) ) { + /* TCX core encoder */ +#ifdef SIMPLIFY_CORE_ENC + stereo_tcx_core_enc( st, old_inp_12k8_fx[n] + L_INP_MEM, old_inp_16k_fx[n] + L_INP_MEM, Aw_fx[n], lsp_new_fx[n], lsp_mid_fx[n], pitch_buf_fx[n], last_element_mode, vad_hover_flag[0], Q_new[n] ); +#else TCX_ENC_HANDLE hTcxEnc = st->hTcxEnc; Word16 Q_spec_old, L_spec; - Scale_sig( old_inp_12k8_fx[n], 
L_INP_12k8, sub( Q1, Q_new[n] ) ); // Q0 - Scale_sig( old_inp_16k_fx[n], L_INP, sub( Q1, Q_new[n] ) ); // Q0 - + Scale_sig( old_inp_12k8_fx[n], L_INP_12k8, sub( Q1, Q_new[n] ) ); // Q0 + Scale_sig( old_inp_16k_fx[n], L_INP, sub( Q1, Q_new[n] ) ); // Q0 Scale_sig( st->hTcxEnc->Txnq, L_FRAME32k / 2 + 64, sub( negate( 1 ), st->hTcxEnc->q_Txnq ) ); /* Q(-1) */ st->hTcxEnc->q_Txnq = -Q1; move16(); - Q_spec_old = hTcxEnc->spectrum_long_e; + Q_spec_old = st->hTcxEnc->spectrum_long_e; move16(); /* TCX core encoder */ @@ -362,11 +371,13 @@ ivas_error ivas_core_enc_fx( move16(); hTcxEnc->spectrum_e[1] = hTcxEnc->spectrum_long_e; move16(); +#endif } IF( EQ_16( st->core, HQ_CORE ) ) { /* HQ core encoder */ +#ifndef SIMPLIFY_CORE_ENC Scale_sig( st->input_fx, input_frame, negate( st->q_inp ) ); // Q0 st->q_inp = 0; move16(); @@ -376,6 +387,7 @@ ivas_error ivas_core_enc_fx( Scale_sig( st->hTcxEnc->Txnq, L_FRAME32k / 2 + 64, sub( 0, st->hTcxEnc->q_Txnq ) ); // Q0 st->hTcxEnc->q_Txnq = 0; move16(); +#endif hq_core_enc_ivas_fx( st, st->input_fx, input_frame, NORMAL_HQ_CORE, Voicing_flag[n], vad_hover_flag[0] ); } @@ -404,6 +416,9 @@ ivas_error ivas_core_enc_fx( { Word16 mdst_spectrum_e[CPE_CHANNELS][NB_DIV]; Word16 orig_spectrum_e[CPE_CHANNELS][NB_DIV]; +#ifdef SIMPLIFY_CORE_ENC + Word16 Q_spec_old[CPE_CHANNELS]; +#else Word16 pitch_buf_fx_new[CPE_CHANNELS][NB_SUBFR16k]; /* Q6 */ FOR( n = 0; n < n_CoreChannels; n++ ) { @@ -416,35 +431,50 @@ ivas_error ivas_core_enc_fx( } } Word16 Q_spec_old[2], L_spec; +#endif Q_spec_old[0] = hCPE->hCoreCoder[0]->hTcxEnc->spectrum_long_e; move16(); Q_spec_old[1] = hCPE->hCoreCoder[1]->hTcxEnc->spectrum_long_e; move16(); +#ifdef SIMPLIFY_CORE_ENC + ivas_mdct_core_whitening_enc_fx( hCPE, old_inp_16k_fx, Q_new, old_wsp_fx, pitch_buf_fx, hMCT->p_mdst_spectrum_long_fx[cpe_id], hMCT->tnsBits[cpe_id], hMCT->p_orig_spectrum_long_fx[cpe_id], + hMCT->tnsSize[cpe_id], hMCT->p_param[cpe_id], hMCT->hBstr, 1, hMCT->nchan_out_woLFE, mdst_spectrum_e, 
orig_spectrum_e ); +#else ivas_mdct_core_whitening_enc_fx( hCPE, old_inp_16k_fx, old_wsp_fx, pitch_buf_fx_new, hMCT->p_mdst_spectrum_long_fx[cpe_id], hMCT->tnsBits[cpe_id], hMCT->p_orig_spectrum_long_fx[cpe_id], hMCT->tnsSize[cpe_id], hMCT->p_param[cpe_id], hMCT->hBstr, 1, hMCT->nchan_out_woLFE, mdst_spectrum_e, orig_spectrum_e ); +#endif FOR( i = 0; i < CPE_CHANNELS; i++ ) { +#ifndef SIMPLIFY_CORE_ENC st = sts[i]; Word16 nSubframes = NB_DIV; move16(); + Scale_sig( old_inp_16k_fx[i], L_INP, sub( Q_new[i], Q1 ) ); // Q_new[n] - 1 + if ( EQ_16( st->hTcxEnc->tcxMode, TCX_20 ) ) { nSubframes = 1; move16(); } - +#endif hMCT->q_mdst_spectrum_long_fx[cpe_id][i][0] = sub( Q31, mdst_spectrum_e[i][0] ); move16(); +#ifdef SIMPLIFY_CORE_ENC + IF( NE_16( sts[i]->hTcxEnc->tcxMode, TCX_20 ) ) +#else IF( EQ_16( nSubframes, NB_DIV ) ) +#endif { hMCT->q_mdst_spectrum_long_fx[cpe_id][i][1] = sub( Q31, mdst_spectrum_e[i][1] ); move16(); +#ifndef SIMPLIFY_CORE_ENC } IF( EQ_16( nSubframes, NB_DIV ) ) { +#endif Word16 max_e = s_max( orig_spectrum_e[i][0], orig_spectrum_e[i][1] ); scale_sig32( hMCT->p_orig_spectrum_long_fx[cpe_id][i], N_TCX10_MAX, sub( orig_spectrum_e[i][0], max_e ) ); // exp(max_e) scale_sig32( hMCT->p_orig_spectrum_long_fx[cpe_id][i] + N_TCX10_MAX, N_TCX10_MAX, sub( orig_spectrum_e[i][1], max_e ) ); // exp(max_e) @@ -457,6 +487,9 @@ ivas_error ivas_core_enc_fx( move16(); } +#ifdef SIMPLIFY_CORE_ENC + stereo_tcx_enc_scale_buffers( sts[i], CPE_CHANNELS, Q_spec_old[i] ); +#else IF( EQ_16( st->hTcxEnc->tcxMode, TCX_20 ) ) { Word16 e_max = s_max( st->hTcxEnc->spectrum_e[0], Q_spec_old[i] ); @@ -487,10 +520,14 @@ ivas_error ivas_core_enc_fx( move16(); st->hTcxEnc->spectrum_e[1] = st->hTcxEnc->spectrum_long_e; move16(); +#endif } } ELSE { +#ifdef SIMPLIFY_CORE_ENC + stereo_mdct_core_enc_fx( hCPE, old_inp_16k_fx, Q_new, old_wsp_fx, pitch_buf_fx ); +#else FOR( i = 0; i < CPE_CHANNELS; i++ ) { st = sts[i]; @@ -556,6 +593,7 @@ ivas_error ivas_core_enc_fx( 
st->hTcxEnc->spectrum_e[1] = st->hTcxEnc->spectrum_long_e; move16(); } +#endif } } ELSE IF( EQ_32( sts[0]->core_brate, SID_2k40 ) && EQ_32( sts[1]->core_brate, SID_2k40 ) ) diff --git a/lib_enc/ivas_core_pre_proc_fx.c b/lib_enc/ivas_core_pre_proc_fx.c index 9134f304d468ebe8463f19727c0195d3474d3993..3aacf25538a5a360d19982b01b98b2c64bf0f5d5 100644 --- a/lib_enc/ivas_core_pre_proc_fx.c +++ b/lib_enc/ivas_core_pre_proc_fx.c @@ -72,10 +72,14 @@ void pre_proc_ivas_fx( const Word16 vad_hover_flag, /* i : VAD hangover flag Q0*/ const Word16 flag_16k_smc, /* i : flag to indicate if the OL SMC is run at 16 kHz Q0*/ Word32 enerBuffer_fx[CLDFB_NO_CHANNELS_MAX], /* i : energy buffer e_enerBuffer */ - const Word16 e_enerBuffer, /* i : Q value of energy buffer */ - Word16 fft_buff_fx[2 * L_FFT], /* i : FFT buffer Qx*/ - const Word16 cor_map_sum_fx, /* i : speech/music clasif. parameter Q8*/ - Word16 *Q_new /* i/o: Q factor of speech buffers */ +#ifdef SIMPLIFY_CORE_ENC + Word16 e_enerBuffer, /* i : Q value of energy buffer */ +#else + const Word16 e_enerBuffer, /* i : Q value of energy buffer */ +#endif + Word16 fft_buff_fx[2 * L_FFT], /* i : FFT buffer Qx*/ + const Word16 cor_map_sum_fx, /* i : speech/music clasif. 
parameter Q8*/ + Word16 *Q_new /* i/o: Q factor of speech buffers */ ) { Word16 L_look, element_mode, lMemRecalc_12k8; @@ -85,6 +89,27 @@ void pre_proc_ivas_fx( push_wmops( "pre_proc" ); +#ifdef SIMPLIFY_CORE_ENC + /*----------------------------------------------------------------* + * Scaling of buffers + *----------------------------------------------------------------*/ + + IF( st->cldfbAnaEnc ) + { + Word16 tmp_shift = L_norm_arr( enerBuffer_fx, st->cldfbAnaEnc->no_channels ); + tmp_shift = sub( tmp_shift, 5 ); + IF( tmp_shift < 0 ) + { + scale_sig32( enerBuffer_fx, st->cldfbAnaEnc->no_channels, tmp_shift ); /* enerBuffer_fx_exp[n] - tmp_shift */ + e_enerBuffer = sub( e_enerBuffer, tmp_shift ); + move16(); + } + } + + Scale_sig( fft_buff_fx, ( 2 * L_FFT ), -1 ); // To create 1 headroom for addition of magnitude square spectrum + // fft_buff_fx_exp = add(fft_buff_fx_exp,1); +#endif + /*----------------------------------------------------------------* * Initialization *----------------------------------------------------------------*/ diff --git a/lib_enc/ivas_mdct_core_enc_fx.c b/lib_enc/ivas_mdct_core_enc_fx.c index 560cd81bd4b542e53725da58e0a8d9c5ecb556d0..91f21624bc6c52e8a098564d2295c1d57ad6b717 100644 --- a/lib_enc/ivas_mdct_core_enc_fx.c +++ b/lib_enc/ivas_mdct_core_enc_fx.c @@ -1116,8 +1116,13 @@ void enc_prm_igf_mdct( *-------------------------------------------------------------------*/ void ivas_mdct_core_whitening_enc_fx( - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ - Word16 new_samples_fx[CPE_CHANNELS][L_INP], /* i : new samples Q0*/ + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ +#ifdef SIMPLIFY_CORE_ENC + Word16 new_samples_fx[CPE_CHANNELS][L_INP], /* i : new samples Q_new[]-1*/ + const Word16 Q_new_orig[CPE_CHANNELS], /* i : Scaling factor of new_samples */ +#else + Word16 new_samples_fx[CPE_CHANNELS][L_INP], /* i : new samples Q0*/ +#endif Word16 old_wsp_fx[CPE_CHANNELS][L_WSP], /* i : 12.8kHz weighted speech (for LTP Qx*/ 
Word16 pitch_buf_fx[CPE_CHANNELS][NB_SUBFR16k], /* o : floating pitch for each subframe Q6*/ Word32 *mdst_spectrum_long_fx[CPE_CHANNELS], /* o : buffer for MDST spectrum mdst_spectrum_e*/ @@ -1183,6 +1188,13 @@ void ivas_mdct_core_whitening_enc_fx( move16(); move16(); +#ifdef SIMPLIFY_CORE_ENC + FOR( i = 0; i < CPE_CHANNELS; i++ ) + { + Scale_sig( new_samples_fx[i], L_INP, sub( Q1, Q_new_orig[i] ) ); // Q0 + } +#endif + /*--------------------------------------------------------------* * Initialization *---------------------------------------------------------------*/ @@ -2470,6 +2482,14 @@ void ivas_mdct_core_whitening_enc_fx( } } + +#ifdef SIMPLIFY_CORE_ENC + FOR( i = 0; i < CPE_CHANNELS; i++ ) + { + Scale_sig( new_samples_fx[i], L_INP, sub( Q_new_orig[i], Q1 ) ); // Q_new[] - 1 + } +#endif + pop_wmops(); return; } diff --git a/lib_enc/ivas_stereo_mdct_core_enc_fx.c b/lib_enc/ivas_stereo_mdct_core_enc_fx.c index 705635d887e22490a1c1272d93d120d28835caec..fe8d6084d9bf95a8bcb5836923a0a95ead73cd23 100644 --- a/lib_enc/ivas_stereo_mdct_core_enc_fx.c +++ b/lib_enc/ivas_stereo_mdct_core_enc_fx.c @@ -126,9 +126,15 @@ static void sync_tcx_mode_fx( * * joint stereo mdct core encoder *-------------------------------------------------------------------*/ + void stereo_mdct_core_enc_fx( - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ - Word16 new_samples[CPE_CHANNELS][L_INP], /* i : new samples Q0*/ + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ +#ifdef SIMPLIFY_CORE_ENC + Word16 new_samples[CPE_CHANNELS][L_INP], /* i : new samples Q_new[] - 1*/ + const Word16 Q_new[CPE_CHANNELS], /* i : Scaling factor of new_samples */ +#else + Word16 new_samples[CPE_CHANNELS][L_INP], /* i : new samples Q0*/ +#endif Word16 old_wsp[CPE_CHANNELS][L_WSP], /* i : 12.8kHz weighted speech (for LTP Qx*/ Word16 pitch_buf_fx[CPE_CHANNELS][NB_SUBFR16k] /* o : floating pitch for each subframe Q6*/ ) @@ -164,7 +170,7 @@ void stereo_mdct_core_enc_fx( Word16 
mdst_spectrum_e[CPE_CHANNELS][NB_DIV]; Word16 stereo_bits; Word16 meta_bits, signal_bits; - Word16 p_orig_spectrum_e[2]; + Word16 p_orig_spectrum_e[CPE_CHANNELS]; push_wmops( "stereo_mdct_core_enc" ); @@ -195,9 +201,41 @@ void stereo_mdct_core_enc_fx( signal_bits = hBstr->nb_bits_tot; move16(); +#ifdef SIMPLIFY_CORE_ENC + Word16 shift, Q_min, Q_spec_old[2]; + Word16 input_frame = extract_l( Mpy_32_32( sts[0]->input_Fs, ONE_BY_FRAMES_PER_SEC_Q31 ) ); // Q0 + + /*--------------------------------------------------------------------------------* + * Scaling of buffers + *--------------------------------------------------------------------------------*/ + + FOR( i = 0; i < CPE_CHANNELS; i++ ) + { + st = sts[i]; + + shift = norm_arr( st->input_fx, input_frame ); + Q_min = s_max( -2, add( st->q_inp, shift ) ); + scale_sig( st->input_fx, input_frame, sub( Q_min, st->q_inp ) ); + st->q_inp = Q_min; + move16(); + + shift = norm_arr( st->input_fx - input_frame, input_frame ); + Q_min = s_max( -2, add( st->q_old_inp, shift ) ); + scale_sig( st->input_fx - input_frame, input_frame, sub( Q_min, st->q_old_inp ) ); + st->q_old_inp = Q_min; + move16(); + } + + Q_spec_old[0] = sts[0]->hTcxEnc->spectrum_long_e; + move16(); + Q_spec_old[1] = sts[1]->hTcxEnc->spectrum_long_e; + move16(); + +#endif /*--------------------------------------------------------------* * Initialization *---------------------------------------------------------------*/ + Word16 tmp_e = 0; move16(); Word32 L_tmp; @@ -241,9 +279,15 @@ void stereo_mdct_core_enc_fx( hCPE->hStereoMdct->stbParamsTCX20.nBandsStereoCore = hCPE->hStereoMdct->stbParamsTCX20.sfbCnt; move16(); } + +#ifdef SIMPLIFY_CORE_ENC + Word16 q_com = s_min( s_min( add( sts[0]->q_inp, getScaleFactor16( sts[0]->input_fx, add( input_frame, NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ) ) ), add( sts[0]->q_old_inp, getScaleFactor16( sts[0]->old_input_signal_fx, input_frame ) ) ), + s_min( add( sts[1]->q_inp, getScaleFactor16( sts[1]->input_fx, add( input_frame, 
NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ) ) ), add( sts[1]->q_old_inp, getScaleFactor16( sts[1]->old_input_signal_fx, input_frame ) ) ) ); +#else Word16 len = extract_l( Mpy_32_32( sts[0]->input_Fs, ONE_BY_FRAMES_PER_SEC_Q31 ) ); Word16 q_com = s_min( s_min( add( sts[0]->q_inp, getScaleFactor16( sts[0]->input_fx, add( len, NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ) ) ), add( sts[0]->q_old_inp, getScaleFactor16( sts[0]->old_input_signal_fx, len ) ) ), s_min( add( sts[1]->q_inp, getScaleFactor16( sts[1]->input_fx, add( len, NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ) ) ), add( sts[1]->q_old_inp, getScaleFactor16( sts[1]->old_input_signal_fx, len ) ) ) ); +#endif q_com = sub( q_com, Q1 ); FOR( ch = 0; ch < CPE_CHANNELS; ch++ ) { @@ -268,8 +312,13 @@ void stereo_mdct_core_enc_fx( sts[ch]->hTcxEnc->tns_ms_flag[1] = 0; move16(); +#ifdef SIMPLIFY_CORE_ENC + scale_sig( sts[ch]->input_fx, add( input_frame, NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ), sub( q_com, sts[ch]->q_inp ) ); /* q_com */ + scale_sig( sts[ch]->old_input_signal_fx, input_frame, sub( q_com, sts[ch]->q_old_inp ) ); /* q_com */ +#else scale_sig( sts[ch]->input_fx, add( extract_l( Mpy_32_32( sts[ch]->input_Fs, ONE_BY_FRAMES_PER_SEC_Q31 ) ), NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ), sub( q_com, sts[ch]->q_inp ) ); /* q_com */ scale_sig( sts[ch]->old_input_signal_fx, extract_l( Mpy_32_32( sts[ch]->input_Fs, ONE_BY_FRAMES_PER_SEC_Q31 ) ), sub( q_com, sts[ch]->q_old_inp ) ); /* q_com */ +#endif sts[ch]->q_old_inp = q_com; move16(); sts[ch]->q_inp = q_com; @@ -284,8 +333,13 @@ void stereo_mdct_core_enc_fx( * - TNS *---------------------------------------------------------------*/ +#ifdef SIMPLIFY_CORE_ENC + ivas_mdct_core_whitening_enc_fx( hCPE, new_samples, Q_new, old_wsp, pitch_buf_fx, p_mdst_spectrum_long_fx, + tnsBits, p_orig_spectrum_long_fx, tnsSize, p_param, hBstr, 0, CPE_CHANNELS, mdst_spectrum_e, orig_spectrum_e ); +#else ivas_mdct_core_whitening_enc_fx( hCPE, new_samples, old_wsp, pitch_buf_fx, p_mdst_spectrum_long_fx, 
tnsBits, p_orig_spectrum_long_fx, tnsSize, p_param, hBstr, 0, CPE_CHANNELS, mdst_spectrum_e, orig_spectrum_e ); +#endif FOR( i = 0; i < CPE_CHANNELS; i++ ) { @@ -337,6 +391,7 @@ void stereo_mdct_core_enc_fx( /*--------------------------------------------------------------* * Stereo Processing *---------------------------------------------------------------*/ + test(); IF( !hStereoMdct->isSBAStereoMode ) { @@ -401,6 +456,7 @@ void stereo_mdct_core_enc_fx( /*--------------------------------------------------------------* * Power spectrum calculation *---------------------------------------------------------------*/ + Word16 length, exp, shift1, shift2, norm; Word32 mdct, mdst, imdct, imdst; @@ -532,6 +588,7 @@ void stereo_mdct_core_enc_fx( { L_subframeTCX = add( L_subframeTCX, shr( L_subframeTCX, 2 ) ); /* Q0 */ } + test(); test(); IF( EQ_16( st->hTcxEnc->tcxMode, TCX_20 ) && ( LT_32( st->element_brate, HQ_96k ) || st->igf ) ) @@ -553,6 +610,7 @@ void stereo_mdct_core_enc_fx( /*--------------------------------------------------------------* * IGF *---------------------------------------------------------------*/ + test(); IF( sts[0]->igf || sts[1]->igf ) { @@ -841,6 +899,17 @@ void stereo_mdct_core_enc_fx( ivas_mdct_quant_coder_fx( hCPE, tnsBits, tnsSize, p_param, 0 ); +#ifdef SIMPLIFY_CORE_ENC + /*--------------------------------------------------------------------------------* + * Scaling of buffers + *--------------------------------------------------------------------------------*/ + + FOR( i = 0; i < CPE_CHANNELS; i++ ) + { + stereo_tcx_enc_scale_buffers( sts[i], CPE_CHANNELS, Q_spec_old[i] ); + } + +#endif pop_wmops(); return; diff --git a/lib_enc/ivas_tcx_core_enc_fx.c b/lib_enc/ivas_tcx_core_enc_fx.c index 20f030c6d1ed0e0abe7e93ea3f6d4696b80cd043..110ef08a387946028d522f60e91f536c78334817 100644 --- a/lib_enc/ivas_tcx_core_enc_fx.c +++ b/lib_enc/ivas_tcx_core_enc_fx.c @@ -151,16 +151,26 @@ void stereo_tcx_init_enc_fx( 
*-------------------------------------------------------------------*/ void stereo_tcx_core_enc( - Encoder_State *st, /* i/o: encoder state structure */ - const Word16 new_samples_12k8[], /* i : buffer of input signal @12.8 kHz Q_new*/ - const Word16 new_samples_16k[], /* i : buffer of input signal @16 kHz Q_new*/ + Encoder_State *st, /* i/o: encoder state structure */ +#ifdef SIMPLIFY_CORE_ENC + Word16 new_samples_12k8[], /* i : buffer of input signal @12.8 kHz Q_new -1 */ + Word16 new_samples_16k[], /* i : buffer of input signal @16 kHz Q_new -1 */ +#else + const Word16 new_samples_12k8[], /* i : buffer of input signal @12.8 kHz Q_new*/ + const Word16 new_samples_16k[], /* i : buffer of input signal @16 kHz Q_new*/ +#endif const Word16 Aw_fx[], /* i : weighted A(z) unquant. for subframes, Q12 */ Word16 lsp_new_fx[], /* i : LSPs at the end of the frame, Q15 */ Word16 lsp_mid_fx[], /* i : LSPs in the middle of the frame, Q15 */ Word16 pitch_buf_fx[NB_SUBFR16k], /* o : pitch for each subframe, Q6 */ const Word16 last_element_mode, /* i : last element mode, Q0 */ const Word16 vad_hover_flag, /* i : VAD hangover flag, Q0 */ - Word16 Q_new ) +#ifdef SIMPLIFY_CORE_ENC + const Word16 Q_new_orig /* i : Scaling factor of new_samples_xx[] */ +#else + Word16 Q_new +#endif +) { TCX_ENC_HANDLE hTcxEnc; Word16 i, n; @@ -228,7 +238,23 @@ void stereo_tcx_core_enc( hTcxEnc = st->hTcxEnc; Word16 Q_exc; +#ifdef SIMPLIFY_CORE_ENC + Word16 Q_spec_old, Q_new; + + /*--------------------------------------------------------------------------------* + * Scaling of buffers + *--------------------------------------------------------------------------------*/ + + Q_new = Q_new_orig; + move16(); + Scale_sig( st->hTcxEnc->Txnq, L_FRAME32k / 2 + 64, sub( negate( 1 ), st->hTcxEnc->q_Txnq ) ); /* Q(-1) */ + st->hTcxEnc->q_Txnq = -Q1; + move16(); + Q_spec_old = st->hTcxEnc->spectrum_long_e; + move16(); + +#endif /*--------------------------------------------------------------* * Configuration of 
TCX *---------------------------------------------------------------*/ @@ -300,11 +326,21 @@ void stereo_tcx_core_enc( IF( EQ_16( st->L_frame, L_FRAME ) ) { +#ifdef SIMPLIFY_CORE_ENC + Scale_sig( new_samples_12k8 - L_INP_MEM, L_INP_12k8, sub( Q1, Q_new ) ); /* Q0 */ + p_new_samples = new_samples_12k8; /* Q0 */ +#else p_new_samples = new_samples_12k8; /* Q_new */ +#endif } ELSE { - p_new_samples = new_samples_16k; /* Q_new */ +#ifdef SIMPLIFY_CORE_ENC + Scale_sig( new_samples_16k - L_INP_MEM, L_INP, sub( Q1, Q_new ) ); /* Q0 */ + p_new_samples = new_samples_16k; /* Q0 */ +#else + p_new_samples = new_samples_16k; /* Q_new */ +#endif } /*--------------------------------------------------------------* @@ -668,6 +704,7 @@ void stereo_tcx_core_enc( /*--------------------------------------------------------------------------------* * Encode TCX20/10 parameters *--------------------------------------------------------------------------------*/ + writeTCXparam_fx( st, hBstr, hm_cfg, param_core, nbits_header, nbits_start, add( nbits_lpc[0], nbits_lpc[1] ), NULL, NULL, NULL, -1 ); total_nbbits = sub( hBstr->nb_bits_tot, nbits_start ); @@ -699,10 +736,89 @@ void stereo_tcx_core_enc( move16(); } +#ifdef SIMPLIFY_CORE_ENC + /*--------------------------------------------------------------------------------* + * Scaling of buffers + *--------------------------------------------------------------------------------*/ + + IF( EQ_16( st->L_frame, L_FRAME ) ) + { + Scale_sig( new_samples_12k8 - L_INP_MEM, L_INP_12k8, sub( Q_new_orig, Q1 ) ); /* Q_new - 1 */ + } + ELSE + { + Scale_sig( new_samples_16k - L_INP_MEM, L_INP, sub( Q_new_orig, Q1 ) ); /* Q_new - 1 */ + } + Scale_sig( hTcxEnc->old_out_fx, L_FRAME32k, negate( hTcxEnc->Q_old_out ) ); // scaling back to Q0 + hTcxEnc->Q_old_out = 0; + move16(); + + stereo_tcx_enc_scale_buffers( st, 1, Q_spec_old ); + +#endif pop_wmops(); return; } +#ifdef SIMPLIFY_CORE_ENC + +/*-------------------------------------------------------------------* + 
 * stereo_tcx_enc_scale_buffers() + * + * Scale TCX buffers after the TCX encoding + *-------------------------------------------------------------------*/ + +void stereo_tcx_enc_scale_buffers( + Encoder_State *st, /* i/o: encoder state structure (only st->hTcxEnc is accessed here) */ + const Word16 n_channels, /* i : number of core channels; 1 selects a separate spectrum_long_e rule in the non-TCX20 branch */ + const Word16 Q_spec_old /* i : exponent of the spectrum buffer before the encoding (handled like spectrum_e[], i.e. as an exponent) */ +) +{ /* brings spectrum_fx[] and the rest of the long spectrum buffer to one common exponent, then stores it in spectrum_long_e and spectrum_e[0..1] */ + Word16 L_spec, e_max; + TCX_ENC_HANDLE hTcxEnc = st->hTcxEnc; + + IF( EQ_16( hTcxEnc->tcxMode, TCX_20 ) ) /* TCX20: only spectrum_fx[0] is rescaled from spectrum_e[0], over L_frameTCX coefficients */ + { + L_spec = hTcxEnc->L_frameTCX; + move16(); + e_max = s_max( hTcxEnc->spectrum_e[0], Q_spec_old ); + e_max = sub( e_max, L_norm_arr( hTcxEnc->spectrum_long_fx, N_MAX ) ); /* presumably removes the headroom reported by L_norm_arr() over the whole long buffer - TODO confirm */ + scale_sig32( hTcxEnc->spectrum_fx[0], L_spec, sub( hTcxEnc->spectrum_e[0], e_max ) ); // exp(e_max), first L_spec values, previously at spectrum_e[0] + scale_sig32( hTcxEnc->spectrum_long_fx + L_spec, sub( N_MAX, L_spec ), sub( Q_spec_old, e_max ) ); // exp(e_max), remaining N_MAX - L_spec values, previously at Q_spec_old + + hTcxEnc->spectrum_long_e = e_max; + move16(); + } + ELSE /* other TCX modes: spectrum_fx[0] and spectrum_fx[1] are rescaled, L_frameTCX / 2 coefficients each */ + { + L_spec = shr( hTcxEnc->L_frameTCX, 1 ); + e_max = s_max( Q_spec_old, s_max( hTcxEnc->spectrum_e[0], hTcxEnc->spectrum_e[1] ) ); + e_max = sub( e_max, L_norm_arr( hTcxEnc->spectrum_long_fx, N_MAX ) ); /* same headroom term as in the TCX20 branch */ + scale_sig32( hTcxEnc->spectrum_fx[0], L_spec, sub( hTcxEnc->spectrum_e[0], e_max ) ); // exp(e_max), previously at spectrum_e[0] + scale_sig32( hTcxEnc->spectrum_fx[1], L_spec, sub( hTcxEnc->spectrum_e[1], e_max ) ); // exp(e_max), previously at spectrum_e[1] + scale_sig32( hTcxEnc->spectrum_fx[0] + L_spec, sub( N_TCX10_MAX, L_spec ), sub( Q_spec_old, e_max ) ); // exp(e_max), values behind the first spectrum, previously at Q_spec_old + scale_sig32( hTcxEnc->spectrum_fx[1] + L_spec, sub( N_MAX - N_TCX10_MAX, L_spec ), sub( Q_spec_old, e_max ) ); // exp(e_max), values behind the second spectrum, previously at Q_spec_old + + IF( EQ_16( n_channels, 1 ) ) /* NOTE(review): the buffers above were scaled to e_max, which includes the L_norm_arr() term, while this branch stores the exponent without that term; presumably kept for bit-exactness with the code this helper replaces - TODO confirm */ + { + hTcxEnc->spectrum_long_e = s_max( Q_spec_old, s_max( hTcxEnc->spectrum_e[0], hTcxEnc->spectrum_e[1] ) ); + } + ELSE + { + hTcxEnc->spectrum_long_e = e_max; + move16(); + } + } + + hTcxEnc->spectrum_e[0] = hTcxEnc->spectrum_long_e; /* both spectrum_e[] entries mirror the long-buffer exponent */ + move16(); + hTcxEnc->spectrum_e[1] = hTcxEnc->spectrum_long_e; + move16(); + + return; +} + +#endif 
/*-------------------------------------------------------------------* * ivas_acelp_tcx20_switching()