SIMPLIFY_CORE_ENC (4df11c09) · Commits · SA4 / Audio / IVAS BASOP

lib_com/ivas_prot_fx.h

+34 −3

Original line number	Diff line number	Diff line
		@@ -3026,17 +3026,34 @@ void InternalTCXDecoder_fx(

		/* TCX core encoder (prototype).
		 * With SIMPLIFY_CORE_ENC the input sample buffers are non-const and the
		 * scaling factor is passed as a const value (Q_new_orig).
		 * NOTE(review): presumably the buffer rescaling formerly done by the caller
		 * now happens inside this function - confirm against the definition. */
		void stereo_tcx_core_enc(
		Encoder_State *st, /* i/o: encoder state structure */
		#ifdef SIMPLIFY_CORE_ENC
		Word16 new_samples_12k8[], /* i : buffer of input signal @12.8 kHz */
		Word16 new_samples_16k[], /* i : buffer of input signal @16 kHz */
		#else
		const Word16 new_samples_12k8[], /* i : buffer of input signal @12.8 kHz */
		const Word16 new_samples_16k[], /* i : buffer of input signal @16 kHz */
		#endif
		const Word16 Aw_fx[], /* i : weighted A(z) unquant. for subframes,Q12*/
		Word16 lsp_new_fx[], /* i : LSPs at the end of the frame, Q15 */
		Word16 lsp_mid_fx[], /* i : LSPs in the middle of the frame, Q15 */
		Word16 pitch_buf_fx[NB_SUBFR16k], /* o : pitch for each subframe, Q6 */
		const Word16 last_element_mode, /* i : last element mode, Q0 */
		const Word16 vad_hover_flag, /* i : VAD hangover flag, Q0 */
		#ifdef SIMPLIFY_CORE_ENC
		const Word16 Q_new_orig /* i : Scaling factor of new_samples_xx[] */
		#else
		Word16 Q_new
		#endif
		);

		#ifdef SIMPLIFY_CORE_ENC
		/* Buffer scaling helper for the TCX encoder paths (prototype); only
		 * declared when SIMPLIFY_CORE_ENC is defined. */
		void stereo_tcx_enc_scale_buffers(
		Encoder_State *st, /* i/o: encoder state structure */
		const Word16 n_channels, /* i : number of core channels */
		const Word16 Q_spec_old /* i : Q of old spectrum */
		);

		#endif
		Word16 transient_analysis_ivas_fx(
		TRAN_DET_HANDLE hTranDet, /* i : handle transient detection */
		const Word16 cor_map_LT[], /* i : LT correlation map Q_cor_map = Qx */
		@@ -5966,7 +5983,11 @@ void pre_proc_ivas_fx(
		const Word16 vad_hover_flag, /* i : VAD hangover flag Q0*/
		const Word16 flag_16k_smc, /* i : flag to indicate if the OL SMC is run at 16 kHz Q0*/
		Word32 enerBuffer_fx[CLDFB_NO_CHANNELS_MAX], /* i : energy buffer e_enerBuffer */
		#ifdef SIMPLIFY_CORE_ENC
		Word16 e_enerBuffer, /* i : Q value of energy buffer */
		#else
		const Word16 e_enerBuffer, /* i : Q value of energy buffer */
		#endif
		Word16 fft_buff_fx[2 * L_FFT], /* i : FFT buffer Qx*/
		const Word16 cor_map_sum_fx, /* i : speech/music clasif. parameter Q8*/
		Word16 *Q_new /* i/o: Q factor of speech buffers */
		@@ -6035,8 +6056,8 @@ ivas_error ivas_core_enc_fx(
		const Word16 loc_harm[], /* i : harmonicity flag Q0*/
		const Word16 cor_map_sum_fx[], /* i : speech/music clasif. parameter Q8*/
		const Word16 vad_flag_dtx[], /* i : HE-SAD flag with additional DTX HO Q0*/
		Word32 enerBuffer_fx[][CLDFB_NO_CHANNELS_MAX], /* o : energy buffer enerBuffer_fx_exp*/
		Word16 enerBuffer_fx_exp[], /* o : energy buffer */
		Word32 enerBuffer_fx[][CLDFB_NO_CHANNELS_MAX], /* i : energy buffer enerBuffer_fx_exp*/
		Word16 enerBuffer_fx_exp[], /* i : energy buffer */
		Word16 fft_buff_fx[][2 * L_FFT], /* i : FFT buffer Qx*/
		const Word16 tdm_SM_or_LRTD_Pri, /* i : channel combination scheme flag Q0*/
		const Word16 ivas_format, /* i : IVAS format Q0*/
		@@ -6331,7 +6352,12 @@ UWord16 get_indice_st(

		/* Stereo MDCT core encoder (prototype).
		 * With SIMPLIFY_CORE_ENC the per-channel scaling factors of new_samples
		 * are passed explicitly through Q_new[]. */
		void stereo_mdct_core_enc_fx(
		CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
		#ifdef SIMPLIFY_CORE_ENC
		Word16 new_samples[CPE_CHANNELS][L_INP], /* i : new samples Q_new[] - 1*/
		const Word16 Q_new[CPE_CHANNELS], /* i : Scaling factor of new_samples */
		#else
		Word16 new_samples[CPE_CHANNELS][L_INP], /* i : new samples Qnew-1*/
		#endif
		Word16 old_wsp[CPE_CHANNELS][L_WSP], /* i : 12.8kHz weighted speech (for LTP) Qx*/
		Word16 pitch_buf_fx[CPE_CHANNELS][NB_SUBFR16k] /* o : floating pitch for each subframe Q6*/
		);
		@@ -6378,7 +6404,12 @@ void stereo_switching_dec(

		void ivas_mdct_core_whitening_enc_fx(
		CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
		#ifdef SIMPLIFY_CORE_ENC
		Word16 new_samples_fx[CPE_CHANNELS][L_INP], /* i : new samples Q_new[]-1*/
		const Word16 Q_new_orig[CPE_CHANNELS], /* i : Scaling factor of new_samples */
		#else
		Word16 new_samples_fx[CPE_CHANNELS][L_INP], /* i : new samples */
		#endif
		Word16 old_wsp_fx[CPE_CHANNELS][L_WSP], /* i : 12.8kHz weighted speech (for LTP */
		Word16 pitch_buf[CPE_CHANNELS][NB_SUBFR16k], /* o : floating pitch for each subframe */
		Word32 *mdst_spectrum_long[CPE_CHANNELS], /* o : buffer for MDST spectrum */

lib_com/options.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -92,6 +92,7 @@
		#define HARM_FD_BWE /* VA: harmonize core-coder FD BWE function duplications */
		#define FIX_2411_Harmonize_TNSANALYSIS_DETECTTNSFILT /* FhG: basop issue 2411: harmonize TNSAnalysis_fx(), DetectTnsFilt_fx()*/
		#define HARMONIZE_READ_DECODE_TNS /* FhG basop 2389: Harm between two pairs of functions. */
		#define SIMPLIFY_CORE_ENC /* VA: basop issue 2430: Simplify ivas_core_enc_fx() */

		/* #################### End BE switches ################################## */

lib_com/prot_fx.h

+6 −2

Original line number	Diff line number	Diff line
		@@ -7068,7 +7068,11 @@ void fd_bwe_dec_init(

		void hq_core_enc_ivas_fx(
		Encoder_State *st, /* i/o: encoder state structure */
		#ifdef SIMPLIFY_CORE_ENC
		Word16 *audio_fx, /* i : input audio signal q_inp */
		#else
		const Word16 *audio_fx, /* i : input audio signal Q0 */
		#endif
		#endif
		const Word16 input_frame, /* i : frame length Q0*/
		const Word16 hq_core_type, /* i : HQ core type Q0*/
		const Word16 Voicing_flag, /* i : Voicing flag for FER method selection Q0*/

lib_enc/hq_core_enc_fx.c

+22 −2

Original line number	Diff line number	Diff line
		@@ -272,7 +272,11 @@ void HQ_core_enc_init_fx(

		void hq_core_enc_ivas_fx(
		Encoder_State *st, /* i/o: encoder state structure */
		#ifdef SIMPLIFY_CORE_ENC
		Word16 *audio_fx, /* i : input audio signal q_inp */
		#else
		const Word16 *audio_fx, /* i : input audio signal Q0 */
		#endif
		#endif
		const Word16 input_frame_orig, /* i : frame length Q0*/
		const Word16 hq_core_type, /* i : HQ core type Q0*/
		const Word16 Voicing_flag, /* i : Voicing flag for FER method selection Q0*/
		@@ -325,6 +329,22 @@ void hq_core_enc_ivas_fx(
		st->hTcxCfg->tcx_curr_overlap_mode = ALDO_WINDOW;
		move16();

		#ifdef SIMPLIFY_CORE_ENC
		/*--------------------------------------------------------------------------
		* Scaling of buffers
		*--------------------------------------------------------------------------*/

		Scale_sig( audio_fx, input_frame, negate( st->q_inp ) ); // Q0
		st->q_inp = 0;
		move16();
		Scale_sig( st->old_input_signal_fx, input_frame, negate( st->q_old_inp ) ); // Q0
		st->q_old_inp = 0;
		move16();
		Scale_sig( st->hTcxEnc->Txnq, L_FRAME32k / 2 + 64, sub( 0, st->hTcxEnc->q_Txnq ) ); // Q0
		st->hTcxEnc->q_Txnq = 0;
		move16();

		#endif
		/*--------------------------------------------------------------------------
		* Preprocessing in the first HQ frame after ACELP frame
		* Find the number of bits for PVQ coding

lib_enc/ivas_core_enc_fx.c

+45 −7

Original line number	Diff line number	Diff line
		@@ -73,8 +73,8 @@ ivas_error ivas_core_enc_fx(
		const Word16 loc_harm[], /* i : harmonicity flag Q0*/
		const Word16 cor_map_sum_fx[], /* i : speech/music clasif. parameter Q8*/
		const Word16 vad_flag_dtx[], /* i : HE-SAD flag with additional DTX HO Q0*/
		Word32 enerBuffer_fx[][CLDFB_NO_CHANNELS_MAX], /* o : energy buffer enerBuffer_fx_exp*/
		Word16 enerBuffer_fx_exp[], /* o : energy buffer */
		Word32 enerBuffer_fx[][CLDFB_NO_CHANNELS_MAX], /* i : energy buffer enerBuffer_fx_exp*/
		Word16 enerBuffer_fx_exp[], /* i : energy buffer */
		Word16 fft_buff_fx[][2 * L_FFT], /* i : FFT buffer Qx*/
		const Word16 tdm_SM_or_LRTD_Pri, /* i : channel combination scheme flag Q0*/
		const Word16 ivas_format, /* i : IVAS format Q0*/
		@@ -104,7 +104,11 @@ ivas_error ivas_core_enc_fx(
		Word16 diff_nBits;
		ivas_error error;
		Word16 max_num_indices_BWE;
		#ifdef SIMPLIFY_CORE_ENC
		Word16 i, shift;
		#else
		Word16 i, shift, Q_min;
		#endif

		FOR( i = 0; i < CPE_CHANNELS; i++ )
		{
		@@ -198,6 +202,7 @@ ivas_error ivas_core_enc_fx(
		* Pre-processing, incl. Decision matrix
		*---------------------------------------------------------------------*/

		#ifndef SIMPLIFY_CORE_ENC
		IF( st->cldfbAnaEnc )
		{
		Word16 tmp_shift = L_norm_arr( enerBuffer_fx[n], st->cldfbAnaEnc->no_channels );
		@@ -212,6 +217,7 @@ ivas_error ivas_core_enc_fx(

		Scale_sig( fft_buff_fx[n], ( 2 * L_FFT ), -1 ); // To create 1 headroom for addition of magnitude square spectrum
		// fft_buff_fx_exp = add(fft_buff_fx_exp,1);
		#endif
		FOR( i = 0; i < st->nb_subfr; i++ )
		{
		Scale_sig( &Aw_fx[n][i * ( M + 1 )], M + 1, sub( norm_s( Aw_fx[n][i * ( M + 1 )] ), 2 ) ); // scaling to Q12
		@@ -311,16 +317,19 @@ ivas_error ivas_core_enc_fx(
		test();
		IF( ( EQ_16( st->core, TCX_20_CORE ) || EQ_16( st->core, TCX_10_CORE ) ) && NE_16( st->element_mode, IVAS_CPE_MDCT ) )
		{
		/* TCX core encoder */
		#ifdef SIMPLIFY_CORE_ENC
		stereo_tcx_core_enc( st, old_inp_12k8_fx[n] + L_INP_MEM, old_inp_16k_fx[n] + L_INP_MEM, Aw_fx[n], lsp_new_fx[n], lsp_mid_fx[n], pitch_buf_fx[n], last_element_mode, vad_hover_flag[0], Q_new[n] );
		#else
		TCX_ENC_HANDLE hTcxEnc = st->hTcxEnc;
		Word16 Q_spec_old, L_spec;

		Scale_sig( old_inp_12k8_fx[n], L_INP_12k8, sub( Q1, Q_new[n] ) ); // Q0
		Scale_sig( old_inp_16k_fx[n], L_INP, sub( Q1, Q_new[n] ) ); // Q0

		Scale_sig( st->hTcxEnc->Txnq, L_FRAME32k / 2 + 64, sub( negate( 1 ), st->hTcxEnc->q_Txnq ) ); /* Q(-1) */
		st->hTcxEnc->q_Txnq = -Q1;
		move16();
		Q_spec_old = hTcxEnc->spectrum_long_e;
		Q_spec_old = st->hTcxEnc->spectrum_long_e;
		move16();

		/* TCX core encoder */
		@@ -362,11 +371,13 @@ ivas_error ivas_core_enc_fx(
		move16();
		hTcxEnc->spectrum_e[1] = hTcxEnc->spectrum_long_e;
		move16();
		#endif
		}

		IF( EQ_16( st->core, HQ_CORE ) )
		{
		/* HQ core encoder */
		#ifndef SIMPLIFY_CORE_ENC
		Scale_sig( st->input_fx, input_frame, negate( st->q_inp ) ); // Q0
		st->q_inp = 0;
		move16();
		@@ -376,6 +387,7 @@ ivas_error ivas_core_enc_fx(
		Scale_sig( st->hTcxEnc->Txnq, L_FRAME32k / 2 + 64, sub( 0, st->hTcxEnc->q_Txnq ) ); // Q0
		st->hTcxEnc->q_Txnq = 0;
		move16();
		#endif
		hq_core_enc_ivas_fx( st, st->input_fx, input_frame, NORMAL_HQ_CORE, Voicing_flag[n], vad_hover_flag[0] );
		}

		@@ -404,6 +416,9 @@ ivas_error ivas_core_enc_fx(
		{
		Word16 mdst_spectrum_e[CPE_CHANNELS][NB_DIV];
		Word16 orig_spectrum_e[CPE_CHANNELS][NB_DIV];
		#ifdef SIMPLIFY_CORE_ENC
		Word16 Q_spec_old[CPE_CHANNELS];
		#else
		Word16 pitch_buf_fx_new[CPE_CHANNELS][NB_SUBFR16k]; /* Q6 */
		FOR( n = 0; n < n_CoreChannels; n++ )
		{
		@@ -416,35 +431,50 @@ ivas_error ivas_core_enc_fx(
		}
		}
		Word16 Q_spec_old[2], L_spec;
		#endif
		Q_spec_old[0] = hCPE->hCoreCoder[0]->hTcxEnc->spectrum_long_e;
		move16();
		Q_spec_old[1] = hCPE->hCoreCoder[1]->hTcxEnc->spectrum_long_e;
		move16();

		#ifdef SIMPLIFY_CORE_ENC
		ivas_mdct_core_whitening_enc_fx( hCPE, old_inp_16k_fx, Q_new, old_wsp_fx, pitch_buf_fx, hMCT->p_mdst_spectrum_long_fx[cpe_id], hMCT->tnsBits[cpe_id], hMCT->p_orig_spectrum_long_fx[cpe_id],
		hMCT->tnsSize[cpe_id], hMCT->p_param[cpe_id], hMCT->hBstr, 1, hMCT->nchan_out_woLFE, mdst_spectrum_e, orig_spectrum_e );
		#else
		ivas_mdct_core_whitening_enc_fx( hCPE, old_inp_16k_fx, old_wsp_fx, pitch_buf_fx_new, hMCT->p_mdst_spectrum_long_fx[cpe_id], hMCT->tnsBits[cpe_id], hMCT->p_orig_spectrum_long_fx[cpe_id],
		hMCT->tnsSize[cpe_id], hMCT->p_param[cpe_id], hMCT->hBstr, 1, hMCT->nchan_out_woLFE, mdst_spectrum_e, orig_spectrum_e );
		#endif

		FOR( i = 0; i < CPE_CHANNELS; i++ )
		{
		#ifndef SIMPLIFY_CORE_ENC
		st = sts[i];
		Word16 nSubframes = NB_DIV;
		move16();

		Scale_sig( old_inp_16k_fx[i], L_INP, sub( Q_new[i], Q1 ) ); // Q_new[n] - 1

		if ( EQ_16( st->hTcxEnc->tcxMode, TCX_20 ) )
		{
		nSubframes = 1;
		move16();
		}

		#endif
		hMCT->q_mdst_spectrum_long_fx[cpe_id][i][0] = sub( Q31, mdst_spectrum_e[i][0] );
		move16();
		#ifdef SIMPLIFY_CORE_ENC
		IF( NE_16( sts[i]->hTcxEnc->tcxMode, TCX_20 ) )
		#else
		IF( EQ_16( nSubframes, NB_DIV ) )
		#endif
		{
		hMCT->q_mdst_spectrum_long_fx[cpe_id][i][1] = sub( Q31, mdst_spectrum_e[i][1] );
		move16();
		#ifndef SIMPLIFY_CORE_ENC
		}
		IF( EQ_16( nSubframes, NB_DIV ) )
		{
		#endif
		Word16 max_e = s_max( orig_spectrum_e[i][0], orig_spectrum_e[i][1] );
		scale_sig32( hMCT->p_orig_spectrum_long_fx[cpe_id][i], N_TCX10_MAX, sub( orig_spectrum_e[i][0], max_e ) ); // exp(max_e)
		scale_sig32( hMCT->p_orig_spectrum_long_fx[cpe_id][i] + N_TCX10_MAX, N_TCX10_MAX, sub( orig_spectrum_e[i][1], max_e ) ); // exp(max_e)
		@@ -457,6 +487,9 @@ ivas_error ivas_core_enc_fx(
		move16();
		}

		#ifdef SIMPLIFY_CORE_ENC
		stereo_tcx_enc_scale_buffers( sts[i], CPE_CHANNELS, Q_spec_old[i] );
		#else
		IF( EQ_16( st->hTcxEnc->tcxMode, TCX_20 ) )
		{
		Word16 e_max = s_max( st->hTcxEnc->spectrum_e[0], Q_spec_old[i] );
		@@ -487,10 +520,14 @@ ivas_error ivas_core_enc_fx(
		move16();
		st->hTcxEnc->spectrum_e[1] = st->hTcxEnc->spectrum_long_e;
		move16();
		#endif
		}
		}
		ELSE
		{
		#ifdef SIMPLIFY_CORE_ENC
		stereo_mdct_core_enc_fx( hCPE, old_inp_16k_fx, Q_new, old_wsp_fx, pitch_buf_fx );
		#else
		FOR( i = 0; i < CPE_CHANNELS; i++ )
		{
		st = sts[i];
		@@ -556,6 +593,7 @@ ivas_error ivas_core_enc_fx(
		st->hTcxEnc->spectrum_e[1] = st->hTcxEnc->spectrum_long_e;
		move16();
		}
		#endif
		}
		}
		ELSE IF( EQ_32( sts[0]->core_brate, SID_2k40 ) && EQ_32( sts[1]->core_brate, SID_2k40 ) )