Merge branch 'enc_cleanup_3' into 'main' (6c27a4cf) · Commits · SA4 / Audio / IVAS BASOP

lib_com/ivas_prot.h

+16 −10

Original line number	Diff line number	Diff line
		@@ -240,7 +240,8 @@ ivas_error pre_proc_front_ivas_fx(
		Word16 *relE_fx, /* o : frame relative energy Q8 */
		Word16 A_fx[NB_SUBFR16k * ( M + 1 )], /* o : A(z) unquantized for the 4 subframes */
		Word16 Aw_fx[NB_SUBFR16k * ( M + 1 )], /* o : weighted A(z) unquantized for subframes */
		float epsP[M + 1], /* o : LP prediction errors */
		Word32 epsP_fx[M + 1], /* o : LP prediction errors */
		Word16 *epsP_fx_q,
		Word16 lsp_new_fx[M], /* o : LSPs at the end of the frame Q15 */
		Word16 lsp_mid_fx[M], /* o : LSPs in the middle of the frame Q15 */
		Word16 *vad_hover_flag, /* o : VAD hangover flag */
		@@ -262,11 +263,14 @@ ivas_error pre_proc_front_ivas_fx(
		const Word16 tdm_lsp_new_PCh_fx[M], /* i : unq. LSPs of primary channel Q15 */
		const Word16 currFlatness_fx, /* i : flatness parameter Q7 */
		const Word16 tdm_ratio_idx, /* i : Current Ratio_L index */
		float fr_bands_LR[][2 * NB_BANDS], /* i : energy in frequency bands */
		Word32 fr_bands_LR_fx[][2 * NB_BANDS], /* i : energy in frequency bands (fr_bands_LR_fx_q) */
		Word16 fr_bands_LR_fx_q,
		const Word16 Etot_LR_fx[], /* i : total energy Left & Right channel Q8*/
		float lf_E_LR[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels */
		Word32 lf_E_LR_fx[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels (lf_E_LR_fx_q) */
		Word16 lf_E_LR_fx_q,
		const Word16 localVAD_HE_SAD_LR[], /* i : HE-SAD flag without hangover, LR channels */
		float band_energies_LR[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN */
		Word32 band_energies_LR_fx[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN (band_energies_LR_fx_q) */
		Word16 band_energies_LR_fx_q,
		const Word16 flag_16k_smc, /* i : flag to indicate if the OL SMC is run at 16 kHz */
		const Word16 front_vad_flag, /* i : front-VAD flag to overwrite VAD decision */
		const Word16 force_front_vad, /* i : flag to force VAD decision */
		@@ -289,7 +293,8 @@ ivas_error pre_proc_ivas(
		Word32 *ener_fx, /* o : residual energy from Levinson-Durbin Q6 */
		Word16 A[NB_SUBFR16k * ( M + 1 )], /* i/o: A(z) unquantized for the 4 subframes */
		Word16 Aw[NB_SUBFR16k * ( M + 1 )], /* i/o: weighted A(z) unquantized for subframes */
		float epsP[M + 1], /* i/o: LP prediction errors */
		Word32 epsP_fx[M + 1], /* i : LP prediction errors epsP_fx_q */
		Word16 *epsP_fx_q, /* i : LP prediction errors */
		Word16 lsp_new[M], /* i/o: LSPs at the end of the frame */
		Word16 lsp_mid[M], /* i/o: LSPs in the middle of the frame */
		Word16 *new_inp_resamp16k_fx, /* o : new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */
		@@ -529,14 +534,15 @@ ivas_error ivas_core_enc(
		Word32 ener_fx[], /* i : residual energy from Levinson-Durbin Q6*/
		Word16 A_fx[][NB_SUBFR16k * ( M + 1 )], /* i : A(z) unquantized for the 4 subframes */
		Word16 Aw_fx[][NB_SUBFR16k * ( M + 1 )], /* i : weighted A(z) unquantized for subframes*/
		float epsP[][M + 1], /* i : LP prediction errors */
		Word32 epsP_fx[][M + 1], /* i : LP prediction errors epsP_fx_q */
		Word16 epsP_fx_q[], /* i : LP prediction errors */
		Word16 lsp_new[][M], /* i : LSPs at the end of the frame Q15 */
		Word16 lsp_mid[][M], /* i : LSPs in the middle of the frame Q15 */
		const int16_t vad_hover_flag[], /* i : VAD hangover flag */
		int16_t attack_flag[], /* i : attack flag (GSC or TC) */
		Word32 realBuffer_fx[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: real buffer */
		Word32 imagBuffer_fx[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: imag buffer */
		Word16 q_re_im_buf,
		Word16 *q_re_im_buf,
		float old_wsp[][L_WSP], /* i : weighted input signal buffer */
		const int16_t loc_harm[], /* i : harmonicity flag */
		const Word16 cor_map_sum[], /* i : speech/music clasif. parameter Q8 */
		@@ -2494,10 +2500,10 @@ void stereo_mdct_core_enc(
		float pitch_buf[CPE_CHANNELS][NB_SUBFR16k] /* o : floating pitch for each subframe */
		);
		#else
		void stereo_mdct_core_enc(
		void stereo_mdct_core_enc_fx(
		CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
		float new_samples[CPE_CHANNELS][L_INP], /* i : new samples */
		float old_wsp[CPE_CHANNELS][L_WSP], /* i : 12.8kHz weighted speech (for LTP */
		Word16 new_samples[CPE_CHANNELS][L_INP], /* i : new samples */
		Word16 old_wsp[CPE_CHANNELS][L_WSP], /* i : 12.8kHz weighted speech (for LTP */
		Word16 pitch_buf_fx[CPE_CHANNELS][NB_SUBFR16k] /* o : floating pitch for each subframe */
		);
		#endif

lib_com/ivas_prot_fx.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -4495,7 +4495,7 @@ void EstimateStereoTCXNoiseLevel_fx(
		Encoder_State **sts, /* i : state handle */
		Word32 *q_spectrum[CPE_CHANNELS][NB_DIV], /* i : quantized MDCT spectrum */
		Word16 gain_tcx[][NB_DIV], /* i : global gain */
		Word16 gain_tcx_e, /* i : global gain exponent */
		Word16 gain_tcx_e[][NB_DIV], /* i : global gain exponent */
		Word16 L_frame[][NB_DIV], /* i : frame length */
		Word16 noiseFillingBorder[][NB_DIV], /* i : noise filling border */
		Word16 hm_active[][NB_DIV], /* i : flag indicating if the harmonic model is active */

lib_enc/analy_sp_fx.c

+21 −20

Original line number	Diff line number	Diff line
		@@ -216,7 +216,7 @@ static void find_enr_dft_fx(
		Word32 *Bin_E_fx, /* o : Per bin energy (Q7) */
		Word32 *band_ener_fx, /* o : per band energy without E_MIN (Qout) */
		Word16 Q_inp_dmx,
		Word16 Qout )
		Word16 *Qout )
		{
		Word16 i, cnt;
		Word32 tmp_fx;
		@@ -308,7 +308,7 @@ static void find_enr_dft_fx(
		ngmult = W_shl( ngmult, ngmult_exp ); // Q31 + Q31 - guarded_bits + ngmult_exp
		BinE_fx[bin_cnt] = Mpy_32_32( W_extract_h( ngmult ), BinE_fx[bin_cnt] ); //(Q31 + Q31 - guarded_bits + ngmult_exp) - Q32 + (2 * Q_inp_dmx + te_exp - 32) - 31
		move32();
		BinE_fx[bin_cnt] = L_shl( BinE_fx[bin_cnt], sub( Qout, sub( sub( add( add( shl( Q_inp_dmx, 1 ), te_exp ), ngmult_exp ), 34 ), guarded_bits ) ) );
		BinE_fx[bin_cnt] = L_shl( BinE_fx[bin_cnt], sub( *Qout, sub( sub( add( add( shl( Q_inp_dmx, 1 ), te_exp ), ngmult_exp ), 34 ), guarded_bits ) ) );
		move32();
		band_fx[i] = L_add( BinE_fx[bin_cnt], band_fx[i] );
		move32();
		@@ -324,9 +324,9 @@ static void find_enr_dft_fx(
		move32();
		band_ener_fx[i] = band_fx[i]; /* per band energy without E_MIN */
		move32();
		if ( LT_32( band_fx[i], L_shl( E_MIN_Q11_FX, sub( Qout, Q11 ) ) ) )
		IF( LT_32( band_fx[i], L_shl( E_MIN_Q11_FX, sub( *Qout, Q11 ) ) ) )
		{
		band_fx[i] = L_shl( E_MIN_Q11_FX, sub( Qout, Q11 ) );
		band_fx[i] = L_shl( E_MIN_Q11_FX, sub( *Qout, Q11 ) );
		move32();
		}
		}
		@@ -370,7 +370,7 @@ static void find_enr_dft_fx(
		ngmult = W_shl( ngmult, ngmult_exp ); // Q31 + Q31 - guarded_bits + ngmult_exp
		BinE_fx[bin_cnt] = Mpy_32_32( W_extract_h( ngmult ), BinE_fx[bin_cnt] ); //(Q31 + Q31 - guarded_bits + ngmult_exp) - Q32 + (2 * Q_inp_dmx + te_exp - 32) - 31
		move32();
		BinE_fx[bin_cnt] = L_shl( BinE_fx[bin_cnt], sub( Qout, sub( sub( add( add( shl( Q_inp_dmx, 1 ), te_exp ), ngmult_exp ), 34 ), guarded_bits ) ) );
		BinE_fx[bin_cnt] = L_shl( BinE_fx[bin_cnt], sub( *Qout, sub( sub( add( add( shl( Q_inp_dmx, 1 ), te_exp ), ngmult_exp ), 34 ), guarded_bits ) ) );
		move32();
		band_fx[i] = L_add( BinE_fx[bin_cnt], band_fx[i] );
		move32();
		@@ -385,9 +385,9 @@ static void find_enr_dft_fx(
		move32();
		band_ener_fx[i] = band_fx[i];
		move32();
		if ( LT_32( band_fx[i], L_shl( E_MIN_Q11_FX, sub( Qout, Q11 ) ) ) )
		IF( LT_32( band_fx[i], L_shl( E_MIN_Q11_FX, sub( *Qout, Q11 ) ) ) )
		{
		band_fx[i] = L_shl( E_MIN_Q11_FX, sub( Qout, Q11 ) );
		band_fx[i] = L_shl( E_MIN_Q11_FX, sub( *Qout, Q11 ) );
		move32();
		}
		}
		@@ -397,7 +397,7 @@ static void find_enr_dft_fx(
		assert( bin_cnt == ( STEREO_DFT_N_12k8_ENC / 2 - 1 ) );
		BinE_fx[STEREO_DFT_N_12k8_ENC / 2 - 1] = BinE_fx[STEREO_DFT_N_12k8_ENC / 2 - 2];
		move32();
		L_lerp_fx( BinE_fx, Bin_E_fx, L_FFT / 2, STEREO_DFT_N_12k8_ENC / 2, &Qout );
		L_lerp_fx( BinE_fx, Bin_E_fx, L_FFT / 2, STEREO_DFT_N_12k8_ENC / 2, Qout );
		MVR2R_WORD32( Bin_E_fx, ptE_fx, VOIC_BINS ); // Qout

		/* find the total log energy */
		@@ -518,10 +518,8 @@ void ivas_analy_sp_fx(
		}
		ELSE
		{
		Word16 Q_inp_dmx = Q_factor_arrL( hCPE->hStereoDft->DFT[0], STEREO_DFT_N_MAX_ENC );
		floatToFixed_arrL( hCPE->hStereoDft->DFT[0], hCPE->hStereoDft->DFT_fx[0], Q_inp_dmx, STEREO_DFT_N_MAX_ENC );
		Word16 Qout = add( Q_new, QSCALE - 2 );
		find_enr_dft_fx( hCPE, input_Fs, hCPE->hStereoDft->DFT_fx[0], pt_bands, lf_E, &LEtot, min_band, max_band, Bin_E, band_energies, Q_inp_dmx, Qout );
		find_enr_dft_fx( hCPE, input_Fs, hCPE->hStereoDft->DFT_fx[0], pt_bands, lf_E, &LEtot, min_band, max_band, Bin_E, band_energies, sub( Q31, hCPE->hStereoDft->DFT_fx_e[0] ), &Qout );
		MVR2R_WORD32( lf_E, lf_E + VOIC_BINS, VOIC_BINS );
		MVR2R_WORD32( Bin_E, Bin_E + ( L_FFT / 2 ), L_FFT / 2 );
		MVR2R_WORD32( band_energies, band_energies + NB_BANDS, NB_BANDS );
		@@ -583,10 +581,10 @@ void ivas_analy_sp_fx_front(
		Word16 Scale_fac[2], /* o : FFT scales factors (2 values by frame) Q0 */
		Word32 *Bin_E, /* o : per-bin energy spectrum Q7 */
		Word32 *Bin_E_old, /* o : per-bin energy spectrum of the previous frame Q7 */
		Word32 *PS, /* o : per-bin energy spectrum Q_new + QSCALE */
		Word32 *PS, /* o : per-bin energy spectrum Q_new + QSCALE - 2 */
		Word16 *EspecdB, /* o : per-bin log energy spectrum (with f=0) Q7 */
		Word32 *band_energies, /* o : energy in critical frequency bands without minimum noise floor MODE2_E_MIN (Q_new + QSCALE + 2)*/
		Word16 *fft_buff /* o : FFT coefficients (Q_new + QSCALE + 2) */
		Word32 *band_energies, /* o : energy in critical frequency bands without minimum noise floor MODE2_E_MIN (Q_new + QSCALE)*/
		Word16 *fft_buff /* o : FFT coefficients (Q_new + Scale_fac[i_subfr]) */
		)
		{
		Word16 *pt;
		@@ -651,7 +649,9 @@ void ivas_analy_sp_fx_front(
		}
		ELSE
		{
		find_enr_dft_fx( hCPE, input_Fs, hCPE->hStereoDft->DFT_fx[0], pt_bands, lf_E, &LEtot, min_band, max_band, Bin_E, band_energies, sub( Q31, hCPE->hStereoDft->DFT_fx_e[0] ), add( Q_new, QSCALE - 2 ) );
		Word16 Qout = add( Q_new, QSCALE - 2 );
		find_enr_dft_fx( hCPE, input_Fs, hCPE->hStereoDft->DFT_fx[0], pt_bands, lf_E, &LEtot, min_band, max_band, Bin_E, band_energies, sub( Q31, hCPE->hStereoDft->DFT_fx_e[0] ), &Qout );
		scale_sig32( lf_E, VOIC_BINS, sub( add( Q_new, QSCALE - 2 ), Qout ) );
		MVR2R_WORD32( lf_E, lf_E + VOIC_BINS, VOIC_BINS );
		MVR2R_WORD32( Bin_E, Bin_E + ( L_FFT / 2 ), L_FFT / 2 );
		MVR2R_WORD32( band_energies, band_energies + NB_BANDS, NB_BANDS );
		@@ -668,7 +668,8 @@ void ivas_analy_sp_fx_front(
		}
		ELSE
		{
		temp32_log = BASOP_Util_Log10( L_shr( LEtot, 1 ), sub( Q31, add( Q_new, QSCALE - Q2 ) ) ); // Q25
		/* log10( LEtot / 2 ) = log10( LEtot ) - log10( 2 ) */
		temp32_log = L_sub( BASOP_Util_Log10( LEtot, sub( Q31, add( Q_new, QSCALE - Q2 ) ) ), LOG10_2_Q31 >> Q6 ); // Q25
		temp32_log = Mpy_32_32( temp32_log, 1342177280 /* 10.f in Q27 */ ); // (Q25, Q27) -> Q21
		*Etot = extract_l( L_shr( temp32_log, Q21 - Q8 ) ); // Q8
		move16();
		@@ -1050,7 +1051,7 @@ static void ivas_find_enr1(
		band[i] = L_shl( Ltmp, Q2 ); // Q_new + QSCALE
		move32();

		band_energies[i] = L_shl( band[i], Q2 ); /* per band energy without E_MIN */ // Q_new + QSCALE + 2
		band_energies[i] = band[i]; /* per band energy without E_MIN */ // Q_new + QSCALE
		move32();

		if ( LT_32( band[i], e_min ) ) // Q_new + QSCALE
		@@ -1098,7 +1099,7 @@ static void ivas_find_enr1(
		band[i] = L_shl( Ltmp, Q2 ); // Q_new + QSCALE
		move32();

		band_energies[i] = L_shl( band[i], Q2 ); /* per band energy without E_MIN */ // Q_new + QSCALE + 2
		band_energies[i] = band[i]; /* per band energy without E_MIN */ // Q_new + QSCALE
		move32();

		if ( LT_32( band[i], e_min ) ) // Q_new + QSCALE

lib_enc/cod_tcx_fx.c

+2 −2

Original line number	Diff line number	Diff line
		@@ -1714,7 +1714,7 @@ void EstimateStereoTCXNoiseLevel_fx(
		Encoder_State **sts, /* i : state handle */
		Word32 *q_spectrum[CPE_CHANNELS][NB_DIV], /* i : quantized MDCT spectrum */
		Word16 gain_tcx[][NB_DIV], /* i : global gain */
		Word16 gain_tcx_e, /* i : global gain exponent */
		Word16 gain_tcx_e[][NB_DIV], /* i : global gain exponent */
		Word16 L_frame[][NB_DIV], /* i : frame length */
		Word16 noiseFillingBorder[][NB_DIV], /* i : noise filling border */
		Word16 hm_active[][NB_DIV], /* i : flag indicating if the harmonic model is active */
		@@ -1821,7 +1821,7 @@ void EstimateStereoTCXNoiseLevel_fx(
		hTcxEnc->spectrum_e[n] = add( hTcxEnc->spectrum_e[n], Q1 );
		move16();
		}
		tcx_noise_factor_ivas_fx( hTcxEnc->spectrum_fx[n], hTcxEnc->spectrum_e[n], combined_q_spectrum, iStart, maxNfCalcBw, noiseTransWidth, L_frame[ch][n], gain_tcx[ch][n], gain_tcx_e, hTcxEnc->noiseTiltFactor, &fac_ns[ch][n], fac_ns_q, st->element_mode );
		tcx_noise_factor_ivas_fx( hTcxEnc->spectrum_fx[n], hTcxEnc->spectrum_e[n], combined_q_spectrum, iStart, maxNfCalcBw, noiseTransWidth, L_frame[ch][n], gain_tcx[ch][n], gain_tcx_e[ch][n], hTcxEnc->noiseTiltFactor, &fac_ns[ch][n], fac_ns_q, st->element_mode );

		/* hysteresis for very tonal passages (more stationary noise filling level) */
		IF( EQ_16( *fac_ns_q, 1 ) )

lib_enc/core_enc_init.c

+9 −6

Original line number	Diff line number	Diff line
		@@ -826,7 +826,7 @@ static void init_sig_buffers(
		set_zero( st->buf_wspeech_enc_flt, L_FRAME16k + L_SUBFR + L_FRAME16k + L_NEXT_MAX_16k + 320 );
		if ( hTcxEnc != NULL )
		{
		set_zero( hTcxEnc->buf_speech_ltp_flt, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k );
		// set_zero( hTcxEnc->buf_speech_ltp_flt, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k );
		}
		}
		else if ( st->L_frame != L_frame_old && !( ( total_brate >= ACELP_16k40 && total_brate <= ACELP_24k40 ) &&
		@@ -836,7 +836,9 @@ static void init_sig_buffers(

		if ( ( st->last_core != TCX_20_CORE ) && ( st->last_core != TCX_10_CORE ) )
		{
		mvr2r( st->buf_speech_enc_flt, hTcxEnc->buf_speech_ltp_flt, st->L_frame );
		#ifdef IVAS_FLOAT_FIXED
		floatToFixed_arr( st->buf_speech_enc_flt, st->buf_speech_enc, 0, st->L_frame );
		#endif
		}

		mvr2r( st->old_wsp, st->buf_wspeech_enc_flt + st->L_frame + L_SUBFR - L_WSP_MEM, L_WSP_MEM );
		@@ -875,7 +877,7 @@ static void init_sig_buffers(

		if ( hTcxEnc != NULL )
		{
		hTcxEnc->new_speech_ltp_flt = hTcxEnc->buf_speech_ltp_flt + st->encoderPastSamples_enc + st->encoderLookahead_enc;
		// hTcxEnc->new_speech_ltp_flt = hTcxEnc->buf_speech_ltp_flt + st->encoderPastSamples_enc + st->encoderLookahead_enc;
		}

		if ( st->hTcxEnc != NULL )
		@@ -889,7 +891,7 @@ static void init_sig_buffers(

		if ( hTcxEnc != NULL )
		{
		hTcxEnc->speech_ltp_flt = hTcxEnc->buf_speech_ltp_flt + st->encoderPastSamples_enc;
		// hTcxEnc->speech_ltp_flt = hTcxEnc->buf_speech_ltp_flt + st->encoderPastSamples_enc;
		}

		if ( st->element_mode > EVS_MONO )
		@@ -900,12 +902,13 @@ static void init_sig_buffers(
		{
		st->wspeech_enc_flt = st->buf_wspeech_enc_flt + st->L_frame + L_subfr;
		}

		#ifndef IVAS_FLOAT_FIXED
		if ( st->ini_frame == 0 || st->L_frame != L_frame_old || st->last_codec_mode == MODE1 )
		{
		set_zero( st->buf_synth_flt, OLD_SYNTH_SIZE_ENC + L_FRAME32k );
		}
		st->synth_flt = st->buf_synth_flt + st->L_frame + L_subfr;
		#endif

		return;
		}
		@@ -1299,7 +1302,7 @@ static void init_acelp(
		st->hTcxCfg->tcx_curr_overlap_mode = ALDO_WINDOW;

		/*ALDO overlap windowed past: also used in MODE1 but for other MDCT-FB*/
		set_f( st->hTcxEnc->old_out, 0, st->L_frame );
		// set_f( st->hTcxEnc->old_out, 0, st->L_frame );
		}
		else
		{