Merge branch 'encoder_cleanup_LTV_crash_fixes' into 'main' (a4a9ebfe) · Commits · SA4 / Audio / IVAS BASOP

lib_com/int_lsp.c

+1 −0

Original line number	Diff line number	Diff line
		@@ -344,6 +344,7 @@ void int_lsp4_ivas_fx(
		}
		ELSE IF( EQ_16( relax_prev_lsf_interp, -2 ) )
		{
		// This condition not present in int_lsp4_fx
		pt_int_coeffs = interpol_frac2_mid_fx; /*Q15*/
		}
		ELSE

lib_com/ivas_prot.h

+46 −47

Original line number	Diff line number	Diff line
		@@ -229,49 +229,50 @@ ivas_error pre_proc_front_ivas(
		ivas_error pre_proc_front_ivas_fx(
		SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */
		CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
		const int32_t element_brate, /* i : SCE/CPE element bitrate */
		const int16_t nb_bits_metadata, /* i : number of metadata bits */
		const int16_t input_frame, /* i : frame length */
		const int16_t n, /* i : channel number */
		Word16 old_inp_12k8_fx[], /* o : buffer of old input signal */
		const Word32 element_brate, /* i : SCE/CPE element bitrate */
		const Word16 nb_bits_metadata, /* i : number of metadata bits */
		const Word16 input_frame, /* i : frame length */
		const Word16 n, /* i : channel number */
		Word16 old_inp_12k8_fx[], /* o : (Q-1) buffer of old input signal */
		float old_inp_12k8[], /* o : buffer of old input signal */
		float old_inp_16k[], /* o : buffer of old input signal @16kHz */
		float *ener, /* o : residual energy from Levinson-Durbin */
		float *relE, /* o : frame relative energy */
		float A[NB_SUBFR16k * ( M + 1 )], /* o : A(z) unquantized for the 4 subframes */
		float Aw[NB_SUBFR16k * ( M + 1 )], /* o : weighted A(z) unquantized for subframes */
		Word16 *relE_fx, /* o : frame relative energy Q8 */
		Word16 A_fx[NB_SUBFR16k * ( M + 1 )], /* o : A(z) unquantized for the 4 subframes */
		Word16 Aw_fx[NB_SUBFR16k * ( M + 1 )], /* o : weighted A(z) unquantized for subframes */
		float epsP[M + 1], /* o : LP prediction errors */
		float lsp_new[M], /* o : LSPs at the end of the frame */
		float lsp_mid[M], /* o : LSPs in the middle of the frame */
		int16_t *vad_hover_flag, /* o : VAD hangover flag */
		int16_t *attack_flag, /* o : flag signaling attack */
		float realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: real buffer */
		float imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: imag buffer */
		Word16 lsp_new_fx[M], /* o : LSPs at the end of the frame Q15 */
		Word16 lsp_mid_fx[M], /* o : LSPs in the middle of the frame Q15 */
		Word16 *vad_hover_flag, /* o : VAD hangover flag */
		Word16 *attack_flag, /* o : flag signaling attack */
		Word32 realBuffer_fx[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: real buffer Q(q_re_im_buf) */
		Word32 imagBuffer_fx[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: imag buffer Q(q_re_im_buf) */
		Word16 *q_re_im_buf, /* i/o: Q-factor of real and imag buffer */
		float old_wsp[], /* o : weighted input signal buffer */
		float pitch_fr[NB_SUBFR], /* o : fractional pitch values */
		float voicing_fr[NB_SUBFR], /* o : fractional pitch gains */
		int16_t *loc_harm, /* o : harmonicity flag */
		float *cor_map_sum, /* o : speech/music clasif. parameter */
		int16_t *vad_flag_dtx, /* o : HE-SAD flag with additional DTX HO */
		Word16 pitch_fr_fx[NB_SUBFR], /* o : fractional pitch values Q6 */
		Word16 voicing_fr_fx[NB_SUBFR], /* o : fractional pitch gains Q15 */
		Word16 *loc_harm, /* o : harmonicity flag */
		Word16 *cor_map_sum_fx, /* o : speech/music clasif. parameter Q8 */
		Word16 *vad_flag_dtx, /* o : HE-SAD flag with additional DTX HO */
		float enerBuffer[CLDFB_NO_CHANNELS_MAX], /* o : energy buffer */
		float fft_buff[2 * L_FFT], /* o : FFT buffer */
		Word16 fft_buff_fx[2 * L_FFT], /* o : FFT buffer */
		const float tdm_A_PCh[M + 1], /* i : unq. LP coeff. of primary channel */
		const float tdm_lsp_new_PCh[M], /* i : unq. LSPs of primary channel */
		const float currFlatness, /* i : flatness parameter */
		const int16_t tdm_ratio_idx, /* i : Current Ratio_L index */
		const Word16 tdm_A_PCh_fx[M + 1], /* i : unq. LP coeff. of primary channel Q12 */
		const Word16 tdm_lsp_new_PCh_fx[M], /* i : unq. LSPs of primary channel Q15 */
		const Word16 currFlatness_fx, /* i : flatness parameter Q7 */
		const Word16 tdm_ratio_idx, /* i : Current Ratio_L index */
		float fr_bands_LR[][2 * NB_BANDS], /* i : energy in frequency bands */
		const float Etot_LR[], /* i : total energy Left & Right channel */
		const Word16 Etot_LR_fx[], /* i : total energy Left & Right channel Q8*/
		float lf_E_LR[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels */
		const int16_t localVAD_HE_SAD_LR[], /* i : HE-SAD flag without hangover, LR channels */
		const Word16 localVAD_HE_SAD_LR[], /* i : HE-SAD flag without hangover, LR channels */
		float band_energies_LR[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN */
		const int16_t flag_16k_smc, /* i : flag to indicate if the OL SMC is run at 16 kHz */
		const int16_t front_vad_flag, /* i : front-VAD flag to overwrite VAD decision */
		const int16_t force_front_vad, /* i : flag to force VAD decision */
		const int16_t front_vad_dtx_flag, /* i : front-VAD DTX flag to overwrite VAD decision*/
		const Word16 flag_16k_smc, /* i : flag to indicate if the OL SMC is run at 16 kHz */
		const Word16 front_vad_flag, /* i : front-VAD flag to overwrite VAD decision */
		const Word16 force_front_vad, /* i : flag to force VAD decision */
		const Word16 front_vad_dtx_flag, /* i : front-VAD DTX flag to overwrite VAD decision*/
		const IVAS_FORMAT ivas_format, /* i : IVAS format */
		const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */
		const int32_t ivas_total_brate /* i : IVAS total bitrate */
		const Word16 MCT_flag, /* i : hMCT handle allocated (1) or not (0) */
		const Word32 ivas_total_brate /* i : IVAS total bitrate - for setting the DTX */
		);
		ivas_error pre_proc_ivas(
		Encoder_State *st, /* i/o: encoder state structure */
		@@ -2276,7 +2277,6 @@ void td_stereo_param_updt(
		const int16_t flag_ACELP16k, /* i : ACELP@16kHz flag */
		const int16_t tdm_use_IAWB_Ave_lpc /* i : flag to indicate the usage of mean inactive LP coefficients */
		);
		#endif

		void gsc_enc(
		Encoder_State *st, /* i/o: State structure */
		@@ -2290,7 +2290,6 @@ void gsc_enc(
		float *tmp_noise /* o : long-term noise energy */
		);

		#ifndef IVAS_FLOAT_FIXED
		void tdm_low_rate_enc(
		Encoder_State *st, /* i/o: State structure */
		const float Aq[], /* i : 12k8 Lp coefficient */

lib_com/ivas_prot_fx.h

+13 −15

Original line number	Diff line number	Diff line
		@@ -3110,16 +3110,15 @@ void stereo_classifier_features_ivas_fx(
		const Word16 idchan, /* i : channel ID */
		const Word16 element_mode, /* i : element mode */
		const Word16 vad_flag, /* i : VAD flag */
		Word32 lsf_new_fx[], /* i : LSFs at the end of the frame */
		Word16 lsf_new_fx[], /* i : LSFs at the end of the frame Q1*1.28 */
		Word32 epsP_fx[], /* i : LP analysis residual energies for each iteration*/
		Word16 pitch[], /*q0 i : open-loop pitch values for quantiz. */
		Word16 voicing_fx[], /* i : OL maximum normalized correlation */
		Word16 pitch[], /* i : open-loop pitch values for quantiz. Q0 */
		Word16 voicing_fx[], /* i : OL maximum normalized correlation Q15 */
		Word32 cor_map_sum_fx, /* i : speech/music clasif. parameter */
		Word32 non_staX_fx, /* i : unbound non-stationarity for sp/mu clas. */
		Word32 sp_div_fx, /* i : spectral diversity feature */
		Word16 sp_div_fx, /* i : spectral diversity feature */
		const Word16 clas, /* i : signal class */
		Word16 epsP_e, /*exponent for epsP_fx */
		Word16 voicing_e, /*exponent for voicing_fx */
		Word16 cor_map_sum_e, /*exponent for cor_map_sum_fx */
		Word16 non_staX_e, /*exponent for non_staX_fx */
		Word16 sp_div_e /*exponent for sp_div_fx */
		@@ -3974,15 +3973,14 @@ Word16 ivas_smc_gmm_fx(
		STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure */
		const Word16 localVAD_HE_SAD, /* i : HE-SAD flag without hangover */
		const Word16 Etot_fx, /* i : total frame energy */
		Word16 lsp_new_fx[M], /* i : LSPs in current frame TODO:For now removing 'const' to avoid warning */
		Word16 cor_map_sum_fx, /* i : correlation map sum (from multi-harmonic anal.) */
		Word32 epsP_fx[M + 1], /* i : LP prediciton error TODO:For now removing 'const' to avoid warning */
		Word32 PS_fx[], /* i : energy spectrum TODO:For now removing 'const' to avoid warning */
		const Word16 non_sta_fx, /* i : unbound non-stationarity */
		const Word16 relE_fx, /* i : relative frame energy */
		const Word16 lsp_new_fx[M], /* i : LSPs in current frame Q15 */
		const Word16 cor_map_sum_fx, /* i : correlation map sum (from multi-harmonic anal.) Q8 */
		const Word32 epsP_fx[M + 1], /* i : LP prediciton error */
		const Word32 PS_fx[], /* i : energy spectrum */
		const Word16 non_sta_fx, /* i : unbound non-stationarity Q8 */
		const Word16 relE_fx, /* i : relative frame energy Q8 */
		Word16 *high_lpn_flag, /* i/o: sp/mus LPN flag */
		const Word16 flag_spitch /* i : flag to indicate very short stable pitch */
		,
		const Word16 flag_spitch, /* i : flag to indicate very short stable pitch */
		Word16 Qfact_PS,
		Word16 Q_esp,
		Word16 Qfact_PS_past );
		@@ -4407,8 +4405,8 @@ Word16 tdm_lp_comparison_fx(
		const Word16 *A_PCh_fx, /* i : primary channel LP coefficients Q12*/
		const Word16 *A_SCh_fx, /* i : secondary channel LP coeff. Q12*/
		const Word16 m, /* i : filter length */
		const Word32 *isp_PCh_fx, /* i : primary channel LSPs Q31 */
		const Word32 *isp_SCh_fx, /* i : secondary channel LSPs Q31 */
		const Word16 *isp_PCh_fx, /* i : primary channel LSPs Q15 */
		const Word16 *isp_SCh_fx, /* i : secondary channel LSPs Q15 */
		const Word16 L_frame, /* i : frame length */
		const Word32 element_brate_wo_meta, /* i : element bitrate without metadata*/
		Word16 Q_speech );

lib_com/ivas_stat_com.h

+2 −2

Original line number	Diff line number	Diff line
		@@ -868,9 +868,7 @@ typedef struct ivas_fb_mixer_state_structure
		/* store sin part in const table (no need to store 1s and 0s, no need to do windowing for 1's and 0's as well) */
		Word16 cross_fade_start_offset;
		Word16 cross_fade_end_offset;
		const float *pFilterbank_cross_fade;
		Word16 ana_window_offset;
		const float *pAna_window;

		Word16 prior_input_length;
		Word16 windowed_fr_offset;
		@@ -881,6 +879,8 @@ typedef struct ivas_fb_mixer_state_structure
		const Word16 *pAna_window_fx;
		#else
		float cldfb_cross_fade[CLDFB_NO_COL_MAX];
		const float *pFilterbank_cross_fade;
		const float *pAna_window;
		#endif // IVAS_FLOAT_FIXED
		Word16 cldfb_cross_fade_start;
		Word16 cldfb_cross_fade_end;

lib_com/prot.h

+12 −1

Original line number	Diff line number	Diff line
		@@ -3164,6 +3164,7 @@ void find_targets(
		float *h1 /* o : impulse response of weighted synthesis filter */
		);

		#ifndef IVAS_FLOAT_FIXED
		void inov_encode(
		Encoder_State *st, /* i/o: encoder state structure */
		const int32_t core_brate, /* i : core bitrate */
		@@ -3188,6 +3189,7 @@ void inov_encode(
		int16_t *unbits, /* o : number of unused bits for EVS_PI */
		const int16_t L_subfr /* i : subframe length */
		);
		#endif

		void acelp_1t64(
		BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */
		@@ -3206,6 +3208,7 @@ void acelp_2t32(
		float y[] /* o : filtered fixed codebook excitation */
		);

		#ifndef IVAS_FLOAT_FIXED
		int16_t acelp_4t64(
		BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */
		float dn[], /* i : corr. between target and h[]. */
		@@ -3219,6 +3222,7 @@ int16_t acelp_4t64(
		const int16_t cmpl_flag, /* i : coomplexity reduction flag */
		const int16_t Opt_AMR_WB /* i : flag indicating AMR-WB IO mode */
		);
		#endif

		/*! r: return (2N)+1 bits */
		int16_t quant_2p_2N1(
		@@ -3406,7 +3410,7 @@ void lp_gain_updt(
		float *lp_gainc, /* i/o: LP-filtered code gain (FEC) */
		const int16_t L_frame /* i : length of the frame */
		);

		#ifndef IVAS_FLOAT_FIXED
		void enc_pit_exc(
		Encoder_State *st, /* i/o: state structure */
		const float *speech, /* i : Input speech */
		@@ -3424,11 +3428,13 @@ void enc_pit_exc(
		const int16_t tdm_Pitch_reuse_flag, /* i : primary channel pitch reuse flag */
		const float tdm_Pri_pitch_buf[] /* i : primary channel pitch buffer */
		);
		#endif

		void GSC_enc_init(
		GSC_ENC_HANDLE hGSCEnc /* i/o: GSC data handle */
		);

		#ifndef IVAS_FLOAT_FIXED
		void encod_audio(
		Encoder_State *st, /* i/o: state structure */
		const float speech[], /* i : input speech */
		@@ -3446,6 +3452,7 @@ void encod_audio(
		const int16_t tdm_Pitch_reuse_flag, /* i : primary channel pitch reuse flag */
		const float tdm_Pri_pitch_buf[] /* i : primary channel pitch buffer */
		);
		#endif

		/*! r: index of the last band where pitch contribution is significant */
		int16_t Pit_exc_contribution_len(
		@@ -4222,6 +4229,7 @@ void pre_exc(
		const int16_t L_subfr /* i : subframe length */
		);

		#ifndef IVAS_FLOAT_FIXED
		void encod_unvoiced(
		Encoder_State *st, /* i/o: state structure */
		const float *speech, /* i : input speech */
		@@ -4273,6 +4281,7 @@ int16_t encod_tran(
		int16_t position, /* i : maximum of residual signal index */
		int16_t *unbits /* i/o: number of unused bits */
		);
		#endif

		void encod_amr_wb(
		Encoder_State *st, /* i/o: state structure */
		@@ -6739,6 +6748,7 @@ void acelp_pulsesign(
		float vec[],
		const float alp );

		#ifndef IVAS_FLOAT_FIXED
		void E_ACELP_4t(
		float dn[],
		float cn[],
		@@ -6770,6 +6780,7 @@ void E_ACELP_4tsearchx(
		float code[],
		PulseConfig *config,
		int16_t ind[] );
		#endif

		int16_t E_ACELP_indexing(
		float code[],