Merge branch 'enc_funcs_fxd_6' into 'main' (81c8a9c5) · Commits · SA4 / Audio / IVAS BASOP

lib_com/cldfb.c

+21 −19

Original line number	Diff line number	Diff line
		@@ -1950,7 +1950,9 @@ void analysisCldfbEncoder_ivas(

		void analysisCldfbEncoder_ivas_fx(
		Encoder_State *st, /* i/o: encoder state structure */
		const Word16 *timeIn,
		const Word32 *timeIn, /*q11*/
		Word16 timeInq,
		Word16 samplesToProcess,
		Word32 realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX],
		Word32 imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX],
		Word16 realBuffer16[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX],
		@@ -1965,7 +1967,6 @@ void analysisCldfbEncoder_ivas_fx(
		Word32 *ppBuf_Imag[CLDFB_NO_COL_MAX];
		Word16 *ppBuf_Real16[CLDFB_NO_COL_MAX];
		Word16 *ppBuf_Imag16[CLDFB_NO_COL_MAX];
		Word32 workBuffer[256];

		FOR( i = 0; i < CLDFB_NO_COL_MAX; i++ )
		{
		@@ -1974,13 +1975,14 @@ void analysisCldfbEncoder_ivas_fx(
		ppBuf_Real16[i] = &realBuffer16[i][0];
		ppBuf_Imag16[i] = &imagBuffer16[i][0];
		}
		cldfbAnalysis_ivas_fx( timeIn, ppBuf_Real, ppBuf_Imag, samplesToProcess, st->cldfbAnaEnc );

		cldfbAnalysisFiltering( st->cldfbAnaEnc, ppBuf_Real, ppBuf_Imag, scale, timeIn, 0, CLDFB_NO_COL_MAX, workBuffer );

		scale->lb_scale = sub( 16 + 5, timeInq );
		enerScale.lb_scale = negate( scale->lb_scale );
		enerScale.lb_scale16 = negate( scale->lb_scale );
		move16();
		move16();
		move16();

		AnalysisPostSpectrumScaling_Fx( st->cldfbAnaEnc, ppBuf_Real, ppBuf_Imag, ppBuf_Real16, ppBuf_Imag16, &enerScale.lb_scale16 );

		@@ -2793,7 +2795,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		#ifdef IVAS_FLOAT_FIXED
		hs->p_filter_sf = (Word16) 17036;
		hs->scale = cldfb_scale_2_5ms[0];
		hs->p_filter = cldfb_protoFilter_2_5ms[0];
		hs->p_filter = CLDFB80_10_fx;
		#endif
		}
		else if ( hs->prototype == CLDFB_PROTOTYPE_5_00MS )
		@@ -2808,7 +2810,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		hs->rot_vec_syn_delay_im = rot_vec_delay_im_LDQMF;
		#ifdef IVAS_FLOAT_FIXED
		hs->p_filter_sf = (Word16) 15388;
		hs->p_filter = cldfb_protoFilter_5_0ms[0];
		hs->p_filter = LDQMF_10_fx;
		hs->scale = cldfb_scale_5_0ms[0];
		#endif
		}
		@@ -2843,7 +2845,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		hs->rot_vec_syn_delay_im = NULL;
		#ifdef IVAS_FLOAT_FIXED
		hs->p_filter_sf = (Word16) 17051;
		hs->p_filter = cldfb_protoFilter_2_5ms[1];
		hs->p_filter = CLDFB80_16_fx;
		hs->scale = cldfb_scale_2_5ms[1];
		#endif
		}
		@@ -2859,7 +2861,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		hs->rot_vec_syn_delay_im = rot_vec_delay_im_LDQMF;
		#ifdef IVAS_FLOAT_FIXED
		hs->p_filter_sf = (Word16) 15388;
		hs->p_filter = cldfb_protoFilter_5_0ms[1];
		hs->p_filter = LDQMF_16_fx;
		hs->scale = cldfb_scale_5_0ms[1];
		#endif
		}
		@@ -2900,7 +2902,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		hs->rot_vec_syn_delay_im = NULL;
		#ifdef IVAS_FLOAT_FIXED
		hs->p_filter_sf = (Word16) 17050;
		hs->p_filter = cldfb_protoFilter_2_5ms[2];
		hs->p_filter = CLDFB80_20_fx;
		hs->scale = cldfb_scale_2_5ms[2];
		#endif
		}
		@@ -2916,7 +2918,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		hs->rot_vec_syn_delay_im = rot_vec_delay_im_LDQMF;
		#ifdef IVAS_FLOAT_FIXED
		hs->p_filter_sf = (Word16) 15390;
		hs->p_filter = cldfb_protoFilter_5_0ms[2];
		hs->p_filter = LDQMF_20_fx;
		hs->scale = cldfb_scale_5_0ms[2];
		#endif
		}
		@@ -2958,7 +2960,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		#ifdef IVAS_FLOAT_FIXED
		hs->p_filter_sf = (Word16) 17051;
		hs->scale = cldfb_scale_2_5ms[6];
		hs->p_filter = cldfb_protoFilter_2_5ms[6];
		hs->p_filter = CLDFB80_30_fx;
		#endif
		}
		else if ( hs->prototype == CLDFB_PROTOTYPE_5_00MS )
		@@ -2974,7 +2976,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		#ifdef IVAS_FLOAT_FIXED
		hs->p_filter_sf = (Word16) 15388;
		hs->scale = cldfb_scale_5_0ms[6];
		hs->p_filter = cldfb_protoFilter_5_0ms[6];
		hs->p_filter = LDQMF_30_fx;
		#endif
		}
		break;
		@@ -3008,7 +3010,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		hs->rot_vec_syn_delay_im = NULL;
		#ifdef IVAS_FLOAT_FIXED
		hs->p_filter_sf = (Word16) 17050;
		hs->p_filter = cldfb_protoFilter_2_5ms[3];
		hs->p_filter = CLDFB80_32_fx;
		hs->scale = cldfb_scale_2_5ms[3];
		#endif
		}
		@@ -3025,7 +3027,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		#ifdef IVAS_FLOAT_FIXED
		hs->p_filter_sf = (Word16) 15392;
		hs->scale = cldfb_scale_5_0ms[3];
		hs->p_filter = cldfb_protoFilter_5_0ms[3];
		hs->p_filter = LDQMF_32_fx;
		#endif
		}
		break;
		@@ -3067,7 +3069,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		hs->p_filter_sf = (Word16) 17051;
		hs->q_scale = norm_s( (Word16) CLDFB80_40_SCALE );
		hs->scale = cldfb_scale_2_5ms[4];
		hs->p_filter = cldfb_protoFilter_2_5ms[4];
		hs->p_filter = CLDFB80_40_fx;
		#endif
		}
		else if ( hs->prototype == CLDFB_PROTOTYPE_5_00MS )
		@@ -3086,7 +3088,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		hs->rot_vec_syn_delay_re_fx = rot_vec_delay_re_LDQMF_fx;
		hs->rot_vec_syn_delay_im_fx = rot_vec_delay_im_LDQMF_fx;
		hs->p_filter_sf = (Word16) 15391;
		hs->p_filter = cldfb_protoFilter_5_0ms[4];
		hs->p_filter = LDQMF_40_fx;
		hs->scale = cldfb_scale_5_0ms[4];
		#endif
		}
		@@ -3131,7 +3133,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		hs->rot_vec_syn_delay_re_fx = NULL;
		hs->rot_vec_syn_delay_im_fx = NULL;
		hs->p_filter_sf = (Word16) 17051;
		hs->p_filter = cldfb_protoFilter_2_5ms[5];
		hs->p_filter = CLDFB80_60_fx;
		hs->scale = cldfb_scale_2_5ms[5];
		#endif
		}
		@@ -3151,7 +3153,7 @@ static void cldfb_init_proto_and_twiddles_enc(
		hs->rot_vec_syn_delay_re_fx = rot_vec_delay_re_LDQMF_fx;
		hs->rot_vec_syn_delay_im_fx = rot_vec_delay_im_LDQMF_fx;
		hs->p_filter_sf = (Word16) 15391;
		hs->p_filter = cldfb_protoFilter_5_0ms[5];
		hs->p_filter = LDQMF_60_fx;
		hs->scale = cldfb_scale_5_0ms[5];
		#endif
		}

lib_com/cnst.h

+6 −0

Original line number	Diff line number	Diff line
		@@ -745,6 +745,7 @@ enum
		#define PIT_UP_SAMP6 6
		#define PIT_L_INTERPOL6_2 17
		#define PIT_FIR_SIZE6_2 ( PIT_UP_SAMP6 * PIT_L_INTERPOL6_2 + 1 )
		#define E_MIN_Q11_FX 7 /* minimum allowable energy in Q11*/
		#define E_MIN 0.0035f /* minimum allowable energy */
		#define E_MIN_Q11_FX 7 /* minimum allowable energy in Q11*/
		#define STEP_DELTA 0.0625f /* quantization step for tilt compensation of gaussian cb. excitation */
		@@ -1010,6 +1011,7 @@ typedef enum
		#define BWD_COUNT_MAX 100 /* maximum value of BWD counter */
		#define BWD_N_BINS_MAX 13 /* maximum number of BWD bins */
		#define BWS_TRAN_PERIOD 5 /* BWS - number of frames for transition period */
		#define ONE_BY_BWS_TRAN_PERIOD_Q15 6554

		#define PREEMPH_FAC_FLT 0.68f /* preemphasis factor at 12.8kHz */
		#define PREEMPH_FAC_16k_FLT 0.72f
		@@ -1953,10 +1955,14 @@ typedef enum _DCTTYPE
		#define ATT_NSEG 32 /* strong attack detection - number of time blocks */

		#define TOD_NSPEC 80 /* number of spectral bins of the tonal detector */
		#define TOD_NSPEC_INV_Q31 26843546 /* inverse of number of spectral bins of the tonal detector */
		#define TOD_THR_MASS 0.86f /* initial value for the adaptive threshold of the tonal detector */
		#define P2A_FACT 0.9f /* long-term averaging factor for peak-to-average ratio */
		#define P2A_FACT_FX_Q15 29491 /* long-term averaging factor for peak-to-average ratio */
		#define THR_P2A_HIGH 95.0f /* higher threshold to detect strongly peaky signals at low bitrates*/
		#define THR_P2A 80.0f /* lower threshold to detect strongly peaky signals at higher bitrates */
		#define THR_P2A_HIGH_FX 95 /* Q0, higher threshold to detect strongly peaky signals at low bitrates*/
		#define THR_P2A_FX 80 /* Q0, lower threshold to detect strongly peaky signals at higher bitrates */

		/*----------------------------------------------------------------------------------*
		* LD music post-filter constants

lib_com/ivas_cnst.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -644,7 +644,9 @@ typedef enum
		#define STEREO_DFT_RES_GAINS_BITS 3

		#define STEREO_DFT_RES_COD_SNR_MIN 10
		#define STEREO_DFT_RES_COD_SNR_MIN_Q25 335544320
		#define STEREO_DFT_RES_COD_SNR_MAX 40
		#define STEREO_DFT_RES_COD_SNR_MAX_Q25 1342177280
		#define STEREO_DFT_L_SUBFR_8k 32
		/* Half of NBPSF_PIT_MAX. The expansion is parenthesised so that the division binds as one
		 * unit at every use site (e.g. `x / STEREO_DFT_NBPSF_PIT_MAX_8k`). */
		#define STEREO_DFT_NBPSF_PIT_MAX_8k ( NBPSF_PIT_MAX / 2 )

		@@ -727,6 +729,7 @@ enum
		#define ECSQ_GLOBAL_GAIN_INDEX_ALL_ZERO 127 /* indicates that all values in the vector are zero */

		#define ECLVQ_GLOBAL_GAIN_FACTOR ( 20.0f * 127.0f / 90.0f )
		#define ECLVQ_GLOBAL_GAIN_FACTOR_Q26 1893961218
		#define ECLVQ_INV_GLOBAL_GAIN_FACTOR ( 1.0f / ( 20.0f * 127.0f / 90.0f ) )
		#define ECLVQ_INV_GLOBAL_GAIN_FACTOR_Q24 594468

lib_com/ivas_prot.h

+16 −0

Original line number	Diff line number	Diff line
		@@ -258,6 +258,7 @@ ivas_error pre_proc_front_ivas_fx(
		const int16_t nb_bits_metadata, /* i : number of metadata bits */
		const int16_t input_frame, /* i : frame length */
		const int16_t n, /* i : channel number */
		Word16 old_inp_12k8_fx[], /* o : buffer of old input signal */
		float old_inp_12k8[], /* o : buffer of old input signal */
		float old_inp_16k[], /* o : buffer of old input signal @16kHz */
		float *ener, /* o : residual energy from Levinson-Durbin */
		@@ -279,6 +280,7 @@ ivas_error pre_proc_front_ivas_fx(
		int16_t *vad_flag_dtx, /* o : HE-SAD flag with additional DTX HO */
		float enerBuffer[CLDFB_NO_CHANNELS_MAX], /* o : energy buffer */
		float fft_buff[2 * L_FFT], /* o : FFT buffer */
		Word16 fft_buff_fx[2 * L_FFT], /* o : FFT buffer */
		const float tdm_A_PCh[M + 1], /* i : unq. LP coeff. of primary channel */
		const float tdm_lsp_new_PCh[M], /* i : unq. LSPs of primary channel */
		const float currFlatness, /* i : flatness parameter */
		@@ -1797,6 +1799,7 @@ void stereo_dft_enc_process(
		);
		#endif

		#ifndef IVAS_FLOAT_FIXED
		void stereo_dft_enc_res(
		STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: encoder DFT stereo handle */
		const float *input_8k, /* i : input buffer sampled at 8kHz */
		@@ -1804,11 +1807,14 @@ void stereo_dft_enc_res(
		int16_t *nb_bits, /* o : number of bits written */
		const int16_t max_bits
		);
		#endif

		#ifndef IVAS_FLOAT_FIXED
		/* Floating-point DFT stereo bitstream writer (prototype only; the definition is not
		 * visible here). The bit count is returned through the nb_bits pointer. */
		void stereo_dft_enc_write_BS(
		CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
		int16_t *nb_bits /* o : number of bits written */
		);
		#endif

		void stereo_dtf_cng(
		CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */
		@@ -1857,6 +1863,7 @@ void stereo_dft_dequantize_itd(
		const int32_t output_Fs
		);

		#ifndef IVAS_FLOAT_FIXED
		void stereo_dft_enc_sid_calc_coh(
		STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: DFT stereo handle */
		float prev_cohBand[2*(STEREO_DFT_BAND_MAX/2)], /* i/o: Previous coherence */
		@@ -1864,7 +1871,9 @@ void stereo_dft_enc_sid_calc_coh(
		int16_t *first_SID, /* i/o: First SID indicator */
		float *cohBand /* i/o: Coherence per band */
		);
		#endif

		#ifndef IVAS_FLOAT_FIXED
		void stereo_dft_enc_sid_coh(
		BSTR_ENC_HANDLE hBstr, /* i/o: bitstream handle */
		float *mem_cohBand, /* i/o: Coherence memory */
		@@ -1872,6 +1881,7 @@ void stereo_dft_enc_sid_coh(
		int16_t *nb_bits, /* i/o: number of bits written */
		float *cohBand /* i/o: Coherence per band */
		);
		#endif
		#ifdef IVAS_FLOAT_FIXED
		void stereo_dft_dec_sid_coh_fx(
		Decoder_State *st, /* i/o: decoder state structure */
		@@ -2476,18 +2486,22 @@ float ECSQ_dequantize_gain(
		const int16_t index
		);

		#ifndef IVAS_FLOAT_FIXED
		/* Floating-point ECSQ vector quantizer (prototype only). Surrounded by
		 * `#ifndef IVAS_FLOAT_FIXED`, so it is declared only in float builds. */
		void ECSQ_quantize_vector(
		const float *input, /* i : read-only float buffer */
		const float global_gain, /* i : gain value (usage not visible here) */
		const int16_t N, /* i : presumably the vector length - TODO confirm against the definition */
		int16_t *output /* o : writable int16_t buffer */
		);
		#endif

		#ifndef IVAS_FLOAT_FIXED
		/* Floating-point ECSQ gain computation (prototype only); returns a float.
		 * Both buffers are const-qualified, so the callee cannot write through them. */
		float ECSQ_compute_optimal_gain(
		const float *input, /* i : read-only float buffer */
		const int16_t N, /* i : presumably the vector length - TODO confirm against the definition */
		const int16_t *output /* i : read-only int16_t buffer (despite the name) */
		);
		#endif

		void ECSQ_init_instance(
		ECSQ_instance *ecsq_inst,
		@@ -2495,6 +2509,7 @@ void ECSQ_init_instance(
		void *ac_handle
		);

		#ifndef IVAS_FLOAT_FIXED
		int32_t ECSQ_encode_target_SNR(
		ECSQ_instance *ecsq_inst,
		const float *input,
		@@ -2504,6 +2519,7 @@ int32_t ECSQ_encode_target_SNR(
		float *output,
		int16_t *global_gain_index_output
		);
		#endif // IVAS_FLOAT_FIXED

		#ifndef IVAS_FLOAT_FIXED
		void ECSQ_decode(

lib_com/ivas_prot_fx.h

+78 −0

Original line number	Diff line number	Diff line
		@@ -849,6 +849,11 @@ void td_stereo_param_updt_fx(
		#endif
		);

		/* Fixed-point encoder update routine (prototype only; the definition is not visible here).
		 * Operates in place on the encoder state passed by pointer. */
		void updt_enc_common_ivas_fx(
		Encoder_State *st /* i/o: encoder state structure */
		);


		void updateBuffersForDmxMdctStereo_fx(
		CPE_DEC_HANDLE hCPE, /* i/o: CPE handle */
		const Word16 output_frame, /* i : output frame length */
		@@ -2741,6 +2746,58 @@ void stereo_cng_upd_counters_fx(
		Word16 *coh_fade_counter /* i : Coherence fade counter */
		);


		/* Fixed-point counterpart of stereo_dft_enc_sid_calc_coh() (prototype only).
		 * Coherence values are exchanged in Q31 per the parameter annotations below. */
		void stereo_dft_enc_sid_calc_coh_fx(
		STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: DFT stereo handle */
		Word32 prev_cohBand[2 * ( STEREO_DFT_BAND_MAX / 2 )], /* i/o: Previous coherence Q31 */
		Word16 *td_active, /* i/o: TD stereo mode indicator */
		Word16 *first_SID, /* i/o: First SID indicator */
		Word32 *cohBand /* i/o: Coherence per band Q31 */
		);

		/* Fixed-point counterpart of stereo_dft_enc_sid_coh() (prototype only).
		 * The running bit count is updated through the nb_bits pointer. */
		void stereo_dft_enc_sid_coh_fx(
		BSTR_ENC_HANDLE hBstr, /* i/o: bitstream handle */
		Word32 *mem_cohBand, /* i/o: Coherence memory */
		const Word16 nbands, /* i : number of DFT stereo bands */
		Word16 *nb_bits, /* i/o: number of bits written */
		Word32 *cohBand /* i/o: Coherence per band */
		);

		/* Fixed-point counterpart of stereo_dft_enc_write_BS() (prototype only).
		 * The bit count is returned through the nb_bits pointer. */
		void stereo_dft_enc_write_BS_fx(
		CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
		Word16 *nb_bits /* o : number of bits written */
		);

		void stereo_dft_enc_res_fx(
		STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: encoder stereo handle */
		const Word32 input_8k, / i : input buffer sampled at 8kHz Q16 */
		BSTR_ENC_HANDLE hBstr, /* i/o: bitstream handle */
		Word16 nb_bits, / o : number of bits written */
		const Word16 max_bits );

		/* Fixed-point counterpart of ECSQ_encode_target_SNR() (prototype only); returns a Word32.
		 * NOTE(review): the meaning of the return value is not visible here - confirm against the definition. */
		Word32 ECSQ_encode_target_SNR_fx(
		ECSQ_instance *ecsq_inst, /* ECSQ instance, passed by non-const pointer */
		const Word32 *input, /* i : read-only Word32 buffer */
		const Word16 input_e, /* i : presumably the exponent of input - TODO confirm */
		const Word16 N, /* i : presumably the vector length - TODO confirm */
		const Word32 target_SNR, // Q25
		const Word16 max_bits,
		Word32 *output, /* o : writable Word32 buffer */
		Word16 *global_gain_index_output ); /* o : written through pointer */

		/* Fixed-point counterpart of ECSQ_quantize_vector() (prototype only). */
		void ECSQ_quantize_vector_fx(
		const Word32 *input, /* i : read-only Word32 buffer */
		const Word16 input_e, /* i : presumably the exponent of input - TODO confirm */
		const Word32 global_gain, // Q15
		const Word16 N, /* i : presumably the vector length - TODO confirm */
		Word16 *output ); /* o : writable Word16 buffer */

		/* Fixed-point counterpart of ECSQ_compute_optimal_gain() (prototype only).
		 * The "Q15" tag on the first line presumably describes the Word32 return value - confirm. */
		Word32 ECSQ_compute_optimal_gain_fx( // Q15
		const Word32 *input, /* i : read-only Word32 buffer */
		const Word16 input_e, /* i : presumably the exponent of input - TODO confirm */
		const Word16 N, /* i : presumably the vector length - TODO confirm */
		const Word16 *output ); /* i : read-only Word16 buffer (despite the name) */

		void spectral_balancer_fx16(
		Word16 *signal, /* i/o : signal Qx */
		Word16 *mem, /* i/o : mem Qx */
		@@ -2863,4 +2920,25 @@ void stereo_tcx_core_enc(
		const Word16 vad_hover_flag /* i : VAD hangover flag, Q0 */
		);


		/* Fixed-point transient analysis (prototype only); returns a Word16.
		 * Each Qx-tagged input is accompanied by an explicit Q-format argument.
		 * NOTE(review): the meaning of the return value is not visible here - confirm against the definition. */
		Word16 transient_analysis_ivas_fx(
		TRAN_DET_HANDLE hTranDet, /* i : handle transient detection */
		const Word16 cor_map_LT[], /* i : LT correlation map Q_cor_map = Qx */
		Word16 Q_cor_map, /* i : Q format of cor_map_LT */
		const Word16 multi_harm_limit, /* i : multi harmonic threshold Q_multi_harm_limit = Qx */
		Word16 Q_multi_harm_limit ); /* i : Q format of multi_harm_limit */

		void ivas_smc_mode_selection_fx(
		Encoder_State st, / i/o: encoder state structure */
		const Word32 element_brate, /* i : element bitrate */
		Word16 smc_dec, /* i : raw decision of the 1st stage classifier*/
		const Word16 relE, /* i : relative frame energy, Q8 */
		const Word16 Etot, /* i : total frame energy, Q8 */
		Word16 attack_flag, / i/o: attack flag (GSC or TC) */
		const Word16 inp, / i : input signal */
		const Word16 Q_new, /* i : Q of input signal */
		const Word16 S_map[], /* i : short-term correlation map, Q7 */
		const Word16 flag_spitch /* i : flag to indicate very short stable pitch*/
		);

		#endif