MC-paramupmix, MC-MASA functions conversion and integration (b0364efc) · Commits · SA4 / Audio / IVAS BASOP

lib_com/cldfb.c

+19 −4

Original line number	Diff line number	Diff line
		@@ -1950,7 +1950,7 @@ void analysisCldfbEncoder_ivas(

		void analysisCldfbEncoder_ivas_fx(
		Encoder_State st, / i/o: encoder state structure */
		const Word32 timeIn, /q11*/
		Word32 timeIn, /q11*/
		Word16 timeInq,
		Word16 samplesToProcess,
		Word32 realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX],
		@@ -1967,6 +1967,11 @@ void analysisCldfbEncoder_ivas_fx(
		Word32 *ppBuf_Imag[CLDFB_NO_COL_MAX];
		Word16 *ppBuf_Real16[CLDFB_NO_COL_MAX];
		Word16 *ppBuf_Imag16[CLDFB_NO_COL_MAX];
		Word32 l_timeIn[L_FRAME48k];
		Word16 norm_timeIn = L_norm_arr( timeIn, samplesToProcess );
		Word16 guard_bits = find_guarded_bits_fx( shl( samplesToProcess, 1 ) );
		Word16 shift = 0;
		move16();

		FOR( i = 0; i < CLDFB_NO_COL_MAX; i++ )
		{
		@@ -1975,9 +1980,19 @@ void analysisCldfbEncoder_ivas_fx(
		ppBuf_Real16[i] = &realBuffer16[i][0];
		ppBuf_Imag16[i] = &imagBuffer16[i][0];
		}
		cldfbAnalysis_ivas_fx( timeIn, ppBuf_Real, ppBuf_Imag, samplesToProcess, st->cldfbAnaEnc );
		IF( GT_16( guard_bits, norm_timeIn ) )
		{
		shift = sub( guard_bits, norm_timeIn );
		v_shr_32( timeIn, l_timeIn, samplesToProcess, shift );
		}
		ELSE
		{
		Copy32( timeIn, l_timeIn, samplesToProcess );
		}

		cldfbAnalysis_ivas_fx( l_timeIn, ppBuf_Real, ppBuf_Imag, samplesToProcess, st->cldfbAnaEnc );

		scale->lb_scale = sub( 16 + 5, timeInq );
		scale->lb_scale = sub( 16 + 5, sub( timeInq, shift ) );
		enerScale.lb_scale = negate( scale->lb_scale );
		enerScale.lb_scale16 = negate( scale->lb_scale );
		move16();

lib_com/cnst.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -785,6 +785,7 @@ enum
		#define CLDFB_OVRLP_MIN_SLOTS 3 /* CLDFB resampling - minimize processing to minimum required for transition frame ACELP->TCX/HQ */
		#define INV_CLDFB_BANDWIDTH ( 1.f / 800.f )
		#define INV_CLDFB_BANDWIDTH_Q31 ( 2684355l )
		#define INV_CLDFB_BANDWIDTH_MDFT_FAC_Q31 ( 10737418 )
		#define CLDFB_BANDWIDTH 800

		#define L_FILT_2OVER3 12

lib_com/ivas_cnst.h

+7 −0

Original line number	Diff line number	Diff line
		@@ -827,6 +827,10 @@ enum fea_names
		#define STEREO_TCA_GDMIN_FX -32768
		#define STEREO_TCA_GDSTEP 0.05f
		#define STEREO_TCA_GDSTEP_FX 819
		#ifdef IVAS_FLOAT_FIXED
		#define STEREO_TCA_GDMIN_FX_Q14 (-16384)
		#define STEREO_TCA_GDSTEP_FX_Q13 (410)
		#endif
		#define STEREO_BITS_TCA ( STEREO_BITS_TCA_CHAN + STEREO_BITS_TCA_CORRSTATS + STEREO_BITS_TCA_GD )

		#define STEREO_ICBWE_MSFLAG_BITS 1 /* BWE Multi Source flag */
		@@ -1282,6 +1286,9 @@ enum

		#define MASA_COHERENCE_TOLERANCE 0.1f
		#define MASA_COHERENCE_THRESHOLD 0.1f
		#ifdef IVAS_FLOAT_FIXED
		#define MASA_COHERENCE_THRESHOLD_FX 214748365 // 0.1 in Q31
		#endif
		#define MASA_RATIO_TOLERANCE 0.1f
		#define MASA_RATIO_THRESHOLD 0.1f
		#define MASA_ANGLE_TOLERANCE 0.5f

lib_com/ivas_prot.h

+20 −30

Original line number	Diff line number	Diff line
		@@ -159,7 +159,7 @@ void destroy_cpe_enc(
		void ivas_mct_enc_close(
		MCT_ENC_HANDLE hMCT / i/o: MCT encoder structure */
		);

		#ifndef IVAS_FLOAT_FIXED
		ivas_error ivas_corecoder_enc_reconfig(
		Encoder_Struct st_ivas, / i/o: IVAS encoder structure */
		const int16_t nSCE_old, /* i : number of SCEs in previous frame */
		@@ -169,7 +169,7 @@ ivas_error ivas_corecoder_enc_reconfig(
		const int32_t brate_CPE, /* i : bitrate to be set for the CPEs */
		const MC_MODE last_mc_mode /* i : switching between MC modes: last mode */
		);

		#endif
		ivas_error ivas_sce_enc(
		Encoder_Struct st_ivas, / i/o: IVAS encoder structure */
		const int16_t sce_id, /* i : SCE # identifier */
		@@ -811,6 +811,7 @@ void ivas_smc_mode_selection(
		);

		/*! r: S/M decision (0=speech or noise,1=unclear,2=music) */
		#ifndef IVAS_FLOAT_FIXED
		int16_t ivas_acelp_tcx20_switching(
		Encoder_State st, / i/o: encoder state structure */
		const float inp, / i : new input signal */
		@@ -824,6 +825,7 @@ int16_t ivas_acelp_tcx20_switching(
		float *res_cod_SNR_M,
		const int16_t flag_16k_smc /* i : flag to indicate if the OL SMC is run at 16 kHz */
		);
		#endif

		#ifndef IVAS_FLOAT_FIXED
		void ivas_decision_matrix_enc(
		@@ -4232,23 +4234,7 @@ void getChannelEnergies(
		float nrg[MCT_MAX_CHANNELS], /* o : energies */
		const int16_t nchan /* i : number of channels */
		);
		#ifdef IVAS_FLOAT_FIXED
		void mctStereoIGF_enc_fx(
		MCT_ENC_HANDLE hMCT, /* i/o: MCT encoder structure */
		Encoder_State *sts, / i/o: encoder state structure */
		Word32 orig_spectrum_fx[MCT_MAX_CHANNELS][2], / i : MDCT spectrum for ITF */
		Word32 powerSpec_fx[MCT_MAX_CHANNELS][L_FRAME48k], /* i/o: MDCT^2 + MDST^2 spectrum,or estimate*/
		Word32 powerSpecMsInv_fx[MCT_MAX_CHANNELS][NB_DIV], / i : same as above but for inverse spect.*/
		Word32 inv_spectrum_fx[MCT_MAX_CHANNELS][NB_DIV], / i : inverse spectrum */
		#ifdef IVAS_FLOAT_FIXED_TO_BE_REMOVED
		float orig_spectrum[MCT_MAX_CHANNELS][2], / i : MDCT spectrum for ITF */
		float powerSpec[MCT_MAX_CHANNELS][L_FRAME48k], /* i/o: MDCT^2 + MDST^2 spectrum,or estimate*/
		float powerSpecMsInv[MCT_MAX_CHANNELS][NB_DIV], / i : same as above but for inverse spect.*/
		float inv_spectrum[MCT_MAX_CHANNELS][NB_DIV], / i : inverse spectrum */
		#endif
		const Word16 sp_aud_decision0[MCT_MAX_CHANNELS] /* i : speech audio decision */
		);
		#endif

		void mctStereoIGF_enc(
		MCT_ENC_HANDLE hMCT, /* i/o: MCT encoder structure */
		Encoder_State *sts, / i/o: encoder state structure */
		@@ -7553,6 +7539,7 @@ void ivas_ls_custom_setup(
		* McMASA prototypes
		----------------------------------------------------------------------------------/

		#ifndef IVAS_FLOAT_FIXED
		ivas_error ivas_mcmasa_enc_open(
		Encoder_Struct st_ivas / i/o: IVAS encoder handle */
		);
		@@ -7565,6 +7552,7 @@ void ivas_mcmasa_enc_close(
		ivas_error ivas_mcmasa_enc_reconfig(
		Encoder_Struct st_ivas / i/o: IVAS encoder handle */
		);
		#endif

		ivas_error ivas_mcmasa_dec_reconfig(
		Decoder_Struct st_ivas / i/o: IVAS decoder handle */
		@@ -7591,6 +7579,7 @@ void ivas_mcmasa_split_brate(
		int32_t brate_cpe / o : Pointer to CPE element bitrate */
		);

		#ifndef IVAS_FLOAT_FIXED
		void ivas_mcmasa_enc(
		MCMASA_ENC_HANDLE hMcMasa, /* i/o: Encoder McMASA handle */
		IVAS_QMETADATA_HANDLE hQMeta, /* o : Qmetadata handle */
		@@ -7614,6 +7603,7 @@ void ivas_mcmasa_param_est_enc(
		const int16_t nchan_inp /* i : Number of input channels */
		);

		#endif
		void ivas_mcmasa_dmx_modify(
		const int16_t n_samples, /* i : input frame length in samples */
		float dmx[][L_FRAME48k + NS2SA( 48000, IVAS_FB_ENC_DELAY_NS )], /* i/o: downmix signal to be transformed into another format */

lib_com/ivas_prot_fx.h

+137 −0

Original line number	Diff line number	Diff line
		@@ -2754,7 +2754,22 @@ void ivas_spar_bitrate_dist_fx(
		const Word16 sba_order, /* i : Ambisonic (SBA) order */
		const Word16 bwidth /* i : audio bandwidth */
		);
		/* Word16/Word32 prototype with the same parameter list as ivas_corecoder_enc_reconfig(). */
		ivas_error ivas_corecoder_enc_reconfig_fx(
		    Encoder_Struct *st_ivas,          /* i/o: IVAS encoder structure */
		    const Word16 nSCE_old,            /* i  : number of SCEs in previous frame */
		    const Word16 nCPE_old,            /* i  : number of CPEs in previous frame */
		    const Word16 nchan_transport_old, /* i  : number of TCs in previous frame */
		    const Word32 brate_SCE,           /* i  : bitrate to be set for the SCEs */
		    const Word32 brate_CPE,           /* i  : bitrate to be set for the CPEs */
		    const MC_MODE last_mc_mode        /* i  : switching between MC modes: last mode */
		);

		void ivas_sba_zero_vert_comp_fx(
		    Word32 *sba_data[],      /* i  : SBA signals */
		    const Word16 sba_order,  /* i  : SBA order */
		    const Word16 sba_planar, /* i  : SBA planar flag */
		    const Word16 input_frame /* i  : frame length */
		);
		void tdm_configure_dec_fx(
		const Word16 ivas_format, /* i : IVAS format */
		const Word16 ism_mode, /* i : ISM mode in combined format */
		@@ -2935,6 +2950,23 @@ void core_switching_pre_enc_ivas_fx(
		const Word16 last_element_mode /* i : last_element_mode */
		);

		/* Word16/Word32 prototype corresponding to ivas_acelp_tcx20_switching(). */
		Word16 ivas_acelp_tcx20_switching_fx(
		    Encoder_State *st,          /* i/o: encoder state structure */
		    Word16 *inp_fx,             /* i  : new input signal */
		    Word16 q_inp,               /* i  : i/p Q */
		    Word16 *wsp,                /* i  : input weighted signal */
		    Word16 non_staX,            /* i  : unbound non-stationarity for sp/mu clas */
		    Word16 *pitch_fr,           /* i  : fraction pitch values */
		    Word16 *voicing_fr,         /* i  : fractional voicing values */
		    Word16 currFlatness,        /* i  : flatness */
		    Word16 lsp_mid[M],          /* i  : LSPs at the middle of the frame */
		    Word16 stab_fac,            /* i  : LP filter stability */
		    Word32 *res_cod_SNR_M,
		    Word16 *res_cod_SNR_M_e,    /* NOTE(review): presumably the exponent of res_cod_SNR_M - confirm */
		    Word16 *tcx_mdct_window_fx,
		    const Word16 flag_16k_smc   /* i  : flag to indicate if the OL SMC is run at 16 kHz */
		);

		void encod_gen_2sbfr(
		Encoder_State st, / i/o: state structure */
		const Word16 speech[], /* i : input speech */
		@@ -3144,6 +3176,87 @@ Word16 ivas_omasa_ener_brate_fx(
		const Word16 input_frame, /* i : Input frame size */
		Word16 data_e /i:exponent for data_f /
		);
		void computeDiffuseness_mdft_fx(
		    Word32 **buffer_intensity[DIRAC_NUM_DIMS],
		    const Word32 *buffer_energy,
		    const Word16 num_freq_bands,
		    const UWord16 no_col_avg_diff,
		    Word32 *diffuseness,
		    Word16 *q_factor_intensity,
		    Word16 *q_factor_energy,
		    Word16 *q_diffuseness /* Output Q */
		);

		void computeDirectionVectors_fixed(
		    Word32 *intensity_real_x,
		    Word32 *intensity_real_y,
		    Word32 *intensity_real_z,
		    const Word16 enc_param_start_band,
		    const Word16 num_frequency_bands,
		    Word32 *direction_vector_x, /* Q30 */
		    Word32 *direction_vector_y, /* Q30 */
		    Word32 *direction_vector_z, /* Q30 */
		    Word16 i_e                  /* Exponent of all the intensity buffers */
		);


		/* NOTE(review): the identifier is spelled "signicant" (presumably "significant");
		 * the name is kept unchanged here because callers reference it. */
		UWord8 ivas_masa_surrcoh_signicant_fx(
		Word32 surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* i : Surround coherence */
		Word32 diffuse_to_total_ratio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* i : Diffuse to total ratio */
		const Word16 nSubFrames, /* i : Number of sub frames */
		const Word16 nBands /* i : Number of frequency bands */
		);

		/*----------------------------------------------------------------------------------*
		 * McMASA prototypes
		 *----------------------------------------------------------------------------------*/

		ivas_error ivas_mcmasa_enc_open_fx(
		    Encoder_Struct *st_ivas /* i/o: IVAS encoder handle */
		);
		void ivas_mcmasa_enc_close_fx(
		MCMASA_ENC_HANDLE hMcMasa, / i/o: encoder McMASA handle */
		const Word32 input_Fs /* i : input sampling rate */
		);

		ivas_error ivas_mcmasa_enc_reconfig_fx(
		    Encoder_Struct *st_ivas /* i/o: IVAS encoder handle */
		);

		void ivas_mcmasa_enc_fx(
		    MCMASA_ENC_HANDLE hMcMasa,    /* i/o: Encoder McMASA handle */
		    IVAS_QMETADATA_HANDLE hQMeta, /* o  : Qmetadata handle */
		    MASA_ENCODER_HANDLE hMasa,    /* i/o: Encoder MASA handle */
		    Word32 *data_f[],             /* i  : Input frame of audio */
		    const Word16 input_frame,     /* i  : Input frame size */
		    const Word16 nchan_transport, /* i  : Number of transport channels */
		    const Word16 nchan_inp,       /* i  : Number of input channels */
		    const Word16 q_inp            /* i  : Input data q-format */
		);
		void ivas_mcmasa_param_est_enc_fx(
		    MCMASA_ENC_HANDLE hMcMasa, /* i  : McMASA encoder structure */
		    MASA_ENCODER_HANDLE hMasa, /* i  : MASA encoder structure */
		    Word32 *data_f[],          /* i  : Audio frame in MC-format */
		    Word32 elevation_m_values_fx[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],   /* o  : Estimated elevation */
		    Word32 azimuth_m_values_fx[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],     /* o  : Estimated azimuth */
		    Word32 energyRatio_fx[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],          /* o  : Estimated direct-to-total ratio */
		    Word32 spreadCoherence_fx[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],      /* o  : Estimated spread coherence */
		    Word32 surroundingCoherence_fx[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], /* o  : Estimated surround coherence */
		    const Word16 input_frame, /* i  : Input frame size */
		    const Word16 nchan_inp,   /* i  : Number of input channels */
		    const Word16 q_inp        /* i  : Input data q-format */
		);

		void computeReferencePower_enc_fx(
		    const Word16 *band_grouping,                      /* i  : Band grouping for estimation */
		    Word32 Cldfb_RealBuffer[][DIRAC_NO_FB_BANDS_MAX], /* i  : Real part of input signal */
		    Word32 Cldfb_ImagBuffer[][DIRAC_NO_FB_BANDS_MAX], /* i  : Imag part of input signal */
		    Word32 *reference_power,                          /* o  : Estimated power */
		    const Word16 enc_param_start_band,                /* i  : first band to process */
		    const Word16 num_freq_bands,                      /* i  : Number of frequency bands */
		    const IVAS_FORMAT ivas_format,                    /* i  : ivas_format */
		    Word16 ref_power_w,                               /* i  : use 0 if hodirac is enabled */
		    const Word16 nchan_ana                            /* i  : number of analysis channels */
		);
		void ivas_omasa_enc(
		OMASA_ENC_HANDLE hOMasa, /* i/o: OMASA encoder handle */
		MASA_ENCODER_HANDLE hMasa, /* i/o: MASA encoder handle */
		@@ -3158,4 +3271,28 @@ void ivas_omasa_enc(
		float data_separated_object, / o : Separated object audio signal */
		int16_t idx_separated_object / o : Index of the separated object */
		);

		void mctStereoIGF_enc_fx(
		MCT_ENC_HANDLE hMCT, /* i/o: MCT encoder structure */
		Encoder_State *sts, / i/o: encoder state structure */
		Word32 orig_spectrum_fx[MCT_MAX_CHANNELS][2], / i : MDCT spectrum for ITF */
		Word16 q_origSpec, /* i : Q for MDCT spectrum */
		Word32 powerSpec_fx[MCT_MAX_CHANNELS][L_FRAME48k], /* i/o: MDCT^2 + MDST^2 spectrum,or estimate*/
		Word32 powerSpecMsInv_fx[MCT_MAX_CHANNELS][NB_DIV], / i : same as above but for inverse spect.*/
		Word16 q_powerSpec[MCT_MAX_CHANNELS], /* i : Q for powSpec_fx and powSpecMsInv_fx*/
		Word32 inv_spectrum_fx[MCT_MAX_CHANNELS][NB_DIV], / i : inverse spectrum */
		const Word16 sp_aud_decision0[MCT_MAX_CHANNELS] /* i : speech audio decision */
		);

		void ivas_mct_core_enc_fx(
		const IVAS_FORMAT ivas_format, /* i : IVAS format */
		MCT_ENC_HANDLE hMCT, /* i/o: MCT encoder structure */
		CPE_ENC_HANDLE hCPE[MCT_MAX_BLOCKS], /* i/o: CPE encoder structures */
		const Word16 nChannels, /* i : number of channels to be coded */
		const Word32 ivas_total_brate, /* i : IVAS total bitrate */
		const Word16 switch_bw, /* i : flag bandwidth switch occurrence */
		const Word16 lfe_bits, /* i : bits spent for LFE */
		const Word16 sba_order /* i : Ambisonic (SBA) order */
		);

		#endif