stereo_dft_enc_synthesize_fx implementation (ff64fead) · Commits · SA4 / Audio / IVAS BASOP

lib_com/fft_fx.c

+18 −0

Original line number	Diff line number	Diff line
		@@ -7111,6 +7111,12 @@ void rfft_fx(
		s2 = -204;
		move16();
		BREAK;
		case 256:
		s1 = 128;
		move16();
		s2 = -128;
		move16();
		BREAK;
		case 320:
		s1 = 102;
		move16();
		@@ -7123,6 +7129,18 @@ void rfft_fx(
		s2 = -68;
		move16();
		BREAK;
		case 640:
		s1 = 51;
		move16();
		s2 = -51;
		move16();
		BREAK;
		case 960:
		s1 = 34;
		move16();
		s2 = -34;
		move16();
		BREAK;
		default:
		s1 = -1;
		move16();

lib_com/ivas_prot_fx.h

+14 −0

Original line number	Diff line number	Diff line
		@@ -1123,6 +1123,20 @@ void stereo_dft_dec_fx(
		const Word16 num_md_sub_frames /* i : number of MD subframes */
		);

		// ivas_stereo_dft_enc.c
		#ifdef IVAS_FLOAT_FIXED
		/* Fixed-point prototype of the DFT-stereo encoder synthesis routine. */
		/* NOTE(review): the '*' declarators and comment openers on the two     */
		/* output lines were lost in extraction and are restored here; confirm  */
		/* against the definition in ivas_stereo_dft_enc.c.                     */
		Word32 stereo_dft_enc_synthesize_fx(
		    STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: encoder stereo handle          */
		    // float *output,                      /* o  : output synthesis               */
		    Word32 *output_fx,                     /* o  : output synthesis Q16           */
		    Word16 *output_start_index,
		    Word16 *output_end_index,
		    const Word16 chan,                     /* i  : channel number                 */
		    const Word32 input_Fs,                 /* i  : input sampling rate            */
		    const Word32 output_sampling_rate,     /* i  : output sampling rate           */
		    const Word16 L_frame,                  /* i  : frame length at internal Fs    */
		    Word16 *nrg_out_fx_e );
		#endif

		void ivas_ls_setup_conversion_fx(
		Decoder_Struct st_ivas, / i : IVAS decoder structure */

lib_com/prot_fx.h

+12 −0

Original line number	Diff line number	Diff line
		@@ -63,6 +63,7 @@
		#include "ivas_cnst.h"
		#include "stat_enc.h"
		#include "stat_dec.h"
		#include "ivas_stat_enc.h"
		#include "ivas_stat_dec.h"
		#include "ivas_error.h"
		#include "ivas_error_utils.h"
		@@ -10068,4 +10069,15 @@ Word16 sr2fscale(
		const Word32 sr_core /* i : internal sampling rate */
		);

		// pre_proc functions
		/full implementation pending/
		void swb_pre_proc_ivas_fx(
		Encoder_State st, / i/o: encoder state structure */
		float new_swb_speech, / o : original input signal at 32kHz */
		Word32 new_swb_speech_fx, / o : original input signal at 32kHz */
		float shb_speech, / o : SHB target signal (6-14kHz) at 16kHz */
		float realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : real buffer */
		float imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : imag buffer */
		CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */
		);
		#endif

lib_enc/ivas_core_enc.c

+409 −1

Original line number	Diff line number	Diff line
		@@ -40,6 +40,7 @@
		#include "wmc_auto.h"
		#include <math.h>
		#ifdef IVAS_FLOAT_FIXED
		#include "prot_fx.h"
		#include "ivas_prot_fx.h"
		#endif

		@@ -49,7 +50,7 @@
		*
		* Principal IVAS core coder routine, where number of core channels is 1 or 2
		*-------------------------------------------------------------------*/

		#ifdef IVAS_FLOAT_FIXED
		ivas_error ivas_core_enc(
		SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */
		CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
		@@ -90,6 +91,10 @@ ivas_error ivas_core_enc(
		float hb_speech[L_FRAME16k / 4];
		float *new_swb_speech;
		float new_swb_speech_buffer[L_FRAME48k + STEREO_DFT_OVL_MAX];
		#ifdef IVAS_FLOAT_FIXED
		Word32 *new_swb_speech_fx;
		Word32 new_swb_speech_buffer_fx[L_FRAME48k + STEREO_DFT_OVL_MAX];
		#endif
		float bwe_exc_extended[CPE_CHANNELS][L_FRAME32k + NL_BUFF_OFFSET];
		float voice_factors[CPE_CHANNELS][NB_SUBFR16k];
		int16_t Voicing_flag[CPE_CHANNELS];
		@@ -363,11 +368,18 @@ ivas_error ivas_core_enc(
		*---------------------------------------------------------------------*/

		new_swb_speech = new_swb_speech_buffer + STEREO_DFT_OVL_MAX;
		#ifdef IVAS_FLOAT_FIXED
		new_swb_speech_fx = new_swb_speech_buffer_fx + STEREO_DFT_OVL_MAX;
		#endif

		if ( !st->Opt_SC_VBR && input_Fs >= 32000 && st->hBWE_TD != NULL )
		{
		/* Common pre-processing for SWB(FB) TBE and SWB(FB) BWE */
		#ifdef IVAS_FLOAT_FIXED
		swb_pre_proc_ivas_fx( st, new_swb_speech, new_swb_speech_fx, shb_speech, realBuffer[n], imagBuffer[n], hCPE );
		#else
		swb_pre_proc( st, new_swb_speech, shb_speech, realBuffer[n], imagBuffer[n], hCPE );
		#endif
		}
		else if ( input_Fs >= 32000 )
		{
		@@ -455,3 +467,399 @@ ivas_error ivas_core_enc(

		return error;
		}
		#else
		/*-------------------------------------------------------------------*
		 * ivas_core_enc()
		 *
		 * Principal IVAS core coder routine, where number of core channels is 1 or 2
		 * (floating-point variant, built when IVAS_FLOAT_FIXED is not defined).
		 *
		 * Processing order:
		 *   1. per-channel pre-processing (pre_proc_ivas),
		 *   2. per-channel signaling and ACELP / TCX / HQ core encoding,
		 *   3. joint MDCT-stereo core encoding (or MDCT-stereo SID encoding),
		 *   4. per-channel core-switching post-processing, WB/SWB/FB bandwidth
		 *      extension, SWB CNG, inter-channel BWE and common updates,
		 *   5. padding with unused bits in combined format coding.
		 *
		 * Returns IVAS_ERR_OK, or the first error reported by pre_proc_ivas()
		 * or acelp_core_enc().
		 *-------------------------------------------------------------------*/
		ivas_error ivas_core_enc(
		    SCE_ENC_HANDLE hSCE,                                         /* i/o: SCE encoder structure                            */
		    CPE_ENC_HANDLE hCPE,                                         /* i/o: CPE encoder structure                            */
		    MCT_ENC_HANDLE hMCT,                                         /* i/o: MCT encoder structure                            */
		    const int16_t n_CoreChannels,                                /* i  : number of core channels to be coded              */
		    float old_inp_12k8[][L_INP_12k8],                            /* i  : buffer of old input signal                       */
		    float old_inp_16k[][L_INP],                                  /* i  : buffer of old input signal                       */
		    float ener[],                                                /* i  : residual energy from Levinson-Durbin             */
		    float A[][NB_SUBFR16k * ( M + 1 )],                          /* i  : A(z) unquantized for the 4 subframes             */
		    float Aw[][NB_SUBFR16k * ( M + 1 )],                         /* i  : weighted A(z) unquantized for subframes          */
		    float epsP[][M + 1],                                         /* i  : LP prediction errors                             */
		    float lsp_new[][M],                                          /* i  : LSPs at the end of the frame                     */
		    float lsp_mid[][M],                                          /* i  : LSPs in the middle of the frame                  */
		    const int16_t vad_hover_flag[],                              /* i  : VAD hangover flag                                */
		    int16_t attack_flag[],                                       /* i  : attack flag (GSC or TC)                          */
		    float realBuffer[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: real buffer                                      */
		    float imagBuffer[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: imag buffer                                      */
		    float old_wsp[][L_WSP],                                      /* i  : weighted input signal buffer                     */
		    const int16_t loc_harm[],                                    /* i  : harmonicity flag                                 */
		    const float cor_map_sum[],                                   /* i  : speech/music classif. parameter                  */
		    const int16_t vad_flag_dtx[],                                /* i  : HE-SAD flag with additional DTX HO               */
		    float enerBuffer[][CLDFB_NO_CHANNELS_MAX],                   /* i  : energy buffer                                    */
		    float fft_buff[][2 * L_FFT],                                 /* i  : FFT buffer                                       */
		    const int16_t tdm_SM_or_LRTD_Pri,                            /* i  : channel combination scheme flag                  */
		    const int16_t ivas_format,                                   /* i  : IVAS format                                      */
		    const int16_t flag_16k_smc                                   /* i  : flag to indicate if the OL SMC is run at 16 kHz  */
		)
		{
		    int16_t n, input_frame;
		    int16_t cpe_id, MCT_flag;
		    Encoder_State **sts, *st; /* sts: array of per-channel encoder states, st: state of the current channel */
		    STEREO_ICBWE_ENC_HANDLE hStereoICBWE;
		    STEREO_TD_ENC_DATA_HANDLE hStereoTD;
		    float *inp[CPE_CHANNELS];
		    float new_inp_resamp16k[CPE_CHANNELS][L_FRAME16k]; /* new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */
		    float old_syn_12k8_16k[CPE_CHANNELS][L_FRAME16k];  /* ACELP core synthesis at 12.8kHz or 16kHz to be used by the SWB BWE */
		    float shb_speech[L_FRAME16k];
		    float hb_speech[L_FRAME16k / 4];
		    float *new_swb_speech;
		    float new_swb_speech_buffer[L_FRAME48k + STEREO_DFT_OVL_MAX];
		    float bwe_exc_extended[CPE_CHANNELS][L_FRAME32k + NL_BUFF_OFFSET];
		    float voice_factors[CPE_CHANNELS][NB_SUBFR16k];
		    int16_t Voicing_flag[CPE_CHANNELS];
		    float pitch_buf[CPE_CHANNELS][NB_SUBFR16k];
		    int16_t unbits[CPE_CHANNELS];
		    float tdm_lspQ_PCh[M], tdm_lsfQ_PCh[M];
		    int16_t last_element_mode, tdm_Pitch_reuse_flag;
		    int32_t element_brate, last_element_brate, input_Fs;
		    int16_t diff_nBits;
		    ivas_error error;
		    int16_t max_num_indices_BWE;

		    push_wmops( "ivas_core_enc" );

		    error = IVAS_ERR_OK;

		    /*------------------------------------------------------------------*
		     * General initialization
		     *-----------------------------------------------------------------*/

		    if ( hSCE != NULL )
		    {
		        /* single channel element: no stereo handles are available */
		        cpe_id = -1;
		        MCT_flag = 0;
		        sts = hSCE->hCoreCoder;
		        hStereoTD = NULL;
		        hStereoICBWE = NULL;
		        element_brate = hSCE->element_brate;
		        last_element_brate = hSCE->last_element_brate;
		        last_element_mode = IVAS_SCE;
		        tdm_Pitch_reuse_flag = -1;
		    }
		    else
		    {
		        /* channel pair element, optionally part of an MCT configuration */
		        cpe_id = hCPE->cpe_id;
		        MCT_flag = 0;
		        if ( hMCT != NULL )
		        {
		            MCT_flag = 1;
		        }
		        sts = hCPE->hCoreCoder;
		        hStereoICBWE = hCPE->hStereoICBWE;
		        element_brate = hCPE->element_brate;
		        last_element_brate = hCPE->last_element_brate;
		        last_element_mode = hCPE->last_element_mode;

		        if ( hCPE->hStereoTD != NULL )
		        {
		            hStereoTD = hCPE->hStereoTD;
		            tdm_Pitch_reuse_flag = hCPE->hStereoTD->tdm_Pitch_reuse_flag;
		        }
		        else
		        {
		            hStereoTD = NULL;
		            tdm_Pitch_reuse_flag = -1;
		        }
		    }

		    input_Fs = sts[0]->input_Fs;
		    input_frame = (int16_t) ( input_Fs / FRAMES_PER_SEC );

		    set_f( new_swb_speech_buffer, 0, L_FRAME48k + STEREO_DFT_OVL_MAX );

		    for ( n = 0; n < n_CoreChannels; n++ )
		    {
		        st = sts[n];

		        /*------------------------------------------------------------------*
		         * Initialization per core-coder channel
		         *-----------------------------------------------------------------*/


		        st->extl = -1;
		        unbits[n] = 0;

		        st->element_brate = element_brate;

		        /*---------------------------------------------------------------------*
		         * Pre-processing, incl. Decision matrix
		         *---------------------------------------------------------------------*/

		        if ( ( error = pre_proc_ivas( st, last_element_mode, element_brate, ivas_format == SBA_FORMAT ? last_element_brate : element_brate, input_frame, old_inp_12k8[n], old_inp_16k[n], &inp[n], &ener[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], new_inp_resamp16k[n], &Voicing_flag[n], old_wsp[n], loc_harm[n], cor_map_sum[n], vad_flag_dtx[n], enerBuffer[n], fft_buff[n], MCT_flag, vad_hover_flag[n], flag_16k_smc ) ) != IVAS_ERR_OK )
		        {
		            return error;
		        }

		        if ( st->element_mode == IVAS_CPE_MDCT || st->element_mode == IVAS_SCE )
		        {
		            st->enablePlcWaveadjust = 0;
		        }
		    }

		    /*------------------------------------------------------------------*
		     * Sanity check in combined format coding
		     *-----------------------------------------------------------------*/

		    diff_nBits = 0;
		    if ( hCPE != NULL && hCPE->element_mode == IVAS_CPE_DFT && hCPE->brate_surplus > 0 )
		    {
		        ivas_combined_format_brate_sanity( hCPE->element_brate, sts[0]->core, sts[0]->total_brate, &( sts[0]->core_brate ), &( sts[0]->inactive_coder_type_flag ), &diff_nBits );
		    }

		    /*---------------------------------------------------------------------*
		     * Core Encoding
		     *---------------------------------------------------------------------*/

		    for ( n = 0; n < n_CoreChannels; n++ )
		    {
		        st = sts[n];

		        /* update pointer to the buffer of indices of the second channel */
		        if ( n == 1 && st->element_mode == IVAS_CPE_TD )
		        {
		            /* adjust the pointer to the buffer of indices of the secondary channel (make space for BWE indices) */
		            max_num_indices_BWE = get_BWE_max_num_indices( sts[0]->extl_brate );
		            st->hBstr->ind_list = sts[0]->hBstr->ind_list + sts[0]->hBstr->nb_ind_tot + max_num_indices_BWE;

		            /* write TD stereo spatial parameters */
		            move_indices( hStereoTD->tdm_hBstr_tmp.ind_list, st->hBstr->ind_list, hStereoTD->tdm_hBstr_tmp.nb_ind_tot );
		            st->hBstr->nb_ind_tot += hStereoTD->tdm_hBstr_tmp.nb_ind_tot;
		            st->hBstr->nb_bits_tot += hStereoTD->tdm_hBstr_tmp.nb_bits_tot;

		            reset_indices_enc( &hStereoTD->tdm_hBstr_tmp, MAX_IND_TDM_TMP );
		        }

		        /*---------------------------------------------------------------------*
		         * Write signaling info into the bitstream
		         *---------------------------------------------------------------------*/

		        if ( !MCT_flag || ( MCT_flag && cpe_id == 0 ) )
		        {
		            ivas_signaling_enc( st, MCT_flag, element_brate, tdm_SM_or_LRTD_Pri, tdm_Pitch_reuse_flag );
		        }

		        /*---------------------------------------------------------------------*
		         * Preprocessing (preparing) for ACELP/HQ core switching
		         *---------------------------------------------------------------------*/

		        core_switching_pre_enc( st, old_inp_12k8[n], old_inp_16k[n], sts[0]->active_cnt, last_element_mode );

		        /*---------------------------------------------------------------------*
		         * ACELP core encoding
		         * TCX core encoding
		         * HQ core encoding
		         *---------------------------------------------------------------------*/

		        if ( st->core == ACELP_CORE )
		        {
		            /* ACELP core encoder */
		            if ( ( error = acelp_core_enc( st, inp[n], ener[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], vad_hover_flag[0], attack_flag[n], bwe_exc_extended[n], voice_factors[n], old_syn_12k8_16k[n], pitch_buf[n], &unbits[n], hStereoTD, tdm_lsfQ_PCh ) ) != IVAS_ERR_OK )
		            {
		                return error;
		            }
		        }

		        if ( ( st->core == TCX_20_CORE || st->core == TCX_10_CORE ) && st->element_mode != IVAS_CPE_MDCT )
		        {
		            /* TCX core encoder */
		            stereo_tcx_core_enc( st, old_inp_12k8[n] + L_INP_MEM, old_inp_16k[n] + L_INP_MEM, Aw[n], lsp_new[n], lsp_mid[n], pitch_buf[n], last_element_mode, vad_hover_flag[0] );
		        }

		        if ( st->core == HQ_CORE )
		        {
		            /* HQ core encoder */
		            hq_core_enc( st, st->input, input_frame, NORMAL_HQ_CORE, Voicing_flag[n], vad_hover_flag[0] );
		        }

		        /*---------------------------------------------------------------------*
		         * TD stereo updates
		         *---------------------------------------------------------------------*/

		        if ( st->element_mode == IVAS_CPE_TD && n == 0 )
		        {
		            td_stereo_param_updt( st->lsp_old, st->lsf_old, pitch_buf[0], tdm_lspQ_PCh, tdm_lsfQ_PCh, hStereoTD->tdm_Pri_pitch_buf, st->flag_ACELP16k, hStereoTD->tdm_use_IAWB_Ave_lpc );
		        }
		    }


		    /*---------------------------------------------------------------------*
		     * MDCT stereo: joint TCX Core Encoding
		     *---------------------------------------------------------------------*/

		    if ( sts[0]->element_mode == IVAS_CPE_MDCT )
		    {
		        if ( sts[0]->core_brate > SID_2k40 && sts[1]->core_brate > SID_2k40 )
		        {
		            if ( MCT_flag )
		            {
		                ivas_mdct_core_whitening_enc( hCPE, old_inp_16k, old_wsp, pitch_buf, hMCT->p_mdst_spectrum_long[cpe_id], hMCT->tnsBits[cpe_id], hMCT->p_orig_spectrum_long[cpe_id],
		                                              hMCT->tnsSize[cpe_id], hMCT->p_param[cpe_id], hMCT->hBstr, 1, hMCT->nchan_out_woLFE );
		            }
		            else
		            {
		                stereo_mdct_core_enc( hCPE, old_inp_16k, old_wsp, pitch_buf );
		            }
		        }
		        else if ( sts[0]->core_brate == SID_2k40 && sts[1]->core_brate == SID_2k40 )
		        {
		            /* synch CNG configs between channels */
		            for ( n = 0; n < CPE_CHANNELS; n++ )
		            {
		                st = sts[n];
		                if ( ( st->hFdCngEnc->hFdCngCom->frameSize != st->L_frame ) || ( st->hFdCngEnc->hFdCngCom->CngBandwidth != st->bwidth ) )
		                {
		                    configureFdCngEnc( st->hFdCngEnc, max( st->bwidth, WB ), st->L_frame == L_FRAME16k ? ACELP_16k40 : ACELP_9k60 );
		                }
		            }

		            if ( sts[0]->cng_sba_flag )
		            {
		                FdCngEncodeDiracMDCTStereoSID( hCPE );
		            }
		            else
		            {
		                FdCngEncodeMDCTStereoSID( hCPE );
		            }
		        }
		    }


		    /*---------------------------------------------------------------------*
		     * Postprocessing, BWEs and Updates
		     *---------------------------------------------------------------------*/

		    for ( n = 0; n < n_CoreChannels; n++ )
		    {
		        st = sts[n];

		        /*---------------------------------------------------------------------*
		         * Postprocessing for ACELP/HQ core switching
		         *---------------------------------------------------------------------*/

		        core_switching_post_enc( st, old_inp_12k8[n], old_inp_16k[n], A[n] );

		        /*---------------------------------------------------------------------*
		         * WB TBE encoding
		         * WB BWE encoding
		         *---------------------------------------------------------------------*/

		        if ( input_Fs >= 16000 && st->bwidth < SWB && st->hBWE_TD != NULL )
		        {
		            /* Common pre-processing for WB TBE and WB BWE */
		            wb_pre_proc( st, last_element_mode, new_inp_resamp16k[n], hb_speech );
		        }

		        if ( st->extl == WB_TBE )
		        {
		            /* WB TBE encoder */
		            wb_tbe_enc( st, hb_speech, bwe_exc_extended[n], voice_factors[n], pitch_buf[n] );
		        }
		        else if ( st->extl == WB_BWE && n == 0 && st->element_mode != IVAS_CPE_MDCT )
		        {
		            /* WB BWE encoder */
		            wb_bwe_enc( st, new_inp_resamp16k[n] );
		        }

		        /*---------------------------------------------------------------------*
		         * SWB(FB) TBE encoding
		         * SWB(FB) BWE encoding
		         *---------------------------------------------------------------------*/

		        /* leave STEREO_DFT_OVL_MAX samples of headroom at the start of the buffer */
		        new_swb_speech = new_swb_speech_buffer + STEREO_DFT_OVL_MAX;

		        if ( !st->Opt_SC_VBR && input_Fs >= 32000 && st->hBWE_TD != NULL )
		        {
		            /* Common pre-processing for SWB(FB) TBE and SWB(FB) BWE */
		            swb_pre_proc( st, new_swb_speech, shb_speech, realBuffer[n], imagBuffer[n], hCPE );
		        }
		        else if ( input_Fs >= 32000 )
		        {
		            if ( st->hBWE_TD != NULL )
		            {
		                InitSWBencBufferStates( st->hBWE_TD, shb_speech );
		            }
		        }

		        /* SWB TBE encoder */
		        if ( st->extl == SWB_TBE || st->extl == FB_TBE )
		        {
		            if ( st->core_brate != FRAME_NO_DATA && st->core_brate != SID_2k40 )
		            {
		                float fb_exc[L_FRAME16k];

		                swb_tbe_enc( st, hStereoICBWE, shb_speech, bwe_exc_extended[n], voice_factors[n], fb_exc, pitch_buf[n] );

		                if ( st->extl == FB_TBE )
		                {
		                    /* FB TBE encoder */
		                    fb_tbe_enc( st, st->input, fb_exc );
		                }
		            }
		        }
		        else if ( st->extl == SWB_BWE || st->extl == FB_BWE )
		        {
		            /* SWB(FB) BWE encoder */
		            swb_bwe_enc( st, last_element_mode, old_inp_12k8[n], old_inp_16k[n], old_syn_12k8_16k[n], new_swb_speech, shb_speech );
		        }

		        /*---------------------------------------------------------------------*
		         * SWB DTX/CNG encoding
		         *---------------------------------------------------------------------*/

		        if ( st->hTdCngEnc != NULL && st->Opt_DTX_ON && ( input_frame >= L_FRAME32k || st->element_mode == IVAS_CPE_DFT ) )
		        {
		            /* SHB DTX/CNG encoder */
		            swb_CNG_enc( st, shb_speech, old_syn_12k8_16k[n] );
		        }

		        /*-------------------------------------------------------------------*
		         * Inter-channel BWE encoding
		         *-------------------------------------------------------------------*/

		        if ( n == 0 && input_Fs >= 32000 && hStereoICBWE != NULL )
		        {
		            stereo_icBWE_preproc( hCPE, input_frame, new_swb_speech_buffer /*tmp buffer*/ );

		            stereo_icBWE_enc( hCPE, shb_speech, new_swb_speech_buffer, voice_factors[0] );
		        }

		        /*---------------------------------------------------------------------*
		         * Channel-aware mode - write signaling information into the bitstream
		         *---------------------------------------------------------------------*/

		        signaling_enc_rf( st );

		        /*---------------------------------------------------------------------*
		         * Common updates
		         *---------------------------------------------------------------------*/

		        if ( !MCT_flag ) /* for MCT do this later, otherwise there can be a problem because TCX quant happens later and might get the wrong last_core on a bit rate switch */
		        {
		            updt_enc_common( st );
		        }
		    }

		    /*------------------------------------------------------------------*
		     * Write potentially unused bits in combined format coding
		     *-----------------------------------------------------------------*/

		    if ( hCPE != NULL && hCPE->element_mode == IVAS_CPE_DFT && hCPE->brate_surplus > 0 )
		    {
		        /* pad in chunks of at most 16 bits per pushed index */
		        while ( diff_nBits > 0 )
		        {
		            n = min( diff_nBits, 16 );
		            push_indice( sts[0]->hBstr, IND_UNUSED, 0, n );
		            diff_nBits -= n;
		        }
		    }


		    pop_wmops();

		    return error;
		}
		#endif
		No newline at end of file

lib_enc/ivas_cpe_enc.c

+744 −4

File changed.

Preview size limit exceeded, changes collapsed.