initial functional split within MSVQ encoder function (03d98545) · Commits · IVAS Codec Public Collaboration / IVAS Codec

lib_com/options.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -158,6 +158,9 @@

		#define ERI_FDCNGVQ_LOW_ROM /* Eri: Contribution #31 Table ROM saving for IVAS FDCNG-VQ modes */

		#define ERI_MSVQ_CLEANUP /* Eri: BE modularization of msvq encoder side DCT c-code */


		/* ################## End DEVELOPMENT switches ######################### */
		/* clang-format on */
		#endif

lib_enc/lsf_msvq_ma_enc.c

+267 −7

Original line number	Diff line number	Diff line
		@@ -53,6 +53,235 @@

		#include "ivas_prot.h"
		/* Prototype of the DCT/IDCT matrix helper used by the stage#1 search below.
		   'input' and 'output' are buffers (the call site passes float arrays), so both are pointers. */
		void dctT2_N_apply_matrix( const float *input, float *output, const int16_t dct_dim, int16_t fdcngvq_dim, const float *idctT2_24_X_matrixQ16, const int16_t matrix_1st_dim, DCTTYPE dcttype );

		#ifdef ERI_MSVQ_CLEANUP



		int16_t msvq_stage1_dct_search( /* o : (p_max , best candidate sofar ) */
		const float u, / i : target */
		const int16_t N, /* i : target length and IDCT synthesis length */
		const int16_t maxC_st1, /* i : number of candidates to provide */

		float invTrfMatrix, / i/o: IDCT synthesis matrix for dim N */

		float st1_mse_ptr, / i : dynRAM buffer for MSEs */
		int16_t indices_st1_local, / o: selecetd cand indices */
		float st1_syn_vec_ptr , / i/o: buffer for IDCT24 synthesis */
		float dist1_ptr / o: resulting stage 1 MSEs in DCT24 domain */
		)

		{ /* stage 1 search in truncated dct domain without any weights */

		float dct_target[FDCNG_VQ_DCT_MAXTRUNC];
		float u_mr[FDCNG_VQ_MAX_LEN];
		float u_mr_scaled[FDCNG_VQ_MAX_LEN];
		float mse_trunc_segm[FDCNG_VQ_DCT_NSEGM];
		float tmp, check_mse;
		float mse; /* Word32 */

		int16_t p_max,c,c2, segm, j_full,j,i ;
		int16_t n_ana, p_mins[2], idx_min[2];

		const Word8 *cbpW8;
		const Word16 *dct_col_shift_tab;

		float *st1_mse_pair;
		int16_t *st1_idx_pair;

		DCTTYPE dcttype = DCT_T2_24_XX;
		float tmp2;
		int16_t check_ind[FDCNG_VQ_DCT_NPOST];

		n_ana = N; /* VQ stage#1 core is currentlu always using stored DCT24 coeffs */
		assert( n_ana >= FDCNG_VQ_DCT_MAXTRUNC ); /* check for FDCNGVQ WB , SWB, FB operation */

		/remove mean/mid fdcng stage#1 vector, in original subband domain /
		v_sub( u, cdk1r_tr_midQ_truncQ, u_mr, n_ana );

		v_multc( u_mr, fdcng_dct_invScaleF[1], u_mr_scaled, n_ana ); /scale up target to upscaled W8x storage domain /
		/* 16.0-->scale up from Q0 to search domain in Q4, not really needed in BASOP , impl. by shifts */

		dctT2_N_apply_matrix( (const float *) u_mr_scaled, dct_target, min( FDCNG_VQ_DCT_MAXTRUNC, n_ana ), n_ana, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, dcttype );

		mse = 0;
		/* init search state ptr's at the top */
		set_f( dist1_ptr, FLT_MAX, maxC_st1);
		st1_mse_pair = &( dist1_ptr[0] ); /* req. ptr init +=2 */
		st1_idx_pair = &( indices_st1_local[0] ); /* req. ptr init +=2 */
		for ( segm = 0; segm < FDCNG_VQ_DCT_NSEGM; segm++ )
		{
		/* point to a new paired location for each segment */
		st1_mse_pair += 2; /* req. ptr init +=2 */
		st1_idx_pair += 2 ; /* req. ptr init +=2 */
		p_max = 0; /* req. to point to 1 or 0 */

		/* compute segment common trunction error in dctN domain */
		mse_trunc_segm[segm] = 0;
		mse_trunc_segm[segm] += sum2_f( (const float *) ( &( dct_target[cdk1_ivas_cols_per_segment[segm]] ) ), cdk1_ivas_trunc_dct_cols_per_segment[segm] );

		cbpW8 = cdk_37bits_ivas_stage1_W8Qx_dct_sections[segm]; /* Word8 column variable Qx storage*/

		for ( j = 0; j < cdk1_ivas_entries_per_segment[segm]; j++ )
		{
		/* unweighted segmented search DCT domain loop */
		j_full = j + cdk1_ivas_cum_entries_per_segment[segm]; /* or simply use j_full++ */

		mse = mse_trunc_segm[segm]; /* init mse with with common mse truncation part, in BASOP a move32() */

		dct_col_shift_tab = stage1_dct_col_syn_shift[segm]; /* ptr init */

		for ( c2 = 0; c2 < cdk1_ivas_cols_per_segment[segm]; c2++ )
		{

		#define WMC_TOOL_SKIP
		tmp = dct_target[c2] - (float) ( ( (Word16) cbpW8[c2] ) << dct_col_shift_tab[c2] ); /* Word8 storage MSE inner loop */
		LOGIC( 1 );
		SHIFT( 1 );
		ADD( 1 ); /* in BASOP: s_and(for W8->W16), shl(), sub()*/
		#undef WMC_TOOL_SKIP

		mse += tmp * tmp; /* L_mac or L_mac0() square Word16 -> Word32*/
		}
		st1_mse_ptr[j_full] = mse; /* save MSE in shared dynamic RAM, move32() in BASOP */

		#define WMC_TOOL_SKIP
		cbpW8 += cdk1_ivas_cols_per_segment[segm]; /* fixed pointer increment for each segment */
		#undef WMC_TOOL_SKIP

		/* overwrite with a new worst index at p_max */
		#ifdef ERI_FDCNGVQ_LOW_ROM
		/* The three inner loop if's below are not really properly instrumented by WMC tool */
		/* a ptr to worst index will be in use */
		#endif
		if ( mse < st1_mse_pair[p_max] ) /* L_sub */
		{
		st1_idx_pair[p_max] = j_full; /* simplified */
		} /* BASOP 2 ops */

		if ( st1_idx_pair[p_max] == j_full ) /* simplified */
		{ /* idx updated to j_full --> also update mse */
		st1_mse_pair[p_max] = mse; /* move32(), single BASOP */
		} /* BASOP 3 ops */

		/* avoid WC costly list management by always updating p_max, as we have only a pair to maintain */
		p_max = 0; /* move16() */
		if ( ( st1_mse_pair[0] - st1_mse_pair[1] ) < 0 ) /* L_sub()*/
		{
		p_max = 1; /* move16() */
		} /* BASOP 3 ops ,Note 2 ops possible in BASOP with L_sub and L_lshr */

		/* Note: logical shift right not available in ANSI-C */
		/* p_max = (st1_mse_pair[0] - st1_mse_pair[1]) ">>>" 31; */
		/* in java logical shift right is available as >>> , in BASOP it is L_lshr */

		/* Cost: weighted sum with cond moves ('if') => 8 in float , 7 in BASOP with L_lshr */
		} /* j in section */

		} /* next segment */

		for ( j = 0; j < maxC_st1; j++ )
		{
		/* compute_full mse using stored DCT24 domain MSE's */
		/* calculate MSE from stage1 inner using existing inner DCT domain variables */
		dist1_ptr[j] = fdcng_dct_scaleF[2]; / single multiplication to get the MSE scale to the correct input domain */
		}

		p_max = maximum( dist1_ptr, maxC_st1, NULL ); /* establish current worst candidate for stage#2 among all maxC_st1 candidates */

		p_mins[0] = minimum( dist1_ptr, maxC_st1, NULL ); /* find best entry among all maxC_pre */
		tmp = dist1_ptr[p_mins[0]];
		dist1_ptr[p_mins[0]] = FLT_MAX; /* exclude 1st */

		p_mins[1] = minimum( dist1_ptr, maxC_st1, NULL ); /* find 2nd best entry */
		tmp2 = dist1_ptr[p_mins[1]];
		dist1_ptr[p_mins[1]] = FLT_MAX; /* exclude 2nd */

		dist1_ptr[p_mins[0]] = tmp; /* restore 1st */
		dist1_ptr[p_mins[1]] = tmp2; /* restore 2nd */

		idx_min[0] = indices_st1_local[p_mins[0]];
		idx_min[1] = indices_st1_local[p_mins[1]];


		/* use global exclusion list to never reselect the two (best) MSE values sofar */
		st1_mse_ptr[idx_min[0]] = FLT_MAX; /* move32() */
		st1_mse_ptr[idx_min[1]] = FLT_MAX; /* move32() */

		/* circular MSE-neigbour list in use to potentially replace some segment search candidates */
		/* using both 1st and 2nd best neighbours in fwd and rev directions */
		check_ind[0] = cdk1_ivas_segm_neighbour_fwd[idx_min[0]];
		check_ind[1] = cdk1_ivas_segm_neighbour_rev[idx_min[0]];

		check_ind[2] = cdk1_ivas_segm_neighbour_fwd[idx_min[1]];
		check_ind[3] = cdk1_ivas_segm_neighbour_rev[idx_min[1]];

		check_ind[4] = cdk1_ivas_segm_neighbour_fwd[check_ind[0]];
		check_ind[5] = cdk1_ivas_segm_neighbour_rev[check_ind[1]];

		check_ind[6] = cdk1_ivas_segm_neighbour_fwd[check_ind[2]];
		check_ind[FDCNG_VQ_DCT_NPOST-1] = cdk1_ivas_segm_neighbour_rev[check_ind[3]];

		for ( i = 0; i < FDCNG_VQ_DCT_NPOST; i++ )
		{
		/* move MSE from search to synthesis domain */
		/* multiplication by fdcng_dct_scaleF[2] to get the float outer loop scale correct in IDCT synthesis domain */
		check_mse = st1_mse_ptr[check_ind[i]] * fdcng_dct_scaleF[2];

		if ( check_mse < dist1_ptr[p_max] )
		{ /* new winner , replace */
		dist1_ptr[p_max] = check_mse;
		indices_st1_local[p_max] = check_ind[i];
		st1_mse_ptr[check_ind[i]] = FLT_MAX; /* exclude, BASOP: move32() */
		p_max = maximum( dist1_ptr, maxC_st1, NULL ); /* establish a new current worst candidate among all maxC */
		}
		}

		/* extract the selected stage one vectors in DCT_N domain , apply IDCT_N and scale up */
		/* always extract full length signal(24) to be able to update WB( N==21) candidate MSE values */
		/* in the case that only a part of the IDCT vector is in final use */
		for ( c = 0; c < maxC_st1; c++ )
		{
		dec_FDCNG_MSVQ_stage1( indices_st1_local[c], N, invTrfMatrix, dcttype + 1, &( st1_syn_vec_ptr[c * N] ), NULL );
		}

		return p_max;
		};


		/* Recalculate the stage#1 MSEs for the WB (0..20) coefficients:
		essentially subtract res21^2, res22^2 and res23^2, which were included in the stage#1 MSE by the truncated DCT24 domain search.
		This excludes the waveform contributions at positions 21, 22 and 23 from the MSE; it is important to keep the WB MSEs updated for the subsequent stages.
		*/
		int16_t msvq_stage1_dct_recalc_candidates_wb( /* o : (updated p_max) */
		const float st1_syn_vec_ptr, / i : IDCT24 synthesis vectors */
		const float u, / i : target signal */
		const int16_t maxC_st1, /* i : number of candidates in stage1 */
		float dist_ptr / i/o: updated MSE vector for stage1 */
		)
		{
		int16_t p_max_local, c;
		const float *p2;
		float res24, high_diff[FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB];

		for ( c = 0; c < maxC_st1; c++ )
		{ /* point to extended synthesis part */
		p2 = (const float ) &( st1_syn_vec_ptr[c FDCNG_VQ_MAX_LEN + FDCNG_VQ_MAX_LEN_WB] ); /* ptr init to synthesis candidate c */
		/* for stage#1 use "u" instead of the shortened resid[0], to access the extended/extrapolated input target */
		v_sub( p2, &( u[FDCNG_VQ_MAX_LEN_WB] ), high_diff, FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB );
		res24 = dotp( high_diff, high_diff, FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB ); /* sum squared over top env. values above WB coeffs */

		dist_ptr[c] -= res24; /* remove DCT24 high band error contribution */
		}

		/* finally update p_max, as it may potentially change,
		due to the core DCT24 search originally optimizing over the longer basis vectors than DCT21 */
		p_max_local = maximum( dist_ptr, maxC_st1, NULL );

		return p_max_local;
		};
		#endif

		#endif

		/--------------------------------------------------------------------------
		@@ -87,9 +316,8 @@ void msvq_enc(
		float resid_buf[2 * LSFMBEST_MAX * M_MAX], dist_buf[2 * LSFMBEST_MAX], Tmp[M_MAX];
		int16_t idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX];
		int16_t n, maxn, start;


		#ifdef ERI_FDCNGVQ_LOW_ROM
		#ifndef ERI_MSVQ_CLEANUP
		/* buffers */
		float dct_target[FDCNG_VQ_DCT_MAXTRUNC];
		float u_mr[FDCNG_VQ_MAX_LEN];
		@@ -110,20 +338,28 @@ void msvq_enc(

		int16_t check_ind[FDCNG_VQ_DCT_NPOST];
		int16_t segm, j_full, maxC_pre;
		float st1_syn_vec_ptr; / 8* 24 floats in dynRAM */
		float st1_mse_ptr; / 2^¨7 == 128 floats in existing dRAM used for stage 1 candidate analysis, 128 Word32 in BASOP */
		#endif
		float st1_syn_vec_ptr; / ptr to buffer in dynRAM */
		float st1_mse_ptr; / ptr to buffer in existing dRAM used for stage 1 candidate analysis */
		#ifdef ERI_MSVQ_CLEANUP
		int16_t indices_st1_local[FDCNG_VQ_DCT_NSEGM * 2]; /* after stage#1 DCT search this is copied to the global indices[1][sstages] structure /
		#else
		float res24, high_diff[FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB];

		maxC_pre = ( FDCNG_VQ_DCT_NSEGM * 2 );
		#endif
		assert( maxC <= LSFMBEST_MAX );
		assert( ( LSFMBEST_MAX * M_MAX ) > ( N * maxC ) );
		/* top of resid_buf is resid[1] and used for stage#1 residuals (input target u),
		we here reuse resid[0] part of the buffer for stage#1 DCT dynamic RAM needs
		*/
		st1_mse_ptr = &( resid_buf[1 * LSFMBEST_MAX * M_MAX] ) - ( levels[0] ); /* reuse top of residual resid[0] scratch RAM for stage1 MSEs */

		st1_syn_vec_ptr = &( resid_buf[1 * LSFMBEST_MAX * M_MAX] ) - FDCNG_VQ_MAX_LEN * maxC; /* reuse top of resid[0] scratch RAM for residual */

		#ifndef ERI_MSVQ_CLEANUP
		dcttype = DCT_T2_24_XX;
		#endif

		#endif

		/----------------------------------------------------------------
		@@ -209,6 +445,22 @@ void msvq_enc(
		}

		#ifdef ERI_FDCNGVQ_LOW_ROM

		#ifdef ERI_MSVQ_CLEANUP
		if ( !s && applyDCT_flag != 0 ) /* means: m==1 */
		{
		/* stage 1 candidates search in truncated dct24 domain without any weights */
		assert( N == FDCNG_VQ_MAX_LEN );
		assert( maxC == 2 * FDCNG_VQ_DCT_NSEGM );
		p_max = msvq_stage1_dct_search( u, FDCNG_VQ_MAX_LEN, maxC, invTrfMatrix, st1_mse_ptr, indices_st1_local, st1_syn_vec_ptr, dist[1] );

		/* move established stage#1 indices to the global MSVQ list structure */
		for ( c = 0; c < maxC; c++ )
		{
		indices[1][c * stages] = indices_st1_local[c];
		}
		}
		#else
		if ( !s && applyDCT_flag != 0 ) /* means: m==1 */
		{ /* stage 1 search in truncated dct domain without any weights */

		@@ -370,6 +622,7 @@ void msvq_enc(

		assert( maxC == maxC_pre );
		}
		#endif
		else
		/* non-DCT Stage #1 code below */
		#endif
		@@ -512,6 +765,12 @@ void msvq_enc(
		essentially subtract res21^2 ,res22^2, res23^2 that was included in stage1 MSE in the DCT24 domain truncated search,
		excludes the waveform contributions at pos 21,22,23 to the MSE, important to keep WB MSEs update for the subsequent stages
		*/
		#ifdef ERI_MSVQ_CLEANUP
		if ( s == 0 && applyDCT_flag != 0 && n == FDCNG_VQ_MAX_LEN_WB )
		{
		p_max = msvq_stage1_dct_recalc_candidates_wb( st1_syn_vec_ptr, u, maxC, dist[1] );
		}
		#else
		if ( s == 0 && applyDCT_flag != 0 && n == FDCNG_VQ_MAX_LEN_WB )
		{
		assert( start == 0 );
		@@ -527,6 +786,7 @@ void msvq_enc(
		/* update p_max, as it may potentially change, due to the core DCT24 search originally optimizing over longer basis vectors than 21 */
		p_max = maximum( dist[1], maxC, NULL );
		}
		#endif
		#endif
		m = maxC;
		} /* for (m=1, s=0; s<stages; s++) */