ROM optimization for FD-CNG first stage MSVQ (25bac608) · Commits · IVAS Codec Public Collaboration / IVAS Codec

lib_com/cnst.h

+23 −0

Original line number	Diff line number	Diff line
		@@ -755,7 +755,11 @@ typedef enum
		#define STEP_SID 5.25f /* CNG & DTX - CNG energy quantization step */

		#define MIN_ACT_CNG_UPD 20 /* DTX - Minimum number of consecutive active frames for CNG mode update */
		/* DTX SID rate setting: 8 in regular builds, 2 when ERI_FDCNGVQ_LOW_ROM_TESTING is defined */
		#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
		#define FIXED_SID_RATE 2 /* increase DTX SID rate for better FDCNG VQ testing */
		#else
		#define FIXED_SID_RATE 8 /* DTX SID rate */
		#endif

		#define TOTALNOISE_HIST_SIZE 4

		@@ -1391,6 +1395,25 @@ enum
		#define NPARTCLDFB 10
		#define NPART_SHAPING 62

		#ifdef ERI_FDCNGVQ_LOW_ROM
		#define FDCNG_VQ_MAX_LEN FD_CNG_maxN_37bits
		#define FDCNG_VQ_DCT_NSEGM 4
		#define FDCNG_VQ_DCT_MINTRUNC 8
		#define FDCNG_VQ_DCT_MAXTRUNC 18
		#define FDCNG_VQ_MAX_LEN_WB 21

		#define FDCNG_VQ_DCT_NPOST 8

		typedef enum _DCTTYPE
		{
		DCT_T2_24_XX = 0, /* truncated DCT_T2_24 */
		IDCT_T2_XX_24 = 1,
		DCT_T2_21_XX = 2, /* truncated DCT_T2_21 */
		IDCT_T2_XX_21 = 3
		} DCTTYPE;

		#endif

		#define MSSUBFRLEN 12
		#define MSNUMSUBFR 6
		#define MSBUFLEN 5

lib_com/ivas_error_utils.h

+4 −0

Original line number	Diff line number	Diff line
		@@ -83,7 +83,11 @@ static inline ivas_error ivas_error_wrapper( const ivas_error error_code, const
		va_end( args );

		fprintf( stderr, "\n\nIn function: %s(), %s:%d\n\n", function, file, line );
		#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
		assert( 0 );
		#endif
		// assert( 0 );

		return error_code;
		}
		#else

lib_com/lsf_tools.c

+365 −2

Original line number	Diff line number	Diff line
		@@ -2027,6 +2027,69 @@ int16_t tcxlpc_get_cdk(
		return cdk;
		}

		#ifdef ERI_FDCNGVQ_LOW_ROM
		void dec_FDCNG_MSVQ_stage1(
		int16_t j_full, /* i: index full range */
		int16_t n, /* i: dimension to generate */
		const float invTrfMatrix, / i: matrix for synthesis */
		const DCTTYPE idcttype, /* i: specify which IDCT */
		float uq, / o: synthesized stage1 vector */
		Word16 uq_ind / o: synthesized stage1 vector in BASOP */
		)
		{
		int16_t col, segm_ind, j;
		float dct_vec[FDCNG_VQ_MAX_LEN];
		float idct_vec[FDCNG_VQ_MAX_LEN];
		const Word8 *cbpW8;
		const Word16 *dct_col_shift_tab;

		assert( n <= FDCNG_VQ_MAX_LEN );
		assert( n >= FDCNG_VQ_DCT_MINTRUNC );

		segm_ind = 0;
		for ( col = 1; col <= FDCNG_VQ_DCT_NSEGM; col++ )
		{
		if ( j_full >= cdk1_ivas_cum_entries_per_segment[col] )
		{
		segm_ind++;
		}
		}

		j = j_full - cdk1_ivas_cum_entries_per_segment[segm_ind]; /* j is the local segment index */

		assert( j < cdk1_ivas_entries_per_segment[segm_ind] );

		/* Word8 column variable Qx storage*/
		cbpW8 = cdk_37bits_ivas_stage1_W8Qx_dct_sections[segm_ind]; /* Word8 storage fixed ptr_init */
		cbpW8 += j * cdk1_ivas_cols_per_segment[segm_ind]; /* adaptive ptr init */
		dct_col_shift_tab = stage1_dct_col_syn_shift[segm_ind];

		for ( col = 0; col < cdk1_ivas_cols_per_segment[segm_ind]; col++ )
		{
		#if 1
		dct_vec[col] = (float) ( ( (Word16) cbpW8[col] ) << dct_col_shift_tab[col] );
		#else
		#define WMC_TOOL_MAN
		dct_vec[col] = (float) ( ( (Word16) cbpW8[col] ) << dct_col_shift_tab[col] );
		LOGIC( 1 );
		SHIFT( 1 );
		ADD( 1 ); /* in BASOP: s_and(for W8->W16), shl(), sub()*/
		#undef WMC_TOOL_MAN
		#endif
		}
		dctT2_N_apply_matrix( (const float *) dct_vec, idct_vec, cdk1_ivas_cols_per_segment[segm_ind], n, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, idcttype );

		/scale down to original fdcngvq domain and move to Q0 /
		v_multc( idct_vec, fdcng_dct_scaleF[1], idct_vec, n );
		/* fdcng_dct_scaleF[1] --> 0.0625-->scale down from search Q4 domain to Q0 , not really relevant for BASOP loop */

		/add common mid fdcng vector, in fdcng bands domain /
		v_add( idct_vec, cdk1r_tr_midQ_truncQ, uq, n );
		assert( uq_ind == NULL );
		}
		#endif


		/--------------------------------------------------------------------------
		* msvq_dec()
		*
		@@ -2042,6 +2105,10 @@ void msvq_dec(
		const int16_t N, /* i : Vector dimension */
		const int16_t maxN, /* i : Codebook dimension */
		const int16_t Idx[], /* i : Indices */
		#ifdef ERI_FDCNGVQ_LOW_ROM
		const int16_t applyIDCT_flag, /* i : applyIDCT flag */
		const float invTrfMatrix, / i: matrix for synthesis */
		#endif
		float uq, / o : quantized vector */
		Word16 uq_ind / o : quantized vector (fixed point) */
		)
		@@ -2079,9 +2146,32 @@ void msvq_dec(
		start = 0;
		}

		#ifdef ERI_FDCNGVQ_LOW_ROM
		if ( i == 0 && applyIDCT_flag != 0 )
		{
		assert( start == 0 );
		dec_FDCNG_MSVQ_stage1( Idx[0], N, invTrfMatrix, IDCT_T2_XX_24, uq, uq_ind ); /* IDCT24 used for all synthesis */
		}
		else
		{
		v_add( uq + start, cb[i] + Idx[i] * maxn, uq + start, n );
		}

		#define WMC_TOOL_SKIP
		IF( uq_ind != NULL )
		{
		FOR( j = 0; j < n; ++j )
		{
		move16();
		uq_ind[start + j] = add( uq_ind[start + j], (Word16) ( cb[i][Idx[i] * maxn + j] * 2.0f * 1.28f ) );
		}
		}
		#undef WMC_TOOL_SKIP
		#else

		v_add( uq + start, cb[i] + Idx[i] * maxn, uq + start, n );

		#define WMC_TOOL_SKIP
		IF( uq_ind != NULL )
		{
		FOR( j = 0; j < n; ++j )
		@@ -2091,6 +2181,7 @@ void msvq_dec(
		}
		}
		#undef WMC_TOOL_SKIP
		#endif
		}

		return;
		@@ -2358,3 +2449,275 @@ void a2isf(

		return;
		}

		#ifdef ERI_FDCNGVQ_LOW_ROM
		/-------------------------------------------------------------------
		* dctT2_N_apply_matrix()
		*
		* dct/idct matrix application loop for a fixed DCT basis vector length of N
		-------------------------------------------------------------------/
		void dctT2_N_apply_matrix(
		const float *input,
		float *output,
		const int16_t dct_dim,
		int16_t fdcngvq_dim,
		const float *matrix,
		const int16_t matrix_row_dim,
		DCTTYPE dcttype )
		{
		int16_t i, j, dim_in, dim_out;
		int16_t mat_step_col, mat_step_row, mat_step_col_flag;
		const float pt_x, pt_A;
		float tmp_y[FDCNG_VQ_MAX_LEN];
		float *pt_y;

		#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
		{
		char tmp_str[1024];
		sprintf( tmp_str, "dctT2_%d_apply_mat", fdcngvq_dim );
		push_wmops( tmp_str );
		}
		#endif

		/* [optionally] non-square DCT_N and IDCT_N matrix application,
		using a stored format of an IDCT_Nx(FDCNG_VQ_DCT_MAXTRUNC) matrix */
		/* effciently parallelized in SIMD */

		assert( dct_dim <= FDCNG_VQ_DCT_MAXTRUNC );
		assert( fdcngvq_dim <= FDCNG_VQ_MAX_LEN );

		if ( ( dcttype & 1 ) == 0 ) /* even entries are DCTs */
		{
		/* DCT_typeII 24,21 -> XX in worst case */
		dim_in = fdcngvq_dim;
		dim_out = dct_dim;
		mat_step_col = matrix_row_dim; /* matrix maximum storage size dependent, width of first row in matrix */
		mat_step_row = 0;
		mat_step_col_flag = 1;
		assert( dcttype == DCT_T2_21_XX \|\| dcttype == DCT_T2_24_XX );
		}
		else
		{
		assert( ( dcttype & 1 ) != 0 ); /* idct */
		dim_in = dct_dim;
		dim_out = fdcngvq_dim;
		mat_step_col = 1;
		mat_step_row = matrix_row_dim;
		mat_step_col_flag = 0;
		assert( dcttype == IDCT_T2_XX_24 );
		}

		pt_y = tmp_y;
		for ( i = 0; i < dim_out; i++ )
		{
		pt_x = input;

		*pt_y = 0;

		/* +i(DCT) or +imaxTrunc(IDCT) /
		#define WMC_TOOL_SKIP
		pt_A = &( matrix[i * ( mat_step_row + mat_step_col_flag )] ); /* ptr indexing */
		PTR_INIT( 1 );
		#undef WMC_TOOL_SKIP
		for ( j = 0; j < dim_in; j++ )
		{
		#define WMC_TOOL_SKIP
		pt_y += ( pt_x++ ) * ( *pt_A );
		pt_A += mat_step_col; /* step +maxtrunc or 1 / / ptr indexing*/
		MAC( 1 );
		#undef WMC_TOOL_SKIP
		}
		pt_y++;
		}
		mvr2r( tmp_y, output, dim_out );
		#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
		{
		pop_wmops();
		}
		#endif
		}


		/-------------------------------------------------------------------
		* extend_dctN_input()
		*
		* (inputN, dctN) -> idct(N_ext) idct_N matrix application loop for
		* extending, extrapolating a DCT basis vector length of N to N_ext
		-------------------------------------------------------------------/

		void extend_dctN_input(
		const float input, / i: input in fdcng domain */
		const float dct_input, / i: input in dctN(fdcng) domain */
		const int16_t in_dim, /* i: in_dim==N */
		float ext_sig, / o: extended output in fdcng domain */
		const int16_t out_dim, /* i: output total dim */
		float matrix, / i: idct synthesis matrix of size N rows , n_cols columns*/
		const int16_t n_cols, /* i: number of columns == truncation length */
		DCTTYPE dcttype ) /* i: matrix operation type */
		{
		int16_t i, j, i_rev;

		const float( ptr )[FDCNG_VQ_DCT_MAXTRUNC] = (void ) matrix;

		#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
		{
		char tmp_str[1024];
		sprintf( tmp_str, "extend_dct%d_input", in_dim );
		push_wmops( tmp_str );
		}
		#endif


		/* stored format of an IDCT_Nx(FDCNG_VQ_DCT_MAXTRUNC) matrix */
		assert( in_dim < FDCNG_VQ_MAX_LEN );
		assert( out_dim <= FDCNG_VQ_MAX_LEN );
		assert( out_dim > in_dim );
		assert( n_cols == FDCNG_VQ_DCT_MAXTRUNC ); /* for ptr[MAX_TRUNC] adressing/


		assert( ( dcttype & 1 ) != 0 ); /* idct tables in use for this basis vector extension */

		mvr2r( input, ext_sig, in_dim ); /* copy initial part, i.e. only last/tail parts are updated */

		set_f( &( ext_sig[in_dim] ), 0.0, out_dim - in_dim );

		i_rev = in_dim; /ptr init/
		for ( i = in_dim; i < out_dim; i++ )
		{ /* for each extension sample */
		/* i = 21 22 23;
		i_rev = 20 19 18; for odd dctII simply reflect basis vector
		*/
		i_rev--;

		for ( j = 0; j < n_cols; j++ ) /* for each available DCT coeff */
		{
		/* DCTcoeff * reflected basis vector */
		#define WMC_TOOL_SKIP
		/* these are pure ptr operations */
		ext_sig[i] += dct_input[j] * ptr[i_rev][j]; /* sum up scaled and extended basis vector */
		MAC( 1 );
		#undef WMC_TOOL_SKIP
		}
		}
		#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
		{
		pop_wmops();
		}
		#endif
		}


		/*-------------------------------------------------------------------*
		 * create_IDCT_N_Matrix()
		 *
		 * Initiate the idct24 FDCNG_VQ_DCT_MAXTRUNC x N matrix in RAM from a
		 * Word8+Word16 quantized, compressed ROM format.
		 *
		 * Step 1: the first len/2 rows (plus the center row when len is odd)
		 *         are expanded from the index table; a negative index selects
		 *         the negated table value, and every value is scaled by 2^-16.
		 * Step 2: the remaining rows are produced by mirroring row r to row
		 *         len-1-r, with the sign swapped in odd columns.
		 * The 24-point tables are used unless N == FDCNG_VQ_MAX_LEN_WB, which
		 * selects the 21-point tables.
		 *-------------------------------------------------------------------*/
		void create_IDCT_N_Matrix( float *inv_matrixFloatQ, const int16_t N, const int16_t n_cols, const int16_t alloc_size )
		{
		    int16_t c, c1, r, r_flip, W16_val;
		    int16_t len; /* <=FDCNG_VQ_MAX_LEN */
		    int16_t mat_cpy_size;
		    const Word16 *val_ptr;
		    const Word8 *idx_ptr;
		    int8_t idx;
		#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
		    {
		        char tmp_str[1024];
		        sprintf( tmp_str, "create_IDCT_%d_mat", N );
		        push_wmops( tmp_str );
		    }
		#endif


		    val_ptr = unique_idctT2_24coeffsQ16;
		    idx_ptr = idctT2_24_compressed_idx;
		    len = FDCNG_VQ_MAX_LEN;
		    if ( N == FDCNG_VQ_MAX_LEN_WB )
		    {
		        val_ptr = unique_idctT2_21coeffsQ16;
		        idx_ptr = idctT2_21_compressed_idx;
		        len = N;
		    }

		    assert( alloc_size >= ( n_cols * len ) ); /* enough space for the full expanded IDCT matrix */
		    assert( N <= len );
		    W16_val = 0; /* safety init */

		    mat_cpy_size = ( n_cols ) * ( len / 2 ); /* one integer division of "len" */

		    if ( ( len & 1 ) != 0 )
		    { /* odd sized DCT with a non-reflected center row */
		        mat_cpy_size += n_cols;
		    }

		    /* expand the stored half of the matrix */
		    for ( c = 0; c < mat_cpy_size; c++ )
		    {
		        idx = idx_ptr[c];
		        if ( idx >= 0 )
		        {
		            W16_val = ( val_ptr[idx] );
		        }
		        if ( idx < 0 )
		        {
		            W16_val = -( val_ptr[-idx] );
		        }
		        inv_matrixFloatQ[c] = ( +1.52587890625e-05f ) * ( (float) W16_val ); /* 1.0/2.^16 scaling to a float-"Q0" scaling not done in BASOP */
		    }

		    { /* a temporary local scope for matrix ptr's */
		        /* for even number of coeffs DCT24,
		           flip symmetry for odd, even used to save 50% further Table ROM */
		        /* for odd DCT center is not flipped e.g DCT21 */
		        /* float inv_matrixFloatQ[len][FDCNG_VQ_DCT_MAXTRUNC]; */

		        /* use fixed number of columns pointers, to simplify addressing code below */
		        float( *ptr )[FDCNG_VQ_DCT_MAXTRUNC] = (void *) inv_matrixFloatQ;
		        float( *ptr21 )[FDCNG_VQ_MAX_LEN_WB] = (void *) inv_matrixFloatQ;

		        if ( n_cols == FDCNG_VQ_DCT_MAXTRUNC )
		        {

		            assert( ( n_cols & 1 ) == 0 );
		            for ( c = 0; c < ( n_cols ); c += 2 )
		            {
		                c1 = c + 1;
		                r_flip = len - 1;
		                for ( r = 0; r < ( len / 2 ); r++, r_flip-- )
		                {
		#define WMC_TOOL_SKIP
		                    /* pure ptr based calculations */
		                    ptr[r_flip][c] = ptr[r][c];    /* flipped */
		                    ptr[r_flip][c1] = -ptr[r][c1]; /* flipped and sign swapped */

		                    MOVE( 2 );
		                    MULT( 1 ); /* for negate */
		#undef WMC_TOOL_SKIP
		                }
		            }
		        }
		        else
		        {
		            float sign_swap = 1.0f;
		            assert( n_cols == FDCNG_VQ_MAX_LEN_WB );
		            for ( c = 0; c < ( n_cols ); c++ )
		            {
		                r_flip = len - 1;
		                sign_swap = 1.0f - 2.0f * ( c & 1 ); /* sign swap odd columns */
		                for ( r = 0; r < ( len / 2 ); r++, r_flip-- )
		                {
		#define WMC_TOOL_SKIP
		                    /* pure ptr based calculations */
		                    ptr21[r_flip][c] = sign_swap * ptr21[r][c]; /* flipped and potentially sign swapped */

		                    MULT( 1 );
		                    MOVE( 1 );
		#undef WMC_TOOL_SKIP
		                }
		            }
		        }
		    }
		#ifdef ERI_FDCNGVQ_LOW_ROM_TESTING
		    {
		        pop_wmops();
		    }
		#endif
		}


		#endif

lib_com/options.h

+4 −0

Original line number	Diff line number	Diff line
		@@ -156,6 +156,10 @@
		#define OTR_REFERENCE_VECTOR_TRACKING /* FhG: enables the reference position orientation tracking mode */
		#endif


		#define ERI_FDCNGVQ_LOW_ROM /* Eri: ~1.6kW Table ROM saving for IVAS FDCNG-VQ */
		/*# define ERI_FDCNGVQ_LOW_ROM_TESTING */ /* Enable testing vs. Baseline for XC SD-measurements , deactivate for WMOPS measurement */

		/* ################## End DEVELOPMENT switches ######################### */
		/* clang-format on */
		#endif

lib_com/prot.h

+51 −3

Original line number	Diff line number	Diff line
		@@ -8061,6 +8061,10 @@ void msvq_enc(
		const float w[], /* i : Weights */
		const int16_t N, /* i : Vector dimension */
		const int16_t maxN, /* i : Codebook dimension */
		#ifdef ERI_FDCNGVQ_LOW_ROM
		const int16_t applyDCT_flag, /* i : applyDCT flag */
		float invTrfMatrix, / i:/o expanded synthesis matrix */
		#endif
		int16_t Idx[] /* o : Indices */
		);

		@@ -8072,10 +8076,54 @@ void msvq_dec(
		const int16_t N, /* i : Vector dimension */
		const int16_t maxN, /* i : Codebook dimension */
		const int16_t Idx[], /* i : Indices */
		#ifdef ERI_FDCNGVQ_LOW_ROM
		const int16_t applyIDCT_flag, /* i : applyIDCT flag */
		const float invTrfMatrix, / i: synthesis matrix */
		#endif
		float uq, / o : quantized vector */
		Word16 uq_ind / o : quantized vector (fixed point) */
		);

		#ifdef ERI_FDCNGVQ_LOW_ROM
		void dec_FDCNG_MSVQ_stage1(
		int16_t j_full, /* i: index full range */
		int16_t n, /* i: dimension to generate */
		const float invTrfMatrix, / i: synthesis matrix */
		DCTTYPE idcttype, /* i: idct type */
		float uq, / o: synthesized stage1 vector */
		Word16 uq_ind / o: synthesized stage1 vector in BASOP */
		);


		/* Expands the compressed ROM representation of the IDCT matrix into a caller-provided RAM buffer */
		void create_IDCT_N_Matrix(
		    float *inv_matrixFloatQ, /* i/o: RAM buffer                                        */
		    const int16_t N,         /* i: DCT length , number of time samples                 */
		    const int16_t n_cols,    /* i: number of dct coeffs (as DCT may be truncated)      */
		    const int16_t alloc_size /* i: RAM buffer size in elements                         */
		);


		/* Applies the stored IDCT matrix as a (possibly truncated) DCT or as an IDCT, selected by dcttype */
		void dctT2_N_apply_matrix(
		    const float *input,           /* i: vector to transform                                  */
		    float *output,                /* o: transformed vector                                   */
		    const int16_t dct_dim,        /* i: number of DCT coefficients                           */
		    int16_t fdcngvq_dim,          /* i: length in the fdcng domain                           */
		    const float *matrix,          /* i: stored IDCT matrix (float)                           */
		    const int16_t matrix_row_dim, /* i: width of one row of the stored matrix                */
		    DCTTYPE dcttype );            /* i: matrix operation type                                */

		/* Extends an fdcng-domain vector from in_dim to out_dim samples using reflected IDCT basis vectors */
		void extend_dctN_input(
		    const float *input,     /* i: input in fdcng domain                                   */
		    const float *dct_input, /* i: input in dctN(fdcng) domain                             */
		    const int16_t in_dim,   /* i: in_dim==N                                               */
		    float *ext_sig,         /* o: extended output in fdcng domain                         */
		    const int16_t out_dim,  /* i: output total dim                                        */
		    float *matrix,          /* i: idct matrix of size N rows , n_cols columns             */
		    const int16_t n_cols,   /* i: number of columns == truncation length                  */
		    DCTTYPE dcttype );      /* i: matrix operation type                                   */
		#endif

		void PulseResynchronization(
		const float src_exc, / i : Input excitation buffer */
		float dst_exc, / o : output excitation buffer */