diff --git a/lib_com/cnst.h b/lib_com/cnst.h index db1dc3d8982a768eff807f35d083dbabeae15f58..7b359e2f40ff80ab3db4a1848c9f0e3a781f28d6 100644 --- a/lib_com/cnst.h +++ b/lib_com/cnst.h @@ -1391,6 +1391,25 @@ enum #define NPARTCLDFB 10 #define NPART_SHAPING 62 +#ifdef ERI_FDCNGVQ_LOW_ROM +#define FDCNG_VQ_MAX_LEN FD_CNG_maxN_37bits +#define FDCNG_VQ_DCT_NSEGM 4 +#define FDCNG_VQ_DCT_MINTRUNC 8 +#define FDCNG_VQ_DCT_MAXTRUNC 18 +#define FDCNG_VQ_MAX_LEN_WB 21 + +#define FDCNG_VQ_DCT_NPOST 8 + +typedef enum _DCTTYPE +{ + DCT_T2_24_XX = 0, /* truncated DCT_T2_24 */ + IDCT_T2_XX_24 = 1, + DCT_T2_21_XX = 2, /* truncated DCT_T2_21 */ + IDCT_T2_XX_21 = 3 +} DCTTYPE; + +#endif + #define MSSUBFRLEN 12 #define MSNUMSUBFR 6 #define MSBUFLEN 5 diff --git a/lib_com/ivas_error_utils.h b/lib_com/ivas_error_utils.h index 034369656bc8e54c703e986e3479a514eafa1c2b..b9a6b3f872443e35368c4d802054d6f47eaafc42 100644 --- a/lib_com/ivas_error_utils.h +++ b/lib_com/ivas_error_utils.h @@ -84,6 +84,7 @@ static inline ivas_error ivas_error_wrapper( const ivas_error error_code, const fprintf( stderr, "\n\nIn function: %s(), %s:%d\n\n", function, file, line ); // assert( 0 ); + return error_code; } #else diff --git a/lib_com/lsf_tools.c b/lib_com/lsf_tools.c index 19321e21d9bcea526d92d3f0fc6cebc6be6c87f2..5f0916dbd147f32788f2c4e7b49015cb17e8bf85 100644 --- a/lib_com/lsf_tools.c +++ b/lib_com/lsf_tools.c @@ -2027,6 +2027,64 @@ int16_t tcxlpc_get_cdk( return cdk; } +#ifdef ERI_FDCNGVQ_LOW_ROM +void dec_FDCNG_MSVQ_stage1( + int16_t j_full, /* i: stage-1 index over the full (all segments) range */ + int16_t n, /* i: dimension to generate */ + const float *invTrfMatrix, /* i: IDCT matrix for synthesis */ + const DCTTYPE idcttype, /* i: specify which IDCT */ + float *uq, /* o: synthesized stage1 vector */ + Word16 *uq_ind /* o: synthesized stage1 vector in BASOP (not produced here, must be NULL) */ +) +{ + int16_t col, segm_ind, j; + float dct_vec[FDCNG_VQ_MAX_LEN]; + float idct_vec[FDCNG_VQ_MAX_LEN]; + const Word8 *cbpW8; + const Word16 *dct_col_shift_tab; + + assert( 
n <= FDCNG_VQ_MAX_LEN ); + assert( n >= FDCNG_VQ_DCT_MINTRUNC ); + + segm_ind = 0; + for ( col = 1; col < FDCNG_VQ_DCT_NSEGM; col++ ) /* stop before the total-count entry of the cumulative table so that segm_ind stays below FDCNG_VQ_DCT_NSEGM */ + { + if ( j_full >= cdk1_ivas_cum_entries_per_segment[col] ) + { + segm_ind++; + } + } + + j = j_full - cdk1_ivas_cum_entries_per_segment[segm_ind]; /* j is the local segment index */ + + assert( j < cdk1_ivas_entries_per_segment[segm_ind] ); + + /* Word8 column variable Qx storage*/ + cbpW8 = cdk_37bits_ivas_stage1_W8Qx_dct_sections[segm_ind]; /* Word8 storage fixed ptr_init */ + cbpW8 += j * cdk1_ivas_cols_per_segment[segm_ind]; /* adaptive ptr init */ + dct_col_shift_tab = stage1_dct_col_syn_shift[segm_ind]; + + for ( col = 0; col < cdk1_ivas_cols_per_segment[segm_ind]; col++ ) + { + dct_vec[col] = (float) ( ( (Word16) cbpW8[col] ) * ( 1 << dct_col_shift_tab[col] ) ); /* scale by 2^shift; a multiply is used because left-shifting a negative value is undefined behavior in C */ + /* LOGIC( 1 );SHIFT( 1 ); ADD( 1 ); + in BASOP: s_and(for W8->W16), shl(), sub() + */ + } + dctT2_N_apply_matrix( (const float *) dct_vec, idct_vec, cdk1_ivas_cols_per_segment[segm_ind], n, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, idcttype ); + + /*scale down to original fdcngvq domain and move to Q0 */ + v_multc( idct_vec, fdcng_dct_scaleF[1], idct_vec, n ); + /* fdcng_dct_scaleF[1] --> 0.0625-->scale down from search Q4 domain to Q0 , + not really relevant for BASOP loop */ + + /*add common mid fdcng vector, in fdcng bands domain */ + v_add( idct_vec, cdk1r_tr_midQ_truncQ, uq, n ); + assert( uq_ind == NULL ); +} +#endif + + /*--------------------------------------------------------------------------* * msvq_dec() * @@ -2042,8 +2100,12 @@ void msvq_dec( const int16_t N, /* i : Vector dimension */ const int16_t maxN, /* i : Codebook dimension */ const int16_t Idx[], /* i : Indices */ - float *uq, /* o : quantized vector */ - Word16 *uq_ind /* o : quantized vector (fixed point) */ +#ifdef ERI_FDCNGVQ_LOW_ROM + const int16_t applyIDCT_flag, /* i : applyIDCT flag */ + const float *invTrfMatrix, /* i: matrix for IDCT synthesis */ +#endif + float *uq, /* o : quantized vector */ + 
Word16 *uq_ind /* o : quantized vector (fixed point) */ ) { int16_t i, n, maxn, start; @@ -2079,9 +2141,32 @@ void msvq_dec( start = 0; } +#ifdef ERI_FDCNGVQ_LOW_ROM + if ( i == 0 && applyIDCT_flag != 0 ) + { + assert( start == 0 ); + dec_FDCNG_MSVQ_stage1( Idx[0], N, invTrfMatrix, IDCT_T2_XX_24, uq, uq_ind ); /* IDCT_T2 N=24 used for all synthesis */ + } + else + { + v_add( uq + start, cb[i] + Idx[i] * maxn, uq + start, n ); + } + #define WMC_TOOL_SKIP + IF( uq_ind != NULL ) + { + FOR( j = 0; j < n; ++j ) + { + move16(); + uq_ind[start + j] = add( uq_ind[start + j], (Word16) ( cb[i][Idx[i] * maxn + j] * 2.0f * 1.28f ) ); + } + } +#undef WMC_TOOL_SKIP +#else + v_add( uq + start, cb[i] + Idx[i] * maxn, uq + start, n ); +#define WMC_TOOL_SKIP IF( uq_ind != NULL ) { FOR( j = 0; j < n; ++j ) @@ -2091,6 +2176,7 @@ void msvq_dec( } } #undef WMC_TOOL_SKIP +#endif } return; @@ -2358,3 +2444,197 @@ void a2isf( return; } + +#ifdef ERI_FDCNGVQ_LOW_ROM +/*-------------------------------------------------------------------* + * dctT2_N_apply_matrix() + * + * truncated DCT/IDCT matrix application 
for DCT basis vector lengths of N + *-------------------------------------------------------------------*/ +void dctT2_N_apply_matrix( + const float *input, /* i: input in fdcng or DCT(fdcng) domain */ + float *output, /* o: output in DCT(fdcng) or fdcng domain */ + const int16_t dct_dim, /* i: DCT-domain length, possibly truncated */ + int16_t fdcngvq_dim, /* i: fdcng-domain length */ + const float *matrix, /* i: stored IDCT matrix */ + const int16_t matrix_row_dim, /* i: number of stored coefficients per matrix row (row stride) */ + DCTTYPE dcttype ) /* i: matrix operation type */ +{ + int16_t i, j, dim_in, dim_out; + int16_t mat_step_col, mat_step_row, mat_step_col_flag; + const float *pt_x, *pt_A; + float tmp_y[FDCNG_VQ_MAX_LEN]; /* results are accumulated here and copied to output at the end */ + float *pt_y; + + /* non-square DCT_N and IDCT_N matrix application, + using a stored format of an IDCT_Nx(FDCNG_VQ_DCT_MAXTRUNC) matrix */ + /* efficiently parallelized in SIMD */ + + assert( dct_dim <= FDCNG_VQ_DCT_MAXTRUNC ); + assert( fdcngvq_dim <= FDCNG_VQ_MAX_LEN ); + + if ( ( dcttype & 1 ) == 0 ) /* even entries are DCTs */ + { + /* DCT_typeII 24,21 -> XX in worst case */ + dim_in = fdcngvq_dim; + dim_out = dct_dim; + mat_step_col = matrix_row_dim; /* matrix maximum storage size dependent, width of first row in matrix */ + mat_step_row = 0; + mat_step_col_flag = 1; + assert( dcttype == DCT_T2_21_XX || dcttype == DCT_T2_24_XX ); + } + else + { + assert( ( dcttype & 1 ) != 0 ); /* idct */ + dim_in = dct_dim; + dim_out = fdcngvq_dim; + mat_step_col = 1; + mat_step_row = matrix_row_dim; + mat_step_col_flag = 0; + assert( dcttype == IDCT_T2_XX_24 ); /* only the 24-point IDCT is accepted on this path */ + } + + pt_y = tmp_y; + for ( i = 0; i < dim_out; i++ ) + { + pt_x = input; + *pt_y = 0; + + /* +i(DCT) or +i*maxTrunc(IDCT) */ +#define WMC_TOOL_SKIP + pt_A = &( matrix[i * ( mat_step_row + mat_step_col_flag )] ); /* ptr indexing */ + PTR_INIT( 1 ); +#undef WMC_TOOL_SKIP + for ( j = 0; j < dim_in; j++ ) + { +#define WMC_TOOL_SKIP + *pt_y += ( *pt_x++ ) * ( *pt_A ); + pt_A += mat_step_col; /* step +maxtrunc or +1 */ /* ptr indexing*/ + MAC( 1 ); +#undef WMC_TOOL_SKIP + } + pt_y++; + } + mvr2r( tmp_y, output, dim_out ); +} + +/*-------------------------------------------------------------------* + * extend_dctN_input() + * + * (inputN, dctN) -> idct(N_ext) idct_N matrix 
application loop for + * extending, extrapolating a DCT basis vector length of N to N_ext + *-------------------------------------------------------------------*/ + +void extend_dctN_input( + const float *input, /* i: input in fdcng domain */ + const float *dct_input, /* i: input in dctN(fdcng) domain */ + const int16_t in_dim, /* i: in_dim == N */ + float *ext_sig, /* o: extended output in fdcng domain */ + const int16_t out_dim, /* i: output total dim */ + float *matrix, /* i: idct synthesis matrix N rows, n_cols columns */ + const int16_t n_cols, /* i: number of columns == DCT truncation length */ + DCTTYPE dcttype ) /* i: matrix operation type */ +{ + int16_t i, j, i_rev; + const float( *ptr )[FDCNG_VQ_DCT_MAXTRUNC] = (void *) matrix; + + /* stored format is an IDCT_Nx(FDCNG_VQ_DCT_MAXTRUNC) matrix */ + assert( in_dim < FDCNG_VQ_MAX_LEN ); + assert( out_dim <= FDCNG_VQ_MAX_LEN ); + assert( out_dim > in_dim ); + assert( n_cols == FDCNG_VQ_DCT_MAXTRUNC ); /* required for the fixed-width ptr[][FDCNG_VQ_DCT_MAXTRUNC] addressing */ + assert( ( dcttype & 1 ) != 0 ); /* idct tables always in use for this basis vector extension */ + + mvr2r( input, ext_sig, in_dim ); /* copy initial part, i.e. 
only last/tail parts are extended */ + set_f( &( ext_sig[in_dim] ), 0.0, out_dim - in_dim ); + + i_rev = in_dim; /*ptr init*/ + for ( i = in_dim; i < out_dim; i++ ) + { /* for each extension sample */ + /* i = 21 22 23; + i_rev = 20 19 18; for odd dctII reflect basis vector + */ + i_rev--; + + for ( j = 0; j < n_cols; j++ ) /* for each available DCT coeff */ + { + /* DCTcoeff * reflected basis vector */ +#define WMC_TOOL_SKIP + /* pure ptr MAC operations */ + ext_sig[i] += dct_input[j] * ptr[i_rev][j]; /* sum up scaled and extended basis vector */ + MAC( 1 ); +#undef WMC_TOOL_SKIP + } + } +} + + +/* initiate (expand) the IDCT matrix in RAM from a quantized compressed ROM format; N == FDCNG_VQ_MAX_LEN_WB selects the 21-row tables, otherwise the 24-row tables are used */ +void create_IDCT_N_Matrix( float *inv_matrixFloatQ, const int16_t N, const int16_t n_cols, const int16_t alloc_size ) +{ + int16_t c, c1, r, r_flip, W16_val; + int16_t len; + int16_t mat_cpy_size; + const Word16 *absval_ptr; + const Word8 *idx_ptr; + Word16 idx; + float( *ptr )[FDCNG_VQ_DCT_MAXTRUNC] = (void *) inv_matrixFloatQ; /* pointer to rows with a fixed number of columns, to simplify addressing in ANSI C */ + + absval_ptr = unique_idctT2_24coeffsQ16; + idx_ptr = idctT2_24_compressed_idx; + len = FDCNG_VQ_MAX_LEN; + + if ( N == FDCNG_VQ_MAX_LEN_WB ) + { + absval_ptr = unique_idctT2_21coeffsQ16; + idx_ptr = idctT2_21_compressed_idx; + len = N; + } + + assert( alloc_size >= ( n_cols * len ) ); /* enough space for the full expanded IDCT matrix */ + assert( N <= len ); + + mat_cpy_size = ( n_cols ) * ( len >> 1 ); /* NB integer division of "len" */ + + if ( ( len & 1 ) != 0 ) + { /* odd sized DCT with a non-reflected center row */ + mat_cpy_size += n_cols; + } + + for ( c = 0; c < mat_cpy_size; c++ ) + { + idx = (Word16) ( idx_ptr[c] ); + W16_val = absval_ptr[abs( idx )]; /* |idx| selects the stored magnitude, the sign of idx gives the coefficient sign */ + + if ( idx < 0 ) + { + W16_val = -( W16_val ); + } + inv_matrixFloatQ[c] = ( +1.52587890625e-05f ) * ( (float) W16_val ); /* 1.0/2.^16 scaling to a float-"Q0" , a scaling that is not done in BASOP */ + } + + /* for 
even number of coeffs DCT24, + flip symmetry for odd, even is used to save 50% IDCT Table ROM */ + /* for an odd-length DCT (e.g. DCT21) the center row is not flipped */ + + assert( n_cols == FDCNG_VQ_DCT_MAXTRUNC ); + assert( ( n_cols & 1 ) == 0 ); + + for ( c = 0; c < ( n_cols ); c += 2 ) + { + c1 = c + 1; + r_flip = len - 1; + for ( r = 0; r < ( len / 2 ); r++, r_flip-- ) + { +#define WMC_TOOL_SKIP + ptr[r_flip][c] = ptr[r][c]; /* flipped */ + ptr[r_flip][c1] = -( ptr[r][c1] ); /* flipped and sign swapped */ + MOVE( 2 ); + MULT( 1 ); /* for negate */ +#undef WMC_TOOL_SKIP + } + } +} + + +#endif diff --git a/lib_com/options.h b/lib_com/options.h index 7147a49308d53bc0f27ddcf9c74e3cd01653b456..91f2a1ad7219dfa99589bc15854ac8d075d67c72 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -83,7 +83,7 @@ /*#define ALLOW_BYTE_EP*/ /* allow byte fer pattern files and check fer pattern file validity */ #define WRAP_AS_EIDXOR /* wraps FER file (as in STL_eid-xor.c/softbit.c) */ -#define DEBUG_FORCE_MDCT_STEREO_MODE /* Force stereo mode decision for MDCT stereo: -stere 3 1 forces L/R coding and -stereo 3 2 forces full M/S coding */ +#define DEBUG_FORCE_MDCT_STEREO_MODE /* Force stereo mode decision for MDCT stereo: -stereo 3 1 forces L/R coding and -stereo 3 2 forces full M/S coding */ /*#define DEBUG_STEREO_DFT_NOCORE*/ /* DFT stereo: by-pass core coder at decoder side*/ /*#define DEBUG_STEREO_DFT_NOSTEREO*/ /* DFT stereo: by-pass stereo processing at encoder and decoder side*/ /*#define DEBUG_STEREO_DFT_NOQRES*/ @@ -156,7 +156,7 @@ #define FIX_419_ISM_BRATE_SW_DTX /* VA: issue 419: fix ISM Bitrate Switching with dtx */ #define FIX_422 /* FhG: Issue 422: re-introduce fix for noisy speech buffer in ParamISM */ - +#define ERI_FDCNGVQ_LOW_ROM /* Eri: Contribution #31 Table ROM saving for IVAS FDCNG-VQ modes */ /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ diff --git a/lib_com/prot.h b/lib_com/prot.h index 
f3829d914f37dfe7876f29fe16eab7534822fc52..c63a8d93de0e0381aa8b036e30d43f0a4b74c0b4 100644 --- a/lib_com/prot.h +++ b/lib_com/prot.h @@ -8064,7 +8064,11 @@ void msvq_enc( const float w[], /* i : Weights */ const int16_t N, /* i : Vector dimension */ const int16_t maxN, /* i : Codebook dimension */ - int16_t Idx[] /* o : Indices */ +#ifdef ERI_FDCNGVQ_LOW_ROM + const int16_t applyDCT_flag, /* i : applyDCT flag */ + float *invTrfMatrix, /* i/o: expanded synthesis matrix */ +#endif + int16_t Idx[] /* o : Indices */ ); void msvq_dec( @@ -8075,9 +8079,51 @@ void msvq_dec( const int16_t N, /* i : Vector dimension */ const int16_t maxN, /* i : Codebook dimension */ const int16_t Idx[], /* i : Indices */ - float *uq, /* o : quantized vector */ - Word16 *uq_ind /* o : quantized vector (fixed point) */ -); +#ifdef ERI_FDCNGVQ_LOW_ROM + const int16_t applyIDCT_flag, /* i : applyIDCT flag */ + const float *invTrfMatrix, /* i: synthesis matrix */ +#endif + float *uq, /* o : quantized vector */ + Word16 *uq_ind /* o : quantized vector (fixed point) */ +); + +#ifdef ERI_FDCNGVQ_LOW_ROM +void dec_FDCNG_MSVQ_stage1( + int16_t j_full, /* i: index full range */ + int16_t n, /* i: dimension to generate */ + const float *invTrfMatrix, /* i: synthesis matrix */ + DCTTYPE idcttype, /* i: idct type */ + float *uq, /* o: synthesized stage1 vector */ + Word16 *uq_ind /* o: synthesized stage1 vector in BASOP */ +); + +void create_IDCT_N_Matrix( + float *inv_matrixFloatQ, /* i/o: RAM buffer */ + const int16_t N, /* i: DCT length , number of time samples */ + const int16_t n_cols, /* i: number of dct coeffs (as DCT may be truncated) */ + const int16_t alloc_size /* i: RAM buffer size in elements*/ +); + + +void dctT2_N_apply_matrix( + const float *input, /* i: input in fdcng or DCT(fdcng) domain */ + float *output, /* o: output in DCT(fdcng) or fdcng domain */ + const int16_t dct_dim, /* i: dct processing dim possibly truncated */ + int16_t fdcngvq_dim, /* i: fdcng domain length */ + const 
float *idctT2_N_16matrixQ16, /* i: IDCT matrix */ + const int16_t matrix_1st_dim, /* i: number of stored coefficients per matrix row (row stride) */ + DCTTYPE dcttype ); /* i: matrix operation type */ + +void extend_dctN_input( + const float *input, /* i: input in fdcng domain */ + const float *dct_input, /* i: input in dctN(fdcng) domain */ + const int16_t in_dim, /* i: in_dim==N */ + float *ext_sig, /* o: extended output in fdcng domain */ + const int16_t out_dim, /* i: output total dim */ + float *matrix, /* i: idct matrix of size N rows, n_cols columns */ + const int16_t n_cols, /* i: number of columns == truncation length */ + DCTTYPE dcttype ); /* i: matrix type */ +#endif void PulseResynchronization( const float *src_exc, /* i : Input excitation buffer */ diff --git a/lib_com/rom_com.c b/lib_com/rom_com.c index b0c0c61978cd93adcbd9db91a0c0bf6768d711cb..6701d395344bc00b97762b37a1109b84ba511a15 100644 --- a/lib_com/rom_com.c +++ b/lib_com/rom_com.c @@ -6036,6 +6036,299 @@ const FD_CNG_SETUP FdCngSetup_swb2 = { 640, 320, sizeof(sidPartitions_swb2)/size const int16_t levels_37bits[FD_CNG_stages_37bits] = { 128, 64, 64, 64, 64, 64 }; const int16_t bits_37bits[FD_CNG_stages_37bits] = { 7, 6, 6, 6, 6, 6 }; +#ifdef ERI_FDCNGVQ_LOW_ROM +/* IDCT_MATRIX_ROM: 18*24 Word16 = 432 Word16 */ +/* or compressed IDCT_MATRIX_ROM: 18*24 Word8 + 25 = 230 Word16 + WMOPS (INDIRECT(432) and STORE(432) ) */ + +/* Stage1 Word8 tables 16x8+ 17*10+ 17*16 + 78*18 = 1974 Word8 -> 987 Word16 */ + +/* ROM storeSizeW8 = W8reduction (3072- (987+230) )/3072 = 1207/3072 --> 39.3 % */ +/* ROM with DCTII-24 in PROM = W8reduction (3072- (987) )/3072 = /3072 --> 31.8 % */ + +/* additional minor Table ROM ( dct_mid points 18 Word16, dct_col_upshifts 52, scaleFactors 2*2 = ~= 74 Word16s */ + + +const Word16 cdk1_ivas_entries_per_segment[FDCNG_VQ_DCT_NSEGM] = { 16, 17, 17, 78 }; +const Word16 cdk1_ivas_cum_entries_per_segment[FDCNG_VQ_DCT_NSEGM + 1] = { 0, 16 ,33, 50, 128 }; +const Word16 /* DCT trunc_len */ cdk1_ivas_cols_per_segment[FDCNG_VQ_DCT_NSEGM] = 
{ FDCNG_VQ_DCT_MINTRUNC, 10, 16, FDCNG_VQ_DCT_MAXTRUNC }; /* 8, 10, 16, 18 */ +const Word16 /* segment inner DCT trunc_len */ cdk1_ivas_trunc_dct_cols_per_segment[FDCNG_VQ_DCT_NSEGM] = { FDCNG_VQ_DCT_MAXTRUNC - FDCNG_VQ_DCT_MINTRUNC, FDCNG_VQ_DCT_MAXTRUNC - 10 , FDCNG_VQ_DCT_MAXTRUNC - 16 , 0 }; + +/* to get back to FDCNG VQ domain for segment S use : idct as follows */ +/* cdk1r_vec[col, row] = cdk1r_tr_midQ_truncQ(col 1:24 ) + invScaleFQ * idctMat( cdk1_ivas_dct_sS_W8[1:col]<element_mode == EVS_MONO ) ? cdk_37bits : cdk_37bits_ivas; +#endif const float gain_q_offset = ( st->element_mode == EVS_MONO ) ? GAIN_Q_OFFSET_EVS : GAIN_Q_OFFSET_IVAS; +#ifdef ERI_FDCNGVQ_LOW_ROM + invTrfMatrix = (float *) tmpRAM; +#endif + hFdCngCom = ( st->hFdCngDec )->hFdCngCom; sidNoiseEst = hFdCngCom->sidNoiseEst; @@ -987,7 +997,21 @@ void FdCng_decodeSID( index = get_next_indice( st, 7 ); /* MSVQ decoder */ + +#ifdef ERI_FDCNGVQ_LOW_ROM + if ( st->element_mode != EVS_MONO ) + { + create_IDCT_N_Matrix( invTrfMatrix, FDCNG_VQ_MAX_LEN, FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float ) ) ); + msvq_dec( cdk_37bits_ivas, NULL, NULL, FD_CNG_stages_37bits, N, FD_CNG_maxN_37bits, indices, 1, invTrfMatrix, v, NULL ); + } + else + { /* Legacy EVS_MONO MSVQ tables */ + msvq_dec( cdk_37bits, NULL, NULL, FD_CNG_stages_37bits, N, FD_CNG_maxN_37bits, indices, 0, NULL, v, NULL ); + } + +#else msvq_dec( codebooks, NULL, NULL, FD_CNG_stages_37bits, N, FD_CNG_maxN_37bits, indices, v, NULL ); +#endif /* Decode gain */ gain = ( (float) index - gain_q_offset ) / 1.5f; @@ -1992,6 +2016,16 @@ void FdCngDecodeMDCTStereoSID( int16_t indices[FD_CNG_stages_37bits]; int16_t N, i, ch, p, stages; int16_t is_out_ms; +#ifdef ERI_FDCNGVQ_LOW_ROM + float *invTrfMatrix; + float tmpRAM[FDCNG_VQ_MAX_LEN][FDCNG_VQ_DCT_MAXTRUNC]; +#endif + + +#ifdef ERI_FDCNGVQ_LOW_ROM + invTrfMatrix = (float *) tmpRAM; + create_IDCT_N_Matrix( invTrfMatrix, FDCNG_VQ_MAX_LEN, FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float 
) ) ); +#endif is_out_ms = 0; if ( hCPE->hCoreCoder[0]->cng_sba_flag ) @@ -2035,7 +2069,11 @@ void FdCngDecodeMDCTStereoSID( } /* MSVQ decoder */ +#ifdef ERI_FDCNGVQ_LOW_ROM + msvq_dec( cdk_37bits_ivas, NULL, NULL, stages, N, FD_CNG_maxN_37bits, indices, 1, invTrfMatrix, ms_ptr[ch], NULL ); +#else msvq_dec( cdk_37bits_ivas, NULL, NULL, stages, N, FD_CNG_maxN_37bits, indices, ms_ptr[ch], NULL ); +#endif } dtx_read_padding_bits( sts[1], ( IVAS_SID_5k2 - 4400 ) / FRAMES_PER_SEC ); @@ -2079,7 +2117,7 @@ void FdCngDecodeMDCTStereoSID( /*------------------------------------------------------------------- * FdCngDecodeDiracMDCTStereoSID() * - * Decode FD-Cng parameters for CNG in 2TC DirAC mode from the bitstream + * Decode FD-CNG parameters for CNG in 2TC DirAC mode from the bitstream *-------------------------------------------------------------------*/ void FdCngDecodeDiracMDCTStereoSID( @@ -2094,6 +2132,16 @@ void FdCngDecodeDiracMDCTStereoSID( float gain[CPE_CHANNELS]; int16_t indices[FD_CNG_stages_37bits]; int16_t N, i, ch, p; +#ifdef ERI_FDCNGVQ_LOW_ROM + float *invTrfMatrix; + float tmpRAM[FDCNG_VQ_MAX_LEN][FDCNG_VQ_DCT_MAXTRUNC]; +#endif + + +#ifdef ERI_FDCNGVQ_LOW_ROM + invTrfMatrix = (float *) tmpRAM; /* dynamically filled */ + create_IDCT_N_Matrix( invTrfMatrix, FDCNG_VQ_MAX_LEN, FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float ) ) ); +#endif for ( ch = 0; ch < CPE_CHANNELS; ch++ ) { @@ -2116,7 +2164,11 @@ void FdCngDecodeDiracMDCTStereoSID( gain[1] = gain[0]; /* MSVQ decoder */ +#ifdef ERI_FDCNGVQ_LOW_ROM + msvq_dec( cdk_37bits_ivas, NULL, NULL, FD_CNG_stages_37bits, N, FD_CNG_maxN_37bits, indices, 1, invTrfMatrix, ms_ptr[0], NULL ); +#else msvq_dec( cdk_37bits_ivas, NULL, NULL, FD_CNG_stages_37bits, N, FD_CNG_maxN_37bits, indices, ms_ptr[0], NULL ); +#endif mvr2r( ms_ptr[0], ms_ptr[1], N ); /*inverseMS( N, ms_ptr[0], ms_ptr[1], 1.f );*/ diff --git a/lib_dec/ivas_sns_dec.c b/lib_dec/ivas_sns_dec.c index 
e03c7594be2c440b876322e55c7946e4fe80d6be..6cf80ec0d1f0efdfae5d2f98f7ca770746d31a56 100644 --- a/lib_dec/ivas_sns_dec.c +++ b/lib_dec/ivas_sns_dec.c @@ -283,12 +283,21 @@ void dequantize_sns( nStages = SNS_MSVQ_NSTAGES_SIDE; means = ( st->core == TCX_20_CORE ) ? ivas_sns_means_side_tcx20 : ivas_sns_means_side_tcx10; +#ifdef ERI_FDCNGVQ_LOW_ROM + msvq_dec( side_cdbks, NULL, NULL, nStages, M, M, &indices[ch][idxIndices + SNS_STEREO_MODE_OFFSET_INDICES], 0, NULL, snsQ, NULL ); +#else msvq_dec( side_cdbks, NULL, NULL, nStages, M, M, &indices[ch][idxIndices + SNS_STEREO_MODE_OFFSET_INDICES], snsQ, NULL ); +#endif + v_add( snsQ, means, snsQ, M ); } else { +#ifdef ERI_FDCNGVQ_LOW_ROM + msvq_dec( cdbks, NULL, NULL, nStages, M, M, &indices[ch][idxIndices + SNS_STEREO_MODE_OFFSET_INDICES], 0, NULL, snsQ, NULL ); +#else msvq_dec( cdbks, NULL, NULL, nStages, M, M, &indices[ch][idxIndices + SNS_STEREO_MODE_OFFSET_INDICES], snsQ, NULL ); +#endif } idxIndices += nStages; } diff --git a/lib_dec/lsf_msvq_ma_dec.c b/lib_dec/lsf_msvq_ma_dec.c index 2fa1f3573b0a432fe062ffce9c45b8358e721b77..ede93bb1623ccdb68fb963356947701764e81522 100644 --- a/lib_dec/lsf_msvq_ma_dec.c +++ b/lib_dec/lsf_msvq_ma_dec.c @@ -187,7 +187,11 @@ int16_t D_lsf_tcxlpc( NumIndices = 1; +#ifdef ERI_FDCNGVQ_LOW_ROM + msvq_dec( lsf_codebook[narrowband][cdk], lsf_dims, lsf_offs, TCXLPC_NUMSTAGES, M, M, indices + NumIndices, 0, NULL, lsf_q, lsf_q_ind ); +#else msvq_dec( lsf_codebook[narrowband][cdk], lsf_dims, lsf_offs, TCXLPC_NUMSTAGES, M, M, indices + NumIndices, lsf_q, lsf_q_ind ); +#endif NumIndices += TCXLPC_NUMSTAGES; @@ -195,7 +199,12 @@ int16_t D_lsf_tcxlpc( { /* Only add contribution if flag is enabled */ + +#ifdef ERI_FDCNGVQ_LOW_ROM + msvq_dec( lsf_ind_codebook[narrowband][cdk], lsf_ind_dims, lsf_ind_offs, TCXLPC_IND_NUMSTAGES, M, M, indices + NumIndices, 0, NULL, lsf_rem_q, lsf_rem_q_ind ); +#else msvq_dec( lsf_ind_codebook[narrowband][cdk], lsf_ind_dims, lsf_ind_offs, TCXLPC_IND_NUMSTAGES, M, M, indices + 
NumIndices, lsf_rem_q, lsf_rem_q_ind ); +#endif NumIndices += TCXLPC_IND_NUMSTAGES; /* Add to MA-removed vector */ @@ -264,7 +273,11 @@ int16_t dec_lsf_tcxlpc( } /* Decode independent lsf */ +#ifdef ERI_FDCNGVQ_LOW_ROM + msvq_dec( lsf_codebook[narrowband][cdk], lsf_dims, lsf_offs, TCXLPC_NUMSTAGES, M, M, flag + 1, 0, NULL, lsf_q_ignored, lsf_q_ind ); +#else msvq_dec( lsf_codebook[narrowband][cdk], lsf_dims, lsf_offs, TCXLPC_NUMSTAGES, M, M, flag + 1, lsf_q_ignored, lsf_q_ind ); +#endif /* Update flag */ *flag = lsf_ind_is_active( lsf_q_ind, lsf_means[narrowband], narrowband, cdk ); diff --git a/lib_enc/fd_cng_enc.c b/lib_enc/fd_cng_enc.c index e43b65d56a88383a225e5228d77b2c06bf1d4018..a76b741ca41726fa5e553dcdc92ad424749d2762 100644 --- a/lib_enc/fd_cng_enc.c +++ b/lib_enc/fd_cng_enc.c @@ -513,12 +513,24 @@ void FdCng_encodeSID( float w[32]; float preemph_fac = st->preemph_fac; +#ifdef ERI_FDCNGVQ_LOW_ROM + float *invTrfMatrix; + float tmpRAM[FDCNG_VQ_MAX_LEN][FDCNG_VQ_DCT_MAXTRUNC]; + float dct_target[FDCNG_VQ_DCT_MAXTRUNC]; + float tot_sig_ext[FDCNG_VQ_MAX_LEN]; +#else const float *const *codebooks = ( st->element_mode == EVS_MONO ) ? cdk_37bits : cdk_37bits_ivas; +#endif const float gain_q_offset = ( st->element_mode == EVS_MONO ) ? 
GAIN_Q_OFFSET_EVS : GAIN_Q_OFFSET_IVAS; /* Init */ N = hFdCngEnc->npartDec; +#ifdef ERI_FDCNGVQ_LOW_ROM + invTrfMatrix = (float *) tmpRAM; /* dynamically filled */ + set_zero( v, FDCNG_VQ_MAX_LEN ); +#endif + /* Convert to LOG */ e = 0.f; for ( i = 0; i < N; i++ ) @@ -544,10 +556,39 @@ void FdCng_encodeSID( /* MSVQ encoder */ set_f( w, 1.0f, N ); +#ifdef ERI_FDCNGVQ_LOW_ROM + if ( st->element_mode != EVS_MONO ) + { + /* DCT domain compressed/truncated indices used for first stage */ + /* quantization with stage1 stored in DCT24 domain, stages 2 through 6 directly dearched + in FDCNG band domain + */ + if ( N == FDCNG_VQ_MAX_LEN_WB ) + { + create_IDCT_N_Matrix( invTrfMatrix, N, FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float ) ) ); + /* truncated DCT21 analysis */ + dctT2_N_apply_matrix( (const float *) v, dct_target, FDCNG_VQ_DCT_MAXTRUNC, N, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, DCT_T2_21_XX ); + /* truncated IDCT21 extension to 24 bands */ + extend_dctN_input( v, dct_target, N, tot_sig_ext, FDCNG_VQ_MAX_LEN, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, IDCT_T2_XX_21 ); + + mvr2r( tot_sig_ext, v, FDCNG_VQ_MAX_LEN ); /* write extended result as input to VQ stage #1 */ + } + create_IDCT_N_Matrix( invTrfMatrix, FDCNG_VQ_MAX_LEN, FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float ) ) ); + msvq_enc( cdk_37bits_ivas, NULL, NULL, v, levels_37bits, FD_CNG_maxC_37bits, FD_CNG_stages_37bits, w, N, FD_CNG_maxN_37bits, 1, invTrfMatrix, indices ); + msvq_dec( cdk_37bits_ivas, NULL, NULL, FD_CNG_stages_37bits, N, FD_CNG_maxN_37bits, indices, 1, invTrfMatrix, v, NULL ); + } + else + { /* EVS_MONO tables */ + msvq_enc( cdk_37bits, NULL, NULL, v, levels_37bits, FD_CNG_maxC_37bits, FD_CNG_stages_37bits, w, N, FD_CNG_maxN_37bits, 0, NULL, indices ); + msvq_dec( cdk_37bits, NULL, NULL, FD_CNG_stages_37bits, N, FD_CNG_maxN_37bits, indices, 0, NULL, v, NULL ); + } +#else msvq_enc( codebooks, NULL, NULL, v, levels_37bits, FD_CNG_maxC_37bits, FD_CNG_stages_37bits, w, N, 
FD_CNG_maxN_37bits, indices ); /* MSVQ decoder */ msvq_dec( codebooks, NULL, NULL, FD_CNG_stages_37bits, N, FD_CNG_maxN_37bits, indices, v, NULL ); +#endif + /* Compute gain */ gain = 0.f; @@ -973,6 +1014,14 @@ void FdCngEncodeMDCTStereoSID( int16_t no_side_flag; int16_t is_inp_ms; +#ifdef ERI_FDCNGVQ_LOW_ROM + float tot_sig_ext[FDCNG_VQ_MAX_LEN], dct_target[CPE_CHANNELS][FDCNG_VQ_DCT_MAXTRUNC]; /* 24 +2*18*/ + float *invTrfMatrix; + float tmpRAM[FDCNG_VQ_MAX_LEN][FDCNG_VQ_DCT_MAXTRUNC]; /*24*18*/ + invTrfMatrix = (float *) tmpRAM; /* dynamically filled */ +#endif + + is_inp_ms = 0; if ( hCPE->hCoreCoder[0]->cng_sba_flag == 1 ) { @@ -1023,7 +1072,11 @@ void FdCngEncodeMDCTStereoSID( /* Quantize noise shapes */ for ( ch = 0; ch < CPE_CHANNELS; ch++ ) { +#ifdef ERI_FDCNGVQ_LOW_ROM + /* Normalize MSVQ input */ +#else /* Normalize MSVW input */ +#endif gain[ch] = 0.f; for ( p = N_GAIN_MIN; p < N_GAIN_MAX; p++ ) { @@ -1036,6 +1089,39 @@ void FdCngEncodeMDCTStereoSID( ms_ptr[ch][p] -= gain[ch]; } +#ifdef ERI_FDCNGVQ_LOW_ROM + } + /* always split channel targetloop */ + + /* extend fdcng envelope from length 21 to a 24 length fdncg domain envelope signal */ + /* High quality cosine smooth basis extension used to not introduce noise in stage#1 DCT24 analysis and subsequent VQ-steps */ + if ( N == FDCNG_VQ_MAX_LEN_WB ) + { + create_IDCT_N_Matrix( invTrfMatrix, N, FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float ) ) ); /*WB: create truncated IDCT21 matrix */ + + for ( ch = 0; ch < CPE_CHANNELS; ch++ ) + { + /* run DCT_N N==21 , truncated at 18/21 ~= 86% , i.e use a bit better better quality in extrapolation , than subsequent DCT24 analysis which is truncated at 75%*/ + + /* truncated DCT 21 analysis */ + dctT2_N_apply_matrix( (const float *) ms_ptr[ch], dct_target[ch], FDCNG_VQ_DCT_MAXTRUNC, N, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, DCT_T2_21_XX ); + + /* extrapolate extend fdcng envelope signal in the fdncg ienvelope/"time" domain using DCT21 basis vectors, + 
estimated DCT21 coeffs scaling extended basis vectors are used to create extrapolated length 24 input target envelope signal */ + /* this DCT21 extension does not introduce DCT24 coefficient noise for the subsequent dct24 target analysis, and later in IDCT24 synthesis */ + + /* truncated IDCT 21 extension synthesis */ + extend_dctN_input( ms_ptr[ch], dct_target[ch], N, tot_sig_ext, FDCNG_VQ_MAX_LEN, invTrfMatrix /* DCT_N basis vectors */, FDCNG_VQ_DCT_MAXTRUNC, IDCT_T2_XX_21 ); /* use 18 basis vectors*/ + + mvr2r( tot_sig_ext, ms_ptr[ch], FDCNG_VQ_MAX_LEN ); /* write extended result as input to VQ */ + } + } + create_IDCT_N_Matrix( invTrfMatrix, FDCNG_VQ_MAX_LEN, FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float ) ) ); /*always create/set up IDCT24 matrix in RAM */ + + /* end split */ + for ( ch = 0; ch < CPE_CHANNELS; ch++ ) + { +#endif /* MSVQ */ if ( ch ) { @@ -1046,8 +1132,21 @@ void FdCngEncodeMDCTStereoSID( stages = FD_CNG_stages_37bits; } +#ifdef ERI_FDCNGVQ_LOW_ROM + /* DCT24 domain compressed/truncated indices used for first stage */ + /* mid channel quantization using stages 1 through 6 */ + /* & side channel quantization using stages 1 through 4 */ + + { + msvq_enc( cdk_37bits_ivas, NULL, NULL, ms_ptr[ch], levels_37bits, FD_CNG_maxC_37bits, stages, weights, N, FD_CNG_maxN_37bits, 1, invTrfMatrix, indices[ch] ); + msvq_dec( cdk_37bits_ivas, NULL, NULL, stages, N, FD_CNG_maxN_37bits, indices[ch], 1, invTrfMatrix, ms_ptr[ch], NULL ); + } +#else msvq_enc( cdk_37bits_ivas, NULL, NULL, ms_ptr[ch], levels_37bits, FD_CNG_maxC_37bits, stages, weights, N, FD_CNG_maxN_37bits, indices[ch] ); + msvq_dec( cdk_37bits_ivas, NULL, NULL, stages, N, FD_CNG_maxN_37bits, indices[ch], ms_ptr[ch], NULL ); + +#endif } if ( no_side_flag ) @@ -1133,6 +1232,7 @@ void FdCngEncodeMDCTStereoSID( /* pad with zeros to reach common SID frame size */ push_indice( sts[1]->hBstr, IND_ENERGY, 0, ( IVAS_SID_5k2 - 4400 ) / FRAMES_PER_SEC ); + return; } @@ -1160,6 +1260,13 @@ void 
FdCngEncodeDiracMDCTStereoSID( int16_t indices[CPE_CHANNELS][FD_CNG_stages_37bits]; int16_t gain_idx[CPE_CHANNELS]; int16_t ch, p; +#ifdef ERI_FDCNGVQ_LOW_ROM + float *invTrfMatrix; + float tmpRAM[FDCNG_VQ_MAX_LEN][FDCNG_VQ_DCT_MAXTRUNC]; + float dct_target[FDCNG_VQ_DCT_MAXTRUNC]; + float tot_sig_ext[FDCNG_VQ_MAX_LEN]; + invTrfMatrix = (float *) tmpRAM; /* dynamically filled */ +#endif /* set pointers and initialize */ for ( ch = 0; ch < CPE_CHANNELS; ch++ ) @@ -1176,7 +1283,11 @@ void FdCngEncodeDiracMDCTStereoSID( for ( ch = 0; ch < CPE_CHANNELS; ch++ ) { E[ch] = 0.0f; +#ifdef ERI_FDCNGVQ_LOW_ROM + for ( p = 0; p < NPART; p++ ) /* TBD Note: NPART should likely be N[ch] if N[ch] may change */ +#else for ( p = 0; p < NPART; p++ ) +#endif { ms_ptr[ch][p] = 10.f * log10f( lr_in_ptr[ch][p] + EPSILON ); E[ch] += ms_ptr[ch][p]; @@ -1184,12 +1295,22 @@ void FdCngEncodeDiracMDCTStereoSID( } /* M/S transform on log envelopes */ +#ifdef ERI_FDCNGVQ_LOW_ROM + convertToMS( NPART, ms_ptr[0], ms_ptr[1], 0.5f ); /* TBD Note: NPART should likely be N[0] if N[0] may change */ + + E[0] = sum_f( ms_ptr[0], NPART ); /* TBD Note: NPART should likely be N[0] if N[0] may change */ +#else convertToMS( NPART, ms_ptr[0], ms_ptr[1], 0.5f ); E[0] = sum_f( ms_ptr[0], NPART ); +#endif /* Quantize M noise shape */ +#ifdef ERI_FDCNGVQ_LOW_ROM + /* Normalize MSVQ input */ +#else /* Normalize MSVW input */ +#endif gain[0] = sum_f( ms_ptr[0] + N_GAIN_MIN, N_GAIN_MAX - N_GAIN_MIN ); gain[0] /= (float) ( N_GAIN_MAX - N_GAIN_MIN ); @@ -1199,14 +1320,38 @@ void FdCngEncodeDiracMDCTStereoSID( } /* MSVQ */ +#ifdef ERI_FDCNGVQ_LOW_ROM + /* DCT domain compressed/truncated indices used for first stage */ + /* mid quantization using stages #1 through 6 */ + if ( N[0] == FDCNG_VQ_MAX_LEN_WB ) + { + create_IDCT_N_Matrix( invTrfMatrix, N[0], FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float ) ) ); + /* truncated DCT 21 analysis */ + dctT2_N_apply_matrix( (const float *) ms_ptr[0], dct_target, 
FDCNG_VQ_DCT_MAXTRUNC, N[0], invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, DCT_T2_21_XX ); + /* truncated IDCT21 extension to 24 synthesis */ + extend_dctN_input( ms_ptr[0], dct_target, N[0], tot_sig_ext, FDCNG_VQ_MAX_LEN, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, IDCT_T2_XX_21 ); /* use 18 basis vectors*/ + + mvr2r( tot_sig_ext, ms_ptr[0], FDCNG_VQ_MAX_LEN ); /* write extended result as input to VQ stage #1 */ + } + create_IDCT_N_Matrix( invTrfMatrix, FDCNG_VQ_MAX_LEN, FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float ) ) ); + + msvq_enc( cdk_37bits_ivas, NULL, NULL, ms_ptr[0], levels_37bits, FD_CNG_maxC_37bits, FD_CNG_stages_37bits, weights, N[0], FD_CNG_maxN_37bits, 1, invTrfMatrix, indices[0] ); + msvq_dec( cdk_37bits_ivas, NULL, NULL, FD_CNG_stages_37bits, N[0], FD_CNG_maxN_37bits, indices[0], 1, invTrfMatrix, ms_ptr[0], NULL ); + +#else msvq_enc( cdk_37bits_ivas, NULL, NULL, ms_ptr[0], levels_37bits, FD_CNG_maxC_37bits, FD_CNG_stages_37bits, weights, N[0], FD_CNG_maxN_37bits, indices[0] ); msvq_dec( cdk_37bits_ivas, NULL, NULL, FD_CNG_stages_37bits, N[0], FD_CNG_maxN_37bits, indices[0], ms_ptr[0], NULL ); +#endif /* set S to zero */ set_zero( ms_ptr[1], NPART ); /* compute M gain */ +#ifdef ERI_FDCNGVQ_LOW_ROM + gain[0] = sum_f( ms_ptr[0], NPART ); /* TBD Note: NPART should likely be N[0] if N[0] may change */ +#else gain[0] = sum_f( ms_ptr[0], NPART ); +#endif gain[0] = ( E[0] - gain[0] ) / (float) N[0]; apply_scale( &gain[0], sts[0]->hFdCngEnc->hFdCngCom->CngBandwidth, sts[0]->hDtxEnc->last_active_brate, scaleTableStereo, SIZE_SCALE_TABLE_STEREO ); @@ -1218,7 +1363,11 @@ void FdCngEncodeDiracMDCTStereoSID( gain[1] = gain[0]; /* undo M/S */ +#ifdef ERI_FDCNGVQ_LOW_ROM + convertToMS( NPART, ms_ptr[0], ms_ptr[1], 1.0f ); /* TBD Note: NPART should likely be N[0] if N[0] may change */ +#else convertToMS( NPART, ms_ptr[0], ms_ptr[1], 1.0f ); +#endif /* restore channel noise envelopes */ for ( ch = 0; ch < CPE_CHANNELS; ch++ ) @@ -1226,7 +1375,11 @@ void 
FdCngEncodeDiracMDCTStereoSID( HANDLE_FD_CNG_ENC hFdCngEnc = sts[ch]->hFdCngEnc; HANDLE_FD_CNG_COM hFdCngCom = hFdCngEnc->hFdCngCom; +#ifdef ERI_FDCNGVQ_LOW_ROM + for ( p = 0; p < NPART; p++ ) /* TBD Note: NPART should likely be N[0] if N[0] may change */ +#else for ( p = 0; p < NPART; p++ ) +#endif { lr_out_ptr[ch][p] = powf( 10.f, ( ms_ptr[ch][p] + gain[ch] ) / 10.f ); } @@ -1265,6 +1418,5 @@ void FdCngEncodeDiracMDCTStereoSID( } push_indice( sts[0]->hBstr, IND_ENERGY, gain_idx[0], 7 ); - return; } diff --git a/lib_enc/ivas_sns_enc.c b/lib_enc/ivas_sns_enc.c index b69890aacff2d9c8291ce5095b40f1569fb44b9c..762b0aeb0f0d0d09ccc946f368a1e905e3d3b28e 100644 --- a/lib_enc/ivas_sns_enc.c +++ b/lib_enc/ivas_sns_enc.c @@ -482,14 +482,24 @@ int16_t quantize_sns( means = ( st->core == TCX_20_CORE ) ? ivas_sns_means_side_tcx20 : ivas_sns_means_side_tcx10; v_sub( sns_ptr, means, snsQ, M ); +#ifdef ERI_FDCNGVQ_LOW_ROM + msvq_enc( side_cdbks, NULL, NULL, snsQ, side_levels, 3, nStages, weights, M, M, 0, NULL, &indices[idxIndices] ); + msvq_dec( side_cdbks, NULL, NULL, nStages, M, M, &indices[idxIndices], 0, NULL, snsQ, NULL ); +#else msvq_enc( side_cdbks, NULL, NULL, snsQ, side_levels, 3, nStages, weights, M, M, &indices[idxIndices] ); msvq_dec( side_cdbks, NULL, NULL, nStages, M, M, &indices[idxIndices], snsQ, NULL ); +#endif v_add( snsQ, means, snsQ, M ); } else { +#ifdef ERI_FDCNGVQ_LOW_ROM + msvq_enc( cdbks, NULL, NULL, sns_ptr, levels, 3, nStages, weights, M, M, 0, NULL, &indices[idxIndices] ); + msvq_dec( cdbks, NULL, NULL, nStages, M, M, &indices[idxIndices], 0, NULL, snsQ, NULL ); +#else msvq_enc( cdbks, NULL, NULL, sns_ptr, levels, 3, nStages, weights, M, M, &indices[idxIndices] ); msvq_dec( cdbks, NULL, NULL, nStages, M, M, &indices[idxIndices], snsQ, NULL ); +#endif } idxIndices += nStages; diff --git a/lib_enc/lsf_msvq_ma_enc.c b/lib_enc/lsf_msvq_ma_enc.c index 06b50cc1fc0ab5550761074c9f35a7ecf245b3b0..190f4e2a74d04ac8981cd0fdbb44306adb08801d 100644 --- 
a/lib_enc/lsf_msvq_ma_enc.c +++ b/lib_enc/lsf_msvq_ma_enc.c @@ -48,6 +48,13 @@ #define kMaxC 8 + +#ifdef ERI_FDCNGVQ_LOW_ROM + +#include "ivas_prot.h" +void dctT2_N_apply_matrix( const float *input, float *output, const int16_t dct_dim, int16_t fdcngvq_dim, const float *idctT2_24_X_matrixQ16, const int16_t matrix_1st_dim, DCTTYPE dcttype ); +#endif + /*--------------------------------------------------------------------------* * msvq_enc() * @@ -62,10 +69,14 @@ void msvq_enc( const int16_t maxC, /* i : Tree search size (number of candidates kept from */ /* one stage to the next == M-best) */ const int16_t stages, /* i : Number of stages */ - const float w[], /* i : Weights */ - const int16_t N, /* i : Vector dimension */ - const int16_t maxN, /* i : Codebook dimension */ - int16_t Idx[] /* o : Indices */ + const float w[], /* i : Weights */ + const int16_t N, /* i : Vector dimension */ + const int16_t maxN, /* i : Codebook dimension */ +#ifdef ERI_FDCNGVQ_LOW_ROM + const int16_t applyDCT_flag, /* i : applyDCT flag */ + float *invTrfMatrix, /*i/o : synthesis matrix */ +#endif + int16_t Idx[] /* o : Indices */ ) { float *resid[2], *dist[2]; @@ -77,8 +88,46 @@ void msvq_enc( int16_t idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX]; int16_t n, maxn, start; + +#ifdef ERI_FDCNGVQ_LOW_ROM + /* buffers */ + float dct_target[FDCNG_VQ_DCT_MAXTRUNC]; + float u_mr[FDCNG_VQ_MAX_LEN]; + float u_mr_scaled[FDCNG_VQ_MAX_LEN]; + float mse_trunc_all_segms; + float mse_trunc_segm[FDCNG_VQ_DCT_NSEGM]; + float mse; + + const Word8 *cbpW8; + const Word16 *dct_col_shift_tab; + + float *st1_mse_pair; + int16_t *st1_idx_pair; + int16_t indices_st1_local[FDCNG_VQ_DCT_NSEGM * 2]; /* after stage#1 DCT search this is copied to the global indices[1][s*stages] structure */ + int16_t n_ana, p_mins[2], idx_min[2]; + DCTTYPE dcttype = DCT_T2_24_XX; + float tmp2; + + int16_t check_ind[FDCNG_VQ_DCT_NPOST]; + int16_t segm, j_full, maxC_pre; + float *st1_syn_vec_ptr; /* 8* 24 floats 
in dynRAM */ + float *st1_mse_ptr; /* 2^7 == 128 floats in existing dRAM used for stage 1 candidate analysis, 128 Word32 in BASOP */ + float res24, high_diff[FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB]; + + maxC_pre = ( FDCNG_VQ_DCT_NSEGM * 2 ); + assert( maxC <= LSFMBEST_MAX ); + assert( ( LSFMBEST_MAX * M_MAX ) > ( N * maxC ) ); + /* top of resid_buf is resid[1] and used for stage#1 residuals (input target u), + we here reuse resid[0] part of the buffer for stage#1 DCT dynamic RAM needs + */ + st1_mse_ptr = &( resid_buf[1 * LSFMBEST_MAX * M_MAX] ) - ( levels[0] ); /* reuse top of residual resid[0] scratch RAM for stage1 MSEs */ + st1_syn_vec_ptr = &( resid_buf[1 * LSFMBEST_MAX * M_MAX] ) - FDCNG_VQ_MAX_LEN * maxC; /* reuse top of resid[0] scratch RAM for residual */ + + dcttype = DCT_T2_24_XX; +#endif + /*----------------------------------------------------------------* - * Allocate memory for previous (parent) and current nodes. + * Allocate memory for previous (parent) and current nodes. * Parent node is indexed [0], current node is indexed [1]. *----------------------------------------------------------------*/ @@ -158,7 +207,173 @@ void msvq_enc( { dist[1][j] = FLT_MAX; } - if ( !s ) /* means: m==1 */ + +#ifdef ERI_FDCNGVQ_LOW_ROM + if ( !s && applyDCT_flag != 0 ) /* means: m==1 */ + { /* stage 1 search in truncated dct domain without any weights */ + + n_ana = FDCNG_VQ_MAX_LEN; /* VQ stage#1 core is always using stored DCT24 coeffs */ + /*remove mean/mid fdcng stage#1 vector, in original subband domain */ + v_sub( u, cdk1r_tr_midQ_truncQ, u_mr, n_ana ); + + v_multc( u_mr, fdcng_dct_invScaleF[1], u_mr_scaled, n_ana ); /*scale up target to upscaled W8x storage domain */ + /* 16.0-->scale up from Q0 to search domain in Q4, not really needed in BASOP , impl. 
by shifts */ + + assert( n_ana >= FDCNG_VQ_DCT_MAXTRUNC ); /* check for WB , SWB, FB operation */ + + dctT2_N_apply_matrix( (const float *) u_mr_scaled, dct_target, min( FDCNG_VQ_DCT_MAXTRUNC, n_ana ), n_ana, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, dcttype ); + + mse_trunc_all_segms = 0; + mse = 0; + + /* init search state ptr's at the top */ + for ( segm = 0; segm < FDCNG_VQ_DCT_NSEGM; segm++ ) + { + /* point to a new paired location */ + st1_mse_pair = &( dist[1][2 * segm] ); /* req. ptr init +=2 */ + st1_mse_pair[0] = FLT_MAX; /* req */ + st1_mse_pair[1] = FLT_MAX; /* req */ + st1_idx_pair = &( indices_st1_local[2 * segm] ); /* +=2 */ + p_max = 0; /* req. to point to 1 or 0 */ + + /* compute segment common truncation error in dct domain */ + mse_trunc_segm[segm] = mse_trunc_all_segms; + mse_trunc_segm[segm] += sum2_f( (const float *) ( &( dct_target[cdk1_ivas_cols_per_segment[segm]] ) ), cdk1_ivas_trunc_dct_cols_per_segment[segm] ); + + cbpW8 = cdk_37bits_ivas_stage1_W8Qx_dct_sections[segm]; /* Word8 column variable Qx storage*/ + + for ( j = 0; j < cdk1_ivas_entries_per_segment[segm]; j++ ) + { + /* unweighted segmented search DCT domain loop */ + j_full = j + cdk1_ivas_cum_entries_per_segment[segm]; /* or simply use j_full++ */ + + mse = mse_trunc_segm[segm]; /* move32() init mse with common mse truncation part */ + + dct_col_shift_tab = stage1_dct_col_syn_shift[segm]; /* ptr init */ + + for ( c2 = 0; c2 < cdk1_ivas_cols_per_segment[segm]; c2++ ) + { + +#define WMC_TOOL_SKIP + tmp = dct_target[c2] - (float) ( ( (Word16) cbpW8[c2] ) << dct_col_shift_tab[c2] ); /* Word8 storage MSE inner loop */ + LOGIC( 1 ); + SHIFT( 1 ); + ADD( 1 ); /* in BASOP: s_and(for W8->W16), shl(), sub()*/ +#undef WMC_TOOL_SKIP + + mse += tmp * tmp; /* L_mac or L_mac0() square Word16 -> Word32*/ + } + st1_mse_ptr[j_full] = mse; /* save MSE in shared dynamic 2^7=128 RAM, move32() in BASOP */ + +#define WMC_TOOL_SKIP + cbpW8 += cdk1_ivas_cols_per_segment[segm]; /* pointer increment */ 
+#undef WMC_TOOL_SKIP + /* overwrite with a new worst index at p_max */ + +#ifdef ERI_FDCNGVQ_LOW_ROM + /* The three inner loop if's below are not really properly instrumented by WMC tool */ + /* a ptr to worst index will be in use */ +#endif + if ( mse < st1_mse_pair[p_max] ) /* L_sub */ + { + st1_idx_pair[p_max] = j_full; /* simplified */ + } /* BASOP 2 ops */ + + if ( st1_idx_pair[p_max] == j_full ) /* simplified */ + { /*idx updated to j_full --> also update mse */ + st1_mse_pair[p_max] = mse; /* move32(), single BASOP */ + } /* BASOP 3 ops */ + /* avoid WC costly list management by always updating p_max, as we have only a pair to maintain */ + p_max = 0; /* move16() */ + if ( ( st1_mse_pair[0] - st1_mse_pair[1] ) < 0 ) /* L_sub()*/ + { + p_max = 1; /* move16() */ + } /* BASOP 3 ops ,Note 2 ops possible in BASOP with L_sub and L_lshr */ + + /* Note: logical shift right not available in ANSI-C */ + /* p_max = (st1_mse_pair[0] - st1_mse_pair[1]) ">>>" 31; */ + /* in java logical shift right is available as >>> , in BASOP it is L_lshr */ + + /* Cost: weighted sum with cond moves ('if') => 8 in float , 7 in BASOP with L_lshr */ + } /* j in section */ + + } /* next segment */ + + for ( j = 0; j < maxC_pre; j++ ) + { + /* compute_full mse using stored DCT24 domain MSE's */ + /* calculate MSE from stage1 inner using existing inner DCT domain variables */ + dist[1][j] *= fdcng_dct_scaleF[2]; /* single multiplication to get the MSE scale to the correct input domain */ + } + + p_max = maximum( dist[1], maxC_pre, NULL ); /* establish current worst candidate for stage#2 among all maxC_pre candidates */ + + p_mins[0] = minimum( dist[1], maxC_pre, NULL ); /* find best entry among all maxC_pre */ + tmp = dist[1][p_mins[0]]; + dist[1][p_mins[0]] = FLT_MAX; /* exclude 1st */ + + p_mins[1] = minimum( dist[1], maxC_pre, NULL ); /* find 2nd best entry */ + tmp2 = dist[1][p_mins[1]]; + dist[1][p_mins[1]] = FLT_MAX; /* exclude 2nd*/ + + dist[1][p_mins[0]] = tmp; /* restore 1st */ + 
dist[1][p_mins[1]] = tmp2; /* restore 2nd */ + + idx_min[0] = indices_st1_local[p_mins[0]]; + idx_min[1] = indices_st1_local[p_mins[1]]; + + + /* use global exclusion list to never reselect the two (best) mse values so far */ + st1_mse_ptr[idx_min[0]] = FLT_MAX; /* move32() */ + st1_mse_ptr[idx_min[1]] = FLT_MAX; /* move32() */ + + /* circular MSE-neighbour list in use to potentially replace some segment search candidates */ + /* using both 1st and 2nd best neighbours in fwd and rev directions */ + check_ind[0] = cdk1_ivas_segm_neighbour_fwd[idx_min[0]]; + check_ind[1] = cdk1_ivas_segm_neighbour_rev[idx_min[0]]; + + check_ind[2] = cdk1_ivas_segm_neighbour_fwd[idx_min[1]]; + check_ind[3] = cdk1_ivas_segm_neighbour_rev[idx_min[1]]; + + check_ind[4] = cdk1_ivas_segm_neighbour_fwd[check_ind[0]]; + check_ind[5] = cdk1_ivas_segm_neighbour_rev[check_ind[1]]; + + check_ind[6] = cdk1_ivas_segm_neighbour_fwd[check_ind[2]]; + check_ind[7] = cdk1_ivas_segm_neighbour_rev[check_ind[3]]; + + for ( i = 0; i < FDCNG_VQ_DCT_NPOST; i++ ) + { + float check_mse = st1_mse_ptr[check_ind[i]] * fdcng_dct_scaleF[2]; + /* *= fdcng_dct_scaleF[2]; */ /* multiplication in use to get the float outer loop scale correct */ + + if ( check_mse < dist[1][p_max] ) + { + /* new winner , replace */ + dist[1][p_max] = check_mse; + indices_st1_local[p_max] = check_ind[i]; + st1_mse_ptr[check_ind[i]] = FLT_MAX; /* BASOP: move32() */ + p_max = maximum( dist[1], maxC_pre, NULL ); /* establish a new current worst candidate among all maxC */ + } + } + + for ( c = 0; c < maxC_pre; c++ ) + { + indices[1][c * stages] = indices_st1_local[c]; /* move established stage#1 indices to global MSVQ list structure */ + } + + /* extract the selected stage one vectors in DCT domain , apply IDCT_N and scale up */ + /*always extract full length signal(24) to be able to update WB( N==21) candidate MSE values */ + for ( c = 0; c < maxC_pre; c++ ) + { + dec_FDCNG_MSVQ_stage1( indices_st1_local[c], FDCNG_VQ_MAX_LEN, invTrfMatrix, 
dcttype + 1, &( st1_syn_vec_ptr[c * FDCNG_VQ_MAX_LEN] ), NULL ); + } + + assert( maxC == maxC_pre ); + } + else + /* non-DCT Stage #1 code below */ +#endif + if ( !s ) /* means: m==1 */ { /* This loop is identical to the one below, except, that the inner loop over c=0..m is hardcoded to c=0, since m=1. */ @@ -201,9 +416,16 @@ void msvq_enc( } } /* if (tmp <= dist[1][p_max]) */ } /* for (j=0; j