Loading lib_com/options.h +3 −0 Original line number Diff line number Diff line Loading @@ -158,6 +158,9 @@ #define ERI_FDCNGVQ_LOW_ROM /* Eri: Contribution #31 Table ROM saving for IVAS FDCNG-VQ modes */ #define ERI_MSVQ_CLEANUP /* Eri: BE modularization of msvq encoder side DCT c-code */ /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ #endif lib_enc/lsf_msvq_ma_enc.c +267 −7 Original line number Diff line number Diff line Loading @@ -53,6 +53,235 @@ #include "ivas_prot.h" void dctT2_N_apply_matrix( const float *input, float *output, const int16_t dct_dim, int16_t fdcngvq_dim, const float *idctT2_24_X_matrixQ16, const int16_t matrix_1st_dim, DCTTYPE dcttype ); #ifdef ERI_MSVQ_CLEANUP int16_t msvq_stage1_dct_search( /* o : (p_max , best candidate sofar ) */ const float *u, /* i : target */ const int16_t N, /* i : target length and IDCT synthesis length */ const int16_t maxC_st1, /* i : number of candidates to provide */ float *invTrfMatrix, /* i/o: IDCT synthesis matrix for dim N */ float *st1_mse_ptr, /* i : dynRAM buffer for MSEs */ int16_t *indices_st1_local, /* o: selecetd cand indices */ float *st1_syn_vec_ptr , /* i/o: buffer for IDCT24 synthesis */ float *dist1_ptr /* o: resulting stage 1 MSEs in DCT24 domain */ ) { /* stage 1 search in truncated dct domain without any weights */ float dct_target[FDCNG_VQ_DCT_MAXTRUNC]; float u_mr[FDCNG_VQ_MAX_LEN]; float u_mr_scaled[FDCNG_VQ_MAX_LEN]; float mse_trunc_segm[FDCNG_VQ_DCT_NSEGM]; float tmp, check_mse; float mse; /* Word32 */ int16_t p_max,c,c2, segm, j_full,j,i ; int16_t n_ana, p_mins[2], idx_min[2]; const Word8 *cbpW8; const Word16 *dct_col_shift_tab; float *st1_mse_pair; int16_t *st1_idx_pair; DCTTYPE dcttype = DCT_T2_24_XX; float tmp2; int16_t check_ind[FDCNG_VQ_DCT_NPOST]; n_ana = N; /* VQ stage#1 core is currentlu always using stored DCT24 coeffs */ assert( n_ana >= FDCNG_VQ_DCT_MAXTRUNC ); /* check for FDCNGVQ WB , SWB, FB operation */ /*remove mean/mid fdcng stage#1 vector, in original subband domain */ v_sub( u, cdk1r_tr_midQ_truncQ, u_mr, n_ana ); v_multc( u_mr, fdcng_dct_invScaleF[1], u_mr_scaled, n_ana ); /*scale up target to upscaled W8x storage domain */ /* 16.0-->scale up from Q0 to search domain in Q4, not really needed in BASOP , impl. by shifts */ dctT2_N_apply_matrix( (const float *) u_mr_scaled, dct_target, min( FDCNG_VQ_DCT_MAXTRUNC, n_ana ), n_ana, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, dcttype ); mse = 0; /* init search state ptr's at the top */ set_f( dist1_ptr, FLT_MAX, maxC_st1); st1_mse_pair = &( dist1_ptr[0] ); /* req. ptr init +=2 */ st1_idx_pair = &( indices_st1_local[0] ); /* req. ptr init +=2 */ for ( segm = 0; segm < FDCNG_VQ_DCT_NSEGM; segm++ ) { /* point to a new paired location for each segment */ st1_mse_pair += 2; /* req. ptr init +=2 */ st1_idx_pair += 2 ; /* req. ptr init +=2 */ p_max = 0; /* req. to point to 1 or 0 */ /* compute segment common trunction error in dctN domain */ mse_trunc_segm[segm] = 0; mse_trunc_segm[segm] += sum2_f( (const float *) ( &( dct_target[cdk1_ivas_cols_per_segment[segm]] ) ), cdk1_ivas_trunc_dct_cols_per_segment[segm] ); cbpW8 = cdk_37bits_ivas_stage1_W8Qx_dct_sections[segm]; /* Word8 column variable Qx storage*/ for ( j = 0; j < cdk1_ivas_entries_per_segment[segm]; j++ ) { /* unweighted segmented search DCT domain loop */ j_full = j + cdk1_ivas_cum_entries_per_segment[segm]; /* or simply use j_full++ */ mse = mse_trunc_segm[segm]; /* init mse with with common mse truncation part, in BASOP a move32() */ dct_col_shift_tab = stage1_dct_col_syn_shift[segm]; /* ptr init */ for ( c2 = 0; c2 < cdk1_ivas_cols_per_segment[segm]; c2++ ) { #define WMC_TOOL_SKIP tmp = dct_target[c2] - (float) ( ( (Word16) cbpW8[c2] ) << dct_col_shift_tab[c2] ); /* Word8 storage MSE inner loop */ LOGIC( 1 ); SHIFT( 1 ); ADD( 1 ); /* in BASOP: s_and(for W8->W16), shl(), sub()*/ #undef WMC_TOOL_SKIP mse += tmp * tmp; /* L_mac or L_mac0() square Word16 -> Word32*/ } st1_mse_ptr[j_full] = mse; /* save MSE in shared dynamic RAM, move32() in BASOP */ #define WMC_TOOL_SKIP cbpW8 += cdk1_ivas_cols_per_segment[segm]; /* fixed pointer increment for each segment */ #undef WMC_TOOL_SKIP /* overwrite with a new worst index at p_max */ #ifdef ERI_FDCNGVQ_LOW_ROM /* The three inner loop if's below are not really properly instrumented by WMC tool */ /* a ptr to worst index will be in use */ #endif if ( mse < st1_mse_pair[p_max] ) /* L_sub */ { st1_idx_pair[p_max] = j_full; /* simplified */ } /* BASOP 2 ops */ if ( st1_idx_pair[p_max] == j_full ) /* simplified */ { /* idx updated to j_full --> also update mse */ st1_mse_pair[p_max] = mse; /* move32(), single BASOP */ } /* BASOP 3 ops */ /* avoid WC costly list management by always updating p_max, as we have only a pair to maintain */ p_max = 0; /* move16() */ if ( ( st1_mse_pair[0] - st1_mse_pair[1] ) < 0 ) /* L_sub()*/ { p_max = 1; /* move16() */ } /* BASOP 3 ops ,Note 2 ops possible in BASOP with L_sub and L_lshr */ /* Note: logical shift right not available in ANSI-C */ /* p_max = (st1_mse_pair[0] - st1_mse_pair[1]) ">>>" 31; */ /* in java logical shift right is available as >>> , in BASOP it is L_lshr */ /* Cost: weighted sum with cond moves ('if') => 8 in float , 7 in BASOP with L_lshr */ } /* j in section */ } /* next segment */ for ( j = 0; j < maxC_st1; j++ ) { /* compute_full mse using stored DCT24 domain MSE's */ /* calculate MSE from stage1 inner using existing inner DCT domain variables */ dist1_ptr[j] *= fdcng_dct_scaleF[2]; /* single multiplication to get the MSE scale to the correct input domain */ } p_max = maximum( dist1_ptr, maxC_st1, NULL ); /* establish current worst candidate for stage#2 among all maxC_st1 candidates */ p_mins[0] = minimum( dist1_ptr, maxC_st1, NULL ); /* find best entry among all maxC_pre */ tmp = dist1_ptr[p_mins[0]]; dist1_ptr[p_mins[0]] = FLT_MAX; /* exclude 1st */ p_mins[1] = minimum( dist1_ptr, maxC_st1, NULL ); /* find 2nd best entry */ tmp2 = dist1_ptr[p_mins[1]]; dist1_ptr[p_mins[1]] = FLT_MAX; /* exclude 2nd */ dist1_ptr[p_mins[0]] = tmp; /* restore 1st */ dist1_ptr[p_mins[1]] = tmp2; /* restore 2nd */ idx_min[0] = indices_st1_local[p_mins[0]]; idx_min[1] = indices_st1_local[p_mins[1]]; /* use global exclusion list to never reselect the two (best) MSE values sofar */ st1_mse_ptr[idx_min[0]] = FLT_MAX; /* move32() */ st1_mse_ptr[idx_min[1]] = FLT_MAX; /* move32() */ /* circular MSE-neigbour list in use to potentially replace some segment search candidates */ /* using both 1st and 2nd best neighbours in fwd and rev directions */ check_ind[0] = cdk1_ivas_segm_neighbour_fwd[idx_min[0]]; check_ind[1] = cdk1_ivas_segm_neighbour_rev[idx_min[0]]; check_ind[2] = cdk1_ivas_segm_neighbour_fwd[idx_min[1]]; check_ind[3] = cdk1_ivas_segm_neighbour_rev[idx_min[1]]; check_ind[4] = cdk1_ivas_segm_neighbour_fwd[check_ind[0]]; check_ind[5] = cdk1_ivas_segm_neighbour_rev[check_ind[1]]; check_ind[6] = cdk1_ivas_segm_neighbour_fwd[check_ind[2]]; check_ind[FDCNG_VQ_DCT_NPOST-1] = cdk1_ivas_segm_neighbour_rev[check_ind[3]]; for ( i = 0; i < FDCNG_VQ_DCT_NPOST; i++ ) { /* move MSE from search to synthesis domain */ /* multiplication by fdcng_dct_scaleF[2] to get the float outer loop scale correct in IDCT synthesis domain */ check_mse = st1_mse_ptr[check_ind[i]] * fdcng_dct_scaleF[2]; if ( check_mse < dist1_ptr[p_max] ) { /* new winner , replace */ dist1_ptr[p_max] = check_mse; indices_st1_local[p_max] = check_ind[i]; st1_mse_ptr[check_ind[i]] = FLT_MAX; /* exclude, BASOP: move32() */ p_max = maximum( dist1_ptr, maxC_st1, NULL ); /* establish a new current worst candidate among all maxC */ } } /* extract the selected stage one vectors in DCT_N domain , apply IDCT_N and scale up */ /* always extract full length signal(24) to be able to update WB( N==21) candidate MSE values */ /* in the case that only a part of the IDCT vector is in final use */ for ( c = 0; c < maxC_st1; c++ ) { dec_FDCNG_MSVQ_stage1( indices_st1_local[c], N, invTrfMatrix, dcttype + 1, &( st1_syn_vec_ptr[c * N] ), NULL ); } return p_max; }; /* recalc MSE for WB(0..20) coeffs , essentially subtract res21^2 ,res22^2, res23^2 that was included in stage1 MSE in the DCT24 domain truncated search, excludes the waveform contributions at pos 21,22,23 to the MSE, important to keep the WB MSEs update for the subsequent stages */ int16_t msvq_stage1_dct_recalc_candidates_wb( /* o : (updated p_max) */ const float *st1_syn_vec_ptr, /* i : IDCT24 synthesis vectors */ const float *u, /* i : target signal */ const int16_t maxC_st1, /* i : number of candidates in stage1 */ float *dist_ptr /* i/o: updated MSE vector for stage1 */ ) { int16_t p_max_local, c; const float *p2; float res24, high_diff[FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB]; for ( c = 0; c < maxC_st1; c++ ) { /* point to extended synthesis part */ p2 = (const float *) &( st1_syn_vec_ptr[c * FDCNG_VQ_MAX_LEN + FDCNG_VQ_MAX_LEN_WB] ); /* ptr init to synthesis candidate c */ /* for stage#1 use "u" instead of the shortened resid[0], to access the extended/extrapolated input target */ v_sub( p2, &( u[FDCNG_VQ_MAX_LEN_WB] ), high_diff, FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB ); res24 = dotp( high_diff, high_diff, FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB ); /* sum squared over top env. values above WB coeffs */ dist_ptr[c] -= res24; /* remove DCT24 high band error contribution */ } /* finally update p_max, as it may potentially change, due to the core DCT24 search originally optimizing over the longer basis vectors than DCT21 */ p_max_local = maximum( dist_ptr, maxC_st1, NULL ); return p_max_local; }; #endif #endif /*--------------------------------------------------------------------------* Loading Loading @@ -87,9 +316,8 @@ void msvq_enc( float resid_buf[2 * LSFMBEST_MAX * M_MAX], dist_buf[2 * LSFMBEST_MAX], Tmp[M_MAX]; int16_t idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX]; int16_t n, maxn, start; #ifdef ERI_FDCNGVQ_LOW_ROM #ifndef ERI_MSVQ_CLEANUP /* buffers */ float dct_target[FDCNG_VQ_DCT_MAXTRUNC]; float u_mr[FDCNG_VQ_MAX_LEN]; Loading @@ -110,20 +338,28 @@ void msvq_enc( int16_t check_ind[FDCNG_VQ_DCT_NPOST]; int16_t segm, j_full, maxC_pre; float *st1_syn_vec_ptr; /* 8* 24 floats in dynRAM */ float *st1_mse_ptr; /* 2^¨7 == 128 floats in existing dRAM used for stage 1 candidate analysis, 128 Word32 in BASOP */ #endif float *st1_syn_vec_ptr; /* ptr to buffer in dynRAM */ float *st1_mse_ptr; /* ptr to buffer in existing dRAM used for stage 1 candidate analysis */ #ifdef ERI_MSVQ_CLEANUP int16_t indices_st1_local[FDCNG_VQ_DCT_NSEGM * 2]; /* after stage#1 DCT search this is copied to the global indices[1][s*stages] structure */ #else float res24, high_diff[FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB]; maxC_pre = ( FDCNG_VQ_DCT_NSEGM * 2 ); #endif assert( maxC <= LSFMBEST_MAX ); assert( ( LSFMBEST_MAX * M_MAX ) > ( N * maxC ) ); /* top of resid_buf is resid[1] and used for stage#1 residuals (input target u), we here reuse resid[0] part of the buffer for stage#1 DCT dynamic RAM needs */ st1_mse_ptr = &( resid_buf[1 * LSFMBEST_MAX * M_MAX] ) - ( levels[0] ); /* reuse top of residual resid[0] scratch RAM for stage1 MSEs */ st1_syn_vec_ptr = &( resid_buf[1 * LSFMBEST_MAX * M_MAX] ) - FDCNG_VQ_MAX_LEN * maxC; /* reuse top of resid[0] scratch RAM for residual */ #ifndef ERI_MSVQ_CLEANUP dcttype = DCT_T2_24_XX; #endif #endif /*----------------------------------------------------------------* Loading Loading @@ -209,6 +445,22 @@ void msvq_enc( } #ifdef ERI_FDCNGVQ_LOW_ROM #ifdef ERI_MSVQ_CLEANUP if ( !s && applyDCT_flag != 0 ) /* means: m==1 */ { /* stage 1 candidates search in truncated dct24 domain without any weights */ assert( N == FDCNG_VQ_MAX_LEN ); assert( maxC == 2 * FDCNG_VQ_DCT_NSEGM ); p_max = msvq_stage1_dct_search( u, FDCNG_VQ_MAX_LEN, maxC, invTrfMatrix, st1_mse_ptr, indices_st1_local, st1_syn_vec_ptr, dist[1] ); /* move established stage#1 indices to the global MSVQ list structure */ for ( c = 0; c < maxC; c++ ) { indices[1][c * stages] = indices_st1_local[c]; } } #else if ( !s && applyDCT_flag != 0 ) /* means: m==1 */ { /* stage 1 search in truncated dct domain without any weights */ Loading Loading @@ -370,6 +622,7 @@ void msvq_enc( assert( maxC == maxC_pre ); } #endif else /* non-DCT Stage #1 code below */ #endif Loading Loading @@ -512,6 +765,12 @@ void msvq_enc( essentially subtract res21^2 ,res22^2, res23^2 that was included in stage1 MSE in the DCT24 domain truncated search, excludes the waveform contributions at pos 21,22,23 to the MSE, important to keep WB MSEs update for the subsequent stages */ #ifdef ERI_MSVQ_CLEANUP if ( s == 0 && applyDCT_flag != 0 && n == FDCNG_VQ_MAX_LEN_WB ) { p_max = msvq_stage1_dct_recalc_candidates_wb( st1_syn_vec_ptr, u, maxC, dist[1] ); } #else if ( s == 0 && applyDCT_flag != 0 && n == FDCNG_VQ_MAX_LEN_WB ) { assert( start == 0 ); Loading @@ -527,6 +786,7 @@ void msvq_enc( /* update p_max, as it may potentially change, due to the core DCT24 search originally optimizing over longer basis vectors than 21 */ p_max = maximum( dist[1], maxC, NULL ); } #endif #endif m = maxC; } /* for (m=1, s=0; s<stages; s++) */ Loading Loading
lib_com/options.h +3 −0 Original line number Diff line number Diff line Loading @@ -158,6 +158,9 @@ #define ERI_FDCNGVQ_LOW_ROM /* Eri: Contribution #31 Table ROM saving for IVAS FDCNG-VQ modes */ #define ERI_MSVQ_CLEANUP /* Eri: BE modularization of msvq encoder side DCT c-code */ /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ #endif
lib_enc/lsf_msvq_ma_enc.c +267 −7 Original line number Diff line number Diff line Loading @@ -53,6 +53,235 @@ #include "ivas_prot.h" void dctT2_N_apply_matrix( const float *input, float *output, const int16_t dct_dim, int16_t fdcngvq_dim, const float *idctT2_24_X_matrixQ16, const int16_t matrix_1st_dim, DCTTYPE dcttype ); #ifdef ERI_MSVQ_CLEANUP int16_t msvq_stage1_dct_search( /* o : (p_max , best candidate sofar ) */ const float *u, /* i : target */ const int16_t N, /* i : target length and IDCT synthesis length */ const int16_t maxC_st1, /* i : number of candidates to provide */ float *invTrfMatrix, /* i/o: IDCT synthesis matrix for dim N */ float *st1_mse_ptr, /* i : dynRAM buffer for MSEs */ int16_t *indices_st1_local, /* o: selecetd cand indices */ float *st1_syn_vec_ptr , /* i/o: buffer for IDCT24 synthesis */ float *dist1_ptr /* o: resulting stage 1 MSEs in DCT24 domain */ ) { /* stage 1 search in truncated dct domain without any weights */ float dct_target[FDCNG_VQ_DCT_MAXTRUNC]; float u_mr[FDCNG_VQ_MAX_LEN]; float u_mr_scaled[FDCNG_VQ_MAX_LEN]; float mse_trunc_segm[FDCNG_VQ_DCT_NSEGM]; float tmp, check_mse; float mse; /* Word32 */ int16_t p_max,c,c2, segm, j_full,j,i ; int16_t n_ana, p_mins[2], idx_min[2]; const Word8 *cbpW8; const Word16 *dct_col_shift_tab; float *st1_mse_pair; int16_t *st1_idx_pair; DCTTYPE dcttype = DCT_T2_24_XX; float tmp2; int16_t check_ind[FDCNG_VQ_DCT_NPOST]; n_ana = N; /* VQ stage#1 core is currentlu always using stored DCT24 coeffs */ assert( n_ana >= FDCNG_VQ_DCT_MAXTRUNC ); /* check for FDCNGVQ WB , SWB, FB operation */ /*remove mean/mid fdcng stage#1 vector, in original subband domain */ v_sub( u, cdk1r_tr_midQ_truncQ, u_mr, n_ana ); v_multc( u_mr, fdcng_dct_invScaleF[1], u_mr_scaled, n_ana ); /*scale up target to upscaled W8x storage domain */ /* 16.0-->scale up from Q0 to search domain in Q4, not really needed in BASOP , impl. by shifts */ dctT2_N_apply_matrix( (const float *) u_mr_scaled, dct_target, min( FDCNG_VQ_DCT_MAXTRUNC, n_ana ), n_ana, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, dcttype ); mse = 0; /* init search state ptr's at the top */ set_f( dist1_ptr, FLT_MAX, maxC_st1); st1_mse_pair = &( dist1_ptr[0] ); /* req. ptr init +=2 */ st1_idx_pair = &( indices_st1_local[0] ); /* req. ptr init +=2 */ for ( segm = 0; segm < FDCNG_VQ_DCT_NSEGM; segm++ ) { /* point to a new paired location for each segment */ st1_mse_pair += 2; /* req. ptr init +=2 */ st1_idx_pair += 2 ; /* req. ptr init +=2 */ p_max = 0; /* req. to point to 1 or 0 */ /* compute segment common trunction error in dctN domain */ mse_trunc_segm[segm] = 0; mse_trunc_segm[segm] += sum2_f( (const float *) ( &( dct_target[cdk1_ivas_cols_per_segment[segm]] ) ), cdk1_ivas_trunc_dct_cols_per_segment[segm] ); cbpW8 = cdk_37bits_ivas_stage1_W8Qx_dct_sections[segm]; /* Word8 column variable Qx storage*/ for ( j = 0; j < cdk1_ivas_entries_per_segment[segm]; j++ ) { /* unweighted segmented search DCT domain loop */ j_full = j + cdk1_ivas_cum_entries_per_segment[segm]; /* or simply use j_full++ */ mse = mse_trunc_segm[segm]; /* init mse with with common mse truncation part, in BASOP a move32() */ dct_col_shift_tab = stage1_dct_col_syn_shift[segm]; /* ptr init */ for ( c2 = 0; c2 < cdk1_ivas_cols_per_segment[segm]; c2++ ) { #define WMC_TOOL_SKIP tmp = dct_target[c2] - (float) ( ( (Word16) cbpW8[c2] ) << dct_col_shift_tab[c2] ); /* Word8 storage MSE inner loop */ LOGIC( 1 ); SHIFT( 1 ); ADD( 1 ); /* in BASOP: s_and(for W8->W16), shl(), sub()*/ #undef WMC_TOOL_SKIP mse += tmp * tmp; /* L_mac or L_mac0() square Word16 -> Word32*/ } st1_mse_ptr[j_full] = mse; /* save MSE in shared dynamic RAM, move32() in BASOP */ #define WMC_TOOL_SKIP cbpW8 += cdk1_ivas_cols_per_segment[segm]; /* fixed pointer increment for each segment */ #undef WMC_TOOL_SKIP /* overwrite with a new worst index at p_max */ #ifdef ERI_FDCNGVQ_LOW_ROM /* The three inner loop if's below are not really properly instrumented by WMC tool */ /* a ptr to worst index will be in use */ #endif if ( mse < st1_mse_pair[p_max] ) /* L_sub */ { st1_idx_pair[p_max] = j_full; /* simplified */ } /* BASOP 2 ops */ if ( st1_idx_pair[p_max] == j_full ) /* simplified */ { /* idx updated to j_full --> also update mse */ st1_mse_pair[p_max] = mse; /* move32(), single BASOP */ } /* BASOP 3 ops */ /* avoid WC costly list management by always updating p_max, as we have only a pair to maintain */ p_max = 0; /* move16() */ if ( ( st1_mse_pair[0] - st1_mse_pair[1] ) < 0 ) /* L_sub()*/ { p_max = 1; /* move16() */ } /* BASOP 3 ops ,Note 2 ops possible in BASOP with L_sub and L_lshr */ /* Note: logical shift right not available in ANSI-C */ /* p_max = (st1_mse_pair[0] - st1_mse_pair[1]) ">>>" 31; */ /* in java logical shift right is available as >>> , in BASOP it is L_lshr */ /* Cost: weighted sum with cond moves ('if') => 8 in float , 7 in BASOP with L_lshr */ } /* j in section */ } /* next segment */ for ( j = 0; j < maxC_st1; j++ ) { /* compute_full mse using stored DCT24 domain MSE's */ /* calculate MSE from stage1 inner using existing inner DCT domain variables */ dist1_ptr[j] *= fdcng_dct_scaleF[2]; /* single multiplication to get the MSE scale to the correct input domain */ } p_max = maximum( dist1_ptr, maxC_st1, NULL ); /* establish current worst candidate for stage#2 among all maxC_st1 candidates */ p_mins[0] = minimum( dist1_ptr, maxC_st1, NULL ); /* find best entry among all maxC_pre */ tmp = dist1_ptr[p_mins[0]]; dist1_ptr[p_mins[0]] = FLT_MAX; /* exclude 1st */ p_mins[1] = minimum( dist1_ptr, maxC_st1, NULL ); /* find 2nd best entry */ tmp2 = dist1_ptr[p_mins[1]]; dist1_ptr[p_mins[1]] = FLT_MAX; /* exclude 2nd */ dist1_ptr[p_mins[0]] = tmp; /* restore 1st */ dist1_ptr[p_mins[1]] = tmp2; /* restore 2nd */ idx_min[0] = indices_st1_local[p_mins[0]]; idx_min[1] = indices_st1_local[p_mins[1]]; /* use global exclusion list to never reselect the two (best) MSE values sofar */ st1_mse_ptr[idx_min[0]] = FLT_MAX; /* move32() */ st1_mse_ptr[idx_min[1]] = FLT_MAX; /* move32() */ /* circular MSE-neigbour list in use to potentially replace some segment search candidates */ /* using both 1st and 2nd best neighbours in fwd and rev directions */ check_ind[0] = cdk1_ivas_segm_neighbour_fwd[idx_min[0]]; check_ind[1] = cdk1_ivas_segm_neighbour_rev[idx_min[0]]; check_ind[2] = cdk1_ivas_segm_neighbour_fwd[idx_min[1]]; check_ind[3] = cdk1_ivas_segm_neighbour_rev[idx_min[1]]; check_ind[4] = cdk1_ivas_segm_neighbour_fwd[check_ind[0]]; check_ind[5] = cdk1_ivas_segm_neighbour_rev[check_ind[1]]; check_ind[6] = cdk1_ivas_segm_neighbour_fwd[check_ind[2]]; check_ind[FDCNG_VQ_DCT_NPOST-1] = cdk1_ivas_segm_neighbour_rev[check_ind[3]]; for ( i = 0; i < FDCNG_VQ_DCT_NPOST; i++ ) { /* move MSE from search to synthesis domain */ /* multiplication by fdcng_dct_scaleF[2] to get the float outer loop scale correct in IDCT synthesis domain */ check_mse = st1_mse_ptr[check_ind[i]] * fdcng_dct_scaleF[2]; if ( check_mse < dist1_ptr[p_max] ) { /* new winner , replace */ dist1_ptr[p_max] = check_mse; indices_st1_local[p_max] = check_ind[i]; st1_mse_ptr[check_ind[i]] = FLT_MAX; /* exclude, BASOP: move32() */ p_max = maximum( dist1_ptr, maxC_st1, NULL ); /* establish a new current worst candidate among all maxC */ } } /* extract the selected stage one vectors in DCT_N domain , apply IDCT_N and scale up */ /* always extract full length signal(24) to be able to update WB( N==21) candidate MSE values */ /* in the case that only a part of the IDCT vector is in final use */ for ( c = 0; c < maxC_st1; c++ ) { dec_FDCNG_MSVQ_stage1( indices_st1_local[c], N, invTrfMatrix, dcttype + 1, &( st1_syn_vec_ptr[c * N] ), NULL ); } return p_max; }; /* recalc MSE for WB(0..20) coeffs , essentially subtract res21^2 ,res22^2, res23^2 that was included in stage1 MSE in the DCT24 domain truncated search, excludes the waveform contributions at pos 21,22,23 to the MSE, important to keep the WB MSEs update for the subsequent stages */ int16_t msvq_stage1_dct_recalc_candidates_wb( /* o : (updated p_max) */ const float *st1_syn_vec_ptr, /* i : IDCT24 synthesis vectors */ const float *u, /* i : target signal */ const int16_t maxC_st1, /* i : number of candidates in stage1 */ float *dist_ptr /* i/o: updated MSE vector for stage1 */ ) { int16_t p_max_local, c; const float *p2; float res24, high_diff[FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB]; for ( c = 0; c < maxC_st1; c++ ) { /* point to extended synthesis part */ p2 = (const float *) &( st1_syn_vec_ptr[c * FDCNG_VQ_MAX_LEN + FDCNG_VQ_MAX_LEN_WB] ); /* ptr init to synthesis candidate c */ /* for stage#1 use "u" instead of the shortened resid[0], to access the extended/extrapolated input target */ v_sub( p2, &( u[FDCNG_VQ_MAX_LEN_WB] ), high_diff, FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB ); res24 = dotp( high_diff, high_diff, FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB ); /* sum squared over top env. values above WB coeffs */ dist_ptr[c] -= res24; /* remove DCT24 high band error contribution */ } /* finally update p_max, as it may potentially change, due to the core DCT24 search originally optimizing over the longer basis vectors than DCT21 */ p_max_local = maximum( dist_ptr, maxC_st1, NULL ); return p_max_local; }; #endif #endif /*--------------------------------------------------------------------------* Loading Loading @@ -87,9 +316,8 @@ void msvq_enc( float resid_buf[2 * LSFMBEST_MAX * M_MAX], dist_buf[2 * LSFMBEST_MAX], Tmp[M_MAX]; int16_t idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX]; int16_t n, maxn, start; #ifdef ERI_FDCNGVQ_LOW_ROM #ifndef ERI_MSVQ_CLEANUP /* buffers */ float dct_target[FDCNG_VQ_DCT_MAXTRUNC]; float u_mr[FDCNG_VQ_MAX_LEN]; Loading @@ -110,20 +338,28 @@ void msvq_enc( int16_t check_ind[FDCNG_VQ_DCT_NPOST]; int16_t segm, j_full, maxC_pre; float *st1_syn_vec_ptr; /* 8* 24 floats in dynRAM */ float *st1_mse_ptr; /* 2^¨7 == 128 floats in existing dRAM used for stage 1 candidate analysis, 128 Word32 in BASOP */ #endif float *st1_syn_vec_ptr; /* ptr to buffer in dynRAM */ float *st1_mse_ptr; /* ptr to buffer in existing dRAM used for stage 1 candidate analysis */ #ifdef ERI_MSVQ_CLEANUP int16_t indices_st1_local[FDCNG_VQ_DCT_NSEGM * 2]; /* after stage#1 DCT search this is copied to the global indices[1][s*stages] structure */ #else float res24, high_diff[FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB]; maxC_pre = ( FDCNG_VQ_DCT_NSEGM * 2 ); #endif assert( maxC <= LSFMBEST_MAX ); assert( ( LSFMBEST_MAX * M_MAX ) > ( N * maxC ) ); /* top of resid_buf is resid[1] and used for stage#1 residuals (input target u), we here reuse resid[0] part of the buffer for stage#1 DCT dynamic RAM needs */ st1_mse_ptr = &( resid_buf[1 * LSFMBEST_MAX * M_MAX] ) - ( levels[0] ); /* reuse top of residual resid[0] scratch RAM for stage1 MSEs */ st1_syn_vec_ptr = &( resid_buf[1 * LSFMBEST_MAX * M_MAX] ) - FDCNG_VQ_MAX_LEN * maxC; /* reuse top of resid[0] scratch RAM for residual */ #ifndef ERI_MSVQ_CLEANUP dcttype = DCT_T2_24_XX; #endif #endif /*----------------------------------------------------------------* Loading Loading @@ -209,6 +445,22 @@ void msvq_enc( } #ifdef ERI_FDCNGVQ_LOW_ROM #ifdef ERI_MSVQ_CLEANUP if ( !s && applyDCT_flag != 0 ) /* means: m==1 */ { /* stage 1 candidates search in truncated dct24 domain without any weights */ assert( N == FDCNG_VQ_MAX_LEN ); assert( maxC == 2 * FDCNG_VQ_DCT_NSEGM ); p_max = msvq_stage1_dct_search( u, FDCNG_VQ_MAX_LEN, maxC, invTrfMatrix, st1_mse_ptr, indices_st1_local, st1_syn_vec_ptr, dist[1] ); /* move established stage#1 indices to the global MSVQ list structure */ for ( c = 0; c < maxC; c++ ) { indices[1][c * stages] = indices_st1_local[c]; } } #else if ( !s && applyDCT_flag != 0 ) /* means: m==1 */ { /* stage 1 search in truncated dct domain without any weights */ Loading Loading @@ -370,6 +622,7 @@ void msvq_enc( assert( maxC == maxC_pre ); } #endif else /* non-DCT Stage #1 code below */ #endif Loading Loading @@ -512,6 +765,12 @@ void msvq_enc( essentially subtract res21^2 ,res22^2, res23^2 that was included in stage1 MSE in the DCT24 domain truncated search, excludes the waveform contributions at pos 21,22,23 to the MSE, important to keep WB MSEs update for the subsequent stages */ #ifdef ERI_MSVQ_CLEANUP if ( s == 0 && applyDCT_flag != 0 && n == FDCNG_VQ_MAX_LEN_WB ) { p_max = msvq_stage1_dct_recalc_candidates_wb( st1_syn_vec_ptr, u, maxC, dist[1] ); } #else if ( s == 0 && applyDCT_flag != 0 && n == FDCNG_VQ_MAX_LEN_WB ) { assert( start == 0 ); Loading @@ -527,6 +786,7 @@ void msvq_enc( /* update p_max, as it may potentially change, due to the core DCT24 search originally optimizing over longer basis vectors than 21 */ p_max = maximum( dist[1], maxC, NULL ); } #endif #endif m = maxC; } /* for (m=1, s=0; s<stages; s++) */ Loading