Loading lib_com/basop_util.c +20 −0 Original line number Diff line number Diff line Loading @@ -1918,6 +1918,26 @@ Word16 findIndexOfMinWord32( Word32 *x, const Word16 len ) return indx; } Word16 findIndexOfMinWord64( Word64 *x, const Word16 len ) { Word16 i, indx; indx = 0; move16(); FOR( i = 1; i < len; i++ ) { if ( LT_64( x[i], x[indx] ) ) { indx = i; move16(); } } return indx; } Word16 imult1616( Word16 x, Word16 y ) { Loading lib_com/basop_util.h +1 −0 Original line number Diff line number Diff line Loading @@ -538,6 +538,7 @@ Word16 findIndexOfMinWord16( Word16 *x, const Word16 len ); \return index of min Word32 */ Word16 findIndexOfMinWord32( Word32 *x, const Word16 len ); Word16 findIndexOfMinWord64( Word64 *x, const Word16 len ); /****************************************************************************/ /*! Loading lib_dec/ivas_jbm_dec_fx.c +3 −3 Original line number Diff line number Diff line Loading @@ -1031,18 +1031,18 @@ ivas_error ivas_jbm_dec_tc_fx( { num_md_sub_frames = ivas_get_spar_dec_md_num_subframes( st_ivas->sba_analysis_order, st_ivas->hDecoderConfig->ivas_total_brate, st_ivas->last_active_ivas_total_brate ); SPAR_DEC_HANDLE hSpar = st_ivas->hSpar; Word16 Q_p_output = 14; move16(); Word16 nchan_transport; // num_bands_out = hSpar->hFbMixer->pFb->filterbank_num_bands; nchan_transport = hSpar->hMdDec->spar_md_cfg.nchan_transport; move16(); nchan_out = nchan_transport; move16(); Word16 Q_p_output = max( 3, sub( L_norm_arr( p_output_fx[sba_ch_idx], imult1616( output_frame, nchan_transport ) ), 1 ) ); FOR( ch = 0; ch < nchan_transport; ch++ ) { Scale_sig32( p_output_fx[sba_ch_idx + ch], output_frame, sub( Q_p_output, Q11 ) ); // Q_p_output Scale_sig32( p_output_fx[sba_ch_idx + ch], output_frame, Q_p_output ); // Q_p_output + Q11 } Q_p_output = add( Q11, Q_p_output ); hSpar->hMdDec->Q_mixer_mat = 31; move16(); Loading lib_enc/gain_enc_fx.c +10 −7 Original line number Diff line number Diff line Loading @@ -2525,7 +2525,7 @@ void 
gain_enc_lbr_ivas_fx( /*Ecode = ( dotp( code, code, L_SUBFR ) + 0.01f ) / L_SUBFR; *gain_inov = 1.0f / (float)sqrt(Ecode);*/ L_tmp = Dot_product12( code, code, L_subfr, &exp_code ); L_tmp = Dot_product12( code, code, L_subfr, &exp_code ); /* Q9 + Q9 + 1 + (30-exp_code)*/ L_inov = L_tmp; /* sets to 'L_tmp' in 1 clock */ move32(); /* exp_code: -18 (code in Q9), -6 (/L_SUBFR), -31 (L_tmp Q31->Q0) */ Loading Loading @@ -2599,12 +2599,15 @@ void gain_enc_lbr_ivas_fx( /* gcode0 = (float)pow(10, dotp(b, aux, n_pred) - 0.5f * (float)log10(Ecode)); gcode0 = (float)pow(10, dotp(b, aux, n_pred) - 0.05f * 10 * (float)log10(Ecode)); gcode0 = (float)pow(10, 0.05(20 * dotp(b, aux, n_pred) - 10 * (float)log10(Ecode))); */ exp_code = sub( exp_code, 18 + 6 + 1 ); // Ecode = (Ecode / L_subfr) L_tmp = L_shr( L_tmp, L_subfr_sf ); // Q19 + (Q30-exp_code) /* Calculation for log10(Ecode) exponent for applying log10 = Q31 - q = Q31 - Q19 - Q30 + exp_code = exp_code - Q18*/ L_tmp = BASOP_Util_Log10( L_tmp, sub( exp_code, 18 ) ); // new q = Q25 exp = norm_l( L_tmp ); frac = Log2_norm_lc( L_shl( L_tmp, exp ) ); exp = sub( exp_code, exp ); L_tmp1 = Mpy_32_16( exp, frac, 24660 ); /* Q14 */ /* 10*log10(2) in Q13*/ L_tmp = L_shl( L_tmp, exp ); // Q25 + exp // 10 in Q27 , ( 10 * log10( Ecode ) ) L_tmp1 = Mpy_32_32( L_tmp, 1342177280 ); // Q25 + exp + 1 + Q27 - 32 = Q21 + exp L_tmp1 = L_shr( L_tmp1, add( 7, exp ) ); // Q21 + exp - 7 - exp = Q14 L_tmp = Dot_product( b, aux, n_pred ); /*Q25*/ L_tmp = Mult_32_16( L_tmp, 320 ); /*Q14, 20 in Q4*/ Loading lib_enc/lsf_msvq_ma_enc_fx.c +329 −3 Original line number Diff line number Diff line Loading @@ -468,6 +468,33 @@ static void depack_sub_values_fx( Word16 *pTmp, const Word16 *p1, const Word16 * }

/*--------------------------------------------------------------------------*
 * depack_mul_values_fx64()
 *
 * Unpacks N codebook values from cbp, weights them and returns the weighted
 * energy of the codebook vector in a 64-bit accumulator.
 *
 *   Tmp[i] = w[i] * val[i]                 (stored for the caller)
 *   en    += Tmp[i] * val[i]               (W_mac_32_16 accumulation)
 *
 * Values are unpacked four at a time with depack_4_values; each group of
 * four reads from a 3-word step of cbp. The loop always writes a full group,
 * so Tmp[] and w[] must hold N rounded up to a multiple of four entries.
 *--------------------------------------------------------------------------*/
static Word64 depack_mul_values_fx64( Word32 *Tmp, const Word16 *w, const Word16 *cbp, const Word16 N )
{
    Word16 i, val0, val1, val2, val3;
    Word64 en;

    en = 0;
    move64(); /* 64-bit accumulator: counted as a 64-bit move */

    FOR( i = 0; i < N; i += 4 )
    {
        /* No trailing ';' in the original call - presumably the macro supplies
         * its own statement terminators (definition not visible here; confirm). */
        depack_4_values( cbp + i_mult( shr( i, 2 ), 3 ), val0, val1, val2, val3 )

        Tmp[i + 0] = L_mult0( w[i + 0], val0 ); // Q8 * Q2.56
        move32();                               /* Tmp[] is Word32 */
        en = W_mac_32_16( en, Tmp[i + 0], val0 ); // Q8 * Q2.56 * 2.56 * Q1

        Tmp[i + 1] = L_mult0( w[i + 1], val1 );
        move32();
        en = W_mac_32_16( en, Tmp[i + 1], val1 );

        Tmp[i + 2] = L_mult0( w[i + 2], val2 );
        move32();
        en = W_mac_32_16( en, Tmp[i + 2], val2 );

        Tmp[i + 3] = L_mult0( w[i + 3], val3 );
        move32();
        en = W_mac_32_16( en, Tmp[i + 3], val3 );
    }

    return en; // Q8 * Q2.56 * 2.56 * Q1
}

/*--------------------------------------------------------------------------* * msvq_enc_find_p_max_8() * Loading Loading @@ -522,6 +549,54 @@ static Word16 msvq_enc_find_p_max_8_fx( Word32 dist[] ) }

/*--------------------------------------------------------------------------*
 * msvq_enc_find_p_max_8_fx64()
 *
 * Returns the index of the largest of dist[0..7]. The comparison is strict,
 * so the lowest index is kept when several entries share the maximum.
 *--------------------------------------------------------------------------*/
static Word16 msvq_enc_find_p_max_8_fx64( Word64 dist[] )
{
    Word16 worst = 0;
    move16();

    BASOP_SATURATE_WARNING_OFF_EVS
    if ( GT_64( dist[1], dist[worst] ) )
    {
        worst = 1;
        move16();
    }
    if ( GT_64( dist[2], dist[worst] ) )
    {
        worst = 2;
        move16();
    }
    if ( GT_64( dist[3], dist[worst] ) )
    {
        worst = 3;
        move16();
    }
    if ( GT_64( dist[4], dist[worst] ) )
    {
        worst = 4;
        move16();
    }
    if ( GT_64( dist[5], dist[worst] ) )
    {
        worst = 5;
        move16();
    }
    if ( GT_64( dist[6], dist[worst] ) )
    {
        worst = 6;
        move16();
    }
    if ( GT_64( dist[7], dist[worst] ) )
    {
        worst = 7;
        move16();
    }
    BASOP_SATURATE_WARNING_ON_EVS

    return worst;
}

/*--------------------------------------------------------------------------* * msvq_enc_find_p_max_6() * Loading Loading @@ -565,6 +640,43 @@ static Word16 msvq_enc_find_p_max_6_fx( Word32 dist[] ) return p_max; }

/*--------------------------------------------------------------------------*
 * msvq_enc_find_p_max_6_fx64()
 *
 * Returns the index of the largest of dist[0..5]. The comparison is strict,
 * so the lowest index is kept when several entries share the maximum.
 *--------------------------------------------------------------------------*/
static Word16 msvq_enc_find_p_max_6_fx64( Word64 dist[] )
{
    Word16 worst = 0;
    move16();

    BASOP_SATURATE_WARNING_OFF_EVS
    if ( GT_64( dist[1], dist[worst] ) )
    {
        worst = 1;
        move16();
    }
    if ( GT_64( dist[2], dist[worst] ) )
    {
        worst = 2;
        move16();
    }
    if ( GT_64( dist[3], dist[worst] ) )
    {
        worst = 3;
        move16();
    }
    if ( GT_64( dist[4], dist[worst] ) )
    {
        worst = 4;
        move16();
    }
    if ( GT_64( dist[5], dist[worst] ) )
    {
        worst = 5;
        move16();
    }
    BASOP_SATURATE_WARNING_ON_EVS

    return worst;
}
/*--------------------------------------------------------------------------* * msvq_enc_fx() Loading Loading @@ -787,7 +899,222 @@ void msvq_enc_fx( return; } void msvq_enc_lsf_fx64( const Word16 *const *cb, /* i : Codebook (indexed cb[*stages][levels][p]) (10Q5 * 1.28) */ const Word16 dims[], /* i : Dimension of each codebook stage (NULL: full dim.) */ const Word16 offs[], /* i : Starting dimension of each codebook stage (NULL: 0) */ const Word16 u[], /* i : Vector to be encoded (prediction and mean removed)(Q14Q1*1.28) */ const Word16 *levels, /* i : Number of levels in each stage */ const Word16 maxC, /* i : Tree search size (number of candidates kept from */ /* one stage to the next == M-best) */ const Word16 stages, /* i : Number of stages */ const Word16 w[], /* i : Weights Q8*/ const Word16 N, /* i : Vector dimension */ const Word16 maxN, /* i : Codebook dimension */ Word16 Idx[] /* o : Indices */ ) { Word16 j; const Word16 *cbp; Word16 p2i; Word16 resid_buf[2 * LSFMBEST_MAX * M_MAX], *resid[2]; Word16 *pTmp, *p1; Word16 *indices[2], m, s, c, c2, p_max, i; Word32 Tmp32[M_MAX]; Word16 idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX]; Word64 *dist_64[2], en64, tmp64; Word64 dist_buf_64[2 * LSFMBEST_MAX]; Word16 ( *func_ptr64 )( Word64 * ); Word16 N34; Word16 n, maxn, start; /*----------------------------------------------------------------* * Allocate memory for previous (parent) and current nodes. * Parent node is indexed [0], current node is indexed [1]. 
*----------------------------------------------------------------*/ indices[0] = idx_buf; indices[1] = idx_buf + maxC * stages; /*move16();*/ /*vr_iset(0, idx_buf, 2*stages*maxC);*/ set16_fx( idx_buf, 0, (Word16) ( 2 * stages * maxC ) ); resid[0] = resid_buf; resid[1] = resid_buf + maxC * N; /*move16();*/ dist_64[0] = dist_buf_64; dist_64[1] = dist_buf_64 + maxC; /*move16();*/ /*vr_iset(0, parents, maxC);*/ set16_fx( parents, 0, maxC ); func_ptr64 = msvq_enc_find_p_max_6_fx64; move16(); if ( EQ_16( maxC, 8 ) ) { func_ptr64 = msvq_enc_find_p_max_8_fx64; move16(); } /*----------------------------------------------------------------* * LSF weights are normalized, so it is always better to multiply it first * Set up inital distance vector *----------------------------------------------------------------*/ /* Q0/16 * Qw_norm/16 << 1 >> 16 => Qwnorm-15/16 * Q0/16 << 1 => Qwnorm-14/32 * 6.5536 */ Word64 ss2_64; ss2_64 = W_mult_32_16( L_mult0( u[0], w[0] ), u[0] ); // Q8 * Q2.56 * 2.56 * Q1 FOR( j = 1; j < N; j++ ) { ss2_64 = W_mac_32_16( ss2_64, L_mult0( u[j], w[j] ), u[j] ); } /* Set up inital error (residual) vectors */ pTmp = resid[1]; /*move16();*/ FOR( c = 0; c < maxC; c++ ) { Copy( u, pTmp + c * N, N ); dist_64[1][c] = ss2_64; move64(); } /* Loop over all stages */ m = 1; move16(); FOR( s = 0; s < stages; s++ ) { /* codebook pointer is set to point to first stage */ cbp = cb[s]; /*3Q12*1.28*/ move16(); /* Set up pointers to parent and current nodes */ swap( indices[0], indices[1], Word16 * ); move16(); move16(); move16(); move16(); swap( resid[0], resid[1], Word16 * ); move16(); move16(); move16(); swap( dist_64[0], dist_64[1], Word64 * ); move64(); move64(); move64(); /* p_max points to maximum distortion node (worst of best) */ p_max = 0; move16(); n = N; move16(); maxn = maxN; move16(); if ( dims ) { n = dims[s]; move16(); } if ( dims ) { maxn = n; move16(); } assert( ( maxn % 4 ) == 0 ); N34 = mult( maxn, 24576 /*0.75f Q15*/ ); start = 0; move16(); if ( offs ) { 
start = offs[s]; move16(); } set32_fx( Tmp32, 0, start ); set32_fx( Tmp32 + start + n, 0, sub( N, add( start, n ) ) ); /* Set distortions to a large value */ FOR( j = 0; j < maxC; j++ ) { dist_64[1][j] = LLONG_MAX; move64(); } FOR( j = 0; j < levels[s]; j++ ) { /* Compute weighted codebook element and its energy */ en64 = depack_mul_values_fx64( Tmp32 + start, w + start, cbp, n ); // Q8 // en64: Q8 * Q2.56 * Q2.56 * q1 // Tmp: 2.56 * Q8 cbp += N34; /* pointer is incremented */ /* Iterate over all parent nodes */ FOR( c = 0; c < m; c++ ) { pTmp = &resid[0][c * N]; // this resid buffer is initial lsf values /*tmp = (*pTmp++) * Tmp[0];*/ Word64 t164 = 0; move64(); t164 = W_mult_32_16( Tmp32[0], pTmp[0] ); // 2.56 * Q8 * Q2.56 * Q1 // Tmp32: Q8 * Q2.56 FOR( i = 1; i < N; i++ ) { t164 = W_mac_32_16( t164, Tmp32[i], pTmp[i] ); // 2.56 * Q8 * Q2.56 * Q1 } tmp64 = W_add( dist_64[0][c], W_sub( en64, W_shl( t164, 1 ) ) ); t164 = W_sub( tmp64, dist_64[1][p_max] ); IF( t164 <= 0 ) { /* Replace worst */ dist_64[1][p_max] = tmp64; move64(); indices[1][p_max * stages + s] = j; move16(); parents[p_max] = c; move16(); p_max = ( *func_ptr64 )( dist_64[1] ); } /*IF (L_sub(tmp,dist[1][p_max]) < 0) */ } /* FOR (c=0; c<m; c++) */ } /* FOR (j=0; j<levels[s]; j++) */ /*------------------------------------------------------------* * Compute error vectors for each node *------------------------------------------------------------*/ pTmp = resid[1]; FOR( c = 0; c < maxC; c++ ) { /* Subtract codebook entry from residual vector of parent node and multiply with scale factor */ p1 = resid[0] + parents[c] * N; p2i = indices[1][c * stages + s]; move16(); Copy( p1, pTmp, start ); depack_sub_values_fx( pTmp + start, p1 + start, &cb[s][p2i * N34], n ); Copy( p1 + start + n, pTmp + start + n, sub( N, add( start, n ) ) ); pTmp += N; /* Get indices that were used for parent node */ /*mvs2s(indices[0]+parents[c]*stages, indices[1]+c*stages, s);*/ Copy( indices[0] + parents[c] * stages, indices[1] + c * 
stages, s ); } /* for (c=0; c<maxC; c++) */ m = maxC; move16(); } /* for (m=1, s=0; s<stages; s++) */ /* Find the optimum candidate */ c2 = findIndexOfMinWord64( dist_64[1], maxC ); /*mvi2i (indices[1]+c2*stages, Idx, stages);*/ Copy( indices[1] + c2 * stages, Idx, stages ); return; } /*--------------------------------------------------------------------------* * msvq_enc_ivas_fx() * Loading Loading @@ -1530,8 +1857,7 @@ Word16 Q_lsf_tcxlpc_ivas_fx( move16(); } msvq_enc_fx( msvq_enc_lsf_fx64( lsf_codebook[narrowband][cdk], lsf_dims, lsf_offs, Loading Loading @@ -1572,7 +1898,7 @@ Word16 Q_lsf_tcxlpc_ivas_fx( } /* Quantize using extra stage(s) */ msvq_enc_fx( msvq_enc_lsf_fx64( lsf_ind_codebook[narrowband][cdk], lsf_ind_dims, lsf_ind_offs, Loading Loading
lib_com/basop_util.c +20 −0 Original line number Diff line number Diff line Loading @@ -1918,6 +1918,26 @@ Word16 findIndexOfMinWord32( Word32 *x, const Word16 len ) return indx; }

/*! \brief Find the position of the smallest element of a Word64 array.

    The scan starts at element 1 and uses a strict "less than" comparison,
    so the first occurrence wins when several elements share the minimum.
    For len <= 1 the loop does not run and 0 is returned.

    \param x   array to search
    \param len number of elements to examine

    \return index of min Word64
 */
Word16 findIndexOfMinWord64( Word64 *x, const Word16 len )
{
    Word16 pos;
    Word16 best = 0;
    move16();

    FOR( pos = 1; pos < len; pos++ )
    {
        /* lower-case 'if': the body is a single move */
        if ( LT_64( x[pos], x[best] ) )
        {
            best = pos;
            move16();
        }
    }

    return best;
}

Word16 imult1616( Word16 x, Word16 y ) { Loading
lib_com/basop_util.h +1 −0 Original line number Diff line number Diff line Loading @@ -538,6 +538,7 @@ Word16 findIndexOfMinWord16( Word16 *x, const Word16 len ); \return index of min Word32 */ Word16 findIndexOfMinWord32( Word32 *x, const Word16 len );
/*! \brief Get the index of the minimum element of a Word64 array
    \param x   array to search
    \param len number of elements to examine
    \return index of min Word64 */
Word16 findIndexOfMinWord64( Word64 *x, const Word16 len ); /****************************************************************************/ /*! Loading
lib_dec/ivas_jbm_dec_fx.c +3 −3 Original line number Diff line number Diff line Loading @@ -1031,18 +1031,18 @@ ivas_error ivas_jbm_dec_tc_fx( { num_md_sub_frames = ivas_get_spar_dec_md_num_subframes( st_ivas->sba_analysis_order, st_ivas->hDecoderConfig->ivas_total_brate, st_ivas->last_active_ivas_total_brate ); SPAR_DEC_HANDLE hSpar = st_ivas->hSpar; Word16 Q_p_output = 14; move16(); Word16 nchan_transport; // num_bands_out = hSpar->hFbMixer->pFb->filterbank_num_bands; nchan_transport = hSpar->hMdDec->spar_md_cfg.nchan_transport; move16(); nchan_out = nchan_transport; move16(); Word16 Q_p_output = max( 3, sub( L_norm_arr( p_output_fx[sba_ch_idx], imult1616( output_frame, nchan_transport ) ), 1 ) ); FOR( ch = 0; ch < nchan_transport; ch++ ) { Scale_sig32( p_output_fx[sba_ch_idx + ch], output_frame, sub( Q_p_output, Q11 ) ); // Q_p_output Scale_sig32( p_output_fx[sba_ch_idx + ch], output_frame, Q_p_output ); // Q_p_output + Q11 } Q_p_output = add( Q11, Q_p_output ); hSpar->hMdDec->Q_mixer_mat = 31; move16(); Loading
lib_enc/gain_enc_fx.c +10 −7 Original line number Diff line number Diff line Loading @@ -2525,7 +2525,7 @@ void gain_enc_lbr_ivas_fx( /*Ecode = ( dotp( code, code, L_SUBFR ) + 0.01f ) / L_SUBFR; *gain_inov = 1.0f / (float)sqrt(Ecode);*/ L_tmp = Dot_product12( code, code, L_subfr, &exp_code ); L_tmp = Dot_product12( code, code, L_subfr, &exp_code ); /* Q9 + Q9 + 1 + (30-exp_code)*/ L_inov = L_tmp; /* sets to 'L_tmp' in 1 clock */ move32(); /* exp_code: -18 (code in Q9), -6 (/L_SUBFR), -31 (L_tmp Q31->Q0) */ Loading Loading @@ -2599,12 +2599,15 @@ void gain_enc_lbr_ivas_fx( /* gcode0 = (float)pow(10, dotp(b, aux, n_pred) - 0.5f * (float)log10(Ecode)); gcode0 = (float)pow(10, dotp(b, aux, n_pred) - 0.05f * 10 * (float)log10(Ecode)); gcode0 = (float)pow(10, 0.05(20 * dotp(b, aux, n_pred) - 10 * (float)log10(Ecode))); */ exp_code = sub( exp_code, 18 + 6 + 1 ); // Ecode = (Ecode / L_subfr) L_tmp = L_shr( L_tmp, L_subfr_sf ); // Q19 + (Q30-exp_code) /* Calculation for log10(Ecode) exponent for applying log10 = Q31 - q = Q31 - Q19 - Q30 + exp_code = exp_code - Q18*/ L_tmp = BASOP_Util_Log10( L_tmp, sub( exp_code, 18 ) ); // new q = Q25 exp = norm_l( L_tmp ); frac = Log2_norm_lc( L_shl( L_tmp, exp ) ); exp = sub( exp_code, exp ); L_tmp1 = Mpy_32_16( exp, frac, 24660 ); /* Q14 */ /* 10*log10(2) in Q13*/ L_tmp = L_shl( L_tmp, exp ); // Q25 + exp // 10 in Q27 , ( 10 * log10( Ecode ) ) L_tmp1 = Mpy_32_32( L_tmp, 1342177280 ); // Q25 + exp + 1 + Q27 - 32 = Q21 + exp L_tmp1 = L_shr( L_tmp1, add( 7, exp ) ); // Q21 + exp - 7 - exp = Q14 L_tmp = Dot_product( b, aux, n_pred ); /*Q25*/ L_tmp = Mult_32_16( L_tmp, 320 ); /*Q14, 20 in Q4*/ Loading
lib_enc/lsf_msvq_ma_enc_fx.c +329 −3 Original line number Diff line number Diff line Loading @@ -468,6 +468,33 @@ static void depack_sub_values_fx( Word16 *pTmp, const Word16 *p1, const Word16 * } static Word64 depack_mul_values_fx64( Word32 *Tmp, const Word16 *w, const Word16 *cbp, const Word16 N ) { Word16 i, val0, val1, val2, val3; Word64 en; en = 0; move32(); FOR( i = 0; i < N; i += 4 ) { depack_4_values( cbp + i_mult( shr( i, 2 ), 3 ), val0, val1, val2, val3 ) Tmp[i + 0] = L_mult0( w[i + 0], val0 ); // Q8 * Q2.56 move16(); en = W_mac_32_16( en, Tmp[i + 0], val0 ); // Q8 * Q2.56 * 2.56 * Q1 Tmp[i + 1] = L_mult0( w[i + 1], val1 ); move16(); en = W_mac_32_16( en, Tmp[i + 1], val1 ); Tmp[i + 2] = L_mult0( w[i + 2], val2 ); move16(); en = W_mac_32_16( en, Tmp[i + 2], val2 ); Tmp[i + 3] = L_mult0( w[i + 3], val3 ); move16(); en = W_mac_32_16( en, Tmp[i + 3], val3 ); } return en; // Q8 * Q2.56 * 2.56 * Q1 } /*--------------------------------------------------------------------------* * msvq_enc_find_p_max_8() * Loading Loading @@ -522,6 +549,54 @@ static Word16 msvq_enc_find_p_max_8_fx( Word32 dist[] ) } static Word16 msvq_enc_find_p_max_8_fx64( Word64 dist[] ) { Word16 p_max; p_max = 0; move16(); BASOP_SATURATE_WARNING_OFF_EVS if ( GT_64( dist[1], dist[p_max] ) ) { p_max = 1; move16(); } if ( GT_64( dist[2], dist[p_max] ) ) { p_max = 2; move16(); } if ( GT_64( dist[3], dist[p_max] ) ) { p_max = 3; move16(); } if ( GT_64( dist[4], dist[p_max] ) ) { p_max = 4; move16(); } if ( GT_64( dist[5], dist[p_max] ) ) { p_max = 5; move16(); } if ( GT_64( dist[6], dist[p_max] ) ) { p_max = 6; move16(); } if ( GT_64( dist[7], dist[p_max] ) ) { p_max = 7; move16(); } BASOP_SATURATE_WARNING_ON_EVS return p_max; } /*--------------------------------------------------------------------------* * msvq_enc_find_p_max_6() * Loading Loading @@ -565,6 +640,43 @@ static Word16 msvq_enc_find_p_max_6_fx( Word32 dist[] ) return p_max; } static Word16 msvq_enc_find_p_max_6_fx64( Word64 dist[] ) 
{ Word16 p_max; p_max = 0; move16(); BASOP_SATURATE_WARNING_OFF_EVS if ( GT_64( dist[1], dist[p_max] ) ) { p_max = 1; move16(); } if ( GT_64( dist[2], dist[p_max] ) ) { p_max = 2; move16(); } if ( GT_64( dist[3], dist[p_max] ) ) { p_max = 3; move16(); } if ( GT_64( dist[4], dist[p_max] ) ) { p_max = 4; move16(); } if ( GT_64( dist[5], dist[p_max] ) ) { p_max = 5; move16(); } BASOP_SATURATE_WARNING_ON_EVS return p_max; } /*--------------------------------------------------------------------------* * msvq_enc_fx() Loading Loading @@ -787,7 +899,222 @@ void msvq_enc_fx( return; } void msvq_enc_lsf_fx64( const Word16 *const *cb, /* i : Codebook (indexed cb[*stages][levels][p]) (10Q5 * 1.28) */ const Word16 dims[], /* i : Dimension of each codebook stage (NULL: full dim.) */ const Word16 offs[], /* i : Starting dimension of each codebook stage (NULL: 0) */ const Word16 u[], /* i : Vector to be encoded (prediction and mean removed)(Q14Q1*1.28) */ const Word16 *levels, /* i : Number of levels in each stage */ const Word16 maxC, /* i : Tree search size (number of candidates kept from */ /* one stage to the next == M-best) */ const Word16 stages, /* i : Number of stages */ const Word16 w[], /* i : Weights Q8*/ const Word16 N, /* i : Vector dimension */ const Word16 maxN, /* i : Codebook dimension */ Word16 Idx[] /* o : Indices */ ) { Word16 j; const Word16 *cbp; Word16 p2i; Word16 resid_buf[2 * LSFMBEST_MAX * M_MAX], *resid[2]; Word16 *pTmp, *p1; Word16 *indices[2], m, s, c, c2, p_max, i; Word32 Tmp32[M_MAX]; Word16 idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX]; Word64 *dist_64[2], en64, tmp64; Word64 dist_buf_64[2 * LSFMBEST_MAX]; Word16 ( *func_ptr64 )( Word64 * ); Word16 N34; Word16 n, maxn, start; /*----------------------------------------------------------------* * Allocate memory for previous (parent) and current nodes. * Parent node is indexed [0], current node is indexed [1]. 
*----------------------------------------------------------------*/ indices[0] = idx_buf; indices[1] = idx_buf + maxC * stages; /*move16();*/ /*vr_iset(0, idx_buf, 2*stages*maxC);*/ set16_fx( idx_buf, 0, (Word16) ( 2 * stages * maxC ) ); resid[0] = resid_buf; resid[1] = resid_buf + maxC * N; /*move16();*/ dist_64[0] = dist_buf_64; dist_64[1] = dist_buf_64 + maxC; /*move16();*/ /*vr_iset(0, parents, maxC);*/ set16_fx( parents, 0, maxC ); func_ptr64 = msvq_enc_find_p_max_6_fx64; move16(); if ( EQ_16( maxC, 8 ) ) { func_ptr64 = msvq_enc_find_p_max_8_fx64; move16(); } /*----------------------------------------------------------------* * LSF weights are normalized, so it is always better to multiply it first * Set up inital distance vector *----------------------------------------------------------------*/ /* Q0/16 * Qw_norm/16 << 1 >> 16 => Qwnorm-15/16 * Q0/16 << 1 => Qwnorm-14/32 * 6.5536 */ Word64 ss2_64; ss2_64 = W_mult_32_16( L_mult0( u[0], w[0] ), u[0] ); // Q8 * Q2.56 * 2.56 * Q1 FOR( j = 1; j < N; j++ ) { ss2_64 = W_mac_32_16( ss2_64, L_mult0( u[j], w[j] ), u[j] ); } /* Set up inital error (residual) vectors */ pTmp = resid[1]; /*move16();*/ FOR( c = 0; c < maxC; c++ ) { Copy( u, pTmp + c * N, N ); dist_64[1][c] = ss2_64; move64(); } /* Loop over all stages */ m = 1; move16(); FOR( s = 0; s < stages; s++ ) { /* codebook pointer is set to point to first stage */ cbp = cb[s]; /*3Q12*1.28*/ move16(); /* Set up pointers to parent and current nodes */ swap( indices[0], indices[1], Word16 * ); move16(); move16(); move16(); move16(); swap( resid[0], resid[1], Word16 * ); move16(); move16(); move16(); swap( dist_64[0], dist_64[1], Word64 * ); move64(); move64(); move64(); /* p_max points to maximum distortion node (worst of best) */ p_max = 0; move16(); n = N; move16(); maxn = maxN; move16(); if ( dims ) { n = dims[s]; move16(); } if ( dims ) { maxn = n; move16(); } assert( ( maxn % 4 ) == 0 ); N34 = mult( maxn, 24576 /*0.75f Q15*/ ); start = 0; move16(); if ( offs ) { 
start = offs[s]; move16(); } set32_fx( Tmp32, 0, start ); set32_fx( Tmp32 + start + n, 0, sub( N, add( start, n ) ) ); /* Set distortions to a large value */ FOR( j = 0; j < maxC; j++ ) { dist_64[1][j] = LLONG_MAX; move64(); } FOR( j = 0; j < levels[s]; j++ ) { /* Compute weighted codebook element and its energy */ en64 = depack_mul_values_fx64( Tmp32 + start, w + start, cbp, n ); // Q8 // en64: Q8 * Q2.56 * Q2.56 * q1 // Tmp: 2.56 * Q8 cbp += N34; /* pointer is incremented */ /* Iterate over all parent nodes */ FOR( c = 0; c < m; c++ ) { pTmp = &resid[0][c * N]; // this resid buffer is initial lsf values /*tmp = (*pTmp++) * Tmp[0];*/ Word64 t164 = 0; move64(); t164 = W_mult_32_16( Tmp32[0], pTmp[0] ); // 2.56 * Q8 * Q2.56 * Q1 // Tmp32: Q8 * Q2.56 FOR( i = 1; i < N; i++ ) { t164 = W_mac_32_16( t164, Tmp32[i], pTmp[i] ); // 2.56 * Q8 * Q2.56 * Q1 } tmp64 = W_add( dist_64[0][c], W_sub( en64, W_shl( t164, 1 ) ) ); t164 = W_sub( tmp64, dist_64[1][p_max] ); IF( t164 <= 0 ) { /* Replace worst */ dist_64[1][p_max] = tmp64; move64(); indices[1][p_max * stages + s] = j; move16(); parents[p_max] = c; move16(); p_max = ( *func_ptr64 )( dist_64[1] ); } /*IF (L_sub(tmp,dist[1][p_max]) < 0) */ } /* FOR (c=0; c<m; c++) */ } /* FOR (j=0; j<levels[s]; j++) */ /*------------------------------------------------------------* * Compute error vectors for each node *------------------------------------------------------------*/ pTmp = resid[1]; FOR( c = 0; c < maxC; c++ ) { /* Subtract codebook entry from residual vector of parent node and multiply with scale factor */ p1 = resid[0] + parents[c] * N; p2i = indices[1][c * stages + s]; move16(); Copy( p1, pTmp, start ); depack_sub_values_fx( pTmp + start, p1 + start, &cb[s][p2i * N34], n ); Copy( p1 + start + n, pTmp + start + n, sub( N, add( start, n ) ) ); pTmp += N; /* Get indices that were used for parent node */ /*mvs2s(indices[0]+parents[c]*stages, indices[1]+c*stages, s);*/ Copy( indices[0] + parents[c] * stages, indices[1] + c * 
stages, s ); } /* for (c=0; c<maxC; c++) */ m = maxC; move16(); } /* for (m=1, s=0; s<stages; s++) */ /* Find the optimum candidate */ c2 = findIndexOfMinWord64( dist_64[1], maxC ); /*mvi2i (indices[1]+c2*stages, Idx, stages);*/ Copy( indices[1] + c2 * stages, Idx, stages ); return; } /*--------------------------------------------------------------------------* * msvq_enc_ivas_fx() * Loading Loading @@ -1530,8 +1857,7 @@ Word16 Q_lsf_tcxlpc_ivas_fx( move16(); } msvq_enc_fx( msvq_enc_lsf_fx64( lsf_codebook[narrowband][cdk], lsf_dims, lsf_offs, Loading Loading @@ -1572,7 +1898,7 @@ Word16 Q_lsf_tcxlpc_ivas_fx( } /* Quantize using extra stage(s) */ msvq_enc_fx( msvq_enc_lsf_fx64( lsf_ind_codebook[narrowband][cdk], lsf_ind_dims, lsf_ind_offs, Loading