Commit df4ab5e6 authored by thomas dettbarn's avatar thomas dettbarn
Browse files

replaced the costly basop_util_mant2exp() function with a 64-bit addition....

replaced the costly basop_util_mant2exp() function with a 64-bit addition. There is a potential overflow which needs to be addressed first.
parent 4ac67249
Loading
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -4714,6 +4714,12 @@ Word32 dot_product_cholesky_fixed(
    const Word16 exp_A,
    Word16 *exp_sum );

/*! r: the dot product x'*A*A'*x, returned as a 64-bit accumulator */
Word64 dot_product_cholesky_fixed64(
    const Word32 *x, /* i  : vector x                        */
    const Word32 *A, /* i  : Cholesky  matrix A              */
    const Word16 N   /* i  : vector & matrix size            */
);

void v_mult_mat_fx(
    Word32 *y_fx, /* o  : the product x*A                         */
    Word16 *y_q_fx,
+39 −0
Original line number Diff line number Diff line
@@ -642,6 +642,45 @@ Word32 dot_product_cholesky_fixed(

    return suma;
}
/*---------------------------------------------------------------------*
 * dot_product_cholesky_fixed64()
 *
 * Calculates dot product of type x'*A*A'*x, where x is column vector of size N,
 * and A is a Cholesky decomposition of some Hermitian matrix S whose size is N*N.
 * Therefore, S=A*A' where A is upper triangular matrix of size (N*N+N)/2 (zeros omitted, column-wise)
 *
 * 64-bit variant: the squared row sums are accumulated in a Word64 and
 * returned without any exponent normalization; the caller is responsible
 * for rescaling the result.
 *
 * NOTE(review): assuming the usual basop semantics (Mpy_32_32 = 32x32
 * fractional multiply, W_mac_32_32 = 64-bit accumulate of a 32x32 fractional
 * product), inputs in Qx and QA give row sums in Q(Qx+QA-31) and a result in
 * Q(2*(Qx+QA-31)+1) - confirm against the caller's scaling.
 *---------------------------------------------------------------------*/

/*! r: the dot product x'*A*A'*x */
Word64 dot_product_cholesky_fixed64(
    const Word32 *x, /* i  : vector x                        Qx*/
    const Word32 *A, /* i  : Cholesky  matrix A              QA*/
    const Word16 N   /* i  : vector & matrix size            Q0*/
)
{
    Word16 i, j;
    Word64 suma, tmp_sum;
    Word32 mul, row_sum;
    const Word32 *pt_x, *pt_A;

    pt_A = A;
    suma = 0;
    move64();
    FOR( i = 0; i < N; i++ )
    {
        /* row i of the packed triangular matrix holds i+1 coefficients */
        tmp_sum = 0;
        move64();
        pt_x = x;

        FOR( j = 0; j <= i; j++ )
        {
            mul = Mpy_32_32( *pt_x++, *pt_A++ );
            tmp_sum = W_add( tmp_sum, W_deposit32_l( mul ) );
        }

        /* W_mac_32_32() takes 32-bit operands: the 64-bit row sum is saturated
         * explicitly instead of being silently truncated by the implicit
         * Word64 -> Word32 conversion, so an out-of-range row sum clips rather
         * than wrapping around.
         * NOTE(review): clipping only bounds the error; the input scaling
         * should still guarantee that the row sum fits into 32 bits. */
        row_sum = W_sat_l( tmp_sum );
        suma = W_mac_32_32( suma, row_sum, row_sum );
    }

    return suma;
}
void v_mult_mat_fixed(
    Word32 *y,       /* o  : the product x*A               Qx - guardbits*/
    const Word32 *x, /* i  : vector x                      Qx*/
+7 −16
Original line number Diff line number Diff line
@@ -1683,10 +1683,8 @@ Word16 ivas_smc_gmm_fx(
    Word16 flag_odv;
    Word32 lps_fx, lpm_fx, lpn_fx;
    Word32 ps_fx[N_SMC_MIXTURES], pm_fx[N_SMC_MIXTURES], pn_fx[N_SMC_MIXTURES];
    Word32 lprob_fx;
    Word16 lprob_exp = 0;
    Word64 wprob_fx;
    Word32 fvm_fx[N_PCA_COEF];
    Word16 fvm_exp = 0;
    Word32 sum_PS_fx, ps_diff_fx, ps_sta_fx;
    Word32 dlp_fx, wrelE_fx, wdrop_fx, wght_fx;
    Word32 wrise_fx;
@@ -2273,23 +2271,16 @@ Word16 ivas_smc_gmm_fx(
    FOR( m = 0; m < N_SMC_MIXTURES; m++ )
    {
        v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
        fvm_exp = sub( 31, Qfact_FV );
        lprob_exp = 0;
        move16();
        lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp );
        ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
        ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
        move32();
        v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
        lprob_exp = 0;
        move16();
        lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp );
        pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
        pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
        move32();
        v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
        lprob_exp = 0;
        move16();
        lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp );
        pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
        pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
        move32();
    }