Commit 8a686876 authored by Fabian Bauer's avatar Fabian Bauer
Browse files

Merge branch 'main' into 1326-complexity-issue-ism-4-32-kbps-fb-to-binaural-basop

parents 667d008a 6a3fdbb9
Loading
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -4706,6 +4706,8 @@ Word32 dot_product_cholesky_fx(
    const Word32 *A, /* i  : Cholesky  matrix A              */
    const Word16 N   /* i  : vector & matrix size            */
);

#ifndef DOT_PROD_CHOLESKY_64BIT
Word32 dot_product_cholesky_fixed(
    const Word32 *x, /* i  : vector x                        */
    const Word32 *A, /* i  : Cholesky  matrix A              */
@@ -4713,6 +4715,13 @@ Word32 dot_product_cholesky_fixed(
    const Word16 exp_x,
    const Word16 exp_A,
    Word16 *exp_sum );
#else
/*! r: the dot product x'*A*A'*x, returned as a 64-bit accumulator */
Word64 dot_product_cholesky_fixed(
    const Word32 *x, /* i  : vector x                        */
    const Word32 *A, /* i  : Cholesky  matrix A (upper triangular, column-wise, zeros omitted) */
    const Word16 N   /* i  : vector & matrix size            */
);
#endif

void v_mult_mat_fx(
    Word32 *y_fx, /* o  : the product x*A                         */
+39 −0
Original line number Diff line number Diff line
@@ -606,6 +606,7 @@ void v_sub32_fx(
 * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros omitted, column-wise)
 *---------------------------------------------------------------------*/

#ifndef DOT_PROD_CHOLESKY_64BIT
/*! r: the dot product x'*A*A'*x */
Word32 dot_product_cholesky_fixed(
    const Word32 *x, /* i  : vector x                        Q31 - exp_x*/
@@ -642,6 +643,44 @@ Word32 dot_product_cholesky_fixed(

    return suma;
}
#else
/*! r: the dot product x'*A*A'*x, accumulated in 64 bit */
/* 64-bit variant: no input exponents are passed and no output exponent is returned;
 * the caller is responsible for rescaling the 64-bit result.
 * NOTE(review): the Q-format of the result follows from the Q of x and A, the 4-bit
 * down-scaling below and the scaling conventions of Mpy_32_32()/W_mac_32_32() - confirm
 * against the call sites. */
Word64 dot_product_cholesky_fixed(
    const Word32 *x, /* i  : vector x                        fixed-point, Q chosen by caller */
    const Word32 *A, /* i  : Cholesky  matrix A              packed upper triangular, column-wise, zeros omitted */
    const Word16 N   /* i  : vector & matrix size            Q0*/
)
{
    Word16 i, j;
    Word64 suma, tmp_sum;
    Word32 mul;
    Word32 tmp;
    const Word32 *pt_x, *pt_A;

    /* pt_A walks linearly through the packed matrix: column i contributes i+1 entries */
    pt_A = A;
    suma = 0;
    move64();

    FOR( i = 0; i < N; i++ )
    {
        /* tmp_sum is a 64-bit accumulator, so its reset is counted as a 64-bit move
           (consistent with the reset of suma above) */
        tmp_sum = 0;
        move64();
        pt_x = x; /* every column starts again at x[0] */

        /* inner product of x[0..i] with the i-th packed column of A */
        FOR( j = 0; j <= i; j++ )
        {
            mul = Mpy_32_32( *pt_x++, *pt_A++ );
            tmp_sum = W_add( tmp_sum, W_deposit32_l( mul ) );
        }

        /* drop 4 bits so that the column sum can be narrowed to a Word32 before squaring.
           NOTE(review): 4 guard bits cover a sum of at most 16 Word32 terms - presumably N <= 16
           for all callers, confirm */
        tmp_sum = W_shr( tmp_sum, 4 );
        tmp = W_extract_l( tmp_sum );

        /* accumulate the square of the column sum */
        suma = W_mac_32_32( suma, tmp, tmp );
    }

    return suma;
}
#endif

void v_mult_mat_fixed(
    Word32 *y,       /* o  : the product x*A               Qx - guardbits*/
    const Word32 *x, /* i  : vector x                      Qx*/
+1 −0
Original line number Diff line number Diff line
@@ -173,4 +173,5 @@
#define NONBE_FIX_1277_EVS_DTX_HIGH_RATE_THRESHOLD      /* VA/Eri: FLP issue 1277: Fix Mismatch in DTX high-rate threshold between EVS float and BASOP */
#define NONBE_FIX_708_OSBA_BR_SWITCHING_CRASH   /* FhG: issue 708: fix crash in OSBA BR switching with long test vectors */
//#define OPT_STEREO_32KBPS_V1                    /* Optimization made in stereo decoding path for 32kbps decoding */
#define DOT_PROD_CHOLESKY_64BIT                 /* FhG: Issue 1323, optimized 64 bit implementation of dot_product_cholesky() */
#endif
+22 −1
Original line number Diff line number Diff line
@@ -1683,10 +1683,16 @@ Word16 ivas_smc_gmm_fx(
    Word16 flag_odv;
    Word32 lps_fx, lpm_fx, lpn_fx;
    Word32 ps_fx[N_SMC_MIXTURES], pm_fx[N_SMC_MIXTURES], pn_fx[N_SMC_MIXTURES];
#ifndef DOT_PROD_CHOLESKY_64BIT
    Word32 lprob_fx;
    Word16 lprob_exp = 0;
#else
    Word64 wprob_fx;
#endif
    Word32 fvm_fx[N_PCA_COEF];
#ifndef DOT_PROD_CHOLESKY_64BIT
    Word16 fvm_exp = 0;
#endif
    Word32 sum_PS_fx, ps_diff_fx, ps_sta_fx;
    Word32 dlp_fx, wrelE_fx, wdrop_fx, wght_fx;
    Word32 wrise_fx;
@@ -2273,23 +2279,38 @@ Word16 ivas_smc_gmm_fx(
    FOR( m = 0; m < N_SMC_MIXTURES; m++ )
    {
        v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
#ifndef DOT_PROD_CHOLESKY_64BIT
        fvm_exp = sub( 31, Qfact_FV );
        lprob_exp = 0;
        move16();
        lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp );
        ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
#else
        wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );                 // Q10
        ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
#endif
        move32();
        v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
#ifndef DOT_PROD_CHOLESKY_64BIT
        lprob_exp = 0;
        move16();
        lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp );
        pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
#else
        wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );                  // Q10
        pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 );   // Q18
#endif
        move32();
        v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
#ifndef DOT_PROD_CHOLESKY_64BIT
        lprob_exp = 0;
        move16();
        lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp );
        pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
#else
        wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );                  // Q10
        pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 );   // Q18
#endif
        move32();
    }