Loading lib_com/ivas_prot_fx.h +5 −2 Original line number Diff line number Diff line Loading @@ -4706,6 +4706,8 @@ Word32 dot_product_cholesky_fx( const Word32 *A, /* i : Cholesky matrix A */ const Word16 N /* i : vector & matrix size */ ); #ifndef DOT_PROD_CHOLESKY_64BIT Word32 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x */ const Word32 *A, /* i : Cholesky matrix A */ Loading @@ -4713,12 +4715,13 @@ Word32 dot_product_cholesky_fixed( const Word16 exp_x, const Word16 exp_A, Word16 *exp_sum ); Word64 dot_product_cholesky_fixed64( #else Word64 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x */ const Word32 *A, /* i : Cholesky matrix A */ const Word16 N /* i : vector & matrix size */ ); #endif void v_mult_mat_fx( Word32 *y_fx, /* o : the product x*A */ Loading lib_com/ivas_tools.c +4 −10 Original line number Diff line number Diff line Loading @@ -606,6 +606,7 @@ void v_sub32_fx( * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise) *---------------------------------------------------------------------*/ #ifndef DOT_PROD_CHOLESKY_64BIT /*! r: the dot product x'*A*A'*x */ Word32 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x Q31 - exp_x*/ Loading Loading @@ -642,17 +643,9 @@ Word32 dot_product_cholesky_fixed( return suma; } /*---------------------------------------------------------------------* * dot_product_cholesky() * * Calculates dot product of type x'*A*A'*x, where x is column vector of size m, * and A is a Cholesky decomposition of some Hermitian matrix S whose size is m*m. * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise) *---------------------------------------------------------------------*/ #else /*! r: the dot product x'*A*A'*x */ Word64 dot_product_cholesky_fixed64( Word64 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x Q31 - exp_x*/ const Word32 *A, /* i : Cholesky matrix A Q31 - exp_A*/ const Word16 N /* i : vector & matrix size Q0*/ Loading Loading @@ -686,6 +679,7 @@ Word64 dot_product_cholesky_fixed64( return suma; } #endif void v_mult_mat_fixed( Word32 *y, /* o : the product x*A Qx - guardbits*/ Loading lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -169,4 +169,5 @@ #define FIX_1298 /* VA: fix possible assert in gaus_enc */ #define FIX_1300_ICA_SHIFT_QUANT_IMPROV /* VA: Fix to 1300 to improve precision of the lag quantizer */ #define FIX_1301_CORRECT_TD_CNST /* VA: Fix 1301, correct wrong constant in TD stereo */ #define DOT_PROD_CHOLESKY_64BIT /* FhG: Issue 1323, optimized 64 bit implementation of dot_product_cholesky() */ #endif lib_enc/speech_music_classif_fx.c +33 −3 Original line number Diff line number Diff line Loading @@ -1683,8 +1683,16 @@ Word16 ivas_smc_gmm_fx( Word16 flag_odv; Word32 lps_fx, lpm_fx, lpn_fx; Word32 ps_fx[N_SMC_MIXTURES], pm_fx[N_SMC_MIXTURES], pn_fx[N_SMC_MIXTURES]; #ifndef DOT_PROD_CHOLESKY_64BIT Word32 lprob_fx; Word16 lprob_exp = 0; #else Word64 wprob_fx; #endif Word32 fvm_fx[N_PCA_COEF]; #ifndef DOT_PROD_CHOLESKY_64BIT Word16 fvm_exp = 0; #endif Word32 sum_PS_fx, ps_diff_fx, ps_sta_fx; Word32 dlp_fx, wrelE_fx, wdrop_fx, wght_fx; Word32 wrise_fx; Loading Loading @@ -2271,16 +2279,38 @@ Word16 ivas_smc_gmm_fx( FOR( m = 0; m < N_SMC_MIXTURES; m++ ) { v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 #ifndef DOT_PROD_CHOLESKY_64BIT fvm_exp = sub( 31, Qfact_FV ); lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #else wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #endif move32(); v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 #ifndef DOT_PROD_CHOLESKY_64BIT lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #else wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #endif move32(); v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 #ifndef DOT_PROD_CHOLESKY_64BIT lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #else wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #endif move32(); } Loading Loading
lib_com/ivas_prot_fx.h +5 −2 Original line number Diff line number Diff line Loading @@ -4706,6 +4706,8 @@ Word32 dot_product_cholesky_fx( const Word32 *A, /* i : Cholesky matrix A */ const Word16 N /* i : vector & matrix size */ ); #ifndef DOT_PROD_CHOLESKY_64BIT Word32 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x */ const Word32 *A, /* i : Cholesky matrix A */ Loading @@ -4713,12 +4715,13 @@ Word32 dot_product_cholesky_fixed( const Word16 exp_x, const Word16 exp_A, Word16 *exp_sum ); Word64 dot_product_cholesky_fixed64( #else Word64 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x */ const Word32 *A, /* i : Cholesky matrix A */ const Word16 N /* i : vector & matrix size */ ); #endif void v_mult_mat_fx( Word32 *y_fx, /* o : the product x*A */ Loading
lib_com/ivas_tools.c +4 −10 Original line number Diff line number Diff line Loading @@ -606,6 +606,7 @@ void v_sub32_fx( * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise) *---------------------------------------------------------------------*/ #ifndef DOT_PROD_CHOLESKY_64BIT /*! r: the dot product x'*A*A'*x */ Word32 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x Q31 - exp_x*/ Loading Loading @@ -642,17 +643,9 @@ Word32 dot_product_cholesky_fixed( return suma; } /*---------------------------------------------------------------------* * dot_product_cholesky() * * Calculates dot product of type x'*A*A'*x, where x is column vector of size m, * and A is a Cholesky decomposition of some Hermitian matrix S whose size is m*m. * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise) *---------------------------------------------------------------------*/ #else /*! r: the dot product x'*A*A'*x */ Word64 dot_product_cholesky_fixed64( Word64 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x Q31 - exp_x*/ const Word32 *A, /* i : Cholesky matrix A Q31 - exp_A*/ const Word16 N /* i : vector & matrix size Q0*/ Loading Loading @@ -686,6 +679,7 @@ Word64 dot_product_cholesky_fixed64( return suma; } #endif void v_mult_mat_fixed( Word32 *y, /* o : the product x*A Qx - guardbits*/ Loading
lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -169,4 +169,5 @@ #define FIX_1298 /* VA: fix possible assert in gaus_enc */ #define FIX_1300_ICA_SHIFT_QUANT_IMPROV /* VA: Fix to 1300 to improve precision of the lag quantizer */ #define FIX_1301_CORRECT_TD_CNST /* VA: Fix 1301, correct wrong constant in TD stereo */ #define DOT_PROD_CHOLESKY_64BIT /* FhG: Issue 1323, optimized 64 bit implementation of dot_product_cholesky() */ #endif
lib_enc/speech_music_classif_fx.c +33 −3 Original line number Diff line number Diff line Loading @@ -1683,8 +1683,16 @@ Word16 ivas_smc_gmm_fx( Word16 flag_odv; Word32 lps_fx, lpm_fx, lpn_fx; Word32 ps_fx[N_SMC_MIXTURES], pm_fx[N_SMC_MIXTURES], pn_fx[N_SMC_MIXTURES]; #ifndef DOT_PROD_CHOLESKY_64BIT Word32 lprob_fx; Word16 lprob_exp = 0; #else Word64 wprob_fx; #endif Word32 fvm_fx[N_PCA_COEF]; #ifndef DOT_PROD_CHOLESKY_64BIT Word16 fvm_exp = 0; #endif Word32 sum_PS_fx, ps_diff_fx, ps_sta_fx; Word32 dlp_fx, wrelE_fx, wdrop_fx, wght_fx; Word32 wrise_fx; Loading Loading @@ -2271,16 +2279,38 @@ Word16 ivas_smc_gmm_fx( FOR( m = 0; m < N_SMC_MIXTURES; m++ ) { v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 #ifndef DOT_PROD_CHOLESKY_64BIT fvm_exp = sub( 31, Qfact_FV ); lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #else wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #endif move32(); v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 #ifndef DOT_PROD_CHOLESKY_64BIT lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #else wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #endif move32(); v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 #ifndef DOT_PROD_CHOLESKY_64BIT lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #else wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #endif move32(); } Loading