Loading lib_com/ivas_prot_fx.h +9 −0 Original line number Diff line number Diff line Loading @@ -4706,6 +4706,8 @@ Word32 dot_product_cholesky_fx( const Word32 *A, /* i : Cholesky matrix A */ const Word16 N /* i : vector & matrix size */ ); #ifndef DOT_PROD_CHOLESKY_64BIT Word32 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x */ const Word32 *A, /* i : Cholesky matrix A */ Loading @@ -4713,6 +4715,13 @@ Word32 dot_product_cholesky_fixed( const Word16 exp_x, const Word16 exp_A, Word16 *exp_sum ); #else Word64 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x */ const Word32 *A, /* i : Cholesky matrix A */ const Word16 N /* i : vector & matrix size */ ); #endif void v_mult_mat_fx( Word32 *y_fx, /* o : the product x*A */ Loading lib_com/ivas_tools_fx.c +39 −0 Original line number Diff line number Diff line Loading @@ -606,6 +606,7 @@ void v_sub32_fx( * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise) *---------------------------------------------------------------------*/ #ifndef DOT_PROD_CHOLESKY_64BIT /*! r: the dot product x'*A*A'*x */ Word32 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x Q31 - exp_x*/ Loading Loading @@ -642,6 +643,44 @@ Word32 dot_product_cholesky_fixed( return suma; } #else /*! 
r: the dot product x'*A*A'*x */ Word64 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x Q31 - exp_x*/ const Word32 *A, /* i : Cholesky matrix A Q31 - exp_A*/ const Word16 N /* i : vector & matrix size Q0*/ ) { Word16 i, j; Word64 suma, tmp_sum; Word32 mul; Word32 tmp; const Word32 *pt_x, *pt_A; pt_A = A; suma = 0; move64(); FOR( i = 0; i < N; i++ ) { tmp_sum = 0; move32(); pt_x = x; FOR( j = 0; j <= i; j++ ) { mul = Mpy_32_32( *pt_x++, *pt_A++ ); tmp_sum = W_add( tmp_sum, W_deposit32_l( mul ) ); } tmp_sum = W_shr( tmp_sum, 4 ); // to make sure that the tmp_sum will not overflow tmp = W_extract_l( tmp_sum ); suma = W_mac_32_32( suma, tmp, tmp ); } return suma; } #endif void v_mult_mat_fixed( Word32 *y, /* o : the product x*A Qx - guardbits*/ const Word32 *x, /* i : vector x Qx*/ Loading lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -173,4 +173,5 @@ #define NONBE_FIX_1277_EVS_DTX_HIGH_RATE_THRESHOLD /* VA/Eri: FLP issue 1277: Fix Mismatch in DTX high-rate threshold between EVS float and BASOP */ #define NONBE_FIX_708_OSBA_BR_SWITCHING_CRASH /* FhG: issue 708: fix crash in OSBA BR switching with long test vectors */ //#define OPT_STEREO_32KBPS_V1 /* Optimization made in stereo decoding path for 32kbps decoding */ #define DOT_PROD_CHOLESKY_64BIT /* FhG: Issue 1323, optimized 64 bit implementation of dot_product_cholesky() */ #endif lib_enc/speech_music_classif_fx.c +22 −1 Original line number Diff line number Diff line Loading @@ -1683,10 +1683,16 @@ Word16 ivas_smc_gmm_fx( Word16 flag_odv; Word32 lps_fx, lpm_fx, lpn_fx; Word32 ps_fx[N_SMC_MIXTURES], pm_fx[N_SMC_MIXTURES], pn_fx[N_SMC_MIXTURES]; #ifndef DOT_PROD_CHOLESKY_64BIT Word32 lprob_fx; Word16 lprob_exp = 0; #else Word64 wprob_fx; #endif Word32 fvm_fx[N_PCA_COEF]; #ifndef DOT_PROD_CHOLESKY_64BIT Word16 fvm_exp = 0; #endif Word32 sum_PS_fx, ps_diff_fx, ps_sta_fx; Word32 dlp_fx, wrelE_fx, wdrop_fx, wght_fx; Word32 wrise_fx; Loading Loading @@ -2273,23 +2279,38 @@ 
Word16 ivas_smc_gmm_fx( FOR( m = 0; m < N_SMC_MIXTURES; m++ ) { v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); #ifndef DOT_PROD_CHOLESKY_64BIT fvm_exp = sub( 31, Qfact_FV ); lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #else wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #endif move32(); v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); #ifndef DOT_PROD_CHOLESKY_64BIT lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #else wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #endif move32(); v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); #ifndef DOT_PROD_CHOLESKY_64BIT lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); pn_fx[m] = 
L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #else wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #endif move32(); } Loading Loading
lib_com/ivas_prot_fx.h +9 −0 Original line number Diff line number Diff line Loading @@ -4706,6 +4706,8 @@ Word32 dot_product_cholesky_fx( const Word32 *A, /* i : Cholesky matrix A */ const Word16 N /* i : vector & matrix size */ ); #ifndef DOT_PROD_CHOLESKY_64BIT Word32 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x */ const Word32 *A, /* i : Cholesky matrix A */ Loading @@ -4713,6 +4715,13 @@ Word32 dot_product_cholesky_fixed( const Word16 exp_x, const Word16 exp_A, Word16 *exp_sum ); #else /*! r: 64-bit result; unlike the Word32 variant this one takes no exp_x, exp_A or exp_sum arguments */ Word64 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x */ const Word32 *A, /* i : Cholesky matrix A */ const Word16 N /* i : vector & matrix size */ ); #endif void v_mult_mat_fx( Word32 *y_fx, /* o : the product x*A */ Loading
lib_com/ivas_tools_fx.c +39 −0 Original line number Diff line number Diff line Loading @@ -606,6 +606,7 @@ void v_sub32_fx( * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise) *---------------------------------------------------------------------*/ #ifndef DOT_PROD_CHOLESKY_64BIT /*! r: the dot product x'*A*A'*x */ Word32 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x Q31 - exp_x*/ Loading Loading @@ -642,6 +643,44 @@ Word32 dot_product_cholesky_fixed( return suma; } #else /*! r: the dot product x'*A*A'*x */ Word64 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x Q31 - exp_x*/ const Word32 *A, /* i : Cholesky matrix A Q31 - exp_A*/ const Word16 N /* i : vector & matrix size Q0*/ ) { Word16 i, j; Word64 suma, tmp_sum; Word32 mul; Word32 tmp; const Word32 *pt_x, *pt_A; pt_A = A; suma = 0; move64(); FOR( i = 0; i < N; i++ ) { tmp_sum = 0; move32(); pt_x = x; FOR( j = 0; j <= i; j++ ) { mul = Mpy_32_32( *pt_x++, *pt_A++ ); tmp_sum = W_add( tmp_sum, W_deposit32_l( mul ) ); } tmp_sum = W_shr( tmp_sum, 4 ); // to make sure that the tmp_sum will not overflow tmp = W_extract_l( tmp_sum ); suma = W_mac_32_32( suma, tmp, tmp ); } return suma; } #endif void v_mult_mat_fixed( Word32 *y, /* o : the product x*A Qx - guardbits*/ const Word32 *x, /* i : vector x Qx*/ Loading
lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -173,4 +173,5 @@ #define NONBE_FIX_1277_EVS_DTX_HIGH_RATE_THRESHOLD /* VA/Eri: FLP issue 1277: Fix Mismatch in DTX high-rate threshold between EVS float and BASOP */ #define NONBE_FIX_708_OSBA_BR_SWITCHING_CRASH /* FhG: issue 708: fix crash in OSBA BR switching with long test vectors */ //#define OPT_STEREO_32KBPS_V1 /* Optimization made in stereo decoding path for 32kbps decoding */ #define DOT_PROD_CHOLESKY_64BIT /* FhG: Issue 1323, optimized 64 bit implementation of dot_product_cholesky() */ #endif
lib_enc/speech_music_classif_fx.c +22 −1 Original line number Diff line number Diff line Loading @@ -1683,10 +1683,16 @@ Word16 ivas_smc_gmm_fx( Word16 flag_odv; Word32 lps_fx, lpm_fx, lpn_fx; Word32 ps_fx[N_SMC_MIXTURES], pm_fx[N_SMC_MIXTURES], pn_fx[N_SMC_MIXTURES]; #ifndef DOT_PROD_CHOLESKY_64BIT Word32 lprob_fx; Word16 lprob_exp = 0; #else Word64 wprob_fx; #endif Word32 fvm_fx[N_PCA_COEF]; #ifndef DOT_PROD_CHOLESKY_64BIT Word16 fvm_exp = 0; #endif Word32 sum_PS_fx, ps_diff_fx, ps_sta_fx; Word32 dlp_fx, wrelE_fx, wdrop_fx, wght_fx; Word32 wrise_fx; Loading Loading @@ -2273,23 +2279,38 @@ Word16 ivas_smc_gmm_fx( FOR( m = 0; m < N_SMC_MIXTURES; m++ ) { v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); #ifndef DOT_PROD_CHOLESKY_64BIT fvm_exp = sub( 31, Qfact_FV ); lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #else wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #endif move32(); v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); #ifndef DOT_PROD_CHOLESKY_64BIT lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #else wprob_fx = 
dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #endif move32(); v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); #ifndef DOT_PROD_CHOLESKY_64BIT lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #else wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 #endif move32(); } Loading