From df4ab5e6d98ccf2250ef0b6daa10b9b7c1f0ec91 Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Tue, 25 Feb 2025 15:39:37 +0100 Subject: [PATCH 1/5] replaced the costly basop_util_mant2exp() function with a 64 bit addtion. there is a potential overflow which needs to be adressed first. --- lib_com/ivas_prot_fx.h | 6 +++++ lib_com/ivas_tools.c | 39 +++++++++++++++++++++++++++++++ lib_enc/speech_music_classif_fx.c | 23 ++++++------------ 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 1682c255b..1b6e7be85 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -4714,6 +4714,12 @@ Word32 dot_product_cholesky_fixed( const Word16 exp_A, Word16 *exp_sum ); +Word64 dot_product_cholesky_fixed64( + const Word32 *x, /* i : vector x */ + const Word32 *A, /* i : Cholesky matrix A */ + const Word16 N /* i : vector & matrix size */ +); + void v_mult_mat_fx( Word32 *y_fx, /* o : the product x*A */ Word16 *y_q_fx, diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index d6210dfc7..9f1a42f29 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -642,6 +642,45 @@ Word32 dot_product_cholesky_fixed( return suma; } +/*---------------------------------------------------------------------* + * dot_product_cholesky() + * + * Calculates dot product of type x'*A*A'*x, where x is column vector of size m, + * and A is a Cholesky decomposition of some Hermitian matrix S whose size is m*m. + * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise) + *---------------------------------------------------------------------*/ + +/*! r: the dot product x'*A*A'*x */ +Word64 dot_product_cholesky_fixed64( + const Word32 *x, /* i : vector x Q31 - exp_x*/ + const Word32 *A, /* i : Cholesky matrix A Q31 - exp_A*/ + const Word16 N /* i : vector & matrix size Q0*/ +) +{ + Word16 i, j; + Word64 suma, tmp_sum; + Word32 mul; + const Word32 *pt_x, *pt_A; + pt_A = A; + suma = 0; + move32(); + FOR( i = 0; i < N; i++ ) + { + tmp_sum = 0; + move32(); + pt_x = x; + + FOR( j = 0; j <= i; j++ ) + { + mul = Mpy_32_32( *pt_x++, *pt_A++ ); + tmp_sum = W_add( tmp_sum, W_deposit32_l( mul ) ); + } + + suma = W_mac_32_32( suma, tmp_sum, tmp_sum ); // TODO: make sure that this does not overflow. + } + + return suma; +} void v_mult_mat_fixed( Word32 *y, /* o : the product x*A Qx - guardbits*/ const Word32 *x, /* i : vector x Qx*/ diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c index 295cff8a1..34744131f 100644 --- a/lib_enc/speech_music_classif_fx.c +++ b/lib_enc/speech_music_classif_fx.c @@ -1683,10 +1683,8 @@ Word16 ivas_smc_gmm_fx( Word16 flag_odv; Word32 lps_fx, lpm_fx, lpn_fx; Word32 ps_fx[N_SMC_MIXTURES], pm_fx[N_SMC_MIXTURES], pn_fx[N_SMC_MIXTURES]; - Word32 lprob_fx; - Word16 lprob_exp = 0; + Word64 wprob_fx; Word32 fvm_fx[N_PCA_COEF]; - Word16 fvm_exp = 0; Word32 sum_PS_fx, ps_diff_fx, ps_sta_fx; Word32 dlp_fx, wrelE_fx, wdrop_fx, wght_fx; Word32 wrise_fx; @@ -2273,23 +2271,16 @@ Word16 ivas_smc_gmm_fx( FOR( m = 0; m < N_SMC_MIXTURES; m++ ) { v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); - fvm_exp = sub( 31, Qfact_FV ); - lprob_exp = 0; - move16(); - lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); - ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 + wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); + ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 move32(); v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); - lprob_exp = 0; - move16(); - lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); - pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 + wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); + pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 move32(); v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); - lprob_exp = 0; - move16(); - lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); - pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 + wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); + pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 move32(); } -- GitLab From e86a8b5fe7ce3094bf769099ef03cc506dcc800f Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Wed, 26 Feb 2025 11:03:31 +0100 Subject: [PATCH 2/5] the cholesky_fixed64 function returns Q10 instead of Q18 now. --- lib_com/ivas_tools.c | 9 +++++---- lib_enc/speech_music_classif_fx.c | 12 ++++++------ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index 9f1a42f29..6cb13740a 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -660,10 +660,11 @@ Word64 dot_product_cholesky_fixed64( Word16 i, j; Word64 suma, tmp_sum; Word32 mul; + Word32 tmp; const Word32 *pt_x, *pt_A; pt_A = A; suma = 0; - move32(); + move64(); FOR( i = 0; i < N; i++ ) { tmp_sum = 0; @@ -675,10 +676,10 @@ Word64 dot_product_cholesky_fixed64( mul = Mpy_32_32( *pt_x++, *pt_A++ ); tmp_sum = W_add( tmp_sum, W_deposit32_l( mul ) ); } - - suma = W_mac_32_32( suma, tmp_sum, tmp_sum ); // TODO: make sure that this does not overflow. + tmp_sum = W_shr( tmp_sum, 4 ); // to make sure that the tmp_sum will not overflow + tmp = W_extract_l( tmp_sum ); + suma = W_mac_32_32( suma, tmp, tmp ); } - return suma; } void v_mult_mat_fixed( diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c index 34744131f..da93127bf 100644 --- a/lib_enc/speech_music_classif_fx.c +++ b/lib_enc/speech_music_classif_fx.c @@ -2271,16 +2271,16 @@ Word16 ivas_smc_gmm_fx( FOR( m = 0; m < N_SMC_MIXTURES; m++ ) { v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); - wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); - ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 + wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 + ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 move32(); v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); - wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); - pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 + wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 + pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 move32(); v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); - wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); - pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 + wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 + pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 move32(); } -- GitLab From d3d63a8b48f4661a64f9dfc3307c043fd090ed9b Mon Sep 17 00:00:00 2001 From: Thomas Dettbarn Date: Wed, 26 Feb 2025 14:37:27 +0100 Subject: [PATCH 3/5] applied the clang patch. --- lib_com/ivas_tools.c | 4 ++-- lib_enc/speech_music_classif_fx.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index 6cb13740a..2b62a0e5a 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -654,7 +654,7 @@ Word32 dot_product_cholesky_fixed( Word64 dot_product_cholesky_fixed64( const Word32 *x, /* i : vector x Q31 - exp_x*/ const Word32 *A, /* i : Cholesky matrix A Q31 - exp_A*/ - const Word16 N /* i : vector & matrix size Q0*/ + const Word16 N /* i : vector & matrix size Q0*/ ) { Word16 i, j; @@ -676,7 +676,7 @@ Word64 dot_product_cholesky_fixed64( mul = Mpy_32_32( *pt_x++, *pt_A++ ); tmp_sum = W_add( tmp_sum, W_deposit32_l( mul ) ); } - tmp_sum = W_shr( tmp_sum, 4 ); // to make sure that the tmp_sum will not overflow + tmp_sum = W_shr( tmp_sum, 4 ); // to make sure that the tmp_sum will not overflow tmp = W_extract_l( tmp_sum ); suma = W_mac_32_32( suma, tmp, tmp ); } diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c index da93127bf..fe6647a6c 100644 --- a/lib_enc/speech_music_classif_fx.c +++ b/lib_enc/speech_music_classif_fx.c @@ -2271,15 +2271,15 @@ Word16 ivas_smc_gmm_fx( FOR( m = 0; m < N_SMC_MIXTURES; m++ ) { v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); - wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 + wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 move32(); v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); - wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 + wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 move32(); v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); - wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 + wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 move32(); } -- GitLab From 9778e0b4fb9e4c9a420b1ce19416ed22a35618b1 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Fri, 28 Feb 2025 09:30:06 +0100 Subject: [PATCH 4/5] whitespace --- lib_com/ivas_tools.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index e0a971951..dc45e3f0e 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -642,6 +642,7 @@ Word32 dot_product_cholesky_fixed( return suma; } + /*---------------------------------------------------------------------* * dot_product_cholesky() * @@ -665,6 +666,7 @@ Word64 dot_product_cholesky_fixed64( pt_A = A; suma = 0; move64(); + FOR( i = 0; i < N; i++ ) { tmp_sum = 0; @@ -676,12 +678,15 @@ Word64 dot_product_cholesky_fixed64( mul = Mpy_32_32( *pt_x++, *pt_A++ ); tmp_sum = W_add( tmp_sum, W_deposit32_l( mul ) ); } + tmp_sum = W_shr( tmp_sum, 4 ); // to make sure that the tmp_sum will not overflow tmp = W_extract_l( tmp_sum ); suma = W_mac_32_32( suma, tmp, tmp ); } + return suma; } + void v_mult_mat_fixed( Word32 *y, /* o : the product x*A Qx - guardbits*/ const Word32 *x, /* i : vector x Qx*/ -- GitLab From 1e58f1597c8d14b4bcecd1658fd3a6e0f4a99ac7 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Tue, 4 Mar 2025 20:50:46 +0100 Subject: [PATCH 5/5] encapsulate changes --- lib_com/ivas_prot_fx.h | 7 ++++-- lib_com/ivas_tools.c | 14 ++++-------- lib_com/options.h | 1 + lib_enc/speech_music_classif_fx.c | 36 ++++++++++++++++++++++++++++--- 4 files changed, 43 insertions(+), 15 deletions(-) diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 6b09b1b47..1bdc33439 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -4706,6 +4706,8 @@ Word32 dot_product_cholesky_fx( const Word32 *A, /* i : Cholesky matrix A */ const Word16 N /* i : vector & matrix size */ ); + +#ifndef DOT_PROD_CHOLESKY_64BIT Word32 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x */ const Word32 *A, /* i : Cholesky matrix A */ @@ -4713,12 +4715,13 @@ Word32 dot_product_cholesky_fixed( const Word16 exp_x, const Word16 exp_A, Word16 *exp_sum ); - -Word64 dot_product_cholesky_fixed64( +#else +Word64 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x */ const Word32 *A, /* i : Cholesky matrix A */ const Word16 N /* i : vector & matrix size */ ); +#endif void v_mult_mat_fx( Word32 *y_fx, /* o : the product x*A */ diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index dc45e3f0e..d55766928 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -606,6 +606,7 @@ void v_sub32_fx( * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise) *---------------------------------------------------------------------*/ +#ifndef DOT_PROD_CHOLESKY_64BIT /*! r: the dot product x'*A*A'*x */ Word32 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x Q31 - exp_x*/ @@ -642,17 +643,9 @@ Word32 dot_product_cholesky_fixed( return suma; } - -/*---------------------------------------------------------------------* - * dot_product_cholesky() - * - * Calculates dot product of type x'*A*A'*x, where x is column vector of size m, - * and A is a Cholesky decomposition of some Hermitian matrix S whose size is m*m. - * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise) - *---------------------------------------------------------------------*/ - +#else /*! r: the dot product x'*A*A'*x */ -Word64 dot_product_cholesky_fixed64( +Word64 dot_product_cholesky_fixed( const Word32 *x, /* i : vector x Q31 - exp_x*/ const Word32 *A, /* i : Cholesky matrix A Q31 - exp_A*/ const Word16 N /* i : vector & matrix size Q0*/ @@ -686,6 +679,7 @@ Word64 dot_product_cholesky_fixed64( return suma; } +#endif void v_mult_mat_fixed( Word32 *y, /* o : the product x*A Qx - guardbits*/ diff --git a/lib_com/options.h b/lib_com/options.h index a37ccf387..5fc9a375f 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -169,4 +169,5 @@ #define FIX_1298 /* VA: fix possible assert in gaus_enc */ #define FIX_1300_ICA_SHIFT_QUANT_IMPROV /* VA: Fix to 1300 to improve precision of the lag quantizer */ #define FIX_1301_CORRECT_TD_CNST /* VA: Fix 1301, correct wrong constant in TD stereo */ +#define DOT_PROD_CHOLESKY_64BIT /* FhG: Issue 1323, optimized 64 bit implementation of dot_product_cholesky() */ #endif diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c index fe6647a6c..f0fc3f304 100644 --- a/lib_enc/speech_music_classif_fx.c +++ b/lib_enc/speech_music_classif_fx.c @@ -1683,8 +1683,16 @@ Word16 ivas_smc_gmm_fx( Word16 flag_odv; Word32 lps_fx, lpm_fx, lpn_fx; Word32 ps_fx[N_SMC_MIXTURES], pm_fx[N_SMC_MIXTURES], pn_fx[N_SMC_MIXTURES]; +#ifndef DOT_PROD_CHOLESKY_64BIT + Word32 lprob_fx; + Word16 lprob_exp = 0; +#else Word64 wprob_fx; +#endif Word32 fvm_fx[N_PCA_COEF]; +#ifndef DOT_PROD_CHOLESKY_64BIT + Word16 fvm_exp = 0; +#endif Word32 sum_PS_fx, ps_diff_fx, ps_sta_fx; Word32 dlp_fx, wrelE_fx, wdrop_fx, wght_fx; Word32 wrise_fx; @@ -2271,16 +2279,38 @@ Word16 ivas_smc_gmm_fx( FOR( m = 0; m < N_SMC_MIXTURES; m++ ) { v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); - wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 +#ifndef DOT_PROD_CHOLESKY_64BIT + fvm_exp = sub( 31, Qfact_FV ); + lprob_exp = 0; + move16(); + lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); + ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 +#else + wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 +#endif move32(); v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); - wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 +#ifndef DOT_PROD_CHOLESKY_64BIT + lprob_exp = 0; + move16(); + lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); + pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 +#else + wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 +#endif move32(); v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); - wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 +#ifndef DOT_PROD_CHOLESKY_64BIT + lprob_exp = 0; + move16(); + lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); + pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 +#else + wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10 pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18 +#endif move32(); } -- GitLab