From df4ab5e6d98ccf2250ef0b6daa10b9b7c1f0ec91 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Tue, 25 Feb 2025 15:39:37 +0100
Subject: [PATCH 1/5] replaced the costly basop_util_mant2exp() function with a
 64 bit addition. there is a potential overflow which needs to be addressed
 first.

---
 lib_com/ivas_prot_fx.h            |  6 +++++
 lib_com/ivas_tools.c              | 39 +++++++++++++++++++++++++++++++
 lib_enc/speech_music_classif_fx.c | 23 ++++++------------
 3 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h
index 1682c255b..1b6e7be85 100644
--- a/lib_com/ivas_prot_fx.h
+++ b/lib_com/ivas_prot_fx.h
@@ -4714,6 +4714,12 @@ Word32 dot_product_cholesky_fixed(
     const Word16 exp_A,
     Word16 *exp_sum );
 
+Word64 dot_product_cholesky_fixed64(
+    const Word32 *x, /* i  : vector x                        */
+    const Word32 *A, /* i  : Cholesky  matrix A              */
+    const Word16 N   /* i  : vector & matrix size            */
+);
+
 void v_mult_mat_fx(
     Word32 *y_fx, /* o  : the product x*A                         */
     Word16 *y_q_fx,
diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c
index d6210dfc7..9f1a42f29 100644
--- a/lib_com/ivas_tools.c
+++ b/lib_com/ivas_tools.c
@@ -642,6 +642,45 @@ Word32 dot_product_cholesky_fixed(
 
     return suma;
 }
+/*---------------------------------------------------------------------*
+ * dot_product_cholesky()
+ *
+ * Calculates dot product of type x'*A*A'*x, where x is column vector of size m,
+ * and A is a Cholesky decomposition of some Hermitian matrix S whose size is m*m.
+ * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise)
+ *---------------------------------------------------------------------*/
+
+/*! r: the dot product x'*A*A'*x */
+Word64 dot_product_cholesky_fixed64(
+    const Word32 *x, /* i  : vector x                        Q31 - exp_x*/
+    const Word32 *A, /* i  : Cholesky  matrix A              Q31 - exp_A*/
+    const Word16 N  /* i  : vector & matrix size            Q0*/
+)
+{
+    Word16 i, j;
+    Word64 suma, tmp_sum;
+    Word32 mul;
+    const Word32 *pt_x, *pt_A;
+    pt_A = A;
+    suma = 0;
+    move32();
+    FOR( i = 0; i < N; i++ )
+    {
+        tmp_sum = 0;
+        move32();
+        pt_x = x;
+
+        FOR( j = 0; j <= i; j++ )
+        {
+            mul = Mpy_32_32( *pt_x++, *pt_A++ );
+            tmp_sum = W_add( tmp_sum, W_deposit32_l( mul ) );
+        }
+
+        suma = W_mac_32_32( suma, tmp_sum, tmp_sum );	// TODO: make sure that this does not overflow. 
+    }
+
+    return suma;
+}
 void v_mult_mat_fixed(
     Word32 *y,       /* o  : the product x*A               Qx - guardbits*/
     const Word32 *x, /* i  : vector x                      Qx*/
diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c
index 295cff8a1..34744131f 100644
--- a/lib_enc/speech_music_classif_fx.c
+++ b/lib_enc/speech_music_classif_fx.c
@@ -1683,10 +1683,8 @@ Word16 ivas_smc_gmm_fx(
     Word16 flag_odv;
     Word32 lps_fx, lpm_fx, lpn_fx;
     Word32 ps_fx[N_SMC_MIXTURES], pm_fx[N_SMC_MIXTURES], pn_fx[N_SMC_MIXTURES];
-    Word32 lprob_fx;
-    Word16 lprob_exp = 0;
+    Word64 wprob_fx;
     Word32 fvm_fx[N_PCA_COEF];
-    Word16 fvm_exp = 0;
     Word32 sum_PS_fx, ps_diff_fx, ps_sta_fx;
     Word32 dlp_fx, wrelE_fx, wdrop_fx, wght_fx;
     Word32 wrise_fx;
@@ -2273,23 +2271,16 @@ Word16 ivas_smc_gmm_fx(
     FOR( m = 0; m < N_SMC_MIXTURES; m++ )
     {
         v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
-        fvm_exp = sub( 31, Qfact_FV );
-        lprob_exp = 0;
-        move16();
-        lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp );
-        ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
+        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
+        ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
         move32();
         v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
-        lprob_exp = 0;
-        move16();
-        lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp );
-        pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
+        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
+        pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
         move32();
         v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
-        lprob_exp = 0;
-        move16();
-        lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp );
-        pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
+        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
+        pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
         move32();
     }
 
-- 
GitLab


From e86a8b5fe7ce3094bf769099ef03cc506dcc800f Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Wed, 26 Feb 2025 11:03:31 +0100
Subject: [PATCH 2/5] the cholesky_fixed64 function returns Q10 instead of Q18
 now.

---
 lib_com/ivas_tools.c              |  9 +++++----
 lib_enc/speech_music_classif_fx.c | 12 ++++++------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c
index 9f1a42f29..6cb13740a 100644
--- a/lib_com/ivas_tools.c
+++ b/lib_com/ivas_tools.c
@@ -660,10 +660,11 @@ Word64 dot_product_cholesky_fixed64(
     Word16 i, j;
     Word64 suma, tmp_sum;
     Word32 mul;
+    Word32 tmp;
     const Word32 *pt_x, *pt_A;
     pt_A = A;
     suma = 0;
-    move32();
+    move64();
     FOR( i = 0; i < N; i++ )
     {
         tmp_sum = 0;
@@ -675,10 +676,10 @@ Word64 dot_product_cholesky_fixed64(
             mul = Mpy_32_32( *pt_x++, *pt_A++ );
             tmp_sum = W_add( tmp_sum, W_deposit32_l( mul ) );
         }
-
-        suma = W_mac_32_32( suma, tmp_sum, tmp_sum );	// TODO: make sure that this does not overflow. 
+        tmp_sum = W_shr( tmp_sum, 4 );	// to make sure that the tmp_sum will not overflow
+        tmp = W_extract_l( tmp_sum );
+        suma = W_mac_32_32( suma, tmp, tmp );
     }
-
     return suma;
 }
 void v_mult_mat_fixed(
diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c
index 34744131f..da93127bf 100644
--- a/lib_enc/speech_music_classif_fx.c
+++ b/lib_enc/speech_music_classif_fx.c
@@ -2271,16 +2271,16 @@ Word16 ivas_smc_gmm_fx(
     FOR( m = 0; m < N_SMC_MIXTURES; m++ )
     {
         v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
-        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
-        ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
+        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );	// Q10
+        ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
         move32();
         v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
-        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
-        pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
+        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );	// Q10
+        pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
         move32();
         v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
-        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
-        pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q18 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
+        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );	// Q10
+        pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
         move32();
     }
 
-- 
GitLab


From d3d63a8b48f4661a64f9dfc3307c043fd090ed9b Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Wed, 26 Feb 2025 14:37:27 +0100
Subject: [PATCH 3/5] applied the clang patch.

---
 lib_com/ivas_tools.c              | 4 ++--
 lib_enc/speech_music_classif_fx.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c
index 6cb13740a..2b62a0e5a 100644
--- a/lib_com/ivas_tools.c
+++ b/lib_com/ivas_tools.c
@@ -654,7 +654,7 @@ Word32 dot_product_cholesky_fixed(
 Word64 dot_product_cholesky_fixed64(
     const Word32 *x, /* i  : vector x                        Q31 - exp_x*/
     const Word32 *A, /* i  : Cholesky  matrix A              Q31 - exp_A*/
-    const Word16 N  /* i  : vector & matrix size            Q0*/
+    const Word16 N   /* i  : vector & matrix size            Q0*/
 )
 {
     Word16 i, j;
@@ -676,7 +676,7 @@ Word64 dot_product_cholesky_fixed64(
             mul = Mpy_32_32( *pt_x++, *pt_A++ );
             tmp_sum = W_add( tmp_sum, W_deposit32_l( mul ) );
         }
-        tmp_sum = W_shr( tmp_sum, 4 );	// to make sure that the tmp_sum will not overflow
+        tmp_sum = W_shr( tmp_sum, 4 ); // to make sure that the tmp_sum will not overflow
         tmp = W_extract_l( tmp_sum );
         suma = W_mac_32_32( suma, tmp, tmp );
     }
diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c
index da93127bf..fe6647a6c 100644
--- a/lib_enc/speech_music_classif_fx.c
+++ b/lib_enc/speech_music_classif_fx.c
@@ -2271,15 +2271,15 @@ Word16 ivas_smc_gmm_fx(
     FOR( m = 0; m < N_SMC_MIXTURES; m++ )
     {
         v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
-        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );	// Q10
+        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );               // Q10
         ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
         move32();
         v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
-        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );	// Q10
+        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );              // Q10
         pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
         move32();
         v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
-        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );	// Q10
+        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );              // Q10
         pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
         move32();
     }
-- 
GitLab


From 9778e0b4fb9e4c9a420b1ce19416ed22a35618b1 Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Fri, 28 Feb 2025 09:30:06 +0100
Subject: [PATCH 4/5] whitespace

---
 lib_com/ivas_tools.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c
index e0a971951..dc45e3f0e 100644
--- a/lib_com/ivas_tools.c
+++ b/lib_com/ivas_tools.c
@@ -642,6 +642,7 @@ Word32 dot_product_cholesky_fixed(
 
     return suma;
 }
+
 /*---------------------------------------------------------------------*
  * dot_product_cholesky()
  *
@@ -665,6 +666,7 @@ Word64 dot_product_cholesky_fixed64(
     pt_A = A;
     suma = 0;
     move64();
+
     FOR( i = 0; i < N; i++ )
     {
         tmp_sum = 0;
@@ -676,12 +678,15 @@ Word64 dot_product_cholesky_fixed64(
             mul = Mpy_32_32( *pt_x++, *pt_A++ );
             tmp_sum = W_add( tmp_sum, W_deposit32_l( mul ) );
         }
+
         tmp_sum = W_shr( tmp_sum, 4 ); // to make sure that the tmp_sum will not overflow
         tmp = W_extract_l( tmp_sum );
         suma = W_mac_32_32( suma, tmp, tmp );
     }
+
     return suma;
 }
+
 void v_mult_mat_fixed(
     Word32 *y,       /* o  : the product x*A               Qx - guardbits*/
     const Word32 *x, /* i  : vector x                      Qx*/
-- 
GitLab


From 1e58f1597c8d14b4bcecd1658fd3a6e0f4a99ac7 Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Tue, 4 Mar 2025 20:50:46 +0100
Subject: [PATCH 5/5] encapsulate changes

---
 lib_com/ivas_prot_fx.h            |  7 ++++--
 lib_com/ivas_tools.c              | 14 ++++--------
 lib_com/options.h                 |  1 +
 lib_enc/speech_music_classif_fx.c | 36 ++++++++++++++++++++++++++++---
 4 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h
index 6b09b1b47..1bdc33439 100644
--- a/lib_com/ivas_prot_fx.h
+++ b/lib_com/ivas_prot_fx.h
@@ -4706,6 +4706,8 @@ Word32 dot_product_cholesky_fx(
     const Word32 *A, /* i  : Cholesky  matrix A              */
     const Word16 N   /* i  : vector & matrix size            */
 );
+
+#ifndef DOT_PROD_CHOLESKY_64BIT
 Word32 dot_product_cholesky_fixed(
     const Word32 *x, /* i  : vector x                        */
     const Word32 *A, /* i  : Cholesky  matrix A              */
@@ -4713,12 +4715,13 @@ Word32 dot_product_cholesky_fixed(
     const Word16 exp_x,
     const Word16 exp_A,
     Word16 *exp_sum );
-
-Word64 dot_product_cholesky_fixed64(
+#else
+Word64 dot_product_cholesky_fixed(
     const Word32 *x, /* i  : vector x                        */
     const Word32 *A, /* i  : Cholesky  matrix A              */
     const Word16 N   /* i  : vector & matrix size            */
 );
+#endif
 
 void v_mult_mat_fx(
     Word32 *y_fx, /* o  : the product x*A                         */
diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c
index dc45e3f0e..d55766928 100644
--- a/lib_com/ivas_tools.c
+++ b/lib_com/ivas_tools.c
@@ -606,6 +606,7 @@ void v_sub32_fx(
  * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise)
  *---------------------------------------------------------------------*/
 
+#ifndef DOT_PROD_CHOLESKY_64BIT
 /*! r: the dot product x'*A*A'*x */
 Word32 dot_product_cholesky_fixed(
     const Word32 *x, /* i  : vector x                        Q31 - exp_x*/
@@ -642,17 +643,9 @@ Word32 dot_product_cholesky_fixed(
 
     return suma;
 }
-
-/*---------------------------------------------------------------------*
- * dot_product_cholesky()
- *
- * Calculates dot product of type x'*A*A'*x, where x is column vector of size m,
- * and A is a Cholesky decomposition of some Hermitian matrix S whose size is m*m.
- * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise)
- *---------------------------------------------------------------------*/
-
+#else
 /*! r: the dot product x'*A*A'*x */
-Word64 dot_product_cholesky_fixed64(
+Word64 dot_product_cholesky_fixed(
     const Word32 *x, /* i  : vector x                        Q31 - exp_x*/
     const Word32 *A, /* i  : Cholesky  matrix A              Q31 - exp_A*/
     const Word16 N   /* i  : vector & matrix size            Q0*/
@@ -686,6 +679,7 @@ Word64 dot_product_cholesky_fixed64(
 
     return suma;
 }
+#endif
 
 void v_mult_mat_fixed(
     Word32 *y,       /* o  : the product x*A               Qx - guardbits*/
diff --git a/lib_com/options.h b/lib_com/options.h
index a37ccf387..5fc9a375f 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -169,4 +169,5 @@
 #define FIX_1298                                /* VA: fix possible assert in gaus_enc */
 #define FIX_1300_ICA_SHIFT_QUANT_IMPROV         /* VA: Fix to 1300 to improve precision of the lag quantizer */
 #define FIX_1301_CORRECT_TD_CNST                /* VA: Fix 1301, correct wrong constant in TD stereo */
+#define DOT_PROD_CHOLESKY_64BIT                 /* FhG: Issue 1323, optimized 64 bit implementation of dot_product_cholesky() */
 #endif
diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c
index fe6647a6c..f0fc3f304 100644
--- a/lib_enc/speech_music_classif_fx.c
+++ b/lib_enc/speech_music_classif_fx.c
@@ -1683,8 +1683,16 @@ Word16 ivas_smc_gmm_fx(
     Word16 flag_odv;
     Word32 lps_fx, lpm_fx, lpn_fx;
     Word32 ps_fx[N_SMC_MIXTURES], pm_fx[N_SMC_MIXTURES], pn_fx[N_SMC_MIXTURES];
+#ifndef DOT_PROD_CHOLESKY_64BIT
+    Word32 lprob_fx;
+    Word16 lprob_exp = 0;
+#else
     Word64 wprob_fx;
+#endif
     Word32 fvm_fx[N_PCA_COEF];
+#ifndef DOT_PROD_CHOLESKY_64BIT
+    Word16 fvm_exp = 0;
+#endif
     Word32 sum_PS_fx, ps_diff_fx, ps_sta_fx;
     Word32 dlp_fx, wrelE_fx, wdrop_fx, wght_fx;
     Word32 wrise_fx;
@@ -2271,16 +2279,38 @@ Word16 ivas_smc_gmm_fx(
     FOR( m = 0; m < N_SMC_MIXTURES; m++ )
     {
         v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
-        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );               // Q10
+#ifndef DOT_PROD_CHOLESKY_64BIT
+        fvm_exp = sub( 31, Qfact_FV );
+        lprob_exp = 0;
+        move16();
+        lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp );
+        ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
+#else
+        wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );               // Q10
         ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
+#endif
         move32();
         v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
-        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );              // Q10
+#ifndef DOT_PROD_CHOLESKY_64BIT
+        lprob_exp = 0;
+        move16();
+        lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp );
+        pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
+#else
+        wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );              // Q10
         pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
+#endif
         move32();
         v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
-        wprob_fx = dot_product_cholesky_fixed64( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );              // Q10
+#ifndef DOT_PROD_CHOLESKY_64BIT
+        lprob_exp = 0;
+        move16();
+        lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp );
+        pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), L_shl( lprob_fx, sub( Q18 - 1, sub( Q31, lprob_exp ) ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
+#else
+        wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );              // Q10
         pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_shr( wprob_fx, Q10 ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
+#endif
         move32();
     }
 
-- 
GitLab