From 95c647d848a904aa0ffd19d8aa0f014a974c8a34 Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Thu, 21 Nov 2024 14:38:57 +0530
Subject: [PATCH] Fixed-point changes corresponding to float reference code
 updates made in MRs 797 and 788

[x] Bug fix for inverse matrix computation
[x] fixes from !1826 and !1862 (flt.pt.) to the ivas-float-update branch
---
 lib_com/options.h                      |  2 ++
 lib_dec/ivas_stereo_mdct_core_dec_fx.c | 45 ++++++++++++++++++++++----
 lib_enc/enc_uv_fx.c                    |  5 +--
 lib_enc/ivas_cpe_enc.c                 | 11 ++++---
 lib_enc/ivas_decision_matrix_enc.c     | 27 +++++++++++++++-
 lib_enc/ivas_stereo_classifier.c       | 16 ++++++---
 lib_enc/ivas_stereo_td_enc.c           | 33 +++++++++++++++++--
 7 files changed, 118 insertions(+), 21 deletions(-)

diff --git a/lib_com/options.h b/lib_com/options.h
index 095c183e8..21e924688 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -196,6 +196,8 @@
 #define FIX_953_WRONG_ENERGY_RATIO_MASA_EXT     /* Nok: Fix 953 wrong energy ratio value after shift and cast to Word8 */
 #define FIX_982_WRONG_DECODED_ENERGY_RATIO      /* Nokia: Fix 982 wrong energy in EXT mode and in second direction when present */
 #define FIX_999_WRONG_ISM_EXTENDED_METADATA     /* VA: fix 999: fix ISM extended metadata decoding */
+#define NONBE_FIX_1205_TD_STEREO_MOD_CT         /* VA: fix mismatch of coder_type (mod_ct) btw. TD stereo encoder and decoder */
+#define NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING    /* FhG: fixes for decoder-side noise level estimation in MDCT-Stereo to prevent noise bursts in stereo switching */
 /* ################## End DEVELOPMENT switches ######################### */
 
 /* clang-format on */
diff --git a/lib_dec/ivas_stereo_mdct_core_dec_fx.c b/lib_dec/ivas_stereo_mdct_core_dec_fx.c
index 155e207b8..60e9c62db 100644
--- a/lib_dec/ivas_stereo_mdct_core_dec_fx.c
+++ b/lib_dec/ivas_stereo_mdct_core_dec_fx.c
@@ -988,7 +988,7 @@ static void run_min_stats_fx(
 {
     Word16 ch, will_estimate_noise_on_channel[CPE_CHANNELS], save_VAD[CPE_CHANNELS];
     Word32 power_spec[L_FRAME16k];
-    Word16 power_spec_16[L_FRAME16k], power_spec_e = 0;
+    Word16 power_spec_e = 0;
     move16();
     Word32 *spec_in;
     Word16 spec_e;
@@ -1044,29 +1044,62 @@ static void run_min_stats_fx(
             IF( ( EQ_16( will_estimate_noise_on_channel[0], will_estimate_noise_on_channel[1] ) ) || EQ_16( ch, 0 ) )
             {
                 Word16 tmp16 = getScaleFactor32( spec_in, L_FRAME16k );
+
+#ifdef NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING
+                Word32 power_spec_scale_fac;
+
+                /* calculate power spectrum from MDCT coefficients and estimated MDST coeffs */
+                power_spec_scale_fac = 20792; // 1.f / ( L_FRAME16k * L_FRAME16k ) in Q31
+                move32();
+                power_spec[0] = Mpy_32_32( W_extract_h( W_shl( W_mult_32_32( spec_in[0], spec_in[0] ), sub( tmp16, 4 ) ) ), power_spec_scale_fac ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */
+                move32();
+                power_spec[L_FRAME16k - 1] = Mpy_32_32( W_extract_h( W_shl( W_mult_32_32( spec_in[L_FRAME16k - 1], spec_in[L_FRAME16k - 1] ), sub( tmp16, 4 ) ) ), power_spec_scale_fac ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */
+                move32();
+#else
                 /* calculate power spectrum from MDCT coefficients and estimated MDST coeffs */
-                power_spec[0] = W_extract_h( W_shl( W_mult_32_32( spec_in[0], spec_in[0] ), sub( tmp16, 4 ) ) );                                        /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */
-                power_spec[L_FRAME16k - 1] = W_extract_h( W_shl( W_mult_32_32( spec_in[L_FRAME16k - 1], spec_in[L_FRAME16k - 1] ), sub( tmp16, 4 ) ) ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */
+                power_spec[0] = W_extract_h( W_shl( W_mult_32_32( spec_in[0], spec_in[0] ), sub( tmp16, 4 ) ) );                                                                                   /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */
+                power_spec[L_FRAME16k - 1] = W_extract_h( W_shl( W_mult_32_32( spec_in[L_FRAME16k - 1], spec_in[L_FRAME16k - 1] ), sub( tmp16, 4 ) ) );                                            /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */
+#endif
                 FOR( Word16 i = 1; i < L_FRAME16k - 1; i++ )
                 {
                     Word32 mdst;
-                    mdst = L_sub( spec_in[i + 1], spec_in[i - 1] );                                                                                                                                /* Q31 - x_e */
+                    mdst = L_sub( spec_in[i + 1], spec_in[i - 1] ); /* Q31 - x_e */
+
+#ifdef NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING
+                    power_spec[i] = Mpy_32_32( L_add( W_extract_h( W_shl( W_mult_32_32( spec_in[i], spec_in[i] ), sub( tmp16, 4 ) ) ), W_extract_h( W_shl( W_mult_32_32( mdst, mdst ), sub( tmp16, 4 ) ) ) ), power_spec_scale_fac ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31*/
+#else
                     power_spec[i] = L_add( W_extract_h( W_shl( W_mult_32_32( spec_in[i], spec_in[i] ), sub( tmp16, 4 ) ) ), W_extract_h( W_shl( W_mult_32_32( mdst, mdst ), sub( tmp16, 4 ) ) ) ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31*/
+#endif
                     move32();
                 }
                 power_spec_e = sub( add( 4, shl( spec_e, 1 ) ), tmp16 );
             }
 
-            Copy_Scale_sig32_16( power_spec, power_spec_16, L_FRAME16k, 0 ); /* exp(power_spec_e) */
-
+#ifndef NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING
             noisy_speech_detection_fx( st->hFdCngDec, st->VAD && st->m_frame_type == ACTIVE_FRAME, power_spec_16, sub( 15, power_spec_e ) );
 
             st->hFdCngDec->hFdCngCom->likelihood_noisy_speech = add( mult_r( 32440 /* 0.99 in Q15 */, st->hFdCngDec->hFdCngCom->likelihood_noisy_speech ), mult_r( st->hFdCngDec->hFdCngCom->flag_noisy_speech, 328 /* 0.01 in Q15 */ ) ); /* Q15 */
             move16();
             st->lp_noise = st->hFdCngDec->lp_noise; /* Q9.23 */
             move32();
+#endif
         }
 
+#ifdef NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING
+        IF( EQ_16( st->core, TCX_20_CORE ) )
+        {
+            Word16 x_fx_16[L_FRAME16k];
+            Copy_Scale_sig32_16( x[ch][0], x_fx_16, L_FRAME16k, 0 ); /* exp(x_e) */
+
+            test();
+            noisy_speech_detection_fx( st->hFdCngDec, save_VAD[ch] && EQ_16( st->m_frame_type, ACTIVE_FRAME ), x_fx_16, sub( Q15, x_e[ch][0] ) );
+            st->hFdCngDec->hFdCngCom->likelihood_noisy_speech = add( mult_r( 32440 /* 0.99 in Q15 */, st->hFdCngDec->hFdCngCom->likelihood_noisy_speech ), mult_r( st->hFdCngDec->hFdCngCom->flag_noisy_speech, 328 /* 0.01 in Q15 */ ) ); /* Q15 */
+            move16();
+            st->lp_noise = st->hFdCngDec->lp_noise; /* Q9.23 */
+            move32();
+        }
+#endif
+
         test();
         test();
         IF( will_estimate_noise_on_channel[0] || will_estimate_noise_on_channel[1] || st->bfi )
diff --git a/lib_enc/enc_uv_fx.c b/lib_enc/enc_uv_fx.c
index 7708c0e1c..afa32cd3a 100644
--- a/lib_enc/enc_uv_fx.c
+++ b/lib_enc/enc_uv_fx.c
@@ -282,6 +282,7 @@ void encod_unvoiced_ivas_fx(
 {
     Word16 xn_fx[L_SUBFR];   /* Target vector for pitch search     */
     Word16 h1_fx[L_SUBFR];   /* Impulse response vector            */
+    Word16 h2_fx[L_SUBFR];   /* Impulse response vector            */
     Word16 code_fx[L_SUBFR]; /* Fixed codebook excitation          */
     Word16 y2_fx[L_SUBFR];   /* Filtered algebraic excitation      */
     Word16 *pt_pitch_fx;     /* pointer to floating pitch buffer   */
@@ -348,7 +349,7 @@ void encod_unvoiced_ivas_fx(
         find_targets_fx( speech_fx, hLPDmem->mem_syn, i_subfr, &hLPDmem->mem_w0, p_Aq_fx,
                          res_fx, L_SUBFR, p_Aw_fx, st_fx->preemph_fac, xn_fx, cn_fx, h1_fx );
 
-        /*Copy_Scale_sig(h1_fx, h2_fx, L_SUBFR, -2);*/
+        Copy_Scale_sig( h1_fx, h2_fx, L_SUBFR, -2 );
         Scale_sig( h1_fx, L_SUBFR, add( 1, shift ) ); /* set h1[] in Q14 with scaling for convolution */
 
         /* scaling of xn[] to limit dynamic at 12 bits */
@@ -390,7 +391,7 @@ void encod_unvoiced_ivas_fx(
             // E_ACELP_innovative_codebook_fx( exc_fx, *pt_pitch_fx, 0, 1, gain_pit_fx, hLPDmem->tilt_code, acelp_cfg, i_subfr, p_Aq_fx, h1_fx, xn_fx, cn_fx, y1, y2_fx, (Word8) st_fx->acelp_autocorr, &prm, code_fx, shift, st_fx->L_frame, st_fx->last_L_frame, st_fx->total_brate, st_fx->element_mode );
             inov_encode_ivas_fx( st_fx, st_fx->core_brate, 0, L_FRAME, st_fx->last_L_frame,
                                  UNVOICED, st_fx->bwidth, st_fx->sharpFlag, i_subfr, -1, p_Aq_fx,
-                                 gain_pit_fx, cn_fx, exc_fx, h1_fx, hLPDmem->tilt_code, *pt_pitch_fx, xn_fx, code_fx, y2_fx, &unbits_PI, L_SUBFR, shift, Q_new );
+                                 gain_pit_fx, cn_fx, exc_fx, h2_fx, hLPDmem->tilt_code, *pt_pitch_fx, xn_fx, code_fx, y2_fx, &unbits_PI, L_SUBFR, shift, Q_new );
 
             E_ACELP_xy2_corr( xn_fx, y1, y2_fx, &g_corr, L_SUBFR, Q_xn );
 
diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c
index 9f9b7ad8d..e4205115c 100644
--- a/lib_enc/ivas_cpe_enc.c
+++ b/lib_enc/ivas_cpe_enc.c
@@ -678,8 +678,11 @@ ivas_error ivas_cpe_enc_fx(
 #ifdef IVAS_FLOAT_FIXED
 #ifdef IVAS_FLOAT_FIXED_CONVERSIONS
         Word16 input_fx[2][L_FRAME48k];
-        floatToFixed_arr16( sts[1]->input, input_fx[0], 0, input_frame );
-        floatToFixed_arr16( sts[0]->input, input_fx[1], 0, input_frame );
+        Word16 tmpppp;
+        tmpppp = s_min( Q_factor_arr( sts[1]->input, input_frame ), Q_factor_arr( sts[0]->input, input_frame ) );
+
+        floatToFixed_arr16( sts[1]->input, input_fx[1], tmpppp, input_frame );
+        floatToFixed_arr16( sts[0]->input, input_fx[0], tmpppp, input_frame );
 #endif
         Word16 tdm_SM_flag;
         IF( hCPE->hStereoTD->tdm_LRTD_flag == 0 )
@@ -695,8 +698,8 @@ ivas_error ivas_cpe_enc_fx(
         stereo_tdm_downmix_ivas_fx( hCPE->hStereoTD, input_fx[0], input_fx[1], input_frame, tdm_ratio_idx, tdm_SM_flag, tdm_ratio_idx_SM );
 
 #ifdef IVAS_FLOAT_FIXED_CONVERSIONS
-        fixedToFloat_arr( input_fx[0], sts[0]->input, 0, input_frame );
-        fixedToFloat_arr( input_fx[1], sts[1]->input, 0, input_frame );
+        fixedToFloat_arr( input_fx[0], sts[0]->input, tmpppp, input_frame );
+        fixedToFloat_arr( input_fx[1], sts[1]->input, tmpppp, input_frame );
 #endif
 #else
         stereo_tdm_downmix( hCPE->hStereoTD, sts[0]->input, sts[1]->input, input_frame, tdm_ratio_idx, ( ( hCPE->hStereoTD->tdm_LRTD_flag == 0 ) ? tdm_SM_or_LRTD_Pri : 0 ), tdm_ratio_idx_SM );
diff --git a/lib_enc/ivas_decision_matrix_enc.c b/lib_enc/ivas_decision_matrix_enc.c
index b3ae38f10..3f9ab4f9b 100644
--- a/lib_enc/ivas_decision_matrix_enc.c
+++ b/lib_enc/ivas_decision_matrix_enc.c
@@ -498,10 +498,24 @@ void ivas_decision_matrix_enc_fx(
     IF( EQ_16( st->core, TCX_20_CORE ) && LT_32( st->total_brate, STEREO_TCX_MIN_RATE ) )
     {
         st->core = ACELP_CORE;
+
+#ifdef NONBE_FIX_1205_TD_STEREO_MOD_CT
+        test();
+        test();
+        test();
+        /* In TD stereo below 24.4 kbps we cannot overwrite the `coder_type` when it is set to TRANSITION, */
+        /* as it is used for TD stereo bit allocation. To ensure consistent bit allocation, it must remain unchanged on the decoder side. */
+        if ( st->idchan == 0 && !( LT_32( element_brate, IVAS_24k4 ) && EQ_16( st->coder_type, TRANSITION ) && EQ_16( st->element_mode, IVAS_CPE_TD ) ) )
+        {
+            st->coder_type = AUDIO;
+            move16();
+        }
+#else
         st->coder_type = AUDIO;
+        move16();
+#endif
         st->sp_aud_decision2 = 0;
 
-        move16();
         move16();
         move16();
 
@@ -844,6 +858,16 @@ void ivas_signaling_enc_fx(
 
                 IF( EQ_16( st->core, ACELP_CORE ) )
                 {
+#ifdef NONBE_FIX_1205_TD_STEREO_MOD_CT
+                    /* write coder type */
+                    push_indice( hBstr, IND_ACELP_SIGNALLING, st->coder_type, 3 );
+
+                    IF( GE_32( element_brate, FRMT_SHP_MIN_BRATE_IVAS ) )
+                    {
+                        /* write sharpening flag */
+                        push_indice( hBstr, IND_SHARP_FLAG, st->sharpFlag, 1 );
+                    }
+#else
                     IF( LT_32( element_brate, FRMT_SHP_MIN_BRATE_IVAS ) )
                     {
                         push_indice( hBstr, IND_ACELP_SIGNALLING, st->coder_type, 3 );
@@ -856,6 +880,7 @@ void ivas_signaling_enc_fx(
                         /* write sharpening flag */
                         push_indice( hBstr, IND_SHARP_FLAG, st->sharpFlag, 1 );
                     }
+#endif
 
                     /* write extension layer flag to distinguish between TBE (0) and BWE (1) */
                     IF( st->extl_brate > 0 )
diff --git a/lib_enc/ivas_stereo_classifier.c b/lib_enc/ivas_stereo_classifier.c
index b85e182f3..d78fd7433 100644
--- a/lib_enc/ivas_stereo_classifier.c
+++ b/lib_enc/ivas_stereo_classifier.c
@@ -1463,15 +1463,21 @@ void unclr_classifier_dft_fx(
 
 
     /* normalize score to -1:+1 */
-    /*if (score > UNCLR_SCORE_THR)
+    IF( BASOP_Util_Add_Mant32Exp( score, score_e, -UNCLR_SCORE_THR_Q28, 3, &i ) > 0 )
     {
-        score = UNCLR_SCORE_THR;
+        score = UNCLR_SCORE_THR_Q28;
+        move32();
+        score_e = 3;
+        move16();
     }
-    else if (score < -UNCLR_SCORE_THR)
+    ELSE IF( BASOP_Util_Add_Mant32Exp( score, score_e, UNCLR_SCORE_THR_Q28, 3, &i ) < 0 )
     {
-        score = -UNCLR_SCORE_THR;
+        score = -UNCLR_SCORE_THR_Q28;
+        move32();
+        score_e = 3;
+        move16();
     }
-    score /= 2 * UNCLR_SCORE_THR;*/
+
     score = L_shr_r_sat( score, sub( 3, score_e ) ); // Q31
 
     /* weight raw score with relative energy */
diff --git a/lib_enc/ivas_stereo_td_enc.c b/lib_enc/ivas_stereo_td_enc.c
index 5c0b6a6f7..79181844d 100644
--- a/lib_enc/ivas_stereo_td_enc.c
+++ b/lib_enc/ivas_stereo_td_enc.c
@@ -832,6 +832,32 @@ void tdm_configure_enc_fx(
 
     mod_ct = AUDIO;
     move16();
+#ifdef NONBE_FIX_1205_TD_STEREO_MOD_CT
+    IF( LT_32( hCPE->element_brate, IVAS_24k4 ) )
+    {
+        test();
+        test();
+        test();
+        test();
+        test();
+        test();
+        test();
+        test();
+        test();
+        /* In TD stereo, the TRANSITION mode has a specific bit allocation. All other formats share the same bit allocation. For these other formats, `mod_ct` is set to AUDIO to aid in debugging, though it does not have any functional impact. */
+        if ( !( sts[0]->localVAD == 0 && EQ_16( sts[0]->coder_type, TRANSITION ) ) &&
+             ( EQ_16( sts[0]->coder_type, TRANSITION ) ||
+               ( ( ( GE_16( sts[0]->last_L_frame, L_FRAME16k ) && sts[0]->flag_ACELP16k == 0 ) ||
+                   ( EQ_16( sts[0]->last_L_frame, L_FRAME ) && EQ_16( sts[0]->flag_ACELP16k, 1 ) ) ) &&
+                 ( sts[0]->last_core_brate != FRAME_NO_DATA ) &&
+                 NE_32( sts[0]->last_core_brate, SID_2k40 ) &&
+                 NE_16( sts[0]->coder_type_raw, VOICED ) ) ) )
+        {
+            mod_ct = TRANSITION;
+            move16();
+        }
+    }
+#else
     IF( LT_32( hCPE->element_brate, IVAS_24k4 ) )
     {
         mod_ct = sts[0]->coder_type;
@@ -857,6 +883,7 @@ void tdm_configure_enc_fx(
             move16();
         }
     }
+#endif
 
     /* Correction of tdm_inst_ratio_idx in case of TC in the seecondary channel */
     test();
@@ -1467,9 +1494,9 @@ static void tdm_downmix_fade_ivas_fx(
 
     FOR( i = start_index; i < end_index; i++ )
     {
-        FR_Y_fx[i] = add( mult( add( mult( Right_in_fx[i], extract_l( One_m_OldRatio_fx ) ), mult( Left_in_fx[i], extract_l( OldRatio_L_fx ) ) ), fade_out_fx ), mult( add( mult( Right_in_fx[i], extract_l( One_m_Ratio_fx ) ), mult( Left_in_fx[i], extract_l( ratio_L_fx ) ) ), fade_in_fx ) ); // Qx
-        LR_X_fx[i] = add( mult( sub( mult( Left_in_fx[i], extract_l( One_m_OldRatio_fx ) ), mult( Right_in_fx[i], extract_l( OldRatio_L_fx ) ) ), fade_out_fx ),
-                          mult( sub( mult( Left_in_fx[i], extract_l( One_m_Ratio_fx ) ), mult( Right_in_fx[i], extract_l( ratio_L_fx ) ) ),
+        FR_Y_fx[i] = add( mult( add( mult( Right_in_fx[i], extract_h( One_m_OldRatio_fx ) ), mult( Left_in_fx[i], extract_h( OldRatio_L_fx ) ) ), fade_out_fx ), mult( add( mult( Right_in_fx[i], extract_h( One_m_Ratio_fx ) ), mult( Left_in_fx[i], extract_h( ratio_L_fx ) ) ), fade_in_fx ) ); // Qx
+        LR_X_fx[i] = add( mult( sub( mult( Left_in_fx[i], extract_h( One_m_OldRatio_fx ) ), mult( Right_in_fx[i], extract_h( OldRatio_L_fx ) ) ), fade_out_fx ),
+                          mult( sub( mult( Left_in_fx[i], extract_h( One_m_Ratio_fx ) ), mult( Right_in_fx[i], extract_h( ratio_L_fx ) ) ),
                                 fade_in_fx ) ); // Qx
         move16();
         move16();
-- 
GitLab