From 588e13202013b76785ce4f5e7ab9e821bc8a6567 Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Wed, 19 Jun 2024 12:39:27 +0530
Subject: [PATCH 1/2] Fix issue 771 (frequency peak for DTX stereo at lower
 bitrates) and issue 772 (noise burst for DTX stereo at lower bitrates with
 Fs 16 kHz)

---
 lib_com/prot.h                |   5 +
 lib_com/tools.c               |  24 ++
 lib_dec/cng_dec_fx.c          |  10 -
 lib_dec/core_switching_dec.c  |   2 +-
 lib_dec/ivas_core_dec.c       |  41 ++-
 lib_dec/ivas_stereo_cng_dec.c | 470 +++++++++++++---------------------
 lib_dec/ivas_stereo_dft_dec.c |  10 +-
 7 files changed, 235 insertions(+), 327 deletions(-)

diff --git a/lib_com/prot.h b/lib_com/prot.h
index 817465058..ed69b9f6e 100644
--- a/lib_com/prot.h
+++ b/lib_com/prot.h
@@ -171,6 +171,11 @@ Word32 sum2_f_32_fx(
     const Word32 *vec,  /* i  : input vector                          */
     const int16_t lvec, /* i  : length of input vector                */
     Word16 gb );
+
+Word32 sum2_32_fx( /* o  : sum of squares of vec[] (mantissa), exponent in *e */
+    const Word32 *vec, /* i  : input vector                          */
+    const Word16 lvec, /* i  : length of input vector                */
+    Word16 *e ); /* i/o: in: exponent of vec[], out: exponent of the returned sum */
 #endif
 
 void set_c(
diff --git a/lib_com/tools.c b/lib_com/tools.c
index 4bb03acc0..6a6ea3d42 100644
--- a/lib_com/tools.c
+++ b/lib_com/tools.c
@@ -233,6 +233,30 @@ Word32 sum2_f_32_fx(
 
     return tmp;
 }
+
+Word32 sum2_32_fx( /* o  : sum of squares of vec[] (mantissa), exponent in *e */
+    const Word32 *vec, /* i  : input vector                          */
+    const Word16 lvec, /* i  : length of input vector                */
+    Word16 *e ) /* i/o: in: exponent of vec[], out: exponent of the returned sum */
+{
+    int16_t i;
+    Word32 tmp; /* running sum; its exponent is tracked separately in exp */
+
+    tmp = 0;
+    Word32 var_a = 0;
+    Word16 exp = 0, exp_tmp;
+    FOR( i = 0; i < lvec; i++ )
+    {
+        exp_tmp = norm_l( vec[i] ); /* normalisation shift for the current sample */
+        var_a = L_shl( vec[i], exp_tmp ); /* normalise before squaring */
+        var_a = Mpy_32_32( var_a, var_a ); /* square of the normalised sample */
+        exp_tmp = shl( sub( *e, exp_tmp ), 1 ); /* exponent of the square: 2 * ( input exponent - shift ) */
+        tmp = BASOP_Util_Add_Mant32Exp( tmp, exp, var_a, exp_tmp, &exp ); /* accumulate; exp receives the exponent of the new sum */
+    }
+    *e = exp; /* written only after the loop, so every iteration read the caller's input exponent */
+
+    return tmp;
+}
 #endif
 
 /*-------------------------------------------------------------------*
diff --git a/lib_dec/cng_dec_fx.c b/lib_dec/cng_dec_fx.c
index 1c5628c8c..15be52f70 100644
--- a/lib_dec/cng_dec_fx.c
+++ b/lib_dec/cng_dec_fx.c
@@ -684,16 +684,6 @@ void CNG_dec_fx(
         }
     }
 
-    /* Update the frame length memory */
-    st_fx->last_CNG_L_frame = st_fx->L_frame;
-    move16();
-
-    IF( NE_32( st_fx->core_brate, SID_1k75 ) )
-    {
-        hTdCngDec->num_ho = m;
-        move16();
-    }
-
     IF( st_fx->Opt_AMR_WB )
     {
         E_LPC_f_isp_a_conversion( st_fx->lspCNG_fx, Aq, M );
diff --git a/lib_dec/core_switching_dec.c b/lib_dec/core_switching_dec.c
index d2d4f6669..7e7bfbd6c 100644
--- a/lib_dec/core_switching_dec.c
+++ b/lib_dec/core_switching_dec.c
@@ -340,7 +340,7 @@ ivas_error core_switching_pre_dec_ivas_fx(
        within ACELP_CORE if switching from another bitarate to vbr, last_ppp and last_nelp is always updated in the previous frame */
     test();
     test();
-    IF( EQ_16( st->core, ACELP_CORE ) && ( NE_16( st->last_core, ACELP_CORE ) || NE_16( st->last_codec_mode, MODE2 ) ) )
+    IF( EQ_16( st->core, ACELP_CORE ) && ( NE_16( st->last_core, ACELP_CORE ) || EQ_16( st->last_codec_mode, MODE2 ) ) )
     {
         st->last_ppp_mode_dec = 0;
         st->last_nelp_mode_dec = 0;
diff --git a/lib_dec/ivas_core_dec.c b/lib_dec/ivas_core_dec.c
index 2f9af21ab..6ccf133f1 100644
--- a/lib_dec/ivas_core_dec.c
+++ b/lib_dec/ivas_core_dec.c
@@ -1194,11 +1194,14 @@ ivas_error ivas_core_dec(
 
             swb_CNG_dec_ivas_fx( st, synth_fxl, hb_synth_16_fx[n], sid_bw[n], negate( q ) );
 
-            Copy_Scale_sig_16_32( hb_synth_16_fx[n], hb_synth_32_fx[n], L_FRAME48k, add( Q11, q ) );
-            Copy_Scale_sig_16_32( synth_fxl, synth_32_fx[n], L_FRAME48k, add( Q11, q ) );
-            Scale_sig( st->hBWE_TD->state_lpc_syn_fx, LPC_SHB_ORDER, negate( sub( Q8, st->prev_Q_bwe_syn ) ) );
-            Scale_sig32( st->hBWE_TD->genSHBsynth_Hilbert_Mem_fx, HILBERT_MEM_SIZE, negate( sub( st->prev_Q_bwe_syn2, Q11 ) ) );
-            Copy_Scale_sig_16_32( st->hBWE_TD->genSHBsynth_state_lsyn_filt_shb_local_fx, st->hBWE_TD->genSHBsynth_state_lsyn_filt_shb_local_fx_32, imult1616( 2, ALLPASSSECTIONS_STEEP ), negate( sub( st->prev_Q_bwe_syn2, Q11 ) ) );
+            IF( EQ_32( st->core_brate, FRAME_NO_DATA ) || EQ_32( st->core_brate, SID_2k40 ) )
+            {
+                Copy_Scale_sig_16_32( hb_synth_16_fx[n], hb_synth_32_fx[n], L_FRAME48k, ( Q11 + q ) );
+            }
+            Copy_Scale_sig_16_32( synth_fxl, synth_32_fx[n], L_FRAME48k, ( Q11 + q ) );
+            Scale_sig( st->hBWE_TD->state_lpc_syn_fx, LPC_SHB_ORDER, -( Q8 - st->prev_Q_bwe_syn ) );
+            Scale_sig32( st->hBWE_TD->genSHBsynth_Hilbert_Mem_fx, HILBERT_MEM_SIZE, -( st->prev_Q_bwe_syn2 - Q11 ) );
+            Copy_Scale_sig_16_32( st->hBWE_TD->genSHBsynth_state_lsyn_filt_shb_local_fx, st->hBWE_TD->genSHBsynth_state_lsyn_filt_shb_local_fx_32, 2 * ALLPASSSECTIONS_STEEP, -( st->prev_Q_bwe_syn2 - Q11 ) );
         }
 
         /*-------------------------------------------------------------------*
@@ -1393,8 +1396,8 @@ ivas_error ivas_core_dec(
             test();
             IF( GE_16( output_frame, L_FRAME32k ) && GT_16( st->extl, SWB_CNG ) && EQ_16( st->core, ACELP_CORE ) && st->hTdCngDec != NULL )
             {
-                Word16 exp, fra;
-                Word16 gb;
+                Word16 exp;
+                Word32 fra;
                 SWITCH( output_frame )
                 {
                     case L_FRAME8k:
@@ -1417,14 +1420,24 @@ ivas_error ivas_core_dec(
 
                 L_tmp = L_deposit_l( 2 ); /*0.001 in Q11*/
 
-                gb = find_guarded_bits_fx( output_frame );
-                L_tmp = L_add( L_tmp, sum2_f_32_fx( hb_synth_32_fx[n], output_frame, sub( 11, gb ) ) );
+                Word16 exp2;
+                exp = 20;
+                move16();
+                Word32 L_tmp2 = sum2_32_fx( hb_synth_32_fx[n], output_frame, &exp );
+                L_tmp = BASOP_Util_Add_Mant32Exp( L_tmp, 20, L_tmp2, exp, &exp );
                 L_tmp = Mpy_32_16_1( L_tmp, tmp16 );
-                exp = norm_l( L_tmp );
-                fra = Log2_norm_lc( L_shl( L_tmp, exp ) );
-                exp = sub( sub( 30, shl( sub( Q11, 0 ), 1 ) ), exp );
-                L_tmp = Mpy_32_16( exp, fra, LG10 );
-                st->last_shb_ener_fx = round_fx_sat( L_shl_sat( L_tmp, 10 ) ); /*Q8*/
+                exp2 = norm_l( L_tmp );
+                exp = sub( exp, exp2 );
+                fra = BASOP_Util_Log2( L_shl_sat( L_tmp, exp2 ) );
+                exp2 = norm_l( exp );
+                L_tmp = L_shl( exp, exp2 );
+                exp = sub( Q31, exp2 );
+                L_tmp = BASOP_Util_Add_Mant32Exp( fra, 6, L_tmp, exp, &exp );
+                // exp = sub( sub( 30, shl( sub( Q11, 0 ), 1 ) ), exp );
+                L_tmp = Mpy_32_16_1( L_tmp, 24660 );
+                exp = add( exp, 2 );
+                st->last_shb_ener_fx = round_fx_sat( L_shl_sat( L_tmp, exp - 7 ) ); /*Q8*/
+                st->hTdCngDec->last_shb_ener_fx = L_shl_sat( L_tmp, exp - 20 );     /*Q11*/
             }
         }
 
diff --git a/lib_dec/ivas_stereo_cng_dec.c b/lib_dec/ivas_stereo_cng_dec.c
index 418d04fbf..ddb29199d 100644
--- a/lib_dec/ivas_stereo_cng_dec.c
+++ b/lib_dec/ivas_stereo_cng_dec.c
@@ -684,12 +684,11 @@ static void stereo_dft_generate_comfort_noise_fx(
     Word32 tmp, enr;
     Word16 q_enr;
     Word32 shb_shape[L_FRAME16k];
-    Word16 q_shb_shape[L_FRAME16k];
+    Word16 q_shb_shape;
     Word32 *ptr0, *ptr1, *ptr2;
-    Word16 *ptr_q_shb;
     Word16 dmpf[M + 2], Atmp[M + 2];
     Word32 cngNoiseLevel_upd[L_FRAME16k], cngNoiseLevel_hist[L_FRAME16k - 2];
-    Word16 q_cng_temp[L_FRAME16k];
+    Word16 e_cng_temp[L_FRAME16k];
     Word32 *ptr_tmp, *ptr_cng;
     Word32 E0, E1;
     Word16 b, q_cngNoiseLevel_upd, q_cngNoiseLevel;
@@ -699,32 +698,31 @@ static void stereo_dft_generate_comfort_noise_fx(
     Word16 scaleMS;
     Word16 scaleAvg;
     Word16 LR_ratio;
-    Word16 factor;
+    Word32 factor;
     Word16 alpha;
     Word32 ftmp;
     Word16 trigo_dec[STEREO_DFT32MS_N_16k / 2 + 1];
     const Word16 *pTrigo;
     Word16 trigo_step;
-    Word16 addl_guard_bits;
-    Word16 max_diff;
-    /*
-     * The additional guard bits data is added to tackle very small shb_cng_gain_fx_32.
-     * One additional guard bit is added for every 6dB post -12dB.
-     * -12dB in Q11 is (Word32)0xFFFFA000.
-     * The guard bits are calculated by converting the power of 10 exponent to power of 2 exponent.
-     */
-    addl_guard_bits = s_max( 1, shr( extract_l( L_shr( Mpy_32_16_1( L_sub( (Word32) 0xFFFFA000, st->hTdCngDec->shb_cng_gain_fx_32 ), (Word16) 0x2A85 ), Q11 ) ), 1 ) );
-
-    move16();
+    // Word16 addl_guard_bits;
+    // Word16 max_diff;
+    Word32 tmp32_1, tmp32_2;
+    Word16 q_div, q_sqrt1, q_sqrt2, q_sqrt, sqrt_res;
+    Word16 q_shift;
 
     hFdCngCom = st->hFdCngDec->hFdCngCom;
 
     push_wmops( "DFT_CNG" );
 
-    set_val_Word16( dmpf, 0, M + 2 );
-    set_val_Word16( Atmp, 0, M + 2 );
+    set16_fx( dmpf, 0, M + 2 );
+    set16_fx( Atmp, 0, M + 2 );
 
-    set_val_Word32( DFT[chan], 0, STEREO_DFT_BUF_MAX );
+    set_zero_fx( DFT[chan], STEREO_DFT_BUF_MAX );
+    set32_fx( cngNoiseLevel_upd, 0, L_FRAME16k );
+
+    q_shift = s_min( getScaleFactor32( hFdCngCom->cngNoiseLevel, FFTCLDFBLEN ), sub( hFdCngCom->cngNoiseLevelExp, 27 ) );
+    scale_sig32( hFdCngCom->cngNoiseLevel, FFTCLDFBLEN, q_shift );
+    hFdCngCom->cngNoiseLevelExp = sub( hFdCngCom->cngNoiseLevelExp, q_shift );
 
     enr = 0; /* Eliminates compiler warning. They are always set before they are used */
     move32();
@@ -736,25 +734,21 @@ static void stereo_dft_generate_comfort_noise_fx(
     move32();
     lp_noise = 0;
     move32();
-    q_cngNoiseLevel_upd = 0;
-    move16();
-    q_cngNoiseLevel = 0;
+    q_shb_shape = 0;
     move16();
 
     IF( EQ_16( chan, 0 ) )
     {
-        pSideGain = hStereoDft->side_gain_fx + STEREO_DFT_NBDIV * STEREO_DFT_BAND_MAX;
+        pSideGain = hStereoDft->side_gain_fx + i_mult( STEREO_DFT_NBDIV, STEREO_DFT_BAND_MAX );
         FOR( b = 0; b < hStereoDft->nbands; b++ )
         {
             IF( EQ_16( hStereoCng->xfade_frame_counter, 0 ) )
             {
                 hStereoDft->g_state_fx[b] = extract_h( *pSideGain++ );
-                move16();
             }
             ELSE
             {
                 hStereoDft->g_state_fx[b] = add( mult( ONE_MINUS_A_GFILT_FX, extract_h( *pSideGain++ ) ), mult( A_GFILT_FX, hStereoDft->g_state_fx[b] ) );
-                move16();
             }
 
             IF( hStereoCng->first_SID )
@@ -762,7 +756,6 @@ static void stereo_dft_generate_comfort_noise_fx(
                 IF( hStereoCng->first_SID_after_TD )
                 {
                     hStereoCng->cm_fx[b] = extract_h( Mpy_32_32( hStereoCng->c_LR_LT_fx, hStereoCng->c_LR_LT_fx ) );
-                    move16();
                 }
                 ELSE
                 {
@@ -775,41 +768,35 @@ static void stereo_dft_generate_comfort_noise_fx(
                 IF( GT_16( hStereoCng->nr_corr_frames, CORR_INIT ) )
                 {
                     hStereoCng->cm_fx[b] = extract_h( Mpy_32_32( hStereoCng->c_LR_LT_fx, hStereoCng->c_LR_LT_fx ) );
-                    move16();
                 }
                 ELSE
                 {
                     hStereoCng->cm_fx[b] = add( mult( ONE_MINUS_A_GFILT_FX, hStereoCng->coh_fx[b] ), mult( A_GFILT_FX, hStereoCng->cm_fx[b] ) );
-                    move16();
                 }
             }
             ELSE
             {
                 hStereoCng->cm_fx[b] = add( mult( ONE_MINUS_A_GFILT_FX, hStereoCng->coh_fx[b] ), mult( A_GFILT_FX, hStereoCng->cm_fx[b] ) );
-                move16();
             }
         }
 
         IF( hStereoCng->first_SID_after_TD )
         {
-            Word16 q_div, q_sqrt, q_sqrt2;
             scaleAvg = 0;
             move16();
             FOR( b = 0; b < hStereoDft->nbands; b++ )
             {
-                Word32 tmp_n, tmp_d, tmp_32, tmp_32_2;
-                Word16 sqrt_res;
-                IF( LT_16( hStereoCng->cm_fx[b], (Word32) ( 0x7333 ) ) )
+                IF( LT_16( hStereoCng->cm_fx[b], 0x7333 ) )
                 {
-                    gamma = hStereoCng->cm_fx[b];
+                    gamma = hStereoCng->cm_fx[b]; /* Q15 */
                     move16();
-                    gamma = BASOP_Util_Divide1616_Scale( gamma, sub( MAX_16, gamma ), &q_div );
+                    gamma = BASOP_Util_Divide1616_Scale( gamma, 32767 - gamma, &q_div ); /* e(q_div) */
                     q_sqrt2 = q_div + 16;
-                    tmp_32_2 = Sqrt32( gamma, &q_sqrt2 );
-                    tmp_32 = BASOP_Util_Add_Mant32Exp( gamma, 16 + q_div, sub( MAX_16, mult( hStereoDft->g_state_fx[b], hStereoDft->g_state_fx[b] ) ), 16, &q_sqrt );
-                    tmp_32 = Sqrt32( tmp_32, &q_sqrt );
-                    tmp_32 = BASOP_Util_Add_Mant32Exp( tmp_32, q_sqrt, L_negate( tmp_32_2 ), q_sqrt2, &q_sqrt );
-                    gamma = extract_h( L_shl( tmp_32, q_sqrt ) );
+                    tmp32_1 = BASOP_Util_Add_Mant32Exp( gamma, add( q_div, 16 ), sub( 32767, mult_r( hStereoDft->g_state_fx[b], hStereoDft->g_state_fx[b] ) ), 16, &q_sqrt1 ); /* e(q_sqrt1) */
+                    tmp32_1 = Sqrt32( tmp32_1, &q_sqrt1 );                                                                                                                     /* e(q_sqrt1) */
+                    tmp32_2 = Sqrt32( gamma, &q_sqrt2 );                                                                                                                       /* e(q_sqrt2) */
+                    tmp32_1 = BASOP_Util_Add_Mant32Exp( tmp32_1, q_sqrt1, L_negate( tmp32_2 ), q_sqrt2, &q_sqrt1 );                                                            /* e(q_sqrt1) */
+                    gamma = extract_h( L_shl( tmp32_1, q_sqrt1 ) );                                                                                                            /* Q15 */
                 }
                 ELSE
                 {
@@ -821,25 +808,25 @@ static void stereo_dft_generate_comfort_noise_fx(
                 c = BASOP_Util_Divide3232_Scale(
                     L_add( L_mult( add( ONE_IN_Q13, shr( hStereoDft->g_state_fx[b], 2 ) ),
                                    add( ONE_IN_Q13, shr( hStereoDft->g_state_fx[b], 2 ) ) ),
-                           L_shr( L_mult( gamma, gamma ), 2 ) ),
+                           L_shr( L_mult( gamma, gamma ), 4 ) ),
                     L_add( Mpy_32_32( sub( ONE_IN_Q13, shr( hStereoDft->g_state_fx[b], 2 ) ),
                                       sub( ONE_IN_Q13, shr( hStereoDft->g_state_fx[b], 2 ) ) ),
-                           L_shr( L_mult( gamma, gamma ), 2 ) ),
+                           L_shr( L_mult( gamma, gamma ), 4 ) ),
                     &q_div );
                 q_sqrt = q_div;
                 move16();
                 sqrt_res = Sqrt16( mult( c, hStereoCng->cm_fx[b] ), &q_sqrt );
                 // Add 1 to q_sqrt to account for multiplication with 2.0 in float computation.
                 q_sqrt = add( q_sqrt, 1 );
-                tmp_n = L_add( L_add( L_shl( 1, sub( Q15, q_div ) ), c ), L_shr( L_deposit_l( sqrt_res ), sub( q_div, q_sqrt ) ) );
+                tmp32_1 = L_add( L_add( L_shl( 1, sub( Q15, q_div ) ), c ), L_shr( L_deposit_l( sqrt_res ), sub( q_div, q_sqrt ) ) );
                 q_sqrt = q_div;
                 move16();
                 sqrt_res = Sqrt16( mult( c, hStereoCng->cm_fx[b] ), &q_sqrt );
-                tmp_d = L_shl( L_add( L_shl( L_deposit_l( mult( c, mult( LR_ratio, LR_ratio ) ) ), q_div ),
-                                      L_add( mult( mult( sub( MAX_16, LR_ratio ), sub( MAX_16, LR_ratio ) ), mult( targetGain, targetGain ) ),
-                                             L_shl( L_deposit_l( mult( mult( LR_ratio, sub( MAX_16, LR_ratio ) ), mult( targetGain, sqrt_res ) ) ), add( 1, q_sqrt ) ) ) ), // add(1, q_sqrt) to account for multiplication with 2 and also to maintain uniform q.
-                               2 );
-                scaleMS = BASOP_Util_Divide3232_Scale( tmp_n, tmp_d, &q_div );
+                tmp32_2 = L_shl( L_add( L_shl( L_deposit_l( mult( c, mult( LR_ratio, LR_ratio ) ) ), q_div ),
+                                        L_add( mult( mult( sub( MAX_16, LR_ratio ), sub( MAX_16, LR_ratio ) ), mult( targetGain, targetGain ) ),
+                                               L_shl( L_deposit_l( mult( mult( LR_ratio, sub( MAX_16, LR_ratio ) ), mult( targetGain, sqrt_res ) ) ), add( 1, q_sqrt ) ) ) ), // add(1, q_sqrt) to account for multiplication with 2 and also to maintain uniform q.
+                                 2 );
+                scaleMS = BASOP_Util_Divide3232_Scale( tmp32_1, tmp32_2, &q_div );
                 q_sqrt = q_div;
                 move16();
                 scaleMS = Sqrt16( scaleMS, &q_sqrt );
@@ -852,241 +839,159 @@ static void stereo_dft_generate_comfort_noise_fx(
 
     IF( EQ_16( st->cng_type, LP_CNG ) )
     {
-        Word16 q_sqrt, q_div, rshift_shb = 0, rshift_cng;
-        Word32 min_val;
-        move16();
-        set_val_Word32( cngNoiseLevel_upd, 0, st->L_frame );
+        set32_fx( cngNoiseLevel_upd, 0, st->L_frame );
 
         /* Deemphasis */
         dmpf[0] = MAX_16;
         move16();
         dmpf[1] = negate( st->preemph_fac );
         move16();
-        Copy( st->Aq_cng, Atmp, M + 1 );
+        mvs2s( st->Aq_cng, Atmp, M + 1 );
         conv_fx_32( Atmp, dmpf, cngNoiseLevel_upd, M + 2 );
 
         IF( EQ_16( st->L_frame, L_FRAME ) )
         {
             pTrigo = hStereoDft->dft_trigo_12k8_fx;
             trigo_step = STEREO_DFT_TRIGO_SRATE_12k8_STEP * STEREO_DFT_TRIGO_DEC_STEP;
-            move16();
         }
         ELSE
         {
             pTrigo = hStereoDft->dft_trigo_16k_fx;
             trigo_step = STEREO_DFT_TRIGO_SRATE_16k_STEP * STEREO_DFT_TRIGO_DEC_STEP;
-            move16();
         }
 
-        FOR( i = 0; i < shr( st->L_frame, 2 ); i++ )
+        FOR( i = 0; i < st->L_frame / 4; i++ )
         {
-            trigo_dec[i] = pTrigo[i * trigo_step];
-            move16();
-            trigo_dec[sub( shr( st->L_frame, 1 ), i )] = pTrigo[i * trigo_step];
-            move16();
+            trigo_dec[i] = pTrigo[i_mult( i, trigo_step )];
+            trigo_dec[sub( shr( st->L_frame, 1 ), i )] = pTrigo[i_mult( i, trigo_step )];
         }
-        trigo_dec[shr( st->L_frame, 2 )] = pTrigo[shr( st->L_frame, 2 ) * trigo_step];
-        move16();
+        trigo_dec[shr( st->L_frame, 2 )] = pTrigo[i_mult( shr( st->L_frame, 2 ), trigo_step )];
 
-        rshift_cng = getScaleFactor32( cngNoiseLevel_upd, st->L_frame );
-        rshift_cng = sub( find_guarded_bits_fx( st->L_frame ), rshift_cng );
-        /*
-         * The guardbits calculated here are not sufficient for all cases.
-         * Due to the gain values like shb_cng_gain and other things in the ensuing code blocks,
-         * additional guard bits have to be added. The choice based on existing test vector set is Q2
-         */
-        rshift_cng = add( rshift_cng, addl_guard_bits );
+        Word16 rshift_cng = sub( find_guarded_bits_fx( st->L_frame ), getScaleFactor32( cngNoiseLevel_upd, st->L_frame ) );
         v_shr_32( cngNoiseLevel_upd, cngNoiseLevel_upd, st->L_frame, rshift_cng );
-        // Input Q to fft will be Q30 - rshift.
+        q_cngNoiseLevel_upd = sub( Q30, rshift_cng );
+
         rfft_fx( cngNoiseLevel_upd, trigo_dec, st->L_frame, -1 );
-        // v_shr_32(cngNoiseLevel_upd, cngNoiseLevel_upd, st->L_frame, negate(rshift));
+
+        Word16 shift = sub( getScaleFactor32( cngNoiseLevel_upd, st->L_frame ), 1 );
+        scale_sig32( cngNoiseLevel_upd, st->L_frame, shift );
+        q_cngNoiseLevel_upd = add( q_cngNoiseLevel_upd, shift );
+        set16_fx( e_cng_temp, q_cngNoiseLevel_upd, st->L_frame );
 
         /* Compute 1/|A| */
         ptr0 = cngNoiseLevel_upd;
         ptr1 = ptr0 + 2;
         ptr2 = ptr1 + 1;
-        assert( st->lp_ener_fx > 0 );
+        assert( GT_32( st->lp_ener_fx, 0 ) );
+
         // lp_ener_fx will be in Q6 at this point.
         // So applying appropriate left shift on the denominator.
-        factor = shr( BASOP_Util_Divide3232_Scale( st->lp_ener_fx, L_shl( st->L_frame, Q6 ), &q_div ), 1 ); /* fixed factor  in the loop below */
-        factor = Sqrt16( factor, &q_div );
-        // there is multiplication with 2.0 that has to be applied on the factor and
-        // a left shift of q_div as shown in the below two steps.
-        // factor = shl(factor, add(q_div, 1));
-        // q_div = 0; move16();
-        // The left shift of q_div and 1 (because of multiplication factor 2.0) is handled below.
-        // by adjusting q_inv_sqrt.
-        minimum_abs32_fx( ptr0, st->L_frame, &min_val );
-        // q_inv_sqrt = sub( sub( Q31, norm_l( L_abs( min_val ) ) ), add( q_div, 1 ) );
-
-        /* cngNoiseLevel_upd buffer has Q of ( Q30 - rshift_cng )*/
-        FOR( i = 0; i < shr( st->L_frame, 1 ) - 1; i++ )
-        {
-            // if ((*ptr1 == 0) && (*ptr2 == 0))
-            //     assert(0);
-            // assert( (*ptr1 != 0) || (*ptr2 != 0));
-            IF( norm_l( *ptr1 ) > rshift_cng && norm_l( *ptr2 ) > rshift_cng )
-            {
-                ftmp = Madd_32_32( Mpy_32_32( L_shl( *ptr1, rshift_cng ), L_shl( *ptr1, rshift_cng ) ), L_shl( *ptr2, rshift_cng ), L_shl( *ptr2, rshift_cng ) );
-                q_sqrt = Q2;
-            }
-            ELSE
-            {
-                ftmp = Madd_32_32( Mpy_32_32( *ptr1, *ptr1 ), *ptr2, *ptr2 );
-                q_sqrt = sub( Q31, sub( shl( sub( Q30, rshift_cng ), 1 ), Q31 ) );
-            }
+        factor = L_shr( BASOP_Util_Divide3232_Scale_cadence( st->lp_ener_fx, L_shl( st->L_frame, Q6 ), &q_div ), 1 ); /* fixed factor  in the loop below */
+        factor = Sqrt32( factor, &q_div );
+        q_div = add( q_div, 1 );
+
+        Word16 q_tmp;
+        FOR( i = 0; i < sub( shr( st->L_frame, 1 ), 1 ); i++ )
+        {
+            ftmp = Madd_32_32( Mpy_32_32( *ptr1, *ptr1 ), *ptr2, *ptr2 );
+            assert( GT_32( ftmp, 0 ) );
+            q_sqrt = sub( 62, shl( q_cngNoiseLevel_upd, 1 ) );
             IF( EQ_32( ftmp, 0 ) )
             {
-                ftmp = EPSILON_FX;
-                move32();
+                ftmp = EPSILLON_FX;
                 q_sqrt = s_max( 0, q_sqrt );
             }
             tmp = ISqrt32( ftmp, &q_sqrt );
-            tmp = Mpy_32_16_1( tmp, factor );
-            q_cng_temp[i] = sub( add( sub( Q31, q_sqrt ), sub( Q15, add( q_div, 1 ) ) ), Q15 );
-            *ptr0++ = tmp;
-            move32();
+            e_cng_temp[i] = sub( 31, add( q_sqrt, q_div ) );
+            *ptr0++ = Mpy_32_32( factor, tmp );
             ptr1 += 2;
             ptr2 += 2;
         }
 
-        /* The initial values in the cngNoiseLevel_upd buffer, from index 0 to st->L_frame / 2 - 1, are updated in the for loop above,
-           with their Q factors stored in q_cng_temp. The values stored after index st->L_frame / 2 - 1 have a Q factor of (Q30 - rshift_cng).
-
-           Calculate the maximum difference between (Q30 - rshift_cng) and the headroom available for the values stored in cngNoiseLevel_upd
-           for the initial st->L_frame / 2 - 1 value that were updated in the above for loop.
-             - If the maximum difference is negative, the cngNoiseLevel_upd buffer has enough headroom to accommodate all the values
-               with a Q factor of (Q30 - rshift_cng).
-
-             - If the maximum difference is positive, some values in the cngNoiseLevel_upd buffer may overflow with a Q factor of
-               (Q30 - rshift_cng). In this case, scale the cngNoiseLevel_upd buffer to a Q factor of (Q30 - rshift_cng - max_diff)
-               to prevent overflows. */
-
-        max_diff = sub( sub( sub( Q30, rshift_cng ), q_cng_temp[0] ), norm_l( cngNoiseLevel_upd[0] ) );
-        FOR( i = 1; i < shr( st->L_frame, 1 ) - 1; i++ )
+        FOR( i = 0; i < sub( shr( st->L_frame, 1 ), 1 ); i++ )
         {
-            max_diff = s_max( max_diff, sub( sub( sub( Q30, rshift_cng ), q_cng_temp[i] ), norm_l( cngNoiseLevel_upd[i] ) ) );
+            q_cngNoiseLevel_upd = s_min( q_cngNoiseLevel_upd, e_cng_temp[i] + norm_l( cngNoiseLevel_upd[i] ) );
         }
-        IF( LT_16( max_diff, 0 ) )
+        FOR( i = 0; i < st->L_frame; i++ )
         {
-            FOR( i = 0; i < shr( st->L_frame, 1 ) - 1; i++ )
-            {
-                cngNoiseLevel_upd[i] = L_shl( cngNoiseLevel_upd[i], sub( sub( Q30, rshift_cng ), q_cng_temp[i] ) );
-            }
-            q_cngNoiseLevel_upd = sub( Q30, rshift_cng );
-        }
-        ELSE
-        {
-            FOR( i = 0; i < shr( st->L_frame, 1 ) - 1; i++ )
-            {
-                cngNoiseLevel_upd[i] = L_shl( cngNoiseLevel_upd[i], sub( sub( sub( Q30, rshift_cng ), q_cng_temp[i] ), max_diff ) );
-            }
-            scale_sig32( cngNoiseLevel_upd + shr( st->L_frame, 1 ) - 1, shr( st->L_frame, 1 ) + 1, -max_diff );
-            q_cngNoiseLevel_upd = sub( sub( Q30, rshift_cng ), max_diff );
+            cngNoiseLevel_upd[i] = L_shl( cngNoiseLevel_upd[i], sub( q_cngNoiseLevel_upd, e_cng_temp[i] ) );
         }
 
         IF( GT_16( sub( s_min( output_frame, L_FRAME32k ), hFdCngCom->stopFFTbin ), 0 ) )
         {
             /* Transform shb LP spectrum */
-            set_val_Word32( shb_shape, 0, L_FRAME16k );
-            set_val_Word16( q_shb_shape, 0, L_FRAME16k );
-            Copy_Scale_sig_16_32( st->hTdCngDec->shb_lpcCNG_fx, shb_shape, LPC_SHB_ORDER + 1, Q15 );
+            set32_fx( shb_shape, 0, L_FRAME16k );
+            Copy_Scale_sig_16_32( st->hTdCngDec->shb_lpcCNG_fx, shb_shape, LPC_SHB_ORDER + 1, Q15 ); // Q30
 
             IF( NE_16( st->L_frame, L_FRAME16k ) )
             {
                 pTrigo = hStereoDft->dft_trigo_16k_fx;
-                trigo_step = STEREO_DFT_TRIGO_SRATE_16k_STEP * STEREO_DFT_TRIGO_DEC_STEP;
-                move16();
+                trigo_step = i_mult( STEREO_DFT_TRIGO_SRATE_16k_STEP, STEREO_DFT_TRIGO_DEC_STEP );
+
                 FOR( i = 0; i < L_FRAME16k / 4; i++ )
                 {
-                    trigo_dec[i] = pTrigo[i * trigo_step];
-                    move16();
-                    trigo_dec[L_FRAME16k / 2 - i] = pTrigo[i * trigo_step];
-                    move16();
+                    trigo_dec[i] = pTrigo[i_mult( i, trigo_step )];
+                    trigo_dec[sub( shr( L_FRAME16k, 1 ), i )] = pTrigo[i_mult( i, trigo_step )];
                 }
-                trigo_dec[L_FRAME16k / 4] = pTrigo[L_FRAME16k / 4 * trigo_step];
-                move16();
+                trigo_dec[shr( L_FRAME16k, 2 )] = pTrigo[i_mult( shr( L_FRAME16k, 2 ), trigo_step )];
             }
 
-            rshift_shb = getScaleFactor32( shb_shape, L_FRAME16k );
-            rshift_shb = sub( find_guarded_bits_fx( L_FRAME16k ), rshift_shb );
-            /*
-             * The guardbits calculated here are not sufficient for all cases.
-             * Due to the gain values like shb_cng_gain and other things in the ensuing code blocks,
-             * additional guard bits have to be added. The choice based on existing test vector set is Q2
-             */
-            rshift_shb = add( rshift_shb, addl_guard_bits );
+            Word16 rshift_shb = sub( find_guarded_bits_fx( L_FRAME16k ), getScaleFactor32( shb_shape, L_FRAME16k ) );
             v_shr_32( shb_shape, shb_shape, L_FRAME16k, rshift_shb );
+            q_shb_shape = sub( Q30, rshift_shb );
+
             rfft_fx( shb_shape, trigo_dec, L_FRAME16k, -1 );
-            // v_shr_32(shb_shape, shb_shape, L_FRAME16k, negate(rshift));
+
+            shift = sub( getScaleFactor32( shb_shape, L_FRAME16k ), 1 );
+            scale_sig32( shb_shape, L_FRAME16k, shift );
+            q_shb_shape = add( q_shb_shape, shift );
 
             /* Compute 1/|A| */
-            enr = Madd_32_32( Mpy_32_32( L_shl( shb_shape[0], addl_guard_bits ), L_shl( shb_shape[0], addl_guard_bits ) ),
-                              L_shl( shb_shape[1], addl_guard_bits ), L_shl( shb_shape[1], addl_guard_bits ) );
-            q_enr = add( sub( shl( sub( Q30, rshift_shb ), 1 ), Q31 ), shl( addl_guard_bits, 1 ) );
-            IF( EQ_32( enr, 0 ) )
-            {
-                enr = EPSILON_FX;
-                move32();
-                q_enr = s_max( 0, q_enr );
-            }
+            enr = Madd_32_32( Mpy_32_32( shb_shape[0], shb_shape[0] ), shb_shape[1], shb_shape[1] );
+            q_enr = sub( 62, shl( q_shb_shape, 1 ) );
             ptr0 = shb_shape;
             ptr1 = ptr0 + 2;
             ptr2 = ptr1 + 1;
 
             FOR( i = 0; i < L_FRAME16k / 2 - 1; i++ )
             {
-                Word16 q_shift = sub( shl( sub( Q30, rshift_shb ), 1 ), Q31 );
-                // assert((*ptr1 != 0) || (*ptr2 != 0));
-                ftmp = Madd_32_32( Mpy_32_32( L_shl( *ptr1, addl_guard_bits ), L_shl( *ptr1, addl_guard_bits ) ),
-                                   L_shl( *ptr2, addl_guard_bits ), L_shl( *ptr2, addl_guard_bits ) );
-                q_shift = add( q_shift, shl( addl_guard_bits, 1 ) );
-                IF( EQ_32( ftmp, 0 ) )
-                {
-                    ftmp = EPSILON_FX;
-                    move32();
-                    q_shift = s_max( 0, q_shift );
-                }
-                ftmp = L_deposit_l( BASOP_Util_Divide3232_Scale( L_sub( L_shl( Q1, q_shift ), 1 ), ftmp, &q_div ) );
-                ftmp = L_shl( ftmp, sub( q_div, sub( Q15, q_shift ) ) );
+                ftmp = Madd_32_32( Mpy_32_32( *ptr1, *ptr1 ), *ptr2, *ptr2 );
+                q_tmp = sub( shl( q_shb_shape, 1 ), 31 );
+                assert( GT_32( ftmp, 0 ) );
+                q_div = 0;
+                ftmp = BASOP_Util_Divide3232_Scale_cadence( L_shl( 1, q_tmp ), ftmp, &q_div );
                 /* in float:
-                     both a = "div"=(1/(x^2+y^2) and sqrt(a)  is used and summed up in the same loop.
-
-                  in BASOP:
-                    sum up  using inv_sqrt( *ptr1 * *ptr1 + *ptr2 * *ptr2 ), in this loop
-                    and then sum up  enr = sum( *ptr0 * *ptr0 ),  in a subsequent MAC loop  */
-                enr = L_add( enr, ftmp );
-                q_div = sub( Q31, q_shift );
-                ftmp = Sqrt32( ftmp, &q_div );
-                // Reduce the Q of shb_shape back to its original Q i.e., Q30 - rshift
-                ftmp = L_shr( ftmp, sub( sub( Q31, q_div ), sub( Q30, rshift_shb ) ) );
-                *ptr0++ = ftmp;
-                move32();
+                both a = "div" = 1/(x^2+y^2) and sqrt(a) are used and summed up in the same loop.
+
+                in BASOP:
+                sum up  using inv_sqrt( *ptr1 * *ptr1 + *ptr2 * *ptr2 ), in this loop
+                and then sum up  enr = sum( *ptr0 * *ptr0 ),  in a subsequent MAC loop  */
+                enr = BASOP_Util_Add_Mant32Exp( enr, q_enr, ftmp, q_div, &q_enr );
+                tmp32_1 = Sqrt32( ftmp, &q_div );
+                *ptr0++ = L_shl( tmp32_1, sub( q_div, sub( 31, q_shb_shape ) ) );
                 ptr1 += 2;
                 ptr2 += 2;
             }
         }
 
         /* Update CNG noise level from MS noise estimation */
-        Copy32( st->hFdCngDec->bandNoiseShape, cngNoiseLevel_hist, sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ) );
+        mvl2l( st->hFdCngDec->bandNoiseShape, cngNoiseLevel_hist, sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ) );
         ptr_cng = cngNoiseLevel_hist;
-        FOR( i = 0; i < shr( sub( st->last_L_frame, hFdCngCom->startBand ), 1 ); i++ )
+        FOR( i = 0; i < sub( st->last_L_frame, hFdCngCom->startBand ) / 2; i++ )
         {
-            tmp = *( cngNoiseLevel_hist + i * 2 );
-            move32();
-            tmp = L_add( tmp, *( cngNoiseLevel_hist + i * 2 + 1 ) );
+            tmp = *( cngNoiseLevel_hist + shl( i, 1 ) );
+            tmp = L_add( tmp, *( cngNoiseLevel_hist + add( shl( i, 1 ), 1 ) ) );
             *ptr_cng++ = L_shr( tmp, 1 );
-            move32();
         }
 
-        IF( last_element_mode == IVAS_CPE_TD && chan == 0 && hStereoCng->xfade_frame_counter == 0 && !( hFdCngCom->msFrCnt_init_counter < hFdCngCom->msFrCnt_init_thresh ) )
+        IF( EQ_16( last_element_mode, IVAS_CPE_TD ) && EQ_16( chan, 0 ) && EQ_16( hStereoCng->xfade_frame_counter, 0 ) && GE_16( hFdCngCom->msFrCnt_init_counter, hFdCngCom->msFrCnt_init_thresh ) )
         {
             ptr_cng = cngNoiseLevel_hist + hFdCngCom->startBand;
             ptr_tmp = cngNoiseLevel_upd + hFdCngCom->startBand;
             FOR( i = 0; i < shr( sub( st->last_L_frame, hFdCngCom->startBand ), 1 ); i++ )
             {
                 E0 = L_add( E0, *ptr_cng++ );
-                E1 = Madd_32_32( E1, ( *ptr_tmp ), ( *ptr_tmp ) );
+                E1 = L_add( E1, Mpy_32_32( ( *ptr_tmp ), ( *ptr_tmp ) ) );
                 ptr_tmp++;
             }
 
@@ -1124,55 +1029,41 @@ static void stereo_dft_generate_comfort_noise_fx(
 
         FOR( k = 0; k < STEREO_DFT_NBDIV; k++ )
         {
-            Word16 shift_val = sub( Q31, q_cngNoiseLevel_upd );
             /* low band */
-            ptr_level = cngNoiseLevel_upd;
-            ptr_r = DFT[chan] + hFdCngCom->startBand + k * STEREO_DFT32MS_N_MAX;
+            ptr_level = cngNoiseLevel_upd;                                                      // Q(q_cngNoiseLevel_upd)
+            ptr_r = DFT[chan] + add( hFdCngCom->startBand, i_mult( k, STEREO_DFT32MS_N_MAX ) ); // Q(q_dft)
             ptr_i = ptr_r + 1;
             scale = shr( output_frame, 1 );
 
-            FOR( i = 0; i < shr( sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ), 1 ); i++ )
+            FOR( i = 0; i < ( hFdCngCom->stopFFTbin - hFdCngCom->startBand ) / 2; i++ )
             {
                 /* Real part in FFT bins */
                 rand_gauss_fx( ptr_r, &st->hTdCngDec->cng_seed, q_dft );
-                // the Q factor of ptr_level is q_cngNoiseLevel_upd
-                // To ensure the result of the multiplication is with optimal precision
-                // apply left shift on the input data and use it for multiplication
-                // result of multiplication will be in same Q as ptr_r buffer
-                tmp = imult3216( ( *ptr_r ), scale );
-                IF( GE_16( norm_l( tmp ), shift_val ) )
-                {
-                    ( *ptr_r ) = Mpy_32_32( L_shl( tmp, shift_val ), *ptr_level );
-                    move32();
-                }
-                ELSE
+                tmp32_1 = Mpy_32_16_1( *ptr_level, scale ); // Q(q_cngNoiseLevel_upd - 15)
+                q_shift = norm_l( tmp32_1 );
+                IF( GT_16( q_shift, sub( 46, q_cngNoiseLevel_upd ) ) )
                 {
-                    ( *ptr_r ) = L_shl( Mpy_32_32( tmp, *ptr_level ), shift_val );
-                    move32();
+                    q_shift = sub( 46, q_cngNoiseLevel_upd );
                 }
+                tmp32_1 = L_shl( tmp32_1, q_shift );
+                q_shift = sub( sub( 46, q_cngNoiseLevel_upd ), q_shift );
+
+                ( *ptr_r ) = L_shl( Mpy_32_32( ( *ptr_r ), tmp32_1 ), q_shift );
                 ptr_r += 2;
+
                 /* Imaginary part in FFT bins */
                 rand_gauss_fx( ptr_i, &st->hTdCngDec->cng_seed, q_dft );
-                tmp = imult3216( ( *ptr_i ), scale );
-                move32();
-                IF( GE_16( norm_l( tmp ), shift_val ) )
-                {
-                    ( *ptr_i ) = Mpy_32_32( L_shl( tmp, shift_val ), *ptr_level );
-                }
-                ELSE
-                {
-                    ( *ptr_i ) = L_shl( Mpy_32_32( tmp, *ptr_level ), shift_val );
-                }
+                ( *ptr_i ) = L_shl( Mpy_32_32( ( *ptr_i ), tmp32_1 ), q_shift );
                 ptr_i += 2;
                 ptr_level++;
             }
 
             IF( GT_16( sub( s_min( output_frame, L_FRAME32k ), hFdCngCom->stopFFTbin ), 0 ) )
             {
-                Word16 q_res = 0;
                 Word32 scale_32;
+                Word16 q_res;
                 /* high band generation, flipped spectrum */
-                assert( enr != 0 );
+                assert( NE_32( enr, 0 ) );
                 // 10 ^ (0.1 * st->hTdCngDec->shb_cng_gain)
                 // Above expression equivalent to 2 ^ (3.321928094 * 0.1 * st->hTdCngDec->shb_cng_gain)
                 // 3.321928094 * 0.1 = 0.3321928094
@@ -1184,63 +1075,71 @@ static void stereo_dft_generate_comfort_noise_fx(
                 move16();
                 scale = BASOP_Util_Divide3232_Scale( scale_32, enr, &q_div );
                 // q_res = add(q_div, sub( q_enr, sub(Q31, q_res))); move16();
-                q_res = sub( Q15, add( sub( Q15, q_div ), sub( sub( Q31, q_res ), q_enr ) ) );
-                move16();
+                q_res = q_res - q_enr + q_div;
                 // q_div = sub(Q15, q_res); move16();
                 q_div = q_res;
                 inv_scale = ISqrt16( scale, &q_res );
                 scale = Sqrt16( scale, &q_div );
-                ptr_shb = shb_shape + L_FRAME16k / 2 - 1;
-                ptr_q_shb = q_shb_shape + L_FRAME16k / 2 - 1;
+                ptr_shb = shb_shape + sub( shr( L_FRAME16k, 1 ), 1 );
                 /* Averaging for Nyquist frequency */
-                tmp = Mpy_32_16_1( cngNoiseLevel_upd[sub( shr( sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ), 1 ), 1 )], inv_scale );
-                // q of cngNoiseLevel_upd is Q16.
-                // ptr_shb will be in Q30 - rshift. tmp is in Q16(Q15 + 1 (for the 0.5 in multiplication)) - q_res, Applying appropriate shift on tmp
-                q_res = sub( sub( Q30, rshift_cng ), sub( Q16, q_res ) );
-                IF( LT_16( q_res, norm_l( tmp ) ) )
+                Word16 q1, q2;
+                tmp32_1 = Mpy_32_16_1( cngNoiseLevel_upd[sub( shr( sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ), 1 ), 1 )], inv_scale );
+                tmp32_2 = *( ptr_shb - 1 );
+                q1 = sub( add( q_cngNoiseLevel_upd, 1 ), q_res );
+                q2 = q_shb_shape;
+                *ptr_shb = BASOP_Util_Add_Mant32Exp( tmp32_1, sub( Q31, q1 ), tmp32_2, sub( Q31, q2 ), &q_tmp );
+                q_tmp = sub( q_tmp, 1 );
+                IF( LT_16( sub( Q31, q_tmp ), q_shb_shape ) )
                 {
-                    tmp = L_shl( tmp, q_res );
-                    q_res = 0;
-                    move16();
+                    scale_sig32( shb_shape, L_FRAME16k, sub( sub( Q31, q_tmp ), q_shb_shape ) );
+                    q_shb_shape = sub( Q31, q_tmp );
                 }
-                *ptr_q_shb = q_res;
-                move16();
 
-                // Bring both ptr_shb and ptr_shb - 1 to the same Q.
-                *ptr_shb = L_shr( L_add( tmp, L_shr( *( ptr_shb - 1 ), sub( q_res, *( ptr_q_shb - 1 ) ) ) ), 1 );
-                move32();
-                ptr_r = DFT[chan] + hFdCngCom->stopFFTbin + k * STEREO_DFT32MS_N_MAX;
+                *ptr_shb = L_shl( *ptr_shb, sub( q_shb_shape, sub( Q31, q_tmp ) ) );
+
+                // *ptr_shb = 0.5f * ( cngNoiseLevel_upd[( hFdCngCom->stopFFTbin - hFdCngCom->startBand ) / 2 - 1] / scale + *( ptr_shb - 1 ) );
+
+                ptr_r = DFT[chan] + hFdCngCom->stopFFTbin + i_mult( k, STEREO_DFT32MS_N_MAX );
                 ptr_i = ptr_r + 1;
 
-                FOR( i = 0; i < ( min( output_frame, hFdCngCom->regularStopBand * 16 ) - hFdCngCom->stopFFTbin ) / 2; i++ )
+                FOR( i = 0; i < shr( sub( s_min( output_frame, i_mult( hFdCngCom->regularStopBand, 16 ) ), hFdCngCom->stopFFTbin ), 1 ); i++ )
                 {
                     /* Real part in FFT bins */
                     rand_gauss_fx( ptr_r, &st->hTdCngDec->cng_seed, q_dft );
-                    // ptr_shb will be in Q30 - rshift at this point. So apply left shift by 1 to compensate Mpy_32_32 right shift..
-                    ( *ptr_r ) = W_extract_l( W_shl( W_mult0_32_32( *ptr_r, *ptr_shb ), sub( add( rshift_shb, add( 1, *ptr_q_shb ) ), 31 ) ) );
-                    move32();
+                    tmp32_1 = *ptr_shb;
+                    q_shift = norm_l( tmp32_1 );
+                    IF( GT_16( q_shift, sub( 31, q_shb_shape ) ) )
+                    {
+                        q_shift = sub( 31, q_shb_shape );
+                    }
+                    tmp32_1 = L_shl( tmp32_1, q_shift );
+                    q_shift = sub( sub( 31, q_shb_shape ), q_shift );
+
+                    ( *ptr_r ) = L_shl( Mpy_32_32( ( *ptr_r ), tmp32_1 ), q_shift );
                     ptr_r += 2;
                     /* Imaginary part in FFT bins */
                     rand_gauss_fx( ptr_i, &st->hTdCngDec->cng_seed, q_dft );
-                    ( *ptr_i ) = W_extract_l( W_shl( W_mult0_32_32( *ptr_i, *ptr_shb ), sub( add( rshift_shb, add( 1, *ptr_q_shb ) ), 31 ) ) );
-                    move32();
+                    ( *ptr_i ) = L_shl( Mpy_32_32( ( *ptr_i ), tmp32_1 ), q_shift );
                     ptr_i += 2;
                     ptr_shb--;
                 }
 
                 /* rescale */
-                // scale = L_shr(imult3216(scale, output_frame) , 1);
-                // multiplication with shr(output_frame, 1) is carried out below.
-                ptr_r = DFT[chan] + hFdCngCom->stopFFTbin + k * STEREO_DFT32MS_N_MAX;
+                tmp32_1 = L_mult0( scale, shr( output_frame, 1 ) );
+                q_shift = norm_l( tmp32_1 );
+                IF( GT_16( q_shift, add( Q16, q_div ) ) )
+                {
+                    q_shift = add( Q16, q_div );
+                }
+                tmp32_1 = L_shl( tmp32_1, q_shift );
+                q_shift = sub( add( Q16, q_div ), q_shift );
+
+                ptr_r = DFT[chan] + add( hFdCngCom->stopFFTbin, i_mult( k, STEREO_DFT32MS_N_MAX ) );
                 ptr_i = ptr_r + 1;
-                FOR( i = 0; i < shr( sub( s_min( output_frame, shl( hFdCngCom->regularStopBand, 4 ) ), hFdCngCom->stopFFTbin ), 1 ); i++ )
+                FOR( i = 0; i < ( min( output_frame, hFdCngCom->regularStopBand * 16 ) - hFdCngCom->stopFFTbin ) / 2; i++ )
                 {
-                    ( *ptr_r ) = W_extract_l( W_shl( W_mult0_32_32( Mpy_32_16_1( *ptr_r, scale ), shr( output_frame, 1 ) ), q_div ) );
-                    move32();
-                    move32();
-                    ( *ptr_i ) = W_extract_l( W_shl( W_mult0_32_32( Mpy_32_16_1( *ptr_i, scale ), shr( output_frame, 1 ) ), q_div ) );
-                    move32();
-                    move32();
+                    ( *ptr_r ) = L_shl( Mpy_32_32( ( *ptr_r ), tmp32_1 ), q_shift );
+                    ( *ptr_i ) = L_shl( Mpy_32_32( ( *ptr_i ), tmp32_1 ), q_shift );
                     ptr_r += 2;
                     ptr_i += 2;
                 }
@@ -1250,29 +1149,21 @@ static void stereo_dft_generate_comfort_noise_fx(
         /* Expand cngNoiseLevel_flt from 0-159 to 0-318, compute noise level */
         lp_noise = 0;
         move32();
-        ptr_level = hFdCngCom->cngNoiseLevel + sub( sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ), 1 );
+        ptr_level = hFdCngCom->cngNoiseLevel + sub( hFdCngCom->stopFFTbin, add( hFdCngCom->startBand, 1 ) );
         ptr_tmp = cngNoiseLevel_upd + sub( shr( sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ), 1 ), 1 );
-        rshift_cng = sub( sub( 31, hFdCngCom->cngNoiseLevelExp ), sub( shl( q_cngNoiseLevel_upd, 1 ), Q31 ) );
+
+        q_tmp = s_max( hFdCngCom->cngNoiseLevelExp, shl( sub( Q31, q_cngNoiseLevel_upd ), 1 ) );
+        scale_sig32( hFdCngCom->cngNoiseLevel, FFTCLDFBLEN, sub( hFdCngCom->cngNoiseLevelExp, q_tmp ) );
+        hFdCngCom->cngNoiseLevelExp = q_tmp;
+
         FOR( i = 0; i < shr( sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ), 1 ); i++ )
         {
-            IF( norm_l( *ptr_tmp ) >= rshift_cng )
-            {
-                *ptr_level-- = Mpy_32_32( L_shl( *ptr_tmp, rshift_cng ), *ptr_tmp );
-                move32();
-            }
-            ELSE
-            {
-                *ptr_level-- = L_shl( Mpy_32_32( *ptr_tmp, *ptr_tmp ), rshift_cng );
-                move32();
-            }
-
+            *ptr_level-- = L_shl( Mpy_32_32( *ptr_tmp, *ptr_tmp ), sub( shl( sub( Q31, q_cngNoiseLevel_upd ), 1 ), hFdCngCom->cngNoiseLevelExp ) );
             ptr_tmp--;
             *ptr_level = *( ptr_level + 1 );
             move32();
-            lp_noise = L_add( lp_noise, L_shl( *ptr_level--, 1 ) );
+            lp_noise = L_add( lp_noise, L_shl( *ptr_level--, add( 1, sub( Q6, sub( Q31, hFdCngCom->cngNoiseLevelExp ) ) ) ) );
         }
-        // q_cngNoiseLevel = sub(shl(q_cngNoiseLevel_upd, 1), Q31);
-        // hFdCngCom->q_cngNoiseLevel = q_cngNoiseLevel; move16();
     }
     ELSE
     {
@@ -1286,19 +1177,18 @@ static void stereo_dft_generate_comfort_noise_fx(
                 move16();
                 FOR( k = 0; k < ( hFdCngCom->nFFTpart - 2 ); k++ )
                 {
-                    Word16 q_div;
-                    factor = BASOP_Util_Divide3232_Scale( ( hFdCngCom->sidNoiseEstLp[k] + DELTA_FX ), ( st->hFdCngDec->partNoiseShape[k] + DELTA_FX ), &q_div );
-                    factor = s_min( add( hStereoDft->scale_fx, extract_l( Mpy_32_16_1( L_mult( sub( factor, hStereoDft->scale_fx ), hStereoCng->xfade_frame_counter ), ONE_BY_MAX_K ) ) ), factor );
+                    factor = BASOP_Util_Divide3232_Scale_cadence( ( hFdCngCom->sidNoiseEstLp[k] + DELTA_FX ), ( st->hFdCngDec->partNoiseShape[k] + DELTA_FX ), &q_div );
+                    factor = s_min( add( hStereoDft->scale_fx, extract_l( Mpy_32_16_1( L_mult( sub( extract_h( factor ), hStereoDft->scale_fx ), hStereoCng->xfade_frame_counter ), ONE_BY_MAX_K ) ) ), extract_h( factor ) );
                     FOR( ; j <= hFdCngCom->part[k]; j++ )
                     {
-                        hFdCngCom->cngNoiseLevel[j] = Mpy_32_16_1( st->hFdCngDec->bandNoiseShape[j], factor );
+                        hFdCngCom->cngNoiseLevel[j] = Mpy_32_32( st->hFdCngDec->bandNoiseShape[j], factor );
                         move32();
                     }
                 }
             }
         }
         scale = shr( output_frame, 1 );
-        numSlots = shr( hFdCngCom->numSlots, 2 );
+        numSlots = shr( hFdCngCom->numSlots, 1 );
         FOR( k = 0; k < STEREO_DFT_NBDIV; k++ )
         {
             ptr_level = hFdCngCom->cngNoiseLevel;
@@ -1307,7 +1197,6 @@ static void stereo_dft_generate_comfort_noise_fx(
             q_cngNoiseLevel = sub( Q31, hFdCngCom->cngNoiseLevelExp );
             FOR( i = 0; i < shr( sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ), 1 ); i++ )
             {
-                Word16 q_sqrt;
                 /* Real part in FFT bins */
                 tmp = *ptr_level++;
                 move32();
@@ -1316,14 +1205,13 @@ static void stereo_dft_generate_comfort_noise_fx(
                 rand_gauss_fx( ptr_r, &st->hTdCngDec->cng_seed, q_dft );
                 q_sqrt = sub( Q31, q_cngNoiseLevel );
                 tmp = Sqrt32( tmp, &q_sqrt );
-                tmp = Mpy_32_32( *( ptr_r ), tmp );
-                tmp = imult3216( tmp, scale );
-                ( *ptr_r ) = L_shl( tmp, q_sqrt );
+                *( ptr_r ) = imult3216( Mpy_32_32( *( ptr_r ), tmp ), scale );
+                ( *ptr_r ) = L_shl( ( *ptr_r ), q_sqrt );
                 move32();
                 ptr_r += 2;
                 /* Imaginary part in FFT bins */
                 rand_gauss_fx( ptr_i, &st->hTdCngDec->cng_seed, q_dft );
-                ( *ptr_i ) = Mpy_32_32( ( *ptr_i ), tmp );
+                ( *ptr_i ) = L_shl( imult3216( Mpy_32_32( ( *ptr_i ), tmp ), scale ), q_sqrt );
                 move32();
                 ptr_i += 2;
             }
@@ -1334,16 +1222,8 @@ static void stereo_dft_generate_comfort_noise_fx(
             {
                 FOR( i = 0; i < numSlots; i++ )
                 {
-#ifndef MSAN_FIX
-                    Word16 q_sqrt;
-#else
-                    Word16 q_sqrt = hFdCngCom->cngNoiseLevelExp;
-#endif
                     /* Real part in FFT bins */
                     rand_gauss_fx( ptr_r, &st->hTdCngDec->cng_seed, q_dft );
-#ifdef MSAN_FIX
-                    q_sqrt = sub( Q31, q_cngNoiseLevel );
-#endif
                     tmp = Mpy_32_16_1( Sqrt32( *ptr_level, &q_sqrt ), scale );
                     ( *ptr_r ) = Mpy_32_32( *ptr_r, tmp );
                     move32();
@@ -1421,7 +1301,7 @@ static void stereo_dft_generate_comfort_noise_fx(
             {
                 alpha = (Word16) ( 0x799A );
                 move16();
-                IF( GT_32( st->hFdCngDec->smoothed_psd_fx[i], 0 ) && GT_32( Mpy_32_16_1( ftmp, (Word16) 0x3333 ), L_shr( st->hFdCngDec->smoothed_psd_fx[i], l_shift_val ) ) )
+                IF( GT_32( st->hFdCngDec->smoothed_psd_fx[i], 0 ) && GT_32( Mpy_32_16_1( ftmp, (Word16) 0x3333 ), L_shr_sat( st->hFdCngDec->smoothed_psd_fx[i], l_shift_val ) ) )
                 {
                     /* prevent abrupt upward update steps */
                     ftmp = L_add( L_shl( st->hFdCngDec->smoothed_psd_fx[i], 2 ), L_shr( st->hFdCngDec->smoothed_psd_fx[i], 1 ) );
diff --git a/lib_dec/ivas_stereo_dft_dec.c b/lib_dec/ivas_stereo_dft_dec.c
index 0ed260376..daa92a594 100644
--- a/lib_dec/ivas_stereo_dft_dec.c
+++ b/lib_dec/ivas_stereo_dft_dec.c
@@ -789,13 +789,9 @@ void stereo_dft_dec_analyze_fx(
     zp = NS2SA( inputFs, STEREO_DFT32MS_ZP_NS );
     ovl = NS2SA( inputFs, STEREO_DFT32MS_OVL_NS );
     NFFT = NS2SA( inputFs, STEREO_DFT32MS_N_NS );
-    Word16 w1, w2, qw1, qw2, qfac_fx;
-    qw1 = norm_s( hStereoDft->NFFT );
-    qw2 = norm_s( NFFT );
-    w1 = shl( hStereoDft->NFFT, qw1 - 1 );
-    w2 = shl( NFFT, qw2 );
-    fac_fx = L_shl( div_s( w1, w2 ), 16 );
-    qfac_fx = 31 - ( qw2 - ( qw1 - 1 ) );
+    Word16 qfac_fx;
+    fac_fx = BASOP_Util_Divide3232_Scale_cadence( hStereoDft->NFFT, NFFT, &qfac_fx );
+    qfac_fx = sub( 31, qfac_fx );
     ovl2 = NS2SA( inputFs, STEREO_DFT32MS_OVL2_NS );
 
     /* Offset FOR the time buffers */
-- 
GitLab


From 9bd31f5a3dda1edd6c867eb2d05c0906e7526f92 Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Wed, 19 Jun 2024 14:01:37 +0530
Subject: [PATCH 2/2] EVS BE issue fix

---
 lib_dec/cng_dec_fx.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/lib_dec/cng_dec_fx.c b/lib_dec/cng_dec_fx.c
index 15be52f70..2b938920f 100644
--- a/lib_dec/cng_dec_fx.c
+++ b/lib_dec/cng_dec_fx.c
@@ -684,6 +684,17 @@ void CNG_dec_fx(
         }
     }
 
+    IF( EQ_16( st_fx->element_mode, EVS_MONO ) )
+    {
+        st_fx->last_CNG_L_frame = st_fx->L_frame;
+        move16();
+
+        IF( NE_32( st_fx->core_brate, SID_1k75 ) )
+        {
+            hTdCngDec->num_ho = m;
+            move16();
+        }
+    }
     IF( st_fx->Opt_AMR_WB )
     {
         E_LPC_f_isp_a_conversion( st_fx->lspCNG_fx, Aq, M );
-- 
GitLab