diff --git a/lib_com/cldfb.c b/lib_com/cldfb.c
index 5a3d2e1a27b1fd60aba746579f8bc5cee346fe1e..b6eafd993ccf0ed5786ef9c075f171c8c45fad8f 100644
--- a/lib_com/cldfb.c
+++ b/lib_com/cldfb.c
@@ -1213,7 +1213,7 @@ void cldfbSynthesis_ivas_fx(
             /*cplxMult(&iBuffer[2*i], &iBuffer[2*i+1],-imagBuffer[k][2*i], imagBuffer[k][M1-1-2*i], rot_vctr_re[i], rot_vctr_im[i]);*/
             iBuffer_fx[2 * i] = Msub_32_32( Mpy_32_32( ( L_negate( imagBuffer_fx[k][2 * i] ) ), rot_vctr_re_fx[i] ), imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_im_fx[i] ); // Qx
             move32();
-            iBuffer_fx[2 * i + 1] = Madd_32_32( Mpy_32_32( ( L_negate( imagBuffer_fx[k][2 * i] ) ), rot_vctr_im_fx[i] ), imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ); // Qx
+            iBuffer_fx[2 * i + 1] = Msub_32_32( Mpy_32_32( imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ), imagBuffer_fx[k][2 * i], rot_vctr_im_fx[i] ); // Qx
             move32();
         }
 
diff --git a/lib_com/ivas_stereo_ica_com_fx.c b/lib_com/ivas_stereo_ica_com_fx.c
index 3ebfd99c1e065ba93c7f0a8deeee608c1acb3ce0..be9f358683e12cb417e25c2e2a447f9220d4f958 100644
--- a/lib_com/ivas_stereo_ica_com_fx.c
+++ b/lib_com/ivas_stereo_ica_com_fx.c
@@ -131,17 +131,7 @@ static void interpTargetChannel_fx(
     Word32 spread_factor2_fx;
     Word64 tempD1_fx, tempD2_fx;
 
-    d = negate( sub( currShift, prevShift ) );
-    IF( d >= 0 )
-    {
-        signShift = 1;
-        move16();
-    }
-    ELSE
-    {
-        signShift = -1;
-        move16();
-    }
+    d = sub( prevShift, currShift );
 
     IF( d == 0 )
     {
@@ -149,6 +139,15 @@ static void interpTargetChannel_fx(
         return;
     }
 
+    signShift = 1;
+    move16();
+
+    if ( d < 0 )
+    {
+        signShift = -1;
+        move16();
+    }
+
     N = L_shift_adapt;
     move16();
     Word32 *table_pointer = NULL;
@@ -207,7 +206,7 @@ static void interpTargetChannel_fx(
 
             FOR( j = lim1; j <= lim2; j++ )
             {
-                ptr2_fx[i] = L_add( Mpy_32_32( win_fx[j * INTERP_FACTOR1 - i], ptr1_fx[j] ), ptr2_fx[i] ); // qsynth
+                ptr2_fx[i] = Madd_32_32( ptr2_fx[i], win_fx[j * INTERP_FACTOR1 - i], ptr1_fx[j] ); // qsynth
                 move32();
             }
         }
@@ -225,44 +224,31 @@ static void interpTargetChannel_fx(
     tempD1_fx = W_deposit32_l( table_D1_pointer[abs( d )] );    // Q35
     tempD2_fx = W_mult0_32_32( 3, table_D1_pointer[abs( d )] ); // Q35
 
-    IF( EQ_16( signShift, 1 ) )
+    tempF1_fx = -ONE_IN_Q12; // Q12
+    move32();
+
+    if ( EQ_16( signShift, 1 ) )
     {
         tempF1_fx = ONE_IN_Q12; // Q12
         move32();
     }
-    ELSE
-    {
-        tempF1_fx = -ONE_IN_Q12; // Q12
-        move32();
-    }
+
     tempF1_fx = L_sub( imult3216( factor_fx, d ), tempF1_fx ); // Q12
-    FOR( k = 0; k < sub( N, 1 ); k++ )
+    FOR( k = 0; k < N - 1; k++ )
     {
-        Word32 local = L_sub( W_extract_l( W_shr( W_mult0_32_32( tempF1_fx, spread_factor2_fx ), 31 ) ), ONE_IN_Q12 ); // Q12
-        Word32 sign_local;
-        IF( local > 0 )
-        {
-            sign_local = 1;
-            move32();
-        }
-        ELSE
-        {
-            sign_local = -1;
-            move32();
-        }
-        Word32 local_int = W_extract_l( W_shr( W_abs( local ), 12 ) ); // Q0
+        Word32 local = Madd_32_32( -ONE_IN_Q12, tempF1_fx, spread_factor2_fx ); // Q12
+        Word32 local_int = L_shr( local, 12 );                                  // Q0
         Word32 res_a1, res_a2, res_a3;
         Word32 res_b1, res_b2, res_b3;
         Word32 res_c1, res_c2, res_c3;
         Word32 res_d1, res_d2, res_d3;
-        Word64 local_int_scaled;
+        Word32 local_int_scaled;
         Word64 res_a, res_b, res_c, res_d;
         Word64 tempa, tempb;
         Word64 mult_a_D1, mult_b_D2;
-        local_int = W_extract_l( W_mult0_32_32( sign_local, local_int ) ); // Q0
-        local_int_scaled = W_deposit32_l( L_shl( local_int, 12 ) );        // Q12
-        lim1 = extract_l( local_int );                                     // Q0
-        IF( W_sub( local_int_scaled, local ) > 0 )                         // Q21
+        local_int_scaled = L_shl( local_int, 12 );  // Q12
+        lim1 = extract_l( local_int );              // Q0
+        if ( L_sub( local_int_scaled, local ) > 0 ) // Q12
         {
             lim1 = sub( lim1, 1 ); // Q0
         }
@@ -387,7 +373,7 @@ static void targetCh_AlignStereoDFT_fx(
         }
         FOR( i = 0; i < L_shift_adapt; i++ )
         {
-            target_fx[i] = L_add( Mpy_32_32( alpha_fx, fadeInBuff_fx[i] ), Mpy_32_32( L_sub( ONE_IN_Q31, alpha_fx ), fadeOutBuff_fx[i] ) ); // qsynth
+            target_fx[i] = Madd_32_32( Mpy_32_32( alpha_fx, fadeInBuff_fx[i] ), L_sub( ONE_IN_Q31, alpha_fx ), fadeOutBuff_fx[i] ); // qsynth
             move32();
 
             alpha_fx = L_add_sat( alpha_fx, winSlope_fx ); // Q31
diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c
index 8d486df04dd62d69e843008e92faa6cc86770729..d6210dfc7d4b3c050bd77310f010c93922678654 100644
--- a/lib_com/ivas_tools.c
+++ b/lib_com/ivas_tools.c
@@ -942,6 +942,12 @@ Word16 matrix_product_mant_exp_fx(
     Word16 *Zp_fx_e = out_e;
     Word16 row, col;
     Word16 x_idx, y_idx;
+    Word64 temp;
+    Word16 temp_e;
+    Word16 prod_e = add( X_fx_e, Y_fx_e );
+
+    Word16 max_exp = -31;
+    move16();
 
     /* Processing */
     test();
@@ -957,17 +963,28 @@ Word16 matrix_product_mant_exp_fx(
         {
             FOR( i = 0; i < colsX; ++i )
             {
-                ( *Zp_fx ) = 0;
-                move32();
-                ( *Zp_fx_e ) = 0;
-                move16();
+                temp = 0;
+                move64();
+
                 FOR( k = 0; k < rowsX; ++k )
                 {
-                    x_idx = add( k, imult1616( i, rowsX ) );
-                    y_idx = add( k, imult1616( j, rowsY ) );
-                    ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/
-                    move32();
+                    x_idx = k + i * rowsX;
+                    y_idx = k + j * rowsY;
+                    temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e
                 }
+                /* Maximize accumulated value to 32-bit */
+                temp_e = W_norm( temp );
+                temp = W_shl( temp, temp_e );
+                if ( 0 == temp )
+                {
+                    temp_e = prod_e;
+                    move16();
+                }
+                *Zp_fx_e = sub( prod_e, temp_e );
+                move16();
+                ( *Zp_fx ) = W_extract_h( temp );
+                move32();
+                max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
                 Zp_fx++;
                 Zp_fx_e++;
             }
@@ -987,17 +1004,27 @@ Word16 matrix_product_mant_exp_fx(
         {
             FOR( i = 0; i < rowsX; ++i )
             {
-                ( *Zp_fx ) = 0;
-                move32();
-                ( *Zp_fx_e ) = 0;
-                move16();
+                temp = 0;
+                move64();
                 FOR( k = 0; k < colsX; ++k )
                 {
-                    x_idx = add( i, imult1616( k, rowsX ) );
-                    y_idx = add( j, imult1616( k, rowsY ) );
-                    ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/
-                    move32();
+                    x_idx = i + k * rowsX;
+                    y_idx = j + k * rowsY;
+                    temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e
+                }
+                /* Maximize accumulated value to 32-bit */
+                temp_e = W_norm( temp );
+                temp = W_shl( temp, temp_e );
+                if ( 0 == temp )
+                {
+                    temp_e = prod_e;
+                    move16();
                 }
+                *Zp_fx_e = sub( prod_e, temp_e );
+                move16();
+                ( *Zp_fx ) = W_extract_h( temp );
+                move32();
+                max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
                 Zp_fx++;
                 Zp_fx_e++;
             }
@@ -1017,18 +1044,27 @@ Word16 matrix_product_mant_exp_fx(
         {
             FOR( i = 0; i < colsX; ++i )
             {
-                ( *Zp_fx ) = 0;
-                move32();
-                ( *Zp_fx_e ) = 0;
-                move16();
+                temp = 0;
+                move64();
                 FOR( k = 0; k < colsX; ++k )
                 {
-                    x_idx = add( k, imult1616( i, rowsX ) );
-                    y_idx = add( j, imult1616( k, rowsY ) );
-                    ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/
-                    move32();
+                    x_idx = k + i * rowsX;
+                    y_idx = j + k * rowsY;
+                    temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e
                 }
-
+                /* Maximize accumulated value to 32-bit */
+                temp_e = W_norm( temp );
+                temp = W_shl( temp, temp_e );
+                if ( 0 == temp )
+                {
+                    temp_e = prod_e;
+                    move16();
+                }
+                *Zp_fx_e = sub( prod_e, temp_e );
+                move16();
+                ( *Zp_fx ) = W_extract_h( temp );
+                move32();
+                max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
                 Zp_fx++;
                 Zp_fx_e++;
             }
@@ -1049,17 +1085,27 @@ Word16 matrix_product_mant_exp_fx(
         {
             FOR( i = 0; i < rowsX; ++i )
             {
-                ( *Zp_fx ) = 0;
-                move32();
-                ( *Zp_fx_e ) = 0;
-                move16();
+                temp = 0;
+                move64();
                 FOR( k = 0; k < colsX; ++k )
                 {
-                    x_idx = add( i, imult1616( k, rowsX ) );
-                    y_idx = add( k, imult1616( j, rowsY ) );
-                    ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/
-                    move32();
+                    x_idx = i + k * rowsX;
+                    y_idx = k + j * rowsY;
+                    temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e
+                }
+                /* Normalize the 64-bit accumulator so that its upper 32 bits carry the result */
+                temp_e = W_norm( temp );
+                temp = W_shl( temp, temp_e );
+                if ( 0 == temp ) // all-zero sum: makes the exponent stored below equal to 0
+                {
+                    temp_e = prod_e;
+                    move16();
                 }
+                *Zp_fx_e = sub( prod_e, temp_e );
+                move16();
+                ( *Zp_fx ) = W_extract_h( temp );
+                move32();
+                max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
                 Zp_fx++;
                 Zp_fx_e++;
             }
@@ -1070,18 +1116,11 @@ Word16 matrix_product_mant_exp_fx(
         move16();
     }
     Zp_fx = Z_fx; /*Q31 - Zp_fx_e*/
+
     Zp_fx_e = out_e;
-    Word16 max_exp = -31;
     move16();
-    FOR( j = 0; j < row; ++j )
-    {
-        FOR( i = 0; i < col; ++i )
-        {
-            max_exp = s_max( max_exp, *Zp_fx_e );
-            Zp_fx_e++;
-        }
-    }
-    Zp_fx_e = out_e;
+
+
     *Z_fx_e = max_exp;
     move16();
     FOR( j = 0; j < row; ++j )
diff --git a/lib_com/options.h b/lib_com/options.h
index ea2823ad7fc7c7836f27203b128295f34d17f16e..1b2c15f872080cbde13e6d7fa2f8cb912b9fe84b 100755
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -152,5 +152,6 @@
 #define FIX_ISSUE_1214                          /* Ittiam: Fix for issue 1214: Energy leakage in IGF tiles for MDCT-stereo @64kbps SWB*/
 #define FIX_881_HILBERT_FILTER                  /* VA: improve the precision of the Hilbert filter to remove 2kHz unwanted tone */
 #define FIX_ISSUE_1245                          /* Ittiam: Fix for issue 1245: Basop Encoder: Audible noise for silent Stereo input DTX on @24.4 kbps, @32 kbps*/
-#endif
 #define FIX_MINOR_SVD_WMOPS_MR1010X             /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */
+#define SVD_WMOPS_OPT                           /* Ittiam : SVD related optimizations */
+#endif
diff --git a/lib_dec/dec_tcx.c b/lib_dec/dec_tcx.c
index ae0474f3d987d452a00f59905bd09924038a8e21..708527df5fa21a97b807fd13f934b06c56d1cac1 100644
--- a/lib_dec/dec_tcx.c
+++ b/lib_dec/dec_tcx.c
@@ -237,6 +237,7 @@ void decoder_tcx_imdct_fx(
     Word16 q_a_itf = 15;
     Word16 x_e = sub( 31, q_x );
     move16();
+    Word16 shift_q = sub( q_x, q_win );
 
     /*-----------------------------------------------------------------*
      * Initializations
@@ -364,9 +365,10 @@ void decoder_tcx_imdct_fx(
 
     IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) )
     {
+        Word16 copy_len = s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) );
         set32_fx( x_tmp_fx, 0, L_FRAME_PLUS );
-        Copy32( x_fx, x_tmp_fx, s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ) );    // q_x
-        Copy32( x_fx, xn_bufFB_fx, s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ) ); // q_x
+        Copy32( x_fx, x_tmp_fx, copy_len );    // q_x
+        Copy32( x_fx, xn_bufFB_fx, copy_len ); // q_x
     }
     ELSE IF( ( st->element_mode == EVS_MONO ) )
     {
@@ -374,8 +376,9 @@ void decoder_tcx_imdct_fx(
     }
     ELSE
     {
-        Copy32( x_fx, x_tmp_fx, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) );    // q_x
-        Copy32( x_fx, xn_bufFB_fx, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ); // q_x
+        Word16 copy_len = s_max( L_spec, s_max( L_frame, L_frameTCX ) );
+        Copy32( x_fx, x_tmp_fx, copy_len );    // q_x
+        Copy32( x_fx, xn_bufFB_fx, copy_len ); // q_x
     }
 
     IF( ( st->igf != 0 ) )
@@ -416,24 +419,29 @@ void decoder_tcx_imdct_fx(
 
     FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ )
     {
-        xn_bufFB_fx_16[ind] = extract_l( L_shr( xn_bufFB_fx[ind], sub( q_x, q_win ) ) ); // q_x
+        xn_bufFB_fx_16[ind] = extract_l( L_shr( xn_bufFB_fx[ind], shift_q ) ); // q_x -> q_win (shift_q = q_x - q_win)
         move16();
     }
+
+    Word16 ratio_e;
+    Word16 ratio = BASOP_Util_Divide1616_Scale( L_frameTCX_glob, L_frame_glob, &ratio_e ); // Q = 15-ratio_e. * FSCALE_DENOM is (1 << 9)
+    ratio = shr( ratio, sub( 6, ratio_e ) );
+
     IF( st->element_mode != EVS_MONO )
     {
         IMDCT_ivas_fx( x_tmp_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB,
                        hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index,
-                       kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, max( L_frameTCX, L_spec ) >> 1, L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, FSCALE_DENOM * L_frameTCX_glob / L_frame_glob, acelp_zir_fx, q_win );
+                       kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win );
     }
     ELSE
     {
 
         IMDCT_ivas_fx( x_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB, hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index,
-                       kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, FSCALE_DENOM * L_frameTCX_glob / L_frame_glob, acelp_zir_fx, q_win );
+                       kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win );
     }
     FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ )
     {
-        xn_bufFB_fx[ind] = L_shl( xn_bufFB_fx_16[ind], sub( q_x, q_win ) ); // Q_x
+        xn_bufFB_fx[ind] = L_shl( L_deposit_l( xn_bufFB_fx_16[ind] ), shift_q ); // Q_x
     }
 
     IF( ( bfi == 0 ) )
@@ -453,19 +461,22 @@ void decoder_tcx_imdct_fx(
 
         IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) )
         {
-            res_m = BASOP_Util_Divide1616_Scale( L_frame_glob, L_FRAME, &res_e );
-            st->old_fpitch = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e );
+            // Use the saturating variant so that a single instruction both shifts and extracts
+            st->old_fpitch = W_shl_sat_l( W_mult0_32_32( st->old_fpitch, L_frame_glob ), -8 ); // old_fpitch * L_frame_glob / 256 (divide by 256 ==> shift right by 8)
+            move32();
         }
 
         IF( GT_16( st->element_mode, EVS_MONO ) )
         {
             res_m = BASOP_Util_Divide1616_Scale( L_frameTCX_glob, L_frame_glob, &res_e );
             st->old_fpitchFB = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e );
+            move32();
         }
         ELSE
         {
             res_m = BASOP_Util_Divide1616_Scale( L_frameTCX, L_frame, &res_e );
             st->old_fpitchFB = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e );
+            move32();
         }
     }
 
@@ -475,7 +486,7 @@ void decoder_tcx_imdct_fx(
         Copy( xn_buf_fx + L_frame, hTcxDec->syn_Overl, overlap ); // Q(-2)
         FOR( Word16 ind = 0; ind < overlapFB; ind++ )
         {
-            hTcxDec->syn_OverlFB[ind] = (Word16) L_shr( xn_bufFB_fx[( ind + L_frameTCX )], sub( q_x, q_win ) ); // q_x
+            hTcxDec->syn_OverlFB[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + L_frameTCX )], shift_q ) ); // q_x -> q_win (shift_q = q_x - q_win)
         }
     }
 
@@ -483,7 +494,7 @@ void decoder_tcx_imdct_fx(
     Copy( xn_buf_fx + sub( shr( overlap, 1 ), tcx_offset ), synth_fx, L_frame_glob ); // Q(-2)
     FOR( Word16 ind = 0; ind < L_frameTCX_glob; ind++ )
     {
-        synthFB_fx[ind] = (Word16) L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], sub( q_x, q_win ) ); // q_x
+        synthFB_fx[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], shift_q ) ); // q_x -> q_win (shift_q = q_x - q_win)
     }
 
 
diff --git a/lib_dec/ivas_binRenderer_internal.c b/lib_dec/ivas_binRenderer_internal.c
index 36246f73923c7b643e2709a7b7fc6faa71ace490..f23c0b7106036dbd7f90b8539e4b9e28c027e5f5 100644
--- a/lib_dec/ivas_binRenderer_internal.c
+++ b/lib_dec/ivas_binRenderer_internal.c
@@ -70,6 +70,7 @@ static void ivas_binRenderer_filterModule_fx(
     Word32 *filterStatesLeftRealPtr_fx, *filterStatesLeftImagPtr_fx;
     Word16 *Q_filterStates;
     const Word32 *filterTapsLeftRealPtr_fx, *filterTapsLeftImagPtr_fx, *filterTapsRightRealPtr_fx, *filterTapsRightImagPtr_fx;
+    Word16 shift_q;
 
     FOR( bandIdx = 0; bandIdx < hBinRenderer->conv_band; bandIdx++ )
     {
@@ -87,11 +88,6 @@ static void ivas_binRenderer_filterModule_fx(
             FOR( k = 0; k < numTimeSlots; k++ )
             {
                 Word64 outRealLeft_fx = 0, outRealRight_fx = 0, outImagLeft_fx = 0, outImagRight_fx = 0;
-                Word64 W_sub1 = 0, W_add1 = 0, W_sub2 = 0, W_add2 = 0;
-                move64();
-                move64();
-                move64();
-                move64();
                 move64();
                 move64();
                 move64();
@@ -104,31 +100,32 @@ static void ivas_binRenderer_filterModule_fx(
                     filterStatesLeftImagPtr_fx[tapIdx] = filterStatesLeftImagPtr_fx[tapIdx - 1];
                     move32();
 
-                    W_sub1 = W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ),
-                                    W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1]
-                    W_add1 = W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] ),
-                                    W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1]
-                    W_sub2 = W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ),
-                                    W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1]
-                    W_add2 = W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] ),
-                                    W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1]
+                    shift_q = sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] );
+                    outRealLeft_fx = W_shr( outRealLeft_fx, shift_q );
+                    outImagLeft_fx = W_shr( outImagLeft_fx, shift_q );
+                    outRealRight_fx = W_shr( outRealRight_fx, shift_q );
+                    outImagRight_fx = W_shr( outImagRight_fx, shift_q );
 
-                    outRealLeft_fx = W_shr( outRealLeft_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) );
-                    outImagLeft_fx = W_shr( outImagLeft_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) );
-                    outRealRight_fx = W_shr( outRealRight_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) );
-                    outImagRight_fx = W_shr( outImagRight_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) );
+                    outRealLeft_fx = W_mac_32_32( outRealLeft_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] );
+                    outRealLeft_fx = W_mac_32_32( outRealLeft_fx, L_negate( filterStatesLeftImagPtr_fx[tapIdx] ), filterTapsLeftImagPtr_fx[tapIdx] ); // Q30 + Q_filterStates[tapIdx - 1]
 
-                    Q_filterStates[tapIdx] = Q_filterStates[tapIdx - 1];
-                    move16();
+                    outImagLeft_fx = W_mac_32_32( outImagLeft_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] );
+                    outImagLeft_fx = W_mac_32_32( outImagLeft_fx, filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] );
+
+                    outRealRight_fx = W_mac_32_32( outRealRight_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] );
+                    outRealRight_fx = W_mac_32_32( outRealRight_fx, L_negate( filterStatesLeftImagPtr_fx[tapIdx] ), filterTapsRightImagPtr_fx[tapIdx] );
 
-                    /* Left Real and Imag */
-                    outRealLeft_fx = W_add( outRealLeft_fx, W_sub1 ); // Q29 + Q_filterStates[1]
-                    outImagLeft_fx = W_add( outImagLeft_fx, W_add1 ); // Q29 + Q_filterStates[1]
+                    outImagRight_fx = W_mac_32_32( outImagRight_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] );
+                    outImagRight_fx = W_mac_32_32( outImagRight_fx, filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] );
 
-                    /* Right Real and Imag*/
-                    outRealRight_fx = W_add( outRealRight_fx, W_sub2 ); // Q29 + Q_filterStates[1]
-                    outImagRight_fx = W_add( outImagRight_fx, W_add2 ); // Q29 + Q_filterStates[1]
+                    Q_filterStates[tapIdx] = Q_filterStates[tapIdx - 1];
+                    move16();
                 }
+                shift_q = add( sub( Q_filterStates[1], Q_curr ), 1 );
+                outRealLeft_fx = W_shr( outRealLeft_fx, shift_q );
+                outImagLeft_fx = W_shr( outImagLeft_fx, shift_q );
+                outRealRight_fx = W_shr( outRealRight_fx, shift_q );
+                outImagRight_fx = W_shr( outImagRight_fx, shift_q );
 
                 filterStatesLeftRealPtr_fx[0] = CLDFB_real[chIdx][k][bandIdx];
                 move32();
@@ -141,27 +138,29 @@ static void ivas_binRenderer_filterModule_fx(
                 /* Left Real and Imag */
                 // Q29 + Q_curr
 
-                out_Conv_CLDFB_real[0][k][bandIdx] = W_add( out_Conv_CLDFB_real[0][k][bandIdx],
-                                                            W_add( W_shr( outRealLeft_fx, sub( Q_filterStates[1], Q_curr ) ),
-                                                                   W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsLeftRealPtr_fx[0] ),
-                                                                          W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsLeftImagPtr_fx[0] ) ) ) ); // Q29
+                Word32 temp1 = L_shr( filterStatesLeftRealPtr_fx[0], 1 );
+                Word32 temp2 = L_shr( filterStatesLeftImagPtr_fx[0], 1 );
+
+
+                outRealLeft_fx = W_mac_32_32( outRealLeft_fx, temp1, filterTapsLeftRealPtr_fx[0] );
+                outRealLeft_fx = W_mac_32_32( outRealLeft_fx, L_negate( temp2 ), filterTapsLeftImagPtr_fx[0] );
+                out_Conv_CLDFB_real[0][k][bandIdx] = W_add( out_Conv_CLDFB_real[0][k][bandIdx], outRealLeft_fx ); // Q29
                 move64();
-                out_Conv_CLDFB_imag[0][k][bandIdx] = W_add( out_Conv_CLDFB_imag[0][k][bandIdx],
-                                                            W_add( W_shr( outImagLeft_fx, sub( Q_filterStates[1], Q_curr ) ),
-                                                                   W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsLeftImagPtr_fx[0] ),
-                                                                          W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsLeftRealPtr_fx[0] ) ) ) ); // Q29
+
+                outImagLeft_fx = W_mac_32_32( outImagLeft_fx, temp1, filterTapsLeftImagPtr_fx[0] );
+                outImagLeft_fx = W_mac_32_32( outImagLeft_fx, temp2, filterTapsLeftRealPtr_fx[0] );
+                out_Conv_CLDFB_imag[0][k][bandIdx] = W_add( out_Conv_CLDFB_imag[0][k][bandIdx], outImagLeft_fx ); // Q29
                 move64();
 
                 /* Right Real and Imag */
-                out_Conv_CLDFB_real[1][k][bandIdx] = W_add( out_Conv_CLDFB_real[1][k][bandIdx],
-                                                            W_add( W_shr( outRealRight_fx, sub( Q_filterStates[1], Q_curr ) ),
-                                                                   W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsRightRealPtr_fx[0] ),
-                                                                          W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsRightImagPtr_fx[0] ) ) ) ); // Q29
+                outRealRight_fx = W_mac_32_32( outRealRight_fx, temp1, filterTapsRightRealPtr_fx[0] );
+                outRealRight_fx = W_mac_32_32( outRealRight_fx, L_negate( temp2 ), filterTapsRightImagPtr_fx[0] );
+                out_Conv_CLDFB_real[1][k][bandIdx] = W_add( out_Conv_CLDFB_real[1][k][bandIdx], outRealRight_fx ); // Q29
                 move64();
-                out_Conv_CLDFB_imag[1][k][bandIdx] = W_add( out_Conv_CLDFB_imag[1][k][bandIdx],
-                                                            W_add( W_shr( outImagRight_fx, sub( Q_filterStates[1], Q_curr ) ),
-                                                                   W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsRightImagPtr_fx[0] ),
-                                                                          W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsRightRealPtr_fx[0] ) ) ) ); // Q29
+
+                outImagRight_fx = W_mac_32_32( outImagRight_fx, temp1, filterTapsRightImagPtr_fx[0] );
+                outImagRight_fx = W_mac_32_32( outImagRight_fx, temp2, filterTapsRightRealPtr_fx[0] );
+                out_Conv_CLDFB_imag[1][k][bandIdx] = W_add( out_Conv_CLDFB_imag[1][k][bandIdx], outImagRight_fx ); // Q29
                 move64();
             }
         }
diff --git a/lib_dec/ivas_dirac_output_synthesis_cov.c b/lib_dec/ivas_dirac_output_synthesis_cov.c
index 5aa649ecda5e1fe8388321724dac753b47b0d351..fd039fe965fd2a8f87867d037f31f73ec8267f3a 100644
--- a/lib_dec/ivas_dirac_output_synthesis_cov.c
+++ b/lib_dec/ivas_dirac_output_synthesis_cov.c
@@ -410,8 +410,6 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
     const Word16 nchan_in                                                           /* i  : number of input channels                              */
 )
 {
-    Word16 cx_init_e;
-    Word16 cx_init_imag_e;
     Word16 band_idx, ch_idx;
     Word16 brange[2];
     Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
@@ -421,10 +419,9 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
     Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     Word16 output_e;
-    Word16 i, j, tmp1, tmp2, tmp1_e, tmp2_e, shift_imag, shift_real;
-    Word32 L_tmp;
+    Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
     Word16 band, num_bands;
-
+    Word16 cx_fx_norm, cx_imag_fx_norm;
     /* estimate input covariance */
     /* Already stack here instead of in the process_subframe */
 
@@ -451,8 +448,11 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
     move16();
     imag_in_e = ImagBuffer_e;
     move16();
-    shift_real = sub( L_norm_arr( real_in_buffer_fx, imult1616( num_bands, nchan_in ) ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
-    shift_imag = sub( L_norm_arr( imag_in_buffer_fx, imult1616( num_bands, nchan_in ) ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
+
+    Word16 buf_len = imult1616( num_bands, nchan_in );
+
+    shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
+    shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
 
     real_in_e = sub( real_in_e, shift_real );
     imag_in_e = sub( imag_in_e, shift_imag );
@@ -460,50 +460,23 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
 
     output_e = s_max( real_in_e, imag_in_e );
 
-    FOR( i = 0; i < num_bands * nchan_in; ++i )
-    {
-        real_in_buffer_fx[i] = L_shr( real_in_buffer_fx[i], sub( output_e, RealBuffer_e ) ); // Q(31-output_e)
-        move32();
-        imag_in_buffer_fx[i] = L_shr( imag_in_buffer_fx[i], sub( output_e, ImagBuffer_e ) ); // Q(31-output_e)
-        move32();
-    }
+    scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
+    scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );
 
     cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
     v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );
 
     v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
-    cx_init_e = tmp1_e;
-    move16();
-    cx_init_imag_e = tmp2_e;
-    move16();
 
-    // normalizing both the matrices to a common exponent for a better precision
-    tmp1 = 0;
-    move16();
-    tmp2 = 0;
-    move16();
-
-    FOR( j = 0; j < PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS; j++ )
-    {
-        L_tmp = BASOP_Util_Add_Mant32Exp( cx_fx[j], cx_init_e, 0, 0, &tmp1_e );
-        L_tmp = BASOP_Util_Add_Mant32Exp( cx_imag_fx[j], cx_init_imag_e, 0, 0, &tmp2_e );
-        tmp1 = s_max( tmp1, tmp1_e );
-        tmp2 = s_max( tmp2, tmp2_e );
-    }
+    cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
+    cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
 
-    FOR( j = 0; j < PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS; j++ )
-    {
-        L_tmp = BASOP_Util_Add_Mant32Exp( cx_fx[j], cx_init_e, 0, 0, &tmp1_e );
-        cx_fx[j] = L_shr( L_tmp, sub( tmp1, tmp1_e ) ); // Q(31-tmp1)
-        move32();
-        L_tmp = BASOP_Util_Add_Mant32Exp( cx_imag_fx[j], cx_init_imag_e, 0, 0, &tmp2_e );
-        cx_imag_fx[j] = L_shr( L_tmp, sub( tmp2, tmp2_e ) ); // Q(31-tmp2)
-        move32();
-    }
+    scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
+    scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );
 
-    *cx_e = tmp1;
+    *cx_e = sub( tmp1_e, cx_fx_norm );
     move16();
-    *cx_imag_e = tmp2;
+    *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
     move16();
 
     return;
diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c
index 942a2b5b0c484cf3a90b25c5dd5cc17a3da01a5e..c8778f99ccf301a493996b9489d1139c3a6074f9 100644
--- a/lib_dec/ivas_svd_dec.c
+++ b/lib_dec/ivas_svd_dec.c
@@ -912,13 +912,55 @@ static void ApplyRotation_fx(
 )
 {
     Word16 ch;
-    Word16 temp_exp;
 
     *d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add( c_e, x11_e ), Mpy_32_32( s, x12 ), add( s_e, x12_e ), d_e ); /* exp(d_e) */
     move32();
     *g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add( c_e, x12_e ), Mpy_32_32( L_negate( s ), x11 ), add( s_e, x11_e ), g_e ); /* exp(g_e) */
     move32();
 
+#ifdef SVD_WMOPS_OPT
+    Word16 c_q = sub( 31, c_e );
+    Word16 s_q = sub( 31, s_e );
+    Word32 op1, op2;
+    Word16 op_e;
+
+    // Align c and s to a common Q format (the smaller of c_q and s_q) by right-shifting the one with the larger Q
+    IF( GT_16( c_q, s_q ) )
+    {
+        op1 = L_shr( c, sub( c_q, s_q ) );
+        op2 = s;
+        move32();
+        op_e = s_q;
+        move16();
+    }
+    ELSE
+    {
+        op1 = c;
+        move32();
+        op2 = L_shr( s, sub( s_q, c_q ) );
+        op_e = c_q;
+        move16();
+    }
+    op_e = add( op_e, 1 ); // op_e holds the common Q of op1/op2 (not an exponent); the 64-bit fractional multiply/mac adds one more bit -> +1
+
+    FOR( ch = 0; ch < nChannels; ch++ )
+    {
+        x11 = singularVector[ch][currentIndex2];
+        move32();
+        x12 = singularVector[ch][currentIndex1];
+        move32();
+
+        Word64 temp = W_mac_32_32( W_mult_32_32( op1, x11 ), op2, x12 ); // Q(singularVector) + op_e
+        temp = W_shr( temp, op_e );                                      // Q(singularVector)
+        singularVector[ch][currentIndex2] = W_sat_l( temp );             // Q(singularVector)
+        move32();
+
+        temp = W_mac_32_32( W_mult_32_32( op1, x12 ), L_negate( op2 ), x11 ); // Q(singularVector) + op_e
+        temp = W_shr( temp, op_e );                                           // Q(singularVector)
+        singularVector[ch][currentIndex1] = W_sat_l( temp );                  // Q(singularVector)
+        move32();
+    }
+#else
 #ifndef FIX_MINOR_SVD_WMOPS_MR1010X
     FOR( ch = 0; ch < nChannels; ch++ )
     {
@@ -952,6 +994,7 @@ static void ApplyRotation_fx(
         move32();
     }
 
+#endif
 #endif
 
     return;
@@ -1605,26 +1648,43 @@ static void singularVectorsAccumulationLeft_fx(
             t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */
             t_ii_e = add( 1, sub( temp_exp, t_ii_e ) );
 #endif
+            Word16 tempe;
+            Word32 temp = BASOP_Util_Divide3232_Scale_cadence( t_ii, maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &tempe );
+            tempe = add( tempe, sub( t_ii_e, singularVectors_Left_e[nCh][nCh] ) );
             // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) );
             FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
             {
-                norm_y = 0;
-                move32();
-                norm_y_e = 0;
+                Word64 acc = 0;
+                move64();
+                Word64 prod[16];
+                Word16 prod_e[16];
+                Word16 max_e = -31;
                 move16();
                 FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
                 {
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                     norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
 #else
-                    norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
+                    prod[k] = W_mult0_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] );
+                    prod_e[k] = add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] );
+                    max_e = s_max( max_e, prod_e[k] );
 #endif
                 }
-                t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,
+
+                FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
+                {
+                    acc = W_add( acc, W_shr( prod[k], sub( max_e, prod_e[k] ) ) );
+                }
+                Word16 acc_e = W_norm( acc );
+                acc = W_shl( acc, acc_e );
+
+                norm_y = W_extract_h( acc );
+                norm_y_e = add( sub( max_e, acc_e ), 1 );
+                t_jj = Mpy_32_32( temp, norm_y );
 #ifndef FIX_1010_OPT_SINGLE_RESCALE
                 t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
 #else
-                t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) );
+                t_jj_e = add( tempe, norm_y_e );
 #endif
                 FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
                 {
diff --git a/lib_rend/ivas_dirac_decorr_dec.c b/lib_rend/ivas_dirac_decorr_dec.c
index c50d690c5b80f4812770e4aab2d6bbb26aabc62f..1788536e4ba8ad65fbf2ff9fa777eaff7972aef4 100644
--- a/lib_rend/ivas_dirac_decorr_dec.c
+++ b/lib_rend/ivas_dirac_decorr_dec.c
@@ -409,7 +409,7 @@ void ivas_dirac_dec_decorr_process_fx(
     HANDLE_DIRAC_DECORR_STATE h_freq_domain_decorr_ap_state )
 {
 
-    Word16 ch_idx, k, l, idx_in_out, max_band_decorr;
+    Word16 ch_idx, k, l, max_band_decorr;
     Word16 split_bands_idx, band_idx, decorr_buffer_len, time_idx;
     Word16 offset, idx_filter, incr_aux;
     Word16 k_1, k_2, num_bands, filter_length, pre_delay, decorr_buffer_step;
@@ -506,22 +506,24 @@ void ivas_dirac_dec_decorr_process_fx(
     set32_fx( onset_filter_fx, ONE_IN_Q31, imult1616( num_protos_diff, num_freq_bands ) );
 
     Word16 q_temp = s_min( q_onset_dec, q_aux_buffer );
+    Word16 shift_q = sub( q_onset_dec, q_temp );
 
-    IF( NE_16( q_temp, q_onset_dec ) )
+    IF( shift_q != 0 )
     {
-        FOR( Word16 i = 0; i < imult1616( num_protos_diff, max_band_decorr_temp ); i++ )
+        FOR( Word16 i = 0; i < num_protos_diff * max_band_decorr_temp; i++ )
         {
-            h_freq_domain_decorr_ap_state->h_onset_detection_power_state.onset_detector_2_fx[i] = L_shr( h_freq_domain_decorr_ap_state->h_onset_detection_power_state.onset_detector_2_fx[i], sub( q_onset_dec, q_temp ) ); // q_temp
-            h_freq_domain_decorr_ap_state->h_onset_detection_power_state.onset_detector_1_fx[i] = L_shr( h_freq_domain_decorr_ap_state->h_onset_detection_power_state.onset_detector_1_fx[i], sub( q_onset_dec, q_temp ) ); // q_temp
+            h_freq_domain_decorr_ap_state->h_onset_detection_power_state.onset_detector_2_fx[i] = L_shr( h_freq_domain_decorr_ap_state->h_onset_detection_power_state.onset_detector_2_fx[i], shift_q ); // q_temp
+            h_freq_domain_decorr_ap_state->h_onset_detection_power_state.onset_detector_1_fx[i] = L_shr( h_freq_domain_decorr_ap_state->h_onset_detection_power_state.onset_detector_1_fx[i], shift_q ); // q_temp
             move32();
             move32();
         }
     }
-    IF( NE_16( q_temp, q_aux_buffer ) )
+    shift_q = sub( q_aux_buffer, q_temp );
+    IF( shift_q != 0 )
     {
-        FOR( Word16 i = 0; i < shl( imult1616( num_protos_diff, max_band_decorr_temp ), 1 ); i++ )
+        FOR( Word16 i = 0; i < 2 * num_protos_diff * max_band_decorr_temp; i++ )
         {
-            aux_buffer_fx[i] = L_shr( aux_buffer_fx[i], sub( q_aux_buffer, q_temp ) ); // q_temp
+            aux_buffer_fx[i] = L_shr( aux_buffer_fx[i], shift_q ); // q_temp
             move32();
         }
     }
@@ -566,9 +568,9 @@ void ivas_dirac_dec_decorr_process_fx(
             /* final phase rotation */
             FOR( k = 0; k < max_band_decorr; k++ )
             {
-                *p_frame_dec_fx = L_sub( Mpy_32_16_1( ( *decorr_buffer_fx ), ( *phase_coeff_real_fx ) ), Mpy_32_16_1( ( *( decorr_buffer_fx + 1 ) ), ( *phase_coeff_imag_fx ) ) ); // sub( q_decorr_buf, 1 )
+                *p_frame_dec_fx = Msub_32_16( Mpy_32_16_1( *decorr_buffer_fx, ( *phase_coeff_real_fx ) ), *( decorr_buffer_fx + 1 ), ( *phase_coeff_imag_fx ) ); // sub( q_decorr_buf, 1 )
                 p_frame_dec_fx++;
-                *p_frame_dec_fx = L_add( Mpy_32_16_1( ( *decorr_buffer_fx ), ( *phase_coeff_imag_fx ) ), Mpy_32_16_1( ( *( decorr_buffer_fx + 1 ) ), ( *phase_coeff_real_fx ) ) ); // sub( q_decorr_buf, 1 )
+                *p_frame_dec_fx = Madd_32_16( Mpy_32_16_1( *decorr_buffer_fx, ( *phase_coeff_imag_fx ) ), ( *( decorr_buffer_fx + 1 ) ), ( *phase_coeff_real_fx ) ); // sub( q_decorr_buf, 1 )
                 p_frame_dec_fx++;
                 phase_coeff_imag_fx++;
                 phase_coeff_real_fx++;
@@ -620,8 +622,8 @@ void ivas_dirac_dec_decorr_process_fx(
 #endif
 
         q_shift = getScaleFactor32( aux_buffer_fx, imult1616( imult1616( 2, num_protos_dir ), max_band_decorr_temp ) );
-
-        FOR( Word16 j = 0; j < shl( imult1616( num_protos_dir, max_band_decorr_temp ), 1 ); j++ )
+        Word16 buf_len = shl( imult1616( num_protos_dir, max_band_decorr_temp ), 1 );
+        FOR( Word16 j = 0; j < buf_len; j++ )
         {
             aux_buffer_fx[j] = L_shl( aux_buffer_fx[j], q_shift ); // add( q_aux_buffer, q_shift )
             move32();
@@ -674,8 +676,9 @@ void ivas_dirac_dec_decorr_process_fx(
                     filter_coeff_num_real_fx = &h_freq_domain_decorr_ap_params->filter_coeff_num_real_fx[idx_filter]; // Q12
                     filter_coeff_den_real_fx = &h_freq_domain_decorr_ap_params->filter_coeff_den_real_fx[idx_filter]; // Q12
                     decorr_buffer_start_ptr_fx = &h_freq_domain_decorr_ap_state->decorr_buffer_fx[2 * ( ch_idx * max_band_decorr + band_idx )];
-                    input_real_fx = aux_buffer_fx[shl( add( imult1616( proto_index_dir[ch_idx], max_band_decorr ), band_idx ), 1 )];           // q_aux
-                    input_imag_fx = aux_buffer_fx[add( shl( add( imult1616( proto_index_dir[ch_idx], max_band_decorr ), band_idx ), 1 ), 1 )]; // q_aux
+                    Word16 idx = shl( add( imult1616( proto_index_dir[ch_idx], max_band_decorr ), band_idx ), 1 );
+                    input_real_fx = aux_buffer_fx[idx];     // q_aux
+                    input_imag_fx = aux_buffer_fx[idx + 1]; // q_aux
 
                     /* MA part of filter impulse response */
                     FOR( l = 0; l < filter_length; l++ )
@@ -766,14 +769,15 @@ void ivas_dirac_dec_decorr_process_fx(
 #ifdef MSAN_FIX
         q_shift = Q31;
         move16();
+        offset = shl( max_band_decorr, 1 );
         FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
         {
             q_shift = s_min( q_shift,
-                             L_norm_arr( &frame_dec_fx[shl( imult1616( ch_idx, num_freq_bands ), 1 )], shl( max_band_decorr, 1 ) ) );
+                             L_norm_arr( &frame_dec_fx[2 * ch_idx * num_freq_bands], offset ) );
         }
         FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
         {
-            Scale_sig32( &frame_dec_fx[shl( imult1616( ch_idx, num_freq_bands ), 1 )], shl( max_band_decorr, 1 ), q_shift );
+            scale_sig32( &frame_dec_fx[2 * ch_idx * num_freq_bands], offset, q_shift );
         }
 #else
         q_shift = L_norm_arr( frame_dec_fx, ( 2 * max_band_decorr + incr_aux ) * num_channels );
@@ -784,13 +788,14 @@ void ivas_dirac_dec_decorr_process_fx(
 
         IF( h_freq_domain_decorr_ap_params->use_ducker )
         {
+            Word16 len1 = shl( imult1616( max_band_decorr, num_protos_dir ), 1 );
             /* compute direct power w/o onsets for the energy ratio, signal is still in the aux buffer */
-            v_mult_fixed( aux_buffer_fx, aux_buffer_fx, aux_buffer_fx, shl( imult1616( max_band_decorr, num_protos_dir ), 1 ) ); // 2 *q_aux -31
+            v_mult_fixed( aux_buffer_fx, aux_buffer_fx, aux_buffer_fx, len1 ); // 2 *q_aux -31
 
-            q_aux_buffer = sub( imult1616( 2, q_aux_buffer ), 31 );
+            q_aux_buffer = sub( shl( q_aux_buffer, 1 ), 31 );
 
             // if this scaling is eliminated overflow is happening fot v_add_inc_fix
-            q_shift = sub( L_norm_arr( aux_buffer_fx, shl( imult1616( num_protos_dir, max_band_decorr ), 1 ) ), find_guarded_bits_fx( 2 ) );
+            q_shift = sub( L_norm_arr( aux_buffer_fx, len1 ), 1 /*find_guarded_bits_fx( 2 )*/ );
             Scale_sig32( aux_buffer_fx, shl( imult1616( num_protos_dir, max_band_decorr_temp ), 1 ), q_shift );
             q_aux_buffer = add( q_aux_buffer, q_shift );
 
@@ -847,18 +852,18 @@ void ivas_dirac_dec_decorr_process_fx(
             }
             norm = W_norm( min64 );
 #endif
-
-            FOR( Word16 i = 0; i < shl( imult1616( num_channels, max_band_decorr ), 1 ); i++ )
+            norm = sub( norm, 1 /*find_guarded_bits_fx( 2 )*/ );
+            FOR( Word16 i = 0; i < 2 * num_channels * max_band_decorr; i++ )
             {
-                aux_buffer_fx[i] = W_extract_h( W_shl( aux_64[i], sub( norm, find_guarded_bits_fx( 2 ) ) ) );
+                aux_buffer_fx[i] = W_extract_h( W_shl( aux_64[i], norm ) );
                 move32();
             }
-            q_aux_buffer = add( imult1616( 2, q_frame_f ), sub( sub( norm, 1 ), 32 ) );
+            q_aux_buffer = add( shl( q_frame_f, 1 ), sub( norm, 32 ) );
 
 
             FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
             {
-                v_add_inc_fx( &aux_buffer_fx[shl( imult1616( ch_idx, max_band_decorr ), 1 )], 2, &aux_buffer_fx[add( shl( imult1616( ch_idx, max_band_decorr ), 1 ), 1 )], 2, &aux_buffer_fx[imult1616( ch_idx, max_band_decorr )], 1, max_band_decorr );
+                v_add_inc_fx( &aux_buffer_fx[2 * ch_idx * max_band_decorr], 2, &aux_buffer_fx[2 * ch_idx * max_band_decorr + 1], 2, &aux_buffer_fx[ch_idx * max_band_decorr], 1, max_band_decorr );
             }
 
             /* smooth energies */
@@ -959,7 +964,7 @@ void ivas_dirac_dec_decorr_process_fx(
             FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
             {
                 q_shift = s_min( q_shift,
-                                 sub( L_norm_arr( &frame_dec_fx[shl( imult1616( ch_idx, num_freq_bands ), 1 )], shl( max_band_decorr, 1 ) ),
+                                 sub( L_norm_arr( &frame_dec_fx[2 * ch_idx * num_freq_bands], shl( max_band_decorr, 1 ) ),
                                       Q2 ) );
             }
             FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
@@ -974,7 +979,7 @@ void ivas_dirac_dec_decorr_process_fx(
 
             FOR( ch_idx = 0; ch_idx < num_channels; ch_idx++ )
             {
-                Word32 *frame_dec_fx_ptr = &frame_dec_fx[shl( imult1616( ch_idx, num_freq_bands ), 1 )];
+                Word32 *frame_dec_fx_ptr = &frame_dec_fx[2 * ch_idx * num_freq_bands];
                 Word16 cur_proto_index = imult1616( proto_index_dir[ch_idx], max_band_decorr );
                 Word16 cur_reverb_index = imult1616( ch_idx, max_band_decorr );
                 Word32 *reverb_energy_smooth_ptr = &h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[cur_reverb_index]; // q_aux
@@ -1020,22 +1025,18 @@ void ivas_dirac_dec_decorr_process_fx(
                         e_duck_gain = add( e_duck_gain, sub( e_direct_energy_smooth, add( e_reverb_energy_smooth, 1 ) ) );
 
                         duck_gain = Sqrt16( duck_gain, &e_duck_gain );
-                        Word16 comp_flag = BASOP_Util_Cmp_Mant32Exp( duck_gain, e_duck_gain, 16384, 2 );
-                        IF( EQ_16( comp_flag, 1 ) )
-                        {
-                            duck_gain = 16384; // 2inQ13
-                            move16();
-                        }
-                        ELSE
+                        /* if ( duck_gain > 2.0f )
                         {
-                            duck_gain = shl( duck_gain, sub( e_duck_gain, 2 ) ); // Q13
-                        }
+                            duck_gain = 2.0f;
+                        } */
+                        duck_gain = shl_sat( duck_gain, sub( e_duck_gain, 1 ) ); // Q14; the saturating shift takes the place of the explicit clamp to 2.0 shown above
+
 #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC
                         frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 );                                 // q_frame_dec
                         frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )], duck_gain ), 2 ); // q_frame_dec
 #else
-                        frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 );         // q_frame_dec
-                        frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 2 ); // q_frame_dec
+                        frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 );         // q_frame_dec
+                        frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 1 ); // q_frame_dec
 #endif
                         move32();
                         move32();
@@ -1055,7 +1056,7 @@ void ivas_dirac_dec_decorr_process_fx(
             Word16 sf = MAX_16;
             FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
             {
-                sf = s_min( sf, getScaleFactor32( &frame_dec_fx[shl( imult1616( ch_idx, num_freq_bands ), 1 )], shl( max_band_decorr, 1 ) ) );
+                sf = s_min( sf, getScaleFactor32( &frame_dec_fx[2 * ch_idx * num_freq_bands], shl( max_band_decorr, 1 ) ) );
             }
             sf = s_min( sub( sf, 1 ), q_shift );
             q_if_local = sub( q_shift, sf );
@@ -1065,7 +1066,7 @@ void ivas_dirac_dec_decorr_process_fx(
 #ifdef MSAN_FIX
             FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
             {
-                scale_sig32( &frame_dec_fx[shl( imult1616( ch_idx, num_freq_bands ), 1 )], shl( max_band_decorr, 1 ), q_shift );
+                scale_sig32( &frame_dec_fx[2 * ch_idx * num_freq_bands], shl( max_band_decorr, 1 ), q_shift );
             }
 #else
             Scale_sig32( frame_dec_fx, ( 2 * max_band_decorr + incr_aux ) * num_channels, q_shift );                                   // scaling it to input q
@@ -1078,7 +1079,7 @@ void ivas_dirac_dec_decorr_process_fx(
 #ifdef MSAN_FIX
             FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
             {
-                scale_sig32( &frame_dec_fx[shl( imult1616( ch_idx, num_freq_bands ), 1 )], shl( max_band_decorr, 1 ), q_shift );
+                scale_sig32( &frame_dec_fx[2 * ch_idx * num_freq_bands], shl( max_band_decorr, 1 ), q_shift );
             }
 #else
             Scale_sig32( frame_dec_fx, ( 2 * max_band_decorr + incr_aux ) * num_channels, q_shift );                                   // scaling it to input q
@@ -1098,24 +1099,22 @@ void ivas_dirac_dec_decorr_process_fx(
 
                 FOR( k = 0; k < max_band_decorr; ++k )
                 {
-                    aux_buffer_fx[2 * k] = Mpy_32_32( L_shr_r( input_frame_fx[add( shl( offset, 1 ), shl( k, 1 ) )], q_if_local ), L_sub( ONE_IN_Q31, onset_filter_fx[add( offset, k )] ) );
-                    aux_buffer_fx[add( shl( k, 1 ), 1 )] = Mpy_32_32( L_shr_r( input_frame_fx[add( add( shl( offset, 1 ), shl( k, 1 ) ), 1 )], q_if_local ), L_sub( ONE_IN_Q31, onset_filter_fx[add( offset, k )] ) ); // q_frame_f
+                    aux_buffer_fx[2 * k] = Mpy_32_32( L_shr_r( input_frame_fx[2 * ( offset + k )], q_if_local ), L_sub( ONE_IN_Q31, onset_filter_fx[offset + k] ) );
+                    aux_buffer_fx[add( shl( k, 1 ), 1 )] = Mpy_32_32( L_shr_r( input_frame_fx[2 * ( offset + k ) + 1], q_if_local ), L_sub( ONE_IN_Q31, onset_filter_fx[offset + k] ) ); // q_frame_f
                     move32();
                     move32();
                 }
 
-                v_add_fx( &frame_dec_fx[imult1616( ch_idx, shl( num_freq_bands, 1 ) )], aux_buffer_fx, &frame_dec_fx[imult1616( ch_idx, shl( num_freq_bands, 1 ) )], shl( max_band_decorr, 1 ) );
+                v_add_fx( &frame_dec_fx[2 * ch_idx * num_freq_bands], aux_buffer_fx, &frame_dec_fx[2 * ch_idx * num_freq_bands], shl( max_band_decorr, 1 ) );
             }
         }
 
         /* avoid decorrelation above maximum frequency -> set to zero the remaining frequencies*/
+        Word16 val = shl( sub( num_freq_bands, h_freq_domain_decorr_ap_params->max_band_decorr ), 1 );
         FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
         {
-            /* calc output indices */
-            idx_in_out = shl( ( add( imult1616( ch_idx, num_freq_bands ), h_freq_domain_decorr_ap_params->max_band_decorr ) ), 1 );
-
             /* copy to output signal */
-            set32_fx( &frame_dec_fx[idx_in_out], 0, shl( sub( num_freq_bands, h_freq_domain_decorr_ap_params->max_band_decorr ), 1 ) );
+            set32_fx( &frame_dec_fx[2 * ( ch_idx * num_freq_bands + h_freq_domain_decorr_ap_params->max_band_decorr )], 0, val );
         }
         *q_frame_dec = q_frame_f;
         move16();