From 2ec0301c1268655e48dc99244c19b938a884d389 Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Fri, 24 Jan 2025 17:23:26 +0530
Subject: [PATCH] Complexity optimization for SBA path decoding

---
 lib_com/cnst.h                             |   1 +
 lib_com/ivas_cnst.h                        |   1 +
 lib_com/ivas_dirac_com.c                   |  49 ++++---
 lib_com/ivas_spar_com.c                    |  76 +++++------
 lib_com/ivas_transient_det.c               |  18 +--
 lib_com/tools_fx.c                         |  14 +-
 lib_dec/igf_dec_fx.c                       |  13 +-
 lib_dec/ivas_spar_decoder.c                |  19 +--
 lib_rend/ivas_dirac_onsets_dec.c           | 123 +----------------
 lib_rend/ivas_dirac_output_synthesis_dec.c | 146 ++++++++++++---------
 lib_rend/ivas_dirac_rend.c                 |  21 +--
 lib_rend/ivas_efap.c                       |  68 ++--------
 12 files changed, 217 insertions(+), 332 deletions(-)

diff --git a/lib_com/cnst.h b/lib_com/cnst.h
index 7a5260a41..66b6dc9a1 100644
--- a/lib_com/cnst.h
+++ b/lib_com/cnst.h
@@ -137,6 +137,7 @@
 #define FOUR_IN_Q28                     1073741824
 
 #define MAX_WORD16                      32767
+#define ONE_IN_Q45                      (Word64)0x200000000000
 #define ONE_IN_Q62                      (Word64)0x4000000000000000
 /*----------------------------------------------------------------------------------*
  * General constants
diff --git a/lib_com/ivas_cnst.h b/lib_com/ivas_cnst.h
index a7a7ce88a..1fe54d164 100644
--- a/lib_com/ivas_cnst.h
+++ b/lib_com/ivas_cnst.h
@@ -150,6 +150,7 @@ typedef enum
     RENDERER_OSBA_LS
 } RENDERER_TYPE;
 
+#define MAX_FREQUENCY_BANDS                    64
 
 /*----------------------------------------------------------------------------------*
  * IVAS general constants
diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c
index 6aeff8ec5..ebc874cce 100644
--- a/lib_com/ivas_dirac_com.c
+++ b/lib_com/ivas_dirac_com.c
@@ -966,38 +966,55 @@ void computeDiffuseness_fixed(
         p_tmp_c = buffer_energy + i * num_freq_bands;
 
         q_tmp = add( q_factor_energy[i], min_q_shift1 );
-        FOR( k = 0; k < num_freq_bands; k++ )
+
+        Word16 shift_q = sub( q_tmp, q_ene );
+        IF( shift_q < 0 )
         {
-            tmp = L_shl( p_tmp_c[k], min_q_shift1 );
-            IF( LT_16( q_tmp, q_ene ) )
+            FOR( k = 0; k < num_freq_bands; k++ )
             {
-                energy_slow[k] = L_add( L_shr( energy_slow[k], sub( q_ene, q_tmp ) ), tmp );
+                tmp = L_shl( p_tmp_c[k], min_q_shift1 );
+                energy_slow[k] = L_add( L_shl( energy_slow[k], shift_q ), tmp );
                 move32();
             }
-            ELSE
+        }
+        ELSE
+        {
+            FOR( k = 0; k < num_freq_bands; k++ )
             {
-                energy_slow[k] = L_add( energy_slow[k], L_shr( tmp, sub( q_tmp, q_ene ) ) );
+                tmp = L_shl( p_tmp_c[k], min_q_shift1 );
+                energy_slow[k] = L_add( energy_slow[k], L_shr( tmp, shift_q ) );
                 move32();
             }
         }
+
         q_ene = s_min( q_ene, q_tmp );
 
         /* Intensity slow */
         q_tmp = add( q_factor_intensity[i], min_q_shift2 );
-        FOR( j = 0; j < DIRAC_NUM_DIMS; ++j )
+
+        shift_q = sub( q_tmp, q_intensity );
+        IF( shift_q > 0 )
         {
-            p_tmp = buffer_intensity[j][i];
-            FOR( k = 0; k < num_freq_bands; k++ )
+            FOR( j = 0; j < DIRAC_NUM_DIMS; ++j )
             {
-                tmp = L_shl( p_tmp[k], min_q_shift2 );
-                IF( LT_16( q_intensity, q_tmp ) )
+                p_tmp = buffer_intensity[j][i];
+                FOR( k = 0; k < num_freq_bands; k++ )
                 {
-                    intensity_slow[j * num_freq_bands + k] = L_add( intensity_slow[j * num_freq_bands + k], L_shr( tmp, sub( q_tmp, q_intensity ) ) );
+                    tmp = L_shl( p_tmp[k], min_q_shift2 );
+                    intensity_slow[j * num_freq_bands + k] = L_add( intensity_slow[j * num_freq_bands + k], L_shr( tmp, shift_q ) );
                     move32();
                 }
-                ELSE
+            }
+        }
+        ELSE
+        {
+            FOR( j = 0; j < DIRAC_NUM_DIMS; ++j )
+            {
+                p_tmp = buffer_intensity[j][i];
+                FOR( k = 0; k < num_freq_bands; k++ )
                 {
-                    intensity_slow[j * num_freq_bands + k] = L_add( L_shr( intensity_slow[j * num_freq_bands + k], sub( q_intensity, q_tmp ) ), tmp );
+                    tmp = L_shl( p_tmp[k], min_q_shift2 );
+                    intensity_slow[j * num_freq_bands + k] = L_add( L_shl( intensity_slow[j * num_freq_bands + k], shift_q ), tmp );
                     move32();
                 }
             }
@@ -1017,9 +1034,7 @@ void computeDiffuseness_fixed(
 
         FOR( k = 0; k < num_freq_bands; k++ )
         {
-            p_tmp[k] = Mpy_32_32( p_tmp[k], p_tmp[k] );
-            move32();
-            intensity_slow_abs[k] = L_add( intensity_slow_abs[k], p_tmp[k] );
+            intensity_slow_abs[k] = Madd_32_32( intensity_slow_abs[k], p_tmp[k], p_tmp[k] );
             move32();
         }
     }
diff --git a/lib_com/ivas_spar_com.c b/lib_com/ivas_spar_com.c
index 362c1c9ed..9f5106e0c 100644
--- a/lib_com/ivas_spar_com.c
+++ b/lib_com/ivas_spar_com.c
@@ -7044,7 +7044,7 @@ void ivas_dirac_dec_get_response_fx(
     Word16 Q_out )
 {
     Word16 index_azimuth, index_elevation;
-    Word16 el, e, az;
+    Word16 el, e, az, q_diff;
     Word32 cos_1_fx, cos_2_fx, sin_1_fx, cos_az_fx[3];
     Word32 sin_az_fx[3];
     Word32 f_fx;
@@ -7055,49 +7055,39 @@ void ivas_dirac_dec_get_response_fx(
     index_azimuth = add( azimuth, 180 ) % 360;
     move16();
     index_elevation = add( elevation, 90 );
-    IF( GT_16( index_elevation, 90 ) )
+
+    e = 1;
+    move16();
+
+    if ( GT_16( index_elevation, 90 ) )
     {
         e = -1;
-        move16();
-    }
-    ELSE
-    {
-        e = 1;
-        move16();
     }
 
-    IF( GT_16( index_elevation, 90 ) )
+    el = index_elevation;
+    move16();
+
+    if ( GT_16( index_elevation, 90 ) )
     {
         el = sub( 180, index_elevation );
-        move16();
-    }
-    ELSE
-    {
-        el = index_elevation;
-        move16();
     }
 
-    IF( GT_16( index_azimuth, 180 ) )
+    az = index_azimuth;
+    move16();
+
+    if ( GT_16( index_azimuth, 180 ) )
     {
         az = sub( 360, index_azimuth );
-        move16();
-    }
-    ELSE
-    {
-        az = index_azimuth;
-        move16();
     }
 
-    IF( GT_16( index_azimuth, 180 ) )
+    f_fx = 1;
+    move16();
+
+    if ( GT_16( index_azimuth, 180 ) )
     {
         f_fx = -1;
-        move16();
-    }
-    ELSE
-    {
-        f_fx = 1;
-        move16();
     }
+
     cos_1_fx = L_shr( dirac_gains_trg_term_fx[az][0], 1 );  // q30
     cos_2_fx = L_shl( Mpy_32_32( cos_1_fx, cos_1_fx ), 1 ); // q30
     sin_1_fx = L_shr( dirac_gains_trg_term_fx[az][1], 1 );  // q30
@@ -7122,54 +7112,54 @@ void ivas_dirac_dec_get_response_fx(
     response_fx[0] = L_shl_sat( 1, Q_out ); // Q_out
     move32();
 
+    q_diff = sub( Q_out, 29 );
+
     FOR( l = 1; l <= ambisonics_order; l++ )
     {
         b_2 = imult1616( l, l );
-        b1_2 = add( imult1616( l, l ), shl( l, 1 ) );
+        b1_2 = add( b_2, shl( l, 1 ) );
         FOR( m = 0; m < l; m += 2 )
         {
-            b = add( b_2, m );
+            b = b_2 + m;
             a = dirac_gains_P_idx[b];
-            move16();
+
             c_fx_better = local_result_table[el][a]; // q30
             move32();
-            response_fx[b] = L_shl( Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ), sub( Q_out, 29 ) ); // Q_out
+            response_fx[b] = L_shl( Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ), q_diff ); // Q_out
             move32();
 
-            b1 = sub( b1_2, m );
-            response_fx[b1] = L_shl( Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ), sub( Q_out, 29 ) ); // Q_out
+            b1 = b1_2 - m;
+            response_fx[b1] = L_shl( Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ), q_diff ); // Q_out
             move32();
         }
 
         FOR( m = 1; m < l; m += 2 )
         {
-            b = add( b_2, m );
+            b = b_2 + m;
             a = dirac_gains_P_idx[b];
-            move16();
             c_fx_better = local_result_table[el][a]; // q30
             move32();
             if ( EQ_16( e, -1 ) )
             {
                 c_fx_better = L_negate( c_fx_better ); // q30
             }
-            response_fx[b] = L_shl( Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ), sub( Q_out, 29 ) ); // Q_out
+            response_fx[b] = L_shl( Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ), q_diff ); // Q_out
             move32();
-            b1 = sub( b1_2, m );
-            response_fx[b1] = L_shl( Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ), sub( Q_out, 29 ) ); // Q_out
+
+            b1 = b1_2 - m;
+            response_fx[b1] = L_shl( Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ), q_diff ); // Q_out
             move32();
         }
 
         b = add( b_2, l );
         a = dirac_gains_P_idx[b];
-        move16();
         c_fx_better = local_result_table_2[el][a]; // q30
         move32();
-        IF( EQ_16( ( l % 2 ), 1 ) )
+        IF( EQ_16( s_and( l, 0x01 ), 1 ) )
         {
             if ( EQ_16( e, -1 ) )
             {
                 c_fx_better = L_negate( c_fx_better ); // q30
-                move32();
             }
         }
         response_fx[b] = L_shl( c_fx_better, sub( Q_out, 30 ) ); // Q_out
diff --git a/lib_com/ivas_transient_det.c b/lib_com/ivas_transient_det.c
index 8f8905fdd..dccf4db02 100644
--- a/lib_com/ivas_transient_det.c
+++ b/lib_com/ivas_transient_det.c
@@ -344,26 +344,26 @@ static Word32 ivas_calc_duck_gain_fx(
     Word32 duck_gain_out, L_tmp;
     Word16 tmp_e;
 
-    duck_gain_out = L_add( L_shl( Mpy_32_32( L_sub( duck_gain, ONE_IN_Q30 ), duck_coeff ), Q1 ), ONE_IN_Q30 ); /*Q30*/
+    duck_gain_out = L_add( Mpy_32_32( L_sub( duck_gain, ONE_IN_Q30 ), duck_coeff ), ONE_IN_Q29 ); /*Q29*/
 
-    IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( Mpy_32_32( duck_mult_fac, env_1 ), add( 2, env1_e ), Mpy_32_32( duck_gain_out, env_2 ), add( 1, env2_e ) ), -1 ) )
+    IF( LT_64( W_mult0_32_32( duck_mult_fac, env_1 ), W_shr( W_mult0_32_32( duck_gain_out, env_2 ), sub( env1_e, env2_e ) ) ) )
     {
+
+        duck_gain_out = 0;
+        move32();
+
         test();
-        IF( ( env_1 == 0 ) || ( env_2 == 0 ) )
-        {
-            duck_gain_out = 0;
-            move32();
-        }
-        ELSE
+        IF( ( env_1 != 0 ) && ( env_2 != 0 ) )
         {
             L_tmp = BASOP_Util_Divide3232_Scale_cadence( env_1, env_2, &tmp_e );
             L_tmp = L_shl( L_tmp, add( sub( env1_e, env2_e ), tmp_e ) );
 
             duck_gain_out = Mpy_32_32( duck_mult_fac, L_tmp ); /*Q29*/
-            duck_gain_out = L_shl( duck_gain_out, Q1 );        /*Q30*/
         }
     }
 
+    duck_gain_out = L_shl( duck_gain_out, Q1 ); /*Q30*/
+
     return duck_gain_out; /*Q30*/
 }
 /*-----------------------------------------------------------------------------------------*
diff --git a/lib_com/tools_fx.c b/lib_com/tools_fx.c
index f6152f265..74a47dde8 100644
--- a/lib_com/tools_fx.c
+++ b/lib_com/tools_fx.c
@@ -884,6 +884,10 @@ void Scale_sig32(
     Flag Overflow = 0;
     move32();
 #endif
+    IF( 0 == exp0 )
+    {
+        return;
+    }
 
     FOR( i = 0; i < lg; i++ )
     {
@@ -4217,10 +4221,11 @@ UWord32 mvl2s_r(
 
     IF( (void *) y <= (const void *) x )
     {
+        Word32 tempd = L_shl( 1, sub( q_x, 1 ) );
         FOR( i = 0; i < n; i++ )
         {
-            temp = L_shr( x[i], sub( q_x, 1 ) );
-            temp = L_shr( L_add( temp, 1 ), 1 );
+            temp = L_add( x[i], tempd );
+            temp = L_shr( temp, q_x );
 
             IF( GT_32( temp, MAX16B ) )
             {
@@ -4241,10 +4246,11 @@ UWord32 mvl2s_r(
     }
     ELSE
     {
+        Word32 tempd = L_shl( 1, sub( q_x, 1 ) );
         FOR( i = n - 1; i >= 0; i-- )
         {
-            temp = L_shr( x[i], sub( q_x, 1 ) );
-            temp = L_shr( L_add( temp, 1 ), 1 );
+            temp = L_add( x[i], tempd );
+            temp = L_shr( temp, q_x );
 
             IF( GT_32( temp, MAX16B ) )
             {
diff --git a/lib_dec/igf_dec_fx.c b/lib_dec/igf_dec_fx.c
index c09ca95ef..4f74c31ca 100644
--- a/lib_dec/igf_dec_fx.c
+++ b/lib_dec/igf_dec_fx.c
@@ -2904,19 +2904,22 @@ static void IGF_getWhiteSpectralData_ivas(
     Word16 guard_bits = add( find_guarded_bits_fx( add( i_mult( 2, level ), 1 ) ), 1 ) / 2;
     s_l = sub( s_l, guard_bits );
 
+    Word16 quo = BASOP_Util_Divide3216_Scale( ONE_IN_Q30, add( shl( level, 1 ), 1 ), &tmp_e );
+    tmp_e = add( tmp_e, 1 );
+
+    ak_e = add( tmp_e, sub( shl( sub( in_e, s_l ), 1 ), 15 ) ); // tmp_e + 2 * (in_e - s_l) - 15
     FOR( i = start; i < stop - level; i++ )
     {
         ak = 0;
         move32();
-        ak_e = 0;
         move32();
         FOR( j = i - level; j < i + level + 1; j++ )
         {
             tmp_16 = extract_h( L_shl( in[j], s_l ) ); // e: in_e - s_l
             ak = L_mac( ak, tmp_16, tmp_16 );          // e: 2 * (in_e - s_l)
         }
-        ak = L_deposit_h( BASOP_Util_Divide3216_Scale( ak, add( shl( level, 1 ), 1 ), &tmp_e ) );
-        ak_e = add( tmp_e, sub( shl( sub( in_e, s_l ), 1 ), 15 ) ); // tmp_e + 2 * (in_e - s_l) - 15
+        ak = Mult_32_16( ak, quo ); // mean over the 2 * level + 1 taps, via the reciprocal quo computed before the loop
+
 
         n = sub( 30, add( norm_l( ak ), sub( 31, ak_e ) ) );
         n = shr( n, 1 );
@@ -2929,7 +2932,6 @@ static void IGF_getWhiteSpectralData_ivas(
     FOR( ; i < stop; i++ )
     {
         ak = 0;
-        ak_e = 0;
         move32();
         move16();
 
@@ -2938,8 +2940,7 @@ static void IGF_getWhiteSpectralData_ivas(
             tmp_16 = extract_h( L_shl( in[j], s_l ) ); // e: in_e - s_l
             ak = L_mac( ak, tmp_16, tmp_16 );          // e: 2 * (in_e - s_l)
         }
-        ak = L_deposit_h( BASOP_Util_Divide3216_Scale( ak, sub( stop, sub( i, level ) ), &tmp_e ) );
-        ak_e = add( tmp_e, sub( shl( sub( in_e, s_l ), 1 ), 15 ) ); // tmp_e + 2 * (in_e - s_l) - 15
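+        ak = Mult_32_16( ak, quo ); // NOTE(review): quo is 1 / ( 2 * level + 1 ), but the removed code divided by stop - ( i - level ) here - confirm the changed normalization is intended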
 
         n = sub( 30, add( norm_l( ak ), sub( 31, ak_e ) ) );
         n = shr( n, 1 );
diff --git a/lib_dec/ivas_spar_decoder.c b/lib_dec/ivas_spar_decoder.c
index fdace42d8..f76961ebe 100644
--- a/lib_dec/ivas_spar_decoder.c
+++ b/lib_dec/ivas_spar_decoder.c
@@ -1687,8 +1687,9 @@ void ivas_spar_dec_upmixer_sf_fx(
     SPAR_DEC_HANDLE hSpar;
     Word16 num_md_sub_frames;
     Word16 q1 = 30;
+    Word16 prod;
     move16();
-    push_wmops( "ivas_spar_dec_upmixer_sf" );
+    push_wmops( "ivas_spar_dec_upmixer_sf_fx" );
     hSpar = st_ivas->hSpar;
     hDecoderConfig = st_ivas->hDecoderConfig;
     nchan_transport = hSpar->hMdDec->spar_md_cfg.nchan_transport; /*Q0*/
@@ -1706,6 +1707,8 @@ void ivas_spar_dec_upmixer_sf_fx(
     slot_idx_start = hSpar->slots_rendered; /*Q0*/
     move16();
 
+    prod = i_mult( slot_idx_start, slot_size );
+
     test();
     IF( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_32( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) )
     {
@@ -1716,7 +1719,7 @@ void ivas_spar_dec_upmixer_sf_fx(
 
         FOR( i = 0; i < nchan_internal; i++ )
         {
-            p_tc_fx[i] = st_ivas->hTcBuffer->tc_fx[( i + nchan_ism )] + i_mult( slot_idx_start, slot_size ); /*Q11*/
+            p_tc_fx[i] = st_ivas->hTcBuffer->tc_fx[( i + nchan_ism )] + prod; /*Q11*/
         }
 
         test();
@@ -1725,7 +1728,7 @@ void ivas_spar_dec_upmixer_sf_fx(
         {
             FOR( i = 0; i < nchan_ism; i++ )
             {
-                p_tc_fx[( i + nchan_internal )] = st_ivas->hTcBuffer->tc_fx[i] + i_mult( slot_idx_start, slot_size ); /*Q11*/
+                p_tc_fx[( i + nchan_internal )] = st_ivas->hTcBuffer->tc_fx[i] + prod; /*Q11*/
             }
         }
     }
@@ -1733,7 +1736,7 @@ void ivas_spar_dec_upmixer_sf_fx(
     {
         FOR( i = 0; i < nchan_internal; i++ )
         {
-            p_tc_fx[i] = st_ivas->hTcBuffer->tc_fx[i] + i_mult( slot_idx_start, slot_size ); /*Q11*/
+            p_tc_fx[i] = st_ivas->hTcBuffer->tc_fx[i] + prod; /*Q11*/
         }
     }
 
@@ -1897,14 +1900,14 @@ void ivas_spar_dec_upmixer_sf_fx(
                         }
                         ELSE
                         {
-                            cldfb_par_fx = 0;
-                            move32();
+                            Word64 acc = 0;
+                            move64();
                             FOR( spar_band = bin2band->p_spar_start_bands[cldfb_band]; spar_band < num_spar_bands; spar_band++ )
                             {
                                 /* accumulate contributions from all SPAR bands */
-                                Word16 tmp = extract_l( L_shr( bin2band->pp_cldfb_weights_per_spar_band_fx[cldfb_band][spar_band], 7 ) ); /*Q15*/
-                                cldfb_par_fx = L_add_sat( cldfb_par_fx, Mpy_32_16_1( mixer_mat_fx[out_ch][in_ch][spar_band], tmp ) );     /*q1*/
+                                acc = W_mac_32_32( acc, mixer_mat_fx[out_ch][in_ch][spar_band], bin2band->pp_cldfb_weights_per_spar_band_fx[cldfb_band][spar_band] ); // q1+ Q23
                             }
+                            cldfb_par_fx = W_shl_sat_l( acc, -23 ); // q1
                         }
 
                         out_re_fx[out_ch] = Madd_32_32( out_re_fx[out_ch], cldfb_in_ts_re_fx[in_ch][ts][cldfb_band], cldfb_par_fx ); /*q1-25*/
diff --git a/lib_rend/ivas_dirac_onsets_dec.c b/lib_rend/ivas_dirac_onsets_dec.c
index 9e82fa022..f53f41404 100644
--- a/lib_rend/ivas_dirac_onsets_dec.c
+++ b/lib_rend/ivas_dirac_onsets_dec.c
@@ -43,110 +43,6 @@
 #include "wmc_auto.h"
 #include "prot_fx.h"
 
-static Word16 BASOP_Util_Cmp_Mant32Exp_sat( Word32 a_m, Word16 a_e, Word32 b_m, Word16 b_e );
-Word16 BASOP_Util_Cmp_Mant32Exp_sat /*!< o: flag: result of comparison */
-    /*      0, if a == b               */
-    /*      1, if a > b                */
-    /*     -1, if a < b                */
-    ( Word32 a_m,  /*!< i: Mantissa of 1st operand a  */
-      Word16 a_e,  /*!< i: Exponent of 1st operand a  */
-      Word32 b_m,  /*!< i: Mantissa of 2nd operand b  */
-      Word16 b_e ) /*!< i: Exponent of 2nd operand b  */
-
-{
-    Word32 diff_m;
-    Word16 diff_e, shift, result;
-
-
-    /*
-       This function compares two input parameters, both represented by a 32-bit mantissa and a 16-bit exponent.
-       If both values are identical, 0 is returned.
-       If a is greater b, 1 is returned.
-       If a is less than b, -1 is returned.
-    */
-
-    /* Check, if both mantissa and exponents are identical, when normalized: return 0 */
-    shift = norm_l( a_m );
-    IF( shift )
-    {
-        a_m = L_shl( a_m, shift );
-    }
-    IF( shift )
-    {
-        a_e = sub( a_e, shift );
-    }
-
-    shift = norm_l( b_m );
-    IF( shift )
-    {
-        b_m = L_shl( b_m, shift );
-    }
-    IF( shift )
-    {
-        b_e = sub( b_e, shift );
-    }
-
-    /* align exponent, if any mantissa is zero */
-    if ( !a_m )
-    {
-        a_e = b_e;
-        move16();
-    }
-    if ( !b_m )
-    {
-        b_e = a_e;
-        move16();
-    }
-
-    BASOP_SATURATE_WARNING_OFF_EVS
-    diff_m = L_sub_sat( a_m, b_m );
-    BASOP_SATURATE_WARNING_ON_EVS
-    diff_e = sub( a_e, b_e );
-
-    test();
-    IF( diff_m == 0 && diff_e == 0 )
-    {
-        return 0;
-    }
-
-    /* Check sign, exponent and mantissa to identify, whether a is greater b or not */
-    result = -1;
-    move16();
-
-    IF( a_m >= 0 )
-    {
-        /* a is positive */
-        if ( b_m < 0 )
-        {
-            result = 1;
-            move16();
-        }
-
-        test();
-        test();
-        test();
-        if ( ( b_m >= 0 ) && ( ( diff_e > 0 ) || ( diff_e == 0 && diff_m > 0 ) ) )
-        {
-            result = 1;
-            move16();
-        }
-    }
-    ELSE
-    {
-        /* a is negative */
-        test();
-        test();
-        test();
-        if ( ( b_m < 0 ) && ( ( diff_e < 0 ) || ( diff_e == 0 && diff_m > 0 ) ) )
-        {
-            result = 1;
-            move16();
-        }
-    }
-    return result;
-}
-
-
 /*-------------------------------------------------------------------------
  * ivas_dirac_dec_onset_detection_open()
  *
@@ -247,31 +143,22 @@ void ivas_dirac_dec_onset_detection_process_fx(
                 move32();
             }
 
-            IF( *p_onset_detector_1_fx == 0 )
+            if ( *p_onset_detector_1_fx == 0 )
             {
 
-                *p_onset_detector_1_fx = L_add( *p_onset_detector_1_fx, EPSILON_FX ); /* Q(q_onset_detector) */
+                *p_onset_detector_1_fx = EPSILON_FX; /* Q(q_onset_detector) */
                 move32();
             }
             /*onset filter limited between 0 and 1*/
             tmp_fx = BASOP_Util_Divide3232_Scale( *p_onset_detector_2_fx, *p_onset_detector_1_fx, &e_scale );
-            tmp32_fx = L_mult0( tmp_fx, DIRAC_ONSET_GAIN_FX );
+            tmp32_fx = L_mult0( tmp_fx, DIRAC_ONSET_GAIN_FX ); // Q= Q12 + (15-e_scale)
             if ( tmp32_fx < 0 )
             {
                 tmp32_fx = 0;
                 move32();
             }
-            Word16 comp_flag = BASOP_Util_Cmp_Mant32Exp_sat( tmp32_fx, add( e_scale, 4 ), ONE_IN_Q30, 1 );
-            test();
-            IF( EQ_16( comp_flag, 1 ) || comp_flag == 0 )
-            {
-                tmp32_fx = ONE_IN_Q31;
-                move32();
-            }
-            ELSE
-            {
-                tmp32_fx = L_shl( tmp32_fx, add( e_scale, 4 ) );
-            }
+            tmp32_fx = L_shl_sat( tmp32_fx, add( e_scale, 4 ) );
+
             onset_filter[b] = tmp32_fx;
             move32();
 
diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c
index d9377b5c0..4686ca1e6 100644
--- a/lib_rend/ivas_dirac_output_synthesis_dec.c
+++ b/lib_rend/ivas_dirac_output_synthesis_dec.c
@@ -823,6 +823,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                     Word32 sqr_inp, sqr;
 
                     /*Directonal sound gain nrg compensation*/
+                    c = L_add( ONE_IN_Q29 /*1 Q29*/, Mpy_32_16_1( L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_decorr_fx, ONE_IN_Q29 /*1 Q29*/ ), 5461 /*1.0 / 6.0 Q15*/ ) ); /*Diffuseness modelling nrg compensation*/ /*Q29*/
                     FOR( k = 0; k < num_freq_bands_diff; k++ )
                     {
                         a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses
@@ -838,15 +839,16 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                         {
                             IF( reference_power[k + ( ch_idx + 1 ) * num_freq_bands] == 0 )
                             {
-                                b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], 232831 /* EPSILON in exp 63 */, &b_exp );
-                                b_exp = add( b_exp, sub( sub( 31, q_reference_power ), 63 ) );
+                                b = MAX_16;
+                                move16();
+                                b_exp = 0;
+                                move16();
                             }
                             ELSE
                             {
                                 b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*Q(15-b_exp)*/
                             }
                         }
-                        c = L_add( ONE_IN_Q29 /*1 Q29*/, Mpy_32_16_1( L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_decorr_fx, ONE_IN_Q29 /*1 Q29*/ ), 5461 /*1.0 / 6.0 Q15*/ ) ); /*Diffuseness modellling nrg compensation*/ /*Q29*/
 
                         mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) );                                           // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31
                         mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q(q_diff_aab) = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31
@@ -903,6 +905,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                             move16();
                         }
                     }
+                    c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modelling nrg compensation*/ /* 1.0 / 6.0  = 5461 in Q15*/ /*Q27*/
                     FOR( ; k < num_freq_bands; k++ )
                     {
                         a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses
@@ -918,15 +921,16 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx(
                         {
                             IF( reference_power[k + ( ch_idx + 1 ) * num_freq_bands] == 0 )
                             {
-                                b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], 232831 /* EPSILON in exp 63 */, &b_exp );
-                                b_exp = add( b_exp, sub( sub( 31, q_reference_power ), 63 ) );
+                                b = MAX_16;
+                                move16();
+                                b_exp = 0;
+                                move16();
                             }
                             ELSE
                             {
                                 b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*q(15-b_exp)*/
                             }
                         }
-                        c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0  = 5461 in Q15*/ /*Q27*/
 
                         mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) );                                           // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31
                         mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31
@@ -1195,7 +1199,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
     Word16 buf_idx, ch_idx, i, l;
     Word16 num_freq_bands, num_freq_bands_diff;
     Word16 num_channels_dir, num_channels_diff;
-    Word32 g, g1, g2;
+    Word32 g, g1[MAX_FREQUENCY_BANDS], g2;
     Word32 *p_gains_dir, *p_gains_diff;
     Word32 *p_gains_dir_prev, *p_gains_diff_prev;
     Word32 *p_cy_cross_dir_smooth;
@@ -1209,12 +1213,14 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
     DIRAC_OUTPUT_SYNTHESIS_STATE h_dirac_output_synthesis_state;
     Word16 nchan_transport_foa;
     Word16 ch_idx_diff;
+    Word32 cmp1, cmp2;
     Word32 aux_buf[CLDFB_NO_CHANNELS_MAX];
     Word32 ratio_float[DIRAC_HO_NUMSECTORS * CLDFB_NO_CHANNELS_MAX];
     Word16 q_com = 0;
     move16();
     Word16 exp = 0;
     move16();
+    Word16 q_shift;
 
     /* collect some often used parameters */
     h_dirac_output_synthesis_params = hDirACRend->h_output_synthesis_psd_params;
@@ -1234,6 +1240,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
     nchan_transport_foa = s_min( 4, nchan_transport );
     move16();
 
+    Word16 prod = imult1616( num_freq_bands, num_channels_dir );
     /*-----------------------------------------------------------------*
      * comput target Gains
      *-----------------------------------------------------------------*/
@@ -1401,6 +1408,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
         move16();
 
         /*Diffuse gain*/
+        q_com = s_min( h_dirac_output_synthesis_state.q_cy_auto_diff_smooth_prev, Q31 );
         FOR( ch_idx = nchan_transport_foa; ch_idx < num_channels_diff; ch_idx++ )
         {
             v_multc_fixed_16( h_dirac_output_synthesis_state.diffuse_power_factor_fx,                               // Q31
@@ -1409,7 +1417,6 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
                               num_freq_bands_diff );
 
             // Scale to bring in common Q-factor
-            q_com = s_min( h_dirac_output_synthesis_state.q_cy_auto_diff_smooth_prev, Q31 );
             Scale_sig32( &h_dirac_output_synthesis_state.cy_auto_diff_smooth_fx[ch_idx * num_freq_bands_diff],
                          num_freq_bands_diff,
                          sub( q_com, Q31 ) ); /*q31->q_com*/
@@ -1431,59 +1438,71 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
     p_cy_cross_dir_smooth = h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx;
     p_gains_dir = h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx;
 
+    FOR( l = 0; l < num_freq_bands; l++ )
+    {
+        g1[l] = Madd_32_32( POINT_3679_Q31, onset_filter[l], POINT_1175_Q31 - POINT_3679_Q31 ); // Q31, (Q31, Q31) -> Q31
+        move32();
+    }
+
+    q_shift = sub( 26, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev );
+    move16();
+
     /* Direct gains */
     IF( hodirac_flag )
     {
+        cmp1 = L_shr( 66437775 /* 0.99f in Q26 */, q_shift );
+        cmp2 = L_shr( ONE_IN_Q27 /* 2.0f in Q26 */, q_shift );
         FOR( ch_idx = 0; ch_idx < nchan_transport_foa; ch_idx++ )
         {
             FOR( l = 0; l < num_freq_bands; l++ )
             {
-                g1 = Madd_32_32( POINT_3679_Q31, onset_filter[l], POINT_1175_Q31 - POINT_3679_Q31 );                                                          // Q31, (Q31, Q31) -> Q31
-                g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1 ), *( p_gains_dir ) );                                                                                  // (Q31, p_gains_dir_q) -> p_gains_dir_q
-                g2 = L_add_sat( g2, Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth++ ) ) ) );                                                                      // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q
-                g2 = L_max( g2, Mpy_32_32( 2126008812 /* 0.99f in Q31 */, L_shl( 1, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ) ) );          // p_gains_dir_q
-                g2 = L_min( g2, Mpy_32_32( ONE_IN_Q31 /* 2.0f in Q30 */, L_shl( 1, add( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 1 ) ) ) ); // p_gains_dir_q
-                *( p_gains_dir++ ) = g2;                                                                                                                      // p_gains_dir_q
+                g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1[l] ), *( p_gains_dir ) );             // (Q31, p_gains_dir_q) -> p_gains_dir_q
+                g2 = L_add_sat( g2, Mpy_32_32( g1[l], ( *( p_cy_cross_dir_smooth++ ) ) ) ); // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q
+                g2 = L_max( g2, cmp1 );                                                     // p_gains_dir_q
+                g2 = L_min( g2, cmp2 );                                                     // p_gains_dir_q
+                *( p_gains_dir++ ) = g2;                                                    // p_gains_dir_q
                 move32();
             }
         }
     }
     ELSE
     {
+        cmp1 = L_shr( 57042534 /* 0.85f in Q26 */, q_shift );
+        cmp2 = L_shr( 77175193 /* 1.15f in Q26 */, q_shift );
         FOR( ch_idx = 0; ch_idx < nchan_transport_foa; ch_idx++ )
         {
             FOR( l = 0; l < num_freq_bands; l++ )
             {
-                g1 = Madd_32_32( POINT_3679_Q31, onset_filter[l], POINT_1175_Q31 - POINT_3679_Q31 );                                                           // Q31, (Q31, Q31) -> Q31
-                g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1 ), *( p_gains_dir ) );                                                                                   // (Q31, p_gains_dir_q) -> p_gains_dir_q
-                g2 = L_add_sat( g2, Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth++ ) ) ) );                                                                       // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q
-                g2 = L_max( g2, Mpy_32_32( 1825361101 /* 0.85f in Q31 */, L_shl( 1, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ) ) );           // p_gains_dir_q
-                g2 = L_min( g2, Mpy_32_32( 1234803098 /* 1.15f in Q30 */, L_shl( 1, add( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 1 ) ) ) ); // p_gains_dir_q
-                *( p_gains_dir++ ) = g2;                                                                                                                       // p_gains_dir_q
+                g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1[l] ), *( p_gains_dir ) );             // (Q31, p_gains_dir_q) -> p_gains_dir_q
+                g2 = L_add_sat( g2, Mpy_32_32( g1[l], ( *( p_cy_cross_dir_smooth++ ) ) ) ); // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q
+                g2 = L_max( g2, cmp1 );                                                     // p_gains_dir_q
+                g2 = L_min( g2, cmp2 );                                                     // p_gains_dir_q
+                *( p_gains_dir++ ) = g2;                                                    // p_gains_dir_q
                 move32();
             }
         }
     }
 
     /*Directional gains*/
+    cmp1 = L_shr( -DIRAC_GAIN_LIMIT_Q26, q_shift );
+    cmp2 = L_negate( cmp1 );
     FOR( ch_idx = nchan_transport_foa; ch_idx < num_channels_dir; ch_idx++ )
     {
         FOR( l = 0; l < num_freq_bands; l++ )
         {
-            g1 = Madd_32_32( POINT_3679_Q31, onset_filter[l], POINT_1175_Q31 - POINT_3679_Q31 );                                                                        // Q31, (Q31, Q31) -> Q31
-            g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1 ), *( p_gains_dir ) );                                                                                                // (Q31, p_gains_dir_q) -> p_gains_dir_q
-            g2 = L_add_sat( g2, Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth++ ) ) ) );                                                                                    // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q
-            g2 = L_max( g2, W_extract_h( W_shl( W_mult_32_32( -DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ) ), Q5 ) ) ); // p_gains_dir_q
-            g2 = L_min( g2, W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ) ), Q5 ) ) );  // p_gains_dir_q
-            *( p_gains_dir++ ) = g2;                                                                                                                                    // p_gains_dir_q
+            g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1[l] ), *( p_gains_dir ) );             // (Q31, p_gains_dir_q) -> p_gains_dir_q
+            g2 = L_add_sat( g2, Mpy_32_32( g1[l], ( *( p_cy_cross_dir_smooth++ ) ) ) ); // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q
+            g2 = L_max( g2, cmp1 );                                                     // p_gains_dir_q
+            g2 = L_min( g2, cmp2 );                                                     // p_gains_dir_q
+            *( p_gains_dir++ ) = g2;                                                    // p_gains_dir_q
             move32();
         }
     }
 
     IF( hodirac_flag )
     {
-        p_cy_cross_dir_smooth = h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx + imult1616( num_freq_bands, num_channels_dir );
-        p_gains_dir = h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx + imult1616( num_freq_bands, num_channels_dir );
+        p_cy_cross_dir_smooth = h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx + prod;
+        p_gains_dir = h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx + prod;
 
         /*Direct gains*/
         FOR( ch_idx = 0; ch_idx < nchan_transport_foa; ch_idx++ )
@@ -1500,12 +1519,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
         {
             FOR( l = 0; l < num_freq_bands; l++ )
             {
-                g1 = Madd_32_32( POINT_3679_Q31, onset_filter[l], POINT_1175_Q31 - POINT_3679_Q31 );                                                                        // Q31, (Q31, Q31) -> Q31
-                g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1 ), *( p_gains_dir ) );                                                                                                // (Q31, p_gains_dir_q) -> p_gains_dir_q
-                g2 = L_add_sat( g2, Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth++ ) ) ) );                                                                                    // (p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q
-                g2 = L_max( g2, W_extract_h( W_shl( W_mult_32_32( -DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ) ), Q5 ) ) ); // p_gains_dir_q
-                g2 = L_min( g2, W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ) ), Q5 ) ) );  // p_gains_dir_q
-                *( p_gains_dir++ ) = g2;                                                                                                                                    // p_gains_dir_q
+                g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1[l] ), *( p_gains_dir ) );             // (Q31, p_gains_dir_q) -> p_gains_dir_q
+                g2 = L_add_sat( g2, Mpy_32_32( g1[l], ( *( p_cy_cross_dir_smooth++ ) ) ) ); // p_gains_dir_q, (Q31, p_gains_dir_q) -> p_gains_dir_q
+                g2 = L_max( g2, cmp1 );                                                     // p_gains_dir_q
+                g2 = L_min( g2, cmp2 );                                                     // p_gains_dir_q
+                *( p_gains_dir++ ) = g2;                                                    // p_gains_dir_q
                 move32();
             }
         }
@@ -1514,17 +1532,18 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
     /*Diffuse gains*/
     p_cy_auto_diff_smooth = h_dirac_output_synthesis_state.cy_auto_diff_smooth_fx + imult1616( nchan_transport_foa, num_freq_bands_diff );
     p_gains_diff = h_dirac_output_synthesis_state.cy_auto_diff_smooth_prev_fx + imult1616( nchan_transport_foa, num_freq_bands_diff );
+    g1[0] = POINT_1175_Q31; // Q31
+    move32();
+    cmp2 = L_shr( DIRAC_GAIN_LIMIT_Q26, sub( 26, h_dirac_output_synthesis_state.q_cy_auto_diff_smooth_prev ) ); // upper limit in p_gains_diff_q; must use the diffuse Q, not q_cy_cross_dir_smooth_prev
     FOR( ch_idx = nchan_transport_foa; ch_idx < num_channels_diff; ch_idx++ )
     {
         FOR( l = 0; l < num_freq_bands_diff; l++ )
         {
-            g1 = POINT_1175_Q31; // Q31
-            move32();
-            g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1 ), *( p_gains_diff ) );                                                                                              // (Q31, p_gains_dir_q) -> p_gains_dir_q
-            g2 = L_add_sat( g2, Mpy_32_32( g1, ( *( p_cy_auto_diff_smooth++ ) ) ) );                                                                                   // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q
-            g2 = L_max( g2, 0 );                                                                                                                                       // p_gains_diff_q
-            g2 = L_min( g2, W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state.q_cy_auto_diff_smooth_prev ) ), Q5 ) ) ); // p_gains_diff_q
-            *( p_gains_diff++ ) = g2;                                                                                                                                  // p_gains_diff_q
+            g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1[0] ), *( p_gains_diff ) );            // (Q31, p_gains_diff_q) -> p_gains_diff_q
+            g2 = L_add_sat( g2, Mpy_32_32( g1[0], ( *( p_cy_auto_diff_smooth++ ) ) ) ); // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q
+            g2 = L_max( g2, 0 );                                                        // p_gains_diff_q
+            g2 = L_min( g2, cmp2 );                                                     // p_gains_diff_q
+            *( p_gains_diff++ ) = g2;                                                   // p_gains_diff_q
             move32();
         }
     }
@@ -1535,8 +1554,9 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
 
     FOR( buf_idx = 0; buf_idx < nbslots; ++buf_idx )
     {
-        g1 = L_deposit_h( h_dirac_output_synthesis_params.interpolator_fx[buf_idx] ); // Q31
-        g2 = L_sub( ONE_IN_Q31, g1 );                                                 // Q31
+        g1[0] = L_deposit_h( h_dirac_output_synthesis_params.interpolator_fx[buf_idx] ); // Q31
+        move32();
+        g2 = L_sub( ONE_IN_Q31, g1[0] ); // Q31
 
         /*Direct input->output*/
         p_gains_dir = h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx; // (p_gains_dir_q)
@@ -1557,7 +1577,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
                            shl( ch_idx * num_freq_bands, Q1 );
             FOR( l = 0; l < num_freq_bands; l++ )
             {
-                g = Madd_32_32( Mpy_32_32( g1, ( *( p_gains_dir++ ) ) ), g2, ( *( p_gains_dir_prev++ ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q)
+                g = Madd_32_32( Mpy_32_32( g1[0], ( *( p_gains_dir++ ) ) ), g2, ( *( p_gains_dir_prev++ ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q)
 
                 output_real[l * num_channels_dir + ch_idx] = Mpy_32_32( g, ( *( p_proto_diff++ ) ) ); // (p_gains_dir_q, p_proto_diff_q) -> (p_gains_dir_q + p_proto_diff_q - 31)
                 move32();
@@ -1583,25 +1603,31 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
                                shl( i_mult( proto_direct_index[1], num_freq_bands ), Q1 );
                     FOR( l = 0; l < num_freq_bands; l++ )
                     {
-                        gs1 = Madd_32_32( Mpy_32_32( g1, ( *( p_gains_dir ) ) ), g2, ( *( p_gains_dir_prev ) ) );                                                                                                 // (Q31, p_gains_dir_q) -> (p_gains_dir_q)
-                        gs2 = Madd_32_32( Mpy_32_32( g1, ( *( p_gains_dir + imult1616( num_freq_bands, num_channels_dir ) ) ) ), g2, ( *( p_gains_dir_prev + imult1616( num_freq_bands, num_channels_dir ) ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q)
+
+                        Word32 temp1, temp2;
+                        gs1 = Madd_32_32( Mpy_32_32( g1[0], ( *( p_gains_dir ) ) ), g2, ( *( p_gains_dir_prev ) ) );               // (Q31, p_gains_dir_q) -> (p_gains_dir_q)
+                        gs2 = Madd_32_32( Mpy_32_32( g1[0], ( *( p_gains_dir + prod ) ) ), g2, ( *( p_gains_dir_prev + prod ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q)
                         p_gains_dir++;
                         p_gains_dir_prev++;
 
+                        temp1 = Mpy_32_32( 1903158016 /* 1.772454e+00f / 2 in Q31 */, ( *p_proto ) );
+                        temp2 = Mpy_32_32( 1098788992 /* 1.023327e+00f / 2 in Q31 */, ( *p_proto2 ) );
                         // ((p_gains_dir_q, p_proto_dir_q) >> 1) -> (p_gains_dir_q + p_proto_dir_q - 31)
                         output_real[l * num_channels_dir + ch_idx] =
                             Madd_32_32(
-                                Mpy_32_32( gs1, ( L_add( Mpy_32_32( 1903158016 /* 1.772454e+00f / 2 in Q31 */, ( *p_proto ) ), Mpy_32_32( 1098788992 /* 1.023327e+00f / 2 in Q31 */, ( *p_proto2 ) ) ) ) ), /* s1 */
-                                gs2, L_sub( Mpy_32_32( 1903158016 /* 1.772454e+00f / 2 in Q31 */, ( *p_proto ) ), Mpy_32_32( 1098788992 /* 1.023327e+00f / 2 in Q31 */, ( *p_proto2 ) ) ) );                /* s2 */
+                                Mpy_32_32( gs1, ( L_add( temp1, temp2 ) ) ), /* s1 */
+                                gs2, L_sub( temp1, temp2 ) );                /* s2 */
                         move32();
                         p_proto++;
                         p_proto2++;
 
+                        temp1 = Mpy_32_32( 1903158016 /* 1.772454e+00f / 2 in Q31 */, ( *p_proto ) );
+                        temp2 = Mpy_32_32( 1098788992 /* 1.023327e+00f / 2 in Q31 */, ( *p_proto2 ) );
                         // ((p_gains_dir_q, p_proto_dir_q) >> 1) -> (p_gains_dir_q + p_proto_dir_q - 31)
                         output_imag[l * num_channels_dir + ch_idx] =
                             Madd_32_32(
-                                Mpy_32_32( gs1, ( L_add( Mpy_32_32( 1903158016 /* 1.772454e+00f / 2 in Q31 */, ( *p_proto ) ), Mpy_32_32( 1098788992 /* 1.023327e+00f / 2 in Q31 */, ( *p_proto2 ) ) ) ) ),
-                                gs2, L_sub( Mpy_32_32( 1903158016 /* 1.772454e+00f / 2 in Q31 */, ( *p_proto ) ), Mpy_32_32( 1098788992 /* 1.023327e+00f / 2 in Q31 */, ( *p_proto2 ) ) ) );
+                                Mpy_32_32( gs1, ( L_add( temp1, temp2 ) ) ),
+                                gs2, L_sub( temp1, temp2 ) );
                         move32();
                         p_proto++;
                         p_proto2++;
@@ -1612,14 +1638,15 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
                     p_proto = h_dirac_output_synthesis_state.proto_direct_buffer_f_fx +
                               shl( i_mult( buf_idx, i_mult( num_freq_bands, num_protos_dir ) ), Q1 ) +
                               shl( i_mult( proto_direct_index[ch_idx], num_freq_bands ), Q1 );
+                    Word16 diff = sub( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 31 );
                     FOR( l = 0; l < num_freq_bands; l++ )
                     {
                         p_gains_dir++;
                         p_gains_dir_prev++;
 
-                        output_real[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), sub( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 31 ) ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31)
+                        output_real[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), diff ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31)
                         move32();
-                        output_imag[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), sub( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 31 ) ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31)
+                        output_imag[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), diff ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31)
                         move32();
                     }
                 }
@@ -1633,7 +1660,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
                 {
                     FOR( l = 0; l < num_freq_bands; l++ )
                     {
-                        g = Madd_32_32( Mpy_32_32( g1, ( *( p_gains_dir++ ) ) ), g2, ( *( p_gains_dir_prev++ ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q)
+                        g = Madd_32_32( Mpy_32_32( g1[0], ( *( p_gains_dir++ ) ) ), g2, ( *( p_gains_dir_prev++ ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q)
 
                         output_real[l * num_channels_dir + ch_idx] = Mpy_32_32( g, ( *( p_proto++ ) ) ); // (p_gains_dir_q, p_proto_dir_q) -> (p_gains_dir_q + p_proto_dir_q - 31)
                         move32();
@@ -1643,14 +1670,15 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
                 }
                 ELSE
                 {
+                    Word16 shift_q = sub( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 31 );
                     FOR( l = 0; l < num_freq_bands; l++ )
                     {
                         p_gains_dir++;
                         p_gains_dir_prev++;
 
-                        output_real[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), sub( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 31 ) ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31)
+                        output_real[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), shift_q ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31)
                         move32();
-                        output_imag[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), sub( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 31 ) ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31)
+                        output_imag[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), shift_q ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31)
                         move32();
                     }
                 }
@@ -1681,7 +1709,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
                 ch_idx_diff = add( ch_idx_diff, 1 );
                 FOR( l = 0; l < num_freq_bands_diff; l++ )
                 {
-                    g = Madd_32_32( Mpy_32_32( g1, ( *( p_gains_diff++ ) ) ), g2, ( *( p_gains_diff_prev++ ) ) ); // (Q31, p_gains_diff_q) -> p_gains_diff_q
+                    g = Madd_32_32( Mpy_32_32( g1[0], ( *( p_gains_diff++ ) ) ), g2, ( *( p_gains_diff_prev++ ) ) ); // (Q31, p_gains_diff_q) -> p_gains_diff_q
 
                     // ((p_gains_diff_q, p_proto_diff_q) >> Q1) -> (p_gains_diff_q + p_proto_diff_q - 31)
                     output_real[l * num_channels_dir + hDirACRend->sba_map_tc[ch_idx]] =
@@ -1765,11 +1793,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
     /* store estimates for next synthesis block */
     IF( hodirac_flag )
     {
-        Copy32( h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx, h_dirac_output_synthesis_state.gains_dir_prev_fx, imult1616( num_freq_bands, num_channels_dir ) * DIRAC_HO_NUMSECTORS ); /*h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev*/
+        Copy32( h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx, h_dirac_output_synthesis_state.gains_dir_prev_fx, prod * DIRAC_HO_NUMSECTORS ); /*h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev*/
     }
     ELSE
     {
-        Copy32( h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx, h_dirac_output_synthesis_state.gains_dir_prev_fx, imult1616( num_freq_bands, num_channels_dir ) ); /*h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev*/
+        Copy32( h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx, h_dirac_output_synthesis_state.gains_dir_prev_fx, prod ); /*h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev*/
     }
     *q_cy_cross_dir_smooth_prev = h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev;
     move16();
@@ -1781,11 +1809,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx(
     /* reset values */
     IF( hodirac_flag )
     {
-        set_zero_fx( h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx, imult1616( num_freq_bands, num_channels_dir ) * DIRAC_HO_NUMSECTORS );
+        set_zero_fx( h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx, prod * DIRAC_HO_NUMSECTORS );
     }
     ELSE
     {
-        set_zero_fx( h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx, imult1616( num_freq_bands, num_channels_dir ) );
+        set_zero_fx( h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx, prod );
     }
 
     set_zero_fx( h_dirac_output_synthesis_state.cy_auto_diff_smooth_fx, imult1616( num_freq_bands_diff, num_channels_diff ) );
diff --git a/lib_rend/ivas_dirac_rend.c b/lib_rend/ivas_dirac_rend.c
index 1982862bf..ea507b4c7 100644
--- a/lib_rend/ivas_dirac_rend.c
+++ b/lib_rend/ivas_dirac_rend.c
@@ -1444,15 +1444,15 @@ void protoSignalComputation_shd_fx(
                 reference_power_fx[l] = L_shr( reference_power_fx[l + num_freq_bands], 1 ); /*2*Q(q_cldfb+min_q_shift)-31-1*/
                 move32();
 
+                re1 = L_shl( RealBuffer_fx[1][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/
+                re2 = L_shl( RealBuffer_fx[2][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/
+                re3 = L_shl( RealBuffer_fx[3][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/
+                im1 = L_shl( ImagBuffer_fx[1][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/
+                im2 = L_shl( ImagBuffer_fx[2][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/
+                im3 = L_shl( ImagBuffer_fx[3][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/
+
                 FOR( k = 1; k < 4; k++ )
                 {
-                    re1 = L_shl( RealBuffer_fx[1][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/
-                    re2 = L_shl( RealBuffer_fx[2][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/
-                    re3 = L_shl( RealBuffer_fx[3][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/
-                    im1 = L_shl( ImagBuffer_fx[1][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/
-                    im2 = L_shl( ImagBuffer_fx[2][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/
-                    im3 = L_shl( ImagBuffer_fx[3][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/
-
                     idx = i_mult( 3, Rmat_k[k] );
                     idx1 = add( l, i_mult( add( k, 1 ), num_freq_bands ) );
 
@@ -1477,13 +1477,16 @@ void protoSignalComputation_shd_fx(
                 }
 
                 *proto_direct_buffer_f_q = add( q_cldfb, min_q_shift );
+                move16();
                 *reference_power_q = sub( add( *proto_direct_buffer_f_q, *proto_direct_buffer_f_q ), 31 );
+                move16();
 
+                Word16 shift = sub( *proto_direct_buffer_f_q, q_cldfb );
                 FOR( k = 1; k < 4; k++ )
                 {
-                    RealBuffer_fx[k][0][l] = L_shr( p_proto_direct_buffer_fx[2 * ( k * num_freq_bands + l )], sub( *proto_direct_buffer_f_q, q_cldfb ) ); // proto_direct_buffer_f_q -> q_cldfb
+                    RealBuffer_fx[k][0][l] = L_shr( p_proto_direct_buffer_fx[2 * ( k * num_freq_bands + l )], shift ); // proto_direct_buffer_f_q -> q_cldfb
                     move32();
-                    ImagBuffer_fx[k][0][l] = L_shr( p_proto_direct_buffer_fx[2 * ( k * num_freq_bands + l ) + 1], sub( *proto_direct_buffer_f_q, q_cldfb ) ); // proto_direct_buffer_f_q -> q_cldfb
+                    ImagBuffer_fx[k][0][l] = L_shr( p_proto_direct_buffer_fx[2 * ( k * num_freq_bands + l ) + 1], shift ); // proto_direct_buffer_f_q -> q_cldfb
                     move32();
                 }
             }
diff --git a/lib_rend/ivas_efap.c b/lib_rend/ivas_efap.c
index 7d47f3174..0bf3b85a4 100644
--- a/lib_rend/ivas_efap.c
+++ b/lib_rend/ivas_efap.c
@@ -2479,7 +2479,7 @@ static Word16 in_tri_fx(
     Word16 tmp16, tmp_e;
     Word64 S[2];
     /* Threshold adjusted */
-    Word64 thresh_int = 4295; // 1e-6f in Q32
+    Word64 thresh_int = 35184640; // ~1e-6f in Q45 (4295 << 13, i.e. the former Q32 threshold rescaled)
     move64();
 
     /*
@@ -2490,7 +2490,7 @@ static Word16 in_tri_fx(
     */
 
     v_sub_fixed( B, A, tmpDot1, 2, 0 ); // tmpDot1 q22
-    v_sub_fixed( C, A, tmpDot2, 2, 0 ); // tmpDot 2q22
+    v_sub_fixed( C, A, tmpDot2, 2, 0 ); // tmpDot2 q22
 
     /* Verification of the non-colinearity : Q22 * Q22 = Q13 */
     invFactor = L_sub( Mpy_32_32( tmpDot1[0], tmpDot2[1] ), Mpy_32_32( tmpDot1[1], tmpDot2[0] ) ); /*q22+q22-q31->q13*/
@@ -2516,68 +2516,18 @@ static Word16 in_tri_fx(
     move32();
     matInv[1][1] = Mpy_32_32( tmpDot1[0], invFactor ); // q=22+invFactor_exp
     move32();
-
-    /* Computing S (Q13 + matInv_exp_final[i] + P_minus_A_exp_final + invFactor_exp - 1 ) =
-    matInv (Q22 + matInv_exp_final[i] + invFactor_exp) *(P-A) (Q22 + P_minus_A_exp_final) */
-    Word16 matInv_exp[2][2], P_minus_A_exp[2];
-    Word16 matInv_exp_final[2], P_minus_A_exp_final;
-    FOR( Word32 i = 0; i < 2; i++ )
-    {
-        FOR( Word32 j = 0; j < 2; j++ )
-        {
-            matInv_exp[i][j] = 31;
-            move16();
-            IF( matInv[i][j] != 0 )
-            {
-                matInv_exp[i][j] = norm_l( matInv[i][j] );
-                move16();
-            }
-        }
-        matInv_exp_final[i] = s_min( matInv_exp[i][0], matInv_exp[i][1] );
-        move16();
-        P_minus_A_exp[i] = 31;
-        move16();
-        IF( P_minus_A[i] != 0 )
-        {
-            P_minus_A_exp[i] = norm_l( P_minus_A[i] );
-            move16();
-        }
-    }
-    P_minus_A_exp_final = s_min( P_minus_A_exp[0], P_minus_A_exp[1] );
-
-    S[0] = L_add( L_shr( Mpy_32_32( L_shl( matInv[0][0], matInv_exp_final[0] ), L_shl( P_minus_A[0], P_minus_A_exp_final ) ), Q1 ),
-                  L_shr( Mpy_32_32( L_shl( matInv[0][1], matInv_exp_final[0] ), L_shl( P_minus_A[1], P_minus_A_exp_final ) ), Q1 ) ); //(22+invFactor_exp+matInv_exp_final[0]+22+P_minus_A_exp_final-1)-31=>12+invFactor_exp+matInv_exp_final[0]+P_minus_A_exp_final
+    S[0] = W_mac_32_32( W_mult_32_32( matInv[0][0], P_minus_A[0] ), matInv[0][1], P_minus_A[1] ); // Q22+invFactor_exp +Q22 + 1
     move64();
-    S[1] = L_add( L_shr( Mpy_32_32( L_shl( matInv[1][0], matInv_exp_final[1] ), L_shl( P_minus_A[0], P_minus_A_exp_final ) ), Q1 ),
-                  L_shr( Mpy_32_32( L_shl( matInv[1][1], matInv_exp_final[1] ), L_shl( P_minus_A[1], P_minus_A_exp_final ) ), Q1 ) ); //(22+invFactor_exp+matInv_exp_final[1]+22+P_minus_A_exp_final-1)-31=>12+invFactor_exp+matInv_exp_final[0]+P_minus_A_exp_final
+    S[0] = W_shr( S[0], invFactor_exp ); // q45
+    move64();
+    S[1] = W_mac_32_32( W_mult_32_32( matInv[1][0], P_minus_A[0] ), matInv[1][1], P_minus_A[1] ); // Q22+invFactor_exp +Q22 + 1
+    move64();
+    S[1] = W_shr( S[1], invFactor_exp ); // q45
     move64();
-
-    /* Checking if we are in the triangle; For the theory, check Christian Borss article, section 3.2 */
-    // Q32 S
-    IF( sub( sub( sub( Q20, matInv_exp_final[0] ), P_minus_A_exp_final ), invFactor_exp ) < 0 )
-    {
-        S[0] = W_shr( S[0], sub( add( add( matInv_exp_final[0], P_minus_A_exp_final ), invFactor_exp ), Q20 ) ); // q32
-        move64();
-    }
-    ELSE
-    {
-        S[0] = W_shl( S[0], sub( sub( sub( Q20, matInv_exp_final[0] ), P_minus_A_exp_final ), invFactor_exp ) ); // q32
-        move64();
-    }
-    IF( sub( sub( sub( Q20, matInv_exp_final[1] ), P_minus_A_exp_final ), invFactor_exp ) < 0 )
-    {
-        S[1] = W_shr( S[1], sub( add( add( matInv_exp_final[1], P_minus_A_exp_final ), invFactor_exp ), Q20 ) ); // q32
-        move64();
-    }
-    ELSE
-    {
-        S[1] = W_shl( S[1], sub( sub( sub( Q20, matInv_exp_final[1] ), P_minus_A_exp_final ), invFactor_exp ) ); // q32
-        move64();
-    }
 
     test();
     test();
-    IF( LT_64( S[0], -thresh_int ) || LT_64( S[1], -thresh_int ) || GT_64( W_add( S[0], S[1] ), W_add( W_shl( 1, 32 ), thresh_int ) ) )
+    IF( LT_64( S[0], -thresh_int ) || LT_64( S[1], -thresh_int ) || GT_64( W_add( S[0], S[1] ), W_add( ONE_IN_Q45, thresh_int ) ) )
     {
         return 0;
     }
-- 
GitLab