diff --git a/lib_com/options.h b/lib_com/options.h
index 73f5a409e4bf5d7d3315e951bd455e9442826e64..32c53988aa4942a91bd1aac6f5a72f183a24cbad 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -75,6 +75,9 @@
 #define FIX_1379_MASA_ANGLE_ROUND
 
 /* Note: each compile switch (FIX_1101_...) is independent from the other ones */
+#define OPT_SBA_REND_V1_BE
+#define OPT_HEAD_ROT_REND_V1_BE
+#define OPT_SBA_DEC_V2_BE
 #define OPT_SBA_ENC_V1_BE
 #define OPT_BIN_RENDERER_V1
 #define OPT_BIN_RENDERER_V2
diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c
index b9084df8821060f9ee23f849fca3115bb82047dd..8e8a2b34368a34a551c8940955044ae3d9813674 100644
--- a/lib_dec/dec_tcx_fx.c
+++ b/lib_dec/dec_tcx_fx.c
@@ -2634,9 +2634,16 @@ void IMDCT_ivas_fx(
         Word32 fac;
         // fac = shl_sat( mult_r( extract_h( L_shr_sat( hTcxDec->conceal_eof_gain32, sub( 1, hTcxDec->conceal_eof_gain_e ) ) ), st->last_concealed_gain_syn_deemph ), 1 );
         fac = Mpy_32_16_1( hTcxDec->conceal_eof_gain32, st->last_concealed_gain_syn_deemph ); // q = 31 - hTcxDec->conceal_eof_gain_e - last_concealed_gain_syn_deemph_e
+#ifdef OPT_SBA_DEC_V2_BE
+        Word16 eff_e = add( hTcxDec->conceal_eof_gain_e, st->last_concealed_gain_syn_deemph_e );
+#endif /* OPT_SBA_DEC_V2_BE */
         FOR( Word16 ind = 0; ind < overlap; ind++ )
         {
+#ifdef OPT_SBA_DEC_V2_BE
+            old_syn_overl_fx[ind] = extract_h( L_shl_sat( Mpy_32_16_1( fac, old_syn_overl_fx[ind] ), eff_e ) ); // Q(-2)
+#else                                                                                                           /* OPT_SBA_DEC_V2_BE */
             old_syn_overl_fx[ind] = extract_h( L_shl_sat( Mpy_32_16_1( fac, old_syn_overl_fx[ind] ), add( hTcxDec->conceal_eof_gain_e, st->last_concealed_gain_syn_deemph_e ) ) ); // Q(-2)
+#endif                                                                                                          /* OPT_SBA_DEC_V2_BE */
             move16();
         }
     }
@@ -4713,9 +4720,16 @@ void decoder_tcx_noiseshaping_igf_fx(
         {
             /* If the exponent on the spec side (i>L_frame) is lesser, then shift all the values in the
             spec side by the difference to make both sides have the same exponent. */
+#ifdef OPT_SBA_DEC_V2_BE
+            Word16 diff_e = sub( frame_side_x_e, spec_side_x_e );
+#endif /* OPT_SBA_DEC_V2_BE */
             FOR( i = L_frame; i < L_spec; i++ )
             {
+#ifdef OPT_SBA_DEC_V2_BE
+                x_fx[i] = L_shr( x_fx[i], diff_e );
+#else  /* OPT_SBA_DEC_V2_BE */
                 x_fx[i] = L_shr( x_fx[i], sub( frame_side_x_e, spec_side_x_e ) );
+#endif /* OPT_SBA_DEC_V2_BE */
                 move32();
             }
         }
@@ -4723,9 +4737,16 @@ void decoder_tcx_noiseshaping_igf_fx(
         {
             /* If the exponent on the spec side (i>L_frame) is greater, then shift all the values in the
             frame side (i<L_frame) by the difference to make both sides have the same exponent. */
+#ifdef OPT_SBA_DEC_V2_BE
+            Word16 diff_e = sub( spec_side_x_e, frame_side_x_e );
+#endif /* OPT_SBA_DEC_V2_BE */
             FOR( i = 0; i < L_frame; i++ )
             {
+#ifdef OPT_SBA_DEC_V2_BE
+                x_fx[i] = L_shr( x_fx[i], diff_e );
+#else  /* OPT_SBA_DEC_V2_BE */
                 x_fx[i] = L_shr( x_fx[i], sub( spec_side_x_e, frame_side_x_e ) );
+#endif /* OPT_SBA_DEC_V2_BE */
                 move32();
             }
         }
diff --git a/lib_dec/ivas_binRenderer_internal_fx.c b/lib_dec/ivas_binRenderer_internal_fx.c
index 66f6c646c1fdca7b0a50e4da6cb51b65f2443b33..d2573c4d7af6cc5a69ba82e2e3a611c27642bb90 100644
--- a/lib_dec/ivas_binRenderer_internal_fx.c
+++ b/lib_dec/ivas_binRenderer_internal_fx.c
@@ -1851,7 +1851,6 @@ void ivas_binRenderer_fx(
 
     /* Compute Convolution */
     /* memory reset for the binaural output */
-
     FOR( pos_idx = 0; pos_idx < num_poses; pos_idx++ )
     {
         FOR( chIdx = 0; chIdx < BINAURAL_CHANNELS; chIdx++ )
@@ -1875,6 +1874,8 @@ void ivas_binRenderer_fx(
     }
 #endif /* OPT_BIN_RENDERER_V2 */
 
+    /* Note: on main, this nested FOR loop was removed as a "bit-exact optimization" under the switch OPT_SBA_DEC_V2_BE. */
+    /*       That removal was found to clash with split rendering, so the loop is kept here; the WMOPS impact should be assessed. */
     FOR( chIdx = 0; chIdx < hBinRenderer->hInputSetup->nchan_out_woLFE; chIdx++ )
     {
         FOR( k = 0; k < numTimeSlots; k++ )
@@ -2105,6 +2106,22 @@ void ivas_binRenderer_fx(
             }
         }
     }
+
+#ifdef OPT_SBA_DEC_V2_BE
+    Word16 len = sub( CLDFB_NO_CHANNELS_MAX, hBinRenderer->conv_band );
+
+    FOR( pos_idx = 0; pos_idx < num_poses; pos_idx++ )
+    {
+        FOR( k = 0; k < numTimeSlots; k++ )
+        {
+            set32_fx( &Cldfb_RealBuffer_Binaural_fx[pos_idx][0][k][hBinRenderer->conv_band], 0, len );
+            set32_fx( &Cldfb_RealBuffer_Binaural_fx[pos_idx][1][k][hBinRenderer->conv_band], 0, len );
+            set32_fx( &Cldfb_ImagBuffer_Binaural_fx[pos_idx][0][k][hBinRenderer->conv_band], 0, len );
+            set32_fx( &Cldfb_ImagBuffer_Binaural_fx[pos_idx][1][k][hBinRenderer->conv_band], 0, len );
+        }
+    }
+#endif /* OPT_SBA_DEC_V2_BE */
+
     pop_wmops();
     return;
 }
diff --git a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c
index caddb5c5cc9fe94936879def3f490ad0ae42ed6b..598f9082945fd0eb2385355d5460e71e4611cd79 100644
--- a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c
+++ b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c
@@ -605,8 +605,11 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
 #ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
                 {
                     Word16 shifter;
-
+#ifdef OPT_SBA_DEC_V2_BE
+                    shifter = sub( mixing_matrix_res_smooth_e, 31 );
+#else  /* OPT_SBA_DEC_V2_BE */
                     shifter = 31 - mixing_matrix_res_smooth_e;
+#endif /* OPT_SBA_DEC_V2_BE */
                     FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
                     {
                         int i;
@@ -625,8 +628,13 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
                             temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] ) );
                             idx += nY;
                         }
+#ifdef OPT_SBA_DEC_V2_BE
+                        Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_real, shifter );
+                        Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_imag, shifter );
+#else  /* OPT_SBA_DEC_V2_BE */
                         Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_real, shifter ) );
                         Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_imag, shifter ) );
+#endif /* OPT_SBA_DEC_V2_BE */
                     }
                 }
 
diff --git a/lib_dec/ivas_spar_decoder_fx.c b/lib_dec/ivas_spar_decoder_fx.c
index 7f12c592ed95f8864c686557a7c4b35679fb8ac0..da7e1c0994c859259348f36badfa86e553dc157a 100644
--- a/lib_dec/ivas_spar_decoder_fx.c
+++ b/lib_dec/ivas_spar_decoder_fx.c
@@ -1163,6 +1163,69 @@ void ivas_spar_get_parameters_fx(
     move16();
     Word16 add_weight_fx = sub( MAX_WORD16, weight_fx );
     Word16 add_weight_20ms_fx = sub( MAX_WORD16, weight_20ms_fx );
+#ifdef OPT_SBA_DEC_V2_BE
+    Word16 out_flag[IVAS_MAX_FB_MIXER_OUT_CH];
+
+    Word32 band_bool = LT_16( split_band, IVAS_MAX_NUM_BANDS );
+
+    FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ )
+    {
+        /* out_flag == 0: 20ms cross-fade for Transport channels in all frequency bands */
+        /* out_flag != 0: sub-frame processing for missing channels in all frequency bands */
+        out_flag[out_ch] = band_bool && ( 0 == ivas_is_res_channel( out_ch, hSpar->hMdDec->spar_md_cfg.nchan_transport ) );
+        move16();
+    }
+    Word32 frame_bool = GT_16( hSpar->i_subframe, 3 );
+
+    FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ )
+    {
+        IF( out_flag[out_ch] )
+        {
+            IF( frame_bool )
+            {
+                FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ )
+                {
+                    FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ )
+                    {
+                        par_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[ts1][out_ch][in_ch][spar_band], weight_fx ),
+                                                                           hSpar->hMdDec->mixer_mat_prev_fx[ts0][out_ch][in_ch][spar_band], add_weight_fx );
+                        move32();
+                    }
+                }
+            }
+            ELSE
+            {
+
+
+                FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ )
+                {
+                    FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ )
+                    {
+                        {
+                            par_mat_fx[out_ch][in_ch][spar_band] = hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band]; /*hSpar->hMdDec->Q_mixer_mat*/
+                            move32();
+                        }
+                    }
+                }
+            }
+        }
+        ELSE
+        {
+            FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ )
+            {
+                FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ )
+                {
+                    /* 20ms Transport channel reconstruction with matching encoder/decoder processing */
+                    Word16 prev_idx = SPAR_DIRAC_SPLIT_START_BAND < IVAS_MAX_NUM_BANDS ? 1 : 0; /* if SPAR_DIRAC_SPLIT_START_BAND == IVAS_MAX_NUM_BANDS, then the sub-frame mixer_mat delay line is not active */
+                    move16();
+                    par_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[prev_idx][out_ch][in_ch][spar_band], add_weight_20ms_fx ),
+                                                                       hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band], weight_20ms_fx ); /*hSpar->hMdDec->Q_mixer_mat*/
+                    move32();
+                }
+            }
+        }
+    }
+#else  /* OPT_SBA_DEC_V2_BE */
     FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ )
     {
         FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ )
@@ -1202,7 +1265,7 @@ void ivas_spar_get_parameters_fx(
             }
         }
     }
-
+#endif /* OPT_SBA_DEC_V2_BE */
     return;
 }
 
diff --git a/lib_dec/ivas_spar_md_dec_fx.c b/lib_dec/ivas_spar_md_dec_fx.c
index 7f487a75c272279cbc0ab168f8116743069db676..36b77732ae7e3eb7e66d842147d5a9830d7bae39 100644
--- a/lib_dec/ivas_spar_md_dec_fx.c
+++ b/lib_dec/ivas_spar_md_dec_fx.c
@@ -1318,6 +1318,14 @@ static void ivas_get_spar_matrices_fx(
                     tmp_C2_re_fx[0][j] = Mpy_32_32( active_w_dm_fac_fx, L_negate( hMdDec->spar_md.band_coeffs[( b + ( i_ts * IVAS_MAX_NUM_BANDS ) )].pred_re_fx[j - 1] ) ); // Q31 *Q22=Q22
                     move32();
                 }
+#ifdef OPT_SBA_DEC_V2_BE
+                re_fx1 = Madd_32_32( ONE_IN_Q13, tmp_C2_re_fx[0][1], tmp_C1_re_fx[1][0] ); // Q13+Q13
+
+                re_fx1 = Madd_32_32( re_fx1, tmp_C2_re_fx[0][2], tmp_C1_re_fx[2][0] ); // Q13+Q13
+
+                tmp_dm_re_fx[0][0] = L_shl( Madd_32_32( re_fx1, tmp_C2_re_fx[0][3], tmp_C1_re_fx[3][0] ), Q9 ); // (Q13+Q13) << Q9 = Q22;
+                                                                                                                //
+#else                                                                                                           /* OPT_SBA_DEC_V2_BE */
                 re_fx = Mpy_32_32( tmp_C2_re_fx[0][1], tmp_C1_re_fx[1][0] ); // Q22 *Q22 =Q13
                 re_fx1 = L_add( ONE_IN_Q13, re_fx );                         // Q13+Q13
 
@@ -1326,6 +1334,7 @@ static void ivas_get_spar_matrices_fx(
 
                 re_fx = Mpy_32_32( tmp_C2_re_fx[0][3], tmp_C1_re_fx[3][0] ); // Q22 *Q22 =Q13
                 tmp_dm_re_fx[0][0] = L_shl( L_add( re_fx1, re_fx ), Q9 );    // (Q13+Q13) << Q9 = Q22;
+#endif                                                                                                          /* OPT_SBA_DEC_V2_BE */
                 move32();
 
                 IF( EQ_16( dyn_active_w_flag, 1 ) )
@@ -1401,7 +1410,11 @@ static void ivas_get_spar_matrices_fx(
                 {
                     FOR( k = dmx_ch; k < numch_out; k++ )
                     {
+#ifndef OPT_SBA_DEC_V2_BE
                         IF( EQ_16( sub( j, dmx_ch ), sub( k, dmx_ch ) ) )
+#else  /* OPT_SBA_DEC_V2_BE */
+                        IF( EQ_16( j, k ) )
+#endif /* OPT_SBA_DEC_V2_BE */
                         {
                             tmpP_re_fx[j][k] = hMdDec->spar_md.band_coeffs[add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )].P_re_fx[sub( k, dmx_ch )]; // Q22
                             move32();
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 0a0d15786fc5e597051697da07150a351543e03f..7a27cf339dc2e03552bd858ac6696c3dce732415 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -3758,6 +3758,169 @@ static void eig2x2_fx(
     move16();
     move16();
 
+#ifdef OPT_SBA_REND_V1_BE
+    /* Eigenvectors */
+    FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
+    {
+        Word16 q_diff = sub( q_e, *q_D );
+        IF( q_diff > 0 )
+        {
+            tmp1 = L_sub( D_fx[ch], L_shr( e1, q_diff ) );
+            tmp2 = L_sub( D_fx[ch], L_shr( e2, q_diff ) );
+            q_tmp1 = *q_D;
+            move16();
+        }
+        ELSE
+        {
+            tmp1 = L_sub( L_shl( D_fx[ch], q_diff ), e1 );
+            tmp2 = L_sub( L_shl( D_fx[ch], q_diff ), e2 );
+            q_tmp1 = q_e;
+            move16();
+        }
+
+        IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) )
+        {
+            s_fx = tmp2;
+            move32();
+            exp = sub( norm_l( s_fx ), 1 );
+            tmp2 = Mpy_32_32( s_fx, s_fx );
+            q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 );
+
+            tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
+            q_tmp2 = sub( 31, q_tmp2 );
+
+            tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
+
+            tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp );
+            exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
+            normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
+            q_tmp2 = sub( 31, exp );
+
+            q_diff = sub( q_c, q_tmp1 );
+            IF( q_diff > 0 )
+            {
+                c_re = L_shr( c_re, q_diff );
+                c_im = L_shr( c_im, q_diff );
+                q_c = q_tmp1;
+                move16();
+            }
+            ELSE
+            {
+                s_fx = L_shl( s_fx, q_diff );
+                q_tmp1 = q_c;
+                move16();
+            }
+
+            Ure_fx[0][ch] = Mpy_32_32( s_fx, normVal_fx );
+            move32();
+            Ure_fx[1][ch] = Mpy_32_32( c_re, normVal_fx );
+            move32();
+            Uim_fx[1][ch] = Mpy_32_32( c_im, normVal_fx );
+            move32();
+            q_U_1 = sub( add( q_tmp1, q_tmp2 ), 31 );
+
+            IF( q_U_2 != 0 )
+            {
+                q_diff = sub( q_U_2, q_U_1 );
+                IF( q_diff > 0 )
+                {
+                    Ure_fx[1][ch - 1] = L_shr( Ure_fx[1][ch - 1], q_diff );
+                    Ure_fx[0][ch - 1] = L_shr( Ure_fx[0][ch - 1], q_diff );
+                    Uim_fx[0][ch - 1] = L_shr( Uim_fx[0][ch - 1], q_diff );
+                    q_U_2 = q_U_1;
+                    move32();
+                    move32();
+                    move32();
+                    move16();
+                }
+                ELSE IF( GT_16( q_U_1, q_U_2 ) )
+                {
+                    Ure_fx[1][ch] = L_shl( Ure_fx[1][ch], q_diff );
+                    Ure_fx[0][ch] = L_shl( Ure_fx[0][ch], q_diff );
+                    Uim_fx[1][ch] = L_shl( Uim_fx[1][ch], q_diff );
+                    q_U_1 = q_U_2;
+                    move32();
+                    move32();
+                    move32();
+                    move16();
+                }
+            }
+            q_U_2 = q_U_1;
+            move16();
+        }
+        ELSE
+        {
+            s_fx = tmp1;
+            move32();
+
+            exp = sub( norm_l( s_fx ), 1 );
+            tmp2 = Mpy_32_32( s_fx, s_fx );
+            q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 );
+
+            tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
+            q_tmp2 = sub( 31, q_tmp2 );
+
+            tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
+
+            tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp );
+            exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
+            normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
+            q_tmp2 = sub( 31, exp );
+
+            q_diff = sub( q_c, q_tmp1 );
+            IF( q_diff > 0 )
+            {
+                c_re = L_shr( c_re, q_diff );
+                c_im = L_shr( c_im, q_diff );
+                q_c = q_tmp1;
+                move16();
+            }
+            ELSE
+            {
+                s_fx = L_shl( s_fx, q_diff );
+                q_tmp1 = q_c;
+                move16();
+            }
+
+            Ure_fx[1][ch] = Mpy_32_32( s_fx, normVal_fx );
+            move32();
+            Ure_fx[0][ch] = Mpy_32_32( c_re, normVal_fx );
+            move32();
+            Uim_fx[0][ch] = Mpy_32_32( L_negate( c_im ), normVal_fx );
+            move32();
+            q_U_2 = sub( add( q_tmp1, q_tmp2 ), 31 );
+
+            IF( q_U_1 != 0 )
+            {
+                q_diff = sub( q_U_2, q_U_1 );
+                IF( q_diff > 0 )
+                {
+                    Ure_fx[1][ch] = L_shr( Ure_fx[1][ch], q_diff );
+                    Ure_fx[0][ch] = L_shr( Ure_fx[0][ch], q_diff );
+                    Uim_fx[0][ch] = L_shr( Uim_fx[0][ch], q_diff );
+                    q_U_2 = q_U_1;
+                    move32();
+                    move32();
+                    move32();
+                    move16();
+                }
+                ELSE IF( GT_16( q_U_1, q_U_2 ) )
+                {
+                    Ure_fx[1][ch - 1] = L_shl( Ure_fx[1][ch - 1], q_diff );
+                    Ure_fx[0][ch - 1] = L_shl( Ure_fx[0][ch - 1], q_diff );
+                    Uim_fx[1][ch - 1] = L_shl( Uim_fx[1][ch - 1], q_diff );
+                    q_U_1 = q_U_2;
+                    move32();
+                    move32();
+                    move32();
+                    move16();
+                }
+            }
+            q_U_1 = q_U_2;
+            move16();
+        }
+    }
+#else  /* OPT_SBA_REND_V1_BE */
     /* Eigenvectors */
     FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ )
     {
@@ -3914,6 +4077,8 @@ static void eig2x2_fx(
             move16();
         }
     }
+#endif /* OPT_SBA_REND_V1_BE */
+
     if ( q_U_1 != 0 )
     {
         *q_U = q_U_1;
diff --git a/lib_rend/ivas_dirac_output_synthesis_dec_fx.c b/lib_rend/ivas_dirac_output_synthesis_dec_fx.c
index 78e32bc3c69b54026af5bbe9640a6dd03f67057b..87fa8b7d03f1a86713b618b3f862e76e6388880f 100644
--- a/lib_rend/ivas_dirac_output_synthesis_dec_fx.c
+++ b/lib_rend/ivas_dirac_output_synthesis_dec_fx.c
@@ -2459,6 +2459,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
     move16();
     move16();
 
+#ifdef OPT_SBA_REND_V1_BE
+    Word32 cmp = W_shl_sat_l( DIRAC_GAIN_LIMIT_Q26, sub( h_dirac_output_synthesis_state->gains_dir_prev_q, 26 ) );
+    Word32 cmp2 = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_diff_prev_q ) ), Q5 ) );
+#endif /* OPT_SBA_REND_V1_BE */
+
     FOR( k = 0; k < nchan_out_woLFE; k++ )
     {
         Word32 power_smooth_temp;
@@ -2506,11 +2511,19 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
                 *( p_gains_dir ) = 0;
                 move32();
             }
+#ifdef OPT_SBA_REND_V1_BE
+            ELSE IF( GT_32( *( p_gains_dir ), cmp ) )
+            {
+                *( p_gains_dir ) = cmp; /*DIRAC_GAIN_LIMIT_Q26 shifted by ( gains_dir_prev_q - 26 ) -> h_dirac_output_synthesis_state->gains_dir_prev_q*/
+                move32();
+            }
+#else  /* OPT_SBA_REND_V1_BE */
             ELSE IF( GT_32( *( p_gains_dir ), W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ) ) )
             {
                 *( p_gains_dir ) = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ); /*26 + h_dirac_output_synthesis_state->gains_dir_prev_q + 1 + 5 - 32 -> h_dirac_output_synthesis_state->gains_dir_prev_q*/
                 move32();
             }
+#endif /* OPT_SBA_REND_V1_BE */
 
             IF( *( p_cy_cross_dir_smooth_prev++ ) < 0 )
             {
@@ -2543,11 +2556,19 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
                 *( p_gains_diff ) = 0;
                 move32();
             }
+#ifdef OPT_SBA_REND_V1_BE
+            ELSE IF( GT_32( *( p_gains_diff ), cmp2 ) ) /*h_dirac_output_synthesis_state->gains_diff_prev_q*/
+            {
+                *( p_gains_diff ) = cmp2; /*h_dirac_output_synthesis_state->gains_diff_prev_q*/
+                move32();
+            }
+#else  /* OPT_SBA_REND_V1_BE */
             ELSE IF( GT_32( *( p_gains_diff ), W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_diff_prev_q ) ), Q5 ) ) ) ) /*h_dirac_output_synthesis_state->gains_diff_prev_q*/
             {
                 *( p_gains_diff ) = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_diff_prev_q ) ), Q5 ) ); /*h_dirac_output_synthesis_state->gains_diff_prev_q*/
                 move32();
             }
+#endif /* OPT_SBA_REND_V1_BE */
             p_gains_diff++;
         }
 
@@ -2558,15 +2579,25 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
             g1 = alpha[l]; // Q31
             move32();
             g2 = L_sub( ONE_IN_Q31, g1 ); // Q31
+#ifdef OPT_SBA_REND_V1_BE
+            W_temp = W_mac_32_32( W_mult_32_32( g1, ( *( p_cy_auto_dir_smooth++ ) ) ),
+                                  g2, ( *( p_cy_auto_dir_smooth_prev ) ) ); /*32+q_cy_auto_dir_smooth_prev_local*/
+#else                                                                       /* OPT_SBA_REND_V1_BE */
             W_temp = W_add( W_mult_32_32( g1, ( *( p_cy_auto_dir_smooth++ ) ) ),
                             W_mult_32_32( g2, ( *( p_cy_auto_dir_smooth_prev ) ) ) ); /*32+q_cy_auto_dir_smooth_prev_local*/
+#endif                                                                      /* OPT_SBA_REND_V1_BE */
             q_tmp = W_norm( W_temp );
             L_tmp = W_extract_h( W_shl( W_temp, q_tmp ) );              // q_cy_auto_dir_smooth_prev_local + q_tmp
             *( p_cy_auto_dir_smooth_prev++ ) = L_shr_r( L_tmp, q_tmp ); // q_cy_auto_dir_smooth_prev_local
 
             move32();
+#ifdef OPT_SBA_REND_V1_BE
+            *( p_cy_cross_dir_smooth_prev ) = Madd_32_32( Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth ) ) ),
+                                                          g2, ( *( p_cy_cross_dir_smooth_prev ) ) ); // (Q31, q_cy_cross_dir_smooth_prev) -> q_cy_cross_dir_smooth_prev
+#else                                                                                                /* OPT_SBA_REND_V1_BE */
             *( p_cy_cross_dir_smooth_prev ) = L_add( Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth ) ) ),
                                                      Mpy_32_32( g2, ( *( p_cy_cross_dir_smooth_prev ) ) ) ); // (Q31, q_cy_cross_dir_smooth_prev) -> q_cy_cross_dir_smooth_prev
+#endif                                                                                               /* OPT_SBA_REND_V1_BE */
             move32();
             test();
             if ( *( p_cy_cross_dir_smooth_prev ) == 0 && ( *( p_cy_cross_dir_smooth ) != 0 ) )
@@ -2598,11 +2629,19 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
                 *( p_gains_dir ) = 0;
                 move32();
             }
+#ifdef OPT_SBA_REND_V1_BE
+            ELSE IF( GT_32( *( p_gains_dir ), cmp ) ) /*gains_dir_prev_q*/
+            {
+                *( p_gains_dir ) = cmp; /*gains_dir_prev_q*/
+                move32();
+            }
+#else  /* OPT_SBA_REND_V1_BE */
             ELSE IF( GT_32( *( p_gains_dir ), W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ) ) ) /*gains_dir_prev_q*/
             {
                 *( p_gains_dir ) = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ); /*gains_dir_prev_q*/
                 move32();
             }
+#endif /* OPT_SBA_REND_V1_BE */
 
             IF( *( p_cy_cross_dir_smooth_prev++ ) < 0 )
             {
@@ -2689,7 +2728,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
                              shl( i_mult( proto_direct_index[k], num_freq_bands ), Q1 );
             FOR( l = 0; l < num_freq_bands; l++ )
             {
-                g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) ); // (Q31, gains_dir_prev_q) -> gains_dir_prev_q
+#ifdef OPT_SBA_REND_V1_BE
+                g = Madd_32_32( Mpy_32_32( g1, *( p_gain_1++ ) ), g2, *( p_gain_2++ ) ); // (Q31, gains_dir_prev_q) -> gains_dir_prev_q
+#else                                                                                    /* OPT_SBA_REND_V1_BE */
+                g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) );                   // (Q31, gains_dir_prev_q) -> gains_dir_prev_q
+#endif                                                                                   /* OPT_SBA_REND_V1_BE */
 
                 Cldfb_RealBuffer64_fx[k][buf_idx][l] = W_mult0_32_32( g, ( *( p_power_smooth++ ) ) ); // (gains_dir_prev_q, q_proto_direct_buffer) -> gains_dir_prev_q + q_proto_direct_buffer
                 move64();
@@ -2711,7 +2754,12 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
         {
             FOR( l = 0; l < h_dirac_output_synthesis_params->max_band_decorr; l++ )
             {
-                g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) ); // (Q31, gains_diff_prev_q) -> gains_diff_prev_q
+#ifdef OPT_SBA_REND_V1_BE
+                g = Madd_32_32( Mpy_32_32( g1, *( p_gain_1++ ) ), g2, *( p_gain_2++ ) ); // (Q31, gains_diff_prev_q) -> gains_diff_prev_q
+#else                                                                                    /* OPT_SBA_REND_V1_BE */
+                g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) );                   // (Q31, gains_diff_prev_q) -> gains_diff_prev_q
+
+#endif /* OPT_SBA_REND_V1_BE */
                 Cldfb_RealBuffer64_fx[k][buf_idx][l] = W_add( Cldfb_RealBuffer64_fx[k][buf_idx][l],
                                                               W_shr( W_mult0_32_32( g, ( *( p_power_smooth_diff++ ) ) ), negate( q_align ) ) ); // (gains_diff_prev_q, q_proto_direct_buffer) -> gains_diff_prev_q + q_proto_direct_buffer
                 move64();
@@ -2760,16 +2808,27 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx(
         }
     }
     q_align = W_norm( W_temp );
+#ifdef OPT_SBA_REND_V1_BE
+    Word16 shift = sub( q_align, 32 );
+#endif /* OPT_SBA_REND_V1_BE */
+
     FOR( buf_idx = 0; buf_idx < nbslots; ++buf_idx )
     {
         FOR( k = 0; k < nchan_out_woLFE; k++ )
         {
             FOR( l = 0; l < num_freq_bands; l++ )
             {
+#ifdef OPT_SBA_REND_V1_BE
+                RealBuffer[k][buf_idx][l] = W_shl_sat_l( Cldfb_RealBuffer64_fx[k][buf_idx][l], shift ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/
+                move32();
+                ImagBuffer[k][buf_idx][l] = W_shl_sat_l( Cldfb_ImagBuffer64_fx[k][buf_idx][l], shift ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/
+                move32();
+#else  /* OPT_SBA_REND_V1_BE */
                 RealBuffer[k][buf_idx][l] = W_extract_h( W_shl( Cldfb_RealBuffer64_fx[k][buf_idx][l], q_align ) ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/
                 move32();
                 ImagBuffer[k][buf_idx][l] = W_extract_h( W_shl( Cldfb_ImagBuffer64_fx[k][buf_idx][l], q_align ) ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/
                 move32();
+#endif /* OPT_SBA_REND_V1_BE */
             }
         }
     }
diff --git a/lib_rend/ivas_dirac_rend_fx.c b/lib_rend/ivas_dirac_rend_fx.c
index ad804a9309a53a973736763d70735cde316160d4..fb92eab13840d74c3d69ba1d783bb39a5f8c582a 100644
--- a/lib_rend/ivas_dirac_rend_fx.c
+++ b/lib_rend/ivas_dirac_rend_fx.c
@@ -3155,7 +3155,11 @@ void protoSignalComputation4_fx(
 
             sq_tmp_fx = Madd_32_32( Mpy_32_32( proto_frame_f_fx[idx], proto_frame_f_fx[idx] ), proto_frame_f_fx[idx + 1], proto_frame_f_fx[idx + 1] ); // 2*(proto_frame_f_q)-31
             sq_tmp_q = sub( add( *proto_frame_f_q, *proto_frame_f_q ), 31 );
+#ifdef OPT_SBA_REND_V1_BE
+            proto_power_smooth_fx_q = s_min( *proto_power_smooth_q, sq_tmp_q );
 
+            proto_power_smooth_fx[l + ( k * num_freq_bands )] = L_add( L_shr( proto_power_smooth_fx[l + ( k * num_freq_bands )], sub( *proto_power_smooth_q, proto_power_smooth_fx_q ) ), L_shr( sq_tmp_fx, sub( sq_tmp_q, proto_power_smooth_fx_q ) ) ); // proto_power_smooth_fx_q
+#else                                                                                                                                                                                                                                                     /* OPT_SBA_REND_V1_BE */
             IF( LT_16( *proto_power_smooth_q, sq_tmp_q ) )
             {
                 proto_power_smooth_fx[l + ( k * num_freq_bands )] = L_add( proto_power_smooth_fx[l + ( k * num_freq_bands )], L_shr( sq_tmp_fx, sub( sq_tmp_q, *proto_power_smooth_q ) ) ); // proto_power_smooth_q
@@ -3170,8 +3174,8 @@ void protoSignalComputation4_fx(
                 proto_power_smooth_fx_q = sq_tmp_q;
                 move16();
             }
-
-            p_proto_buffer_fx[idx] = proto_frame_f_fx[idx]; // proto_frame_f_q
+#endif                                                                                                                                                                                                                                                    /* OPT_SBA_REND_V1_BE */
+            p_proto_buffer_fx[idx] = proto_frame_f_fx[idx];                                                                                                                                                                                               // proto_frame_f_q
             move32();
             p_proto_buffer_fx[idx + 1] = proto_frame_f_fx[idx + 1]; // proto_frame_f_q
             move32();
diff --git a/lib_rend/ivas_efap_fx.c b/lib_rend/ivas_efap_fx.c
index fbcfdfe77cbc4cfe42131768b1c48e0667820f51..2ac397e2cb3bd0853b6531a7b914e5d045ee1b31 100644
--- a/lib_rend/ivas_efap_fx.c
+++ b/lib_rend/ivas_efap_fx.c
@@ -1528,7 +1528,7 @@ static void get_poly_gains_fx(
 #ifdef VEC_ARITH_OPT_v1
         v_sub_fixed_no_hdrm( P, A, P_minus_A, 2 ); /* Precalculate value of (P-A) q22*/
 #else                                              /* VEC_ARITH_OPT_v1 */
-        v_sub_fixed( P, A, P_minus_A, 2, 0 ); /* Precalculate value of (P-A) q22*/
+        v_sub_fixed( P, A, P_minus_A, 2, 0 );                                                      /* Precalculate value of (P-A) q22*/
 #endif                                             /* VEC_ARITH_OPT_v1 */
 
         FOR( j = i; j < numChan - 2 + i; ++j )
@@ -1585,7 +1585,7 @@ static Word32 get_tri_gain_fx(
 #ifdef VEC_ARITH_OPT_v1
     v_sub_fixed_no_hdrm( B, A, tmpSub1, 2 ); // tmpSub1 q22
 #else                                        /* VEC_ARITH_OPT_v1 */
-    v_sub_fixed( B, A, tmpSub1, 2, 0 );       // tmpSub1 q22
+    v_sub_fixed( B, A, tmpSub1, 2, 0 );                                                            // tmpSub1 q22
 #endif                                       /* VEC_ARITH_OPT_v1 */
 
     tmpDot1 = dotp_fixed( tmpN, tmpSub1, 2 ); // Q13
@@ -2248,7 +2248,7 @@ static void sort_channels_vertex_fx(
 #ifdef VEC_ARITH_OPT_v1
     v_sub_fixed_no_hdrm( tmpV1, tmpV2, tmpV3, 3 ); // tmpV3 Q30
 #else                                              /* VEC_ARITH_OPT_v1 */
-    v_sub_fixed( tmpV1, tmpV2, tmpV3, 3, 0 ); // tmpV3 Q30
+    v_sub_fixed( tmpV1, tmpV2, tmpV3, 3, 0 );                                                      // tmpV3 Q30
 #endif                                             /* VEC_ARITH_OPT_v1 */
     Word16 exp2 = 2;
     move16();
@@ -2434,7 +2434,7 @@ static Word16 in_poly_fx(                         /* Angles are in Q22 */
 #ifdef VEC_ARITH_OPT_v1
     v_sub_fixed_no_hdrm( P, A, P_minus_A, 2 ); /* Precalculate value of (P-A) q22*/
 #else                                          /* VEC_ARITH_OPT_v1 */
-    v_sub_fixed( P, A, P_minus_A, 2, 0 );     /* Precalculate value of (P-A) q22*/
+    v_sub_fixed( P, A, P_minus_A, 2, 0 );                                                          /* Precalculate value of (P-A) q22*/
 #endif                                         /* VEC_ARITH_OPT_v1 */
 
     FOR( n = 1; n < sub( numVertices, 1 ); ++n )
@@ -2508,12 +2508,16 @@ static Word16 in_tri_fx(
     v_sub_fixed_no_hdrm( B, A, tmpDot1, 2 ); // tmpDot1 q22
     v_sub_fixed_no_hdrm( C, A, tmpDot2, 2 ); // tmpDot2 q22
 #else                                        /* VEC_ARITH_OPT_v1 */
-    v_sub_fixed( B, A, tmpDot1, 2, 0 );       // tmpDot1 q22
-    v_sub_fixed( C, A, tmpDot2, 2, 0 );       // tmpDot2 q22
+    v_sub_fixed( B, A, tmpDot1, 2, 0 );                                                            // tmpDot1 q22
+    v_sub_fixed( C, A, tmpDot2, 2, 0 );                                                            // tmpDot2 q22
 #endif                                       /* VEC_ARITH_OPT_v1 */
 
     /* Verification of the non-colinearity : Q22 * Q22 = Q13 */
+#ifdef OPT_SBA_REND_V1_BE
+    invFactor = Msub_32_32( Mpy_32_32( tmpDot1[0], tmpDot2[1] ), tmpDot1[1], tmpDot2[0] ); /*q22+q22-q31->q13*/
+#else                                                                                      /* OPT_SBA_REND_V1_BE */
     invFactor = L_sub( Mpy_32_32( tmpDot1[0], tmpDot2[1] ), Mpy_32_32( tmpDot1[1], tmpDot2[0] ) ); /*q22+q22-q31->q13*/
+#endif                                                                                     /* OPT_SBA_REND_V1_BE */
 
     IF( invFactor == 0 )
     {
diff --git a/lib_rend/ivas_rotation_fx.c b/lib_rend/ivas_rotation_fx.c
index 5073b7418bd7d987fb91786a9b25c5f3e7fcc3ab..92ad8d36f4f55496349855e354687c3a25a0a9b3 100644
--- a/lib_rend/ivas_rotation_fx.c
+++ b/lib_rend/ivas_rotation_fx.c
@@ -1010,7 +1010,9 @@ void rotateFrame_shd_cldfb(
     Word16 l = 0, m1 = 0, m2 = 0;
     Word32 realRot[2 * HEADROT_ORDER + 1], imagRot[2 * HEADROT_ORDER + 1];
     Word16 SHrotmat[HEADROT_SHMAT_DIM][HEADROT_SHMAT_DIM];
+#ifndef OPT_HEAD_ROT_REND_V1_BE
     Word32 temp1, temp2;
+#endif /* OPT_HEAD_ROT_REND_V1_BE */
     move16();
     move16();
     move16();
@@ -1059,12 +1061,19 @@ void rotateFrame_shd_cldfb(
                     move32();
                     FOR( m = m1; m < m2; m++ )
                     {
+#ifdef OPT_HEAD_ROT_REND_V1_BE
+                        realRot[n - m1] = Madd_32_16_r( realRot[n - m1], Cldfb_RealBuffer[m][i][iBand], SHrotmat[n][m] ); // Q(x + 14 - 15)
+                        move32();
+                        imagRot[n - m1] = Madd_32_16_r( imagRot[n - m1], Cldfb_ImagBuffer[m][i][iBand], SHrotmat[n][m] ); // Q(x + 14 - 15)
+                        move32();
+#else  /* OPT_HEAD_ROT_REND_V1_BE */
                         temp1 = Mpy_32_16_r( Cldfb_RealBuffer[m][i][iBand], SHrotmat[n][m] ); // Q(x + 14 - 15)
                         temp2 = Mpy_32_16_r( Cldfb_ImagBuffer[m][i][iBand], SHrotmat[n][m] ); // Q(x + 14 - 15)
                         realRot[n - m1] = L_add( temp1, realRot[n - m1] );                    // Q(x + 14 - 15)
                         move32();
                         imagRot[n - m1] = L_add( temp2, imagRot[n - m1] ); // Q(x + 14 - 15)
                         move32();
+#endif /* OPT_HEAD_ROT_REND_V1_BE */
                     }
                 }
                 /* write back the result */
diff --git a/lib_rend/ivas_vbap_fx.c b/lib_rend/ivas_vbap_fx.c
index 7495953e3e462f5eca9be008502a81e7be8a6656..cfcbc67603388e210e713d9b2f2ea406795f9867 100644
--- a/lib_rend/ivas_vbap_fx.c
+++ b/lib_rend/ivas_vbap_fx.c
@@ -578,7 +578,11 @@ void vbap_determine_gains_fx(
     move32();
     FOR( ch = 0; ch < 3; ch++ )
     {
+#ifdef OPT_SBA_REND_V1_BE
+        gain_ene_fx = Madd_32_32( gain_ene_fx, gain_triplet_fx[ch], gain_triplet_fx[ch] ); /* Q(2 * VBAP_VS_TRIPLET.q_inverse_matrix - 31) */
+#else                                                                                      /* OPT_SBA_REND_V1_BE */
         gain_ene_fx = L_add( gain_ene_fx, Mpy_32_32( gain_triplet_fx[ch], gain_triplet_fx[ch] ) ); /* Q(2 * VBAP_VS_TRIPLET.q_inverse_matrix - 31) */
+#endif                                                                                     /* OPT_SBA_REND_V1_BE */
     }
 
     norm_value_fx = Isqrt( L_shr( gain_ene_fx, 1 ) ); /* Q(31 - (2 * VBAP_VS_TRIPLET.q_inverse_matrix - 31 - 1) / 2 ) = Q(47 - VBAP_VS_TRIPLET.q_inverse_matrix) */
@@ -681,9 +685,35 @@ static UWord8 vector_matrix_multiply_3x3_fx(
     Word32 *result,           /* o  : output vector Q(q_matrix) */
     Word16 q_matrix )
 {
-    result[0] = Mpy_32_16_1( matrix[0][0], src_vector[0] );                     /* Q(q_matrix) */
-    result[0] = L_add( result[0], Mpy_32_16_1( matrix[1][0], src_vector[1] ) ); /* Q(q_matrix) */
-    result[0] = L_add( result[0], Mpy_32_16_1( matrix[2][0], src_vector[2] ) ); /* Q(q_matrix) */
+#ifdef OPT_SBA_REND_V1_BE
+    Word32 pointzero_one = Mpy_32_16_1( L_lshl( 1, q_matrix ), -327 /* -0.01 in Q15 */ ); /* negative threshold: approx. -0.01 in Q(q_matrix) */
+    result[0] = Madd_32_16( Madd_32_16( Mpy_32_16_1( matrix[0][0], src_vector[0] ), matrix[1][0], src_vector[1] ), matrix[2][0], src_vector[2] ); /* Q(q_matrix) */
+    move32();
+
+    IF( LT_32( result[0], pointzero_one ) )
+    {
+        return 0;
+    }
+
+    result[1] = Madd_32_16( Madd_32_16( Mpy_32_16_1( matrix[0][1], src_vector[0] ), matrix[1][1], src_vector[1] ), matrix[2][1], src_vector[2] ); /* Q(q_matrix) */
+    move32();
+
+    IF( LT_32( result[1], pointzero_one ) )
+    {
+        return 0;
+    }
+
+    result[2] = Madd_32_16( Madd_32_16( Mpy_32_16_1( matrix[0][2], src_vector[0] ), matrix[1][2], src_vector[1] ), matrix[2][2], src_vector[2] ); /* Q(q_matrix) */
+    move32();
+
+    IF( LT_32( result[2], pointzero_one ) )
+    {
+        return 0;
+    }
+#else  /* OPT_SBA_REND_V1_BE */
+    result[0] = Mpy_32_16_1( matrix[0][0], src_vector[0] );                                        /* Q(q_matrix) */
+    result[0] = L_add( result[0], Mpy_32_16_1( matrix[1][0], src_vector[1] ) );                    /* Q(q_matrix) */
+    result[0] = L_add( result[0], Mpy_32_16_1( matrix[2][0], src_vector[2] ) );                    /* Q(q_matrix) */
     move32();
     move32();
     move32();
@@ -716,7 +746,7 @@ static UWord8 vector_matrix_multiply_3x3_fx(
     {
         return 0;
     }
-
+#endif /* OPT_SBA_REND_V1_BE */
     return 1;
 }