From d9019ae9e1e6224c9ff322d916e18879bd5430c7 Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Fri, 13 Jun 2025 21:05:19 +0530
Subject: [PATCH] SBA dec path optimization - Bit Exact changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All changes are guarded by the compile switch OPT_SBA_DEC_V2_BE.

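Functions and corresponding WMOPS improvements:
ivas_spar_get_parameters_fx: 0.4 WMOPS

This patch also modifies the following functions under the same switch:
IMDCT_ivas_fx
decoder_tcx_noiseshaping_igf_fx
ivas_binRenderer_fx
ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx
ivas_get_spar_matrices_fx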

Commands:
./IVAS_cod -sba 1 256000 48 scripts/testv/stvFOA48c.wav bit_sba 
./IVAS_dec 7_1 48 bit_sba sba.wav
 
---
 lib_com/options.h                            |  1 +
 lib_dec/dec_tcx_fx.c                         | 21 +++++++
 lib_dec/ivas_binRenderer_internal_fx.c       | 15 +++++
 lib_dec/ivas_dirac_output_synthesis_cov_fx.c | 10 ++-
 lib_dec/ivas_spar_decoder_fx.c               | 65 +++++++++++++++++++-
 lib_dec/ivas_spar_md_dec_fx.c                | 13 ++++
 6 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/lib_com/options.h b/lib_com/options.h
index f734ff624..8bbd89e87 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -77,6 +77,7 @@
 /* Note: each compile switch (FIX_1101_...) is independent from the other ones */
 #define OPT_SBA_REND_V1_BE
 #define OPT_HEAD_ROT_REND_V1_BE
+#define OPT_SBA_DEC_V2_BE
 #define OPT_SBA_ENC_V1_BE
 #define OPT_BIN_RENDERER_V1
 #define OPT_BIN_RENDERER_V2
diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c
index b9084df88..8e8a2b343 100644
--- a/lib_dec/dec_tcx_fx.c
+++ b/lib_dec/dec_tcx_fx.c
@@ -2634,9 +2634,16 @@ void IMDCT_ivas_fx(
         Word32 fac;
         // fac = shl_sat( mult_r( extract_h( L_shr_sat( hTcxDec->conceal_eof_gain32, sub( 1, hTcxDec->conceal_eof_gain_e ) ) ), st->last_concealed_gain_syn_deemph ), 1 );
         fac = Mpy_32_16_1( hTcxDec->conceal_eof_gain32, st->last_concealed_gain_syn_deemph ); // q = 31 - hTcxDec->conceal_eof_gain_e - last_concealed_gain_syn_deemph_e
+#ifdef OPT_SBA_DEC_V2_BE
+        Word16 eff_e = add( hTcxDec->conceal_eof_gain_e, st->last_concealed_gain_syn_deemph_e );
+#endif /* OPT_SBA_DEC_V2_BE */
         FOR( Word16 ind = 0; ind < overlap; ind++ )
         {
+#ifdef OPT_SBA_DEC_V2_BE
+            old_syn_overl_fx[ind] = extract_h( L_shl_sat( Mpy_32_16_1( fac, old_syn_overl_fx[ind] ), eff_e ) ); // Q(-2)
+#else                                                                                                           /* OPT_SBA_DEC_V2_BE */
             old_syn_overl_fx[ind] = extract_h( L_shl_sat( Mpy_32_16_1( fac, old_syn_overl_fx[ind] ), add( hTcxDec->conceal_eof_gain_e, st->last_concealed_gain_syn_deemph_e ) ) ); // Q(-2)
+#endif                                                                                                          /* OPT_SBA_DEC_V2_BE */
             move16();
         }
     }
@@ -4713,9 +4720,16 @@ void decoder_tcx_noiseshaping_igf_fx(
         {
             /* If the exponent on the spec side (i>L_frame) is lesser, then shift all the values in the
             spec side by the difference to make both sides have the same exponent. */
+#ifdef OPT_SBA_DEC_V2_BE
+            Word16 diff_e = sub( frame_side_x_e, spec_side_x_e );
+#endif /* OPT_SBA_DEC_V2_BE */
             FOR( i = L_frame; i < L_spec; i++ )
             {
+#ifdef OPT_SBA_DEC_V2_BE
+                x_fx[i] = L_shr( x_fx[i], diff_e );
+#else  /* OPT_SBA_DEC_V2_BE */
                 x_fx[i] = L_shr( x_fx[i], sub( frame_side_x_e, spec_side_x_e ) );
+#endif /* OPT_SBA_DEC_V2_BE */
                 move32();
             }
         }
@@ -4723,9 +4737,16 @@ void decoder_tcx_noiseshaping_igf_fx(
         {
             /* If the exponent on the spec side (i>L_frame) is greater, then shift all the values in the
             frame side (i<L_frame) by the difference to make both sides have the same exponent. */
+#ifdef OPT_SBA_DEC_V2_BE
+            Word16 diff_e = sub( spec_side_x_e, frame_side_x_e );
+#endif /* OPT_SBA_DEC_V2_BE */
             FOR( i = 0; i < L_frame; i++ )
             {
+#ifdef OPT_SBA_DEC_V2_BE
+                x_fx[i] = L_shr( x_fx[i], diff_e );
+#else  /* OPT_SBA_DEC_V2_BE */
                 x_fx[i] = L_shr( x_fx[i], sub( spec_side_x_e, frame_side_x_e ) );
+#endif /* OPT_SBA_DEC_V2_BE */
                 move32();
             }
         }
diff --git a/lib_dec/ivas_binRenderer_internal_fx.c b/lib_dec/ivas_binRenderer_internal_fx.c
index ae16bf7b5..908d3b79b 100644
--- a/lib_dec/ivas_binRenderer_internal_fx.c
+++ b/lib_dec/ivas_binRenderer_internal_fx.c
@@ -1622,6 +1622,7 @@ void ivas_binRenderer_fx(
 
     /* Compute Convolution */
     /* memory reset for the binaural output */
+#ifndef OPT_SBA_DEC_V2_BE
     FOR( chIdx = 0; chIdx < BINAURAL_CHANNELS; chIdx++ )
     {
         FOR( k = 0; k < numTimeSlots; k++ )
@@ -1634,6 +1635,7 @@ void ivas_binRenderer_fx(
 #endif /* OPT_BIN_RENDERER_V2 */
         }
     }
+#endif /* OPT_SBA_DEC_V2_BE */
 
     /* Head rotation in HOA3 or CICPx */
     test();
@@ -1736,6 +1738,19 @@ void ivas_binRenderer_fx(
             }
         }
     }
+
+#ifdef OPT_SBA_DEC_V2_BE
+    Word16 len = sub( CLDFB_NO_CHANNELS_MAX, hBinRenderer->conv_band );
+
+    FOR( k = 0; k < numTimeSlots; k++ )
+    {
+        set32_fx( &Cldfb_RealBuffer_Binaural_fx[0][k][hBinRenderer->conv_band], 0, len );
+        set32_fx( &Cldfb_RealBuffer_Binaural_fx[1][k][hBinRenderer->conv_band], 0, len );
+        set32_fx( &Cldfb_ImagBuffer_Binaural_fx[0][k][hBinRenderer->conv_band], 0, len );
+        set32_fx( &Cldfb_ImagBuffer_Binaural_fx[1][k][hBinRenderer->conv_band], 0, len );
+    }
+#endif /* OPT_SBA_DEC_V2_BE */
+
     pop_wmops();
     return;
 }
diff --git a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c
index caddb5c5c..598f90829 100644
--- a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c
+++ b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c
@@ -605,8 +605,11 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
 #ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
                 {
                     Word16 shifter;
-
+#ifdef OPT_SBA_DEC_V2_BE
+                    shifter = sub( mixing_matrix_res_smooth_e, 31 );
+#else  /* OPT_SBA_DEC_V2_BE */
                     shifter = 31 - mixing_matrix_res_smooth_e;
+#endif /* OPT_SBA_DEC_V2_BE */
                     FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
                     {
                         int i;
@@ -625,8 +628,13 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
                             temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] ) );
                             idx += nY;
                         }
+#ifdef OPT_SBA_DEC_V2_BE
+                        Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_real, shifter );
+                        Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_imag, shifter );
+#else  /* OPT_SBA_DEC_V2_BE */
                         Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_real, shifter ) );
                         Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_imag, shifter ) );
+#endif /* OPT_SBA_DEC_V2_BE */
                     }
                 }
 
diff --git a/lib_dec/ivas_spar_decoder_fx.c b/lib_dec/ivas_spar_decoder_fx.c
index 29a307425..fa5f6779f 100644
--- a/lib_dec/ivas_spar_decoder_fx.c
+++ b/lib_dec/ivas_spar_decoder_fx.c
@@ -1163,6 +1163,69 @@ void ivas_spar_get_parameters_fx(
     move16();
     Word16 add_weight_fx = sub( MAX_WORD16, weight_fx );
     Word16 add_weight_20ms_fx = sub( MAX_WORD16, weight_20ms_fx );
+#ifdef OPT_SBA_DEC_V2_BE
+    Word16 out_flag[IVAS_MAX_FB_MIXER_OUT_CH];
+
+    Word32 band_bool = LT_16( split_band, IVAS_MAX_NUM_BANDS );
+
+    FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ )
+    {
+        /* 20ms cross-fade for Transport channels in all frequency bands */
+        /* sub-frame processing for missing channels in all frequency bands*/
+        out_flag[out_ch] = band_bool && ( 0 == ivas_is_res_channel( out_ch, hSpar->hMdDec->spar_md_cfg.nchan_transport ) );
+        move16();
+    }
+    Word32 frame_bool = GT_16( hSpar->i_subframe, 3 );
+
+    FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ )
+    {
+        IF( out_flag[out_ch] )
+        {
+            IF( frame_bool )
+            {
+                FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ )
+                {
+                    FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ )
+                    {
+                        par_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[ts1][out_ch][in_ch][spar_band], weight_fx ),
+                                                                           hSpar->hMdDec->mixer_mat_prev_fx[ts0][out_ch][in_ch][spar_band], add_weight_fx );
+                        move32();
+                    }
+                }
+            }
+            ELSE
+            {
+
+
+                FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ )
+                {
+                    FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ )
+                    {
+                        {
+                            par_mat_fx[out_ch][in_ch][spar_band] = hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band]; /*hSpar->hMdDec->Q_mixer_mat*/
+                            move32();
+                        }
+                    }
+                }
+            }
+        }
+        ELSE
+        {
+            FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ )
+            {
+                FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ )
+                {
+                    /* 20ms Transport channel reconstruction with matching encoder/decoder processing */
+                    Word16 prev_idx = SPAR_DIRAC_SPLIT_START_BAND < IVAS_MAX_NUM_BANDS ? 1 : 0; /* if SPAR_DIRAC_SPLIT_START_BAND == IVAS_MAX_NUM_BANDS, then the sub-frame mixer_mat delay line is not active */
+                    move16();
+                    par_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[prev_idx][out_ch][in_ch][spar_band], add_weight_20ms_fx ),
+                                                                       hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band], weight_20ms_fx ); /*hSpar->hMdDec->Q_mixer_mat*/
+                    move32();
+                }
+            }
+        }
+    }
+#else  /* OPT_SBA_DEC_V2_BE */
     FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ )
     {
         FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ )
@@ -1202,7 +1265,7 @@ void ivas_spar_get_parameters_fx(
             }
         }
     }
-
+#endif /* OPT_SBA_DEC_V2_BE */
     return;
 }
 
diff --git a/lib_dec/ivas_spar_md_dec_fx.c b/lib_dec/ivas_spar_md_dec_fx.c
index 7f487a75c..36b77732a 100644
--- a/lib_dec/ivas_spar_md_dec_fx.c
+++ b/lib_dec/ivas_spar_md_dec_fx.c
@@ -1318,6 +1318,14 @@ static void ivas_get_spar_matrices_fx(
                     tmp_C2_re_fx[0][j] = Mpy_32_32( active_w_dm_fac_fx, L_negate( hMdDec->spar_md.band_coeffs[( b + ( i_ts * IVAS_MAX_NUM_BANDS ) )].pred_re_fx[j - 1] ) ); // Q31 *Q22=Q22
                     move32();
                 }
+#ifdef OPT_SBA_DEC_V2_BE
+                re_fx1 = Madd_32_32( ONE_IN_Q13, tmp_C2_re_fx[0][1], tmp_C1_re_fx[1][0] ); // Q13+Q13
+
+                re_fx1 = Madd_32_32( re_fx1, tmp_C2_re_fx[0][2], tmp_C1_re_fx[2][0] ); // Q13+Q13
+
+                tmp_dm_re_fx[0][0] = L_shl( Madd_32_32( re_fx1, tmp_C2_re_fx[0][3], tmp_C1_re_fx[3][0] ), Q9 ); // (Q13+Q13) << Q9 = Q22;
+                                                                                                                //
+#else                                                                                                           /* OPT_SBA_DEC_V2_BE */
                 re_fx = Mpy_32_32( tmp_C2_re_fx[0][1], tmp_C1_re_fx[1][0] ); // Q22 *Q22 =Q13
                 re_fx1 = L_add( ONE_IN_Q13, re_fx );                         // Q13+Q13
 
@@ -1326,6 +1334,7 @@ static void ivas_get_spar_matrices_fx(
 
                 re_fx = Mpy_32_32( tmp_C2_re_fx[0][3], tmp_C1_re_fx[3][0] ); // Q22 *Q22 =Q13
                 tmp_dm_re_fx[0][0] = L_shl( L_add( re_fx1, re_fx ), Q9 );    // (Q13+Q13) << Q9 = Q22;
+#endif                                                                                                          /* OPT_SBA_DEC_V2_BE */
                 move32();
 
                 IF( EQ_16( dyn_active_w_flag, 1 ) )
@@ -1401,7 +1410,11 @@ static void ivas_get_spar_matrices_fx(
                 {
                     FOR( k = dmx_ch; k < numch_out; k++ )
                     {
+#ifndef OPT_SBA_DEC_V2_BE
                         IF( EQ_16( sub( j, dmx_ch ), sub( k, dmx_ch ) ) )
+#else  /* OPT_SBA_DEC_V2_BE */
+                        IF( EQ_16( j, k ) )
+#endif /* OPT_SBA_DEC_V2_BE */
                         {
                             tmpP_re_fx[j][k] = hMdDec->spar_md.band_coeffs[add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )].P_re_fx[sub( k, dmx_ch )]; // Q22
                             move32();
-- 
GitLab