From d9019ae9e1e6224c9ff322d916e18879bd5430c7 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Fri, 13 Jun 2025 21:05:19 +0530 Subject: [PATCH] SBA dec path optimization - Bit Exact changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes are made under macro OPT_SBA_DEC_V2_BE Functions and corresponding WMOPS improvements: ivas_spar_get_parameters_fx 0.4 Commands: ./IVAS_cod -sba 1 256000 48 scripts/testv/stvFOA48c.wav bit_sba  ./IVAS_dec 7_1 48 bit_sba sba.wav   --- lib_com/options.h | 1 + lib_dec/dec_tcx_fx.c | 21 +++++++ lib_dec/ivas_binRenderer_internal_fx.c | 15 +++++ lib_dec/ivas_dirac_output_synthesis_cov_fx.c | 10 ++- lib_dec/ivas_spar_decoder_fx.c | 65 +++++++++++++++++++- lib_dec/ivas_spar_md_dec_fx.c | 13 ++++ 6 files changed, 123 insertions(+), 2 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index f734ff624..8bbd89e87 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -77,6 +77,7 @@ /* Note: each compile switch (FIX_1101_...) is independent from the other ones */ #define OPT_SBA_REND_V1_BE #define OPT_HEAD_ROT_REND_V1_BE +#define OPT_SBA_DEC_V2_BE #define OPT_SBA_ENC_V1_BE #define OPT_BIN_RENDERER_V1 #define OPT_BIN_RENDERER_V2 diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c index b9084df88..8e8a2b343 100644 --- a/lib_dec/dec_tcx_fx.c +++ b/lib_dec/dec_tcx_fx.c @@ -2634,9 +2634,16 @@ void IMDCT_ivas_fx( Word32 fac; // fac = shl_sat( mult_r( extract_h( L_shr_sat( hTcxDec->conceal_eof_gain32, sub( 1, hTcxDec->conceal_eof_gain_e ) ) ), st->last_concealed_gain_syn_deemph ), 1 ); fac = Mpy_32_16_1( hTcxDec->conceal_eof_gain32, st->last_concealed_gain_syn_deemph ); // q = 31 - hTcxDec->conceal_eof_gain_e - last_concealed_gain_syn_deemph_e +#ifdef OPT_SBA_DEC_V2_BE + Word16 eff_e = add( hTcxDec->conceal_eof_gain_e, st->last_concealed_gain_syn_deemph_e ); +#endif /* OPT_SBA_DEC_V2_BE */ FOR( Word16 ind = 0; ind < overlap; ind++ ) { +#ifdef OPT_SBA_DEC_V2_BE + old_syn_overl_fx[ind] = extract_h( L_shl_sat( Mpy_32_16_1( fac, old_syn_overl_fx[ind] ), eff_e ) ); // Q(-2) +#else /* OPT_SBA_DEC_V2_BE */ old_syn_overl_fx[ind] = extract_h( L_shl_sat( Mpy_32_16_1( fac, old_syn_overl_fx[ind] ), add( hTcxDec->conceal_eof_gain_e, st->last_concealed_gain_syn_deemph_e ) ) ); // Q(-2) +#endif /* OPT_SBA_DEC_V2_BE */ move16(); } } @@ -4713,9 +4720,16 @@ void decoder_tcx_noiseshaping_igf_fx( { /* If the exponent on the spec side (i>L_frame) is lesser, then shift all the values in the spec side by the difference to make both sides have the same exponent. */ +#ifdef OPT_SBA_DEC_V2_BE + Word16 diff_e = sub( frame_side_x_e, spec_side_x_e ); +#endif /* OPT_SBA_DEC_V2_BE */ FOR( i = L_frame; i < L_spec; i++ ) { +#ifdef OPT_SBA_DEC_V2_BE + x_fx[i] = L_shr( x_fx[i], diff_e ); +#else /* OPT_SBA_DEC_V2_BE */ x_fx[i] = L_shr( x_fx[i], sub( frame_side_x_e, spec_side_x_e ) ); +#endif /* OPT_SBA_DEC_V2_BE */ move32(); } } @@ -4723,9 +4737,16 @@ void decoder_tcx_noiseshaping_igf_fx( { /* If the exponent on the spec side (i>L_frame) is greater, then shift all the values in the frame side (iconv_band ); + + FOR( k = 0; k < numTimeSlots; k++ ) + { + set32_fx( &Cldfb_RealBuffer_Binaural_fx[0][k][hBinRenderer->conv_band], 0, len ); + set32_fx( &Cldfb_RealBuffer_Binaural_fx[1][k][hBinRenderer->conv_band], 0, len ); + set32_fx( &Cldfb_ImagBuffer_Binaural_fx[0][k][hBinRenderer->conv_band], 0, len ); + set32_fx( &Cldfb_ImagBuffer_Binaural_fx[1][k][hBinRenderer->conv_band], 0, len ); + } +#endif /* OPT_SBA_DEC_V2_BE */ + pop_wmops(); return; } diff --git a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c index caddb5c5c..598f90829 100644 --- a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c +++ b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c @@ -605,8 +605,11 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( #ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE { Word16 shifter; - +#ifdef OPT_SBA_DEC_V2_BE + shifter = sub( mixing_matrix_res_smooth_e, 31 ); +#else /* OPT_SBA_DEC_V2_BE */ shifter = 31 - mixing_matrix_res_smooth_e; +#endif /* OPT_SBA_DEC_V2_BE */ FOR( ch_idx = 0; ch_idx < nY; ch_idx++ ) { int i; @@ -625,8 +628,13 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] ) ); idx += nY; } +#ifdef OPT_SBA_DEC_V2_BE + Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_real, shifter ); + Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_imag, shifter ); +#else /* OPT_SBA_DEC_V2_BE */ Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_real, shifter ) ); Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_imag, shifter ) ); +#endif /* OPT_SBA_DEC_V2_BE */ } } diff --git a/lib_dec/ivas_spar_decoder_fx.c b/lib_dec/ivas_spar_decoder_fx.c index 29a307425..fa5f6779f 100644 --- a/lib_dec/ivas_spar_decoder_fx.c +++ b/lib_dec/ivas_spar_decoder_fx.c @@ -1163,6 +1163,69 @@ void ivas_spar_get_parameters_fx( move16(); Word16 add_weight_fx = sub( MAX_WORD16, weight_fx ); Word16 add_weight_20ms_fx = sub( MAX_WORD16, weight_20ms_fx ); +#ifdef OPT_SBA_DEC_V2_BE + Word16 out_flag[IVAS_MAX_FB_MIXER_OUT_CH]; + + Word32 band_bool = LT_16( split_band, IVAS_MAX_NUM_BANDS ); + + FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ ) + { + /* 20ms cross-fade for Transport channels in all frequency bands */ + /* sub-frame processing for missing channels in all frequency bands*/ + out_flag[out_ch] = band_bool && ( 0 == ivas_is_res_channel( out_ch, hSpar->hMdDec->spar_md_cfg.nchan_transport ) ); + move16(); + } + Word32 frame_bool = GT_16( hSpar->i_subframe, 3 ); + + FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ ) + { + IF( out_flag[out_ch] ) + { + IF( frame_bool ) + { + FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) + { + FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ ) + { + par_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[ts1][out_ch][in_ch][spar_band], weight_fx ), + hSpar->hMdDec->mixer_mat_prev_fx[ts0][out_ch][in_ch][spar_band], add_weight_fx ); + move32(); + } + } + } + ELSE + { + + + FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) + { + FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ ) + { + { + par_mat_fx[out_ch][in_ch][spar_band] = hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band]; /*hSpar->hMdDec->Q_mixer_mat*/ + move32(); + } + } + } + } + } + ELSE + { + FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) + { + FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ ) + { + /* 20ms Transport channel reconstruction with matching encoder/decoder processing */ + Word16 prev_idx = SPAR_DIRAC_SPLIT_START_BAND < IVAS_MAX_NUM_BANDS ? 1 : 0; /* if SPAR_DIRAC_SPLIT_START_BAND == IVAS_MAX_NUM_BANDS, then the sub-frame mixer_mat delay line is not active */ + move16(); + par_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[prev_idx][out_ch][in_ch][spar_band], add_weight_20ms_fx ), + hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band], weight_20ms_fx ); /*hSpar->hMdDec->Q_mixer_mat*/ + move32(); + } + } + } + } +#else /* OPT_SBA_DEC_V2_BE */ FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) { FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ ) @@ -1202,7 +1265,7 @@ void ivas_spar_get_parameters_fx( } } } - +#endif /* OPT_SBA_DEC_V2_BE */ return; } diff --git a/lib_dec/ivas_spar_md_dec_fx.c b/lib_dec/ivas_spar_md_dec_fx.c index 7f487a75c..36b77732a 100644 --- a/lib_dec/ivas_spar_md_dec_fx.c +++ b/lib_dec/ivas_spar_md_dec_fx.c @@ -1318,6 +1318,14 @@ static void ivas_get_spar_matrices_fx( tmp_C2_re_fx[0][j] = Mpy_32_32( active_w_dm_fac_fx, L_negate( hMdDec->spar_md.band_coeffs[( b + ( i_ts * IVAS_MAX_NUM_BANDS ) )].pred_re_fx[j - 1] ) ); // Q31 *Q22=Q22 move32(); } +#ifdef OPT_SBA_DEC_V2_BE + re_fx1 = Madd_32_32( ONE_IN_Q13, tmp_C2_re_fx[0][1], tmp_C1_re_fx[1][0] ); // Q13+Q13 + + re_fx1 = Madd_32_32( re_fx1, tmp_C2_re_fx[0][2], tmp_C1_re_fx[2][0] ); // Q13+Q13 + + tmp_dm_re_fx[0][0] = L_shl( Madd_32_32( re_fx1, tmp_C2_re_fx[0][3], tmp_C1_re_fx[3][0] ), Q9 ); // (Q13+Q13) << Q9 = Q22; + // +#else /* OPT_SBA_DEC_V2_BE */ re_fx = Mpy_32_32( tmp_C2_re_fx[0][1], tmp_C1_re_fx[1][0] ); // Q22 *Q22 =Q13 re_fx1 = L_add( ONE_IN_Q13, re_fx ); // Q13+Q13 @@ -1326,6 +1334,7 @@ static void ivas_get_spar_matrices_fx( re_fx = Mpy_32_32( tmp_C2_re_fx[0][3], tmp_C1_re_fx[3][0] ); // Q22 *Q22 =Q13 tmp_dm_re_fx[0][0] = L_shl( L_add( re_fx1, re_fx ), Q9 ); // (Q13+Q13) << Q9 = Q22; +#endif /* OPT_SBA_DEC_V2_BE */ move32(); IF( EQ_16( dyn_active_w_flag, 1 ) ) @@ -1401,7 +1410,11 @@ static void ivas_get_spar_matrices_fx( { FOR( k = dmx_ch; k < numch_out; k++ ) { +#ifndef OPT_SBA_DEC_V2_BE IF( EQ_16( sub( j, dmx_ch ), sub( k, dmx_ch ) ) ) +#else /* OPT_SBA_DEC_V2_BE */ + IF( EQ_16( j, k ) ) +#endif /* OPT_SBA_DEC_V2_BE */ { tmpP_re_fx[j][k] = hMdDec->spar_md.band_coeffs[add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )].P_re_fx[sub( k, dmx_ch )]; // Q22 move32(); -- GitLab