From 6c5ed9f1374b32878c9048594128b303e15f959a Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Fri, 11 Jul 2025 16:13:03 +0530 Subject: [PATCH] Optimization in ISM Dec path - Non bit exact --- lib_com/options.h | 1 + lib_rend/ivas_objectRenderer_hrFilt_fx.c | 121 ++++++++++++++++++++++- lib_rend/ivas_objectRenderer_sfx_fx.c | 46 ++++++++- 3 files changed, 163 insertions(+), 5 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 23d8ca6d1..21d976fac 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -81,6 +81,7 @@ /* Note: each compile switch (FIX_1101_...) is independent from the other ones */ #define OPT_MCT_ENC_V2_BE #define OPT_MCH_DEC_V1_NBE +#define OPT_MASA_DEC_V1_NBE #define OPT_MCT_ENC_48KB_NBE #define OPT_MCH_DEC_V1_BE #define OPT_MCT_ENC_V2_NBE diff --git a/lib_rend/ivas_objectRenderer_hrFilt_fx.c b/lib_rend/ivas_objectRenderer_hrFilt_fx.c index a098aaa25..c5689b364 100644 --- a/lib_rend/ivas_objectRenderer_hrFilt_fx.c +++ b/lib_rend/ivas_objectRenderer_hrFilt_fx.c @@ -193,7 +193,6 @@ static void GenerateFilter_fx( ) { Word16 qp, p, k, i; - Word32 index; Word16 AzIdx[HRTF_MODEL_BSPLINE_NUM_COEFFS][HRTF_MODEL_BSPLINE_NUM_COEFFS], EvIdx[HRTF_MODEL_BSPLINE_NUM_COEFFS]; /* non-zero basis functions */ Word16 num_az_idx[HRTF_MODEL_BSPLINE_NUM_COEFFS]; Word16 num_ev_idx; @@ -255,6 +254,16 @@ static void GenerateFilter_fx( move16(); FOR( p = 0; p < num_ev_idx; p++ ) { +#ifdef OPT_MASA_DEC_V1_NBE + Word32 expt = L_shl_sat( modelEval->elevBfVec_fx[p], 1 ); + FOR( i = 0; i < num_az_idx[p]; i++ ) + { + modelEval->BM_fx[qp + i] = Mpy_32_32( expt, modelEval->azimBfVec_fx[p][i] ); /*Q30 - ( Q30 * 2 - 31 )*/ // Q30 + move32(); + BM_idx[qp + i] = add( model->azim_start_idx[EvIdx[p]], AzIdx[p][i] ); + move16(); + } +#else /* OPT_MASA_DEC_V1_NBE */ FOR( i = 0; i < num_az_idx[p]; i++ ) { modelEval->BM_fx[add( qp, i )] = L_shl( Mpy_32_32( modelEval->elevBfVec_fx[p], modelEval->azimBfVec_fx[p][i] ), 1 ); /*Q30 - ( Q30 * 2 - 31 )*/ // Q30 @@ -262,12 +271,21 @@ static void GenerateFilter_fx( BM_idx[add( qp, i )] = add( model->azim_start_idx[EvIdx[p]], AzIdx[p][i] ); move16(); } +#endif /* OPT_MASA_DEC_V1_NBE */ qp = add( qp, num_az_idx[p] ); } +#ifdef OPT_MASA_DEC_V1_NBE + Word16 expL = add( model->AlphaL_e, 1 ); + Word16 expR = add( model->AlphaR_e, 1 ); + BMEnergiesL_e = add( model->EL_e, 2 ); + BMEnergiesR_e = add( model->ER_e, 2 ); +#endif /* OPT_MASA_DEC_V1_NBE */ + /* Compute HR filters, approximate optimized model evaluation */ FOR( iSec = 0; iSec < HRTF_MODEL_N_SECTIONS; iSec++ ) { +#ifndef OPT_MASA_DEC_V1_NBE ETotL = 0; move32(); ETotR = 0; @@ -288,45 +306,102 @@ static void GenerateFilter_fx( BMEnergiesL_e = add( model->EL_e, 2 ); BMEnergiesR_e = add( model->ER_e, 2 ); +#else /* OPT_MASA_DEC_V1_NBE */ + Word64 temp1 = 0; + move64(); + Word64 temp2 = 0; + move64(); +#endif /* OPT_MASA_DEC_V1_NBE */ /* Energy is precalculated part updated with square of BM value. Store index for sorting */ FOR( i = 0; i < qp; i++ ) { +#ifdef OPT_MASA_DEC_V1_NBE + modelEval->BMEnergiesL[i].val_fx = Mpy_32_32( Mpy_32_32( modelEval->BM_fx[i], modelEval->BM_fx[i] ) /*Q29*/, model->EL_fx[( iSec * model->AlphaN ) + BM_idx[i]] ); // exp: model->EL_e + 2 + modelEval->BMEnergiesR[i].val_fx = Mpy_32_32( Mpy_32_32( modelEval->BM_fx[i], modelEval->BM_fx[i] ) /*Q29*/, model->ER_fx[( iSec * model->AlphaN ) + BM_idx[i]] ); // exp: model->ER_e + 2 +#else /* OPT_MASA_DEC_V1_NBE */ modelEval->BMEnergiesL[i].val_fx = Mpy_32_32( Mpy_32_32( modelEval->BM_fx[i], modelEval->BM_fx[i] ) /*Q29*/, model->EL_fx[add( i_mult( iSec, model->AlphaN ), BM_idx[i] )] ); // exp: model->EL_e + 2 modelEval->BMEnergiesR[i].val_fx = Mpy_32_32( Mpy_32_32( modelEval->BM_fx[i], modelEval->BM_fx[i] ) /*Q29*/, model->ER_fx[add( i_mult( iSec, model->AlphaN ), BM_idx[i] )] ); // exp: model->ER_e + 2 +#endif /* OPT_MASA_DEC_V1_NBE */ move32(); move32(); modelEval->BMEnergiesL[i].i = i; move16(); modelEval->BMEnergiesR[i].i = i; move16(); - +#ifndef OPT_MASA_DEC_V1_NBE ETotL = BASOP_Util_Add_Mant32Exp( ETotL, ETotL_e, modelEval->BMEnergiesL[i].val_fx, BMEnergiesL_e, &ETotL_e ); ETotR = BASOP_Util_Add_Mant32Exp( ETotR, ETotR_e, modelEval->BMEnergiesR[i].val_fx, BMEnergiesR_e, &ETotR_e ); +#else /* OPT_MASA_DEC_V1_NBE */ + temp1 = W_add( temp1, modelEval->BMEnergiesL[i].val_fx ); // BMEnergiesL_e + temp2 = W_add( temp2, modelEval->BMEnergiesR[i].val_fx ); // BMEnergiesR_e +#endif /* OPT_MASA_DEC_V1_NBE */ } +#ifdef OPT_MASA_DEC_V1_NBE + ETotL_e = W_norm( temp1 ); + ETotL_e = sub( ETotL_e, 32 ); + ETotL = W_shl_sat_l( temp1, ETotL_e ); + ETotL_e = sub( BMEnergiesL_e, ETotL_e ); + + ETotR_e = W_norm( temp2 ); + ETotR_e = sub( ETotR_e, 32 ); + ETotR = W_shl_sat_l( temp2, ETotR_e ); + ETotR_e = sub( BMEnergiesR_e, ETotR_e ); +#endif /* OPT_MASA_DEC_V1_NBE */ /* Number of basis components actually used. */ p = s_min( HRTF_MODEL_N_CPTS_VAR[iSec], qp ); SkipSmallest_ValueIndex_fx( modelEval->UseIndsL, modelEval->BMEnergiesL, qp, sub( qp, p ) ); SkipSmallest_ValueIndex_fx( modelEval->UseIndsR, modelEval->BMEnergiesR, qp, sub( qp, p ) ); +#ifndef OPT_MASA_DEC_V1_NBE /* Account for lost energy */ FOR( i = 0; i < p; i++ ) { ESynL = BASOP_Util_Add_Mant32Exp( ESynL, ESynL_e, modelEval->BMEnergiesL[modelEval->UseIndsL[i]].val_fx, BMEnergiesL_e, &ESynL_e ); ESynR = BASOP_Util_Add_Mant32Exp( ESynR, ESynR_e, modelEval->BMEnergiesR[modelEval->UseIndsR[i]].val_fx, BMEnergiesR_e, &ESynR_e ); } +#else /* OPT_MASA_DEC_V1_NBE */ + temp1 = 0; + move64(); + temp2 = 0; + move64(); + + /* Account for lost energy */ + FOR( i = 0; i < p; i++ ) + { + temp1 = W_add( temp1, modelEval->BMEnergiesL[modelEval->UseIndsL[i]].val_fx ); // BMEnergiesL_e + temp2 = W_add( temp2, modelEval->BMEnergiesR[modelEval->UseIndsR[i]].val_fx ); // BMEnergiesR_e + } + ESynL_e = W_norm( temp1 ); + ESynL_e = sub( ESynL_e, 32 ); + ESynL = W_shl_sat_l( temp1, ESynL_e ); + ESynL_e = sub( BMEnergiesL_e, ESynL_e ); + + ESynR_e = W_norm( temp2 ); + ESynR_e = sub( ESynR_e, 32 ); + ESynR = W_shl_sat_l( temp2, ESynR_e ); + ESynR_e = sub( BMEnergiesR_e, ESynR_e ); + +#endif /* OPT_MASA_DEC_V1_NBE */ tmp32 = L_deposit_h( BASOP_Util_Divide3232_Scale( ETotL, ESynL, &ScaleL_e ) ); ScaleL_e = add( ScaleL_e, sub( ETotL_e, ESynL_e ) ); ScaleL = Sqrt32( tmp32, &ScaleL_e ); +#ifdef OPT_MASA_DEC_V1_NBE + ScaleL_e = sub( ScaleL_e, 1 ); +#endif /* OPT_MASA_DEC_V1_NBE */ tmp32 = L_deposit_h( BASOP_Util_Divide3232_Scale( ETotR, ESynR, &ScaleR_e ) ); ScaleR_e = add( ScaleR_e, sub( ETotR_e, ESynR_e ) ); ScaleR = Sqrt32( tmp32, &ScaleR_e ); +#ifdef OPT_MASA_DEC_V1_NBE + ScaleR_e = sub( ScaleR_e, 1 ); +#endif /* OPT_MASA_DEC_V1_NBE */ /* Build using only the most energetic components. */ FOR( k = model->iSecFirst[iSec]; k <= model->iSecLast[iSec]; k++ ) { +#ifndef OPT_MASA_DEC_V1_NBE modelEval->hrfModL_fx[k] = 0; move32(); modelEval->hrfModR_fx[k] = 0; @@ -337,6 +412,7 @@ static void GenerateFilter_fx( tmp_hrfModR_e = 0; move16(); + Word32 index; FOR( i = 0; i < p; i++ ) { index = L_add( BM_idx[modelEval->BMEnergiesL[modelEval->UseIndsL[i]].i], imult3216( model->AlphaN, k ) ); @@ -348,6 +424,42 @@ static void GenerateFilter_fx( modelEval->hrfModR_fx[k] = BASOP_Util_Add_Mant32Exp( modelEval->hrfModR_fx[k], tmp_hrfModR_e, tmp32, add( model->AlphaR_e, 1 ), &tmp_hrfModR_e ); move32(); } +#else /* OPT_MASA_DEC_V1_NBE */ + temp1 = 0; + move64(); + temp2 = 0; + move64(); + + FOR( i = 0; i < p; i++ ) + { + temp1 = W_add( temp1, Mpy_32_32( modelEval->BM_fx[modelEval->BMEnergiesL[modelEval->UseIndsL[i]].i], model->AlphaL_fx[BM_idx[modelEval->BMEnergiesL[modelEval->UseIndsL[i]].i] + ( model->AlphaN * k )] ) ); // add(model->AlphaL_e, 1) + temp2 = W_add( temp2, Mpy_32_32( modelEval->BM_fx[modelEval->BMEnergiesR[modelEval->UseIndsR[i]].i], model->AlphaR_fx[BM_idx[modelEval->BMEnergiesR[modelEval->UseIndsR[i]].i] + ( model->AlphaN * k )] ) ); // add(model->AlphaR_e, 1) + } + + tmp_hrfModL_e = W_norm( temp1 ); + tmp_hrfModL_e = sub( tmp_hrfModL_e, 32 ); + modelEval->hrfModL_fx[k] = W_shl_sat_l( temp1, tmp_hrfModL_e ); + move32(); + tmp_hrfModL_e = sub( expL, tmp_hrfModL_e ); + if ( temp1 == 0 ) + { + tmp_hrfModL_e = 0; + move16(); + } + + tmp_hrfModR_e = W_norm( temp2 ); + tmp_hrfModR_e = sub( tmp_hrfModR_e, 32 ); + modelEval->hrfModR_fx[k] = W_shl_sat_l( temp2, tmp_hrfModR_e ); + move32(); + tmp_hrfModR_e = sub( expR, tmp_hrfModR_e ); + + if ( temp2 == 0 ) + { + tmp_hrfModR_e = 0; + move16(); + } + +#endif /* OPT_MASA_DEC_V1_NBE */ /* Account for lost energy */ modelEval->hrfModL_fx[k] = Mpy_32_32( modelEval->hrfModL_fx[k], ScaleL ); move32(); @@ -355,8 +467,13 @@ static void GenerateFilter_fx( move32(); /* NOTE: Assuming that finally, hrfMod values will be <= 1. Hence making it Q30 */ +#ifdef OPT_MASA_DEC_V1_NBE + modelEval->hrfModL_fx[k] = L_shl( modelEval->hrfModL_fx[k], add( tmp_hrfModL_e, ScaleL_e ) ); // assuming Q30 + modelEval->hrfModR_fx[k] = L_shl( modelEval->hrfModR_fx[k], add( tmp_hrfModR_e, ScaleR_e ) ); // assuming Q30 +#else /* OPT_MASA_DEC_V1_NBE */ modelEval->hrfModL_fx[k] = L_shl( modelEval->hrfModL_fx[k], sub( add( tmp_hrfModL_e, ScaleL_e ), 1 ) ); // assuming Q30 modelEval->hrfModR_fx[k] = L_shl( modelEval->hrfModR_fx[k], sub( add( tmp_hrfModR_e, ScaleR_e ), 1 ) ); // assuming Q30 +#endif /* OPT_MASA_DEC_V1_NBE */ // move32(); move32(); } diff --git a/lib_rend/ivas_objectRenderer_sfx_fx.c b/lib_rend/ivas_objectRenderer_sfx_fx.c index a570273cf..ef5fa93e4 100644 --- a/lib_rend/ivas_objectRenderer_sfx_fx.c +++ b/lib_rend/ivas_objectRenderer_sfx_fx.c @@ -292,7 +292,9 @@ void TDREND_firfilt_fx( Word32 step_fx /* Q31 */, gain_tmp_fx /* Q31 */, gain_delta_fx /* Q30 */; Word16 tmp_e; Word64 tmp64_fx; - +#ifdef OPT_MASA_DEC_V1_NBE + Word16 shift = sub( filter_e, 32 ); +#endif /* OPT_MASA_DEC_V1_NBE */ gain_delta_fx = L_sub( Gain_fx, prevGain_fx ); // Q30 step_fx = L_deposit_h( BASOP_Util_Divide3232_Scale( gain_delta_fx, subframe_length, &tmp_e ) ); // exp(tmp_e) tmp_e = sub( tmp_e, Q30 ); @@ -306,12 +308,18 @@ void TDREND_firfilt_fx( Copy32( signal_fx + add( sub( subframe_length, filterlength ), 1 ), mem_fx, sub( filterlength, 1 ) ); /* Update memory for next frame */ // Qx /* Convolution */ +#ifdef OPT_MASA_DEC_V1_NBE + FOR( i = 0; i < intp_count; i++ ) +#else /* OPT_MASA_DEC_V1_NBE */ FOR( i = 0; i < subframe_length; i++ ) +#endif /* OPT_MASA_DEC_V1_NBE */ { tmp64_fx = 0; move64(); +#ifndef OPT_MASA_DEC_V1_NBE tmp_e = 0; move16(); +#endif /* OPT_MASA_DEC_V1_NBE */ p_tmp_fx = p_signal_fx + i; // Qx p_filter_fx = filter_fx; // exp(filter_e) @@ -324,18 +332,50 @@ void TDREND_firfilt_fx( } // This is done to keep the output Q same as input Q for signal - tmp64_fx = W_shl( tmp64_fx, filter_e ); // Qx + 32 - tmp_fx = W_extract_h( tmp64_fx ); // Qx +#ifdef OPT_MASA_DEC_V1_NBE + tmp_fx = W_shl_sat_l( tmp64_fx, shift ); // Qx +#else /* OPT_MASA_DEC_V1_NBE */ + tmp64_fx = W_shl( tmp64_fx, filter_e ); // Qx + 32 + tmp_fx = W_extract_h( tmp64_fx ); // Qx +#endif /* OPT_MASA_DEC_V1_NBE */ /* Apply linear gain interpolation in case of abrupt gain changes */ gain_tmp_fx = L_add_sat( gain_tmp_fx, step_fx ); /* Saturating values which just exceeds 1, Q31*/ signal_fx[i] = Mpy_32_32( tmp_fx, gain_tmp_fx ); // Qx move32(); +#ifndef OPT_MASA_DEC_V1_NBE IF( LT_16( i, intp_count ) ) { v_add_fx( filter_fx, filter_delta_fx, filter_fx, filterlength ); // exp(filter_e) } } +#else /* OPT_MASA_DEC_V1_NBE */ + v_add_fx( filter_fx, filter_delta_fx, filter_fx, filterlength ); // exp(filter_e) + } + FOR( ; i < subframe_length; i++ ) + { + tmp64_fx = 0; + move64(); + p_tmp_fx = p_signal_fx + i; // Qx + p_filter_fx = filter_fx; // exp(filter_e) + + + FOR( j = 0; j < filterlength; j++ ) + { + tmp64_fx = W_mac_32_32( tmp64_fx, *p_filter_fx, *p_tmp_fx ); // Qx + (Q31 - filter_e) + 1 + p_filter_fx++; // exp(filter_e) + p_tmp_fx--; // Qx + } + + // This is done to keep the output Q same as input Q for signal + tmp_fx = W_shl_sat_l( tmp64_fx, shift ); // Qx + + /* Apply linear gain interpolation in case of abrupt gain changes */ + gain_tmp_fx = L_add_sat( gain_tmp_fx, step_fx ); /* Saturating values which just exceeds 1, Q31*/ + signal_fx[i] = Mpy_32_32( tmp_fx, gain_tmp_fx ); // Qx + move32(); + } +#endif /* OPT_MASA_DEC_V1_NBE */ return; } -- GitLab