From be8a2c4ba52ccd9eda84a4cb6d664d9991686515 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Fri, 4 Jul 2025 21:37:22 +0530 Subject: [PATCH] Optimization changes for ivas_mcmasa_dmx_fx function - non bit exact --- lib_com/options.h | 1 + lib_enc/ivas_mcmasa_enc_fx.c | 64 ++++++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index de20f6959..3c6148817 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -80,6 +80,7 @@ /* Note: each compile switch (FIX_1101_...) is independent from the other ones */ #define OPT_MCH_DEC_V1_NBE +#define OPT_MCT_ENC_48KB_NBE #define OPT_MCH_DEC_V1_BE #define OPT_MCT_ENC_V2_NBE #define OPT_SBA_DEC_V2_NBE diff --git a/lib_enc/ivas_mcmasa_enc_fx.c b/lib_enc/ivas_mcmasa_enc_fx.c index fb4ea608a..b24623003 100644 --- a/lib_enc/ivas_mcmasa_enc_fx.c +++ b/lib_enc/ivas_mcmasa_enc_fx.c @@ -1942,9 +1942,17 @@ static void ivas_mcmasa_dmx_fx( Word32 alpha_fx, L_tmp, L_tmp1; Word16 multiChEne_e, scale, downmixEne_e = 0, prevEQ_e, tmp, currEQ_e, instEQ_e; move16(); +#ifdef OPT_MCT_ENC_48KB_NBE + Word16 max_exp, tmp_exp, separateChannelFlag; + Word64 tmp_64; + Word64 multiChEne_64_fx = 0; + Word64 downmixEne_64_fx = 0; + move64(); + move64(); +#endif numAnalysisChannels = sub( nchan_inp, 1 ); - IF( hMcMasa->separateChannelEnabled ) + if ( hMcMasa->separateChannelEnabled ) { numAnalysisChannels = sub( nchan_inp, 2 ); } @@ -1957,22 +1965,45 @@ static void ivas_mcmasa_dmx_fx( { FOR( i = 0; i < input_frame; i++ ) { +#ifdef OPT_MCT_ENC_48KB_NBE + multiChEne_64_fx = W_mac_32_32( multiChEne_64_fx, data_fx[j][i], data_fx[j][i] ); // exp: 2*data_e +#else L_tmp1 = BASOP_Util_Add_Mant32Exp( data_fx[j][i], data_e, 0, 0, &scale ); L_tmp = Mpy_32_32( L_tmp1, L_tmp1 ); // data_e + data_e multiChEne_fx = BASOP_Util_Add_Mant32Exp( L_tmp, scale + scale, multiChEne_fx, multiChEne_e, &scale ); multiChEne_e = scale; move16(); +#endif } } - +#ifdef OPT_MCT_ENC_48KB_NBE + tmp = shl( data_e, 1 ); +#endif IF( EQ_16( nchan_transport, 2 ) ) { Word16 numSideChannels; /* Channels other than left, right, center */ Word16 leftIndex, rightIndex; +#ifdef OPT_MCT_ENC_48KB_NBE + Word16 tmp_16; + + separateChannelFlag = 1; + move16(); + if ( hMcMasa->separateChannelEnabled ) + { + separateChannelFlag = 0; + move16(); + } +#endif numSideChannels = sub( shr( numAnalysisChannels, 1 ), 1 ); FOR( j = 0; j < numSideChannels; j++ ) { +#ifdef OPT_MCT_ENC_48KB_NBE + tmp_16 = add( shl( j, 1 ), 2 ); + + leftIndex = add( tmp_16, separateChannelFlag ); + rightIndex = add( add( tmp_16, 1 ), separateChannelFlag ); +#else IF( hMcMasa->separateChannelEnabled ) { leftIndex = add( shl( j, 1 ), 2 ); @@ -1983,7 +2014,7 @@ static void ivas_mcmasa_dmx_fx( leftIndex = add( shl( j, 1 ), 3 ); rightIndex = add( shl( j, 1 ), 4 ); } - +#endif FOR( i = 0; i < input_frame; i++ ) { data_fx[0][i] = L_add( data_fx[0][i], data_fx[leftIndex][i] ); // data_e @@ -2023,15 +2054,28 @@ static void ivas_mcmasa_dmx_fx( { FOR( i = 0; i < input_frame; i++ ) { +#ifdef OPT_MCT_ENC_48KB_NBE + downmixEne_64_fx = W_mac_32_32( downmixEne_64_fx, data_fx[j][i], data_fx[j][i] ); // exp: 2*data_e +#else L_tmp1 = BASOP_Util_Add_Mant32Exp( data_fx[j][i], data_e, 0, 0, &scale ); L_tmp = Mpy_32_32( L_tmp1, L_tmp1 ); // data_e + data_e downmixEne_fx = BASOP_Util_Add_Mant32Exp( L_tmp, scale + scale, downmixEne_fx, downmixEne_e, &downmixEne_e ); +#endif } } alpha_fx = 214748364; // 0.1 in Q31 move32(); +#ifdef OPT_MCT_ENC_48KB_NBE + scale = W_norm( multiChEne_64_fx ); + multiChEne_fx = W_extract_h( W_shl( multiChEne_64_fx, scale ) ); + multiChEne_e = sub( tmp, scale ); + + scale = W_norm( downmixEne_64_fx ); + downmixEne_fx = W_extract_h( W_shl( downmixEne_64_fx, scale ) ); + downmixEne_e = sub( tmp, scale ); +#endif L_tmp = Mpy_32_32( alpha_fx, multiChEne_fx ); L_tmp1 = Mpy_32_32( 1932735284 /* 0.9f in Q31 */, hMcMasa->prevMultiChEne_fx ); hMcMasa->prevMultiChEne_fx = BASOP_Util_Add_Mant32Exp( L_tmp, multiChEne_e, L_tmp1, hMcMasa->prevMultiChEne_e, &hMcMasa->prevMultiChEne_e ); @@ -2056,12 +2100,26 @@ static void ivas_mcmasa_dmx_fx( hMcMasa->prevEQ_e = currEQ_e; move16(); +#ifdef OPT_MCT_ENC_48KB_NBE + max_exp = s_max( prevEQ_e, currEQ_e ); + prevEQ_fx = L_shl( prevEQ_fx, sub( prevEQ_e, max_exp ) ); // exp:max_exp + currEQ_fx = L_shl( currEQ_fx, sub( currEQ_e, max_exp ) ); // exp:max_exp + tmp_exp = add( max_exp, 16 ); +#endif + FOR( i = 0; i < input_frame; i++ ) { +#ifdef OPT_MCT_ENC_48KB_NBE + tmp_64 = W_mac_32_32( W_mult_32_16( currEQ_fx, hMcMasa->interpolator_fx[i] ), prevEQ_fx, L_sub( ONE_IN_Q15, hMcMasa->interpolator_fx[i] ) ); // exp:max_exp +16 + scale = W_norm( tmp_64 ); + instEQ_fx = W_extract_h( W_shl( tmp_64, scale ) ); + instEQ_e = sub( tmp_exp, scale ); +#else L_tmp = Mpy_32_32( L_deposit_h( hMcMasa->interpolator_fx[i] ), currEQ_fx ); L_tmp1 = L_sub( 1073741824 /* 1 in Q30 */, L_lshr( L_deposit_h( hMcMasa->interpolator_fx[i] ), 1 ) ); L_tmp1 = Mpy_32_32( L_tmp1, prevEQ_fx ); instEQ_fx = BASOP_Util_Add_Mant32Exp( L_tmp, currEQ_e, L_tmp1, add( prevEQ_e, 1 ), &instEQ_e ); +#endif FOR( j = 0; j < nchan_transport; j++ ) { -- GitLab