From f4db1967eb63c8a491be62cbebdad2efee565cf8 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Mon, 30 Jun 2025 13:38:02 +0530 Subject: [PATCH] Optimizations for multi-channel functions - 2 --- lib_com/ivas_fb_mixer_fx.c | 30 ++++++++++++-- lib_com/ivas_tools_fx.c | 8 ++++ lib_com/options.h | 1 + lib_com/tns_base.c | 17 ++++++-- lib_enc/ivas_mcmasa_enc_fx.c | 80 ++++++++++++++++++++++++++++++++++++ 5 files changed, 129 insertions(+), 7 deletions(-) diff --git a/lib_com/ivas_fb_mixer_fx.c b/lib_com/ivas_fb_mixer_fx.c index 0d8a74514..c40f65ad6 100644 --- a/lib_com/ivas_fb_mixer_fx.c +++ b/lib_com/ivas_fb_mixer_fx.c @@ -614,13 +614,21 @@ void ivas_fb_mixer_get_windowed_fr_fx( Word16 n_new_samples; Word32 fr_in_block_fx[L_FRAME48k * 2]; const Word16 *win_ptr_fx; - +#ifdef OPT_MCT_ENC_V1_BE /* Optimized path: 2 * mdft_len, 2 * mdft_len - length and -gb are computed once here and reused further down instead of being re-evaluated in each loop condition and call. */ + Word16 two_mdft_len = shl( mdft_len, 1 ); + Word16 tmp = sub( shl( mdft_len, 1 ), length ); /* NOTE(review): same value as sub( two_mdft_len, length ); the second shl looks redundant. */ + Word16 gb_neg = negate( gb ); + + n_old_samples = s_min( ( sub( hFbMixer->fb_cfg->prior_input_length, hFbMixer->fb_cfg->windowed_fr_offset ) ), two_mdft_len ); + offset = sub( tmp, hFbMixer->ana_window_offset ); + rev_offset = sub( two_mdft_len, hFbMixer->ana_window_offset ); +#else n_old_samples = s_min( ( sub( hFbMixer->fb_cfg->prior_input_length, hFbMixer->fb_cfg->windowed_fr_offset ) ), ( shl( mdft_len, 1 ) ) ); - n_new_samples = s_max( 0, sub( shl( length, 1 ), n_old_samples ) ); offset = sub( sub( shl( mdft_len, 1 ), length ), hFbMixer->ana_window_offset ); rev_offset = sub( shl( mdft_len, 1 ), hFbMixer->ana_window_offset ); +#endif set32_fx( fr_in_block_fx, 0, offset ); - + n_new_samples = s_max( 0, sub( shl( length, 1 ), n_old_samples ) ); /* moved after the conditional section so both branches share it; it depends only on length and n_old_samples */ FOR( ch_idx = 0; ch_idx < nchan_fb_in; ch_idx++ ) { Copy32( &hFbMixer->ppFilterbank_prior_input_fx[ch_idx][offset + hFbMixer->fb_cfg->windowed_fr_offset], &fr_in_block_fx[offset], sub( n_old_samples, offset ) ); // Qx @@ -628,25 +636,41 @@ void ivas_fb_mixer_get_windowed_fr_fx( win_ptr_fx = hFbMixer->pAna_window_fx; /*Q15*/ +#ifdef 
OPT_MCT_ENC_V1_BE + FOR( j = offset; j < tmp; j++ ) +#else FOR( j = offset; j < sub( shl( mdft_len, 1 ), length ); j++ ) +#endif { fr_in_block_fx[j] = Mpy_32_16_1( fr_in_block_fx[j], ( *( win_ptr_fx++ ) ) ); // Qx + 15 - 15 = Qx move32(); } +#ifdef OPT_MCT_ENC_V1_BE + FOR( j = rev_offset; j < two_mdft_len; j++ ) +#else FOR( j = rev_offset; j < shl( mdft_len, 1 ); j++ ) +#endif { fr_in_block_fx[j] = Mpy_32_16_1( fr_in_block_fx[j], ( *( --win_ptr_fx ) ) ); // Qx + 15 - 15 = Qx move32(); } +#ifdef OPT_MCT_ENC_V1_BE + scale_sig32( fr_in_block_fx, two_mdft_len, gb_neg ); /* stands in for the per-sample L_shr( x, gb ) loop of the other branch; NOTE(review): assumes scale_sig32 with exponent -gb is bit-exact with L_shr by gb - confirm */ +#else FOR( Word16 i = 0; i < shl( mdft_len, 1 ); i++ ) { fr_in_block_fx[i] = L_shr( fr_in_block_fx[i], gb ); // Qx - gb move32(); } +#endif +#ifdef OPT_MCT_ENC_V1_BE + ivas_mdft_fx( fr_in_block_fx, frame_f_real_fx[ch_idx], frame_f_imag_fx[ch_idx], two_mdft_len, mdft_len ); +#else ivas_mdft_fx( fr_in_block_fx, frame_f_real_fx[ch_idx], frame_f_imag_fx[ch_idx], shl( mdft_len, 1 ), mdft_len ); +#endif } return; diff --git a/lib_com/ivas_tools_fx.c b/lib_com/ivas_tools_fx.c index ca8dec276..e334a8cda 100644 --- a/lib_com/ivas_tools_fx.c +++ b/lib_com/ivas_tools_fx.c @@ -1955,7 +1955,11 @@ void v_multc_acc_32_16( FOR( i = 0; i < N; i++ ) { +#ifdef OPT_MCT_ENC_V1_BE + y[i] = Madd_32_16( y[i], x[i], c ); /* replaces L_add( y[i], Mpy_32_16_1( x[i], c ) ) from the other branch */ +#else y[i] = L_add( y[i], Mpy_32_16_1( x[i], c ) ); +#endif move32(); } @@ -1972,7 +1976,11 @@ void v_multc_acc_32_32( FOR( i = 0; i < N; i++ ) { +#ifdef OPT_MCT_ENC_V1_BE + y[i] = Madd_32_32( y[i], x[i], c ); /*Qx*/ +#else y[i] = L_add( y[i], Mpy_32_32( x[i], c ) ); /*Qx*/ +#endif move32(); } diff --git a/lib_com/options.h b/lib_com/options.h index 4fcc4a03e..1a9f8ab84 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -78,6 +78,7 @@ /* Note: each compile switch (FIX_1101_...) 
is independent from the other ones */ #define OPT_MCT_ENC_V1_NBE +#define OPT_MCT_ENC_V1_BE /* switch guarding every code change in this patch; NOTE(review): the BE suffix presumably means bit-exact - confirm */ #define OPT_SBA_REND_V1_BE #define OPT_HEAD_ROT_REND_V1_BE #define OPT_SBA_DEC_V2_BE diff --git a/lib_com/tns_base.c b/lib_com/tns_base.c index 5e18430be..376e2813d 100644 --- a/lib_com/tns_base.c +++ b/lib_com/tns_base.c @@ -1207,17 +1207,26 @@ static Word32 FIRLattice( move32(); FOR( i = 0; i < order - 1; i++ ) { - tmp = L_add( state[i], Mpy_32_16_1( x, parCoeff[i] ) ); /*Q0*/ - x = L_add( x, Mpy_32_16_1( state[i], parCoeff[i] ) ); /* exponent: 31+0 */ - state[i] = tmpSave; /*Q0*/ +#ifdef OPT_MCT_ENC_V1_BE /* same lattice step as the other branch, with each L_add( a, Mpy_32_16_1( b, c ) ) written as Madd_32_16( a, b, c ) */ + tmp = Madd_32_16( state[i], x, parCoeff[i] ); /*Q0*/ + x = Madd_32_16( x, state[i], parCoeff[i] ); /* exponent: 31+0 */ +#else + tmp = L_add( state[i], Mpy_32_16_1( x, parCoeff[i] ) ); /*Q0*/ + x = L_add( x, Mpy_32_16_1( state[i], parCoeff[i] ) ); /* exponent: 31+0 */ +#endif + state[i] = tmpSave; /*Q0*/ move32(); tmpSave = tmp; /*Q0*/ move32(); } /* last stage: only need half operations */ +#ifdef OPT_MCT_ENC_V1_BE + x = Madd_32_16( x, state[order - 1], parCoeff[order - 1] ); /*Q0*/ +#else x = L_add( x, Mpy_32_16_1( state[order - 1], parCoeff[order - 1] ) ); /*Q0*/ - state[order - 1] = tmpSave; /*Q0*/ +#endif + state[order - 1] = tmpSave; /*Q0*/ move32(); return x; /*Q0*/ diff --git a/lib_enc/ivas_mcmasa_enc_fx.c b/lib_enc/ivas_mcmasa_enc_fx.c index 7d261d4e4..fb4ea608a 100644 --- a/lib_enc/ivas_mcmasa_enc_fx.c +++ b/lib_enc/ivas_mcmasa_enc_fx.c @@ -2156,6 +2156,85 @@ static void compute_cov_mtx_fx( return; } +#ifdef OPT_MCT_ENC_V1_BE /* Optimized variant of computeIntensityVector_enc_fx. For each band i in [0, num_frequency_bands) it sums, over bins band_grouping[i + enc_param_start_band] up to but excluding band_grouping[i + enc_param_start_band + 1], the 64-bit values Re[c] x Re[0] + Im[c] x Im[0] for channels c = 3, 1, 2 (each right-shifted by gb first), then applies one common normalization shift and writes the 32-bit results to intensity_real[0..2][i] and their Q value to q_intensity_real[i]. */ +static void computeIntensityVector_enc_fx( + const Word16 *band_grouping, + Word32 Cldfb_RealBuffer[FOA_CHANNELS][DIRAC_NO_FB_BANDS_MAX], /*inp_q*/ + Word32 Cldfb_ImagBuffer[FOA_CHANNELS][DIRAC_NO_FB_BANDS_MAX], /*inp_q*/ + const Word16 enc_param_start_band, /* i : first band to process */ + const Word16 num_frequency_bands, + Word32 intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS], /*exp: exp_intensity_real*/ + Word16 
q_intensity_real[MASA_FREQUENCY_BANDS], + Word16 inp_q ) +{ + Word16 i, j; + Word32 real, img; + Word16 brange[2]; + Word16 shift_value = add( shl( inp_q, 1 ), 1 ); /* 2 * inp_q + 1, matching the Q noted on the W_mult_32_32 products in the bin loop */ + Word16 tmp_norm; + FOR( i = 0; i < num_frequency_bands; i++ ) + { + brange[0] = band_grouping[i + enc_param_start_band]; /* Q0 */ + move16(); + brange[1] = band_grouping[i + enc_param_start_band + 1]; /* Q0 */ + move16(); + Word16 num_bins = sub( brange[1], brange[0] ); + Word16 gb = find_guarded_bits_fx( num_bins ); /* NOTE(review): presumably the headroom needed to add up num_bins products without overflow - confirm against find_guarded_bits_fx */ + Word16 norm; + + Word64 tmp_1 = 0, tmp_2 = 0, tmp_3 = 0; + move64(); + move64(); + move64(); + + FOR( j = brange[0]; j < brange[1]; j++ ) + { + real = Cldfb_RealBuffer[0][j]; + move32(); + img = Cldfb_ImagBuffer[0][j]; + move32(); + Word64 t1, t2, t3; + t1 = W_mac_32_32( W_mult_32_32( Cldfb_RealBuffer[3][j], real ), Cldfb_ImagBuffer[3][j], img ); /* 2 * q_cldfb + 1 */ + t2 = W_mac_32_32( W_mult_32_32( Cldfb_RealBuffer[1][j], real ), Cldfb_ImagBuffer[1][j], img ); /* 2 * q_cldfb + 1 */ + t3 = W_mac_32_32( W_mult_32_32( Cldfb_RealBuffer[2][j], real ), Cldfb_ImagBuffer[2][j], img ); /* 2 * q_cldfb + 1 */ + t1 = W_shr( t1, gb ); + t2 = W_shr( t2, gb ); + t3 = W_shr( t3, gb ); /* each product is scaled down by gb bits before accumulation; q_intensity_real subtracts gb accordingly */ + /* Intensity is XYZ order, audio is WYZX order. 
*/ + tmp_1 = W_add( tmp_1, t1 ); /* 2 * q_cldfb + 1 - gb */ + tmp_2 = W_add( tmp_2, t2 ); /* 2 * q_cldfb + 1 - gb */ + tmp_3 = W_add( tmp_3, t3 ); /* 2 * q_cldfb + 1 - gb */ + } + norm = 63; /* keep the smallest W_norm among the non-zero accumulators so that one common shift fits all three */ + move16(); + tmp_norm = W_norm( tmp_1 ); + if ( tmp_1 != 0 ) + { + norm = s_min( norm, tmp_norm ); + } + tmp_norm = W_norm( tmp_2 ); + if ( tmp_2 != 0 ) + { + norm = s_min( norm, tmp_norm ); + } + tmp_norm = W_norm( tmp_3 ); + if ( tmp_3 != 0 ) + { + norm = s_min( norm, tmp_norm ); + } + norm = sub( norm, 32 ); /* NOTE(review): presumably rebases the 64-bit normalization shift for the 32-bit result of W_shl_sat_l - confirm */ + intensity_real[0][i] = W_shl_sat_l( tmp_1, norm ); // shift_value - (gb - norm) + move32(); + intensity_real[1][i] = W_shl_sat_l( tmp_2, norm ); // shift_value - (gb - norm) + move32(); + intensity_real[2][i] = W_shl_sat_l( tmp_3, norm ); // shift_value - (gb - norm) /* NOTE(review): unlike the two stores before it, no move32() follows this store - confirm whether the complexity counter call was omitted by mistake */ + q_intensity_real[i] = sub( shift_value, sub( gb, norm ) ); + move16(); + } + + return; +} +#else static void computeIntensityVector_enc_fx( const Word16 *band_grouping, Word32 Cldfb_RealBuffer[FOA_CHANNELS][DIRAC_NO_FB_BANDS_MAX], /*inp_q*/ @@ -2240,6 +2319,7 @@ static void computeIntensityVector_enc_fx( return; } +#endif static void computeVerticalDiffuseness_fx( Word32 **buffer_intensity, /* i : Intensity vectors */ -- GitLab