From 6b2189c8ca6d8559780a61a473ff609d28deff1e Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Fri, 24 Jan 2025 14:31:56 +0530 Subject: [PATCH] Optimizations in ivas_calculate_abs_fr_fx function [x] Avg WMOPS value reduced from 23.989 to 13.397 --- lib_com/ivas_fb_mixer.c | 78 ++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 43 deletions(-) diff --git a/lib_com/ivas_fb_mixer.c b/lib_com/ivas_fb_mixer.c index 78603c1db..53aee57e3 100644 --- a/lib_com/ivas_fb_mixer.c +++ b/lib_com/ivas_fb_mixer.c @@ -915,7 +915,10 @@ static Word16 ivas_calculate_abs_fr_fx( move16(); Word16 idx_short_stride_bin_to_band = 0; move16(); + Word16 quo, tmp, exp_diff; + Word32 temp = Mpy_32_32( sampling_rate, 42949673 /* FRAMES_PER_SEC in Q31 */ ); + frame_len = extract_l( temp ); FOR( i = 0; i < bands; i++ ) { @@ -932,13 +935,11 @@ static Word16 ivas_calculate_abs_fr_fx( Word32 short_stride_pow_spec_fx[MDFT_FB_BANDS_240]; Word32 short_stride_nrg_fx = 0; move16(); - Word16 exp_diff = 0, tmp; + exp_diff = 0; move16(); - Word32 cldfb_nrg_fx = 0; - Word16 cldfb_nrg_e = 0; - move16(); - move16(); + Word64 cldfb_nrg_fx = 0; + move64(); Word16 short_stride = pFb->fb_bin_to_band.short_stride; move16(); Word32 res_dec1, res_frac, res_dec2; @@ -959,38 +960,24 @@ static Word16 ivas_calculate_abs_fr_fx( { Word32 sq_abs_fx; - Word16 sq_abs_e; // Word32 real = L_shr( *long_mdft_ptr_re_fx, 3 ); // Q27 Word32 real = *long_mdft_ptr_re_fx; // Q30 move32(); // Word32 imag = L_shr( *long_mdft_ptr_im_fx, 3 ); // Q27 Word32 imag = *long_mdft_ptr_im_fx; // Q30 - - Word16 real_exp, imag_exp; - move32(); - - Word32 real_sq, imag_sq; - - real_sq = Mpy_32_32( real, real ); // Q30 + Q30 - 31 = Q29 - real_exp = 2; - move32(); - imag_sq = Mpy_32_32( imag, imag ); // Q30 + Q30 - 31 = Q29 - imag_exp = 2; move32(); - - sq_abs_fx = BASOP_Util_Add_Mant32Exp( real_sq, real_exp, imag_sq, imag_exp, &sq_abs_e ); // Q(31 - sq_abs_e) - + Word64 acc = W_mac_32_32( W_mult_32_32( real, real ), imag, imag ); // Q61 + sq_abs_fx = W_extract_h( acc ); // Q28 long_mdft_ptr_re_fx++; long_mdft_ptr_im_fx++; /* accumulate bin energies within a short stride bin */ - short_stride_nrg_fx = L_add( short_stride_nrg_fx, L_shl( sq_abs_fx, sub( Q22, sub( Q31, sq_abs_e ) ) ) ); - short_stride_nrg_fx = L_add( short_stride_nrg_fx, L_shl( sq_abs_fx, sub( Q22, sub( Q31, sq_abs_e ) ) ) ); // Q(31 - sq_abs_e) -> Q22 + short_stride_nrg_fx = L_add( short_stride_nrg_fx, L_shr( sq_abs_fx, 6 ) ); // Q22 move32(); - IF( !( ( j + 1 ) % num_bins_per_short_stride_bin ) ) + IF( !( add( j, 1 ) % num_bins_per_short_stride_bin ) ) { /* new short stride bin */ short_stride_pow_spec_fx[j / num_bins_per_short_stride_bin] = short_stride_nrg_fx; /* energy rather than magnitude works better for covariance weighting*/ @@ -1001,33 +988,38 @@ static Word16 ivas_calculate_abs_fr_fx( } /* accumulate bin energies within a CLDFB band */ - cldfb_nrg_fx = BASOP_Util_Add_Mant32Exp( cldfb_nrg_fx, cldfb_nrg_e, sq_abs_fx, sq_abs_e, &cldfb_nrg_e ); + cldfb_nrg_fx = W_mac_32_32( cldfb_nrg_fx, sq_abs_fx, 1 ); // Q29 - IF( !( ( j + 1 ) % num_bins_per_cldfb_band ) ) + IF( !( add( j, 1 ) % num_bins_per_cldfb_band ) ) { - Word32 temp = Sqrt32( cldfb_nrg_fx, &cldfb_nrg_e ); - temp = L_shl( temp, sub( cldfb_nrg_e, Q9 ) ); // Q22 + Word16 exp = W_norm( cldfb_nrg_fx ); + cldfb_nrg_fx = W_shl( cldfb_nrg_fx, exp ); + exp = sub( 34, exp ); // 31 - (Q29 + exp -32) + temp = Sqrt32( W_extract_h( cldfb_nrg_fx ), &exp ); + temp = L_shl( temp, sub( exp, Q9 ) ); // Q22 pFb->fb_bin_to_band.pp_cldfb_weights_per_spar_band_fx[j / num_bins_per_cldfb_band][i] = temp; // Q22 move32(); cldfb_nrg_fx = 0; move32(); - cldfb_nrg_e = 0; - move16(); } } + quo = BASOP_Util_Divide3232_Scale( ONE_IN_Q30, short_stride_max_per_spar_band_fx, &exp_diff ); + /* Q of quo = Q30 - Q22 + (15 - exp_diff) --> Q23 - exp_diff. + With Mult_32_16, Q23 - exp_diff - 15 --> Q8 - exp_diff */ + exp_diff = sub( Q8, exp_diff ); + /*loop over the short MDFT bins*/ FOR( j = 0; j < short_stride; j++ ) { - tmp = BASOP_Util_Divide3232_Scale( short_stride_pow_spec_fx[j], short_stride_max_per_spar_band_fx, &exp_diff ); - short_stride_pow_spec_fx[j] = L_shl( L_deposit_l( tmp ), add( Q7, exp_diff ) ); // Q22 + short_stride_pow_spec_fx[j] = L_shr( Mult_32_16( short_stride_pow_spec_fx[j], quo ), exp_diff ); // Q22 move32(); short_stride_pow_spec_fx[j] = L_max( L_sub( short_stride_pow_spec_fx[j], 1258291 ), 0 ); // 0.3f * ONE_IN_Q22 move32(); - tmp = BASOP_Util_Divide3232_Scale( short_stride_pow_spec_fx[j], 2936012, &exp_diff ); // 0.7f * ONE_IN_Q22 - short_stride_pow_spec_fx[j] = L_shl( L_deposit_l( tmp ), add( Q7, exp_diff ) ); // Q22 + short_stride_pow_spec_fx[j] = L_shl( Mpy_32_32( short_stride_pow_spec_fx[j], 1533916891 /* 1/0.7 in Q30 */ ), 1 ); // Q22 move32(); + IF( short_stride_pow_spec_fx[j] > 0 ) { assert( idx_short_stride_bin_to_band < 2 * MDFT_FB_BANDS_240 ); /* array size of p_short_stride_bin_to_band */ @@ -1083,6 +1075,13 @@ static Word16 ivas_calculate_abs_fr_fx( sum_over_spar_bands_fx = L_max( sum_over_spar_bands_fx, EPSILON_FX ); // Q22 move32(); + exp_diff = 0; + move16(); + tmp = BASOP_Util_Divide3232_Scale( ONE_IN_Q30, sum_over_spar_bands_fx, &exp_diff ); + /* Q of quo = Q30 - Q22 + (15 - exp_diff) --> Q23 - exp_diff. + With Mult_32_16, Q23 - exp_diff - 15 --> Q8 - exp_diff */ + exp_diff = sub( Q8, exp_diff ); + FOR( i = 0; i < bands; i++ ) { test(); @@ -1096,20 +1095,13 @@ static Word16 ivas_calculate_abs_fr_fx( move16(); } - Word16 exp_diff = 0; - move16(); - Word16 tmp = BASOP_Util_Divide3232_Scale( pFb->fb_bin_to_band.pp_cldfb_weights_per_spar_band_fx[j][i], sum_over_spar_bands_fx, &exp_diff ); - pFb->fb_bin_to_band.pp_cldfb_weights_per_spar_band_fx[j][i] = L_shl( L_deposit_l( tmp ), add( Q7, exp_diff ) ); // Q22 + pFb->fb_bin_to_band.pp_cldfb_weights_per_spar_band_fx[j][i] = L_shr( Mult_32_16( pFb->fb_bin_to_band.pp_cldfb_weights_per_spar_band_fx[j][i], tmp ), exp_diff ); move32(); } pFb->fb_bin_to_band.p_spar_start_bands[j] = spar_start; move16(); } - Word16 exp; - frame_len = BASOP_Util_Divide3232_Scale( sampling_rate, FRAMES_PER_SEC, &exp ); - frame_len = shr( frame_len, sub( 15, exp ) ); - set32_fx( ppFilterbank_FRs_s_fx, 0, frame_len ); /*Commented logic is for calculating number of active bands, can be removed if not needed */ @@ -1136,7 +1128,7 @@ static Word16 ivas_calculate_abs_fr_fx( Word32 temp_fx = 0; move32(); - Word16 exp_diff = 0; + exp_diff = 0; move16(); Word32 real = L_shr( *pFilterbank_bin_to_band_re_fx, 3 ); // Q27 Word32 imag = L_shr( *pFilterbank_bin_to_band_im_fx, 3 ); // Q27 @@ -1195,7 +1187,7 @@ static Word16 ivas_calculate_abs_fr_fx( { Word16 abs_active_bins = pFb->fb_bin_to_band.pFb_active_bins_per_band[j]; Word16 abs_start_offset = pFb->fb_bin_to_band.pFb_start_bin_per_band[j]; - Word16 exp_diff = 0, tmp; + exp_diff = 0; move16(); move16(); -- GitLab