Commit d5b43b02 authored by Sandesh Venkatesh
Browse files

Merge branch 'opti_ivas_calculate_abs_fr_fx' into 'main'

Optimizations in ivas_calculate_abs_fr_fx function [allow regression]

See merge request !1022
parents eee7fdb4 6b2189c8
Loading
Loading
Loading
Loading
Loading
+35 −43
Original line number Diff line number Diff line
@@ -915,7 +915,10 @@ static Word16 ivas_calculate_abs_fr_fx(
    move16();
    Word16 idx_short_stride_bin_to_band = 0;
    move16();
    Word16 quo, tmp, exp_diff;

    Word32 temp = Mpy_32_32( sampling_rate, 42949673 /* FRAMES_PER_SEC in Q31 */ );
    frame_len = extract_l( temp );

    FOR( i = 0; i < bands; i++ )
    {
@@ -932,13 +935,11 @@ static Word16 ivas_calculate_abs_fr_fx(
        Word32 short_stride_pow_spec_fx[MDFT_FB_BANDS_240];
        Word32 short_stride_nrg_fx = 0;
        move16();
        Word16 exp_diff = 0, tmp;
        exp_diff = 0;
        move16();

        Word32 cldfb_nrg_fx = 0;
        Word16 cldfb_nrg_e = 0;
        move16();
        move16();
        Word64 cldfb_nrg_fx = 0;
        move64();
        Word16 short_stride = pFb->fb_bin_to_band.short_stride;
        move16();
        Word32 res_dec1, res_frac, res_dec2;
@@ -959,38 +960,24 @@ static Word16 ivas_calculate_abs_fr_fx(
        {

            Word32 sq_abs_fx;
            Word16 sq_abs_e;

            // Word32 real = L_shr( *long_mdft_ptr_re_fx, 3 ); // Q27
            Word32 real = *long_mdft_ptr_re_fx; // Q30
            move32();
            // Word32 imag = L_shr( *long_mdft_ptr_im_fx, 3 ); // Q27
            Word32 imag = *long_mdft_ptr_im_fx; // Q30

            Word16 real_exp, imag_exp;
            move32();

            Word32 real_sq, imag_sq;

            real_sq = Mpy_32_32( real, real ); // Q30 + Q30 - 31 = Q29
            real_exp = 2;
            move32();
            imag_sq = Mpy_32_32( imag, imag ); // Q30 + Q30 - 31 = Q29
            imag_exp = 2;
            move32();

            sq_abs_fx = BASOP_Util_Add_Mant32Exp( real_sq, real_exp, imag_sq, imag_exp, &sq_abs_e ); // Q(31 - sq_abs_e)

            Word64 acc = W_mac_32_32( W_mult_32_32( real, real ), imag, imag ); // Q61
            sq_abs_fx = W_extract_h( acc );                                     // Q28
            long_mdft_ptr_re_fx++;
            long_mdft_ptr_im_fx++;

            /* accumulate bin energies within a short stride bin */

            short_stride_nrg_fx = L_add( short_stride_nrg_fx, L_shl( sq_abs_fx, sub( Q22, sub( Q31, sq_abs_e ) ) ) );
            short_stride_nrg_fx = L_add( short_stride_nrg_fx, L_shl( sq_abs_fx, sub( Q22, sub( Q31, sq_abs_e ) ) ) ); // Q(31 - sq_abs_e) -> Q22
            short_stride_nrg_fx = L_add( short_stride_nrg_fx, L_shr( sq_abs_fx, 6 ) ); // Q22
            move32();

            IF( !( ( j + 1 ) % num_bins_per_short_stride_bin ) )
            IF( !( add( j, 1 ) % num_bins_per_short_stride_bin ) )
            {
                /* new short stride bin */
                short_stride_pow_spec_fx[j / num_bins_per_short_stride_bin] = short_stride_nrg_fx; /* energy rather than magnitude works better for covariance weighting*/
@@ -1001,33 +988,38 @@ static Word16 ivas_calculate_abs_fr_fx(
            }

            /* accumulate bin energies within a CLDFB band */
            cldfb_nrg_fx = BASOP_Util_Add_Mant32Exp( cldfb_nrg_fx, cldfb_nrg_e, sq_abs_fx, sq_abs_e, &cldfb_nrg_e );
            cldfb_nrg_fx = W_mac_32_32( cldfb_nrg_fx, sq_abs_fx, 1 ); // Q29

            IF( !( ( j + 1 ) % num_bins_per_cldfb_band ) )
            IF( !( add( j, 1 ) % num_bins_per_cldfb_band ) )
            {
                Word32 temp = Sqrt32( cldfb_nrg_fx, &cldfb_nrg_e );
                temp = L_shl( temp, sub( cldfb_nrg_e, Q9 ) );                                                 // Q22
                Word16 exp = W_norm( cldfb_nrg_fx );
                cldfb_nrg_fx = W_shl( cldfb_nrg_fx, exp );
                exp = sub( 34, exp ); // 31 - (Q29 + exp -32)
                temp = Sqrt32( W_extract_h( cldfb_nrg_fx ), &exp );
                temp = L_shl( temp, sub( exp, Q9 ) );                                                         // Q22
                pFb->fb_bin_to_band.pp_cldfb_weights_per_spar_band_fx[j / num_bins_per_cldfb_band][i] = temp; // Q22
                move32();
                cldfb_nrg_fx = 0;
                move32();
                cldfb_nrg_e = 0;
                move16();
            }
        }

        quo = BASOP_Util_Divide3232_Scale( ONE_IN_Q30, short_stride_max_per_spar_band_fx, &exp_diff );
        /* Q of quo = Q30 - Q22 + (15 - exp_diff) --> Q23 - exp_diff.
        With Mult_32_16, Q23 - exp_diff - 15 --> Q8 - exp_diff */
        exp_diff = sub( Q8, exp_diff );

        /*loop over the short MDFT bins*/
        FOR( j = 0; j < short_stride; j++ )
        {
            tmp = BASOP_Util_Divide3232_Scale( short_stride_pow_spec_fx[j], short_stride_max_per_spar_band_fx, &exp_diff );
            short_stride_pow_spec_fx[j] = L_shl( L_deposit_l( tmp ), add( Q7, exp_diff ) ); // Q22
            short_stride_pow_spec_fx[j] = L_shr( Mult_32_16( short_stride_pow_spec_fx[j], quo ), exp_diff ); // Q22
            move32();
            short_stride_pow_spec_fx[j] = L_max( L_sub( short_stride_pow_spec_fx[j], 1258291 ), 0 ); // 0.3f * ONE_IN_Q22
            move32();
            tmp = BASOP_Util_Divide3232_Scale( short_stride_pow_spec_fx[j], 2936012, &exp_diff ); // 0.7f * ONE_IN_Q22
            short_stride_pow_spec_fx[j] = L_shl( L_deposit_l( tmp ), add( Q7, exp_diff ) );       // Q22
            short_stride_pow_spec_fx[j] = L_shl( Mpy_32_32( short_stride_pow_spec_fx[j], 1533916891 /* 1/0.7 in Q30 */ ), 1 ); // Q22
            move32();


            IF( short_stride_pow_spec_fx[j] > 0 )
            {
                assert( idx_short_stride_bin_to_band < 2 * MDFT_FB_BANDS_240 ); /* array size of p_short_stride_bin_to_band */
@@ -1083,6 +1075,13 @@ static Word16 ivas_calculate_abs_fr_fx(
        sum_over_spar_bands_fx = L_max( sum_over_spar_bands_fx, EPSILON_FX ); // Q22
        move32();

        exp_diff = 0;
        move16();
        tmp = BASOP_Util_Divide3232_Scale( ONE_IN_Q30, sum_over_spar_bands_fx, &exp_diff );
        /* Q of tmp = Q30 - Q22 + (15 - exp_diff) --> Q23 - exp_diff.
        With Mult_32_16, Q23 - exp_diff - 15 --> Q8 - exp_diff */
        exp_diff = sub( Q8, exp_diff );

        FOR( i = 0; i < bands; i++ )
        {
            test();
@@ -1096,20 +1095,13 @@ static Word16 ivas_calculate_abs_fr_fx(
                move16();
            }

            Word16 exp_diff = 0;
            move16();
            Word16 tmp = BASOP_Util_Divide3232_Scale( pFb->fb_bin_to_band.pp_cldfb_weights_per_spar_band_fx[j][i], sum_over_spar_bands_fx, &exp_diff );
            pFb->fb_bin_to_band.pp_cldfb_weights_per_spar_band_fx[j][i] = L_shl( L_deposit_l( tmp ), add( Q7, exp_diff ) ); // Q22
            pFb->fb_bin_to_band.pp_cldfb_weights_per_spar_band_fx[j][i] = L_shr( Mult_32_16( pFb->fb_bin_to_band.pp_cldfb_weights_per_spar_band_fx[j][i], tmp ), exp_diff );
            move32();
        }
        pFb->fb_bin_to_band.p_spar_start_bands[j] = spar_start;
        move16();
    }

    Word16 exp;
    frame_len = BASOP_Util_Divide3232_Scale( sampling_rate, FRAMES_PER_SEC, &exp );
    frame_len = shr( frame_len, sub( 15, exp ) );

    set32_fx( ppFilterbank_FRs_s_fx, 0, frame_len );

    /*Commented logic is for calculating number of active bands, can be removed if not needed */
@@ -1136,7 +1128,7 @@ static Word16 ivas_calculate_abs_fr_fx(
            Word32 temp_fx = 0;
            move32();

            Word16 exp_diff = 0;
            exp_diff = 0;
            move16();
            Word32 real = L_shr( *pFilterbank_bin_to_band_re_fx, 3 ); // Q27
            Word32 imag = L_shr( *pFilterbank_bin_to_band_im_fx, 3 ); // Q27
@@ -1195,7 +1187,7 @@ static Word16 ivas_calculate_abs_fr_fx(
        {
            Word16 abs_active_bins = pFb->fb_bin_to_band.pFb_active_bins_per_band[j];
            Word16 abs_start_offset = pFb->fb_bin_to_band.pFb_start_bin_per_band[j];
            Word16 exp_diff = 0, tmp;
            exp_diff = 0;

            move16();
            move16();