Commit 2843bc35 authored by Sandesh Venkatesh
Browse files

Merge branch 'mc_enc_opt_be_2' into 'main'

Optimizations for multi-channel functions - 2

See merge request !1839
parents 433b9a91 f4db1967
Loading
Loading
Loading
Loading
+27 −3
Original line number Diff line number Diff line
@@ -614,13 +614,21 @@ void ivas_fb_mixer_get_windowed_fr_fx(
    Word16 n_new_samples;
    Word32 fr_in_block_fx[L_FRAME48k * 2];
    const Word16 *win_ptr_fx;

#ifdef OPT_MCT_ENC_V1_BE
    Word16 two_mdft_len = shl( mdft_len, 1 );
    Word16 tmp = sub( shl( mdft_len, 1 ), length );
    Word16 gb_neg = negate( gb );

    n_old_samples = s_min( ( sub( hFbMixer->fb_cfg->prior_input_length, hFbMixer->fb_cfg->windowed_fr_offset ) ), two_mdft_len );
    offset = sub( tmp, hFbMixer->ana_window_offset );
    rev_offset = sub( two_mdft_len, hFbMixer->ana_window_offset );
#else
    n_old_samples = s_min( ( sub( hFbMixer->fb_cfg->prior_input_length, hFbMixer->fb_cfg->windowed_fr_offset ) ), ( shl( mdft_len, 1 ) ) );
    n_new_samples = s_max( 0, sub( shl( length, 1 ), n_old_samples ) );
    offset = sub( sub( shl( mdft_len, 1 ), length ), hFbMixer->ana_window_offset );
    rev_offset = sub( shl( mdft_len, 1 ), hFbMixer->ana_window_offset );
#endif
    set32_fx( fr_in_block_fx, 0, offset );

    n_new_samples = s_max( 0, sub( shl( length, 1 ), n_old_samples ) );
    FOR( ch_idx = 0; ch_idx < nchan_fb_in; ch_idx++ )
    {
        Copy32( &hFbMixer->ppFilterbank_prior_input_fx[ch_idx][offset + hFbMixer->fb_cfg->windowed_fr_offset], &fr_in_block_fx[offset], sub( n_old_samples, offset ) ); // Qx
@@ -628,25 +636,41 @@ void ivas_fb_mixer_get_windowed_fr_fx(

        win_ptr_fx = hFbMixer->pAna_window_fx; /*Q15*/

#ifdef OPT_MCT_ENC_V1_BE
        FOR( j = offset; j < tmp; j++ )
#else
        FOR( j = offset; j < sub( shl( mdft_len, 1 ), length ); j++ )
#endif
        {
            fr_in_block_fx[j] = Mpy_32_16_1( fr_in_block_fx[j], ( *( win_ptr_fx++ ) ) ); // Qx + 15 - 15 = Qx
            move32();
        }

#ifdef OPT_MCT_ENC_V1_BE
        FOR( j = rev_offset; j < two_mdft_len; j++ )
#else
        FOR( j = rev_offset; j < shl( mdft_len, 1 ); j++ )
#endif
        {
            fr_in_block_fx[j] = Mpy_32_16_1( fr_in_block_fx[j], ( *( --win_ptr_fx ) ) ); // Qx + 15 - 15 = Qx
            move32();
        }

#ifdef OPT_MCT_ENC_V1_BE
        scale_sig32( fr_in_block_fx, two_mdft_len, gb_neg );
#else
        FOR( Word16 i = 0; i < shl( mdft_len, 1 ); i++ )
        {
            fr_in_block_fx[i] = L_shr( fr_in_block_fx[i], gb ); // Qx - gb
            move32();
        }
#endif

#ifdef OPT_MCT_ENC_V1_BE
        ivas_mdft_fx( fr_in_block_fx, frame_f_real_fx[ch_idx], frame_f_imag_fx[ch_idx], two_mdft_len, mdft_len );
#else
        ivas_mdft_fx( fr_in_block_fx, frame_f_real_fx[ch_idx], frame_f_imag_fx[ch_idx], shl( mdft_len, 1 ), mdft_len );
#endif
    }

    return;
+8 −0
Original line number Diff line number Diff line
@@ -1955,7 +1955,11 @@ void v_multc_acc_32_16(

    FOR( i = 0; i < N; i++ )
    {
#ifdef OPT_MCT_ENC_V1_BE
        y[i] = Madd_32_16( y[i], x[i], c );
#else
        y[i] = L_add( y[i], Mpy_32_16_1( x[i], c ) );
#endif
        move32();
    }

@@ -1972,7 +1976,11 @@ void v_multc_acc_32_32(

    FOR( i = 0; i < N; i++ )
    {
#ifdef OPT_MCT_ENC_V1_BE
        y[i] = Madd_32_32( y[i], x[i], c ); /*Qx*/
#else
        y[i] = L_add( y[i], Mpy_32_32( x[i], c ) ); /*Qx*/
#endif
        move32();
    }

+1 −0
Original line number Diff line number Diff line
@@ -78,6 +78,7 @@

/* Note: each compile switch (FIX_1101_...) is independent from the other ones */
#define OPT_MCT_ENC_V1_NBE
#define OPT_MCT_ENC_V1_BE
#define OPT_SBA_REND_V1_BE
#define OPT_HEAD_ROT_REND_V1_BE
#define OPT_SBA_DEC_V2_BE
+13 −4
Original line number Diff line number Diff line
@@ -1207,8 +1207,13 @@ static Word32 FIRLattice(
    move32();
    FOR( i = 0; i < order - 1; i++ )
    {
#ifdef OPT_MCT_ENC_V1_BE
        tmp = Madd_32_16( state[i], x, parCoeff[i] ); /*Q0*/
        x = Madd_32_16( x, state[i], parCoeff[i] );   /* exponent: 31+0 */
#else
        tmp = L_add( state[i], Mpy_32_16_1( x, parCoeff[i] ) );           /*Q0*/
        x = L_add( x, Mpy_32_16_1( state[i], parCoeff[i] ) );             /* exponent: 31+0 */
#endif
        state[i] = tmpSave; /*Q0*/
        move32();
        tmpSave = tmp; /*Q0*/
@@ -1216,7 +1221,11 @@ static Word32 FIRLattice(
    }

    /* last stage: only need half operations */
#ifdef OPT_MCT_ENC_V1_BE
    x = Madd_32_16( x, state[order - 1], parCoeff[order - 1] ); /*Q0*/
#else
    x = L_add( x, Mpy_32_16_1( state[order - 1], parCoeff[order - 1] ) ); /*Q0*/
#endif
    state[order - 1] = tmpSave; /*Q0*/
    move32();

+80 −0
Original line number Diff line number Diff line
@@ -2156,6 +2156,85 @@ static void compute_cov_mtx_fx(
    return;
}

#ifdef OPT_MCT_ENC_V1_BE
/*
 * computeIntensityVector_enc_fx()
 *
 * For each of the num_frequency_bands parameter bands, sums over the bins
 * [band_grouping[b], band_grouping[b + 1]) (b = band index + enc_param_start_band)
 * the products real*real + imag*imag between channel 0 and channels 3, 1 and 2,
 * and stores the three sums in intensity_real[0..2][band] together with a
 * per-band Q-format in q_intensity_real[band].
 *
 * The three sums of a band share one normalisation shift, so they share one
 * Q-format: q_intensity_real[band] = 2 * inp_q + 1 - gb + norm.
 */
static void computeIntensityVector_enc_fx(
    const Word16 *band_grouping,                                  /* i  : bin index of each band border                     */
    Word32 Cldfb_RealBuffer[FOA_CHANNELS][DIRAC_NO_FB_BANDS_MAX], /* i  : real part of the input bins, inp_q                */
    Word32 Cldfb_ImagBuffer[FOA_CHANNELS][DIRAC_NO_FB_BANDS_MAX], /* i  : imaginary part of the input bins, inp_q           */
    const Word16 enc_param_start_band,                            /* i  : first band to process                             */
    const Word16 num_frequency_bands,                             /* i  : number of bands to process                        */
    Word32 intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS],  /* o  : per-band sums, Q given by q_intensity_real[band]  */
    Word16 q_intensity_real[MASA_FREQUENCY_BANDS],                /* o  : Q-format of intensity_real[..][band]              */
    Word16 inp_q )                                                /* i  : Q-format of the input buffers                     */
{
    Word16 i, j;
    Word32 real, img;
    Word16 brange[2];
    Word16 shift_value = add( shl( inp_q, 1 ), 1 ); /* Q of a 32x32 product: 2 * inp_q + 1 */
    Word16 tmp_norm;
    FOR( i = 0; i < num_frequency_bands; i++ )
    {
        brange[0] = band_grouping[i + enc_param_start_band]; /* Q0 */
        move16();
        brange[1] = band_grouping[i + enc_param_start_band + 1]; /* Q0 */
        move16();
        Word16 num_bins = sub( brange[1], brange[0] );
        /* Guard bits derived from the number of bins that get accumulated in this band. */
        Word16 gb = find_guarded_bits_fx( num_bins );
        Word16 norm;

        Word64 tmp_1 = 0, tmp_2 = 0, tmp_3 = 0;
        move64();
        move64();
        move64();

        FOR( j = brange[0]; j < brange[1]; j++ )
        {
            real = Cldfb_RealBuffer[0][j];
            move32();
            img = Cldfb_ImagBuffer[0][j];
            move32();
            Word64 t1, t2, t3;
            t1 = W_mac_32_32( W_mult_32_32( Cldfb_RealBuffer[3][j], real ), Cldfb_ImagBuffer[3][j], img ); /* 2 * inp_q + 1 */
            t2 = W_mac_32_32( W_mult_32_32( Cldfb_RealBuffer[1][j], real ), Cldfb_ImagBuffer[1][j], img ); /* 2 * inp_q + 1 */
            t3 = W_mac_32_32( W_mult_32_32( Cldfb_RealBuffer[2][j], real ), Cldfb_ImagBuffer[2][j], img ); /* 2 * inp_q + 1 */
            /* Scale every term down by the guard bits before it is accumulated. */
            t1 = W_shr( t1, gb ); /* 2 * inp_q + 1 - gb */
            t2 = W_shr( t2, gb ); /* 2 * inp_q + 1 - gb */
            t3 = W_shr( t3, gb ); /* 2 * inp_q + 1 - gb */
            /* Intensity is XYZ order, audio is WYZX order. */
            tmp_1 = W_add( tmp_1, t1 ); /* 2 * inp_q + 1 - gb */
            tmp_2 = W_add( tmp_2, t2 ); /* 2 * inp_q + 1 - gb */
            tmp_3 = W_add( tmp_3, t3 ); /* 2 * inp_q + 1 - gb */
        }

        /* Common normalisation shift: the smallest W_norm() among the non-zero
         * accumulators. A zero accumulator does not constrain the shift; when
         * all three are zero, norm keeps its initial value of 63. */
        norm = 63;
        move16();
        tmp_norm = W_norm( tmp_1 );
        if ( tmp_1 != 0 )
        {
            norm = s_min( norm, tmp_norm );
        }
        tmp_norm = W_norm( tmp_2 );
        if ( tmp_2 != 0 )
        {
            norm = s_min( norm, tmp_norm );
        }
        tmp_norm = W_norm( tmp_3 );
        if ( tmp_3 != 0 )
        {
            norm = s_min( norm, tmp_norm );
        }

        /* NOTE(review): the 64-bit headroom is reduced by 32, presumably so that the
         * shifted value fits the 32-bit result taken by W_shl_sat_l() - confirm
         * against the basop definition. */
        norm = sub( norm, 32 );
        intensity_real[0][i] = W_shl_sat_l( tmp_1, norm ); // shift_value - (gb - norm)
        move32();
        intensity_real[1][i] = W_shl_sat_l( tmp_2, norm ); // shift_value - (gb - norm)
        move32();
        intensity_real[2][i] = W_shl_sat_l( tmp_3, norm ); // shift_value - (gb - norm)
        move32(); /* was missing: keeps the complexity count in line with the two stores above */
        q_intensity_real[i] = sub( shift_value, sub( gb, norm ) );
        move16();
    }

    return;
}
#else
static void computeIntensityVector_enc_fx(
    const Word16 *band_grouping,
    Word32 Cldfb_RealBuffer[FOA_CHANNELS][DIRAC_NO_FB_BANDS_MAX], /*inp_q*/
@@ -2240,6 +2319,7 @@ static void computeIntensityVector_enc_fx(

    return;
}
#endif

static void computeVerticalDiffuseness_fx(
    Word32 **buffer_intensity,     /* i  : Intensity vectors           */