Commit ad64b267 authored by Arthur Tritthart
Browse files

Author: Arthur Tritthart, FhG, 06-DEC-2024

Changes for BASOP tuning (ticket 1009):

File lib_com/basop_util.c:
--------------------------
modified imult1616 to directly use i_mult, WMOPS weights reduced: 2 -> 1

File lib_com/ivas_tools.c:
--------------------------
Added an IF-conditioned branch for the interleaved-to-linear format case. This is
the way the function is currently used; WMOPS weights reduced: 5 -> 2

File lib_com/tools_fx.c:
------------------------
Simplified set32_fx function, stripped use of L_deposit_l; WMOPS weights reduced: 2 -> 1

File lib_rend/ivas_dirac_decorr_dec.c:
--------------------------------------
- use of is_zero_arr, stripped constant find_guarded_bits(2)
- strip offset computation for interleaved real/imag buffer
- tune AR filter loop for WMOPS
- fix and simplify 64-bit power computation loop
- tuned energy smoothing loops for WMOPS
- skip energy scaling if q_shift equals zero
- strip offset computation for interleaved real/imag buffer

File lib_dec/ivas_mc_param_dec.c, ivas_mct_dec_mct_fx.c:
--------------------------------------------------------
- simplify zero checks for output synthesis
- replace div(x / 1) or div(x / 2) by shift ops
- simplified shifting output

Total WMOPS saving for bitstream stv714MC48c_128kbps.192/7_1_4: 164 WMops
parent e86c049a
Loading
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1626,7 +1626,7 @@ Word16 findIndexOfMinWord32( Word32 *x, const Word16 len )
/*
 * imult1616: integer product of two 16-bit operands, returned as Word16.
 *
 * The product x * y is expected to fit into the signed 16-bit range
 * [-32768, 32767]; the assert below checks this precondition (it is only
 * active when assertions are enabled).
 *
 * x, y   : 16-bit integer factors
 * return : x * y as Word16
 */
Word16 imult1616( Word16 x, Word16 y )
{
    assert( (int) x * (int) y < 32768 && (int) x * (int) y >= -32768 );

    /* One i_mult operation instead of the former extract_l( L_mult0( x, y ) )
     * pair; the second, unreachable return statement has been removed.
     * NOTE(review): for products outside the asserted range the two forms may
     * differ (i_mult presumably saturates while extract_l truncates) - confirm
     * against the basic-operator library before relying on that case. */
    return i_mult( x, y );
}

Word32 imult3216( Word32 x, Word16 y )
+17 −0
Original line number Diff line number Diff line
@@ -461,6 +461,23 @@ void v_add_inc_fx(
)
{
    Word16 i;

    /* This function is currently only called with the interleaved input format, */
    /* so the conditions below are always true and the check is redundant.      */
    test();
    test();
    test();
    test();
    IF ((sub(x_inc, 2) == 0) && (sub(x2_inc, 2) == 0) && (sub(y_inc, 1) == 0) && (&x1[1] == &x2[0]) )
    {
        /* Interleaved input case, linear output */
        FOR( i = 0; i < N; i++ )
        {
            y[i] = L_add( x1[2*i+0], x1[2*i+1] ); /*Qx*/
            move32();
        }
        return;
    }
    Word16 ix1 = 0;
    Word16 ix2 = 0;
    Word16 iy = 0;
+5 −17
Original line number Diff line number Diff line
@@ -648,25 +648,13 @@ void set32_fx(
    const Word16 N  /* i  : Length of the vector                */
)
{
    Word16 i, tmp;
    tmp = extract_l( a );
    IF( EQ_32( L_deposit_l( tmp ), a ) )
    {
        FOR( i = 0; i < N; i++ )
        {
            y[i] = L_deposit_l( tmp );
            move32();
        }
    }
    ELSE
    {
    Word16 i;

    FOR( i = 0; i < N; i++ )
    {
        y[i] = a;
        move32();
    }
    }

    return;
}
/*-------------------------------------------------------------------*
+5 −23
Original line number Diff line number Diff line
@@ -3786,38 +3786,20 @@ void ivas_param_mc_dec_render_fx(
    slot_idx_start_cldfb_synth = 0;
    move16();

    Flag is_zero = 1;
    move32();
    FOR( j = 0; j < st_ivas->hParamMC->hMetadataPMC->nbands_coded; j++ )
    {
        is_zero = 1;
        move16();
        FOR( i = 0; i < hParamMC->h_output_synthesis_cov_state.mixing_matrix_len; i++ )
        Flag is_zero = is_zero_arr( hParamMC->h_output_synthesis_cov_state.mixing_matrix_fx[j], hParamMC->h_output_synthesis_cov_state.mixing_matrix_len );
        {
            IF( hParamMC->h_output_synthesis_cov_state.mixing_matrix_fx[j][i] != 0 )
            if ( is_zero != 0 )
            {
                is_zero = 0;
                hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_exp[j] = 0;
                move16();
            }
        }
        IF( is_zero )
        {
            hParamMC->h_output_synthesis_cov_state.mixing_matrix_exp[j] = 0;
            move16();
        }
        is_zero = 1;
        move16();
        IF( LT_16( st_ivas->hParamMC->band_grouping[j], st_ivas->hParamMC->h_output_synthesis_params.max_band_decorr ) )
        {
            FOR( i = 0; i < hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_len; i++ )
            {
                IF( NE_32( hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_fx[j][i], 0 ) )
                {
                    is_zero = 0;
                    move16();
                }
            }
            IF( is_zero )
            is_zero = is_zero_arr( hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_fx[j], hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_len );
            if( is_zero != 0)
            {
                hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_exp[j] = 0;
                move16();
+14 −2
Original line number Diff line number Diff line
@@ -316,6 +316,7 @@ void mctStereoIGF_dec_fx(
            test();
            IF( NE_16( hMCT->hBlockData[b]->hStereoMdct->IGFStereoMode[k], SMDCT_DUAL_MONO ) || NE_16( hMCT->hBlockData[b]->hStereoMdct->mdct_stereo_mode[k], SMDCT_DUAL_MONO ) )
            {
#if 0
                tmp = BASOP_Util_Divide1616_Scale( sts[0]->hTcxCfg->tcx_coded_lines, nSubframes, &tmp_e );
                L_spec[0] = shr( tmp, add( 15, negate( tmp_e ) ) );
                move16();
@@ -325,6 +326,15 @@ void mctStereoIGF_dec_fx(

                tmp = BASOP_Util_Divide1616_Scale( sts[0]->hTcxDec->L_frameTCX, nSubframes, &tmp_e );
                L_frameTCX_nSubframe = shr( tmp, add( 15, negate( tmp_e ) ) );
#else
                assert( nSubframes == 1 || nSubframes == 2 );
                /* Note: nSubframes is in limited range [1, 2] for this function */
                Word16 shr_div = sub( nSubframes, 1 ); /* 2 -> 1, 1 -> 0 */
                L_spec[0] = shr(sts[0]->hTcxCfg->tcx_coded_lines, shr_div);
                move16();
                L_frame_nSubframe = shr(sts[0]->L_frame, shr_div);
                L_frameTCX_nSubframe = shr( sts[0]->hTcxDec->L_frameTCX , shr_div);
#endif

                init_tcx_info_fx( sts[0], L_frame_nSubframe, L_frameTCX_nSubframe, k, bfi, &tcx_offset[0], &tcx_offsetFB[0], &L_frame[0], &L_frameTCX[0], &left_rect[0], &L_spec[0] );

@@ -334,14 +344,16 @@ void mctStereoIGF_dec_fx(
                decoder_tcx_IGF_stereo_fx( sts, hMCT->hBlockData[b]->hStereoMdct, hMCT->hBlockData[b]->mask, p_x, p_x_e, p_x_len, L_frame[0], left_rect[0], k, bfi, 1 /* MCT_flag */ );

                // Shifting output with variable exponent back to Q12
                Word16 shr_k = sub( 31 - Q12, p_x_e[0][k] );
                FOR( Word16 i = 0; i < p_x_len[0][k]; i++ )
                {
                    p_x[0][k][i] = L_shr( p_x[0][k][i], sub( 31 - Q12, p_x_e[0][k] ) );
                    p_x[0][k][i] = L_shr( p_x[0][k][i], shr_k );
                    move32();
                }
                shr_k = sub( 31 - Q12, p_x_e[1][k] );
                FOR( Word16 i = 0; i < p_x_len[1][k]; i++ )
                {
                    p_x[1][k][i] = L_shr( p_x[1][k][i], sub( 31 - Q12, p_x_e[1][k] ) );
                    p_x[1][k][i] = L_shr( p_x[1][k][i], shr_k );
                    move32();
                }
            }
Loading