Commit eee7fdb4 authored by Sandesh Venkatesh
Browse files

Merge branch 'complexity_optimizations_sba_path' into 'main'

Complexity optimization for SBA path decoding [allow regression]

See merge request !1026
parents 7b212b76 2ec0301c
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -137,6 +137,7 @@
#define FOUR_IN_Q28                     1073741824 /* 4 * 2^28 = 2^30 */

#define MAX_WORD16                      32767 /* 2^15 - 1, largest signed 16-bit value */
#define ONE_IN_Q45                      (Word64)0x200000000000 /* 2^45 as a 64-bit constant */
#define ONE_IN_Q62                      (Word64)0x4000000000000000 /* 2^62 as a 64-bit constant */
/*----------------------------------------------------------------------------------*
 * General constants
+1 −0
Original line number Diff line number Diff line
@@ -150,6 +150,7 @@ typedef enum
    RENDERER_OSBA_LS
} RENDERER_TYPE;

#define MAX_FREQUENCY_BANDS                    64

/*----------------------------------------------------------------------------------*
 * IVAS general constants
+32 −17
Original line number Diff line number Diff line
@@ -966,38 +966,55 @@ void computeDiffuseness_fixed(
        p_tmp_c = buffer_energy + i * num_freq_bands;

        q_tmp = add( q_factor_energy[i], min_q_shift1 );

        Word16 shift_q = sub( q_tmp, q_ene );
        IF( shift_q < 0 )
        {
            FOR( k = 0; k < num_freq_bands; k++ )
            {
                tmp = L_shl( p_tmp_c[k], min_q_shift1 );
            IF( LT_16( q_tmp, q_ene ) )
            {
                energy_slow[k] = L_add( L_shr( energy_slow[k], sub( q_ene, q_tmp ) ), tmp );
                energy_slow[k] = L_add( L_shl( energy_slow[k], shift_q ), tmp );
                move32();
            }
        }
        ELSE
        {
                energy_slow[k] = L_add( energy_slow[k], L_shr( tmp, sub( q_tmp, q_ene ) ) );
            FOR( k = 0; k < num_freq_bands; k++ )
            {
                tmp = L_shl( p_tmp_c[k], min_q_shift1 );
                energy_slow[k] = L_add( energy_slow[k], L_shr( tmp, shift_q ) );
                move32();
            }
        }

        q_ene = s_min( q_ene, q_tmp );

        /* Intensity slow */
        q_tmp = add( q_factor_intensity[i], min_q_shift2 );

        shift_q = sub( q_tmp, q_intensity );
        IF( shift_q > 0 )
        {
            FOR( j = 0; j < DIRAC_NUM_DIMS; ++j )
            {
                p_tmp = buffer_intensity[j][i];
                FOR( k = 0; k < num_freq_bands; k++ )
                {
                    tmp = L_shl( p_tmp[k], min_q_shift2 );
                IF( LT_16( q_intensity, q_tmp ) )
                {
                    intensity_slow[j * num_freq_bands + k] = L_add( intensity_slow[j * num_freq_bands + k], L_shr( tmp, sub( q_tmp, q_intensity ) ) );
                    intensity_slow[j * num_freq_bands + k] = L_add( intensity_slow[j * num_freq_bands + k], L_shr( tmp, shift_q ) );
                    move32();
                }
            }
        }
        ELSE
        {
                    intensity_slow[j * num_freq_bands + k] = L_add( L_shr( intensity_slow[j * num_freq_bands + k], sub( q_intensity, q_tmp ) ), tmp );
            FOR( j = 0; j < DIRAC_NUM_DIMS; ++j )
            {
                p_tmp = buffer_intensity[j][i];
                FOR( k = 0; k < num_freq_bands; k++ )
                {
                    tmp = L_shl( p_tmp[k], min_q_shift2 );
                    intensity_slow[j * num_freq_bands + k] = L_add( L_shl( intensity_slow[j * num_freq_bands + k], shift_q ), tmp );
                    move32();
                }
            }
@@ -1017,9 +1034,7 @@ void computeDiffuseness_fixed(

        FOR( k = 0; k < num_freq_bands; k++ )
        {
            p_tmp[k] = Mpy_32_32( p_tmp[k], p_tmp[k] );
            move32();
            intensity_slow_abs[k] = L_add( intensity_slow_abs[k], p_tmp[k] );
            intensity_slow_abs[k] = Madd_32_32( intensity_slow_abs[k], p_tmp[k], p_tmp[k] );
            move32();
        }
    }
+33 −43
Original line number Diff line number Diff line
@@ -7044,7 +7044,7 @@ void ivas_dirac_dec_get_response_fx(
    Word16 Q_out )
{
    Word16 index_azimuth, index_elevation;
    Word16 el, e, az;
    Word16 el, e, az, q_diff;
    Word32 cos_1_fx, cos_2_fx, sin_1_fx, cos_az_fx[3];
    Word32 sin_az_fx[3];
    Word32 f_fx;
@@ -7055,49 +7055,39 @@ void ivas_dirac_dec_get_response_fx(
    index_azimuth = add( azimuth, 180 ) % 360;
    move16();
    index_elevation = add( elevation, 90 );
    IF( GT_16( index_elevation, 90 ) )
    {
        e = -1;
        move16();
    }
    ELSE
    {

    e = 1;
    move16();
    }

    IF( GT_16( index_elevation, 90 ) )
    if ( GT_16( index_elevation, 90 ) )
    {
        el = sub( 180, index_elevation );
        move16();
        e = -1;
    }
    ELSE
    {

    el = index_elevation;
    move16();
    }

    IF( GT_16( index_azimuth, 180 ) )
    if ( GT_16( index_elevation, 90 ) )
    {
        az = sub( 360, index_azimuth );
        move16();
        el = sub( 180, index_elevation );
    }
    ELSE
    {

    az = index_azimuth;
    move16();
    }

    IF( GT_16( index_azimuth, 180 ) )
    if ( GT_16( index_azimuth, 180 ) )
    {
        f_fx = -1;
        move16();
        az = sub( 360, index_azimuth );
    }
    ELSE
    {

    f_fx = 1;
    move16();

    if ( GT_16( index_azimuth, 180 ) )
    {
        f_fx = -1;
    }

    cos_1_fx = L_shr( dirac_gains_trg_term_fx[az][0], 1 );  // q30
    cos_2_fx = L_shl( Mpy_32_32( cos_1_fx, cos_1_fx ), 1 ); // q30
    sin_1_fx = L_shr( dirac_gains_trg_term_fx[az][1], 1 );  // q30
@@ -7122,54 +7112,54 @@ void ivas_dirac_dec_get_response_fx(
    response_fx[0] = L_shl_sat( 1, Q_out ); // Q_out
    move32();

    q_diff = sub( Q_out, 29 );

    FOR( l = 1; l <= ambisonics_order; l++ )
    {
        b_2 = imult1616( l, l );
        b1_2 = add( imult1616( l, l ), shl( l, 1 ) );
        b1_2 = add( b_2, shl( l, 1 ) );
        FOR( m = 0; m < l; m += 2 )
        {
            b = add( b_2, m );
            b = b_2 + m;
            a = dirac_gains_P_idx[b];
            move16();

            c_fx_better = local_result_table[el][a]; // q30
            move32();
            response_fx[b] = L_shl( Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ), sub( Q_out, 29 ) ); // Q_out
            response_fx[b] = L_shl( Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ), q_diff ); // Q_out
            move32();

            b1 = sub( b1_2, m );
            response_fx[b1] = L_shl( Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ), sub( Q_out, 29 ) ); // Q_out
            b1 = b1_2 - m;
            response_fx[b1] = L_shl( Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ), q_diff ); // Q_out
            move32();
        }

        FOR( m = 1; m < l; m += 2 )
        {
            b = add( b_2, m );
            b = b_2 + m;
            a = dirac_gains_P_idx[b];
            move16();
            c_fx_better = local_result_table[el][a]; // q30
            move32();
            if ( EQ_16( e, -1 ) )
            {
                c_fx_better = L_negate( c_fx_better ); // q30
            }
            response_fx[b] = L_shl( Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ), sub( Q_out, 29 ) ); // Q_out
            response_fx[b] = L_shl( Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ), q_diff ); // Q_out
            move32();
            b1 = sub( b1_2, m );
            response_fx[b1] = L_shl( Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ), sub( Q_out, 29 ) ); // Q_out

            b1 = b1_2 - m;
            response_fx[b1] = L_shl( Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ), q_diff ); // Q_out
            move32();
        }

        b = add( b_2, l );
        a = dirac_gains_P_idx[b];
        move16();
        c_fx_better = local_result_table_2[el][a]; // q30
        move32();
        IF( EQ_16( ( l % 2 ), 1 ) )
        IF( EQ_16( s_and( l, 0x01 ), 1 ) )
        {
            if ( EQ_16( e, -1 ) )
            {
                c_fx_better = L_negate( c_fx_better ); // q30
                move32();
            }
        }
        response_fx[b] = L_shl( c_fx_better, sub( Q_out, 30 ) ); // Q_out
+9 −9
Original line number Diff line number Diff line
@@ -344,26 +344,26 @@ static Word32 ivas_calc_duck_gain_fx(
    Word32 duck_gain_out, L_tmp;
    Word16 tmp_e;

    duck_gain_out = L_add( L_shl( Mpy_32_32( L_sub( duck_gain, ONE_IN_Q30 ), duck_coeff ), Q1 ), ONE_IN_Q30 ); /*Q30*/
    duck_gain_out = L_add( Mpy_32_32( L_sub( duck_gain, ONE_IN_Q30 ), duck_coeff ), ONE_IN_Q29 ); /*Q29*/

    IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( Mpy_32_32( duck_mult_fac, env_1 ), add( 2, env1_e ), Mpy_32_32( duck_gain_out, env_2 ), add( 1, env2_e ) ), -1 ) )
    {
        test();
        IF( ( env_1 == 0 ) || ( env_2 == 0 ) )
    IF( LT_64( W_mult0_32_32( duck_mult_fac, env_1 ), W_shr( W_mult0_32_32( duck_gain_out, env_2 ), sub( env1_e, env2_e ) ) ) )
    {

        duck_gain_out = 0;
        move32();
        }
        ELSE

        test();
        IF( ( env_1 != 0 ) && ( env_2 != 0 ) )
        {
            L_tmp = BASOP_Util_Divide3232_Scale_cadence( env_1, env_2, &tmp_e );
            L_tmp = L_shl( L_tmp, add( sub( env1_e, env2_e ), tmp_e ) );

            duck_gain_out = Mpy_32_32( duck_mult_fac, L_tmp ); /*Q29*/
            duck_gain_out = L_shl( duck_gain_out, Q1 );        /*Q30*/
        }
    }

    duck_gain_out = L_shl( duck_gain_out, Q1 ); /*Q30*/

    return duck_gain_out; /*Q30*/
}
/*-----------------------------------------------------------------------------------------*
Loading