Commit 76fc3d30 authored by Nicolas Roussin's avatar Nicolas Roussin
Browse files

Finalize optimization.

parent 890cd054
Loading
Loading
Loading
Loading
+0 −102
Original line number Diff line number Diff line
@@ -21,108 +21,6 @@
 *
 *****************************************************************************/
#ifdef ENH_64_BIT_OPERATOR


/*______________________________________________________________________________
|                                                                              |
|   Function Name : W_min                                                      |
|                                                                              |
|   Purpose :                                                                  |
|                                                                              |
|    Compares L64_var1 and L64_var2 and returns the minimum value.             |
|                                                                              |
|   Complexity weight : 1                                                      |
|                                                                              |
|   Inputs :                                                                   |
|                                                                              |
|    L64_var1   64 bit long signed integer (Word64) whose value falls in the   |
|       range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL.    |
|                                                                              |
|    L64_var2   64 bit long signed integer (Word64) whose value falls in the   |
|       range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL.    |
|                                                                              |
|   Outputs :                                                                  |
|                                                                              |
|    none                                                                      |
|                                                                              |
|   Return Value :                                                             |
|                                                                              |
|    L64_var_out                                                               |
|             64 bit long signed integer (Word64) whose value falls in the     |
|       range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. |
|______________________________________________________________________________|
*/
static __inline Word64 W_min( Word64 L64_var1, Word64 L64_var2 )
{
    /* Pick the smaller operand; on a tie both hold the same value, so the
       first operand is returned. */
    Word64 L64_smaller = ( L64_var2 < L64_var1 ) ? L64_var2 : L64_var1;

#ifdef WMOPS
    /* Bump the W_min tally of the counter selected by currCounter. */
    multiCounter[currCounter].W_min++;
#endif /* ifdef WMOPS */

    return L64_smaller;
}


/*______________________________________________________________________________
|                                                                              |
|   Function Name : W_max                                                      |
|                                                                              |
|   Purpose :                                                                  |
|                                                                              |
|    Compares L64_var1 and L64_var2 and returns the maximum value.             |
|                                                                              |
|   Complexity weight : 1                                                      |
|                                                                              |
|   Inputs :                                                                   |
|                                                                              |
|    L64_var1   64 bit long signed integer (Word64) whose value falls in the   |
|       range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL.    |
|                                                                              |
|    L64_var2   64 bit long signed integer (Word64) whose value falls in the   |
|       range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL.    |
|                                                                              |
|   Outputs :                                                                  |
|                                                                              |
|    none                                                                      |
|                                                                              |
|   Return Value :                                                             |
|                                                                              |
|    L64_var_out                                                               |
|             64 bit long signed integer (Word64) whose value falls in the     |
|       range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. |
|______________________________________________________________________________|
*/
static __inline Word64 W_max( Word64 L64_var1, Word64 L64_var2 )
{
    /* Pick the larger operand; on a tie both hold the same value, so the
       first operand is returned. */
    Word64 L64_larger = ( L64_var2 > L64_var1 ) ? L64_var2 : L64_var1;

#ifdef WMOPS
    /* Bump the W_max tally of the counter selected by currCounter. */
    multiCounter[currCounter].W_max++;
#endif /* ifdef WMOPS */

    return L64_larger;
}


Word64 W_add_nosat( Word64 L64_var1, Word64 L64_var2 );
Word64 W_sub_nosat( Word64 L64_var1, Word64 L64_var2 );
Word64 W_shl( Word64 L64_var1, Word16 var2 );
+6 −0
Original line number Diff line number Diff line
@@ -159,4 +159,10 @@

/* #################### End BASOP porting switches ############################ */

/* #################### Start BASOP optimization switches ############################ */

#define NONBE_OPT_2193_EIG2X2                           /* Dolby: Issue 2193, optimize eig2x2_fx. */

/* #################### End BASOP optimization switches ############################ */

#endif
+1 −1
Original line number Diff line number Diff line
@@ -133,7 +133,7 @@ static BASIC_OP op_weight = {
#ifdef ENH_64_BIT_OPERATOR
    /* Weights of new 64 bit basops */
    ,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
#endif /* #ifdef ENH_64_BIT_OPERATOR */

#ifdef ENH_32_BIT_OPERATOR
+0 −2
Original line number Diff line number Diff line
@@ -877,8 +877,6 @@ typedef struct
/* New 64 bit basops */
#ifdef ENH_64_BIT_OPERATOR
    unsigned int move64;        /* Complexity Weight of 1 */
    unsigned int W_min;         /* Complexity Weight of 1 */
    unsigned int W_max;         /* Complexity Weight of 1 */
    unsigned int W_add_nosat;   /* Complexity Weight of 1 */
    unsigned int W_sub_nosat;   /* Complexity Weight of 1 */
    unsigned int W_shl;         /* Complexity Weight of 1 */
+35 −202
Original line number Diff line number Diff line
@@ -3518,63 +3518,20 @@ static void ivas_dirac_dec_binaural_check_and_switch_transports_headtracked_fx(
    return;
}

#if 1
static void check(
    Word32 computed_fx,
    Word16 computed_q,
    Word32 expected_fx,
    Word16 expected_q,
    Word32 max_abs_err );

/* Debug helper: asserts when two fixed-point values differ by more than
 * max_abs_err after being brought to a common Q format.
 *
 * The value carrying the larger Q (more fractional bits) is shifted right by
 * the Q difference, so the comparison takes place in the smaller of the two
 * Q formats; max_abs_err is interpreted in that same format. */
static void check(
    Word32 computed_fx,
    Word16 computed_q,
    Word32 expected_fx,
    Word16 expected_q,
    Word32 max_abs_err )
{
    Word16 q_gap = computed_q - expected_q;
    Word32 computed_aligned = computed_fx;
    Word32 expected_aligned = expected_fx;
    Word32 deviation;

    if ( q_gap > 0 )
    {
        /* computed value has more fractional bits: drop the surplus */
        computed_aligned = computed_fx >> q_gap;
    }
    else if ( q_gap < 0 )
    {
        /* expected value has more fractional bits: drop the surplus */
        expected_aligned = expected_fx >> -q_gap;
    }

    deviation = abs( computed_aligned - expected_aligned );
    if ( deviation > max_abs_err )
    {
        assert( false );
    }
}
#endif

/* Debug-only scratch state shared between eig2x2_opt() and eig2x2_fx():
 * eig2x2_opt() stores its intermediate and final results here, and
 * eig2x2_fx() compares its own values against them through check().
 * NOTE(review): identifiers containing a double underscore are reserved for
 * the implementation in C; rename these if the code is ever kept. */

/* "pm" and "add" terms used to form the eigenvalues (D[0] = add + pm),
 * together with their Q formats. */
Word32 __pm_fx;
Word16 __pm_q;
Word32 __add_fx;
Word16 __add_q;

/* Per-column selection flags (i01 / i11 in eig2x2_opt): non-zero when
 * |D[ch] - e2| > |D[ch] - e1|. */
Word32 __as[BINAURAL_CHANNELS];

/* Per-column "s" value and its Q format. */
Word32 __s_fx[BINAURAL_CHANNELS];
Word16 __s_q[BINAURAL_CHANNELS];

/* Per-column normalisation value (output of Sqrt32) and its Q format. */
Word32 __nval_fx[BINAURAL_CHANNELS];
Word16 __nval_q[BINAURAL_CHANNELS];

/* Real and imaginary parts of the eigenvector matrix U, sharing one Q format. */
Word32 __Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
Word32 __Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
Word16 __U_q;

/* Eigenvalues D[0], D[1] and their common Q format. */
Word32 __D_fx[BINAURAL_CHANNELS];
Word16 __D_q;

/* Differences __diff_fx[ch][0] = D[ch] - e1 and __diff_fx[ch][1] = D[ch] - e2,
 * with their common Q format. */
Word32 __diff_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
Word16 __diff_q;

static void eig2x2_opt(
static void eig2x2_fx(
    const Word32 E1_fx, /*q_E*/
    const Word32 E2_fx, /*q_E*/
    Word16 q_E,
    const Word32 Cre_fx, /*q_C*/
    const Word32 Cim_fx, /*q_C*/
    Word16 q_C )
    Word16 q_C,
    Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/
    Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/
    Word16 *q_U,
    Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/
    Word16 *q_D )
{
#ifdef NONBE_OPT_2193_EIG2X2
    Word32 s_fx[BINAURAL_CHANNELS];
    Word32 pm_fx, add_fx;
    Word32 tmp1, tmp2, e1, e2, c_re, c_im, c0_im, c1_im;
@@ -3586,13 +3543,13 @@ static void eig2x2_opt(
    move32();
    move16();

    set32_fx( (Word32 *) __Ure_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS );
    set32_fx( (Word32 *) __Uim_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS );
    __Ure_fx[0][0] = ONE_IN_Q30;
    set32_fx( (Word32 *) Ure_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS );
    set32_fx( (Word32 *) Uim_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS );
    Ure_fx[0][0] = ONE_IN_Q30;
    move32();
    __Ure_fx[1][1] = ONE_IN_Q30;
    Ure_fx[1][1] = ONE_IN_Q30;
    move32();
    __U_q = Q30;
    *q_U = Q30;
    move16();

    exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 );
@@ -3662,11 +3619,6 @@ static void eig2x2_opt(
    q_tmp1 = q_e;
    move16();

    __pm_fx = pm_fx;   // FIXME
    __pm_q = q_tmp2;   // FIXME
    __add_fx = add_fx; // FIXME
    __add_q = q_tmp1;  // FIXME

    // D[0] = add + pm;
    // D[1] = max( 0.0f, add - pm );

@@ -3686,17 +3638,17 @@ static void eig2x2_opt(
        tmp2 = L_shl( tmp2, q_diff );
    }

    __D_fx[0] = L_add( tmp1, tmp2 );
    D_fx[0] = L_add( tmp1, tmp2 );
    move32();
    __D_fx[1] = L_max( L_sub( tmp1, tmp2 ), 0 );
    D_fx[1] = L_max( L_sub( tmp1, tmp2 ), 0 );
    move32();
    __D_q = s_min( q_tmp1, q_tmp2 );
    *q_D = s_min( q_tmp1, q_tmp2 );
    move32();

    // Numeric case, when input is practically zeros
    // if ( __D_fx[0] < EPSILON_FX )
    // if ( D_fx[0] < EPSILON_FX )

    if ( LT_32( L_shl_sat( __D_fx[0], sub( 31 - EPSILON_EXP, __D_q ) ), EPSILON_MANT ) )
    if ( LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ) )
    {
        return;
    }
@@ -3715,17 +3667,17 @@ static void eig2x2_opt(

    // Eigenvectors

    q_diff = sub( q_e, __D_q );
    q_tmp1 = s_min( q_e, __D_q );
    q_diff = sub( q_e, *q_D );
    q_tmp1 = s_min( q_e, *q_D );

    tmp1 = __D_fx[0];
    tmp1 = D_fx[0];
    move32();
    if ( q_diff > 0 )
    {
        tmp1 = L_shr( tmp1, q_diff );
    }

    tmp2 = __D_fx[1];
    tmp2 = D_fx[1];
    move32();
    if ( q_diff > 0 )
    {
@@ -3742,23 +3694,13 @@ static void eig2x2_opt(
        e2 = L_shl( e2, q_diff );
    }

    s_fx[0] = L_sub( tmp1, e1 ); // __D_fx[0] - e1
    tmp1 = L_sub( tmp1, e2 );    // __D_fx[0] - e2
    s_fx[1] = L_sub( tmp2, e1 ); // __D_fx[1] - e1
    tmp2 = L_sub( tmp2, e2 );    // __D_fx[1] - e2

    __diff_fx[0][0] = s_fx[0];
    __diff_fx[0][1] = tmp1;
    __diff_fx[1][0] = s_fx[1];
    __diff_fx[1][1] = tmp2;
    s_fx[0] = L_sub( tmp1, e1 ); // D_fx[0] - e1
    tmp1 = L_sub( tmp1, e2 );    // D_fx[0] - e2
    s_fx[1] = L_sub( tmp2, e1 ); // D_fx[1] - e1
    tmp2 = L_sub( tmp2, e2 );    // D_fx[1] - e2

    __diff_q = q_tmp1;

    i01 = GT_32( L_abs( tmp1 ), L_abs( s_fx[0] ) ); // fabsf( __D_fx[0] - e2 ) > fabsf( __D_fx[0] - e1 )
    i11 = GT_32( L_abs( tmp2 ), L_abs( s_fx[1] ) ); // fabsf( __D_fx[1] - e2 ) > fabsf( __D_fx[1] - e1 )

    __as[0] = i01;
    __as[1] = i11;
    i01 = GT_32( L_abs( tmp1 ), L_abs( s_fx[0] ) ); // fabsf( D_fx[0] - e2 ) > fabsf( D_fx[0] - e1 )
    i11 = GT_32( L_abs( tmp2 ), L_abs( s_fx[1] ) ); // fabsf( D_fx[1] - e2 ) > fabsf( D_fx[1] - e1 )

    if ( i01 )
    {
@@ -3772,11 +3714,6 @@ static void eig2x2_opt(
        move32();
    }

    __s_fx[0] = s_fx[0];
    __s_fx[1] = s_fx[1];
    __s_q[0] = q_tmp1;
    __s_q[1] = q_tmp1;

    // normVal = sqrtf( 1.0f / ( 1e-12f + crossSquare + s * s ) );

    Word32 nvalm[BINAURAL_CHANNELS];
@@ -3820,11 +3757,6 @@ static void eig2x2_opt(
    nvalm[1] = Sqrt32( nvalm[1], &exp );
    nvalq[1] = sub( 31, exp );

    __nval_fx[0] = nvalm[0];
    __nval_q[0] = nvalq[0];
    __nval_fx[1] = nvalm[1];
    __nval_q[1] = nvalq[1];

    q_diff = sub( q_c, q_tmp1 );
    q_tmp1 = s_min( q_tmp1, q_c );

@@ -3861,7 +3793,7 @@ static void eig2x2_opt(
        nvalm[1] = L_shl( nvalm[1], q_diff );
    }

    __U_q = sub( add( q_tmp1, q_tmp2 ), 31 );
    *q_U = sub( add( q_tmp1, q_tmp2 ), 31 );

    i00 = L_sub( 1, i01 );
    i10 = L_sub( 1, i11 );
@@ -3880,53 +3812,20 @@ static void eig2x2_opt(
        c1_im = L_negate( c1_im );
    }

    __Ure_fx[i00][0] = Mpy_32_32( s_fx[0], nvalm[0] );
    Ure_fx[i00][0] = Mpy_32_32( s_fx[0], nvalm[0] );
    move32();
    __Ure_fx[i01][0] = Mpy_32_32( c_re, nvalm[0] );
    Ure_fx[i01][0] = Mpy_32_32( c_re, nvalm[0] );
    move32();
    __Uim_fx[i01][0] = Mpy_32_32( c0_im, nvalm[0] );
    Uim_fx[i01][0] = Mpy_32_32( c0_im, nvalm[0] );
    move32();

    __Ure_fx[i10][1] = Mpy_32_32( s_fx[1], nvalm[1] );
    Ure_fx[i10][1] = Mpy_32_32( s_fx[1], nvalm[1] );
    move32();
    __Ure_fx[i11][1] = Mpy_32_32( c_re, nvalm[1] );
    Ure_fx[i11][1] = Mpy_32_32( c_re, nvalm[1] );
    move32();
    __Uim_fx[i11][1] = Mpy_32_32( c1_im, nvalm[1] );
    Uim_fx[i11][1] = Mpy_32_32( c1_im, nvalm[1] );
    move32();

    return;
}

static void eig2x2_fx(
    const Word32 E1_fx, /*q_E*/
    const Word32 E2_fx, /*q_E*/
    Word16 q_E,
    const Word32 Cre_fx, /*q_C*/
    const Word32 Cim_fx, /*q_C*/
    Word16 q_C,
    Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/
    Word32 Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_U*/
    Word16 *q_U,
    Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/
    Word16 *q_D )
{
#if 0
    eig2x2_opt( E1_fx, E2_fx, q_E, Cre_fx, Cim_fx, q_C );
    Ure_fx[0][0] = __Ure_fx[0][0];
    Ure_fx[0][1] = __Ure_fx[0][1];
    Ure_fx[1][0] = __Ure_fx[1][0];
    Ure_fx[1][1] = __Ure_fx[1][1];
    Uim_fx[0][0] = __Uim_fx[0][0];
    Uim_fx[0][1] = __Uim_fx[0][1];
    Uim_fx[1][0] = __Uim_fx[1][0];
    Uim_fx[1][1] = __Uim_fx[1][1];
    *q_U = __U_q;
    D_fx[0] = __D_fx[0];
    D_fx[1] = __D_fx[1];
    *q_D = __D_q;
#else
    eig2x2_opt( E1_fx, E2_fx, q_E, Cre_fx, Cim_fx, q_C );

    Word16 chA, chB, ch;
    Word32 s_fx, normVal_fx, crossSquare_fx, a_fx, pm_fx, add_fx;
    Word32 tmp1, tmp2, tmp3, e1, e2, c_re, c_im;
@@ -4049,9 +3948,6 @@ static void eig2x2_fx(
    q_tmp1 = q_e;
    move16();

    check( __pm_fx, __pm_q, pm_fx, q_tmp2, 1 << 26 );
    check( __add_fx, __add_q, add_fx, q_tmp1, 1 << 26 );

    IF( LT_16( q_tmp1, q_tmp2 ) )
    {
        D_fx[0] = L_add( L_shr( add_fx, 1 ), L_shr( pm_fx, add( sub( q_tmp2, q_tmp1 ), 1 ) ) );
@@ -4071,9 +3967,6 @@ static void eig2x2_fx(
        move16();
    }

    check( __D_fx[0], __D_q, D_fx[0], *q_D, 1 << 26 );
    check( __D_fx[1], __D_q, D_fx[1], *q_D, 1 << 26 );

    /* Numeric case, when input is practically zeros */
    // IF( D_fx[0] < EPSILON_FX )

@@ -4086,15 +3979,6 @@ static void eig2x2_fx(
        *q_U = Q31;
        move16();

        for ( int i = 0; i < BINAURAL_CHANNELS; ++i )
        {
            for ( int j = 0; j < BINAURAL_CHANNELS; ++j )
            {
                check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 );
                check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 );
            }
        }

        return;
    }

@@ -4112,15 +3996,6 @@ static void eig2x2_fx(
            *q_U = Q30;
            move16();

            for ( int i = 0; i < BINAURAL_CHANNELS; ++i )
            {
                for ( int j = 0; j < BINAURAL_CHANNELS; ++j )
                {
                    check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 );
                    check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 );
                }
            }

            return;
        }
    }
@@ -4135,15 +4010,6 @@ static void eig2x2_fx(
            *q_U = Q30;
            move16();

            for ( int i = 0; i < BINAURAL_CHANNELS; ++i )
            {
                for ( int j = 0; j < BINAURAL_CHANNELS; ++j )
                {
                    check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 );
                    check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 );
                }
            }

            return;
        }
    }
@@ -4172,18 +4038,10 @@ static void eig2x2_fx(
            move16();
        }

        check( __diff_fx[ch][0], __diff_q, tmp1, q_tmp1, 1 << 26 );
        check( __diff_fx[ch][1], __diff_q, tmp2, q_tmp1, 1 << 26 );

        IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) )
        {
            check( __as[ch], 0, 1, 0, 0 );

            s_fx = tmp2;
            move32();

            check( __s_fx[ch], __s_q[ch], s_fx, q_tmp1, 1 << 26 );

            exp = sub( norm_l( s_fx ), 1 );
            tmp2 = Mpy_32_32( s_fx, s_fx );
            q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 );
@@ -4198,8 +4056,6 @@ static void eig2x2_fx(
            normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
            q_tmp2 = sub( 31, exp );

            check( __nval_fx[ch], __nval_q[ch], normVal_fx, q_tmp2, 1 << 26 );

            q_diff = sub( q_c, q_tmp1 );
            IF( q_diff > 0 )
            {
@@ -4223,10 +4079,6 @@ static void eig2x2_fx(
            move32();
            q_U_1 = sub( add( q_tmp1, q_tmp2 ), 31 );

            check( __Ure_fx[0][ch], __U_q, Ure_fx[0][ch], q_U_1, 1 << 26 );
            check( __Ure_fx[1][ch], __U_q, Ure_fx[1][ch], q_U_1, 1 << 26 );
            check( __Uim_fx[1][ch], __U_q, Uim_fx[1][ch], q_U_1, 1 << 26 );

            IF( q_U_2 != 0 )
            {
                q_diff = sub( q_U_2, q_U_1 );
@@ -4258,13 +4110,9 @@ static void eig2x2_fx(
        }
        ELSE
        {
            check( __as[ch], 0, 0, 0, 0 );

            s_fx = tmp1;
            move32();

            check( __s_fx[ch], __s_q[ch], s_fx, q_tmp1, 1 << 26 );

            exp = sub( norm_l( s_fx ), 1 );
            tmp2 = Mpy_32_32( s_fx, s_fx );
            q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 );
@@ -4279,8 +4127,6 @@ static void eig2x2_fx(
            normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
            q_tmp2 = sub( 31, exp );

            check( __nval_fx[ch], __nval_q[ch], normVal_fx, q_tmp2, 1 << 26 );

            q_diff = sub( q_c, q_tmp1 );
            IF( q_diff > 0 )
            {
@@ -4304,10 +4150,6 @@ static void eig2x2_fx(
            move32();
            q_U_2 = sub( add( q_tmp1, q_tmp2 ), 31 );

            check( __Ure_fx[1][ch], __U_q, Ure_fx[1][ch], q_U_2, 1 << 26 );
            check( __Ure_fx[0][ch], __U_q, Ure_fx[0][ch], q_U_2, 1 << 26 );
            check( __Uim_fx[0][ch], __U_q, Uim_fx[0][ch], q_U_2, 1 << 26 );

            IF( q_U_1 != 0 )
            {
                q_diff = sub( q_U_2, q_U_1 );
@@ -4350,15 +4192,6 @@ static void eig2x2_fx(
        *q_U = q_U_2;
        move16();
    }

    for ( int i = 0; i < BINAURAL_CHANNELS; ++i )
    {
        for ( int j = 0; j < BINAURAL_CHANNELS; ++j )
        {
            check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 26 );
            check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 26 );
        }
    }
#endif
    return;
}