Commit 236a1872 authored by Nicolas Roussin
Browse files

Optimize eig2x2_fx part 2.

parent 9219bfbc
Loading
Loading
Loading
Loading
+85 −9
Original line number Diff line number Diff line
@@ -3518,7 +3518,7 @@ static void ivas_dirac_dec_binaural_check_and_switch_transports_headtracked_fx(
    return;
}

#if 0
#if 1
static void check(
    Word32 computed_fx,
    Word16 computed_q,
@@ -3557,7 +3557,7 @@ static void eig2x2_fx(
    Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/
    Word16 *q_D )
{
#if 0
    // #if 0
    Word16 chA, chB, ch;
    Word32 s_fx, normVal_fx, crossSquare_fx, a_fx, pm_fx, add_fx;
    Word32 tmp1, tmp2, tmp3, e1, e2, c_re, c_im;
@@ -3698,7 +3698,8 @@ static void eig2x2_fx(
        *q_D = sub( q_tmp2, 1 );
        move16();
    }
#else
// #else
#if 0
    Word16 chA, chB, ch;
    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
    {
@@ -3710,6 +3711,7 @@ static void eig2x2_fx(
            move32();
        }
    }
#endif

    // ===================================================================================================
    /*crossSquare_fx = (c_re * c_re) + (c_im * c_im)
@@ -3747,13 +3749,16 @@ static void eig2x2_fx(
        move16();
    }

#if 0
    Word32 crossSquare_fx = (Word32) ( cs >> 32 ); // FIXME
    Word16 q_crossSquare = 2 * q_C + lshift - 32;  // FIXME
#endif
    Word16 csq = 2 * q_C + lshift; // FIXME

    // a = max(0, (e1 - e2)^2 + 4 * crossSquare_fx)
    qm = s_min( q1, q2 );
    qd = sub( q1, q2 );
    Word64 a = W_max( W_add( W_shr( es2, s_max( qd, 0 ) ), W_shr( cs, negate( s_min( qd, 0 ) ) ) ), 0 );
    Word64 a = W_max( W_add( W_shr( es2, s_max( qd, 0 ) ), W_shl( cs, s_min( qd, 0 ) ) ), 0 );

    // pm = 0.5f * sqrtf(a)
    // a = 0.5f * ( E1 + E2 );
@@ -3762,29 +3767,32 @@ static void eig2x2_fx(
    Word16 e = sub( sub( 63, lshift ), qm );
    pm = L_shr( Sqrt32( pm, &e ), 1 );
    q2 = sub( 31, e );
    // check( pm, q2, pm_fx, q_tmp2, 1 << 16 );
    check( pm, q2, pm_fx, q_tmp2, 1 << 16 );
    Word32 ea = L_add( E1_fx, E2_fx );
    lshift = sub( norm_l( ea ), 1 );
    ea = L_shl( ea, lshift );
    q1 = add( add( q_E, 1 ), lshift );
    // check( ea, q1, add_fx, q_tmp1, 1 << 16 );
    check( ea, q1, add_fx, q_tmp1, 1 << 16 );

#if 0
    Word32 add_fx = ea; // FIXME
    Word16 q_tmp1 = q1; // FIXME
    Word32 pm_fx = pm;  // FIXME
    Word16 q_tmp2 = q2; // FIXME
#endif

    // D[0] = add + pm;
    // D[1] = max( 0.0f, add - pm );
    qm = s_min( q1, q2 );
    qd = sub( q1, q2 );
    ea = L_shr( ea, s_max( qd, 0 ) );
    pm = L_shr( pm, negate( s_min( qd, 0 ) ) );
    pm = L_shl( pm, s_min( qd, 0 ) );
    Word32 d0 = L_add( ea, pm );
    Word32 d1 = L_max( L_sub( ea, pm ), 0 );
    // check( d0, qm, D_fx[0], *q_D, 1 << 16 );
    // check( d1, qm, D_fx[1], *q_D, 1 << 16 );
    check( d0, qm, D_fx[0], *q_D, 1 << 16 );
    check( d1, qm, D_fx[1], *q_D, 1 << 16 );

#if 0
    D_fx[0] = d0; // FIXME
    D_fx[1] = d1; // FIXME
    *q_D = qm;    // FIXME
@@ -3804,6 +3812,8 @@ static void eig2x2_fx(
    e2 = L_shl( E2_fx, exp );                            // FIXME
    q_e = add( q_E, exp );                               // FIXME
#endif
    // #endif

    /* Numeric case, when input is practically zeros */
    // IF( D_fx[0] < EPSILON_FX )

@@ -3875,6 +3885,64 @@ static void eig2x2_fx(
            move16();
        }

        // D[ch] - E1
        // D[ch] - E2

        Word32 arg0, arg1, arg2;
        Word32 z1, z2, ad, s;

        qm = s_min( *q_D, q_e );
        qd = sub( *q_D, q_e );

        arg0 = L_shr( D_fx[ch], s_max( qd, 0 ) );
        arg1 = L_shl( e1, s_min( qd, 0 ) );
        arg2 = L_shl( e2, s_min( qd, 0 ) );
        z1 = L_sub( arg0, arg1 );
        z2 = L_sub( arg0, arg2 );

        check( z1, qm, tmp1, q_tmp1, 1 << 1 );
        check( z2, qm, tmp2, q_tmp1, 1 << 1 );

        ad = L_sub( L_abs( z2 ), L_abs( z1 ) );
        if ( ad > 0 )
        {
            s = z2;
        }
        if ( ad <= 0 )
        {
            s = z1;
        }

        Word64 epsm = ( (Word64) 1180591621 ) << 32; // FIXME
        Word16 epsq = 63 - epsilon_exp;              // FIXME

        Word16 sq = qm;
        Word16 s2q = L_shl( sq, 1 );
        qm = s_min( s2q, csq );
        qm = s_min( qm, epsq );

        Word64 nval64m;
        Word64 nval32m;
        Word16 nvalq, nvale;
        nval64m = W_shr( W_mult0_32_32( s, s ), L_sub( s2q, qm ) );
        nval64m = W_add( nval64m, W_shr( cs, L_sub( csq, qm ) ) );
        nval64m = W_add( nval64m, W_shr( epsm, L_sub( epsq, qm ) ) );
        lshift = W_norm( nval64m );
        nval64m = W_shl( nval64m, lshift );
        nvalq = add( qm, lshift );

        Word32 test0m = W_extract_h( nval64m ); // FIXME
        Word32 test0q = nvalq;                  // FIXME

        nval32m = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m ), &nvale );
        nvale = sub( nvale, sub( Q30, nvalq - 32 ) );
        nval32m = Sqrt32( nval32m, &nvale );
        nvalq = sub( 31, nvale );

        Word32 test1m = nval32m; // FIXME
        Word32 test1q = nvalq;   // FIXME


        IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) )
        {
            s_fx = tmp2;
@@ -3888,11 +3956,15 @@ static void eig2x2_fx(

            tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );

            check( test0m, test0q, tmp3, 31 - exp_tmp3, 1 << 18 ); // TODO: remove

            tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp );
            exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
            normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
            q_tmp2 = sub( 31, exp );

            check( test1m, test1q, normVal_fx, q_tmp2, 1 << 18 ); // TODO: remove

            q_diff = sub( q_c, q_tmp1 );
            IF( q_diff > 0 )
            {
@@ -3959,11 +4031,15 @@ static void eig2x2_fx(

            tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );

            check( test0m, test0q, tmp3, 31 - exp_tmp3, 1 << 18 ); // TODO: remove

            tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp );
            exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
            normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
            q_tmp2 = sub( 31, exp );

            check( test1m, test1q, normVal_fx, q_tmp2, 1 << 18 ); // TODO: remove

            q_diff = sub( q_c, q_tmp1 );
            IF( q_diff > 0 )
            {