Commit b9498039 authored by Nicolas Roussin
Browse files

Remove eig2x2_fx optimisation.

parent d3508f9d
Loading
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -115,7 +115,6 @@
#define OPT_2182_MATRIX_SCALE_OPS           /* Dolby: Issue 2181, move matrix scale operations outside mul operations. */
#define OPT_2185_MATRIX_OUT_SCALING         /* Dolby: Issue 2185, optimize matrix-mul output-format. */
#define NONBE_OPT_2239_IVAS_FILTER_PROCESS  /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */
#define NONBE_OPT_2193_EIG2X2               /* Dolby: Issue 2193, optimize eig2x2_fx. */
#define BE_FIX_2240_COMPUTE_COV_MTC_FX_FAST /* FhG: Speeds up covariance calculation e.g. 60 WMOPS for encoding -mc 7_1_4 24400 48 */

/* #################### End BASOP optimization switches ############################ */
+3 −329
Original line number Diff line number Diff line
@@ -3495,19 +3495,6 @@ static void ivas_dirac_dec_binaural_check_and_switch_transports_headtracked_fx(
    return;
}

#ifdef NONBE_OPT_2193_EIG2X2
/* Forward declaration of the file-local division helper defined just below. */
static Word32 eig2x2_div_fx( Word32 num, Word32 den );

/*
 * eig2x2_div_fx()
 *
 * Thin wrapper around div_w_newton() with a shortcut for one specific
 * denominator value.
 *
 *   num : numerator; handed back unchanged on the shortcut path
 *   den : denominator; compared against the constant 0x40000000
 *
 * Returns num when den == 0x40000000, otherwise the result of
 * div_w_newton( num, den ).
 *
 * NOTE(review): 0x40000000 is presumably 1.0 in Q30 (i.e. the same value as
 * ONE_IN_Q30), which would make the shortcut a "divide by one" case - confirm
 * against the Q-format contract of div_w_newton().
 */
static Word32 eig2x2_div_fx( Word32 num, Word32 den )
{
    /* Shortcut: for this exact denominator the Newton division is skipped
       and the numerator is returned as-is. */
    IF( EQ_32( den, 0x40000000 ) )
    {
        return num;
    }
    return div_w_newton( num, den );
}
#endif

static void eig2x2_fx(
    const Word32 E1_fx, /*q_E*/
    const Word32 E2_fx, /*q_E*/
@@ -3521,319 +3508,6 @@ static void eig2x2_fx(
    Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/
    Word16 *q_D )
{
#ifdef NONBE_OPT_2193_EIG2X2
    Word32 pm_fx, add_fx;
    Word32 tmp1, tmp2, e1, e2, c_re, c_im, c0_im, c1_im;
    Word32 s0_fx, s1_fx, nval0_fx, nval1_fx;
    Word64 crossSquare_fx, tmp3, tmp4;
    Word16 q_crossSquare, q_min, q_diff, q_tmp1, q_tmp2, exp, q_e, q_c;
    Word16 nval0_q, nval1_q;
    Word32 i01, i00, i11, i10;
    Word64 eps_fx = ( (Word64) EPSILON_MANT ) << 32;
    Word16 eps_q = 63 - EPSILON_EXP;
    move32();
    move16();

    set32_fx( (Word32 *) Ure_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS );
    set32_fx( (Word32 *) Uim_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS );

    exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 );
    c_re = L_shl( Cre_fx, exp );
    c_im = L_shl( Cim_fx, exp );
    q_c = add( q_C, exp );

    exp = sub( get_min_scalefactor( E1_fx, E2_fx ), 2 );
    e1 = L_shl( E1_fx, exp );
    e2 = L_shl( E2_fx, exp );
    q_e = add( q_E, exp );

    // crossSquare_fx = (c_re * c_re) + (c_im * c_im)
    // a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx
    // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx))
    // add_fx = 0.5f * (e1 + e2)

    tmp1 = L_sub( e1, e2 );
    tmp3 = W_mult_32_32( tmp1, tmp1 );
    q_tmp1 = add( add( q_e, q_e ), 1 );
    if ( !tmp3 )
    {
        q_tmp1 = 63;
        move16();
    }

    crossSquare_fx = W_mac_32_32( W_mult_32_32( c_re, c_re ), c_im, c_im );
    q_crossSquare = add( add( q_c, q_c ), 1 );
    if ( !crossSquare_fx )
    {
        q_crossSquare = 63;
        move16();
    }

    tmp4 = crossSquare_fx;
    move64();
    q_tmp2 = sub( q_crossSquare, 2 );
    if ( !tmp4 )
    {
        q_tmp2 = 63;
        move16();
    }

    q_diff = sub( q_tmp1, q_tmp2 );
    q_tmp1 = s_min( q_tmp1, q_tmp2 );
    if ( q_diff > 0 )
    {
        tmp3 = W_shr( tmp3, q_diff );
    }
    if ( q_diff < 0 )
    {
        tmp4 = W_shl( tmp4, q_diff );
    }
    tmp3 = W_add( tmp3, tmp4 );
    q_diff = W_norm( tmp3 );
    tmp3 = W_shl( tmp3, q_diff );
    q_tmp1 = add( q_tmp1, q_diff );

    // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx))
    exp = sub( 63, q_tmp1 );
    pm_fx = Sqrt32( L_max( 0, W_extract_h( tmp3 ) ), &exp );
    pm_fx = L_shr( pm_fx, 1 );
    q_tmp2 = sub( 31, exp );

    // add_fx = 0.5 * (e1 + e2)
    add_fx = L_shr( L_add( e1, e2 ), 1 );
    q_tmp1 = q_e;
    move16();

    // D[0] = add + pm;
    // D[1] = max( 0.0f, add - pm );

    q_diff = sub( q_tmp1, q_tmp2 );

    tmp1 = add_fx;
    move32();
    if ( q_diff > 0 )
    {
        tmp1 = L_shr( tmp1, q_diff );
    }

    tmp2 = pm_fx;
    move32();
    if ( q_diff < 0 )
    {
        tmp2 = L_shl( tmp2, q_diff );
    }

    D_fx[0] = L_add( tmp1, tmp2 );
    move32();
    D_fx[1] = L_max( L_sub( tmp1, tmp2 ), 0 );
    move32();
    *q_D = s_min( q_tmp1, q_tmp2 );
    move32();

    // Numeric case, when input is practically zeros
    // if ( D_fx[0] < EPSILON_FX )

    IF( LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ) )
    {
        Ure_fx[0][0] = ONE_IN_Q30;
        move32();
        Ure_fx[1][1] = ONE_IN_Q30;
        move32();
        *q_U = Q30;
        move16();
        return;
    }

    // Numeric case, when input is near an identity matrix with a gain
    tmp1 = Mpy_32_32( INV_1000_Q31, add_fx );
    if ( q_diff > 0 )
    {
        tmp1 = L_shr( tmp1, q_diff );
    }

    IF( LT_32( tmp2, tmp1 ) )
    {
        Ure_fx[0][0] = ONE_IN_Q30;
        move32();
        Ure_fx[1][1] = ONE_IN_Q30;
        move32();
        *q_U = Q30;
        move16();
        return;
    }

    // Eigenvectors

    q_diff = sub( q_e, *q_D );
    q_tmp1 = s_min( q_e, *q_D );

    tmp1 = D_fx[0];
    move32();
    if ( q_diff > 0 )
    {
        tmp1 = L_shr( tmp1, q_diff );
    }

    tmp2 = D_fx[1];
    move32();
    if ( q_diff > 0 )
    {
        tmp2 = L_shr( tmp2, q_diff );
    }

    if ( q_diff < 0 )
    {
        e1 = L_shl( e1, q_diff );
    }

    if ( q_diff < 0 )
    {
        e2 = L_shl( e2, q_diff );
    }

    s0_fx = L_sub( tmp1, e1 ); // D_fx[0] - e1
    tmp1 = L_sub( tmp1, e2 );  // D_fx[0] - e2
    s1_fx = L_sub( tmp2, e1 ); // D_fx[1] - e1
    tmp2 = L_sub( tmp2, e2 );  // D_fx[1] - e2

    i01 = GT_32( L_abs( tmp1 ), L_abs( s0_fx ) ); // fabsf( D_fx[0] - e2 ) > fabsf( D_fx[0] - e1 )
    i11 = GT_32( L_abs( tmp2 ), L_abs( s1_fx ) ); // fabsf( D_fx[1] - e2 ) > fabsf( D_fx[1] - e1 )

    if ( i01 )
    {
        s0_fx = tmp1;
        move32();
    }

    if ( i11 )
    {
        s1_fx = tmp2;
        move32();
    }

    // normVal = sqrtf( 1.0f / ( 1e-12f + crossSquare + s * s ) );

    q_tmp2 = shl( q_tmp1, 1 );
    q_min = s_min( q_tmp2, q_crossSquare );
    q_min = s_min( q_min, eps_q );

    q_diff = sub( q_tmp2, q_min );
    tmp3 = W_shr( W_mult0_32_32( s0_fx, s0_fx ), q_diff );
    tmp4 = W_shr( W_mult0_32_32( s1_fx, s1_fx ), q_diff );

    q_diff = sub( q_crossSquare, q_min );
    crossSquare_fx = W_shr( crossSquare_fx, q_diff );
    tmp3 = W_add( tmp3, crossSquare_fx );
    tmp4 = W_add( tmp4, crossSquare_fx );

    q_diff = sub( eps_q, q_min );
    eps_fx = W_shr( eps_fx, q_diff );
    tmp3 = W_add( tmp3, eps_fx );
    tmp4 = W_add( tmp4, eps_fx );

    q_diff = W_norm( tmp3 );
    tmp3 = W_shl( tmp3, q_diff );
    nval0_q = add( q_min, q_diff );

    q_diff = W_norm( tmp4 );
    tmp4 = W_shl( tmp4, q_diff );
    nval1_q = add( q_min, q_diff );

    // nval0_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp3 ), &exp );
    // exp = sub( exp, sub( 62, nval0_q ) );
    //
    // is equivalent to:
    //
    // nval0_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp3 ) );
    // exp = sub( nval0_q, 61 );

    nval0_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp3 ) );
    exp = sub( nval0_q, 61 );
    nval0_fx = Sqrt32( nval0_fx, &exp );
    nval0_q = sub( 31, exp );

    // nval1_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp4 ), &exp );
    // exp = sub( exp, sub( 62, nval1_q ) );
    //
    // is equivalent to:
    //
    // nval1_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp4 ) );
    // exp = sub( nval1_q, 61 );

    nval1_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp4 ) );
    exp = sub( nval1_q, 61 );
    nval1_fx = Sqrt32( nval1_fx, &exp );
    nval1_q = sub( 31, exp );

    q_diff = sub( q_c, q_tmp1 );
    q_tmp1 = s_min( q_tmp1, q_c );

    if ( q_diff > 0 )
    {
        c_re = L_shr( c_re, q_diff );
    }

    if ( q_diff > 0 )
    {
        c_im = L_shr( c_im, q_diff );
    }

    if ( q_diff < 0 )
    {
        s0_fx = L_shl( s0_fx, q_diff );
    }

    if ( q_diff < 0 )
    {
        s1_fx = L_shl( s1_fx, q_diff );
    }

    q_diff = sub( nval0_q, nval1_q );
    q_tmp2 = s_min( nval0_q, nval1_q );

    if ( q_diff > 0 )
    {
        nval0_fx = L_shr( nval0_fx, q_diff );
    }

    if ( q_diff < 0 )
    {
        nval1_fx = L_shl( nval1_fx, q_diff );
    }

    *q_U = sub( add( q_tmp1, q_tmp2 ), 31 );

    i00 = L_sub( 1, i01 );
    i10 = L_sub( 1, i11 );

    c0_im = c_im;
    move32();
    if ( i00 > 0 )
    {
        c0_im = L_negate( c0_im );
    }

    c1_im = c_im;
    move32();
    if ( i10 > 0 )
    {
        c1_im = L_negate( c1_im );
    }

    Ure_fx[i00][0] = Mpy_32_32( s0_fx, nval0_fx );
    move32();
    Ure_fx[i01][0] = Mpy_32_32( c_re, nval0_fx );
    move32();
    Uim_fx[i01][0] = Mpy_32_32( c0_im, nval0_fx );
    move32();

    Ure_fx[i10][1] = Mpy_32_32( s1_fx, nval1_fx );
    move32();
    Ure_fx[i11][1] = Mpy_32_32( c_re, nval1_fx );
    move32();
    Uim_fx[i11][1] = Mpy_32_32( c1_im, nval1_fx );
    move32();
#else
    Word16 chA, chB, ch;
    Word32 s_fx, normVal_fx, crossSquare_fx, a_fx, pm_fx, add_fx;
    Word32 tmp1, tmp2, tmp3, e1, e2, c_re, c_im;
@@ -4200,7 +3874,7 @@ static void eig2x2_fx(
        *q_U = q_U_2;
        move16();
    }
#endif

    return;
}