Commit 707aa428 authored by Manuel Jander's avatar Manuel Jander
Browse files

Activate division optimizations except one which for some reason causes more...

Activate division optimizations except one, which for some reason causes more errors in the test set. Optimize the get_alpha_beta() index calculation: more precision and lower WMOPS.
parent ee9a5224
Loading
Loading
Loading
Loading
Loading
+18 −9
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@
#define CONVERGENCE_FACTOR_FX       214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */

#if 1
//#define FIX_1010_OPT_DIV
#define FIX_1010_OPT_DIV

#define FIX_1010_OPT_GIVENS
#define FIX_1010_OPT_GIVENS_INV
@@ -1430,7 +1430,7 @@ static void biDiagonalReductionRight_fx(
#endif
            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
            {
#ifndef FIX_1010_OPT_DIV
#ifndef FIX_1010_OPT_DIV_no
                singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
#else
                temp_e = norm_l( singularVectors[currChannel][jCh] );
@@ -1773,7 +1773,7 @@ static void singularVectorsAccumulationRight_fx(
#ifndef M_PI
#define M_PI 3.141592653589793 
#endif
#define NUM_REGIONS 1024
#define NUM_REGIONS 32
static Word32 alphaBeta[NUM_REGIONS][2];
static void get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta)
{
@@ -1799,13 +1799,22 @@ static void get_alpha_beta(Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *a
    pf = (float)p * powf(2.f, p_e-31);
    qf = (float)q * powf(2.f, q_e-31);
    r = floor((double)NUM_REGIONS * 4. * atan2f(qf, pf)/M_PI);
#else
    shift = sub(p_e, q_e);
    r = mult_r( atan2_fx(L_shr(q, s_max(0, shift)), L_shr(p, s_max(0, negate(shift)))), FL2WORD16_SCALE((float)NUM_REGIONS*4./M_PI, 14));
#endif
    if (r == NUM_REGIONS) {
    if (r >= NUM_REGIONS) {
        r =  NUM_REGIONS-1;
    }
#elif 1
    shift = sub(norm_l(q),1);
    q = L_shl(q, shift);
    q_e = sub(q_e, shift);
    shift = norm_l(p);
    p = L_shl(p, shift);
    p_e = sub(p_e, shift);
    shift = sub(q_e, p_e);
    r = shl(div_s(extract_h(q), extract_h(p)), shift);
    /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */
    r = add(add(mult(mult(r,r), FL2WORD16_SCALE(-3.672563685340096e-01, 3)), mult(r, FL2WORD16_SCALE(1.375369641423651e+00, 3))), FL2WORD16_SCALE(-6.529424378422714e-03, 3));
    r = s_min(s_max(0, shr(r, 4+3)), NUM_REGIONS-1);
#endif
    assert((r >= 0) && (r < NUM_REGIONS));
    *alpha = alphaBeta[r][0];
    *beta = alphaBeta[r][1];
@@ -1840,7 +1849,7 @@ static void GivensRotation2_fx(
    move32();
#if 1
    *outInv_e = shl(*out_e, 1);
    *resultInv = ISqrt32( Mpy_32_32(r, r), outInv_e );
    *resultInv = ISqrt32( L_max(1, Mpy_32_32(r, r)), outInv_e );
    move32();
#else
    *resultInv = L_deposit_h(BASOP_Util_Divide3232_Scale(MAX_32, r, outInv_e));