Commit 75d25b05 authored by Manuel Jander
Browse files

Tune normalizations under the scope of FIX_1010_OPT_DIV. Increase AMAXBMIN...

Tune normalizations under the scope of FIX_1010_OPT_DIV. Increase AMAXBMIN interval count to better match reference.
parent 82157584
Loading
Loading
Loading
Loading
Loading
+10 −32
Original line number Diff line number Diff line
@@ -56,7 +56,7 @@

#define FIX_1010_OPT_GIVENS
#define FIX_1010_OPT_GIVENS_INV
-//#define FIX_1010_OPT_GIVENS_AMAX_BMIN
+#define FIX_1010_OPT_GIVENS_AMAX_BMIN
#endif

/*-----------------------------------------------------------------------*
@@ -997,7 +997,7 @@ static void ApplyQRTransform_fx(
        singularValues[ch] = GivensRotation_fx( d, d_e, r, r_e, &singularValues_e[ch] ); /* exp(singularValues_e) */
        move32();
#endif
-        IF( GT_32( L_abs( singularValues[ch] ), Mpy_32_32( CONVERGENCE_FACTOR_FX, L_abs( singularValues[ch] ) ) ) )
+        IF (singularValues[ch] != 0)
        {
#ifndef FIX_1010_OPT_GIVENS_INV
            aux = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, singularValues[ch], &aux_e ); /* exp(aux_e + (1 - singularValues_e)) */
@@ -1154,7 +1154,7 @@ static void HouseholderReduction_fx(
#ifdef FIX_1010_OPT_DIV
static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e )
{
-    Word16 sign, shift;
+    Word16 sign, shift, shift2;

    sign = 0;
    move16();
@@ -1173,7 +1173,9 @@ static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e )
    move16();
    x = ISqrt32norm( x, px_e );
    x = Mpy_32_32( x, x );
-    *px_e = add( shl( *px_e, 1 ), shift );
+    shift2 = norm_l( x );
+    x = L_shl( x, shift2 );
+    *px_e = add( shl( *px_e, 1 ), sub(shift, shift2) );
    move16();

    if ( sign )
@@ -1252,9 +1254,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
#ifdef FIX_1010_OPT_DIV
        Word16 invVal_e, temp_e;
        Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
-        temp_e = norm_l( invVal );
-        invVal = L_shl( invVal, temp_e );
-        invVal_e = sub( invVal_e, temp_e );
#endif
        norm_x = 0;
        move32();
@@ -1269,9 +1268,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
            singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e );
            singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
            sing_exp[jCh] = sub( invVal_e, temp_e );
-            temp_e = norm_l( singularVectors[jCh][currChannel] );
-            singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e );
-            sing_exp[jCh] = sub( sing_exp[jCh], temp_e );
            move16();
#endif
            move32();
@@ -1308,9 +1304,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */

#ifdef FIX_1010_OPT_DIV
        invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
-        temp_e = norm_l( invVal );
-        invVal = L_shl( invVal, temp_e );
-        invVal_e = sub( invVal_e, temp_e );
#endif

        FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
@@ -1330,9 +1323,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
#else
            f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
            f_e = add( invVal_e, sub( norm_x_e, r_e ) );
-            temp_e = norm_l( f );
-            f = L_shl( f, temp_e );
-            f_e = sub( f_e, temp_e );
#endif

            FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
@@ -1442,13 +1432,10 @@ static void biDiagonalReductionRight_fx(
#ifdef FIX_1010_OPT_DIV
            Word16 invVal_e, temp_e;
            Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
-            temp_e = norm_l( invVal );
-            invVal = L_shl( invVal, temp_e );
-            invVal_e = sub( invVal_e, temp_e );
#endif
            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
            {
-#ifndef FIX_1010_OPT_DIV_no
+#ifndef FIX_1010_OPT_DIV
                singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
#else
                temp_e = norm_l( singularVectors[currChannel][jCh] );
@@ -1456,9 +1443,6 @@ static void biDiagonalReductionRight_fx(
                singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                sing_exp[jCh] = sub( invVal_e, temp_e );
                move16();
-                temp_e = norm_l( singularVectors[currChannel][jCh] );
-                singularVectors[currChannel][jCh] = L_shl( singularVectors[currChannel][jCh], temp_e );
-                sing_exp[jCh] = sub( sing_exp[jCh], temp_e );
#endif
                move32();
                sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
@@ -1493,9 +1477,6 @@ static void biDiagonalReductionRight_fx(

#ifdef FIX_1010_OPT_DIV
            invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
-            temp_e = norm_l( invVal );
-            invVal = L_shl( invVal, temp_e );
-            invVal_e = sub( invVal_e, temp_e );
#endif

            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
@@ -1507,9 +1488,6 @@ static void biDiagonalReductionRight_fx(
                secDiag[jCh] = L_shl( singularVectors[currChannel][jCh], temp_e );
                secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                secDiag_exp[jCh] = sub( invVal_e, temp_e );
-                temp_e = norm_l( secDiag[jCh] );
-                secDiag[jCh] = L_shl( secDiag[jCh], temp_e );
-                secDiag_exp[jCh] = sub( secDiag_exp[jCh], temp_e );
                move16();
#endif
                move32();
@@ -1791,7 +1769,7 @@ static void singularVectorsAccumulationRight_fx(
#ifndef M_PI
#define M_PI 3.141592653589793
#endif
-#define NUM_REGIONS 32
+#define NUM_REGIONS 128
static Word32 alphaBeta[NUM_REGIONS][2];
static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta )
{
@@ -1830,10 +1808,10 @@ static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *
    p = L_shl( p, shift );
    p_e = sub( p_e, shift );
    shift = sub( q_e, p_e );
-    r = shl( div_s( extract_h( q ), extract_h( p ) ), shift );
+    r = shl_sat( div_s( extract_h( q ), extract_h( p ) ), shift );
    /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */
    r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) );
-    r = s_min( s_max( 0, shr( r, 4 + 3 ) ), NUM_REGIONS - 1 );
+    r = s_min( s_max( 0, shr( r, WORD16_BITS-1-7-3 ) ), NUM_REGIONS - 1 );
#endif
    assert( ( r >= 0 ) && ( r < NUM_REGIONS ) );
    *alpha = alphaBeta[r][0];