Commit aff97922 authored by Manuel Jander's avatar Manuel Jander
Browse files

Couple use of BASOP_Util_Inv32 to macro FIX_1010_OPT_INV_USING_INVSQRT and...

Couple use of BASOP_Util_Inv32 to macro FIX_1010_OPT_INV_USING_INVSQRT and disable it to improve accuracy.
parent 78d8e83c
Loading
Loading
Loading
Loading
Loading
+54 −11
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@

#if 1
#define FIX_1010_OPT_DIV
// #define FIX_1010_OPT_INV_USING_INVSQRT
#define FIX_1010_OPT_SINGLE_RESCALE
#define FIX_1010_OPT_GIVENS
#define FIX_1010_OPT_GIVENS_INV
@@ -1201,7 +1202,7 @@ static void HouseholderReduction_fx(
 *
 *-------------------------------------------------------------------------*/

#ifdef FIX_1010_OPT_DIV
#ifdef FIX_1010_OPT_INV_USING_INVSQRT
static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e )
{
    Word16 sign, shift, shift2;
@@ -1314,8 +1315,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
#ifdef FIX_1010_OPT_DIV
        Word16 invVal_e;
        Word32 invVal;
        /* BASOP_Util_Inv32 is not accurate enough in this case. */
#ifdef FIX_1010_OPT_INV_USING_INVSQRT
        invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
#else
        invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
#endif
#endif
        norm_x = 0;
        move32();
@@ -1324,11 +1328,19 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
        FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
        {
#ifndef FIX_1010_OPT_DIV
#ifndef FIX_1010_OPT_SINGLE_RESCALE
            singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
            move32();
            sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
            move16();
            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
#else
            singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
            move32();
            singularVectors2_e[jCh][currChannel] = add( L_temp_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) );
            move16();
            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
#endif
#else
            Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
            singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
@@ -1377,7 +1389,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
        move32();

#ifdef FIX_1010_OPT_DIV
#ifdef FIX_1010_OPT_INV_USING_INVSQRT
        invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
#else
        invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
#endif
#endif

        FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
@@ -1529,21 +1545,33 @@ static void biDiagonalReductionRight_fx(

#ifdef FIX_1010_OPT_DIV
            Word16 invVal_e, temp_e;
            Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
            Word32 invVal;
#ifdef FIX_1010_OPT_INV_USING_INVSQRT
            invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e );
#else
            invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
#endif
#endif
            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
            {
#ifndef FIX_1010_OPT_DIV
#ifndef FIX_1010_OPT_SINGLE_RESCALE
                singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
                move32();
                sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) );
                move16();
                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
#else
                singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e))  */
                move32();
                singularVectors2_e[currChannel][jCh] = add( L_temp_e, sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
                move16();
                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
#endif
#else
                temp_e = norm_l( singularVectors[currChannel][jCh] );
                singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                move32();

#ifndef FIX_1010_OPT_SINGLE_RESCALE
                sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) );
                move16();
@@ -1587,16 +1615,27 @@ static void biDiagonalReductionRight_fx(
            move32();

#ifdef FIX_1010_OPT_DIV
#ifdef FIX_1010_OPT_INV_USING_INVSQRT
            invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e );
#else
            invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
#endif
#endif

            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
            {
#ifndef FIX_1010_OPT_DIV
#ifndef FIX_1010_OPT_SINGLE_RESCALE
                secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */
                move32();
                secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) );
                move32();
#else
                secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */
                move32();
                secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( singularVectors2_e[currChannel][jCh], r_e ) );
                move32();
#endif
#else
                temp_e = norm_l( singularVectors[currChannel][jCh] );
                secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
@@ -1736,7 +1775,11 @@ static void singularVectorsAccumulationLeft_fx(
        IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
        {
#ifdef FIX_1010_OPT_DIV
#ifdef FIX_1010_OPT_INV_USING_INVSQRT
            t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp );
#else
            t_ii = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp );
#endif
            t_ii_e = sub( temp_exp, t_ii_e );
#else
            t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */
@@ -1753,7 +1796,7 @@ static void singularVectorsAccumulationLeft_fx(
                {
                    norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
                }
#ifdef FIX_1010_OPT_DIV
#ifdef FIX_1010_OPT_INV_USING_INVSQRT
                Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] );
                t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp );
                t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj );