Commit 6ea437ed authored by thomas dettbarn's avatar thomas dettbarn
Browse files

initial revision: Added two new functions biDiagonalReductionLeft_64() and...

initial revision: Added two new functions biDiagonalReductionLeft_64() and biDiagonalReductionRight_64() to replace biDiagonalReductionLeft_fx() and biDiagonalReductionRight_fx().
parent b480684c
Loading
Loading
Loading
Loading
+324 −1
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@
   the United Nations Convention on Contracts on the International Sales of Goods.

*******************************************************************************************************/

#define	MYCHANGES
#include <stdint.h>
#include "options.h"
#include "prot_fx.h"
@@ -65,7 +65,27 @@ static void HouseholderReduction_fx(
    const Word16 nChannelsC, /* Q0 */
    Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
    Word16 *eps_x_fx_e );
#ifdef MYCHANGES
static void biDiagonalReductionLeft_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 bitwindow,
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel /* Q0 */
);

static void biDiagonalReductionRight_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 bitwindow,
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
    Word32 *g, /* Q31 */
    Word16 *g_e
);
#else
static void biDiagonalReductionLeft_fx(
    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
@@ -93,6 +113,7 @@ static void biDiagonalReductionRight_fx(
    Word16 *sig_x_e,
    Word32 *g /* Q31 */
);            // Q31
#endif

static void singularVectorsAccumulationLeft_fx(
    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
@@ -853,15 +874,88 @@ static void HouseholderReduction_fx(
    Word16 *eps_x_fx_e )
{
    Word16 nCh;
#ifdef	MYCHANGES

	Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    Word32 g_fx = 0;
    Word16 g_e = 0;
    move32();
    move16();

#else

    // float g = 0.0f, sig_x = 0.0f;// to be removed
    Word32 g_fx = 0, sig_x_fx = 0;
    move32();
    move32();
    Word16 sig_x_fx_e = 0;
    move16();
#endif

    Word16 iCh, jCh;
    Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
#ifdef	 MYCHANGES
	push_wmops("HouseholderReduction_fx 64");
    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
    {
        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
        {
            singularVectors_Left_64[jCh][iCh] = W_shr(W_deposit32_h(singularVectors_Left_fx[jCh][iCh]),32);
        }
    }
    for (nCh=0;nCh<nChannelsC;nCh++)
    {
	    Word16 bitwindow;
	    bitwindow=1;
	    biDiagonalReductionLeft_64(
			    singularVectors_Left_64,bitwindow,
			    singularValues_fx,singularValues_fx_e,
			    nChannelsL,
			    nChannelsC,
			    nCh
			    );
	    singularValues_fx_e[nCh]=add(singularVectors_Left_e,singularValues_fx_e[nCh]);
	    secDiag_fx[nCh]=g_fx;
	    move32();
	    secDiag_fx_e[nCh]=add(singularVectors_Left_e,g_e);
	    bitwindow=2;
	    biDiagonalReductionRight_64(
			    singularVectors_Left_64,bitwindow,
			    nChannelsL,
			    nChannelsC,
			    nCh,
			    &g_fx,
			    &g_e	
			    );
		{
        		Word16 L_temp_e;
		        Word32 L_temp;
			L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
			IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
			{
				*eps_x_fx = L_temp; /* exp(L_temp_e) */
				move32();
				*eps_x_fx_e = L_temp_e;
				move32();
			}
		}
    }	
    {
        int i,j;
        for (j=0;j<nChannelsL;j++)
        {
            for (i=0;i<nChannelsC;i++)
            {
                Word16 n;
                n=W_norm(singularVectors_Left_64[j][i]);
                singularVectors_Left_fx[j][i]=W_extract_h(W_shl(singularVectors_Left_64[j][i],n));
                singularVectors_Left_fx_e[j][i]=sub(add(32,singularVectors_Left_e),n);
	    }
        }
    }
	pop_wmops();
#else

    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
    {
        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
@@ -887,6 +981,7 @@ static void HouseholderReduction_fx(
            move32();
        }
    }
#endif

    /* SingularVecotr Accumulation */
    singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
@@ -897,6 +992,233 @@ static void HouseholderReduction_fx(
    return;
}

#ifdef	MYCHANGES
/*-------------------------------------------------------------------------
 * biDiagonalReductionLeft()
 *
 *
 *-------------------------------------------------------------------------*/

static void biDiagonalReductionLeft_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 bitwindow,
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel /* Q0 */
)
{



    Word16 iCh, jCh;
    Word32 norm_x, g;
    Word16 norm_x_e, g_e;
    Word64 norm_64;
    g=0;
    g_e=0;
    move32();
    move16();
    norm_x=0;
    move32();
    IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
    {

        Word32 tmp;
        norm_64=0;
        move64();
        FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
        {
            tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],bitwindow));
            norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp));
        }
        norm_x_e=W_norm(norm_64);
        norm_x=W_extract_h(W_shl(norm_64, norm_x_e ));
        norm_x_e = add(sub(shl(bitwindow, 1), norm_x_e), 1 );
    }
    IF ( norm_x )
    {
        Word32 factor2;
        Word16 tmp_e;
        Word64 tmpmul;

        Word64 r_64;      //  = sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm OR -sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm
        Word32 r, invVal;
        Word16 r_e, invVal_e;

        g_e = norm_x_e;
        move16();
        g = Sqrt32( norm_x, &g_e);
        IF ( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
        {
           g = L_negate( g );
        }
        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], bitwindow) );
        tmp_e=sub( g_e, bitwindow) ;
        tmpmul=W_mult0_32_32( g, factor2);
        tmpmul=W_shl(tmpmul, tmp_e);
        r_64=W_sub(tmpmul, norm_64);
        r_e=W_norm(r_64 );
        r=W_extract_h( W_shl( r_64, r_e ) );
        r_e = sub( add( 1, add(bitwindow, bitwindow )), r_e );


        invVal_e = r_e;
        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e);


        tmp_e = add(31, sub(bitwindow, g_e ) );
        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( g), tmp_e) ); // here, the exponent goes up.

        bitwindow=add(bitwindow, 1); // so does the bit window
        FOR ( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++)
        {
	    Word32 factor1;
            Word32 factor2;
            Word32 f;        // = norm / r
//            Word16 f_e;      // not really needed

            norm_64 = 0;
            for ( jCh = currChannel; jCh<nChannelsL; jCh++ )
            {
                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], bitwindow));
                factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], bitwindow));
                norm_64 = W_add( norm_64, W_mult0_32_32(factor1, factor2));
            }
            norm_x_e = W_norm( norm_64);
            norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
            f = Mpy_32_32( norm_x, invVal );
            FOR ( jCh = currChannel; jCh < nChannelsL; jCh++ )
            {
                 Word16 magic_shift;
                 magic_shift = add( add( norm_x_e, 23),  r_e);
                 factor1 = W_extract_l( W_shr(singularVectors_Left_64[jCh][currChannel], bitwindow ) );
                 singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr(W_mult0_32_32( f, factor1 ), magic_shift ) );
            }
        }
    }
    singularValues[currChannel] = g;
    singularValues_e[currChannel] = g_e;
    move32();
    move16();
}

/*-------------------------------------------------------------------------
 * biDiagonalReductionRight()
 *
 *
 *-------------------------------------------------------------------------*/

static void biDiagonalReductionRight_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 bitwindow,
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
    Word32 *g, /* Q31 */
    Word16 *g_e
)
{
    Word16 iCh, jCh;
    Word32 norm_x;
    Word16 norm_x_e;
    Word64 norm_64;
    Word16 idx;




    ( *g ) =0;
    ( *g_e ) = 0;
    move32();
    move16();
    IF ( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
    {
        norm_64=0;
        move64();
        idx = add( currChannel, 1);
        FOR ( jCh = idx; jCh < nChannelsC; jCh++ )
        {
            Word32 tmp;
            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp) );
        }
        norm_x_e = W_norm( norm_64);
        norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
        norm_x_e = add( sub( shl( bitwindow, 1), norm_x_e), 1);
        move16();

        IF ( norm_x )
        {
            Word32 factor1;
            Word32 factor2;
            Word16 tmp_e;
            Word64 tmpmul;
            Word32 tmp_g;
            Word16 tmp_g_e;
            Word16 magic_shift;
            Word64 r_64;
            Word32 r;
            Word16 r_e;
            Word32 f;
            Word32 invVal;
            Word16 invVal_e;

            tmp_g_e = norm_x_e;
            move16();
            tmp_g = Sqrt32( norm_x, &tmp_g_e);
            IF ( GE_64( singularVectors_Left_64[currChannel][idx],0 ) )
            {
                tmp_g = L_negate( tmp_g);
            }
            *g = tmp_g;
            *g_e = tmp_g_e;
            move32();
            move16();
            factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], bitwindow) );
            tmp_e = sub( tmp_g_e, bitwindow);
            tmpmul = W_mult0_32_32( tmp_g, factor2);
            tmpmul = W_shl(tmpmul, tmp_e);
            r_64 = W_sub( tmpmul, norm_64 );
            r_e = W_norm( r_64);
            r = W_extract_h( W_shl( r_64, r_e) );

            invVal_e = 0;
            move16();
            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e);

            magic_shift=32-tmp_g_e;
            singularVectors_Left_64[currChannel][idx]=W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g), magic_shift) );
            bitwindow=add(bitwindow, 1);
	
            FOR( iCh = idx; iCh < nChannelsL; iCh++ )
            {

                norm_64 = 0;
                move64();
                FOR ( jCh = idx; jCh<nChannelsC; jCh++ )
                {
                    factor1 = W_extract_l(W_shr( singularVectors_Left_64[iCh][jCh], bitwindow) );
                    factor2 = W_extract_l(W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2) );
                }

                norm_x_e = W_norm( norm_64);
                norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
                f = Mpy_32_32( norm_x, invVal);
		magic_shift = 25+norm_x_e-  r_e ;	// FIXME: Why does this work?
		
                FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                {
                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
                    singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2), magic_shift) );
                }
            }
        }
    }
}
#else
/*-------------------------------------------------------------------------
 * biDiagonalReductionLeft()
 *
@@ -1203,6 +1525,7 @@ static void biDiagonalReductionRight_fx(

    return;
}
#endif

/*-------------------------------------------------------------------------
 * singularVectorsAccumulationLeft()