Commit 184ae3a4 authored by thomas dettbarn's avatar thomas dettbarn
Browse files

compiles and runs.

parent 5d9ebfb5
Loading
Loading
Loading
Loading
+317 −2
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@
   the United Nations Convention on Contracts on the International Sales of Goods.

*******************************************************************************************************/

#define	MYCHANGES
#include <stdint.h>
#include "options.h"
#include "prot_fx.h"
@@ -65,7 +65,30 @@ static void HouseholderReduction_fx(
    const Word16 nChannelsC, /* Q0 */
    Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
    Word16 *eps_x_fx_e );
#ifdef MYCHANGES
static void biDiagonalReductionLeft_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 singularVectors_e,
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel /* Q0 */
);

static void biDiagonalReductionRight_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 singularVectors_e,
    Word32 secDiag[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
    Word32 *g, /* Q31 */
    Word16 *g_e 
);

#endif
static void biDiagonalReductionLeft_fx(
    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
@@ -832,15 +855,63 @@ static void HouseholderReduction_fx(
    Word16 *eps_x_fx_e )
{
    Word16 nCh;
	push_wmops("HouseholderReduction_fx");
#ifdef	MYCHANGES
	Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    Word32 g_fx = 0;
    Word16 g_e = 0;
    move32();
    move16();

    Word32 sig_x_fx = 0;
    Word16 sig_x_fx_e = 0;
    move32();
    move16();
#else

    // float g = 0.0f, sig_x = 0.0f;// to be removed
    Word32 g_fx = 0, sig_x_fx = 0;
    move32();
    move32();
    Word16 sig_x_fx_e = 0;
    move16();
#endif

    Word16 iCh, jCh;
    Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
#ifdef	 MYCHANGES
	push_wmops("HouseholderReduction_fx 64");
    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
    {
        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
        {
            singularVectors_Left_64[jCh][iCh] = W_shr(W_deposit32_h(singularVectors_Left_fx[jCh][iCh]),32);
        }
    }
	for (nCh=0;nCh<nChannelsC;nCh++)
	{
		biDiagonalReductionLeft_64(
			singularVectors_Left_64,0,
			singularValues_fx,singularValues_fx_e,
			nChannelsL,
			nChannelsC,
			nCh
		);


		biDiagonalReductionRight_64(
			singularVectors_Left_64,nCh,
			secDiag_fx,secDiag_fx_e,
			nChannelsL,
			nChannelsC,
			nCh,
			&g_fx,
			&g_e	
		);
	}	
	pop_wmops();
#endif
	push_wmops("HouseholderReduction_fx 32");
    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
    {
        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
@@ -866,13 +937,14 @@ static void HouseholderReduction_fx(
            move32();
        }
    }
	pop_wmops();

    /* SingularVecotr Accumulation */
    singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );


    singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );

	pop_wmops();
    return;
}

@@ -881,7 +953,250 @@ static void HouseholderReduction_fx(
 *
 *
 *-------------------------------------------------------------------------*/
#ifdef	MYCHANGES
static void biDiagonalReductionLeft_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 singularVectors_e,
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel /* Q0 */
)
{
    Word16 iCh, jCh;
    Word32 norm_x, g;
    Word16 norm_x_e, g_e;
    Word64 norm_64;
    Word16 bitwindow;
    norm_x=0;
    move32();
    IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
    {
        Word16 n;
        Word32 tmp;
        norm_64=0;
        move64();
        bitwindow=add(singularVectors_e,1);
        FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
        {
            tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],bitwindow));
            norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp));
        }
        n=W_norm(norm_64);
        norm_x=W_extract_h(W_shl(norm_64, n ));
        norm_x_e = add(sub(shl(singularVectors_e, 1), n), 3 );
    }
    IF ( norm_x )
    {
        Word32 factor2;
        Word16 tmp_e;
        Word64 tmpmul;

        Word64 r_64;      //  = sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm OR -sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm
        Word32 r, invVal;
        Word16 r_e, invVal_e;

        g_e = norm_x_e;
        move16();
        g = Sqrt32( norm_x, &g_e);
        IF ( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
        {
           g = L_negate( g );
        }
        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], singularVectors_e) );
        tmp_e=sub( sub( g_e, singularVectors_e), 2);
        tmpmul=W_mult0_32_32( g, factor2);
        tmpmul=W_shl(tmpmul, tmp_e);
        r_64=W_sub(tmpmul, norm_64);
        r_e=W_norm(r_64 );
        r=W_extract_h( W_shl( r_64, r_e ) );
//        r_e = sub( sub( add( add(1, singularVectors_e),g_e), tmp_e), r_e);  // 1+singularVectors_e + g_e - ((g_e-singularVectors_e)-2)-r_e
        r_e = sub( add( 3, add(singularVectors_e, singularVectors_e )), r_e );


        invVal_e = r_e;
        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e);


        tmp_e = add(32, sub(singularVectors_e, g_e ) );
        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( g), tmp_e) ); // here, the exponent goes up.

        bitwindow=add(singularVectors_e, 2); // so does the bit window
        FOR ( iCh = currChannel+1; iCh < nChannelsC; iCh++)
        {
	    Word32 factor1;
            Word32 factor2;
            Word32 f;        // = norm / r
//            Word16 f_e;      // not really needed

            norm_64 = 0;
            for ( jCh = currChannel; jCh<nChannelsL; jCh++ )
            {
                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], bitwindow));
                factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], bitwindow));
                norm_64 = W_add( norm_64, W_mult0_32_32(factor1, factor2));
            }
            norm_x_e = W_norm( norm_64);
            norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
            f = Mpy_32_32( norm_x, invVal );
//            f_e = add( invVal_e, sub(norm_x, r_e ) );
            FOR ( jCh = currChannel; jCh < nChannelsL; jCh++ )
            {
                 Word16 magic_shift;
                 magic_shift = add( add( norm_x_e, 23),  r_e);
                 factor1 = W_extract_l( W_shr(singularVectors_Left_64[jCh][currChannel], bitwindow ) );
                 singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr(W_mult0_32_32( f, factor1 ), magic_shift ) );
            }
        }
        move16();
    }
}

static void biDiagonalReductionRight_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 singularVectors_e,
    Word32 secDiag[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
    Word32 *g, /* Q31 */
    Word16 *g_e 
)
{
    Word16 iCh, jCh;
    Word32 norm_x;
    Word16 norm_x_e;
    Word64 norm_64;
    Word32 abs_x;
    Word16 abs_x_e;
    Word64 abs_64;
    Word16 bitwindow;
    Word16 idx;



    secDiag[currChannel] = ( *g );
    secDiag_e[currChannel] = ( *g_e );
    move32();
    move16();

    ( *g ) =0;
    ( *g_e ) = 0;
    move32();
    move16();

    IF ( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */  
    {
        Word16 tmp_e;
        norm_64=0;
        move64();
        bitwindow=add( singularVectors_e, 1);    // FIXME: WHY NOT 2????
        idx = add( currChannel, 1);
        FOR ( jCh = idx; jCh < nChannelsC; jCh++ )
        {
            Word32 tmp;
            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp) );
            abs_64 = W_add( abs_64, W_abs( singularVectors_Left_64[currChannel][jCh]) );            
        }
        norm_x_e = W_norm( norm_64);
        norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
        norm_x_e = add( sub( add( singularVectors_e, singularVectors_e), norm_x_e), 3);
        move16();
        abs_x_e = W_norm( abs_64);
        abs_x = W_extract_h( W_shl( abs_64, abs_x_e) );
        abs_x_e = add( sub( add( singularVectors_e, singularVectors_e), abs_x_e), 3);

        IF ( norm_x )
        {
            Word32 factor1;
            Word32 factor2;
            Word16 tmp_e;
            Word64 tmpmul;
            Word32 tmp_g;
            Word16 tmp_g_e;
            Word16 magic_shift;
            Word64 r_64;
            Word32 r;
            Word16 r_e;
            Word32 f;
            Word16 f_e;
            Word32 invVal;
            Word16 invVal_e;

            tmp_g_e = norm_x_e;
            move16();
            tmp_g = Sqrt32( norm_x, &tmp_g_e);
            IF ( GE_64( singularVectors_Left_64[currChannel][idx],0 ) )
            {
                tmp_g = L_negate( tmp_g); 
            }
            *g = tmp_g;
            *g_e = tmp_g_e;
            move32();
            move16();
            factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], bitwindow) );
            tmp_e = sub( sub( tmp_g_e, singularVectors_e), 1);
            tmpmul = W_mult0_32_32( tmp_g, factor2);
            tmpmul = W_shl(tmpmul, tmp_e);
            
            r_64 = W_sub( W_shr( tmpmul, 1), W_shr( norm_64, 1) );// FIXME: why those two shi(f)ts?
            r_e = W_norm( r_64);
            r = W_extract_h( W_shl( r_64, r_e) );
            r_e = sub( sub( add( bitwindow, tmp_g_e), tmp_e), r_e); // FIXME: reduce this!!

            invVal_e = r_e;
            move16();
            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e);

            magic_shift=32-tmp_g_e;
            singularVectors_Left_64[currChannel][idx]=W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g), magic_shift) );

            bitwindow=add(singularVectors_e,2);
            FOR( iCh = idx; iCh < nChannelsC; iCh++ )
            {
                Word16 magicshift;
                
                norm_64 = 0;
                move64();
                FOR ( jCh = idx; jCh<nChannelsL; jCh++ )
                {
                    factor1 = W_extract_l(W_shr( singularVectors_Left_64[iCh][jCh], bitwindow) );
                    factor2 = W_extract_l(W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2) );
                }

                norm_x_e = W_norm( norm_64);
                norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
                f = Mpy_32_32( norm_x, invVal);
                f_e = add( invVal_e, sub( norm_x_e, r_e) );
                magic_shift = -3*currChannel+22-2*norm_x_e+4*r_e+3*f_e;		// FIXME: HOW IS THIS WORKING?????!?!?!?!?!?!?!?!?!?
                FOR( iCh = idx; iCh < nChannelsC; iCh++ )
                {
                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
                    singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2), magic_shift) );
                }
            }
            // FIXME BEGIN: The following code has not yet been tested 
            invVal_e = 0;
            move16();
            invVal = BASOP_Util_Divide3232_Scale_newton( abs_x, maxWithSign_fx( r ), &invVal_e);
            invVal_e = add( invVal_e, sub( abs_x_e, r_e) );
            bitwindow = add(singularVectors_e, 1); 
            FOR ( jCh = idx; jCh < nChannelsL ; jCh++ )
            {
                secDiag[jCh] = Mpy_32_32( W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) ), invVal );
                move32();
                secDiag_e[jCh] = add(invVal_e, bitwindow);
                move16();
            }
            // FIXME END
        }
    }
}
#endif
static void biDiagonalReductionLeft_fx(
    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */