Commit 0182bda7 authored by thomas dettbarn's avatar thomas dettbarn
Browse files

initial revision with a 64 bit householder reduction.

parent b5f7c8f6
Loading
Loading
Loading
Loading
+295 −2
Original line number Diff line number Diff line
@@ -87,7 +87,27 @@ static void biDiagonalReductionRight_fx(
    Word32 *g, /* Q31 */
    Word16 *g_e 
);           
static void biDiagonalReductionLeft_64(
    Word64 singularVectors_Left_64,
    Word16 singularVectors_e,
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel /* Q0 */
);

static void biDiagonalReductionRight_64(
    Word64 singularVectors_Left_64,
    Word16 singularVectors_e,
    Word32 secDiag[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
    Word32 *g, /* Q31 */
    Word16 *g_e 
);

#else
static void biDiagonalReductionLeft_fx(
@@ -846,6 +866,7 @@ static void HouseholderReduction_fx(
{
    Word16 nCh;
#ifdef	MYCHANGES
	Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    Word32 g_fx = 0;
    Word16 g_e = 0;
    move32();
@@ -861,6 +882,38 @@ static void HouseholderReduction_fx(

    Word16 iCh, jCh;
    Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
#ifdef	 MYCHANGES
    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
    {
        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
        {
            singularVectors_Left_64[jCh][iCh] = W_shr(W_deposit32_h(singularVectors_Left_fx[jCh][iCh]),32);
        }
    }
	for (nCh=0;nCh<nChannelsC;nCh++)
	{
		biDiagonalReductionLeft_64(
			singularVectors_Left_64,currChannel,
			singularValues,singularValues_e,
			nChannelsL,
			nChannelsC,
			currChannel
		);
		biDiagonalReductionRight_64(
			singularVectors_Left_64,currChannel,
			secDiag,secDiag_exp,
			nChannelsL,
			nChannelsC,
			currChannel,
			&g_fx,
			&g_e	
		);
	}	

	


#endif
    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
    {
        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
@@ -907,6 +960,224 @@ static void HouseholderReduction_fx(
 *
 *-------------------------------------------------------------------------*/
#ifdef	MYCHANGES
static void biDiagonalReductionLeft_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 singularVectors_e,
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel /* Q0 */
)
{
    Word16 iCh, jCh;
    Word32 norm_x, f, r, g;
    Word16 norm_x_e, f_e, r_e, g_e;
    Word32 L_temp;
    Word16 L_temp_e;
    Word64 r_64;
    Word32 tmp;
    Word16 tmpe;
    norm_x=0;
    move32();
    IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
    {
        norm_64=0;
        move64();
        tmpe=add(singularVectors_e,1);
        FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
        {
            tmp=E_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe));
            norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp));
        }
        tmpe=W_norm(norm_64);
        norm_x=W_extract_h(W_shl(norm_64, tmpe ));
        norm_x_e = add(sub(shl(singularVectors_e, 1), tmp3), 3 );
    }
    IF ( norm_x )
    {
        Word16 invVal_e;
        Word32 invVal;
        Word64 tmpmul;
        Word16 tmpe2;
        Word64 tmp64;
        L_temp_e = norm_x_e;
        move16();
        L_temp = Sqrt( norm_x, &L_temp_e);
        IF ( GE_64( singularVectors_Left_64[currChannel][currChannel] ) )
        {
           L_temp = L_negate( L_temp );
        }
        g=L_temp;
        move32();
        g_e = L_temp_e;
        move16();

        tmp=W_extract_l(W_shr(singularVectors_Left_64[currChannel][currChannel],singularVectors_e) );
        tmpe=sub(sub(g_e, singularVectors_e),2);
        tmpmul=W_mult0_32_32(g,tmp);
        tmpmul=W_shl(tmpmul,tmpe);
        r_64=W_sub(tmpmul, norm_64 );
        
        tmpe2=W_norm(r_64);
        r=W_extract_h(W_shl(r_64,tmpe2));
        r_e = sub(sub(add(add(1,singularVectors_e),g_e),tmpe,tmpe2));

        invVal_e = r_e;
        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e);

        tmpe=add(32,sub(singularVectors_e,g_e));	// TODO: maybe the other way around??
        tmp64=W_shr(W_deposit_h(g),tmpe);
	singularVectors_Left_64[currChannel][currChannel]=W_sub(singularVectors_Left_64[currChannel][currChannel],tmp64);	// exponent +1


        tmpe=add(singularVectors_e,2);
        tmp=W_extract_l(W_shr(singularVectors_Left_64[currChannel][currChannel],tmpe));
        for ( iCh=currChannel+1; iCh<nChannelsC; iCh++)
        {
            Word32 tmp2;
            Word32 tmp3;

    
            tmp2=W_extract_l(W_shr(singularVectors_Left_64[jCh][iCh],tmpe));
            norm_64=W_mult0_32_32(tmp,tmp2);
            for (jCh=currChannel+1;jCh<nChannelsL; jCh++)
            {
                tmp2=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe));
                tmp3=W_extract_l(W_shr(singularVectors_Left_64[jCh][iCh],tmpe));
                norm_64=W_add(norm_64, W_mult0_32_32(tmp2,tmp3));
            }
            norm_x_e=W_norm(norm_64);
            norm_x=W_shl(norm_64,norm_x_e);
            f=Mpy_32_32(norm_x,invVal);
            f_e=add(invVal_e, sub(norm_x_e, r_e ));

            for (jCh=currChannel;jCh<nChannelsL; jCh++)
            {
                tmp2=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe));
		singularVectors_Left_64[jCh][iCh]=W_add(singularVectors_Left_64[jCh][iCh],W_mult0_32_32(f,tmp2));	// exponent +1
            }
        }
    }
}

static void biDiagonalReductionRight_64(
    Word64 singularVectors_Left_64,
    Word16 singularVectors_e,
    Word32 secDiag[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
    Word32 *g, /* Q31 */
    Word16 *g_e 
)
{
    secDiag[currChannel] = ( *g );
    move32();
    secDiag_exp[currChannel] = ( *g_e );
    move16();

    ( *g ) =0;
    ( *g_e ) =0;
    move32();
    move16();

    IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
    {
        Word64 norm_64;
        Word64 abs_x;
        Word16 idx;
     

        idx=add(currChannel,1);

        norm_64=0;
        abs_x=0;
        move64();
        move64();
        tmpe=add(singularVectors_e,1);
        for (jCh=idx;jCh<nChannelsC; jCh++)
        {
            tmp=E_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe));
            norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp));
            abs_x=W_add(abs_x, W_abs(singularVectors_Left_64[jCh][currChannel]));
        }
        tmpe=W_norm(norm_64);
        norm_x=W_extract_h(W_shl(norm_64, tmpe ));
        norm_x_e = add(sub(shl(singularVectors_e, 1), tmp3), 3 );
        IF (norm_x)
        {
            Word16 invVal_e;
            Word32 invVal;
            Word64 tmpmul;
            Word16 tmpe2;
            Word64 tmp64;
            L_temp_e = norm_x_e;
            move16();
            L_temp = Sqrt( norm_x, &L_temp_e);
            IF ( GE_64( singularVectors_Left_64[currChannel][idx] ) )
            {
               L_temp = L_negate( L_temp );
            }
            g=L_temp;
            move32();
            g_e = L_temp_e;
            move16();
    
            tmp=W_extract_l(W_shr(singularVectors_Left_64[currChannel][idx],singularVectors_e) );
            tmpe=sub(sub(g_e, singularVectors_e),1);
            tmpmul=W_mult0_32_32(g,tmp);
            tmpmul=W_shl(tmpmul,tmpe);
            r_64=W_sub(tmpmul, norm_64 );
            
            tmpe2=W_norm(r_64);
            r=W_extract_h(W_shl(r_64,tmpe2));
            r_e = sub(sub(add(add(1,singularVectors_e),g_e),tmpe,tmpe2));
    
            invVal_e = r_e;
            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e);
    
            tmpe=add(32,sub(singularVectors_e,g_e));	// TODO: maybe the other way around??
            tmp64=W_shr(W_deposit_h(g),tmpe);
    	    singularVectors_Left_64[currChannel][idx]=W_sub(singularVectors_Left_64[currChannel][idx],tmp64);	// exponent +1


            tmpe=add(singularVectors_e,2);
            tmp=W_extract_l(W_shr(singularVectors_Left_64[currChannel][currChannel],tmpe));
            for ( iCh=currChannel+1; iCh<nChannelsC; iCh++)
            {
                Word32 tmp2;
                Word32 tmp3;
    
        
                tmp2=W_extract_l(W_shr(singularVectors_Left_64[jCh][iCh],tmpe));
                norm_64=W_mult0_32_32(tmp,tmp2);
                for (jCh=currChannel+1;jCh<nChannelsL; jCh++)
                {
                    tmp2=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe));
                    tmp3=W_extract_l(W_shr(singularVectors_Left_64[jCh][iCh],tmpe));
                    norm_64=W_add(norm_64, W_mult0_32_32(tmp2,tmp3));
                }
                norm_x_e=W_norm(norm_64);
                norm_x=W_shl(norm_64,norm_x_e);
                f=Mpy_32_32(norm_x,invVal);
                f_e=add(invVal_e, sub(norm_x_e, r_e ));
    
                for (jCh=currChannel;jCh<nChannelsL; jCh++)
                {
                    tmp2=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe));
                    singularVectors_Left_64[jCh][iCh]=W_add(singularVectors_Left_64[jCh][iCh],W_mult0_32_32(f,tmp2));	// exponent +1
                }
            }
        }
    }
    return;
}




static void biDiagonalReductionLeft_fx(
    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
@@ -1021,8 +1292,30 @@ static void biDiagonalReductionLeft_fx(
//			r=eins;
//			r_e=eins_e;
		}
		if (currChannel==0)
		{
			Word64 tmp64;
			Word32 tmp;
			Word16 tmpe;

			tmpe=sub(exponent0,g_e);

			printf("SIGMA::%08X<%2X %08X   s:%08X<%2x ",g,g_e, -g, singularVectors[currChannel][currChannel],singularVectors2_e[currChannel][currChannel]);
			tmp64=W_sub(W_deposit32_l(singularVectors[currChannel][currChannel]),W_shl(W_deposit32_l(tmp),tmpe));
			printf("[%016llx] ",tmp64);
			tmp=W_extract_l(W_shr(tmp64,1));
			//tmp=tmp+singularVectors[currChannel][currChannel];
			printf("(%08X)",tmp);
		} 
//		else 
		{
		    singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors2_e[currChannel][currChannel], -g, g_e, &singularVectors2_e[currChannel][currChannel] ); /* sing_exp */
		    move32();
		}
		if (currChannel==0)
		{
			printf("s:%08X<%2x\n",singularVectors[currChannel][currChannel],singularVectors2_e[currChannel][currChannel]);
		}

            invVal_e = r_e;
            move16();