Commit 890836c1 authored by thomas dettbarn
Browse files

The 64-bit part of the Householder matrix compiles.

parent 0182bda7
Loading
Loading
Loading
Loading
+55 −31
Original line number Diff line number Diff line
@@ -88,7 +88,7 @@ static void biDiagonalReductionRight_fx(
    Word16 *g_e 
);           
static void biDiagonalReductionLeft_64(
    Word64 singularVectors_Left_64,
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 singularVectors_e,
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
@@ -98,7 +98,7 @@ static void biDiagonalReductionLeft_64(
);

static void biDiagonalReductionRight_64(
    Word64 singularVectors_Left_64,
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 singularVectors_e,
    Word32 secDiag[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
@@ -893,26 +893,22 @@ static void HouseholderReduction_fx(
	for (nCh=0;nCh<nChannelsC;nCh++)
	{
		biDiagonalReductionLeft_64(
			singularVectors_Left_64,currChannel,
			singularValues,singularValues_e,
			singularVectors_Left_64,nCh,
			singularValues_fx,singularValues_fx_e,
			nChannelsL,
			nChannelsC,
			currChannel
			nCh
		);
		biDiagonalReductionRight_64(
			singularVectors_Left_64,currChannel,
			secDiag,secDiag_exp,
			singularVectors_Left_64,nCh,
			secDiag_fx,secDiag_fx_e,
			nChannelsL,
			nChannelsC,
			currChannel,
			nCh,
			&g_fx,
			&g_e	
		);
	}	

	


#endif
    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
    {
@@ -978,6 +974,7 @@ static void biDiagonalReductionLeft_64(
    Word64 r_64;
    Word32 tmp;
    Word16 tmpe;
    Word64 norm_64;
    norm_x=0;
    move32();
    IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
@@ -987,12 +984,12 @@ static void biDiagonalReductionLeft_64(
        tmpe=add(singularVectors_e,1);
        FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
        {
            tmp=E_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe));
            tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe));
            norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp));
        }
        tmpe=W_norm(norm_64);
        norm_x=W_extract_h(W_shl(norm_64, tmpe ));
        norm_x_e = add(sub(shl(singularVectors_e, 1), tmp3), 3 );
        norm_x_e = add(sub(shl(singularVectors_e, 1), tmpe), 3 );
    }
    IF ( norm_x )
    {
@@ -1003,8 +1000,8 @@ static void biDiagonalReductionLeft_64(
        Word64 tmp64;
        L_temp_e = norm_x_e;
        move16();
        L_temp = Sqrt( norm_x, &L_temp_e);
        IF ( GE_64( singularVectors_Left_64[currChannel][currChannel] ) )
        L_temp = Sqrt32( norm_x, &L_temp_e);
        IF ( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
        {
           L_temp = L_negate( L_temp );
        }
@@ -1021,13 +1018,13 @@ static void biDiagonalReductionLeft_64(
        
        tmpe2=W_norm(r_64);
        r=W_extract_h(W_shl(r_64,tmpe2));
        r_e = sub(sub(add(add(1,singularVectors_e),g_e),tmpe,tmpe2));
        r_e = sub(sub(add(add(1,singularVectors_e),g_e),tmpe),tmpe2);

        invVal_e = r_e;
        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e);

        tmpe=add(32,sub(singularVectors_e,g_e));	// TODO: maybe the other way around??
        tmp64=W_shr(W_deposit_h(g),tmpe);
        tmp64=W_shr(W_deposit32_h(g),tmpe);
	singularVectors_Left_64[currChannel][currChannel]=W_sub(singularVectors_Left_64[currChannel][currChannel],tmp64);	// exponent +1


@@ -1062,7 +1059,7 @@ static void biDiagonalReductionLeft_64(
}

static void biDiagonalReductionRight_64(
    Word64 singularVectors_Left_64,
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 singularVectors_e,
    Word32 secDiag[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
@@ -1075,7 +1072,7 @@ static void biDiagonalReductionRight_64(
{
    secDiag[currChannel] = ( *g );
    move32();
    secDiag_exp[currChannel] = ( *g_e );
    secDiag_e[currChannel] = ( *g_e );
    move16();

    ( *g ) =0;
@@ -1088,6 +1085,11 @@ static void biDiagonalReductionRight_64(
        Word64 norm_64;
        Word64 abs_x;
        Word16 idx;
        Word32 tmp;
        Word16 tmpe;
        Word16 iCh,jCh;
        Word32 norm_x;
        Word16 norm_x_e;
     

        idx=add(currChannel,1);
@@ -1099,13 +1101,13 @@ static void biDiagonalReductionRight_64(
        tmpe=add(singularVectors_e,1);
        for (jCh=idx;jCh<nChannelsC; jCh++)
        {
            tmp=E_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe));
            tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe));
            norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp));
            abs_x=W_add(abs_x, W_abs(singularVectors_Left_64[jCh][currChannel]));
        }
        tmpe=W_norm(norm_64);
        norm_x=W_extract_h(W_shl(norm_64, tmpe ));
        norm_x_e = add(sub(shl(singularVectors_e, 1), tmp3), 3 );
        norm_x_e = add(sub(shl(singularVectors_e, 1), tmpe), 3 );
        IF (norm_x)
        {
            Word16 invVal_e;
@@ -1113,33 +1115,42 @@ static void biDiagonalReductionRight_64(
            Word64 tmpmul;
            Word16 tmpe2;
            Word64 tmp64;
            Word32 f;
            Word16 f_e;
            Word32 L_temp;
            Word16 L_temp_e;
            Word64 r_64;
            Word32 r;
            Word16 r_e;


            L_temp_e = norm_x_e;
            move16();
            L_temp = Sqrt( norm_x, &L_temp_e);
            IF ( GE_64( singularVectors_Left_64[currChannel][idx] ) )
            L_temp = Sqrt32( norm_x, &L_temp_e);
            IF ( GE_64( singularVectors_Left_64[currChannel][idx],0 ) )
            {
               L_temp = L_negate( L_temp );
            }
            g=L_temp;
            *g=L_temp;
            move32();
            g_e = L_temp_e;
            *g_e = L_temp_e;
            move16();
    
            tmp=W_extract_l(W_shr(singularVectors_Left_64[currChannel][idx],singularVectors_e) );
            tmpe=sub(sub(g_e, singularVectors_e),1);
            tmpmul=W_mult0_32_32(g,tmp);
            tmpe=sub(sub(*g_e, singularVectors_e),1);
            tmpmul=W_mult0_32_32(*g,tmp);
            tmpmul=W_shl(tmpmul,tmpe);
            r_64=W_sub(tmpmul, norm_64 );
            
            tmpe2=W_norm(r_64);
            r=W_extract_h(W_shl(r_64,tmpe2));
            r_e = sub(sub(add(add(1,singularVectors_e),g_e),tmpe,tmpe2));
            r_e = sub(sub(add(add(1,singularVectors_e),*g_e),tmpe),tmpe2);
    
            invVal_e = r_e;
            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e);
    
            tmpe=add(32,sub(singularVectors_e,g_e));	// TODO: maybe the other way around??
            tmp64=W_shr(W_deposit_h(g),tmpe);
            tmp64=W_shr(W_deposit32_h(g),tmpe);
    	    singularVectors_Left_64[currChannel][idx]=W_sub(singularVectors_Left_64[currChannel][idx],tmp64);	// exponent +1


@@ -1164,12 +1175,25 @@ static void biDiagonalReductionRight_64(
                f=Mpy_32_32(norm_x,invVal);
                f_e=add(invVal_e, sub(norm_x_e, r_e ));
    
                for (jCh=currChannel;jCh<nChannelsL; jCh++)
                for (jCh=idx;jCh<nChannelsL; jCh++)
                {
                    tmp2=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],tmpe));
                    singularVectors_Left_64[jCh][iCh]=W_add(singularVectors_Left_64[jCh][iCh],W_mult0_32_32(f,tmp2));	// exponent +1
                }
            }
            invVal_e = 0;
            move16();
            tmpe = W_norm(abs_x);
            invVal = BASOP_Util_Divide3232_Scale_newton( W_extract_h(W_shl(abs_x,tmpe)), r, &invVal_e);
            invVal_e = add(invVal_e, sub(tmpe, r_e));
            tmpe = add(1,singularVectors_e);
            for ( jCh = idx; jCh < nChannelsC ; jCh++)
            {
                secDiag[jCh] = Mpy_32_32( W_extract_l(W_shr(singularVectors_Left_64[currChannel][jCh],tmpe)), invVal);
                move32();
                secDiag_e[jCh]=add(invVal_e, tmpe);
                move16();
            }
        }
    }
    return;