Commit f2b20a15 authored by thomas dettbarn's avatar thomas dettbarn
Browse files

Experimented with larger values going through the Householder reduction. It did not help.

parent 035999b6
Loading
Loading
Loading
Loading
+22 −8
Original line number Diff line number Diff line
@@ -882,11 +882,12 @@ static void HouseholderReduction_fx(
	printf("\n");
#ifdef	 MYCHANGES
	push_wmops("HouseholderReduction_fx 64");
#define	SPECIAL			0
    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
    {
        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
        {
            singularVectors_Left_64[jCh][iCh] = W_shr(W_deposit32_h(singularVectors_Left_fx[jCh][iCh]),32);
            singularVectors_Left_64[jCh][iCh] = W_shr(W_deposit32_h(singularVectors_Left_fx[jCh][iCh]),32-SPECIAL);
        }
    }
	for (nCh=0;nCh<nChannelsC;nCh++)
@@ -1037,7 +1038,7 @@ static void HouseholderReduction_fx(
			    tmp=W_extract_h(W_shl(singularVectors_Left_64[i][j],n));
			    y=(unsigned int)tmp;

				n=32+singularVectors_Left_e-n;
				n=32+singularVectors_Left_e-n-SPECIAL;
				mine=n;
				printf("MINE:%2d]",n);
				printf("%08X/%08X ",x,y);
@@ -1104,6 +1105,7 @@ static void biDiagonalReductionLeft_64(
        norm_x_e=W_norm(norm_64);
        norm_x=W_extract_h(W_shl(norm_64, norm_x_e ));
        norm_x_e = add(sub(shl(bitwindow, 1), norm_x_e), 1 );
	printf("NORM: %016llX %08X<%2d\n",norm_64,norm_x,norm_x_e);	
    }
    IF ( norm_x )
    {
@@ -1227,6 +1229,7 @@ static void biDiagonalReductionRight_64(
        abs_x_e = W_norm( abs_64);
        abs_x = W_extract_h( W_shl( abs_64, abs_x_e) );
        abs_x_e = add( sub( add( bitwindow, bitwindow), abs_x_e), 1);
	printf("NORM: %016llx %08x<%2d ABS: %016llx %08x<%2d\n",norm_64,norm_x,norm_x_e,abs_64,abs_x,abs_x_e);

        IF ( norm_x )
        {
@@ -1256,15 +1259,19 @@ static void biDiagonalReductionRight_64(
            *g_e = tmp_g_e;
            move32();
            move16();
	printf("G: %08X<%2d\n",*g,*g_e);
            factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], bitwindow+1) );
            tmp_e = sub( tmp_g_e, bitwindow+1);
            tmpmul = W_mult0_32_32( tmp_g, factor2);
		printf("factor2:%016llX-->%08llX tmpmul:%016llX ",singularVectors_Left_64[currChannel][idx],factor2,tmpmul);
            tmpmul = W_shl(tmpmul, tmp_e);
		printf("%016llX - %016llx\n",tmpmul,norm_64);

            r_64 = W_sub( tmpmul, norm_64 );
            r_64 = W_sub( W_shr(tmpmul,SPECIAL), W_shr(norm_64,SPECIAL) );
            r_e = W_norm( r_64);
            r = W_extract_h( W_shl( r_64, r_e) );
            r_e = sub( add( shl( bitwindow, 1), 1), r_e );
		printf("R: %016llX %08X<%2d\n",r_64,r,r_e);
//	r_e=2*bitwindow+1-r_e;

            invVal_e = r_e;
@@ -1273,6 +1280,7 @@ static void biDiagonalReductionRight_64(

            magic_shift=32-tmp_g_e;
            singularVectors_Left_64[currChannel][idx]=W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g), magic_shift) );
	printf("SING: %016llX\n",singularVectors_Left_64[currChannel][idx]);
            bitwindow=add(bitwindow, 1);
            FOR( iCh = idx; iCh < nChannelsC; iCh++ )
            {
@@ -1281,21 +1289,27 @@ static void biDiagonalReductionRight_64(
                move64();
                FOR ( jCh = idx; jCh<nChannelsL; jCh++ )
                {
                    factor1 = W_extract_l(W_shr( singularVectors_Left_64[iCh][jCh], bitwindow) );
                    factor2 = W_extract_l(W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
                    factor1 = W_extract_l(W_shr( singularVectors_Left_64[iCh][jCh], bitwindow+SPECIAL) );
                    factor2 = W_extract_l(W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow+SPECIAL) );
                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2) );
                }

                norm_x_e = W_norm( norm_64);
                norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
		printf("norm: %016llX %08X<%2d\n",norm_64,norm_x,norm_x_e);
                f = Mpy_32_32( norm_x, invVal);
                f_e = add( invVal_e, sub( norm_x_e, r_e) );
//                magic_shift = -3*currChannel+22-2*norm_x_e+4*r_e+3*f_e;		// FIXME: HOW IS THIS WORKING?????!?!?!?!?!?!?!?!?!?
                magic_shift = 22-2*norm_x_e+4*r_e+3*f_e;		// FIXME: HOW IS THIS WORKING?????!?!?!?!?!?!?!?!?!?
                magic_shift = 22-2*norm_x_e+4*r_e+3*f_e-2*SPECIAL;		// FIXME: HOW IS THIS WORKING?????!?!?!?!?!?!?!?!?!?
		printf("F:%08X<%2d invVal:%08X<%2d\n",f,f_e,invVal,invVal_e);
			printf("sing%02d: (magic:%2d)",iCh,magic_shift);
                FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                {
                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow+SPECIAL) );
			printf("[%016llX %08X ",singularVectors_Left_64[currChannel][jCh],factor2);
			printf("{%016llx+%016llx=%016llx} ",singularVectors_Left_64[iCh][jCh], W_mult0_32_32( f, factor2), singularVectors_Left_64[iCh][jCh]+W_mult0_32_32( f, factor2)>>magic_shift);
                    singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2), magic_shift) );
			printf("%016llx]",singularVectors_Left_64[iCh][jCh]);
                }
            }
            // FIXME BEGIN: The following code has not yet been tested