Commit d2cab7e8 authored by thomas dettbarn
Browse files

finding the right shifter.

parent 5cdc0867
Loading
Loading
Loading
Loading
+35 −16
Original line number Diff line number Diff line
@@ -902,6 +902,16 @@ static void HouseholderReduction_fx(
			nCh
		);


		biDiagonalReductionRight_64(
			singularVectors_Left_64,nCh,
			secDiag_fx,secDiag_fx_e,
			nChannelsL,
			nChannelsC,
			nCh,
			&g_fx,
			&g_e	
		);
		{
			int i,j;
			printf("\nCOMPARE%d, (%d)\x1b[1;32mstart\x1b[0m\n",nCh,singularVectors_Left_e);
@@ -918,16 +928,6 @@ static void HouseholderReduction_fx(
			}
			printf("COMPARE%d \x1b[1;32mend\x1b[0m\n",nCh);
		}

		biDiagonalReductionRight_64(
			singularVectors_Left_64,nCh,
			secDiag_fx,secDiag_fx_e,
			nChannelsL,
			nChannelsC,
			nCh,
			&g_fx,
			&g_e	
		);
	}	
	pop_wmops();
#endif
@@ -946,6 +946,7 @@ static void HouseholderReduction_fx(
    {
#ifdef	MYCHANGES
        biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC, nCh );
        biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &g_fx, &g_e );
		{
			int i,j;
			printf("COMPARE%d, start\n",nCh);
@@ -961,7 +962,6 @@ static void HouseholderReduction_fx(
			}
			printf("COMPARE%d, end\n",nCh);
		}
        biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &g_fx, &g_e );
#else
        biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, singularVectors_Left_fx_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
        biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
@@ -1158,7 +1158,7 @@ static void biDiagonalReductionRight_64(
        tmpe=W_norm(norm_64);
        norm_x=W_extract_h(W_shl(norm_64, tmpe ));
        norm_x_e = add(sub(shl(singularVectors_e, 1), tmpe), 3 );
        tmpe=W_abs(abs_64);
        tmpe=W_norm(abs_64);
        abs_x=W_extract_h(W_shl(abs_64, tmpe ));
        abs_x_e = add(sub(shl(singularVectors_e, 1), tmpe), 3 );
	printf("%016llX --> %08X<%2X   abs_x:%016llX --> %08X<%2X\x1b[0m\n",norm_64,norm_x,norm_x_e,abs_64,abs_x,abs_x_e);
@@ -1189,7 +1189,6 @@ static void biDiagonalReductionRight_64(
            move32();
            *g_e = L_temp_e;
            move16();
    
            tmp=W_extract_l(W_shr(singularVectors_Left_64[currChannel][idx],singularVectors_e+1) );
            tmpe=sub(sub(*g_e, singularVectors_e),1);
            tmpmul=W_mult0_32_32(*g,tmp);
@@ -1235,26 +1234,38 @@ static void biDiagonalReductionRight_64(
                f=Mpy_32_32(norm_x,invVal);
                f_e=add(invVal_e, sub(norm_x_e, r_e ));
		printf("F: \x1b[1;33mnorm:%016llX --> %08X<%2X   %08X<%2X\x1b[0m\n",norm_64,norm_x,norm_x_e,f,f_e);
    
   		printf("singularVectors_Left%d,%d: \x1b[1;35m",currChannel,iCh); 
                for (jCh=idx;jCh<nChannelsC; jCh++)
                {
                    tmp2=W_extract_l(W_shr(singularVectors_Left_64[currChannel][jCh],tmpe));
                    singularVectors_Left_64[iCh][jCh]=W_add(singularVectors_Left_64[iCh][jCh],W_mult0_32_32(f,tmp2));	// exponent +1
			printf("s=0x%016llxll;f=0x%08xll;tmp2=0x%08xll;",singularVectors_Left_64[iCh][jCh],f,tmp2);
if (iCh==1)		    singularVectors_Left_64[iCh][jCh]=W_add(singularVectors_Left_64[iCh][jCh],W_shr(W_mult0_32_32(f,tmp2),37));
else if (iCh==2)		    singularVectors_Left_64[iCh][jCh]=W_add(singularVectors_Left_64[iCh][jCh],W_shr(W_mult0_32_32(f,tmp2),30));
else if (iCh==3)		    singularVectors_Left_64[iCh][jCh]=W_add(singularVectors_Left_64[iCh][jCh],W_shr(W_mult0_32_32(f,tmp2),35));
else if (iCh==4)		    singularVectors_Left_64[iCh][jCh]=W_add(singularVectors_Left_64[iCh][jCh],W_shr(W_mult0_32_32(f,tmp2),34));
else if (iCh==5)		    singularVectors_Left_64[iCh][jCh]=W_add(singularVectors_Left_64[iCh][jCh],W_shr(W_mult0_32_32(f,tmp2),35));
			printf("%016llX ",singularVectors_Left_64[iCh][jCh]);
                }
		printf("\x1b[0m\n");
            }
            invVal_e = 0;
            move16();
            tmpe = W_norm(abs_x);
            invVal = BASOP_Util_Divide3232_Scale_newton( W_extract_h(W_shl(abs_x,tmpe)), r, &invVal_e);
            invVal = BASOP_Util_Divide3232_Scale_newton( abs_x, maxWithSign_fx( r ), &invVal_e);
            invVal_e = add(invVal_e, sub(tmpe, r_e));
		printf("invVal: \x1b[1;36m[%016llx] %08X*%08X = %08X\x1b[0m\n",abs_64,abs_x,r,invVal);
            tmpe = add(1,singularVectors_e);
		printf("SECDIAG: \x1b[1;36m");
            for ( jCh = idx; jCh < nChannelsC ; jCh++)
            {
		printf("[%016llX] ",singularVectors_Left_64[currChannel][jCh]);
                secDiag[jCh] = Mpy_32_32( W_extract_l(W_shr(singularVectors_Left_64[currChannel][jCh],tmpe)), invVal);
                move32();
                secDiag_e[jCh]=add(invVal_e, tmpe);
                move16();
		printf("%08X*%08X= %08X<%2x   ",W_extract_l(W_shr(singularVectors_Left_64[currChannel][jCh],tmpe)), invVal, secDiag[jCh], secDiag_e[jCh]);
            }
		printf("\x1b[0m\n");
        }
    }
    return;
@@ -1655,24 +1666,32 @@ static void biDiagonalReductionRight_fx(
                norm_x = BASOP_Util_Divide3232_Scale_newton( norm_x, r, &invVal_e );
                norm_x_e = add( invVal_e, sub(norm_x_e, r_e ) );
		printf("-> %08X<%2X\n",norm_x,norm_x_e);
   		printf("singularVectors_Left%d,%d: ",currChannel,iCh); 
                FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
                {
                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors2_e[currChannel][jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
			printf("|%08X*%08X|expected=0x%08Xll;",norm_x, singularVectors[currChannel][jCh],singularVectors[iCh][jCh]);
			printf("%08X<%2X  ",singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh]);
                    move32();
                }
		printf("\n");
            }
            invVal_e = 0;
            move16();
            invVal = BASOP_Util_Divide3232_Scale_newton( abs_x, maxWithSign_fx( r ), &invVal_e );
            invVal_e = add(invVal_e, sub( abs_x_e,r_e ) );
		printf("invVal: %08X*%08X = %08X\n",abs_x,r,invVal);
		printf("SECDIAG: ");
            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
            {
                secDiag[jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                move32();
                secDiag_exp[jCh] = add( invVal_e, singularVectors2_e[currChannel][jCh] );
                move16();
		printf("%08X*%08X= %08X<%2x   ",singularVectors[currChannel][jCh], invVal, secDiag[jCh], secDiag_exp[jCh]);
			
            }
		printf("\n");
        }
    }
    return;