Commit d241230d authored by thomas dettbarn
Browse files

getting closer to the sweet spot.

parent f6c5bac3
Loading
Loading
Loading
Loading
+16 −9
Original line number Diff line number Diff line
@@ -1005,7 +1005,7 @@ static void biDiagonalReductionLeft_64(

#define HEADROOM_LEFT_1 1 
#define HEADROOM_LEFT_2 2 //( HEADROOM_LEFT_1 + 1 )
#define HEADROOM_LEFT_3 3 //( HEADROOM_LEFT_1 + 1 )
#define HEADROOM_LEFT_3 2 //( HEADROOM_LEFT_1 + 1 )

    Word16 iCh, jCh;
    Word32 norm_x;
@@ -1057,7 +1057,8 @@ static void biDiagonalReductionLeft_64(
        r = W_extract_h( W_shl( r_64, r_e ) );

        invVal_e = 0;
        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e );
        move16();
        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );


        tmp_e = sub( 32, *g_e );
@@ -1112,7 +1113,8 @@ static void biDiagonalReductionRight_64(
    Word16 idx;

#define HEADROOM_RIGHT_1 2
#define HEADROOM_RIGHT_2 ( HEADROOM_RIGHT_1 + 1 )
#define HEADROOM_RIGHT_2 2 //( HEADROOM_RIGHT_1 + 1 )
#define HEADROOM_RIGHT_3 2


    ( *g ) = 0;
@@ -1158,9 +1160,9 @@ static void biDiagonalReductionRight_64(
            move32();
            move16();
            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_1 ) );
            tmp_e = sub( *g_e, HEADROOM_RIGHT_1 );
            tmp_e = sub( HEADROOM_RIGHT_1, *g_e );
            tmpmul = W_mult0_32_32( *g, factor2 );
            tmpmul = W_shl( tmpmul, tmp_e );
            tmpmul = W_shr( tmpmul, tmp_e );
            r_64 = W_sub( tmpmul, norm_64 );
            r_e = W_norm( r_64 );
            r = W_extract_h( W_shl( r_64, r_e ) );
@@ -1169,8 +1171,8 @@ static void biDiagonalReductionRight_64(
            move16();
            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );

            magic_shift = 32 - *g_e;
            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), magic_shift ) ); // here, the exponent goes up
            tmp_e = sub( 32, *g_e );
            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up

            FOR( iCh = idx; iCh < nChannelsL; iCh++ )
            {
@@ -1187,13 +1189,18 @@ static void biDiagonalReductionRight_64(
                norm_x_e = W_norm( norm_64 );
                norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
                f = Mpy_32_32( norm_x, invVal );
                magic_shift = 25 + norm_x_e - r_e; // FIXME: Why does this work?
//                magic_shift = 25 + norm_x_e - r_e; // FIXME: Why does this work?
//                magic_shift = norm_x_e-r_e   +33-(HEADROOM_RIGHT_3+2*HEADROOM_RIGHT_2);
                magic_shift = norm_x_e-r_e   +29-(HEADROOM_RIGHT_3);
printf("magic_shift:%3d norm_x_e:%3d r_e:%3d [",magic_shift,norm_x_e,r_e);

                FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                {
                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2 ) );
printf("%3d ",W_norm(singularVectors_Left_64[currChannel][jCh] ) );
                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32-HEADROOM_RIGHT_3 ) );
                    singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) );
                }
printf("]\n");
            }
        }
    }