Commit 617c0edf authored by thomas dettbarn's avatar thomas dettbarn
Browse files

added comments to track the Q and exponents.

parent 7d9a7a48
Loading
Loading
Loading
Loading
Loading
+36 −38
Original line number Diff line number Diff line
@@ -995,7 +995,7 @@ static void HouseholderReduction_fx(
 *-------------------------------------------------------------------------*/

static void biDiagonalReductionLeft_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],		// q(sing)	exp(sing)
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
@@ -1027,12 +1027,12 @@ static void biDiagonalReductionLeft_64(
        move64();
        FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
        {
            tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_1 ) );
            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );
            tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_1 ) );	// q(sing)-H1			// exp(sing)+H1
            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );					// q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
        }
        norm_x_e = W_norm( norm_64 );			
        norm_x_e0 = W_norm( norm_64 );
        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// q(norm_x)=32-exp(norm_x)	exp(norm_x)=exp(norm)-32
    }
    IF( norm_x )
    {
@@ -1040,51 +1040,50 @@ static void biDiagonalReductionLeft_64(
        Word16 tmp_e;
        Word64 tmpmul;

        Word64 r_64; //  = sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm OR -sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm
        Word64 r_64; 
        Word32 r, invVal;
        Word16 r_e, invVal_e;

        ( *g_e ) = add( sub( ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ), norm_x_e ), 1 );
        ( *g_e ) = add( sub( add( HEADROOM_LEFT_1, HEADROOM_LEFT_1 ), norm_x_e ), 1 );		// exp(g)=(2*H1-exp(norm_x)+1)
        move16();
        ( *g ) = Sqrt32( norm_x, g_e );
        ( *g ) = Sqrt32( norm_x, g_e );								// --> exp(g)=((2*H1-exp(norm_x)+1)/2)
        IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
        {
            ( *g ) = L_negate( *g );
        }
        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_2 ) );
        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_2 ) );	// q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
        tmp_e = sub( 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2, ( *g_e ) );						
        tmpmul = W_mult0_32_32( ( *g ), factor2 );
        tmpmul = W_shr( tmpmul, tmp_e );
        r_64 = W_sub( tmpmul, norm_64 );
        tmpmul = W_mult0_32_32( ( *g ), factor2 );	// q(tmpmul)=q(g)+q(factor2)
        tmpmul = W_shr( tmpmul, tmp_e );		// --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g))
        r_64 = W_sub( tmpmul, norm_64 );		// q(r_64)=max(q(tmpmul),q(norm))
        r_e = W_norm( r_64 );
        r = W_extract_h( W_shl( r_64, r_e ) );

        invVal_e = 0;
        move16();
        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );	// invVal=1/r --> q(invVal)=-q(r)


        tmp_e = sub( 32, *g_e );
        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up.
        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r))

        FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ )
        {
            Word32 factor1;
            Word32 factor2;
            Word32 f; // = norm / r
                      //            Word16 f_e;      // not really needed
            Word16 magic_shift;

            norm_64 = 0;
            for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
            {
                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) );
                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - HEADROOM_LEFT_3 ) );
                norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) );	// q(factor1) = q(sing)-H3
                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - HEADROOM_LEFT_3 ) );		// q(factor2) = q(sing)-H3
                norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );						// q(norm)=2*q(sing)-2*H3
            }
            norm_x_e = W_norm( norm_64 );
            norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
            f = Mpy_32_32( norm_x, invVal );
            norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// Note: different norm
            f = Mpy_32_32( norm_x, invVal );		// q(f)=q(norm_x)-q(invVal)
            magic_shift = ( norm_x_e - 2 * HEADROOM_LEFT_3 ) - ( r_e - 2 * HEADROOM_LEFT_1 ) + ( 32 - HEADROOM_LEFT_4 ) - 2 * invVal_e;
            FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
            {
@@ -1134,12 +1133,12 @@ static void biDiagonalReductionRight_64(
        FOR( jCh = idx; jCh < nChannelsC; jCh++ )
        {
            Word32 tmp;
            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1 ) );
            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );
            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1 ) );	// q(sing)-H1			// exp(sing)+H1	
            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );					// q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
        }
        norm_x_e = W_norm( norm_64 );
        norm_x_e0 = W_norm( norm_64 );
        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// q(norm_x)=32-exp(norm_x)	exp(norm_x)=exp(norm)-32
        move16();

        IF( norm_x )
@@ -1156,29 +1155,28 @@ static void biDiagonalReductionRight_64(
            Word32 invVal;
            Word16 invVal_e;

            ( *g_e ) = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 );
            ( *g_e ) = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 );	// exp(g)=(2*H1-exp(norm_x)+1)
            move16();
            ( *g ) = Sqrt32( norm_x, g_e );
            ( *g ) = Sqrt32( norm_x, g_e );							// --> exp(g)=((2*H1-exp(norm_x)+1)/2)
            IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) )
            {
                ( *g ) = L_negate( *g );
            }
            move32();
            move16();
            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) );
            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) );	// q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
            tmp_e = sub( 2 * HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2, *g_e );
            tmpmul = W_mult0_32_32( *g, factor2 );
            tmpmul = W_shr( tmpmul, tmp_e );
            r_64 = W_sub( tmpmul, norm_64 );
            tmpmul = W_mult0_32_32( *g, factor2 );	// q(tmpmul)=q(g)+q(factor2)
            tmpmul = W_shr( tmpmul, tmp_e );		// --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g))
            r_64 = W_sub( tmpmul, norm_64 );		// q(r_64)=max(q(tmpmul),q(norm))
            r_e = W_norm( r_64 );
            r = W_extract_h( W_shl( r_64, r_e ) );

            invVal_e = 0;
            move16();
            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );

            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );	// invVal=1/r --> q(invVal)=-q(r)
            tmp_e = sub( 32, *g_e );
            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up
            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) );	// q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r)))

            FOR( iCh = idx; iCh < nChannelsL; iCh++ )
            {
@@ -1187,14 +1185,14 @@ static void biDiagonalReductionRight_64(
                move64();
                FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                {
                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - HEADROOM_RIGHT_3 ) );
                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) );
                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - HEADROOM_RIGHT_3 ) );		// q(factor1) = q(sing)-H3
                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) );	// q(factor2) = q(sing)-H3
                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );					// q(norm)=2*q(sing)-2*H3
                }

                norm_x_e = W_norm( norm_64 );
                norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
                f = Mpy_32_32( norm_x, invVal );
                norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// Note: different norm
                f = Mpy_32_32( norm_x, invVal );	// q(f)=q(norm_x)-q(invVal)
                magic_shift = ( norm_x_e - 2 * HEADROOM_RIGHT_3 ) - ( r_e - 2 * HEADROOM_RIGHT_1 ) + ( 32 - HEADROOM_RIGHT_4 ) - 2 * invVal_e;

                FOR( jCh = idx; jCh < nChannelsC; jCh++ )