Commit 8a9e1044 authored by thomas dettbarn's avatar thomas dettbarn
Browse files

brought it to the same level as dettbarn_exp4_-_lib_dec

parent 4bebff46
Loading
Loading
Loading
Loading
+297 −52
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@
   the United Nations Convention on Contracts on the International Sales of Goods.

*******************************************************************************************************/

#define	MYCHANGES
#include <stdint.h>
#include "options.h"
#include "prot_fx.h"
@@ -65,7 +65,7 @@ static void HouseholderReduction_fx(
    const Word16 nChannelsC, /* Q0 */
    Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
    Word16 *eps_x_fx_e );

#ifdef	MYCHANGES
static void biDiagonalReductionLeft_fx(
    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
@@ -92,6 +92,36 @@ static void biDiagonalReductionRight_fx(
    Word16 *g_e 
);           


#else
static void biDiagonalReductionLeft_fx(
    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
    Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    Word16 *secDiag_e,
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
    Word32 *sig_x,            /* exp(sig_x_e) */
    Word16 *sig_x_e,
    Word32 *g /* Q31 */
);

static void biDiagonalReductionRight_fx(
    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
    Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
    Word16 *secDiag_e,
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
    Word32 *sig_x,            /* exp(sig_x_e) */
    Word16 *sig_x_e,
    Word32 *g /* Q31 */
);            // Q31
#endif
static void singularVectorsAccumulationLeft_fx(
    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
    Word32 singularValues[MAX_OUTPUT_CHANNELS],         /* exp(singularValues_e) */
@@ -819,11 +849,19 @@ static void HouseholderReduction_fx(
    Word16 *eps_x_fx_e )
{
    Word16 nCh;
    // float g = 0.0f, sig_x = 0.0f;// to be removed
#ifdef	MYCHANGES
    Word32 g_fx = 0;
    Word16 g_e = 0;
    move32();
    move16();
#else
    // float g = 0.0f, sig_x = 0.0f;// to be removed
    Word32 g_fx = 0, sig_x_fx = 0;
    move32();
    move32();
    Word16 sig_x_fx_e = 0;
    move16();
#endif

    Word16 iCh, jCh;
    Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
@@ -839,8 +877,13 @@ static void HouseholderReduction_fx(
    /* Bidiagonal Reduction for every channel */
    FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
    {
#ifdef	MYCHANGES
        biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, singularVectors_Left_fx_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, g_fx, g_e );
        biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &g_fx, &g_e );
#else
        biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, singularVectors_Left_fx_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
        biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
#endif

        Word16 L_temp_e;
        Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
@@ -867,7 +910,7 @@ static void HouseholderReduction_fx(
 *
 *
 *-------------------------------------------------------------------------*/

#ifdef	MYCHANGES
static void biDiagonalReductionLeft_fx(
    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
@@ -885,6 +928,7 @@ static void biDiagonalReductionLeft_fx(
    Word16 iCh, jCh, idx;
    Word32 norm_x, f, r;
    Word16 norm_x_e, f_e, r_e;
	Word64 norm_64;
    Word32 L_temp;
    Word16 L_temp_e;

@@ -907,9 +951,12 @@ static void biDiagonalReductionLeft_fx(
        move32();
        norm_x_e = 0;
        move16();
	norm_64 = 0;
	move64();
        FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
        {
            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
		norm_64 = W_add(norm_64, W_mult0_32_32(singularVectors[jCh][currChannel], singularVectors[jCh][currChannel]));
        }

        IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
@@ -959,7 +1006,6 @@ static void biDiagonalReductionLeft_fx(
                }
            }


        }

        // rescaling block
@@ -967,37 +1013,145 @@ static void biDiagonalReductionLeft_fx(
        move32();
        singularValues_e[currChannel] = g_e;
        move16();
	printf("\nsecDiag[%2d]: %08X<%2x  singularvalues:%08X<%2x\n",currChannel,secDiag[currChannel],secDiag_e[currChannel],singularValues[currChannel],singularValues_e[currChannel]);
    }

    return;
}


#else
static void biDiagonalReductionLeft_fx(
    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
    Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    Word16 *secDiag_e,
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
    Word32 *sig_x,            /* exp(sig_x_e) */
    Word16 *sig_x_e,
    Word32 *g /* Q31 */
)
{
    Word16 iCh, jCh, idx;
    Word32 norm_x, f, r;
    Word16 norm_x_e, f_e, r_e;
    Word32 L_temp;
    Word16 L_temp_e;

    secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); /* exp(sig_x_e) */
    move32();
    secDiag_e[currChannel] = *sig_x_e;
    move16();

    /* Setting values to 0 */
    ( *sig_x ) = 0;
    move32();
    ( *g ) = 0;
    move32();

    IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
    {
        idx = currChannel;
        move16();

        FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
        {
            ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */
        }

        IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
        {
	    int i,j;
	    printf("\n\x1b[1;37;41mLEFT\x1b[0m\n");
	    for (i=0;i<MAX_OUTPUT_CHANNELS;i++)
            Word16 invVal_e;
            Word32 invVal;
            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
            norm_x = 0;
            move32();
            norm_x_e = 0;
            move16();
            FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
            {
		    for (j=0;j<MAX_OUTPUT_CHANNELS;j++)
                Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
                singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                move32();
                singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
                move16();
                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
            }
            IF( GT_16( norm_x_e, 0 ) )
            {
			    printf("%08X<%2x  ",singularVectors[i][j],singularVectors2_e[i][j]);
                norm_x = MAX_32;
                move32();
                norm_x_e = 0;
                move16();
            }
		    printf("\n");
            L_temp_e = norm_x_e;
            move16();
            L_temp = Sqrt32( norm_x, &L_temp_e );
            L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
                                                  //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
            if ( singularVectors[currChannel][idx] >= 0 )
            {
                L_temp = L_negate( L_temp );
            }
	    printf("\x1b[0m\n");
    	printf("\x1b[1;37;44m");
            ( *g ) = L_temp;
            move32();

            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
            singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
            move32();

            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );

            FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
            {
                norm_x = 0;
                move32();
                norm_x_e = 0;
                move16();
                FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
                {
		printf("%08X<%2x  ",singularVectors[jCh][currChannel+1], singularVectors2_e[jCh][currChannel+1]);
                    norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */
                }

                f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
                f_e = add( invVal_e, sub( norm_x_e, r_e ) );

                FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
                {
                    singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] );
                    move32();
                }
	printf("\n");
            }

    return;

            FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
            {
                singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], ( *sig_x ) ); /* sing_exp + sig_x_e */
                move32();
                singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e );
                move16();
            }
        }

        // rescaling block
        singularValues[currChannel] = Mpy_32_32( ( *sig_x ), ( *g ) ); /* sig_x_e */
        move32();
        singularValues_e[currChannel] = *sig_x_e;
        move16();
    }

    return;
}
#endif
/*-------------------------------------------------------------------------
 * biDiagonalReductionRight()
 *
 *
 *-------------------------------------------------------------------------*/

#ifdef	MYCHANGES
static void biDiagonalReductionRight_fx(
    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_exp[]) */
@@ -1045,17 +1199,9 @@ static void biDiagonalReductionRight_fx(
   
            Word16 invVal_e, temp_e;
            Word32 invVal;
//            IF( GT_16( norm_x_e, 0 ) )
//            {
//                norm_x = MAX_32;
//                move32();
//                norm_x_e = 0;
//                move16();
//            }
            L_temp_e = norm_x_e;
            move16();
            L_temp = Sqrt32( norm_x, &L_temp_e );
//            L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
            IF( singularVectors[currChannel][idx] >= 0 )
            {
                ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
@@ -1078,24 +1224,15 @@ static void biDiagonalReductionRight_fx(
            invVal_e = 0;
            move16();
            invVal = BASOP_Util_Divide3232_Scale_newton( abs_x, maxWithSign_fx( r ), &invVal_e );
		printf("invVal_e:%d abs_x_e:%d singularVectors2_e[currChannel][jCh]:%d r_e:%d\n",invVal_e,abs_x_e,singularVectors2_e[currChannel][idx], r_e );
            invVal_e = add(invVal_e, sub( abs_x_e,r_e ) );
		printf("\x1b[1;37;43m secdiag: ");
            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
            {
//                temp_e = norm_l( singularVectors[currChannel][jCh] );
//                secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
//                move32();
//                secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], r_e ) );
//                move16();
                secDiag[jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                move32();
                secDiag_exp[jCh] = add( invVal_e, singularVectors2_e[currChannel][jCh] );
                move16();
		printf("%08X<%2x  ",secDiag[jCh],secDiag_exp[jCh]);
			
            }
		printf("\x1b[0m\n");
            FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /*  nChannelsL */
            {
                norm_x = 0;
@@ -1117,22 +1254,130 @@ static void biDiagonalReductionRight_fx(

        }
    }
    return;
}
#else
static void biDiagonalReductionRight_fx(
    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_exp[]) */
    Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
    Word16 *secDiag_exp,
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
    Word32 *sig_x,            /* exp(sig_x_e) */
    Word16 *sig_x_e,
    Word32 *g /* Q31 */
)
{
	    int i,j;
	    printf("\n\x1b[1;37;42mRIGHT\x1b[0m\n");
	    for (i=0;i<MAX_OUTPUT_CHANNELS;i++)
    Word16 iCh, jCh, idx;
    Word32 norm_x, r;
    Word16 norm_x_e, r_e;
    Word32 L_temp;
    Word16 L_temp_e;

    /* Setting values to 0 */
    ( *sig_x ) = 0;
    move32();
    ( *g ) = 0;
    move32();

    IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
    {
		    for (j=0;j<MAX_OUTPUT_CHANNELS;j++)
        idx = add( currChannel, 1 ); /* Q0 */

        FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
        {
			    printf("%08X<%2x  ",singularVectors[i][j],singularVectors2_e[i][j]);
            ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), singularVectors2_e[currChannel][jCh], sig_x_e ); /* exp(sig_x_e) */
        }
		    printf("\n");

        IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
        {
            norm_x = 0;
            move32();
            norm_x_e = 0;
            move16();

            Word16 invVal_e, temp_e;
            Word32 invVal;
            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
            {
                temp_e = norm_l( singularVectors[currChannel][jCh] );
                singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                move32();
                singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
                move16();
                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
            }
	    printf("\x1b[0m\n");
            IF( GT_16( norm_x_e, 0 ) )
            {
                norm_x = MAX_32;
                move32();
                norm_x_e = 0;
                move16();
            }
    return;
            L_temp_e = norm_x_e;
            move16();
            L_temp = Sqrt32( norm_x, &L_temp_e );
            L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
            IF( singularVectors[currChannel][idx] >= 0 )
            {
                ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
                move32();
            }
            ELSE
            {
                ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */
                move32();
            }

            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
            singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* exp(sing_exp) */
            move32();

            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );

            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
            {
                temp_e = norm_l( singularVectors[currChannel][jCh] );
                secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
                move32();
                secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], r_e ) );
                move16();
            }

            FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /*  nChannelsL */
            {
                norm_x = 0;
                move32();
                norm_x_e = 0;
                move16();
                FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
                {
                    norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
                }

                FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
                {
                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
                    move32();
                }
            }

            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
            {
                singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], ( *sig_x ) ); /* exp(sing_exp + sig_x_e) */
                move32();
                singularVectors2_e[currChannel][jCh] = add( singularVectors2_e[currChannel][jCh], *sig_x_e );
                move16();
            }
        }
    }

    return;
}
#endif
/*-------------------------------------------------------------------------
 * singularVectorsAccumulationLeft()
 *