Commit 3e4b8b66 authored by thomas dettbarn's avatar thomas dettbarn
Browse files

brought this branch up to speed with the merge request.

parent 743d876d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -148,4 +148,5 @@
#define FIX_1824
#define FIX_1822

#define	MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE /* FhG: reduce WMOPS of HouseholderReduction_fx() in ivas_svd_dec.c by removing redundant mathematics and using 64-bit additions. */
#endif
+165 −169
Original line number Diff line number Diff line
@@ -29,7 +29,6 @@
   the United Nations Convention on Contracts on the International Sales of Goods.

*******************************************************************************************************/
#define	MYCHANGES
#include <stdint.h>
#include "options.h"
#include "prot_fx.h"
@@ -65,26 +64,22 @@ static void HouseholderReduction_fx(
    const Word16 nChannelsC, /* Q0 */
    Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
    Word16 *eps_x_fx_e );
#ifdef MYCHANGES
#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
static void biDiagonalReductionLeft_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 bitwindow,
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel /* Q0 */
);
    const Word16 currChannel, /* Q0 */
    Word32 *g,                /* Q31 */
    Word16 *g_e );

static void biDiagonalReductionRight_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 bitwindow,
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
    Word32 *g,                /* Q31 */
    Word16 *g_e
);
    Word16 *g_e );
#else
static void biDiagonalReductionLeft_fx(
    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
@@ -874,12 +869,15 @@ static void HouseholderReduction_fx(
    Word16 *eps_x_fx_e )
{
    Word16 nCh;
	push_wmops("HouseholderReduction_fx");
#ifdef	MYCHANGES
#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE

    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    Word32 g_fx = 0;
    Word16 g_e = 0;
    Word32 g_left_fx = 0;
    Word16 g_left_e = 0;
    move32();
    move16();
    Word32 g_right_fx = 0;
    Word16 g_right_e = 0;
    move32();
    move16();

@@ -895,8 +893,7 @@ static void HouseholderReduction_fx(

    Word16 iCh, jCh;
    Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
#ifdef	 MYCHANGES
	push_wmops("HouseholderReduction_fx 64");
#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
    {
        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
@@ -906,28 +903,26 @@ static void HouseholderReduction_fx(
    }
    for ( nCh = 0; nCh < nChannelsC; nCh++ )
    {
	    Word16 bitwindow;
	    bitwindow=1;
        biDiagonalReductionLeft_64(
			    singularVectors_Left_64,bitwindow,
			    singularValues_fx,singularValues_fx_e,
            singularVectors_Left_64,
            nChannelsL,
            nChannelsC,
			    nCh
			    );
	    singularValues_fx_e[nCh]=add(singularVectors_Left_e,singularValues_fx_e[nCh]);
	    secDiag_fx[nCh]=g_fx;
            nCh,
            &g_left_fx,
            &g_left_e );
        singularValues_fx[nCh] = g_left_fx;
        move32();
        singularValues_fx_e[nCh] = add( singularVectors_Left_e, g_left_e );
        secDiag_fx[nCh] = g_right_fx; /* from the previous channel */
        move32();
	    secDiag_fx_e[nCh]=add(singularVectors_Left_e,g_e);
	    bitwindow=2;
        secDiag_fx_e[nCh] = add( singularVectors_Left_e, g_right_e );
        biDiagonalReductionRight_64(
			    singularVectors_Left_64,bitwindow,
            singularVectors_Left_64,
            nChannelsL,
            nChannelsC,
            nCh,
			    &g_fx,
			    &g_e	
			    );
            &g_right_fx,
            &g_right_e );
        {
            Word16 L_temp_e;
            Word32 L_temp;
@@ -954,9 +949,8 @@ static void HouseholderReduction_fx(
            }
        }
    }
	pop_wmops();
#else
	push_wmops("HouseholderReduction_fx 32");

    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
    {
        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
@@ -982,45 +976,42 @@ static void HouseholderReduction_fx(
            move32();
        }
    }
    pop_wmops();
#endif



    /* Singular vector accumulation */
    singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );


    singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
	pop_wmops();

    return;
}

#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
/*-------------------------------------------------------------------------
 * biDiagonalReductionLeft_64()
 *
 *
 *-------------------------------------------------------------------------*/
#ifdef	MYCHANGES

static void biDiagonalReductionLeft_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 bitwindow,
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel /* Q0 */
)
    const Word16 currChannel, /* Q0 */
    Word32 *g,
    Word16 *g_e )
{


#define HEADROOM_LEFT_1 1
#define HEADROOM_LEFT_2 ( HEADROOM_LEFT_1 + 1 )

    Word16 iCh, jCh;
    Word32 norm_x, g;
    Word16 norm_x_e, g_e;
    Word32 norm_x;
    Word16 norm_x_e;
    Word64 norm_64;
    g=0;
    g_e=0;
    ( *g ) = 0;
    ( *g_e ) = 0;
    move32();
    move16();
    norm_x = 0;
@@ -1033,12 +1024,12 @@ static void biDiagonalReductionLeft_64(
        move64();
        FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
        {
            tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],bitwindow));
            tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_1 ) );
            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );
        }
        norm_x_e = W_norm( norm_64 );
        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
        norm_x_e = add(sub(shl(bitwindow, 1), norm_x_e), 1 );
        norm_x_e = add( sub( ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ), norm_x_e ), 1 );
    }
    IF( norm_x )
    {
@@ -1050,31 +1041,30 @@ static void biDiagonalReductionLeft_64(
        Word32 r, invVal;
        Word16 r_e, invVal_e;

        g_e = norm_x_e;
        ( *g_e ) = norm_x_e;
        move16();
        g = Sqrt32( norm_x, &g_e);
        ( *g ) = Sqrt32( norm_x, g_e );
        IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
        {
           g = L_negate( g );
            ( *g ) = L_negate( *g );
        }
        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], bitwindow) );
        tmp_e=sub( g_e, bitwindow) ;
        tmpmul=W_mult0_32_32( g, factor2);
        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_1 ) );
        tmp_e = sub( ( *g_e ), HEADROOM_LEFT_1 );
        tmpmul = W_mult0_32_32( ( *g ), factor2 );
        tmpmul = W_shl( tmpmul, tmp_e );
        r_64 = W_sub( tmpmul, norm_64 );
        r_e = W_norm( r_64 );
        r = W_extract_h( W_shl( r_64, r_e ) );
        r_e = sub( add( 1, add(bitwindow, bitwindow )), r_e );
        r_e = sub( add( 1, ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ) ), r_e );


        invVal_e = r_e;
        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e );


        tmp_e = add(31, sub(bitwindow, g_e ) );
        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( g), tmp_e) ); // here, the exponent goes up.
        tmp_e = add( 31, sub( HEADROOM_LEFT_1, *g_e ) );
        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up.

        bitwindow=add(bitwindow, 1); // so does the bit window
        FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ )
        {
            Word32 factor1;
@@ -1085,8 +1075,8 @@ static void biDiagonalReductionLeft_64(
            norm_64 = 0;
            for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
            {
                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], bitwindow));
                factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], bitwindow));
                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2 ) );
                factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], HEADROOM_LEFT_2 ) );
                norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
            }
            norm_x_e = W_norm( norm_64 );
@@ -1096,26 +1086,26 @@ static void biDiagonalReductionLeft_64(
            {
                Word16 magic_shift;
                magic_shift = add( add( norm_x_e, 23 ), r_e );
                 factor1 = W_extract_l( W_shr(singularVectors_Left_64[jCh][currChannel], bitwindow ) );
                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2 ) );
                singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) );
            }
        }
    }
    singularValues[currChannel] = g;
    singularValues_e[currChannel] = g_e;
    move32();
    move16();
}

/*-------------------------------------------------------------------------
 * biDiagonalReductionRight_64()
 *
 *
 *-------------------------------------------------------------------------*/

static void biDiagonalReductionRight_64(
    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
    Word16 bitwindow,
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
    Word32 *g,                /* Q31 */
    Word16 *g_e
)
    Word16 *g_e )
{
    Word16 iCh, jCh;
    Word32 norm_x;
@@ -1123,7 +1113,8 @@ static void biDiagonalReductionRight_64(
    Word64 norm_64;
    Word16 idx;


#define HEADROOM_RIGHT_1 2
#define HEADROOM_RIGHT_2 ( HEADROOM_RIGHT_1 + 1 )


    ( *g ) = 0;
@@ -1138,12 +1129,12 @@ static void biDiagonalReductionRight_64(
        FOR( jCh = idx; jCh < nChannelsC; jCh++ )
        {
            Word32 tmp;
            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1 ) );
            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );
        }
        norm_x_e = W_norm( norm_64 );
        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
        norm_x_e = add( sub( shl( bitwindow, 1), norm_x_e), 1);
        norm_x_e = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 );
        move16();

        IF( norm_x )
@@ -1173,8 +1164,8 @@ static void biDiagonalReductionRight_64(
            *g_e = tmp_g_e;
            move32();
            move16();
            factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], bitwindow) );
            tmp_e = sub( tmp_g_e, bitwindow);
            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_1 ) );
            tmp_e = sub( tmp_g_e, HEADROOM_RIGHT_1 );
            tmpmul = W_mult0_32_32( tmp_g, factor2 );
            tmpmul = W_shl( tmpmul, tmp_e );
            r_64 = W_sub( tmpmul, norm_64 );
@@ -1186,8 +1177,7 @@ static void biDiagonalReductionRight_64(
            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );

            magic_shift = 32 - tmp_g_e;
            singularVectors_Left_64[currChannel][idx]=W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g), magic_shift) );
            bitwindow=add(bitwindow, 1);
            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g ), magic_shift ) ); // here, the exponent goes up

            FOR( iCh = idx; iCh < nChannelsL; iCh++ )
            {
@@ -1196,8 +1186,8 @@ static void biDiagonalReductionRight_64(
                move64();
                FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                {
                    factor1 = W_extract_l(W_shr( singularVectors_Left_64[iCh][jCh], bitwindow) );
                    factor2 = W_extract_l(W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
                    factor1 = W_extract_l( W_shr( singularVectors_Left_64[iCh][jCh], HEADROOM_RIGHT_2 ) );
                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2 ) );
                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
                }

@@ -1208,7 +1198,7 @@ static void biDiagonalReductionRight_64(

                FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                {
                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2 ) );
                    singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) );
                }
            }
@@ -1216,6 +1206,12 @@ static void biDiagonalReductionRight_64(
    }
}
#else
/*-------------------------------------------------------------------------
 * biDiagonalReductionLeft()
 *
 *
 *-------------------------------------------------------------------------*/

static void biDiagonalReductionLeft_fx(
    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
@@ -1248,7 +1244,6 @@ static void biDiagonalReductionLeft_fx(
    ( *g ) = 0;
    move32();

	
    IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
    {
        idx = currChannel;
@@ -1258,6 +1253,7 @@ static void biDiagonalReductionLeft_fx(
        {
            ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */
        }

        IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
        {
            Word16 invVal_e;
@@ -1273,7 +1269,6 @@ static void biDiagonalReductionLeft_fx(
            norm_x_e = 0;
#endif /* OPT_MCH_DEC_V1_NBE */
            move16();

            FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
            {
                Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
@@ -1497,16 +1492,17 @@ static void biDiagonalReductionRight_fx(
                {
                    norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
                }

                FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
                {
                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
                    move32();
                }
            }

            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
            {
                singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], ( *sig_x ) ); /* exp(sing_exp + sig_x_e) */
		
                move32();
                singularVectors2_e[currChannel][jCh] = add( singularVectors2_e[currChannel][jCh], *sig_x_e );
                move16();