Commit 8d095e08 authored by thomas dettbarn's avatar thomas dettbarn
Browse files

first draft of the optimized householder reduction.

parent f40fa467
Loading
Loading
Loading
Loading
+145 −3
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@
   the United Nations Convention on Contracts on the International Sales of Goods.

*******************************************************************************************************/
//#define	MYCHANGES

#include <stdint.h>
#include "options.h"
@@ -58,8 +59,11 @@
 *-----------------------------------------------------------------------*/

static float GivensRotation( const float x, const float z );

/* The prototype of biDiagonalReductionLeft() depends on the build variant:
 * with MYCHANGES defined, sig_x and g are passed by value (the function cannot
 * hand results back through them); otherwise they are passed by pointer. */
#ifdef MYCHANGES
static void biDiagonalReductionLeft( float singularVectors[][MAX_OUTPUT_CHANNELS], float singularValues[MAX_OUTPUT_CHANNELS], float secDiag[MAX_OUTPUT_CHANNELS], const int16_t nChannelsL, const int16_t nChannelsC, const int16_t currChannel, float sig_x, float g );
#else
static void biDiagonalReductionLeft( float singularVectors[][MAX_OUTPUT_CHANNELS], float singularValues[MAX_OUTPUT_CHANNELS], float secDiag[MAX_OUTPUT_CHANNELS], const int16_t nChannelsL, const int16_t nChannelsC, const int16_t currChannel, float *sig_x, float *g );
#endif

static void biDiagonalReductionRight( float singularVectors[][MAX_OUTPUT_CHANNELS], float secDiag[MAX_OUTPUT_CHANNELS], const int16_t nChannelsL, const int16_t nChannelsC, const int16_t currChannel, float *sig_x, float *g );

@@ -493,7 +497,11 @@ static void HouseholderReduction(
    /* Bidiagonal Reduction for every channel */
    for ( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
    {
#ifdef	MYCHANGES
        biDiagonalReductionLeft( singularVectors_Left, singularValues, secDiag, nChannelsL, nChannelsC, nCh, sig_x, g );
#else
        biDiagonalReductionLeft( singularVectors_Left, singularValues, secDiag, nChannelsL, nChannelsC, nCh, &sig_x, &g );
#endif
        biDiagonalReductionRight( singularVectors_Left, secDiag, nChannelsL, nChannelsC, nCh, &sig_x, &g );
        *eps_x = max( *eps_x, ( fabsf( singularValues[nCh] ) + fabsf( secDiag[nCh] ) ) );
    }
@@ -512,6 +520,71 @@ static void HouseholderReduction(
 *
 *-------------------------------------------------------------------------*/

#ifdef	MYCHANGES
/* MYCHANGES variant of the left-side (column) Householder step.
 *
 * singularVectors : matrix updated in place; column currChannel (rows
 *                   currChannel..nChannelsL-1) ends up holding the reflector,
 *                   columns currChannel+1..nChannelsC-1 are transformed with it
 * singularValues  : entry currChannel receives the new diagonal value
 *                   (0 when the column part is entirely zero)
 * secDiag         : entry currChannel receives sig_x * g
 * nChannelsL      : row count used by the loops
 * nChannelsC      : column count used by the loops
 * currChannel     : index of the column being reduced
 * sig_x, g        : passed by value; only their product is consumed
 */
static void biDiagonalReductionLeft(
    float singularVectors[][MAX_OUTPUT_CHANNELS],
    float singularValues[MAX_OUTPUT_CHANNELS],
    float secDiag[MAX_OUTPUT_CHANNELS],
    const int16_t nChannelsL,
    const int16_t nChannelsC,
    const int16_t currChannel,
    float sig_x,
    float g )
{
    int16_t row, col;
    float absSum, sqSum, dot, pivot, diag, r, f;

    /* The incoming pair is only needed for the secondary-diagonal entry. */
    secDiag[currChannel] = sig_x * g;

    /* No rows left below/at the diagonal: nothing else to do. */
    if ( currChannel >= nChannelsL )
    {
        return;
    }

    /* Absolute column sum, used solely to detect an all-zero column part. */
    absSum = 0.0f;
    for ( row = currChannel; row < nChannelsL; row++ )
    {
        absSum += fabsf( singularVectors[row][currChannel] );
    }

    diag = 0.0f;

    if ( absSum != 0.0f )
    {
        /* Squared Euclidean norm of the column part. */
        sqSum = 0.0f;
        for ( row = currChannel; row < nChannelsL; row++ )
        {
            sqSum += ( singularVectors[row][currChannel] * singularVectors[row][currChannel] );
        }

        /* The new diagonal value takes the opposite sign of the pivot. */
        pivot = singularVectors[currChannel][currChannel];
        diag = ( pivot >= 0 ? -1.0f : 1.0f ) * sqrtf( sqSum );
        r = diag * pivot - sqSum;
        singularVectors[currChannel][currChannel] = ( pivot - diag );

        /* Apply the reflector to every column to the right of the current one. */
        for ( col = currChannel + 1; col < nChannelsC; col++ )
        {
            dot = 0.0f;
            for ( row = currChannel; row < nChannelsL; row++ )
            {
                dot += ( singularVectors[row][currChannel] * singularVectors[row][col] );
            }

            f = dot / maxWithSign( r );

            for ( row = currChannel; row < nChannelsL; row++ )
            {
                singularVectors[row][col] += ( f * singularVectors[row][currChannel] );
            }
        }
    }

    singularValues[currChannel] = diag;

    return;
}

#else

static void biDiagonalReductionLeft(
    float singularVectors[][MAX_OUTPUT_CHANNELS],
    float singularValues[MAX_OUTPUT_CHANNELS],
@@ -582,14 +655,83 @@ static void biDiagonalReductionLeft(

    return;
}

#endif

/*-------------------------------------------------------------------------
 * biDiagonalReductionRight()
 *
 *
 *-------------------------------------------------------------------------*/
#ifdef	MYCHANGES
/* MYCHANGES variant of the right-side (row) Householder step.
 *
 * singularVectors : matrix updated in place; row currChannel (columns
 *                   currChannel+1..nChannelsC-1) ends up holding the reflector,
 *                   rows currChannel+1..nChannelsL-1 are transformed with it
 * secDiag         : entries currChannel+1..nChannelsC-1 are used as scratch
 *                   for the reflector divided by r
 * nChannelsL      : row count used by the loops
 * nChannelsC      : column count used by the loops
 * currChannel     : index of the row being reduced
 * sig_x           : out; absolute sum of the row part (0 if nothing was done)
 * g               : out; reflector value divided by *sig_x (0 if nothing was done)
 */
static void biDiagonalReductionRight(
    float singularVectors[][MAX_OUTPUT_CHANNELS],
    float secDiag[MAX_OUTPUT_CHANNELS],
    const int16_t nChannelsL,
    const int16_t nChannelsC,
    const int16_t currChannel,
    float *sig_x,
    float *g )
{
    int16_t iCh, jCh, idx;
    float norm_x, r;

    /* Setting values to 0 */
    ( *sig_x ) = 0.0f;
    ( *g ) = 0.0f;

    /* Only rows that exist and are not in the last column have a row part to reduce. */
    if ( currChannel >= nChannelsL || currChannel == ( nChannelsC - 1 ) ) /* !( i <= m && i != n ) */
    {
        return;
    }

    idx = currChannel + 1;

    /* Absolute row sum, used to detect an all-zero row part. */
    for ( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    {
        ( *sig_x ) += fabsf( singularVectors[currChannel][jCh] );
    }

    if ( ( *sig_x ) == 0.0f )
    {
        return;
    }

    /* Squared Euclidean norm of the (unscaled) row part. */
    norm_x = 0.0f;
    for ( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    {
        norm_x += ( singularVectors[currChannel][jCh] * singularVectors[currChannel][jCh] );
    }

    /* The reflector value takes the opposite sign of the leading element. */
    ( *g ) = -( singularVectors[currChannel][idx] >= 0 ? 1 : ( -1 ) ) * sqrtf( norm_x );
    r = ( *g ) * singularVectors[currChannel][idx] - norm_x;
    singularVectors[currChannel][idx] = ( singularVectors[currChannel][idx] - ( *g ) );

    for ( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    {
        secDiag[jCh] = singularVectors[currChannel][jCh] / maxWithSign( r );
    }

    /* Apply the reflector to every row below the current one. */
    for ( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /* nChannelsL */
    {
        norm_x = 0.0f;
        for ( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
        {
            norm_x += ( singularVectors[iCh][jCh] * singularVectors[currChannel][jCh] );
        }

        for ( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
        {
            singularVectors[iCh][jCh] += ( norm_x * secDiag[jCh] );
        }
    }

    /* The caller forwards *sig_x and *g to the next biDiagonalReductionLeft()
     * call, which stores their product in secDiag[]; dividing here makes that
     * product equal to the unscaled reflector value computed above. */
    ( *g ) = ( *g ) / maxWithSign( *sig_x );

    /* The row is intentionally NOT multiplied by *sig_x here: this variant never
     * divides the row by *sig_x, so a trailing multiplication would leave the
     * stored reflector scaled by *sig_x.
     * NOTE(review): confirm against the accumulation routines that consume this row. */

    return;
}


#else
static void biDiagonalReductionRight(
    float singularVectors[][MAX_OUTPUT_CHANNELS],
    float secDiag[MAX_OUTPUT_CHANNELS],
@@ -657,7 +799,7 @@ static void biDiagonalReductionRight(
    return;
}


#endif
/*-------------------------------------------------------------------------
 * singularVectorsAccumulationLeft()
 *