Commit 0f364fcb authored by Manuel Jander's avatar Manuel Jander
Browse files

Add FIX_1010_OPT_NORM_NOSAT (do not saturate intermediate results) and...

Add FIX_1010_OPT_NORM_NOSAT (do not saturate intermediate results) and FIX_1010_OPT_SEC_SINGLE_RESCALE (do not rescale secDiag data repeatedly). Improves accuracy and reduces workload but makes dependency on dynamic scaling bigger.
parent 8a752bdd
Loading
Loading
Loading
Loading
+135 −39
Original line number Diff line number Diff line
@@ -58,6 +58,8 @@
#define FIX_1010_OPT_GIVENS
#define FIX_1010_OPT_GIVENS_INV
// #define FIX_1010_OPT_GIVENS_AMAX_BMIN
#define FIX_1010_OPT_NORM_NOSAT
#define FIX_1010_OPT_SEC_SINGLE_RESCALE
#endif

/*-----------------------------------------------------------------------*
@@ -135,7 +137,11 @@ static void singularVectorsAccumulationRight_fx(
#else
    Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
#endif
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
    Word16 secDiag_e,
#else
    Word16 *secDiag_e,
#endif
    const Word16 nChannelsC /* Q0 */
);

@@ -560,7 +566,11 @@ Word16 svd_fx(
    Word16 errorMessage, condition;
    // int16_t max_length = ((nChannelsL > nChannelsC) ? nChannelsL : nChannelsC);
    Word32 secDiag_fx[MAX_OUTPUT_CHANNELS];
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
    Word16 secDiag_fx_e = 0;
#else
    Word16 secDiag_fx_e[MAX_OUTPUT_CHANNELS];
#endif
    move16();
    Word32 eps_x_fx = 0, temp_fx;
    move16();
@@ -569,7 +579,10 @@ Word16 svd_fx(
    Word16 temp_fx_e;
    push_wmops( "svd_fx" );

#if 1
    set32_fx( secDiag_fx, 0, MAX_OUTPUT_CHANNELS );
    set16_fx( secDiag_fx_e, 0, MAX_OUTPUT_CHANNELS );
#endif

    /* Collecting Values */
    FOR( iCh = 0; iCh < nChannelsL; iCh++ )
@@ -584,16 +597,22 @@ Word16 svd_fx(
    set16_fx( singularValues_fx_e, 0, MAX_OUTPUT_CHANNELS );

    /* Householder reduction */
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
    HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, &secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e );

#else
    HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e );
#endif
    /* Set extremely small values to zero if needed */
    // flushToZeroArray(singularValues, max_length);
    // flushToZeroMat(singularVectors_Left, nChannelsL, nChannelsL);
    // flushToZeroMat(singularVectors_Right, nChannelsC, nChannelsC);

    /* BidagonalDiagonalisation */
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
    errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, &secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */

#else
    errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */
#endif
    /* Sort the singular values descending order */
    lengthSingularValues = s_min( nChannelsL, nChannelsC ); /* Q0 */

@@ -676,7 +695,11 @@ static Word16 BidagonalDiagonalisation_fx(
    Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)		   singularValues_fx_e*/
    Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* i/o:                                           secDiag_fx_e*/
    Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],        /* i/o: singular values vector (S)							  */
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
    Word16 *secDiag_fx_e, /* i/o:														  */
#else
    Word16 *secDiag_new_e, /* i/o:														  */
#endif
    const Word16 nChannelsL, /* i  : number of rows in the matrix to be decomposed		Q0*/
    const Word16 nChannelsC, /* i  : number of columns in the matrix to be decomposed	Q0*/
    const Word32 eps_x,      /* i  :                                                eps_x_e*/
@@ -690,6 +713,9 @@ static Word16 BidagonalDiagonalisation_fx(
    move16();
    move16();
    Word16 temp_exp;
#ifdef FIX_1010_OPT_NORM_NOSAT
    Word16 temp_exp2;
#endif
    Word32 g = 0;
    move16();
    Word16 g_e = 0;
@@ -700,9 +726,12 @@ static Word16 BidagonalDiagonalisation_fx(
#ifdef FIX_1010_OPT_GIVENS_INV
    Word32 temp;
#endif
    Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS], secDiag_new_e[MAX_OUTPUT_CHANNELS];
    Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS );
    Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS];
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
    Word16 secDiag_new_e[MAX_OUTPUT_CHANNELS];
    set16_fx( secDiag_new_e, *secDiag_fx_e, MAX_OUTPUT_CHANNELS );
#endif
    Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS );

    FOR( iCh = nChannelsC - 1; iCh >= 0; iCh-- ) /* nChannelsC */
    {
@@ -779,12 +808,18 @@ static Word16 BidagonalDiagonalisation_fx(
                    c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp );                                   /* exp(temp_exp + (c_e - singularValues_new_e)) */
                    c_e = add( temp_exp, sub( c_e, singularValues_new_e[kCh] ) );
#endif
#ifndef FIX_1010_OPT_NORM_NOSAT
                    IF( c_e > 0 )
                    {
                        c = L_shl_sat( c, c_e ); // Q31
                        c_e = 0;
                        move16();
                    }
#else
                    temp_exp2 = norm_l( c );
                    c = L_shl( c, temp_exp2 );
                    c_e = sub( c_e, temp_exp2 );
#endif
#ifdef FIX_1010_OPT_GIVENS_INV
                    s = Mpy_32_32( -g, temp );
                    s_e = add( g_e, temp_exp );
@@ -792,13 +827,18 @@ static Word16 BidagonalDiagonalisation_fx(
                    s = BASOP_Util_Divide3232_Scale_cadence( -g, maxWithSign_fx( singularValues_fx[kCh] ), &temp_exp ); /* exp(temp_exp + (g_e - singularValues_new_e))*/
                    s_e = add( temp_exp, sub( g_e, singularValues_new_e[kCh] ) );
#endif
#ifndef FIX_1010_OPT_NORM_NOSAT
                    IF( s_e > 0 )
                    {
                        s = L_shl_sat( s, s_e ); // Q31
                        s_e = 0;
                        move16();
                    }

#else
                    temp_exp2 = norm_l( s );
                    s = L_shl( s, temp_exp2 );
                    s_e = sub( s_e, temp_exp2 );
#endif
                    ApplyRotation_fx( singularVectors_Left_fx, c, c_e, s, s_e, 0, x11_e, 0, x12_e, &f1, &f1_e, &f2, &f2_e, kCh, split, nChannelsL ); /* nChannelsL */
                }
            }
@@ -849,6 +889,7 @@ static Word16 BidagonalDiagonalisation_fx(
    // rescaling block
    Copy( singularValues_new_e, singularValues_fx_e, MAX_OUTPUT_CHANNELS );

#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
    Word16 max_exp = -31;
    move16();
    FOR( iCh = 0; iCh < nChannelsC; iCh++ )
@@ -865,6 +906,7 @@ static Word16 BidagonalDiagonalisation_fx(
        secDiag_fx[iCh] = L_shr_r( secDiag_fx[iCh], sub( *secDiag_fx_e, secDiag_new_e[iCh] ) ); /* exp(secDiag_fx_e) */
        move32();
    }
#endif

    return ( error );
}
@@ -891,6 +933,9 @@ static void ApplyQRTransform_fx(
#ifdef FIX_1010_OPT_GIVENS_INV
    Word32 temp;
    Word16 temp_e;
#endif
#ifdef FIX_1010_OPT_NORM_NOSAT
    Word16 temp_norm_e;
#endif
    Word16 ch, split;
    Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0;
@@ -1004,12 +1049,18 @@ static void ApplyQRTransform_fx(
        c = BASOP_Util_Divide3232_Scale_cadence( d, maxWithSign_fx( secDiag[ch] ), &c_e ); /* exp(c_e + (d_e + secDiag_e)) */
        c_e = add( c_e, sub( d_e, secDiag_e[ch] ) );
#endif
#ifndef FIX_1010_OPT_NORM_NOSAT
        IF( c_e > 0 )
        {
            c = L_shl_sat( c, c_e ); // Q31
            c_e = 0;
            move16();
        }
#else
        temp_norm_e = norm_l( c );
        c = L_shl( c, temp_norm_e );
        c_e = sub( c_e, temp_norm_e );
#endif
#ifdef FIX_1010_OPT_GIVENS_INV
        s = Mpy_32_32( r, temp );
        s_e = add( r_e, temp_e );
@@ -1017,13 +1068,18 @@ static void ApplyQRTransform_fx(
        s = BASOP_Util_Divide3232_Scale_cadence( r, maxWithSign_fx( secDiag[ch] ), &s_e ); /* exp(s_e + (r_e - sec_Diag_e))*/
        s_e = add( s_e, sub( r_e, secDiag_e[ch] ) );
#endif
#ifndef FIX_1010_OPT_NORM_NOSAT
        IF( s_e > 0 )
        {
            s = L_shl_sat( s, s_e ); // Q31
            s_e = 0;
            move16();
        }

#else
        temp_norm_e = norm_l( s );
        s = L_shl( s, temp_norm_e );
        s_e = sub( s_e, temp_norm_e );
#endif
        r = Mpy_32_32( s, singularValues[ch + 1] ); /* exp(r_e + secDiag_e) */
        r_e = add( s_e, singularValues_e[ch + 1] );
        x_split = Mpy_32_32( c, singularValues[ch + 1] ); /* exp(c_e + secDiag_e) */
@@ -1052,21 +1108,33 @@ static void ApplyQRTransform_fx(

            c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */
            c_e = add( d_e, aux_e );
#ifndef FIX_1010_OPT_NORM_NOSAT
            IF( c_e > 0 )
            {
                c = L_shl_sat( c, c_e ); // Q31
                c_e = 0;
                move16();
            }
#else
            temp_norm_e = norm_l( c );
            c = L_shl( c, temp_norm_e );
            c_e = sub( c_e, temp_norm_e );
#endif

            s = Mpy_32_32( r, aux ); /* exp(r_e + aux_e) */
            s_e = add( r_e, aux_e );
#ifndef FIX_1010_OPT_NORM_NOSAT
            IF( s_e > 0 )
            {
                s = L_shl_sat( s, s_e ); // Q31
                s_e = 0;
                move16();
            }
#else
            temp_norm_e = norm_l( s );
            s = L_shl( s, temp_norm_e );
            s_e = sub( s_e, temp_norm_e );
#endif
        }

        // ApplyRotation(singularVectors_Left, c, s, g, x_split, &d, &x_ii, ch + 1, ch, nChannelsL);
@@ -1191,7 +1259,11 @@ static void HouseholderReduction_fx(
#endif

        Word16 L_temp_e;
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
        Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), *secDiag_fx_e, &L_temp_e ); /* exp(L_temp_e) */
#else
        Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
#endif
        IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
        {
            *eps_x_fx = L_temp; /* exp(L_temp_e) */
@@ -1206,7 +1278,11 @@ static void HouseholderReduction_fx(
    singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC );
    singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_e, singularValues_fx_e, nChannelsL, nChannelsC );
#else
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
    singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, *secDiag_fx_e, nChannelsC );
#else
    singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
#endif
    singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
#endif

@@ -1289,6 +1365,7 @@ static void biDiagonalReductionLeft_fx(

    secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); /* exp(sig_x_e) */
    move32();
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
    // rescaling block
    IF( GT_16( *sig_x_e, *secDiag_e ) )
    {
@@ -1306,7 +1383,10 @@ ELSE IF( LT_16( *sig_x_e, *secDiag_e ) )
    secDiag[currChannel] = L_shr_r( secDiag[currChannel], sub( *secDiag_e, *sig_x_e ) ); /* exp(secDiag_e) */
    move32();
}

#else
    secDiag_e[currChannel] = *sig_x_e;
    move16();
#endif
/* Setting values to 0 */
( *sig_x ) = 0;
move32();
@@ -1509,7 +1589,11 @@ static void biDiagonalReductionRight_fx(
#else
    Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
#endif
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
    Word16 *secDiag_e,
#else
    Word16 *secDiag_exp,
#endif
    const Word16 nChannelsL,  /* Q0 */
    const Word16 nChannelsC,  /* Q0 */
    const Word16 currChannel, /* Q0 */
@@ -1521,7 +1605,9 @@ static void biDiagonalReductionRight_fx(
    Word16 iCh, jCh, idx;
    Word32 norm_x, r;
    Word16 norm_x_e, r_e;
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
    Word16 secDiag_exp[MAX_OUTPUT_CHANNELS];
#endif
    Word32 L_temp;
    Word16 L_temp_e;
#ifndef FIX_1010_OPT_SINGLE_RESCALE
@@ -1532,7 +1618,9 @@ static void biDiagonalReductionRight_fx(
        set16_fx( sing_exp2[jCh], *singularVectors_e, MAX_OUTPUT_CHANNELS );
    }
#endif
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
    set16_fx( secDiag_exp, *secDiag_e, MAX_OUTPUT_CHANNELS );
#endif

    /* Setting values to 0 */
    ( *sig_x ) = 0;
@@ -1704,7 +1792,7 @@ static void biDiagonalReductionRight_fx(
                move16();
            }


#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
            /*rescaling block*/
            Word16 exp_max = *secDiag_e;
            move16();
@@ -1719,7 +1807,7 @@ static void biDiagonalReductionRight_fx(
            }
            *secDiag_e = exp_max;
            move16();

#endif
#ifndef FIX_1010_OPT_SINGLE_RESCALE
            exp_max = *singularVectors_e;
            move16();
@@ -1910,7 +1998,11 @@ static void singularVectorsAccumulationRight_fx(
#else
    Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
#endif
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
    Word16 secDiag_e,
#else
    Word16 *secDiag_e,
#endif
    const Word16 nChannelsC /* Q0 */
)
{
@@ -1923,7 +2015,7 @@ static void singularVectorsAccumulationRight_fx(
    nChannels = nChannelsC; /* nChannelsC	Q0*/

    /* avoid compiler warning */
    t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e) */
    t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e[nChannels - 1]) */
    move32();

    FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* nChannelsC, min(nChannelsLmnChannelsC) otherwise */
@@ -1947,7 +2039,11 @@ static void singularVectorsAccumulationRight_fx(
                    temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) );
#endif
                    move32();
#ifndef FIX_1010_OPT_SEC_SINGLE_RESCALE
                    sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e ) );
#else
                    sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e[nCh + 1] ) );
#endif
                    move16();
                    // singularVectors_Right[iCh][nCh] = L_shl_sat( singularVectors_Right[iCh][nCh], temp_exp2 );
                }
@@ -1989,7 +2085,7 @@ static void singularVectorsAccumulationRight_fx(
        }
        singularVectors_Right[nCh][nCh] = MAX_32;
        move32();
        t_ii = secDiag[nCh]; /* exp(secDiag_e) */
        t_ii = secDiag[nCh]; /* exp(secDiag_e[nCh]) */
        move32();
    }
    return;