Commit 1c6d7b7f authored by Manuel Jander's avatar Manuel Jander
Browse files

Extend dynamic scale to singularVectorsAccumulationRight_fx and...

Extend dynamic scale to singularVectorsAccumulationRight_fx and singularVectorsAccumulationLeft_fx which use dynamic scale internally anyway, to fix MLD failure. This reduces intermediate denormalizations, more precision and less complexity.
parent 64d7cb9b
Loading
Loading
Loading
Loading
Loading
+95 −39
Original line number Diff line number Diff line
@@ -116,7 +116,11 @@ static void biDiagonalReductionRight_fx(
static void singularVectorsAccumulationLeft_fx(
    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
    Word32 singularValues[MAX_OUTPUT_CHANNELS],         /* exp(singularValues_e) */
#ifndef FIX_1010_OPT_SINGLE_RESCALE
    Word16 singularVectors_e,
#else
    Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
#endif
    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL, /* Q0 */
    const Word16 nChannelsC  /* Q0 */
@@ -126,7 +130,11 @@ static void singularVectorsAccumulationRight_fx(
    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* singularVectors_e */
    Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
    Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_e) */
#ifndef FIX_1010_OPT_SINGLE_RESCALE
    Word16 singularVectors_e,
#else
    Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
#endif
    Word16 secDiag_e,
    const Word16 nChannelsC /* Q0 */
);
@@ -283,9 +291,12 @@ void svdMat2mat_fx(
    return;
}

#ifndef DEBUG_SVD_TEST
#define DEBUG_SVD_PRECISION
#endif
// #define MORE_DEBUG

#ifdef MORE_DEBUG
#if defined( DEBUG_SVD_PRECISION ) || defined( MORE_DEBUG )

#if ( MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS )
#define MAX_MATRIX MAX_INPUT_CHANNELS
@@ -374,9 +385,16 @@ static float matrixDifference(
    for ( int i1 = 0; i1 < dim1; i1++ )
    {
        for ( int i2 = 0; i2 < dim2; i2++ )
        {
            if ( a[i1][i2] != 0.f )
            {
                r += fabsf( ( b[i1][i2] - a[i1][i2] ) / a[i1][i2] );
            }
            else
            {
                r += fabsf( b[i1][i2] - a[i1][i2] );
            }
        }
    }

    return r / (float) ( dim1 * dim2 );
@@ -447,6 +465,7 @@ static void svd_accuracy_test_fx(
    float singularVectors_Right[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    float result;
    int dimSingular;
    int problematic = 0;

    /* Convert to float and Create singular values matrix from signular values vector */
    for ( int x = 0; x < MAX_MATRIX; x++ )
@@ -474,6 +493,10 @@ static void svd_accuracy_test_fx(
    matrixTranspose( tmp1, singularVectors_Left, nChannelsL, nChannelsC );                             /* CxL */
    matrixProduct( tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC ); /* CxC */
    result = matrixTestIdentity( tmp2, nChannelsC );
    if ( result >= 1.0 )
    {
        problematic = 1;
    }
#ifdef MORE_DEBUG
    matrixPrint( tmp2, nChannelsC, nChannelsC, "U\'*U" );
#endif
@@ -483,6 +506,10 @@ static void svd_accuracy_test_fx(
    matrixTranspose( tmp1, singularVectors_Right, nChannelsC, nChannelsC );                             /* CxC */
    matrixProduct( tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC ); /* CxC */
    result = matrixTestIdentity( tmp2, nChannelsC );
    if ( result >= 1.0 )
    {
        problematic = 1;
    }
#ifdef MORE_DEBUG
    matrixPrint( tmp2, nChannelsC, nChannelsC, "V*V\'" );
#endif
@@ -493,10 +520,19 @@ static void svd_accuracy_test_fx(
    matrixTranspose( tmp3, singularVectors_Right, nChannelsC, nChannelsC );                                              /* CxC */
    matrixProduct( tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC );                                  /* LxC */
    result = matrixDifference( tmp2, InputMatrix, nChannelsL, nChannelsC );
    if ( result >= 1.0 )
    {
        problematic = 1;
    }
#ifdef MORE_DEBUG
    matrixPrint( tmp2, nChannelsL, nChannelsC, "U*S*V\'" );
#endif
    printf( "U * S * V' difference to M is %f\n", result );

    if ( problematic )
    {
        matrixPrint( InputMatrix, nChannelsL, nChannelsC, "Problematic Input" );
    }
}
#endif

@@ -608,7 +644,7 @@ Word16 svd_fx(
    WHILE( EQ_16( condition, 1 ) );

    pop_wmops();
#ifdef MORE_DEBUG
#ifdef DEBUG_SVD_PRECISION
    svd_accuracy_test_fx(
        InputMatrix,
        InputMatrix_e,
@@ -1137,7 +1173,7 @@ static void HouseholderReduction_fx(
        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
        {
            singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e;
            move32();
            move16();
        }
    }
#endif
@@ -1164,34 +1200,14 @@ static void HouseholderReduction_fx(
        }
    }

#ifdef FIX_1010_OPT_SINGLE_RESCALE
    // rescaling block
    Word16 exp_max = 0;
    move16();
    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
    {
        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
        {
            exp_max = s_max( exp_max, singularVectors_Left_fx_e[jCh][iCh] );
        }
    }

    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
    {
        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
        {
            singularVectors_Left_fx[jCh][iCh] = L_shr_r( singularVectors_Left_fx[jCh][iCh], sub( exp_max, singularVectors_Left_fx_e[jCh][iCh] ) ); /* exp(exp_max) */
            move32();
        }
    }
    singularVectors_Left_e = exp_max;
    move16();
#endif

    /* SingularVecotr Accumulation */
#ifndef FIX_1010_OPT_SINGLE_RESCALE
    singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC );

    singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_e, singularValues_fx_e, nChannelsL, nChannelsC );
#else
    singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, *secDiag_fx_e, nChannelsC );
    singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
#endif

    return;
}
@@ -1738,9 +1754,13 @@ static void biDiagonalReductionRight_fx(
 *-------------------------------------------------------------------------*/

static void singularVectorsAccumulationLeft_fx(
    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
    Word32 singularValues[MAX_OUTPUT_CHANNELS],         /* exp(singularValues_e) */
#ifndef FIX_1010_OPT_SINGLE_RESCALE
    Word16 singularVectors_e,
#else
    Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
#endif
    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    const Word16 nChannelsL, /* Q0 */
    const Word16 nChannelsC  /* Q0 */
@@ -1750,11 +1770,13 @@ static void singularVectorsAccumulationLeft_fx(
    Word16 nChannels;
    Word32 norm_y, t_jj, t_ii;
    Word16 norm_y_e, t_jj_e, t_ii_e, temp_exp;
#ifndef FIX_1010_OPT_SINGLE_RESCALE
    Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
    FOR( nCh = 0; nCh < MAX_OUTPUT_CHANNELS; nCh++ )
    {
        set16_fx( sing_exp2[nCh], singularVectors_e, MAX_OUTPUT_CHANNELS );
    }
#endif

    /* Processing */
    nChannels = s_min( nChannelsL, nChannelsC ); /* min(nChannelsL,ChannelsC) Q0*/
@@ -1794,7 +1816,11 @@ static void singularVectorsAccumulationLeft_fx(
                move16();
                FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
                {
#ifndef FIX_1010_OPT_SINGLE_RESCALE
                    norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
#else
                    norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
#endif
                }
#ifdef FIX_1010_OPT_INV_USING_INVSQRT
                Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] );
@@ -1803,11 +1829,19 @@ static void singularVectorsAccumulationLeft_fx(
                t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
#else
                t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,
#ifndef FIX_1010_OPT_SINGLE_RESCALE
                t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
#else
                t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) );
#endif
#endif
                FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
                {
#ifndef FIX_1010_OPT_SINGLE_RESCALE
                    singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], sing_exp2[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, sing_exp2[k][nCh] ), &sing_exp2[k][iCh] ); /* exp(sing_exp2) */
#else
                    singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], singularVectors_Left_e[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, singularVectors_Left_e[k][nCh] ), &singularVectors_Left_e[k][iCh] ); /* exp(sing_exp2) */
#endif
                    move32();
                }
            }
@@ -1816,7 +1850,11 @@ static void singularVectorsAccumulationLeft_fx(
            {
                singularVectors_Left[iCh][nCh] = Mpy_32_32( singularVectors_Left[iCh][nCh], t_ii ); /* exp(sing_exp2 + t_ii_e) */
                move32();
#ifndef FIX_1010_OPT_SINGLE_RESCALE
                sing_exp2[iCh][nCh] = add( sing_exp2[iCh][nCh], t_ii_e );
#else
                singularVectors_Left_e[iCh][nCh] = add( singularVectors_Left_e[iCh][nCh], t_ii_e );
#endif
                move16();
            }
        }
@@ -1828,8 +1866,11 @@ static void singularVectorsAccumulationLeft_fx(
                move32();
            }
        }

#ifndef FIX_1010_OPT_SINGLE_RESCALE
        singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], sing_exp2[nCh][nCh], ONE_IN_Q30, 1, &sing_exp2[nCh][nCh] ); /* exp(sing_exp2) */
#else
        singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], singularVectors_Left_e[nCh][nCh], ONE_IN_Q30, 1, &singularVectors_Left_e[nCh][nCh] ); /* exp(sing_exp2) */
#endif
        move32();
    }
    // fclose(fp);
@@ -1837,7 +1878,11 @@ static void singularVectorsAccumulationLeft_fx(
    {
        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
        {
#ifndef FIX_1010_OPT_SINGLE_RESCALE
            singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], sing_exp2[nCh][iCh] ); /* Q31 */
#else
            singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */
#endif
            move32();
        }
    }
@@ -1852,10 +1897,14 @@ static void singularVectorsAccumulationLeft_fx(
 *-------------------------------------------------------------------------*/

static void singularVectorsAccumulationRight_fx(
    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_e) */
    Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
    Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
    Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_e) */
#ifndef FIX_1010_OPT_SINGLE_RESCALE
    Word16 singularVectors_e,
#else
    Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
#endif
    Word16 secDiag_e,
    const Word16 nChannelsC /* Q0 */
)
@@ -1888,6 +1937,9 @@ static void singularVectorsAccumulationRight_fx(
#else
                    ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ); /* exp(temp_exp1) */
                    singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
#endif
#ifdef FIX_1010_OPT_SINGLE_RESCALE
                    temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) );
#endif
                    move32();
                    sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e ) );
@@ -1904,7 +1956,11 @@ static void singularVectorsAccumulationRight_fx(

                    FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
                    {
#ifndef FIX_1010_OPT_SINGLE_RESCALE
                        norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_e, sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
#else
                        norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_Left_e[nCh][k], sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
#endif
                    }

                    FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */