Loading lib_dec/ivas_svd_dec.c +4 −515 Original line number Diff line number Diff line Loading @@ -53,11 +53,9 @@ #if 1 #define FIX_1010_OPT_DIV // #define FIX_1010_OPT_INV_USING_INVSQRT #define FIX_1010_OPT_SINGLE_RESCALE #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV // #define FIX_1010_OPT_GIVENS_AMAX_BMIN #define FIX_1010_OPT_NORM_NOSAT #define FIX_1010_OPT_SEC_SINGLE_RESCALE #endif Loading Loading @@ -297,288 +295,6 @@ void svdMat2mat_fx( return; } #ifdef MORE_DEBUG2 static void matrixFx2Fl2( float r[][MAX_OUTPUT_CHANNELS], const Word32 a[][MAX_OUTPUT_CHANNELS], const Word16 a_e[][MAX_OUTPUT_CHANNELS], const int adim1, const int adim2 ) { for ( int i1 = 0; i1 < adim1; i1++ ) { for ( int i2 = 0; i2 < adim2; i2++ ) { r[i1][i2] = (float) a[i1][i2] * powf( 2.f, a_e[i1][i2] - 31 ); } } } static void matrixPrint2( const float a[][MAX_OUTPUT_CHANNELS], const int dim1, const int dim2, const char *name ) { printf( "Matrix %s[%d][%d] = \n", name, dim1, dim2 ); for ( int i1 = 0; i1 < dim1; i1++ ) { printf( " { " ); for ( int i2 = 0; i2 < dim2; i2++ ) { printf( "%.10e, ", a[i1][i2] ); } printf( " },\n" ); } } #endif #ifndef DEBUG_SVD_TEST // #define DEBUG_SVD_PRECISION #endif // #define MORE_DEBUG #if defined( DEBUG_SVD_PRECISION ) || defined( MORE_DEBUG ) #if ( MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS ) #define MAX_MATRIX MAX_INPUT_CHANNELS #else #define MAX_MATRIX MAX_OUTPUT_CHANNELS #endif static void matrixFx2Fl( float r[][MAX_MATRIX], const Word32 a[][MAX_MATRIX], const Word16 a_e[MAX_MATRIX], const int adim1, const int adim2 ) { for ( int i1 = 0; i1 < adim1; i1++ ) { for ( int i2 = 0; i2 < adim2; i2++ ) { r[i1][i2] = (float) a[i1][i2] * powf( 2.f, a_e[i2] - 31 ); } } } static void matrixProduct( float r[][MAX_MATRIX], const float a[][MAX_MATRIX], const float b[][MAX_MATRIX], const int adim1, const int adim2, const int bdim1, const int bdim2 ) { assert( adim2 == bdim1 ); for ( int i1 = 0; i1 < adim1; i1++ ) { for ( int i2 = 0; i2 < bdim2; i2++ ) { r[i1][i2] = 0.f; for ( int i3 = 0; i3 < bdim1; i3++ ) { r[i1][i2] += a[i1][i3] * b[i3][i2]; } } } } static void matrixTranspose( float r[][MAX_MATRIX], const float a[][MAX_MATRIX], const int adim1, const int adim2 ) { for ( int i1 = 0; i1 < adim1; i1++ ) { for ( int i2 = 0; i2 < adim2; i2++ ) { r[i2][i1] = a[i1][i2]; } } } static void matrixDiagonal( float r[][MAX_MATRIX], const float a[MAX_MATRIX], const int dim ) { for ( int i1 = 0; i1 < dim; i1++ ) { for ( int i2 = 0; i2 < dim; i2++ ) { r[i1][i2] = 0; } r[i1][i1] = a[i1]; } } static float matrixDifference( const float a[][MAX_MATRIX], const float b[][MAX_MATRIX], const int dim1, const int dim2 ) { float r = 0.f; for ( int i1 = 0; i1 < dim1; i1++ ) { for ( int i2 = 0; i2 < dim2; i2++ ) { if ( a[i1][i2] != 0.f ) { r += fabsf( ( b[i1][i2] - a[i1][i2] ) / a[i1][i2] ); } else { r += fabsf( b[i1][i2] - a[i1][i2] ); } } } return r / (float) ( dim1 * dim2 ); } static void matrixPrint( const float a[][MAX_MATRIX], const int dim1, const int dim2, const char *name ) { printf( "Matrix %s[%d][%d] = \n", name, dim1, dim2 ); for ( int i1 = 0; i1 < dim1; i1++ ) { for ( int i2 = 0; i2 < dim2; i2++ ) { printf( "%.10e, ", a[i1][i2] ); } printf( "\n" ); } } static float matrixTestIdentity( const float a[][MAX_MATRIX], const int dim ) { float r = 0.f; for ( int i1 = 0; i1 < dim; i1++ ) { for ( int i2 = 0; i2 < dim; i2++ ) { if ( i1 == i2 ) { r += fabsf( 1.f - a[i1][i2] ); } else { r += fabsf( 0.f - a[i1][i2] ); } } } return r; } #define PROBLEMATIC_THRESHOLD 0.5f static void svd_accuracy_test_fx( Word32 InputMatrixFx[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) InputMatrix_e*/ Word16 InputMatrixFx_e, Word32 singularVectors_LeftFx[][MAX_OUTPUT_CHANNELS], /* o : left singular vectors (U) Q31 */ Word32 singularValuesFx[MAX_OUTPUT_CHANNELS], /* o : singular values vector (S) singularValues_fx_e*/ Word32 singularVectors_RightFx[][MAX_OUTPUT_CHANNELS], /* o : right singular vectors (V) Q31 */ Word16 singularValuesFx_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/ const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/ ) { float tmp1[MAX_MATRIX][MAX_MATRIX]; float tmp2[MAX_MATRIX][MAX_MATRIX]; float tmp3[MAX_MATRIX][MAX_MATRIX]; float InputMatrix[MAX_MATRIX][MAX_MATRIX]; Word16 singularValuesFx2_e[MAX_OUTPUT_CHANNELS]; float singularVectors_Left[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; float singularValues[MAX_MATRIX]; float singularValuesMatrix[MAX_MATRIX][MAX_MATRIX]; float singularVectors_Right[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; float result; int dimSingular; int problematic = 0; /* Convert to float and Create singular values matrix from signular values vector */ for ( int x = 0; x < MAX_MATRIX; x++ ) singularValuesFx2_e[x] = InputMatrixFx_e; matrixFx2Fl( InputMatrix, InputMatrixFx, singularValuesFx2_e, nChannelsL, nChannelsC ); dimSingular = min( nChannelsL, nChannelsC ); matrixFx2Fl( &singularValues, (Word32( * )[MAX_MATRIX]) singularValuesFx, singularValuesFx_e, 1, nChannelsC ); for ( int x = 0; x < MAX_MATRIX; x++ ) singularValuesFx2_e[x] = 0; matrixFx2Fl( singularVectors_Left, singularVectors_LeftFx, singularValuesFx2_e, nChannelsL, nChannelsC ); matrixFx2Fl( singularVectors_Right, singularVectors_RightFx, singularValuesFx2_e, nChannelsC, nChannelsC ); matrixDiagonal( singularValuesMatrix, singularValues, dimSingular ); /* CxC */ #ifdef MORE_DEBUG matrixPrint( InputMatrix, nChannelsL, nChannelsC, "A" ); printf( "Result of svd() \n" ); matrixPrint( singularVectors_Left, nChannelsL, nChannelsC, "U" ); matrixPrint( singularValuesMatrix, nChannelsC, nChannelsC, "S" ); matrixPrint( singularVectors_Right, nChannelsC, nChannelsC, "V" ); #endif printf( "\nResult quality tests\n\n" ); /* Test U' * U == I */ matrixTranspose( tmp1, singularVectors_Left, nChannelsL, nChannelsC ); /* CxL */ matrixProduct( tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC ); /* CxC */ result = matrixTestIdentity( tmp2, nChannelsC ); if ( result >= PROBLEMATIC_THRESHOLD ) { problematic = 1; } #ifdef MORE_DEBUG matrixPrint( tmp2, nChannelsC, nChannelsC, "U\'*U" ); #endif printf( "U' * U difference to I is %f\n", result ); /* Test V * V' == I */ matrixTranspose( tmp1, singularVectors_Right, nChannelsC, nChannelsC ); /* CxC */ matrixProduct( tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC ); /* CxC */ result = matrixTestIdentity( tmp2, nChannelsC ); if ( result >= PROBLEMATIC_THRESHOLD ) { problematic = 1; } #ifdef MORE_DEBUG matrixPrint( tmp2, nChannelsC, nChannelsC, "V*V\'" ); #endif printf( "V * V' difference to I is %f\n", result ); /* Test InputMatrix == U * S * V' */ matrixProduct( tmp1, singularVectors_Left, singularValuesMatrix, nChannelsL, nChannelsC, dimSingular, dimSingular ); /* LxC */ matrixTranspose( tmp3, singularVectors_Right, nChannelsC, nChannelsC ); /* CxC */ matrixProduct( tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC ); /* LxC */ result = matrixDifference( tmp2, InputMatrix, nChannelsL, nChannelsC ); if ( result >= PROBLEMATIC_THRESHOLD ) { problematic = 1; } #ifdef MORE_DEBUG matrixPrint( tmp2, nChannelsL, nChannelsC, "U*S*V\'" ); #endif printf( "U * S * V' difference to M is %f\n", result ); if ( problematic ) { matrixPrint( InputMatrix, nChannelsL, nChannelsC, "Problematic Input" ); } } #endif /*------------------------------------------------------------------------- * svd() * Loading Loading @@ -615,24 +331,9 @@ Word16 svd_fx( Word16 temp_fx_e; push_wmops( "svd_fx" ); #ifdef MORE_DEBUG2 { float input[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word16 exp_matrix[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ ) for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ ) exp_matrix[ii][iii] = InputMatrix_e; matrixFx2Fl2( input, InputMatrix, exp_matrix, nChannelsL, nChannelsC ); matrixPrint2( input, nChannelsL, nChannelsC, " input " ); } #endif #ifndef FIX_1010_OPT_SINGLE_RESCALE set32_fx( secDiag_fx, 0, MAX_OUTPUT_CHANNELS ); set16_fx( secDiag_fx_e, 0, MAX_OUTPUT_CHANNELS ); set16_fx( singularValues_fx_e, 0, MAX_OUTPUT_CHANNELS ); #endif Loading Loading @@ -714,17 +415,6 @@ Word16 svd_fx( WHILE( EQ_16( condition, 1 ) ); pop_wmops(); #ifdef DEBUG_SVD_PRECISION svd_accuracy_test_fx( InputMatrix, InputMatrix_e, singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, singularValues_fx_e, nChannelsL, nChannelsC ); #endif return ( errorMessage ); } Loading Loading @@ -1323,18 +1013,6 @@ static void HouseholderReduction_fx( } } #ifdef MORE_DEBUG2 { float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; float secDiag[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; matrixFx2Fl2( singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC ); matrixFx2Fl2( secDiag, (Word32( * )[MAX_OUTPUT_CHANNELS]) secDiag_fx, (Word16( * )[MAX_OUTPUT_CHANNELS]) secDiag_fx_e, 1, nChannelsC ); matrixPrint2( singularVectors_Left, nChannelsL, nChannelsC, "left" ); matrixPrint2( secDiag, 1, nChannelsC, "secDiag" ); } #endif /* SingularVecotr Accumulation */ #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC ); Loading @@ -1346,36 +1024,10 @@ static void HouseholderReduction_fx( singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC ); #endif #ifdef MORE_DEBUG2 { float singularVectors_Right[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ ) for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ ) singularVectors_Left_fx_e[ii][iii] = 0; matrixFx2Fl2( singularVectors_Right, singularVectors_Right_fx, singularVectors_Left_fx_e, nChannelsC, nChannelsC ); matrixPrint2( singularVectors_Right, nChannelsC, nChannelsC, "right2" ); } #endif singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC ); #endif #ifdef MORE_DEBUG2 { float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ ) for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ ) singularVectors_Left_fx_e[ii][iii] = 0; matrixFx2Fl2( singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC ); matrixPrint2( singularVectors_Left, nChannelsL, nChannelsC, "left2" ); } #endif return; } Loading @@ -1385,41 +1037,6 @@ static void HouseholderReduction_fx( * *-------------------------------------------------------------------------*/ #ifdef FIX_1010_OPT_INV_USING_INVSQRT static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) { Word16 sign, shift, shift2; sign = 0; move16(); if ( x < 0 ) { sign = 1; } if ( sign ) { x = L_negate( x ); } shift = norm_l( x ); x = L_shl( x, shift ); *px_e = 0; move16(); x = ISqrt32norm( x, px_e ); x = Mpy_32_32( x, x ); shift2 = norm_l( x ); x = L_shl( x, shift2 ); *px_e = add( shl( *px_e, 1 ), sub( shift, shift2 ) ); move16(); if ( sign ) { x = L_negate( x ); } return x; } #endif static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Loading Loading @@ -1477,6 +1094,7 @@ ELSE IF( LT_16( *sig_x_e, *secDiag_e ) ) secDiag_e[currChannel] = *sig_x_e; move16(); #endif /* Setting values to 0 */ ( *sig_x ) = 0; move32(); Loading @@ -1502,11 +1120,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifdef FIX_1010_OPT_DIV Word16 invVal_e; Word32 invVal; #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); #endif #endif norm_x = 0; move32(); Loading Loading @@ -1576,11 +1190,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move32(); #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); #endif #endif FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ Loading Loading @@ -1673,7 +1283,7 @@ return; static void biDiagonalReductionRight_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_exp[]) */ #ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 *singularVectors_e, #else Loading Loading @@ -1741,11 +1351,7 @@ static void biDiagonalReductionRight_fx( #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; Word32 invVal; #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); #endif #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { Loading Loading @@ -1810,11 +1416,7 @@ static void biDiagonalReductionRight_fx( move32(); #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); #endif #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ Loading Loading @@ -1898,6 +1500,8 @@ static void biDiagonalReductionRight_fx( *secDiag_e = exp_max; move16(); #endif #ifndef FIX_1010_OPT_SINGLE_RESCALE exp_max = *singularVectors_e; move16(); Loading Loading @@ -1976,11 +1580,7 @@ static void singularVectorsAccumulationLeft_fx( IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/ { #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp ); #else t_ii = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp ); #endif t_ii_e = sub( temp_exp, t_ii_e ); #else t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */ Loading @@ -2001,22 +1601,11 @@ static void singularVectorsAccumulationLeft_fx( norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ #endif } #ifdef FIX_1010_OPT_INV_USING_INVSQRT Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] ); t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp ); t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj ); #ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) ); #endif #else t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, #ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) ); #endif #endif FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */ { Loading Loading @@ -2187,59 +1776,6 @@ static void singularVectorsAccumulationRight_fx( * *-------------------------------------------------------------------------*/ #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN #ifndef M_PI #define M_PI 3.141592653589793 #endif #define NUM_REGIONS 128 static Word32 alphaBeta[NUM_REGIONS][2]; static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta ) { static int init = 0; if ( init == 0 ) { for ( int i = 0; i < NUM_REGIONS; i++ ) { double thetaS, thetaE, thetaM; thetaS = M_PI / 4. * (double) i / (double) NUM_REGIONS; thetaE = M_PI / 4. * (double) ( i + 1 ) / (double) NUM_REGIONS; thetaM = M_PI / 4. * ( (double) i + 0.5 ) / (double) NUM_REGIONS; // alphaBeta[i][0] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM))); // alphaBeta[i][1] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM)) * tan((thetaS+thetaE)/2.)); alphaBeta[i][0] = FL2WORD32( 2. / ( ( ( sin( thetaM ) + sin( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ) + cos( thetaM ) + cos( thetaS ) ) ); alphaBeta[i][1] = FL2WORD32( 2. / ( ( ( sin( thetaM ) + sin( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ) + cos( thetaM ) + cos( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ); } init = 1; } Word16 r, shift; #if 0 float pf, qf; pf = (float)p * powf(2.f, p_e-31); qf = (float)q * powf(2.f, q_e-31); r = floor((double)NUM_REGIONS * 4. * atan2f(qf, pf)/M_PI); if (r >= NUM_REGIONS) { r = NUM_REGIONS-1; } #elif 1 shift = sub( norm_l( q ), 1 ); q = L_shl( q, shift ); q_e = sub( q_e, shift ); shift = norm_l( p ); p = L_shl( p, shift ); p_e = sub( p_e, shift ); shift = sub( q_e, p_e ); r = shl_sat( div_s( extract_h( q ), s_max( 1, extract_h( p ) ) ), shift ); /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */ r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) ); r = s_min( s_max( 0, shr( r, WORD16_BITS - 1 - 7 - 3 ) ), NUM_REGIONS - 1 ); #endif assert( ( r >= 0 ) && ( r < NUM_REGIONS ) ); *alpha = alphaBeta[r][0]; *beta = alphaBeta[r][1]; } #endif #ifdef FIX_1010_OPT_GIVENS_INV static void GivensRotation2_fx( Loading @@ -2253,35 +1789,6 @@ static void GivensRotation2_fx( Word16 *outInv_e ) { Word32 r; #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN Word32 az, ax, a, b; ax = L_abs( x ); az = L_abs( z ); IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 ) { get_alpha_beta( ax, x_e, az, z_e, &a, &b ); r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e ); } ELSE { get_alpha_beta( az, z_e, ax, x_e, &a, &b ); r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e ); } *result = r; move32(); #if 1 *outInv_e = shl( *out_e, 1 ); *resultInv = ISqrt32( L_max( 1, Mpy_32_32( r, r ) ), outInv_e ); move32(); #else *resultInv = L_deposit_h( BASOP_Util_Divide3232_Scale( MAX_32, r, outInv_e ) ); move32(); *outInv_e = sub( *outInv_e, *out_e ); move16(); #endif #else r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e ); r = L_max( r, 1 ); *outInv_e = *out_e; Loading @@ -2291,7 +1798,6 @@ static void GivensRotation2_fx( *resultInv = ISqrt32( r, outInv_e ); move32(); #endif } #endif Loading @@ -2312,25 +1818,8 @@ static Word32 GivensRotation_fx( #endif #ifdef FIX_1010_OPT_GIVENS #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN Word32 az, ax, a, b; ax = L_abs( x ); az = L_abs( z ); IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 ) { get_alpha_beta( ax, x_e, az, z_e, &a, &b ); r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e ); } ELSE { get_alpha_beta( az, z_e, ax, x_e, &a, &b ); r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e ); } #else r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e ); r = Sqrt32( r, out_e ); #endif #else x_abs = L_abs( x ); z_abs = L_abs( z ); Loading Loading
lib_dec/ivas_svd_dec.c +4 −515 Original line number Diff line number Diff line Loading @@ -53,11 +53,9 @@ #if 1 #define FIX_1010_OPT_DIV // #define FIX_1010_OPT_INV_USING_INVSQRT #define FIX_1010_OPT_SINGLE_RESCALE #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV // #define FIX_1010_OPT_GIVENS_AMAX_BMIN #define FIX_1010_OPT_NORM_NOSAT #define FIX_1010_OPT_SEC_SINGLE_RESCALE #endif Loading Loading @@ -297,288 +295,6 @@ void svdMat2mat_fx( return; } #ifdef MORE_DEBUG2 static void matrixFx2Fl2( float r[][MAX_OUTPUT_CHANNELS], const Word32 a[][MAX_OUTPUT_CHANNELS], const Word16 a_e[][MAX_OUTPUT_CHANNELS], const int adim1, const int adim2 ) { for ( int i1 = 0; i1 < adim1; i1++ ) { for ( int i2 = 0; i2 < adim2; i2++ ) { r[i1][i2] = (float) a[i1][i2] * powf( 2.f, a_e[i1][i2] - 31 ); } } } static void matrixPrint2( const float a[][MAX_OUTPUT_CHANNELS], const int dim1, const int dim2, const char *name ) { printf( "Matrix %s[%d][%d] = \n", name, dim1, dim2 ); for ( int i1 = 0; i1 < dim1; i1++ ) { printf( " { " ); for ( int i2 = 0; i2 < dim2; i2++ ) { printf( "%.10e, ", a[i1][i2] ); } printf( " },\n" ); } } #endif #ifndef DEBUG_SVD_TEST // #define DEBUG_SVD_PRECISION #endif // #define MORE_DEBUG #if defined( DEBUG_SVD_PRECISION ) || defined( MORE_DEBUG ) #if ( MAX_INPUT_CHANNELS > MAX_OUTPUT_CHANNELS ) #define MAX_MATRIX MAX_INPUT_CHANNELS #else #define MAX_MATRIX MAX_OUTPUT_CHANNELS #endif static void matrixFx2Fl( float r[][MAX_MATRIX], const Word32 a[][MAX_MATRIX], const Word16 a_e[MAX_MATRIX], const int adim1, const int adim2 ) { for ( int i1 = 0; i1 < adim1; i1++ ) { for ( int i2 = 0; i2 < adim2; i2++ ) { r[i1][i2] = (float) a[i1][i2] * powf( 2.f, a_e[i2] - 31 ); } } } static void matrixProduct( float r[][MAX_MATRIX], const float a[][MAX_MATRIX], const float b[][MAX_MATRIX], const int adim1, const int adim2, const int bdim1, const int bdim2 ) { assert( adim2 == bdim1 ); for ( int i1 = 0; i1 < adim1; i1++ ) { for ( int i2 = 0; i2 < bdim2; i2++ ) { r[i1][i2] = 0.f; for ( int i3 = 0; i3 < bdim1; i3++ ) { r[i1][i2] += a[i1][i3] * b[i3][i2]; } } } } static void matrixTranspose( float r[][MAX_MATRIX], const float a[][MAX_MATRIX], const int adim1, const int adim2 ) { for ( int i1 = 0; i1 < adim1; i1++ ) { for ( int i2 = 0; i2 < adim2; i2++ ) { r[i2][i1] = a[i1][i2]; } } } static void matrixDiagonal( float r[][MAX_MATRIX], const float a[MAX_MATRIX], const int dim ) { for ( int i1 = 0; i1 < dim; i1++ ) { for ( int i2 = 0; i2 < dim; i2++ ) { r[i1][i2] = 0; } r[i1][i1] = a[i1]; } } static float matrixDifference( const float a[][MAX_MATRIX], const float b[][MAX_MATRIX], const int dim1, const int dim2 ) { float r = 0.f; for ( int i1 = 0; i1 < dim1; i1++ ) { for ( int i2 = 0; i2 < dim2; i2++ ) { if ( a[i1][i2] != 0.f ) { r += fabsf( ( b[i1][i2] - a[i1][i2] ) / a[i1][i2] ); } else { r += fabsf( b[i1][i2] - a[i1][i2] ); } } } return r / (float) ( dim1 * dim2 ); } static void matrixPrint( const float a[][MAX_MATRIX], const int dim1, const int dim2, const char *name ) { printf( "Matrix %s[%d][%d] = \n", name, dim1, dim2 ); for ( int i1 = 0; i1 < dim1; i1++ ) { for ( int i2 = 0; i2 < dim2; i2++ ) { printf( "%.10e, ", a[i1][i2] ); } printf( "\n" ); } } static float matrixTestIdentity( const float a[][MAX_MATRIX], const int dim ) { float r = 0.f; for ( int i1 = 0; i1 < dim; i1++ ) { for ( int i2 = 0; i2 < dim; i2++ ) { if ( i1 == i2 ) { r += fabsf( 1.f - a[i1][i2] ); } else { r += fabsf( 0.f - a[i1][i2] ); } } } return r; } #define PROBLEMATIC_THRESHOLD 0.5f static void svd_accuracy_test_fx( Word32 InputMatrixFx[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) InputMatrix_e*/ Word16 InputMatrixFx_e, Word32 singularVectors_LeftFx[][MAX_OUTPUT_CHANNELS], /* o : left singular vectors (U) Q31 */ Word32 singularValuesFx[MAX_OUTPUT_CHANNELS], /* o : singular values vector (S) singularValues_fx_e*/ Word32 singularVectors_RightFx[][MAX_OUTPUT_CHANNELS], /* o : right singular vectors (V) Q31 */ Word16 singularValuesFx_e[MAX_OUTPUT_CHANNELS], const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/ const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/ ) { float tmp1[MAX_MATRIX][MAX_MATRIX]; float tmp2[MAX_MATRIX][MAX_MATRIX]; float tmp3[MAX_MATRIX][MAX_MATRIX]; float InputMatrix[MAX_MATRIX][MAX_MATRIX]; Word16 singularValuesFx2_e[MAX_OUTPUT_CHANNELS]; float singularVectors_Left[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; float singularValues[MAX_MATRIX]; float singularValuesMatrix[MAX_MATRIX][MAX_MATRIX]; float singularVectors_Right[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; float result; int dimSingular; int problematic = 0; /* Convert to float and Create singular values matrix from signular values vector */ for ( int x = 0; x < MAX_MATRIX; x++ ) singularValuesFx2_e[x] = InputMatrixFx_e; matrixFx2Fl( InputMatrix, InputMatrixFx, singularValuesFx2_e, nChannelsL, nChannelsC ); dimSingular = min( nChannelsL, nChannelsC ); matrixFx2Fl( &singularValues, (Word32( * )[MAX_MATRIX]) singularValuesFx, singularValuesFx_e, 1, nChannelsC ); for ( int x = 0; x < MAX_MATRIX; x++ ) singularValuesFx2_e[x] = 0; matrixFx2Fl( singularVectors_Left, singularVectors_LeftFx, singularValuesFx2_e, nChannelsL, nChannelsC ); matrixFx2Fl( singularVectors_Right, singularVectors_RightFx, singularValuesFx2_e, nChannelsC, nChannelsC ); matrixDiagonal( singularValuesMatrix, singularValues, dimSingular ); /* CxC */ #ifdef MORE_DEBUG matrixPrint( InputMatrix, nChannelsL, nChannelsC, "A" ); printf( "Result of svd() \n" ); matrixPrint( singularVectors_Left, nChannelsL, nChannelsC, "U" ); matrixPrint( singularValuesMatrix, nChannelsC, nChannelsC, "S" ); matrixPrint( singularVectors_Right, nChannelsC, nChannelsC, "V" ); #endif printf( "\nResult quality tests\n\n" ); /* Test U' * U == I */ matrixTranspose( tmp1, singularVectors_Left, nChannelsL, nChannelsC ); /* CxL */ matrixProduct( tmp2, tmp1, singularVectors_Left, nChannelsC, nChannelsL, nChannelsL, nChannelsC ); /* CxC */ result = matrixTestIdentity( tmp2, nChannelsC ); if ( result >= PROBLEMATIC_THRESHOLD ) { problematic = 1; } #ifdef MORE_DEBUG matrixPrint( tmp2, nChannelsC, nChannelsC, "U\'*U" ); #endif printf( "U' * U difference to I is %f\n", result ); /* Test V * V' == I */ matrixTranspose( tmp1, singularVectors_Right, nChannelsC, nChannelsC ); /* CxC */ matrixProduct( tmp2, singularVectors_Right, tmp1, nChannelsC, nChannelsC, nChannelsC, nChannelsC ); /* CxC */ result = matrixTestIdentity( tmp2, nChannelsC ); if ( result >= PROBLEMATIC_THRESHOLD ) { problematic = 1; } #ifdef MORE_DEBUG matrixPrint( tmp2, nChannelsC, nChannelsC, "V*V\'" ); #endif printf( "V * V' difference to I is %f\n", result ); /* Test InputMatrix == U * S * V' */ matrixProduct( tmp1, singularVectors_Left, singularValuesMatrix, nChannelsL, nChannelsC, dimSingular, dimSingular ); /* LxC */ matrixTranspose( tmp3, singularVectors_Right, nChannelsC, nChannelsC ); /* CxC */ matrixProduct( tmp2, tmp1, tmp3, nChannelsL, dimSingular, nChannelsC, nChannelsC ); /* LxC */ result = matrixDifference( tmp2, InputMatrix, nChannelsL, nChannelsC ); if ( result >= PROBLEMATIC_THRESHOLD ) { problematic = 1; } #ifdef MORE_DEBUG matrixPrint( tmp2, nChannelsL, nChannelsC, "U*S*V\'" ); #endif printf( "U * S * V' difference to M is %f\n", result ); if ( problematic ) { matrixPrint( InputMatrix, nChannelsL, nChannelsC, "Problematic Input" ); } } #endif /*------------------------------------------------------------------------- * svd() * Loading Loading @@ -615,24 +331,9 @@ Word16 svd_fx( Word16 temp_fx_e; push_wmops( "svd_fx" ); #ifdef MORE_DEBUG2 { float input[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word16 exp_matrix[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ ) for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ ) exp_matrix[ii][iii] = InputMatrix_e; matrixFx2Fl2( input, InputMatrix, exp_matrix, nChannelsL, nChannelsC ); matrixPrint2( input, nChannelsL, nChannelsC, " input " ); } #endif #ifndef FIX_1010_OPT_SINGLE_RESCALE set32_fx( secDiag_fx, 0, MAX_OUTPUT_CHANNELS ); set16_fx( secDiag_fx_e, 0, MAX_OUTPUT_CHANNELS ); set16_fx( singularValues_fx_e, 0, MAX_OUTPUT_CHANNELS ); #endif Loading Loading @@ -714,17 +415,6 @@ Word16 svd_fx( WHILE( EQ_16( condition, 1 ) ); pop_wmops(); #ifdef DEBUG_SVD_PRECISION svd_accuracy_test_fx( InputMatrix, InputMatrix_e, singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, singularValues_fx_e, nChannelsL, nChannelsC ); #endif return ( errorMessage ); } Loading Loading @@ -1323,18 +1013,6 @@ static void HouseholderReduction_fx( } } #ifdef MORE_DEBUG2 { float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; float secDiag[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; matrixFx2Fl2( singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC ); matrixFx2Fl2( secDiag, (Word32( * )[MAX_OUTPUT_CHANNELS]) secDiag_fx, (Word16( * )[MAX_OUTPUT_CHANNELS]) secDiag_fx_e, 1, nChannelsC ); matrixPrint2( singularVectors_Left, nChannelsL, nChannelsC, "left" ); matrixPrint2( secDiag, 1, nChannelsC, "secDiag" ); } #endif /* SingularVecotr Accumulation */ #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC ); Loading @@ -1346,36 +1024,10 @@ static void HouseholderReduction_fx( singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC ); #endif #ifdef MORE_DEBUG2 { float singularVectors_Right[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ ) for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ ) singularVectors_Left_fx_e[ii][iii] = 0; matrixFx2Fl2( singularVectors_Right, singularVectors_Right_fx, singularVectors_Left_fx_e, nChannelsC, nChannelsC ); matrixPrint2( singularVectors_Right, nChannelsC, nChannelsC, "right2" ); } #endif singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC ); #endif #ifdef MORE_DEBUG2 { float singularVectors_Left[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; for ( int ii = 0; ii < MAX_OUTPUT_CHANNELS; ii++ ) for ( int iii = 0; iii < MAX_OUTPUT_CHANNELS; iii++ ) singularVectors_Left_fx_e[ii][iii] = 0; matrixFx2Fl2( singularVectors_Left, singularVectors_Left_fx, singularVectors_Left_fx_e, nChannelsL, nChannelsC ); matrixPrint2( singularVectors_Left, nChannelsL, nChannelsC, "left2" ); } #endif return; } Loading @@ -1385,41 +1037,6 @@ static void HouseholderReduction_fx( * *-------------------------------------------------------------------------*/ #ifdef FIX_1010_OPT_INV_USING_INVSQRT static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) { Word16 sign, shift, shift2; sign = 0; move16(); if ( x < 0 ) { sign = 1; } if ( sign ) { x = L_negate( x ); } shift = norm_l( x ); x = L_shl( x, shift ); *px_e = 0; move16(); x = ISqrt32norm( x, px_e ); x = Mpy_32_32( x, x ); shift2 = norm_l( x ); x = L_shl( x, shift2 ); *px_e = add( shl( *px_e, 1 ), sub( shift, shift2 ) ); move16(); if ( sign ) { x = L_negate( x ); } return x; } #endif static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ Loading Loading @@ -1477,6 +1094,7 @@ ELSE IF( LT_16( *sig_x_e, *secDiag_e ) ) secDiag_e[currChannel] = *sig_x_e; move16(); #endif /* Setting values to 0 */ ( *sig_x ) = 0; move32(); Loading @@ -1502,11 +1120,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifdef FIX_1010_OPT_DIV Word16 invVal_e; Word32 invVal; #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); #endif #endif norm_x = 0; move32(); Loading Loading @@ -1576,11 +1190,7 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move32(); #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); #endif #endif FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ Loading Loading @@ -1673,7 +1283,7 @@ return; static void biDiagonalReductionRight_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */ Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_exp[]) */ #ifndef FIX_1010_OPT_SINGLE_RESCALE Word16 *singularVectors_e, #else Loading Loading @@ -1741,11 +1351,7 @@ static void biDiagonalReductionRight_fx( #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; Word32 invVal; #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); #endif #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { Loading Loading @@ -1810,11 +1416,7 @@ static void biDiagonalReductionRight_fx( move32(); #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); #endif #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ Loading Loading @@ -1898,6 +1500,8 @@ static void biDiagonalReductionRight_fx( *secDiag_e = exp_max; move16(); #endif #ifndef FIX_1010_OPT_SINGLE_RESCALE exp_max = *singularVectors_e; move16(); Loading Loading @@ -1976,11 +1580,7 @@ static void singularVectorsAccumulationLeft_fx( IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/ { #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp ); #else t_ii = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp ); #endif t_ii_e = sub( temp_exp, t_ii_e ); #else t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */ Loading @@ -2001,22 +1601,11 @@ static void singularVectorsAccumulationLeft_fx( norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ #endif } #ifdef FIX_1010_OPT_INV_USING_INVSQRT Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] ); t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp ); t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj ); #ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else t_jj_e = add( add( temp_exp, temp_e ), sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) ); #endif #else t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, #ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) ); #endif #endif FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */ { Loading Loading @@ -2187,59 +1776,6 @@ static void singularVectorsAccumulationRight_fx( * *-------------------------------------------------------------------------*/ #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN #ifndef M_PI #define M_PI 3.141592653589793 #endif #define NUM_REGIONS 128 static Word32 alphaBeta[NUM_REGIONS][2]; static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta ) { static int init = 0; if ( init == 0 ) { for ( int i = 0; i < NUM_REGIONS; i++ ) { double thetaS, thetaE, thetaM; thetaS = M_PI / 4. * (double) i / (double) NUM_REGIONS; thetaE = M_PI / 4. * (double) ( i + 1 ) / (double) NUM_REGIONS; thetaM = M_PI / 4. * ( (double) i + 0.5 ) / (double) NUM_REGIONS; // alphaBeta[i][0] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM))); // alphaBeta[i][1] = FL2WORD32(1./(sin(thetaM)*tan((thetaS+thetaE)/2.)+cos(thetaM)) * tan((thetaS+thetaE)/2.)); alphaBeta[i][0] = FL2WORD32( 2. / ( ( ( sin( thetaM ) + sin( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ) + cos( thetaM ) + cos( thetaS ) ) ); alphaBeta[i][1] = FL2WORD32( 2. / ( ( ( sin( thetaM ) + sin( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ) + cos( thetaM ) + cos( thetaS ) ) * tan( ( thetaS + thetaE ) / 2. ) ); } init = 1; } Word16 r, shift; #if 0 float pf, qf; pf = (float)p * powf(2.f, p_e-31); qf = (float)q * powf(2.f, q_e-31); r = floor((double)NUM_REGIONS * 4. * atan2f(qf, pf)/M_PI); if (r >= NUM_REGIONS) { r = NUM_REGIONS-1; } #elif 1 shift = sub( norm_l( q ), 1 ); q = L_shl( q, shift ); q_e = sub( q_e, shift ); shift = norm_l( p ); p = L_shl( p, shift ); p_e = sub( p_e, shift ); shift = sub( q_e, p_e ); r = shl_sat( div_s( extract_h( q ), s_max( 1, extract_h( p ) ) ), shift ); /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */ r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) ); r = s_min( s_max( 0, shr( r, WORD16_BITS - 1 - 7 - 3 ) ), NUM_REGIONS - 1 ); #endif assert( ( r >= 0 ) && ( r < NUM_REGIONS ) ); *alpha = alphaBeta[r][0]; *beta = alphaBeta[r][1]; } #endif #ifdef FIX_1010_OPT_GIVENS_INV static void GivensRotation2_fx( Loading @@ -2253,35 +1789,6 @@ static void GivensRotation2_fx( Word16 *outInv_e ) { Word32 r; #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN Word32 az, ax, a, b; ax = L_abs( x ); az = L_abs( z ); IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 ) { get_alpha_beta( ax, x_e, az, z_e, &a, &b ); r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e ); } ELSE { get_alpha_beta( az, z_e, ax, x_e, &a, &b ); r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e ); } *result = r; move32(); #if 1 *outInv_e = shl( *out_e, 1 ); *resultInv = ISqrt32( L_max( 1, Mpy_32_32( r, r ) ), outInv_e ); move32(); #else *resultInv = L_deposit_h( BASOP_Util_Divide3232_Scale( MAX_32, r, outInv_e ) ); move32(); *outInv_e = sub( *outInv_e, *out_e ); move16(); #endif #else r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e ); r = L_max( r, 1 ); *outInv_e = *out_e; Loading @@ -2291,7 +1798,6 @@ static void GivensRotation2_fx( *resultInv = ISqrt32( r, outInv_e ); move32(); #endif } #endif Loading @@ -2312,25 +1818,8 @@ static Word32 GivensRotation_fx( #endif #ifdef FIX_1010_OPT_GIVENS #ifdef FIX_1010_OPT_GIVENS_AMAX_BMIN Word32 az, ax, a, b; ax = L_abs( x ); az = L_abs( z ); IF( BASOP_Util_Cmp_Mant32Exp( ax, x_e, az, z_e ) > 0 ) { get_alpha_beta( ax, x_e, az, z_e, &a, &b ); r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ax, a ), x_e, Mpy_32_32( az, b ), z_e, out_e ); } ELSE { get_alpha_beta( az, z_e, ax, x_e, &a, &b ); r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( az, a ), z_e, Mpy_32_32( ax, b ), x_e, out_e ); } #else r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e ); r = Sqrt32( r, out_e ); #endif #else x_abs = L_abs( x ); z_abs = L_abs( z ); Loading