Loading lib_dec/ivas_svd_dec.c +10 −32 Original line number Diff line number Diff line Loading @@ -56,7 +56,7 @@ #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV //#define FIX_1010_OPT_GIVENS_AMAX_BMIN #define FIX_1010_OPT_GIVENS_AMAX_BMIN #endif /*-----------------------------------------------------------------------* Loading Loading @@ -997,7 +997,7 @@ static void ApplyQRTransform_fx( singularValues[ch] = GivensRotation_fx( d, d_e, r, r_e, &singularValues_e[ch] ); /* exp(singularValues_e) */ move32(); #endif IF( GT_32( L_abs( singularValues[ch] ), Mpy_32_32( CONVERGENCE_FACTOR_FX, L_abs( singularValues[ch] ) ) ) ) IF (singularValues[ch] != 0) { #ifndef FIX_1010_OPT_GIVENS_INV aux = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, singularValues[ch], &aux_e ); /* exp(aux_e + (1 - singularValues_e)) */ Loading Loading @@ -1154,7 +1154,7 @@ static void HouseholderReduction_fx( #ifdef FIX_1010_OPT_DIV static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) { Word16 sign, shift; Word16 sign, shift, shift2; sign = 0; move16(); Loading @@ -1173,7 +1173,9 @@ static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) move16(); x = ISqrt32norm( x, px_e ); x = Mpy_32_32( x, x ); *px_e = add( shl( *px_e, 1 ), shift ); shift2 = norm_l( x ); x = L_shl( x, shift2 ); *px_e = add( shl( *px_e, 1 ), sub(shift, shift2) ); move16(); if ( sign ) Loading Loading @@ -1252,9 +1254,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); temp_e = norm_l( invVal ); invVal = L_shl( invVal, temp_e ); invVal_e = sub( invVal_e, temp_e ); #endif norm_x = 0; move32(); Loading @@ -1269,9 +1268,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e ); singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ sing_exp[jCh] = sub( invVal_e, temp_e ); temp_e = norm_l( singularVectors[jCh][currChannel] ); singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e ); sing_exp[jCh] = sub( sing_exp[jCh], temp_e ); move16(); #endif move32(); Loading Loading @@ -1308,9 +1304,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifdef FIX_1010_OPT_DIV invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); temp_e = norm_l( invVal ); invVal = L_shl( invVal, temp_e ); invVal_e = sub( invVal_e, temp_e ); #endif FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ Loading @@ -1330,9 +1323,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #else f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ f_e = add( invVal_e, sub( norm_x_e, r_e ) ); temp_e = norm_l( f ); f = L_shl( f, temp_e ); f_e = sub( f_e, temp_e ); #endif FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ Loading Loading @@ -1442,13 +1432,10 @@ static void biDiagonalReductionRight_fx( #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); temp_e = norm_l( invVal ); invVal = L_shl( invVal, temp_e ); invVal_e = sub( invVal_e, temp_e ); #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { #ifndef FIX_1010_OPT_DIV_no #ifndef FIX_1010_OPT_DIV singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ #else temp_e = norm_l( singularVectors[currChannel][jCh] ); Loading @@ -1456,9 +1443,6 @@ static void biDiagonalReductionRight_fx( singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ sing_exp[jCh] = sub( invVal_e, temp_e ); move16(); temp_e = norm_l( singularVectors[currChannel][jCh] ); singularVectors[currChannel][jCh] = L_shl( singularVectors[currChannel][jCh], temp_e ); sing_exp[jCh] = sub( sing_exp[jCh], temp_e ); #endif move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); Loading Loading @@ -1493,9 +1477,6 @@ static void biDiagonalReductionRight_fx( #ifdef FIX_1010_OPT_DIV invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); temp_e = norm_l( invVal ); invVal = L_shl( invVal, temp_e ); invVal_e = sub( invVal_e, temp_e ); #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ Loading @@ -1507,9 +1488,6 @@ static void biDiagonalReductionRight_fx( secDiag[jCh] = L_shl( singularVectors[currChannel][jCh], temp_e ); secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ secDiag_exp[jCh] = sub( invVal_e, temp_e ); temp_e = norm_l( secDiag[jCh] ); secDiag[jCh] = L_shl( secDiag[jCh], temp_e ); secDiag_exp[jCh] = sub( secDiag_exp[jCh], temp_e ); move16(); #endif move32(); Loading Loading @@ -1791,7 +1769,7 @@ static void singularVectorsAccumulationRight_fx( #ifndef M_PI #define M_PI 3.141592653589793 #endif #define NUM_REGIONS 32 #define NUM_REGIONS 128 static Word32 alphaBeta[NUM_REGIONS][2]; static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta ) { Loading Loading @@ -1830,10 +1808,10 @@ static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 * p = L_shl( p, shift ); p_e = sub( p_e, shift ); shift = sub( q_e, p_e ); r = shl( div_s( extract_h( q ), extract_h( p ) ), shift ); r = shl_sat( div_s( extract_h( q ), extract_h( p ) ), shift ); /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */ r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) ); r = s_min( s_max( 0, shr( r, 4 + 3 ) ), NUM_REGIONS - 1 ); r = s_min( s_max( 0, shr( r, WORD16_BITS-1-7-3 ) ), NUM_REGIONS - 1 ); #endif assert( ( r >= 0 ) && ( r < NUM_REGIONS ) ); *alpha = alphaBeta[r][0]; Loading Loading
lib_dec/ivas_svd_dec.c +10 −32 Original line number Diff line number Diff line Loading @@ -56,7 +56,7 @@ #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV //#define FIX_1010_OPT_GIVENS_AMAX_BMIN #define FIX_1010_OPT_GIVENS_AMAX_BMIN #endif /*-----------------------------------------------------------------------* Loading Loading @@ -997,7 +997,7 @@ static void ApplyQRTransform_fx( singularValues[ch] = GivensRotation_fx( d, d_e, r, r_e, &singularValues_e[ch] ); /* exp(singularValues_e) */ move32(); #endif IF( GT_32( L_abs( singularValues[ch] ), Mpy_32_32( CONVERGENCE_FACTOR_FX, L_abs( singularValues[ch] ) ) ) ) IF (singularValues[ch] != 0) { #ifndef FIX_1010_OPT_GIVENS_INV aux = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, singularValues[ch], &aux_e ); /* exp(aux_e + (1 - singularValues_e)) */ Loading Loading @@ -1154,7 +1154,7 @@ static void HouseholderReduction_fx( #ifdef FIX_1010_OPT_DIV static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) { Word16 sign, shift; Word16 sign, shift, shift2; sign = 0; move16(); Loading @@ -1173,7 +1173,9 @@ static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) move16(); x = ISqrt32norm( x, px_e ); x = Mpy_32_32( x, x ); *px_e = add( shl( *px_e, 1 ), shift ); shift2 = norm_l( x ); x = L_shl( x, shift2 ); *px_e = add( shl( *px_e, 1 ), sub(shift, shift2) ); move16(); if ( sign ) Loading Loading @@ -1252,9 +1254,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); temp_e = norm_l( invVal ); invVal = L_shl( invVal, temp_e ); invVal_e = sub( invVal_e, temp_e ); #endif norm_x = 0; move32(); Loading @@ -1269,9 +1268,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e ); singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ sing_exp[jCh] = sub( invVal_e, temp_e ); temp_e = norm_l( singularVectors[jCh][currChannel] ); singularVectors[jCh][currChannel] = L_shl( singularVectors[jCh][currChannel], temp_e ); sing_exp[jCh] = sub( sing_exp[jCh], temp_e ); move16(); #endif move32(); Loading Loading @@ -1308,9 +1304,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifdef FIX_1010_OPT_DIV invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); temp_e = norm_l( invVal ); invVal = L_shl( invVal, temp_e ); invVal_e = sub( invVal_e, temp_e ); #endif FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ Loading @@ -1330,9 +1323,6 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #else f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ f_e = add( invVal_e, sub( norm_x_e, r_e ) ); temp_e = norm_l( f ); f = L_shl( f, temp_e ); f_e = sub( f_e, temp_e ); #endif FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ Loading Loading @@ -1442,13 +1432,10 @@ static void biDiagonalReductionRight_fx( #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); temp_e = norm_l( invVal ); invVal = L_shl( invVal, temp_e ); invVal_e = sub( invVal_e, temp_e ); #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { #ifndef FIX_1010_OPT_DIV_no #ifndef FIX_1010_OPT_DIV singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ #else temp_e = norm_l( singularVectors[currChannel][jCh] ); Loading @@ -1456,9 +1443,6 @@ static void biDiagonalReductionRight_fx( singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ sing_exp[jCh] = sub( invVal_e, temp_e ); move16(); temp_e = norm_l( singularVectors[currChannel][jCh] ); singularVectors[currChannel][jCh] = L_shl( singularVectors[currChannel][jCh], temp_e ); sing_exp[jCh] = sub( sing_exp[jCh], temp_e ); #endif move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); Loading Loading @@ -1493,9 +1477,6 @@ static void biDiagonalReductionRight_fx( #ifdef FIX_1010_OPT_DIV invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); temp_e = norm_l( invVal ); invVal = L_shl( invVal, temp_e ); invVal_e = sub( invVal_e, temp_e ); #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ Loading @@ -1507,9 +1488,6 @@ static void biDiagonalReductionRight_fx( secDiag[jCh] = L_shl( singularVectors[currChannel][jCh], temp_e ); secDiag[jCh] = Mpy_32_32( secDiag[jCh], invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ secDiag_exp[jCh] = sub( invVal_e, temp_e ); temp_e = norm_l( secDiag[jCh] ); secDiag[jCh] = L_shl( secDiag[jCh], temp_e ); secDiag_exp[jCh] = sub( secDiag_exp[jCh], temp_e ); move16(); #endif move32(); Loading Loading @@ -1791,7 +1769,7 @@ static void singularVectorsAccumulationRight_fx( #ifndef M_PI #define M_PI 3.141592653589793 #endif #define NUM_REGIONS 32 #define NUM_REGIONS 128 static Word32 alphaBeta[NUM_REGIONS][2]; static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 *alpha, Word32 *beta ) { Loading Loading @@ -1830,10 +1808,10 @@ static void get_alpha_beta( Word32 p, Word16 p_e, Word32 q, Word16 q_e, Word32 * p = L_shl( p, shift ); p_e = sub( p_e, shift ); shift = sub( q_e, p_e ); r = shl( div_s( extract_h( q ), extract_h( p ) ), shift ); r = shl_sat( div_s( extract_h( q ), extract_h( p ) ), shift ); /* Second order polyfit of atan(r)/(pi/4) for r=0..1 */ r = add( add( mult( mult( r, r ), FL2WORD16_SCALE( -3.672563685340096e-01, 3 ) ), mult( r, FL2WORD16_SCALE( 1.375369641423651e+00, 3 ) ) ), FL2WORD16_SCALE( -6.529424378422714e-03, 3 ) ); r = s_min( s_max( 0, shr( r, 4 + 3 ) ), NUM_REGIONS - 1 ); r = s_min( s_max( 0, shr( r, WORD16_BITS-1-7-3 ) ), NUM_REGIONS - 1 ); #endif assert( ( r >= 0 ) && ( r < NUM_REGIONS ) ); *alpha = alphaBeta[r][0]; Loading