Loading lib_dec/ivas_svd_dec.c +54 −11 Original line number Diff line number Diff line Loading @@ -53,6 +53,7 @@ #if 1 #define FIX_1010_OPT_DIV // #define FIX_1010_OPT_INV_USING_INVSQRT #define FIX_1010_OPT_SINGLE_RESCALE #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV Loading Loading @@ -1201,7 +1202,7 @@ static void HouseholderReduction_fx( * *-------------------------------------------------------------------------*/ #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) { Word16 sign, shift, shift2; Loading Loading @@ -1314,8 +1315,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifdef FIX_1010_OPT_DIV Word16 invVal_e; Word32 invVal; /* BASOP_Util_Inv32 is not accurate enough in this case. */ #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); #endif #endif norm_x = 0; move32(); Loading @@ -1324,11 +1328,19 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { #ifndef FIX_1010_OPT_DIV #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); singularVectors2_e[jCh][currChannel] = add( 
L_temp_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ); move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #endif #else Word16 temp_e = norm_l( singularVectors[jCh][currChannel] ); singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ Loading Loading @@ -1377,7 +1389,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move32(); #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); #endif #endif FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ Loading Loading @@ -1529,21 +1545,33 @@ static void biDiagonalReductionRight_fx( #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); Word32 invVal; #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); #endif #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { #ifndef FIX_1010_OPT_DIV #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else 
singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ move32(); singularVectors2_e[currChannel][jCh] = add( L_temp_e, sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) ); move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #endif #else temp_e = norm_l( singularVectors[currChannel][jCh] ); singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) ); move16(); Loading Loading @@ -1587,16 +1615,27 @@ static void biDiagonalReductionRight_fx( move32(); #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); #endif #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { #ifndef FIX_1010_OPT_DIV #ifndef FIX_1010_OPT_SINGLE_RESCALE secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ move32(); secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) ); move32(); #else secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ move32(); secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( singularVectors2_e[currChannel][jCh], r_e ) ); move32(); #endif #else temp_e = norm_l( singularVectors[currChannel][jCh] ); secDiag[jCh] = 
Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ Loading Loading @@ -1736,7 +1775,11 @@ static void singularVectorsAccumulationLeft_fx( IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/ { #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp ); #else t_ii = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp ); #endif t_ii_e = sub( temp_exp, t_ii_e ); #else t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */ Loading @@ -1753,7 +1796,7 @@ static void singularVectorsAccumulationLeft_fx( { norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ } #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] ); t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp ); t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj ); Loading Loading
lib_dec/ivas_svd_dec.c +54 −11 Original line number Diff line number Diff line Loading @@ -53,6 +53,7 @@ #if 1 #define FIX_1010_OPT_DIV // #define FIX_1010_OPT_INV_USING_INVSQRT #define FIX_1010_OPT_SINGLE_RESCALE #define FIX_1010_OPT_GIVENS #define FIX_1010_OPT_GIVENS_INV Loading Loading @@ -1201,7 +1202,7 @@ static void HouseholderReduction_fx( * *-------------------------------------------------------------------------*/ #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT static Word32 BASOP_Util_Inv32( Word32 x, Word16 *px_e ) { Word16 sign, shift, shift2; Loading Loading @@ -1314,8 +1315,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ #ifdef FIX_1010_OPT_DIV Word16 invVal_e; Word32 invVal; /* BASOP_Util_Inv32 is not accurate enough in this case. */ #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); #endif #endif norm_x = 0; move32(); Loading @@ -1324,11 +1328,19 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { #ifndef FIX_1010_OPT_DIV #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); singularVectors2_e[jCh][currChannel] = add( L_temp_e, sub( 
singularVectors2_e[jCh][currChannel], *sig_x_e ) ); move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #endif #else Word16 temp_e = norm_l( singularVectors[jCh][currChannel] ); singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ Loading Loading @@ -1377,7 +1389,11 @@ IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ move32(); #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); #endif #endif FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ Loading Loading @@ -1529,21 +1545,33 @@ static void biDiagonalReductionRight_fx( #ifdef FIX_1010_OPT_DIV Word16 invVal_e, temp_e; Word32 invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); Word32 invVal; #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( *sig_x ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); #endif #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ { #ifndef FIX_1010_OPT_DIV #ifndef FIX_1010_OPT_SINGLE_RESCALE singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ move32(); sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #else 
singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( *sig_x ), &L_temp_e ); /* exp(sing_exp + (singularVectors_e - sig_x_e)) */ move32(); singularVectors2_e[currChannel][jCh] = add( L_temp_e, sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) ); move16(); norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ #endif #else temp_e = norm_l( singularVectors[currChannel][jCh] ); singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ move32(); #ifndef FIX_1010_OPT_SINGLE_RESCALE sing_exp[jCh] = add( sub( invVal_e, temp_e ), sub( *singularVectors_e, *sig_x_e ) ); move16(); Loading Loading @@ -1587,16 +1615,27 @@ static void biDiagonalReductionRight_fx( move32(); #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT invVal = BASOP_Util_Inv32( maxWithSign_fx( r ), &invVal_e ); #else invVal = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); #endif #endif FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ { #ifndef FIX_1010_OPT_DIV #ifndef FIX_1010_OPT_SINGLE_RESCALE secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ move32(); secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e ) ); move32(); #else secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[currChannel][jCh], maxWithSign_fx( r ), &secDiag_exp[jCh] ); /* exp(secDiag_exp + (sing_exp - r_e) */ move32(); secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( singularVectors2_e[currChannel][jCh], r_e ) ); move32(); #endif #else temp_e = norm_l( singularVectors[currChannel][jCh] ); secDiag[jCh] = 
Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */ Loading Loading @@ -1736,7 +1775,11 @@ static void singularVectorsAccumulationLeft_fx( IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/ { #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT t_ii = BASOP_Util_Inv32( maxWithSign_fx( t_ii ), &temp_exp ); #else t_ii = BASOP_Util_Divide3232_Scale_cadence( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp ); #endif t_ii_e = sub( temp_exp, t_ii_e ); #else t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */ Loading @@ -1753,7 +1796,7 @@ static void singularVectorsAccumulationLeft_fx( { norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ } #ifdef FIX_1010_OPT_DIV #ifdef FIX_1010_OPT_INV_USING_INVSQRT Word16 temp_e = norm_l( singularVectors_Left[nCh][nCh] ); t_jj = BASOP_Util_Inv32( maxWithSign_fx( L_shl( singularVectors_Left[nCh][nCh], temp_e ) ), &temp_exp ); t_jj = Mpy_32_32( Mpy_32_32( t_ii, norm_y ), t_jj ); Loading