diff --git a/lib_com/options.h b/lib_com/options.h
index 6857f4c95f6ddf7ed7a940cc4f416c5047ef4d05..0b037291c2a9cb60af68ba2a2642b86c869733e0 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -97,10 +97,11 @@
 #define FIX_ISSUE_1817_REPLACE_CARRY_OVERFLOW           /* FhG: bit-exact, replace carry and overflow operations by 64-bit operations, MR 1931 */
 #define FIX_1844_MISSING_FREE                           /* FhG: add missing free in ivas_binRenderer_convModuleClose_fx() */
+#define NONBE_SVD_OPTIMIZATION                          /* FhG: reduce WMOPS of HouseholderReduction_fx() in ivas_svd_dec_fx.c by removing redundant mathematics and using 64-bit additions */
 
 /* #################### Start BASOP porting switches ############################ */
 
 #define FIX_1372_ISAR_POST_REND
 #define NONBE_FIX_984_OMASA_EXT_OUTPUT                  /* Nokia: issue #984: complete the OMASA EXT output implementation */
 #define USE_NEW_HRTF_BINARY_FILE_FORMAT                 /* Orange: to activate when decided to change the hrtf binary file format */
 #define FIX_WARNING_RENDER_CONFIG                       /* Orange: fix warning on windows build */
 
diff --git a/lib_dec/ivas_mc_param_dec_fx.c b/lib_dec/ivas_mc_param_dec_fx.c
index 230c210b76eb405e184962180855d12fc95c1082..73ba585d157ec29c7d3dddba982306b4b6482385 100644
--- a/lib_dec/ivas_mc_param_dec_fx.c
+++ b/lib_dec/ivas_mc_param_dec_fx.c
@@ -1799,8 +1799,22 @@ void ivas_param_mc_dec_digest_tc_fx(
         test();
         IF( hParamMC->hMetadataPMC->bAttackPresent && ( EQ_32( hParamMC->synthesis_conf, PARAM_MC_SYNTH_LS_CONV_COV ) || EQ_32( hParamMC->synthesis_conf, PARAM_MC_SYNTH_MONO_STEREO ) ) )
         {
+#ifdef NONBE_SVD_OPTIMIZATION
+            Word16 len = imult1616( nchan_transport, nchan_transport ); /* number of matrix entries in cx_fx / cx_next_band_fx */
+            Word16 sc = s_min( getScaleFactor32( cx_fx, len ), getScaleFactor32( cx_next_band_fx, len ) ); /* smaller scale factor of the two buffers */
+            IF( EQ_16( sc, 0 ) )
+            {
+                Scale_sig32( cx_fx, len, -Q1 );             // add one bit head room
+                Scale_sig32( cx_next_band_fx, len, -Q1 );   // add one bit head room
+                cx_e = add( cx_e, Q1 );                     // exponent follows the one-bit right shift of cx_fx
+                cx_next_band_e = add( cx_next_band_e, Q1 ); // own exponent + 1; using the already incremented cx_e here would add two
+            }
+            v_add_fx( cx_fx, cx_next_band_fx, cx_fx, len );
+            Copy32( cx_fx, cx_next_band_fx, len );
+#else
             v_add_fx( cx_fx, cx_next_band_fx, cx_fx, imult1616( nchan_transport, nchan_transport ) );
             Copy32( cx_fx, cx_next_band_fx, imult1616( nchan_transport, nchan_transport ) );
+#endif
         }
 
         FOR( is_next_band = 0; is_next_band < 2; is_next_band++ )
diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 3d9d846a614a0d6d4e6933bae5eebfb5b65a8ba1..6471becde5e16a811711e7e8be79234b6d74d8bb 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -29,7 +29,6 @@
    the United Nations Convention on Contracts on the International Sales of Goods.
 
*******************************************************************************************************/ - #include #include "options.h" #include "prot_fx.h" @@ -65,7 +64,26 @@ static void HouseholderReduction_fx( const Word16 nChannelsC, /* Q0 */ Word32 *eps_x_fx, /* exp(eps_x_fx_e) */ Word16 *eps_x_fx_e ); +#ifdef NONBE_SVD_OPTIMIZATION + +static void biDiagonalReductionLeft_fx( + Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ + Word16 singularValues_e[][MAX_OUTPUT_CHANNELS], /* Q0 */ + const Word16 nChannelsL, + const Word16 nChannelsC, /* Q0 */ + const Word16 currChannel, /* Q0 */ + Word32 *g, + Word16 *g_e ); +static void biDiagonalReductionRight_fx( + Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ + Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], + const Word16 nChannelsL, /* Q0 */ + const Word16 nChannelsC, /* Q0 */ + const Word16 currChannel, /* Q0 */ + Word32 *g, /* Q31 */ + Word16 *g_e ); +#else static void biDiagonalReductionLeft_fx( Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */ @@ -93,6 +111,7 @@ static void biDiagonalReductionRight_fx( Word16 *sig_x_e, Word32 *g /* Q31 */ ); // Q31 +#endif static void singularVectorsAccumulationLeft_fx( Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */ @@ -822,15 +841,89 @@ static void HouseholderReduction_fx( Word16 *eps_x_fx_e ) { Word16 nCh; +#ifdef NONBE_SVD_OPTIMIZATION + + Word32 g_left_fx = 0; + Word16 g_left_e = 0; + move32(); + move16(); + Word32 g_right_fx = 0; + Word16 g_right_e = 0; + move32(); + move16(); + +#else + // float g = 0.0f, sig_x = 0.0f;// to be removed Word32 g_fx = 0, sig_x_fx = 0; move32(); move32(); Word16 sig_x_fx_e = 0; move16(); +#endif Word16 iCh, jCh; Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + +#ifdef NONBE_SVD_OPTIMIZATION + Word16 sc = 0; 
+ move16(); + sc = getScaleFactor32( singularVectors_Left_fx[0], nChannelsC ); + FOR( jCh = 1; jCh < nChannelsL; jCh++ ) + { + sc = s_min( sc, getScaleFactor32( singularVectors_Left_fx[jCh], nChannelsC ) ); + } + FOR( jCh = 0; jCh < nChannelsL; jCh++ ) + { + Scale_sig32( singularVectors_Left_fx[jCh], nChannelsC, sc ); + FOR( iCh = 0; iCh < nChannelsC; iCh++ ) + { + singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e - sc; + move16(); + } + } + + FOR( nCh = 0; nCh < nChannelsC; nCh++ ) + { + secDiag_fx[nCh] = g_right_fx; /* from the previous channel */ + move32(); + secDiag_fx_e[nCh] = g_right_e; + + biDiagonalReductionLeft_fx( + singularVectors_Left_fx, + singularVectors_Left_fx_e, + nChannelsL, + nChannelsC, + nCh, + &g_left_fx, + &g_left_e ); + + singularValues_fx[nCh] = g_left_fx; + move32(); + singularValues_fx_e[nCh] = g_left_e; + + biDiagonalReductionRight_fx( + singularVectors_Left_fx, + singularVectors_Left_fx_e, + nChannelsL, + nChannelsC, + nCh, + &g_right_fx, + &g_right_e ); + + Word16 L_temp_e; + Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */ + IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) ) + { + *eps_x_fx = L_temp; /* exp(L_temp_e) */ + move32(); + *eps_x_fx_e = L_temp_e; + move32(); + } + } + +#else + FOR( jCh = 0; jCh < nChannelsL; jCh++ ) { FOR( iCh = 0; iCh < nChannelsC; iCh++ ) @@ -856,6 +949,7 @@ static void HouseholderReduction_fx( move32(); } } +#endif /* SingularVecotr Accumulation */ singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC ); @@ -866,6 +960,204 @@ static void HouseholderReduction_fx( return; } +#ifdef NONBE_SVD_OPTIMIZATION +/*------------------------------------------------------------------------- + * biDiagonalReductionLeft() + * + * + 
*-------------------------------------------------------------------------*/ +static void biDiagonalReductionLeft_fx( + Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ + Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], /* Q0 */ + const Word16 nChannelsL, + const Word16 nChannelsC, /* Q0 */ + const Word16 currChannel, /* Q0 */ + Word32 *g, + Word16 *g_e ) +{ + Word16 iCh, jCh; + Word32 norm_x, f, r; + Word16 norm_x_e, f_e, r_e; + Word32 L_temp; + Word16 L_temp_e; + + /* Setting values to 0 */ + *g = 0; + *g_e = 0; + move32(); + move16(); + + IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ + { + Word64 temp = 0; + move64(); + norm_x = 0; + move32(); + norm_x_e = 0; + move16(); + Word16 max_e = MIN_16; + move16(); + FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + max_e = s_max( max_e, singularVectors_e[jCh][currChannel] ); + } + + FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors_e[jCh][currChannel] ), 1 ) ) ); + } + + Word16 nrm = W_norm( temp ); + nrm = sub( nrm, 32 ); + norm_x = W_shl_sat_l( temp, nrm ); + norm_x_e = sub( add( max_e, max_e ), nrm ); + + IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ + { + Word16 invVal_e; + Word32 invVal; + + L_temp_e = norm_x_e; + move16(); + L_temp = Sqrt32( norm_x, &L_temp_e ); + //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? 
L_temp : L_negate( L_temp ) ); + if ( singularVectors[currChannel][currChannel] >= 0 ) + { + L_temp = L_negate( L_temp ); + move32(); + } + *g = L_temp; + move32(); + *g_e = L_temp_e; + move16(); + + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + ( *g_e ), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */ + invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); + + FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ + { + Word16 max2_e = MIN_16; + max_e = MIN_16; + move16(); + move16(); + temp = 0; + move64(); + + FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + max_e = s_max( max_e, singularVectors_e[jCh][currChannel] ); /* exp(norm_x_e) */ + max2_e = s_max( max2_e, singularVectors_e[jCh][iCh] ); /* exp(norm_x_e) */ + } + max_e = add( max_e, max2_e ); + + FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors_e[jCh][currChannel], singularVectors_e[jCh][iCh] ) ) ) ); + } + Word16 nrm = W_norm( temp ); + nrm = sub( nrm, 32 ); + norm_x = W_shl_sat_l( temp, nrm ); + norm_x_e = sub( max_e, nrm ); + + f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ + f_e = add( invVal_e, sub( norm_x_e, r_e ) ); + + FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors_e[jCh][currChannel] ), &singularVectors_e[jCh][iCh] 
); + } + } + } + } + return; +} + +static void biDiagonalReductionRight_fx( + Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */ + Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], + const Word16 nChannelsL, /* Q0 */ + const Word16 nChannelsC, /* Q0 */ + const Word16 currChannel, /* Q0 */ + Word32 *g, /* Q31 */ + Word16 *g_e ) +{ + Word16 iCh, jCh, idx; + Word32 norm_x, r; + Word16 norm_x_e, r_e; + Word32 L_temp; + Word16 L_temp_e; + + /* Setting values to 0 */ + *g = 0; + *g_e = 0; + move32(); + move16(); + IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */ + { + idx = add( currChannel, 1 ); /* Q0 */ + + norm_x = 0; + move32(); + norm_x_e = 0; + move16(); + FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */ + { + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */ + } + + IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ + { + Word16 invVal_e; + Word32 invVal; + + L_temp_e = norm_x_e; + move16(); + L_temp = Sqrt32( norm_x, &L_temp_e ); + // L_temp = L_shl_r( L_temp, L_temp_e ); // Q31 + IF( singularVectors[currChannel][idx] >= 0 ) + { + ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */ + move32(); + } + ELSE + { + ( *g ) = L_temp; /* exp(L_temp_e) */ + move32(); + } + *g_e = L_temp_e; + move16(); + + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + ( *g_e ), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */ + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors_e[currChannel][idx], -( *g ), *g_e, &singularVectors_e[currChannel][idx] ); /* exp(sing_exp) */ + + invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); + + FOR( iCh = currChannel + 1; 
iCh < nChannelsL; iCh++ ) /* nChannelsL */ + { + norm_x = 0; + move32(); + norm_x_e = 0; + move16(); + FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ + { + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors_e[iCh][jCh], singularVectors_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */ + } + + norm_x = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ + norm_x_e = add( invVal_e, sub( norm_x_e, r_e ) ); + + FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */ + { + singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors_e[currChannel][jCh] ), &singularVectors_e[iCh][jCh] ); /* exp(sing_exp2) */ + } + } + } + } + + return; +} +#else /*------------------------------------------------------------------------- * biDiagonalReductionLeft() * @@ -1149,6 +1441,7 @@ static void biDiagonalReductionRight_fx( return; } +#endif /*------------------------------------------------------------------------- * singularVectorsAccumulationLeft()