diff --git a/lib_basop/enh64.h b/lib_basop/enh64.h index c3896bb0d257aa053df48da5c84948b8255e0401..ab21d5b0c599c32edd409429d17d493371f50610 100644 --- a/lib_basop/enh64.h +++ b/lib_basop/enh64.h @@ -21,6 +21,108 @@ * *****************************************************************************/ #ifdef ENH_64_BIT_OPERATOR + + +/*______________________________________________________________________________ +| | +| Function Name : W_min | +| | +| Purpose : | +| | +| Compares L64_var1 and L64_var2 and returns the minimum value. | +| | +| Complexity weight : 1 | +| | +| Inputs : | +| | +| L64_var1 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | +| | +| L64_var2 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | +| | +| Outputs : | +| | +| none | +| | +| Return Value : | +| | +| L64_var_out | +| 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. | +|______________________________________________________________________________| +*/ +static __inline Word64 W_min( Word64 L64_var1, Word64 L64_var2 ) +{ + Word64 L64_var_out; + + if ( L64_var1 <= L64_var2 ) + { + L64_var_out = L64_var1; + } + else + { + L64_var_out = L64_var2; + } + +#ifdef WMOPS + multiCounter[currCounter].W_min++; +#endif /* ifdef WMOPS */ + + return ( L64_var_out ); +} + + +/*______________________________________________________________________________ +| | +| Function Name : W_max | +| | +| Purpose : | +| | +| Compares L64_var1 and L64_var2 and returns the maximum value. | +| | +| Complexity weight : 1 | +| | +| Inputs : | +| | +| L64_var1 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. 
| +| | +| L64_var2 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | +| | +| Outputs : | +| | +| none | +| | +| Return Value : | +| | +| L64_var_out | +| 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. | +|______________________________________________________________________________| +*/ +static __inline Word64 W_max( Word64 L64_var1, Word64 L64_var2 ) +{ + Word64 L64_var_out; + + if ( L64_var1 >= L64_var2 ) + { + L64_var_out = L64_var1; + } + else + { + L64_var_out = L64_var2; + } + +#ifdef WMOPS + multiCounter[currCounter].W_max++; +#endif /* ifdef WMOPS */ + + return ( L64_var_out ); +} + + Word64 W_add_nosat( Word64 L64_var1, Word64 L64_var2 ); Word64 W_sub_nosat( Word64 L64_var1, Word64 L64_var2 ); Word64 W_shl( Word64 L64_var1, Word16 var2 ); diff --git a/lib_com/options.h b/lib_com/options.h index 8171bdec930d790d42d6918da7c559b591170fed..ec3d1882f07e64a963fc0015b950983c79117cc9 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -144,4 +144,11 @@ /* #################### End BASOP porting switches ############################ */ +/* #################### Start BASOP optimization switches ############################ */ + +#define NONBE_2157_INPUT_COV_MATRICES /* Dolby: task 2157: optimize ivas_dirac_dec_binaural_formulate_input_covariance_matrices */ +/* #define NONBE_2157_ACCURACY_ANALYSIS */ /* Dolby: task 2157: accuracy analysis */ + +/* #################### End BASOP optimization switches ############################ */ + #endif diff --git a/lib_debug/wmc_auto.c b/lib_debug/wmc_auto.c index 5afd9de166568531c15df05978d588430d1cad77..9d4d573f0e387ae9ca79d7cc1e651efaef16532d 100644 --- a/lib_debug/wmc_auto.c +++ b/lib_debug/wmc_auto.c @@ -133,7 +133,7 @@ static BASIC_OP op_weight = { #ifdef ENH_64_BIT_OPERATOR /* Weights of new 64 bit basops */ , - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 #endif /* #ifdef ENH_64_BIT_OPERATOR */ #ifdef ENH_32_BIT_OPERATOR diff --git a/lib_debug/wmc_auto.h b/lib_debug/wmc_auto.h index 64e2c751a9261c0e8e02c147f194047f61b83786..6dff36f50d8ab69de0defd7d07561186782a8b19 100644 --- a/lib_debug/wmc_auto.h +++ b/lib_debug/wmc_auto.h @@ -877,6 +877,8 @@ typedef struct /* New 64 bit basops */ #ifdef ENH_64_BIT_OPERATOR unsigned int move64; /* Complexity Weight of 1 */ + unsigned int W_min; /* Complexity Weight of 1 */ + unsigned int W_max; /* Complexity Weight of 1 */ unsigned int W_add_nosat; /* Complexity Weight of 1 */ unsigned int W_sub_nosat; /* Complexity Weight of 1 */ unsigned int W_shl; /* Complexity Weight of 1 */ diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 46494667d4e481d34a056d81bddcfffe0a16524c..1ca047c07ffc16ab206ba505e21597da33895311 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -98,7 +98,7 @@ static void ivas_dirac_dec_binaural_internal_fx( Decoder_Struct *st_ivas, COMBIN static void ivas_dirac_dec_decorrelate_slot_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, const Word16 num_freq_bands, const Word16 slot, Word32 inRe[][CLDFB_SLOTS_PER_SUBFRAME] /*q_inp*/[CLDFB_NO_CHANNELS_MAX], Word32 inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX] /*q_inp*/, const Word16 q_inp, Word32 decRe[][CLDFB_NO_CHANNELS_MAX], Word32 decIm[][CLDFB_NO_CHANNELS_MAX], Word16 *q_out ); -static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, PARAMBIN_REND_CONFIG_HANDLE hConfig, Word32 inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], Word32 inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], 
const Word16 subframe, Word32 *subFrameTotalEne, Word16 *subFrameTotalEne_e, Word32 *IIReneLimiter, const Word16 q ); +static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, PARAMBIN_REND_CONFIG_HANDLE hConfig, Word32 inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], Word32 inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], const Word16 subframe, Word32 *subFrameTotalEne_fx, Word16 *subFrameTotalEne_e, Word32 *IIReneLimiter, const Word16 q ); static void ivas_dirac_dec_binaural_formulate_target_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, const SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, const PARAMBIN_REND_CONFIG_HANDLE hConfig, Word32 Rmat[3][3], const Word16 subframe, const Word16 isHeadtracked, const Word32 *subFrameTotalEne, Word16 *subFrameTotalEne_e, const Word32 *IIReneLimiter, const MASA_ISM_DATA_HANDLE hMasaIsmData ); @@ -122,7 +122,6 @@ static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 Ascale, Word32 Bscale, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); - /*------------------------------------------------------------------------- * ivas_dirac_dec_init_binaural_data() * @@ -197,15 +196,24 @@ ivas_error ivas_dirac_dec_init_binaural_data_fx( set16_fx( hDiracDecBin->processMtxDecRe_fx[j][k], 0, nBins ); set16_fx( hDiracDecBin->processMtxDecIm_fx[j][k], 0, nBins ); } +#ifdef NONBE_2157_INPUT_COV_MATRICES + set64_fx( hDiracDecBin->ChEnePrev[j], 0, nBins ); +#else set32_fx( 
hDiracDecBin->ChEnePrev_fx[j], 0, nBins ); - set32_fx( hDiracDecBin->ChEneOutPrev_fx[j], 0, nBins ); set16_fx( hDiracDecBin->ChEnePrev_e[j], 0, nBins ); +#endif + set32_fx( hDiracDecBin->ChEneOutPrev_fx[j], 0, nBins ); set16_fx( hDiracDecBin->ChEneOutPrev_e[j], 0, nBins ); } +#ifdef NONBE_2157_INPUT_COV_MATRICES + set64_fx( hDiracDecBin->ChCrossRePrev, 0, nBins ); + set64_fx( hDiracDecBin->ChCrossImPrev, 0, nBins ); +#else set32_fx( hDiracDecBin->ChCrossRePrev_fx, 0, nBins ); set32_fx( hDiracDecBin->ChCrossImPrev_fx, 0, nBins ); set16_fx( hDiracDecBin->ChCrossRePrev_e, 0, nBins ); set16_fx( hDiracDecBin->ChCrossImPrev_e, 0, nBins ); +#endif set32_fx( hDiracDecBin->ChCrossReOutPrev_fx, 0, nBins ); set32_fx( hDiracDecBin->ChCrossImOutPrev_fx, 0, nBins ); set16_fx( hDiracDecBin->ChCrossReOutPrev_e, 0, nBins ); @@ -1316,6 +1324,66 @@ static void ivas_dirac_dec_decorrelate_slot_fx( return; } +#ifdef NONBE_2157_INPUT_COV_MATRICES + +static UWord32 ceil_pow2( UWord32 x ); +static UWord32 popcnt( UWord32 x ); +static UWord32 ceil_log2( UWord32 x ); +static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ); + +static UWord32 ceil_pow2( UWord32 x ) +{ + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + ++x; + return x; +} + +static UWord32 popcnt( UWord32 x ) +{ + // returns the number of set bits in x; it takes one cycle (presumably on the target processor - TODO confirm) + x = ( x & 0x55555555 ) + ( ( x & 0xAAAAAAAA ) >> 1 ); + x = ( x & 0x33333333 ) + ( ( x & 0xCCCCCCCC ) >> 2 ); + x = ( x & 0x0F0F0F0F ) + ( ( x & 0xF0F0F0F0 ) >> 4 ); + x = ( x & 0x00FF00FF ) + ( ( x & 0xFF00FF00 ) >> 8 ); + x = ( x & 0x0000FFFF ) + ( ( x & 0xFFFF0000 ) >> 16 ); + return x; +} + +static UWord32 ceil_log2( UWord32 x ) +{ + if ( x ) + { + return popcnt( ceil_pow2( x ) - 1 ); + } + else + { + return 0; + } +} + +static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) +{ + Word32 var1_l; + Word64 var_out; + var1_l = W_extract_l( W_var1 ); // 1 + var_out = W_mult0_32_32( L_and( var1_l, 1 ), L_var2 ); // 2 + var_out = W_mac_32_32( var_out, L_lshr( 
var1_l, 1 ), L_var2 ); // 2 + var_out = W_mac_32_32( W_shr( var_out, 31 ), W_extract_h( W_var1 ), L_var2 ); // 3 + return var_out; +} + +#ifdef NONBE_2157_ACCURACY_ANALYSIS +#include "string.h" +#define NONBE_2157_USE_CURRENT_VERSION +#endif +#else +#define NONBE_2157_USE_CURRENT_VERSION +#endif static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, @@ -1338,9 +1406,28 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( IVAS_FORMAT ivas_format; Word32 ivas_total_brate; Word16 nchan_transport; - Word16 exp, exp1; Word64 temp64; +#ifdef NONBE_2157_INPUT_COV_MATRICES + Word64 subFrameTotalEne[CLDFB_NO_CHANNELS_MAX]; + Word16 rsh0, rsh1; +#ifdef NONBE_2157_ACCURACY_ANALYSIS + double inRe[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; + double inIm[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; + float fSubFrameTotalEne[CLDFB_NO_CHANNELS_MAX]; + double dSubFrameTotalEne[CLDFB_NO_CHANNELS_MAX]; + float fIIReneLimiter[CLDFB_NO_CHANNELS_MAX]; + double dIIReneLimiter[CLDFB_NO_CHANNELS_MAX]; + Word32 IIReneLimiter[CLDFB_NO_CHANNELS_MAX]; +#else + Word16 e0, e1; + Word32 *IIReneLimiter; + IIReneLimiter = IIReneLimiter_fx; +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + Word16 exp, exp1; Word32 temp; +#endif ivas_format = hConfig->ivas_format; move32(); @@ -1355,21 +1442,61 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( nBins = hSpatParamRendCom->num_freq_bands; /* Actually bins */ move16(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + { + // CONVERT FIXED-POINT INPUTS INTO DOUBLE-PRECISION FLOATING-POINT + // The double-precision mantissa is big enough to hold the int32_t values. 
+ double maxRe = 0.0; + double maxIm = 0.0; + for ( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ ) + { + for ( bin = 0; bin < nBins; bin++ ) + { + for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) + { + inRe[ch][slot][bin] = ldexp( (double) inRe_fx[ch][slot][bin], -q ); + maxRe = fmax( maxRe, fabs( inRe[ch][slot][bin] ) ); + inIm[ch][slot][bin] = ldexp( (double) inIm_fx[ch][slot][bin], -q ); + maxIm = fmax( maxIm, fabs( inIm[ch][slot][bin] ) ); + } + } + } + // printf("Abs max value: re:%+.6e, im:%+.6e\n", maxRe, maxIm); + } +#endif + +#ifdef NONBE_2157_INPUT_COV_MATRICES + set64_fx( hDiracDecBin->ChCrossRe, 0, nBins ); + set64_fx( hDiracDecBin->ChCrossIm, 0, nBins ); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + memset( hDiracDecBin->fChCrossRe, 0, nBins * sizeof( float ) ); + memset( hDiracDecBin->fChCrossIm, 0, nBins * sizeof( float ) ); + memset( hDiracDecBin->dChCrossRe, 0, nBins * sizeof( double ) ); + memset( hDiracDecBin->dChCrossIm, 0, nBins * sizeof( double ) ); +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION set32_fx( hDiracDecBin->ChCrossRe_fx, 0, nBins ); set32_fx( hDiracDecBin->ChCrossIm_fx, 0, nBins ); - set16_fx( hDiracDecBin->ChCrossRe_e, 0, nBins ); set16_fx( hDiracDecBin->ChCrossIm_e, 0, nBins ); +#endif FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { +#ifdef NONBE_2157_INPUT_COV_MATRICES + set64_fx( hDiracDecBin->ChEne[ch], 0, nBins ); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + memset( hDiracDecBin->fChEne[ch], 0, nBins * sizeof( float ) ); + memset( hDiracDecBin->dChEne[ch], 0, nBins * sizeof( double ) ); +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION set32_fx( hDiracDecBin->ChEne_fx[ch], 0, nBins ); - set16_fx( hDiracDecBin->ChEne_e[ch], 0, nBins ); +#endif } - set16_fx( subFrameTotalEne_e, 0, CLDFB_NO_CHANNELS_MAX ); - /* Determine EQ for low bit rates (13.2 and 16.4 kbps) */ applyLowBitRateEQ = 0; move16(); @@ -1398,9 +1525,31 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( } /* Formulate 
input and target covariance matrices for this subframe */ +#ifdef NONBE_2157_INPUT_COV_MATRICES + set64_fx( subFrameTotalEne, 0, CLDFB_NO_CHANNELS_MAX ); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + memset( fSubFrameTotalEne, 0, CLDFB_NO_CHANNELS_MAX * sizeof( float ) ); + memset( dSubFrameTotalEne, 0, CLDFB_NO_CHANNELS_MAX * sizeof( double ) ); + memset( fIIReneLimiter, 0, CLDFB_NO_CHANNELS_MAX * sizeof( float ) ); + memset( dIIReneLimiter, 0, CLDFB_NO_CHANNELS_MAX * sizeof( double ) ); +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION set32_fx( subFrameTotalEne_fx, 0, CLDFB_NO_CHANNELS_MAX ); + set16_fx( subFrameTotalEne_e, 0, CLDFB_NO_CHANNELS_MAX ); +#endif +#ifdef NONBE_2157_INPUT_COV_MATRICES + rsh0 = (Word16) ( ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); + rsh1 = (Word16) ( ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); +#ifndef NONBE_2157_ACCURACY_ANALYSIS + e0 = add( sub( 63, shl( q, 1 ) ), rsh0 ); + e1 = add( sub( 63, shl( q, 1 ) ), rsh1 ); +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below +#endif /* Calculate input covariance matrix */ FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ ) @@ -1409,31 +1558,86 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( { FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { - Word32 instEne_fx; temp64 = W_mult0_32_32( inRe_fx[ch][slot][bin], inRe_fx[ch][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[ch][slot][bin], inIm_fx[ch][slot][bin] ) ); // 2q +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION + hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 + subFrameTotalEne[bin] = W_add( subFrameTotalEne[bin], W_shr( temp64, rsh1 ) ); // 2q - rsh1 + move64(); + move64(); +#ifdef 
NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + float fInstEne; + fInstEne = ( (float) inRe[ch][slot][bin] * (float) inRe[ch][slot][bin] ); + fInstEne += ( (float) inIm[ch][slot][bin] * (float) inIm[ch][slot][bin] ); + hDiracDecBin->fChEne[ch][bin] += fInstEne; + fSubFrameTotalEne[bin] += fInstEne; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + double dInstEne; + dInstEne = ( inRe[ch][slot][bin] * inRe[ch][slot][bin] ); + dInstEne += ( inIm[ch][slot][bin] * inIm[ch][slot][bin] ); + hDiracDecBin->dChEne[ch][bin] += dInstEne; + dSubFrameTotalEne[bin] += dInstEne; +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION + Word32 instEne_fx; exp1 = W_norm( temp64 ); instEne_fx = W_extract_h( W_shl( temp64, exp1 ) ); // 2q - 32 + exp1 /* exp of instEne_fx = 31 - (2q -32 + exp1) = 63 - 2q - exp1 = exp - exp1*/ - hDiracDecBin->ChEne_fx[ch][bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], instEne_fx, sub( exp, exp1 ), &hDiracDecBin->ChEne_e[ch][bin] ); subFrameTotalEne_fx[bin] = BASOP_Util_Add_Mant32Exp( subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], instEne_fx, sub( exp, exp1 ), &subFrameTotalEne_e[bin] ); move32(); move32(); +#endif } temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inRe_fx[1][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ) ); // 2q +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION + hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 + move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChCrossRe[bin] += (float) inRe[0][slot][bin] * (float) inRe[1][slot][bin]; + hDiracDecBin->fChCrossRe[bin] += (float) inIm[0][slot][bin] * (float) inIm[1][slot][bin]; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + 
hDiracDecBin->dChCrossRe[bin] += inRe[0][slot][bin] * inRe[1][slot][bin]; + hDiracDecBin->dChCrossRe[bin] += inIm[0][slot][bin] * inIm[1][slot][bin]; +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION exp1 = W_norm( temp64 ); temp = W_extract_h( W_shl( temp64, exp1 ) ); // // 2q - 32 + exp1 hDiracDecBin->ChCrossRe_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], temp, sub( exp, exp1 ), &hDiracDecBin->ChCrossRe_e[bin] ); move32(); +#endif temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // 2q temp64 = W_sub( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inRe_fx[1][slot][bin] ) ); // 2q +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION + hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 + move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChCrossIm[bin] += (float) inRe[0][slot][bin] * (float) inIm[1][slot][bin]; + hDiracDecBin->fChCrossIm[bin] -= (float) inIm[0][slot][bin] * (float) inRe[1][slot][bin]; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChCrossIm[bin] += inRe[0][slot][bin] * inIm[1][slot][bin]; + hDiracDecBin->dChCrossIm[bin] -= inIm[0][slot][bin] * inRe[1][slot][bin]; +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION exp1 = W_norm( temp64 ); temp = W_extract_h( W_shl( temp64, exp1 ) ); // // 2q - 32 + exp1 hDiracDecBin->ChCrossIm_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], temp, sub( exp, exp1 ), &hDiracDecBin->ChCrossIm_e[bin] ); move32(); +#endif } } @@ -1444,13 +1648,25 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( FOR( bin = LOW_BIT_RATE_BINAURAL_EQ_OFFSET; bin < lastEqBin; bin++ ) { +#ifdef NONBE_2157_INPUT_COV_MATRICES + 
subFrameTotalEne[bin] = Mpy_64_32( subFrameTotalEne[bin], lowBitRateEQ_fx[bin] ); + move64(); +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION subFrameTotalEne_fx[bin] = Mpy_32_32( subFrameTotalEne_fx[bin], lowBitRateEQ_fx[bin] ); // exp = subFrameTotalEne_e[bin] move32(); +#endif } FOR( ; bin < nBins; bin++ ) { +#ifdef NONBE_2157_INPUT_COV_MATRICES + subFrameTotalEne[bin] = Mpy_64_32( subFrameTotalEne[bin], lowBitRateEQ_fx[lastEqBin] ); + move64(); +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION subFrameTotalEne_fx[bin] = Mpy_32_32( subFrameTotalEne_fx[bin], lowBitRateEQ_fx[lastEqBin] ); // exp = subFrameTotalEne_e[bin] move32(); +#endif } } @@ -1459,11 +1675,20 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( IF( ( EQ_32( ivas_format, SBA_FORMAT ) || EQ_32( ivas_format, SBA_ISM_FORMAT ) ) && EQ_16( nchan_transport, 2 ) ) { Word32 tempRe, tempIm; +#ifdef NONBE_2157_INPUT_COV_MATRICES + Word64 subFrameSumEne[CLDFB_NO_CHANNELS_MAX]; +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION Word32 subFrameSumEne_fx[CLDFB_NO_CHANNELS_MAX]; Word16 subFrameSumEne_e[CLDFB_NO_CHANNELS_MAX]; - +#endif +#ifdef NONBE_2157_INPUT_COV_MATRICES + set64_fx( subFrameSumEne, 0, CLDFB_NO_CHANNELS_MAX ); +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION set32_fx( subFrameSumEne_fx, 0, CLDFB_NO_CHANNELS_MAX ); set16_fx( subFrameSumEne_e, 0, CLDFB_NO_CHANNELS_MAX ); +#endif FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ ) { FOR( bin = 0; bin < nBins; bin++ ) @@ -1471,14 +1696,25 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( tempRe = L_add( inRe_fx[0][slot][bin], inRe_fx[1][slot][bin] ); // q tempIm = L_add( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // q temp64 = W_add( W_mult0_32_32( tempRe, tempRe ), W_mult0_32_32( tempIm, tempIm ) ); // 2q +#ifdef NONBE_2157_INPUT_COV_MATRICES + subFrameSumEne[bin] = W_add( subFrameSumEne[bin], W_shr( temp64, rsh1 ) ); + move64(); +#endif +#ifdef 
NONBE_2157_USE_CURRENT_VERSION exp1 = W_norm( temp64 ); - temp64 = W_shl( temp64, exp1 ); // 2q + exp1 - subFrameSumEne_fx[bin] = BASOP_Util_Add_Mant32Exp( subFrameSumEne_fx[bin], subFrameSumEne_e[bin], W_extract_h( temp64 ), sub( exp /* 63 - 2q */, exp1 ) /*31 - (2q + exp1 - 32)*/, &subFrameSumEne_e[bin] ); + temp = W_extract_h( W_shl( temp64, exp1 ) ); // 2q + exp1 + subFrameSumEne_fx[bin] = BASOP_Util_Add_Mant32Exp( subFrameSumEne_fx[bin], subFrameSumEne_e[bin], temp, sub( exp /* 63 - 2q */, exp1 ) /*31 - (2q + exp1 - 32)*/, &subFrameSumEne_e[bin] ); move32(); +#endif } } FOR( bin = 0; bin < nBins; bin++ ) { +#ifdef NONBE_2157_INPUT_COV_MATRICES + subFrameTotalEne[bin] = W_max( subFrameSumEne[bin], subFrameTotalEne[bin] ); + move64(); +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION subFrameTotalEne_e[bin] = sub( subFrameTotalEne_e[bin], 1 ); move16(); temp = L_shl_sat( subFrameTotalEne_fx[bin], sub( subFrameTotalEne_e[bin], subFrameSumEne_e[bin] ) ); // subFrameSumEne_e[bin] @@ -1489,6 +1725,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( subFrameTotalEne_e[bin] = subFrameSumEne_e[bin]; move16(); } +#endif } } @@ -1503,6 +1740,13 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( IIReneLimiterFactor_fx = L_add( L_shl( 8, Q26 ), L_sub( L_shl( 1, Q26 ), L_shr( qualityBasedSmFactor_fx, 5 ) ) ); // Q26 } +#ifdef NONBE_2157_INPUT_COV_MATRICES +#ifdef NONBE_2157_ACCURACY_ANALYSIS + double dQualityBasedSmFactor = ldexp( (double) qualityBasedSmFactor_fx, -31 ); + double dIIReneLimiterFactor = ldexp( (double) IIReneLimiterFactor_fx, -26 ); +#endif +#endif + FOR( bin = 0; bin < nBins; bin++ ) { /* Temporally smooth cov mtx estimates for resulting mixing matrix stability. The design principle is that @@ -1510,6 +1754,43 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( * robust performance at energy offsets when compared to typical IIR averaging. 
*/ Word16 num_e, den_e; Word32 num, den; +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION +#ifndef NONBE_2157_USE_CURRENT_VERSION + Word16 exp; +#endif + Word64 num64, den64; + num64 = W_add( hDiracDecBin->ChEne[0][bin], hDiracDecBin->ChEne[1][bin] ); // 2q - rsh0 + num64 = Mpy_64_32( num64, IIReneLimiterFactor_fx ); // 2q - rsh0 - 5 + den64 = W_add( hDiracDecBin->ChEnePrev[0][bin], hDiracDecBin->ChEnePrev[1][bin] ); // 2q - rsh0 + den64 = W_max( 1, den64 ); + num_e = W_norm( num64 ); + den_e = W_norm( den64 ); + num = W_extract_h( W_shl( num64, num_e ) ); // 2q - rsh0 - 5 + num_e - 32 + den = W_extract_h( W_shl( den64, den_e ) ); // 2q - rsh0 + den_e - 32 + IIReneLimiter[bin] = BASOP_Util_Divide3232_Scale_newton( num, den, &exp ); + exp = add( sub( den_e, num_e ), add( 5, exp ) ); + IF( L_shr_sat( IIReneLimiter[bin], sub( 31, exp ) ) > 0 ) + { + IIReneLimiter[bin] = ONE_IN_Q31; /*Q31*/ + move32(); + } + ELSE + { + IIReneLimiter[bin] = L_shl( IIReneLimiter[bin], exp ); /*Q31*/ + } +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + float eneRatio; + eneRatio = (float) ( ( hDiracDecBin->fChEne[0][bin] + hDiracDecBin->fChEne[1][bin] ) * (float) dIIReneLimiterFactor ) / (float) fmaxf( 1e-12f, ( hDiracDecBin->fChEnePrev[0][bin] + hDiracDecBin->fChEnePrev[1][bin] ) ); + fIIReneLimiter[bin] = fminf( 1.0f, eneRatio ); + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION (kept in double throughout so it can serve as the accuracy reference) + double dEneRatio = ( ( hDiracDecBin->dChEne[0][bin] + hDiracDecBin->dChEne[1][bin] ) * dIIReneLimiterFactor ) / fmax( 1e-12, ( hDiracDecBin->dChEnePrev[0][bin] + hDiracDecBin->dChEnePrev[1][bin] ) ); + dIIReneLimiter[bin] = fmin( 1.0, dEneRatio ); +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION num = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->ChEne_e[1][bin], &num_e ); num = Mpy_32_32( num, 
IIReneLimiterFactor_fx ); /*Q = (31 - num_e + 26 - 31) = (26 - num_e)*/ den_e = 0; @@ -1531,33 +1812,116 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( { IIReneLimiter_fx[bin] = L_shl( IIReneLimiter_fx[bin], exp ); /*Q31*/ } +#endif +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION + hDiracDecBin->ChCrossRe[bin] = Mpy_64_32( hDiracDecBin->ChCrossRe[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 + hDiracDecBin->ChCrossIm[bin] = Mpy_64_32( hDiracDecBin->ChCrossIm[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 + move64(); + move64(); + move64(); + move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChCrossRe[bin] *= (float) dQualityBasedSmFactor; + hDiracDecBin->fChCrossIm[bin] *= (float) dQualityBasedSmFactor; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChCrossRe[bin] *= dQualityBasedSmFactor; + hDiracDecBin->dChCrossIm[bin] *= dQualityBasedSmFactor; +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION hDiracDecBin->ChCrossRe_fx[bin] = Mpy_32_32( hDiracDecBin->ChCrossRe_fx[bin], qualityBasedSmFactor_fx ); hDiracDecBin->ChCrossIm_fx[bin] = Mpy_32_32( hDiracDecBin->ChCrossIm_fx[bin], qualityBasedSmFactor_fx ); - move32(); move32(); move32(); move32(); +#endif FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION + hDiracDecBin->ChEne[ch][bin] = Mpy_64_32( hDiracDecBin->ChEne[ch][bin], qualityBasedSmFactor_fx ); + move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChEne[ch][bin] *= (float) dQualityBasedSmFactor; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChEne[ch][bin] *= dQualityBasedSmFactor; +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION hDiracDecBin->ChEne_fx[ch][bin] = 
Mpy_32_32( hDiracDecBin->ChEne_fx[ch][bin], qualityBasedSmFactor_fx ); move32(); +#endif } +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION + hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], Mpy_64_32( hDiracDecBin->ChCrossRePrev[bin], IIReneLimiter[bin] ) ); + hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], Mpy_64_32( hDiracDecBin->ChCrossImPrev[bin], IIReneLimiter[bin] ) ); + move64(); + move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChCrossRe[bin] += fIIReneLimiter[bin] * hDiracDecBin->fChCrossRePrev[bin]; + hDiracDecBin->fChCrossIm[bin] += fIIReneLimiter[bin] * hDiracDecBin->fChCrossImPrev[bin]; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChCrossRe[bin] += dIIReneLimiter[bin] * hDiracDecBin->dChCrossRePrev[bin]; + hDiracDecBin->dChCrossIm[bin] += dIIReneLimiter[bin] * hDiracDecBin->dChCrossImPrev[bin]; +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION hDiracDecBin->ChCrossRe_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], Mpy_32_32( hDiracDecBin->ChCrossRePrev_fx[bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChCrossRePrev_e[bin], &hDiracDecBin->ChCrossRe_e[bin] ); hDiracDecBin->ChCrossIm_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], Mpy_32_32( hDiracDecBin->ChCrossImPrev_fx[bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChCrossImPrev_e[bin], &hDiracDecBin->ChCrossIm_e[bin] ); move32(); move32(); +#endif FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION + hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], Mpy_64_32( hDiracDecBin->ChEnePrev[ch][bin], IIReneLimiter[bin] ) ); + move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT 
IMPLEMENTATION + hDiracDecBin->fChEne[ch][bin] += fIIReneLimiter[bin] * hDiracDecBin->fChEnePrev[ch][bin]; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChEne[ch][bin] += dIIReneLimiter[bin] * hDiracDecBin->dChEnePrev[ch][bin]; +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION hDiracDecBin->ChEne_fx[ch][bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], Mpy_32_32( hDiracDecBin->ChEnePrev_fx[ch][bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChEnePrev_e[ch][bin], &hDiracDecBin->ChEne_e[ch][bin] ); move32(); +#endif } /* Store energy values and coefficients for next round */ +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION + hDiracDecBin->ChCrossRePrev[bin] = hDiracDecBin->ChCrossRe[bin]; + move64(); + hDiracDecBin->ChCrossImPrev[bin] = hDiracDecBin->ChCrossIm[bin]; + move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChCrossRePrev[bin] = hDiracDecBin->fChCrossRe[bin]; + hDiracDecBin->fChCrossImPrev[bin] = hDiracDecBin->fChCrossIm[bin]; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChCrossRePrev[bin] = hDiracDecBin->dChCrossRe[bin]; + hDiracDecBin->dChCrossImPrev[bin] = hDiracDecBin->dChCrossIm[bin]; +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION hDiracDecBin->ChCrossRePrev_fx[bin] = hDiracDecBin->ChCrossRe_fx[bin]; move32(); hDiracDecBin->ChCrossImPrev_fx[bin] = hDiracDecBin->ChCrossIm_fx[bin]; @@ -1566,16 +1930,141 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move16(); hDiracDecBin->ChCrossImPrev_e[bin] = hDiracDecBin->ChCrossIm_e[bin]; move16(); +#endif FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION + hDiracDecBin->ChEnePrev[ch][bin] = hDiracDecBin->ChEne[ch][bin]; + move64(); 
+#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChEnePrev[ch][bin] = hDiracDecBin->fChEne[ch][bin]; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChEnePrev[ch][bin] = hDiracDecBin->dChEne[ch][bin]; +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION hDiracDecBin->ChEnePrev_fx[ch][bin] = hDiracDecBin->ChEne_fx[ch][bin]; move32(); hDiracDecBin->ChEnePrev_e[ch][bin] = hDiracDecBin->ChEne_e[ch][bin]; move16(); +#endif } + +#ifdef NONBE_2157_INPUT_COV_MATRICES +#ifndef NONBE_2157_ACCURACY_ANALYSIS + // OPTIMIZED FIXED-POINT IMPLEMENTATION + Word16 shl; + for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) + { + shl = W_norm( hDiracDecBin->ChEne[ch][bin] ); + hDiracDecBin->ChEne_fx[ch][bin] = W_extract_h( W_shl( hDiracDecBin->ChEne[ch][bin], shl ) ); + hDiracDecBin->ChEne_e[ch][bin] = sub( e0, shl ); + } + + shl = W_norm( hDiracDecBin->ChCrossRe[bin] ); + hDiracDecBin->ChCrossRe_fx[bin] = W_extract_h( W_shl( hDiracDecBin->ChCrossRe[bin], shl ) ); + hDiracDecBin->ChCrossRe_e[bin] = sub( e0, shl ); + + shl = W_norm( hDiracDecBin->ChCrossIm[bin] ); + hDiracDecBin->ChCrossIm_fx[bin] = W_extract_h( W_shl( hDiracDecBin->ChCrossIm[bin], shl ) ); + hDiracDecBin->ChCrossIm_e[bin] = sub( e0, shl ); + + shl = W_norm( subFrameTotalEne[bin] ); + subFrameTotalEne_fx[bin] = W_extract_h( W_shl( subFrameTotalEne[bin], shl ) ); + subFrameTotalEne_e[bin] = sub( e1, shl ); +#endif +#endif } + +#if 0 + static int frame_number = 0; + printf( "Frame %d analysis\n", frame_number ); + { + double fc_max_abserr = 0.0; // fixed-point current + double fo_max_abserr = 0.0; // fixed-point optimized + double fs_max_abserr = 0.0; // floating-point single + for ( bin = 0; bin < nBins; bin++ ) + { + double fc = ldexp( (double) hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_e[0][bin] - 31 ); + double fo = ldexp( (double) hDiracDecBin->ChEne[0][bin], -( 2 * q - rsh0 ) ); + double fs = 
hDiracDecBin->fChEne[0][bin]; + double fd = hDiracDecBin->dChEne[0][bin]; + fc_max_abserr = fmax( fc_max_abserr, fabs( fd - fc ) ); + fo_max_abserr = fmax( fo_max_abserr, fabs( fd - fo ) ); + fs_max_abserr = fmax( fs_max_abserr, fabs( fd - fs ) ); + } + printf( "ChEne0 analysis - Max Absolute Error\n" ); + printf( " - fixed-point current: %+.6e\n", fc_max_abserr ); + printf( " - fixed-point optimized: %+.6e\n", fo_max_abserr ); + printf( " - floating-point single: %+.6e\n", fs_max_abserr ); + } + { + double fc_max_abserr = 0.0; // fixed-point current + double fo_max_abserr = 0.0; // fixed-point optimized + double fs_max_abserr = 0.0; // floating-point single + for ( bin = 0; bin < nBins; bin++ ) + { + double fc = ldexp( (double) hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->ChEne_e[1][bin] - 31 ); + double fo = ldexp( (double) hDiracDecBin->ChEne[1][bin], -( 2 * q - rsh0 ) ); + double fs = hDiracDecBin->fChEne[1][bin]; + double fd = hDiracDecBin->dChEne[1][bin]; + fc_max_abserr = fmax( fc_max_abserr, fabs( fd - fc ) ); + fo_max_abserr = fmax( fo_max_abserr, fabs( fd - fo ) ); + fs_max_abserr = fmax( fs_max_abserr, fabs( fd - fs ) ); + } + printf( "ChEne1 analysis - Max Absolute Error\n" ); + printf( " - fixed-point current: %+.6e\n", fc_max_abserr ); + printf( " - fixed-point optimized: %+.6e\n", fo_max_abserr ); + printf( " - floating-point single: %+.6e\n", fs_max_abserr ); + } + { + double fc_max_abserr = 0.0; // fixed-point current + double fo_max_abserr = 0.0; // fixed-point optimized + double fs_max_abserr = 0.0; // floating-point single + for ( bin = 0; bin < nBins; bin++ ) + { + double fc = ldexp( (double) subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin] - 31 ); + double fo = ldexp( (double) subFrameTotalEne[bin], -( 2 * q - rsh1 ) ); + double fs = fSubFrameTotalEne[bin]; + double fd = dSubFrameTotalEne[bin]; + fc_max_abserr = fmax( fc_max_abserr, fabs( fd - fc ) ); + fo_max_abserr = fmax( fo_max_abserr, fabs( fd - fo ) ); + fs_max_abserr = fmax( 
fs_max_abserr, fabs( fd - fs ) ); + } + printf( "SubFrameTotalEne analysis - Max Absolute Error\n" ); + printf( " - fixed-point current: %+.6e\n", fc_max_abserr ); + printf( " - fixed-point optimized: %+.6e\n", fo_max_abserr ); + printf( " - floating-point single: %+.6e\n", fs_max_abserr ); + } + { + double fc_max_abserr = 0.0; // fixed-point current + double fo_max_abserr = 0.0; // fixed-point optimized + double fs_max_abserr = 0.0; // floating-point single + for ( bin = 0; bin < nBins; bin++ ) + { + double fc = ldexp( (double) IIReneLimiter_fx[bin], -31 ); + double fo = ldexp( (double) IIReneLimiter[bin], -31 ); + double fs = fIIReneLimiter[bin]; + double fd = dIIReneLimiter[bin]; + fc_max_abserr = fmax( fc_max_abserr, fabs( fd - fc ) ); + fo_max_abserr = fmax( fo_max_abserr, fabs( fd - fo ) ); + fs_max_abserr = fmax( fs_max_abserr, fabs( fd - fs ) ); + } + printf( "IIReneLimiter analysis - Max Absolute Error\n" ); + printf( " - fixed-point current: %+.6e\n", fc_max_abserr ); + printf( " - fixed-point optimized: %+.6e\n", fo_max_abserr ); + printf( " - floating-point single: %+.6e\n", fs_max_abserr ); + } + + ++frame_number; +#endif + + return; } diff --git a/lib_rend/ivas_stat_rend.h b/lib_rend/ivas_stat_rend.h index 7fe2f3af753e6ac21489b0b06fe11c595b46199a..b0344ab3aa47b99ce8d7b6105c31c970df192c76 100644 --- a/lib_rend/ivas_stat_rend.h +++ b/lib_rend/ivas_stat_rend.h @@ -612,6 +612,30 @@ typedef struct ivas_dirac_dec_binaural_data_structure Word16 q_processMtxDec; Word16 q_processMtxDecPrev; +#ifdef NONBE_2157_INPUT_COV_MATRICES + Word64 ChEne[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + Word64 ChEnePrev[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + Word64 ChCrossRe[CLDFB_NO_CHANNELS_MAX]; + Word64 ChCrossRePrev[CLDFB_NO_CHANNELS_MAX]; + Word64 ChCrossIm[CLDFB_NO_CHANNELS_MAX]; + Word64 ChCrossImPrev[CLDFB_NO_CHANNELS_MAX]; +#ifdef NONBE_2157_ACCURACY_ANALYSIS + float fChEne[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + float 
fChEnePrev[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + float fChCrossRe[CLDFB_NO_CHANNELS_MAX]; + float fChCrossRePrev[CLDFB_NO_CHANNELS_MAX]; + float fChCrossIm[CLDFB_NO_CHANNELS_MAX]; + float fChCrossImPrev[CLDFB_NO_CHANNELS_MAX]; + + double dChEne[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + double dChEnePrev[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + double dChCrossRe[CLDFB_NO_CHANNELS_MAX]; + double dChCrossRePrev[CLDFB_NO_CHANNELS_MAX]; + double dChCrossIm[CLDFB_NO_CHANNELS_MAX]; + double dChCrossImPrev[CLDFB_NO_CHANNELS_MAX]; +#endif +#endif + Word32 ChEnePrev_fx[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; Word32 ChCrossRePrev_fx[CLDFB_NO_CHANNELS_MAX]; Word32 ChCrossImPrev_fx[CLDFB_NO_CHANNELS_MAX]; diff --git a/lib_rend/lib_rend_fx.c b/lib_rend/lib_rend_fx.c index 3a77187247452d81598b74d96e9f4bf68d7b6194..3a657ae7d1e16c5ba62eb4be9551c0f7534e9bf7 100644 --- a/lib_rend/lib_rend_fx.c +++ b/lib_rend/lib_rend_fx.c @@ -9924,6 +9924,7 @@ static ivas_error ivas_masa_ext_rend_dirac_rend_init( return error; } +#include "string.h" static ivas_error ivas_masa_ext_rend_parambin_init( input_masa *inputMasa, /* i/o: MASA external renderer structure */ @@ -10011,17 +10012,38 @@ static ivas_error ivas_masa_ext_rend_parambin_init( move16(); move16(); move16(); +#ifdef NONBE_2157_INPUT_COV_MATRICES + set64_fx( hDiracDecBin->ChEnePrev[j], 0, nBins ); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + memset( hDiracDecBin->fChEnePrev[j], 0, nBins * sizeof( float ) ); + memset( hDiracDecBin->dChEnePrev[j], 0, nBins * sizeof( double ) ); +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION set_zero_fx( hDiracDecBin->ChEnePrev_fx[j], nBins ); - set_zero_fx( hDiracDecBin->ChEneOutPrev_fx[j], nBins ); set16_fx( hDiracDecBin->ChEnePrev_e[j], 0, nBins ); +#endif + set_zero_fx( hDiracDecBin->ChEneOutPrev_fx[j], nBins ); set16_fx( hDiracDecBin->ChEneOutPrev_e[j], 0, nBins ); } +#ifdef NONBE_2157_INPUT_COV_MATRICES + set64_fx( hDiracDecBin->ChCrossRePrev, 0, nBins ); + set64_fx( 
hDiracDecBin->ChCrossImPrev, 0, nBins ); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + memset( hDiracDecBin->fChCrossRePrev, 0, nBins * sizeof( float ) ); + memset( hDiracDecBin->fChCrossImPrev, 0, nBins * sizeof( float ) ); + memset( hDiracDecBin->dChCrossRePrev, 0, nBins * sizeof( double ) ); + memset( hDiracDecBin->dChCrossImPrev, 0, nBins * sizeof( double ) ); +#endif +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION set_zero_fx( hDiracDecBin->ChCrossRePrev_fx, nBins ); set_zero_fx( hDiracDecBin->ChCrossImPrev_fx, nBins ); - set_zero_fx( hDiracDecBin->ChCrossReOutPrev_fx, nBins ); - set_zero_fx( hDiracDecBin->ChCrossImOutPrev_fx, nBins ); set16_fx( hDiracDecBin->ChCrossRePrev_e, 0, nBins ); set16_fx( hDiracDecBin->ChCrossImPrev_e, 0, nBins ); +#endif + set_zero_fx( hDiracDecBin->ChCrossReOutPrev_fx, nBins ); + set_zero_fx( hDiracDecBin->ChCrossImOutPrev_fx, nBins ); set16_fx( hDiracDecBin->ChCrossReOutPrev_e, 0, nBins ); set16_fx( hDiracDecBin->ChCrossImOutPrev_e, 0, nBins ); hDiracDecBin->renderStereoOutputInsteadOfBinaural = 0;