From db6909322b7258ae62e24eae81f38559892147a0 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 22 Oct 2025 15:10:20 +0100 Subject: [PATCH 01/26] Implement BASOP W_min and W_max functions. --- lib_basop/enh64.c | 97 +++++++++++++++++++++++++++++++++++++++++++- lib_basop/enh64.h | 2 + lib_debug/wmc_auto.c | 2 +- lib_debug/wmc_auto.h | 2 + 4 files changed, 101 insertions(+), 2 deletions(-) diff --git a/lib_basop/enh64.c b/lib_basop/enh64.c index 8bffb620c..b53adb79a 100644 --- a/lib_basop/enh64.c +++ b/lib_basop/enh64.c @@ -40,7 +40,103 @@ *****************************************************************************/ #ifdef ENH_64_BIT_OPERATOR +/*______________________________________________________________________________ +| | +| Function Name : W_min | +| | +| Purpose : | +| | +| Compares L64_var1 and L64_var2 and returns the minimum value. | +| | +| Complexity weight : 1 | +| | +| Inputs : | +| | +| L64_var1 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | +| | +| L64_var2 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | +| | +| Outputs : | +| | +| none | +| | +| Return Value : | +| | +| L64_var_out | +| 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. 
| +|______________________________________________________________________________| +*/ +Word64 W_min( Word64 L64_var1, Word64 L64_var2 ) +{ + Word64 L64_var_out; + if ( L64_var1 <= L64_var2 ) + { + L64_var_out = L64_var1; + } + else + { + L64_var_out = L64_var2; + } + +#ifdef WMOPS + multiCounter[currCounter].W_min++; +#endif /* ifdef WMOPS */ + + return ( L64_var_out ); +} + +/*______________________________________________________________________________ +| | +| Function Name : W_max | +| | +| Purpose : | +| | +| Compares L64_var1 and L64_var2 and returns the maximum value. | +| | +| Complexity weight : 1 | +| | +| Inputs : | +| | +| L64_var1 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | +| | +| L64_var2 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | +| | +| Outputs : | +| | +| none | +| | +| Return Value : | +| | +| L64_var_out | +| 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. 
| +|______________________________________________________________________________| +*/ +Word64 W_max( Word64 L64_var1, Word64 L64_var2 ) +{ + Word64 L64_var_out; + + if ( L64_var1 >= L64_var2 ) + { + L64_var_out = L64_var1; + } + else + { + L64_var_out = L64_var2; + } + +#ifdef WMOPS + multiCounter[currCounter].W_max++; +#endif /* ifdef WMOPS */ + + return ( L64_var_out ); +} /*___________________________________________________________________________ | | @@ -84,7 +180,6 @@ Word64 W_add_nosat( Word64 L64_var1, Word64 L64_var2 ) return L64_var_out; } - /*___________________________________________________________________________ | | | Function Name : W_sub_nosat | diff --git a/lib_basop/enh64.h b/lib_basop/enh64.h index c3896bb0d..d690708da 100644 --- a/lib_basop/enh64.h +++ b/lib_basop/enh64.h @@ -21,6 +21,8 @@ * *****************************************************************************/ #ifdef ENH_64_BIT_OPERATOR +Word64 W_min( Word64 L64_var1, Word64 L64_var2 ); +Word64 W_max( Word64 L64_var1, Word64 L64_var2 ); Word64 W_add_nosat( Word64 L64_var1, Word64 L64_var2 ); Word64 W_sub_nosat( Word64 L64_var1, Word64 L64_var2 ); Word64 W_shl( Word64 L64_var1, Word16 var2 ); diff --git a/lib_debug/wmc_auto.c b/lib_debug/wmc_auto.c index 5afd9de16..9d4d573f0 100644 --- a/lib_debug/wmc_auto.c +++ b/lib_debug/wmc_auto.c @@ -133,7 +133,7 @@ static BASIC_OP op_weight = { #ifdef ENH_64_BIT_OPERATOR /* Weights of new 64 bit basops */ , - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 #endif /* #ifdef ENH_64_BIT_OPERATOR */ #ifdef ENH_32_BIT_OPERATOR diff --git a/lib_debug/wmc_auto.h b/lib_debug/wmc_auto.h index 64e2c751a..6dff36f50 100644 --- a/lib_debug/wmc_auto.h +++ b/lib_debug/wmc_auto.h @@ -877,6 +877,8 @@ typedef struct /* New 64 bit basops */ #ifdef ENH_64_BIT_OPERATOR unsigned int move64; /* 
Complexity Weight of 1 */ + unsigned int W_min; /* Complexity Weight of 1 */ + unsigned int W_max; /* Complexity Weight of 1 */ unsigned int W_add_nosat; /* Complexity Weight of 1 */ unsigned int W_sub_nosat; /* Complexity Weight of 1 */ unsigned int W_shl; /* Complexity Weight of 1 */ -- GitLab From 66f905764ade74555bcd90e1fdbdbb5e25b23d4f Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Fri, 24 Oct 2025 10:27:58 +0100 Subject: [PATCH 02/26] Optimize. --- .gitignore | 2 + CMakeLists.txt | 6 +- lib_com/options.h | 12 +- .../ivas_dirac_dec_binaural_functions_fx.c | 367 +++++++++++++++++- lib_rend/ivas_stat_rend.h | 7 + lib_rend/lib_rend_fx.c | 3 + 6 files changed, 372 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 9dcfcdcf8..e03b08671 100644 --- a/.gitignore +++ b/.gitignore @@ -84,3 +84,5 @@ coan_out_* /scripts /tests /pytest.ini +/build +audio.wav diff --git a/CMakeLists.txt b/CMakeLists.txt index ab640932f..53c3a84c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,8 +117,12 @@ endif() # configuration options for all platforms set(WMOPS OFF CACHE BOOL "enable WMOPS") +set(WMOPS_DETAIL OFF CACHE BOOL "enable WMOPS detail") if(WMOPS) add_definitions("-DWMOPS=1") + if(WMOPS_DETAIL) + add_definitions("-DWMOPS_DETAIL=1") + endif() endif() project(stereo-evs LANGUAGES C) @@ -216,7 +220,7 @@ target_link_libraries(ISAR_post_rend lib_basop lib_isar lib_util lib_com) target_include_directories(ISAR_post_rend PRIVATE lib_basop lib_isar) add_executable(ambi_converter apps/ambi_converter.c) -target_link_libraries(ambi_converter lib_util lib_com lib_basop) +target_link_libraries(ambi_converter lib_util lib_com lib_basop lib_debug) if(UNIX) target_link_libraries(ambi_converter m) endif() diff --git a/lib_com/options.h b/lib_com/options.h index 5666b676e..2c92cba9a 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -55,13 +55,11 @@ #define SUPPORT_JBM_TRACEFILE /* Support for JBM tracefile, which is needed for 3GPP 
objective/subjective testing, but not relevant for real-world implementations */ -/*#define WMOPS*/ /* Activate complexity and memory counters */ -#ifdef WMOPS -#define WMOPS_PER_FRAME /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */ -#define WMOPS_DETAIL /* Output detailed complexity printout for every function. Increases runtime overhead */ -#define WMOPS_WC_FRAME_ANALYSIS /* Output detailed complexity analysis for the worst-case frame */ -/*define MEM_COUNT_DETAILS*/ /* Output detailed memory analysis for the worst-case frame (writes to the file "mem_analysis.csv") */ -#endif +/*#define WMOPS*/ /* Activate complexity and memory counters */ +/*#define WMOPS_PER_FRAME*/ /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */ +/*#define WMOPS_DETAIL*/ /* Output detailed complexity printout for every function. Increases runtime overhead */ +/*#define WMOPS_WC_FRAME_ANALYSIS*/ /* Output detailed complexity analysis for the worst-case frame */ +/*#define MEM_COUNT_DETAILS*/ /* Output detailed memory analysis for the worst-case frame (writes to the file "mem_analysis.csv") */ /*#define DISABLE_LIMITER*/ diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 0539b7b85..466253803 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -98,7 +98,7 @@ static void ivas_dirac_dec_binaural_internal_fx( Decoder_Struct *st_ivas, COMBIN static void ivas_dirac_dec_decorrelate_slot_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, const Word16 num_freq_bands, const Word16 slot, Word32 inRe[][CLDFB_SLOTS_PER_SUBFRAME] /*q_inp*/[CLDFB_NO_CHANNELS_MAX], Word32 inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX] /*q_inp*/, const Word16 q_inp, Word32 decRe[][CLDFB_NO_CHANNELS_MAX], Word32 decIm[][CLDFB_NO_CHANNELS_MAX], Word16 *q_out ); -static void 
ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, PARAMBIN_REND_CONFIG_HANDLE hConfig, Word32 inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], Word32 inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], const Word16 subframe, Word32 *subFrameTotalEne, Word16 *subFrameTotalEne_e, Word32 *IIReneLimiter, const Word16 q ); +static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, PARAMBIN_REND_CONFIG_HANDLE hConfig, Word32 inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], Word32 inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], const Word16 subframe, Word64 *subFrameTotalEne, Word32 *subFrameTotalEne_fx, Word16 *subFrameTotalEne_e, Word32 *IIReneLimiter, Word16 q ); static void ivas_dirac_dec_binaural_formulate_target_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, const SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, const PARAMBIN_REND_CONFIG_HANDLE hConfig, Word32 Rmat[3][3], const Word16 subframe, const Word16 isHeadtracked, const Word32 *subFrameTotalEne, Word16 *subFrameTotalEne_e, const Word32 *IIReneLimiter, const MASA_ISM_DATA_HANDLE hMasaIsmData ); @@ -207,11 +207,14 @@ ivas_error ivas_dirac_dec_init_binaural_data_fx( set16_fx( hDiracDecBin->processMtxDecRe_fx[j][k], 0, nBins ); set16_fx( hDiracDecBin->processMtxDecIm_fx[j][k], 0, nBins ); } + set64_fx( hDiracDecBin->ChEnePrev[j], 0, nBins ); set32_fx( hDiracDecBin->ChEnePrev_fx[j], 0, nBins ); set32_fx( hDiracDecBin->ChEneOutPrev_fx[j], 0, nBins ); set16_fx( hDiracDecBin->ChEnePrev_e[j], 0, nBins ); set16_fx( hDiracDecBin->ChEneOutPrev_e[j], 0, nBins ); } + set64_fx( hDiracDecBin->ChCrossRePrev, 0, nBins ); + set64_fx( hDiracDecBin->ChCrossImPrev, 0, nBins ); set32_fx( hDiracDecBin->ChCrossRePrev_fx, 0, nBins ); set32_fx( hDiracDecBin->ChCrossImPrev_fx, 0, 
nBins ); set16_fx( hDiracDecBin->ChCrossRePrev_e, 0, nBins ); @@ -702,6 +705,7 @@ static void ivas_dirac_dec_binaural_internal_fx( Word32 reverbIm_fx[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; Word32 decorrRe_fx[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; Word32 decorrIm_fx[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; + Word64 subFrameTotalEne[CLDFB_NO_CHANNELS_MAX]; Word32 subFrameTotalEne_fx[CLDFB_NO_CHANNELS_MAX]; Word16 subFrameTotalEne_e[CLDFB_NO_CHANNELS_MAX]; Word32 IIReneLimiter_fx[CLDFB_NO_CHANNELS_MAX]; @@ -1024,7 +1028,7 @@ static void ivas_dirac_dec_binaural_internal_fx( } ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, subframe, - subFrameTotalEne_fx, subFrameTotalEne_e, IIReneLimiter_fx, q_inp ); + subFrameTotalEne, subFrameTotalEne_fx, subFrameTotalEne_e, IIReneLimiter_fx, q_inp ); ivas_dirac_dec_binaural_formulate_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, subFrameTotalEne_fx, subFrameTotalEne_e, IIReneLimiter_fx, st_ivas->hMasaIsmData ); @@ -1320,6 +1324,101 @@ static void ivas_dirac_dec_decorrelate_slot_fx( return; } +UWord32 ceil_pow2(UWord32 x) +{ + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + ++x; + return x; +} + +UWord32 popcnt(UWord32 x) +{ + // it takes once cycle + x = (x & 0x55555555) + ((x & 0xAAAAAAAA) >> 1); + x = (x & 0x33333333) + ((x & 0xCCCCCCCC) >> 2); + x = (x & 0x0F0F0F0F) + ((x & 0xF0F0F0F0) >> 4); + x = (x & 0x00FF00FF) + ((x & 0xFF00FF00) >> 8); + x = (x & 0x0000FFFF) + ((x & 0xFFFF0000) >> 16); + return x; +} + +UWord32 ceil_log2(UWord32 x) +{ + if (x) + { + return popcnt(ceil_pow2(x) - 1); + } + else + { + return 0; + } +} + 
+Word64 W_mult0_u32_i32( UWord32 L_var1, Word32 L_var2 ) /* unsigned 32-bit x signed 32-bit -> signed 64-bit product, no shift; counted as W_mult0_32_32 */ +{ + Word64 L64_var_out; + L64_var_out = (Word64) L_var1 * (Word64) L_var2; /* both operands widened to signed 64 bits: the product always fits, no unsigned wrap-around involved */ +#ifdef WMOPS + multiCounter[currCounter].W_mult0_32_32++; +#endif /* if WMOPS */ + return L64_var_out; +} + +// #define CHECK_CORRECTNESS +/* Mpy_64_32: returns ( W_var1 * L_var2 ) >> 31; when CHECK_CORRECTNESS is defined the result is asserted against a 128-bit reference product */ +Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) +{ +#if 0 + Word64 tmph, tmpl, var_out; + tmpl = W_mult0_u32_i32( W_extract_l( W_var1 ), L_var2 ); // 2 + tmph = W_mult_32_32( W_extract_h( W_var1 ), L_var2 ); // 2 + var_out = W_add(tmph, W_shr(tmpl, 31)); // 2 + return var_out; +#else + Word32 var1_l; + Word64 var_out; + var1_l = W_extract_l( W_var1 ); // 1 + var_out = W_mult0_32_32( L_and( var1_l, 1 ), L_var2 ); // 2 + var_out = W_mac_32_32( var_out, L_lshr( var1_l, 1 ), L_var2 ); // 2 + var_out = W_mac_32_32( W_shr( var_out, 31 ), W_extract_h( W_var1 ), L_var2); // 3 +#ifdef CHECK_CORRECTNESS + __int128_t z = ((__int128_t)W_var1 * (__int128_t)L_var2) >> 31; + assert(var_out == (Word64)z); +#endif + return var_out; +#endif +} +/* check: debug helper (every call visible in this patch sits inside #ifdef CHECK_CORRECTNESS). Compares a 64-bit fixed-point value in Q(q) with a mantissa/exponent pair (mantissa treated as Q(31 - exp)) after shifting the finer one down to the coarser Q; prints both and asserts when they differ by 4096 or more */ +void check(Word64 value, Word32 q, Word64 mant, Word32 exp) +{ + Word32 qq = 31 - exp; + Word32 shift = qq - q; + + if (shift >= 0) + { + mant >>= shift; + } + else + { + value >>= -shift; + } + + Word64 error = ( mant >= value ) ? ( mant - value ) : ( value - mant ); /* 64-bit absolute difference; abs() takes an int and would truncate it */ + if (error >= 4096) + { + printf("\n\n"); + printf("0x%016llx\n", (unsigned long long) value); + printf("%d\n", (int) q); + printf("0x%016llx\n", (unsigned long long) mant); + printf("%d\n", (int) qq); + assert(0); + } +} static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, @@ -1328,6 +1427,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( Word32 inRe_fx[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /*q*/ Word32 inIm_fx[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /*q*/ const Word16 subframe, + Word64 *subFrameTotalEne, Word32 *subFrameTotalEne_fx, Word16 *subFrameTotalEne_e, Word32 *IIReneLimiter_fx, @@ -1359,20 +1459,26 @@ static void
ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( nBins = hSpatParamRendCom->num_freq_bands; /* Actually bins */ move16(); + set64_fx( hDiracDecBin->ChCrossRe, 0, nBins ); + set64_fx( hDiracDecBin->ChCrossIm, 0, nBins ); +#ifdef CHECK_CORRECTNESS set32_fx( hDiracDecBin->ChCrossRe_fx, 0, nBins ); set32_fx( hDiracDecBin->ChCrossIm_fx, 0, nBins ); - set16_fx( hDiracDecBin->ChCrossRe_e, 0, nBins ); set16_fx( hDiracDecBin->ChCrossIm_e, 0, nBins ); +#endif FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { + set64_fx( hDiracDecBin->ChEne[ch], 0, nBins ); +#ifdef CHECK_CORRECTNESS set32_fx( hDiracDecBin->ChEne_fx[ch], 0, nBins ); - set16_fx( hDiracDecBin->ChEne_e[ch], 0, nBins ); +#endif } - +#ifdef CHECK_CORRECTNESS set16_fx( subFrameTotalEne_e, 0, CLDFB_NO_CHANNELS_MAX ); +#endif /* Determine EQ for low bit rates (13.2 and 16.4 kbps) */ applyLowBitRateEQ = 0; @@ -1402,10 +1508,16 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( } /* Formulate input and target covariance matrices for this subframe */ + set64_fx( subFrameTotalEne, 0, CLDFB_NO_CHANNELS_MAX ); +#ifdef CHECK_CORRECTNESS set32_fx( subFrameTotalEne_fx, 0, CLDFB_NO_CHANNELS_MAX ); +#endif exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below + Word32 rsh0 = ceil_log2(hSpatParamRendCom->subframe_nbslots[subframe]) - 1; + Word32 rsh1 = ceil_log2(BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe]) - 1; + /* Calculate input covariance matrix */ FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ ) { @@ -1413,31 +1525,57 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( { FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { - Word32 instEne_fx; temp64 = W_mult0_32_32( inRe_fx[ch][slot][bin], inRe_fx[ch][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[ch][slot][bin], inIm_fx[ch][slot][bin] ) ); // 2q +#ifdef CHECK_CORRECTNESS + Word32 instEne_fx; 
exp1 = W_norm( temp64 ); instEne_fx = W_extract_h( W_shl( temp64, exp1 ) ); // 2q - 32 + exp1 /* exp of instEne_fx = 31 - (2q -32 + exp1) = 63 - 2q - exp1 = exp - exp1*/ - hDiracDecBin->ChEne_fx[ch][bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], instEne_fx, sub( exp, exp1 ), &hDiracDecBin->ChEne_e[ch][bin] ); subFrameTotalEne_fx[bin] = BASOP_Util_Add_Mant32Exp( subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], instEne_fx, sub( exp, exp1 ), &subFrameTotalEne_e[bin] ); move32(); move32(); +#endif + + hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 + subFrameTotalEne[bin] = W_add( subFrameTotalEne[bin], W_shr( temp64, rsh1 ) ); // 2q - rsh1 +#ifdef CHECK_CORRECTNESS + check(hDiracDecBin->ChEne[ch][bin], 2*q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin]); + check(subFrameTotalEne[bin], 2*q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin]); +#endif + move64(); + move64(); } temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inRe_fx[1][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ) ); // 2q +#ifdef CHECK_CORRECTNESS exp1 = W_norm( temp64 ); temp = W_extract_h( W_shl( temp64, exp1 ) ); // // 2q - 32 + exp1 hDiracDecBin->ChCrossRe_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], temp, sub( exp, exp1 ), &hDiracDecBin->ChCrossRe_e[bin] ); move32(); +#endif + + hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 +#ifdef CHECK_CORRECTNESS + check(hDiracDecBin->ChCrossRe[bin], 2*q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin]); +#endif + move64(); temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // 2q temp64 = W_sub( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inRe_fx[1][slot][bin] ) ); // 2q +#ifdef CHECK_CORRECTNESS exp1 = 
W_norm( temp64 ); temp = W_extract_h( W_shl( temp64, exp1 ) ); // // 2q - 32 + exp1 hDiracDecBin->ChCrossIm_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], temp, sub( exp, exp1 ), &hDiracDecBin->ChCrossIm_e[bin] ); move32(); +#endif + + hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 +#ifdef CHECK_CORRECTNESS + check(hDiracDecBin->ChCrossIm[bin], 2*q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin]); +#endif + move64(); } } @@ -1448,13 +1586,27 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( FOR( bin = LOW_BIT_RATE_BINAURAL_EQ_OFFSET; bin < lastEqBin; bin++ ) { +#ifdef CHECK_CORRECTNESS subFrameTotalEne_fx[bin] = Mpy_32_32( subFrameTotalEne_fx[bin], lowBitRateEQ_fx[bin] ); // exp = subFrameTotalEne_e[bin] move32(); +#endif + subFrameTotalEne[bin] = Mpy_64_32( subFrameTotalEne[bin], lowBitRateEQ_fx[bin] ); +#ifdef CHECK_CORRECTNESS + check(subFrameTotalEne[bin], 2*q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin]); +#endif + move64(); } FOR( ; bin < nBins; bin++ ) { +#ifdef CHECK_CORRECTNESS subFrameTotalEne_fx[bin] = Mpy_32_32( subFrameTotalEne_fx[bin], lowBitRateEQ_fx[lastEqBin] ); // exp = subFrameTotalEne_e[bin] move32(); +#endif + subFrameTotalEne[bin] = Mpy_64_32( subFrameTotalEne[bin], lowBitRateEQ_fx[lastEqBin] ); +#ifdef CHECK_CORRECTNESS + check(subFrameTotalEne[bin], 2*q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin]); +#endif + move64(); } } @@ -1463,11 +1615,17 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( IF( ( EQ_32( ivas_format, SBA_FORMAT ) || EQ_32( ivas_format, SBA_ISM_FORMAT ) ) && EQ_16( nchan_transport, 2 ) ) { Word32 tempRe, tempIm; + Word64 subFrameSumEne[CLDFB_NO_CHANNELS_MAX]; +#ifdef CHECK_CORRECTNESS Word32 subFrameSumEne_fx[CLDFB_NO_CHANNELS_MAX]; Word16 subFrameSumEne_e[CLDFB_NO_CHANNELS_MAX]; +#endif + set64_fx( 
subFrameSumEne, 0, CLDFB_NO_CHANNELS_MAX ); +#ifdef CHECK_CORRECTNESS set32_fx( subFrameSumEne_fx, 0, CLDFB_NO_CHANNELS_MAX ); set16_fx( subFrameSumEne_e, 0, CLDFB_NO_CHANNELS_MAX ); +#endif FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ ) { FOR( bin = 0; bin < nBins; bin++ ) @@ -1475,14 +1633,23 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( tempRe = L_add( inRe_fx[0][slot][bin], inRe_fx[1][slot][bin] ); // q tempIm = L_add( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // q temp64 = W_add( W_mult0_32_32( tempRe, tempRe ), W_mult0_32_32( tempIm, tempIm ) ); // 2q +#ifdef CHECK_CORRECTNESS exp1 = W_norm( temp64 ); - temp64 = W_shl( temp64, exp1 ); // 2q + exp1 - subFrameSumEne_fx[bin] = BASOP_Util_Add_Mant32Exp( subFrameSumEne_fx[bin], subFrameSumEne_e[bin], W_extract_h( temp64 ), sub( exp /* 63 - 2q */, exp1 ) /*31 - (2q + exp1 - 32)*/, &subFrameSumEne_e[bin] ); + temp = W_extract_h( W_shl( temp64, exp1 ) ); // 2q + exp1 + subFrameSumEne_fx[bin] = BASOP_Util_Add_Mant32Exp( subFrameSumEne_fx[bin], subFrameSumEne_e[bin], temp, sub( exp /* 63 - 2q */, exp1 ) /*31 - (2q + exp1 - 32)*/, &subFrameSumEne_e[bin] ); move32(); +#endif + + subFrameSumEne[bin] = W_add( subFrameSumEne[bin], W_shr( temp64, rsh1 ) ); +#ifdef CHECK_CORRECTNESS + check(subFrameSumEne[bin], 2*q - rsh1, subFrameSumEne_fx[bin], subFrameSumEne_e[bin]); +#endif + move64(); } } FOR( bin = 0; bin < nBins; bin++ ) { +#ifdef CHECK_CORRECTNESS subFrameTotalEne_e[bin] = sub( subFrameTotalEne_e[bin], 1 ); move16(); temp = L_shl_sat( subFrameTotalEne_fx[bin], sub( subFrameTotalEne_e[bin], subFrameSumEne_e[bin] ) ); // subFrameSumEne_e[bin] @@ -1493,6 +1660,12 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( subFrameTotalEne_e[bin] = subFrameSumEne_e[bin]; move16(); } +#endif + subFrameTotalEne[bin] = W_max( subFrameSumEne[bin], subFrameTotalEne[bin] ); +#ifdef CHECK_CORRECTNESS + check(subFrameTotalEne[bin], 2*q - 
rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin]); +#endif + move64(); } } @@ -1512,52 +1685,148 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( /* Temporally smooth cov mtx estimates for resulting mixing matrix stability. The design principle is that * the energy history (IIR) must not be more than double of the current frame energy. This provides more * robust performance at energy offsets when compared to typical IIR averaging. */ - Word16 num_e, den_e; - Word32 num, den; +#ifdef CHECK_CORRECTNESS + Word16 num_e, den_e, exp0; + Word32 num, den, div0, div2; num = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->ChEne_e[1][bin], &num_e ); num = Mpy_32_32( num, IIReneLimiterFactor_fx ); /*Q = (31 - num_e + 26 - 31) = (26 - num_e)*/ den_e = 0; move16(); den = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEnePrev_fx[0][bin], hDiracDecBin->ChEnePrev_e[0][bin], hDiracDecBin->ChEnePrev_fx[1][bin], hDiracDecBin->ChEnePrev_e[1][bin], &den_e ); den = L_max( 1, den ); - IIReneLimiter_fx[bin] = BASOP_Util_Divide3232_Scale_cadence( num, den, &exp ); - exp = add( sub( num_e, den_e ), add( 5, exp ) ); + IIReneLimiter_fx[bin] = div0 = BASOP_Util_Divide3232_Scale_cadence( num, den, &exp0 ); + exp = add( sub( num_e, den_e ), add( 5, exp0 ) ); + IF( L_shr_sat( IIReneLimiter_fx[bin], sub( 31, exp ) ) > 0 ) + { + IIReneLimiter_fx[bin] = div2 = ONE_IN_Q31; /*Q31*/ + move32(); + } + ELSE + { + IIReneLimiter_fx[bin] = div2 = L_shl( IIReneLimiter_fx[bin], exp ); /*Q31*/ + } +#endif + + Word64 num64, den64; + Word16 num1_e, den1_e, exp1; + Word32 num1, den1, div1, div3; + num64 = W_add( W_shr( hDiracDecBin->ChEne[0][bin], 1 ), W_shr( hDiracDecBin->ChEne[1][bin], 1 ) ); // 2q - rsh0 - 1 + num64 = Mpy_64_32( num64, IIReneLimiterFactor_fx ); // 2q - rsh0 - 1 - 5 +#ifdef CHECK_CORRECTNESS + check(num64, 2*q - rsh0 - 1 - 5, num, num_e + 5); + // printf("num0: %f\n", 
ldexp((double)num, (num_e + 5)-31)); + // printf("num1: %f\n", ldexp((double)num64, -(2*q - rsh0 - 1 - 5))); +#endif + den64 = W_add( W_shr( hDiracDecBin->ChEnePrev[0][bin], 1 ), W_shr( hDiracDecBin->ChEnePrev[1][bin], 1 ) ); // 2q - rsh0 - 1 +#ifdef CHECK_CORRECTNESS + check(den64, 2*q - rsh0 - 1, den, den_e); + // printf("den0: %f\n", ldexp((double)den, (den_e + 0)-31)); + // printf("den1: %f\n", ldexp((double)den64, -(2*q - rsh0 - 1))); +#endif + den64 = W_max( 1, den64 ); + num1_e = W_norm( num64 ); + den1_e = W_norm( den64 ); + num1 = W_extract_h( W_shl( num64, num1_e ) ); // 2q - rsh0 - 1 - 5 + num1_e - 32 + den1 = W_extract_h( W_shl( den64, den1_e ) ); // 2q - rsh0 - 1 + den1_e - 32 +#ifdef CHECK_CORRECTNESS + // printf("num2: %f\n", ldexp((double)num1, -(2*q - rsh0 - 1 - 5 + num1_e - 32))); + // printf("den2: %f\n", ldexp((double)den1, -(2*q - rsh0 - 1 + den1_e - 32))); +#endif + IIReneLimiter_fx[bin] = div1 = BASOP_Util_Divide3232_Scale_newton( num1, den1, &exp1 ); +#ifdef CHECK_CORRECTNESS + // printf("div0: %f\n", ldexp((double)div0, (num_e + 5 - den_e + exp0)-31)); + // printf("div1: %f\n", ldexp((double)div1, +exp1+5 - num1_e+den1_e-31)); +#endif + exp = add( sub( den1_e, num1_e ), add( 5, exp1 ) ); IF( L_shr_sat( IIReneLimiter_fx[bin], sub( 31, exp ) ) > 0 ) { - IIReneLimiter_fx[bin] = ONE_IN_Q31; /*Q31*/ + IIReneLimiter_fx[bin] = div3 = ONE_IN_Q31; /*Q31*/ move32(); } ELSE { - IIReneLimiter_fx[bin] = L_shl( IIReneLimiter_fx[bin], exp ); /*Q31*/ + IIReneLimiter_fx[bin] = div3 = L_shl( IIReneLimiter_fx[bin], exp ); /*Q31*/ } +#ifdef CHECK_CORRECTNESS + // double ddiv0 = ldexp((double)div2, -31); + // double ddiv1 = ldexp((double)div3, -31); + // double error = fabs(ddiv0 - ddiv1) / fabs(ddiv0); + // if (error >= 0.2) + // { + // printf("\n\n"); + // printf("div0: %f\n", ddiv0); + // printf("div1: %f\n", ddiv1); + // assert(false); + // } +#endif + +#ifdef CHECK_CORRECTNESS hDiracDecBin->ChCrossRe_fx[bin] = Mpy_32_32( hDiracDecBin->ChCrossRe_fx[bin], 
qualityBasedSmFactor_fx ); hDiracDecBin->ChCrossIm_fx[bin] = Mpy_32_32( hDiracDecBin->ChCrossIm_fx[bin], qualityBasedSmFactor_fx ); - move32(); move32(); move32(); move32(); +#endif + + hDiracDecBin->ChCrossRe[bin] = Mpy_64_32( hDiracDecBin->ChCrossRe[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 + hDiracDecBin->ChCrossIm[bin] = Mpy_64_32( hDiracDecBin->ChCrossIm[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 +#ifdef CHECK_CORRECTNESS + check(hDiracDecBin->ChCrossRe[bin], 2*q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin]); + check(hDiracDecBin->ChCrossIm[bin], 2*q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin]); +#endif + move64(); + move64(); + move64(); + move64(); FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { +#ifdef CHECK_CORRECTNESS hDiracDecBin->ChEne_fx[ch][bin] = Mpy_32_32( hDiracDecBin->ChEne_fx[ch][bin], qualityBasedSmFactor_fx ); move32(); +#endif + + hDiracDecBin->ChEne[ch][bin] = Mpy_64_32( hDiracDecBin->ChEne[ch][bin], qualityBasedSmFactor_fx ); +#ifdef CHECK_CORRECTNESS + check(hDiracDecBin->ChEne[ch][bin], 2*q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin]); +#endif + move64(); } +#ifdef CHECK_CORRECTNESS hDiracDecBin->ChCrossRe_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], Mpy_32_32( hDiracDecBin->ChCrossRePrev_fx[bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChCrossRePrev_e[bin], &hDiracDecBin->ChCrossRe_e[bin] ); hDiracDecBin->ChCrossIm_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], Mpy_32_32( hDiracDecBin->ChCrossImPrev_fx[bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChCrossImPrev_e[bin], &hDiracDecBin->ChCrossIm_e[bin] ); move32(); move32(); +#endif + + hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], Mpy_64_32( hDiracDecBin->ChCrossRePrev[bin], IIReneLimiter_fx[bin] ) ); + hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], 
Mpy_64_32( hDiracDecBin->ChCrossImPrev[bin], IIReneLimiter_fx[bin] ) ); +#ifdef CHECK_CORRECTNESS + check(hDiracDecBin->ChCrossRe[bin], 2*q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin]); + check(hDiracDecBin->ChCrossIm[bin], 2*q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin]); +#endif + move64(); + move64(); FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { +#ifdef CHECK_CORRECTNESS hDiracDecBin->ChEne_fx[ch][bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], Mpy_32_32( hDiracDecBin->ChEnePrev_fx[ch][bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChEnePrev_e[ch][bin], &hDiracDecBin->ChEne_e[ch][bin] ); move32(); +#endif + + hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], Mpy_64_32( hDiracDecBin->ChEnePrev[ch][bin], IIReneLimiter_fx[bin] ) ); +#ifdef CHECK_CORRECTNESS + check(hDiracDecBin->ChEne[ch][bin], 2*q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin]); +#endif + move64(); } /* Store energy values and coefficients for next round */ +#ifdef CHECK_CORRECTNESS hDiracDecBin->ChCrossRePrev_fx[bin] = hDiracDecBin->ChCrossRe_fx[bin]; move32(); hDiracDecBin->ChCrossImPrev_fx[bin] = hDiracDecBin->ChCrossIm_fx[bin]; @@ -1566,14 +1835,77 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move16(); hDiracDecBin->ChCrossImPrev_e[bin] = hDiracDecBin->ChCrossIm_e[bin]; move16(); +#endif + + hDiracDecBin->ChCrossRePrev[bin] = hDiracDecBin->ChCrossRe[bin]; + move64(); + hDiracDecBin->ChCrossImPrev[bin] = hDiracDecBin->ChCrossIm[bin]; + move64(); FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { +#ifdef CHECK_CORRECTNESS hDiracDecBin->ChEnePrev_fx[ch][bin] = hDiracDecBin->ChEne_fx[ch][bin]; move32(); hDiracDecBin->ChEnePrev_e[ch][bin] = hDiracDecBin->ChEne_e[ch][bin]; move16(); +#endif + hDiracDecBin->ChEnePrev[ch][bin] = hDiracDecBin->ChEne[ch][bin]; + move64(); + } + + // FIX + Word16 shl; + for( ch = 0; ch < 
BINAURAL_CHANNELS; ch++ ) + { + shl = W_norm(hDiracDecBin->ChEne[ch][bin]); + hDiracDecBin->ChEne_fx[ch][bin] = W_extract_h(W_shl(hDiracDecBin->ChEne[ch][bin], shl)); + hDiracDecBin->ChEne_e[ch][bin] = 63 - 2 * q + rsh0 - shl; +#ifdef WMOPS + multiCounter[currCounter].W_norm--; + multiCounter[currCounter].W_shl--; + multiCounter[currCounter].W_extract_h--; +#endif /* if WMOPS */ +#ifdef CHECK_CORRECTNESS + check(hDiracDecBin->ChEne[ch][bin], 2*q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin]); +#endif } + + shl = W_norm(hDiracDecBin->ChCrossRe[bin]); + hDiracDecBin->ChCrossRe_fx[bin] = W_extract_h(W_shl(hDiracDecBin->ChCrossRe[bin], shl)); + hDiracDecBin->ChCrossRe_e[bin] = 63 - 2 * q + rsh0 - shl; +#ifdef WMOPS + multiCounter[currCounter].W_norm--; + multiCounter[currCounter].W_shl--; + multiCounter[currCounter].W_extract_h--; +#endif /* if WMOPS */ +#ifdef CHECK_CORRECTNESS + check(hDiracDecBin->ChCrossRe[bin], 2*q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin]); +#endif + + shl = W_norm(hDiracDecBin->ChCrossIm[bin]); + hDiracDecBin->ChCrossIm_fx[bin] = W_extract_h(W_shl(hDiracDecBin->ChCrossIm[bin], shl)); + hDiracDecBin->ChCrossIm_e[bin] = 63 - 2 * q + rsh0 - shl; +#ifdef WMOPS + multiCounter[currCounter].W_norm--; + multiCounter[currCounter].W_shl--; + multiCounter[currCounter].W_extract_h--; +#endif /* if WMOPS */ +#ifdef CHECK_CORRECTNESS + check(hDiracDecBin->ChCrossIm[bin], 2*q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin]); +#endif + + shl = W_norm(subFrameTotalEne[bin]); + subFrameTotalEne_fx[bin] = W_extract_h(W_shl(subFrameTotalEne[bin], shl)); + subFrameTotalEne_e[bin] = 63 - 2 * q + rsh1 - shl; +#ifdef WMOPS + multiCounter[currCounter].W_norm--; + multiCounter[currCounter].W_shl--; + multiCounter[currCounter].W_extract_h--; +#endif /* if WMOPS */ +#ifdef CHECK_CORRECTNESS + check(subFrameTotalEne[bin], 2*q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin]); +#endif } 
return; @@ -6479,6 +6811,7 @@ static void ivas_masa_ext_rend_parambin_internal_fx( Word32 reverbIm_fx[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; Word32 decorrRe_fx[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; Word32 decorrIm_fx[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; + Word64 subFrameTotalEne[CLDFB_NO_CHANNELS_MAX]; Word32 subFrameTotalEne_fx[CLDFB_NO_CHANNELS_MAX]; Word16 subFrameTotalEne_e[CLDFB_NO_CHANNELS_MAX]; Word32 IIReneLimiter_fx[CLDFB_NO_CHANNELS_MAX]; @@ -6614,7 +6947,7 @@ static void ivas_masa_ext_rend_parambin_internal_fx( max_band_decorr = hDiracDecBin->h_freq_domain_decorr_ap_params->max_band_decorr; ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, subframe, - subFrameTotalEne_fx, subFrameTotalEne_e, IIReneLimiter_fx, q_inp ); + subFrameTotalEne, subFrameTotalEne_fx, subFrameTotalEne_e, IIReneLimiter_fx, q_inp ); ivas_dirac_dec_binaural_formulate_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, subFrameTotalEne_fx, subFrameTotalEne_e, IIReneLimiter_fx, NULL ); diff --git a/lib_rend/ivas_stat_rend.h b/lib_rend/ivas_stat_rend.h index aec8eb3b9..d9913bb3f 100644 --- a/lib_rend/ivas_stat_rend.h +++ b/lib_rend/ivas_stat_rend.h @@ -612,6 +612,13 @@ typedef struct ivas_dirac_dec_binaural_data_structure Word16 q_processMtxDec; Word16 q_processMtxDecPrev; + Word64 ChEne[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + Word64 ChEnePrev[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + Word64 ChCrossRe[CLDFB_NO_CHANNELS_MAX]; + Word64 ChCrossRePrev[CLDFB_NO_CHANNELS_MAX]; + Word64 ChCrossIm[CLDFB_NO_CHANNELS_MAX]; + Word64 ChCrossImPrev[CLDFB_NO_CHANNELS_MAX]; + Word32 
ChEnePrev_fx[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; Word32 ChCrossRePrev_fx[CLDFB_NO_CHANNELS_MAX]; Word32 ChCrossImPrev_fx[CLDFB_NO_CHANNELS_MAX]; diff --git a/lib_rend/lib_rend_fx.c b/lib_rend/lib_rend_fx.c index 5827cc275..759a8f26a 100644 --- a/lib_rend/lib_rend_fx.c +++ b/lib_rend/lib_rend_fx.c @@ -9924,11 +9924,14 @@ static ivas_error ivas_masa_ext_rend_parambin_init( move16(); move16(); move16(); + set64_fx( hDiracDecBin->ChEnePrev[j], 0, nBins ); set_zero_fx( hDiracDecBin->ChEnePrev_fx[j], nBins ); set_zero_fx( hDiracDecBin->ChEneOutPrev_fx[j], nBins ); set16_fx( hDiracDecBin->ChEnePrev_e[j], 0, nBins ); set16_fx( hDiracDecBin->ChEneOutPrev_e[j], 0, nBins ); } + set64_fx( hDiracDecBin->ChCrossRePrev, 0, nBins ); + set64_fx( hDiracDecBin->ChCrossImPrev, 0, nBins ); set_zero_fx( hDiracDecBin->ChCrossRePrev_fx, nBins ); set_zero_fx( hDiracDecBin->ChCrossImPrev_fx, nBins ); set_zero_fx( hDiracDecBin->ChCrossReOutPrev_fx, nBins ); -- GitLab From 0b4b6d4aeb8858fa73e54bfc5be4dcb4645e82ab Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Fri, 24 Oct 2025 11:16:41 +0100 Subject: [PATCH 03/26] Add switch. 
--- lib_com/options.h | 6 + .../ivas_dirac_dec_binaural_functions_fx.c | 339 ++++++------------ lib_rend/ivas_stat_rend.h | 2 + lib_rend/lib_rend_fx.c | 12 +- 4 files changed, 121 insertions(+), 238 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 2c92cba9a..5a8735a96 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -126,4 +126,10 @@ /* #################### End BASOP porting switches ############################ */ +/* #################### Start BASOP optimization switches ############################ */ + +#define NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES /* Dolby: task 2157: optimize ivas_dirac_dec_binaural_formulate_input_covariance_matrices */ + +/* #################### End BASOP optimization switches ############################ */ + #endif diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 466253803..d3826cfba 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -207,18 +207,24 @@ ivas_error ivas_dirac_dec_init_binaural_data_fx( set16_fx( hDiracDecBin->processMtxDecRe_fx[j][k], 0, nBins ); set16_fx( hDiracDecBin->processMtxDecIm_fx[j][k], 0, nBins ); } +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES set64_fx( hDiracDecBin->ChEnePrev[j], 0, nBins ); +#else set32_fx( hDiracDecBin->ChEnePrev_fx[j], 0, nBins ); - set32_fx( hDiracDecBin->ChEneOutPrev_fx[j], 0, nBins ); set16_fx( hDiracDecBin->ChEnePrev_e[j], 0, nBins ); +#endif + set32_fx( hDiracDecBin->ChEneOutPrev_fx[j], 0, nBins ); set16_fx( hDiracDecBin->ChEneOutPrev_e[j], 0, nBins ); } +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES set64_fx( hDiracDecBin->ChCrossRePrev, 0, nBins ); set64_fx( hDiracDecBin->ChCrossImPrev, 0, nBins ); +#else set32_fx( hDiracDecBin->ChCrossRePrev_fx, 0, nBins ); set32_fx( hDiracDecBin->ChCrossImPrev_fx, 0, nBins ); set16_fx( 
hDiracDecBin->ChCrossRePrev_e, 0, nBins ); set16_fx( hDiracDecBin->ChCrossImPrev_e, 0, nBins ); +#endif set32_fx( hDiracDecBin->ChCrossReOutPrev_fx, 0, nBins ); set32_fx( hDiracDecBin->ChCrossImOutPrev_fx, 0, nBins ); set16_fx( hDiracDecBin->ChCrossReOutPrev_e, 0, nBins ); @@ -1359,65 +1365,15 @@ UWord32 ceil_log2(UWord32 x) } } -Word64 W_mult0_u32_i32( UWord32 L_var1, Word32 L_var2 ) -{ - Word64 L64_var_out; - L64_var_out = (UWord64) L_var1 * (Word64) L_var2; -#ifdef WMOPS - multiCounter[currCounter].W_mult0_32_32++; -#endif /* if WMOPS */ - return L64_var_out; -} - -// #define CHECK_CORRECTNESS - Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) { -#if 0 - Word64 tmph, tmpl, var_out; - tmpl = W_mult0_u32_i32( W_extract_l( W_var1 ), L_var2 ); // 2 - tmph = W_mult_32_32( W_extract_h( W_var1 ), L_var2 ); // 2 - var_out = W_add(tmph, W_shr(tmpl, 31)); // 2 - return var_out; -#else Word32 var1_l; Word64 var_out; var1_l = W_extract_l( W_var1 ); // 1 var_out = W_mult0_32_32( L_and( var1_l, 1 ), L_var2 ); // 2 var_out = W_mac_32_32( var_out, L_lshr( var1_l, 1 ), L_var2 ); // 2 var_out = W_mac_32_32( W_shr( var_out, 31 ), W_extract_h( W_var1 ), L_var2); // 3 -#ifdef CHECK_CORRECTNESS - __int128_t z = ((__int128_t)W_var1 * (__int128_t)L_var2) >> 31; - assert(var_out == (Word64)z); -#endif return var_out; -#endif -} - -void check(Word64 value, Word32 q, Word64 mant, Word32 exp) -{ - Word32 qq = 31 - exp; - Word32 shift = qq - q; - - if (shift >= 0) - { - mant >>= shift; - } - else - { - value >>= -shift; - } - - Word64 error = abs(mant - value); - if (error >= 4096) - { - printf("\n\n"); - printf("0x%016llx\n", value); - printf("%d\n", q); - printf("0x%016llx\n", mant); - printf("%d\n", qq); - assert(false); - } } static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( @@ -1459,9 +1415,10 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( nBins = hSpatParamRendCom->num_freq_bands; /* Actually bins */ move16(); +#ifdef 
NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES set64_fx( hDiracDecBin->ChCrossRe, 0, nBins ); set64_fx( hDiracDecBin->ChCrossIm, 0, nBins ); -#ifdef CHECK_CORRECTNESS +#else set32_fx( hDiracDecBin->ChCrossRe_fx, 0, nBins ); set32_fx( hDiracDecBin->ChCrossIm_fx, 0, nBins ); set16_fx( hDiracDecBin->ChCrossRe_e, 0, nBins ); @@ -1470,13 +1427,14 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES set64_fx( hDiracDecBin->ChEne[ch], 0, nBins ); -#ifdef CHECK_CORRECTNESS +#else set32_fx( hDiracDecBin->ChEne_fx[ch], 0, nBins ); set16_fx( hDiracDecBin->ChEne_e[ch], 0, nBins ); #endif } -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES set16_fx( subFrameTotalEne_e, 0, CLDFB_NO_CHANNELS_MAX ); #endif @@ -1508,15 +1466,18 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( } /* Formulate input and target covariance matrices for this subframe */ +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES set64_fx( subFrameTotalEne, 0, CLDFB_NO_CHANNELS_MAX ); -#ifdef CHECK_CORRECTNESS +#else set32_fx( subFrameTotalEne_fx, 0, CLDFB_NO_CHANNELS_MAX ); #endif exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES Word32 rsh0 = ceil_log2(hSpatParamRendCom->subframe_nbslots[subframe]) - 1; Word32 rsh1 = ceil_log2(BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe]) - 1; +#endif /* Calculate input covariance matrix */ FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ ) @@ -1527,7 +1488,12 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( { temp64 = W_mult0_32_32( inRe_fx[ch][slot][bin], inRe_fx[ch][slot][bin] ); // 
2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[ch][slot][bin], inIm_fx[ch][slot][bin] ) ); // 2q -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 + subFrameTotalEne[bin] = W_add( subFrameTotalEne[bin], W_shr( temp64, rsh1 ) ); // 2q - rsh1 + move64(); + move64(); +#else Word32 instEne_fx; exp1 = W_norm( temp64 ); instEne_fx = W_extract_h( W_shl( temp64, exp1 ) ); // 2q - 32 + exp1 @@ -1537,45 +1503,30 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move32(); move32(); #endif - - hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 - subFrameTotalEne[bin] = W_add( subFrameTotalEne[bin], W_shr( temp64, rsh1 ) ); // 2q - rsh1 -#ifdef CHECK_CORRECTNESS - check(hDiracDecBin->ChEne[ch][bin], 2*q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin]); - check(subFrameTotalEne[bin], 2*q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin]); -#endif - move64(); - move64(); } temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inRe_fx[1][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ) ); // 2q -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 + move64(); +#else exp1 = W_norm( temp64 ); temp = W_extract_h( W_shl( temp64, exp1 ) ); // // 2q - 32 + exp1 hDiracDecBin->ChCrossRe_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], temp, sub( exp, exp1 ), &hDiracDecBin->ChCrossRe_e[bin] ); move32(); #endif - hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 -#ifdef CHECK_CORRECTNESS - 
check(hDiracDecBin->ChCrossRe[bin], 2*q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin]); -#endif - move64(); - temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // 2q temp64 = W_sub( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inRe_fx[1][slot][bin] ) ); // 2q -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 + move64(); +#else exp1 = W_norm( temp64 ); temp = W_extract_h( W_shl( temp64, exp1 ) ); // // 2q - 32 + exp1 hDiracDecBin->ChCrossIm_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], temp, sub( exp, exp1 ), &hDiracDecBin->ChCrossIm_e[bin] ); move32(); #endif - - hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 -#ifdef CHECK_CORRECTNESS - check(hDiracDecBin->ChCrossIm[bin], 2*q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin]); -#endif - move64(); } } @@ -1586,27 +1537,23 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( FOR( bin = LOW_BIT_RATE_BINAURAL_EQ_OFFSET; bin < lastEqBin; bin++ ) { -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + subFrameTotalEne[bin] = Mpy_64_32( subFrameTotalEne[bin], lowBitRateEQ_fx[bin] ); + move64(); +#else subFrameTotalEne_fx[bin] = Mpy_32_32( subFrameTotalEne_fx[bin], lowBitRateEQ_fx[bin] ); // exp = subFrameTotalEne_e[bin] move32(); #endif - subFrameTotalEne[bin] = Mpy_64_32( subFrameTotalEne[bin], lowBitRateEQ_fx[bin] ); -#ifdef CHECK_CORRECTNESS - check(subFrameTotalEne[bin], 2*q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin]); -#endif - move64(); } FOR( ; bin < nBins; bin++ ) { -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + 
subFrameTotalEne[bin] = Mpy_64_32( subFrameTotalEne[bin], lowBitRateEQ_fx[lastEqBin] ); + move64(); +#else subFrameTotalEne_fx[bin] = Mpy_32_32( subFrameTotalEne_fx[bin], lowBitRateEQ_fx[lastEqBin] ); // exp = subFrameTotalEne_e[bin] move32(); #endif - subFrameTotalEne[bin] = Mpy_64_32( subFrameTotalEne[bin], lowBitRateEQ_fx[lastEqBin] ); -#ifdef CHECK_CORRECTNESS - check(subFrameTotalEne[bin], 2*q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin]); -#endif - move64(); } } @@ -1615,14 +1562,15 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( IF( ( EQ_32( ivas_format, SBA_FORMAT ) || EQ_32( ivas_format, SBA_ISM_FORMAT ) ) && EQ_16( nchan_transport, 2 ) ) { Word32 tempRe, tempIm; +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES Word64 subFrameSumEne[CLDFB_NO_CHANNELS_MAX]; -#ifdef CHECK_CORRECTNESS +#else Word32 subFrameSumEne_fx[CLDFB_NO_CHANNELS_MAX]; Word16 subFrameSumEne_e[CLDFB_NO_CHANNELS_MAX]; #endif - +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES set64_fx( subFrameSumEne, 0, CLDFB_NO_CHANNELS_MAX ); -#ifdef CHECK_CORRECTNESS +#else set32_fx( subFrameSumEne_fx, 0, CLDFB_NO_CHANNELS_MAX ); set16_fx( subFrameSumEne_e, 0, CLDFB_NO_CHANNELS_MAX ); #endif @@ -1633,23 +1581,23 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( tempRe = L_add( inRe_fx[0][slot][bin], inRe_fx[1][slot][bin] ); // q tempIm = L_add( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // q temp64 = W_add( W_mult0_32_32( tempRe, tempRe ), W_mult0_32_32( tempIm, tempIm ) ); // 2q -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + subFrameSumEne[bin] = W_add( subFrameSumEne[bin], W_shr( temp64, rsh1 ) ); + move64(); +#else exp1 = W_norm( temp64 ); temp = W_extract_h( W_shl( temp64, exp1 ) ); // 2q + exp1 subFrameSumEne_fx[bin] = BASOP_Util_Add_Mant32Exp( subFrameSumEne_fx[bin], subFrameSumEne_e[bin], temp, sub( exp 
/* 63 - 2q */, exp1 ) /*31 - (2q + exp1 - 32)*/, &subFrameSumEne_e[bin] ); move32(); #endif - - subFrameSumEne[bin] = W_add( subFrameSumEne[bin], W_shr( temp64, rsh1 ) ); -#ifdef CHECK_CORRECTNESS - check(subFrameSumEne[bin], 2*q - rsh1, subFrameSumEne_fx[bin], subFrameSumEne_e[bin]); -#endif - move64(); } } FOR( bin = 0; bin < nBins; bin++ ) { -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + subFrameTotalEne[bin] = W_max( subFrameSumEne[bin], subFrameTotalEne[bin] ); + move64(); +#else subFrameTotalEne_e[bin] = sub( subFrameTotalEne_e[bin], 1 ); move16(); temp = L_shl_sat( subFrameTotalEne_fx[bin], sub( subFrameTotalEne_e[bin], subFrameSumEne_e[bin] ) ); // subFrameSumEne_e[bin] @@ -1661,11 +1609,6 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move16(); } #endif - subFrameTotalEne[bin] = W_max( subFrameSumEne[bin], subFrameTotalEne[bin] ); -#ifdef CHECK_CORRECTNESS - check(subFrameTotalEne[bin], 2*q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin]); -#endif - move64(); } } @@ -1685,58 +1628,19 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( /* Temporally smooth cov mtx estimates for resulting mixing matrix stability. The design principle is that * the energy history (IIR) must not be more than double of the current frame energy. This provides more * robust performance at energy offsets when compared to typical IIR averaging. 
*/ -#ifdef CHECK_CORRECTNESS - Word16 num_e, den_e, exp0; - Word32 num, den, div0, div2; - num = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->ChEne_e[1][bin], &num_e ); - num = Mpy_32_32( num, IIReneLimiterFactor_fx ); /*Q = (31 - num_e + 26 - 31) = (26 - num_e)*/ - den_e = 0; - move16(); - den = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEnePrev_fx[0][bin], hDiracDecBin->ChEnePrev_e[0][bin], hDiracDecBin->ChEnePrev_fx[1][bin], hDiracDecBin->ChEnePrev_e[1][bin], &den_e ); - den = L_max( 1, den ); - IIReneLimiter_fx[bin] = div0 = BASOP_Util_Divide3232_Scale_cadence( num, den, &exp0 ); - exp = add( sub( num_e, den_e ), add( 5, exp0 ) ); - IF( L_shr_sat( IIReneLimiter_fx[bin], sub( 31, exp ) ) > 0 ) - { - IIReneLimiter_fx[bin] = div2 = ONE_IN_Q31; /*Q31*/ - move32(); - } - ELSE - { - IIReneLimiter_fx[bin] = div2 = L_shl( IIReneLimiter_fx[bin], exp ); /*Q31*/ - } -#endif - +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES Word64 num64, den64; Word16 num1_e, den1_e, exp1; Word32 num1, den1, div1, div3; num64 = W_add( W_shr( hDiracDecBin->ChEne[0][bin], 1 ), W_shr( hDiracDecBin->ChEne[1][bin], 1 ) ); // 2q - rsh0 - 1 num64 = Mpy_64_32( num64, IIReneLimiterFactor_fx ); // 2q - rsh0 - 1 - 5 -#ifdef CHECK_CORRECTNESS - check(num64, 2*q - rsh0 - 1 - 5, num, num_e + 5); - // printf("num0: %f\n", ldexp((double)num, (num_e + 5)-31)); - // printf("num1: %f\n", ldexp((double)num64, -(2*q - rsh0 - 1 - 5))); -#endif den64 = W_add( W_shr( hDiracDecBin->ChEnePrev[0][bin], 1 ), W_shr( hDiracDecBin->ChEnePrev[1][bin], 1 ) ); // 2q - rsh0 - 1 -#ifdef CHECK_CORRECTNESS - check(den64, 2*q - rsh0 - 1, den, den_e); - // printf("den0: %f\n", ldexp((double)den, (den_e + 0)-31)); - // printf("den1: %f\n", ldexp((double)den64, -(2*q - rsh0 - 1))); -#endif den64 = W_max( 1, den64 ); num1_e = W_norm( num64 ); den1_e = W_norm( den64 ); num1 = W_extract_h( W_shl( num64, num1_e ) ); 
// 2q - rsh0 - 1 - 5 + num1_e - 32 den1 = W_extract_h( W_shl( den64, den1_e ) ); // 2q - rsh0 - 1 + den1_e - 32 -#ifdef CHECK_CORRECTNESS - // printf("num2: %f\n", ldexp((double)num1, -(2*q - rsh0 - 1 - 5 + num1_e - 32))); - // printf("den2: %f\n", ldexp((double)den1, -(2*q - rsh0 - 1 + den1_e - 32))); -#endif IIReneLimiter_fx[bin] = div1 = BASOP_Util_Divide3232_Scale_newton( num1, den1, &exp1 ); -#ifdef CHECK_CORRECTNESS - // printf("div0: %f\n", ldexp((double)div0, (num_e + 5 - den_e + exp0)-31)); - // printf("div1: %f\n", ldexp((double)div1, +exp1+5 - num1_e+den1_e-31)); -#endif exp = add( sub( den1_e, num1_e ), add( 5, exp1 ) ); IF( L_shr_sat( IIReneLimiter_fx[bin], sub( 31, exp ) ) > 0 ) { @@ -1747,21 +1651,36 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( { IIReneLimiter_fx[bin] = div3 = L_shl( IIReneLimiter_fx[bin], exp ); /*Q31*/ } - -#ifdef CHECK_CORRECTNESS - // double ddiv0 = ldexp((double)div2, -31); - // double ddiv1 = ldexp((double)div3, -31); - // double error = fabs(ddiv0 - ddiv1) / fabs(ddiv0); - // if (error >= 0.2) - // { - // printf("\n\n"); - // printf("div0: %f\n", ddiv0); - // printf("div1: %f\n", ddiv1); - // assert(false); - // } +#else + Word16 num_e, den_e, exp0; + Word32 num, den, div0, div2; + num = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->ChEne_e[1][bin], &num_e ); + num = Mpy_32_32( num, IIReneLimiterFactor_fx ); /*Q = (31 - num_e + 26 - 31) = (26 - num_e)*/ + den_e = 0; + move16(); + den = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEnePrev_fx[0][bin], hDiracDecBin->ChEnePrev_e[0][bin], hDiracDecBin->ChEnePrev_fx[1][bin], hDiracDecBin->ChEnePrev_e[1][bin], &den_e ); + den = L_max( 1, den ); + IIReneLimiter_fx[bin] = div0 = BASOP_Util_Divide3232_Scale_cadence( num, den, &exp0 ); + exp = add( sub( num_e, den_e ), add( 5, exp0 ) ); + IF( L_shr_sat( IIReneLimiter_fx[bin], sub( 31, exp ) ) > 0 ) + { + 
IIReneLimiter_fx[bin] = div2 = ONE_IN_Q31; /*Q31*/ + move32(); + } + ELSE + { + IIReneLimiter_fx[bin] = div2 = L_shl( IIReneLimiter_fx[bin], exp ); /*Q31*/ + } #endif -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + hDiracDecBin->ChCrossRe[bin] = Mpy_64_32( hDiracDecBin->ChCrossRe[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 + hDiracDecBin->ChCrossIm[bin] = Mpy_64_32( hDiracDecBin->ChCrossIm[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 + move64(); + move64(); + move64(); + move64(); +#else hDiracDecBin->ChCrossRe_fx[bin] = Mpy_32_32( hDiracDecBin->ChCrossRe_fx[bin], qualityBasedSmFactor_fx ); hDiracDecBin->ChCrossIm_fx[bin] = Mpy_32_32( hDiracDecBin->ChCrossIm_fx[bin], qualityBasedSmFactor_fx ); move32(); @@ -1770,63 +1689,47 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move32(); #endif - hDiracDecBin->ChCrossRe[bin] = Mpy_64_32( hDiracDecBin->ChCrossRe[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 - hDiracDecBin->ChCrossIm[bin] = Mpy_64_32( hDiracDecBin->ChCrossIm[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 -#ifdef CHECK_CORRECTNESS - check(hDiracDecBin->ChCrossRe[bin], 2*q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin]); - check(hDiracDecBin->ChCrossIm[bin], 2*q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin]); -#endif - move64(); - move64(); - move64(); - move64(); - FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + hDiracDecBin->ChEne[ch][bin] = Mpy_64_32( hDiracDecBin->ChEne[ch][bin], qualityBasedSmFactor_fx ); + move64(); +#else hDiracDecBin->ChEne_fx[ch][bin] = Mpy_32_32( hDiracDecBin->ChEne_fx[ch][bin], qualityBasedSmFactor_fx ); move32(); #endif - - hDiracDecBin->ChEne[ch][bin] = Mpy_64_32( hDiracDecBin->ChEne[ch][bin], qualityBasedSmFactor_fx ); -#ifdef CHECK_CORRECTNESS - check(hDiracDecBin->ChEne[ch][bin], 2*q 
- rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin]); -#endif - move64(); } -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], Mpy_64_32( hDiracDecBin->ChCrossRePrev[bin], IIReneLimiter_fx[bin] ) ); + hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], Mpy_64_32( hDiracDecBin->ChCrossImPrev[bin], IIReneLimiter_fx[bin] ) ); + move64(); + move64(); +#else hDiracDecBin->ChCrossRe_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], Mpy_32_32( hDiracDecBin->ChCrossRePrev_fx[bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChCrossRePrev_e[bin], &hDiracDecBin->ChCrossRe_e[bin] ); hDiracDecBin->ChCrossIm_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], Mpy_32_32( hDiracDecBin->ChCrossImPrev_fx[bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChCrossImPrev_e[bin], &hDiracDecBin->ChCrossIm_e[bin] ); move32(); move32(); #endif - hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], Mpy_64_32( hDiracDecBin->ChCrossRePrev[bin], IIReneLimiter_fx[bin] ) ); - hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], Mpy_64_32( hDiracDecBin->ChCrossImPrev[bin], IIReneLimiter_fx[bin] ) ); -#ifdef CHECK_CORRECTNESS - check(hDiracDecBin->ChCrossRe[bin], 2*q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin]); - check(hDiracDecBin->ChCrossIm[bin], 2*q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin]); -#endif - move64(); - move64(); - FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], Mpy_64_32( hDiracDecBin->ChEnePrev[ch][bin], IIReneLimiter_fx[bin] ) ); + move64(); +#else 
hDiracDecBin->ChEne_fx[ch][bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], Mpy_32_32( hDiracDecBin->ChEnePrev_fx[ch][bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChEnePrev_e[ch][bin], &hDiracDecBin->ChEne_e[ch][bin] ); move32(); #endif - - hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], Mpy_64_32( hDiracDecBin->ChEnePrev[ch][bin], IIReneLimiter_fx[bin] ) ); -#ifdef CHECK_CORRECTNESS - check(hDiracDecBin->ChEne[ch][bin], 2*q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin]); -#endif - move64(); } /* Store energy values and coefficients for next round */ -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + hDiracDecBin->ChCrossRePrev[bin] = hDiracDecBin->ChCrossRe[bin]; + move64(); + hDiracDecBin->ChCrossImPrev[bin] = hDiracDecBin->ChCrossIm[bin]; + move64(); +#else hDiracDecBin->ChCrossRePrev_fx[bin] = hDiracDecBin->ChCrossRe_fx[bin]; move32(); hDiracDecBin->ChCrossImPrev_fx[bin] = hDiracDecBin->ChCrossIm_fx[bin]; @@ -1837,23 +1740,20 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move16(); #endif - hDiracDecBin->ChCrossRePrev[bin] = hDiracDecBin->ChCrossRe[bin]; - move64(); - hDiracDecBin->ChCrossImPrev[bin] = hDiracDecBin->ChCrossIm[bin]; - move64(); - FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { -#ifdef CHECK_CORRECTNESS +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + hDiracDecBin->ChEnePrev[ch][bin] = hDiracDecBin->ChEne[ch][bin]; + move64(); +#else hDiracDecBin->ChEnePrev_fx[ch][bin] = hDiracDecBin->ChEne_fx[ch][bin]; move32(); hDiracDecBin->ChEnePrev_e[ch][bin] = hDiracDecBin->ChEne_e[ch][bin]; move16(); #endif - hDiracDecBin->ChEnePrev[ch][bin] = hDiracDecBin->ChEne[ch][bin]; - move64(); } +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES // FIX Word16 shl; for( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) @@ -1861,50 +1761,19 
@@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( shl = W_norm(hDiracDecBin->ChEne[ch][bin]); hDiracDecBin->ChEne_fx[ch][bin] = W_extract_h(W_shl(hDiracDecBin->ChEne[ch][bin], shl)); hDiracDecBin->ChEne_e[ch][bin] = 63 - 2 * q + rsh0 - shl; -#ifdef WMOPS - multiCounter[currCounter].W_norm--; - multiCounter[currCounter].W_shl--; - multiCounter[currCounter].W_extract_h--; -#endif /* if WMOPS */ -#ifdef CHECK_CORRECTNESS - check(hDiracDecBin->ChEne[ch][bin], 2*q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin]); -#endif } shl = W_norm(hDiracDecBin->ChCrossRe[bin]); hDiracDecBin->ChCrossRe_fx[bin] = W_extract_h(W_shl(hDiracDecBin->ChCrossRe[bin], shl)); hDiracDecBin->ChCrossRe_e[bin] = 63 - 2 * q + rsh0 - shl; -#ifdef WMOPS - multiCounter[currCounter].W_norm--; - multiCounter[currCounter].W_shl--; - multiCounter[currCounter].W_extract_h--; -#endif /* if WMOPS */ -#ifdef CHECK_CORRECTNESS - check(hDiracDecBin->ChCrossRe[bin], 2*q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin]); -#endif shl = W_norm(hDiracDecBin->ChCrossIm[bin]); hDiracDecBin->ChCrossIm_fx[bin] = W_extract_h(W_shl(hDiracDecBin->ChCrossIm[bin], shl)); hDiracDecBin->ChCrossIm_e[bin] = 63 - 2 * q + rsh0 - shl; -#ifdef WMOPS - multiCounter[currCounter].W_norm--; - multiCounter[currCounter].W_shl--; - multiCounter[currCounter].W_extract_h--; -#endif /* if WMOPS */ -#ifdef CHECK_CORRECTNESS - check(hDiracDecBin->ChCrossIm[bin], 2*q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin]); -#endif shl = W_norm(subFrameTotalEne[bin]); subFrameTotalEne_fx[bin] = W_extract_h(W_shl(subFrameTotalEne[bin], shl)); subFrameTotalEne_e[bin] = 63 - 2 * q + rsh1 - shl; -#ifdef WMOPS - multiCounter[currCounter].W_norm--; - multiCounter[currCounter].W_shl--; - multiCounter[currCounter].W_extract_h--; -#endif /* if WMOPS */ -#ifdef CHECK_CORRECTNESS - check(subFrameTotalEne[bin], 2*q - rsh1, subFrameTotalEne_fx[bin], 
subFrameTotalEne_e[bin]); #endif } diff --git a/lib_rend/ivas_stat_rend.h b/lib_rend/ivas_stat_rend.h index d9913bb3f..fe79832bd 100644 --- a/lib_rend/ivas_stat_rend.h +++ b/lib_rend/ivas_stat_rend.h @@ -612,12 +612,14 @@ typedef struct ivas_dirac_dec_binaural_data_structure Word16 q_processMtxDec; Word16 q_processMtxDecPrev; +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES Word64 ChEne[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; Word64 ChEnePrev[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; Word64 ChCrossRe[CLDFB_NO_CHANNELS_MAX]; Word64 ChCrossRePrev[CLDFB_NO_CHANNELS_MAX]; Word64 ChCrossIm[CLDFB_NO_CHANNELS_MAX]; Word64 ChCrossImPrev[CLDFB_NO_CHANNELS_MAX]; +#endif Word32 ChEnePrev_fx[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; Word32 ChCrossRePrev_fx[CLDFB_NO_CHANNELS_MAX]; diff --git a/lib_rend/lib_rend_fx.c b/lib_rend/lib_rend_fx.c index 759a8f26a..6429df931 100644 --- a/lib_rend/lib_rend_fx.c +++ b/lib_rend/lib_rend_fx.c @@ -9924,20 +9924,26 @@ static ivas_error ivas_masa_ext_rend_parambin_init( move16(); move16(); move16(); +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES set64_fx( hDiracDecBin->ChEnePrev[j], 0, nBins ); +#else set_zero_fx( hDiracDecBin->ChEnePrev_fx[j], nBins ); - set_zero_fx( hDiracDecBin->ChEneOutPrev_fx[j], nBins ); set16_fx( hDiracDecBin->ChEnePrev_e[j], 0, nBins ); +#endif + set_zero_fx( hDiracDecBin->ChEneOutPrev_fx[j], nBins ); set16_fx( hDiracDecBin->ChEneOutPrev_e[j], 0, nBins ); } +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES set64_fx( hDiracDecBin->ChCrossRePrev, 0, nBins ); set64_fx( hDiracDecBin->ChCrossImPrev, 0, nBins ); +#else set_zero_fx( hDiracDecBin->ChCrossRePrev_fx, nBins ); set_zero_fx( hDiracDecBin->ChCrossImPrev_fx, nBins ); - set_zero_fx( hDiracDecBin->ChCrossReOutPrev_fx, nBins ); - set_zero_fx( hDiracDecBin->ChCrossImOutPrev_fx, nBins ); set16_fx( hDiracDecBin->ChCrossRePrev_e, 0, nBins ); set16_fx( 
hDiracDecBin->ChCrossImPrev_e, 0, nBins ); +#endif + set_zero_fx( hDiracDecBin->ChCrossReOutPrev_fx, nBins ); + set_zero_fx( hDiracDecBin->ChCrossImOutPrev_fx, nBins ); set16_fx( hDiracDecBin->ChCrossReOutPrev_e, 0, nBins ); set16_fx( hDiracDecBin->ChCrossImOutPrev_e, 0, nBins ); hDiracDecBin->renderStereoOutputInsteadOfBinaural = 0; -- GitLab From 28b527da02544b4179a3fed8ade22156bdc0c8cf Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Fri, 24 Oct 2025 12:54:02 +0100 Subject: [PATCH 04/26] clang format. --- .../ivas_dirac_dec_binaural_functions_fx.c | 72 +++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index d3826cfba..059537628 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1330,36 +1330,36 @@ static void ivas_dirac_dec_decorrelate_slot_fx( return; } -UWord32 ceil_pow2(UWord32 x) +UWord32 ceil_pow2( UWord32 x ) { --x; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; x |= x >> 16; ++x; return x; } -UWord32 popcnt(UWord32 x) -{ +UWord32 popcnt( UWord32 x ) +{ // it takes once cycle - x = (x & 0x55555555) + ((x & 0xAAAAAAAA) >> 1); - x = (x & 0x33333333) + ((x & 0xCCCCCCCC) >> 2); - x = (x & 0x0F0F0F0F) + ((x & 0xF0F0F0F0) >> 4); - x = (x & 0x00FF00FF) + ((x & 0xFF00FF00) >> 8); - x = (x & 0x0000FFFF) + ((x & 0xFFFF0000) >> 16); + x = ( x & 0x55555555 ) + ( ( x & 0xAAAAAAAA ) >> 1 ); + x = ( x & 0x33333333 ) + ( ( x & 0xCCCCCCCC ) >> 2 ); + x = ( x & 0x0F0F0F0F ) + ( ( x & 0xF0F0F0F0 ) >> 4 ); + x = ( x & 0x00FF00FF ) + ( ( x & 0xFF00FF00 ) >> 8 ); + x = ( x & 0x0000FFFF ) + ( ( x & 0xFFFF0000 ) >> 16 ); return x; } -UWord32 ceil_log2(UWord32 x) +UWord32 ceil_log2( UWord32 x ) { - if (x) + if ( x ) { - return popcnt(ceil_pow2(x) - 1); + return popcnt( ceil_pow2( x ) - 1 ); } - else + else { 
return 0; } @@ -1369,10 +1369,10 @@ Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) { Word32 var1_l; Word64 var_out; - var1_l = W_extract_l( W_var1 ); // 1 - var_out = W_mult0_32_32( L_and( var1_l, 1 ), L_var2 ); // 2 - var_out = W_mac_32_32( var_out, L_lshr( var1_l, 1 ), L_var2 ); // 2 - var_out = W_mac_32_32( W_shr( var_out, 31 ), W_extract_h( W_var1 ), L_var2); // 3 + var1_l = W_extract_l( W_var1 ); // 1 + var_out = W_mult0_32_32( L_and( var1_l, 1 ), L_var2 ); // 2 + var_out = W_mac_32_32( var_out, L_lshr( var1_l, 1 ), L_var2 ); // 2 + var_out = W_mac_32_32( W_shr( var_out, 31 ), W_extract_h( W_var1 ), L_var2 ); // 3 return var_out; } @@ -1383,7 +1383,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( Word32 inRe_fx[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /*q*/ Word32 inIm_fx[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /*q*/ const Word16 subframe, - Word64 *subFrameTotalEne, + Word64 *subFrameTotalEne, Word32 *subFrameTotalEne_fx, Word16 *subFrameTotalEne_e, Word32 *IIReneLimiter_fx, @@ -1475,8 +1475,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - Word32 rsh0 = ceil_log2(hSpatParamRendCom->subframe_nbslots[subframe]) - 1; - Word32 rsh1 = ceil_log2(BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe]) - 1; + Word32 rsh0 = ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1; + Word32 rsh1 = ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1; #endif /* Calculate input covariance matrix */ @@ -1632,8 +1632,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( Word64 num64, den64; Word16 num1_e, den1_e, exp1; Word32 num1, den1, div1, div3; - num64 = W_add( W_shr( hDiracDecBin->ChEne[0][bin], 1 ), W_shr( 
hDiracDecBin->ChEne[1][bin], 1 ) ); // 2q - rsh0 - 1 - num64 = Mpy_64_32( num64, IIReneLimiterFactor_fx ); // 2q - rsh0 - 1 - 5 + num64 = W_add( W_shr( hDiracDecBin->ChEne[0][bin], 1 ), W_shr( hDiracDecBin->ChEne[1][bin], 1 ) ); // 2q - rsh0 - 1 + num64 = Mpy_64_32( num64, IIReneLimiterFactor_fx ); // 2q - rsh0 - 1 - 5 den64 = W_add( W_shr( hDiracDecBin->ChEnePrev[0][bin], 1 ), W_shr( hDiracDecBin->ChEnePrev[1][bin], 1 ) ); // 2q - rsh0 - 1 den64 = W_max( 1, den64 ); num1_e = W_norm( num64 ); @@ -1674,8 +1674,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #endif #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - hDiracDecBin->ChCrossRe[bin] = Mpy_64_32( hDiracDecBin->ChCrossRe[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 - hDiracDecBin->ChCrossIm[bin] = Mpy_64_32( hDiracDecBin->ChCrossIm[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 + hDiracDecBin->ChCrossRe[bin] = Mpy_64_32( hDiracDecBin->ChCrossRe[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 + hDiracDecBin->ChCrossIm[bin] = Mpy_64_32( hDiracDecBin->ChCrossIm[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 move64(); move64(); move64(); @@ -1756,23 +1756,23 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES // FIX Word16 shl; - for( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) + for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { - shl = W_norm(hDiracDecBin->ChEne[ch][bin]); - hDiracDecBin->ChEne_fx[ch][bin] = W_extract_h(W_shl(hDiracDecBin->ChEne[ch][bin], shl)); + shl = W_norm( hDiracDecBin->ChEne[ch][bin] ); + hDiracDecBin->ChEne_fx[ch][bin] = W_extract_h( W_shl( hDiracDecBin->ChEne[ch][bin], shl ) ); hDiracDecBin->ChEne_e[ch][bin] = 63 - 2 * q + rsh0 - shl; } - shl = W_norm(hDiracDecBin->ChCrossRe[bin]); - hDiracDecBin->ChCrossRe_fx[bin] = W_extract_h(W_shl(hDiracDecBin->ChCrossRe[bin], shl)); + shl = W_norm( hDiracDecBin->ChCrossRe[bin] ); + 
hDiracDecBin->ChCrossRe_fx[bin] = W_extract_h( W_shl( hDiracDecBin->ChCrossRe[bin], shl ) ); hDiracDecBin->ChCrossRe_e[bin] = 63 - 2 * q + rsh0 - shl; - shl = W_norm(hDiracDecBin->ChCrossIm[bin]); - hDiracDecBin->ChCrossIm_fx[bin] = W_extract_h(W_shl(hDiracDecBin->ChCrossIm[bin], shl)); + shl = W_norm( hDiracDecBin->ChCrossIm[bin] ); + hDiracDecBin->ChCrossIm_fx[bin] = W_extract_h( W_shl( hDiracDecBin->ChCrossIm[bin], shl ) ); hDiracDecBin->ChCrossIm_e[bin] = 63 - 2 * q + rsh0 - shl; - shl = W_norm(subFrameTotalEne[bin]); - subFrameTotalEne_fx[bin] = W_extract_h(W_shl(subFrameTotalEne[bin], shl)); + shl = W_norm( subFrameTotalEne[bin] ); + subFrameTotalEne_fx[bin] = W_extract_h( W_shl( subFrameTotalEne[bin], shl ) ); subFrameTotalEne_e[bin] = 63 - 2 * q + rsh1 - shl; #endif } -- GitLab From 837da740df1c1e4271842c656a31dda5c1672f74 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Mon, 27 Oct 2025 12:12:47 +0000 Subject: [PATCH 05/26] Addressed Thomas Dettbarn's comments. --- lib_basop/enh64.c | 97 ------------------------------------------ lib_basop/enh64.h | 104 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 102 insertions(+), 99 deletions(-) diff --git a/lib_basop/enh64.c b/lib_basop/enh64.c index b53adb79a..7812f5af0 100644 --- a/lib_basop/enh64.c +++ b/lib_basop/enh64.c @@ -40,103 +40,6 @@ *****************************************************************************/ #ifdef ENH_64_BIT_OPERATOR -/*______________________________________________________________________________ -| | -| Function Name : W_min | -| | -| Purpose : | -| | -| Compares L64_var1 and L64_var2 and returns the minimum value. | -| | -| Complexity weight : 1 | -| | -| Inputs : | -| | -| L64_var1 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. 
| -| | -| L64_var2 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | -| | -| Outputs : | -| | -| none | -| | -| Return Value : | -| | -| L64_var_out | -| 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. | -|______________________________________________________________________________| -*/ -Word64 W_min( Word64 L64_var1, Word64 L64_var2 ) -{ - Word64 L64_var_out; - - if ( L64_var1 <= L64_var2 ) - { - L64_var_out = L64_var1; - } - else - { - L64_var_out = L64_var2; - } - -#ifdef WMOPS - multiCounter[currCounter].W_min++; -#endif /* ifdef WMOPS */ - - return ( L64_var_out ); -} - -/*______________________________________________________________________________ -| | -| Function Name : W_max | -| | -| Purpose : | -| | -| Compares L64_var1 and L64_var2 and returns the maximum value. | -| | -| Complexity weight : 1 | -| | -| Inputs : | -| | -| L64_var1 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | -| | -| L64_var2 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | -| | -| Outputs : | -| | -| none | -| | -| Return Value : | -| | -| L64_var_out | -| 64 bit long signed integer (Word64) whose value falls in the | -| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. 
| -|______________________________________________________________________________| -*/ -Word64 W_max( Word64 L64_var1, Word64 L64_var2 ) -{ - Word64 L64_var_out; - - if ( L64_var1 >= L64_var2 ) - { - L64_var_out = L64_var1; - } - else - { - L64_var_out = L64_var2; - } - -#ifdef WMOPS - multiCounter[currCounter].W_max++; -#endif /* ifdef WMOPS */ - - return ( L64_var_out ); -} /*___________________________________________________________________________ | | diff --git a/lib_basop/enh64.h b/lib_basop/enh64.h index d690708da..ab21d5b0c 100644 --- a/lib_basop/enh64.h +++ b/lib_basop/enh64.h @@ -21,8 +21,108 @@ * *****************************************************************************/ #ifdef ENH_64_BIT_OPERATOR -Word64 W_min( Word64 L64_var1, Word64 L64_var2 ); -Word64 W_max( Word64 L64_var1, Word64 L64_var2 ); + + +/*______________________________________________________________________________ +| | +| Function Name : W_min | +| | +| Purpose : | +| | +| Compares L64_var1 and L64_var2 and returns the minimum value. | +| | +| Complexity weight : 1 | +| | +| Inputs : | +| | +| L64_var1 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | +| | +| L64_var2 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | +| | +| Outputs : | +| | +| none | +| | +| Return Value : | +| | +| L64_var_out | +| 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. 
| +|______________________________________________________________________________| +*/ +static __inline Word64 W_min( Word64 L64_var1, Word64 L64_var2 ) +{ + Word64 L64_var_out; + + if ( L64_var1 <= L64_var2 ) + { + L64_var_out = L64_var1; + } + else + { + L64_var_out = L64_var2; + } + +#ifdef WMOPS + multiCounter[currCounter].W_min++; +#endif /* ifdef WMOPS */ + + return ( L64_var_out ); +} + + +/*______________________________________________________________________________ +| | +| Function Name : W_max | +| | +| Purpose : | +| | +| Compares L64_var1 and L64_var2 and returns the maximum value. | +| | +| Complexity weight : 1 | +| | +| Inputs : | +| | +| L64_var1 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var1 <= 0x7fffffff ffffffffLL. | +| | +| L64_var2 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var2 <= 0x7fffffff ffffffffLL. | +| | +| Outputs : | +| | +| none | +| | +| Return Value : | +| | +| L64_var_out | +| 64 bit long signed integer (Word64) whose value falls in the | +| range : 0x80000000 00000000LL <= L64_var_out <= 0x7fffffff ffffffffLL. | +|______________________________________________________________________________| +*/ +static __inline Word64 W_max( Word64 L64_var1, Word64 L64_var2 ) +{ + Word64 L64_var_out; + + if ( L64_var1 >= L64_var2 ) + { + L64_var_out = L64_var1; + } + else + { + L64_var_out = L64_var2; + } + +#ifdef WMOPS + multiCounter[currCounter].W_max++; +#endif /* ifdef WMOPS */ + + return ( L64_var_out ); +} + + Word64 W_add_nosat( Word64 L64_var1, Word64 L64_var2 ); Word64 W_sub_nosat( Word64 L64_var1, Word64 L64_var2 ); Word64 W_shl( Word64 L64_var1, Word16 var2 ); -- GitLab From 024b3128dfee95ca8cc869625aae47673a70f8be Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Mon, 27 Oct 2025 12:15:21 +0000 Subject: [PATCH 06/26] Revert changes in enh64.c. 
--- lib_basop/enh64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib_basop/enh64.c b/lib_basop/enh64.c index 7812f5af0..8bffb620c 100644 --- a/lib_basop/enh64.c +++ b/lib_basop/enh64.c @@ -41,6 +41,7 @@ #ifdef ENH_64_BIT_OPERATOR + /*___________________________________________________________________________ | | | Function Name : W_add_nosat | @@ -83,6 +84,7 @@ Word64 W_add_nosat( Word64 L64_var1, Word64 L64_var2 ) return L64_var_out; } + /*___________________________________________________________________________ | | | Function Name : W_sub_nosat | -- GitLab From bb3fed4139ce4894f683d7735ebad96797ba20fe Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Mon, 27 Oct 2025 10:33:41 +0000 Subject: [PATCH 07/26] Revert some changes and improve code. --- .gitignore | 2 - CMakeLists.txt | 6 +- lib_com/options.h | 12 ++-- .../ivas_dirac_dec_binaural_functions_fx.c | 58 ++++++++++--------- 4 files changed, 39 insertions(+), 39 deletions(-) diff --git a/.gitignore b/.gitignore index e03b08671..9dcfcdcf8 100644 --- a/.gitignore +++ b/.gitignore @@ -84,5 +84,3 @@ coan_out_* /scripts /tests /pytest.ini -/build -audio.wav diff --git a/CMakeLists.txt b/CMakeLists.txt index 53c3a84c7..ab640932f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,12 +117,8 @@ endif() # configuration options for all platforms set(WMOPS OFF CACHE BOOL "enable WMOPS") -set(WMOPS_DETAIL OFF CACHE BOOL "enable WMOPS detail") if(WMOPS) add_definitions("-DWMOPS=1") - if(WMOPS_DETAIL) - add_definitions("-DWMOPS_DETAIL=1") - endif() endif() project(stereo-evs LANGUAGES C) @@ -220,7 +216,7 @@ target_link_libraries(ISAR_post_rend lib_basop lib_isar lib_util lib_com) target_include_directories(ISAR_post_rend PRIVATE lib_basop lib_isar) add_executable(ambi_converter apps/ambi_converter.c) -target_link_libraries(ambi_converter lib_util lib_com lib_basop lib_debug) +target_link_libraries(ambi_converter lib_util lib_com lib_basop) if(UNIX) target_link_libraries(ambi_converter m) endif() diff 
--git a/lib_com/options.h b/lib_com/options.h index 5a8735a96..23d699447 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -55,11 +55,13 @@ #define SUPPORT_JBM_TRACEFILE /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */ -/*#define WMOPS*/ /* Activate complexity and memory counters */ -/*#define WMOPS_PER_FRAME*/ /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */ -/*#define WMOPS_DETAIL*/ /* Output detailed complexity printout for every function. Increases runtime overhead */ -/*#define WMOPS_WC_FRAME_ANALYSIS*/ /* Output detailed complexity analysis for the worst-case frame */ -/*#define MEM_COUNT_DETAILS*/ /* Output detailed memory analysis for the worst-case frame (writes to the file "mem_analysis.csv") */ +/*#define WMOPS*/ /* Activate complexity and memory counters */ +#ifdef WMOPS +#define WMOPS_PER_FRAME /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */ +#define WMOPS_DETAIL /* Output detailed complexity printout for every function. 
Increases runtime overhead */ +#define WMOPS_WC_FRAME_ANALYSIS /* Output detailed complexity analysis for the worst-case frame */ +/*define MEM_COUNT_DETAILS*/ /* Output detailed memory analysis for the worst-case frame (writes to the file "mem_analysis.csv") */ +#endif /*#define DISABLE_LIMITER*/ diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 059537628..4f3f0cc08 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -98,7 +98,7 @@ static void ivas_dirac_dec_binaural_internal_fx( Decoder_Struct *st_ivas, COMBIN static void ivas_dirac_dec_decorrelate_slot_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, const Word16 num_freq_bands, const Word16 slot, Word32 inRe[][CLDFB_SLOTS_PER_SUBFRAME] /*q_inp*/[CLDFB_NO_CHANNELS_MAX], Word32 inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX] /*q_inp*/, const Word16 q_inp, Word32 decRe[][CLDFB_NO_CHANNELS_MAX], Word32 decIm[][CLDFB_NO_CHANNELS_MAX], Word16 *q_out ); -static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, PARAMBIN_REND_CONFIG_HANDLE hConfig, Word32 inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], Word32 inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], const Word16 subframe, Word64 *subFrameTotalEne, Word32 *subFrameTotalEne_fx, Word16 *subFrameTotalEne_e, Word32 *IIReneLimiter, Word16 q ); +static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, PARAMBIN_REND_CONFIG_HANDLE hConfig, Word32 inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], Word32 inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], const Word16 subframe, Word64 *subFrameTotalEne, Word32 *subFrameTotalEne_fx, Word16 *subFrameTotalEne_e, Word32 *IIReneLimiter, const Word16 q ); 
static void ivas_dirac_dec_binaural_formulate_target_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, const SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, const PARAMBIN_REND_CONFIG_HANDLE hConfig, Word32 Rmat[3][3], const Word16 subframe, const Word16 isHeadtracked, const Word32 *subFrameTotalEne, Word16 *subFrameTotalEne_e, const Word32 *IIReneLimiter, const MASA_ISM_DATA_HANDLE hMasaIsmData ); @@ -1398,9 +1398,13 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( IVAS_FORMAT ivas_format; Word32 ivas_total_brate; Word16 nchan_transport; +#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + Word16 rsh0, rsh1, exp0, exp1; +#else Word16 exp, exp1; - Word64 temp64; Word32 temp; +#endif + Word64 temp64; ivas_format = hConfig->ivas_format; move32(); @@ -1472,11 +1476,13 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( set32_fx( subFrameTotalEne_fx, 0, CLDFB_NO_CHANNELS_MAX ); #endif - exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below - #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - Word32 rsh0 = ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1; - Word32 rsh1 = ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1; + rsh0 = ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1; + rsh1 = ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1; + exp0 = add( sub( 63, shl( q, 1 ) ), rsh0 ); + exp1 = add( sub( 63, shl( q, 1 ) ), rsh1 ); +#else + exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below #endif /* Calculate input covariance matrix */ @@ -1628,48 +1634,47 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( /* Temporally smooth cov mtx estimates for resulting mixing matrix stability. 
The design principle is that * the energy history (IIR) must not be more than double of the current frame energy. This provides more * robust performance at energy offsets when compared to typical IIR averaging. */ + Word16 num_e, den_e; + Word32 num, den; #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + Word16 exp; Word64 num64, den64; - Word16 num1_e, den1_e, exp1; - Word32 num1, den1, div1, div3; num64 = W_add( W_shr( hDiracDecBin->ChEne[0][bin], 1 ), W_shr( hDiracDecBin->ChEne[1][bin], 1 ) ); // 2q - rsh0 - 1 num64 = Mpy_64_32( num64, IIReneLimiterFactor_fx ); // 2q - rsh0 - 1 - 5 den64 = W_add( W_shr( hDiracDecBin->ChEnePrev[0][bin], 1 ), W_shr( hDiracDecBin->ChEnePrev[1][bin], 1 ) ); // 2q - rsh0 - 1 den64 = W_max( 1, den64 ); - num1_e = W_norm( num64 ); - den1_e = W_norm( den64 ); - num1 = W_extract_h( W_shl( num64, num1_e ) ); // 2q - rsh0 - 1 - 5 + num1_e - 32 - den1 = W_extract_h( W_shl( den64, den1_e ) ); // 2q - rsh0 - 1 + den1_e - 32 - IIReneLimiter_fx[bin] = div1 = BASOP_Util_Divide3232_Scale_newton( num1, den1, &exp1 ); - exp = add( sub( den1_e, num1_e ), add( 5, exp1 ) ); + num_e = W_norm( num64 ); + den_e = W_norm( den64 ); + num = W_extract_h( W_shl( num64, num_e ) ); // 2q - rsh0 - 1 - 5 + num_e - 32 + den = W_extract_h( W_shl( den64, den_e ) ); // 2q - rsh0 - 1 + den_e - 32 + IIReneLimiter_fx[bin] = BASOP_Util_Divide3232_Scale_newton( num, den, &exp ); + exp = add( sub( den_e, num_e ), add( 5, exp ) ); IF( L_shr_sat( IIReneLimiter_fx[bin], sub( 31, exp ) ) > 0 ) { - IIReneLimiter_fx[bin] = div3 = ONE_IN_Q31; /*Q31*/ + IIReneLimiter_fx[bin] = ONE_IN_Q31; /*Q31*/ move32(); } ELSE { - IIReneLimiter_fx[bin] = div3 = L_shl( IIReneLimiter_fx[bin], exp ); /*Q31*/ + IIReneLimiter_fx[bin] = L_shl( IIReneLimiter_fx[bin], exp ); /*Q31*/ } #else - Word16 num_e, den_e, exp0; - Word32 num, den, div0, div2; num = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_e[0][bin], 
hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->ChEne_e[1][bin], &num_e ); num = Mpy_32_32( num, IIReneLimiterFactor_fx ); /*Q = (31 - num_e + 26 - 31) = (26 - num_e)*/ den_e = 0; move16(); den = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEnePrev_fx[0][bin], hDiracDecBin->ChEnePrev_e[0][bin], hDiracDecBin->ChEnePrev_fx[1][bin], hDiracDecBin->ChEnePrev_e[1][bin], &den_e ); den = L_max( 1, den ); - IIReneLimiter_fx[bin] = div0 = BASOP_Util_Divide3232_Scale_cadence( num, den, &exp0 ); - exp = add( sub( num_e, den_e ), add( 5, exp0 ) ); + IIReneLimiter_fx[bin] = BASOP_Util_Divide3232_Scale_cadence( num, den, &exp ); + exp = add( sub( num_e, den_e ), add( 5, exp ) ); IF( L_shr_sat( IIReneLimiter_fx[bin], sub( 31, exp ) ) > 0 ) { - IIReneLimiter_fx[bin] = div2 = ONE_IN_Q31; /*Q31*/ + IIReneLimiter_fx[bin] = ONE_IN_Q31; /*Q31*/ move32(); } ELSE { - IIReneLimiter_fx[bin] = div2 = L_shl( IIReneLimiter_fx[bin], exp ); /*Q31*/ + IIReneLimiter_fx[bin] = L_shl( IIReneLimiter_fx[bin], exp ); /*Q31*/ } #endif @@ -1754,26 +1759,25 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( } #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - // FIX Word16 shl; for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { shl = W_norm( hDiracDecBin->ChEne[ch][bin] ); hDiracDecBin->ChEne_fx[ch][bin] = W_extract_h( W_shl( hDiracDecBin->ChEne[ch][bin], shl ) ); - hDiracDecBin->ChEne_e[ch][bin] = 63 - 2 * q + rsh0 - shl; + hDiracDecBin->ChEne_e[ch][bin] = sub( exp0, shl ); } shl = W_norm( hDiracDecBin->ChCrossRe[bin] ); hDiracDecBin->ChCrossRe_fx[bin] = W_extract_h( W_shl( hDiracDecBin->ChCrossRe[bin], shl ) ); - hDiracDecBin->ChCrossRe_e[bin] = 63 - 2 * q + rsh0 - shl; + hDiracDecBin->ChCrossRe_e[bin] = sub( exp0, shl ); shl = W_norm( hDiracDecBin->ChCrossIm[bin] ); hDiracDecBin->ChCrossIm_fx[bin] = W_extract_h( W_shl( hDiracDecBin->ChCrossIm[bin], shl ) ); - hDiracDecBin->ChCrossIm_e[bin] = 63 - 2 * q + rsh0 - shl; + hDiracDecBin->ChCrossIm_e[bin] = sub( 
exp0, shl ); shl = W_norm( subFrameTotalEne[bin] ); subFrameTotalEne_fx[bin] = W_extract_h( W_shl( subFrameTotalEne[bin], shl ) ); - subFrameTotalEne_e[bin] = 63 - 2 * q + rsh1 - shl; + subFrameTotalEne_e[bin] = sub( exp1, shl ); #endif } -- GitLab From 7d78d2eb25d822787881045cddccee78373676bb Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Mon, 27 Oct 2025 13:29:22 +0000 Subject: [PATCH 08/26] Fix missing prototype errors. --- .../ivas_dirac_dec_binaural_functions_fx.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 4f3f0cc08..8accfcd1b 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -122,6 +122,13 @@ static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 Ascale, Word32 Bscale, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); +static UWord32 ceil_pow2( UWord32 x ); + +static UWord32 popcnt( UWord32 x ); + +static UWord32 ceil_log2( UWord32 x ); + +static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ); /*------------------------------------------------------------------------- * ivas_dirac_dec_init_binaural_data() @@ -1330,7 +1337,7 @@ static void ivas_dirac_dec_decorrelate_slot_fx( return; } -UWord32 ceil_pow2( UWord32 x ) +static UWord32 ceil_pow2( UWord32 x ) { --x; x |= x >> 1; @@ -1342,7 +1349,7 @@ UWord32 ceil_pow2( UWord32 x ) return x; } -UWord32 popcnt( UWord32 x ) +static UWord32 popcnt( UWord32 x ) { // it takes once cycle x = ( x 
& 0x55555555 ) + ( ( x & 0xAAAAAAAA ) >> 1 ); @@ -1353,7 +1360,7 @@ UWord32 popcnt( UWord32 x ) return x; } -UWord32 ceil_log2( UWord32 x ) +static UWord32 ceil_log2( UWord32 x ) { if ( x ) { @@ -1365,7 +1372,7 @@ UWord32 ceil_log2( UWord32 x ) } } -Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) +static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) { Word32 var1_l; Word64 var_out; @@ -1477,8 +1484,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #endif #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - rsh0 = ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1; - rsh1 = ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1; + rsh0 = (Word16) ( ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); + rsh1 = (Word16) ( ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); exp0 = add( sub( 63, shl( q, 1 ) ), rsh0 ); exp1 = add( sub( 63, shl( q, 1 ) ), rsh1 ); #else -- GitLab From 536e3391b9c06ababa00cf6b4c5b73a33557bf5d Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 28 Oct 2025 09:57:39 +0000 Subject: [PATCH 09/26] Reduce precision of Mpy_64_32. 
--- CMakeLists.txt | 2 +- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ab640932f..45cda9e37 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -216,7 +216,7 @@ target_link_libraries(ISAR_post_rend lib_basop lib_isar lib_util lib_com) target_include_directories(ISAR_post_rend PRIVATE lib_basop lib_isar) add_executable(ambi_converter apps/ambi_converter.c) -target_link_libraries(ambi_converter lib_util lib_com lib_basop) +target_link_libraries(ambi_converter lib_util lib_com lib_basop lib_debug) if(UNIX) target_link_libraries(ambi_converter m) endif() diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index a67a7c895..410ff0e4e 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1374,6 +1374,7 @@ static UWord32 ceil_log2( UWord32 x ) static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) { +#if 0 Word32 var1_l; Word64 var_out; var1_l = W_extract_l( W_var1 ); // 1 @@ -1381,6 +1382,11 @@ static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) var_out = W_mac_32_32( var_out, L_lshr( var1_l, 1 ), L_var2 ); // 2 var_out = W_mac_32_32( W_shr( var_out, 31 ), W_extract_h( W_var1 ), L_var2 ); // 3 return var_out; +#else + Word64 var_out; + var_out = Mpy_32_32( W_extract_h( W_var1 ), L_var2 ); // 3 + return var_out; +#endif } static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( -- GitLab From 99e842f2573e219bb5e1a1d120322e728d2236fc Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 28 Oct 2025 10:16:38 +0000 Subject: [PATCH 10/26] Increase precision of numerator and denominator and use cadence division. 
--- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 410ff0e4e..bb9266476 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1374,7 +1374,7 @@ static UWord32 ceil_log2( UWord32 x ) static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) { -#if 0 +#if 1 Word32 var1_l; Word64 var_out; var1_l = W_extract_l( W_var1 ); // 1 @@ -1652,15 +1652,25 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES Word16 exp; Word64 num64, den64; +#if 0 num64 = W_add( W_shr( hDiracDecBin->ChEne[0][bin], 1 ), W_shr( hDiracDecBin->ChEne[1][bin], 1 ) ); // 2q - rsh0 - 1 num64 = Mpy_64_32( num64, IIReneLimiterFactor_fx ); // 2q - rsh0 - 1 - 5 den64 = W_add( W_shr( hDiracDecBin->ChEnePrev[0][bin], 1 ), W_shr( hDiracDecBin->ChEnePrev[1][bin], 1 ) ); // 2q - rsh0 - 1 +#else + num64 = W_add( hDiracDecBin->ChEne[0][bin], hDiracDecBin->ChEne[1][bin] ); // 2q - rsh0 + num64 = Mpy_64_32( num64, IIReneLimiterFactor_fx ); // 2q - rsh0 - 5 + den64 = W_add( hDiracDecBin->ChEnePrev[0][bin], hDiracDecBin->ChEnePrev[1][bin] ); // 2q - rsh0 +#endif den64 = W_max( 1, den64 ); num_e = W_norm( num64 ); den_e = W_norm( den64 ); num = W_extract_h( W_shl( num64, num_e ) ); // 2q - rsh0 - 1 - 5 + num_e - 32 den = W_extract_h( W_shl( den64, den_e ) ); // 2q - rsh0 - 1 + den_e - 32 +#if 0 IIReneLimiter_fx[bin] = BASOP_Util_Divide3232_Scale_newton( num, den, &exp ); +#else + IIReneLimiter_fx[bin] = BASOP_Util_Divide3232_Scale_cadence( num, den, &exp ); +#endif exp = add( sub( den_e, num_e ), add( 5, exp ) ); IF( L_shr_sat( IIReneLimiter_fx[bin], sub( 31, exp ) ) > 0 ) { -- GitLab From d78bb97eed897cf200954cf16536e14e58648552 Mon Sep 17 00:00:00 2001 From: Nicolas 
Roussin Date: Tue, 28 Oct 2025 11:25:50 +0000 Subject: [PATCH 11/26] Do not right shift. --- .../ivas_dirac_dec_binaural_functions_fx.c | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index bb9266476..b74179a14 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1492,8 +1492,13 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES rsh0 = (Word16) ( ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); rsh1 = (Word16) ( ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); +#if 1 + exp0 = sub( 63, shl( q, 1 ) ); + exp1 = sub( 63, shl( q, 1 ) ); +#else exp0 = add( sub( 63, shl( q, 1 ) ), rsh0 ); exp1 = add( sub( 63, shl( q, 1 ) ), rsh1 ); +#endif #else exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below #endif @@ -1508,8 +1513,13 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( temp64 = W_mult0_32_32( inRe_fx[ch][slot][bin], inRe_fx[ch][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[ch][slot][bin], inIm_fx[ch][slot][bin] ) ); // 2q #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#if 1 + hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], temp64 ); // 2q + subFrameTotalEne[bin] = W_add( subFrameTotalEne[bin], temp64 ); // 2q +#else hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 subFrameTotalEne[bin] = W_add( subFrameTotalEne[bin], W_shr( temp64, rsh1 ) ); // 2q - rsh1 +#endif move64(); move64(); #else @@ -1526,7 +1536,11 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( temp64 = 
W_mult0_32_32( inRe_fx[0][slot][bin], inRe_fx[1][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ) ); // 2q #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#if 1 + hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], temp64 ); // 2q +#else hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 +#endif move64(); #else exp1 = W_norm( temp64 ); @@ -1538,7 +1552,11 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // 2q temp64 = W_sub( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inRe_fx[1][slot][bin] ) ); // 2q #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#if 1 + hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], temp64 ); // 2q +#else hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 +#endif move64(); #else exp1 = W_norm( temp64 ); @@ -1652,25 +1670,15 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES Word16 exp; Word64 num64, den64; -#if 0 - num64 = W_add( W_shr( hDiracDecBin->ChEne[0][bin], 1 ), W_shr( hDiracDecBin->ChEne[1][bin], 1 ) ); // 2q - rsh0 - 1 - num64 = Mpy_64_32( num64, IIReneLimiterFactor_fx ); // 2q - rsh0 - 1 - 5 - den64 = W_add( W_shr( hDiracDecBin->ChEnePrev[0][bin], 1 ), W_shr( hDiracDecBin->ChEnePrev[1][bin], 1 ) ); // 2q - rsh0 - 1 -#else num64 = W_add( hDiracDecBin->ChEne[0][bin], hDiracDecBin->ChEne[1][bin] ); // 2q - rsh0 num64 = Mpy_64_32( num64, IIReneLimiterFactor_fx ); // 2q - rsh0 - 5 den64 = W_add( hDiracDecBin->ChEnePrev[0][bin], hDiracDecBin->ChEnePrev[1][bin] ); // 2q - rsh0 -#endif den64 = W_max( 1, den64 ); num_e = W_norm( num64 ); den_e = W_norm( den64 ); - num = 
W_extract_h( W_shl( num64, num_e ) ); // 2q - rsh0 - 1 - 5 + num_e - 32 - den = W_extract_h( W_shl( den64, den_e ) ); // 2q - rsh0 - 1 + den_e - 32 -#if 0 + num = W_extract_h( W_shl( num64, num_e ) ); // 2q - rsh0 - 5 + num_e - 32 + den = W_extract_h( W_shl( den64, den_e ) ); // 2q - rsh0 + den_e - 32 IIReneLimiter_fx[bin] = BASOP_Util_Divide3232_Scale_newton( num, den, &exp ); -#else - IIReneLimiter_fx[bin] = BASOP_Util_Divide3232_Scale_cadence( num, den, &exp ); -#endif exp = add( sub( den_e, num_e ), add( 5, exp ) ); IF( L_shr_sat( IIReneLimiter_fx[bin], sub( 31, exp ) ) > 0 ) { -- GitLab From 813ea695e853dc75b191c287bebaf39171c80360 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 28 Oct 2025 13:08:10 +0000 Subject: [PATCH 12/26] Remove ifdefs. --- .../ivas_dirac_dec_binaural_functions_fx.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 822af29e7..0c16e086a 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1472,13 +1472,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES rsh0 = (Word16) ( ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); rsh1 = (Word16) ( ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); -#if 1 exp0 = sub( 63, shl( q, 1 ) ); exp1 = sub( 63, shl( q, 1 ) ); -#else - exp0 = add( sub( 63, shl( q, 1 ) ), rsh0 ); - exp1 = add( sub( 63, shl( q, 1 ) ), rsh1 ); -#endif #else exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below #endif @@ -1493,13 +1488,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( temp64 = W_mult0_32_32( inRe_fx[ch][slot][bin], inRe_fx[ch][slot][bin] ); // 2q temp64 = W_add( temp64, 
W_mult0_32_32( inIm_fx[ch][slot][bin], inIm_fx[ch][slot][bin] ) ); // 2q #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES -#if 1 hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], temp64 ); // 2q subFrameTotalEne[bin] = W_add( subFrameTotalEne[bin], temp64 ); // 2q -#else - hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 - subFrameTotalEne[bin] = W_add( subFrameTotalEne[bin], W_shr( temp64, rsh1 ) ); // 2q - rsh1 -#endif move64(); move64(); #else @@ -1516,11 +1506,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inRe_fx[1][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ) ); // 2q #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES -#if 1 hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], temp64 ); // 2q -#else - hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 -#endif move64(); #else exp1 = W_norm( temp64 ); @@ -1532,11 +1518,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // 2q temp64 = W_sub( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inRe_fx[1][slot][bin] ) ); // 2q #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES -#if 1 hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], temp64 ); // 2q -#else - hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 -#endif move64(); #else exp1 = W_norm( temp64 ); -- GitLab From f8eb1f0e28491903284e9f19c25edbdcaa5f520b Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 28 Oct 2025 13:18:05 +0000 Subject: [PATCH 13/26] Remove unused variables. 
--- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 0c16e086a..935e04897 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1392,7 +1392,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( Word32 ivas_total_brate; Word16 nchan_transport; #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - Word16 rsh0, rsh1, exp0, exp1; + // Word16 rsh0, rsh1, exp0, exp1; + Word16 exp0, exp1; #else Word16 exp, exp1; Word32 temp; @@ -1470,8 +1471,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #endif #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - rsh0 = (Word16) ( ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); - rsh1 = (Word16) ( ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); + // rsh0 = (Word16) ( ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); + // rsh1 = (Word16) ( ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); exp0 = sub( 63, shl( q, 1 ) ); exp1 = sub( 63, shl( q, 1 ) ); #else -- GitLab From 987375833492dbbd4523c910babdefa85f629e80 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 28 Oct 2025 13:31:20 +0000 Subject: [PATCH 14/26] Fix build failures. 
--- .../ivas_dirac_dec_binaural_functions_fx.c | 37 +------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 935e04897..71a8605e2 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1317,41 +1317,6 @@ static void ivas_dirac_dec_decorrelate_slot_fx( return; } -static UWord32 ceil_pow2( UWord32 x ) -{ - --x; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - ++x; - return x; -} - -static UWord32 popcnt( UWord32 x ) -{ - // it takes once cycle - x = ( x & 0x55555555 ) + ( ( x & 0xAAAAAAAA ) >> 1 ); - x = ( x & 0x33333333 ) + ( ( x & 0xCCCCCCCC ) >> 2 ); - x = ( x & 0x0F0F0F0F ) + ( ( x & 0xF0F0F0F0 ) >> 4 ); - x = ( x & 0x00FF00FF ) + ( ( x & 0xFF00FF00 ) >> 8 ); - x = ( x & 0x0000FFFF ) + ( ( x & 0xFFFF0000 ) >> 16 ); - return x; -} - -static UWord32 ceil_log2( UWord32 x ) -{ - if ( x ) - { - return popcnt( ceil_pow2( x ) - 1 ); - } - else - { - return 0; - } -} - static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) { #if 1 @@ -1582,7 +1547,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( tempIm = L_add( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // q temp64 = W_add( W_mult0_32_32( tempRe, tempRe ), W_mult0_32_32( tempIm, tempIm ) ); // 2q #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - subFrameSumEne[bin] = W_add( subFrameSumEne[bin], W_shr( temp64, rsh1 ) ); + subFrameSumEne[bin] = W_add( subFrameSumEne[bin], temp64 ); move64(); #else exp1 = W_norm( temp64 ); -- GitLab From 822dc1a4b91853c78543546368bdfb5f559a4023 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 28 Oct 2025 13:38:05 +0000 Subject: [PATCH 15/26] Remove function prototypes. 
--- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 71a8605e2..4ec11ecf0 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -122,12 +122,6 @@ static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 Ascale, Word32 Bscale, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); -static UWord32 ceil_pow2( UWord32 x ); - -static UWord32 popcnt( UWord32 x ); - -static UWord32 ceil_log2( UWord32 x ); - static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ); /*------------------------------------------------------------------------- -- GitLab From e726d94fab9b200e960e4263d835f785d10e570d Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 28 Oct 2025 14:00:34 +0000 Subject: [PATCH 16/26] Revert to original implementation. This reverts commit 822dc1a4b91853c78543546368bdfb5f559a4023. 
--- .../ivas_dirac_dec_binaural_functions_fx.c | 64 ++++++++++++++----- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 4ec11ecf0..d28e6d9cb 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -122,6 +122,12 @@ static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 Ascale, Word32 Bscale, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); +static UWord32 ceil_pow2( UWord32 x ); + +static UWord32 popcnt( UWord32 x ); + +static UWord32 ceil_log2( UWord32 x ); + static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ); /*------------------------------------------------------------------------- @@ -1311,9 +1317,43 @@ static void ivas_dirac_dec_decorrelate_slot_fx( return; } +static UWord32 ceil_pow2( UWord32 x ) +{ + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + ++x; + return x; +} + +static UWord32 popcnt( UWord32 x ) +{ + // it takes once cycle + x = ( x & 0x55555555 ) + ( ( x & 0xAAAAAAAA ) >> 1 ); + x = ( x & 0x33333333 ) + ( ( x & 0xCCCCCCCC ) >> 2 ); + x = ( x & 0x0F0F0F0F ) + ( ( x & 0xF0F0F0F0 ) >> 4 ); + x = ( x & 0x00FF00FF ) + ( ( x & 0xFF00FF00 ) >> 8 ); + x = ( x & 0x0000FFFF ) + ( ( x & 0xFFFF0000 ) >> 16 ); + return x; +} + +static UWord32 ceil_log2( UWord32 x ) +{ + if ( x ) + { + return popcnt( ceil_pow2( x ) - 1 ); + } + else + { + return 0; + } +} + static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) { -#if 1 Word32 var1_l; 
Word64 var_out; var1_l = W_extract_l( W_var1 ); // 1 @@ -1321,11 +1361,6 @@ static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) var_out = W_mac_32_32( var_out, L_lshr( var1_l, 1 ), L_var2 ); // 2 var_out = W_mac_32_32( W_shr( var_out, 31 ), W_extract_h( W_var1 ), L_var2 ); // 3 return var_out; -#else - Word64 var_out; - var_out = Mpy_32_32( W_extract_h( W_var1 ), L_var2 ); // 3 - return var_out; -#endif } static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( @@ -1351,8 +1386,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( Word32 ivas_total_brate; Word16 nchan_transport; #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - // Word16 rsh0, rsh1, exp0, exp1; - Word16 exp0, exp1; + Word16 rsh0, rsh1, exp0, exp1; #else Word16 exp, exp1; Word32 temp; @@ -1430,10 +1464,10 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #endif #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - // rsh0 = (Word16) ( ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); - // rsh1 = (Word16) ( ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); - exp0 = sub( 63, shl( q, 1 ) ); - exp1 = sub( 63, shl( q, 1 ) ); + rsh0 = (Word16) ( ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); + rsh1 = (Word16) ( ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); + exp0 = add( sub( 63, shl( q, 1 ) ), rsh0 ); + exp1 = add( sub( 63, shl( q, 1 ) ), rsh1 ); #else exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below #endif @@ -1448,8 +1482,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( temp64 = W_mult0_32_32( inRe_fx[ch][slot][bin], inRe_fx[ch][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[ch][slot][bin], inIm_fx[ch][slot][bin] ) ); // 2q #ifdef 
NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], temp64 ); // 2q - subFrameTotalEne[bin] = W_add( subFrameTotalEne[bin], temp64 ); // 2q + hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 + subFrameTotalEne[bin] = W_add( subFrameTotalEne[bin], W_shr( temp64, rsh1 ) ); // 2q - rsh1 move64(); move64(); #else @@ -1541,7 +1575,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( tempIm = L_add( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // q temp64 = W_add( W_mult0_32_32( tempRe, tempRe ), W_mult0_32_32( tempIm, tempIm ) ); // 2q #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - subFrameSumEne[bin] = W_add( subFrameSumEne[bin], temp64 ); + subFrameSumEne[bin] = W_add( subFrameSumEne[bin], W_shr( temp64, rsh1 ) ); move64(); #else exp1 = W_norm( temp64 ); -- GitLab From 27976ffba33e77d951d36716bf52bb3a5f4bee50 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 29 Oct 2025 07:08:01 +0000 Subject: [PATCH 17/26] Fix ChCrossRe/ChCrossIm summation. 
--- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index d28e6d9cb..62806fa55 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1500,7 +1500,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inRe_fx[1][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ) ); // 2q #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], temp64 ); // 2q + hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 move64(); #else exp1 = W_norm( temp64 ); @@ -1512,7 +1512,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // 2q temp64 = W_sub( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inRe_fx[1][slot][bin] ) ); // 2q #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], temp64 ); // 2q + hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 move64(); #else exp1 = W_norm( temp64 ); -- GitLab From 2bcc3f4b2db137f156c57902f3ad0bc929c3727a Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 29 Oct 2025 19:33:49 +0000 Subject: [PATCH 18/26] Add back computational checks. 
--- .../ivas_dirac_dec_binaural_functions_fx.c | 130 ++++++++++++++---- 1 file changed, 102 insertions(+), 28 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 62806fa55..1bc697b94 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1363,6 +1363,42 @@ static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) return var_out; } +#define NONBE_2157_VERIFY_ACCURACY + +#ifdef NONBE_2157_VERIFY_ACCURACY +static void check(Word64 computed_fx, Word32 computed_q, Word64 expected_fx, Word32 expected_e, Word64 max_error); + +static void check(Word64 computed_fx, Word32 computed_q, Word64 expected_fx, Word32 expected_e, Word64 max_error) +{ + Word32 expected_q = 31 - expected_e; + Word32 shift = expected_q - computed_q; + + if (shift >= 0) + { + expected_fx >>= shift; + } + else + { + computed_fx >>= -shift; + } + + Word64 error = llabs(expected_fx - computed_fx); + if (error > max_error) + { + printf("\n\n"); + printf("0x%016llx\n", computed_fx); + printf("%d\n", computed_q); + printf("0x%016llx\n", expected_fx); + printf("%d\n", expected_q); + assert(false); + } +} + +#define CHECK_ACCURACY(CFX, CQ, EFX, EE, ME) check(CFX, CQ, EFX, EE, ME) +#else +#define CHECK_ACCURACY(CFX, CQ, EFX, EE, ME) +#endif + static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, @@ -1386,8 +1422,9 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( Word32 ivas_total_brate; Word16 nchan_transport; #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - Word16 rsh0, rsh1, exp0, exp1; -#else + Word16 rsh0, rsh1, e0, e1; +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY Word16 exp, exp1; Word32 temp; #endif @@ -1409,7 +1446,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( 
#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES set64_fx( hDiracDecBin->ChCrossRe, 0, nBins ); set64_fx( hDiracDecBin->ChCrossIm, 0, nBins ); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY set32_fx( hDiracDecBin->ChCrossRe_fx, 0, nBins ); set32_fx( hDiracDecBin->ChCrossIm_fx, 0, nBins ); set16_fx( hDiracDecBin->ChCrossRe_e, 0, nBins ); @@ -1420,7 +1458,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( { #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES set64_fx( hDiracDecBin->ChEne[ch], 0, nBins ); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY set32_fx( hDiracDecBin->ChEne_fx[ch], 0, nBins ); set16_fx( hDiracDecBin->ChEne_e[ch], 0, nBins ); #endif @@ -1459,16 +1498,18 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( /* Formulate input and target covariance matrices for this subframe */ #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES set64_fx( subFrameTotalEne, 0, CLDFB_NO_CHANNELS_MAX ); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY set32_fx( subFrameTotalEne_fx, 0, CLDFB_NO_CHANNELS_MAX ); #endif #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES rsh0 = (Word16) ( ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); rsh1 = (Word16) ( ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); - exp0 = add( sub( 63, shl( q, 1 ) ), rsh0 ); - exp1 = add( sub( 63, shl( q, 1 ) ), rsh1 ); -#else + e0 = add( sub( 63, shl( q, 1 ) ), rsh0 ); + e1 = add( sub( 63, shl( q, 1 ) ), rsh1 ); +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below #endif @@ -1486,7 +1527,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( subFrameTotalEne[bin] = W_add( subFrameTotalEne[bin], W_shr( temp64, rsh1 ) ); // 2q - rsh1 move64(); move64(); -#else 
+#endif +#ifdef NONBE_2157_VERIFY_ACCURACY Word32 instEne_fx; exp1 = W_norm( temp64 ); instEne_fx = W_extract_h( W_shl( temp64, exp1 ) ); // 2q - 32 + exp1 @@ -1496,30 +1538,36 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move32(); move32(); #endif + CHECK_ACCURACY(hDiracDecBin->ChEne[ch][bin], 2 * q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], 16); + CHECK_ACCURACY(subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16); } temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inRe_fx[1][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ) ); // 2q #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 move64(); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY exp1 = W_norm( temp64 ); temp = W_extract_h( W_shl( temp64, exp1 ) ); // // 2q - 32 + exp1 hDiracDecBin->ChCrossRe_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], temp, sub( exp, exp1 ), &hDiracDecBin->ChCrossRe_e[bin] ); move32(); #endif + CHECK_ACCURACY(hDiracDecBin->ChCrossRe[bin], 2 * q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], 16); temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // 2q temp64 = W_sub( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inRe_fx[1][slot][bin] ) ); // 2q #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 move64(); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY exp1 = W_norm( temp64 ); temp = W_extract_h( W_shl( temp64, exp1 ) ); // // 2q - 32 + exp1 hDiracDecBin->ChCrossIm_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], 
temp, sub( exp, exp1 ), &hDiracDecBin->ChCrossIm_e[bin] ); move32(); #endif + CHECK_ACCURACY(hDiracDecBin->ChCrossIm[bin], 2 * q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], 16); } } @@ -1533,20 +1581,24 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES subFrameTotalEne[bin] = Mpy_64_32( subFrameTotalEne[bin], lowBitRateEQ_fx[bin] ); move64(); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY subFrameTotalEne_fx[bin] = Mpy_32_32( subFrameTotalEne_fx[bin], lowBitRateEQ_fx[bin] ); // exp = subFrameTotalEne_e[bin] move32(); #endif + CHECK_ACCURACY(subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16); } FOR( ; bin < nBins; bin++ ) { #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES subFrameTotalEne[bin] = Mpy_64_32( subFrameTotalEne[bin], lowBitRateEQ_fx[lastEqBin] ); move64(); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY subFrameTotalEne_fx[bin] = Mpy_32_32( subFrameTotalEne_fx[bin], lowBitRateEQ_fx[lastEqBin] ); // exp = subFrameTotalEne_e[bin] move32(); #endif + CHECK_ACCURACY(subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16); } } @@ -1557,13 +1609,15 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( Word32 tempRe, tempIm; #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES Word64 subFrameSumEne[CLDFB_NO_CHANNELS_MAX]; -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY Word32 subFrameSumEne_fx[CLDFB_NO_CHANNELS_MAX]; Word16 subFrameSumEne_e[CLDFB_NO_CHANNELS_MAX]; #endif #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES set64_fx( subFrameSumEne, 0, CLDFB_NO_CHANNELS_MAX ); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY set32_fx( subFrameSumEne_fx, 0, CLDFB_NO_CHANNELS_MAX ); set16_fx( subFrameSumEne_e, 0, CLDFB_NO_CHANNELS_MAX ); #endif 
@@ -1577,12 +1631,14 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES subFrameSumEne[bin] = W_add( subFrameSumEne[bin], W_shr( temp64, rsh1 ) ); move64(); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY exp1 = W_norm( temp64 ); temp = W_extract_h( W_shl( temp64, exp1 ) ); // 2q + exp1 subFrameSumEne_fx[bin] = BASOP_Util_Add_Mant32Exp( subFrameSumEne_fx[bin], subFrameSumEne_e[bin], temp, sub( exp /* 63 - 2q */, exp1 ) /*31 - (2q + exp1 - 32)*/, &subFrameSumEne_e[bin] ); move32(); #endif + CHECK_ACCURACY(subFrameSumEne[bin], 2 * q - rsh1, subFrameSumEne_fx[bin], subFrameSumEne_e[bin], 16); } } FOR( bin = 0; bin < nBins; bin++ ) @@ -1590,7 +1646,9 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES subFrameTotalEne[bin] = W_max( subFrameSumEne[bin], subFrameTotalEne[bin] ); move64(); -#else +#endif + +#ifdef NONBE_2157_VERIFY_ACCURACY subFrameTotalEne_e[bin] = sub( subFrameTotalEne_e[bin], 1 ); move16(); temp = L_shl_sat( subFrameTotalEne_fx[bin], sub( subFrameTotalEne_e[bin], subFrameSumEne_e[bin] ) ); // subFrameSumEne_e[bin] @@ -1602,6 +1660,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move16(); } #endif + CHECK_ACCURACY(subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16); } } @@ -1645,7 +1704,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( { IIReneLimiter_fx[bin] = L_shl( IIReneLimiter_fx[bin], exp ); /*Q31*/ } -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY num = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->ChEne_e[1][bin], &num_e ); num = Mpy_32_32( num, IIReneLimiterFactor_fx ); /*Q = (31 - num_e + 26 - 31) = (26 - num_e)*/ den_e = 0; @@ -1672,7 +1732,8 
@@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move64(); move64(); move64(); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY hDiracDecBin->ChCrossRe_fx[bin] = Mpy_32_32( hDiracDecBin->ChCrossRe_fx[bin], qualityBasedSmFactor_fx ); hDiracDecBin->ChCrossIm_fx[bin] = Mpy_32_32( hDiracDecBin->ChCrossIm_fx[bin], qualityBasedSmFactor_fx ); move32(); @@ -1680,16 +1741,20 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move32(); move32(); #endif + CHECK_ACCURACY(hDiracDecBin->ChCrossRe[bin], 2 * q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], 16); + CHECK_ACCURACY(hDiracDecBin->ChCrossIm[bin], 2 * q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], 16); FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES hDiracDecBin->ChEne[ch][bin] = Mpy_64_32( hDiracDecBin->ChEne[ch][bin], qualityBasedSmFactor_fx ); move64(); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY hDiracDecBin->ChEne_fx[ch][bin] = Mpy_32_32( hDiracDecBin->ChEne_fx[ch][bin], qualityBasedSmFactor_fx ); move32(); #endif + CHECK_ACCURACY(hDiracDecBin->ChEne[ch][bin], 2 * q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], 16); } #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES @@ -1697,22 +1762,27 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], Mpy_64_32( hDiracDecBin->ChCrossImPrev[bin], IIReneLimiter_fx[bin] ) ); move64(); move64(); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY hDiracDecBin->ChCrossRe_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], Mpy_32_32( hDiracDecBin->ChCrossRePrev_fx[bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChCrossRePrev_e[bin], &hDiracDecBin->ChCrossRe_e[bin] ); hDiracDecBin->ChCrossIm_fx[bin] = 
BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], Mpy_32_32( hDiracDecBin->ChCrossImPrev_fx[bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChCrossImPrev_e[bin], &hDiracDecBin->ChCrossIm_e[bin] ); move32(); move32(); #endif + CHECK_ACCURACY(hDiracDecBin->ChCrossRe[bin], 2 * q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], 4096); // WARNING + CHECK_ACCURACY(hDiracDecBin->ChCrossIm[bin], 2 * q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], 4096); // WARNING FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], Mpy_64_32( hDiracDecBin->ChEnePrev[ch][bin], IIReneLimiter_fx[bin] ) ); move64(); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY hDiracDecBin->ChEne_fx[ch][bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], Mpy_32_32( hDiracDecBin->ChEnePrev_fx[ch][bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChEnePrev_e[ch][bin], &hDiracDecBin->ChEne_e[ch][bin] ); move32(); #endif + CHECK_ACCURACY(hDiracDecBin->ChEne[ch][bin], 2 * q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], 4096); // WARNING } /* Store energy values and coefficients for next round */ @@ -1721,7 +1791,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move64(); hDiracDecBin->ChCrossImPrev[bin] = hDiracDecBin->ChCrossIm[bin]; move64(); -#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY hDiracDecBin->ChCrossRePrev_fx[bin] = hDiracDecBin->ChCrossRe_fx[bin]; move32(); hDiracDecBin->ChCrossImPrev_fx[bin] = hDiracDecBin->ChCrossIm_fx[bin]; @@ -1737,7 +1808,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES hDiracDecBin->ChEnePrev[ch][bin] = hDiracDecBin->ChEne[ch][bin]; move64(); 
-#else +#endif +#ifdef NONBE_2157_VERIFY_ACCURACY hDiracDecBin->ChEnePrev_fx[ch][bin] = hDiracDecBin->ChEne_fx[ch][bin]; move32(); hDiracDecBin->ChEnePrev_e[ch][bin] = hDiracDecBin->ChEne_e[ch][bin]; @@ -1746,25 +1818,27 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( } #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifndef NONBE_2157_VERIFY_ACCURACY Word16 shl; for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { shl = W_norm( hDiracDecBin->ChEne[ch][bin] ); hDiracDecBin->ChEne_fx[ch][bin] = W_extract_h( W_shl( hDiracDecBin->ChEne[ch][bin], shl ) ); - hDiracDecBin->ChEne_e[ch][bin] = sub( exp0, shl ); + hDiracDecBin->ChEne_e[ch][bin] = sub( e0, shl ); } shl = W_norm( hDiracDecBin->ChCrossRe[bin] ); hDiracDecBin->ChCrossRe_fx[bin] = W_extract_h( W_shl( hDiracDecBin->ChCrossRe[bin], shl ) ); - hDiracDecBin->ChCrossRe_e[bin] = sub( exp0, shl ); + hDiracDecBin->ChCrossRe_e[bin] = sub( e0, shl ); shl = W_norm( hDiracDecBin->ChCrossIm[bin] ); hDiracDecBin->ChCrossIm_fx[bin] = W_extract_h( W_shl( hDiracDecBin->ChCrossIm[bin], shl ) ); - hDiracDecBin->ChCrossIm_e[bin] = sub( exp0, shl ); + hDiracDecBin->ChCrossIm_e[bin] = sub( e0, shl ); shl = W_norm( subFrameTotalEne[bin] ); subFrameTotalEne_fx[bin] = W_extract_h( W_shl( subFrameTotalEne[bin], shl ) ); - subFrameTotalEne_e[bin] = sub( exp1, shl ); + subFrameTotalEne_e[bin] = sub( e1, shl ); +#endif #endif } -- GitLab From bf78de17a6da7104de0a2f3bfc6d12d8a78f1645 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Wed, 29 Oct 2025 19:37:20 +0000 Subject: [PATCH 19/26] clang format. 
--- .../ivas_dirac_dec_binaural_functions_fx.c | 58 +++++++++---------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 1bc697b94..40fa61ca2 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1366,37 +1366,37 @@ static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) #define NONBE_2157_VERIFY_ACCURACY #ifdef NONBE_2157_VERIFY_ACCURACY -static void check(Word64 computed_fx, Word32 computed_q, Word64 expected_fx, Word32 expected_e, Word64 max_error); +static void check( Word64 computed_fx, Word32 computed_q, Word64 expected_fx, Word32 expected_e, Word64 max_error ); -static void check(Word64 computed_fx, Word32 computed_q, Word64 expected_fx, Word32 expected_e, Word64 max_error) +static void check( Word64 computed_fx, Word32 computed_q, Word64 expected_fx, Word32 expected_e, Word64 max_error ) { Word32 expected_q = 31 - expected_e; Word32 shift = expected_q - computed_q; - if (shift >= 0) + if ( shift >= 0 ) { expected_fx >>= shift; - } - else + } + else { computed_fx >>= -shift; } - Word64 error = llabs(expected_fx - computed_fx); - if (error > max_error) + Word64 error = llabs( expected_fx - computed_fx ); + if ( error > max_error ) { - printf("\n\n"); - printf("0x%016llx\n", computed_fx); - printf("%d\n", computed_q); - printf("0x%016llx\n", expected_fx); - printf("%d\n", expected_q); - assert(false); + printf( "\n\n" ); + printf( "0x%016llx\n", computed_fx ); + printf( "%d\n", computed_q ); + printf( "0x%016llx\n", expected_fx ); + printf( "%d\n", expected_q ); + assert( false ); } } -#define CHECK_ACCURACY(CFX, CQ, EFX, EE, ME) check(CFX, CQ, EFX, EE, ME) +#define CHECK_ACCURACY( CFX, CQ, EFX, EE, ME ) check( CFX, CQ, EFX, EE, ME ) #else -#define CHECK_ACCURACY(CFX, CQ, EFX, EE, ME) +#define CHECK_ACCURACY( CFX, CQ, EFX, EE, ME ) #endif static void 
ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( @@ -1538,8 +1538,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move32(); move32(); #endif - CHECK_ACCURACY(hDiracDecBin->ChEne[ch][bin], 2 * q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], 16); - CHECK_ACCURACY(subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16); + CHECK_ACCURACY( hDiracDecBin->ChEne[ch][bin], 2 * q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], 16 ); + CHECK_ACCURACY( subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16 ); } temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inRe_fx[1][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ) ); // 2q @@ -1553,7 +1553,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( hDiracDecBin->ChCrossRe_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], temp, sub( exp, exp1 ), &hDiracDecBin->ChCrossRe_e[bin] ); move32(); #endif - CHECK_ACCURACY(hDiracDecBin->ChCrossRe[bin], 2 * q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], 16); + CHECK_ACCURACY( hDiracDecBin->ChCrossRe[bin], 2 * q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], 16 ); temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // 2q temp64 = W_sub( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inRe_fx[1][slot][bin] ) ); // 2q @@ -1567,7 +1567,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( hDiracDecBin->ChCrossIm_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], temp, sub( exp, exp1 ), &hDiracDecBin->ChCrossIm_e[bin] ); move32(); #endif - CHECK_ACCURACY(hDiracDecBin->ChCrossIm[bin], 2 * q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], 
hDiracDecBin->ChCrossIm_e[bin], 16); + CHECK_ACCURACY( hDiracDecBin->ChCrossIm[bin], 2 * q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], 16 ); } } @@ -1586,7 +1586,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( subFrameTotalEne_fx[bin] = Mpy_32_32( subFrameTotalEne_fx[bin], lowBitRateEQ_fx[bin] ); // exp = subFrameTotalEne_e[bin] move32(); #endif - CHECK_ACCURACY(subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16); + CHECK_ACCURACY( subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16 ); } FOR( ; bin < nBins; bin++ ) { @@ -1598,7 +1598,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( subFrameTotalEne_fx[bin] = Mpy_32_32( subFrameTotalEne_fx[bin], lowBitRateEQ_fx[lastEqBin] ); // exp = subFrameTotalEne_e[bin] move32(); #endif - CHECK_ACCURACY(subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16); + CHECK_ACCURACY( subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16 ); } } @@ -1638,7 +1638,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( subFrameSumEne_fx[bin] = BASOP_Util_Add_Mant32Exp( subFrameSumEne_fx[bin], subFrameSumEne_e[bin], temp, sub( exp /* 63 - 2q */, exp1 ) /*31 - (2q + exp1 - 32)*/, &subFrameSumEne_e[bin] ); move32(); #endif - CHECK_ACCURACY(subFrameSumEne[bin], 2 * q - rsh1, subFrameSumEne_fx[bin], subFrameSumEne_e[bin], 16); + CHECK_ACCURACY( subFrameSumEne[bin], 2 * q - rsh1, subFrameSumEne_fx[bin], subFrameSumEne_e[bin], 16 ); } } FOR( bin = 0; bin < nBins; bin++ ) @@ -1660,7 +1660,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move16(); } #endif - CHECK_ACCURACY(subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16); + CHECK_ACCURACY( subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], 
subFrameTotalEne_e[bin], 16 ); } } @@ -1741,8 +1741,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move32(); move32(); #endif - CHECK_ACCURACY(hDiracDecBin->ChCrossRe[bin], 2 * q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], 16); - CHECK_ACCURACY(hDiracDecBin->ChCrossIm[bin], 2 * q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], 16); + CHECK_ACCURACY( hDiracDecBin->ChCrossRe[bin], 2 * q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], 16 ); + CHECK_ACCURACY( hDiracDecBin->ChCrossIm[bin], 2 * q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], 16 ); FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { @@ -1754,7 +1754,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( hDiracDecBin->ChEne_fx[ch][bin] = Mpy_32_32( hDiracDecBin->ChEne_fx[ch][bin], qualityBasedSmFactor_fx ); move32(); #endif - CHECK_ACCURACY(hDiracDecBin->ChEne[ch][bin], 2 * q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], 16); + CHECK_ACCURACY( hDiracDecBin->ChEne[ch][bin], 2 * q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], 16 ); } #ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES @@ -1769,8 +1769,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move32(); move32(); #endif - CHECK_ACCURACY(hDiracDecBin->ChCrossRe[bin], 2 * q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], 4096); // WARNING - CHECK_ACCURACY(hDiracDecBin->ChCrossIm[bin], 2 * q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], 4096); // WARNING + CHECK_ACCURACY( hDiracDecBin->ChCrossRe[bin], 2 * q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], 4096 ); // WARNING + CHECK_ACCURACY( hDiracDecBin->ChCrossIm[bin], 2 * q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], 4096 ); // 
WARNING FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { @@ -1782,7 +1782,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( hDiracDecBin->ChEne_fx[ch][bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], Mpy_32_32( hDiracDecBin->ChEnePrev_fx[ch][bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChEnePrev_e[ch][bin], &hDiracDecBin->ChEne_e[ch][bin] ); move32(); #endif - CHECK_ACCURACY(hDiracDecBin->ChEne[ch][bin], 2 * q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], 4096); // WARNING + CHECK_ACCURACY( hDiracDecBin->ChEne[ch][bin], 2 * q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], 4096 ); // WARNING } /* Store energy values and coefficients for next round */ -- GitLab From 808792d0063c76616c61ef202eb8bb2a2f4387d1 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Thu, 30 Oct 2025 16:11:22 +0000 Subject: [PATCH 20/26] Accuracy analysis. --- lib_com/options.h | 3 +- .../ivas_dirac_dec_binaural_functions_fx.c | 432 +++++++++++++----- lib_rend/ivas_stat_rend.h | 17 +- lib_rend/lib_rend_fx.c | 17 +- 4 files changed, 355 insertions(+), 114 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 96fcb11f1..3e8105aed 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -142,7 +142,8 @@ /* #################### Start BASOP optimization switches ############################ */ -#define NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES /* Dolby: task 2157: optimize ivas_dirac_dec_binaural_formulate_input_covariance_matrices */ +#define NONBE_2157_INPUT_COV_MATRICES /* Dolby: task 2157: optimize ivas_dirac_dec_binaural_formulate_input_covariance_matrices */ +#define NONBE_2157_ACCURACY_ANALYSIS /* Dolby: task 2157: accuracy analysis */ /* #################### End BASOP optimization switches ############################ */ diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c 
b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index d0442a2c4..7ebd111ab 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -98,7 +98,7 @@ static void ivas_dirac_dec_binaural_internal_fx( Decoder_Struct *st_ivas, COMBIN static void ivas_dirac_dec_decorrelate_slot_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, const Word16 num_freq_bands, const Word16 slot, Word32 inRe[][CLDFB_SLOTS_PER_SUBFRAME] /*q_inp*/[CLDFB_NO_CHANNELS_MAX], Word32 inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX] /*q_inp*/, const Word16 q_inp, Word32 decRe[][CLDFB_NO_CHANNELS_MAX], Word32 decIm[][CLDFB_NO_CHANNELS_MAX], Word16 *q_out ); -static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, PARAMBIN_REND_CONFIG_HANDLE hConfig, Word32 inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], Word32 inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], const Word16 subframe, Word64 *subFrameTotalEne, Word32 *subFrameTotalEne_fx, Word16 *subFrameTotalEne_e, Word32 *IIReneLimiter, const Word16 q ); +static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, PARAMBIN_REND_CONFIG_HANDLE hConfig, Word32 inRe[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], Word32 inIm[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], const Word16 subframe, Word32 *subFrameTotalEne_fx, Word16 *subFrameTotalEne_e, Word32 *IIReneLimiter, const Word16 q ); static void ivas_dirac_dec_binaural_formulate_target_covariance_matrices_fx( DIRAC_DEC_BIN_HANDLE hDiracDecBin, const SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, const PARAMBIN_REND_CONFIG_HANDLE hConfig, Word32 Rmat[3][3], const Word16 subframe, const Word16 isHeadtracked, const Word32 *subFrameTotalEne, Word16 *subFrameTotalEne_e, const Word32 *IIReneLimiter, const 
MASA_ISM_DATA_HANDLE hMasaIsmData ); @@ -122,14 +122,6 @@ static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 Ascale, Word32 Bscale, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); -static UWord32 ceil_pow2( UWord32 x ); - -static UWord32 popcnt( UWord32 x ); - -static UWord32 ceil_log2( UWord32 x ); - -static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ); - /*------------------------------------------------------------------------- * ivas_dirac_dec_init_binaural_data() * @@ -204,7 +196,7 @@ ivas_error ivas_dirac_dec_init_binaural_data_fx( set16_fx( hDiracDecBin->processMtxDecRe_fx[j][k], 0, nBins ); set16_fx( hDiracDecBin->processMtxDecIm_fx[j][k], 0, nBins ); } -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES set64_fx( hDiracDecBin->ChEnePrev[j], 0, nBins ); #else set32_fx( hDiracDecBin->ChEnePrev_fx[j], 0, nBins ); @@ -213,7 +205,7 @@ ivas_error ivas_dirac_dec_init_binaural_data_fx( set32_fx( hDiracDecBin->ChEneOutPrev_fx[j], 0, nBins ); set16_fx( hDiracDecBin->ChEneOutPrev_e[j], 0, nBins ); } -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES set64_fx( hDiracDecBin->ChCrossRePrev, 0, nBins ); set64_fx( hDiracDecBin->ChCrossImPrev, 0, nBins ); #else @@ -698,7 +690,6 @@ static void ivas_dirac_dec_binaural_internal_fx( Word32 reverbIm_fx[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; Word32 decorrRe_fx[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; Word32 
decorrIm_fx[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; - Word64 subFrameTotalEne[CLDFB_NO_CHANNELS_MAX]; Word32 subFrameTotalEne_fx[CLDFB_NO_CHANNELS_MAX]; Word16 subFrameTotalEne_e[CLDFB_NO_CHANNELS_MAX]; Word32 IIReneLimiter_fx[CLDFB_NO_CHANNELS_MAX]; @@ -1021,7 +1012,7 @@ static void ivas_dirac_dec_binaural_internal_fx( } ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, subframe, - subFrameTotalEne, subFrameTotalEne_fx, subFrameTotalEne_e, IIReneLimiter_fx, q_inp ); + subFrameTotalEne_fx, subFrameTotalEne_e, IIReneLimiter_fx, q_inp ); ivas_dirac_dec_binaural_formulate_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, subFrameTotalEne_fx, subFrameTotalEne_e, IIReneLimiter_fx, st_ivas->hMasaIsmData ); @@ -1333,6 +1324,13 @@ static void ivas_dirac_dec_decorrelate_slot_fx( return; } +#ifdef NONBE_2157_INPUT_COV_MATRICES + +static UWord32 ceil_pow2( UWord32 x ); +static UWord32 popcnt( UWord32 x ); +static UWord32 ceil_log2( UWord32 x ); +static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ); + static UWord32 ceil_pow2( UWord32 x ) { --x; @@ -1379,40 +1377,33 @@ static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) return var_out; } -#define NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_ACCURACY_ANALYSIS +#include "string.h" -#ifdef NONBE_2157_VERIFY_ACCURACY -static void check( Word64 computed_fx, Word32 computed_q, Word64 expected_fx, Word32 expected_e, Word64 max_error ); +static double abserr( double computed, double expected ); +static double relerr( double computed, double expected ); -static void check( Word64 computed_fx, Word32 computed_q, Word64 expected_fx, Word32 expected_e, Word64 max_error ) +static double abserr( double computed, double expected 
) { - Word32 expected_q = 31 - expected_e; - Word32 shift = expected_q - computed_q; + return fabs( computed - expected ); +} - if ( shift >= 0 ) +static double relerr( double computed, double expected ) +{ + if ( expected == 0.0 && computed == 0.0 ) { - expected_fx >>= shift; + return 0.0; } else { - computed_fx >>= -shift; - } - - Word64 error = llabs( expected_fx - computed_fx ); - if ( error > max_error ) - { - printf( "\n\n" ); - printf( "0x%016llx\n", computed_fx ); - printf( "%d\n", computed_q ); - printf( "0x%016llx\n", expected_fx ); - printf( "%d\n", expected_q ); - assert( false ); + return abserr( computed, expected ) / fabs( expected ); } } -#define CHECK_ACCURACY( CFX, CQ, EFX, EE, ME ) check( CFX, CQ, EFX, EE, ME ) +#define NONBE_2157_USE_CURRENT_VERSION +#endif #else -#define CHECK_ACCURACY( CFX, CQ, EFX, EE, ME ) +#define NONBE_2157_USE_CURRENT_VERSION #endif static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( @@ -1422,7 +1413,6 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( Word32 inRe_fx[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /*q*/ Word32 inIm_fx[][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX], /*q*/ const Word16 subframe, - Word64 *subFrameTotalEne, Word32 *subFrameTotalEne_fx, Word16 *subFrameTotalEne_e, Word32 *IIReneLimiter_fx, @@ -1437,14 +1427,27 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( IVAS_FORMAT ivas_format; Word32 ivas_total_brate; Word16 nchan_transport; -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES + Word64 temp64; +#ifdef NONBE_2157_INPUT_COV_MATRICES + Word64 subFrameTotalEne[CLDFB_NO_CHANNELS_MAX]; Word16 rsh0, rsh1, e0, e1; +#ifdef NONBE_2157_ACCURACY_ANALYSIS + double inRe[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; + double inIm[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; + float fSubFrameTotalEne[CLDFB_NO_CHANNELS_MAX]; + double 
dSubFrameTotalEne[CLDFB_NO_CHANNELS_MAX]; + float fIIReneLimiter[CLDFB_NO_CHANNELS_MAX]; + double dIIReneLimiter[CLDFB_NO_CHANNELS_MAX]; + Word32 IIReneLimiter[CLDFB_NO_CHANNELS_MAX]; +#else + Word32 *IIReneLimiter; + IIReneLimiter = IIReneLimiter_fx; +#endif #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_USE_CURRENT_VERSION Word16 exp, exp1; Word32 temp; #endif - Word64 temp64; ivas_format = hConfig->ivas_format; move32(); @@ -1459,11 +1462,40 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( nBins = hSpatParamRendCom->num_freq_bands; /* Actually bins */ move16(); -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_ACCURACY_ANALYSIS + { + // CONVERT FIXED-POINT INPUTS INTO DOUBLE-PRECISION FLOATING-POINT + // The double-precision mantissa is big enough to hold the int32_t values. + double maxRe = 0.0; + double maxIm = 0.0; + for ( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ ) + { + for ( bin = 0; bin < nBins; bin++ ) + { + for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) + { + inRe[ch][slot][bin] = ldexp( (double) inRe_fx[ch][slot][bin], -q ); + maxRe = fmax( fabs( maxRe ), inRe[ch][slot][bin] ); + inIm[ch][slot][bin] = ldexp( (double) inIm_fx[ch][slot][bin], -q ); + maxIm = fmax( fabs( maxIm ), inIm[ch][slot][bin] ); + } + } + } + // printf("Abs max value: re:%+.6e, im:%+.6e\n", maxRe, maxIm); + } +#endif + +#ifdef NONBE_2157_INPUT_COV_MATRICES set64_fx( hDiracDecBin->ChCrossRe, 0, nBins ); set64_fx( hDiracDecBin->ChCrossIm, 0, nBins ); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + memset( hDiracDecBin->fChCrossRe, 0, nBins * sizeof( float ) ); + memset( hDiracDecBin->fChCrossIm, 0, nBins * sizeof( float ) ); + memset( hDiracDecBin->dChCrossRe, 0, nBins * sizeof( double ) ); + memset( hDiracDecBin->dChCrossIm, 0, nBins * sizeof( double ) ); #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION set32_fx( 
hDiracDecBin->ChCrossRe_fx, 0, nBins ); set32_fx( hDiracDecBin->ChCrossIm_fx, 0, nBins ); set16_fx( hDiracDecBin->ChCrossRe_e, 0, nBins ); @@ -1472,17 +1504,18 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES set64_fx( hDiracDecBin->ChEne[ch], 0, nBins ); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + memset( hDiracDecBin->fChEne[ch], 0, nBins * sizeof( float ) ); + memset( hDiracDecBin->dChEne[ch], 0, nBins * sizeof( double ) ); +#endif #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_USE_CURRENT_VERSION set32_fx( hDiracDecBin->ChEne_fx[ch], 0, nBins ); set16_fx( hDiracDecBin->ChEne_e[ch], 0, nBins ); #endif } -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - set16_fx( subFrameTotalEne_e, 0, CLDFB_NO_CHANNELS_MAX ); -#endif /* Determine EQ for low bit rates (13.2 and 16.4 kbps) */ applyLowBitRateEQ = 0; @@ -1512,20 +1545,27 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( } /* Formulate input and target covariance matrices for this subframe */ -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES set64_fx( subFrameTotalEne, 0, CLDFB_NO_CHANNELS_MAX ); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + memset( fSubFrameTotalEne, 0, CLDFB_NO_CHANNELS_MAX * sizeof( float ) ); + memset( dSubFrameTotalEne, 0, CLDFB_NO_CHANNELS_MAX * sizeof( double ) ); + memset( fIIReneLimiter, 0, CLDFB_NO_CHANNELS_MAX * sizeof( float ) ); + memset( dIIReneLimiter, 0, CLDFB_NO_CHANNELS_MAX * sizeof( double ) ); #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION set32_fx( subFrameTotalEne_fx, 0, CLDFB_NO_CHANNELS_MAX ); + set16_fx( subFrameTotalEne_e, 0, CLDFB_NO_CHANNELS_MAX ); #endif -#ifdef 
NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES rsh0 = (Word16) ( ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); rsh1 = (Word16) ( ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); e0 = add( sub( 63, shl( q, 1 ) ), rsh0 ); e1 = add( sub( 63, shl( q, 1 ) ), rsh1 ); #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_USE_CURRENT_VERSION exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below #endif @@ -1538,13 +1578,29 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( { temp64 = W_mult0_32_32( inRe_fx[ch][slot][bin], inRe_fx[ch][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[ch][slot][bin], inIm_fx[ch][slot][bin] ) ); // 2q -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 subFrameTotalEne[bin] = W_add( subFrameTotalEne[bin], W_shr( temp64, rsh1 ) ); // 2q - rsh1 move64(); move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + float fInstEne; + fInstEne = ( (float) inRe[ch][slot][bin] * (float) inRe[ch][slot][bin] ); + fInstEne += ( (float) inIm[ch][slot][bin] * (float) inIm[ch][slot][bin] ); + hDiracDecBin->fChEne[ch][bin] += fInstEne; + fSubFrameTotalEne[bin] += fInstEne; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + double dInstEne; + dInstEne = ( inRe[ch][slot][bin] * inRe[ch][slot][bin] ); + dInstEne += ( inIm[ch][slot][bin] * inIm[ch][slot][bin] ); + hDiracDecBin->dChEne[ch][bin] += dInstEne; + dSubFrameTotalEne[bin] += dInstEne; #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION Word32 instEne_fx; 
exp1 = W_norm( temp64 ); instEne_fx = W_extract_h( W_shl( temp64, exp1 ) ); // 2q - 32 + exp1 @@ -1554,36 +1610,52 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move32(); move32(); #endif - CHECK_ACCURACY( hDiracDecBin->ChEne[ch][bin], 2 * q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], 16 ); - CHECK_ACCURACY( subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16 ); } temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], inRe_fx[1][slot][bin] ); // 2q temp64 = W_add( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ) ); // 2q -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChCrossRe[bin] += (float) inRe[0][slot][bin] * (float) inRe[1][slot][bin]; + hDiracDecBin->fChCrossRe[bin] += (float) inIm[0][slot][bin] * (float) inIm[1][slot][bin]; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChCrossRe[bin] += inRe[0][slot][bin] * inRe[1][slot][bin]; + hDiracDecBin->dChCrossRe[bin] += inIm[0][slot][bin] * inIm[1][slot][bin]; +#endif #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION exp1 = W_norm( temp64 ); temp = W_extract_h( W_shl( temp64, exp1 ) ); // // 2q - 32 + exp1 hDiracDecBin->ChCrossRe_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], temp, sub( exp, exp1 ), &hDiracDecBin->ChCrossRe_e[bin] ); move32(); #endif - CHECK_ACCURACY( hDiracDecBin->ChCrossRe[bin], 2 * q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], 16 ); temp64 = W_mult0_32_32( inRe_fx[0][slot][bin], 
inIm_fx[1][slot][bin] ); // 2q temp64 = W_sub( temp64, W_mult0_32_32( inIm_fx[0][slot][bin], inRe_fx[1][slot][bin] ) ); // 2q -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], W_shr( temp64, rsh0 ) ); // 2q - rsh0 move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChCrossIm[bin] += (float) inRe[0][slot][bin] * (float) inIm[1][slot][bin]; + hDiracDecBin->fChCrossIm[bin] -= (float) inIm[0][slot][bin] * (float) inRe[1][slot][bin]; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChCrossIm[bin] += inRe[0][slot][bin] * inIm[1][slot][bin]; + hDiracDecBin->dChCrossIm[bin] -= inIm[0][slot][bin] * inRe[1][slot][bin]; #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION exp1 = W_norm( temp64 ); temp = W_extract_h( W_shl( temp64, exp1 ) ); // // 2q - 32 + exp1 hDiracDecBin->ChCrossIm_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], temp, sub( exp, exp1 ), &hDiracDecBin->ChCrossIm_e[bin] ); move32(); #endif - CHECK_ACCURACY( hDiracDecBin->ChCrossIm[bin], 2 * q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], 16 ); } } @@ -1594,27 +1666,25 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( FOR( bin = LOW_BIT_RATE_BINAURAL_EQ_OFFSET; bin < lastEqBin; bin++ ) { -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES subFrameTotalEne[bin] = Mpy_64_32( subFrameTotalEne[bin], lowBitRateEQ_fx[bin] ); move64(); #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_USE_CURRENT_VERSION subFrameTotalEne_fx[bin] = Mpy_32_32( subFrameTotalEne_fx[bin], lowBitRateEQ_fx[bin] ); // exp = 
subFrameTotalEne_e[bin] move32(); #endif - CHECK_ACCURACY( subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16 ); } FOR( ; bin < nBins; bin++ ) { -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES subFrameTotalEne[bin] = Mpy_64_32( subFrameTotalEne[bin], lowBitRateEQ_fx[lastEqBin] ); move64(); #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_USE_CURRENT_VERSION subFrameTotalEne_fx[bin] = Mpy_32_32( subFrameTotalEne_fx[bin], lowBitRateEQ_fx[lastEqBin] ); // exp = subFrameTotalEne_e[bin] move32(); #endif - CHECK_ACCURACY( subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16 ); } } @@ -1623,17 +1693,17 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( IF( ( EQ_32( ivas_format, SBA_FORMAT ) || EQ_32( ivas_format, SBA_ISM_FORMAT ) ) && EQ_16( nchan_transport, 2 ) ) { Word32 tempRe, tempIm; -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES Word64 subFrameSumEne[CLDFB_NO_CHANNELS_MAX]; #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_USE_CURRENT_VERSION Word32 subFrameSumEne_fx[CLDFB_NO_CHANNELS_MAX]; Word16 subFrameSumEne_e[CLDFB_NO_CHANNELS_MAX]; #endif -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES set64_fx( subFrameSumEne, 0, CLDFB_NO_CHANNELS_MAX ); #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_USE_CURRENT_VERSION set32_fx( subFrameSumEne_fx, 0, CLDFB_NO_CHANNELS_MAX ); set16_fx( subFrameSumEne_e, 0, CLDFB_NO_CHANNELS_MAX ); #endif @@ -1644,27 +1714,25 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( tempRe = L_add( inRe_fx[0][slot][bin], inRe_fx[1][slot][bin] ); // q tempIm = L_add( inIm_fx[0][slot][bin], inIm_fx[1][slot][bin] ); // q temp64 = W_add( W_mult0_32_32( tempRe, tempRe ), 
W_mult0_32_32( tempIm, tempIm ) ); // 2q -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES subFrameSumEne[bin] = W_add( subFrameSumEne[bin], W_shr( temp64, rsh1 ) ); move64(); #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_USE_CURRENT_VERSION exp1 = W_norm( temp64 ); temp = W_extract_h( W_shl( temp64, exp1 ) ); // 2q + exp1 subFrameSumEne_fx[bin] = BASOP_Util_Add_Mant32Exp( subFrameSumEne_fx[bin], subFrameSumEne_e[bin], temp, sub( exp /* 63 - 2q */, exp1 ) /*31 - (2q + exp1 - 32)*/, &subFrameSumEne_e[bin] ); move32(); #endif - CHECK_ACCURACY( subFrameSumEne[bin], 2 * q - rsh1, subFrameSumEne_fx[bin], subFrameSumEne_e[bin], 16 ); } } FOR( bin = 0; bin < nBins; bin++ ) { -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES subFrameTotalEne[bin] = W_max( subFrameSumEne[bin], subFrameTotalEne[bin] ); move64(); #endif - -#ifdef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_USE_CURRENT_VERSION subFrameTotalEne_e[bin] = sub( subFrameTotalEne_e[bin], 1 ); move16(); temp = L_shl_sat( subFrameTotalEne_fx[bin], sub( subFrameTotalEne_e[bin], subFrameSumEne_e[bin] ) ); // subFrameSumEne_e[bin] @@ -1676,7 +1744,6 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move16(); } #endif - CHECK_ACCURACY( subFrameTotalEne[bin], 2 * q - rsh1, subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin], 16 ); } } @@ -1691,6 +1758,11 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( IIReneLimiterFactor_fx = L_add( L_shl( 8, Q26 ), L_sub( L_shl( 1, Q26 ), L_shr( qualityBasedSmFactor_fx, 5 ) ) ); // Q26 } +#ifdef NONBE_2157_INPUT_COV_MATRICES + double dQualityBasedSmFactor = ldexp( (double) qualityBasedSmFactor_fx, -31 ); + double dIIReneLimiterFactor = ldexp( (double) IIReneLimiterFactor_fx, -26 ); +#endif + FOR( bin = 0; bin < nBins; bin++ ) { /* Temporally smooth cov mtx estimates for resulting 
mixing matrix stability. The design principle is that @@ -1698,7 +1770,8 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( * robust performance at energy offsets when compared to typical IIR averaging. */ Word16 num_e, den_e; Word32 num, den; -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION Word16 exp; Word64 num64, den64; num64 = W_add( hDiracDecBin->ChEne[0][bin], hDiracDecBin->ChEne[1][bin] ); // 2q - rsh0 @@ -1709,19 +1782,29 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( den_e = W_norm( den64 ); num = W_extract_h( W_shl( num64, num_e ) ); // 2q - rsh0 - 5 + num_e - 32 den = W_extract_h( W_shl( den64, den_e ) ); // 2q - rsh0 + den_e - 32 - IIReneLimiter_fx[bin] = BASOP_Util_Divide3232_Scale_newton( num, den, &exp ); + IIReneLimiter[bin] = BASOP_Util_Divide3232_Scale_newton( num, den, &exp ); exp = add( sub( den_e, num_e ), add( 5, exp ) ); - IF( L_shr_sat( IIReneLimiter_fx[bin], sub( 31, exp ) ) > 0 ) + IF( L_shr_sat( IIReneLimiter[bin], sub( 31, exp ) ) > 0 ) { - IIReneLimiter_fx[bin] = ONE_IN_Q31; /*Q31*/ + IIReneLimiter[bin] = ONE_IN_Q31; /*Q31*/ move32(); } ELSE { - IIReneLimiter_fx[bin] = L_shl( IIReneLimiter_fx[bin], exp ); /*Q31*/ + IIReneLimiter[bin] = L_shl( IIReneLimiter[bin], exp ); /*Q31*/ } +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + float eneRatio; + eneRatio = (float) ( ( hDiracDecBin->fChEne[0][bin] + hDiracDecBin->fChEne[1][bin] ) * (float) dIIReneLimiterFactor ) / (float) fmaxf( 1e-12f, ( hDiracDecBin->fChEnePrev[0][bin] + hDiracDecBin->fChEnePrev[1][bin] ) ); + fIIReneLimiter[bin] = fminf( 1.0f, eneRatio ); + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + eneRatio = (float) ( ( hDiracDecBin->dChEne[0][bin] + hDiracDecBin->dChEne[1][bin] ) * dIIReneLimiterFactor ) / (float) fmax( 1e-12, ( hDiracDecBin->dChEnePrev[0][bin] + 
hDiracDecBin->dChEnePrev[1][bin] ) ); + dIIReneLimiter[bin] = fminf( 1.0f, eneRatio ); #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION num = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->ChEne_e[1][bin], &num_e ); num = Mpy_32_32( num, IIReneLimiterFactor_fx ); /*Q = (31 - num_e + 26 - 31) = (26 - num_e)*/ den_e = 0; @@ -1745,15 +1828,25 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( } #endif -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION hDiracDecBin->ChCrossRe[bin] = Mpy_64_32( hDiracDecBin->ChCrossRe[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 hDiracDecBin->ChCrossIm[bin] = Mpy_64_32( hDiracDecBin->ChCrossIm[bin], qualityBasedSmFactor_fx ); // 2q - rsh0 move64(); move64(); move64(); move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChCrossRe[bin] *= (float) dQualityBasedSmFactor; + hDiracDecBin->fChCrossIm[bin] *= (float) dQualityBasedSmFactor; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChCrossRe[bin] *= dQualityBasedSmFactor; + hDiracDecBin->dChCrossIm[bin] *= dQualityBasedSmFactor; +#endif #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION hDiracDecBin->ChCrossRe_fx[bin] = Mpy_32_32( hDiracDecBin->ChCrossRe_fx[bin], qualityBasedSmFactor_fx ); hDiracDecBin->ChCrossIm_fx[bin] = Mpy_32_32( hDiracDecBin->ChCrossIm_fx[bin], qualityBasedSmFactor_fx ); move32(); @@ -1761,58 +1854,88 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( move32(); move32(); #endif - CHECK_ACCURACY( hDiracDecBin->ChCrossRe[bin], 2 * q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], 
hDiracDecBin->ChCrossRe_e[bin], 16 ); - CHECK_ACCURACY( hDiracDecBin->ChCrossIm[bin], 2 * q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], 16 ); FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION hDiracDecBin->ChEne[ch][bin] = Mpy_64_32( hDiracDecBin->ChEne[ch][bin], qualityBasedSmFactor_fx ); move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChEne[ch][bin] *= (float) dQualityBasedSmFactor; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChEne[ch][bin] *= dQualityBasedSmFactor; #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION hDiracDecBin->ChEne_fx[ch][bin] = Mpy_32_32( hDiracDecBin->ChEne_fx[ch][bin], qualityBasedSmFactor_fx ); move32(); #endif - CHECK_ACCURACY( hDiracDecBin->ChEne[ch][bin], 2 * q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], 16 ); } -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], Mpy_64_32( hDiracDecBin->ChCrossRePrev[bin], IIReneLimiter_fx[bin] ) ); - hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], Mpy_64_32( hDiracDecBin->ChCrossImPrev[bin], IIReneLimiter_fx[bin] ) ); +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION + hDiracDecBin->ChCrossRe[bin] = W_add( hDiracDecBin->ChCrossRe[bin], Mpy_64_32( hDiracDecBin->ChCrossRePrev[bin], IIReneLimiter[bin] ) ); + hDiracDecBin->ChCrossIm[bin] = W_add( hDiracDecBin->ChCrossIm[bin], Mpy_64_32( hDiracDecBin->ChCrossImPrev[bin], IIReneLimiter[bin] ) ); move64(); move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChCrossRe[bin] 
+= fIIReneLimiter[bin] * hDiracDecBin->fChCrossRePrev[bin]; + hDiracDecBin->fChCrossIm[bin] += fIIReneLimiter[bin] * hDiracDecBin->fChCrossImPrev[bin]; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChCrossRe[bin] += dIIReneLimiter[bin] * hDiracDecBin->dChCrossRePrev[bin]; + hDiracDecBin->dChCrossIm[bin] += dIIReneLimiter[bin] * hDiracDecBin->dChCrossImPrev[bin]; +#endif #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION hDiracDecBin->ChCrossRe_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], Mpy_32_32( hDiracDecBin->ChCrossRePrev_fx[bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChCrossRePrev_e[bin], &hDiracDecBin->ChCrossRe_e[bin] ); hDiracDecBin->ChCrossIm_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], Mpy_32_32( hDiracDecBin->ChCrossImPrev_fx[bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChCrossImPrev_e[bin], &hDiracDecBin->ChCrossIm_e[bin] ); move32(); move32(); #endif - CHECK_ACCURACY( hDiracDecBin->ChCrossRe[bin], 2 * q - rsh0, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossRe_e[bin], 4096 ); // WARNING - CHECK_ACCURACY( hDiracDecBin->ChCrossIm[bin], 2 * q - rsh0, hDiracDecBin->ChCrossIm_fx[bin], hDiracDecBin->ChCrossIm_e[bin], 4096 ); // WARNING FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES - hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], Mpy_64_32( hDiracDecBin->ChEnePrev[ch][bin], IIReneLimiter_fx[bin] ) ); +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION + hDiracDecBin->ChEne[ch][bin] = W_add( hDiracDecBin->ChEne[ch][bin], Mpy_64_32( hDiracDecBin->ChEnePrev[ch][bin], IIReneLimiter[bin] ) ); move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChEne[ch][bin] += 
fIIReneLimiter[bin] * hDiracDecBin->fChEnePrev[ch][bin]; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChEne[ch][bin] += dIIReneLimiter[bin] * hDiracDecBin->dChEnePrev[ch][bin]; #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION hDiracDecBin->ChEne_fx[ch][bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], Mpy_32_32( hDiracDecBin->ChEnePrev_fx[ch][bin], IIReneLimiter_fx[bin] ), hDiracDecBin->ChEnePrev_e[ch][bin], &hDiracDecBin->ChEne_e[ch][bin] ); move32(); #endif - CHECK_ACCURACY( hDiracDecBin->ChEne[ch][bin], 2 * q - rsh0, hDiracDecBin->ChEne_fx[ch][bin], hDiracDecBin->ChEne_e[ch][bin], 4096 ); // WARNING } /* Store energy values and coefficients for next round */ -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION hDiracDecBin->ChCrossRePrev[bin] = hDiracDecBin->ChCrossRe[bin]; move64(); hDiracDecBin->ChCrossImPrev[bin] = hDiracDecBin->ChCrossIm[bin]; move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChCrossRePrev[bin] = hDiracDecBin->fChCrossRe[bin]; + hDiracDecBin->fChCrossRePrev[bin] = hDiracDecBin->fChCrossRe[bin]; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChCrossRePrev[bin] = hDiracDecBin->dChCrossRe[bin]; + hDiracDecBin->dChCrossRePrev[bin] = hDiracDecBin->dChCrossRe[bin]; +#endif #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION hDiracDecBin->ChCrossRePrev_fx[bin] = hDiracDecBin->ChCrossRe_fx[bin]; move32(); hDiracDecBin->ChCrossImPrev_fx[bin] = hDiracDecBin->ChCrossIm_fx[bin]; @@ -1825,11 +1948,19 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { -#ifdef 
NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES + // OPTIMIZED FIXED-POINT IMPLEMENTATION hDiracDecBin->ChEnePrev[ch][bin] = hDiracDecBin->ChEne[ch][bin]; move64(); +#ifdef NONBE_2157_ACCURACY_ANALYSIS + // SINGLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->fChEnePrev[ch][bin] = hDiracDecBin->fChEne[ch][bin]; + // DOUBLE-PRECISION FLOATING-POINT IMPLEMENTATION + hDiracDecBin->dChEnePrev[ch][bin] = hDiracDecBin->dChEne[ch][bin]; #endif -#ifdef NONBE_2157_VERIFY_ACCURACY +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION + // CURRENT FIXED-POINT IMPLEMENTATION hDiracDecBin->ChEnePrev_fx[ch][bin] = hDiracDecBin->ChEne_fx[ch][bin]; move32(); hDiracDecBin->ChEnePrev_e[ch][bin] = hDiracDecBin->ChEne_e[ch][bin]; @@ -1837,8 +1968,9 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #endif } -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES -#ifndef NONBE_2157_VERIFY_ACCURACY +#ifdef NONBE_2157_INPUT_COV_MATRICES +#ifndef NONBE_2157_ACCURACY_ANALYSIS + // OPTIMIZED FIXED-POINT IMPLEMENTATION Word16 shl; for ( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { @@ -1862,6 +1994,91 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #endif } + +#ifdef NONBE_2157_ACCURACY_ANALYSIS + static int frame_number = 0; + printf( "Frame %d analysis\n", frame_number ); + { + double fc_max_abserr = 0.0; // fixed-point current + double fo_max_abserr = 0.0; // fixed-point optimized + double fs_max_abserr = 0.0; // floating-point single + for ( bin = 0; bin < nBins; bin++ ) + { + double fc = ldexp( (double) hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_e[0][bin] - 31 ); + double fo = ldexp( (double) hDiracDecBin->ChEne[0][bin], -( 2 * q - rsh0 ) ); + double fs = hDiracDecBin->fChEne[0][bin]; + double fd = hDiracDecBin->dChEne[0][bin]; + fc_max_abserr = fmax( fc_max_abserr, fabs( fd - fc ) ); + fo_max_abserr = fmax( fo_max_abserr, fabs( fd - fo ) 
); + fs_max_abserr = fmax( fs_max_abserr, fabs( fd - fs ) ); + } + printf( "ChEne0 analysis - Max Absolute Error\n" ); + printf( " - fixed-point current: %+.6e\n", fc_max_abserr ); + printf( " - fixed-point optimized: %+.6e\n", fo_max_abserr ); + printf( " - floating-point single: %+.6e\n", fs_max_abserr ); + } + { + double fc_max_abserr = 0.0; // fixed-point current + double fo_max_abserr = 0.0; // fixed-point optimized + double fs_max_abserr = 0.0; // floating-point single + for ( bin = 0; bin < nBins; bin++ ) + { + double fc = ldexp( (double) hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->ChEne_e[1][bin] - 31 ); + double fo = ldexp( (double) hDiracDecBin->ChEne[1][bin], -( 2 * q - rsh0 ) ); + double fs = hDiracDecBin->fChEne[1][bin]; + double fd = hDiracDecBin->dChEne[1][bin]; + fc_max_abserr = fmax( fc_max_abserr, fabs( fd - fc ) ); + fo_max_abserr = fmax( fo_max_abserr, fabs( fd - fo ) ); + fs_max_abserr = fmax( fs_max_abserr, fabs( fd - fs ) ); + } + printf( "ChEne1 analysis - Max Absolute Error\n" ); + printf( " - fixed-point current: %+.6e\n", fc_max_abserr ); + printf( " - fixed-point optimized: %+.6e\n", fo_max_abserr ); + printf( " - floating-point single: %+.6e\n", fs_max_abserr ); + } + { + double fc_max_abserr = 0.0; // fixed-point current + double fo_max_abserr = 0.0; // fixed-point optimized + double fs_max_abserr = 0.0; // floating-point single + for ( bin = 0; bin < nBins; bin++ ) + { + double fc = ldexp( (double) subFrameTotalEne_fx[bin], subFrameTotalEne_e[bin] - 31 ); + double fo = ldexp( (double) subFrameTotalEne[bin], -( 2 * q - rsh1 ) ); + double fs = fSubFrameTotalEne[bin]; + double fd = dSubFrameTotalEne[bin]; + fc_max_abserr = fmax( fc_max_abserr, fabs( fd - fc ) ); + fo_max_abserr = fmax( fo_max_abserr, fabs( fd - fo ) ); + fs_max_abserr = fmax( fs_max_abserr, fabs( fd - fs ) ); + } + printf( "SubFrameTotalEne analysis - Max Absolute Error\n" ); + printf( " - fixed-point current: %+.6e\n", fc_max_abserr ); + printf( " - fixed-point 
optimized: %+.6e\n", fo_max_abserr ); + printf( " - floating-point single: %+.6e\n", fs_max_abserr ); + } + { + double fc_max_abserr = 0.0; // fixed-point current + double fo_max_abserr = 0.0; // fixed-point optimized + double fs_max_abserr = 0.0; // floating-point single + for ( bin = 0; bin < nBins; bin++ ) + { + double fc = ldexp( (double) IIReneLimiter_fx[bin], -31 ); + double fo = ldexp( (double) IIReneLimiter[bin], -31 ); + double fs = fIIReneLimiter[bin]; + double fd = dIIReneLimiter[bin]; + fc_max_abserr = fmax( fc_max_abserr, fabs( fd - fc ) ); + fo_max_abserr = fmax( fo_max_abserr, fabs( fd - fo ) ); + fs_max_abserr = fmax( fs_max_abserr, fabs( fd - fs ) ); + } + printf( "IIReneLimiter analysis - Max Absolute Error\n" ); + printf( " - fixed-point current: %+.6e\n", fc_max_abserr ); + printf( " - fixed-point optimized: %+.6e\n", fo_max_abserr ); + printf( " - floating-point single: %+.6e\n", fs_max_abserr ); + } + + ++frame_number; +#endif + + return; } @@ -6773,7 +6990,6 @@ static void ivas_masa_ext_rend_parambin_internal_fx( Word32 reverbIm_fx[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; Word32 decorrRe_fx[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; Word32 decorrIm_fx[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; - Word64 subFrameTotalEne[CLDFB_NO_CHANNELS_MAX]; Word32 subFrameTotalEne_fx[CLDFB_NO_CHANNELS_MAX]; Word16 subFrameTotalEne_e[CLDFB_NO_CHANNELS_MAX]; Word32 IIReneLimiter_fx[CLDFB_NO_CHANNELS_MAX]; @@ -6909,7 +7125,7 @@ static void ivas_masa_ext_rend_parambin_internal_fx( max_band_decorr = hDiracDecBin->h_freq_domain_decorr_ap_params->max_band_decorr; ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, subframe, - subFrameTotalEne, subFrameTotalEne_fx, subFrameTotalEne_e, IIReneLimiter_fx, q_inp ); + subFrameTotalEne_fx, subFrameTotalEne_e, IIReneLimiter_fx, q_inp ); 
ivas_dirac_dec_binaural_formulate_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, subFrameTotalEne_fx, subFrameTotalEne_e, IIReneLimiter_fx, NULL ); diff --git a/lib_rend/ivas_stat_rend.h b/lib_rend/ivas_stat_rend.h index ed4b4a230..b0344ab3a 100644 --- a/lib_rend/ivas_stat_rend.h +++ b/lib_rend/ivas_stat_rend.h @@ -612,13 +612,28 @@ typedef struct ivas_dirac_dec_binaural_data_structure Word16 q_processMtxDec; Word16 q_processMtxDecPrev; -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES Word64 ChEne[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; Word64 ChEnePrev[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; Word64 ChCrossRe[CLDFB_NO_CHANNELS_MAX]; Word64 ChCrossRePrev[CLDFB_NO_CHANNELS_MAX]; Word64 ChCrossIm[CLDFB_NO_CHANNELS_MAX]; Word64 ChCrossImPrev[CLDFB_NO_CHANNELS_MAX]; +#ifdef NONBE_2157_ACCURACY_ANALYSIS + float fChEne[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + float fChEnePrev[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + float fChCrossRe[CLDFB_NO_CHANNELS_MAX]; + float fChCrossRePrev[CLDFB_NO_CHANNELS_MAX]; + float fChCrossIm[CLDFB_NO_CHANNELS_MAX]; + float fChCrossImPrev[CLDFB_NO_CHANNELS_MAX]; + + double dChEne[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + double dChEnePrev[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; + double dChCrossRe[CLDFB_NO_CHANNELS_MAX]; + double dChCrossRePrev[CLDFB_NO_CHANNELS_MAX]; + double dChCrossIm[CLDFB_NO_CHANNELS_MAX]; + double dChCrossImPrev[CLDFB_NO_CHANNELS_MAX]; +#endif #endif Word32 ChEnePrev_fx[BINAURAL_CHANNELS][CLDFB_NO_CHANNELS_MAX]; diff --git a/lib_rend/lib_rend_fx.c b/lib_rend/lib_rend_fx.c index 3ddf1d06e..728ddf0b7 100644 --- a/lib_rend/lib_rend_fx.c +++ b/lib_rend/lib_rend_fx.c @@ -9924,6 +9924,7 @@ static ivas_error ivas_masa_ext_rend_dirac_rend_init( return error; } +#include 
"string.h" static ivas_error ivas_masa_ext_rend_parambin_init( input_masa *inputMasa, /* i/o: MASA external renderer structure */ @@ -10011,19 +10012,27 @@ static ivas_error ivas_masa_ext_rend_parambin_init( move16(); move16(); move16(); -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES set64_fx( hDiracDecBin->ChEnePrev[j], 0, nBins ); -#else + memset( hDiracDecBin->fChEnePrev[j], 0, nBins * sizeof( float ) ); + memset( hDiracDecBin->dChEnePrev[j], 0, nBins * sizeof( double ) ); +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION set_zero_fx( hDiracDecBin->ChEnePrev_fx[j], nBins ); set16_fx( hDiracDecBin->ChEnePrev_e[j], 0, nBins ); #endif set_zero_fx( hDiracDecBin->ChEneOutPrev_fx[j], nBins ); set16_fx( hDiracDecBin->ChEneOutPrev_e[j], 0, nBins ); } -#ifdef NONBE_2157_IVAS_DIRAC_DEC_BINAURAL_FORMULATE_INPUT_COVARIANCE_MATRICES +#ifdef NONBE_2157_INPUT_COV_MATRICES set64_fx( hDiracDecBin->ChCrossRePrev, 0, nBins ); set64_fx( hDiracDecBin->ChCrossImPrev, 0, nBins ); -#else + memset( hDiracDecBin->fChCrossRePrev, 0, nBins * sizeof( float ) ); + memset( hDiracDecBin->fChCrossImPrev, 0, nBins * sizeof( float ) ); + memset( hDiracDecBin->dChCrossRePrev, 0, nBins * sizeof( double ) ); + memset( hDiracDecBin->dChCrossImPrev, 0, nBins * sizeof( double ) ); +#endif +#ifdef NONBE_2157_USE_CURRENT_VERSION set_zero_fx( hDiracDecBin->ChCrossRePrev_fx, nBins ); set_zero_fx( hDiracDecBin->ChCrossImPrev_fx, nBins ); set16_fx( hDiracDecBin->ChCrossRePrev_e, 0, nBins ); -- GitLab From efb82841ae943a705271d57345de029797ec1833 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 4 Nov 2025 13:55:52 +0000 Subject: [PATCH 21/26] Fix unused symbols. 
--- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 7ebd111ab..c89562e81 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1430,7 +1430,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( Word64 temp64; #ifdef NONBE_2157_INPUT_COV_MATRICES Word64 subFrameTotalEne[CLDFB_NO_CHANNELS_MAX]; - Word16 rsh0, rsh1, e0, e1; + Word16 rsh0, rsh1; #ifdef NONBE_2157_ACCURACY_ANALYSIS double inRe[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; double inIm[BINAURAL_CHANNELS][CLDFB_SLOTS_PER_SUBFRAME][CLDFB_NO_CHANNELS_MAX]; @@ -1440,6 +1440,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( double dIIReneLimiter[CLDFB_NO_CHANNELS_MAX]; Word32 IIReneLimiter[CLDFB_NO_CHANNELS_MAX]; #else + Word16 e0, e1; Word32 *IIReneLimiter; IIReneLimiter = IIReneLimiter_fx; #endif -- GitLab From b423dafcf6a5b48ca57b96b533f783249777f6da Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 4 Nov 2025 15:57:10 +0000 Subject: [PATCH 22/26] Fix undeclared symbols. 
--- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index c89562e81..d39bde59d 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1563,9 +1563,11 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( #ifdef NONBE_2157_INPUT_COV_MATRICES rsh0 = (Word16) ( ceil_log2( hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); rsh1 = (Word16) ( ceil_log2( BINAURAL_CHANNELS * hSpatParamRendCom->subframe_nbslots[subframe] ) - 1 ); +#ifndef NONBE_2157_ACCURACY_ANALYSIS e0 = add( sub( 63, shl( q, 1 ) ), rsh0 ); e1 = add( sub( 63, shl( q, 1 ) ), rsh1 ); #endif +#endif #ifdef NONBE_2157_USE_CURRENT_VERSION exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below #endif -- GitLab From 9e0c45fb20145d744dada6357e9ff9de09c44ff5 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 4 Nov 2025 16:27:06 +0000 Subject: [PATCH 23/26] Remove abserr and relerr functions. 
--- .../ivas_dirac_dec_binaural_functions_fx.c | 23 +------------------ 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index d39bde59d..87e122d88 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1379,27 +1379,6 @@ static Word64 Mpy_64_32( Word64 W_var1, Word32 L_var2 ) #ifdef NONBE_2157_ACCURACY_ANALYSIS #include "string.h" - -static double abserr( double computed, double expected ); -static double relerr( double computed, double expected ); - -static double abserr( double computed, double expected ) -{ - return fabs( computed - expected ); -} - -static double relerr( double computed, double expected ) -{ - if ( expected == 0.0 && computed == 0.0 ) - { - return 0.0; - } - else - { - return abserr( computed, expected ) / fabs( expected ); - } -} - #define NONBE_2157_USE_CURRENT_VERSION #endif #else @@ -1998,7 +1977,7 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( } -#ifdef NONBE_2157_ACCURACY_ANALYSIS +#if 0 static int frame_number = 0; printf( "Frame %d analysis\n", frame_number ); { -- GitLab From 3835ece2091295f79e305059cb4c2d356aa2ec67 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 4 Nov 2025 16:32:02 +0000 Subject: [PATCH 24/26] Fix MSVC build failure. 
--- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 87e122d88..0500aff1c 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1754,7 +1754,9 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( Word32 num, den; #ifdef NONBE_2157_INPUT_COV_MATRICES // OPTIMIZED FIXED-POINT IMPLEMENTATION +#ifndef NONBE_2157_USE_CURRENT_VERSION Word16 exp; +#endif Word64 num64, den64; num64 = W_add( hDiracDecBin->ChEne[0][bin], hDiracDecBin->ChEne[1][bin] ); // 2q - rsh0 num64 = Mpy_64_32( num64, IIReneLimiterFactor_fx ); // 2q - rsh0 - 5 -- GitLab From 58c4732cfc2fdb691d6a5c4f647069c3c5c6c10a Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 4 Nov 2025 16:49:30 +0000 Subject: [PATCH 25/26] Disable NONBE_2157_ACCURACY_ANALYSIS. --- lib_com/options.h | 2 +- lib_rend/lib_rend_fx.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib_com/options.h b/lib_com/options.h index 8e2182ef3..ec3d1882f 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -147,7 +147,7 @@ /* #################### Start BASOP optimization switches ############################ */ #define NONBE_2157_INPUT_COV_MATRICES /* Dolby: task 2157: optimize ivas_dirac_dec_binaural_formulate_input_covariance_matrices */ -#define NONBE_2157_ACCURACY_ANALYSIS /* Dolby: task 2157: accuracy analysis */ +/* #define NONBE_2157_ACCURACY_ANALYSIS */ /* Dolby: task 2157: accuracy analysis */ /* #################### End BASOP optimization switches ############################ */ diff --git a/lib_rend/lib_rend_fx.c b/lib_rend/lib_rend_fx.c index 728ddf0b7..3a657ae7d 100644 --- a/lib_rend/lib_rend_fx.c +++ b/lib_rend/lib_rend_fx.c @@ -10014,9 +10014,11 @@ static ivas_error ivas_masa_ext_rend_parambin_init( move16(); #ifdef NONBE_2157_INPUT_COV_MATRICES set64_fx( 
hDiracDecBin->ChEnePrev[j], 0, nBins ); +#ifdef NONBE_2157_ACCURACY_ANALYSIS memset( hDiracDecBin->fChEnePrev[j], 0, nBins * sizeof( float ) ); memset( hDiracDecBin->dChEnePrev[j], 0, nBins * sizeof( double ) ); #endif +#endif #ifdef NONBE_2157_USE_CURRENT_VERSION set_zero_fx( hDiracDecBin->ChEnePrev_fx[j], nBins ); set16_fx( hDiracDecBin->ChEnePrev_e[j], 0, nBins ); @@ -10027,11 +10029,13 @@ static ivas_error ivas_masa_ext_rend_parambin_init( #ifdef NONBE_2157_INPUT_COV_MATRICES set64_fx( hDiracDecBin->ChCrossRePrev, 0, nBins ); set64_fx( hDiracDecBin->ChCrossImPrev, 0, nBins ); +#ifdef NONBE_2157_ACCURACY_ANALYSIS memset( hDiracDecBin->fChCrossRePrev, 0, nBins * sizeof( float ) ); memset( hDiracDecBin->fChCrossImPrev, 0, nBins * sizeof( float ) ); memset( hDiracDecBin->dChCrossRePrev, 0, nBins * sizeof( double ) ); memset( hDiracDecBin->dChCrossImPrev, 0, nBins * sizeof( double ) ); #endif +#endif #ifdef NONBE_2157_USE_CURRENT_VERSION set_zero_fx( hDiracDecBin->ChCrossRePrev_fx, nBins ); set_zero_fx( hDiracDecBin->ChCrossImPrev_fx, nBins ); -- GitLab From d876257ca9abf38149c3e465e1e5ef83ee33cd26 Mon Sep 17 00:00:00 2001 From: Nicolas Roussin Date: Tue, 4 Nov 2025 17:06:29 +0000 Subject: [PATCH 26/26] Fix unused variables. --- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 0500aff1c..1ca047c07 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -1741,8 +1741,10 @@ static void ivas_dirac_dec_binaural_formulate_input_covariance_matrices_fx( } #ifdef NONBE_2157_INPUT_COV_MATRICES +#ifdef NONBE_2157_ACCURACY_ANALYSIS double dQualityBasedSmFactor = ldexp( (double) qualityBasedSmFactor_fx, -31 ); double dIIReneLimiterFactor = ldexp( (double) IIReneLimiterFactor_fx, -26 ); +#endif #endif FOR( bin = 0; bin < nBins; bin++ ) -- GitLab