From bb3eca924e0e62a576dc0c0131891e70a5cc2434 Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Nov 2024 16:41:54 +0100 Subject: [PATCH 01/14] Changes for IVAS-BASOP Ticket 1009: Complexity: High Complexity Overhead for ParamISM decoding to binaural lib_com/basop_util.c: Tuned instrumentation of 32-Bit division routine, now 24 bit accuracy instead of 32 bit (cadence version) lib_com/fft_fx.c: Tuned instrumentation of small helper functions get_min_scalefactor and L_norm_arr lib_com/options.h: Defined a macro for this fix 1009. It is only used in binaural renderer for simplifying divisions (32x32). lib_com/tools.c: Tuned instrumentation of small helper functions minimum_s etc. lib_rend/ivas_dirac_dec_binaural_functions.c: Defined precalculated values for EPSILON with full precision Replaced division by constants by multiplications Replaced square root of a division (division + sqrt) by ISqrt32+Mul Simplified all matrix multiplication functions Best regards Arthur Tritthart, Fraunhofer IIS, 29-NOV-2024 --- lib_com/basop_util.c | 69 +++ lib_com/fft_fx.c | 22 + lib_com/options.h | 6 +- lib_com/tools.c | 15 +- lib_rend/ivas_dirac_dec_binaural_functions.c | 506 +++++++++---------- 5 files changed, 329 insertions(+), 289 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index c465428fc..79d57198f 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1038,8 +1038,76 @@ Word32 div_w( Word32 L_num, Word32 L_den ) } } +Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits) +{ + Word32 z; + Word16 sx; + Word16 sy; + Word32 sign; + Word16 iteration; + Flag Carry; + Word16 s_val; + + unset_carry(&Carry); + + /* assert (x >= (Word32)0); */ + assert( y != (Word32) 0 ); + + IF( x == (Word32) 0 ) + { + *s = -31; + move16(); + return ( (Word32) 0 ); + } + + sign = L_shr(L_xor(x,y), 31); + + sx = norm_l( x ); + x = L_shl( x, sx ); + x = L_shr( x, 1 ); + s_val = sub( 1, sx ); + if( x < 0 ) + { + x = L_negate( x ); + } + + 
sy = norm_l( y ); + y = L_shl( y, sy ); + y = L_shr( y, 1 ); + s_val = add(s_val, sy ); + if( y >= 0 ) + { + y = L_negate( y ); + } + + *s = s_val; + move16(); + + z = L_sub(x, x); // z = 0 + + for ( iteration = (Word16) 0; iteration < (Word16) bits; iteration++ ) + { + if ( L_add(x, y) >= 0 ) + { + x = DEPR_L_add_c(x, y, &Carry); // sets always carry=1 + } + z = DEPR_L_add_c( z, z, &Carry ); // sets always carry=0 + x = L_add(x, x); + } + + if ( sign != 0 ) + { + z = L_negate( z ); + } + return L_shl(z, sub(31, bits)); +} + + Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { +#if 1 + return BASOP_Util_Divide3232_Scale_FhG(x,y,s,24); +#else Word32 z; Word16 sx; Word16 sy; @@ -1088,6 +1156,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) } return z; +#endif } Word16 BASOP_Util_Divide3232_Scale( Word32 x, Word32 y, Word16 *s ) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 6b8b49bd2..3e664fb56 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -7299,15 +7299,26 @@ Word16 L_norm_arr( Word32 *arr, Word16 size ) Word16 q = 31; move16(); FOR( Word16 i = 0; i < size; i++ ) +#if 0 IF( arr[i] != 0 ) { q = s_min( q, norm_l( arr[i] ) ); } +#else + { + Word16 q_tst; + q_tst = norm_l(arr[i]); + if (arr[i] != 0) + q = s_min(q, q_tst); + } + +#endif return q; } Word16 get_min_scalefactor( Word32 x, Word32 y ) { +#if 0 Word16 scf = Q31; move16(); test(); @@ -7324,6 +7335,16 @@ Word16 get_min_scalefactor( Word32 x, Word32 y ) scf = s_min( scf, norm_l( y ) ); } return scf; +#else + Word16 scf = Q31; + Word16 scf_y; + if (x != 0) + scf = norm_l( x ); + scf_y = norm_l( y ); + if (y != 0) + scf = s_min(scf_y, scf); + return scf; +#endif } Flag is_zero_arr( Word32 *arr, Word16 size ) @@ -7335,4 +7356,5 @@ Flag is_zero_arr( Word32 *arr, Word16 size ) } return 1; + } diff --git a/lib_com/options.h b/lib_com/options.h index 095c183e8..9c09f9cbf 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -54,7 +54,7 @@ 
#define SUPPORT_JBM_TRACEFILE /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */ -/*#define WMOPS*/ /* Activate complexity and memory counters */ +#define WMOPS /* Activate complexity and memory counters */ #ifdef WMOPS /*#define WMOPS_PER_FRAME*/ /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */ /*#define MEM_COUNT_DETAILS*/ /* Output detailed memory analysis for the worst-case frame (writes to the file "mem_analysis.csv") */ @@ -196,6 +196,10 @@ #define FIX_953_WRONG_ENERGY_RATIO_MASA_EXT /* Nok: Fix 953 wrong energy ratio value after shift and cast to Word8 */ #define FIX_982_WRONG_DECODED_ENERGY_RATIO /* Nokia: Fix 982 wrong energy in EXT mode and in second direction when present */ #define FIX_999_WRONG_ISM_EXTENDED_METADATA /* VA: fix 999: fix ISM extended metadata decoding */ + +#define FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC /* FhG: Reduce workload of binaural rendering: replace 1./tmp & sqrt by Isqrt32 */ + /* Replace computations with constants by setting of constants */ + /* Simplify matrix multiplications and some external helper routines */ /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ diff --git a/lib_com/tools.c b/lib_com/tools.c index 072cfa767..e4d5914e6 100644 --- a/lib_com/tools.c +++ b/lib_com/tools.c @@ -917,30 +917,23 @@ Word16 minimum_s( Word16 *min_val /* o : minimum value in the input vector */ ) { - Word16 i, ind, tmp; - + Word16 i, ind; ind = 0; move16(); - tmp = vec[0]; - move16(); FOR( i = 1; i < lvec; i++ ) { - IF( LT_16( vec[i], tmp ) ) + if( LT_16( vec[i], vec[ind] ) ) { - ind = i; - move16(); - tmp = vec[i]; - move16(); + ind = add(i, 0); } } if ( min_val != NULL ) { - *min_val = tmp; + *min_val = vec[ind]; move16(); } - return ind; } #else diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c index 
6ff9685bf..32c266c6f 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions.c @@ -79,8 +79,11 @@ Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; #define LOG_10_BASE_2_Q29 1783446528 // Q29 #define TAN_30_FX 17157 // Q15 #define INV_TAN30_FX 28377 // Q14 -#define EPSILON_MANT 1180591621 /* 1e-12 in Q70 */ +#define EPSILON_MANT 1180591621 /* 1e-12 = 0,5497558*(2^-39) in Q70 */ #define EPSILON_EXP ( -39 ) +#define ONE_DIV_EPSILON_MANT 1953125000 /* 1e+12 = 0,9094947*(2^40) */ +#define ONE_DIV_EPSILON_EXP ( 40 ) + #endif #define ADAPT_HTPROTO_ROT_LIM_1 0.8f @@ -866,10 +869,12 @@ void ivas_dirac_dec_binaural_render_fx( } output_length = 0; + move16(); FOR( subframe_idx = first_sf; subframe_idx < last_sf; subframe_idx++ ) { Word16 n_samples_sf = imult1616( slot_size, hSpatParamRendCom->subframe_nbslots[subframe_idx] ); + ivas_dirac_dec_binaural_internal_fx( st_ivas, st_ivas->hCombinedOrientationData, output_fx_local, nchan_transport, subframe_idx ); FOR( ch = 0; ch < nchan_out; ch++ ) @@ -1635,7 +1640,6 @@ static void ivas_dirac_dec_binaural_internal_fx( st_ivas->cldfbSynDec[ch]->Q_cldfb_state = Q11; move16(); } - return; } #endif @@ -2385,7 +2389,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move32(); } } - /* Apply EQ at low bit rates */ IF( applyLowBitRateEQ != 0 ) { @@ -2402,7 +2405,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move32(); } } - test(); test(); IF( ( EQ_32( ivas_format, SBA_FORMAT ) || EQ_32( ivas_format, SBA_ISM_FORMAT ) ) && EQ_16( nchan_transport, 2 ) ) @@ -2438,7 +2440,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric } } } - /* Determine target covariance matrix containing target binaural properties */ FOR( bin = 0; bin < nBins; bin++ ) { @@ -2534,7 +2535,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move16(); gainCacheBaseIndex = add( 6, 
ismDirIndex ); } - diffuseness_fx = L_sub( diffuseness_fx, ratio_fx ); /* diffuseness = 1 - ratio1 - ratio2 */ if ( diffuseness_fx < 0 ) @@ -2579,9 +2579,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric altSpreadCoh_fx = sub( 32767, shl_sat( div_s( numr, denr ), sub( den_e, num_e ) ) ); // 4289 = pi/6 in Q13 spreadCoh_fx = s_max( spreadCoh_fx, altSpreadCoh_fx ); } - getDirectPartGains_fx( bin, aziDeg, eleDeg, &lRealp_fx, &lImagp_fx, &rRealp_fx, &rImagp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex], isHeadtracked ); - Word16 q_lr = Q28; move16(); if ( hDiracDecBin->renderStereoOutputInsteadOfBinaural ) @@ -2591,7 +2589,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric spreadCoh_fx = 0; move32(); } - IF( spreadCoh_fx > 0 ) { Word32 centerMul_fx, sidesMul_fx; @@ -2644,7 +2641,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric /* Apply the gain for the right source of the three coherent sources. * -30 degrees to 330 wrapping due to internal functions. 
*/ - getDirectPartGains_fx( bin, aziDeg + 330, eleDeg, &lRealpTmp_fx, &lImagpTmp_fx, &rRealpTmp_fx, &rImagpTmp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex + 2], isHeadtracked ); hrtfEneSides_fx = L_add( hrtfEneSides_fx, @@ -2666,7 +2662,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric eneCorrectionFactor_fx = BASOP_Util_Divide3232_Scale( L_add( Mpy_32_32( hrtfEneSides_fx, Mpy_32_32( sidesMul_fx, sidesMul_fx ) ), Mpy_32_32( hrtfEneCenter_fx, Mpy_32_32( centerMul_fx, centerMul_fx ) ) ), L_max( 1, hrtfEneRealized_fx ), &eneCorrectionFactor_e ); - /* Weighting factors to determine appropriate target spectrum for spread coherent sound */ IF( LT_16( spreadCoh_fx, 16384 ) ) { @@ -2723,7 +2718,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric q_lr = Q23; move16(); } - hrtfEne_fx[0] = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), Mpy_32_32( lImagp_fx, lImagp_fx ) ); // Q( 2*q_lr - 31 ) hrtfEne_fx[1] = L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), Mpy_32_32( rImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 ) move32(); @@ -2808,7 +2802,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric } move32(); } - /* Store parameters for formulating average diffuseness over frame */ Word32 frameMeanDiffuseness = BASOP_Util_Add_Mant32Exp( hDiracDecBin->frameMeanDiffuseness_fx[bin], 2 /*Q29*/, diffEneValForDecorrelationReduction_fx, sub( 31, q_diffEneValForDecorrelationReduction ), &exp1 ); // exp = exp1 frameMeanDiffusenessEneWeight_fx[bin] = L_add( frameMeanDiffusenessEneWeight_fx[bin], meanEnePerCh_fx ); @@ -2820,7 +2813,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29 move32(); } - test(); /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. 
*/ IF( EQ_32( ivas_format, MASA_FORMAT ) && LT_32( ivas_total_brate, MASA_STEREO_MIN_BITRATE ) ) @@ -2923,7 +2915,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move16(); } } - return; } #endif @@ -3155,7 +3146,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices( } } } - return; } #else @@ -3182,6 +3172,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( Word32 ivas_total_brate; Word16 nchan_transport; Word16 exp; + Word16 q_processMtx[CLDFB_NO_CHANNELS_MAX], q_processMtxPrev[CLDFB_NO_CHANNELS_MAX]; Word16 q_processMtx_SCCR[CLDFB_NO_CHANNELS_MAX], q_processMtxPrev_SCCR[CLDFB_NO_CHANNELS_MAX]; Word16 q_processMtxDec[CLDFB_NO_CHANNELS_MAX], q_processMtxDecPrev[CLDFB_NO_CHANNELS_MAX]; @@ -3299,7 +3290,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( } move32(); move16(); - formulate2x2MixingMatrix_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->q_ChEne, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossIm_fx[bin], @@ -3709,7 +3699,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( move16(); minimum_s( q_processMtxDec, nBins, &hDiracDecBin->q_processMtxDec ); minimum_s( q_processMtxDecPrev, nBins, &hDiracDecBin->q_processMtxDecPrev ); - FOR( bin = 0; bin < nBins; bin++ ) { FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) @@ -3749,7 +3738,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( } } } - return; } #endif @@ -5082,7 +5070,6 @@ static void eig2x2_fx( move32(); *q_U = Q31; move16(); - return; } @@ -5099,7 +5086,6 @@ static void eig2x2_fx( move32(); *q_U = Q30; move16(); - return; } } @@ -5113,11 +5099,9 @@ static void eig2x2_fx( move32(); *q_U = Q30; move16(); - return; } } - q_U_1 = 0; q_U_2 = 0; move16(); @@ -5143,6 +5127,7 @@ static void eig2x2_fx( IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) ) { + s_fx = tmp2; move32(); exp = sub( norm_l( s_fx ), 1 ); @@ -5153,12 +5138,15 @@ 
static void eig2x2_fx( q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); - +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); - +#else + normVal_fx = ISqrt32(tmp3, &exp_tmp3); + q_tmp2 = sub(31, exp_tmp3); +#endif IF( LT_16( q_tmp1, q_c ) ) { c_re = L_shr( c_re, sub( q_c, q_tmp1 ) ); @@ -5222,12 +5210,15 @@ static void eig2x2_fx( q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); - +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); - +#else + normVal_fx = ISqrt32(tmp3, &exp_tmp3); + q_tmp2 = sub(31, exp_tmp3); +#endif IF( LT_16( q_tmp1, q_c ) ) { c_re = L_shr( c_re, sub( q_c, q_tmp1 ) ); @@ -5279,7 +5270,6 @@ static void eig2x2_fx( move16(); } } - IF( q_U_1 != 0 ) *q_U = q_U_1; ELSE @@ -5388,9 +5378,6 @@ static void matrixMul_fx( Word16 chA, chB; Word16 min_q_shift1, min_q_shift2; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); -#ifndef IVAS_ENH64_CADENCE_CHANGES - Word32 tmp1, tmp2; -#endif min_q_shift1 = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); min_q_shift2 = sub( s_min( L_norm_arr( Bre_fx[0], size ), L_norm_arr( Bim_fx[0], size ) ), 1 ); @@ -5419,109 +5406,22 @@ static void matrixMul_fx( outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], Bim_fx[0][chB] ), Are_fx[chA][1], Bim_fx[1][chB] ) ) ); move32(); #else - test(); - test(); - test(); - IF( ( Are_fx[chA][0] >= 0 && Bre_fx[0][chB] >= 0 ) || ( Are_fx[chA][0] < 0 && Bre_fx[0][chB] < 0 ) ) - { 
- tmp1 = Mpy_32_32( Are_fx[chA][0], Bre_fx[0][chB] ); - } - ELSE - { - tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bre_fx[0][chB] ) ) ); - } - test(); - test(); - test(); - IF( ( Are_fx[chA][1] >= 0 && Bre_fx[1][chB] >= 0 ) || ( Are_fx[chA][1] < 0 && Bre_fx[1][chB] < 0 ) ) - { - tmp2 = Mpy_32_32( Are_fx[chA][1], Bre_fx[1][chB] ); - } - ELSE - { - tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( Bre_fx[1][chB] ) ) ); - } - outRe_fx[chA][chB] = L_add( tmp1, tmp2 ); - move32(); - - test(); - test(); - test(); - IF( ( Aim_fx[chA][0] >= 0 && Bim_fx[0][chB] >= 0 ) || ( Aim_fx[chA][0] < 0 && Bim_fx[0][chB] < 0 ) ) - { - tmp1 = Mpy_32_32( Aim_fx[chA][0], Bim_fx[0][chB] ); - } - ELSE - { - tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( Bim_fx[0][chB] ) ) ); - } - test(); - test(); - test(); - IF( ( Aim_fx[chA][1] >= 0 && Bim_fx[1][chB] >= 0 ) || ( Aim_fx[chA][1] < 0 && Bim_fx[1][chB] < 0 ) ) - { - tmp2 = Mpy_32_32( Aim_fx[chA][1], Bim_fx[1][chB] ); - } - ELSE - { - tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( Bim_fx[1][chB] ) ) ); - } - outRe_fx[chA][chB] = L_sub( outRe_fx[chA][chB], L_add( tmp1, tmp2 ) ); - move32(); - test(); - test(); - test(); - IF( ( Aim_fx[chA][0] >= 0 && Bre_fx[0][chB] >= 0 ) || ( Aim_fx[chA][0] < 0 && Bre_fx[0][chB] < 0 ) ) - { - tmp1 = Mpy_32_32( Aim_fx[chA][0], Bre_fx[0][chB] ); - } - ELSE - { - tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( Bre_fx[0][chB] ) ) ); - } - test(); - test(); - test(); - IF( ( Aim_fx[chA][1] >= 0 && Bre_fx[1][chB] >= 0 ) || ( Aim_fx[chA][1] < 0 && Bre_fx[1][chB] < 0 ) ) - { - tmp2 = Mpy_32_32( Aim_fx[chA][1], Bre_fx[1][chB] ); - } - ELSE - { - tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( Bre_fx[1][chB] ) ) ); - } - outIm_fx[chA][chB] = L_add( tmp1, tmp2 ); + outRe_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[0][chB] ), + Are_fx[chA][1], Bre_fx[1][chB] ), + Aim_fx[chA][0], Bim_fx[0][chB] ), + 
Aim_fx[chA][1], Bim_fx[1][chB] ); move32(); - - test(); - test(); - test(); - IF( ( Are_fx[chA][0] >= 0 && Bim_fx[0][chB] >= 0 ) || ( Are_fx[chA][0] < 0 && Bim_fx[0][chB] < 0 ) ) - { - tmp1 = Mpy_32_32( Are_fx[chA][0], Bim_fx[0][chB] ); - } - ELSE - { - tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bim_fx[0][chB] ) ) ); - } - test(); - test(); - test(); - IF( ( Are_fx[chA][1] >= 0 && Bim_fx[1][chB] >= 0 ) || ( Are_fx[chA][1] < 0 && Bim_fx[1][chB] < 0 ) ) - { - tmp2 = Mpy_32_32( Are_fx[chA][1], Bim_fx[1][chB] ); - } - ELSE - { - tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( Bim_fx[1][chB] ) ) ); - } - outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) ); + outIm_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[0][chB] ), + Aim_fx[chA][1], Bre_fx[1][chB] ), + Are_fx[chA][0], Bim_fx[0][chB] ), + Are_fx[chA][1], Bim_fx[1][chB] ); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ } } *q_out = sub( add( *q_A, *q_B ), 31 ); + move16(); if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) { @@ -5571,77 +5471,20 @@ static void matrixTransp1Mul_fx( { Word16 chA, chB; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); - Word32 tmp1, tmp2; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { - test(); - test(); - test(); - IF( ( ( ( Are_fx[0][chA] >= 0 ) && ( Bre_fx[0][chB] >= 0 ) ) || ( ( Are_fx[0][chA] < 0 ) && ( Bre_fx[0][chB] < 0 ) ) ) ) - tmp1 = Mpy_32_32( Are_fx[0][chA], Bre_fx[0][chB] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[0][chA] ), L_abs( Bre_fx[0][chB] ) ) ); - test(); - test(); - test(); - IF( ( ( ( Are_fx[1][chA] >= 0 ) && ( Bre_fx[1][chB] >= 0 ) ) || ( ( Are_fx[1][chA] < 0 ) && ( Bre_fx[1][chB] < 0 ) ) ) ) - tmp2 = Mpy_32_32( Are_fx[1][chA], Bre_fx[1][chB] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[1][chA] ), L_abs( Bre_fx[1][chB] ) ) ); - outRe_fx[chA][chB] = L_add( 
tmp1, tmp2 ); + outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bre_fx[0][chB] ), + Are_fx[1][chA], Bre_fx[1][chB] ), + Aim_fx[0][chA], Bim_fx[0][chB] ), + Aim_fx[1][chA], Bim_fx[1][chB] ); move32(); - test(); - test(); - test(); - IF( ( ( ( L_negate( Aim_fx[0][chA] ) >= 0 ) && ( Bim_fx[0][chB] >= 0 ) ) || ( ( L_negate( Aim_fx[0][chA] ) < 0 ) && ( Bim_fx[0][chB] < 0 ) ) ) ) - tmp1 = Mpy_32_32( -Aim_fx[0][chA], Bim_fx[0][chB] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( -Aim_fx[0][chA] ), L_abs( Bim_fx[0][chB] ) ) ); - test(); - test(); - test(); - IF( ( ( ( Aim_fx[1][chA] >= 0 ) && ( Bim_fx[1][chB] >= 0 ) ) || ( ( Aim_fx[1][chA] < 0 ) && ( Bim_fx[1][chB] < 0 ) ) ) ) - tmp2 = Mpy_32_32( Aim_fx[1][chA], Bim_fx[1][chB] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[1][chA] ), L_abs( Bim_fx[1][chB] ) ) ); - outRe_fx[chA][chB] = L_sub( outRe_fx[chA][chB], L_sub( tmp1, tmp2 ) ); - move32(); - - test(); - test(); - test(); - IF( ( ( ( L_negate( Aim_fx[0][chA] ) >= 0 ) && ( Bre_fx[0][chB] >= 0 ) ) || ( ( L_negate( Aim_fx[0][chA] ) < 0 ) && ( Bre_fx[0][chB] < 0 ) ) ) ) - tmp1 = Mpy_32_32( -Aim_fx[0][chA], Bre_fx[0][chB] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( -Aim_fx[0][chA] ), L_abs( Bre_fx[0][chB] ) ) ); - test(); - test(); - test(); - IF( ( ( ( Aim_fx[1][chA] >= 0 ) && ( Bre_fx[1][chB] >= 0 ) ) || ( ( Aim_fx[1][chA] < 0 ) && ( Bre_fx[1][chB] < 0 ) ) ) ) - tmp2 = Mpy_32_32( Aim_fx[1][chA], Bre_fx[1][chB] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[1][chA] ), L_abs( Bre_fx[1][chB] ) ) ); - outIm_fx[chA][chB] = L_sub( tmp1, tmp2 ); - move32(); - - test(); - test(); - test(); - IF( ( ( ( Are_fx[0][chA] >= 0 ) && ( Bim_fx[0][chB] >= 0 ) ) || ( ( Are_fx[0][chA] < 0 ) && ( Bim_fx[0][chB] < 0 ) ) ) ) - tmp1 = Mpy_32_32( Are_fx[0][chA], Bim_fx[0][chB] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[0][chA] ), L_abs( Bim_fx[0][chB] ) ) ); - test(); - test(); - test(); - IF( ( ( ( Are_fx[1][chA] >= 0 ) && ( 
Bim_fx[1][chB] >= 0 ) ) || ( ( Are_fx[1][chA] < 0 ) && ( Bim_fx[1][chB] < 0 ) ) ) ) - tmp2 = Mpy_32_32( Are_fx[1][chA], Bim_fx[1][chB] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[1][chA] ), L_abs( Bim_fx[1][chB] ) ) ); - outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) ); + outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bim_fx[0][chB] ), + Are_fx[1][chA], Bim_fx[1][chB] ), + Aim_fx[0][chA], Bre_fx[0][chB] ), + Aim_fx[1][chA], Bre_fx[1][chB] ); move32(); } } @@ -5697,9 +5540,6 @@ static void matrixTransp2Mul_fx( Word16 chA, chB; Word16 min_q_shift; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); -#ifndef IVAS_ENH64_CADENCE_CHANGES - Word32 tmp1, tmp2; -#endif min_q_shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); scale_sig32( Are_fx[0], size, min_q_shift ); @@ -5726,72 +5566,15 @@ static void matrixTransp2Mul_fx( outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], L_negate( Bim_fx[chB][0] ) ), Are_fx[chA][1], L_negate( Bim_fx[chB][1] ) ) ) ); move32(); #else - test(); - test(); - test(); - IF( ( Are_fx[chA][0] >= 0 && Bre_fx[chB][0] >= 0 ) || ( Are_fx[chA][0] < 0 && Bre_fx[chB][0] < 0 ) ) - tmp1 = Mpy_32_32( Are_fx[chA][0], Bre_fx[chB][0] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bre_fx[chB][0] ) ) ); - test(); - test(); - test(); - IF( ( Are_fx[chA][1] >= 0 && Bre_fx[chB][1] >= 0 ) || ( Are_fx[chA][1] < 0 && Bre_fx[chB][1] < 0 ) ) - tmp2 = Mpy_32_32( Are_fx[chA][1], Bre_fx[chB][1] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( Bre_fx[chB][1] ) ) ); - outRe_fx[chA][chB] = L_add( tmp1, tmp2 ); - move32(); - - test(); - test(); - test(); - IF( ( Aim_fx[chA][0] >= 0 && L_negate( Bim_fx[chB][0] ) >= 0 ) || ( Aim_fx[chA][0] < 0 && L_negate( Bim_fx[chB][0] ) < 0 ) ) - tmp1 = Mpy_32_32( Aim_fx[chA][0], -Bim_fx[chB][0] ); - ELSE - tmp1 = L_negate( 
Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( -Bim_fx[chB][0] ) ) ); - test(); - test(); - test(); - IF( ( Aim_fx[chA][1] >= 0 && L_negate( Bim_fx[chB][1] ) >= 0 ) || ( Aim_fx[chA][1] < 0 && L_negate( Bim_fx[chB][1] ) < 0 ) ) - tmp2 = Mpy_32_32( Aim_fx[chA][1], -Bim_fx[chB][1] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( -Bim_fx[chB][1] ) ) ); - outRe_fx[chA][chB] = L_sub( outRe_fx[chA][chB], L_add( tmp1, tmp2 ) ); - move32(); - - test(); - test(); - test(); - IF( ( Aim_fx[chA][0] >= 0 && Bre_fx[chB][0] >= 0 ) || ( Aim_fx[chA][0] < 0 && Bre_fx[chB][0] < 0 ) ) - tmp1 = Mpy_32_32( Aim_fx[chA][0], Bre_fx[chB][0] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( Bre_fx[chB][0] ) ) ); - test(); - test(); - test(); - IF( ( Aim_fx[chA][1] >= 0 && Bre_fx[chB][1] >= 0 ) || ( Aim_fx[chA][1] < 0 && Bre_fx[chB][1] < 0 ) ) - tmp2 = Mpy_32_32( Aim_fx[chA][1], Bre_fx[chB][1] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( Bre_fx[chB][1] ) ) ); - outIm_fx[chA][chB] = L_add( tmp1, tmp2 ); + outRe_fx[chA][chB] = Madd_32_32(Madd_32_32(Madd_32_32(Mpy_32_32(Are_fx[chA][0],Bre_fx[chB][0]), + Are_fx[chA][1],Bre_fx[chB][1]), + Aim_fx[chA][0],Bim_fx[chB][0]), + Aim_fx[chA][1],Bim_fx[chB][1]); move32(); - - test(); - test(); - test(); - IF( ( Are_fx[chA][0] >= 0 && L_negate( Bim_fx[chB][0] ) >= 0 ) || ( Are_fx[chA][0] < 0 && L_negate( Bim_fx[chB][0] ) < 0 ) ) - tmp1 = Mpy_32_32( Are_fx[chA][0], -Bim_fx[chB][0] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( -Bim_fx[chB][0] ) ) ); - test(); - test(); - test(); - IF( ( Are_fx[chA][1] >= 0 && L_negate( Bim_fx[chB][1] ) >= 0 ) || ( Are_fx[chA][1] < 0 && L_negate( Bim_fx[chB][1] ) < 0 ) ) - tmp2 = Mpy_32_32( Are_fx[chA][1], -Bim_fx[chB][1] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( -Bim_fx[chB][1] ) ) ); - outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) ); + outIm_fx[chA][chB] = 
Msub_32_32(Msub_32_32(Madd_32_32(Mpy_32_32(Aim_fx[chA][0],Bre_fx[chB][0]), + Aim_fx[chA][1],Bre_fx[chB][1]), + Are_fx[chA][0],Bim_fx[chB][0]), + Are_fx[chA][1],Bim_fx[chB][1]); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ } @@ -5869,6 +5652,7 @@ static void chol2x2_fx( } ELSE { +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC outRe[1][0] = BASOP_Util_Divide3232_Scale_cadence( c_re, outRe[0][0], &exp ); move32(); q_re2 = add( sub( 31, exp ), sub( q_c, q_re1 ) ); @@ -5876,6 +5660,33 @@ static void chol2x2_fx( outIm[1][0] = BASOP_Util_Divide3232_Scale_cadence( c_im, outRe[0][0], &exp ); move32(); q_im = add( sub( 31, exp ), sub( q_c, q_re1 ) ); +#else + Word32 denom; + Word16 den_exp; + Word32 my_outRe, my_outIm; + + /* Compute denom = 1.0 / outRe[0][0] */ + denom = ISqrt32(outRe[0][0], &exp); + denom = Mpy_32_32(denom, denom); + den_exp = shl(exp, 1); + + /* Normalise c_re, c_im */ + exp = norm_l( c_re ); + my_outRe = L_shl( c_re, exp ); + q_re2 = add( q_c, exp ); + exp = norm_l( c_im ); + my_outIm = L_shl( c_im, exp ); + q_im = add( q_c, exp ); + + /* Multiply and store c_re*denom and c_im*denom */ + outRe[1][0] = Mpy_32_32(denom, my_outRe); + move32(); + q_re2 = sub(q_re2, den_exp); + + outIm[1][0] = Mpy_32_32(denom, my_outIm); + move32(); + q_im = sub(q_im, den_exp); +#endif } if ( outRe[1][0] == 0 ) { @@ -5891,11 +5702,23 @@ static void chol2x2_fx( temp = Madd_32_32( Mpy_32_32( c_re, c_re ), c_im, c_im ); q_tmp = sub( add( q_c, q_c ), 31 ); + // 4611686 = Q62 IF( e1 == 0 ) { - temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp ); - q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) ); +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC + temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp ); + q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) ); +#else + Word32 my_temp; + Word16 my_q_tmp; + my_temp = temp; + my_q_tmp = q_tmp; + temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp ); + q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) 
); + my_temp = Mpy_32_32(my_temp, ONE_DIV_EPSILON_MANT); + my_q_tmp = add(my_q_tmp, ONE_DIV_EPSILON_EXP); +#endif } ELSE { @@ -6241,6 +6064,8 @@ static void formulate2x2MixingMatrix_fx( Word32 temp; Word16 q_Pre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], q_Pim[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word16 hdrm_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], hdrm_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + + set16_fx( hdrm_re[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); @@ -6295,8 +6120,12 @@ static void formulate2x2MixingMatrix_fx( // 4611686 = Q62 IF( maxEne_fx == 0 ) { - maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12f in Q62 - q_maxEneDiv = add( sub( 31, exp ), sub( Q30, 62 ) ); + // maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12f in Q62 + // q_maxEneDiv = add( sub( 31, exp ), sub( Q30, 62 ) ); + maxEneDiv_fx = ONE_DIV_EPSILON_MANT; + move32(); + q_maxEneDiv = ONE_DIV_EPSILON_EXP; + move16(); } ELSE { @@ -6325,7 +6154,6 @@ static void formulate2x2MixingMatrix_fx( /* Cholesky decomposition of target / output covariance matrix */ chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky ); - /* Eigendecomposition of input covariance matrix */ eig2x2_fx( E_in1, E_in2, q_ein, Cin_re, Cin_im, q_cin, Uxre_fx, Uxim_fx, &q_Ux, Sx_fx, &q_Sx ); @@ -6356,8 +6184,19 @@ static void formulate2x2MixingMatrix_fx( IF( temp == 0 ) { - BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62 - exp = sub( exp, sub( q_eout, 62 ) ); + IF (E_out1 == 0) + { + Ghat_fx[0] = 0; + exp = -19; + move32(); + move16(); + } + ELSE + { + temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62 + exp = sub( exp, sub( q_eout, 62 ) ); + Ghat_fx[0] = Sqrt32( temp, 
&exp ); // Q = 31 - exp + } } ELSE { @@ -6365,16 +6204,26 @@ static void formulate2x2MixingMatrix_fx( temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp ); exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) ); + Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp } - Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); temp = Mpy_32_32( E_in1, 2147484 ); // 2147484 = 0.001f in Q31 temp = L_max( temp, E_in2 ); // q_ein IF( temp == 0 ) { - BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62 - exp1 = sub( exp1, sub( q_eout, 62 ) ); + IF (E_out2 == 0) + { /* We can set hard-coded results */ + Ghat_fx[1] = 0; + exp1 = -19; + move16(); + } + ELSE + { + temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62 + exp1 = sub( exp1, sub( q_eout, 62 ) ); + Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 + } } ELSE { @@ -6382,8 +6231,8 @@ static void formulate2x2MixingMatrix_fx( temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); + Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 } - Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 move32(); q_Ghat = sub( 31, s_max( exp, exp1 ) ); @@ -6432,21 +6281,32 @@ static void formulate2x2MixingMatrix_fx( IF( D_fx[0] == 0 ) { +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12 in Q62 exp = sub( exp, sub( Q30, 62 ) ); +#else + temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + exp = ONE_DIV_EPSILON_EXP; +#endif } ELSE { temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); exp = sub( exp, sub( Q30, q_D ) ); } + div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); IF( D_fx[1] == 0 ) { +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp1 ); // 4611686 = 1e-12 in Q62 exp1 = sub( exp1, 
sub( Q30, 62 ) ); +#else + temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + exp1 = ONE_DIV_EPSILON_EXP; +#endif } ELSE { @@ -6547,27 +6407,54 @@ static void formulate2x2MixingMatrix_fx( } matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ - /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ +#if (BINAURAL_CHANNELS != 2) FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { IF( Sx_fx[chB] == 0 ) { - Pre_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pre_fx[chA][chB], 4611686, &exp ); // 4611686 = 1e-12 in Q62 - q_Pre[chA][chB] = add( sub( q_P, 62 ), sub( 31, exp ) ); - Pim_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pim_fx[chA][chB], 4611686, &exp ); // 4611686 = 1e-12 in Q62 - q_Pim[chA][chB] = add( sub( q_P, 62 ), sub( 31, exp ) ); + Pre_fx[chA][chB] = Mpy_32_32(Pre_fx[chA][chB], ONE_DIV_EPSILON_MANT); + //q_Pre[chA][chB] = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP); + q_Pre[chA][chB] = sub(62 - ONE_DIV_EPSILON_EXP, q_P); + + + Pim_fx[chA][chB] = Mpy_32_32(Pim_fx[chA][chB], ONE_DIV_EPSILON_MANT); + //q_Pim[chA][chB] = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP); + q_Pim[chA][chB] = sub(62 - ONE_DIV_EPSILON_EXP, q_P); + } ELSE { + Word16 Pre_shift, Pim_shift; temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Pre_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pre_fx[chA][chB], temp, &exp ); q_Pre[chA][chB] = add( sub( q_P, sub( 31, exp_temp ) ), sub( 31, exp ) ); Pim_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pim_fx[chA][chB], temp, &exp ); q_Pim[chA][chB] = add( sub( q_P, sub( 31, exp_temp ) ), sub( 31, exp ) ); +#else + temp = BASOP_Util_Divide3232_Scale_cadence(ONE_IN_Q30, temp, &exp); + Pre_shift = norm_l( Pre_fx[chA][chB] ); + Pim_shift = 
norm_l( Pim_fx[chA][chB] ); + Pre_fx[chA][chB] = Mpy_32_32( L_shl( Pre_fx[chA][chB], Pre_shift ), temp ); + Pim_fx[chA][chB] = Mpy_32_32( L_shl( Pim_fx[chA][chB], Pim_shift ), temp ); + q_temp = add(sub(sub(q_P, exp), sub(31, Q30)),exp_temp); + q_Pre[chA][chB] = add(q_temp, Pre_shift); + q_Pim[chA][chB] = add(q_temp, Pim_shift); +#endif + } + if (Pre_fx[chA][chB] == 0) + { + q_Pre[chA][chB] = 31; + move16(); + } + if (Pim_fx[chA][chB] == 0) + { + q_Pim[chA][chB] = 31; + move16(); } move32(); move32(); @@ -6575,6 +6462,72 @@ static void formulate2x2MixingMatrix_fx( move16(); } } +#else + /* BINAURAL_CHANNEL == 2 */ + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + IF( Sx_fx[chB] == 0 ) + { + Pre_fx[0][chB] = Mpy_32_32(Pre_fx[0][chB], ONE_DIV_EPSILON_MANT); + q_Pre[0][chB] = sub(62 - ONE_DIV_EPSILON_EXP, q_P); + Pim_fx[0][chB] = Mpy_32_32(Pim_fx[0][chB], ONE_DIV_EPSILON_MANT); + q_Pim[0][chB] = sub(62 - ONE_DIV_EPSILON_EXP, q_P); + Pre_fx[1][chB] = Mpy_32_32(Pre_fx[1][chB], ONE_DIV_EPSILON_MANT); + q_Pre[1][chB] = sub(62 - ONE_DIV_EPSILON_EXP, q_P); + Pim_fx[1][chB] = Mpy_32_32(Pim_fx[1][chB], ONE_DIV_EPSILON_MANT); + q_Pim[1][chB] = sub(62 - ONE_DIV_EPSILON_EXP, q_P); + } + ELSE + { + Word16 Pre_shift, Pim_shift; + temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); + temp = BASOP_Util_Divide3232_Scale_cadence(ONE_IN_Q30, temp, &exp); + q_temp = add(sub(sub(q_P, exp), sub(31, Q30)),exp_temp); + + Pre_shift = norm_l( Pre_fx[0][chB] ); + Pim_shift = norm_l( Pim_fx[0][chB] ); + Pre_fx[0][chB] = Mpy_32_32( L_shl( Pre_fx[0][chB], Pre_shift ), temp ); + Pim_fx[0][chB] = Mpy_32_32( L_shl( Pim_fx[0][chB], Pim_shift ), temp ); + q_Pre[0][chB] = add(q_temp, Pre_shift); + q_Pim[0][chB] = add(q_temp, Pim_shift); + + Pre_shift = norm_l( Pre_fx[1][chB] ); + Pim_shift = norm_l( Pim_fx[1][chB] ); + Pre_fx[1][chB] = Mpy_32_32( L_shl( Pre_fx[1][chB], Pre_shift ), temp ); + Pim_fx[1][chB] = Mpy_32_32( L_shl( Pim_fx[1][chB], 
Pim_shift ), temp ); + q_Pre[1][chB] = add(q_temp, Pre_shift); + q_Pim[1][chB] = add(q_temp, Pim_shift); + } + if (Pre_fx[0][chB] == 0) + { + q_Pre[0][chB] = 31; + move16(); + } + if (Pim_fx[0][chB] == 0) + { + q_Pim[0][chB] = 31; + move16(); + } + if (Pre_fx[1][chB] == 0) + { + q_Pre[1][chB] = 31; + move16(); + } + if (Pim_fx[1][chB] == 0) + { + q_Pim[1][chB] = 31; + move16(); + } + move32(); + move32(); + move16(); + move16(); + move32(); + move32(); + move16(); + move16(); + } +#endif minimum_s( q_Pre[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp ); q_P = s_min( q_P, exp ); minimum_s( q_Pim[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp ); @@ -6592,7 +6545,6 @@ static void formulate2x2MixingMatrix_fx( } matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, tmpRe_fx, tmpIm_fx, &q_temp ); - matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, Mre_fx, Mim_fx, q_M ); return; -- GitLab From 602bf646f3ec861e5e5d59aba7f9abdec58e5a26 Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Nov 2024 17:24:24 +0100 Subject: [PATCH 02/14] Fix clang format issues --- lib_com/basop_util.c | 28 ++-- lib_com/fft_fx.c | 15 +- lib_com/tools.c | 4 +- lib_rend/ivas_dirac_dec_binaural_functions.c | 161 +++++++++---------- 4 files changed, 103 insertions(+), 105 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 79d57198f..79f128578 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1038,17 +1038,17 @@ Word32 div_w( Word32 L_num, Word32 L_den ) } } -Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits) +Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits ) { Word32 z; Word16 sx; Word16 sy; Word32 sign; Word16 iteration; - Flag Carry; + Flag Carry; Word16 s_val; - unset_carry(&Carry); + unset_carry( &Carry ); /* assert (x >= (Word32)0); */ assert( y != (Word32) 0 ); @@ -1060,13 +1060,13 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, 
Word32 y, Word16 *s, Word16 bi return ( (Word32) 0 ); } - sign = L_shr(L_xor(x,y), 31); + sign = L_shr( L_xor( x, y ), 31 ); sx = norm_l( x ); x = L_shl( x, sx ); x = L_shr( x, 1 ); s_val = sub( 1, sx ); - if( x < 0 ) + if ( x < 0 ) { x = L_negate( x ); } @@ -1074,8 +1074,8 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi sy = norm_l( y ); y = L_shl( y, sy ); y = L_shr( y, 1 ); - s_val = add(s_val, sy ); - if( y >= 0 ) + s_val = add( s_val, sy ); + if ( y >= 0 ) { y = L_negate( y ); } @@ -1083,30 +1083,30 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi *s = s_val; move16(); - z = L_sub(x, x); // z = 0 + z = L_sub( x, x ); // z = 0 for ( iteration = (Word16) 0; iteration < (Word16) bits; iteration++ ) { - if ( L_add(x, y) >= 0 ) + if ( L_add( x, y ) >= 0 ) { - x = DEPR_L_add_c(x, y, &Carry); // sets always carry=1 + x = DEPR_L_add_c( x, y, &Carry ); // sets always carry=1 } - z = DEPR_L_add_c( z, z, &Carry ); // sets always carry=0 - x = L_add(x, x); + z = DEPR_L_add_c( z, z, &Carry ); // sets always carry=0 + x = L_add( x, x ); } if ( sign != 0 ) { z = L_negate( z ); } - return L_shl(z, sub(31, bits)); + return L_shl( z, sub( 31, bits ) ); } Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { #if 1 - return BASOP_Util_Divide3232_Scale_FhG(x,y,s,24); + return BASOP_Util_Divide3232_Scale_FhG( x, y, s, 24 ); #else Word32 z; Word16 sx; diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 3e664fb56..aceffad0b 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -7307,11 +7307,11 @@ Word16 L_norm_arr( Word32 *arr, Word16 size ) #else { Word16 q_tst; - q_tst = norm_l(arr[i]); - if (arr[i] != 0) - q = s_min(q, q_tst); + q_tst = norm_l( arr[i] ); + if ( arr[i] != 0 ) + q = s_min( q, q_tst ); } - + #endif return q; } @@ -7338,11 +7338,11 @@ Word16 get_min_scalefactor( Word32 x, Word32 y ) #else Word16 scf = Q31; Word16 scf_y; - if (x != 0) + if ( x != 0 ) scf = norm_l( x ); 
scf_y = norm_l( y ); - if (y != 0) - scf = s_min(scf_y, scf); + if ( y != 0 ) + scf = s_min( scf_y, scf ); return scf; #endif } @@ -7356,5 +7356,4 @@ Flag is_zero_arr( Word32 *arr, Word16 size ) } return 1; - } diff --git a/lib_com/tools.c b/lib_com/tools.c index e4d5914e6..cd962f2ac 100644 --- a/lib_com/tools.c +++ b/lib_com/tools.c @@ -923,9 +923,9 @@ Word16 minimum_s( FOR( i = 1; i < lvec; i++ ) { - if( LT_16( vec[i], vec[ind] ) ) + if ( LT_16( vec[i], vec[ind] ) ) { - ind = add(i, 0); + ind = add( i, 0 ); } } diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c index 32c266c6f..3bf7060fe 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions.c @@ -5144,8 +5144,8 @@ static void eig2x2_fx( normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); #else - normVal_fx = ISqrt32(tmp3, &exp_tmp3); - q_tmp2 = sub(31, exp_tmp3); + normVal_fx = ISqrt32( tmp3, &exp_tmp3 ); + q_tmp2 = sub( 31, exp_tmp3 ); #endif IF( LT_16( q_tmp1, q_c ) ) { @@ -5216,8 +5216,8 @@ static void eig2x2_fx( normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); #else - normVal_fx = ISqrt32(tmp3, &exp_tmp3); - q_tmp2 = sub(31, exp_tmp3); + normVal_fx = ISqrt32( tmp3, &exp_tmp3 ); + q_tmp2 = sub( 31, exp_tmp3 ); #endif IF( LT_16( q_tmp1, q_c ) ) { @@ -5407,14 +5407,14 @@ static void matrixMul_fx( move32(); #else outRe_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[0][chB] ), - Are_fx[chA][1], Bre_fx[1][chB] ), - Aim_fx[chA][0], Bim_fx[0][chB] ), - Aim_fx[chA][1], Bim_fx[1][chB] ); + Are_fx[chA][1], Bre_fx[1][chB] ), + Aim_fx[chA][0], Bim_fx[0][chB] ), + Aim_fx[chA][1], Bim_fx[1][chB] ); move32(); outIm_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[0][chB] ), - Aim_fx[chA][1], Bre_fx[1][chB] ), - Are_fx[chA][0], Bim_fx[0][chB] ), - Are_fx[chA][1], Bim_fx[1][chB] ); + Aim_fx[chA][1], 
Bre_fx[1][chB] ), + Are_fx[chA][0], Bim_fx[0][chB] ), + Are_fx[chA][1], Bim_fx[1][chB] ); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ } @@ -5477,14 +5477,14 @@ static void matrixTransp1Mul_fx( FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bre_fx[0][chB] ), - Are_fx[1][chA], Bre_fx[1][chB] ), - Aim_fx[0][chA], Bim_fx[0][chB] ), - Aim_fx[1][chA], Bim_fx[1][chB] ); + Are_fx[1][chA], Bre_fx[1][chB] ), + Aim_fx[0][chA], Bim_fx[0][chB] ), + Aim_fx[1][chA], Bim_fx[1][chB] ); move32(); outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bim_fx[0][chB] ), - Are_fx[1][chA], Bim_fx[1][chB] ), - Aim_fx[0][chA], Bre_fx[0][chB] ), - Aim_fx[1][chA], Bre_fx[1][chB] ); + Are_fx[1][chA], Bim_fx[1][chB] ), + Aim_fx[0][chA], Bre_fx[0][chB] ), + Aim_fx[1][chA], Bre_fx[1][chB] ); move32(); } } @@ -5566,15 +5566,15 @@ static void matrixTransp2Mul_fx( outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], L_negate( Bim_fx[chB][0] ) ), Are_fx[chA][1], L_negate( Bim_fx[chB][1] ) ) ) ); move32(); #else - outRe_fx[chA][chB] = Madd_32_32(Madd_32_32(Madd_32_32(Mpy_32_32(Are_fx[chA][0],Bre_fx[chB][0]), - Are_fx[chA][1],Bre_fx[chB][1]), - Aim_fx[chA][0],Bim_fx[chB][0]), - Aim_fx[chA][1],Bim_fx[chB][1]); + outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[chB][0] ), + Are_fx[chA][1], Bre_fx[chB][1] ), + Aim_fx[chA][0], Bim_fx[chB][0] ), + Aim_fx[chA][1], Bim_fx[chB][1] ); move32(); - outIm_fx[chA][chB] = Msub_32_32(Msub_32_32(Madd_32_32(Mpy_32_32(Aim_fx[chA][0],Bre_fx[chB][0]), - Aim_fx[chA][1],Bre_fx[chB][1]), - Are_fx[chA][0],Bim_fx[chB][0]), - Are_fx[chA][1],Bim_fx[chB][1]); + outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[chB][0] ), + Aim_fx[chA][1], Bre_fx[chB][1] ), + Are_fx[chA][0], Bim_fx[chB][0] ), + Are_fx[chA][1], 
Bim_fx[chB][1] ); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ } @@ -5666,9 +5666,9 @@ static void chol2x2_fx( Word32 my_outRe, my_outIm; /* Compute denom = 1.0 / outRe[0][0] */ - denom = ISqrt32(outRe[0][0], &exp); - denom = Mpy_32_32(denom, denom); - den_exp = shl(exp, 1); + denom = ISqrt32( outRe[0][0], &exp ); + denom = Mpy_32_32( denom, denom ); + den_exp = shl( exp, 1 ); /* Normalise c_re, c_im */ exp = norm_l( c_re ); @@ -5677,15 +5677,15 @@ static void chol2x2_fx( exp = norm_l( c_im ); my_outIm = L_shl( c_im, exp ); q_im = add( q_c, exp ); - + /* Multiply and store c_re*denom and c_im*denom */ - outRe[1][0] = Mpy_32_32(denom, my_outRe); + outRe[1][0] = Mpy_32_32( denom, my_outRe ); move32(); - q_re2 = sub(q_re2, den_exp); + q_re2 = sub( q_re2, den_exp ); - outIm[1][0] = Mpy_32_32(denom, my_outIm); + outIm[1][0] = Mpy_32_32( denom, my_outIm ); move32(); - q_im = sub(q_im, den_exp); + q_im = sub( q_im, den_exp ); #endif } if ( outRe[1][0] == 0 ) @@ -5702,22 +5702,22 @@ static void chol2x2_fx( temp = Madd_32_32( Mpy_32_32( c_re, c_re ), c_im, c_im ); q_tmp = sub( add( q_c, q_c ), 31 ); - + // 4611686 = Q62 IF( e1 == 0 ) { #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC - temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp ); - q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) ); + temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp ); + q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) ); #else - Word32 my_temp; - Word16 my_q_tmp; - my_temp = temp; - my_q_tmp = q_tmp; - temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp ); - q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) ); - my_temp = Mpy_32_32(my_temp, ONE_DIV_EPSILON_MANT); - my_q_tmp = add(my_q_tmp, ONE_DIV_EPSILON_EXP); + Word32 my_temp; + Word16 my_q_tmp; + my_temp = temp; + my_q_tmp = q_tmp; + temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp ); + q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) ); + my_temp = Mpy_32_32( my_temp, ONE_DIV_EPSILON_MANT ); + 
my_q_tmp = add( my_q_tmp, ONE_DIV_EPSILON_EXP ); #endif } ELSE @@ -6124,7 +6124,7 @@ static void formulate2x2MixingMatrix_fx( // q_maxEneDiv = add( sub( 31, exp ), sub( Q30, 62 ) ); maxEneDiv_fx = ONE_DIV_EPSILON_MANT; move32(); - q_maxEneDiv = ONE_DIV_EPSILON_EXP; + q_maxEneDiv = ONE_DIV_EPSILON_EXP; move16(); } ELSE @@ -6184,7 +6184,7 @@ static void formulate2x2MixingMatrix_fx( IF( temp == 0 ) { - IF (E_out1 == 0) + IF( E_out1 == 0 ) { Ghat_fx[0] = 0; exp = -19; @@ -6212,8 +6212,8 @@ static void formulate2x2MixingMatrix_fx( temp = L_max( temp, E_in2 ); // q_ein IF( temp == 0 ) { - IF (E_out2 == 0) - { /* We can set hard-coded results */ + IF( E_out2 == 0 ) + { /* We can set hard-coded results */ Ghat_fx[1] = 0; exp1 = -19; move16(); @@ -6285,7 +6285,7 @@ static void formulate2x2MixingMatrix_fx( temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12 in Q62 exp = sub( exp, sub( Q30, 62 ) ); #else - temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ exp = ONE_DIV_EPSILON_EXP; #endif } @@ -6304,7 +6304,7 @@ static void formulate2x2MixingMatrix_fx( temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp1 ); // 4611686 = 1e-12 in Q62 exp1 = sub( exp1, sub( Q30, 62 ) ); #else - temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ exp1 = ONE_DIV_EPSILON_EXP; #endif } @@ -6408,22 +6408,21 @@ static void formulate2x2MixingMatrix_fx( matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ -#if (BINAURAL_CHANNELS != 2) +#if ( BINAURAL_CHANNELS != 2 ) FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { IF( Sx_fx[chB] == 0 
) { - Pre_fx[chA][chB] = Mpy_32_32(Pre_fx[chA][chB], ONE_DIV_EPSILON_MANT); - //q_Pre[chA][chB] = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP); - q_Pre[chA][chB] = sub(62 - ONE_DIV_EPSILON_EXP, q_P); - + Pre_fx[chA][chB] = Mpy_32_32( Pre_fx[chA][chB], ONE_DIV_EPSILON_MANT ); + // q_Pre[chA][chB] = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP); + q_Pre[chA][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); - Pim_fx[chA][chB] = Mpy_32_32(Pim_fx[chA][chB], ONE_DIV_EPSILON_MANT); - //q_Pim[chA][chB] = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP); - q_Pim[chA][chB] = sub(62 - ONE_DIV_EPSILON_EXP, q_P); + Pim_fx[chA][chB] = Mpy_32_32( Pim_fx[chA][chB], ONE_DIV_EPSILON_MANT ); + // q_Pim[chA][chB] = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP); + q_Pim[chA][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); } ELSE { @@ -6436,22 +6435,22 @@ static void formulate2x2MixingMatrix_fx( Pim_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pim_fx[chA][chB], temp, &exp ); q_Pim[chA][chB] = add( sub( q_P, sub( 31, exp_temp ) ), sub( 31, exp ) ); #else - temp = BASOP_Util_Divide3232_Scale_cadence(ONE_IN_Q30, temp, &exp); + temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp ); Pre_shift = norm_l( Pre_fx[chA][chB] ); Pim_shift = norm_l( Pim_fx[chA][chB] ); Pre_fx[chA][chB] = Mpy_32_32( L_shl( Pre_fx[chA][chB], Pre_shift ), temp ); Pim_fx[chA][chB] = Mpy_32_32( L_shl( Pim_fx[chA][chB], Pim_shift ), temp ); - q_temp = add(sub(sub(q_P, exp), sub(31, Q30)),exp_temp); - q_Pre[chA][chB] = add(q_temp, Pre_shift); - q_Pim[chA][chB] = add(q_temp, Pim_shift); + q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp ); + q_Pre[chA][chB] = add( q_temp, Pre_shift ); + q_Pim[chA][chB] = add( q_temp, Pim_shift ); #endif } - if (Pre_fx[chA][chB] == 0) + if ( Pre_fx[chA][chB] == 0 ) { q_Pre[chA][chB] = 31; move16(); } - if (Pim_fx[chA][chB] == 0) + if ( Pim_fx[chA][chB] == 0 ) { q_Pim[chA][chB] = 31; move16(); @@ -6468,52 +6467,52 @@ static void formulate2x2MixingMatrix_fx( { IF( Sx_fx[chB] 
== 0 ) { - Pre_fx[0][chB] = Mpy_32_32(Pre_fx[0][chB], ONE_DIV_EPSILON_MANT); - q_Pre[0][chB] = sub(62 - ONE_DIV_EPSILON_EXP, q_P); - Pim_fx[0][chB] = Mpy_32_32(Pim_fx[0][chB], ONE_DIV_EPSILON_MANT); - q_Pim[0][chB] = sub(62 - ONE_DIV_EPSILON_EXP, q_P); - Pre_fx[1][chB] = Mpy_32_32(Pre_fx[1][chB], ONE_DIV_EPSILON_MANT); - q_Pre[1][chB] = sub(62 - ONE_DIV_EPSILON_EXP, q_P); - Pim_fx[1][chB] = Mpy_32_32(Pim_fx[1][chB], ONE_DIV_EPSILON_MANT); - q_Pim[1][chB] = sub(62 - ONE_DIV_EPSILON_EXP, q_P); + Pre_fx[0][chB] = Mpy_32_32( Pre_fx[0][chB], ONE_DIV_EPSILON_MANT ); + q_Pre[0][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); + Pim_fx[0][chB] = Mpy_32_32( Pim_fx[0][chB], ONE_DIV_EPSILON_MANT ); + q_Pim[0][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); + Pre_fx[1][chB] = Mpy_32_32( Pre_fx[1][chB], ONE_DIV_EPSILON_MANT ); + q_Pre[1][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); + Pim_fx[1][chB] = Mpy_32_32( Pim_fx[1][chB], ONE_DIV_EPSILON_MANT ); + q_Pim[1][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); } ELSE { Word16 Pre_shift, Pim_shift; temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); - temp = BASOP_Util_Divide3232_Scale_cadence(ONE_IN_Q30, temp, &exp); - q_temp = add(sub(sub(q_P, exp), sub(31, Q30)),exp_temp); + temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp ); + q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp ); Pre_shift = norm_l( Pre_fx[0][chB] ); Pim_shift = norm_l( Pim_fx[0][chB] ); Pre_fx[0][chB] = Mpy_32_32( L_shl( Pre_fx[0][chB], Pre_shift ), temp ); Pim_fx[0][chB] = Mpy_32_32( L_shl( Pim_fx[0][chB], Pim_shift ), temp ); - q_Pre[0][chB] = add(q_temp, Pre_shift); - q_Pim[0][chB] = add(q_temp, Pim_shift); + q_Pre[0][chB] = add( q_temp, Pre_shift ); + q_Pim[0][chB] = add( q_temp, Pim_shift ); Pre_shift = norm_l( Pre_fx[1][chB] ); Pim_shift = norm_l( Pim_fx[1][chB] ); Pre_fx[1][chB] = Mpy_32_32( L_shl( Pre_fx[1][chB], Pre_shift ), temp ); Pim_fx[1][chB] = Mpy_32_32( L_shl( 
Pim_fx[1][chB], Pim_shift ), temp ); - q_Pre[1][chB] = add(q_temp, Pre_shift); - q_Pim[1][chB] = add(q_temp, Pim_shift); + q_Pre[1][chB] = add( q_temp, Pre_shift ); + q_Pim[1][chB] = add( q_temp, Pim_shift ); } - if (Pre_fx[0][chB] == 0) + if ( Pre_fx[0][chB] == 0 ) { q_Pre[0][chB] = 31; move16(); } - if (Pim_fx[0][chB] == 0) + if ( Pim_fx[0][chB] == 0 ) { q_Pim[0][chB] = 31; move16(); } - if (Pre_fx[1][chB] == 0) + if ( Pre_fx[1][chB] == 0 ) { q_Pre[1][chB] = 31; move16(); } - if (Pim_fx[1][chB] == 0) + if ( Pim_fx[1][chB] == 0 ) { q_Pim[1][chB] = 31; move16(); -- GitLab From e86c049a22eac81fd3e63d581090203f651ba567 Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Nov 2024 17:43:05 +0100 Subject: [PATCH 03/14] Fix missing prototype for local function --- lib_com/basop_util.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 79f128578..82bae938c 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1038,6 +1038,7 @@ Word32 div_w( Word32 L_num, Word32 L_den ) } } +Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits ); Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits ) { Word32 z; -- GitLab From ad64b2673caea5a652e63142bfeda23793314111 Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 6 Dec 2024 14:08:28 +0100 Subject: [PATCH 04/14] Author: Arthur Tritthart, FhG, 06-DEC-2024 Changes for BASOP tuning (ticket 1009): File lib_com/basop_util.c: -------------------------- modified imult1616 to directly use i_mult, WMOPS weights reduced: 2 -> 1 File lib_com/ivas_tools.c: -------------------------- Added an IF-conditionned branch for interleaved to linear format. 
This is the way, the function is currently used, WMOPS weights reduced: 5 -> 2 File lib_com/tools_fx.c: ------------------------ Simplified set32_fx function, stripped use of L_deposit_l, WMOPS weights reduced 2 -> 1 File lib_rend/ivas_dirac_decorr_dec.c: -------------------------------------- - use of is_zero_arr, stripped constant find_guarded_bits(2) - strip offset computation for interleaved real/imag buffer - tune AR filter loop for WMOPS - fix and simplify 64-bit power computation loop - tuned energy smoothing loops for WMOPS - skip energy scaling, if q_shift equals zero - strip offset computation for interleaved real/imag buffer File lib_dec/ivas_mc_param_dec.c, ivas_mct_dec_mct_fx.c: -------------------------------------------------------- - simplify zero checks for output synthesis - replace div(x / 1) or div(x / 2) by shift ops - simplified shifting output Total WMOPS saving for bitstream stv714MC48c_128kbps.192/7_1_4: 164 WMops --- lib_com/basop_util.c | 2 +- lib_com/ivas_tools.c | 17 ++++ lib_com/tools_fx.c | 22 ++--- lib_dec/ivas_mc_param_dec.c | 28 ++---- lib_dec/ivas_mct_dec_mct_fx.c | 16 +++- lib_rend/ivas_dirac_decorr_dec.c | 145 +++++++++++++++++++++++-------- 6 files changed, 151 insertions(+), 79 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 82bae938c..17ef53245 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1626,7 +1626,7 @@ Word16 findIndexOfMinWord32( Word32 *x, const Word16 len ) Word16 imult1616( Word16 x, Word16 y ) { assert( (int) x * (int) y < 32768 && (int) x * (int) y >= -32768 ); - return extract_l( L_mult0( x, y ) ); + return i_mult(x, y); } Word32 imult3216( Word32 x, Word16 y ) diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index bead42ab9..7d6dc9376 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -461,6 +461,23 @@ void v_add_inc_fx( ) { Word16 i; + + /* The use of this function is currently always for the interleaved input format, */ + /* that means, the 
following conditions are always true and thus obsolete. */ + test(); + test(); + test(); + test(); + IF ((sub(x_inc, 2) == 0) && (sub(x2_inc, 2) == 0) && (sub(y_inc, 1) == 0) && (&x1[1] == &x2[0]) ) + { + /* Interleaved input case, linear output */ + FOR( i = 0; i < N; i++ ) + { + y[i] = L_add( x1[2*i+0], x1[2*i+1] ); /*Qx*/ + move32(); + } + return; + } Word16 ix1 = 0; Word16 ix2 = 0; Word16 iy = 0; diff --git a/lib_com/tools_fx.c b/lib_com/tools_fx.c index 3580e632e..eba9871fb 100644 --- a/lib_com/tools_fx.c +++ b/lib_com/tools_fx.c @@ -648,25 +648,13 @@ void set32_fx( const Word16 N /* i : Lenght of the vector */ ) { - Word16 i, tmp; - tmp = extract_l( a ); - IF( EQ_32( L_deposit_l( tmp ), a ) ) - { - FOR( i = 0; i < N; i++ ) - { - y[i] = L_deposit_l( tmp ); - move32(); - } - } - ELSE + Word16 i; + + FOR( i = 0; i < N; i++ ) { - FOR( i = 0; i < N; i++ ) - { - y[i] = a; - move32(); - } + y[i] = a; + move32(); } - return; } /*-------------------------------------------------------------------* diff --git a/lib_dec/ivas_mc_param_dec.c b/lib_dec/ivas_mc_param_dec.c index b9e9137a1..93cf30c05 100644 --- a/lib_dec/ivas_mc_param_dec.c +++ b/lib_dec/ivas_mc_param_dec.c @@ -3786,38 +3786,20 @@ void ivas_param_mc_dec_render_fx( slot_idx_start_cldfb_synth = 0; move16(); - Flag is_zero = 1; - move32(); FOR( j = 0; j < st_ivas->hParamMC->hMetadataPMC->nbands_coded; j++ ) { - is_zero = 1; - move16(); - FOR( i = 0; i < hParamMC->h_output_synthesis_cov_state.mixing_matrix_len; i++ ) + Flag is_zero = is_zero_arr( hParamMC->h_output_synthesis_cov_state.mixing_matrix_fx[j], hParamMC->h_output_synthesis_cov_state.mixing_matrix_len ); { - IF( hParamMC->h_output_synthesis_cov_state.mixing_matrix_fx[j][i] != 0 ) + if ( is_zero != 0 ) { - is_zero = 0; + hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_exp[j] = 0; move16(); } } - IF( is_zero ) - { - hParamMC->h_output_synthesis_cov_state.mixing_matrix_exp[j] = 0; - move16(); - } - is_zero = 1; - move16(); IF( LT_16( 
st_ivas->hParamMC->band_grouping[j], st_ivas->hParamMC->h_output_synthesis_params.max_band_decorr ) ) { - FOR( i = 0; i < hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_len; i++ ) - { - IF( NE_32( hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_fx[j][i], 0 ) ) - { - is_zero = 0; - move16(); - } - } - IF( is_zero ) + is_zero = is_zero_arr( hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_fx[j], hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_len ); + if( is_zero != 0) { hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_exp[j] = 0; move16(); diff --git a/lib_dec/ivas_mct_dec_mct_fx.c b/lib_dec/ivas_mct_dec_mct_fx.c index 0f9bd98cc..a21dbb1fc 100644 --- a/lib_dec/ivas_mct_dec_mct_fx.c +++ b/lib_dec/ivas_mct_dec_mct_fx.c @@ -316,6 +316,7 @@ void mctStereoIGF_dec_fx( test(); IF( NE_16( hMCT->hBlockData[b]->hStereoMdct->IGFStereoMode[k], SMDCT_DUAL_MONO ) || NE_16( hMCT->hBlockData[b]->hStereoMdct->mdct_stereo_mode[k], SMDCT_DUAL_MONO ) ) { +#if 0 tmp = BASOP_Util_Divide1616_Scale( sts[0]->hTcxCfg->tcx_coded_lines, nSubframes, &tmp_e ); L_spec[0] = shr( tmp, add( 15, negate( tmp_e ) ) ); move16(); @@ -325,6 +326,15 @@ void mctStereoIGF_dec_fx( tmp = BASOP_Util_Divide1616_Scale( sts[0]->hTcxDec->L_frameTCX, nSubframes, &tmp_e ); L_frameTCX_nSubframe = shr( tmp, add( 15, negate( tmp_e ) ) ); +#else + assert( nSubframes == 1 || nSubframes == 2 ); + /* Note: nSubframes is in limited range [1, 2] for this function */ + Word16 shr_div = sub( nSubframes, 1 ); /* 2 -> 1, 1 -> 0 */ + L_spec[0] = shr(sts[0]->hTcxCfg->tcx_coded_lines, shr_div); + move16(); + L_frame_nSubframe = shr(sts[0]->L_frame, shr_div); + L_frameTCX_nSubframe = shr( sts[0]->hTcxDec->L_frameTCX , shr_div); +#endif init_tcx_info_fx( sts[0], L_frame_nSubframe, L_frameTCX_nSubframe, k, bfi, &tcx_offset[0], &tcx_offsetFB[0], &L_frame[0], &L_frameTCX[0], &left_rect[0], &L_spec[0] ); @@ -334,14 +344,16 @@ void mctStereoIGF_dec_fx( decoder_tcx_IGF_stereo_fx( sts, 
hMCT->hBlockData[b]->hStereoMdct, hMCT->hBlockData[b]->mask, p_x, p_x_e, p_x_len, L_frame[0], left_rect[0], k, bfi, 1 /* MCT_flag */ ); // Shifting output with variable exponent back to Q12 + Word16 shr_k = sub( 31 - Q12, p_x_e[0][k] ); FOR( Word16 i = 0; i < p_x_len[0][k]; i++ ) { - p_x[0][k][i] = L_shr( p_x[0][k][i], sub( 31 - Q12, p_x_e[0][k] ) ); + p_x[0][k][i] = L_shr( p_x[0][k][i], shr_k ); move32(); } + shr_k = sub( 31 - Q12, p_x_e[1][k] ); FOR( Word16 i = 0; i < p_x_len[1][k]; i++ ) { - p_x[1][k][i] = L_shr( p_x[1][k][i], sub( 31 - Q12, p_x_e[1][k] ) ); + p_x[1][k][i] = L_shr( p_x[1][k][i], shr_k ); move32(); } } diff --git a/lib_rend/ivas_dirac_decorr_dec.c b/lib_rend/ivas_dirac_decorr_dec.c index dd71510d7..a5d780a11 100644 --- a/lib_rend/ivas_dirac_decorr_dec.c +++ b/lib_rend/ivas_dirac_decorr_dec.c @@ -57,6 +57,9 @@ #define DIRAC_DUCK_GAMMA_FX 1610612736 /* Q30 */ #define DIRAC_DUCK_ALPHA_FX 1717986944 /* Q31 */ #define ONE_M_DIRAC_DUCK_ALPHA 429496736 /* Q31 */ + +/* Maximal useful q-format, represents range of 2^-126 (float min) */ +#define MAX_Q_FX 157 #endif /*------------------------------------------------------------------------- @@ -1118,16 +1121,20 @@ void ivas_dirac_dec_decorr_process_fx( Word16 decorr_buff_tot_len = imult1616( imult1616( shl( decorr_buffer_len, 1 ), max_band_decorr ), num_channels ); guarded_bits = 0; - FOR( Word16 i = 0; i < decorr_buff_tot_len; i++ ) + + Flag is_zero = is_zero_arr( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ); + if (is_zero == 0) + guarded_bits = 3; + + IF(is_zero == 0) { - IF( h_freq_domain_decorr_ap_state->decorr_buffer_fx[i] != 0 ) + q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits ); + IF (q_shift != 0) { - guarded_bits = s_max( find_guarded_bits_fx( 2 ), 3 ); + Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift ); + q_decorr_buf = add( q_decorr_buf, q_shift ); } } - 
q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits ); - Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift ); - q_decorr_buf = add( q_decorr_buf, q_shift ); q_shift = getScaleFactor32( aux_buffer_fx, imult1616( imult1616( 2, num_protos_dir ), max_band_decorr_temp ) ); @@ -1191,9 +1198,7 @@ void ivas_dirac_dec_decorr_process_fx( FOR( l = 0; l < filter_length; l++ ) { frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] ); // Q_qux -3 = q_deorr - // frame_ma_fx[2 * l] = L_shr(frame_ma_fx[2 * l],3); // scaling to q_decorr_buf - frame_ma_fx[add( shl( l, 1 ), 1 )] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_deorr - // frame_ma_fx[2 * l + 1] = L_shr(frame_ma_fx[2 * l + 1], 3); // scaling to q_decorr_buf + frame_ma_fx[2 * l + 1] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_deorr move32(); move32(); } @@ -1207,26 +1212,28 @@ void ivas_dirac_dec_decorr_process_fx( /*get values for AR part */ filter_frame_real_fx = decorr_buffer_ptr_fx[0]; // q_decorr - filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_deocrr + filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_decorr - decorr_buffer_ptr_fx += shl( decorr_buffer_step, 1 ); + Word16 decorr_buffer_step2x = shl(decorr_buffer_step, 1); + + decorr_buffer_ptr_fx += decorr_buffer_step2x; + move16(); FOR( l = 1; l < filter_length; l++ ) { // q adjustment needed// - decorr_buffer_ptr_fx[0] = L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ); // q_decorr Word32 temp_1 = Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 temp_1 = L_shl( temp_1, 3 ); // q_decorr - decorr_buffer_ptr_fx[0] = L_sub( decorr_buffer_ptr_fx[0], temp_1 ); // q_deocor - decorr_buffer_ptr_fx[1] = L_add( decorr_buffer_ptr_fx[1], frame_ma_fx[add( shl( l, 1 ), 1 )] ); // q_decorr + decorr_buffer_ptr_fx[0] = L_sub( L_add( 
decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ), temp_1 );// q_deocor + move32(); + Word32 temp_2 = Mpy_32_16_1( filter_frame_imag_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 temp_2 = L_shl( temp_2, 3 ); // q_decorr - decorr_buffer_ptr_fx[1] = L_sub( decorr_buffer_ptr_fx[1], temp_2 ); // q_decorr - decorr_buffer_ptr_fx += imult1616( 2, decorr_buffer_step ); - move32(); - move32(); - move32(); + decorr_buffer_ptr_fx[1] = L_sub( L_add( decorr_buffer_ptr_fx[1], frame_ma_fx[2 * l + 1] ), temp_2 );// q_decorr move32(); + + decorr_buffer_ptr_fx += decorr_buffer_step2x; + move16(); } } } @@ -1283,6 +1290,10 @@ void ivas_dirac_dec_decorr_process_fx( q_direct_energy = q_aux_buffer; move16(); +#if 0 + /* Attention: this loop reports norm=0, whenever any data is 0. */ + /* Therefore, useful left-shifts are skipped, accuracy is lost. */ + /* calculate the power of the decorrelated signal */ FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) { @@ -1295,6 +1306,37 @@ void ivas_dirac_dec_decorr_process_fx( norm = s_min( norm, W_norm( aux_64[add( offset2, i )] ) ); } } +#else + /* calculate the power of the decorrelated signal */ + Word64 *m64_aux = aux_64; + move32(); + Word64 min64 = (Word64) 0; + move64(); + Word32 *m32_frame_dec_fx = frame_dec_fx; + move32(); + offset1 = shl(num_freq_bands, 1); + offset2 = shl( max_band_decorr, 1 ); + + + FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) + { + FOR( Word16 i = 0; i < offset2; i++ ) + { + m64_aux[i] = W_mult0_32_32( m32_frame_dec_fx[i], m32_frame_dec_fx[i] ); + move64(); + if ( GT_64( m64_aux[i], min64 ) ) + { + min64 = m64_aux[i]; + move64(); + } + } + m64_aux += offset2; + m32_frame_dec_fx += offset1; + move64(); + move32(); + } + norm = W_norm(min64); +#endif FOR( Word16 i = 0; i < shl( imult1616( num_channels, max_band_decorr ), 1 ); i++ ) { @@ -1310,32 +1352,63 @@ void ivas_dirac_dec_decorr_process_fx( } /* smooth energies */ - v_multc_fixed( aux_buffer_fx, ONE_M_DIRAC_DUCK_ALPHA, aux_buffer_fx, imult1616( 
num_channels, max_band_decorr ) ); // q_aux_buffer - v_multc_fixed( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, DIRAC_DUCK_ALPHA_FX, h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ) ); // same-q + Word16 len = imult1616( num_channels, max_band_decorr ); + Word16 aux_e = sub( 31, q_aux_buffer ); + Word16 max_e = s_max( aux_e, e_reverb_energy_smooth ); + Word16 shr_aux = sub( max_e, aux_e ); /* Note: headroom is zero */ + Word16 shr_res = sub( max_e, e_reverb_energy_smooth ); /* Note: headroom is zero */ - v_add_fixed_me( aux_buffer_fx, sub( 31, q_aux_buffer ), h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, e_reverb_energy_smooth, h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, &e_reverb_energy_smooth, imult1616( num_channels, max_band_decorr ), 0 ); - h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = sub( 31, e_reverb_energy_smooth ); + /* Note: DIRAC_DUCK_ALPHA_FX and ONE_M_DIRAC_DUCK_ALPHA are both in Q31 (e=0) */ + /* => a multiplication with this values does not change the q/e value. 
*/ - v_multc_fixed( direct_energy_fx, ONE_M_DIRAC_DUCK_ALPHA, direct_energy_fx, imult1616( num_protos_dir, max_band_decorr ) ); // same q + FOR(Word16 i = 0; i < len; i++) + { + h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i] = L_add( + L_shr( Mpy_32_32( aux_buffer_fx[i], ONE_M_DIRAC_DUCK_ALPHA), shr_aux ), + L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_res ) ); + move32(); + } + e_reverb_energy_smooth = max_e; + move16(); + h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = sub( 31, e_reverb_energy_smooth ); + move16(); - v_multc_fixed( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, DIRAC_DUCK_ALPHA_FX, h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ) ); // same q + len = imult1616( num_protos_dir, max_band_decorr ); + Word16 den_e = sub( 31, q_direct_energy ); + Word16 max_x = s_max( den_e, e_direct_energy_smooth ); + Word16 shr_den = sub( max_x, den_e ); /* Note: headroom is zero */ + Word16 shr_des = sub( max_x, e_direct_energy_smooth ); /* Note: headroom is zero */ - v_add_fixed_me( direct_energy_fx, sub( 31, q_direct_energy ), h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, e_direct_energy_smooth, h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, &e_direct_energy_smooth, imult1616( num_protos_dir, max_band_decorr ), 0 ); + FOR( Word16 i = 0; i < len; i++ ) + { + h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i] = L_add( + L_shr( Mpy_32_32( direct_energy_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_den ), + L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_des ) ); + move32(); + } + e_direct_energy_smooth = max_x; + move16(); h_freq_domain_decorr_ap_state->q_direct_energy_smooth = sub( 31, e_direct_energy_smooth ); move16(); // scaling energy buffers for better precision for higher values// q_shift = L_norm_arr( 
h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ) ); - Scale_sig32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ), q_shift ); - h_freq_domain_decorr_ap_state->q_direct_energy_smooth = add( h_freq_domain_decorr_ap_state->q_direct_energy_smooth, q_shift ); - move16(); - - + IF(q_shift != 0) + { + Scale_sig32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ), q_shift ); + h_freq_domain_decorr_ap_state->q_direct_energy_smooth = add( h_freq_domain_decorr_ap_state->q_direct_energy_smooth, q_shift ); + move16(); + } q_shift = L_norm_arr( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ) ); - Scale_sig32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ), q_shift ); - h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift ); - move16(); + IF( q_shift != 0 ) + { + Scale_sig32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ), q_shift ); + h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift ); + move16(); + } + h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = min(MAX_Q_FX, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth); + h_freq_domain_decorr_ap_state->q_direct_energy_smooth = min(MAX_Q_FX, h_freq_domain_decorr_ap_state->q_direct_energy_smooth); e_reverb_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth ); e_direct_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_direct_energy_smooth ); @@ -1392,7 +1465,7 @@ void ivas_dirac_dec_decorr_process_fx( duck_gain = shl( duck_gain, sub( e_duck_gain, 1 ) ); // Q14 frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * 
band_idx], duck_gain ), 1 ); // q_frame_f - frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )], duck_gain ), 1 ); // q_frame_f + frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 1 ); // q_frame_f move32(); move32(); } @@ -1414,7 +1487,7 @@ void ivas_dirac_dec_decorr_process_fx( duck_gain = shl( duck_gain, sub( e_duck_gain, 2 ) ); // Q13 } frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 ); // q_frame_dec - frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )], duck_gain ), 2 ); // q_frame_dec + frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 2 ); // q_frame_dec move32(); move32(); } -- GitLab From c703268d8f13f1c03785459079fc0a7e10cd80d7 Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 6 Dec 2024 14:32:12 +0100 Subject: [PATCH 05/14] apply patch for clang format --- lib_com/basop_util.c | 2 +- lib_com/ivas_tools.c | 4 +-- lib_dec/ivas_mc_param_dec.c | 2 +- lib_dec/ivas_mct_dec_mct_fx.c | 6 ++-- lib_rend/ivas_dirac_decorr_dec.c | 58 ++++++++++++++++---------------- 5 files changed, 36 insertions(+), 36 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 17ef53245..d7fc7ec72 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1626,7 +1626,7 @@ Word16 findIndexOfMinWord32( Word32 *x, const Word16 len ) Word16 imult1616( Word16 x, Word16 y ) { assert( (int) x * (int) y < 32768 && (int) x * (int) y >= -32768 ); - return i_mult(x, y); + return i_mult( x, y ); } Word32 imult3216( Word32 x, Word16 y ) diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index 7d6dc9376..335684f37 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -468,12 +468,12 @@ void v_add_inc_fx( test(); test(); test(); - IF ((sub(x_inc, 2) == 0) && (sub(x2_inc, 
2) == 0) && (sub(y_inc, 1) == 0) && (&x1[1] == &x2[0]) ) + IF( ( sub( x_inc, 2 ) == 0 ) && ( sub( x2_inc, 2 ) == 0 ) && ( sub( y_inc, 1 ) == 0 ) && ( &x1[1] == &x2[0] ) ) { /* Interleaved input case, linear output */ FOR( i = 0; i < N; i++ ) { - y[i] = L_add( x1[2*i+0], x1[2*i+1] ); /*Qx*/ + y[i] = L_add( x1[2 * i + 0], x1[2 * i + 1] ); /*Qx*/ move32(); } return; diff --git a/lib_dec/ivas_mc_param_dec.c b/lib_dec/ivas_mc_param_dec.c index 93cf30c05..b6ad8d146 100644 --- a/lib_dec/ivas_mc_param_dec.c +++ b/lib_dec/ivas_mc_param_dec.c @@ -3799,7 +3799,7 @@ void ivas_param_mc_dec_render_fx( IF( LT_16( st_ivas->hParamMC->band_grouping[j], st_ivas->hParamMC->h_output_synthesis_params.max_band_decorr ) ) { is_zero = is_zero_arr( hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_fx[j], hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_len ); - if( is_zero != 0) + if ( is_zero != 0 ) { hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_exp[j] = 0; move16(); diff --git a/lib_dec/ivas_mct_dec_mct_fx.c b/lib_dec/ivas_mct_dec_mct_fx.c index a21dbb1fc..de7fc3a37 100644 --- a/lib_dec/ivas_mct_dec_mct_fx.c +++ b/lib_dec/ivas_mct_dec_mct_fx.c @@ -330,10 +330,10 @@ void mctStereoIGF_dec_fx( assert( nSubframes == 1 || nSubframes == 2 ); /* Note: nSubframes is in limited range [1, 2] for this function */ Word16 shr_div = sub( nSubframes, 1 ); /* 2 -> 1, 1 -> 0 */ - L_spec[0] = shr(sts[0]->hTcxCfg->tcx_coded_lines, shr_div); + L_spec[0] = shr( sts[0]->hTcxCfg->tcx_coded_lines, shr_div ); move16(); - L_frame_nSubframe = shr(sts[0]->L_frame, shr_div); - L_frameTCX_nSubframe = shr( sts[0]->hTcxDec->L_frameTCX , shr_div); + L_frame_nSubframe = shr( sts[0]->L_frame, shr_div ); + L_frameTCX_nSubframe = shr( sts[0]->hTcxDec->L_frameTCX, shr_div ); #endif init_tcx_info_fx( sts[0], L_frame_nSubframe, L_frameTCX_nSubframe, k, bfi, &tcx_offset[0], &tcx_offsetFB[0], &L_frame[0], &L_frameTCX[0], &left_rect[0], &L_spec[0] ); diff --git a/lib_rend/ivas_dirac_decorr_dec.c 
b/lib_rend/ivas_dirac_decorr_dec.c index a5d780a11..90b00880e 100644 --- a/lib_rend/ivas_dirac_decorr_dec.c +++ b/lib_rend/ivas_dirac_decorr_dec.c @@ -59,7 +59,7 @@ #define ONE_M_DIRAC_DUCK_ALPHA 429496736 /* Q31 */ /* Maximal useful q-format, represents range of 2^-126 (float min) */ -#define MAX_Q_FX 157 +#define MAX_Q_FX 157 #endif /*------------------------------------------------------------------------- @@ -1123,13 +1123,13 @@ void ivas_dirac_dec_decorr_process_fx( guarded_bits = 0; Flag is_zero = is_zero_arr( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ); - if (is_zero == 0) - guarded_bits = 3; + if ( is_zero == 0 ) + guarded_bits = 3; - IF(is_zero == 0) + IF( is_zero == 0 ) { q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits ); - IF (q_shift != 0) + IF( q_shift != 0 ) { Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift ); q_decorr_buf = add( q_decorr_buf, q_shift ); @@ -1197,7 +1197,7 @@ void ivas_dirac_dec_decorr_process_fx( /* MA part of filter impulse response */ FOR( l = 0; l < filter_length; l++ ) { - frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] ); // Q_qux -3 = q_deorr + frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] ); // Q_qux -3 = q_deorr frame_ma_fx[2 * l + 1] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_deorr move32(); move32(); @@ -1214,7 +1214,7 @@ void ivas_dirac_dec_decorr_process_fx( filter_frame_real_fx = decorr_buffer_ptr_fx[0]; // q_decorr filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_decorr - Word16 decorr_buffer_step2x = shl(decorr_buffer_step, 1); + Word16 decorr_buffer_step2x = shl( decorr_buffer_step, 1 ); decorr_buffer_ptr_fx += decorr_buffer_step2x; move16(); @@ -1222,14 +1222,14 @@ void ivas_dirac_dec_decorr_process_fx( FOR( l = 1; l < filter_length; l++ ) { // q adjustment needed// - Word32 temp_1 
= Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 - temp_1 = L_shl( temp_1, 3 ); // q_decorr - decorr_buffer_ptr_fx[0] = L_sub( L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ), temp_1 );// q_deocor + Word32 temp_1 = Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 + temp_1 = L_shl( temp_1, 3 ); // q_decorr + decorr_buffer_ptr_fx[0] = L_sub( L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ), temp_1 ); // q_deocor move32(); - Word32 temp_2 = Mpy_32_16_1( filter_frame_imag_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 - temp_2 = L_shl( temp_2, 3 ); // q_decorr - decorr_buffer_ptr_fx[1] = L_sub( L_add( decorr_buffer_ptr_fx[1], frame_ma_fx[2 * l + 1] ), temp_2 );// q_decorr + Word32 temp_2 = Mpy_32_16_1( filter_frame_imag_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 + temp_2 = L_shl( temp_2, 3 ); // q_decorr + decorr_buffer_ptr_fx[1] = L_sub( L_add( decorr_buffer_ptr_fx[1], frame_ma_fx[2 * l + 1] ), temp_2 ); // q_decorr move32(); decorr_buffer_ptr_fx += decorr_buffer_step2x; @@ -1309,15 +1309,15 @@ void ivas_dirac_dec_decorr_process_fx( #else /* calculate the power of the decorrelated signal */ Word64 *m64_aux = aux_64; - move32(); + move32(); Word64 min64 = (Word64) 0; - move64(); + move64(); Word32 *m32_frame_dec_fx = frame_dec_fx; move32(); - offset1 = shl(num_freq_bands, 1); + offset1 = shl( num_freq_bands, 1 ); offset2 = shl( max_band_decorr, 1 ); - + FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) { FOR( Word16 i = 0; i < offset2; i++ ) @@ -1335,7 +1335,7 @@ void ivas_dirac_dec_decorr_process_fx( move64(); move32(); } - norm = W_norm(min64); + norm = W_norm( min64 ); #endif FOR( Word16 i = 0; i < shl( imult1616( num_channels, max_band_decorr ), 1 ); i++ ) @@ -1362,11 +1362,11 @@ void ivas_dirac_dec_decorr_process_fx( /* Note: DIRAC_DUCK_ALPHA_FX and ONE_M_DIRAC_DUCK_ALPHA are both in Q31 (e=0) */ /* => a multiplication with this values does not change the q/e value. 
*/ - FOR(Word16 i = 0; i < len; i++) + FOR( Word16 i = 0; i < len; i++ ) { h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i] = L_add( - L_shr( Mpy_32_32( aux_buffer_fx[i], ONE_M_DIRAC_DUCK_ALPHA), shr_aux ), - L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_res ) ); + L_shr( Mpy_32_32( aux_buffer_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_aux ), + L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_res ) ); move32(); } e_reverb_energy_smooth = max_e; @@ -1382,9 +1382,9 @@ void ivas_dirac_dec_decorr_process_fx( FOR( Word16 i = 0; i < len; i++ ) { - h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i] = L_add( - L_shr( Mpy_32_32( direct_energy_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_den ), - L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_des ) ); + h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i] = L_add( + L_shr( Mpy_32_32( direct_energy_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_den ), + L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_des ) ); move32(); } e_direct_energy_smooth = max_x; @@ -1394,7 +1394,7 @@ void ivas_dirac_dec_decorr_process_fx( // scaling energy buffers for better precision for higher values// q_shift = L_norm_arr( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ) ); - IF(q_shift != 0) + IF( q_shift != 0 ) { Scale_sig32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ), q_shift ); h_freq_domain_decorr_ap_state->q_direct_energy_smooth = add( h_freq_domain_decorr_ap_state->q_direct_energy_smooth, q_shift ); @@ -1407,8 +1407,8 @@ void ivas_dirac_dec_decorr_process_fx( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift ); move16(); } - 
h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = min(MAX_Q_FX, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth); - h_freq_domain_decorr_ap_state->q_direct_energy_smooth = min(MAX_Q_FX, h_freq_domain_decorr_ap_state->q_direct_energy_smooth); + h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth ); + h_freq_domain_decorr_ap_state->q_direct_energy_smooth = min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_direct_energy_smooth ); e_reverb_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth ); e_direct_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_direct_energy_smooth ); @@ -1464,7 +1464,7 @@ void ivas_dirac_dec_decorr_process_fx( duck_gain = shl( duck_gain, sub( e_duck_gain, 1 ) ); // Q14 - frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 ); // q_frame_f + frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 ); // q_frame_f frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 1 ); // q_frame_f move32(); move32(); @@ -1486,7 +1486,7 @@ void ivas_dirac_dec_decorr_process_fx( { duck_gain = shl( duck_gain, sub( e_duck_gain, 2 ) ); // Q13 } - frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 ); // q_frame_dec + frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 ); // q_frame_dec frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 2 ); // q_frame_dec move32(); move32(); -- GitLab From ff62d7455ef525a6f64ba288344b6532a76b2d56 Mon Sep 17 00:00:00 2001 From: Arthur Date: Tue, 10 Dec 2024 17:48:40 +0100 Subject: [PATCH 06/14] Fix pipeline issues due to inverse square root. Fix other exponent settings. 
--- lib_rend/ivas_dirac_dec_binaural_functions.c | 26 +++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c index 3bf7060fe..14b55a94f 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions.c @@ -83,7 +83,6 @@ Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; #define EPSILON_EXP ( -39 ) #define ONE_DIV_EPSILON_MANT 1953125000 /* 1e+12 = 0,9094947*(2^40) */ #define ONE_DIV_EPSILON_EXP ( 40 ) - #endif #define ADAPT_HTPROTO_ROT_LIM_1 0.8f @@ -5138,14 +5137,16 @@ static void eig2x2_fx( q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); -#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC +#if !defined(FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) || 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); #else - normVal_fx = ISqrt32( tmp3, &exp_tmp3 ); - q_tmp2 = sub( 31, exp_tmp3 ); + /* Note: This code part does not work yet, see pipeline issue for BASOP #1009 */ + /* although the same code works at other places: mantissa and q_format is fine */ + normVal_fx = ISqrt32( tmp3, &exp ); + q_tmp2 = sub( 31, exp ); #endif IF( LT_16( q_tmp1, q_c ) ) { @@ -5710,14 +5711,11 @@ static void chol2x2_fx( temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp ); q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) ); #else - Word32 my_temp; - Word16 my_q_tmp; - my_temp = temp; - my_q_tmp = q_tmp; - temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp ); - q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) ); - my_temp = Mpy_32_32( my_temp, ONE_DIV_EPSILON_MANT ); - my_q_tmp = add( my_q_tmp, ONE_DIV_EPSILON_EXP ); + Word16 norm = norm_l(temp); + temp = L_shl(temp, norm); + q_tmp = add(q_tmp, norm); + temp = 
Mpy_32_32( temp, ONE_DIV_EPSILON_MANT ); + q_tmp = sub( q_tmp, ONE_DIV_EPSILON_EXP ); #endif } ELSE @@ -6120,11 +6118,9 @@ static void formulate2x2MixingMatrix_fx( // 4611686 = Q62 IF( maxEne_fx == 0 ) { - // maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12f in Q62 - // q_maxEneDiv = add( sub( 31, exp ), sub( Q30, 62 ) ); maxEneDiv_fx = ONE_DIV_EPSILON_MANT; move32(); - q_maxEneDiv = ONE_DIV_EPSILON_EXP; + q_maxEneDiv = 31 - ONE_DIV_EPSILON_EXP; move16(); } ELSE -- GitLab From 3ccde0ad7308ab1d704b117fcdd3af0e74feafce Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Wed, 11 Dec 2024 14:14:41 +0100 Subject: [PATCH 07/14] disable WMOPS in options.h --- lib_com/options.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/options.h b/lib_com/options.h index b62ba1b83..a0e901e21 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -54,7 +54,7 @@ #define SUPPORT_JBM_TRACEFILE /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */ -#define WMOPS /* Activate complexity and memory counters */ +/*#define WMOPS*/ /* Activate complexity and memory counters */ #ifdef WMOPS /*#define WMOPS_PER_FRAME*/ /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */ /*#define MEM_COUNT_DETAILS*/ /* Output detailed memory analysis for the worst-case frame (writes to the file "mem_analysis.csv") */ -- GitLab From 9597707aae3d5d35df73e707b174e5a63f1fe04c Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Wed, 11 Dec 2024 14:18:15 +0100 Subject: [PATCH 08/14] formatting --- lib_rend/ivas_dirac_dec_binaural_functions.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c index 14b55a94f..10a9ba029 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions.c +++ 
b/lib_rend/ivas_dirac_dec_binaural_functions.c @@ -5137,7 +5137,7 @@ static void eig2x2_fx( q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); -#if !defined(FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) || 1 +#if !defined( FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC ) || 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 @@ -5711,9 +5711,9 @@ static void chol2x2_fx( temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp ); q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) ); #else - Word16 norm = norm_l(temp); - temp = L_shl(temp, norm); - q_tmp = add(q_tmp, norm); + Word16 norm = norm_l( temp ); + temp = L_shl( temp, norm ); + q_tmp = add( q_tmp, norm ); temp = Mpy_32_32( temp, ONE_DIV_EPSILON_MANT ); q_tmp = sub( q_tmp, ONE_DIV_EPSILON_EXP ); #endif -- GitLab From 327c77c902322bf064ff23da0d9c48da9ef2868f Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Wed, 11 Dec 2024 15:22:02 +0100 Subject: [PATCH 09/14] revert various whitespace changes --- lib_rend/ivas_dirac_dec_binaural_functions.c | 28 ++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c index 10a9ba029..cb8d772d7 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions.c @@ -1639,6 +1639,7 @@ static void ivas_dirac_dec_binaural_internal_fx( st_ivas->cldfbSynDec[ch]->Q_cldfb_state = Q11; move16(); } + return; } #endif @@ -2388,6 +2389,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move32(); } } + /* Apply EQ at low bit rates */ IF( applyLowBitRateEQ != 0 ) { @@ -2404,6 +2406,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move32(); } } + test(); test(); IF( ( EQ_32( ivas_format, SBA_FORMAT ) || 
EQ_32( ivas_format, SBA_ISM_FORMAT ) ) && EQ_16( nchan_transport, 2 ) ) @@ -2439,6 +2442,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric } } } + /* Determine target covariance matrix containing target binaural properties */ FOR( bin = 0; bin < nBins; bin++ ) { @@ -2534,6 +2538,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move16(); gainCacheBaseIndex = add( 6, ismDirIndex ); } + diffuseness_fx = L_sub( diffuseness_fx, ratio_fx ); /* diffuseness = 1 - ratio1 - ratio2 */ if ( diffuseness_fx < 0 ) @@ -2578,7 +2583,9 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric altSpreadCoh_fx = sub( 32767, shl_sat( div_s( numr, denr ), sub( den_e, num_e ) ) ); // 4289 = pi/6 in Q13 spreadCoh_fx = s_max( spreadCoh_fx, altSpreadCoh_fx ); } + getDirectPartGains_fx( bin, aziDeg, eleDeg, &lRealp_fx, &lImagp_fx, &rRealp_fx, &rImagp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex], isHeadtracked ); + Word16 q_lr = Q28; move16(); if ( hDiracDecBin->renderStereoOutputInsteadOfBinaural ) @@ -2588,6 +2595,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric spreadCoh_fx = 0; move32(); } + IF( spreadCoh_fx > 0 ) { Word32 centerMul_fx, sidesMul_fx; @@ -2640,6 +2648,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric /* Apply the gain for the right source of the three coherent sources. * -30 degrees to 330 wrapping due to internal functions. 
*/ + getDirectPartGains_fx( bin, aziDeg + 330, eleDeg, &lRealpTmp_fx, &lImagpTmp_fx, &rRealpTmp_fx, &rImagpTmp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex + 2], isHeadtracked ); hrtfEneSides_fx = L_add( hrtfEneSides_fx, @@ -2661,6 +2670,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric eneCorrectionFactor_fx = BASOP_Util_Divide3232_Scale( L_add( Mpy_32_32( hrtfEneSides_fx, Mpy_32_32( sidesMul_fx, sidesMul_fx ) ), Mpy_32_32( hrtfEneCenter_fx, Mpy_32_32( centerMul_fx, centerMul_fx ) ) ), L_max( 1, hrtfEneRealized_fx ), &eneCorrectionFactor_e ); + /* Weighting factors to determine appropriate target spectrum for spread coherent sound */ IF( LT_16( spreadCoh_fx, 16384 ) ) { @@ -2717,6 +2727,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric q_lr = Q23; move16(); } + hrtfEne_fx[0] = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), Mpy_32_32( lImagp_fx, lImagp_fx ) ); // Q( 2*q_lr - 31 ) hrtfEne_fx[1] = L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), Mpy_32_32( rImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 ) move32(); @@ -2801,6 +2812,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric } move32(); } + /* Store parameters for formulating average diffuseness over frame */ Word32 frameMeanDiffuseness = BASOP_Util_Add_Mant32Exp( hDiracDecBin->frameMeanDiffuseness_fx[bin], 2 /*Q29*/, diffEneValForDecorrelationReduction_fx, sub( 31, q_diffEneValForDecorrelationReduction ), &exp1 ); // exp = exp1 frameMeanDiffusenessEneWeight_fx[bin] = L_add( frameMeanDiffusenessEneWeight_fx[bin], meanEnePerCh_fx ); @@ -2812,6 +2824,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29 move32(); } + test(); /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. 
*/ IF( EQ_32( ivas_format, MASA_FORMAT ) && LT_32( ivas_total_brate, MASA_STEREO_MIN_BITRATE ) ) @@ -2914,6 +2927,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric move16(); } } + return; } #endif @@ -3145,6 +3159,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices( } } } + return; } #else @@ -3289,6 +3304,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( } move32(); move16(); + formulate2x2MixingMatrix_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->q_ChEne, hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossIm_fx[bin], @@ -3698,6 +3714,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( move16(); minimum_s( q_processMtxDec, nBins, &hDiracDecBin->q_processMtxDec ); minimum_s( q_processMtxDecPrev, nBins, &hDiracDecBin->q_processMtxDecPrev ); + FOR( bin = 0; bin < nBins; bin++ ) { FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) @@ -3737,6 +3754,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( } } } + return; } #endif @@ -5069,6 +5087,7 @@ static void eig2x2_fx( move32(); *q_U = Q31; move16(); + return; } @@ -5085,6 +5104,7 @@ static void eig2x2_fx( move32(); *q_U = Q30; move16(); + return; } } @@ -5098,9 +5118,11 @@ static void eig2x2_fx( move32(); *q_U = Q30; move16(); + return; } } + q_U_1 = 0; q_U_2 = 0; move16(); @@ -5137,6 +5159,7 @@ static void eig2x2_fx( q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); + #if !defined( FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC ) || 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); @@ -5148,6 +5171,7 @@ static void eig2x2_fx( normVal_fx = ISqrt32( tmp3, &exp ); q_tmp2 = sub( 31, exp ); #endif + IF( LT_16( q_tmp1, q_c ) ) { c_re = L_shr( c_re, sub( q_c, q_tmp1 ) ); @@ -5271,6 +5295,7 @@ static void eig2x2_fx( move16(); } } 
+ IF( q_U_1 != 0 ) *q_U = q_U_1; ELSE @@ -6150,6 +6175,7 @@ static void formulate2x2MixingMatrix_fx( /* Cholesky decomposition of target / output covariance matrix */ chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky ); + /* Eigendecomposition of input covariance matrix */ eig2x2_fx( E_in1, E_in2, q_ein, Cin_re, Cin_im, q_cin, Uxre_fx, Uxim_fx, &q_Ux, Sx_fx, &q_Sx ); @@ -6403,6 +6429,7 @@ static void formulate2x2MixingMatrix_fx( } matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ + /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ #if ( BINAURAL_CHANNELS != 2 ) FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) @@ -6540,6 +6567,7 @@ static void formulate2x2MixingMatrix_fx( } matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, tmpRe_fx, tmpIm_fx, &q_temp ); + matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, Mre_fx, Mim_fx, q_M ); return; -- GitLab From aea094454c48147449583e4b2f65643e4daa97e6 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Thu, 12 Dec 2024 15:56:31 +0100 Subject: [PATCH 10/14] move function declaration of BASOP_Util_Divide3232_Scale_FhG to lib_com/basop_util.h, use FOR instead of for --- lib_com/basop_util.c | 4 ++-- lib_com/basop_util.h | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index d7fc7ec72..b34cc36d8 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1038,7 +1038,6 @@ Word32 div_w( Word32 L_num, Word32 L_den ) } } -Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits ); Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits ) { Word32 z; @@ -1086,7 +1085,7 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi z = L_sub( x, x ); // z = 0 - for ( iteration = (Word16) 0; 
iteration < (Word16) bits; iteration++ ) + FOR ( iteration = (Word16) 0; iteration < (Word16) bits; iteration++ ) { if ( L_add( x, y ) >= 0 ) { @@ -1100,6 +1099,7 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi { z = L_negate( z ); } + return L_shl( z, sub( 31, bits ) ); } diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index 92994542e..6b68a5092 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -328,6 +328,12 @@ Word16 BASOP_Util_Divide3232_Scale( Word32 x, /*!< i : Numerator*/ Word32 y, /*!< i : Denominator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ + +Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, /*!< i : Numerator*/ + Word32 y, /*!< i : Denominator*/ + Word16 *s, /*!< o : Additional scalefactor difference*/ + Word16 bits ); /*!< i : number of mantissa bits of result*/ + Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, /*!< i : Numerator*/ Word32 y, /*!< i : Denominator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ -- GitLab From e7ca3356cf194c6aba5f1c20f55f296b6d0bf033 Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Thu, 12 Dec 2024 17:38:39 +0100 Subject: [PATCH 11/14] formatting --- lib_com/basop_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index b34cc36d8..20564cacb 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1085,7 +1085,7 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi z = L_sub( x, x ); // z = 0 - FOR ( iteration = (Word16) 0; iteration < (Word16) bits; iteration++ ) + FOR( iteration = (Word16) 0; iteration < (Word16) bits; iteration++ ) { if ( L_add( x, y ) >= 0 ) { -- GitLab From e5b78387c52c3b3c9ca76f74c4ef8d85de45a06d Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Fri, 13 Dec 2024 14:38:05 +0100 Subject: [PATCH 12/14] address formal issues + issue for x == 0 && y == 0 in get_min_scalefactor() --- lib_com/fft_fx.c | 24 
+++++++++++++++++++++--- lib_com/options.h | 1 + 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index bd4a12415..67dd7b637 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -7262,7 +7262,7 @@ Word16 L_norm_arr( Word32 *arr, Word16 size ) Word16 q = 31; move16(); FOR( Word16 i = 0; i < size; i++ ) -#if 0 +#ifndef FIX_1009_OPT_L_NORM_ARR IF( arr[i] != 0 ) { q = s_min( q, norm_l( arr[i] ) ); @@ -7270,9 +7270,12 @@ Word16 L_norm_arr( Word32 *arr, Word16 size ) #else { Word16 q_tst; + q_tst = norm_l( arr[i] ); if ( arr[i] != 0 ) + { q = s_min( q, q_tst ); + } } #endif @@ -7281,7 +7284,7 @@ Word16 L_norm_arr( Word32 *arr, Word16 size ) Word16 get_min_scalefactor( Word32 x, Word32 y ) { -#if 0 +#ifndef FIX_1009_OPT_GETMINSCALEFAC Word16 scf = Q31; move16(); test(); @@ -7299,13 +7302,28 @@ Word16 get_min_scalefactor( Word32 x, Word32 y ) } return scf; #else - Word16 scf = Q31; Word16 scf_y; + Word16 scf = Q31; + move16(); + + test(); + if ( x == 0 && y == 0 ) + { + scf = 0; + move16(); + } + if ( x != 0 ) + { scf = norm_l( x ); + } + scf_y = norm_l( y ); if ( y != 0 ) + { scf = s_min( scf_y, scf ); + } + return scf; #endif } diff --git a/lib_com/options.h b/lib_com/options.h index f5898306e..104e51ce0 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -89,6 +89,7 @@ #define FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC /* FhG: Reduce workload of binaural rendering: replace 1./tmp & sqrt by Isqrt32 */ #define FIX_1009_OPT_PARAMMC_RENDER /* FhG: Optimize ivas_param_mc_dec_render_fx() */ +#define FIX_1009_OPT_GETMINSCALEFAC /* FhG: Optimize get_min_scalefactor(), avoid IF */ /* Replace computations with constants by setting of constants */ /* Simplify matrix multiplications and some external helper routines */ -- GitLab From c8f737edd3798e14645393d9daa6407f5ba2ed4f Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Fri, 13 Dec 2024 21:58:46 +0100 Subject: [PATCH 13/14] fix typo in variable name --- 
lib_dec/ivas_mc_param_dec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_dec/ivas_mc_param_dec.c b/lib_dec/ivas_mc_param_dec.c index 9768820c9..e3d8bf0c0 100644 --- a/lib_dec/ivas_mc_param_dec.c +++ b/lib_dec/ivas_mc_param_dec.c @@ -1997,7 +1997,7 @@ void ivas_param_mc_dec_render_fx( { if ( is_zero != 0 ) { - hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_exp[j] = 0; + hParamMC->h_output_synthesis_cov_state.mixing_matrix_exp[j] = 0; move16(); } } -- GitLab From 9e0eb11fdc2bdb6d8c3f6d2c38f52b37fc82c02c Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Sun, 15 Dec 2024 19:10:34 +0100 Subject: [PATCH 14/14] BASOP_Util_Divide3232_Scale_FhG(): replace DEPR_L_add_c() by L_add_co(); currently inactive, since no counting in L_add_co() --- lib_com/basop_util.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 20564cacb..04f0dc770 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1038,6 +1038,8 @@ Word32 div_w( Word32 L_num, Word32 L_den ) } } +// replace deprecated L_add_c() by L_add_co(); currently disabled, because of missing counting in L_add_co(); +//#define REPLACE_DEPR_L_ADD_C Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits ) { Word32 z; @@ -1046,9 +1048,15 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi Word32 sign; Word16 iteration; Flag Carry; +#ifdef REPLACE_DEPR_L_ADD_C + Flag Overflow; +#endif Word16 s_val; unset_carry( &Carry ); +#ifdef REPLACE_DEPR_L_ADD_C + unset_overflow( &Overflow ); +#endif /* assert (x >= (Word32)0); */ assert( y != (Word32) 0 ); @@ -1089,9 +1097,17 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi { if ( L_add( x, y ) >= 0 ) { +#ifdef REPLACE_DEPR_L_ADD_C + x = L_add_co( x, y, &Carry, &Overflow ); // sets always carry=1 +#else x = DEPR_L_add_c( x, y, &Carry ); // sets always carry=1 +#endif } +#ifdef
REPLACE_DEPR_L_ADD_C + z = L_add_co( z, z, &Carry, &Overflow ); // sets always carry=0 +#else z = DEPR_L_add_c( z, z, &Carry ); // sets always carry=0 +#endif x = L_add( x, x ); } -- GitLab