diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index c465428fc90125779c4ab2505bc28a0684356467..04f0dc770ece4ddf23487cc9e88d6f879ce34c15 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1038,8 +1038,93 @@ Word32 div_w( Word32 L_num, Word32 L_den ) } } +// replace depreacted L_add_c() by L_add_co(); currently disabled, because of missing counting in L_add_co(); +//#define REPLACE_DEPR_L_ADD_C +Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits ) +{ + Word32 z; + Word16 sx; + Word16 sy; + Word32 sign; + Word16 iteration; + Flag Carry; +#ifdef REPLACE_DEPR_L_ADD_C + Flag Overflow; +#endif + Word16 s_val; + + unset_carry( &Carry ); +#ifdef REPLACE_DEPR_L_ADD_C + unset_overflow( &Overflow ); +#endif + + /* assert (x >= (Word32)0); */ + assert( y != (Word32) 0 ); + + IF( x == (Word32) 0 ) + { + *s = -31; + move16(); + return ( (Word32) 0 ); + } + + sign = L_shr( L_xor( x, y ), 31 ); + + sx = norm_l( x ); + x = L_shl( x, sx ); + x = L_shr( x, 1 ); + s_val = sub( 1, sx ); + if ( x < 0 ) + { + x = L_negate( x ); + } + + sy = norm_l( y ); + y = L_shl( y, sy ); + y = L_shr( y, 1 ); + s_val = add( s_val, sy ); + if ( y >= 0 ) + { + y = L_negate( y ); + } + + *s = s_val; + move16(); + + z = L_sub( x, x ); // z = 0 + + FOR( iteration = (Word16) 0; iteration < (Word16) bits; iteration++ ) + { + if ( L_add( x, y ) >= 0 ) + { +#ifdef REPLACE_DEPR_L_ADD_C + x = L_add_co( x, y, &Carry, &Overflow ); // sets always carry=1 +#else + x = DEPR_L_add_c( x, y, &Carry ); // sets always carry=1 +#endif + } +#ifdef REPLACE_DEPR_L_ADD_C + z = L_add_co( z, z, &Carry, &Overflow ); // sets always carry=0 +#else + z = DEPR_L_add_c( z, z, &Carry ); // sets always carry=0 +#endif + x = L_add( x, x ); + } + + if ( sign != 0 ) + { + z = L_negate( z ); + } + + return L_shl( z, sub( 31, bits ) ); +} + + Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { +#if 1 + return BASOP_Util_Divide3232_Scale_FhG( x, y, s, 24 ); +#else Word32 z; Word16 sx; Word16 sy; @@ -1088,6 +1173,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) } return z; +#endif } Word16 BASOP_Util_Divide3232_Scale( Word32 x, Word32 y, Word16 *s ) @@ -1556,7 +1642,7 @@ Word16 findIndexOfMinWord32( Word32 *x, const Word16 len ) Word16 imult1616( Word16 x, Word16 y ) { assert( (int) x * (int) y < 32768 && (int) x * (int) y >= -32768 ); - return extract_l( L_mult0( x, y ) ); + return i_mult( x, y ); } Word32 imult3216( Word32 x, Word16 y ) diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index 92994542e564c3d6856f667be8e387e797c4071b..6b68a509278bd8de72cb8108902d4300a3a13f0e 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -328,6 +328,12 @@ Word16 BASOP_Util_Divide3232_Scale( Word32 x, /*!< i : Numerator*/ Word32 y, /*!< i : Denominator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ + +Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, /*!< i : Numerator*/ + Word32 y, /*!< i : Denominator*/ + Word16 *s, /*!< o : Additional scalefactor difference*/ + Word16 bits ); /*!< i : number of mantissa bits of result*/ + Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, /*!< i : Numerator*/ Word32 y, /*!< i : Denominator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 64a4c8be2d23ba1d0ceb7aeebd581bcf0e542314..67dd7b6373d3d004361fbd0110dd8b369f514cb2 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -7262,15 +7262,29 @@ Word16 L_norm_arr( Word32 *arr, Word16 size ) Word16 q = 31; move16(); FOR( Word16 i = 0; i < size; i++ ) +#ifndef FIX_1009_OPT_L_NORM_ARR IF( arr[i] != 0 ) { q = s_min( q, norm_l( arr[i] ) ); } +#else + { + Word16 q_tst; + + q_tst = norm_l( arr[i] ); + if ( arr[i] != 0 ) + { + q = s_min( q, q_tst ); + } + } + +#endif return q; } Word16 get_min_scalefactor( Word32 x, Word32 y ) { +#ifndef FIX_1009_OPT_GETMINSCALEFAC Word16 scf = Q31; move16(); test(); @@ -7287,6 +7301,31 @@ Word16 get_min_scalefactor( Word32 x, Word32 y ) scf = s_min( scf, norm_l( y ) ); } return scf; +#else + Word16 scf_y; + Word16 scf = Q31; + move16(); + + test(); + if ( x == 0 && y == 0 ) + { + scf = 0; + move16(); + } + + if ( x != 0 ) + { + scf = norm_l( x ); + } + + scf_y = norm_l( y ); + if ( y != 0 ) + { + scf = s_min( scf_y, scf ); + } + + return scf; +#endif } Flag is_zero_arr( Word32 *arr, Word16 size ) diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index 943d0062051932db768e7e1f5c60d8806a0e9cd0..b7b136f389ec282d203a5200ab2106cd55272a8a 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -447,6 +447,23 @@ void v_add_inc_fx( ) { Word16 i; + + /* The use of this function is currently always for the interleaved input format, */ + /* that means, the following conditions are always true and thus obsolete. */ + test(); + test(); + test(); + test(); + IF( ( sub( x_inc, 2 ) == 0 ) && ( sub( x2_inc, 2 ) == 0 ) && ( sub( y_inc, 1 ) == 0 ) && ( &x1[1] == &x2[0] ) ) + { + /* Interleaved input case, linear output */ + FOR( i = 0; i < N; i++ ) + { + y[i] = L_add( x1[2 * i + 0], x1[2 * i + 1] ); /*Qx*/ + move32(); + } + return; + } Word16 ix1 = 0; Word16 ix2 = 0; Word16 iy = 0; diff --git a/lib_com/options.h b/lib_com/options.h old mode 100644 new mode 100755 index cacefdab4fdb59427afcd30a35fccb3cd3bb7165..104e51ce0d799e4956c69bcd852d445dfa9d68cd --- a/lib_com/options.h +++ b/lib_com/options.h @@ -86,4 +86,12 @@ #define FIX_1054_IF_ELSE_CMPLX /* VA: Fix 1054 incorrect counting of complexity when ELSE-IF sequence is encoutered in two functions */ #define FIX_1052_COPY_CMPLX_DISCREPANCY /* VA: modify IF-ELSE statements used in Copy*() functions to avoid dependency on x[] and y[] in RAM */ #define FIX_1049_SHR_RO_COMPLEXITY /* VA: fix for issue 1049: incorrect counting of complexity in the shr_ro() function */ + +#define FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC /* FhG: Reduce workload of binaural rendering: replace 1./tmp & sqrt by Isqrt32 */ +#define FIX_1009_OPT_PARAMMC_RENDER /* FhG: Optimize ivas_param_mc_dec_render_fx() */ +#define FIX_1009_OPT_GETMINSCALEFAC /* FhG: Optimize get_min_scalefactor(), avoid IF */ + /* Replace computations with constants by setting of constants */ + /* Simplify matrix multiplications and some external helper routines */ + + #endif diff --git a/lib_com/tools.c b/lib_com/tools.c index b358196587026ed5921906b0fbacee2467d69ee6..1b8cfd7251a2f57eb096d0069f2b4746864e08d0 100644 --- a/lib_com/tools.c +++ b/lib_com/tools.c @@ -879,30 +879,23 @@ Word16 minimum_s( Word16 *min_val /* o : minimum value in the input vector */ ) { - Word16 i, ind, tmp; - + Word16 i, ind; ind = 0; move16(); - tmp = vec[0]; - move16(); FOR( i = 1; i < lvec; i++ ) { - IF( LT_16( vec[i], tmp ) ) + if ( LT_16( vec[i], vec[ind] ) ) { - ind = i; - move16(); - tmp = vec[i]; - move16(); + ind = add( i, 0 ); } } if ( min_val != NULL ) { - *min_val = tmp; + *min_val = vec[ind]; move16(); } - return ind; } diff --git a/lib_com/tools_fx.c b/lib_com/tools_fx.c index b6cb03469dc17872997098a09c96e659146892cd..7fc742fb5692965d9b52ede048a2a7e472aa367f 100644 --- a/lib_com/tools_fx.c +++ b/lib_com/tools_fx.c @@ -692,25 +692,13 @@ void set32_fx( const Word16 N /* i : Lenght of the vector */ ) { - Word16 i, tmp; - tmp = extract_l( a ); - IF( EQ_32( L_deposit_l( tmp ), a ) ) - { - FOR( i = 0; i < N; i++ ) - { - y[i] = L_deposit_l( tmp ); - move32(); - } - } - ELSE + Word16 i; + + FOR( i = 0; i < N; i++ ) { - FOR( i = 0; i < N; i++ ) - { - y[i] = a; - move32(); - } + y[i] = a; + move32(); } - return; } /*-------------------------------------------------------------------* diff --git a/lib_dec/ivas_mc_param_dec.c b/lib_dec/ivas_mc_param_dec.c index fc158ca89d75cd413ffdeb4b76b0b6efe2f31fa2..e3d8bf0c034ff19afd08dd299334be9c128ad9f5 100644 --- a/lib_dec/ivas_mc_param_dec.c +++ b/lib_dec/ivas_mc_param_dec.c @@ -1968,10 +1968,13 @@ void ivas_param_mc_dec_render_fx( slot_idx_start_cldfb_synth = 0; move16(); +#ifndef FIX_1009_OPT_PARAMMC_RENDER Flag is_zero = 1; move32(); +#endif FOR( j = 0; j < st_ivas->hParamMC->hMetadataPMC->nbands_coded; j++ ) { +#ifndef FIX_1009_OPT_PARAMMC_RENDER is_zero = 1; move16(); FOR( i = 0; i < hParamMC->h_output_synthesis_cov_state.mixing_matrix_len; i++ ) @@ -1989,8 +1992,19 @@ void ivas_param_mc_dec_render_fx( } is_zero = 1; move16(); +#else + Flag is_zero = is_zero_arr( hParamMC->h_output_synthesis_cov_state.mixing_matrix_fx[j], hParamMC->h_output_synthesis_cov_state.mixing_matrix_len ); + { + if ( is_zero != 0 ) + { + hParamMC->h_output_synthesis_cov_state.mixing_matrix_exp[j] = 0; + move16(); + } + } +#endif IF( LT_16( st_ivas->hParamMC->band_grouping[j], st_ivas->hParamMC->h_output_synthesis_params.max_band_decorr ) ) { +#ifndef FIX_1009_OPT_PARAMMC_RENDER FOR( i = 0; i < hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_len; i++ ) { IF( NE_32( hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_fx[j][i], 0 ) ) @@ -2000,6 +2014,10 @@ void ivas_param_mc_dec_render_fx( } } IF( is_zero ) +#else + is_zero = is_zero_arr( hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_fx[j], hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_len ); + if ( is_zero != 0 ) +#endif { hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_exp[j] = 0; move16(); diff --git a/lib_dec/ivas_mct_dec_mct_fx.c b/lib_dec/ivas_mct_dec_mct_fx.c index 1f8954e67becaab0ac7ab24194d00a4852b8b982..dc681d564cf8bfc404c810482287cfb8f936e3c7 100644 --- a/lib_dec/ivas_mct_dec_mct_fx.c +++ b/lib_dec/ivas_mct_dec_mct_fx.c @@ -315,6 +315,7 @@ void mctStereoIGF_dec_fx( test(); IF( NE_16( hMCT->hBlockData[b]->hStereoMdct->IGFStereoMode[k], SMDCT_DUAL_MONO ) || NE_16( hMCT->hBlockData[b]->hStereoMdct->mdct_stereo_mode[k], SMDCT_DUAL_MONO ) ) { +#if 0 tmp = BASOP_Util_Divide1616_Scale( sts[0]->hTcxCfg->tcx_coded_lines, nSubframes, &tmp_e ); L_spec[0] = shr( tmp, add( 15, negate( tmp_e ) ) ); move16(); @@ -324,6 +325,15 @@ void mctStereoIGF_dec_fx( tmp = BASOP_Util_Divide1616_Scale( sts[0]->hTcxDec->L_frameTCX, nSubframes, &tmp_e ); L_frameTCX_nSubframe = shr( tmp, add( 15, negate( tmp_e ) ) ); +#else + assert( nSubframes == 1 || nSubframes == 2 ); + /* Note: nSubframes is in limited range [1, 2] for this function */ + Word16 shr_div = sub( nSubframes, 1 ); /* 2 -> 1, 1 -> 0 */ + L_spec[0] = shr( sts[0]->hTcxCfg->tcx_coded_lines, shr_div ); + move16(); + L_frame_nSubframe = shr( sts[0]->L_frame, shr_div ); + L_frameTCX_nSubframe = shr( sts[0]->hTcxDec->L_frameTCX, shr_div ); +#endif init_tcx_info_fx( sts[0], L_frame_nSubframe, L_frameTCX_nSubframe, k, bfi, &tcx_offset[0], &tcx_offsetFB[0], &L_frame[0], &L_frameTCX[0], &left_rect[0], &L_spec[0] ); @@ -333,14 +343,16 @@ void mctStereoIGF_dec_fx( decoder_tcx_IGF_stereo_fx( sts, hMCT->hBlockData[b]->hStereoMdct, hMCT->hBlockData[b]->mask, p_x, p_x_e, p_x_len, L_frame[0], left_rect[0], k, bfi, 1 /* MCT_flag */ ); // Shifting output with variable exponent back to Q12 + Word16 shr_k = sub( 31 - Q12, p_x_e[0][k] ); FOR( Word16 i = 0; i < p_x_len[0][k]; i++ ) { - p_x[0][k][i] = L_shr( p_x[0][k][i], sub( 31 - Q12, p_x_e[0][k] ) ); + p_x[0][k][i] = L_shr( p_x[0][k][i], shr_k ); move32(); } + shr_k = sub( 31 - Q12, p_x_e[1][k] ); FOR( Word16 i = 0; i < p_x_len[1][k]; i++ ) { - p_x[1][k][i] = L_shr( p_x[1][k][i], sub( 31 - Q12, p_x_e[1][k] ) ); + p_x[1][k][i] = L_shr( p_x[1][k][i], shr_k ); move32(); } } diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c index 6ead07ee06ed9fe0d692a1bb9903b8eec26a8786..f54f01e0deac910a37cb700b20f5776121d47bbf 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions.c @@ -72,8 +72,10 @@ Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; #define LOG_10_BASE_2_Q29 1783446528 // Q29 #define TAN_30_FX 17157 // Q15 #define INV_TAN30_FX 28377 // Q14 -#define EPSILON_MANT 1180591621 /* 1e-12 in Q70 */ +#define EPSILON_MANT 1180591621 /* 1e-12 = 0.5497558*(2^-39) in Q70 */ #define EPSILON_EXP ( -39 ) +#define ONE_DIV_EPSILON_MANT 1953125000 /* 1e+12 = 0.9094947*(2^40) */ +#define ONE_DIV_EPSILON_EXP ( 40 ) #define ADAPT_HTPROTO_ROT_LIM_1 0.8f #define MAX_GAIN_CACHE_SIZE ( ( MASA_MAXIMUM_DIRECTIONS * 3 ) + MAX_NUM_OBJECTS ) /* == different calls to get gains */ @@ -485,10 +487,12 @@ void ivas_dirac_dec_binaural_render_fx( } output_length = 0; + move16(); FOR( subframe_idx = first_sf; subframe_idx < last_sf; subframe_idx++ ) { Word16 n_samples_sf = imult1616( slot_size, hSpatParamRendCom->subframe_nbslots[subframe_idx] ); + ivas_dirac_dec_binaural_internal_fx( st_ivas, st_ivas->hCombinedOrientationData, output_fx_local, nchan_transport, subframe_idx ); FOR( ch = 0; ch < nchan_out; ch++ ) @@ -1767,6 +1771,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( Word32 ivas_total_brate; Word16 nchan_transport; Word16 exp; + Word16 q_processMtx[CLDFB_NO_CHANNELS_MAX], q_processMtxPrev[CLDFB_NO_CHANNELS_MAX]; Word16 q_processMtx_SCCR[CLDFB_NO_CHANNELS_MAX], q_processMtxPrev_SCCR[CLDFB_NO_CHANNELS_MAX]; Word16 q_processMtxDec[CLDFB_NO_CHANNELS_MAX], q_processMtxDecPrev[CLDFB_NO_CHANNELS_MAX]; @@ -3267,6 +3272,7 @@ static void eig2x2_fx( IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) ) { + s_fx = tmp2; move32(); exp = sub( norm_l( s_fx ), 1 ); @@ -3278,10 +3284,17 @@ static void eig2x2_fx( tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); +#if !defined( FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC ) || 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); +#else + /* Note: This code part does not work yet, see pipeline issue for BASOP #1009 */ + /* although the same code works at other places: mantissa and q_format is fine */ + normVal_fx = ISqrt32( tmp3, &exp ); + q_tmp2 = sub( 31, exp ); +#endif IF( LT_16( q_tmp1, q_c ) ) { @@ -3346,12 +3359,15 @@ static void eig2x2_fx( q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); - +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); - +#else + normVal_fx = ISqrt32( tmp3, &exp_tmp3 ); + q_tmp2 = sub( 31, exp_tmp3 ); +#endif IF( LT_16( q_tmp1, q_c ) ) { c_re = L_shr( c_re, sub( q_c, q_tmp1 ) ); @@ -3463,9 +3479,6 @@ static void matrixMul_fx( Word16 chA, chB; Word16 min_q_shift1, min_q_shift2; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); -#ifndef IVAS_ENH64_CADENCE_CHANGES - Word32 tmp1, tmp2; -#endif min_q_shift1 = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); min_q_shift2 = sub( s_min( L_norm_arr( Bre_fx[0], size ), L_norm_arr( Bim_fx[0], size ) ), 1 ); @@ -3494,109 +3507,22 @@ static void matrixMul_fx( outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], Bim_fx[0][chB] ), Are_fx[chA][1], Bim_fx[1][chB] ) ) ); move32(); #else - test(); - test(); - test(); - IF( ( Are_fx[chA][0] >= 0 && Bre_fx[0][chB] >= 0 ) || ( Are_fx[chA][0] < 0 && Bre_fx[0][chB] < 0 ) ) - { - tmp1 = Mpy_32_32( Are_fx[chA][0], Bre_fx[0][chB] ); - } - ELSE - { - tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bre_fx[0][chB] ) ) ); - } - test(); - test(); - test(); - IF( ( Are_fx[chA][1] >= 0 && Bre_fx[1][chB] >= 0 ) || ( Are_fx[chA][1] < 0 && Bre_fx[1][chB] < 0 ) ) - { - tmp2 = Mpy_32_32( Are_fx[chA][1], Bre_fx[1][chB] ); - } - ELSE - { - tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( Bre_fx[1][chB] ) ) ); - } - outRe_fx[chA][chB] = L_add( tmp1, tmp2 ); + outRe_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[0][chB] ), + Are_fx[chA][1], Bre_fx[1][chB] ), + Aim_fx[chA][0], Bim_fx[0][chB] ), + Aim_fx[chA][1], Bim_fx[1][chB] ); move32(); - - test(); - test(); - test(); - IF( ( Aim_fx[chA][0] >= 0 && Bim_fx[0][chB] >= 0 ) || ( Aim_fx[chA][0] < 0 && Bim_fx[0][chB] < 0 ) ) - { - tmp1 = Mpy_32_32( Aim_fx[chA][0], Bim_fx[0][chB] ); - } - ELSE - { - tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( Bim_fx[0][chB] ) ) ); - } - test(); - test(); - test(); - IF( ( Aim_fx[chA][1] >= 0 && Bim_fx[1][chB] >= 0 ) || ( Aim_fx[chA][1] < 0 && Bim_fx[1][chB] < 0 ) ) - { - tmp2 = Mpy_32_32( Aim_fx[chA][1], Bim_fx[1][chB] ); - } - ELSE - { - tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( Bim_fx[1][chB] ) ) ); - } - outRe_fx[chA][chB] = L_sub( outRe_fx[chA][chB], L_add( tmp1, tmp2 ) ); - move32(); - test(); - test(); - test(); - IF( ( Aim_fx[chA][0] >= 0 && Bre_fx[0][chB] >= 0 ) || ( Aim_fx[chA][0] < 0 && Bre_fx[0][chB] < 0 ) ) - { - tmp1 = Mpy_32_32( Aim_fx[chA][0], Bre_fx[0][chB] ); - } - ELSE - { - tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( Bre_fx[0][chB] ) ) ); - } - test(); - test(); - test(); - IF( ( Aim_fx[chA][1] >= 0 && Bre_fx[1][chB] >= 0 ) || ( Aim_fx[chA][1] < 0 && Bre_fx[1][chB] < 0 ) ) - { - tmp2 = Mpy_32_32( Aim_fx[chA][1], Bre_fx[1][chB] ); - } - ELSE - { - tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( Bre_fx[1][chB] ) ) ); - } - outIm_fx[chA][chB] = L_add( tmp1, tmp2 ); - move32(); - - test(); - test(); - test(); - IF( ( Are_fx[chA][0] >= 0 && Bim_fx[0][chB] >= 0 ) || ( Are_fx[chA][0] < 0 && Bim_fx[0][chB] < 0 ) ) - { - tmp1 = Mpy_32_32( Are_fx[chA][0], Bim_fx[0][chB] ); - } - ELSE - { - tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bim_fx[0][chB] ) ) ); - } - test(); - test(); - test(); - IF( ( Are_fx[chA][1] >= 0 && Bim_fx[1][chB] >= 0 ) || ( Are_fx[chA][1] < 0 && Bim_fx[1][chB] < 0 ) ) - { - tmp2 = Mpy_32_32( Are_fx[chA][1], Bim_fx[1][chB] ); - } - ELSE - { - tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( Bim_fx[1][chB] ) ) ); - } - outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) ); + outIm_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[0][chB] ), + Aim_fx[chA][1], Bre_fx[1][chB] ), + Are_fx[chA][0], Bim_fx[0][chB] ), + Are_fx[chA][1], Bim_fx[1][chB] ); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ } } *q_out = sub( add( *q_A, *q_B ), 31 ); + move16(); if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) { @@ -3620,77 +3546,20 @@ static void matrixTransp1Mul_fx( { Word16 chA, chB; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); - Word32 tmp1, tmp2; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { - test(); - test(); - test(); - IF( ( ( ( Are_fx[0][chA] >= 0 ) && ( Bre_fx[0][chB] >= 0 ) ) || ( ( Are_fx[0][chA] < 0 ) && ( Bre_fx[0][chB] < 0 ) ) ) ) - tmp1 = Mpy_32_32( Are_fx[0][chA], Bre_fx[0][chB] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[0][chA] ), L_abs( Bre_fx[0][chB] ) ) ); - test(); - test(); - test(); - IF( ( ( ( Are_fx[1][chA] >= 0 ) && ( Bre_fx[1][chB] >= 0 ) ) || ( ( Are_fx[1][chA] < 0 ) && ( Bre_fx[1][chB] < 0 ) ) ) ) - tmp2 = Mpy_32_32( Are_fx[1][chA], Bre_fx[1][chB] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[1][chA] ), L_abs( Bre_fx[1][chB] ) ) ); - outRe_fx[chA][chB] = L_add( tmp1, tmp2 ); - move32(); - test(); - test(); - test(); - IF( ( ( ( L_negate( Aim_fx[0][chA] ) >= 0 ) && ( Bim_fx[0][chB] >= 0 ) ) || ( ( L_negate( Aim_fx[0][chA] ) < 0 ) && ( Bim_fx[0][chB] < 0 ) ) ) ) - tmp1 = Mpy_32_32( -Aim_fx[0][chA], Bim_fx[0][chB] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( -Aim_fx[0][chA] ), L_abs( Bim_fx[0][chB] ) ) ); - test(); - test(); - test(); - IF( ( ( ( Aim_fx[1][chA] >= 0 ) && ( Bim_fx[1][chB] >= 0 ) ) || ( ( Aim_fx[1][chA] < 0 ) && ( Bim_fx[1][chB] < 0 ) ) ) ) - tmp2 = Mpy_32_32( Aim_fx[1][chA], Bim_fx[1][chB] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[1][chA] ), L_abs( Bim_fx[1][chB] ) ) ); - outRe_fx[chA][chB] = L_sub( outRe_fx[chA][chB], L_sub( tmp1, tmp2 ) ); + outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bre_fx[0][chB] ), + Are_fx[1][chA], Bre_fx[1][chB] ), + Aim_fx[0][chA], Bim_fx[0][chB] ), + Aim_fx[1][chA], Bim_fx[1][chB] ); move32(); - - test(); - test(); - test(); - IF( ( ( ( L_negate( Aim_fx[0][chA] ) >= 0 ) && ( Bre_fx[0][chB] >= 0 ) ) || ( ( L_negate( Aim_fx[0][chA] ) < 0 ) && ( Bre_fx[0][chB] < 0 ) ) ) ) - tmp1 = Mpy_32_32( -Aim_fx[0][chA], Bre_fx[0][chB] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( -Aim_fx[0][chA] ), L_abs( Bre_fx[0][chB] ) ) ); - test(); - test(); - test(); - IF( ( ( ( Aim_fx[1][chA] >= 0 ) && ( Bre_fx[1][chB] >= 0 ) ) || ( ( Aim_fx[1][chA] < 0 ) && ( Bre_fx[1][chB] < 0 ) ) ) ) - tmp2 = Mpy_32_32( Aim_fx[1][chA], Bre_fx[1][chB] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[1][chA] ), L_abs( Bre_fx[1][chB] ) ) ); - outIm_fx[chA][chB] = L_sub( tmp1, tmp2 ); - move32(); - - test(); - test(); - test(); - IF( ( ( ( Are_fx[0][chA] >= 0 ) && ( Bim_fx[0][chB] >= 0 ) ) || ( ( Are_fx[0][chA] < 0 ) && ( Bim_fx[0][chB] < 0 ) ) ) ) - tmp1 = Mpy_32_32( Are_fx[0][chA], Bim_fx[0][chB] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[0][chA] ), L_abs( Bim_fx[0][chB] ) ) ); - test(); - test(); - test(); - IF( ( ( ( Are_fx[1][chA] >= 0 ) && ( Bim_fx[1][chB] >= 0 ) ) || ( ( Are_fx[1][chA] < 0 ) && ( Bim_fx[1][chB] < 0 ) ) ) ) - tmp2 = Mpy_32_32( Are_fx[1][chA], Bim_fx[1][chB] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[1][chA] ), L_abs( Bim_fx[1][chB] ) ) ); - outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) ); + outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bim_fx[0][chB] ), + Are_fx[1][chA], Bim_fx[1][chB] ), + Aim_fx[0][chA], Bre_fx[0][chB] ), + Aim_fx[1][chA], Bre_fx[1][chB] ); move32(); } } @@ -3720,9 +3589,6 @@ static void matrixTransp2Mul_fx( Word16 chA, chB; Word16 min_q_shift; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); -#ifndef IVAS_ENH64_CADENCE_CHANGES - Word32 tmp1, tmp2; -#endif min_q_shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); scale_sig32( Are_fx[0], size, min_q_shift ); @@ -3749,72 +3615,15 @@ static void matrixTransp2Mul_fx( outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], L_negate( Bim_fx[chB][0] ) ), Are_fx[chA][1], L_negate( Bim_fx[chB][1] ) ) ) ); move32(); #else - test(); - test(); - test(); - IF( ( Are_fx[chA][0] >= 0 && Bre_fx[chB][0] >= 0 ) || ( Are_fx[chA][0] < 0 && Bre_fx[chB][0] < 0 ) ) - tmp1 = Mpy_32_32( Are_fx[chA][0], Bre_fx[chB][0] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bre_fx[chB][0] ) ) ); - test(); - test(); - test(); - IF( ( Are_fx[chA][1] >= 0 && Bre_fx[chB][1] >= 0 ) || ( Are_fx[chA][1] < 0 && Bre_fx[chB][1] < 0 ) ) - tmp2 = Mpy_32_32( Are_fx[chA][1], Bre_fx[chB][1] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( Bre_fx[chB][1] ) ) ); - outRe_fx[chA][chB] = L_add( tmp1, tmp2 ); - move32(); - - test(); - test(); - test(); - IF( ( Aim_fx[chA][0] >= 0 && L_negate( Bim_fx[chB][0] ) >= 0 ) || ( Aim_fx[chA][0] < 0 && L_negate( Bim_fx[chB][0] ) < 0 ) ) - tmp1 = Mpy_32_32( Aim_fx[chA][0], -Bim_fx[chB][0] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( -Bim_fx[chB][0] ) ) ); - test(); - test(); - test(); - IF( ( Aim_fx[chA][1] >= 0 && L_negate( Bim_fx[chB][1] ) >= 0 ) || ( Aim_fx[chA][1] < 0 && L_negate( Bim_fx[chB][1] ) < 0 ) ) - tmp2 = Mpy_32_32( Aim_fx[chA][1], -Bim_fx[chB][1] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( -Bim_fx[chB][1] ) ) ); - outRe_fx[chA][chB] = L_sub( outRe_fx[chA][chB], L_add( tmp1, tmp2 ) ); - move32(); - - test(); - test(); - test(); - IF( ( Aim_fx[chA][0] >= 0 && Bre_fx[chB][0] >= 0 ) || ( Aim_fx[chA][0] < 0 && Bre_fx[chB][0] < 0 ) ) - tmp1 = Mpy_32_32( Aim_fx[chA][0], Bre_fx[chB][0] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( Bre_fx[chB][0] ) ) ); - test(); - test(); - test(); - IF( ( Aim_fx[chA][1] >= 0 && Bre_fx[chB][1] >= 0 ) || ( Aim_fx[chA][1] < 0 && Bre_fx[chB][1] < 0 ) ) - tmp2 = Mpy_32_32( Aim_fx[chA][1], Bre_fx[chB][1] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( Bre_fx[chB][1] ) ) ); - outIm_fx[chA][chB] = L_add( tmp1, tmp2 ); + outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[chB][0] ), + Are_fx[chA][1], Bre_fx[chB][1] ), + Aim_fx[chA][0], Bim_fx[chB][0] ), + Aim_fx[chA][1], Bim_fx[chB][1] ); move32(); - - test(); - test(); - test(); - IF( ( Are_fx[chA][0] >= 0 && L_negate( Bim_fx[chB][0] ) >= 0 ) || ( Are_fx[chA][0] < 0 && L_negate( Bim_fx[chB][0] ) < 0 ) ) - tmp1 = Mpy_32_32( Are_fx[chA][0], -Bim_fx[chB][0] ); - ELSE - tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( -Bim_fx[chB][0] ) ) ); - test(); - test(); - test(); - IF( ( Are_fx[chA][1] >= 0 && L_negate( Bim_fx[chB][1] ) >= 0 ) || ( Are_fx[chA][1] < 0 && L_negate( Bim_fx[chB][1] ) < 0 ) ) - tmp2 = Mpy_32_32( Are_fx[chA][1], -Bim_fx[chB][1] ); - ELSE - tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( -Bim_fx[chB][1] ) ) ); - outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) ); + outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[chB][0] ), + Aim_fx[chA][1], Bre_fx[chB][1] ), + Are_fx[chA][0], Bim_fx[chB][0] ), + Are_fx[chA][1], Bim_fx[chB][1] ); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ } @@ -3890,6 +3699,7 @@ static void chol2x2_fx( } ELSE { +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC outRe[1][0] = BASOP_Util_Divide3232_Scale_cadence( c_re, outRe[0][0], &exp ); move32(); q_re2 = add( sub( 31, exp ), sub( q_c, q_re1 ) ); @@ -3897,6 +3707,33 @@ static void chol2x2_fx( outIm[1][0] = BASOP_Util_Divide3232_Scale_cadence( c_im, outRe[0][0], &exp ); move32(); q_im = add( sub( 31, exp ), sub( q_c, q_re1 ) ); +#else + Word32 denom; + Word16 den_exp; + Word32 my_outRe, my_outIm; + + /* Compute denom = 1.0 / outRe[0][0] */ + denom = ISqrt32( outRe[0][0], &exp ); + denom = Mpy_32_32( denom, denom ); + den_exp = shl( exp, 1 ); + + /* Normalise c_re, c_im */ + exp = norm_l( c_re ); + my_outRe = L_shl( c_re, exp ); + q_re2 = add( q_c, exp ); + exp = norm_l( c_im ); + my_outIm = L_shl( c_im, exp ); + q_im = add( q_c, exp ); + + /* Multiply and store c_re*denom and c_im*denom */ + outRe[1][0] = Mpy_32_32( denom, my_outRe ); + move32(); + q_re2 = sub( q_re2, den_exp ); + + outIm[1][0] = Mpy_32_32( denom, my_outIm ); + move32(); + q_im = sub( q_im, den_exp ); +#endif } if ( outRe[1][0] == 0 ) { @@ -3912,11 +3749,20 @@ static void chol2x2_fx( temp = Madd_32_32( Mpy_32_32( c_re, c_re ), c_im, c_im ); q_tmp = sub( add( q_c, q_c ), 31 ); + // 4611686 = Q62 IF( e1 == 0 ) { +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp ); q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) ); +#else + Word16 norm = norm_l( temp ); + temp = L_shl( temp, norm ); + q_tmp = add( q_tmp, norm ); + temp = Mpy_32_32( temp, ONE_DIV_EPSILON_MANT ); + q_tmp = sub( q_tmp, ONE_DIV_EPSILON_EXP ); +#endif } ELSE { @@ -4092,6 +3938,8 @@ static void formulate2x2MixingMatrix_fx( Word32 temp; Word16 q_Pre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], q_Pim[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word16 hdrm_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], hdrm_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; + + set16_fx( hdrm_re[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ); @@ -4146,8 +3994,10 @@ static void formulate2x2MixingMatrix_fx( // 4611686 = Q62 IF( maxEne_fx == 0 ) { - maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12f in Q62 - q_maxEneDiv = add( sub( 31, exp ), sub( Q30, 62 ) ); + maxEneDiv_fx = ONE_DIV_EPSILON_MANT; + move32(); + q_maxEneDiv = 31 - ONE_DIV_EPSILON_EXP; + move16(); } ELSE { @@ -4207,8 +4057,19 @@ static void formulate2x2MixingMatrix_fx( IF( temp == 0 ) { - BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62 - exp = sub( exp, sub( q_eout, 62 ) ); + IF( E_out1 == 0 ) + { + Ghat_fx[0] = 0; + exp = -19; + move32(); + move16(); + } + ELSE + { + temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62 + exp = sub( exp, sub( q_eout, 62 ) ); + Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp + } } ELSE { @@ -4216,16 +4077,26 @@ static void formulate2x2MixingMatrix_fx( temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp ); exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) ); + Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp } - Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); temp = Mpy_32_32( E_in1, 2147484 ); // 2147484 = 0.001f in Q31 temp = L_max( temp, E_in2 ); // q_ein IF( temp == 0 ) { - BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62 - exp1 = sub( exp1, sub( q_eout, 62 ) ); + IF( E_out2 == 0 ) + { /* We can set hard-coded results */ + Ghat_fx[1] = 0; + exp1 = -19; + move16(); + } + ELSE + { + temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62 + exp1 = sub( exp1, sub( q_eout, 62 ) ); + Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 + } } ELSE { @@ -4233,8 +4104,8 @@ static void formulate2x2MixingMatrix_fx( temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 ); exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) ); + Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 } - Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 move32(); q_Ghat = sub( 31, s_max( exp, exp1 ) ); @@ -4283,21 +4154,32 @@ static void formulate2x2MixingMatrix_fx( IF( D_fx[0] == 0 ) { +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12 in Q62 exp = sub( exp, sub( Q30, 62 ) ); +#else + temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + exp = ONE_DIV_EPSILON_EXP; +#endif } ELSE { temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp ); exp = sub( exp, sub( Q30, q_D ) ); } + div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp move32(); IF( D_fx[1] == 0 ) { +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp1 ); // 4611686 = 1e-12 in Q62 exp1 = sub( exp1, sub( Q30, 62 ) ); +#else + temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */ + exp1 = ONE_DIV_EPSILON_EXP; +#endif } ELSE { @@ -4400,25 +4282,52 @@ static void formulate2x2MixingMatrix_fx( matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ +#if ( BINAURAL_CHANNELS != 2 ) FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { IF( Sx_fx[chB] == 0 ) { - Pre_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pre_fx[chA][chB], 4611686, &exp ); // 4611686 = 1e-12 in Q62 - q_Pre[chA][chB] = add( sub( q_P, 62 ), sub( 31, exp ) ); - Pim_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pim_fx[chA][chB], 4611686, &exp ); // 4611686 = 1e-12 in Q62 - q_Pim[chA][chB] = add( sub( q_P, 62 ), sub( 31, exp ) ); + Pre_fx[chA][chB] = Mpy_32_32( Pre_fx[chA][chB], ONE_DIV_EPSILON_MANT ); + // q_Pre[chA][chB] = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP); + q_Pre[chA][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); + + + Pim_fx[chA][chB] = Mpy_32_32( Pim_fx[chA][chB], ONE_DIV_EPSILON_MANT ); + // q_Pim[chA][chB] = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP); + q_Pim[chA][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); } ELSE { + Word16 Pre_shift, Pim_shift; temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); +#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC Pre_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pre_fx[chA][chB], temp, &exp ); q_Pre[chA][chB] = add( sub( q_P, sub( 31, exp_temp ) ), sub( 31, exp ) ); Pim_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pim_fx[chA][chB], temp, &exp ); q_Pim[chA][chB] = add( sub( q_P, sub( 31, exp_temp ) ), sub( 31, exp ) ); +#else + temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp ); + Pre_shift = norm_l( Pre_fx[chA][chB] ); + Pim_shift = norm_l( Pim_fx[chA][chB] ); + Pre_fx[chA][chB] = Mpy_32_32( L_shl( Pre_fx[chA][chB], Pre_shift ), temp ); + Pim_fx[chA][chB] = Mpy_32_32( L_shl( Pim_fx[chA][chB], Pim_shift ), temp ); + q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp ); + q_Pre[chA][chB] = add( q_temp, Pre_shift ); + q_Pim[chA][chB] = add( q_temp, Pim_shift ); +#endif + } + if ( Pre_fx[chA][chB] == 0 ) + { + q_Pre[chA][chB] = 31; + move16(); + } + if ( Pim_fx[chA][chB] == 0 ) + { + q_Pim[chA][chB] = 31; + move16(); } move32(); move32(); @@ -4426,6 +4335,72 @@ static void formulate2x2MixingMatrix_fx( move16(); } } +#else + /* BINAURAL_CHANNEL == 2 */ + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + IF( Sx_fx[chB] == 0 ) + { + Pre_fx[0][chB] = Mpy_32_32( Pre_fx[0][chB], ONE_DIV_EPSILON_MANT ); + q_Pre[0][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); + Pim_fx[0][chB] = Mpy_32_32( Pim_fx[0][chB], ONE_DIV_EPSILON_MANT ); + q_Pim[0][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); + Pre_fx[1][chB] = Mpy_32_32( Pre_fx[1][chB], ONE_DIV_EPSILON_MANT ); + q_Pre[1][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); + Pim_fx[1][chB] = Mpy_32_32( Pim_fx[1][chB], ONE_DIV_EPSILON_MANT ); + q_Pim[1][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P ); + } + ELSE + { + Word16 Pre_shift, Pim_shift; + temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); + temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp ); + q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp ); + + Pre_shift = norm_l( Pre_fx[0][chB] ); + Pim_shift = norm_l( Pim_fx[0][chB] ); + Pre_fx[0][chB] = Mpy_32_32( L_shl( Pre_fx[0][chB], Pre_shift ), temp ); + Pim_fx[0][chB] = Mpy_32_32( L_shl( Pim_fx[0][chB], Pim_shift ), temp ); + q_Pre[0][chB] = add( q_temp, Pre_shift ); + q_Pim[0][chB] = add( q_temp, Pim_shift ); + + Pre_shift = norm_l( Pre_fx[1][chB] ); + Pim_shift = norm_l( Pim_fx[1][chB] ); + Pre_fx[1][chB] = Mpy_32_32( L_shl( Pre_fx[1][chB], Pre_shift ), temp ); + Pim_fx[1][chB] = Mpy_32_32( L_shl( Pim_fx[1][chB], Pim_shift ), temp ); + q_Pre[1][chB] = add( q_temp, Pre_shift ); + q_Pim[1][chB] = add( q_temp, Pim_shift ); + } + if ( Pre_fx[0][chB] == 0 ) + { + q_Pre[0][chB] = 31; + move16(); + } + if ( Pim_fx[0][chB] == 0 ) + { + q_Pim[0][chB] = 31; + move16(); + } + if ( Pre_fx[1][chB] == 0 ) + { + q_Pre[1][chB] = 31; + move16(); + } + if ( Pim_fx[1][chB] == 0 ) + { + q_Pim[1][chB] = 31; + move16(); + } + move32(); + move32(); + move16(); + move16(); + move32(); + move32(); + move16(); + move16(); + } +#endif minimum_s( q_Pre[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp ); q_P = s_min( q_P, exp ); minimum_s( q_Pim[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp ); diff --git a/lib_rend/ivas_dirac_decorr_dec.c b/lib_rend/ivas_dirac_decorr_dec.c index a043e97f93f2e6d5c505d3fa8c5659bcaea78ebd..95ab88545aec85bc47cc75328dee2f97fac79d14 100644 --- a/lib_rend/ivas_dirac_decorr_dec.c +++ b/lib_rend/ivas_dirac_decorr_dec.c @@ -57,6 +57,10 @@ #define DIRAC_DUCK_ALPHA_FX 1717986944 /* Q31 */ #define ONE_M_DIRAC_DUCK_ALPHA 429496736 /* Q31 */ +/* Maximal useful q-format, represents range of 2^-126 (float min) */ +#define MAX_Q_FX 157 + + /*------------------------------------------------------------------------- * Local function prototypes *------------------------------------------------------------------------*/ @@ -583,16 +587,20 @@ void ivas_dirac_dec_decorr_process_fx( Word16 decorr_buff_tot_len = imult1616( imult1616( shl( decorr_buffer_len, 1 ), max_band_decorr ), num_channels ); guarded_bits = 0; - FOR( Word16 i = 0; i < decorr_buff_tot_len; i++ ) + + Flag is_zero = is_zero_arr( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ); + if ( is_zero == 0 ) + guarded_bits = 3; + + IF( is_zero == 0 ) { - IF( h_freq_domain_decorr_ap_state->decorr_buffer_fx[i] != 0 ) + q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits ); + IF( q_shift != 0 ) { - guarded_bits = s_max( find_guarded_bits_fx( 2 ), 3 ); + Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift ); + q_decorr_buf = add( q_decorr_buf, q_shift ); } } - q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits ); - Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift ); - q_decorr_buf = add( q_decorr_buf, q_shift ); q_shift = getScaleFactor32( aux_buffer_fx, imult1616( imult1616( 2, num_protos_dir ), max_band_decorr_temp ) ); @@ -655,10 +663,8 @@ void ivas_dirac_dec_decorr_process_fx( /* MA part of filter impulse response */ FOR( l = 0; l < filter_length; l++ ) { - frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] ); // Q_qux -3 = q_deorr - // frame_ma_fx[2 * l] = L_shr(frame_ma_fx[2 * l],3); // scaling to q_decorr_buf - frame_ma_fx[add( shl( l, 1 ), 1 )] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_deorr - // frame_ma_fx[2 * l + 1] = L_shr(frame_ma_fx[2 * l + 1], 3); // scaling to q_decorr_buf + frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] ); // Q_qux -3 = q_deorr + frame_ma_fx[2 * l + 1] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_deorr move32(); move32(); } @@ -672,26 +678,28 @@ void ivas_dirac_dec_decorr_process_fx( /*get values for AR part */ filter_frame_real_fx = decorr_buffer_ptr_fx[0]; // q_decorr - filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_deocrr + filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_decorr + + Word16 decorr_buffer_step2x = shl( decorr_buffer_step, 1 ); - decorr_buffer_ptr_fx += shl( decorr_buffer_step, 1 ); + decorr_buffer_ptr_fx += decorr_buffer_step2x; + move16(); FOR( l = 1; l < filter_length; l++ ) { // q adjustment needed// - decorr_buffer_ptr_fx[0] = L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ); // q_decorr - Word32 temp_1 = Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 - temp_1 = L_shl( temp_1, 3 ); // q_decorr - decorr_buffer_ptr_fx[0] = L_sub( decorr_buffer_ptr_fx[0], temp_1 ); // q_deocor - decorr_buffer_ptr_fx[1] = L_add( decorr_buffer_ptr_fx[1], frame_ma_fx[add( shl( l, 1 ), 1 )] ); // q_decorr - Word32 temp_2 = Mpy_32_16_1( filter_frame_imag_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 - temp_2 = L_shl( temp_2, 3 ); // q_decorr - decorr_buffer_ptr_fx[1] = L_sub( decorr_buffer_ptr_fx[1], temp_2 ); // q_decorr - decorr_buffer_ptr_fx += imult1616( 2, decorr_buffer_step ); - move32(); - move32(); + Word32 temp_1 = Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 + temp_1 = L_shl( temp_1, 3 ); // q_decorr + decorr_buffer_ptr_fx[0] = L_sub( L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ), temp_1 ); // q_deocor move32(); + + Word32 temp_2 = Mpy_32_16_1( filter_frame_imag_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 + temp_2 = L_shl( temp_2, 3 ); // q_decorr + decorr_buffer_ptr_fx[1] = L_sub( L_add( decorr_buffer_ptr_fx[1], frame_ma_fx[2 * l + 1] ), temp_2 ); // q_decorr move32(); + + decorr_buffer_ptr_fx += decorr_buffer_step2x; + move16(); } } } @@ -748,6 +756,10 @@ void ivas_dirac_dec_decorr_process_fx( q_direct_energy = q_aux_buffer; move16(); +#if 0 + /* Attention: this loop reports norm=0, whenever any data is 0. */ + /* Therefore, useful left-shifts are skipped, accuracy is lost. */ + /* calculate the power of the decorrelated signal */ FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) { @@ -760,6 +772,37 @@ void ivas_dirac_dec_decorr_process_fx( norm = s_min( norm, W_norm( aux_64[add( offset2, i )] ) ); } } +#else + /* calculate the power of the decorrelated signal */ + Word64 *m64_aux = aux_64; + move32(); + Word64 min64 = (Word64) 0; + move64(); + Word32 *m32_frame_dec_fx = frame_dec_fx; + move32(); + offset1 = shl( num_freq_bands, 1 ); + offset2 = shl( max_band_decorr, 1 ); + + + FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) + { + FOR( Word16 i = 0; i < offset2; i++ ) + { + m64_aux[i] = W_mult0_32_32( m32_frame_dec_fx[i], m32_frame_dec_fx[i] ); + move64(); + if ( GT_64( m64_aux[i], min64 ) ) + { + min64 = m64_aux[i]; + move64(); + } + } + m64_aux += offset2; + m32_frame_dec_fx += offset1; + move64(); + move32(); + } + norm = W_norm( min64 ); +#endif FOR( Word16 i = 0; i < shl( imult1616( num_channels, max_band_decorr ), 1 ); i++ ) { @@ -775,32 +818,63 @@ void ivas_dirac_dec_decorr_process_fx( } /* smooth energies */ - v_multc_fixed( aux_buffer_fx, ONE_M_DIRAC_DUCK_ALPHA, aux_buffer_fx, imult1616( num_channels, max_band_decorr ) ); // q_aux_buffer - v_multc_fixed( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, DIRAC_DUCK_ALPHA_FX, h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ) ); // same-q + Word16 len = imult1616( num_channels, max_band_decorr ); + Word16 aux_e = sub( 31, q_aux_buffer ); + Word16 max_e = s_max( aux_e, e_reverb_energy_smooth ); + Word16 shr_aux = sub( max_e, aux_e ); /* Note: headroom is zero */ + Word16 shr_res = sub( max_e, e_reverb_energy_smooth ); /* Note: headroom is zero */ - v_add_fixed_me( aux_buffer_fx, sub( 31, q_aux_buffer ), h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, e_reverb_energy_smooth, h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, &e_reverb_energy_smooth, imult1616( num_channels, max_band_decorr ), 0 ); - h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = sub( 31, e_reverb_energy_smooth ); + /* Note: DIRAC_DUCK_ALPHA_FX and ONE_M_DIRAC_DUCK_ALPHA are both in Q31 (e=0) */ + /* => a multiplication with this values does not change the q/e value. */ - v_multc_fixed( direct_energy_fx, ONE_M_DIRAC_DUCK_ALPHA, direct_energy_fx, imult1616( num_protos_dir, max_band_decorr ) ); // same q + FOR( Word16 i = 0; i < len; i++ ) + { + h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i] = L_add( + L_shr( Mpy_32_32( aux_buffer_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_aux ), + L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_res ) ); + move32(); + } + e_reverb_energy_smooth = max_e; + move16(); + h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = sub( 31, e_reverb_energy_smooth ); + move16(); - v_multc_fixed( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, DIRAC_DUCK_ALPHA_FX, h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ) ); // same q + len = imult1616( num_protos_dir, max_band_decorr ); + Word16 den_e = sub( 31, q_direct_energy ); + Word16 max_x = s_max( den_e, e_direct_energy_smooth ); + Word16 shr_den = sub( max_x, den_e ); /* Note: headroom is zero */ + Word16 shr_des = sub( max_x, e_direct_energy_smooth ); /* Note: headroom is zero */ - v_add_fixed_me( direct_energy_fx, sub( 31, q_direct_energy ), h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, e_direct_energy_smooth, h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, &e_direct_energy_smooth, imult1616( num_protos_dir, max_band_decorr ), 0 ); + FOR( Word16 i = 0; i < len; i++ ) + { + h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i] = L_add( + L_shr( Mpy_32_32( direct_energy_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_den ), + L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_des ) ); + move32(); + } + e_direct_energy_smooth = max_x; + move16(); h_freq_domain_decorr_ap_state->q_direct_energy_smooth = sub( 31, e_direct_energy_smooth ); move16(); // scaling energy buffers for better precision for higher values// q_shift = L_norm_arr( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ) ); - Scale_sig32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ), q_shift ); - h_freq_domain_decorr_ap_state->q_direct_energy_smooth = add( h_freq_domain_decorr_ap_state->q_direct_energy_smooth, q_shift ); - move16(); - - + IF( q_shift != 0 ) + { + Scale_sig32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ), q_shift ); + h_freq_domain_decorr_ap_state->q_direct_energy_smooth = add( h_freq_domain_decorr_ap_state->q_direct_energy_smooth, q_shift ); + move16(); + } q_shift = L_norm_arr( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ) ); - Scale_sig32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ), q_shift ); - h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift ); - move16(); + IF( q_shift != 0 ) + { + Scale_sig32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ), q_shift ); + h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift ); + move16(); + } + h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth ); + h_freq_domain_decorr_ap_state->q_direct_energy_smooth = min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_direct_energy_smooth ); e_reverb_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth ); e_direct_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_direct_energy_smooth ); @@ -856,8 +930,8 @@ void ivas_dirac_dec_decorr_process_fx( duck_gain = shl( duck_gain, sub( e_duck_gain, 1 ) ); // Q14 - frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 ); // q_frame_f - frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )], duck_gain ), 1 ); // q_frame_f + frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 ); // q_frame_f + frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 1 ); // q_frame_f move32(); move32(); } @@ -878,8 +952,8 @@ void ivas_dirac_dec_decorr_process_fx( { duck_gain = shl( duck_gain, sub( e_duck_gain, 2 ) ); // Q13 } - frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 ); // q_frame_dec - frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )], duck_gain ), 2 ); // q_frame_dec + frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 ); // q_frame_dec + frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 2 ); // q_frame_dec move32(); move32(); }