diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index cef466471db4f1d2d9602c057a0f60a77e888c45..a744e9831edc5a19c48b950bf303cb4c244cc4b1 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -405,7 +405,9 @@ static Word16 ISqrt16_common( Word16 m, /* handle even exponents */ if ( s_and( e, 1 ) == 0 ) + { m = mult_r( m, 0x5a82 ); + } return m; } @@ -420,8 +422,11 @@ static Word32 ISqrt32_common( Word32 m, #endif assert( m >= 0x40000000 ); - +#ifdef BASOP_NOGLOB_DECLARE_LOCAL m16 = round_fx_o( m, &Overflow ); +#else + m16 = round_fx( m ); +#endif /* get table index (upper 6 bits minus 32) */ /* index = (m16 >> 25) - 32; */ @@ -435,7 +440,9 @@ static Word32 ISqrt32_common( Word32 m, /* handle even exponents */ if ( s_and( e, 1 ) == 0 ) + { m = Mpy_32_16_1( m, 0x5a82 ); + } return m; } diff --git a/lib_com/options.h b/lib_com/options.h index 194fab0b32a84a619f6cb8286f41fc9334050d3f..d64672a5527b94110bf6a60464605bd408f44638 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -134,6 +134,8 @@ #define FIX_1010_OPT_GIVENS_INV /* FhG: SVD complexity optimizations (non-be) */ #define FIX_1010_OPT_NORM_NOSAT /* FhG: SVD complexity optimizations (non-be) */ #define FIX_1010_OPT_SEC_SINGLE_RESCALE /* FhG: SVD complexity optimizations (non-be) */ +#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx /* FhG: complexity optimization (non-be) */ +#define FIX_1072_REDUCE_DIVS /* FhG: complexity optimization (non-be) */ #define FIX_ISSUE_1230 /* Ittiam: Fix for issue 1230: Basop Enc audible differences and distortion @16kbps */ #define NONBE_1211_DTX_BR_SWITCHING /* VA: port float issue 1211: fix crash in MASA DTX bitrate switching */ #define FIX_1189_GSC_IVAS_OMASA /* VA: Fix for issue 1189: Bitstream desynchornization due to reading/writing of the GSC_IVAS_mode parameter */ diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c index d90728d588cdf5dd0173a0666a69ac7b9aa444de..71ecb5106be77e0f5381dd721bc665b3790c51d9 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions.c @@ -130,7 +130,20 @@ static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_f static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); -static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); +static void matrixTransp2Mul_fx( + Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, + Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, + Word16 *q_A, + Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, + Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, + Word16 *q_B, +#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx + int Ascale, + int Bscale, +#endif + Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, + Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, + Word16 *q_out ); /*------------------------------------------------------------------------- * ivas_dirac_dec_init_binaural_data() @@ -1970,7 +1983,13 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( /* Make matrix multiplication M*Cx*M' to determine resulting covariance matrix of processing input with M */ matrixMul_fx( Mre_fx, Mim_fx, &q_M, CxRe_fx, CxIm_fx, &q_Cx, tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp ); - matrixTransp2Mul_fx( tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp, Mre_fx, Mim_fx, &q_M, resultMtxRe_fx, resultMtxIm_fx, &q_res ); + matrixTransp2Mul_fx( + tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp, Mre_fx, Mim_fx, &q_M, +#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx + 1 /*int Ascale*/, + 0 /*int Bscale*/, +#endif + resultMtxRe_fx, resultMtxIm_fx, &q_res ); /* When below the frequency limit where decorrelation is applied, we inject the decorrelated * residual (or missing) signal component. The procedure is active when there are not enough independent @@ -3791,6 +3810,10 @@ static void matrixTransp2Mul_fx( Word32 Bre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/ Word32 Bim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/ Word16 *q_B, +#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx + int Ascale, + int Bscale, +#endif Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word16 *q_out ) @@ -3804,16 +3827,27 @@ static void matrixTransp2Mul_fx( #endif #endif - min_q_shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); - scale_sig32( Are_fx[0], size, min_q_shift ); - scale_sig32( Aim_fx[0], size, min_q_shift ); - *q_A = add( *q_A, min_q_shift ); - move16(); - min_q_shift = sub( s_min( L_norm_arr( Bre_fx[0], size ), L_norm_arr( Bim_fx[0], size ) ), 1 ); - scale_sig32( Bre_fx[0], size, min_q_shift ); - scale_sig32( Bim_fx[0], size, min_q_shift ); - *q_B = add( *q_B, min_q_shift ); - move16(); +#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx + IF( Ascale == 1 ) +#endif + { + min_q_shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); + scale_sig32( Are_fx[0], size, min_q_shift ); + scale_sig32( Aim_fx[0], size, min_q_shift ); + *q_A = add( *q_A, min_q_shift ); + move16(); + } + +#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx + IF( Bscale == 1 ) +#endif + { + min_q_shift = sub( s_min( L_norm_arr( Bre_fx[0], size ), L_norm_arr( Bim_fx[0], size ) ), 1 ); + scale_sig32( Bre_fx[0], size, min_q_shift ); + scale_sig32( Bim_fx[0], size, min_q_shift ); + *q_B = add( *q_B, min_q_shift ); + move16(); + } FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { @@ -4094,6 +4128,7 @@ static void chol2x2_fx( // 4611686 = Q62 IF( outRe[1][1] == 0 ) { +#if !defined( FIX_1072_REDUCE_DIVS ) outRe[0][1] = BASOP_Util_Divide3232_Scale_cadence( c_re, 4611686, &exp ); move32(); q_re2 = add( sub( 31, exp ), sub( q_c, 62 ) ); @@ -4101,9 +4136,25 @@ static void chol2x2_fx( outIm[0][1] = BASOP_Util_Divide3232_Scale_cadence( -c_im, 4611686, &exp ); move32(); q_im = add( sub( 31, exp ), sub( q_c, 62 ) ); + +#else + // outRe[0][1] = BASOP_Util_Divide3232_Scale_cadence( c_re, 4611686, &exp ); + Word32 tmp1 = 1953125005; /* 1/4611686 Q62 */ + exp = 9; + + outRe[0][1] = Mpy_32_32( tmp1, c_re ); + move32(); + q_re2 = add( sub( 31, exp ), sub( q_c, 62 ) ); + + // outIm[0][1] = BASOP_Util_Divide3232_Scale_cadence( -c_im, 4611686, &exp ); + outIm[0][1] = Mpy_32_32( tmp1, -c_im ); + move32(); + q_im = add( sub( 31, exp ), sub( q_c, 62 ) ); +#endif } ELSE { +#if !defined( FIX_1072_REDUCE_DIVS ) outRe[0][1] = BASOP_Util_Divide3232_Scale_cadence( c_re, outRe[1][1], &exp ); move32(); q_re2 = add( sub( 31, exp ), sub( q_c, q_re1 ) ); @@ -4111,6 +4162,20 @@ static void chol2x2_fx( outIm[0][1] = BASOP_Util_Divide3232_Scale_cadence( -c_im, outRe[1][1], &exp ); move32(); q_im = add( sub( 31, exp ), sub( q_c, q_re1 ) ); +#else + { + // outRe[0][1] = BASOP_Util_Divide3232_Scale_cadence( c_re, outRe[1][1], &exp ); + Word32 tmp1 = BASOP_Util_Divide3232_Scale_cadence( 0x7FFFFFFF, outRe[1][1], &exp ); + outRe[0][1] = Mpy_32_32( tmp1, c_re ); + move32(); + q_re2 = add( sub( 31, exp ), sub( q_c, q_re1 ) ); + + // outIm[0][1] = BASOP_Util_Divide3232_Scale_cadence( -c_im, outRe[1][1], &exp ); + outIm[0][1] = Mpy_32_32( tmp1, -c_im ); + move32(); + q_im = add( sub( 31, exp ), sub( q_c, q_re1 ) ); + } +#endif } if ( outRe[0][1] == 0 ) { @@ -4358,7 +4423,7 @@ static void formulate2x2MixingMatrix_fx( Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp } #else - BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62 + temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62 exp = sub( exp, sub( q_eout, 62 ) ); #endif } @@ -4395,7 +4460,7 @@ static void formulate2x2MixingMatrix_fx( Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 } #else - BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62 + temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62 exp1 = sub( exp1, sub( q_eout, 62 ) ); #endif } @@ -4584,7 +4649,12 @@ static void formulate2x2MixingMatrix_fx( } } - matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ + matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, +#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx + 1 /*int Ascale*/, + 0 /*int Bscale*/, +#endif + Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ #if ( BINAURAL_CHANNELS != 2 ) @@ -4733,8 +4803,12 @@ static void formulate2x2MixingMatrix_fx( matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, tmpRe_fx, tmpIm_fx, &q_temp ); - matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, Mre_fx, Mim_fx, q_M ); - + matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, +#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx + 1 /*int Ascale*/, + 0 /*int Bscale*/, +#endif + Mre_fx, Mim_fx, q_M ); return; }