diff --git a/lib_com/options.h b/lib_com/options.h index c89a248523c74e65af1066e010bed50f31522194..5e96bfb28581095ebbe70d64abd39acc6832b37e 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -147,4 +147,11 @@ /* #################### End BASOP porting switches ############################ */ +/* #################### Start BASOP optimization switches ############################ */ + +#define OPT_2181_MATRIX_TRANSP_1_MUL /* Dolby: Issue 2181, optimize matrixTransp1Mul_fx. */ +#define OPT_2182_MATRIX_SCALE_OPS /* Dolby: Issue 2182, move matrix scale operations outside mul operations. */ + +/* #################### End BASOP optimization switches ############################ */ + #endif diff --git a/lib_dec/ivas_ism_metadata_dec_fx.c b/lib_dec/ivas_ism_metadata_dec_fx.c index 54cc5da15e24d25f025530478d84a4674a0c5ad2..4d3c06b9c0b4d919657a8f206c56115d7c28aca2 100644 --- a/lib_dec/ivas_ism_metadata_dec_fx.c +++ b/lib_dec/ivas_ism_metadata_dec_fx.c @@ -30,7 +30,6 @@ *******************************************************************************************************/ -#include "move.h" #include #include "options.h" #include "ivas_cnst.h" @@ -40,6 +39,7 @@ #include "ivas_stat_enc.h" #include #include "wmc_auto.h" +#include "move.h" #include "ivas_prot_fx.h" diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 3211b0e46fcb0b5844447e2e1ec4d87fa18b6bcf..d1d7bd8b65488b0a610a7046dcb70a864114ea4c 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -118,9 +118,17 @@ static void ivas_masa_ext_rend_parambin_internal_fx( MASA_EXT_REND_HANDLE hMasaE static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 CinRe_fx /*q_Cin*/, Word32 CinIm_fx /*q_Cin*/, Word16 q_Cin, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, 
Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16 *q_M, const Word16 regularizationFactor_fx /*Q14*/ ); +#ifdef OPT_2182_MATRIX_SCALE_OPS +static void matrixScale_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A ); +#endif + static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); +#ifdef OPT_2182_MATRIX_SCALE_OPS +static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); +#else static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 Ascale, Word32 Bscale, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out ); +#endif /*------------------------------------------------------------------------- @@ -2284,12 +2292,23 @@ static void 
ivas_dirac_dec_binaural_determine_processing_matrices_fx( } /* Make matrix multiplication M*Cx*M' to determine resulting covariance matrix of processing input with M */ +#ifdef OPT_2182_MATRIX_SCALE_OPS + matrixScale_fx( Mre_fx, Mim_fx, &q_M ); /* operands are rescaled here, before the multiply; under this switch the scaling block inside matrixMul_fx is compiled out */ + matrixScale_fx( CxRe_fx, CxIm_fx, &q_Cx ); +#endif matrixMul_fx( Mre_fx, Mim_fx, &q_M, CxRe_fx, CxIm_fx, &q_Cx, tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp ); +#ifdef OPT_2182_MATRIX_SCALE_OPS + matrixScale_fx( tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp ); /* stands in for the Ascale = 1 argument of the #else call below; Mre_fx/Mim_fx were already rescaled above */ + matrixTransp2Mul_fx( + tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp, Mre_fx, Mim_fx, &q_M, + resultMtxRe_fx, resultMtxIm_fx, &q_res ); +#else matrixTransp2Mul_fx( tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp, Mre_fx, Mim_fx, &q_M, 1 /*int Ascale*/, 0 /*int Bscale*/, resultMtxRe_fx, resultMtxIm_fx, &q_res ); +#endif /* When below the frequency limit where decorrelation is applied, we inject the decorrelated * residual (or missing) signal component. The procedure is active when there are not enough independent @@ -3938,6 +3957,22 @@ static void matrixDiagMul_fx( return; } +#ifdef OPT_2182_MATRIX_SCALE_OPS +static void matrixScale_fx( /* Rescale the real and imaginary parts of a BINAURAL_CHANNELS x BINAURAL_CHANNELS matrix in place by one common shift and add that shift to *q_A */ + Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* i/o: real part, q_A on entry, updated q_A on return */ + Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* i/o: imaginary part, q_A on entry, updated q_A on return */ + Word16 *q_A ) /* i/o: Q value shared by both parts; incremented by the applied shift */ +{ + Word16 shift; + Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); /* element count of one part; each part is handed to the helpers as one flat run starting at row 0 */ + shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); /* smaller of the two L_norm_arr results, minus 1; presumably this keeps one bit of headroom for the sums formed in the following multiply - TODO confirm */ + scale_sig32( Are_fx[0], size, shift ); /* the same shift is used for both parts so that a single Q value still describes them */ + scale_sig32( Aim_fx[0], size, shift ); + *q_A = add( *q_A, shift ); /* report the new Q value to the caller */ + move16(); +} +#endif + static void matrixMul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ @@ -3949,6 +3984,10 @@ static void matrixMul_fx( Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word16 *q_out ) { +#ifdef OPT_2182_MATRIX_SCALE_OPS + Word16 chA, chB; + Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); +#else Word16 chA, chB; Word16 min_q_shift1, 
min_q_shift2; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); @@ -3965,6 +4004,7 @@ static void matrixMul_fx( *q_B = add( *q_B, min_q_shift2 ); move16(); move16(); +#endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { @@ -4021,6 +4061,45 @@ static void matrixTransp1Mul_fx( Word64 tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; +#ifdef OPT_2181_MATRIX_TRANSP_1_MUL + Word64 tmp64; + Word16 common_lsh, q; + q = add( add( q_A, q_B ), 1 ); + common_lsh = sub( 63, q ); + move16(); + + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + tmp64 = W_mult_32_32( Are_fx[0][chA], Bre_fx[0][chB] ); + tmp64 = W_mac_32_32( tmp64, Are_fx[1][chA], Bre_fx[1][chB] ); + tmp64 = W_mac_32_32( tmp64, Aim_fx[0][chA], Bim_fx[0][chB] ); + tmp_outRe_fx[chA][chB] = tmp64 = W_mac_32_32( tmp64, Aim_fx[1][chA], Bim_fx[1][chB] ); + move64(); + common_lsh = s_min( common_lsh, W_norm( tmp64 ) ); + + tmp64 = W_mult_32_32( Aim_fx[0][chA], Bre_fx[0][chB] ); + tmp64 = W_mac_32_32( tmp64, Aim_fx[1][chA], Bre_fx[1][chB] ); + tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][chA], Bim_fx[0][chB] ); + tmp_outIm_fx[chA][chB] = tmp64 = W_mac_32_32( tmp64, Are_fx[1][chA], Bim_fx[1][chB] ); + move64(); + common_lsh = s_min( common_lsh, W_norm( tmp64 ) ); + } + } + FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) + { + FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) + { + outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], common_lsh ) ); + move32(); + outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], common_lsh ) ); + move32(); + } + } + *q_out = sub( add( q, common_lsh ), 32 ); + move16(); +#else Word16 q_tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word16 q_tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp64_1, tmp64_2; @@ -4067,6 +4146,7 @@ static void matrixTransp1Mul_fx( } *q_out = sub( q_common, 32 ); move16(); +#endif if ( L_and( 
is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) { *q_out = Q31; @@ -4076,6 +4156,18 @@ static void matrixTransp1Mul_fx( return; } +#ifdef OPT_2182_MATRIX_SCALE_OPS +static void matrixTransp2Mul_fx( + Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ + Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ + Word16 *q_A, + Word32 Bre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/ + Word32 Bim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/ + Word16 *q_B, + Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ + Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ + Word16 *q_out ) +#else static void matrixTransp2Mul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ @@ -4088,7 +4180,12 @@ static void matrixTransp2Mul_fx( Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word16 *q_out ) +#endif { +#ifdef OPT_2182_MATRIX_SCALE_OPS + Word16 chA, chB; + Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; +#else Word16 chA, chB; Word16 min_q_shift; Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; @@ -4110,6 +4207,7 @@ static void matrixTransp2Mul_fx( *q_B = add( *q_B, min_q_shift ); move16(); } +#endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { @@ -4647,6 +4745,10 @@ static void formulate2x2MixingMatrix_fx( q_temp = sub( add( q_ky, q_GhatQ ), 31 ); /* A = Ky' * G_hat * Q * Kx (see publication) */ +#ifdef OPT_2182_MATRIX_SCALE_OPS + matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp ); + matrixScale_fx( Kxre_fx, Kxim_fx, &q_Kx ); +#endif matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A ); /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx @@ -4704,6 +4806,10 @@ static void formulate2x2MixingMatrix_fx( div_fx[0] = L_min( div_fx[0], thresh ); // q_div div_fx[1] = L_min( div_fx[1], thresh ); // 
q_div +#ifdef OPT_2182_MATRIX_SCALE_OPS + matrixScale_fx( Are_fx, Aim_fx, &q_A ); + matrixScale_fx( Ure_fx, Uim_fx, &q_U ); +#endif matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp ); exp = L_norm_arr( div_fx, BINAURAL_CHANNELS ); @@ -4767,10 +4873,15 @@ static void formulate2x2MixingMatrix_fx( } } +#ifdef OPT_2182_MATRIX_SCALE_OPS + matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, + Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ +#else matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, 0 /*int Ascale*/, 0 /*int Bscale*/, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ +#endif /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */ #if ( BINAURAL_CHANNELS != 2 ) @@ -4901,12 +5012,22 @@ static void formulate2x2MixingMatrix_fx( } } +#ifdef OPT_2182_MATRIX_SCALE_OPS + matrixScale_fx( KyRe_fx, KyIm_fx, &q_ky ); + matrixScale_fx( Pre_fx, Pim_fx, &q_P ); +#endif matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, tmpRe_fx, tmpIm_fx, &q_temp ); - +#ifdef OPT_2182_MATRIX_SCALE_OPS + matrixScale_fx( tmpRe_fx, tmpIm_fx, &q_temp ); + matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, + Mre_fx, Mim_fx, q_M ); +#else matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, 1 /*int Ascale*/, 0 /*int Bscale*/, Mre_fx, Mim_fx, q_M ); +#endif + return; }