Optimise matrix function. (0cc97443) · Commits · SA4 / Audio / IVAS BASOP

lib_com/options.h

+4 −3

Original line number	Diff line number	Diff line
		@@ -109,6 +109,7 @@
		#define OPT_2239_IVAS_FILTER_PROCESS /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */
		#define NONBE_OPT_2193_EIG2X2 /* Dolby: Issue 2193, optimize eig2x2_fx. */
		#define BE_FIX_2240_COMPUTE_COV_MTC_FX_FAST /* FhG: Speeds up covariance calculation e.g. 60 WMOPS for encoding -mc 7_1_4 24400 48 */
		#define OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS /* Dolby: Issue 2269, optimize IVAS DIRAC DEC binaural functions. */

		/* #################### End BASOP optimization switches ############################ */

lib_rend/ivas_dirac_dec_binaural_functions_fx.c

+308 −8

Original line number	Diff line number	Diff line
		@@ -4185,9 +4185,39 @@ static void matrixDiagMul_fx(
		Word32 imOut_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_Out*/
		Word16 *q_Out )
		{
		Word16 chA, chB;
		Word32 not_zero = 0;

		#ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS
		reOut_fx[0][0] = Mpy_32_32( reIn_fx[0][0], D_fx[0] );
		imOut_fx[0][0] = Mpy_32_32( imIn_fx[0][0], D_fx[0] );
		move32();
		move32();
		not_zero = L_or( not_zero, reOut_fx[0][0] );
		not_zero = L_or( not_zero, imOut_fx[0][0] );

		reOut_fx[0][1] = Mpy_32_32( reIn_fx[0][1], D_fx[1] );
		imOut_fx[0][1] = Mpy_32_32( imIn_fx[0][1], D_fx[1] );
		move32();
		move32();
		not_zero = L_or( not_zero, reOut_fx[0][1] );
		not_zero = L_or( not_zero, imOut_fx[0][1] );

		reOut_fx[1][0] = Mpy_32_32( reIn_fx[1][0], D_fx[0] );
		imOut_fx[1][0] = Mpy_32_32( imIn_fx[1][0], D_fx[0] );
		move32();
		move32();
		not_zero = L_or( not_zero, reOut_fx[1][0] );
		not_zero = L_or( not_zero, imOut_fx[1][0] );

		reOut_fx[1][1] = Mpy_32_32( reIn_fx[1][1], D_fx[1] );
		imOut_fx[1][1] = Mpy_32_32( imIn_fx[1][1], D_fx[1] );
		move32();
		move32();
		not_zero = L_or( not_zero, reOut_fx[1][1] );
		not_zero = L_or( not_zero, imOut_fx[1][1] );
		#else
		Word16 chA, chB;

		FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
		{
		FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
		@@ -4200,7 +4230,7 @@ static void matrixDiagMul_fx(
		not_zero = L_or( not_zero, imOut_fx[chA][chB] );
		}
		}

		#endif
		*q_Out = sub( add( q_In, q_D ), 31 );
		move16();
		if ( !not_zero )
		@@ -4217,6 +4247,78 @@ static void matrixScale_fx(
		Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
		Word16 *q_A )
		{
		#ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS
		Word16 shift_tmp;
		Word16 shift = 31;
		move16();

		shift_tmp = norm_l( Are_fx[0][0] );
		if ( Are_fx[0][0] != 0 )
		{
		shift = s_min( shift, shift_tmp );
		}
		shift_tmp = norm_l( Are_fx[0][1] );
		if ( Are_fx[0][1] != 0 )
		{
		shift = s_min( shift, shift_tmp );
		}
		shift_tmp = norm_l( Are_fx[1][0] );
		if ( Are_fx[1][0] != 0 )
		{
		shift = s_min( shift, shift_tmp );
		}
		shift_tmp = norm_l( Are_fx[1][1] );
		if ( Are_fx[1][1] != 0 )
		{
		shift = s_min( shift, shift_tmp );
		}

		shift_tmp = norm_l( Aim_fx[0][0] );
		if ( Aim_fx[0][0] != 0 )
		{
		shift = s_min( shift, shift_tmp );
		}
		shift_tmp = norm_l( Aim_fx[0][1] );
		if ( Aim_fx[0][1] != 0 )
		{
		shift = s_min( shift, shift_tmp );
		}
		shift_tmp = norm_l( Aim_fx[1][0] );
		if ( Aim_fx[1][0] != 0 )
		{
		shift = s_min( shift, shift_tmp );
		}
		shift_tmp = norm_l( Aim_fx[1][1] );
		if ( Aim_fx[1][1] != 0 )
		{
		shift = s_min( shift, shift_tmp );
		}

		shift = sub( shift, 1 );

		IF( shift != 0 )
		{
		Are_fx[0][0] = L_shl( Are_fx[0][0], shift );
		Are_fx[0][1] = L_shl( Are_fx[0][1], shift );
		Are_fx[1][0] = L_shl( Are_fx[1][0], shift );
		Are_fx[1][1] = L_shl( Are_fx[1][1], shift );
		Aim_fx[0][0] = L_shl( Aim_fx[0][0], shift );
		Aim_fx[0][1] = L_shl( Aim_fx[0][1], shift );
		Aim_fx[1][0] = L_shl( Aim_fx[1][0], shift );
		Aim_fx[1][1] = L_shl( Aim_fx[1][1], shift );
		move32();
		move32();
		move32();
		move32();
		move32();
		move32();
		move32();
		move32();
		}

		*q_A = add( *q_A, shift );
		move16();
		#else
		Word16 shift;
		Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS;
		shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 );
		@@ -4224,6 +4326,7 @@ static void matrixScale_fx(
		scale_sig32( Aim_fx[0], size, shift );
		*q_A = add( *q_A, shift );
		move16();
		#endif
		}

		static void matrixMul_fx(
		@@ -4237,8 +4340,61 @@ static void matrixMul_fx(
		Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
		Word16 *q_out )
		{
		Word16 chA, chB;
		Word32 not_zero = 0;
		#if defined( OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS ) && !defined( IVAS_ENH64_CADENCE_CHANGES )
		outRe_fx[0][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][0] ),
		Are_fx[0][1], Bre_fx[1][0] ),
		Aim_fx[0][0], Bim_fx[0][0] ),
		Aim_fx[0][1], Bim_fx[1][0] );
		move32();
		outIm_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][0] ),
		Aim_fx[0][1], Bre_fx[1][0] ),
		Are_fx[0][0], Bim_fx[0][0] ),
		Are_fx[0][1], Bim_fx[1][0] );
		move32();
		not_zero = L_or( not_zero, outRe_fx[0][0] );
		not_zero = L_or( not_zero, outIm_fx[0][0] );

		outRe_fx[0][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][1] ),
		Are_fx[0][1], Bre_fx[1][1] ),
		Aim_fx[0][0], Bim_fx[0][1] ),
		Aim_fx[0][1], Bim_fx[1][1] );
		move32();
		outIm_fx[0][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][1] ),
		Aim_fx[0][1], Bre_fx[1][1] ),
		Are_fx[0][0], Bim_fx[0][1] ),
		Are_fx[0][1], Bim_fx[1][1] );
		move32();
		not_zero = L_or( not_zero, outRe_fx[0][1] );
		not_zero = L_or( not_zero, outIm_fx[0][1] );

		outRe_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][0] ),
		Are_fx[1][1], Bre_fx[1][0] ),
		Aim_fx[1][0], Bim_fx[0][0] ),
		Aim_fx[1][1], Bim_fx[1][0] );
		move32();
		outIm_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][0] ),
		Aim_fx[1][1], Bre_fx[1][0] ),
		Are_fx[1][0], Bim_fx[0][0] ),
		Are_fx[1][1], Bim_fx[1][0] );
		move32();
		not_zero = L_or( not_zero, outRe_fx[1][0] );
		not_zero = L_or( not_zero, outIm_fx[1][0] );

		outRe_fx[1][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][1] ),
		Are_fx[1][1], Bre_fx[1][1] ),
		Aim_fx[1][0], Bim_fx[0][1] ),
		Aim_fx[1][1], Bim_fx[1][1] );
		move32();
		outIm_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][1] ),
		Aim_fx[1][1], Bre_fx[1][1] ),
		Are_fx[1][0], Bim_fx[0][1] ),
		Are_fx[1][1], Bim_fx[1][1] );
		move32();
		not_zero = L_or( not_zero, outRe_fx[1][1] );
		not_zero = L_or( not_zero, outIm_fx[1][1] );
		#else
		Word16 chA, chB;

		FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
		{
		@@ -4269,6 +4425,7 @@ static void matrixMul_fx(
		not_zero = L_or( not_zero, outIm_fx[chA][chB] );
		}
		}
		#endif

		*q_out = sub( add( *q_A, *q_B ), 31 );
		move16();
		@@ -4292,8 +4449,6 @@ static void matrixTransp1Mul_fx(
		Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
		Word16 *q_out )
		{
		Word16 chA, chB;

		Word64 tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
		Word64 tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
		Word64 tmp64;
		@@ -4302,6 +4457,96 @@ static void matrixTransp1Mul_fx(
		common_lsh = sub( 63, q );
		move16();

		Word32 not_zero = 0;
		move32();

		#ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS
		tmp64 = W_mult_32_32( Are_fx[0][0], Bre_fx[0][0] );
		tmp64 = W_mac_32_32( tmp64, Are_fx[1][0], Bre_fx[1][0] );
		tmp64 = W_mac_32_32( tmp64, Aim_fx[0][0], Bim_fx[0][0] );
		tmp_outRe_fx[0][0] = W_mac_32_32( tmp64, Aim_fx[1][0], Bim_fx[1][0] );
		move64();
		common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[0][0] ) );

		tmp64 = W_mult_32_32( Aim_fx[0][0], Bre_fx[0][0] );
		tmp64 = W_mac_32_32( tmp64, Aim_fx[1][0], Bre_fx[1][0] );
		tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][0], Bim_fx[0][0] );
		tmp_outIm_fx[0][0] = W_mac_32_32( tmp64, Are_fx[1][0], Bim_fx[1][0] );
		move64();
		common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[0][0] ) );

		tmp64 = W_mult_32_32( Are_fx[0][0], Bre_fx[0][1] );
		tmp64 = W_mac_32_32( tmp64, Are_fx[1][0], Bre_fx[1][1] );
		tmp64 = W_mac_32_32( tmp64, Aim_fx[0][0], Bim_fx[0][1] );
		tmp_outRe_fx[0][1] = W_mac_32_32( tmp64, Aim_fx[1][0], Bim_fx[1][1] );
		move64();
		common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[0][1] ) );

		tmp64 = W_mult_32_32( Aim_fx[0][0], Bre_fx[0][1] );
		tmp64 = W_mac_32_32( tmp64, Aim_fx[1][0], Bre_fx[1][1] );
		tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][0], Bim_fx[0][1] );
		tmp_outIm_fx[0][1] = W_mac_32_32( tmp64, Are_fx[1][0], Bim_fx[1][1] );
		move64();
		common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[0][1] ) );

		tmp64 = W_mult_32_32( Are_fx[0][1], Bre_fx[0][0] );
		tmp64 = W_mac_32_32( tmp64, Are_fx[1][1], Bre_fx[1][0] );
		tmp64 = W_mac_32_32( tmp64, Aim_fx[0][1], Bim_fx[0][0] );
		tmp_outRe_fx[1][0] = W_mac_32_32( tmp64, Aim_fx[1][1], Bim_fx[1][0] );
		move64();
		common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[1][0] ) );

		tmp64 = W_mult_32_32( Aim_fx[0][1], Bre_fx[0][0] );
		tmp64 = W_mac_32_32( tmp64, Aim_fx[1][1], Bre_fx[1][0] );
		tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][1], Bim_fx[0][0] );
		tmp_outIm_fx[1][0] = W_mac_32_32( tmp64, Are_fx[1][1], Bim_fx[1][0] );
		move64();
		common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[1][0] ) );

		tmp64 = W_mult_32_32( Are_fx[0][1], Bre_fx[0][1] );
		tmp64 = W_mac_32_32( tmp64, Are_fx[1][1], Bre_fx[1][1] );
		tmp64 = W_mac_32_32( tmp64, Aim_fx[0][1], Bim_fx[0][1] );
		tmp_outRe_fx[1][1] = W_mac_32_32( tmp64, Aim_fx[1][1], Bim_fx[1][1] );
		move64();
		common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[1][1] ) );

		tmp64 = W_mult_32_32( Aim_fx[0][1], Bre_fx[0][1] );
		tmp64 = W_mac_32_32( tmp64, Aim_fx[1][1], Bre_fx[1][1] );
		tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][1], Bim_fx[0][1] );
		tmp_outIm_fx[1][1] = W_mac_32_32( tmp64, Are_fx[1][1], Bim_fx[1][1] );
		move64();
		common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[1][1] ) );

		outRe_fx[0][0] = W_extract_h( W_shl( tmp_outRe_fx[0][0], common_lsh ) );
		move32();
		outIm_fx[0][0] = W_extract_h( W_shl( tmp_outIm_fx[0][0], common_lsh ) );
		move32();
		not_zero = L_or( not_zero, outRe_fx[0][0] );
		not_zero = L_or( not_zero, outIm_fx[0][0] );

		outRe_fx[0][1] = W_extract_h( W_shl( tmp_outRe_fx[0][1], common_lsh ) );
		move32();
		outIm_fx[0][1] = W_extract_h( W_shl( tmp_outIm_fx[0][1], common_lsh ) );
		move32();
		not_zero = L_or( not_zero, outRe_fx[0][1] );
		not_zero = L_or( not_zero, outIm_fx[0][1] );

		outRe_fx[1][0] = W_extract_h( W_shl( tmp_outRe_fx[1][0], common_lsh ) );
		move32();
		outIm_fx[1][0] = W_extract_h( W_shl( tmp_outIm_fx[1][0], common_lsh ) );
		move32();
		not_zero = L_or( not_zero, outRe_fx[1][0] );
		not_zero = L_or( not_zero, outIm_fx[1][0] );

		outRe_fx[1][1] = W_extract_h( W_shl( tmp_outRe_fx[1][1], common_lsh ) );
		move32();
		outIm_fx[1][1] = W_extract_h( W_shl( tmp_outIm_fx[1][1], common_lsh ) );
		move32();
		not_zero = L_or( not_zero, outRe_fx[1][1] );
		not_zero = L_or( not_zero, outIm_fx[1][1] );
		#else
		Word16 chA, chB;

		FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
		{
		FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
		@@ -4321,7 +4566,7 @@ static void matrixTransp1Mul_fx(
		common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[chA][chB] ) );
		}
		}
		Word32 not_zero = 0;

		FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
		{
		FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
		@@ -4334,6 +4579,8 @@ static void matrixTransp1Mul_fx(
		not_zero = L_or( not_zero, outIm_fx[chA][chB] );
		}
		}
		#endif

		*q_out = sub( add( q, common_lsh ), 32 );
		move16();
		if ( !not_zero )
		@@ -4356,9 +4603,61 @@ static void matrixTransp2Mul_fx(
		Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
		Word16 *q_out )
		{
		Word16 chA, chB;
		// Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS;
		Word32 not_zero = 0;
		#if defined( OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS ) && !defined( IVAS_ENH64_CADENCE_CHANGES )
		outRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][0] ),
		Are_fx[0][1], Bre_fx[0][1] ),
		Aim_fx[0][0], Bim_fx[0][0] ),
		Aim_fx[0][1], Bim_fx[0][1] );
		move32();
		outIm_fx[0][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][0] ),
		Aim_fx[0][1], Bre_fx[0][1] ),
		Are_fx[0][0], Bim_fx[0][0] ),
		Are_fx[0][1], Bim_fx[0][1] );
		move32();
		not_zero = L_or( not_zero, outRe_fx[0][0] );
		not_zero = L_or( not_zero, outIm_fx[0][0] );

		outRe_fx[0][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[1][0] ),
		Are_fx[0][1], Bre_fx[1][1] ),
		Aim_fx[0][0], Bim_fx[1][0] ),
		Aim_fx[0][1], Bim_fx[1][1] );
		move32();
		outIm_fx[0][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[1][0] ),
		Aim_fx[0][1], Bre_fx[1][1] ),
		Are_fx[0][0], Bim_fx[1][0] ),
		Are_fx[0][1], Bim_fx[1][1] );
		move32();
		not_zero = L_or( not_zero, outRe_fx[0][1] );
		not_zero = L_or( not_zero, outIm_fx[0][1] );

		outRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][0] ),
		Are_fx[1][1], Bre_fx[0][1] ),
		Aim_fx[1][0], Bim_fx[0][0] ),
		Aim_fx[1][1], Bim_fx[0][1] );
		move32();
		outIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][0] ),
		Aim_fx[1][1], Bre_fx[0][1] ),
		Are_fx[1][0], Bim_fx[0][0] ),
		Are_fx[1][1], Bim_fx[0][1] );
		move32();
		not_zero = L_or( not_zero, outRe_fx[1][0] );
		not_zero = L_or( not_zero, outIm_fx[1][0] );

		outRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[1][0] ),
		Are_fx[1][1], Bre_fx[1][1] ),
		Aim_fx[1][0], Bim_fx[1][0] ),
		Aim_fx[1][1], Bim_fx[1][1] );
		move32();
		outIm_fx[1][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[1][0] ),
		Aim_fx[1][1], Bre_fx[1][1] ),
		Are_fx[1][0], Bim_fx[1][0] ),
		Are_fx[1][1], Bim_fx[1][1] );
		move32();
		not_zero = L_or( not_zero, outRe_fx[1][1] );
		not_zero = L_or( not_zero, outIm_fx[1][1] );
		#else
		Word16 chA, chB;
		FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
		{
		FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
		@@ -4388,6 +4687,7 @@ static void matrixTransp2Mul_fx(
		not_zero = L_or( not_zero, outIm_fx[chA][chB] );
		}
		}
		#endif

		*q_out = sub( add( *q_A, *q_B ), 31 );
		move16();