activate chol2x2 macro to test pipeline (0d8f56c9) · Commits · SA4 / Audio / IVAS BASOP

lib_rend/ivas_dirac_dec_binaural_functions.c

+209 −15

Original line number	Diff line number	Diff line
		@@ -50,16 +50,20 @@
		//#define FIX1072_SPEEDUP_formulate2x2MixingMatrix_fx_simple
		//#define FIX1072_SPEEDUP_formulate2x2MixingMatrix_fx_reduceDivs // orange tests

		//#define FIX1072_SPEEDUP_chol2x2_fx

		#define FIX_1072_SPEEDUP_matrixMul_fx
		#define FIX_1072_SPEEDUP_matrixTransp1Mul_fx
		#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch
		#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest
		#define FIX_1072_SPEEDUP_matrixFunctions_negateTuning

		#define FIX1072_SPEEDUP_chol2x2_fx // 6Mhz

		// SPEEDUP_matrix 012345: ACCEPT REGRESSIONS // NULL: 306.459 MHz --> difference to SPEEDUP_matrix 012345 : 14 MHz
		//#define FIX_1072_SPEEDUP_matrixMul_fx //SPEEDUP_matrix_0
		//#define FIX_1072_SPEEDUP_matrixTransp1Mul_fx //SPEEDUP_matrix_1
		//#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch //SPEEDUP_matrix_2
		//#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest //SPEEDUP_matrix_3
		//#define FIX_1072_SPEEDUP_matrixFunctions_negateTuning //SPEEDUP_matrix_4
		#if BINAURAL_CHANNELS==2
		//#define FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2 //SPEEDUP_matrix_5 //293.773 , 292.468
		#endif

		// issue 1072
		//#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT
		#ifdef FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT
		// numbers fa2a72:
		// default : 266.984
		// no-opt : 290.663 --> 23.5 MHz gain
		@@ -68,8 +72,6 @@
		// 1 : 282.651 --> 8 MHz gain
		// 0 : 282.704 --> 8 MHz gain

		//#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT
		#ifdef FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT
		#include <stdio.h>

		//#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT_0 //8Mhz with 1072 issue stream
		@@ -3575,6 +3577,7 @@ static inline Word32 matrixMul_func1( Word32 cond1, Word32 cond2, Word32 prod )
		tmp = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bre_fx[0][chB] ) ) );
		*/
		}

		#endif


		@@ -3962,6 +3965,181 @@ static void matrixTransp1Mul_fx(
		return;
		}

		#ifdef FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2
		/*
		 * matrixTransp1Mul_fx_in1isin2()
		 *
		 * Fully unrolled 2x2 variant of the "conjugate-transposed A times A" product,
		 * i.e. for chA, chB in {0, 1}:
		 *
		 *     out[chA][chB] = sum_k conj( A[k][chA] ) * A[k][chB],   k = 0, 1
		 *
		 * It is used where both inputs of matrixTransp1Mul_fx() would be the same
		 * matrix, so only one input (Are_fx/Aim_fx, q_A) is taken.
		 *
		 * Are_fx, Aim_fx : real / imaginary part of the input matrix, Q-format q_A
		 * q_A            : Q-format of the input
		 * outRe_fx,
		 * outIm_fx       : real / imaginary part of the result, Q-format *q_out
		 * q_out          : set to 2 * q_A - 31, or to Q31 when the zero check on
		 *                  both output matrices succeeds
		 *
		 * Cross products go through matrixMul_func1(), which receives two operand
		 * sign flags plus the plain Mpy_32_32() product. For ordinary operands the
		 * flags are "either operand < 0" and "either operand >= 0"; where the first
		 * operand is the negated imaginary part, the GE/LT flags of that operand are
		 * swapped. NOTE(review): the helper is defined outside this block, its exact
		 * rounding behaviour should be confirmed there.
		 *
		 * The paired move32() calls after each helper call are kept exactly as in
		 * the original (presumably complexity-counter bookkeeping -- TODO confirm).
		 */
		static void matrixTransp1Mul_fx_in1isin2(
		    Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS],   /*q_A*/
		    Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS],   /*q_A*/
		    Word16 q_A,
		    Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
		    Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
		    Word16 *q_out )
		{
		    Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
		    Word32 tmp1, tmp2;

		    /* Sign flags and negated imaginary parts for column 0 of A */
		    Word32 Are_fx_0_0_GE_0 = GE_32( Are_fx[0][0], 0 );
		    Word32 Are_fx_0_0_LT_0 = LT_32( Are_fx[0][0], 0 );
		    Word32 Are_fx_1_0_GE_0 = GE_32( Are_fx[1][0], 0 );
		    Word32 Are_fx_1_0_LT_0 = LT_32( Are_fx[1][0], 0 );
		    Word32 Aim_fx_0_0_GE_0 = GE_32( Aim_fx[0][0], 0 );
		    Word32 Aim_fx_0_0_LT_0 = LT_32( Aim_fx[0][0], 0 );
		    Word32 Aim_fx_1_0_GE_0 = GE_32( Aim_fx[1][0], 0 );
		    Word32 Aim_fx_1_0_LT_0 = LT_32( Aim_fx[1][0], 0 );
		    Word32 LNeg_Aim_fx_0_0 = L_negate( Aim_fx[0][0] );

		    /* Sign flags and negated imaginary parts for column 1 of A */
		    Word32 Are_fx_0_1_GE_0 = GE_32( Are_fx[0][1], 0 );
		    Word32 Are_fx_0_1_LT_0 = LT_32( Are_fx[0][1], 0 );
		    Word32 Are_fx_1_1_GE_0 = GE_32( Are_fx[1][1], 0 );
		    Word32 Are_fx_1_1_LT_0 = LT_32( Are_fx[1][1], 0 );
		    Word32 Aim_fx_0_1_GE_0 = GE_32( Aim_fx[0][1], 0 );
		    Word32 Aim_fx_0_1_LT_0 = LT_32( Aim_fx[0][1], 0 );
		    Word32 Aim_fx_1_1_GE_0 = GE_32( Aim_fx[1][1], 0 );
		    Word32 Aim_fx_1_1_LT_0 = LT_32( Aim_fx[1][1], 0 );
		    Word32 LNeg_Aim_fx_0_1 = L_negate( Aim_fx[0][1] );


		    Word32 tmp3;

		    { /*UNROLL FOR BINAURAL CHANNELS==2*/
		        /*CHA=0, CHB=0*/
		        /* Diagonal element: squares are computed directly, without the sign helper */
		        tmp1 = Mpy_32_32( Are_fx[0][0], Are_fx[0][0] );
		        tmp2 = Mpy_32_32( Are_fx[1][0], Are_fx[1][0] );
		        tmp3 = L_add( tmp1, tmp2 );

		        /* tmp1 holds (-Aim) * Aim, so tmp3 - ( tmp1 - tmp2 ) adds both squared imaginary parts */
		        tmp1 = Mpy_32_32( LNeg_Aim_fx_0_0, Aim_fx[0][0] );
		        tmp2 = Mpy_32_32( Aim_fx[1][0], Aim_fx[1][0] );
		        outRe_fx[0][0] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
		        move32();

		        /* Imaginary part: ( -Aim*Are - Aim*Are ) + ( Are*Aim + Are*Aim ) over k = 0, 1 */
		        tmp1 = matrixMul_func1( Aim_fx_0_0_GE_0 || Are_fx_0_0_LT_0, Aim_fx_0_0_LT_0 || Are_fx_0_0_GE_0, Mpy_32_32( LNeg_Aim_fx_0_0, Are_fx[0][0] ) );
		        move32();
		        move32();
		        tmp2 = matrixMul_func1( Aim_fx_1_0_LT_0 || Are_fx_1_0_LT_0, Aim_fx_1_0_GE_0 || Are_fx_1_0_GE_0, Mpy_32_32( Aim_fx[1][0], Are_fx[1][0] ) );
		        move32();
		        move32();
		        tmp3 = L_sub( tmp1, tmp2 );

		        tmp1 = matrixMul_func1( Are_fx_0_0_LT_0 || Aim_fx_0_0_LT_0, Are_fx_0_0_GE_0 || Aim_fx_0_0_GE_0, Mpy_32_32( Are_fx[0][0], Aim_fx[0][0] ) );
		        move32();
		        move32();
		        tmp2 = matrixMul_func1( Are_fx_1_0_LT_0 || Aim_fx_1_0_LT_0, Are_fx_1_0_GE_0 || Aim_fx_1_0_GE_0, Mpy_32_32( Are_fx[1][0], Aim_fx[1][0] ) );
		        move32();
		        move32();
		        outIm_fx[0][0] = L_add( tmp3, L_add( tmp1, tmp2 ) );
		        move32();

		        /*CHA=0, CHB=1*/
		        tmp1 = matrixMul_func1( Are_fx_0_0_LT_0 || Are_fx_0_1_LT_0, Are_fx_0_0_GE_0 || Are_fx_0_1_GE_0, Mpy_32_32( Are_fx[0][0], Are_fx[0][1] ) );
		        move32();
		        move32();
		        tmp2 = matrixMul_func1( Are_fx_1_0_LT_0 || Are_fx_1_1_LT_0, Are_fx_1_0_GE_0 || Are_fx_1_1_GE_0, Mpy_32_32( Are_fx[1][0], Are_fx[1][1] ) );
		        move32();
		        move32();
		        tmp3 = L_add( tmp1, tmp2 );

		        tmp1 = matrixMul_func1( Aim_fx_0_0_GE_0 || Aim_fx_0_1_LT_0, Aim_fx_0_0_LT_0 || Aim_fx_0_1_GE_0, Mpy_32_32( LNeg_Aim_fx_0_0, Aim_fx[0][1] ) );
		        move32();
		        move32();
		        tmp2 = matrixMul_func1( Aim_fx_1_0_LT_0 || Aim_fx_1_1_LT_0, Aim_fx_1_0_GE_0 || Aim_fx_1_1_GE_0, Mpy_32_32( Aim_fx[1][0], Aim_fx[1][1] ) );
		        move32();
		        move32();
		        outRe_fx[0][1] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
		        move32();

		        tmp1 = matrixMul_func1( Aim_fx_0_0_GE_0 || Are_fx_0_1_LT_0, Aim_fx_0_0_LT_0 || Are_fx_0_1_GE_0, Mpy_32_32( LNeg_Aim_fx_0_0, Are_fx[0][1] ) );
		        move32();
		        move32();
		        tmp2 = matrixMul_func1( Aim_fx_1_0_LT_0 || Are_fx_1_1_LT_0, Aim_fx_1_0_GE_0 || Are_fx_1_1_GE_0, Mpy_32_32( Aim_fx[1][0], Are_fx[1][1] ) );
		        move32();
		        move32();
		        tmp3 = L_sub( tmp1, tmp2 );

		        tmp1 = matrixMul_func1( Are_fx_0_0_LT_0 || Aim_fx_0_1_LT_0, Are_fx_0_0_GE_0 || Aim_fx_0_1_GE_0, Mpy_32_32( Are_fx[0][0], Aim_fx[0][1] ) );
		        move32();
		        move32();
		        tmp2 = matrixMul_func1( Are_fx_1_0_LT_0 || Aim_fx_1_1_LT_0, Are_fx_1_0_GE_0 || Aim_fx_1_1_GE_0, Mpy_32_32( Are_fx[1][0], Aim_fx[1][1] ) );
		        move32();
		        move32();
		        outIm_fx[0][1] = L_add( tmp3, L_add( tmp1, tmp2 ) );
		        move32();

		        /*CHA=1, CHB=0*/
		        tmp1 = matrixMul_func1( Are_fx_0_1_LT_0 || Are_fx_0_0_LT_0, Are_fx_0_1_GE_0 || Are_fx_0_0_GE_0, Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ) );
		        move32();
		        move32();
		        tmp2 = matrixMul_func1( Are_fx_1_1_LT_0 || Are_fx_1_0_LT_0, Are_fx_1_1_GE_0 || Are_fx_1_0_GE_0, Mpy_32_32( Are_fx[1][1], Are_fx[1][0] ) );
		        move32();
		        move32();
		        tmp3 = L_add( tmp1, tmp2 );

		        tmp1 = matrixMul_func1( Aim_fx_0_1_GE_0 || Aim_fx_0_0_LT_0, Aim_fx_0_1_LT_0 || Aim_fx_0_0_GE_0, Mpy_32_32( LNeg_Aim_fx_0_1, Aim_fx[0][0] ) );
		        move32();
		        move32();
		        tmp2 = matrixMul_func1( Aim_fx_1_1_LT_0 || Aim_fx_1_0_LT_0, Aim_fx_1_1_GE_0 || Aim_fx_1_0_GE_0, Mpy_32_32( Aim_fx[1][1], Aim_fx[1][0] ) );
		        move32();
		        move32();
		        outRe_fx[1][0] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
		        move32();

		        tmp1 = matrixMul_func1( Aim_fx_0_1_GE_0 || Are_fx_0_0_LT_0, Aim_fx_0_1_LT_0 || Are_fx_0_0_GE_0, Mpy_32_32( LNeg_Aim_fx_0_1, Are_fx[0][0] ) );
		        move32();
		        move32();
		        tmp2 = matrixMul_func1( Aim_fx_1_1_LT_0 || Are_fx_1_0_LT_0, Aim_fx_1_1_GE_0 || Are_fx_1_0_GE_0, Mpy_32_32( Aim_fx[1][1], Are_fx[1][0] ) );
		        move32();
		        move32();
		        tmp3 = L_sub( tmp1, tmp2 );

		        tmp1 = matrixMul_func1( Are_fx_0_1_LT_0 || Aim_fx_0_0_LT_0, Are_fx_0_1_GE_0 || Aim_fx_0_0_GE_0, Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ) );
		        move32();
		        move32();
		        tmp2 = matrixMul_func1( Are_fx_1_1_LT_0 || Aim_fx_1_0_LT_0, Are_fx_1_1_GE_0 || Aim_fx_1_0_GE_0, Mpy_32_32( Are_fx[1][1], Aim_fx[1][0] ) );
		        move32();
		        move32();
		        outIm_fx[1][0] = L_add( tmp3, L_add( tmp1, tmp2 ) );
		        move32();

		        /*CHA=1, CHB=1*/
		        /* Diagonal element: squares are computed directly, without the sign helper */
		        tmp1 = Mpy_32_32( Are_fx[0][1], Are_fx[0][1] );
		        tmp2 = Mpy_32_32( Are_fx[1][1], Are_fx[1][1] );
		        tmp3 = L_add( tmp1, tmp2 );

		        tmp1 = Mpy_32_32( LNeg_Aim_fx_0_1, Aim_fx[0][1] );
		        tmp2 = Mpy_32_32( Aim_fx[1][1], Aim_fx[1][1] );
		        outRe_fx[1][1] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
		        move32();

		        tmp1 = matrixMul_func1( Aim_fx_0_1_GE_0 || Are_fx_0_1_LT_0, Aim_fx_0_1_LT_0 || Are_fx_0_1_GE_0, Mpy_32_32( LNeg_Aim_fx_0_1, Are_fx[0][1] ) );
		        move32();
		        move32();
		        tmp2 = matrixMul_func1( Aim_fx_1_1_LT_0 || Are_fx_1_1_LT_0, Aim_fx_1_1_GE_0 || Are_fx_1_1_GE_0, Mpy_32_32( Aim_fx[1][1], Are_fx[1][1] ) );
		        move32();
		        move32();
		        tmp3 = L_sub( tmp1, tmp2 );

		        tmp1 = matrixMul_func1( Are_fx_0_1_LT_0 || Aim_fx_0_1_LT_0, Are_fx_0_1_GE_0 || Aim_fx_0_1_GE_0, Mpy_32_32( Are_fx[0][1], Aim_fx[0][1] ) );
		        move32();
		        move32();
		        tmp2 = matrixMul_func1( Are_fx_1_1_LT_0 || Aim_fx_1_1_LT_0, Are_fx_1_1_GE_0 || Aim_fx_1_1_GE_0, Mpy_32_32( Are_fx[1][1], Aim_fx[1][1] ) );
		        move32();
		        move32();
		        outIm_fx[1][1] = L_add( tmp3, L_add( tmp1, tmp2 ) );
		        move32();
		    }

		    /* Product of two q_A values through Mpy_32_32: result Q is 2 * q_A - 31 */
		    *q_out = sub( add( q_A, q_A ), 31 );

		    move16();

		    /* If both output matrices pass the zero check, report the result as Q31 */
		    if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) )
		    {
		        *q_out = Q31;
		        move16();
		    }
		    return;
		}
		#endif /*FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2*/

		static void matrixTransp2Mul_fx(
		Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
		Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
		@@ -4664,6 +4842,7 @@ static void formulate2x2MixingMatrix_fx(
		pop_wmops(); //( "IDDB_2x2Matrix_Part1" );
		push_wmops( "IDDB_2x2Matrix_Part2" );

		push_wmops( "IDDB_2x2Matrix_Part2.1" );
		/* Cholesky decomposition of target / output covariance matrix */
		chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky );

		@@ -4695,6 +4874,8 @@ static void formulate2x2MixingMatrix_fx(
		temp = Mpy_32_32( E_in2, 2147484 ); // 2147484 = 0.001f in Q31
		temp = L_max( temp, E_in1 );

		pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.1" );
		push_wmops( "IDDB_2x2Matrix_Part2.2" );
		#if defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT ) && defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT_2 )
		/*IF (E_out1 == 0)*/
		{
		@@ -4818,6 +4999,8 @@ static void formulate2x2MixingMatrix_fx(
		move32();
		Ghat_fx[1] = L_shr( Ghat_fx[1], sub( sub( 31, exp1 ), q_Ghat ) ); // q_Ghat
		move32();
		pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.2" );
		push_wmops( "IDDB_2x2Matrix_Part2.3" );

		/* Matrix multiplication, tmp = Ky' * G_hat * Q */
		FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
		@@ -4848,12 +5031,19 @@ static void formulate2x2MixingMatrix_fx(

		q_temp = sub( add( q_ky, q_GhatQ ), 31 );

		pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.3" );
		push_wmops( "IDDB_2x2Matrix_Part2.4" );

		/* A = Ky' * G_hat * Q * Kx (see publication) */
		matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A );

		/* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
		For matrix A that is P = A(A'A)^0.5 */
		#ifdef FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2
		matrixTransp1Mul_fx_in1isin2( Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
		#else
		matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
		#endif

		eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );

		@@ -4914,7 +5104,8 @@ static void formulate2x2MixingMatrix_fx(
		div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
		move32();
		#endif /*FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT*/

		pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.4" );
		push_wmops( "IDDB_2x2Matrix_Part2.5" );
		q_div = sub( 31, s_max( exp, exp1 ) );

		div_fx[0] = L_shr( div_fx[0], sub( sub( 31, exp ), q_div ) ); // q_div
		@@ -4946,6 +5137,8 @@ static void formulate2x2MixingMatrix_fx(
		scale_sig32( div_fx, BINAURAL_CHANNELS, exp );
		q_div = add( q_div, exp );

		pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.5" );
		push_wmops( "IDDB_2x2Matrix_Part2.6" );
		FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
		{
		FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
		@@ -5015,7 +5208,8 @@ static void formulate2x2MixingMatrix_fx(
		}
		}
		}

		pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.6" );
		push_wmops( "IDDB_2x2Matrix_Part2.7" );
		minimum_s( hdrm_re[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp );
		q_temp = exp;
		move16();
		@@ -5039,7 +5233,7 @@ static void formulate2x2MixingMatrix_fx(
		0 /*int Bscale*/,
		#endif
		Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */

		pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.7" );
		pop_wmops(); //( "IDDB_2x2Matrix_Part2" );
		push_wmops( "IDDB_2x2Matrix_Part3" );