Loading lib_rend/ivas_dirac_dec_binaural_functions.c +209 −15 Original line number Diff line number Diff line Loading @@ -50,16 +50,20 @@ //#define FIX1072_SPEEDUP_formulate2x2MixingMatrix_fx_simple //#define FIX1072_SPEEDUP_formulate2x2MixingMatrix_fx_reduceDivs // orange tests //#define FIX1072_SPEEDUP_chol2x2_fx #define FIX_1072_SPEEDUP_matrixMul_fx #define FIX_1072_SPEEDUP_matrixTransp1Mul_fx #define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch #define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest #define FIX_1072_SPEEDUP_matrixFunctions_negateTuning #define FIX1072_SPEEDUP_chol2x2_fx // 6Mhz // SPEEDUP_matrix 012345: ACCEPT REGRESSIONS // NULL: 306.459 Mhz --> difference to SPEEDUP_matrix 012345 : 14Mhz //#define FIX_1072_SPEEDUP_matrixMul_fx //SPEEDUP_matrix_0 //#define FIX_1072_SPEEDUP_matrixTransp1Mul_fx //SPEEDUP_matrix_1 //#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch //SPEEDUP_matrix_2 //#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest //SPEEDUP_matrix_3 //#define FIX_1072_SPEEDUP_matrixFunctions_negateTuning //SPEEDUP_matrix_4 #if BINAURAL_CHANNELS==2 //#define FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2 //SPEEDUP_matrix_5 //293.773 , 292.468 #endif // issue 1072 //#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT #ifdef FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT // numbers fa2a72: // default : 266.984 // no-opt : 290.663 --> 23.5 Mhz Gain Loading @@ -68,8 +72,6 @@ // 1 : 282.651 --> 8 Mhz gain // 0 : 282.704 --> 8 Mhz gain //#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT #ifdef FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT #include <stdio.h> //#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT_0 //8Mhz with 1072 issue stream Loading Loading @@ -3575,6 +3577,7 @@ static inline Word32 matrixMul_func1( Word32 cond1, Word32 cond2, Word32 prod ) tmp = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bre_fx[0][chB] ) ) ); */ } #endif Loading Loading @@ -3962,6 +3965,181 @@ static void matrixTransp1Mul_fx( return; } #ifdef 
FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2
/*
 * matrixTransp1Mul_fx_in1isin2()
 *
 * Computes the 2x2 complex product out = A^H * A (conjugate transpose of A
 * times A itself), with real and imaginary parts kept in separate arrays:
 *
 *   outRe[a][b] = sum_k ( Are[k][a] * Are[k][b] + Aim[k][a] * Aim[k][b] )
 *   outIm[a][b] = sum_k ( Are[k][a] * Aim[k][b] - Aim[k][a] * Are[k][b] )
 *
 * The four (a,b) combinations are written out explicitly for
 * BINAURAL_CHANNELS == 2; there is no loop.
 *
 * Are_fx, Aim_fx   : real / imaginary part of the input matrix A, Q-format q_A
 * q_A              : Q-format of A
 * outRe_fx/outIm_fx: real / imaginary part of the result, Q-format *q_out
 * q_out            : set to 2 * q_A - 31; overridden with Q31 when the zero
 *                    check on both outputs succeeds
 *
 * NOTE(review): matrixMul_func1() is defined outside this block. From the call
 * sites, its first two arguments are sign predicates on the two factors of the
 * product passed as third argument; both predicates are true exactly when the
 * product cannot be positive. What the helper does with them (presumably a
 * rounding / negation adjustment) must be confirmed against its definition.
 */
static void matrixTransp1Mul_fx_in1isin2(
    Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
    Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
    Word16 q_A,
    Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
    Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
    Word16 *q_out )
{
    Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
    Word32 tmp1, tmp2;
    /* Pre-computed sign tests (>= 0 and < 0) for every element of A, so each
       test is evaluated once and reused by all products below. */
    Word32 Are_fx_0_0_GE_0 = GE_32( Are_fx[0][0], 0 );
    Word32 Are_fx_0_0_LT_0 = LT_32( Are_fx[0][0], 0 );
    Word32 Are_fx_1_0_GE_0 = GE_32( Are_fx[1][0], 0 );
    Word32 Are_fx_1_0_LT_0 = LT_32( Are_fx[1][0], 0 );
    Word32 Aim_fx_0_0_GE_0 = GE_32( Aim_fx[0][0], 0 );
    Word32 Aim_fx_0_0_LT_0 = LT_32( Aim_fx[0][0], 0 );
    Word32 Aim_fx_1_0_GE_0 = GE_32( Aim_fx[1][0], 0 );
    Word32 Aim_fx_1_0_LT_0 = LT_32( Aim_fx[1][0], 0 );
    /* Negated Aim[0][0]; products using it are later subtracted, which yields
       the positive contribution required by the formulas above. */
    Word32 LNeg_Aim_fx_0_0 = L_negate( Aim_fx[0][0] );
    Word32 Are_fx_0_1_GE_0 = GE_32( Are_fx[0][1], 0 );
    Word32 Are_fx_0_1_LT_0 = LT_32( Are_fx[0][1], 0 );
    Word32 Are_fx_1_1_GE_0 = GE_32( Are_fx[1][1], 0 );
    Word32 Are_fx_1_1_LT_0 = LT_32( Are_fx[1][1], 0 );
    Word32 Aim_fx_0_1_GE_0 = GE_32( Aim_fx[0][1], 0 );
    Word32 Aim_fx_0_1_LT_0 = LT_32( Aim_fx[0][1], 0 );
    Word32 Aim_fx_1_1_GE_0 = GE_32( Aim_fx[1][1], 0 );
    Word32 Aim_fx_1_1_LT_0 = LT_32( Aim_fx[1][1], 0 );
    /* Negated Aim[0][1], same role as LNeg_Aim_fx_0_0 for column 1. */
    Word32 LNeg_Aim_fx_0_1 = L_negate( Aim_fx[0][1] );
    Word32 tmp3;
    /* NOTE(review): the paired move32() calls following each matrixMul_func1()
       call look like complexity-counter bookkeeping for the helper - confirm
       against the project's instrumentation conventions before touching them. */
    {
        /*UNROLL FOR BINAURAL CHANNELS==2*/
        /*CHA=0, CHB=0*/
        /* Real part, diagonal element: Are00^2 + Are10^2 + Aim00^2 + Aim10^2.
           NOTE(review): the products here (including the non-positive
           (-Aim00)*Aim00) are plain Mpy_32_32 results and do not pass through
           matrixMul_func1() - confirm this matches the generic
           matrixTransp1Mul_fx(). */
        tmp1 = Mpy_32_32( Are_fx[0][0], Are_fx[0][0] );
        tmp2 = Mpy_32_32( Are_fx[1][0], Are_fx[1][0] );
        tmp3 = L_add( tmp1, tmp2 );
        tmp1 = Mpy_32_32( LNeg_Aim_fx_0_0, Aim_fx[0][0] );
        tmp2 = Mpy_32_32( Aim_fx[1][0], Aim_fx[1][0] );
        /* tmp3 - ( tmp1 - tmp2 ) = tmp3 + Aim00*Aim00 + Aim10*Aim10 */
        outRe_fx[0][0] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
        move32();
        /* Imaginary part: -( Aim00*Are00 + Aim10*Are10 ) + Are00*Aim00 + Are10*Aim10 */
        tmp1 = matrixMul_func1( Aim_fx_0_0_GE_0 || Are_fx_0_0_LT_0, Aim_fx_0_0_LT_0 || Are_fx_0_0_GE_0, Mpy_32_32( LNeg_Aim_fx_0_0, Are_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_0_LT_0 || Are_fx_1_0_LT_0, Aim_fx_1_0_GE_0 || Are_fx_1_0_GE_0, Mpy_32_32( Aim_fx[1][0], Are_fx[1][0] ) );
        move32();
        move32();
        tmp3 = L_sub( tmp1, tmp2 );
        tmp1 = matrixMul_func1( Are_fx_0_0_LT_0 || Aim_fx_0_0_LT_0, Are_fx_0_0_GE_0 || Aim_fx_0_0_GE_0, Mpy_32_32( Are_fx[0][0], Aim_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_0_LT_0 || Aim_fx_1_0_LT_0, Are_fx_1_0_GE_0 || Aim_fx_1_0_GE_0, Mpy_32_32( Are_fx[1][0], Aim_fx[1][0] ) );
        move32();
        move32();
        outIm_fx[0][0] = L_add( tmp3, L_add( tmp1, tmp2 ) );
        move32();
        /*CHA=0, CHB=1*/
        /* Real part: Are00*Are01 + Are10*Are11 + Aim00*Aim01 + Aim10*Aim11 */
        tmp1 = matrixMul_func1( Are_fx_0_0_LT_0 || Are_fx_0_1_LT_0, Are_fx_0_0_GE_0 || Are_fx_0_1_GE_0, Mpy_32_32( Are_fx[0][0], Are_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_0_LT_0 || Are_fx_1_1_LT_0, Are_fx_1_0_GE_0 || Are_fx_1_1_GE_0, Mpy_32_32( Are_fx[1][0], Are_fx[1][1] ) );
        move32();
        move32();
        tmp3 = L_add( tmp1, tmp2 );
        tmp1 = matrixMul_func1( Aim_fx_0_0_GE_0 || Aim_fx_0_1_LT_0, Aim_fx_0_0_LT_0 || Aim_fx_0_1_GE_0, Mpy_32_32( LNeg_Aim_fx_0_0, Aim_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_0_LT_0 || Aim_fx_1_1_LT_0, Aim_fx_1_0_GE_0 || Aim_fx_1_1_GE_0, Mpy_32_32( Aim_fx[1][0], Aim_fx[1][1] ) );
        move32();
        move32();
        outRe_fx[0][1] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
        move32();
        /* Imaginary part: -( Aim00*Are01 + Aim10*Are11 ) + Are00*Aim01 + Are10*Aim11 */
        tmp1 = matrixMul_func1( Aim_fx_0_0_GE_0 || Are_fx_0_1_LT_0, Aim_fx_0_0_LT_0 || Are_fx_0_1_GE_0, Mpy_32_32( LNeg_Aim_fx_0_0, Are_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_0_LT_0 || Are_fx_1_1_LT_0, Aim_fx_1_0_GE_0 || Are_fx_1_1_GE_0, Mpy_32_32( Aim_fx[1][0], Are_fx[1][1] ) );
        move32();
        move32();
        tmp3 = L_sub( tmp1, tmp2 );
        tmp1 = matrixMul_func1( Are_fx_0_0_LT_0 || Aim_fx_0_1_LT_0, Are_fx_0_0_GE_0 || Aim_fx_0_1_GE_0, Mpy_32_32( Are_fx[0][0], Aim_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_0_LT_0 || Aim_fx_1_1_LT_0, Are_fx_1_0_GE_0 || Aim_fx_1_1_GE_0, Mpy_32_32( Are_fx[1][0], Aim_fx[1][1] ) );
        move32();
        move32();
        outIm_fx[0][1] = L_add( tmp3, L_add( tmp1, tmp2 ) );
        move32();
        /*CHA=1, CHB=0*/
        /* Real part: Are01*Are00 + Are11*Are10 + Aim01*Aim00 + Aim11*Aim10 */
        tmp1 = matrixMul_func1( Are_fx_0_1_LT_0 || Are_fx_0_0_LT_0, Are_fx_0_1_GE_0 || Are_fx_0_0_GE_0, Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_1_LT_0 || Are_fx_1_0_LT_0, Are_fx_1_1_GE_0 || Are_fx_1_0_GE_0, Mpy_32_32( Are_fx[1][1], Are_fx[1][0] ) );
        move32();
        move32();
        tmp3 = L_add( tmp1, tmp2 );
        tmp1 = matrixMul_func1( Aim_fx_0_1_GE_0 || Aim_fx_0_0_LT_0, Aim_fx_0_1_LT_0 || Aim_fx_0_0_GE_0, Mpy_32_32( LNeg_Aim_fx_0_1, Aim_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_1_LT_0 || Aim_fx_1_0_LT_0, Aim_fx_1_1_GE_0 || Aim_fx_1_0_GE_0, Mpy_32_32( Aim_fx[1][1], Aim_fx[1][0] ) );
        move32();
        move32();
        outRe_fx[1][0] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
        move32();
        /* Imaginary part: -( Aim01*Are00 + Aim11*Are10 ) + Are01*Aim00 + Are11*Aim10 */
        tmp1 = matrixMul_func1( Aim_fx_0_1_GE_0 || Are_fx_0_0_LT_0, Aim_fx_0_1_LT_0 || Are_fx_0_0_GE_0, Mpy_32_32( LNeg_Aim_fx_0_1, Are_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_1_LT_0 || Are_fx_1_0_LT_0, Aim_fx_1_1_GE_0 || Are_fx_1_0_GE_0, Mpy_32_32( Aim_fx[1][1], Are_fx[1][0] ) );
        move32();
        move32();
        tmp3 = L_sub( tmp1, tmp2 );
        tmp1 = matrixMul_func1( Are_fx_0_1_LT_0 || Aim_fx_0_0_LT_0, Are_fx_0_1_GE_0 || Aim_fx_0_0_GE_0, Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_1_LT_0 || Aim_fx_1_0_LT_0, Are_fx_1_1_GE_0 || Aim_fx_1_0_GE_0, Mpy_32_32( Are_fx[1][1], Aim_fx[1][0] ) );
        move32();
        move32();
        outIm_fx[1][0] = L_add( tmp3, L_add( tmp1, tmp2 ) );
        move32();
        /*CHA=1, CHB=1*/
        /* Real part, diagonal element: Are01^2 + Are11^2 + Aim01^2 + Aim11^2
           (plain Mpy_32_32 products, as for CHA=0, CHB=0). */
        tmp1 = Mpy_32_32( Are_fx[0][1], Are_fx[0][1] );
        tmp2 = Mpy_32_32( Are_fx[1][1], Are_fx[1][1] );
        tmp3 = L_add( tmp1, tmp2 );
        tmp1 = Mpy_32_32( LNeg_Aim_fx_0_1, Aim_fx[0][1] );
        tmp2 = Mpy_32_32( Aim_fx[1][1], Aim_fx[1][1] );
        outRe_fx[1][1] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
        move32();
        /* Imaginary part: -( Aim01*Are01 + Aim11*Are11 ) + Are01*Aim01 + Are11*Aim11 */
        tmp1 = matrixMul_func1( Aim_fx_0_1_GE_0 || Are_fx_0_1_LT_0, Aim_fx_0_1_LT_0 || Are_fx_0_1_GE_0, Mpy_32_32( LNeg_Aim_fx_0_1, Are_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_1_LT_0 || Are_fx_1_1_LT_0, Aim_fx_1_1_GE_0 || Are_fx_1_1_GE_0, Mpy_32_32( Aim_fx[1][1], Are_fx[1][1] ) );
        move32();
        move32();
        tmp3 = L_sub( tmp1, tmp2 );
        tmp1 = matrixMul_func1( Are_fx_0_1_LT_0 || Aim_fx_0_1_LT_0, Are_fx_0_1_GE_0 || Aim_fx_0_1_GE_0, Mpy_32_32( Are_fx[0][1], Aim_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_1_LT_0 || Aim_fx_1_1_LT_0, Are_fx_1_1_GE_0 || Aim_fx_1_1_GE_0, Mpy_32_32( Are_fx[1][1], Aim_fx[1][1] ) );
        move32();
        move32();
        outIm_fx[1][1] = L_add( tmp3, L_add( tmp1, tmp2 ) );
        move32();
    }
    /* Each output term is a Mpy_32_32 product of two q_A values, hence 2*q_A - 31. */
    *q_out = sub( add( q_A, q_A ), 31 );
    move16();
    /* NOTE(review): is_zero_arr() is presumably non-zero when all `size`
       elements starting at the given pointer are zero; with row 0 and size 4
       that would cover the whole 2x2 matrix - confirm. In that case the
       Q-format is reset to Q31. */
    if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) )
    {
        *q_out = Q31;
        move16();
    }
    return;
}
#endif /*FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2*/
static void matrixTransp2Mul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Loading Loading @@ -4664,6 +4842,7 @@ static void formulate2x2MixingMatrix_fx( pop_wmops(); //( "IDDB_2x2Matrix_Part1" ); push_wmops( "IDDB_2x2Matrix_Part2" ); push_wmops( "IDDB_2x2Matrix_Part2.1" ); /* Cholesky decomposition of target / output covariance matrix */ chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky ); Loading Loading @@ -4695,6 +4874,8 @@ static void formulate2x2MixingMatrix_fx( temp = Mpy_32_32( E_in2, 2147484 ); // 2147484 = 0.001f in Q31 temp = L_max( temp, E_in1 ); pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.1" ); push_wmops( "IDDB_2x2Matrix_Part2.2" ); #if defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT ) && defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT_2 ) /*IF (E_out1 == 0)*/ { Loading Loading @@ -4818,6 +4999,8 @@ static void formulate2x2MixingMatrix_fx( move32(); Ghat_fx[1] = L_shr( Ghat_fx[1], sub( sub( 31, exp1 ), q_Ghat ) ); // q_Ghat move32(); pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.2" ); push_wmops( "IDDB_2x2Matrix_Part2.3" ); /* Matrix multiplication, tmp = 
Ky' * G_hat * Q */ FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) Loading Loading @@ -4848,12 +5031,19 @@ static void formulate2x2MixingMatrix_fx( q_temp = sub( add( q_ky, q_GhatQ ), 31 ); pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.3" ); push_wmops( "IDDB_2x2Matrix_Part2.4" ); /* A = Ky' * G_hat * Q * Kx (see publication) */ matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A ); /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx For matrix A that is P = A(A'A)^0.5 */ #ifdef FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2 matrixTransp1Mul_fx_in1isin2( Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); #else matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); #endif eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); Loading Loading @@ -4914,7 +5104,8 @@ static void formulate2x2MixingMatrix_fx( div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 move32(); #endif /*FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT*/ pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.4" ); push_wmops( "IDDB_2x2Matrix_Part2.5" ); q_div = sub( 31, s_max( exp, exp1 ) ); div_fx[0] = L_shr( div_fx[0], sub( sub( 31, exp ), q_div ) ); // q_div Loading Loading @@ -4946,6 +5137,8 @@ static void formulate2x2MixingMatrix_fx( scale_sig32( div_fx, BINAURAL_CHANNELS, exp ); q_div = add( q_div, exp ); pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.5" ); push_wmops( "IDDB_2x2Matrix_Part2.6" ); FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) Loading Loading @@ -5015,7 +5208,8 @@ static void formulate2x2MixingMatrix_fx( } } } pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.6" ); push_wmops( "IDDB_2x2Matrix_Part2.7" ); minimum_s( hdrm_re[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp ); q_temp = exp; move16(); Loading @@ -5039,7 +5233,7 @@ static void formulate2x2MixingMatrix_fx( 0 
/*int Bscale*/, #endif Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.7" ); pop_wmops(); //( "IDDB_2x2Matrix_Part2" ); push_wmops( "IDDB_2x2Matrix_Part3" ); Loading Loading
lib_rend/ivas_dirac_dec_binaural_functions.c +209 −15 Original line number Diff line number Diff line Loading @@ -50,16 +50,20 @@ //#define FIX1072_SPEEDUP_formulate2x2MixingMatrix_fx_simple //#define FIX1072_SPEEDUP_formulate2x2MixingMatrix_fx_reduceDivs // orange tests //#define FIX1072_SPEEDUP_chol2x2_fx #define FIX_1072_SPEEDUP_matrixMul_fx #define FIX_1072_SPEEDUP_matrixTransp1Mul_fx #define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch #define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest #define FIX_1072_SPEEDUP_matrixFunctions_negateTuning #define FIX1072_SPEEDUP_chol2x2_fx // 6Mhz // SPEEDUP_matrix 012345: ACCEPT REGRESSIONS // NULL: 306.459 Mhz --> difference to SPEEDUP_matrix 012345 : 14Mhz //#define FIX_1072_SPEEDUP_matrixMul_fx //SPEEDUP_matrix_0 //#define FIX_1072_SPEEDUP_matrixTransp1Mul_fx //SPEEDUP_matrix_1 //#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch //SPEEDUP_matrix_2 //#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest //SPEEDUP_matrix_3 //#define FIX_1072_SPEEDUP_matrixFunctions_negateTuning //SPEEDUP_matrix_4 #if BINAURAL_CHANNELS==2 //#define FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2 //SPEEDUP_matrix_5 //293.773 , 292.468 #endif // issue 1072 //#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT #ifdef FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT // numbers fa2a72: // default : 266.984 // no-opt : 290.663 --> 23.5 Mhz Gain Loading @@ -68,8 +72,6 @@ // 1 : 282.651 --> 8 Mhz gain // 0 : 282.704 --> 8 Mhz gain //#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT #ifdef FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT #include <stdio.h> //#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT_0 //8Mhz with 1072 issue stream Loading Loading @@ -3575,6 +3577,7 @@ static inline Word32 matrixMul_func1( Word32 cond1, Word32 cond2, Word32 prod ) tmp = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bre_fx[0][chB] ) ) ); */ } #endif Loading Loading @@ -3962,6 +3965,181 @@ static void matrixTransp1Mul_fx( return; } #ifdef 
FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2
/*
 * matrixTransp1Mul_fx_in1isin2()
 *
 * Computes the 2x2 complex product out = A^H * A (conjugate transpose of A
 * times A itself), with real and imaginary parts kept in separate arrays:
 *
 *   outRe[a][b] = sum_k ( Are[k][a] * Are[k][b] + Aim[k][a] * Aim[k][b] )
 *   outIm[a][b] = sum_k ( Are[k][a] * Aim[k][b] - Aim[k][a] * Are[k][b] )
 *
 * The four (a,b) combinations are written out explicitly for
 * BINAURAL_CHANNELS == 2; there is no loop.
 *
 * Are_fx, Aim_fx   : real / imaginary part of the input matrix A, Q-format q_A
 * q_A              : Q-format of A
 * outRe_fx/outIm_fx: real / imaginary part of the result, Q-format *q_out
 * q_out            : set to 2 * q_A - 31; overridden with Q31 when the zero
 *                    check on both outputs succeeds
 *
 * NOTE(review): matrixMul_func1() is defined outside this block. From the call
 * sites, its first two arguments are sign predicates on the two factors of the
 * product passed as third argument; both predicates are true exactly when the
 * product cannot be positive. What the helper does with them (presumably a
 * rounding / negation adjustment) must be confirmed against its definition.
 */
static void matrixTransp1Mul_fx_in1isin2(
    Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
    Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
    Word16 q_A,
    Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
    Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
    Word16 *q_out )
{
    Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
    Word32 tmp1, tmp2;
    /* Pre-computed sign tests (>= 0 and < 0) for every element of A, so each
       test is evaluated once and reused by all products below. */
    Word32 Are_fx_0_0_GE_0 = GE_32( Are_fx[0][0], 0 );
    Word32 Are_fx_0_0_LT_0 = LT_32( Are_fx[0][0], 0 );
    Word32 Are_fx_1_0_GE_0 = GE_32( Are_fx[1][0], 0 );
    Word32 Are_fx_1_0_LT_0 = LT_32( Are_fx[1][0], 0 );
    Word32 Aim_fx_0_0_GE_0 = GE_32( Aim_fx[0][0], 0 );
    Word32 Aim_fx_0_0_LT_0 = LT_32( Aim_fx[0][0], 0 );
    Word32 Aim_fx_1_0_GE_0 = GE_32( Aim_fx[1][0], 0 );
    Word32 Aim_fx_1_0_LT_0 = LT_32( Aim_fx[1][0], 0 );
    /* Negated Aim[0][0]; products using it are later subtracted, which yields
       the positive contribution required by the formulas above. */
    Word32 LNeg_Aim_fx_0_0 = L_negate( Aim_fx[0][0] );
    Word32 Are_fx_0_1_GE_0 = GE_32( Are_fx[0][1], 0 );
    Word32 Are_fx_0_1_LT_0 = LT_32( Are_fx[0][1], 0 );
    Word32 Are_fx_1_1_GE_0 = GE_32( Are_fx[1][1], 0 );
    Word32 Are_fx_1_1_LT_0 = LT_32( Are_fx[1][1], 0 );
    Word32 Aim_fx_0_1_GE_0 = GE_32( Aim_fx[0][1], 0 );
    Word32 Aim_fx_0_1_LT_0 = LT_32( Aim_fx[0][1], 0 );
    Word32 Aim_fx_1_1_GE_0 = GE_32( Aim_fx[1][1], 0 );
    Word32 Aim_fx_1_1_LT_0 = LT_32( Aim_fx[1][1], 0 );
    /* Negated Aim[0][1], same role as LNeg_Aim_fx_0_0 for column 1. */
    Word32 LNeg_Aim_fx_0_1 = L_negate( Aim_fx[0][1] );
    Word32 tmp3;
    /* NOTE(review): the paired move32() calls following each matrixMul_func1()
       call look like complexity-counter bookkeeping for the helper - confirm
       against the project's instrumentation conventions before touching them. */
    {
        /*UNROLL FOR BINAURAL CHANNELS==2*/
        /*CHA=0, CHB=0*/
        /* Real part, diagonal element: Are00^2 + Are10^2 + Aim00^2 + Aim10^2.
           NOTE(review): the products here (including the non-positive
           (-Aim00)*Aim00) are plain Mpy_32_32 results and do not pass through
           matrixMul_func1() - confirm this matches the generic
           matrixTransp1Mul_fx(). */
        tmp1 = Mpy_32_32( Are_fx[0][0], Are_fx[0][0] );
        tmp2 = Mpy_32_32( Are_fx[1][0], Are_fx[1][0] );
        tmp3 = L_add( tmp1, tmp2 );
        tmp1 = Mpy_32_32( LNeg_Aim_fx_0_0, Aim_fx[0][0] );
        tmp2 = Mpy_32_32( Aim_fx[1][0], Aim_fx[1][0] );
        /* tmp3 - ( tmp1 - tmp2 ) = tmp3 + Aim00*Aim00 + Aim10*Aim10 */
        outRe_fx[0][0] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
        move32();
        /* Imaginary part: -( Aim00*Are00 + Aim10*Are10 ) + Are00*Aim00 + Are10*Aim10 */
        tmp1 = matrixMul_func1( Aim_fx_0_0_GE_0 || Are_fx_0_0_LT_0, Aim_fx_0_0_LT_0 || Are_fx_0_0_GE_0, Mpy_32_32( LNeg_Aim_fx_0_0, Are_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_0_LT_0 || Are_fx_1_0_LT_0, Aim_fx_1_0_GE_0 || Are_fx_1_0_GE_0, Mpy_32_32( Aim_fx[1][0], Are_fx[1][0] ) );
        move32();
        move32();
        tmp3 = L_sub( tmp1, tmp2 );
        tmp1 = matrixMul_func1( Are_fx_0_0_LT_0 || Aim_fx_0_0_LT_0, Are_fx_0_0_GE_0 || Aim_fx_0_0_GE_0, Mpy_32_32( Are_fx[0][0], Aim_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_0_LT_0 || Aim_fx_1_0_LT_0, Are_fx_1_0_GE_0 || Aim_fx_1_0_GE_0, Mpy_32_32( Are_fx[1][0], Aim_fx[1][0] ) );
        move32();
        move32();
        outIm_fx[0][0] = L_add( tmp3, L_add( tmp1, tmp2 ) );
        move32();
        /*CHA=0, CHB=1*/
        /* Real part: Are00*Are01 + Are10*Are11 + Aim00*Aim01 + Aim10*Aim11 */
        tmp1 = matrixMul_func1( Are_fx_0_0_LT_0 || Are_fx_0_1_LT_0, Are_fx_0_0_GE_0 || Are_fx_0_1_GE_0, Mpy_32_32( Are_fx[0][0], Are_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_0_LT_0 || Are_fx_1_1_LT_0, Are_fx_1_0_GE_0 || Are_fx_1_1_GE_0, Mpy_32_32( Are_fx[1][0], Are_fx[1][1] ) );
        move32();
        move32();
        tmp3 = L_add( tmp1, tmp2 );
        tmp1 = matrixMul_func1( Aim_fx_0_0_GE_0 || Aim_fx_0_1_LT_0, Aim_fx_0_0_LT_0 || Aim_fx_0_1_GE_0, Mpy_32_32( LNeg_Aim_fx_0_0, Aim_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_0_LT_0 || Aim_fx_1_1_LT_0, Aim_fx_1_0_GE_0 || Aim_fx_1_1_GE_0, Mpy_32_32( Aim_fx[1][0], Aim_fx[1][1] ) );
        move32();
        move32();
        outRe_fx[0][1] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
        move32();
        /* Imaginary part: -( Aim00*Are01 + Aim10*Are11 ) + Are00*Aim01 + Are10*Aim11 */
        tmp1 = matrixMul_func1( Aim_fx_0_0_GE_0 || Are_fx_0_1_LT_0, Aim_fx_0_0_LT_0 || Are_fx_0_1_GE_0, Mpy_32_32( LNeg_Aim_fx_0_0, Are_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_0_LT_0 || Are_fx_1_1_LT_0, Aim_fx_1_0_GE_0 || Are_fx_1_1_GE_0, Mpy_32_32( Aim_fx[1][0], Are_fx[1][1] ) );
        move32();
        move32();
        tmp3 = L_sub( tmp1, tmp2 );
        tmp1 = matrixMul_func1( Are_fx_0_0_LT_0 || Aim_fx_0_1_LT_0, Are_fx_0_0_GE_0 || Aim_fx_0_1_GE_0, Mpy_32_32( Are_fx[0][0], Aim_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_0_LT_0 || Aim_fx_1_1_LT_0, Are_fx_1_0_GE_0 || Aim_fx_1_1_GE_0, Mpy_32_32( Are_fx[1][0], Aim_fx[1][1] ) );
        move32();
        move32();
        outIm_fx[0][1] = L_add( tmp3, L_add( tmp1, tmp2 ) );
        move32();
        /*CHA=1, CHB=0*/
        /* Real part: Are01*Are00 + Are11*Are10 + Aim01*Aim00 + Aim11*Aim10 */
        tmp1 = matrixMul_func1( Are_fx_0_1_LT_0 || Are_fx_0_0_LT_0, Are_fx_0_1_GE_0 || Are_fx_0_0_GE_0, Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_1_LT_0 || Are_fx_1_0_LT_0, Are_fx_1_1_GE_0 || Are_fx_1_0_GE_0, Mpy_32_32( Are_fx[1][1], Are_fx[1][0] ) );
        move32();
        move32();
        tmp3 = L_add( tmp1, tmp2 );
        tmp1 = matrixMul_func1( Aim_fx_0_1_GE_0 || Aim_fx_0_0_LT_0, Aim_fx_0_1_LT_0 || Aim_fx_0_0_GE_0, Mpy_32_32( LNeg_Aim_fx_0_1, Aim_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_1_LT_0 || Aim_fx_1_0_LT_0, Aim_fx_1_1_GE_0 || Aim_fx_1_0_GE_0, Mpy_32_32( Aim_fx[1][1], Aim_fx[1][0] ) );
        move32();
        move32();
        outRe_fx[1][0] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
        move32();
        /* Imaginary part: -( Aim01*Are00 + Aim11*Are10 ) + Are01*Aim00 + Are11*Aim10 */
        tmp1 = matrixMul_func1( Aim_fx_0_1_GE_0 || Are_fx_0_0_LT_0, Aim_fx_0_1_LT_0 || Are_fx_0_0_GE_0, Mpy_32_32( LNeg_Aim_fx_0_1, Are_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_1_LT_0 || Are_fx_1_0_LT_0, Aim_fx_1_1_GE_0 || Are_fx_1_0_GE_0, Mpy_32_32( Aim_fx[1][1], Are_fx[1][0] ) );
        move32();
        move32();
        tmp3 = L_sub( tmp1, tmp2 );
        tmp1 = matrixMul_func1( Are_fx_0_1_LT_0 || Aim_fx_0_0_LT_0, Are_fx_0_1_GE_0 || Aim_fx_0_0_GE_0, Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_1_LT_0 || Aim_fx_1_0_LT_0, Are_fx_1_1_GE_0 || Aim_fx_1_0_GE_0, Mpy_32_32( Are_fx[1][1], Aim_fx[1][0] ) );
        move32();
        move32();
        outIm_fx[1][0] = L_add( tmp3, L_add( tmp1, tmp2 ) );
        move32();
        /*CHA=1, CHB=1*/
        /* Real part, diagonal element: Are01^2 + Are11^2 + Aim01^2 + Aim11^2
           (plain Mpy_32_32 products, as for CHA=0, CHB=0). */
        tmp1 = Mpy_32_32( Are_fx[0][1], Are_fx[0][1] );
        tmp2 = Mpy_32_32( Are_fx[1][1], Are_fx[1][1] );
        tmp3 = L_add( tmp1, tmp2 );
        tmp1 = Mpy_32_32( LNeg_Aim_fx_0_1, Aim_fx[0][1] );
        tmp2 = Mpy_32_32( Aim_fx[1][1], Aim_fx[1][1] );
        outRe_fx[1][1] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
        move32();
        /* Imaginary part: -( Aim01*Are01 + Aim11*Are11 ) + Are01*Aim01 + Are11*Aim11 */
        tmp1 = matrixMul_func1( Aim_fx_0_1_GE_0 || Are_fx_0_1_LT_0, Aim_fx_0_1_LT_0 || Are_fx_0_1_GE_0, Mpy_32_32( LNeg_Aim_fx_0_1, Are_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_1_LT_0 || Are_fx_1_1_LT_0, Aim_fx_1_1_GE_0 || Are_fx_1_1_GE_0, Mpy_32_32( Aim_fx[1][1], Are_fx[1][1] ) );
        move32();
        move32();
        tmp3 = L_sub( tmp1, tmp2 );
        tmp1 = matrixMul_func1( Are_fx_0_1_LT_0 || Aim_fx_0_1_LT_0, Are_fx_0_1_GE_0 || Aim_fx_0_1_GE_0, Mpy_32_32( Are_fx[0][1], Aim_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_1_LT_0 || Aim_fx_1_1_LT_0, Are_fx_1_1_GE_0 || Aim_fx_1_1_GE_0, Mpy_32_32( Are_fx[1][1], Aim_fx[1][1] ) );
        move32();
        move32();
        outIm_fx[1][1] = L_add( tmp3, L_add( tmp1, tmp2 ) );
        move32();
    }
    /* Each output term is a Mpy_32_32 product of two q_A values, hence 2*q_A - 31. */
    *q_out = sub( add( q_A, q_A ), 31 );
    move16();
    /* NOTE(review): is_zero_arr() is presumably non-zero when all `size`
       elements starting at the given pointer are zero; with row 0 and size 4
       that would cover the whole 2x2 matrix - confirm. In that case the
       Q-format is reset to Q31. */
    if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) )
    {
        *q_out = Q31;
        move16();
    }
    return;
}
#endif /*FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2*/
static void matrixTransp2Mul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Loading Loading @@ -4664,6 +4842,7 @@ static void formulate2x2MixingMatrix_fx( pop_wmops(); //( "IDDB_2x2Matrix_Part1" ); push_wmops( "IDDB_2x2Matrix_Part2" ); push_wmops( "IDDB_2x2Matrix_Part2.1" ); /* Cholesky decomposition of target / output covariance matrix */ chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky ); Loading Loading @@ -4695,6 +4874,8 @@ static void formulate2x2MixingMatrix_fx( temp = Mpy_32_32( E_in2, 2147484 ); // 2147484 = 0.001f in Q31 temp = L_max( temp, E_in1 ); pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.1" ); push_wmops( "IDDB_2x2Matrix_Part2.2" ); #if defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT ) && defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT_2 ) /*IF (E_out1 == 0)*/ { Loading Loading @@ -4818,6 +4999,8 @@ static void formulate2x2MixingMatrix_fx( move32(); Ghat_fx[1] = L_shr( Ghat_fx[1], sub( sub( 31, exp1 ), q_Ghat ) ); // q_Ghat move32(); pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.2" ); push_wmops( "IDDB_2x2Matrix_Part2.3" ); /* Matrix multiplication, tmp = 
Ky' * G_hat * Q */ FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) Loading Loading @@ -4848,12 +5031,19 @@ static void formulate2x2MixingMatrix_fx( q_temp = sub( add( q_ky, q_GhatQ ), 31 ); pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.3" ); push_wmops( "IDDB_2x2Matrix_Part2.4" ); /* A = Ky' * G_hat * Q * Kx (see publication) */ matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A ); /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx For matrix A that is P = A(A'A)^0.5 */ #ifdef FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2 matrixTransp1Mul_fx_in1isin2( Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); #else matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); #endif eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); Loading Loading @@ -4914,7 +5104,8 @@ static void formulate2x2MixingMatrix_fx( div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1 move32(); #endif /*FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT*/ pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.4" ); push_wmops( "IDDB_2x2Matrix_Part2.5" ); q_div = sub( 31, s_max( exp, exp1 ) ); div_fx[0] = L_shr( div_fx[0], sub( sub( 31, exp ), q_div ) ); // q_div Loading Loading @@ -4946,6 +5137,8 @@ static void formulate2x2MixingMatrix_fx( scale_sig32( div_fx, BINAURAL_CHANNELS, exp ); q_div = add( q_div, exp ); pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.5" ); push_wmops( "IDDB_2x2Matrix_Part2.6" ); FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) Loading Loading @@ -5015,7 +5208,8 @@ static void formulate2x2MixingMatrix_fx( } } } pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.6" ); push_wmops( "IDDB_2x2Matrix_Part2.7" ); minimum_s( hdrm_re[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp ); q_temp = exp; move16(); Loading @@ -5039,7 +5233,7 @@ static void formulate2x2MixingMatrix_fx( 0 
/*int Bscale*/, #endif Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */ pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.7" ); pop_wmops(); //( "IDDB_2x2Matrix_Part2" ); push_wmops( "IDDB_2x2Matrix_Part3" ); Loading