Loading lib_rend/ivas_dirac_dec_binaural_functions_fx.c +433 −179 Original line number Diff line number Diff line Loading @@ -3537,13 +3537,366 @@ static void check( Word32 cf = computed_fx >> +max( qd, 0 ); Word32 ef = expected_fx >> -min( qd, 0 ); Word32 abs_error = abs( cf - ef ); if ( abs_error >= max_abs_err ) if ( abs_error > max_abs_err ) { assert( false ); } } #endif Word32 __pm_fx; Word16 __pm_q; Word32 __add_fx; Word16 __add_q; Word32 __as[BINAURAL_CHANNELS]; Word32 __s_fx[BINAURAL_CHANNELS]; Word16 __s_q[BINAURAL_CHANNELS]; Word32 __nval_fx[BINAURAL_CHANNELS]; Word16 __nval_q[BINAURAL_CHANNELS]; Word32 __Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word32 __Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word16 __U_q; Word32 __D_fx[BINAURAL_CHANNELS]; Word16 __D_q; Word32 __diff_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word16 __diff_q; static void eig2x2_opt( const Word32 E1_fx, /*q_E*/ const Word32 E2_fx, /*q_E*/ Word16 q_E, const Word32 Cre_fx, /*q_C*/ const Word32 Cim_fx, /*q_C*/ Word16 q_C ) { Word32 s_fx[BINAURAL_CHANNELS]; Word32 pm_fx, add_fx; Word32 tmp1, tmp2, e1, e2, c_re, c_im, c0_im, c1_im; Word64 crossSquare_fx, tmp3, tmp4; Word16 q_crossSquare, q_min, q_diff, q_tmp1, q_tmp2, exp, q_e, q_c; Word32 i01, i00, i11, i10; Word64 eps_fx = ( (Word64) EPSILON_MANT ) << 32; Word16 eps_q = 63 - EPSILON_EXP; move32(); move16(); set32_fx( (Word32 *) __Ure_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); set32_fx( (Word32 *) __Uim_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); __Ure_fx[0][0] = ONE_IN_Q30; move32(); __Ure_fx[1][1] = ONE_IN_Q30; move32(); __U_q = Q30; move16(); exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 ); c_re = L_shl( Cre_fx, exp ); c_im = L_shl( Cim_fx, exp ); q_c = add( q_C, exp ); exp = sub( get_min_scalefactor( E1_fx, E2_fx ), 2 ); e1 = L_shl( E1_fx, exp ); e2 = L_shl( E2_fx, exp ); q_e = add( q_E, exp ); // crossSquare_fx = (c_re * c_re) + (c_im * c_im) // a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) // add_fx = 0.5f * (e1 + e2) tmp1 = L_sub( e1, e2 ); tmp3 = W_mult_32_32( tmp1, tmp1 ); q_tmp1 = add( add( q_e, q_e ), 1 ); if ( !tmp3 ) { q_tmp1 = 63; move16(); } crossSquare_fx = W_mac_32_32( W_mult_32_32( c_re, c_re ), c_im, c_im ); q_crossSquare = add( add( q_c, q_c ), 1 ); if ( !crossSquare_fx ) { q_crossSquare = 63; move16(); } tmp4 = crossSquare_fx; move64(); q_tmp2 = sub( q_crossSquare, 2 ); if ( !tmp4 ) { q_tmp2 = 63; move16(); } q_diff = sub( q_tmp1, q_tmp2 ); q_tmp1 = s_min( q_tmp1, q_tmp2 ); if ( q_diff > 0 ) { tmp3 = W_shr( tmp3, q_diff ); } if ( q_diff < 0 ) { tmp4 = W_shl( tmp4, q_diff ); } tmp3 = W_add( tmp3, tmp4 ); q_diff = W_norm( tmp3 ); tmp3 = W_shl( tmp3, q_diff ); q_tmp1 = add( q_tmp1, q_diff ); // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) exp = sub( 63, q_tmp1 ); pm_fx = Sqrt32( L_max( 0, W_extract_h( tmp3 ) ), &exp ); pm_fx = L_shr( pm_fx, 1 ); q_tmp2 = sub( 31, exp ); // add_fx = 0.5 * (e1 + e2) add_fx = L_shr( L_add( e1, e2 ), 1 ); q_tmp1 = q_e; move16(); __pm_fx = pm_fx; // FIXME __pm_q = q_tmp2; // FIXME __add_fx = add_fx; // FIXME __add_q = q_tmp1; // FIXME // D[0] = add + pm; // D[1] = max( 0.0f, add - pm ); q_diff = sub( q_tmp1, q_tmp2 ); tmp1 = add_fx; move32(); if ( q_diff > 0 ) { tmp1 = L_shr( tmp1, q_diff ); } tmp2 = pm_fx; move32(); if ( q_diff < 0 ) { tmp2 = L_shl( tmp2, q_diff ); } __D_fx[0] = L_add( tmp1, tmp2 ); move32(); __D_fx[1] = L_max( L_sub( tmp1, tmp2 ), 0 ); move32(); __D_q = s_min( q_tmp1, q_tmp2 ); move32(); // Numeric case, when input is practically zeros // if ( __D_fx[0] < EPSILON_FX ) if ( LT_32( L_shl_sat( __D_fx[0], sub( 31 - EPSILON_EXP, __D_q ) ), EPSILON_MANT ) ) { return; } // Numeric case, when input is near an identity matrix with a gain tmp1 = Mpy_32_32( INV_1000_Q31, add_fx ); if ( q_diff > 0 ) { tmp1 = L_shr( tmp1, q_diff ); } if ( LT_32( tmp2, tmp1 ) ) { return; } // Eigenvectors q_diff = sub( q_e, __D_q ); q_tmp1 = s_min( q_e, __D_q ); tmp1 = __D_fx[0]; move32(); if ( q_diff > 0 ) { tmp1 = L_shr( tmp1, q_diff ); } tmp2 = __D_fx[1]; move32(); if ( q_diff > 0 ) { tmp2 = L_shr( tmp2, q_diff ); } if ( q_diff < 0 ) { e1 = L_shl( e1, q_diff ); } if ( q_diff < 0 ) { e2 = L_shl( e2, q_diff ); } s_fx[0] = L_sub( tmp1, e1 ); // __D_fx[0] - e1 tmp1 = L_sub( tmp1, e2 ); // __D_fx[0] - e2 s_fx[1] = L_sub( tmp2, e1 ); // __D_fx[1] - e1 tmp2 = L_sub( tmp2, e2 ); // __D_fx[1] - e2 __diff_fx[0][0] = s_fx[0]; __diff_fx[0][1] = tmp1; __diff_fx[1][0] = s_fx[1]; __diff_fx[1][1] = tmp2; __diff_q = q_tmp1; i01 = GT_32( L_abs( tmp1 ), L_abs( s_fx[0] ) ); // fabsf( __D_fx[0] - e2 ) > fabsf( __D_fx[0] - e1 ) i11 = GT_32( L_abs( tmp2 ), L_abs( s_fx[1] ) ); // fabsf( __D_fx[1] - e2 ) > fabsf( __D_fx[1] - e1 ) __as[0] = i01; __as[1] = i11; if ( i01 ) { s_fx[0] = tmp1; move32(); } if ( i11 ) { s_fx[1] = tmp2; move32(); } __s_fx[0] = s_fx[0]; __s_fx[1] = s_fx[1]; __s_q[0] = q_tmp1; __s_q[1] = q_tmp1; // normVal = sqrtf( 1.0f / ( 1e-12f + crossSquare + s * s ) ); Word32 nvalm[BINAURAL_CHANNELS]; Word16 nvalq[BINAURAL_CHANNELS]; q_tmp2 = L_shl( q_tmp1, 1 ); q_min = s_min( q_tmp2, q_crossSquare ); q_min = s_min( q_min, eps_q ); Word64 nval64m[BINAURAL_CHANNELS]; q_diff = L_sub( q_tmp2, q_min ); nval64m[0] = W_shr( W_mult0_32_32( s_fx[0], s_fx[0] ), q_diff ); nval64m[1] = W_shr( W_mult0_32_32( s_fx[1], s_fx[1] ), q_diff ); q_diff = L_sub( q_crossSquare, q_min ); crossSquare_fx = W_shr( crossSquare_fx, q_diff ); nval64m[0] = W_add( nval64m[0], crossSquare_fx ); nval64m[1] = W_add( nval64m[1], crossSquare_fx ); q_diff = L_sub( eps_q, q_min ); eps_fx = W_shr( eps_fx, q_diff ); nval64m[0] = W_add( nval64m[0], eps_fx ); nval64m[1] = W_add( nval64m[1], eps_fx ); q_diff = W_norm( nval64m[0] ); nval64m[0] = W_shl( nval64m[0], q_diff ); nvalq[0] = add( q_min, q_diff ); q_diff = W_norm( nval64m[1] ); nval64m[1] = W_shl( nval64m[1], q_diff ); nvalq[1] = add( q_min, q_diff ); nvalm[0] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ), &exp ); exp = sub( exp, sub( 62, nvalq[0] ) ); nvalm[0] = Sqrt32( nvalm[0], &exp ); nvalq[0] = sub( 31, exp ); nvalm[1] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ), &exp ); exp = sub( exp, sub( 62, nvalq[1] ) ); nvalm[1] = Sqrt32( nvalm[1], &exp ); nvalq[1] = sub( 31, exp ); __nval_fx[0] = nvalm[0]; __nval_q[0] = nvalq[0]; __nval_fx[1] = nvalm[1]; __nval_q[1] = nvalq[1]; q_diff = sub( q_c, q_tmp1 ); q_tmp1 = s_min( q_tmp1, q_c ); if ( q_diff > 0 ) { c_re = L_shr( c_re, q_diff ); } if ( q_diff > 0 ) { c_im = L_shr( c_im, q_diff ); } if ( q_diff < 0 ) { s_fx[0] = L_shl( s_fx[0], q_diff ); } if ( q_diff < 0 ) { s_fx[1] = L_shl( s_fx[1], q_diff ); } q_diff = sub( nvalq[0], nvalq[1] ); q_tmp2 = s_min( nvalq[0], nvalq[1] ); if ( q_diff > 0 ) { nvalm[0] = L_shr( nvalm[0], q_diff ); } if ( q_diff < 0 ) { nvalm[1] = L_shl( nvalm[1], q_diff ); } __U_q = sub( add( q_tmp1, q_tmp2 ), 31 ); i00 = L_sub( 1, i01 ); i10 = L_sub( 1, i11 ); c0_im = c_im; move32(); if ( i00 > 0 ) { c0_im = L_negate( c0_im ); } c1_im = c_im; move32(); if ( i10 > 0 ) { c1_im = L_negate( c1_im ); } __Ure_fx[i00][0] = Mpy_32_32( s_fx[0], nvalm[0] ); move32(); __Ure_fx[i01][0] = Mpy_32_32( c_re, nvalm[0] ); move32(); __Uim_fx[i01][0] = Mpy_32_32( c0_im, nvalm[0] ); move32(); __Ure_fx[i10][1] = Mpy_32_32( s_fx[1], nvalm[1] ); move32(); __Ure_fx[i11][1] = Mpy_32_32( c_re, nvalm[1] ); move32(); __Uim_fx[i11][1] = Mpy_32_32( c1_im, nvalm[1] ); move32(); return; } static void eig2x2_fx( const Word32 E1_fx, /*q_E*/ const Word32 E2_fx, /*q_E*/ Loading @@ -3557,7 +3910,23 @@ static void eig2x2_fx( Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/ Word16 *q_D ) { // #if 0 #if 0 eig2x2_opt( E1_fx, E2_fx, q_E, Cre_fx, Cim_fx, q_C ); Ure_fx[0][0] = __Ure_fx[0][0]; Ure_fx[0][1] = __Ure_fx[0][1]; Ure_fx[1][0] = __Ure_fx[1][0]; Ure_fx[1][1] = __Ure_fx[1][1]; Uim_fx[0][0] = __Uim_fx[0][0]; Uim_fx[0][1] = __Uim_fx[0][1]; Uim_fx[1][0] = __Uim_fx[1][0]; Uim_fx[1][1] = __Uim_fx[1][1]; *q_U = __U_q; D_fx[0] = __D_fx[0]; D_fx[1] = __D_fx[1]; *q_D = __D_q; #else eig2x2_opt( E1_fx, E2_fx, q_E, Cre_fx, Cim_fx, q_C ); Word16 chA, chB, ch; Word32 s_fx, normVal_fx, crossSquare_fx, a_fx, pm_fx, add_fx; Word32 tmp1, tmp2, tmp3, e1, e2, c_re, c_im; Loading Loading @@ -3680,6 +4049,9 @@ static void eig2x2_fx( q_tmp1 = q_e; move16(); check( __pm_fx, __pm_q, pm_fx, q_tmp2, 1 << 26 ); check( __add_fx, __add_q, add_fx, q_tmp1, 1 << 26 ); IF( LT_16( q_tmp1, q_tmp2 ) ) { D_fx[0] = L_add( L_shr( add_fx, 1 ), L_shr( pm_fx, add( sub( q_tmp2, q_tmp1 ), 1 ) ) ); Loading @@ -3698,121 +4070,9 @@ static void eig2x2_fx( *q_D = sub( q_tmp2, 1 ); move16(); } // #else #if 0 Word16 chA, chB, ch; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { Ure_fx[chA][chB] = 0; move32(); Uim_fx[chA][chB] = 0; move32(); } } #endif // =================================================================================================== /*crossSquare_fx = (c_re * c_re) + (c_im * c_im) a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) add_fx = 0.5f * (e1 + e2)*/ Word16 q1, q2, qm, qd, lshift; // (e1 - e2)^2 -> Q: 2 * q_E q1 = shl( q_E, 1 ); // 4 * ((c_re * c_re) + (c_im * c_im)) -> Q: 2 * q_C - 2 q2 = sub( shl( q_C, 1 ), 2 ); // (e1 - e2)^2 Word32 es = L_sub( E1_fx, E2_fx ); Word64 es2 = W_mult0_32_32( es, es ); lshift = sub( W_norm( es2 ), 1 ); es2 = W_shl( es2, lshift ); q1 = add( q1, lshift ); if ( !es2 ) { q1 = 63; move16(); } // 4 * ((c_re * c_re) + (c_im * c_im)) Word64 cs = W_add( W_mult0_32_32( Cre_fx, Cre_fx ), W_mult0_32_32( Cim_fx, Cim_fx ) ); // 2*q_C-2 lshift = sub( W_norm( cs ), 1 ); cs = W_shl( cs, lshift ); q2 = add( q2, lshift ); if ( !cs ) { q2 = 63; move16(); } #if 0 Word32 crossSquare_fx = (Word32) ( cs >> 32 ); // FIXME Word16 q_crossSquare = 2 * q_C + lshift - 32; // FIXME #endif Word16 csq = 2 * q_C + lshift; // FIXME // a = max(0, (e1 - e2)^2 + 4 * crossSquare_fx) qm = s_min( q1, q2 ); qd = sub( q1, q2 ); Word64 a = W_max( W_add( W_shr( es2, s_max( qd, 0 ) ), W_shl( cs, s_min( qd, 0 ) ) ), 0 ); // pm = 0.5f * sqrtf(a) // a = 0.5f * ( E1 + E2 ); lshift = W_norm( a ); Word32 pm = W_extract_h( W_shl( a, lshift ) ); Word16 e = sub( sub( 63, lshift ), qm ); pm = L_shr( Sqrt32( pm, &e ), 1 ); q2 = sub( 31, e ); check( pm, q2, pm_fx, q_tmp2, 1 << 16 ); Word32 ea = L_add( E1_fx, E2_fx ); lshift = sub( norm_l( ea ), 1 ); ea = L_shl( ea, lshift ); q1 = add( add( q_E, 1 ), lshift ); check( ea, q1, add_fx, q_tmp1, 1 << 16 ); #if 0 Word32 add_fx = ea; // FIXME Word16 q_tmp1 = q1; // FIXME Word32 pm_fx = pm; // FIXME Word16 q_tmp2 = q2; // FIXME #endif // D[0] = add + pm; // D[1] = max( 0.0f, add - pm ); qm = s_min( q1, q2 ); qd = sub( q1, q2 ); ea = L_shr( ea, s_max( qd, 0 ) ); pm = L_shl( pm, s_min( qd, 0 ) ); Word32 d0 = L_add( ea, pm ); Word32 d1 = L_max( L_sub( ea, pm ), 0 ); check( d0, qm, D_fx[0], *q_D, 1 << 16 ); check( d1, qm, D_fx[1], *q_D, 1 << 16 ); #if 0 D_fx[0] = d0; // FIXME D_fx[1] = d1; // FIXME *q_D = qm; // FIXME Word32 tmp1, tmp2, tmp3, e1, e2, s_fx, normVal_fx, c_re, c_im; // FIXME Word16 q_U_1, q_U_2, q_c, q_e, exp, exp_tmp3; // FIXME Word32 epsilon_mant = 1180591621; // FIXME Word16 epsilon_exp = -39; // FIXME exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 ); // FIXME c_re = L_shl( Cre_fx, exp ); // FIXME c_im = L_shl( Cim_fx, exp ); // FIXME q_c = add( q_C, exp ); // FIXME exp = sub( get_min_scalefactor( E1_fx, E2_fx ), 2 ); // FIXME e1 = L_shl( E1_fx, exp ); // FIXME e2 = L_shl( E2_fx, exp ); // FIXME q_e = add( q_E, exp ); // FIXME #endif // #endif check( __D_fx[0], __D_q, D_fx[0], *q_D, 1 << 26 ); check( __D_fx[1], __D_q, D_fx[1], *q_D, 1 << 26 ); /* Numeric case, when input is practically zeros */ // IF( D_fx[0] < EPSILON_FX ) Loading @@ -3826,6 +4086,15 @@ static void eig2x2_fx( *q_U = Q31; move16(); for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) { for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) { check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 ); check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 ); } } return; } Loading @@ -3843,6 +4112,15 @@ static void eig2x2_fx( *q_U = Q30; move16(); for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) { for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) { check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 ); check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 ); } } return; } } Loading @@ -3857,6 +4135,15 @@ static void eig2x2_fx( *q_U = Q30; move16(); for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) { for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) { check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 ); check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 ); } } return; } } Loading Loading @@ -3885,68 +4172,18 @@ static void eig2x2_fx( move16(); } // D[ch] - E1 // D[ch] - E2 Word32 arg0, arg1, arg2; Word32 z1, z2, ad, s; qm = s_min( *q_D, q_e ); qd = sub( *q_D, q_e ); arg0 = L_shr( D_fx[ch], s_max( qd, 0 ) ); arg1 = L_shl( e1, s_min( qd, 0 ) ); arg2 = L_shl( e2, s_min( qd, 0 ) ); z1 = L_sub( arg0, arg1 ); z2 = L_sub( arg0, arg2 ); check( z1, qm, tmp1, q_tmp1, 1 << 1 ); check( z2, qm, tmp2, q_tmp1, 1 << 1 ); ad = L_sub( L_abs( z2 ), L_abs( z1 ) ); if ( ad > 0 ) { s = z2; } if ( ad <= 0 ) { s = z1; } Word64 epsm = ( (Word64) 1180591621 ) << 32; // FIXME Word16 epsq = 63 - epsilon_exp; // FIXME Word16 sq = qm; Word16 s2q = L_shl( sq, 1 ); qm = s_min( s2q, csq ); qm = s_min( qm, epsq ); Word64 nval64m; Word64 nval32m; Word16 nvalq, nvale; nval64m = W_shr( W_mult0_32_32( s, s ), L_sub( s2q, qm ) ); nval64m = W_add( nval64m, W_shr( cs, L_sub( csq, qm ) ) ); nval64m = W_add( nval64m, W_shr( epsm, L_sub( epsq, qm ) ) ); lshift = W_norm( nval64m ); nval64m = W_shl( nval64m, lshift ); nvalq = add( qm, lshift ); Word32 test0m = W_extract_h( nval64m ); // FIXME Word32 test0q = nvalq; // FIXME nval32m = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m ), &nvale ); nvale = sub( nvale, sub( Q30, nvalq - 32 ) ); nval32m = Sqrt32( nval32m, &nvale ); nvalq = sub( 31, nvale ); Word32 test1m = nval32m; // FIXME Word32 test1q = nvalq; // FIXME check( __diff_fx[ch][0], __diff_q, tmp1, q_tmp1, 1 << 26 ); check( __diff_fx[ch][1], __diff_q, tmp2, q_tmp1, 1 << 26 ); IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) ) { check( __as[ch], 0, 1, 0, 0 ); s_fx = tmp2; move32(); check( __s_fx[ch], __s_q[ch], s_fx, q_tmp1, 1 << 26 ); exp = sub( norm_l( s_fx ), 1 ); tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); Loading @@ -3956,14 +4193,12 @@ static void eig2x2_fx( tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); check( test0m, test0q, tmp3, 31 - exp_tmp3, 1 << 18 ); // TODO: remove tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); check( test1m, test1q, normVal_fx, q_tmp2, 1 << 18 ); // TODO: remove check( __nval_fx[ch], __nval_q[ch], normVal_fx, q_tmp2, 1 << 26 ); q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) Loading @@ -3988,6 +4223,10 @@ static void eig2x2_fx( move32(); q_U_1 = sub( add( q_tmp1, q_tmp2 ), 31 ); check( __Ure_fx[0][ch], __U_q, Ure_fx[0][ch], q_U_1, 1 << 26 ); check( __Ure_fx[1][ch], __U_q, Ure_fx[1][ch], q_U_1, 1 << 26 ); check( __Uim_fx[1][ch], __U_q, Uim_fx[1][ch], q_U_1, 1 << 26 ); IF( q_U_2 != 0 ) { q_diff = sub( q_U_2, q_U_1 ); Loading Loading @@ -4019,9 +4258,13 @@ static void eig2x2_fx( } ELSE { check( __as[ch], 0, 0, 0, 0 ); s_fx = tmp1; move32(); check( __s_fx[ch], __s_q[ch], s_fx, q_tmp1, 1 << 26 ); exp = sub( norm_l( s_fx ), 1 ); tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); Loading @@ -4031,14 +4274,12 @@ static void eig2x2_fx( tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); check( test0m, test0q, tmp3, 31 - exp_tmp3, 1 << 18 ); // TODO: remove tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); check( test1m, test1q, normVal_fx, q_tmp2, 1 << 18 ); // TODO: remove check( __nval_fx[ch], __nval_q[ch], normVal_fx, q_tmp2, 1 << 26 ); q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) Loading @@ -4063,6 +4304,10 @@ static void eig2x2_fx( move32(); q_U_2 = sub( add( q_tmp1, q_tmp2 ), 31 ); check( __Ure_fx[1][ch], __U_q, Ure_fx[1][ch], q_U_2, 1 << 26 ); check( __Ure_fx[0][ch], __U_q, Ure_fx[0][ch], q_U_2, 1 << 26 ); check( __Uim_fx[0][ch], __U_q, Uim_fx[0][ch], q_U_2, 1 << 26 ); IF( q_U_1 != 0 ) { q_diff = sub( q_U_2, q_U_1 ); Loading Loading @@ -4106,6 +4351,15 @@ static void eig2x2_fx( move16(); } for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) { for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) { check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 26 ); check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 26 ); } } #endif return; } Loading Loading
lib_rend/ivas_dirac_dec_binaural_functions_fx.c +433 −179 Original line number Diff line number Diff line Loading @@ -3537,13 +3537,366 @@ static void check( Word32 cf = computed_fx >> +max( qd, 0 ); Word32 ef = expected_fx >> -min( qd, 0 ); Word32 abs_error = abs( cf - ef ); if ( abs_error >= max_abs_err ) if ( abs_error > max_abs_err ) { assert( false ); } } #endif Word32 __pm_fx; Word16 __pm_q; Word32 __add_fx; Word16 __add_q; Word32 __as[BINAURAL_CHANNELS]; Word32 __s_fx[BINAURAL_CHANNELS]; Word16 __s_q[BINAURAL_CHANNELS]; Word32 __nval_fx[BINAURAL_CHANNELS]; Word16 __nval_q[BINAURAL_CHANNELS]; Word32 __Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word32 __Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word16 __U_q; Word32 __D_fx[BINAURAL_CHANNELS]; Word16 __D_q; Word32 __diff_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word16 __diff_q; static void eig2x2_opt( const Word32 E1_fx, /*q_E*/ const Word32 E2_fx, /*q_E*/ Word16 q_E, const Word32 Cre_fx, /*q_C*/ const Word32 Cim_fx, /*q_C*/ Word16 q_C ) { Word32 s_fx[BINAURAL_CHANNELS]; Word32 pm_fx, add_fx; Word32 tmp1, tmp2, e1, e2, c_re, c_im, c0_im, c1_im; Word64 crossSquare_fx, tmp3, tmp4; Word16 q_crossSquare, q_min, q_diff, q_tmp1, q_tmp2, exp, q_e, q_c; Word32 i01, i00, i11, i10; Word64 eps_fx = ( (Word64) EPSILON_MANT ) << 32; Word16 eps_q = 63 - EPSILON_EXP; move32(); move16(); set32_fx( (Word32 *) __Ure_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); set32_fx( (Word32 *) __Uim_fx, 0, BINAURAL_CHANNELS * BINAURAL_CHANNELS ); __Ure_fx[0][0] = ONE_IN_Q30; move32(); __Ure_fx[1][1] = ONE_IN_Q30; move32(); __U_q = Q30; move16(); exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 ); c_re = L_shl( Cre_fx, exp ); c_im = L_shl( Cim_fx, exp ); q_c = add( q_C, exp ); exp = sub( get_min_scalefactor( E1_fx, E2_fx ), 2 ); e1 = L_shl( E1_fx, exp ); e2 = L_shl( E2_fx, exp ); q_e = add( q_E, exp ); // crossSquare_fx = (c_re * c_re) + (c_im * c_im) // a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) // add_fx = 0.5f * (e1 + e2) tmp1 = L_sub( e1, e2 ); tmp3 = W_mult_32_32( tmp1, tmp1 ); q_tmp1 = add( add( q_e, q_e ), 1 ); if ( !tmp3 ) { q_tmp1 = 63; move16(); } crossSquare_fx = W_mac_32_32( W_mult_32_32( c_re, c_re ), c_im, c_im ); q_crossSquare = add( add( q_c, q_c ), 1 ); if ( !crossSquare_fx ) { q_crossSquare = 63; move16(); } tmp4 = crossSquare_fx; move64(); q_tmp2 = sub( q_crossSquare, 2 ); if ( !tmp4 ) { q_tmp2 = 63; move16(); } q_diff = sub( q_tmp1, q_tmp2 ); q_tmp1 = s_min( q_tmp1, q_tmp2 ); if ( q_diff > 0 ) { tmp3 = W_shr( tmp3, q_diff ); } if ( q_diff < 0 ) { tmp4 = W_shl( tmp4, q_diff ); } tmp3 = W_add( tmp3, tmp4 ); q_diff = W_norm( tmp3 ); tmp3 = W_shl( tmp3, q_diff ); q_tmp1 = add( q_tmp1, q_diff ); // pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) exp = sub( 63, q_tmp1 ); pm_fx = Sqrt32( L_max( 0, W_extract_h( tmp3 ) ), &exp ); pm_fx = L_shr( pm_fx, 1 ); q_tmp2 = sub( 31, exp ); // add_fx = 0.5 * (e1 + e2) add_fx = L_shr( L_add( e1, e2 ), 1 ); q_tmp1 = q_e; move16(); __pm_fx = pm_fx; // FIXME __pm_q = q_tmp2; // FIXME __add_fx = add_fx; // FIXME __add_q = q_tmp1; // FIXME // D[0] = add + pm; // D[1] = max( 0.0f, add - pm ); q_diff = sub( q_tmp1, q_tmp2 ); tmp1 = add_fx; move32(); if ( q_diff > 0 ) { tmp1 = L_shr( tmp1, q_diff ); } tmp2 = pm_fx; move32(); if ( q_diff < 0 ) { tmp2 = L_shl( tmp2, q_diff ); } __D_fx[0] = L_add( tmp1, tmp2 ); move32(); __D_fx[1] = L_max( L_sub( tmp1, tmp2 ), 0 ); move32(); __D_q = s_min( q_tmp1, q_tmp2 ); move32(); // Numeric case, when input is practically zeros // if ( __D_fx[0] < EPSILON_FX ) if ( LT_32( L_shl_sat( __D_fx[0], sub( 31 - EPSILON_EXP, __D_q ) ), EPSILON_MANT ) ) { return; } // Numeric case, when input is near an identity matrix with a gain tmp1 = Mpy_32_32( INV_1000_Q31, add_fx ); if ( q_diff > 0 ) { tmp1 = L_shr( tmp1, q_diff ); } if ( LT_32( tmp2, tmp1 ) ) { return; } // Eigenvectors q_diff = sub( q_e, __D_q ); q_tmp1 = s_min( q_e, __D_q ); tmp1 = __D_fx[0]; move32(); if ( q_diff > 0 ) { tmp1 = L_shr( tmp1, q_diff ); } tmp2 = __D_fx[1]; move32(); if ( q_diff > 0 ) { tmp2 = L_shr( tmp2, q_diff ); } if ( q_diff < 0 ) { e1 = L_shl( e1, q_diff ); } if ( q_diff < 0 ) { e2 = L_shl( e2, q_diff ); } s_fx[0] = L_sub( tmp1, e1 ); // __D_fx[0] - e1 tmp1 = L_sub( tmp1, e2 ); // __D_fx[0] - e2 s_fx[1] = L_sub( tmp2, e1 ); // __D_fx[1] - e1 tmp2 = L_sub( tmp2, e2 ); // __D_fx[1] - e2 __diff_fx[0][0] = s_fx[0]; __diff_fx[0][1] = tmp1; __diff_fx[1][0] = s_fx[1]; __diff_fx[1][1] = tmp2; __diff_q = q_tmp1; i01 = GT_32( L_abs( tmp1 ), L_abs( s_fx[0] ) ); // fabsf( __D_fx[0] - e2 ) > fabsf( __D_fx[0] - e1 ) i11 = GT_32( L_abs( tmp2 ), L_abs( s_fx[1] ) ); // fabsf( __D_fx[1] - e2 ) > fabsf( __D_fx[1] - e1 ) __as[0] = i01; __as[1] = i11; if ( i01 ) { s_fx[0] = tmp1; move32(); } if ( i11 ) { s_fx[1] = tmp2; move32(); } __s_fx[0] = s_fx[0]; __s_fx[1] = s_fx[1]; __s_q[0] = q_tmp1; __s_q[1] = q_tmp1; // normVal = sqrtf( 1.0f / ( 1e-12f + crossSquare + s * s ) ); Word32 nvalm[BINAURAL_CHANNELS]; Word16 nvalq[BINAURAL_CHANNELS]; q_tmp2 = L_shl( q_tmp1, 1 ); q_min = s_min( q_tmp2, q_crossSquare ); q_min = s_min( q_min, eps_q ); Word64 nval64m[BINAURAL_CHANNELS]; q_diff = L_sub( q_tmp2, q_min ); nval64m[0] = W_shr( W_mult0_32_32( s_fx[0], s_fx[0] ), q_diff ); nval64m[1] = W_shr( W_mult0_32_32( s_fx[1], s_fx[1] ), q_diff ); q_diff = L_sub( q_crossSquare, q_min ); crossSquare_fx = W_shr( crossSquare_fx, q_diff ); nval64m[0] = W_add( nval64m[0], crossSquare_fx ); nval64m[1] = W_add( nval64m[1], crossSquare_fx ); q_diff = L_sub( eps_q, q_min ); eps_fx = W_shr( eps_fx, q_diff ); nval64m[0] = W_add( nval64m[0], eps_fx ); nval64m[1] = W_add( nval64m[1], eps_fx ); q_diff = W_norm( nval64m[0] ); nval64m[0] = W_shl( nval64m[0], q_diff ); nvalq[0] = add( q_min, q_diff ); q_diff = W_norm( nval64m[1] ); nval64m[1] = W_shl( nval64m[1], q_diff ); nvalq[1] = add( q_min, q_diff ); nvalm[0] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ), &exp ); exp = sub( exp, sub( 62, nvalq[0] ) ); nvalm[0] = Sqrt32( nvalm[0], &exp ); nvalq[0] = sub( 31, exp ); nvalm[1] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ), &exp ); exp = sub( exp, sub( 62, nvalq[1] ) ); nvalm[1] = Sqrt32( nvalm[1], &exp ); nvalq[1] = sub( 31, exp ); __nval_fx[0] = nvalm[0]; __nval_q[0] = nvalq[0]; __nval_fx[1] = nvalm[1]; __nval_q[1] = nvalq[1]; q_diff = sub( q_c, q_tmp1 ); q_tmp1 = s_min( q_tmp1, q_c ); if ( q_diff > 0 ) { c_re = L_shr( c_re, q_diff ); } if ( q_diff > 0 ) { c_im = L_shr( c_im, q_diff ); } if ( q_diff < 0 ) { s_fx[0] = L_shl( s_fx[0], q_diff ); } if ( q_diff < 0 ) { s_fx[1] = L_shl( s_fx[1], q_diff ); } q_diff = sub( nvalq[0], nvalq[1] ); q_tmp2 = s_min( nvalq[0], nvalq[1] ); if ( q_diff > 0 ) { nvalm[0] = L_shr( nvalm[0], q_diff ); } if ( q_diff < 0 ) { nvalm[1] = L_shl( nvalm[1], q_diff ); } __U_q = sub( add( q_tmp1, q_tmp2 ), 31 ); i00 = L_sub( 1, i01 ); i10 = L_sub( 1, i11 ); c0_im = c_im; move32(); if ( i00 > 0 ) { c0_im = L_negate( c0_im ); } c1_im = c_im; move32(); if ( i10 > 0 ) { c1_im = L_negate( c1_im ); } __Ure_fx[i00][0] = Mpy_32_32( s_fx[0], nvalm[0] ); move32(); __Ure_fx[i01][0] = Mpy_32_32( c_re, nvalm[0] ); move32(); __Uim_fx[i01][0] = Mpy_32_32( c0_im, nvalm[0] ); move32(); __Ure_fx[i10][1] = Mpy_32_32( s_fx[1], nvalm[1] ); move32(); __Ure_fx[i11][1] = Mpy_32_32( c_re, nvalm[1] ); move32(); __Uim_fx[i11][1] = Mpy_32_32( c1_im, nvalm[1] ); move32(); return; } static void eig2x2_fx( const Word32 E1_fx, /*q_E*/ const Word32 E2_fx, /*q_E*/ Loading @@ -3557,7 +3910,23 @@ static void eig2x2_fx( Word32 D_fx[BINAURAL_CHANNELS], /*q_D*/ Word16 *q_D ) { // #if 0 #if 0 eig2x2_opt( E1_fx, E2_fx, q_E, Cre_fx, Cim_fx, q_C ); Ure_fx[0][0] = __Ure_fx[0][0]; Ure_fx[0][1] = __Ure_fx[0][1]; Ure_fx[1][0] = __Ure_fx[1][0]; Ure_fx[1][1] = __Ure_fx[1][1]; Uim_fx[0][0] = __Uim_fx[0][0]; Uim_fx[0][1] = __Uim_fx[0][1]; Uim_fx[1][0] = __Uim_fx[1][0]; Uim_fx[1][1] = __Uim_fx[1][1]; *q_U = __U_q; D_fx[0] = __D_fx[0]; D_fx[1] = __D_fx[1]; *q_D = __D_q; #else eig2x2_opt( E1_fx, E2_fx, q_E, Cre_fx, Cim_fx, q_C ); Word16 chA, chB, ch; Word32 s_fx, normVal_fx, crossSquare_fx, a_fx, pm_fx, add_fx; Word32 tmp1, tmp2, tmp3, e1, e2, c_re, c_im; Loading Loading @@ -3680,6 +4049,9 @@ static void eig2x2_fx( q_tmp1 = q_e; move16(); check( __pm_fx, __pm_q, pm_fx, q_tmp2, 1 << 26 ); check( __add_fx, __add_q, add_fx, q_tmp1, 1 << 26 ); IF( LT_16( q_tmp1, q_tmp2 ) ) { D_fx[0] = L_add( L_shr( add_fx, 1 ), L_shr( pm_fx, add( sub( q_tmp2, q_tmp1 ), 1 ) ) ); Loading @@ -3698,121 +4070,9 @@ static void eig2x2_fx( *q_D = sub( q_tmp2, 1 ); move16(); } // #else #if 0 Word16 chA, chB, ch; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { Ure_fx[chA][chB] = 0; move32(); Uim_fx[chA][chB] = 0; move32(); } } #endif // =================================================================================================== /*crossSquare_fx = (c_re * c_re) + (c_im * c_im) a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx pm_fx = 0.5f * sqrtf(max(0.0f, a_fx)) add_fx = 0.5f * (e1 + e2)*/ Word16 q1, q2, qm, qd, lshift; // (e1 - e2)^2 -> Q: 2 * q_E q1 = shl( q_E, 1 ); // 4 * ((c_re * c_re) + (c_im * c_im)) -> Q: 2 * q_C - 2 q2 = sub( shl( q_C, 1 ), 2 ); // (e1 - e2)^2 Word32 es = L_sub( E1_fx, E2_fx ); Word64 es2 = W_mult0_32_32( es, es ); lshift = sub( W_norm( es2 ), 1 ); es2 = W_shl( es2, lshift ); q1 = add( q1, lshift ); if ( !es2 ) { q1 = 63; move16(); } // 4 * ((c_re * c_re) + (c_im * c_im)) Word64 cs = W_add( W_mult0_32_32( Cre_fx, Cre_fx ), W_mult0_32_32( Cim_fx, Cim_fx ) ); // 2*q_C-2 lshift = sub( W_norm( cs ), 1 ); cs = W_shl( cs, lshift ); q2 = add( q2, lshift ); if ( !cs ) { q2 = 63; move16(); } #if 0 Word32 crossSquare_fx = (Word32) ( cs >> 32 ); // FIXME Word16 q_crossSquare = 2 * q_C + lshift - 32; // FIXME #endif Word16 csq = 2 * q_C + lshift; // FIXME // a = max(0, (e1 - e2)^2 + 4 * crossSquare_fx) qm = s_min( q1, q2 ); qd = sub( q1, q2 ); Word64 a = W_max( W_add( W_shr( es2, s_max( qd, 0 ) ), W_shl( cs, s_min( qd, 0 ) ) ), 0 ); // pm = 0.5f * sqrtf(a) // a = 0.5f * ( E1 + E2 ); lshift = W_norm( a ); Word32 pm = W_extract_h( W_shl( a, lshift ) ); Word16 e = sub( sub( 63, lshift ), qm ); pm = L_shr( Sqrt32( pm, &e ), 1 ); q2 = sub( 31, e ); check( pm, q2, pm_fx, q_tmp2, 1 << 16 ); Word32 ea = L_add( E1_fx, E2_fx ); lshift = sub( norm_l( ea ), 1 ); ea = L_shl( ea, lshift ); q1 = add( add( q_E, 1 ), lshift ); check( ea, q1, add_fx, q_tmp1, 1 << 16 ); #if 0 Word32 add_fx = ea; // FIXME Word16 q_tmp1 = q1; // FIXME Word32 pm_fx = pm; // FIXME Word16 q_tmp2 = q2; // FIXME #endif // D[0] = add + pm; // D[1] = max( 0.0f, add - pm ); qm = s_min( q1, q2 ); qd = sub( q1, q2 ); ea = L_shr( ea, s_max( qd, 0 ) ); pm = L_shl( pm, s_min( qd, 0 ) ); Word32 d0 = L_add( ea, pm ); Word32 d1 = L_max( L_sub( ea, pm ), 0 ); check( d0, qm, D_fx[0], *q_D, 1 << 16 ); check( d1, qm, D_fx[1], *q_D, 1 << 16 ); #if 0 D_fx[0] = d0; // FIXME D_fx[1] = d1; // FIXME *q_D = qm; // FIXME Word32 tmp1, tmp2, tmp3, e1, e2, s_fx, normVal_fx, c_re, c_im; // FIXME Word16 q_U_1, q_U_2, q_c, q_e, exp, exp_tmp3; // FIXME Word32 epsilon_mant = 1180591621; // FIXME Word16 epsilon_exp = -39; // FIXME exp = sub( get_min_scalefactor( Cre_fx, Cim_fx ), 2 ); // FIXME c_re = L_shl( Cre_fx, exp ); // FIXME c_im = L_shl( Cim_fx, exp ); // FIXME q_c = add( q_C, exp ); // FIXME exp = sub( get_min_scalefactor( E1_fx, E2_fx ), 2 ); // FIXME e1 = L_shl( E1_fx, exp ); // FIXME e2 = L_shl( E2_fx, exp ); // FIXME q_e = add( q_E, exp ); // FIXME #endif // #endif check( __D_fx[0], __D_q, D_fx[0], *q_D, 1 << 26 ); check( __D_fx[1], __D_q, D_fx[1], *q_D, 1 << 26 ); /* Numeric case, when input is practically zeros */ // IF( D_fx[0] < EPSILON_FX ) Loading @@ -3826,6 +4086,15 @@ static void eig2x2_fx( *q_U = Q31; move16(); for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) { for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) { check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 ); check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 ); } } return; } Loading @@ -3843,6 +4112,15 @@ static void eig2x2_fx( *q_U = Q30; move16(); for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) { for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) { check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 ); check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 ); } } return; } } Loading @@ -3857,6 +4135,15 @@ static void eig2x2_fx( *q_U = Q30; move16(); for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) { for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) { check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 2 ); check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 2 ); } } return; } } Loading Loading @@ -3885,68 +4172,18 @@ static void eig2x2_fx( move16(); } // D[ch] - E1 // D[ch] - E2 Word32 arg0, arg1, arg2; Word32 z1, z2, ad, s; qm = s_min( *q_D, q_e ); qd = sub( *q_D, q_e ); arg0 = L_shr( D_fx[ch], s_max( qd, 0 ) ); arg1 = L_shl( e1, s_min( qd, 0 ) ); arg2 = L_shl( e2, s_min( qd, 0 ) ); z1 = L_sub( arg0, arg1 ); z2 = L_sub( arg0, arg2 ); check( z1, qm, tmp1, q_tmp1, 1 << 1 ); check( z2, qm, tmp2, q_tmp1, 1 << 1 ); ad = L_sub( L_abs( z2 ), L_abs( z1 ) ); if ( ad > 0 ) { s = z2; } if ( ad <= 0 ) { s = z1; } Word64 epsm = ( (Word64) 1180591621 ) << 32; // FIXME Word16 epsq = 63 - epsilon_exp; // FIXME Word16 sq = qm; Word16 s2q = L_shl( sq, 1 ); qm = s_min( s2q, csq ); qm = s_min( qm, epsq ); Word64 nval64m; Word64 nval32m; Word16 nvalq, nvale; nval64m = W_shr( W_mult0_32_32( s, s ), L_sub( s2q, qm ) ); nval64m = W_add( nval64m, W_shr( cs, L_sub( csq, qm ) ) ); nval64m = W_add( nval64m, W_shr( epsm, L_sub( epsq, qm ) ) ); lshift = W_norm( nval64m ); nval64m = W_shl( nval64m, lshift ); nvalq = add( qm, lshift ); Word32 test0m = W_extract_h( nval64m ); // FIXME Word32 test0q = nvalq; // FIXME nval32m = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m ), &nvale ); nvale = sub( nvale, sub( Q30, nvalq - 32 ) ); nval32m = Sqrt32( nval32m, &nvale ); nvalq = sub( 31, nvale ); Word32 test1m = nval32m; // FIXME Word32 test1q = nvalq; // FIXME check( __diff_fx[ch][0], __diff_q, tmp1, q_tmp1, 1 << 26 ); check( __diff_fx[ch][1], __diff_q, tmp2, q_tmp1, 1 << 26 ); IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) ) { check( __as[ch], 0, 1, 0, 0 ); s_fx = tmp2; move32(); check( __s_fx[ch], __s_q[ch], s_fx, q_tmp1, 1 << 26 ); exp = sub( norm_l( s_fx ), 1 ); tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); Loading @@ -3956,14 +4193,12 @@ static void eig2x2_fx( tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); check( test0m, test0q, tmp3, 31 - exp_tmp3, 1 << 18 ); // TODO: remove tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); check( test1m, test1q, normVal_fx, q_tmp2, 1 << 18 ); // TODO: remove check( __nval_fx[ch], __nval_q[ch], normVal_fx, q_tmp2, 1 << 26 ); q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) Loading @@ -3988,6 +4223,10 @@ static void eig2x2_fx( move32(); q_U_1 = sub( add( q_tmp1, q_tmp2 ), 31 ); check( __Ure_fx[0][ch], __U_q, Ure_fx[0][ch], q_U_1, 1 << 26 ); check( __Ure_fx[1][ch], __U_q, Ure_fx[1][ch], q_U_1, 1 << 26 ); check( __Uim_fx[1][ch], __U_q, Uim_fx[1][ch], q_U_1, 1 << 26 ); IF( q_U_2 != 0 ) { q_diff = sub( q_U_2, q_U_1 ); Loading Loading @@ -4019,9 +4258,13 @@ static void eig2x2_fx( } ELSE { check( __as[ch], 0, 0, 0, 0 ); s_fx = tmp1; move32(); check( __s_fx[ch], __s_q[ch], s_fx, q_tmp1, 1 << 26 ); exp = sub( norm_l( s_fx ), 1 ); tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); Loading @@ -4031,14 +4274,12 @@ static void eig2x2_fx( tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); check( test0m, test0q, tmp3, 31 - exp_tmp3, 1 << 18 ); // TODO: remove tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); check( test1m, test1q, normVal_fx, q_tmp2, 1 << 18 ); // TODO: remove check( __nval_fx[ch], __nval_q[ch], normVal_fx, q_tmp2, 1 << 26 ); q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) Loading @@ -4063,6 +4304,10 @@ static void eig2x2_fx( move32(); q_U_2 = sub( add( q_tmp1, q_tmp2 ), 31 ); check( __Ure_fx[1][ch], __U_q, Ure_fx[1][ch], q_U_2, 1 << 26 ); check( __Ure_fx[0][ch], __U_q, Ure_fx[0][ch], q_U_2, 1 << 26 ); check( __Uim_fx[0][ch], __U_q, Uim_fx[0][ch], q_U_2, 1 << 26 ); IF( q_U_1 != 0 ) { q_diff = sub( q_U_2, q_U_1 ); Loading Loading @@ -4106,6 +4351,15 @@ static void eig2x2_fx( move16(); } for ( int i = 0; i < BINAURAL_CHANNELS; ++i ) { for ( int j = 0; j < BINAURAL_CHANNELS; ++j ) { check( __Ure_fx[i][j], __U_q, Ure_fx[i][j], *q_U, 1 << 26 ); check( __Uim_fx[i][j], __U_q, Uim_fx[i][j], *q_U, 1 << 26 ); } } #endif return; } Loading