Loading lib_rend/ivas_dirac_dec_binaural_functions_fx.c +54 −58 Original line number Diff line number Diff line Loading @@ -3519,11 +3519,12 @@ static void eig2x2_fx( Word16 *q_D ) { #ifdef NONBE_OPT_2193_EIG2X2 Word32 s_fx[BINAURAL_CHANNELS]; Word32 pm_fx, add_fx; Word32 tmp1, tmp2, e1, e2, c_re, c_im, c0_im, c1_im; Word32 condition, s0_fx, s1_fx, nval0_fx, nval1_fx; Word64 crossSquare_fx, tmp3, tmp4; Word16 q_crossSquare, q_min, q_diff, q_tmp1, q_tmp2, exp, q_e, q_c; Word16 nval0_q, nval1_q; Word32 i01, i00, i11, i10; Word64 eps_fx = ( (Word64) EPSILON_MANT ) << 32; Word16 eps_q = 63 - EPSILON_EXP; Loading Loading @@ -3634,8 +3635,8 @@ static void eig2x2_fx( // Numeric case, when input is practically zeros // if ( D_fx[0] < EPSILON_FX ) if ( LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ) ) condition = LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ); if ( condition != 0 ) { return; } Loading @@ -3646,8 +3647,8 @@ static void eig2x2_fx( { tmp1 = L_shr( tmp1, q_diff ); } if ( LT_32( tmp2, tmp1 ) ) condition = LT_32( tmp2, tmp1 ); if ( condition != 0 ) { return; } Loading Loading @@ -3681,84 +3682,79 @@ static void eig2x2_fx( e2 = L_shl( e2, q_diff ); } s_fx[0] = L_sub( tmp1, e1 ); // D_fx[0] - e1 s0_fx = L_sub( tmp1, e1 ); // D_fx[0] - e1 tmp1 = L_sub( tmp1, e2 ); // D_fx[0] - e2 s_fx[1] = L_sub( tmp2, e1 ); // D_fx[1] - e1 s1_fx = L_sub( tmp2, e1 ); // D_fx[1] - e1 tmp2 = L_sub( tmp2, e2 ); // D_fx[1] - e2 i01 = GT_32( L_abs( tmp1 ), L_abs( s_fx[0] ) ); // fabsf( D_fx[0] - e2 ) > fabsf( D_fx[0] - e1 ) i11 = GT_32( L_abs( tmp2 ), L_abs( s_fx[1] ) ); // fabsf( D_fx[1] - e2 ) > fabsf( D_fx[1] - e1 ) i01 = GT_32( L_abs( tmp1 ), L_abs( s0_fx ) ); // fabsf( D_fx[0] - e2 ) > fabsf( D_fx[0] - e1 ) i11 = GT_32( L_abs( tmp2 ), L_abs( s1_fx ) ); // fabsf( D_fx[1] - e2 ) > fabsf( D_fx[1] - e1 ) if ( i01 ) { s_fx[0] = tmp1; s0_fx = tmp1; move32(); } if ( i11 ) { s_fx[1] = tmp2; s1_fx = tmp2; move32(); } // normVal = sqrtf( 1.0f / ( 1e-12f + crossSquare + s * s ) ); Word32 nvalm[BINAURAL_CHANNELS]; Word16 nvalq[BINAURAL_CHANNELS]; q_tmp2 = shl( q_tmp1, 1 ); q_min = s_min( q_tmp2, q_crossSquare ); q_min = s_min( q_min, eps_q ); Word64 nval64m[BINAURAL_CHANNELS]; q_diff = sub( q_tmp2, q_min ); nval64m[0] = W_shr( W_mult0_32_32( s_fx[0], s_fx[0] ), q_diff ); nval64m[1] = W_shr( W_mult0_32_32( s_fx[1], s_fx[1] ), q_diff ); tmp3 = W_shr( W_mult0_32_32( s0_fx, s0_fx ), q_diff ); tmp4 = W_shr( W_mult0_32_32( s1_fx, s1_fx ), q_diff ); q_diff = sub( q_crossSquare, q_min ); crossSquare_fx = W_shr( crossSquare_fx, q_diff ); nval64m[0] = W_add( nval64m[0], crossSquare_fx ); nval64m[1] = W_add( nval64m[1], crossSquare_fx ); tmp3 = W_add( tmp3, crossSquare_fx ); tmp4 = W_add( tmp4, crossSquare_fx ); q_diff = sub( eps_q, q_min ); eps_fx = W_shr( eps_fx, q_diff ); nval64m[0] = W_add( nval64m[0], eps_fx ); nval64m[1] = W_add( nval64m[1], eps_fx ); tmp3 = W_add( tmp3, eps_fx ); tmp4 = W_add( tmp4, eps_fx ); q_diff = W_norm( nval64m[0] ); nval64m[0] = W_shl( nval64m[0], q_diff ); nvalq[0] = add( q_min, q_diff ); q_diff = W_norm( tmp3 ); tmp3 = W_shl( tmp3, q_diff ); nval0_q = add( q_min, q_diff ); q_diff = W_norm( nval64m[1] ); nval64m[1] = W_shl( nval64m[1], q_diff ); nvalq[1] = add( q_min, q_diff ); q_diff = W_norm( tmp4 ); tmp4 = W_shl( tmp4, q_diff ); nval1_q = add( q_min, q_diff ); // nvalm[0] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ), &exp ); // exp = sub( exp, sub( 62, nvalq[0] ) ); // nval0_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp3 ), &exp ); // exp = sub( exp, sub( 62, nval0_q ) ); // // is equivalent to: // // nvalm[0] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ) ); // exp = sub( nvalq[0], 61 ); // nval0_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp3 ) ); // exp = sub( nval0_q, 61 ); nvalm[0] = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( nval64m[0] ) ); exp = sub( nvalq[0], 61 ); nvalm[0] = Sqrt32( nvalm[0], &exp ); nvalq[0] = sub( 31, exp ); nval0_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp3 ) ); exp = sub( nval0_q, 61 ); nval0_fx = Sqrt32( nval0_fx, &exp ); nval0_q = sub( 31, exp ); // nvalm[1] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ), &exp ); // exp = sub( exp, sub( 62, nvalq[1] ) ); // nval1_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp4 ), &exp ); // exp = sub( exp, sub( 62, nval1_q ) ); // // is equivalent to: // // nvalm[1] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ) ); // exp = sub( nvalq[1], 61 ); // nval1_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp4 ) ); // exp = sub( nval1_q, 61 ); nvalm[1] = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( nval64m[1] ) ); exp = sub( nvalq[1], 61 ); nvalm[1] = Sqrt32( nvalm[1], &exp ); nvalq[1] = sub( 31, exp ); nval1_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp4 ) ); exp = sub( nval1_q, 61 ); nval1_fx = Sqrt32( nval1_fx, &exp ); nval1_q = sub( 31, exp ); q_diff = sub( q_c, q_tmp1 ); q_tmp1 = s_min( q_tmp1, q_c ); Loading @@ -3775,25 +3771,25 @@ static void eig2x2_fx( if ( q_diff < 0 ) { s_fx[0] = L_shl( s_fx[0], q_diff ); s0_fx = L_shl( s0_fx, q_diff ); } if ( q_diff < 0 ) { s_fx[1] = L_shl( s_fx[1], q_diff ); s1_fx = L_shl( s1_fx, q_diff ); } q_diff = sub( nvalq[0], nvalq[1] ); q_tmp2 = s_min( nvalq[0], nvalq[1] ); q_diff = sub( nval0_q, nval1_q ); q_tmp2 = s_min( nval0_q, nval1_q ); if ( q_diff > 0 ) { nvalm[0] = L_shr( nvalm[0], q_diff ); nval0_fx = L_shr( nval0_fx, q_diff ); } if ( q_diff < 0 ) { nvalm[1] = L_shl( nvalm[1], q_diff ); nval1_fx = L_shl( nval1_fx, q_diff ); } *q_U = sub( add( q_tmp1, q_tmp2 ), 31 ); Loading @@ -3815,18 +3811,18 @@ static void eig2x2_fx( c1_im = L_negate( c1_im ); } Ure_fx[i00][0] = Mpy_32_32( s_fx[0], nvalm[0] ); Ure_fx[i00][0] = Mpy_32_32( s0_fx, nval0_fx ); move32(); Ure_fx[i01][0] = Mpy_32_32( c_re, nvalm[0] ); Ure_fx[i01][0] = Mpy_32_32( c_re, nval0_fx ); move32(); Uim_fx[i01][0] = Mpy_32_32( c0_im, nvalm[0] ); Uim_fx[i01][0] = Mpy_32_32( c0_im, nval0_fx ); move32(); Ure_fx[i10][1] = Mpy_32_32( s_fx[1], nvalm[1] ); Ure_fx[i10][1] = Mpy_32_32( s1_fx, nval1_fx ); move32(); Ure_fx[i11][1] = Mpy_32_32( c_re, nvalm[1] ); Ure_fx[i11][1] = Mpy_32_32( c_re, nval1_fx ); move32(); Uim_fx[i11][1] = Mpy_32_32( c1_im, nvalm[1] ); Uim_fx[i11][1] = Mpy_32_32( c1_im, nval1_fx ); move32(); #else Word16 chA, chB, ch; Loading Loading
lib_rend/ivas_dirac_dec_binaural_functions_fx.c +54 −58 Original line number Diff line number Diff line Loading @@ -3519,11 +3519,12 @@ static void eig2x2_fx( Word16 *q_D ) { #ifdef NONBE_OPT_2193_EIG2X2 Word32 s_fx[BINAURAL_CHANNELS]; Word32 pm_fx, add_fx; Word32 tmp1, tmp2, e1, e2, c_re, c_im, c0_im, c1_im; Word32 condition, s0_fx, s1_fx, nval0_fx, nval1_fx; Word64 crossSquare_fx, tmp3, tmp4; Word16 q_crossSquare, q_min, q_diff, q_tmp1, q_tmp2, exp, q_e, q_c; Word16 nval0_q, nval1_q; Word32 i01, i00, i11, i10; Word64 eps_fx = ( (Word64) EPSILON_MANT ) << 32; Word16 eps_q = 63 - EPSILON_EXP; Loading Loading @@ -3634,8 +3635,8 @@ static void eig2x2_fx( // Numeric case, when input is practically zeros // if ( D_fx[0] < EPSILON_FX ) if ( LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ) ) condition = LT_32( L_shl_sat( D_fx[0], sub( 31 - EPSILON_EXP, *q_D ) ), EPSILON_MANT ); if ( condition != 0 ) { return; } Loading @@ -3646,8 +3647,8 @@ static void eig2x2_fx( { tmp1 = L_shr( tmp1, q_diff ); } if ( LT_32( tmp2, tmp1 ) ) condition = LT_32( tmp2, tmp1 ); if ( condition != 0 ) { return; } Loading Loading @@ -3681,84 +3682,79 @@ static void eig2x2_fx( e2 = L_shl( e2, q_diff ); } s_fx[0] = L_sub( tmp1, e1 ); // D_fx[0] - e1 s0_fx = L_sub( tmp1, e1 ); // D_fx[0] - e1 tmp1 = L_sub( tmp1, e2 ); // D_fx[0] - e2 s_fx[1] = L_sub( tmp2, e1 ); // D_fx[1] - e1 s1_fx = L_sub( tmp2, e1 ); // D_fx[1] - e1 tmp2 = L_sub( tmp2, e2 ); // D_fx[1] - e2 i01 = GT_32( L_abs( tmp1 ), L_abs( s_fx[0] ) ); // fabsf( D_fx[0] - e2 ) > fabsf( D_fx[0] - e1 ) i11 = GT_32( L_abs( tmp2 ), L_abs( s_fx[1] ) ); // fabsf( D_fx[1] - e2 ) > fabsf( D_fx[1] - e1 ) i01 = GT_32( L_abs( tmp1 ), L_abs( s0_fx ) ); // fabsf( D_fx[0] - e2 ) > fabsf( D_fx[0] - e1 ) i11 = GT_32( L_abs( tmp2 ), L_abs( s1_fx ) ); // fabsf( D_fx[1] - e2 ) > fabsf( D_fx[1] - e1 ) if ( i01 ) { s_fx[0] = tmp1; s0_fx = tmp1; move32(); } if ( i11 ) { s_fx[1] = tmp2; s1_fx = tmp2; move32(); } // normVal = sqrtf( 1.0f / ( 1e-12f + crossSquare + s * s ) ); Word32 nvalm[BINAURAL_CHANNELS]; Word16 nvalq[BINAURAL_CHANNELS]; q_tmp2 = shl( q_tmp1, 1 ); q_min = s_min( q_tmp2, q_crossSquare ); q_min = s_min( q_min, eps_q ); Word64 nval64m[BINAURAL_CHANNELS]; q_diff = sub( q_tmp2, q_min ); nval64m[0] = W_shr( W_mult0_32_32( s_fx[0], s_fx[0] ), q_diff ); nval64m[1] = W_shr( W_mult0_32_32( s_fx[1], s_fx[1] ), q_diff ); tmp3 = W_shr( W_mult0_32_32( s0_fx, s0_fx ), q_diff ); tmp4 = W_shr( W_mult0_32_32( s1_fx, s1_fx ), q_diff ); q_diff = sub( q_crossSquare, q_min ); crossSquare_fx = W_shr( crossSquare_fx, q_diff ); nval64m[0] = W_add( nval64m[0], crossSquare_fx ); nval64m[1] = W_add( nval64m[1], crossSquare_fx ); tmp3 = W_add( tmp3, crossSquare_fx ); tmp4 = W_add( tmp4, crossSquare_fx ); q_diff = sub( eps_q, q_min ); eps_fx = W_shr( eps_fx, q_diff ); nval64m[0] = W_add( nval64m[0], eps_fx ); nval64m[1] = W_add( nval64m[1], eps_fx ); tmp3 = W_add( tmp3, eps_fx ); tmp4 = W_add( tmp4, eps_fx ); q_diff = W_norm( nval64m[0] ); nval64m[0] = W_shl( nval64m[0], q_diff ); nvalq[0] = add( q_min, q_diff ); q_diff = W_norm( tmp3 ); tmp3 = W_shl( tmp3, q_diff ); nval0_q = add( q_min, q_diff ); q_diff = W_norm( nval64m[1] ); nval64m[1] = W_shl( nval64m[1], q_diff ); nvalq[1] = add( q_min, q_diff ); q_diff = W_norm( tmp4 ); tmp4 = W_shl( tmp4, q_diff ); nval1_q = add( q_min, q_diff ); // nvalm[0] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ), &exp ); // exp = sub( exp, sub( 62, nvalq[0] ) ); // nval0_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp3 ), &exp ); // exp = sub( exp, sub( 62, nval0_q ) ); // // is equivalent to: // // nvalm[0] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[0] ) ); // exp = sub( nvalq[0], 61 ); // nval0_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp3 ) ); // exp = sub( nval0_q, 61 ); nvalm[0] = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( nval64m[0] ) ); exp = sub( nvalq[0], 61 ); nvalm[0] = Sqrt32( nvalm[0], &exp ); nvalq[0] = sub( 31, exp ); nval0_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp3 ) ); exp = sub( nval0_q, 61 ); nval0_fx = Sqrt32( nval0_fx, &exp ); nval0_q = sub( 31, exp ); // nvalm[1] = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ), &exp ); // exp = sub( exp, sub( 62, nvalq[1] ) ); // nval1_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, W_extract_h( tmp4 ), &exp ); // exp = sub( exp, sub( 62, nval1_q ) ); // // is equivalent to: // // nvalm[1] = div_w_newton( ONE_IN_Q30, W_extract_h( nval64m[1] ) ); // exp = sub( nvalq[1], 61 ); // nval1_fx = div_w_newton( ONE_IN_Q30, W_extract_h( tmp4 ) ); // exp = sub( nval1_q, 61 ); nvalm[1] = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( nval64m[1] ) ); exp = sub( nvalq[1], 61 ); nvalm[1] = Sqrt32( nvalm[1], &exp ); nvalq[1] = sub( 31, exp ); nval1_fx = eig2x2_div_fx( ONE_IN_Q30, W_extract_h( tmp4 ) ); exp = sub( nval1_q, 61 ); nval1_fx = Sqrt32( nval1_fx, &exp ); nval1_q = sub( 31, exp ); q_diff = sub( q_c, q_tmp1 ); q_tmp1 = s_min( q_tmp1, q_c ); Loading @@ -3775,25 +3771,25 @@ static void eig2x2_fx( if ( q_diff < 0 ) { s_fx[0] = L_shl( s_fx[0], q_diff ); s0_fx = L_shl( s0_fx, q_diff ); } if ( q_diff < 0 ) { s_fx[1] = L_shl( s_fx[1], q_diff ); s1_fx = L_shl( s1_fx, q_diff ); } q_diff = sub( nvalq[0], nvalq[1] ); q_tmp2 = s_min( nvalq[0], nvalq[1] ); q_diff = sub( nval0_q, nval1_q ); q_tmp2 = s_min( nval0_q, nval1_q ); if ( q_diff > 0 ) { nvalm[0] = L_shr( nvalm[0], q_diff ); nval0_fx = L_shr( nval0_fx, q_diff ); } if ( q_diff < 0 ) { nvalm[1] = L_shl( nvalm[1], q_diff ); nval1_fx = L_shl( nval1_fx, q_diff ); } *q_U = sub( add( q_tmp1, q_tmp2 ), 31 ); Loading @@ -3815,18 +3811,18 @@ static void eig2x2_fx( c1_im = L_negate( c1_im ); } Ure_fx[i00][0] = Mpy_32_32( s_fx[0], nvalm[0] ); Ure_fx[i00][0] = Mpy_32_32( s0_fx, nval0_fx ); move32(); Ure_fx[i01][0] = Mpy_32_32( c_re, nvalm[0] ); Ure_fx[i01][0] = Mpy_32_32( c_re, nval0_fx ); move32(); Uim_fx[i01][0] = Mpy_32_32( c0_im, nvalm[0] ); Uim_fx[i01][0] = Mpy_32_32( c0_im, nval0_fx ); move32(); Ure_fx[i10][1] = Mpy_32_32( s_fx[1], nvalm[1] ); Ure_fx[i10][1] = Mpy_32_32( s1_fx, nval1_fx ); move32(); Ure_fx[i11][1] = Mpy_32_32( c_re, nvalm[1] ); Ure_fx[i11][1] = Mpy_32_32( c_re, nval1_fx ); move32(); Uim_fx[i11][1] = Mpy_32_32( c1_im, nvalm[1] ); Uim_fx[i11][1] = Mpy_32_32( c1_im, nval1_fx ); move32(); #else Word16 chA, chB, ch; Loading