Commit cfeceee1 authored by Fabian Bauer
Browse files

synched FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic with be branch, changed func1

parent 4cf3a559
Loading
Loading
Loading
Loading
Loading
+37 −90
Original line number Diff line number Diff line
@@ -6697,9 +6697,14 @@ static inline Word64 wmac_3216( Word64 x1, Word32 x2, Word16 x3 )
    return W_mac_32_16( x1, x2, x3 );
}

/*
 * finalSat16 - combine two 64-bit accumulators and saturate the result.
 *
 * W_tmpX  accumulator that enters the sum unshifted
 * W_tmpY  accumulator that is passed through W_shl with a shift count of
 *         (2 - 16 + 3) = -11 before being added to W_tmpX
 *
 * The sum is shifted right by 3 with W_shr and handed to W_sat_l.
 * NOTE(review): W_sat_l presumably saturates to 32 bits; the value is then
 * widened to the Word64 return type - confirm against the basop library.
 *
 * The parameter names must be spelled exactly like the identifiers used in
 * the expression below (C identifiers are case-sensitive).
 */
static inline Word64 finalSat16( Word64 W_tmpX, Word64 W_tmpY )
{
    return W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/
}

/*
 * finalSat32 - combine two 64-bit accumulators and saturate the result.
 *
 * W_tmpX  accumulator that enters the sum unshifted
 * W_tmpY  accumulator that is shifted with W_shl by (2 - 16 + 3 + 16) = 5
 *         before being added to W_tmpX
 *
 * The sum is shifted right by (3 + 16) = 19 with W_shr and handed to
 * W_sat_l.
 * NOTE(review): W_sat_l presumably saturates to 32 bits; the value is then
 * widened to the Word64 return type - confirm against the basop library.
 *
 * The parameter names must be spelled exactly like the identifiers used in
 * the expression below (C identifiers are case-sensitive).
 */
static inline Word64 finalSat32( Word64 W_tmpX, Word64 W_tmpY )
{
    return W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 + 16 ) ), 3 + 16 ) );
}

inline static void elliptic_bpf_48k_generic_func1( Word16 *input16_fx, Word32 *input32_fx, Word32 *L_tmp, const Word16 full_band_bpf_fx[][5], Word16 IsUpsampled3, Word32 *L_tmpMax )
@@ -6713,12 +6718,16 @@ inline static void elliptic_bpf_48k_generic_func1( Word16 *input16_fx, Word32 *i
    if ( input16_fx > 0 )
    {
        Word64 ( *wmac )( Word64, Word16, Word16 );
        Word64 ( *wmsu )( Word64, Word32, Word16 );
        Word64 ( *finalSat )( Word64, Word64 );
        wmac = wmac_1616;
        finalSat = finalSat16;
    }
    if ( input32_fx > 0 )
    {
        Word64 ( *wmac )( Word64, Word32, Word16 );
        Word64 ( *wmsu )( Word64, Word32, Word16 );
        Word64 ( *finalSat )( Word64, Word64 );
        wmac = wmac_3216;
        finalSat = finalSat32;
    }

    IF( !IsUpsampled3 )
@@ -6726,16 +6735,16 @@ inline static void elliptic_bpf_48k_generic_func1( Word16 *input16_fx, Word32 *i
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
        FOR( i = 0; i < L_FRAME48k; i++ )
        {
            W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] );
            W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] );
            W_tmpX = wmac( 0, input_fx[i - 4], full_band_bpf_fx[0][4] );
            W_tmpX = wmac( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] );
            W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] );
            W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 2], full_band_bpf_fx[0][2] );
            W_tmpX = wmac( W_tmpX, input_fx[i - 2], full_band_bpf_fx[0][2] );
            W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] );
            W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] );
            W_tmpX = wmac( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] );
            W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] );
            W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] );
            W_tmpX = wmac( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] );
            W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] );
            L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/
            L_tmp[i] = finalSat( W_tmpX, W_tmpY ); /*Q_input_fx + 11*/
            move32();
            if ( *L_tmpMax > 0 )
            {
@@ -6747,27 +6756,7 @@ inline static void elliptic_bpf_48k_generic_func1( Word16 *input16_fx, Word32 *i
            }
        }
#else
        FOR( i = 0; i < L_FRAME48k; i++ )
        {
            L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX );            /*Q_input_fx + 13 + 1 - 3*/
            L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
            move32();
            if ( *L_tmpMax > 0 )
            {
                L_tmpAbs = L_abs( L_tmp[i] );
            }
            if ( *L_tmpMax > 0 )
            {
                L_tmpMax2 = L_max( L_tmpMax2, L_tmpAbs );
            }
        }
        assert( 0 ); /*kein bock*/
#endif
    } /*IsUpsampled3*/
    ELSE
@@ -6829,58 +6818,7 @@ inline static void elliptic_bpf_48k_generic_func1( Word16 *input16_fx, Word32 *i
            i++;
        }
#else
        FOR( i = 0; i < L_FRAME48k; )
        {
            L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), 0 );             /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX );            /*Q_input_fx + 13 + 1 - 3*/
            L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
            move32();
            if ( *L_tmpMax > 0 )
            {
                L_tmpAbs = L_abs( L_tmp[i] );
            }
            if ( *L_tmpMax > 0 )
            {
                L_tmpMax2 = L_max( L_tmpMax2, L_tmpAbs );
            }
            i++;

            L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
            move32();
            if ( *L_tmpMax > 0 )
            {
                L_tmpAbs = L_abs( L_tmp[i] );
            }
            if ( *L_tmpMax > 0 )
            {
                L_tmpMax2 = L_max( L_tmpMax2, L_tmpAbs );
            }
            i++;

            L_tmpX = L_sub_sat( 0, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) );        /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
            move32();
            if ( *L_tmpMax > 0 )
            {
                L_tmpAbs = L_abs( L_tmp[i] );
            }
            if ( *L_tmpMax > 0 )
            {
                L_tmpMax2 = L_max( L_tmpMax2, L_tmpAbs );
            }
            i++;
        }
        assert( 0 ); /*kein bock*/
#endif /*#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/
    }  /*IsUpsampled3*/
    *L_tmpMax = L_tmpMax2;
@@ -6932,32 +6870,41 @@ void elliptic_bpf_48k_generic_fx(
    Word32 memory2_fx_2[4], memory2_fx_3[4];
#endif

    FOR( i = 0; i < 4; i++ )
    {
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
    {
    FOR( i = 0; i < 4; i++ )
        memory_fx0 = extract_l( memory_fx2[0][i] );
        input_fx[i - 4] = shl_sat( memory_fx0, sub( *Q_input_fx, memory_fx_Q[0] ) );
        L_tmp[i - 4] = L_shl_sat( memory_fx2[1][i], sub( add( *Q_input_fx, 11 ), memory_fx_Q[1] ) );
        L_tmp2[i - 4] = L_shl_sat( memory_fx2[2][i], sub( add( *Q_input_fx, 6 ), memory_fx_Q[2] ) );
        // memory2_fx[3][i] = L_shl_sat( memory_fx2[3][i], sub( add( *Q_input_fx, 1 ), memory_fx_Q[3] ) );
        move32();
        move32();
        move32();
        move32();
        move32();
    }
#else
    FOR( i = 0; i < 4; i++ )
    {
        memory_fx0[0][i] = extract_l( memory_fx2[0][i] );
        memory_fx[0][i] = shl_sat( memory_fx0[0][i], sub( *Q_input_fx, memory_fx_Q[0] ) );
        memory2_fx[1][i] = L_shl_sat( memory_fx2[1][i], sub( add( *Q_input_fx, 11 ), memory_fx_Q[1] ) );
        memory2_fx[2][i] = L_shl_sat( memory_fx2[2][i], sub( add( *Q_input_fx, 6 ), memory_fx_Q[2] ) );
        memory2_fx[3][i] = L_shl_sat( memory_fx2[3][i], sub( add( *Q_input_fx, 1 ), memory_fx_Q[3] ) );
#endif
        move32();
        move32();
        move32();
        move32();
        move32();
    }
    }
#endif

#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
    L_tmpMax = L_add( 0, 0 );
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_func1
    elliptic_bpf_48k_generic_func1( input_fx, L_tmp, &full_band_bpf_fx[0], IsUpsampled3, 0 );
    elliptic_bpf_48k_generic_func1( input_fx, 0, L_tmp, &full_band_bpf_fx[0], IsUpsampled3, 0 );
#else /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_func1*/
    IF( !IsUpsampled3 )
    {
@@ -7141,7 +7088,7 @@ void elliptic_bpf_48k_generic_fx(
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
    L_tmpMax = L_add( 0, 0 );
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_func1
    elliptic_bpf_48k_generic_func1( L_tmp, L_tmp2, &full_band_bpf_fx[1], 0, &L_tmpMax );
    elliptic_bpf_48k_generic_func1( 0, L_tmp, L_tmp2, &full_band_bpf_fx[1], 0, &L_tmpMax );
#else /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_func1*/
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
    IF( isIVAS )
@@ -7270,7 +7217,7 @@ void elliptic_bpf_48k_generic_fx(

    L_tmpMax = L_add( 0, 0 );
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_func1 /*use subfunc*/
    elliptic_bpf_48k_generic_func1( L_tmp2, L_output, &full_band_bpf_fx[2], 0, &L_tmpMax );
    elliptic_bpf_48k_generic_func1( 0, L_tmp2, L_output, &full_band_bpf_fx[2], 0, &L_tmpMax );
#else /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_func1*/
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
    IF( isIVAS )