Commit 43a92380 authored by Nicolas Roussin's avatar Nicolas Roussin
Browse files

Optimize ivas_filter_process_fx part 3.

parent 85284408
Loading
Loading
Loading
Loading
+71 −117
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@
 *------------------------------------------------------------------------------------------*/

static void ivas_iir_2_filter_fx( ivas_filters_process_state_t *filter_state, Word32 *pIn_Out_fx, const Word16 length, const Word16 stage, Word16 *pIn_Out_e );
static void ivas_iir_2_filter_fixed_fx( ivas_filters_process_state_t *filter_state, Word32 *pIn_Out_fx, const Word16 length, const Word16 stage, Word16 *pIn_Out_e );
static void ivas_iir_2_filter_fixed_fx( ivas_filters_process_state_t *filter_state, Word32 *pIn_Out_fx, const Word16 length, const Word16 stage, Word16 q );


/*-----------------------------------------------------------------------------------------*
@@ -79,6 +79,12 @@ void ivas_filters_init_fx(
            move32();
            move16();
            move16();
#ifdef OPT_2239_IVAS_FILTER_PROCESS
            filter_state->num_q[IVAS_FILTER_STAGE_0][i] = Q30;
            filter_state->den_q[IVAS_FILTER_STAGE_0][i] = Q30;
            move16();
            move16();
#endif
        }

        filter_state->state_fx[0][0] = 0;
@@ -94,15 +100,27 @@ void ivas_filters_init_fx(
        move16();
        move16();

        filter_state->state64_fx[0][0] = 0;
        filter_state->state64_fx[0][1] = 0;
        filter_state->state64_fx[0][2] = 0;
#ifdef OPT_2239_IVAS_FILTER_PROCESS
        filter_state->state64_fx[IVAS_FILTER_STAGE_0][0] = 0;
        filter_state->state64_fx[IVAS_FILTER_STAGE_0][1] = 0;
        filter_state->state64_fx[IVAS_FILTER_STAGE_0][2] = 0;
        move64();
        move64();
        move64();
        filter_state->state64_q[IVAS_FILTER_STAGE_0] = Q30;
        move16();
#endif
    }
    ELSE
    {
        filter_state->filt_len = IVAS_BIQUAD_FILT_LEN;
        move16();

#ifdef OPT_2239_IVAS_FILTER_PROCESS
        filter_state->state64_q[IVAS_FILTER_STAGE_0] = 63;
        filter_state->state64_q[IVAS_FILTER_STAGE_1] = 63;
#endif

        FOR( i = 0; i < IVAS_BIQUAD_FILT_LEN; i++ )
        {

@@ -122,6 +140,24 @@ void ivas_filters_init_fx(
            move16();
            move16();
            move16();
#ifdef OPT_2239_IVAS_FILTER_PROCESS
            filter_state->num_q[IVAS_FILTER_STAGE_0][i] = sub( 31, filt_coeff_e[i + 0 * IVAS_BIQUAD_FILT_LEN] );
            filter_state->den_q[IVAS_FILTER_STAGE_0][i] = sub( 31, filt_coeff_e[i + 1 * IVAS_BIQUAD_FILT_LEN] );
            filter_state->num_q[IVAS_FILTER_STAGE_1][i] = sub( 31, filt_coeff_e[i + 2 * IVAS_BIQUAD_FILT_LEN] );
            filter_state->den_q[IVAS_FILTER_STAGE_1][i] = sub( 31, filt_coeff_e[i + 3 * IVAS_BIQUAD_FILT_LEN] );
            move16();
            move16();
            move16();
            move16();
            filter_state->state64_q[IVAS_FILTER_STAGE_0] = s_min( filter_state->state64_q[IVAS_FILTER_STAGE_0], filter_state->num_q[IVAS_FILTER_STAGE_0][i] );
            filter_state->state64_q[IVAS_FILTER_STAGE_0] = s_min( filter_state->state64_q[IVAS_FILTER_STAGE_0], filter_state->den_q[IVAS_FILTER_STAGE_0][i] );
            filter_state->state64_q[IVAS_FILTER_STAGE_1] = s_min( filter_state->state64_q[IVAS_FILTER_STAGE_1], filter_state->num_q[IVAS_FILTER_STAGE_1][i] );
            filter_state->state64_q[IVAS_FILTER_STAGE_1] = s_min( filter_state->state64_q[IVAS_FILTER_STAGE_1], filter_state->den_q[IVAS_FILTER_STAGE_1][i] );
            move16();
            move16();
            move16();
            move16();
#endif
        }

        filter_state->state_fx[0][0] = 0;
@@ -149,17 +185,21 @@ void ivas_filters_init_fx(
        move16();
        move16();

        filter_state->state64_fx[0][0] = 0;
        filter_state->state64_fx[0][1] = 0;
        filter_state->state64_fx[0][2] = 0;
        filter_state->state64_fx[1][0] = 0;
        filter_state->state64_fx[1][1] = 0;
        filter_state->state64_fx[1][2] = 0;
    }

#ifdef OPT_2239_IVAS_FILTER_PROCESS
    filter_state->q_diff = 0;
        filter_state->state64_fx[IVAS_FILTER_STAGE_0][0] = 0;
        filter_state->state64_fx[IVAS_FILTER_STAGE_0][1] = 0;
        filter_state->state64_fx[IVAS_FILTER_STAGE_0][2] = 0;
        filter_state->state64_fx[IVAS_FILTER_STAGE_1][0] = 0;
        filter_state->state64_fx[IVAS_FILTER_STAGE_1][1] = 0;
        filter_state->state64_fx[IVAS_FILTER_STAGE_1][2] = 0;
        move64();
        move64();
        move64();
        move64();
        move64();
        move64();
#endif
    }

    return;
}
@@ -306,16 +346,16 @@ void ivas_filter_process_fixed_fx(
    ivas_filters_process_state_t *filter_state, /* i/o: filter state handle             */
    Word32 *pIn_Out_fx,                         /* i/o: signal subject to filtering Q(q_factor)   */
    const Word16 length,                        /* i  : filter order                    */
    Word16 *q_factor )
    Word16 q )
{
    SWITCH( filter_state->order )
    {
        case IVAS_FILTER_ORDER_1:
            ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_0, q_factor );
            ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_0, q );
            BREAK;
        case IVAS_FILTER_ORDER_4:
            ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_0, q_factor );
            ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_1, q_factor );
            ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_0, q );
            ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_1, q );
            BREAK;
        default:
            BREAK;
@@ -324,129 +364,43 @@ void ivas_filter_process_fixed_fx(
    return;
}

#if 0
static void __check( Word64 computed_fx, Word16 computed_q, Word64 expected_fx, Word16 expected_q );

/*
 * Debug helper: compares a computed 64-bit fixed-point value against an
 * expected one and asserts when they differ by more than 2^60 after both
 * have been brought to a common Q format.
 *
 *   computed_fx / computed_q : value under test and its Q format
 *   expected_fx / expected_q : reference value and its Q format
 *
 * NOTE(review): W_norm is presumed to return the left-shift headroom of a
 * Word64 - confirm against the BASOP operator documentation.
 */
static void __check( Word64 computed_fx, Word16 computed_q, Word64 expected_fx, Word16 expected_q )
{
    Word16 headroom, q_gap, align;
    Word16 norm_q_computed, norm_q_expected;
    Word64 norm_computed, norm_expected, deviation;

    /* Shift the computed value up by the amount W_norm reports and track its new Q. */
    headroom = W_norm( computed_fx );
    norm_computed = W_shl( computed_fx, headroom );
    norm_q_computed = add( computed_q, headroom );

    /* Same treatment for the expected value. */
    headroom = W_norm( expected_fx );
    norm_expected = W_shl( expected_fx, headroom );
    norm_q_expected = add( expected_q, headroom );

    /* Align both operands: the one with the larger Q is shifted right by the gap. */
    q_gap = norm_q_computed - norm_q_expected;
    align = abs( q_gap );

    if ( q_gap < 0 )
    {
        /* A gap of 64 bits or more leaves nothing of the operand. */
        if ( align < 64 )
        {
            norm_expected = W_shr( norm_expected, align );
        }
        else
        {
            norm_expected = 0;
        }
    }
    else
    {
        if ( align < 64 )
        {
            norm_computed = W_shr( norm_computed, align );
        }
        else
        {
            norm_computed = 0;
        }
    }

    /* Tolerance is 2^60 in the aligned domain. */
    deviation = llabs( norm_computed - norm_expected );
    if ( deviation > ( ( (Word64) 1 ) << 60 ) )
    {
        assert( false );
    }
}

/*
 * Debug helper: converts a mantissa/exponent state (state_fx in
 * Q(31 - state_e)) into a Word64 value in Q(q).
 *
 * Returns 0 when the required shift is -32 or lower; otherwise returns the
 * result of W_shl applied to the widened mantissa with the (possibly
 * negative) shift count passed through unchanged.
 */
Word64 __to_state64( Word16 q, Word32 state_fx, Word16 state_e )
{
    /* Number of bits separating the target Q from the source Q. */
    Word16 shift_to_q = q - ( 31 - state_e );

    if ( shift_to_q <= -32 )
    {
        return 0;
    }

    return W_shl( (Word64) state_fx, shift_to_q );
}
#endif

static void ivas_iir_2_filter_fixed_fx(
    ivas_filters_process_state_t *filter_state,
    Word32 *pIn_Out_fx, // Q(31-*pIn_Out_e)
    const Word16 length,
    const Word16 stage,
    Word16 *q_factor )
    Word16 q )
{
    Word16 i, j;

    Word32 *pIn_fx = pIn_Out_fx;
    Word32 *pOut_fx = pIn_Out_fx;
    Word32 tmp_pIn_buf_i_fx;

    Word32 L_tmp_prod, L_tmp;
    Word16 L_prod_e, L_tmp_e;

    Word16 q_in, q_out;
    Word16 e_in, e_out;
    q_in = *q_factor;
    *q_factor = q_out = q_in + filter_state->q_diff;

    // Word64 shift, tmp_prod, tmp, out64;
    Word64 shift, tmp_prod, tmp;
    Word32 in, out;

    FOR( i = 0; i < length; i++ )
    {
        tmp_pIn_buf_i_fx = pIn_fx[i];
        in = pIn_Out_fx[i];
        move32();
        e_in = 31 - q_in;
        move16();

        L_tmp_prod = Mpy_32_32( filter_state->num_fx[stage][0], pIn_fx[i] ); // Q31 -(pIn_Out_e[i]+ filter_state->num_e[stage][0])
        L_prod_e = add( 31 - q_in, filter_state->num_e[stage][0] );

        // shift = 31 - filter_state->num_e[stage][0] + q_in - ( q_out + 32 );
        // tmp_prod = W_shr( W_mult0_32_32( filter_state->num_fx[stage][0], pIn_fx[i] ), shift );
        // __check( tmp_prod, q_out + 32, L_tmp_prod, 31 - L_prod_e );
        shift = filter_state->num_q[stage][0] + q - filter_state->state64_q[stage];
        tmp_prod = W_shr( W_mult0_32_32( filter_state->num_fx[stage][0], in ), shift );

        e_out = 31 - q_in;
        pOut_fx[i] = BASOP_Util_Add_Mant32Exp( filter_state->state_fx[stage][0], filter_state->state_e[stage][0], L_tmp_prod, L_prod_e, &e_out ); // Q31 - pIn_Out_e[i]
        shift = sub( filter_state->state64_q[stage], q );
        pIn_Out_fx[i] = out = W_shr( W_add( filter_state->state64_fx[stage][0], tmp_prod ), shift );
        move32();

        // out64 = W_add( filter_state->state64_fx[stage][0], tmp_prod );
        // __check( out64, q_out + 32, pOut_fx[i], 31 - e_out );
        // pOut_fx[i] = (Word32)(out64 >> 32);

        FOR( j = 1; j < filter_state->filt_len; j++ )
        {
            L_tmp_prod = Mpy_32_32( filter_state->num_fx[stage][j], tmp_pIn_buf_i_fx ); // Q31-L_prod_e
            L_prod_e = add( filter_state->num_e[stage][j], e_in );

            // shift = 31 - filter_state->num_e[stage][j] + q_in - ( q_out + 32 );
            // tmp_prod = W_shr( W_mult0_32_32( filter_state->num_fx[stage][j], tmp_pIn_buf_i_fx ), shift );
            // __check( tmp_prod, q_out + 32, L_tmp_prod, 31 - L_prod_e );

            L_tmp = BASOP_Util_Add_Mant32Exp( filter_state->state_fx[stage][j], filter_state->state_e[stage][j], L_tmp_prod, L_prod_e, &L_tmp_e ); // Q31 - L_tmp_e

            // tmp = W_add( filter_state->state64_fx[stage][j], tmp_prod );
            // __check( tmp, q_out + 32, L_tmp, 31 - L_tmp_e );
            shift = filter_state->num_q[stage][j] + q - filter_state->state64_q[stage];
            tmp_prod = W_shr( W_mult0_32_32( filter_state->num_fx[stage][j], in ), shift );

            L_tmp_prod = Mpy_32_32( filter_state->den_fx[stage][j], pOut_fx[i] ); // Q31 - ( pIn_Out_e[i]+filter_state->den_e[stage][j] )
            L_prod_e = add( e_out, filter_state->den_e[stage][j] );
            tmp = W_add( filter_state->state64_fx[stage][j], tmp_prod );

            // shift = 31 - filter_state->den_e[stage][j] + q_out - ( q_out + 32 );
            // tmp_prod = W_shr( W_mult0_32_32( filter_state->den_fx[stage][j], pOut_fx[i] ), shift );
            // __check( tmp_prod, q_out + 32, L_tmp_prod, 31 - L_prod_e );
            shift = filter_state->den_q[stage][j] + q - filter_state->state64_q[stage];
            tmp_prod = W_shr( W_mult0_32_32( filter_state->den_fx[stage][j], out ), shift );

            filter_state->state_fx[stage][j - 1] = BASOP_Util_Add_Mant32Exp( L_tmp, L_tmp_e, L_negate( L_tmp_prod ), L_prod_e, &filter_state->state_e[stage][j - 1] ); // Q31 - filter_state->state_e[stage][j - 1]
            filter_state->state64_fx[stage][j - 1] = W_add( tmp, W_neg( tmp_prod ) );
            move32();

            // filter_state->state64_fx[stage][j - 1] = W_add( tmp, tmp_prod );
            // __check( filter_state->state64_fx[stage][j - 1], q_out + 32, filter_state->state_fx[stage][j - 1], 31 - filter_state->state_e[stage][j - 1] );

            // filter_state->state64_fx[stage][j - 1] = __to_state64( q_out + 32, filter_state->state_fx[stage][j - 1], filter_state->state_e[stage][j - 1] );
        }

        pOut_fx[i] = L_shr_r_sat( pOut_fx[i], sub( sub( 31, q_out ), e_out ) ); // Q(31-pIn_Out_e[i]) -> Q(q_factor)
        move32();
    }

    return;
+1 −1
Original line number Diff line number Diff line
@@ -3821,7 +3821,7 @@ void ivas_filter_process_fixed_fx(
    ivas_filters_process_state_t *filter_state,                 /* i/o: filter state handle                             */
    Word32 *pIn_Out_fx,                                         /* i  : signal subject to filtering                     */
    const Word16 length,                                        /* i  : filter order                                    */
    Word16 *q_factor 
    Word16 q 
);

ivas_error ivas_osba_enc_open_fx(
+4 −2
Original line number Diff line number Diff line
@@ -662,8 +662,10 @@ typedef struct ivas_filters_process_state_t
    Word16 state_e[IVAS_FILTER_MAX_STAGES][IVAS_BIQUAD_FILT_LEN];

#ifdef OPT_2239_IVAS_FILTER_PROCESS
    Word16 q_diff; // q_diff = q_out - q_in -> q_out = q_diff + q_in
    Word32 state64_fx[IVAS_FILTER_MAX_STAGES][IVAS_BIQUAD_FILT_LEN];
    Word64 state64_fx[IVAS_FILTER_MAX_STAGES][IVAS_BIQUAD_FILT_LEN];
    Word16 num_q[IVAS_FILTER_MAX_STAGES][IVAS_BIQUAD_FILT_LEN];
    Word16 den_q[IVAS_FILTER_MAX_STAGES][IVAS_BIQUAD_FILT_LEN];
    Word16 state64_q[IVAS_FILTER_MAX_STAGES];
#endif

} ivas_filters_process_state_t;
+75 −19
Original line number Diff line number Diff line
@@ -375,7 +375,6 @@ static Word32 ivas_calc_duck_gain_fx(
 * Calculate in/out ducking gains
 *-----------------------------------------------------------------------------------------*/

#ifndef OPT_2239_IVAS_FILTER_PROCESS
static void check( Word32 computed_fx, Word16 computed_q, Word32 expected_fx, Word16 expected_q );

static void check( Word32 computed_fx, Word16 computed_q, Word32 expected_fx, Word16 expected_q )
@@ -393,12 +392,11 @@ static void check( Word32 computed_fx, Word16 computed_q, Word32 expected_fx, Wo
        efx = L_shr( expected_fx, -q_diff );
    }
    Word32 error = abs( cfx - efx );
    if ( error >= ( 1 << 22 ) )
    if ( error > ( 1 << 16 ) )
    {
        assert( false );
    }
}
#endif

void ivas_td_decorr_get_ducking_gains_fx(
    ivas_trans_det_state_t *hTranDet, /* i/o: Transient detector handle */
@@ -424,11 +422,11 @@ void ivas_td_decorr_get_ducking_gains_fx(
    Copy32( pIn_pcm, e_fast_fx, frame_len ); /*Q11*/


#ifdef OPT_2239_IVAS_FILTER_PROCESS
#if 0 // def OPT_2239_IVAS_FILTER_PROCESS
    /* env hpf */
    Word16 q_fast = Q11;

    ivas_filter_process_fixed_fx( &hTranDet->env_hpf, e_fast_fx, frame_len, &q_fast );
    ivas_filter_process_fixed_fx( &hTranDet->env_hpf, e_fast_fx, frame_len, q_fast );

    Word32 env_eps_fx = IVAS_TDET_PARM_ENV_EPS_fx;
    Word16 env_eps_q = Q31;
@@ -436,9 +434,9 @@ void ivas_td_decorr_get_ducking_gains_fx(
    Word16 q_diff = sub( q_fast, env_eps_q );
    q_fast = s_min( q_fast, env_eps_q );

    if ( q_diff >= 0 )
    IF( q_diff >= 0 )
    {
        for ( i = 0; i < frame_len; i++ )
        FOR( i = 0; i < frame_len; i++ )
        {
            e_fast_fx[i] = L_add( L_abs( L_shr( e_fast_fx[i], q_diff ) ), env_eps_fx );
            move32();
@@ -446,10 +444,10 @@ void ivas_td_decorr_get_ducking_gains_fx(
            move32();
        }
    }
    else
    ELSE
    {
        env_eps_fx = L_shl( env_eps_fx, q_diff );
        for ( i = 0; i < frame_len; i++ )
        FOR( i = 0; i < frame_len; i++ )
        {
            e_fast_fx[i] = L_add( L_abs( e_fast_fx[i] ), env_eps_fx );
            move32();
@@ -461,10 +459,10 @@ void ivas_td_decorr_get_ducking_gains_fx(
    Word16 q_slow = q_fast;

    /* env fast*/
    ivas_filter_process_fixed_fx( &hTranDet->env_fast, e_fast_fx, frame_len, &q_fast );
    ivas_filter_process_fixed_fx( &hTranDet->env_fast, e_fast_fx, frame_len, q_fast );

    /* env slow */
    ivas_filter_process_fixed_fx( &hTranDet->env_slow, e_slow_fx, frame_len, &q_slow );
    ivas_filter_process_fixed_fx( &hTranDet->env_slow, e_slow_fx, frame_len, q_slow );


    IF( tdet_flag )
@@ -503,15 +501,17 @@ void ivas_td_decorr_get_ducking_gains_fx(
    ivas_filter_process_exp_fx( &hTranDet->env_hpf, e_fast_fx, frame_len, e_fast_e );


    // Word32 test_e_fast_fx[L_FRAME48k];
    // Word16 q_fast = Q11;
    // Copy32( pIn_pcm, test_e_fast_fx, frame_len ); /*Q11*/
    // ivas_filter_process_fixed_fx( &hTranDet->test_env_hpf, test_e_fast_fx, frame_len, &q_fast );
    // ====================================================================================================
    Word32 test_e_fast_fx[L_FRAME48k];
    Word16 q_fast = Q11;
    Copy32( pIn_pcm, test_e_fast_fx, frame_len ); /*Q11*/
    ivas_filter_process_fixed_fx( &hTranDet->test_env_hpf, test_e_fast_fx, frame_len, q_fast );

    // for ( i = 0; i < frame_len; i++ )
    // {
    //     check(test_e_fast_fx[i],q_fast,e_fast_fx[i],31-e_fast_e[i]);
    // }
    for ( i = 0; i < frame_len; i++ )
    {
        check( test_e_fast_fx[i], q_fast, e_fast_fx[i], 31 - e_fast_e[i] );
    }
    // ====================================================================================================


    FOR( i = 0; i < frame_len; i++ )
@@ -525,12 +525,68 @@ void ivas_td_decorr_get_ducking_gains_fx(
        move16();
    }

    // ====================================================================================================
    Word32 test_e_slow_fx[L_FRAME48k];
    Word32 env_eps_fx = IVAS_TDET_PARM_ENV_EPS_fx;
    Word16 env_eps_q = Q31;

    Word16 q_diff = sub( q_fast, env_eps_q );
    q_fast = s_min( q_fast, env_eps_q );

    if ( q_diff >= 0 )
    {
        for ( i = 0; i < frame_len; i++ )
        {
            test_e_fast_fx[i] = L_add( L_abs( L_shr( test_e_fast_fx[i], q_diff ) ), env_eps_fx );
            move32();
            test_e_slow_fx[i] = test_e_fast_fx[i];
            move32();
        }
    }
    else
    {
        env_eps_fx = L_shl( env_eps_fx, q_diff );
        for ( i = 0; i < frame_len; i++ )
        {
            test_e_fast_fx[i] = L_add( L_abs( test_e_fast_fx[i] ), env_eps_fx );
            move32();
            test_e_slow_fx[i] = test_e_fast_fx[i];
            move32();
        }
    }

    Word16 q_slow = q_fast;

    for ( i = 0; i < frame_len; i++ )
    {
        check( test_e_fast_fx[i], q_fast, e_fast_fx[i], 31 - e_fast_e[i] );
    }
    // ====================================================================================================

    /* env fast*/
    ivas_filter_process_exp_fx( &hTranDet->env_fast, e_fast_fx, frame_len, e_fast_e );

    // ====================================================================================================
    ivas_filter_process_fixed_fx( &hTranDet->test_env_fast, test_e_fast_fx, frame_len, q_fast );

    for ( i = 0; i < frame_len; i++ )
    {
        check( test_e_fast_fx[i], q_fast, e_fast_fx[i], 31 - e_fast_e[i] );
    }
    // ====================================================================================================

    /* env slow */
    ivas_filter_process_exp_fx( &hTranDet->env_slow, e_slow_fx, frame_len, e_slow_e );

    // ====================================================================================================
    ivas_filter_process_fixed_fx( &hTranDet->test_env_slow, test_e_slow_fx, frame_len, q_slow );

    for ( i = 0; i < frame_len; i++ )
    {
        check( test_e_slow_fx[i], q_slow, e_slow_fx[i], 31 - e_slow_e[i] );
    }
    // ====================================================================================================

    IF( tdet_flag )
    {
        FOR( i = 0; i < frame_len; i++ )