Commit 2cf229be authored by Nicolas Roussin
Browse files

Optimize ivas_filter_process_fx part 2.

parent 005ceceb
Loading
Loading
Loading
Loading
+101 −32
Original line number Diff line number Diff line
@@ -93,6 +93,10 @@ void ivas_filters_init_fx(
        move16();
        move16();
        move16();

        filter_state->state64_fx[0][0] = 0;
        filter_state->state64_fx[0][1] = 0;
        filter_state->state64_fx[0][2] = 0;
    }
    ELSE
    {
@@ -144,6 +148,13 @@ void ivas_filters_init_fx(
        move16();
        move16();
        move16();

        filter_state->state64_fx[0][0] = 0;
        filter_state->state64_fx[0][1] = 0;
        filter_state->state64_fx[0][2] = 0;
        filter_state->state64_fx[1][0] = 0;
        filter_state->state64_fx[1][1] = 0;
        filter_state->state64_fx[1][2] = 0;
    }

#ifdef OPT_2239_IVAS_FILTER_PROCESS
@@ -293,22 +304,18 @@ static void ivas_iir_2_filter_fx(

void ivas_filter_process_fixed_fx(
    ivas_filters_process_state_t *filter_state, /* i/o: filter state handle             */
    Word32 *pIn_Out_fx,                         /* i/o: signal subject to filtering (exp[i] : pIn_out_e[i])   */
    Word32 *pIn_Out_fx,                         /* i/o: signal subject to filtering Q(q_factor)   */
    const Word16 length,                        /* i  : number of samples to filter     */
    Word16 *q_in_out )
    Word16 *q_factor )
{
    SWITCH( filter_state->order )
    {
        case IVAS_FILTER_ORDER_1:
            ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_0, q_in_out );
            /* Scale pIn_Out_fx back to input Q */
            ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_0, q_factor );
            BREAK;
        case IVAS_FILTER_ORDER_4:
            /* biquad-1 */
            ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_0, q_in_out );
            /* biquad-2 */
            ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_1, q_in_out );
            /* Scale pIn_Out_fx back to input Q */
            ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_0, q_factor );
            ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_1, q_factor );
            BREAK;
        default:
            BREAK;
@@ -317,66 +324,128 @@ void ivas_filter_process_fixed_fx(
    return;
}

#if 0
static void __check( Word64 computed_fx, Word16 computed_q, Word64 expected_fx, Word16 expected_q );

/*
 * Debug helper: compare a computed 64-bit fixed-point value against an
 * expected one and assert when they differ by too much.
 *
 *   computed_fx / computed_q : value under test and its Q format
 *   expected_fx / expected_q : reference value and its Q format
 *
 * Each operand is left-shifted by the amount W_norm() reports for it and its
 * Q is increased by the same amount. The operand that ends up with the larger
 * Q is then shifted right by the Q difference (or cleared when the difference
 * is 64 or more) so that both share the smaller Q. The assertion fires when
 * the absolute difference of the aligned values exceeds 2^60.
 */
static void __check( Word64 computed_fx, Word16 computed_q, Word64 expected_fx, Word16 expected_q )
{
    /* Normalise the value under test. */
    const Word16 norm_computed = W_norm( computed_fx );
    Word64 lhs = W_shl( computed_fx, norm_computed );
    const Word16 lhs_q = add( computed_q, norm_computed );

    /* Normalise the reference value. */
    const Word16 norm_expected = W_norm( expected_fx );
    Word64 rhs = W_shl( expected_fx, norm_expected );
    const Word16 rhs_q = add( expected_q, norm_expected );

    /* Bring both operands to the smaller of the two Q formats. */
    const Word16 q_gap = lhs_q - rhs_q;
    const Word16 align = abs( q_gap );

    if ( q_gap < 0 )
    {
        rhs = ( align < 64 ) ? W_shr( rhs, align ) : 0;
    }
    else
    {
        lhs = ( align < 64 ) ? W_shr( lhs, align ) : 0;
    }

    const Word64 deviation = llabs( lhs - rhs );
    const Word64 tolerance = ( (Word64) 1 ) << 60;

    if ( deviation > tolerance )
    {
        assert( false );
    }
}

/*
 * Debug helper: convert a 32-bit mantissa/exponent state into a 64-bit state
 * expressed in a fixed Q format.
 *
 *   q        : target Q of the returned 64-bit value
 *   state_fx : 32-bit mantissa, treated as Q(31 - state_e)
 *   state_e  : exponent belonging to state_fx
 *
 * Returns state_fx widened to Word64 and passed through W_shl() with a shift
 * of q - (31 - state_e). When that shift is -32 or lower the result is forced
 * to 0 instead of calling W_shl().
 * NOTE(review): a negative shift handed to W_shl() presumably shifts right;
 * confirm against the BASOP definition.
 */
Word64 __to_state64( Word16 q, Word32 state_fx, Word16 state_e )
{
    const Word16 lshift = q - ( 31 - state_e );

    if ( lshift <= -32 )
    {
        return 0;
    }

    /* The result could be cross-checked with __check() against
     * (state_fx, 31 - state_e); that call is currently disabled. */
    return W_shl( (Word64) state_fx, lshift );
}
#endif

static void ivas_iir_2_filter_fixed_fx(
    ivas_filters_process_state_t *filter_state,
    Word32 *pIn_Out_fx, // Q(31-*pIn_Out_e)
    const Word16 length,
    const Word16 stage,
    Word16 *q_in_out )
    Word16 *q_factor )
{
    Word16 i, j;

    Word32 *pIn_fx = pIn_Out_fx;
    Word32 *pOut_fx = pIn_Out_fx;
    Word32 tmp_pIn_buf_i_fx;
    Word16 tmp_pIn_buf_i_e;

    Word32 L_tmp_prod, L_tmp;
    Word16 L_prod_e, L_tmp_e;

    Word16 exp;
    Word16 q_in = *q_in_out;
    Word16 q_out = add( filter_state->q_diff, q_in );
    *q_in_out = q_out;
    Word16 q_in, q_out;
    Word16 e_in, e_out;
    q_in = *q_factor;
    *q_factor = q_out = q_in + filter_state->q_diff;

    // Word64 shift, tmp_prod, tmp, out64;

    FOR( i = 0; i < length; i++ )
    {
        tmp_pIn_buf_i_fx = pIn_fx[i];
        move32();
        tmp_pIn_buf_i_e = q_in;
        e_in = 31 - q_in;
        move16();

        L_tmp_prod = Mpy_32_32( filter_state->num_fx[stage][0], pIn_fx[i] ); // Q31 -(q_in+ filter_state->num_e[stage][0])
        L_prod_e = add( q_in, filter_state->num_e[stage][0] );
        L_tmp_prod = Mpy_32_32( filter_state->num_fx[stage][0], pIn_fx[i] ); // Q31 -(pIn_Out_e[i]+ filter_state->num_e[stage][0])
        L_prod_e = add( 31 - q_in, filter_state->num_e[stage][0] );

        // shift = 31 - filter_state->num_e[stage][0] + q_in - ( q_out + 32 );
        // tmp_prod = W_shr( W_mult0_32_32( filter_state->num_fx[stage][0], pIn_fx[i] ), shift );
        // __check( tmp_prod, q_out + 32, L_tmp_prod, 31 - L_prod_e );

        pOut_fx[i] = BASOP_Util_Add_Mant32Exp( filter_state->state_fx[stage][0], filter_state->state_e[stage][0], L_tmp_prod, L_prod_e, &exp ); // Q31 - pIn_Out_e[i]
        e_out = 31 - q_in;
        pOut_fx[i] = BASOP_Util_Add_Mant32Exp( filter_state->state_fx[stage][0], filter_state->state_e[stage][0], L_tmp_prod, L_prod_e, &e_out ); // Q31 - pIn_Out_e[i]
        move32();

        // out64 = W_add( filter_state->state64_fx[stage][0], tmp_prod );
        // __check( out64, q_out + 32, pOut_fx[i], 31 - e_out );
        // pOut_fx[i] = (Word32)(out64 >> 32);

        FOR( j = 1; j < filter_state->filt_len; j++ )
        {
            L_tmp_prod = Mpy_32_32( filter_state->num_fx[stage][j], tmp_pIn_buf_i_fx ); // Q31-L_prod_e
            L_prod_e = add( filter_state->num_e[stage][j], tmp_pIn_buf_i_e );
            L_prod_e = add( filter_state->num_e[stage][j], e_in );

            // shift = 31 - filter_state->num_e[stage][j] + q_in - ( q_out + 32 );
            // tmp_prod = W_shr( W_mult0_32_32( filter_state->num_fx[stage][j], tmp_pIn_buf_i_fx ), shift );
            // __check( tmp_prod, q_out + 32, L_tmp_prod, 31 - L_prod_e );

            L_tmp = BASOP_Util_Add_Mant32Exp( filter_state->state_fx[stage][j], filter_state->state_e[stage][j], L_tmp_prod, L_prod_e, &L_tmp_e ); // Q31 - L_tmp_e

            // tmp = W_add( filter_state->state64_fx[stage][j], tmp_prod );
            // __check( tmp, q_out + 32, L_tmp, 31 - L_tmp_e );

            L_tmp_prod = Mpy_32_32( filter_state->den_fx[stage][j], pOut_fx[i] ); // Q31 - ( pIn_Out_e[i]+filter_state->den_e[stage][j] )
            L_prod_e = add( exp, filter_state->den_e[stage][j] );
            L_prod_e = add( e_out, filter_state->den_e[stage][j] );

            // shift = 31 - filter_state->den_e[stage][j] + q_out - ( q_out + 32 );
            // tmp_prod = W_shr( W_mult0_32_32( filter_state->den_fx[stage][j], pOut_fx[i] ), shift );
            // __check( tmp_prod, q_out + 32, L_tmp_prod, 31 - L_prod_e );

            filter_state->state_fx[stage][j - 1] = BASOP_Util_Add_Mant32Exp( L_tmp, L_tmp_e, L_negate( L_tmp_prod ), L_prod_e, &filter_state->state_e[stage][j - 1] ); // Q31 - filter_state->state_e[stage][j - 1]
            move32();
            /* When the exponent is below -31 the value is negligibly small; reset it to zero to avoid exponent overflow. */
            IF( LT_16( filter_state->state_e[stage][j - 1], -31 ) )
            {
                filter_state->state_fx[stage][j - 1] = 0;
                move32();
                filter_state->state_e[stage][j - 1] = 0;
                move16();
            }
        }

            // filter_state->state64_fx[stage][j - 1] = W_add( tmp, tmp_prod );
            // __check( filter_state->state64_fx[stage][j - 1], q_out + 32, filter_state->state_fx[stage][j - 1], 31 - filter_state->state_e[stage][j - 1] );

            // filter_state->state64_fx[stage][j - 1] = __to_state64( q_out + 32, filter_state->state_fx[stage][j - 1], filter_state->state_e[stage][j - 1] );
        }

        Word16 shift = ( 31 - exp ) - q_out;
        pOut_fx[i] = L_shr_r_sat( pOut_fx[i], shift );
        pOut_fx[i] = L_shr_r_sat( pOut_fx[i], sub( sub( 31, q_out ), e_out ) ); // Q(31-pIn_Out_e[i]) -> Q(q_factor)
        move32();
    }

+4 −4
Original line number Diff line number Diff line
@@ -3819,9 +3819,9 @@ void ivas_filter_process_exp_fx(

void ivas_filter_process_fixed_fx(
    ivas_filters_process_state_t *filter_state,                 /* i/o: filter state handle                             */
    Word32 *pIn_Out_fx,                         /* i/o: signal subject to filtering (exp[i] : pIn_out_e[i])   */
    Word32 *pIn_Out_fx,                                         /* i  : signal subject to filtering                     */
    const Word16 length,                                        /* i  : number of samples to filter                     */
    Word16 *q_in_out 
    Word16 *q_factor 
);

ivas_error ivas_osba_enc_open_fx(
+4 −0
Original line number Diff line number Diff line
@@ -663,6 +663,7 @@ typedef struct ivas_filters_process_state_t

#ifdef OPT_2239_IVAS_FILTER_PROCESS
    Word16 q_diff; // q_diff = q_out - q_in -> q_out = q_diff + q_in
    Word32 state64_fx[IVAS_FILTER_MAX_STAGES][IVAS_BIQUAD_FILT_LEN];
#endif

} ivas_filters_process_state_t;
@@ -677,6 +678,9 @@ typedef struct ivas_trans_det_state_t
    ivas_filters_process_state_t env_hpf;
    ivas_filters_process_state_t env_fast;
    ivas_filters_process_state_t env_slow;
    ivas_filters_process_state_t test_env_hpf;
    ivas_filters_process_state_t test_env_fast;
    ivas_filters_process_state_t test_env_slow;
    Word32 in_duck_coeff;
    Word32 out_duck_coeff;
    Word32 in_duck_gain;
+39 −0
Original line number Diff line number Diff line
@@ -225,6 +225,9 @@ static void ivas_transient_det_init(
    ivas_filters_init_fx( &hTranDet->env_hpf, (const Word32 *) filt_coeff_arr_fx[0], filt_coeff_arr_e, IVAS_FILTER_ORDER_1 );
    ivas_filters_init_fx( &hTranDet->env_fast, (const Word32 *) filt_coeff_arr_fx[1], filt_coeff_arr_e, IVAS_FILTER_ORDER_1 );
    ivas_filters_init_fx( &hTranDet->env_slow, (const Word32 *) filt_coeff_arr_fx[2], filt_coeff_arr_e, IVAS_FILTER_ORDER_1 );
    ivas_filters_init_fx( &hTranDet->test_env_hpf, (const Word32 *) filt_coeff_arr_fx[0], filt_coeff_arr_e, IVAS_FILTER_ORDER_1 );
    ivas_filters_init_fx( &hTranDet->test_env_fast, (const Word32 *) filt_coeff_arr_fx[1], filt_coeff_arr_e, IVAS_FILTER_ORDER_1 );
    ivas_filters_init_fx( &hTranDet->test_env_slow, (const Word32 *) filt_coeff_arr_fx[2], filt_coeff_arr_e, IVAS_FILTER_ORDER_1 );

    hTranDet->duck_mult_fac = IVAS_TDET_DUCK_MULT_FAC; /*Q29*/
    move32();
@@ -372,6 +375,31 @@ static Word32 ivas_calc_duck_gain_fx(
 * Calculate in/out ducking gains
 *-----------------------------------------------------------------------------------------*/

#ifndef OPT_2239_IVAS_FILTER_PROCESS
static void check( Word32 computed_fx, Word16 computed_q, Word32 expected_fx, Word16 expected_q );

/*
 * Debug helper: compare a computed 32-bit fixed-point value against an
 * expected one and assert when they differ by too much.
 *
 *   computed_fx / computed_q : value under test and its Q format
 *   expected_fx / expected_q : reference value and its Q format
 *
 * The operand with the larger Q is passed through L_shr() by the Q difference
 * so that both share the smaller Q; the other operand is used unchanged. The
 * assertion fires when the absolute difference of the aligned values is
 * 2^22 or more.
 */
static void check( Word32 computed_fx, Word16 computed_q, Word32 expected_fx, Word16 expected_q )
{
    const Word16 q_gap = computed_q - expected_q;
    Word32 lhs = computed_fx;
    Word32 rhs = expected_fx;

    if ( q_gap < 0 )
    {
        /* Reference has the larger Q: bring it down to the computed Q. */
        rhs = L_shr( expected_fx, -q_gap );
    }
    else
    {
        /* Computed value has the larger (or equal) Q: bring it down. */
        lhs = L_shr( computed_fx, +q_gap );
    }

    const Word32 deviation = abs( lhs - rhs );

    if ( deviation >= ( 1 << 22 ) )
    {
        assert( false );
    }
}
#endif

void ivas_td_decorr_get_ducking_gains_fx(
    ivas_trans_det_state_t *hTranDet, /* i/o: Transient detector handle */
    Word32 *pIn_pcm,                  /*Q11*/
@@ -475,6 +503,17 @@ void ivas_td_decorr_get_ducking_gains_fx(
    ivas_filter_process_exp_fx( &hTranDet->env_hpf, e_fast_fx, frame_len, e_fast_e );


    // Word32 test_e_fast_fx[L_FRAME48k];
    // Word16 q_fast = Q11;
    // Copy32( pIn_pcm, test_e_fast_fx, frame_len ); /*Q11*/
    // ivas_filter_process_fixed_fx( &hTranDet->test_env_hpf, test_e_fast_fx, frame_len, &q_fast );

    // for ( i = 0; i < frame_len; i++ )
    // {
    //     check(test_e_fast_fx[i],q_fast,e_fast_fx[i],31-e_fast_e[i]);
    // }


    FOR( i = 0; i < frame_len; i++ )
    {
        // e_fast_fx[i] = L_add( L_abs( e_fast_fx[i] ), L_shr( IVAS_TDET_PARM_ENV_EPS_fx, q_factor_diff ) ); /*Q14*/