Commit c1c64d94 authored by thomas dettbarn
Browse files

more loop unrolling in tcx_ltp_synth_filter() and tcx_ltp_synth_filter32() for...

More loop unrolling in tcx_ltp_synth_filter() and tcx_ltp_synth_filter32() for a bit-exact WMOPS improvement.
parent fcc5477e
Loading
Loading
Loading
Loading
Loading
+271 −97
Original line number Diff line number Diff line
@@ -491,7 +491,10 @@ static void tcx_ltp_synth_filter(
            if ( fade < 0 )
                step = negate( step );
        }

        IF ( zir != NULL )
        {
            IF ( fade != 0)
            {
                FOR( j = 0; j < length; j++ )
                {
                    s = L_deposit_l( 0 );
@@ -511,22 +514,88 @@ static void tcx_ltp_synth_filter(
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */
                    k = mult_r( gain, i );                                                     /* Qx */
            if ( fade != 0 )
                    k = mult_r( k, alpha );     /* Qx */
                    k = add_sat( synth[j], k ); /* Qx */
            if ( zir != NULL )
                    k = sub_sat( k, zir[j] );   /* Qx */
        
                    synth_ltp[j] = k; /* Qx */
                    move16();
        
                    BASOP_SATURATE_WARNING_OFF_EVS;
                    alpha = add_sat( alpha, step );
                    BASOP_SATURATE_WARNING_ON_EVS;
        
                    x0++;
                    x1++;
                    y0++;
                    y1++;
                }
            }
            ELSE
            {
                FOR( j = 0; j < length; j++ )
                {
                    s = L_deposit_l( 0 );
                    s2 = L_deposit_l( 0 );
                    k = 0;
                    move16();
                    FOR( i = 0; i < L; i++ )
                    {
                        s = L_mac_sat( L_mac_sat( s, w0[k], x0[i] ), w1[k], x1[-i] );   /* Qx */
                        s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */
                        k = k + pitch_res;
                    }
        
                    /* s2 *= ALPHA;
                       normal:      synth_ltp[j] = synth[j] - gain * s2 + gain * s;
                       zir:         synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j];
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */
                    k = mult_r( gain, i );                                                     /* Qx */
                    k = add_sat( synth[j], k ); /* Qx */
                    k = sub_sat( k, zir[j] );   /* Qx */
        
                    synth_ltp[j] = k; /* Qx */
                    move16();
        
                    x0++;
                    x1++;
                    y0++;
                    y1++;
                }
            }
        }
        ELSE
        {
            IF ( fade != 0)
            {
                FOR( j = 0; j < length; j++ )
                {
                    s = L_deposit_l( 0 );
                    s2 = L_deposit_l( 0 );
                    k = 0;
                    move16();
                    FOR( i = 0; i < L; i++ )
                    {
                        s = L_mac_sat( L_mac_sat( s, w0[k], x0[i] ), w1[k], x1[-i] );   /* Qx */
                        s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */
                        k = k + pitch_res;
                    }
        
                    /* s2 *= ALPHA;
                       normal:      synth_ltp[j] = synth[j] - gain * s2 + gain * s;
                       zir:         synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j];
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */
                    k = mult_r( gain, i );                                                     /* Qx */
                    k = mult_r( k, alpha );
                    k = add_sat( synth[j], k ); /* Qx */
        
                    synth_ltp[j] = k; /* Qx */
                    move16();
        
                    BASOP_SATURATE_WARNING_OFF_EVS;
            if ( fade != 0 )
            {
                    alpha = add_sat( alpha, step );
            }
                    BASOP_SATURATE_WARNING_ON_EVS;
        
                    x0++;
@@ -536,6 +605,41 @@ static void tcx_ltp_synth_filter(
                }
            }
            ELSE
            {
                FOR( j = 0; j < length; j++ )
                {
                    s = L_deposit_l( 0 );
                    s2 = L_deposit_l( 0 );
                    k = 0;
                    move16();
                    FOR( i = 0; i < L; i++ )
                    {
                        s = L_mac_sat( L_mac_sat( s, w0[k], x0[i] ), w1[k], x1[-i] );   /* Qx */
                        s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */
                        k = k + pitch_res;
                    }
        
                    /* s2 *= ALPHA;
                       normal:      synth_ltp[j] = synth[j] - gain * s2 + gain * s;
                       zir:         synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j];
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */
                    k = mult_r( gain, i );                                                     /* Qx */
                    k = add_sat( synth[j], k ); /* Qx */
        
                    synth_ltp[j] = k; /* Qx */
                    move16();
       
 
                    x0++;
                    x1++;
                    y0++;
                    y1++;
                }
            }            
        }
    }
    ELSE
    {
        Copy( synth, synth_ltp, length ); /* Qx */
    }
@@ -579,7 +683,7 @@ static void tcx_ltp_synth_filter32(

        L = tcxLtpFilters[filtIdx].length; /* Q0 */
        move16();
        alpha = 0x7FFF; /* 1 in Q15 */
        alpha = 0;
        move16();
        IF( fade != 0 )
        {
@@ -604,14 +708,47 @@ static void tcx_ltp_synth_filter32(
            if ( fade < 0 )
                step = negate( step );
        }
        if ( zir != NULL )
        IF( fade != 0 )
        {
            IF ( zir != NULL )
            {
                FOR( j = 0; j < length; j++ )
                {
                synth[j] = L_sub_sat( synth[j], zir[j] );
                    s = L_deposit_l( 0 );
                    s2 = L_deposit_l( 0 );
                    k = 0;
                    move16();
                    FOR( i = 0; i < L; i++ )
                    {
                        s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] );   /* Qx */
                        s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */
                        k = k + pitch_res;
                    }
    
                    /* s2 *= ALPHA;
                       normal:      synth_ltp[j] = synth[j] - gain * s2 + gain * s;
                       zir:         synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j];
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */
                    L_tmp2 = Mpy_32_16_r( L_tmp, gain );              /* Qx */
                    L_tmp2 = Mpy_32_16_r( L_tmp2, alpha );            /* Qx */
                    L_tmp2 = L_sub_sat( L_tmp2, zir[j] );             /* Qx */
                    L_tmp2 = L_add_sat( synth[j], L_tmp2 );           /* Qx */
    
                    synth_ltp[j] = L_tmp2; /* Qx */
                    move16();
    
                    BASOP_SATURATE_WARNING_OFF_EVS;
                    alpha = add_sat( alpha, step );
                    BASOP_SATURATE_WARNING_ON_EVS;
    
                    x0++;
                    x1++;
                    y0++;
                    y1++;
                }
        IF( fade != 0 )
            }
            ELSE
            {
                FOR( j = 0; j < length; j++ )
                {
@@ -648,7 +785,10 @@ static void tcx_ltp_synth_filter32(
                    y1++;
                }
            }
        }
        ELSE
        {
            IF ( zir != NULL )
            {
                FOR( j = 0; j < length; j++ )
                {
@@ -669,6 +809,7 @@ static void tcx_ltp_synth_filter32(
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */
                    L_tmp2 = Mpy_32_16_r( L_tmp, gain );              /* Qx */
                    L_tmp2 = L_sub_sat( L_tmp2, zir[j] );             /* Qx */
                    L_tmp2 = L_add_sat( synth[j], L_tmp2 );           /* Qx */
    
                    synth_ltp[j] = L_tmp2; /* Qx */
@@ -680,6 +821,39 @@ static void tcx_ltp_synth_filter32(
                    y1++;
                }
            }
            ELSE
            {
                FOR( j = 0; j < length; j++ )
                {
                    s = L_deposit_l( 0 );
                    s2 = L_deposit_l( 0 );
                    k = 0;
                    move16();
                    FOR( i = 0; i < L; i++ )
                    {
                        s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] );   /* Qx */
                        s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */
                        k = k + pitch_res;
                    }
    
                    /* s2 *= ALPHA;
                       normal:      synth_ltp[j] = synth[j] - gain * s2 + gain * s;
                       zir:         synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j];
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */
                    L_tmp2 = Mpy_32_16_r( L_tmp, gain );              /* Qx */
                    L_tmp2 = L_add_sat( synth[j], L_tmp2 );           /* Qx */
    
                    synth_ltp[j] = L_tmp2; /* Qx */
                    move16();
    
                    x0++;
                    x1++;
                    y0++;
                    y1++;
                }
            }
        }
    }
    ELSE
    {