Commit 3c94b6f1 authored by multrus
Browse files

Merge branch '2022-improve-wmops-performance-of-tcx_ltp_synth_filter32' into 'main'

Resolve "Improve WMOPS performance of tcx_ltp_synth_filter32()"

Closes #2022

See merge request !2241
parents db431779 e04810d1
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -106,6 +106,9 @@

#define FIX_1962_FORMAT_CONV_SPECTRAL_DIFF                   /* FhG: Improved precision of targetEnergy in ivas_ls_setup_conversion_process_mdct_fx() */
#define FIX_2003_CON_TCX_OVERFLOW                            /* FhG: Use a dynamic scaling factor for the synth buffer at the output of con_tcx_ivas_fx() */
#define OPT_TCXLTP_FILTER_LOOP                               /* FhG: issue 2022: optimize the filter loops in tcx_ltp_synth_filter() and tcx_ltp_synth_filter32() by using a separate sample loop for each fade/zir combination */


/* #################### Start BASOP porting switches ############################ */

#define NONBE_1244_FIX_SWB_BWE_MEMORY                   /* VA: issue 1244: fix to SWB BWE memory in case of switching from FB coding - pending a review by Huawei */
+299 −1
Original line number Diff line number Diff line
@@ -492,6 +492,155 @@ static void tcx_ltp_synth_filter(
                step = negate( step );
        }

#ifdef OPT_TCXLTP_FILTER_LOOP
        IF( zir != NULL )
        {
            IF( fade != 0 )
            {
                FOR( j = 0; j < length; j++ )
                {
                    s = L_deposit_l( 0 );
                    s2 = L_deposit_l( 0 );
                    k = 0;
                    move16();
                    FOR( i = 0; i < L; i++ )
                    {
                        s = L_mac_sat( L_mac_sat( s, w0[k], x0[i] ), w1[k], x1[-i] );   /* Qx */
                        s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */
                        k = k + pitch_res;
                    }

                    /* s2 *= ALPHA;
                       normal:      synth_ltp[j] = synth[j] - gain * s2 + gain * s;
                       zir:         synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j];
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */
                    k = mult_r( gain, i );                                                     /* Qx */
                    k = mult_r( k, alpha );                                                    /* Qx */
                    k = add_sat( synth[j], k );                                                /* Qx */
                    k = sub_sat( k, zir[j] );                                                  /* Qx */

                    synth_ltp[j] = k; /* Qx */
                    move16();

                    BASOP_SATURATE_WARNING_OFF_EVS;
                    alpha = add_sat( alpha, step );
                    BASOP_SATURATE_WARNING_ON_EVS;

                    x0++;
                    x1++;
                    y0++;
                    y1++;
                }
            }
            ELSE
            {
                FOR( j = 0; j < length; j++ )
                {
                    s = L_deposit_l( 0 );
                    s2 = L_deposit_l( 0 );
                    k = 0;
                    move16();
                    FOR( i = 0; i < L; i++ )
                    {
                        s = L_mac_sat( L_mac_sat( s, w0[k], x0[i] ), w1[k], x1[-i] );   /* Qx */
                        s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */
                        k = k + pitch_res;
                    }

                    /* s2 *= ALPHA;
                       normal:      synth_ltp[j] = synth[j] - gain * s2 + gain * s;
                       zir:         synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j];
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */
                    k = mult_r( gain, i );                                                     /* Qx */
                    k = add_sat( synth[j], k );                                                /* Qx */
                    k = sub_sat( k, zir[j] );                                                  /* Qx */

                    synth_ltp[j] = k; /* Qx */
                    move16();

                    x0++;
                    x1++;
                    y0++;
                    y1++;
                }
            }
        }
        ELSE
        {
            IF( fade != 0 )
            {
                FOR( j = 0; j < length; j++ )
                {
                    s = L_deposit_l( 0 );
                    s2 = L_deposit_l( 0 );
                    k = 0;
                    move16();
                    FOR( i = 0; i < L; i++ )
                    {
                        s = L_mac_sat( L_mac_sat( s, w0[k], x0[i] ), w1[k], x1[-i] );   /* Qx */
                        s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */
                        k = k + pitch_res;
                    }

                    /* s2 *= ALPHA;
                       normal:      synth_ltp[j] = synth[j] - gain * s2 + gain * s;
                       zir:         synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j];
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */
                    k = mult_r( gain, i );                                                     /* Qx */
                    k = mult_r( k, alpha );
                    k = add_sat( synth[j], k ); /* Qx */

                    synth_ltp[j] = k; /* Qx */
                    move16();

                    BASOP_SATURATE_WARNING_OFF_EVS;
                    alpha = add_sat( alpha, step );
                    BASOP_SATURATE_WARNING_ON_EVS;

                    x0++;
                    x1++;
                    y0++;
                    y1++;
                }
            }
            ELSE
            {
                FOR( j = 0; j < length; j++ )
                {
                    s = L_deposit_l( 0 );
                    s2 = L_deposit_l( 0 );
                    k = 0;
                    move16();
                    FOR( i = 0; i < L; i++ )
                    {
                        s = L_mac_sat( L_mac_sat( s, w0[k], x0[i] ), w1[k], x1[-i] );   /* Qx */
                        s2 = L_mac_sat( L_mac_sat( s2, v0[k], y0[i] ), v1[k], y1[-i] ); /* Qx */
                        k = k + pitch_res;
                    }

                    /* s2 *= ALPHA;
                       normal:      synth_ltp[j] = synth[j] - gain * s2 + gain * s;
                       zir:         synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j];
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    i = sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ); /* Qx */
                    k = mult_r( gain, i );                                                     /* Qx */
                    k = add_sat( synth[j], k );                                                /* Qx */

                    synth_ltp[j] = k; /* Qx */
                    move16();


                    x0++;
                    x1++;
                    y0++;
                    y1++;
                }
            }
        }
#else
        FOR( j = 0; j < length; j++ )
        {
            s = L_deposit_l( 0 );
@@ -534,6 +683,7 @@ static void tcx_ltp_synth_filter(
            y0++;
            y1++;
        }
#endif
    }
    ELSE
    {
@@ -579,7 +729,6 @@ static void tcx_ltp_synth_filter32(

        L = tcxLtpFilters[filtIdx].length; /* Q0 */
        move16();

        alpha = 0;
        move16();
        IF( fade != 0 )
@@ -606,6 +755,154 @@ static void tcx_ltp_synth_filter32(
                step = negate( step );
        }

#ifdef OPT_TCXLTP_FILTER_LOOP
        IF( fade != 0 )
        {
            IF( zir != NULL )
            {
                FOR( j = 0; j < length; j++ )
                {
                    s = L_deposit_l( 0 );
                    s2 = L_deposit_l( 0 );
                    k = 0;
                    move16();
                    FOR( i = 0; i < L; i++ )
                    {
                        s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] );   /* Qx */
                        s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */
                        k = k + pitch_res;
                    }

                    /* s2 *= ALPHA;
                       normal:      synth_ltp[j] = synth[j] - gain * s2 + gain * s;
                       zir:         synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j];
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */
                    L_tmp2 = Mpy_32_16_r( L_tmp, gain );              /* Qx */
                    L_tmp2 = Mpy_32_16_r( L_tmp2, alpha );            /* Qx */
                    L_tmp2 = L_sub_sat( L_tmp2, zir[j] );             /* Qx */
                    L_tmp2 = L_add_sat( synth[j], L_tmp2 );           /* Qx */

                    synth_ltp[j] = L_tmp2; /* Qx */
                    move16();

                    BASOP_SATURATE_WARNING_OFF_EVS;
                    alpha = add_sat( alpha, step );
                    BASOP_SATURATE_WARNING_ON_EVS;

                    x0++;
                    x1++;
                    y0++;
                    y1++;
                }
            }
            ELSE
            {
                FOR( j = 0; j < length; j++ )
                {
                    s = L_deposit_l( 0 );
                    s2 = L_deposit_l( 0 );
                    k = 0;
                    move16();
                    FOR( i = 0; i < L; i++ )
                    {
                        s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] );   /* Qx */
                        s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */
                        k = k + pitch_res;
                    }

                    /* s2 *= ALPHA;
                       normal:      synth_ltp[j] = synth[j] - gain * s2 + gain * s;
                       zir:         synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j];
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */
                    L_tmp2 = Mpy_32_16_r( L_tmp, gain );              /* Qx */
                    L_tmp2 = Mpy_32_16_r( L_tmp2, alpha );            /* Qx */
                    L_tmp2 = L_add_sat( synth[j], L_tmp2 );           /* Qx */

                    synth_ltp[j] = L_tmp2; /* Qx */
                    move16();

                    BASOP_SATURATE_WARNING_OFF_EVS;
                    alpha = add_sat( alpha, step );
                    BASOP_SATURATE_WARNING_ON_EVS;

                    x0++;
                    x1++;
                    y0++;
                    y1++;
                }
            }
        }
        ELSE
        {
            IF( zir != NULL )
            {
                FOR( j = 0; j < length; j++ )
                {
                    s = L_deposit_l( 0 );
                    s2 = L_deposit_l( 0 );
                    k = 0;
                    move16();
                    FOR( i = 0; i < L; i++ )
                    {
                        s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] );   /* Qx */
                        s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */
                        k = k + pitch_res;
                    }

                    /* s2 *= ALPHA;
                       normal:      synth_ltp[j] = synth[j] - gain * s2 + gain * s;
                       zir:         synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j];
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */
                    L_tmp2 = Mpy_32_16_r( L_tmp, gain );              /* Qx */
                    L_tmp2 = L_sub_sat( L_tmp2, zir[j] );             /* Qx */
                    L_tmp2 = L_add_sat( synth[j], L_tmp2 );           /* Qx */

                    synth_ltp[j] = L_tmp2; /* Qx */
                    move16();

                    x0++;
                    x1++;
                    y0++;
                    y1++;
                }
            }
            ELSE
            {
                FOR( j = 0; j < length; j++ )
                {
                    s = L_deposit_l( 0 );
                    s2 = L_deposit_l( 0 );
                    k = 0;
                    move16();
                    FOR( i = 0; i < L; i++ )
                    {
                        s = Madd_32_16_r( Madd_32_16_r( s, x0[i], w0[k] ), x1[-i], w1[k] );   /* Qx */
                        s2 = Madd_32_16_r( Madd_32_16_r( s2, y0[i], v0[k] ), y1[-i], v1[k] ); /* Qx */
                        k = k + pitch_res;
                    }

                    /* s2 *= ALPHA;
                       normal:      synth_ltp[j] = synth[j] - gain * s2 + gain * s;
                       zir:         synth_ltp[j] = synth[j] - gain * s2 + gain * s - zir[j];
                       fade-in/out: synth_ltp[j] = synth[j] - alpha * gain * s2 + alpha * gain * s; */
                    L_tmp = L_sub_sat( s, Mpy_32_16_r( s2, ALPHA ) ); /* Qx */
                    L_tmp2 = Mpy_32_16_r( L_tmp, gain );              /* Qx */
                    L_tmp2 = L_add_sat( synth[j], L_tmp2 );           /* Qx */

                    synth_ltp[j] = L_tmp2; /* Qx */
                    move16();

                    x0++;
                    x1++;
                    y0++;
                    y1++;
                }
            }
        }
#else
        FOR( j = 0; j < length; j++ )
        {
            s = L_deposit_l( 0 );
@@ -648,6 +945,7 @@ static void tcx_ltp_synth_filter32(
            y0++;
            y1++;
        }
#endif
    }
    ELSE
    {