Commit f908d5bf authored by Fabian Bauer
Browse files

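fft_fx_evs.c: completed replacement of the overflow-flag operators (`*_o`) with their saturating (`*_sat`) counterparts, guarded by `ISSUE_1836_replace_overflow_libcom`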

parent 5a23784c
Loading
Loading
Loading
Loading
Loading
+429 −1
Original line number Diff line number Diff line
@@ -1186,9 +1186,11 @@ static void fft5_32_16fx(
    Word16 T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn, T2, T3, T4, T5, T6, T7;
    Word16 i0, i1, i2, i3, i4;
    Word32 L_tmp;
#ifndef ISSUE_1836_replace_overflow_libcom
#ifdef BASOP_NOGLOB_DECLARE_LOCAL
    Flag Overflow = 0;
    move32();
#endif
#endif
    i0 = Idx[0];
    move16();
@@ -1216,9 +1218,15 @@ static void fft5_32_16fx(
    move16();
    T6 = zRe[i3];
    move16();
#ifdef ISSUE_1836_replace_overflow_libcom
    T7 = add_sat( T5, T6 );
    T8 = add_sat( T4, T7 );
    Tt = sub_sat( T5, T6 );
#else
    T7 = add_o( T5, T6, &Overflow );
    T8 = add_o( T4, T7, &Overflow );
    Tt = sub_o( T5, T6, &Overflow );
#endif
    /* T9 = KP559016994 * (T4 - T7); */
    L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
    T9 = round_fx_sat( L_tmp );                                // Qx
@@ -1240,11 +1248,17 @@ static void fft5_32_16fx(
    L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
    Tn = round_fx_sat( L_tmp );                                // Qx


#ifdef ISSUE_1836_replace_overflow_libcom
    zRe[i0] = add_sat( T1, T8 );
    move16();
    zIm[i0] = add_sat( To, Tp );
    move32();
#else
    zRe[i0] = add_o( T1, T8, &Overflow );
    move16();
    zIm[i0] = add_o( To, Tp, &Overflow );
    move32();
#endif

    /*T2 = KP951056516*Te + KP587785252*Th; */
    L_tmp = Mult_32_16( KP951056516_16FX, Te );        // Q(16 +x)
@@ -1261,6 +1275,16 @@ static void fft5_32_16fx(
    T4 = add_sat( T9, T6 );
    T5 = sub_sat( T6, T9 );

#ifdef ISSUE_1836_replace_overflow_libcom
    zRe[i3] = sub_sat( T4, T2 );
    move32();
    zRe[i1] = add_sat( T5, T3 );
    move32();
    zRe[i2] = add_sat( T4, T2 );
    move32();
    zRe[i4] = sub_sat( T5, T3 );
    move32();
#else
    zRe[i3] = sub_o( T4, T2, &Overflow );
    move32();
    zRe[i1] = add_o( T5, T3, &Overflow );
@@ -1269,6 +1293,7 @@ static void fft5_32_16fx(
    move32();
    zRe[i4] = sub_o( T5, T3, &Overflow );
    move32();
#endif

    /*    T2 = KP951056516 * Ts + KP587785252 * Tt; */
    L_tmp = Mult_32_16( KP951056516_16FX, Ts );        // Q(16 +x)
@@ -1675,9 +1700,11 @@ static void cftfsub_16fx(
{
    Word16 j, j1, j2, j3, l;
    Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
#ifndef ISSUE_1836_replace_overflow_libcom
#ifdef BASOP_NOGLOB_DECLARE_LOCAL
    Flag Overflow = 0;
    move32();
#endif
#endif

    l = 2;
@@ -1731,6 +1758,15 @@ static void cftfsub_16fx(
    {
        FOR( j = 0; j < l; j += 2 )
        {
#ifdef ISSUE_1836_replace_overflow_libcom
            j1 = add_sat( j, l );
            x0r = sub_sat( a[j], a[j1] );
            x0i = sub_sat( a[j + 1], a[j1 + 1] );
            a[j] = add_sat( a[j], a[j1] );
            move16();
            a[j + 1] = add_sat( a[j + 1], a[j1 + 1] );
            move16();
#else
            j1 = add_o( j, l, &Overflow );
            x0r = sub_o( a[j], a[j1], &Overflow );
            x0i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
@@ -1738,6 +1774,7 @@ static void cftfsub_16fx(
            move16();
            a[j + 1] = add_o( a[j + 1], a[j1 + 1], &Overflow );
            move16();
#endif
            a[j1] = x0r;
            move16();
            a[j1 + 1] = x0i;
@@ -1762,11 +1799,84 @@ static void cft1st_16fx(
    Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
    Word16 tmp;
    Word32 L_tmp;
#ifndef ISSUE_1836_replace_overflow_libcom
#ifdef BASOP_NOGLOB_DECLARE_LOCAL
    Flag Overflow = 0;
    move32();
#endif
#endif

#ifdef ISSUE_1836_replace_overflow_libcom
    x0r = add_sat( a[0], a[2] );
    x0i = add_sat( a[1], a[3] );
    x1r = sub_sat( a[0], a[2] );
    x1i = sub_sat( a[1], a[3] );
    x2r = add_sat( a[4], a[6] );
    x2i = add_sat( a[5], a[7] );
    x3r = sub_sat( a[4], a[6] );
    x3i = sub_sat( a[5], a[7] );
    a[0] = add_sat( x0r, x2r );
    move16();
    a[1] = add_sat( x0i, x2i );
    move16();
    a[4] = sub_sat( x0r, x2r );
    move16();
    a[5] = sub_sat( x0i, x2i );
    move16();
    a[2] = sub_sat( x1r, x3i );
    move16();
    a[3] = add_sat( x1i, x3r );
    move16();
    a[6] = add_sat( x1r, x3i );
    move16();
    a[7] = sub_sat( x1i, x3r );
    wk1r = w[2];
    move32();

    x0r = add_sat( a[8], a[10] );
    x0i = add_sat( a[9], a[11] );
    x1r = sub_sat( a[8], a[10] );
    x1i = sub_sat( a[9], a[11] );
    x2r = add_sat( a[12], a[14] );
    x2i = add_sat( a[13], a[15] );
    x3r = sub_sat( a[12], a[14] );
    x3i = sub_sat( a[13], a[15] );
    a[8] = add_sat( x0r, x2r );
    move16();
    a[9] = add_sat( x0i, x2i );
    move16();
    a[12] = sub_sat( x2i, x0i );
    move16();
    a[13] = sub_sat( x0r, x2r );
    move16();

    x0r = sub_sat( x1r, x3i );
    x0i = add_sat( x1i, x3r );
    tmp = sub_sat( x0r, x0i );
    L_tmp = Mult_32_16( wk1r, tmp );                                 /*Q(15+Qx+Q_edct) */

    a[10] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
    move16();

    tmp = add_sat( x0r, x0i );
    L_tmp = Mult_32_16( wk1r, tmp );                                 /*Q(15+Qx+Q_edct) */
    a[11] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /* Q(Qx+Q_edct) */
    move16();

    x0r = add_sat( x3i, x1r );
    x0i = sub_sat( x3r, x1i );
    tmp = sub_sat( x0i, x0r );
    L_tmp = Mult_32_16( wk1r, tmp );                                 /*Q(15+Qx+Q_edct) */
    a[14] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
    move16();

    tmp = add_sat( x0i, x0r );
    L_tmp = Mult_32_16( wk1r, tmp );                                 /*Q(15+Qx+Q_edct) */
    a[15] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
    move16();
    k1 = 0;
    move16();
#else
    x0r = add_o( a[0], a[2], &Overflow );
    x0i = add_o( a[1], a[3], &Overflow );
    x1r = sub_o( a[0], a[2], &Overflow );
@@ -1836,6 +1946,7 @@ static void cft1st_16fx(
    move16();
    k1 = 0;
    move16();
#endif

    FOR( j = 16; j < n; j += 16 )
    {
@@ -1856,6 +1967,118 @@ static void cft1st_16fx(

        L_tmp = L_shl( Mult_32_32( wk2i, wk1r ), 1 ); /*Q29 */
        wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i );      /*Q30 */
#ifdef ISSUE_1836_replace_overflow_libcom
        x0r = add_sat( a[j], a[j + 2] );
        x0i = add_sat( a[j + 1], a[j + 3] );
        x1r = sub_sat( a[j], a[j + 2] );
        x1i = sub_sat( a[j + 1], a[j + 3] );
        x2r = add_sat( a[j + 4], a[j + 6] );
        x2i = add_sat( a[j + 5], a[j + 7] );
        x3r = sub_sat( a[j + 4], a[j + 6] );
        x3i = sub_sat( a[j + 5], a[j + 7] );
        a[j] = add_sat( x0r, x2r );
        move16();
        a[j + 1] = add_sat( x0i, x2i );
        move16();

        x0r = sub_sat( x0r, x2r );
        x0i = sub_sat( x0i, x2i );
        L_tmp = Mult_32_16( wk2r, x0r );                                    /*Q(15+Qx+Q_edct) */
        L_tmp = Msub_32_16( L_tmp, wk2i, x0i );                             /*Q(15+Qx+Q_edct) */
        a[j + 4] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();

        L_tmp = Mult_32_16( wk2r, x0i );                                    /*Q(15+Qx+Q_edct) */
        L_tmp = Madd_32_16( L_tmp, wk2i, x0r );                             /*Q(15+Qx+Q_edct) */
        a[j + 5] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();

        x0r = sub_sat( x1r, x3i );
        x0i = add_sat( x1i, x3r );
        L_tmp = Mult_32_16( wk1r, x0r );                                    /*Q(15+Qx+Q_edct) */
        L_tmp = Msub_32_16( L_tmp, wk1i, x0i );                             /*Q(15+Qx+Q_edct) */
        a[j + 2] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();

        L_tmp = Mult_32_16( wk1r, x0i );                                    /*Q(15+Qx+Q_edct) */
        L_tmp = Madd_32_16( L_tmp, wk1i, x0r );                             /*Q(15+Qx+Q_edct) */
        a[j + 3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();

        x0r = add_sat( x1r, x3i );
        x0i = sub_sat( x1i, x3r );
        L_tmp = Mult_32_16( wk3r, x0r );                                    /*Q(15+Qx+Q_edct) */
        L_tmp = Msub_32_16( L_tmp, wk3i, x0i );                             /*Q(15+Qx+Q_edct) */
        a[j + 6] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();

        L_tmp = Mult_32_16( wk3r, x0i );                                    /*Q(15+Qx+Q_edct) */
        L_tmp = Madd_32_16( L_tmp, wk3i, x0r );                             /*Q(15+Qx+Q_edct) */
        a[j + 7] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();

        wk1r = w[k2 + 2];
        move32();
        wk1i = w[k2 + 3];
        move32();
        L_tmp = L_shl( Mult_32_32( wk2r, wk1i ), 1 ); /*Q29 */
        wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) );      /*Q30  */

        L_tmp = L_shl( Mult_32_32( wk2r, wk1r ), 1 ); /*Q29 */
        wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i );      /*Q30 */

        x0r = add_sat( a[j + 8], a[j + 10] );
        x0i = add_sat( a[j + 9], a[j + 11] );
        x1r = sub_sat( a[j + 8], a[j + 10] );
        x1i = sub_sat( a[j + 9], a[j + 11] );
        x2r = add_sat( a[j + 12], a[j + 14] );
        x2i = add_sat( a[j + 13], a[j + 15] );
        x3r = sub_sat( a[j + 12], a[j + 14] );
        x3i = sub_sat( a[j + 13], a[j + 15] );
        a[j + 8] = add_sat( x0r, x2r );
        move16();
        a[j + 9] = add_sat( x0i, x2i );
        move16();

        x0r = sub_sat( x0r, x2r );
        x0i = sub_sat( x0i, x2i );
        tmp = negate( x0r );
        L_tmp = Mult_32_16( wk2i, tmp );                                     /*Q(15+Qx+Q_edct) */
        L_tmp = Msub_32_16( L_tmp, wk2r, x0i );                              /*Q(15+Qx+Q_edct) */
        a[j + 12] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();

        tmp = negate( x0i );
        L_tmp = Mult_32_16( wk2i, tmp );                                     /*Q(15+Qx+Q_edct) */
        L_tmp = Madd_32_16( L_tmp, wk2r, x0r );                              /*Q(15+Qx+Q_edct) */
        a[j + 13] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
        move16();

        x0r = sub_sat( x1r, x3i );
        x0i = add_sat( x1i, x3r );
        L_tmp = Mult_32_16( wk1r, x0r );                                     /*Q(15+Qx+Q_edct) */
        L_tmp = Msub_32_16( L_tmp, wk1i, x0i );                              /*Q(15+Qx+Q_edct) */
        a[j + 10] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();

        L_tmp = Mult_32_16( wk1r, x0i );                                     /*Q(15+Qx+Q_edct) */
        L_tmp = Madd_32_16( L_tmp, wk1i, x0r );                              /*Q(15+Qx+Q_edct) */
        a[j + 11] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();

        x0r = add_sat( x1r, x3i );
        x0i = sub_sat( x1i, x3r );

        L_tmp = Mult_32_16( wk3r, x0r );                                     /*Q(15+Qx+Q_edct) */
        L_tmp = Msub_32_16( L_tmp, wk3i, x0i );                              /*Q(15+Qx+Q_edct) */
        a[j + 14] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();

        L_tmp = Mult_32_16( wk3r, x0i );                                     /*Q(15+Qx+Q_edct) */
        L_tmp = Madd_32_16( L_tmp, wk3i, x0r );                              /*Q(15+Qx+Q_edct) */
        a[j + 15] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();
#else
        x0r = add_o( a[j], a[j + 2], &Overflow );
        x0i = add_o( a[j + 1], a[j + 3], &Overflow );
        x1r = sub_o( a[j], a[j + 2], &Overflow );
@@ -1966,6 +2189,7 @@ static void cft1st_16fx(
        L_tmp = Madd_32_16( L_tmp, wk3i, x0r );                              /*Q(15+Qx+Q_edct) */
        a[j + 15] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
        move16();
#endif
    }

    return;
@@ -1988,13 +2212,44 @@ static void cftmdl_16fx(
    Word16 tmp, tmp2;
    Word32 L_tmp;
    Word32 L_x0r, L_x0i;
#ifndef ISSUE_1836_replace_overflow_libcom
#ifdef BASOP_NOGLOB_DECLARE_LOCAL
    Flag Overflow = 0;
    move32();
#endif
#endif
    m = shl( l, 2 );
    FOR( j = 0; j < l; j += 2 )
    {
#ifdef ISSUE_1836_replace_overflow_libcom
        j1 = add_sat( j, l );
        j2 = add_sat( j1, l );
        j3 = add_sat( j2, l );
        x0r = add_sat( a[j], a[j1] );
        x0i = add_sat( a[j + 1], a[j1 + 1] );
        x1r = sub_sat( a[j], a[j1] );
        x1i = sub_sat( a[j + 1], a[j1 + 1] );
        x2r = add_sat( a[j2], a[j3] );
        x2i = add_sat( a[j2 + 1], a[j3 + 1] );
        x3r = sub_sat( a[j2], a[j3] );
        x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
        a[j] = add_sat( x0r, x2r );
        move16();
        a[j + 1] = add_sat( x0i, x2i );
        move16();
        a[j2] = sub_sat( x0r, x2r );
        move16();
        a[j2 + 1] = sub_sat( x0i, x2i );
        move16();
        a[j1] = sub_sat( x1r, x3i );
        move16();
        a[j1 + 1] = add_sat( x1i, x3r );
        move16();
        a[j3] = add_sat( x1r, x3i );
        move16();
        a[j3 + 1] = sub_sat( x1i, x3r );
        move16();
#else
        j1 = add_o( j, l, &Overflow );
        j2 = add_o( j1, l, &Overflow );
        j3 = add_o( j2, l, &Overflow );
@@ -2022,6 +2277,7 @@ static void cftmdl_16fx(
        move16();
        a[j3 + 1] = sub_o( x1i, x3r, &Overflow );
        move16();
#endif
    }

    wk1r = w[2];
@@ -2029,6 +2285,51 @@ static void cftmdl_16fx(
    tmp2 = add( l, m );
    FOR( j = m; j < tmp2; j += 2 )
    {
#ifdef ISSUE_1836_replace_overflow_libcom
        j1 = add_sat( j, l );
        j2 = add_sat( j1, l );
        j3 = add_sat( j2, l );
        x0r = add_sat( a[j], a[j1] );
        x0i = add_sat( a[j + 1], a[j1 + 1] );
        x1r = sub_sat( a[j], a[j1] );
        x1i = sub_sat( a[j + 1], a[j1 + 1] );
        x2r = add_sat( a[j2], a[j3] );
        x2i = add_sat( a[j2 + 1], a[j3 + 1] );
        x3r = sub_sat( a[j2], a[j3] );
        x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
        a[j] = add_sat( x0r, x2r );
        move16();
        a[j + 1] = add_sat( x0i, x2i );
        move16();
        a[j2] = sub_sat( x2i, x0i );
        move16();
        a[j2 + 1] = sub_sat( x0r, x2r );
        move16();

        x0r = sub_sat( x1r, x3i );
        x0i = add_sat( x1i, x3r );
        tmp = sub_sat( x0r, x0i );
        L_tmp = Mult_32_16( wk1r, tmp );                                 /*Q(15+Qx+Q_edct) */
        a[j1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();

        tmp = add_sat( x0r, x0i );
        L_tmp = Mult_32_16( wk1r, tmp );                                     /*Q(15+Qx+Q_edct) */
        a[j1 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();

        x0r = add_sat( x3i, x1r );
        x0i = sub_sat( x3r, x1i );
        tmp = sub_sat( x0i, x0r );
        L_tmp = Mult_32_16( wk1r, tmp );                                 /*Q(15+Qx+Q_edct) */
        a[j3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();

        tmp = add_sat( x0i, x0r );
        L_tmp = Mult_32_16( wk1r, tmp );                                     /*Q(15+Qx+Q_edct) */
        a[j3 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
        move16();
#else
        j1 = add_o( j, l, &Overflow );
        j2 = add_o( j1, l, &Overflow );
        j3 = add_o( j2, l, &Overflow );
@@ -2072,6 +2373,7 @@ static void cftmdl_16fx(
        L_tmp = Mult_32_16( wk1r, tmp );                                     /*Q(15+Qx+Q_edct) */
        a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
        move16();
#endif
    }

    k1 = 0;
@@ -2098,6 +2400,63 @@ static void cftmdl_16fx(
        tmp2 = add( l, k );
        FOR( j = k; j < tmp2; j += 2 )
        {
#ifdef ISSUE_1836_replace_overflow_libcom
            j1 = add_sat( j, l );
            j2 = add_sat( j1, l );
            j3 = add_sat( j2, l );
            x0r = add_sat( a[j], a[j1] );
            x0i = add_sat( a[j + 1], a[j1 + 1] );
            x1r = sub_sat( a[j], a[j1] );
            x1i = sub_sat( a[j + 1], a[j1 + 1] );
            x2r = add_sat( a[j2], a[j3] );
            x2i = add_sat( a[j2 + 1], a[j3 + 1] );
            x3r = sub_sat( a[j2], a[j3] );
            x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
            a[j] = add_sat( x0r, x2r );
            move16();
            a[j + 1] = add_sat( x0i, x2i );
            move16();

            x0r = sub_sat( x0r, x2r );
            x0i = sub_sat( x0i, x2i );

            L_tmp = Mult_32_16( wk2r, x0r );                                 /*Q(15+Qx+Q_edct) */
            L_tmp = Msub_32_16( L_tmp, wk2i, x0i );                          /*Q(15+Qx+Q_edct) */
            a[j2] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
            move16();

            L_tmp = Mult_32_16( wk2r, x0i );                                     /*Q(15+Qx+Q_edct) */
            L_tmp = Madd_32_16( L_tmp, wk2i, x0r );                              /*Q(15+Qx+Q_edct) */
            a[j2 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
            move16();

            x0r = sub_sat( x1r, x3i );
            x0i = add_sat( x1i, x3r );

            L_tmp = Mult_32_16( wk1r, x0r );                                 /*Q(15+Qx+Q_edct) */
            L_tmp = Msub_32_16( L_tmp, wk1i, x0i );                          /*Q(15+Qx+Q_edct) */
            a[j1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
            move16();

            L_tmp = Mult_32_16( wk1r, x0i );                                     /*Q(15+Qx+Q_edct) */
            L_tmp = Madd_32_16( L_tmp, wk1i, x0r );                              /*Q(15+Qx+Q_edct) */
            a[j1 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
            move16();

            L_x0r = L_add( (Word32) x1r, (Word32) x3i );
            L_x0i = L_sub( (Word32) x1i, (Word32) x3r );
            x0r = extract_l( L_x0r );
            x0i = extract_l( L_x0i );
            L_tmp = Mult_32_16( wk3r, x0r );                                 /*Q(15+Qx+Q_edct) */
            L_tmp = Msub_32_16( L_tmp, wk3i, x0i );                          /*Q(15+Qx+Q_edct) */
            a[j3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
            move16();

            L_tmp = Mult_32_16( wk3r, x0i );                                     /*Q(15+Qx+Q_edct) */
            L_tmp = Madd_32_16( L_tmp, wk3i, x0r );                              /*Q(15+Qx+Q_edct) */
            a[j3 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
            move16();
#else
            j1 = add_o( j, l, &Overflow );
            j2 = add_o( j1, l, &Overflow );
            j3 = add_o( j2, l, &Overflow );
@@ -2153,20 +2512,88 @@ static void cftmdl_16fx(
            L_tmp = Madd_32_16( L_tmp, wk3i, x0r );                              /*Q(15+Qx+Q_edct) */
            a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
            move16();
#endif
        }

        wk1r = w[k2 + 2];
        move32();
        wk1i = w[k2 + 3];
        move32();
#ifdef ISSUE_1836_replace_overflow_libcom
        L_tmp = L_shl_sat( Mult_32_32( wk2r, wk1i ), 1 );                  /*Q29 */
        wk3r = L_sub_sat( wk1r, L_shl_sat( L_tmp, 1 ) );                   /*Q30  */

        L_tmp = L_shl_sat( Mult_32_32( wk2r, wk1r ), 1 );                  /*Q29 */
        wk3i = L_sub_sat( L_shl_sat( L_tmp, 1 ), wk1i );                   /*Q30 */
        tmp2 = add( l, add( k, m ) );
#else
        L_tmp = L_shl_o( Mult_32_32( wk2r, wk1i ), 1, &Overflow );         /*Q29 */
        wk3r = L_sub_o( wk1r, L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q30  */

        L_tmp = L_shl_o( Mult_32_32( wk2r, wk1r ), 1, &Overflow );         /*Q29 */
        wk3i = L_sub_o( L_shl_o( L_tmp, 1, &Overflow ), wk1i, &Overflow ); /*Q30 */
        tmp2 = add( l, add( k, m ) );
#endif
        FOR( j = add( k, m ); j < tmp2; j += 2 )
        {
#ifdef ISSUE_1836_replace_overflow_libcom
            j1 = add_sat( j, l );
            j2 = add_sat( j1, l );
            j3 = add_sat( j2, l );
            x0r = add_sat( a[j], a[j1] );
            x0i = add_sat( a[j + 1], a[j1 + 1] );
            x1r = sub_sat( a[j], a[j1] );
            x1i = sub_sat( a[j + 1], a[j1 + 1] );
            x2r = add_sat( a[j2], a[j3] );
            x2i = add_sat( a[j2 + 1], a[j3 + 1] );
            x3r = sub_sat( a[j2], a[j3] );
            x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
            a[j] = add_sat( x0r, x2r );
            move16();
            a[j + 1] = add_sat( x0i, x2i );
            move16();

            x0r = sub_sat( x0r, x2r );
            x0i = sub_sat( x0i, x2i );

            tmp = negate( x0r );
            L_tmp = Mult_32_16( wk2i, tmp );                                 /*Q(15+Qx+Q_edct) */
            L_tmp = Msub_32_16( L_tmp, wk2r, x0i );                          /*Q(15+Qx+Q_edct) */
            a[j2] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
            move16();

            tmp = negate( x0i );
            L_tmp = Mult_32_16( wk2i, tmp );                                     /*Q(15+Qx+Q_edct) */
            L_tmp = Madd_32_16( L_tmp, wk2r, x0r );                              /*Q(15+Qx+Q_edct) */
            a[j2 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
            move16();

            x0r = sub_sat( x1r, x3i );
            x0i = add_sat( x1i, x3r );

            L_tmp = Mult_32_16( wk1r, x0r );                                 /*Q(15+Qx+Q_edct) */
            L_tmp = Msub_32_16( L_tmp, wk1i, x0i );                          /*Q(15+Qx+Q_edct) */
            a[j1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
            move16();

            L_tmp = Mult_32_16( wk1r, x0i );                                     /*Q(15+Qx+Q_edct) */
            L_tmp = Madd_32_16( L_tmp, wk1i, x0r );                              /*Q(15+Qx+Q_edct) */
            a[j1 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
            move16();

            x0r = add_sat( x1r, x3i );
            x0i = sub_sat( x1i, x3r );

            L_tmp = Mult_32_16( wk3r, x0r );                                 /*Q(15+Qx+Q_edct) */
            L_tmp = Msub_32_16( L_tmp, wk3i, x0i );                          /*Q(15+Qx+Q_edct) */
            a[j3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
            move16();

            L_tmp = Mult_32_16( wk3r, x0i );                                     /*Q(15+Qx+Q_edct) */
            L_tmp = Madd_32_16( L_tmp, wk3i, x0r );                              /*Q(15+Qx+Q_edct) */
            a[j3 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) );                   /*Q(Qx+Q_edct) */
            move16();
#else
            j1 = add_o( j, l, &Overflow );
            j2 = add_o( j1, l, &Overflow );
            j3 = add_o( j2, l, &Overflow );
@@ -2223,6 +2650,7 @@ static void cftmdl_16fx(
            L_tmp = Madd_32_16( L_tmp, wk3i, x0r );                              /*Q(15+Qx+Q_edct) */
            a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
            move16();
#endif
        }
    }