Commit 78c80a0c authored by vaclav
Browse files

OPTIMIZE_FFT_STACK

parent 82ef8f7d
Loading
Loading
Loading
Loading
+178 −15
Original line number Diff line number Diff line
@@ -11,7 +11,9 @@
#include "stl.h"
#include "math_32.h"

static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */

static Word16 get_edxt_factor(
    const Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */
{
    Word16 factor; /*Q15*/
    factor = 0;
@@ -53,8 +55,12 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len
    }
    ELSE IF( EQ_16( length, 40 ) )
    {
#ifdef OPTIMIZE_FFT_STACK
        assert( 0 );
#else
        factor = 7327; /*0.223 in Q15*/
        move16();
#endif
    }
    ELSE IF( EQ_16( length, 960 ) )
    {
@@ -73,33 +79,57 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len
    }
    ELSE IF( EQ_16( length, 120 ) )
    {
#ifdef OPTIMIZE_FFT_STACK
        assert( 0 );
#else
        factor = 4230; /*0.1290 in Q15*/
        move16();
#endif
    }
    ELSE IF( EQ_16( length, 1200 ) )
    {
#ifdef OPTIMIZE_FFT_STACK
        assert( 0 );
#else
        factor = 1338; /*0.040 in Q15*/
        move16();
#endif
    }
    ELSE IF( EQ_16( length, 800 ) )
    {
#ifdef OPTIMIZE_FFT_STACK
        assert( 0 );
#else
        factor = 1638; /*0.05 in Q15*/
        move16();
#endif
    }
    ELSE IF( EQ_16( length, 400 ) )
    {
#ifdef OPTIMIZE_FFT_STACK
        assert( 0 );
#else
        factor = 2317; /*0.070 in Q15*/
        move16();
#endif
    }
    ELSE IF( EQ_16( length, 200 ) )
    {
#ifdef OPTIMIZE_FFT_STACK
        assert( 0 );
#else
        factor = 3277; /*0.1 in Q15*/
        move16();
#endif
    }

    return factor; /*Q15*/
}

static Word16 const *get_edct_table( Word16 length /*Q0*/, Word16 *q )

static Word16 const *get_edct_table(
    const Word16 length /*Q0*/,
    Word16 *q )
{
    Word16 const *edct_table;
    edct_table = NULL;
@@ -590,6 +620,7 @@ void edxt_fx(
    move16();
    cosPtr = NULL;
    sinPtr = NULL;

    IF( EQ_16( length, 512 ) )
    {
        cosPtr = cos_scale_tbl_512; /*Q15*/
@@ -641,10 +672,14 @@ void edxt_fx(
    }
    ELSE IF( EQ_16( length, 40 ) )
    {
#ifdef OPTIMIZE_FFT_STACK
        assert( 0 );
#else
        cosPtr = cos_scale_tbl_640; /*Q15*/
        sinPtr = sin_scale_tbl_640; /*Q15*/
        n = 16;
        move16();
#endif
    }
    ELSE IF( EQ_16( length, 960 ) )
    {
@@ -669,38 +704,58 @@ void edxt_fx(
    }
    ELSE IF( EQ_16( length, 120 ) )
    {
#ifdef OPTIMIZE_FFT_STACK
        assert( 0 );
#else
        cosPtr = cos_scale_tbl_960; /*Q15*/
        sinPtr = sin_scale_tbl_960; /*Q15*/
        n = 8;
        move16();
#endif
    }
    ELSE IF( EQ_16( length, 1200 ) )
    {
#ifdef OPTIMIZE_FFT_STACK
        assert( 0 );
#else
        cosPtr = cos_scale_tbl_1200; /*Q15*/
        sinPtr = sin_scale_tbl_1200; /*Q15*/
        n = 1;
        move16();
#endif
    }
    ELSE IF( EQ_16( length, 800 ) )
    {
#ifdef OPTIMIZE_FFT_STACK
        assert( 0 );
#else
        cosPtr = cos_scale_tbl_800; /*Q15*/
        sinPtr = sin_scale_tbl_800; /*Q15*/
        n = 1;
        move16();
#endif
    }
    ELSE IF( EQ_16( length, 400 ) )
    {
#ifdef OPTIMIZE_FFT_STACK
        assert( 0 );
#else
        cosPtr = cos_scale_tbl_800; /*Q15*/
        sinPtr = sin_scale_tbl_800; /*Q15*/
        n = 2;
        move16();
#endif
    }
    ELSE IF( EQ_16( length, 200 ) )
    {
#ifdef OPTIMIZE_FFT_STACK
        assert( 0 );
#else
        cosPtr = cos_scale_tbl_800; /*Q15*/
        sinPtr = sin_scale_tbl_800; /*Q15*/
        n = 4;
        move16();
#endif
    }

    test();
@@ -708,16 +763,26 @@ void edxt_fx(
    {
        const Word16 Nm1 = sub( length, 1 );
        const Word16 xSign = sub( imult1616( 2, shr( kernelType, 1 ) ), 1 ); /*Q0*/
#ifdef OPTIMIZE_FFT_STACK
        cmplx spec[L_FRAME_MAX];
#else
        Word32 re[L_FRAME_PLUS];
        Word32 im[L_FRAME_PLUS];
#endif

        IF( !synthesis )
        {
            FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* pre-modulation of audio input */
            {
#ifdef OPTIMIZE_FFT_STACK
                spec[k].re = x[2 * k];                                                            /*Qx*/
                spec[( Nm1 - k )].re = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/
                spec[k].im = spec[( Nm1 - k )].im = 0;
#else
                re[k] = x[2 * k];                                                            /*Qx*/
                re[( Nm1 - k )] = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/
                im[k] = im[( Nm1 - k )] = 0;
#endif
                move32();
                move32();
                move32();
@@ -726,26 +791,46 @@ void edxt_fx(
            IF( EQ_16( length, 512 ) )
            {
                /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */
#ifdef OPTIMIZE_FFT_STACK
                hdrm = L_norm_arr_cmplx( spec, 512 );
#else
                hdrm = s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) );
#endif
                IF( LT_16( hdrm, 4 ) )
                {
                    tmp = sub( hdrm, 4 );
#ifdef OPTIMIZE_FFT_STACK
                    scale_sig32_cmplx( spec, 512, tmp );
#else
                    scale_sig32( re, 512, tmp );
                    scale_sig32( im, 512, tmp );
#endif
                }

#ifdef OPTIMIZE_FFT_STACK
                DoRTFTn_fx( NULL, NULL, spec, 512 );
#else
                DoRTFTn_fx( re, im, 512 );
#endif

                IF( LT_16( hdrm, 4 ) )
                {
                    tmp = negate( tmp );
#ifdef OPTIMIZE_FFT_STACK
                    scale_sig32_cmplx( spec, 512, tmp );
#else
                    scale_sig32( re, 512, tmp );
                    scale_sig32( im, 512, tmp );
#endif
                }
            }
            ELSE /* fft() doesn't support 512 */
            {
#ifdef OPTIMIZE_FFT_STACK
                fft_cmplx_fx( spec, length );
#else
                fft_fx( re, im, length, 1 );
#endif
            }

            IF( shr( kernelType, 1 ) )
@@ -757,12 +842,21 @@ void edxt_fx(
                    const Word16 wRe = cosPtr[( k * n )]; /*Q15*/
                    const Word16 wIm = sinPtr[( k * n )]; /*Q15*/

#ifdef OPTIMIZE_FFT_STACK
                    y[k] /*pt 1*/ = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) );     /*Qx*/
                    y[( length - k )] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/
#else
                    y[k] /*pt 1*/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) );         /*Qx*/
                    y[( length - k )] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) );     /*Qx*/
#endif
                    move32();
                    move32();
                }
#ifdef OPTIMIZE_FFT_STACK
                y[( length / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/
#else
                y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 );                     /*Qx*/
#endif
                move32();
            }
            ELSE /* forw. DST-II */
@@ -774,16 +868,29 @@ void edxt_fx(
                    const Word16 wRe = cosPtr[( k * n )]; /*Q15*/
                    const Word16 wIm = sinPtr[( k * n )]; /*Q15*/

#ifdef OPTIMIZE_FFT_STACK
                    y[( Nm1 - k )] = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/
                    y[k - 1] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) );       /*Qx*/
#else
                    y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) );        /*Qx*/
                    y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) );              /*Qx*/
#endif
                    move32();
                    move32();
                }
#ifdef OPTIMIZE_FFT_STACK
                y[( Nm1 / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/
#else
                y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 );                        /*Qx*/
#endif
                move32();
            }

#ifdef OPTIMIZE_FFT_STACK
            y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( spec[0].re, 1 ); /*Qx*/
#else
            y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 );                                 /*Qx*/
#endif
            move32();
        }
        ELSE /* inverse II = III */
@@ -797,12 +904,21 @@ void edxt_fx(
                    const Word16 wRe = shr( cosPtr[imult1616( k, n )], 1 );
                    const Word16 wIm = shr( sinPtr[imult1616( k, n )], 1 );

#ifdef OPTIMIZE_FFT_STACK
                    spec[k].re = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/
                    spec[k].im = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/
#else
                    re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) );      /*Qx*/
                    im[k] = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) );      /*Qx*/
#endif
                    move32();
                    move32();
                }
#ifdef OPTIMIZE_FFT_STACK
                spec[( length / 2 )].re = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/
#else
                re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 );                     /*Qx*/
#endif
                move32();
            }
            ELSE /* DST type III */
@@ -814,23 +930,42 @@ void edxt_fx(
                    const Word16 wRe = shr( cosPtr[( k * n )], 1 ); /*Q15*/
                    const Word16 wIm = shr( sinPtr[( k * n )], 1 ); /*Q15*/

#ifdef OPTIMIZE_FFT_STACK
                    spec[k].re = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/
                    spec[k].im = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/
#else
                    re[k] = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/
                    im[k] = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/
#endif
                    move32();
                    move32();
                }
#ifdef OPTIMIZE_FFT_STACK
                spec[( length / 2 )].re = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/
#else
                re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 );                        /*Qx*/
#endif
                move32();
            }

#ifdef OPTIMIZE_FFT_STACK
            spec[0].re = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/
            spec[0].im = spec[( length / 2 )].im = 0;
#else
            re[0] = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )];                                             /*Qx*/
            im[0] = im[( length / 2 )] = 0;
#endif
            move32();
            move32();
            FOR( k = ( Nm1 / 2 ); k > 0; k-- )
            {
#ifdef OPTIMIZE_FFT_STACK
                spec[( length - k )].re = spec[k].re;             /*Qx*/
                spec[( length - k )].im = L_negate( spec[k].im ); /*Qx*/
#else
                re[( length - k )] = re[k];             /*Qx*/
                im[( length - k )] = L_negate( im[k] ); /*Qx*/
#endif
                move32();
                move32();
            }
@@ -838,35 +973,63 @@ void edxt_fx(
            IF( EQ_16( length, 512 ) )
            {
                /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */
#ifdef OPTIMIZE_FFT_STACK
                hdrm = L_norm_arr_cmplx( spec, 512 );
#else
                hdrm = s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) );
#endif
                IF( LT_16( hdrm, 4 ) )
                {
                    tmp = sub( hdrm, 4 );
#ifdef OPTIMIZE_FFT_STACK
                    scale_sig32_cmplx( spec, 512, tmp );
#else
                    scale_sig32( re, 512, tmp );
                    scale_sig32( im, 512, tmp );
#endif
                }

#ifdef OPTIMIZE_FFT_STACK
                DoRTFTn_fx( NULL, NULL, spec, 512 );
#else
                DoRTFTn_fx( re, im, 512 );
#endif

                IF( LT_16( hdrm, 4 ) )
                {
                    tmp = negate( tmp );
#ifdef OPTIMIZE_FFT_STACK
                    scale_sig32_cmplx( spec, 512, tmp );
#else
                    scale_sig32( re, 512, tmp );
                    scale_sig32( im, 512, tmp );
#endif
                }
            }
            ELSE /* fft() doesn't support 512 */
            {
#ifdef OPTIMIZE_FFT_STACK
                fft_cmplx_fx( spec, length );
#else
                fft_fx( re, im, length, 1 );
#endif
            }

            FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* post-modulation of FFT output */
            {
#ifdef OPTIMIZE_FFT_STACK
                y[2 * k] = spec[k].re; /*Qx*/
#else
                y[2 * k] = re[k];                                                                /*Qx*/
#endif
                move32();
                IF( xSign != 0 )
                {
#ifdef OPTIMIZE_FFT_STACK
                    y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( spec[( Nm1 - k )].re, shl_sat( xSign, 15 ) ); /*Qx*/
#else
                    y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( re[( Nm1 - k )], shl_sat( xSign, 15 ) ); /*Qx*/
#endif
                }
                ELSE
                {
+70 −0
Original line number Diff line number Diff line
@@ -6932,6 +6932,7 @@ void fft_fx(
    const Word16 s       /* i  : sign                */
)
{

    cmplx x[960];

    FOR( Word16 j = 0; j < length; j++ )
@@ -7010,6 +7011,75 @@ void fft_fx(
    return;
}


#ifdef OPTIMIZE_FFT_STACK
/*-----------------------------------------------------------------*
 * fft_cmplx_fx()
 *
 * Complex FFT entry point that works directly on a caller-supplied
 * cmplx buffer. The function only dispatches on 'length': each
 * supported length forwards 'x' to fft_len20_fx() or to fft_lenN()
 * with a length-specific rotation table and factor set.
 *
 * Lengths without a case reach the default branch, which contains
 * only an assert; when assert() is compiled out the function then
 * returns without touching 'x'.
 *-----------------------------------------------------------------*/
void fft_cmplx_fx(
    cmplx *x,           /* i/o: complex data        */
    const Word16 length /* i  : length of fft       */
)
{
    /* NOTE(review): the numeric arguments after the rotation table are interpreted by fft_lenN(), */
    /* which is not visible here - verify any new entry against that function's parameter list.    */
    SWITCH( length )
    {
        case 20:
            /* dedicated 20-point kernel, no rotation table argument */
            fft_len20_fx( x );
            BREAK;
        case 40:
            fft_lenN( x, FFT_RotVector_640_fx, 640, 5, 8, 8, 40 );
            BREAK;
        case 64:
            fft_lenN( x, FFT_RotVector_256_fx, 256, 8, 8, 8, 64 );
            BREAK;
        case 80:
            fft_lenN( x, FFT_RotVector_640_fx, 640, 10, 8, 4, 40 );
            BREAK;
        case 100:
            fft_lenN( x, FFT_RotVector_400_fx, 400, 10, 10, 4, 40 );
            BREAK;
        case 120:
            fft_lenN( x, FFT_RotVector_960_fx, 960, 15, 8, 4, 60 );
            BREAK;
        case 128:
            fft_lenN( x, FFT_RotVector_256_fx, 256, 16, 8, 4, 64 );
            BREAK;
        case 160:
            fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 8, 2, 40 );
            BREAK;
        case 200:
            fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 10, 2, 40 );
            BREAK;
        case 240:
            fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 8, 2, 60 );
            BREAK;
        case 256:
            fft_lenN( x, FFT_RotVector_256_fx, 256, 32, 8, 2, 64 );
            BREAK;
        case 320:
            fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 16, 2, 40 );
            BREAK;
        case 400:
            fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 20, 2, 40 );
            BREAK;
        case 480:
            fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 16, 2, 60 );
            BREAK;
        case 600:
            fft_lenN( x, FFT_RotVector_600_fx, 600, 30, 20, 2, 60 );
            BREAK;
        case 640:
            fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 32, 2, 40 );
            BREAK;
        case 960:
            fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 32, 2, 60 );
            BREAK;
        default:
            /* unsupported length: diagnosed by the assert only, no transform is performed */
            assert( !"fft length is not supported!" );
    }

    return;
}
#endif


void rfft_fx(
    Word32 *x,           /* i/o: values Qx                   */
    const Word16 *w,     /* i  : window Q15                   */
+46 −18
Original line number Diff line number Diff line
@@ -47,13 +47,30 @@ static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w );
void DoRTFTn_fx(
    Word32 *x, /* i/o : real part of input and output data Q(x)      */
    Word32 *y, /* i/o : imaginary part of input and output data Q(x) */
#ifdef OPTIMIZE_FFT_STACK
    cmplx *spec, /* i/o : complex input and output data                */
#endif
    const Word16 n /* i : size of the FFT up to 1024 */
)
{

    Word16 i;
    Word32 z[2048], *pt;

#ifdef OPTIMIZE_FFT_STACK
    IF( spec != NULL )
    {
        pt = z;
        FOR( i = 0; i < n; i++ )
        {
            *pt++ = spec[i].re;
            move16();
            *pt++ = spec[i].im;
            move16();
        }
    }
    ELSE
    {
#endif
        pt = z;
        FOR( i = 0; i < n; i++ )
        {
@@ -62,6 +79,9 @@ void DoRTFTn_fx(
            *pt++ = y[i];
            move16();
        }
#ifdef OPTIMIZE_FFT_STACK
    }
#endif

    IF( EQ_16( n, 16 ) )
    {
@@ -92,6 +112,10 @@ void DoRTFTn_fx(
        assert( 0 );
    }

#ifdef OPTIMIZE_FFT_STACK
    /* The split x[]/y[] output copy below runs only when no cmplx buffer was passed. */
    /* NOTE(review): no line visible in this diff copies z[] back into spec when spec != NULL, */
    /* although edxt_fx reads spec after calling DoRTFTn_fx( NULL, NULL, spec, 512 ) -         */
    /* confirm that the spec path actually returns the FFT result.                             */
    IF( spec == NULL )
    {
#endif
        x[0] = z[0];
        move16();
        y[0] = z[1];
@@ -104,7 +128,9 @@ void DoRTFTn_fx(
            y[i] = *pt++;
            move16();
        }

#ifdef OPTIMIZE_FFT_STACK
    }
#endif
    return;
}

@@ -124,6 +150,8 @@ static void cdftForw_fx(

    /* Do FFT */
    cftfsub_fx( n, a, w );

    return;
}

/*-----------------------------------------------------------------*
+1 −0
Original line number Diff line number Diff line
@@ -95,6 +95,7 @@
#define FIX_1525_UNINIT_FORMAT_SWITCHING_DEC            /* VA: float issue 1525: fix reading of uninitialized memory in format switching at the decoder */
#define HARMONIZE_2446_CON_TCX_FX                       /* FhG: basop issue: 2446 harmonization of function con_tcx_fx() */
#define FIX_2433_ARITH_OVERFLOW_IN_QMETA_ENC            /* Nokia: Fix to convert non-converted binary operations */
#define OPTIMIZE_FFT_STACK                              /* NOTE(review): switch has no owner/issue description; it selects the cmplx-buffer code paths in edxt_fx(), DoRTFTn_fx() and fft_cmplx_fx() - add the usual "owner: purpose" text */

/* #################### End BE switches ################################## */

+23 −2
Original line number Diff line number Diff line
@@ -1303,6 +1303,14 @@ void scale_sig32(
    const Word16 exp0 /* i  : exponent: x = round(x << exp)   Qx xx exp */
);

#ifdef OPTIMIZE_FFT_STACK
/* cmplx-buffer counterpart of scale_sig32(): edxt_fx() calls it once on its cmplx buffer */
/* where the split-array code path calls scale_sig32() on re[] and on im[].               */
/* NOTE(review): the implementation is not visible here - presumably both .re and .im of  */
/* each of the lg elements are scaled by exp0; confirm against the definition.            */
void scale_sig32_cmplx(
    cmplx x[],        /* i/o: signal to scale                 Qx        */
    const Word16 lg,  /* i  : size of x[]                     Q0        */
    const Word16 exp0 /* i  : exponent: x = round(x << exp)   Qx   exp  */
);

#endif
void Scale_sig64(
    Word64 x[], /* i/o: signal to scale                 Qx        */
    Word16 len, /* i  : size of x[]                     Q0        */
@@ -4052,6 +4060,9 @@ void BASOP_rfft( Word32 *x, Word16 sizeOfFft, Word16 *scale, Word16 isign );
/* FFT on split real/imaginary arrays or, when OPTIMIZE_FFT_STACK is defined and   */
/* spec is non-NULL, on a cmplx buffer (callers then pass NULL for x and y).       */
void DoRTFTn_fx(
    Word32 *x, /* i/o : real part of input and output data       */
    Word32 *y, /* i/o : imaginary part of input and output data  */
#ifdef OPTIMIZE_FFT_STACK
    cmplx *spec, /* i/o : complex input and output data; read instead of x/y when non-NULL */
#endif
    const Word16 n /* i : size of the FFT up to 1024 */
);

@@ -4113,6 +4124,13 @@ void fft_fx(
    const Word16 s       /* i  : sign                */
);

#ifdef OPTIMIZE_FFT_STACK
/* Complex FFT operating directly on a caller-supplied cmplx buffer; the supported */
/* lengths are the case labels in the definition, any other length only asserts.   */
void fft_cmplx_fx(
    cmplx *spec,        /* i/o: complex data        */
    const Word16 length /* i  : length of fft       */
);

#endif
void rfft_fx(
    Word32 *x,           /* i/o: values                    */
    const Word16 *w,     /* i  : window                    */
@@ -4130,6 +4148,9 @@ void DoRTFTn_fx_ivas(
Word16 find_guarded_bits_fx( const Word32 n );

Word16 L_norm_arr( const Word32 *arr, Word16 size );
#ifdef OPTIMIZE_FFT_STACK
/* cmplx-buffer variant of L_norm_arr(); edxt_fx() uses its result as the headroom that the */
/* split-array path obtains as s_min( L_norm_arr( re ), L_norm_arr( im ) ).                 */
Word16 L_norm_arr_cmplx( const cmplx *arr, Word16 size );
#endif
Word16 norm_arr( Word16 *arr, Word16 size );
Word16 W_norm_arr( Word64 *arr, Word16 size );

Loading