Commit a29bd934 authored by vaclav's avatar vaclav
Browse files

Merge branch 'OPTIMIZE_FFT_STACK' into 'main'

FFT: reduce stack and harmonize functions

See merge request !2884
parents 7543c759 a5d8c358
Loading
Loading
Loading
Loading
+60 −0
Original line number Diff line number Diff line
@@ -714,6 +714,66 @@ Word16 getScaleFactor16( /* o: measured headroom in range [
    return i;
}

#ifdef OPTIMIZE_FFT_STACK
/*
 * getScaleFactor32_cmplx()
 *
 * Scans the real and imaginary parts of a complex array, tracks the largest
 * positive and the most negative value of each part, and returns the smallest
 * norm_l() result obtained from those four extremes.
 */

/* o: measured headroom in range [0..31], 0 if all x[i] == 0 */
Word16 getScaleFactor32_cmplx(
    cmplx *x,          /* i: array containing 32-bit data */
    const Word16 len_x /* i: length of the array to scan  */
)
{
    Word16 k;
    Word16 norm_pos, norm_neg;
    Word16 headroom_re, headroom_im;
    Word32 re_hi, re_lo, im_hi, im_lo;

    /* running extremes of both parts start at zero */
    re_hi = 0;
    move32();
    re_lo = 0;
    move32();
    im_hi = 0;
    move32();
    im_lo = 0;
    move32();

    FOR( k = 0; k < len_x; k++ )
    {
        /* real part: update the positive or the negative extreme */
        if ( x[k].re >= 0 )
            re_hi = L_max( re_hi, x[k].re );
        if ( x[k].re < 0 )
            re_lo = L_min( re_lo, x[k].re );

        /* imaginary part: same bookkeeping */
        if ( x[k].im >= 0 )
            im_hi = L_max( im_hi, x[k].im );
        if ( x[k].im < 0 )
            im_lo = L_min( im_lo, x[k].im );
    }

    /* 0x20 marks "extreme is zero"; if both extremes are zero the mask */
    /* with 0x1F below turns the result into 0                          */
    norm_pos = 0x20;
    move16();
    norm_neg = 0x20;
    move16();

    if ( re_hi != 0 )
        norm_pos = norm_l( re_hi );

    if ( re_lo != 0 )
        norm_neg = norm_l( re_lo );

    headroom_re = s_and( s_min( norm_pos, norm_neg ), 0x1F );

    /* repeat for the imaginary part */
    norm_pos = 0x20;
    move16();
    norm_neg = 0x20;
    move16();

    if ( im_hi != 0 )
        norm_pos = norm_l( im_hi );

    if ( im_lo != 0 )
        norm_neg = norm_l( im_lo );

    headroom_im = s_and( s_min( norm_pos, norm_neg ), 0x1F );

    /* the part with less headroom limits the whole array */
    return s_min( headroom_re, headroom_im );
}
#endif


/********************************************************************/
/*!
+12 −3
Original line number Diff line number Diff line
@@ -216,10 +216,19 @@ void BASOP_Util_Sqrt_InvSqrt_MantExp( Word16 mantissa, /*!< mantissa */
    and   -32768 <= x <= -16384 for negative x
*/

Word16 getScaleFactor16(                       /* o: measured headroom in range [0..15], 0 if all x[i] == 0 */
/* o: measured headroom in range [0..15], 0 if all x[i] == 0 */
Word16 getScaleFactor16(
    const Word16 *x,      /* i: array containing 16-bit data */
    const Word16 len_x ); /* i: length of the array to scan  */

#ifdef OPTIMIZE_FFT_STACK
/* Headroom measurement for an array of complex (re/im) 32-bit values; */
/* declared only when OPTIMIZE_FFT_STACK is defined                    */
/* o: measured headroom in range [0..31], 0 if all x[i] == 0 */
Word16 getScaleFactor32_cmplx(
    cmplx *x,          /* i: array containing 32-bit data */
    const Word16 len_x /* i: length of the array to scan  */
);
#endif

/********************************************************************/
/*!
  \brief   Calculates the scalefactor needed to normalize input array
+316 −26

File changed.

Preview size limit exceeded, changes collapsed.

+130 −2
Original line number Diff line number Diff line
@@ -109,11 +109,13 @@ static void fft5_8( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx );
static void fft4_5( Word32 *x, Word32 *y, const Word16 *Idx );
static void fft5_4( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx );

#ifndef HARMONIZE_DCT
/* Forward declaration; compiled only when HARMONIZE_DCT is not defined */
void DoRTFTn_fx_ivas(
    Word32 *x,     /* i/o: real part of input and output data       */
    Word32 *y,     /* i/o: imaginary part of input and output data  */
    const Word16 n /* i  : size of the FFT n=(2^k) up to 1024       */
);
#endif
/*-----------------------------------------------------------------*
 * fft15_shift2()
 * 15-point FFT with 2-point circular shift
@@ -2438,6 +2440,7 @@ static void cftmdl(
    return;
}

#ifndef HARMONIZE_DCT
static void cftbsub(
    Word16 n,       // Q0
    Word32 *a,      // Qx
@@ -2733,6 +2736,7 @@ void edct2_fx_ivas(
        }
    }
}
#endif

void DoRTFTn_fx_ivas(
    Word32 *x,     /* i/o: real part of input and output data Qx      */
@@ -2740,7 +2744,6 @@ void DoRTFTn_fx_ivas(
    const Word16 n /* i  : size of the FFT up to 1024 Q0*/
)
{

    Word16 i;
    Word32 z[2048];

@@ -6427,6 +6430,7 @@ static void fft_lenN(
 * Complex-value FFT
 *-----------------------------------------------------------------*/

#ifndef HARMONIZE_DCT
void fft_fx(
    Word32 *re,          /* i/o: real part Qx          */
    Word32 *im,          /* i/o: imag part Qx          */
@@ -6434,6 +6438,7 @@ void fft_fx(
    const Word16 s       /* i  : sign                */
)
{

    cmplx x[960];

    FOR( Word16 j = 0; j < length; j++ )
@@ -6511,6 +6516,73 @@ void fft_fx(

    return;
}
#else
/* In-place complex FFT dispatcher (HARMONIZE_DCT variant).                */
/* Selects the transform routine for the requested length; unsupported     */
/* lengths trigger the assert in the default branch.                       */
/* Cases are grouped by the rotation-vector table they use. In every       */
/* fft_lenN() call the 4th and 5th arguments multiply to 'length'.         */
void fft_fx(
    cmplx *x,           /* i/o: complex data        */
    const Word16 length /* i  : length of fft       */
)
{
    SWITCH( length )
    {
        /* dedicated 20-point routine */
        case 20:
            fft_len20_fx( x );
            BREAK;

        /* lengths served by FFT_RotVector_256_fx */
        case 64:
            fft_lenN( x, FFT_RotVector_256_fx, 256, 8, 8, 8, 64 );
            BREAK;
        case 128:
            fft_lenN( x, FFT_RotVector_256_fx, 256, 16, 8, 4, 64 );
            BREAK;
        case 256:
            fft_lenN( x, FFT_RotVector_256_fx, 256, 32, 8, 2, 64 );
            BREAK;

        /* lengths served by FFT_RotVector_400_fx */
        case 100:
            fft_lenN( x, FFT_RotVector_400_fx, 400, 10, 10, 4, 40 );
            BREAK;
        case 200:
            fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 10, 2, 40 );
            BREAK;
        case 400:
            fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 20, 2, 40 );
            BREAK;

        /* length served by FFT_RotVector_600_fx */
        case 600:
            fft_lenN( x, FFT_RotVector_600_fx, 600, 30, 20, 2, 60 );
            BREAK;

        /* lengths served by FFT_RotVector_640_fx */
        case 40:
            fft_lenN( x, FFT_RotVector_640_fx, 640, 5, 8, 8, 40 );
            BREAK;
        case 80:
            fft_lenN( x, FFT_RotVector_640_fx, 640, 10, 8, 4, 40 );
            BREAK;
        case 160:
            fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 8, 2, 40 );
            BREAK;
        case 320:
            fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 16, 2, 40 );
            BREAK;
        case 640:
            fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 32, 2, 40 );
            BREAK;

        /* lengths served by FFT_RotVector_960_fx */
        case 120:
            fft_lenN( x, FFT_RotVector_960_fx, 960, 15, 8, 4, 60 );
            BREAK;
        case 240:
            fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 8, 2, 60 );
            BREAK;
        case 480:
            fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 16, 2, 60 );
            BREAK;
        case 960:
            fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 32, 2, 60 );
            BREAK;

        default:
            assert( !"fft length is not supported!" );
    }

    return;
}
#endif


void rfft_fx(
    Word32 *x,           /* i/o: values Qx                   */
@@ -6522,6 +6594,9 @@ void rfft_fx(
    Word16 i, sizeOfFft2, sizeOfFft4;
    Word32 tmp, t1, t2, t3, t4;
    Word16 s1, s2;
#ifdef HARMONIZE_DCT
    cmplx spec[L_FRAME48k];
#endif

    sizeOfFft2 = shr( length, 1 );
    sizeOfFft4 = shr( length, 2 );
@@ -6592,10 +6667,43 @@ void rfft_fx(

    SWITCH( isign )
    {

        case -1:

#ifdef HARMONIZE_DCT
            FOR( i = 0; i < sizeOfFft2; i++ )
            {
                spec[i].re = x[2 * i];
                move32();
                spec[i].im = x[2 * i + 1];
                move32();
            }

            fft_fx( spec, sizeOfFft2 );

            FOR( i = 0; i < sizeOfFft4; i++ )
            {
                x[2 * i] = spec[i].re;
                move32();
                x[2 * i + 1] = spec[sizeOfFft2 - i - 1].re;
                move32();

                x[2 * i] = spec[i].im;
                move32();
                x[2 * i + 1] = L_negate( spec[sizeOfFft2 - i - 1].im );
                move32();
            }

            FOR( i = 0; i < sizeOfFft2; i++ )
            {
                x[2 * i] = spec[i].re;
                move32();
                x[2 * i + 1] = spec[i].im;
                move32();
            }
#else
            fft_fx( x, x + 1, sizeOfFft2, 2 );
#endif

            // Qx
            tmp = L_add( x[0], x[1] );
            x[1] = L_sub( x[0], x[1] ); // Qx
@@ -6651,7 +6759,27 @@ void rfft_fx(
                move32();
            }

#ifdef HARMONIZE_DCT
            FOR( i = 0; i < sizeOfFft2; i++ )
            {
                spec[i].re = x[2 * i];
                move32();
                spec[i].im = x[2 * i + 1];
                move32();
            }

            fft_fx( spec, sizeOfFft2 );

            FOR( i = 0; i < sizeOfFft2; i++ )
            {
                x[2 * i] = spec[i].re;
                move32();
                x[2 * i + 1] = spec[i].im;
                move32();
            }
#else
            fft_fx( x, x + 1, sizeOfFft2, 2 );
#endif

            FOR( i = 0; i < length; i += 2 )
            {
+73 −24
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@
#include <assert.h>

/*-----------------------------------------------------------------*
 * Local functions
 * Local constants
 *-----------------------------------------------------------------*/

#define FFT3_ONE_THIRD 21845 /* 1/3 in Q16 */
@@ -19,6 +19,10 @@
#define KP951056516_16FX 2042378325 /* EDCT & EMDCT constants Q31*/
#define KP587785252_16FX 1262259213 /* EDCT & EMDCT constants Q31*/

/*-----------------------------------------------------------------*
 * Local function prototypes
 *-----------------------------------------------------------------*/

static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx );
static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
static void fft32_5_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
@@ -32,11 +36,6 @@ static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx );
static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w );

#include "math_32.h"

/*-----------------------------------------------------------------*
 * Local functions
 *-----------------------------------------------------------------*/
static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w );
static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a );
static void cftfsub_fx( Word16 n, Word32 *a, const Word16 *w );
@@ -44,16 +43,39 @@ static void cft1st_fx( Word16 n, Word32 *a, const Word16 *w );
static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w );


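/*-----------------------------------------------------------------*
 * DoRTFTn_fx()
 *
 * FFT of size n (up to 1024) on 32-bit data, processed through a
 * local interleaved (re, im) work buffer
 *-----------------------------------------------------------------*/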

void DoRTFTn_fx(
    Word32 *x, /* i/o : real part of input and output data Q(x)      */
    Word32 *y, /* i/o : imaginary part of input and output data Q(x) */
#ifdef OPTIMIZE_FFT_STACK
    cmplx *spec, /* i/o : complex input and output data                */
#endif
    const Word16 n /* i : size of the FFT up to 1024                     */
)
{

    Word16 i;
    Word32 z[2048], *pt;

#ifdef OPTIMIZE_FFT_STACK
    IF( spec != NULL )
    {
        pt = z;
        FOR( i = 0; i < n; i++ )
        {
            *pt++ = spec[i].re;
            move16();
            *pt++ = spec[i].im;
            move16();
        }
    }
    ELSE
    {
#endif
        pt = z;
        FOR( i = 0; i < n; i++ )
        {
@@ -62,6 +84,9 @@ void DoRTFTn_fx(
            *pt++ = y[i];
            move16();
        }
#ifdef OPTIMIZE_FFT_STACK
    }
#endif

    IF( EQ_16( n, 16 ) )
    {
@@ -92,6 +117,25 @@ void DoRTFTn_fx(
        assert( 0 );
    }

#ifdef OPTIMIZE_FFT_STACK
    IF( spec != NULL )
    {
        spec[0].re = z[0];
        move16();
        spec[0].im = z[1];
        move16();
        pt = &z[2];
        FOR( i = n - 1; i >= 1; i-- )
        {
            spec[i].re = *pt++;
            move16();
            spec[i].im = *pt++;
            move16();
        }
    }
    ELSE
    {
#endif
        x[0] = z[0];
        move16();
        y[0] = z[1];
@@ -104,6 +148,9 @@ void DoRTFTn_fx(
            y[i] = *pt++;
            move16();
        }
#ifdef OPTIMIZE_FFT_STACK
    }
#endif

    return;
}
@@ -124,6 +171,8 @@ static void cdftForw_fx(

    /* Do FFT */
    cftfsub_fx( n, a, w );

    return;
}

/*-----------------------------------------------------------------*
Loading