Merge branch 'OPTIMIZE_FFT_STACK' into 'main' (a29bd934) · Commits · SA4 / Audio / IVAS BASOP

lib_com/basop_util.c

+60 −0

Original line number	Diff line number	Diff line
		@@ -714,6 +714,66 @@ Word16 getScaleFactor16( /* o: measured headroom in range [
		return i;
		}

		#ifdef OPTIMIZE_FFT_STACK
		/*
		 * getScaleFactor32_cmplx()
		 *
		 * Scans the real and the imaginary parts of a complex array, keeps the
		 * largest non-negative and the most negative value of each part, and
		 * returns the smaller of the norm_l() results obtained for the real and
		 * for the imaginary extremes.
		 */
		/* o: measured headroom in range [0..31], 0 if all x[i] == 0 */
		Word16 getScaleFactor32_cmplx(
		    cmplx *x,          /* i: array containing 32-bit data */
		    const Word16 len_x /* i: length of the array to scan */
		)
		{
		    Word16 i, i_min, i_max, i_re, i_im;
		    Word32 x_min_re, x_max_re, x_min_im, x_max_im;

		    x_max_re = 0;
		    move32();
		    x_min_re = 0;
		    move32();
		    x_max_im = 0;
		    move32();
		    x_min_im = 0;
		    move32();

		    /* Track positive and negative extremes separately for each component. */
		    FOR( i = 0; i < len_x; i++ )
		    {
		        if ( x[i].re >= 0 )
		            x_max_re = L_max( x_max_re, x[i].re );
		        if ( x[i].re < 0 )
		            x_min_re = L_min( x_min_re, x[i].re );
		        if ( x[i].im >= 0 )
		            x_max_im = L_max( x_max_im, x[i].im );
		        if ( x[i].im < 0 )
		            x_min_im = L_min( x_min_im, x[i].im );
		    }

		    /* 0x20 stands for "no non-zero value of this sign was seen". */
		    i_max = 0x20;
		    move16();
		    i_min = 0x20;
		    move16();

		    if ( x_max_re != 0 )
		        i_max = norm_l( x_max_re );

		    if ( x_min_re != 0 )
		        i_min = norm_l( x_min_re );

		    /* The 0x1F mask maps a remaining 0x20 (all real parts zero) to 0. */
		    i_re = s_and( s_min( i_max, i_min ), 0x1F );

		    /* Same procedure for the imaginary parts. */
		    i_max = 0x20;
		    move16();
		    i_min = 0x20;
		    move16();

		    if ( x_max_im != 0 )
		        i_max = norm_l( x_max_im );

		    if ( x_min_im != 0 )
		        i_min = norm_l( x_min_im );

		    i_im = s_and( s_min( i_max, i_min ), 0x1F );

		    /* The component with less headroom limits the whole array. */
		    return s_min( i_re, i_im );
		}
		#endif


		/********************************************************************/
		/*!

lib_com/basop_util.h

+12 −3

Original line number	Diff line number	Diff line
		@@ -216,10 +216,19 @@ void BASOP_Util_Sqrt_InvSqrt_MantExp( Word16 mantissa, /!< mantissa /
		and -32768 <= x <= -16384 for negative x
		*/

		Word16 getScaleFactor16( /* o: measured headroom in range [0..15], 0 if all x[i] == 0 */
		/* o: measured headroom in range [0..15], 0 if all x[i] == 0 */
		Word16 getScaleFactor16(
		    const Word16 *x,      /* i: array containing 16-bit data */
		    const Word16 len_x ); /* i: length of the array to scan */

		#ifdef OPTIMIZE_FFT_STACK
		/* o: measured headroom in range [0..31], 0 if all x[i] == 0 */
		Word16 getScaleFactor32_cmplx(
		    cmplx *x,          /* i: array containing 32-bit data */
		    const Word16 len_x /* i: length of the array to scan */
		);
		#endif

		/********************************************************************/
		/*!
		\brief Calculates the scalefactor needed to normalize input array

lib_com/edct_fx.c

+316 −26

File changed.

Preview size limit exceeded, changes collapsed.

lib_com/fft_fx.c

+130 −2

Original line number	Diff line number	Diff line
		@@ -109,11 +109,13 @@ static void fft5_8( Word16 n1, Word32 zRe, Word32 zIm, const Word16 *Idx );
		static void fft4_5( Word32 *x, Word32 *y, const Word16 *Idx );
		static void fft5_4( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx );

		#ifndef HARMONIZE_DCT
		void DoRTFTn_fx_ivas(
		Word32 x, / i/o: real part of input and output data */
		Word32 y, / i/o: imaginary part of input and output data */
		const Word16 n /* i : size of the FFT n=(2^k) up to 1024 */
		);
		#endif
		/*-----------------------------------------------------------------*
		* fft15_shift2()
		* 15-point FFT with 2-point circular shift
		@@ -2438,6 +2440,7 @@ static void cftmdl(
		return;
		}

		#ifndef HARMONIZE_DCT
		static void cftbsub(
		Word16 n, // Q0
		Word32 *a, // Qx
		@@ -2733,6 +2736,7 @@ void edct2_fx_ivas(
		}
		}
		}
		#endif

		void DoRTFTn_fx_ivas(
		Word32 x, / i/o: real part of input and output data Qx */
		@@ -2740,7 +2744,6 @@ void DoRTFTn_fx_ivas(
		const Word16 n /* i : size of the FFT up to 1024 Q0*/
		)
		{

		Word16 i;
		Word32 z[2048];

		@@ -6427,6 +6430,7 @@ static void fft_lenN(
		* Complex-value FFT
		 *-----------------------------------------------------------------*/

		#ifndef HARMONIZE_DCT
		void fft_fx(
		Word32 re, / i/o: real part Qx */
		Word32 im, / i/o: imag part Qx */
		@@ -6434,6 +6438,7 @@ void fft_fx(
		const Word16 s /* i : sign */
		)
		{

		cmplx x[960];

		FOR( Word16 j = 0; j < length; j++ )
		@@ -6511,6 +6516,73 @@ void fft_fx(

		return;
		}
		#else
		void fft_fx(
		cmplx x, / i/o: complex data */
		const Word16 length /* i : length of fft */
		)
		{
		SWITCH( length )
		{
		case 20:
		fft_len20_fx( x );
		BREAK;
		case 40:
		fft_lenN( x, FFT_RotVector_640_fx, 640, 5, 8, 8, 40 );
		BREAK;
		case 64:
		fft_lenN( x, FFT_RotVector_256_fx, 256, 8, 8, 8, 64 );
		BREAK;
		case 80:
		fft_lenN( x, FFT_RotVector_640_fx, 640, 10, 8, 4, 40 );
		BREAK;
		case 100:
		fft_lenN( x, FFT_RotVector_400_fx, 400, 10, 10, 4, 40 );
		BREAK;
		case 120:
		fft_lenN( x, FFT_RotVector_960_fx, 960, 15, 8, 4, 60 );
		BREAK;
		case 128:
		fft_lenN( x, FFT_RotVector_256_fx, 256, 16, 8, 4, 64 );
		BREAK;
		case 160:
		fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 8, 2, 40 );
		BREAK;
		case 200:
		fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 10, 2, 40 );
		BREAK;
		case 240:
		fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 8, 2, 60 );
		BREAK;
		case 256:
		fft_lenN( x, FFT_RotVector_256_fx, 256, 32, 8, 2, 64 );
		BREAK;
		case 320:
		fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 16, 2, 40 );
		BREAK;
		case 400:
		fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 20, 2, 40 );
		BREAK;
		case 480:
		fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 16, 2, 60 );
		BREAK;
		case 600:
		fft_lenN( x, FFT_RotVector_600_fx, 600, 30, 20, 2, 60 );
		BREAK;
		case 640:
		fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 32, 2, 40 );
		BREAK;
		case 960:
		fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 32, 2, 60 );
		BREAK;
		default:
		assert( !"fft length is not supported!" );
		}

		return;
		}
		#endif


		void rfft_fx(
		Word32 x, / i/o: values Qx */
		@@ -6522,6 +6594,9 @@ void rfft_fx(
		Word16 i, sizeOfFft2, sizeOfFft4;
		Word32 tmp, t1, t2, t3, t4;
		Word16 s1, s2;
		#ifdef HARMONIZE_DCT
		cmplx spec[L_FRAME48k];
		#endif

		sizeOfFft2 = shr( length, 1 );
		sizeOfFft4 = shr( length, 2 );
		@@ -6592,10 +6667,43 @@ void rfft_fx(

		SWITCH( isign )
		{

		case -1:

		#ifdef HARMONIZE_DCT
		FOR( i = 0; i < sizeOfFft2; i++ )
		{
		spec[i].re = x[2 * i];
		move32();
		spec[i].im = x[2 * i + 1];
		move32();
		}

		fft_fx( spec, sizeOfFft2 );

		FOR( i = 0; i < sizeOfFft4; i++ )
		{
		x[2 * i] = spec[i].re;
		move32();
		x[2 * i + 1] = spec[sizeOfFft2 - i - 1].re;
		move32();

		x[2 * i] = spec[i].im;
		move32();
		x[2 * i + 1] = L_negate( spec[sizeOfFft2 - i - 1].im );
		move32();
		}

		FOR( i = 0; i < sizeOfFft2; i++ )
		{
		x[2 * i] = spec[i].re;
		move32();
		x[2 * i + 1] = spec[i].im;
		move32();
		}
		#else
		fft_fx( x, x + 1, sizeOfFft2, 2 );
		#endif

		// Qx
		tmp = L_add( x[0], x[1] );
		x[1] = L_sub( x[0], x[1] ); // Qx
		@@ -6651,7 +6759,27 @@ void rfft_fx(
		move32();
		}

		#ifdef HARMONIZE_DCT
		FOR( i = 0; i < sizeOfFft2; i++ )
		{
		spec[i].re = x[2 * i];
		move32();
		spec[i].im = x[2 * i + 1];
		move32();
		}

		fft_fx( spec, sizeOfFft2 );

		FOR( i = 0; i < sizeOfFft2; i++ )
		{
		x[2 * i] = spec[i].re;
		move32();
		x[2 * i + 1] = spec[i].im;
		move32();
		}
		#else
		fft_fx( x, x + 1, sizeOfFft2, 2 );
		#endif

		FOR( i = 0; i < length; i += 2 )
		{

lib_com/fft_fx_evs.c

+73 −24

Original line number	Diff line number	Diff line
		@@ -10,7 +10,7 @@
		#include <assert.h>

		/*-----------------------------------------------------------------*
		 * Local functions
		 * Local constants
		 *-----------------------------------------------------------------*/

		#define FFT3_ONE_THIRD 21845 /* 1/3 in Q16 */
		@@ -19,6 +19,10 @@
		#define KP951056516_16FX 2042378325 /* EDCT & EMDCT constants Q31*/
		#define KP587785252_16FX 1262259213 /* EDCT & EMDCT constants Q31*/

		/*-----------------------------------------------------------------*
		 * Local function prototypes
		 *-----------------------------------------------------------------*/

		static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx );
		static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
		static void fft32_5_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
		@@ -32,11 +36,6 @@ static void fft64_16fx( Word16 x, Word16 y, const Word16 *Idx );
		static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx );
		static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w );

		#include "math_32.h"

		/*-----------------------------------------------------------------*
		 * Local functions
		 *-----------------------------------------------------------------*/
		static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w );
		static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a );
		static void cftfsub_fx( Word16 n, Word32 *a, const Word16 *w );
		@@ -44,16 +43,39 @@ static void cft1st_fx( Word16 n, Word32 a, const Word16 w );
		static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w );


		/*-----------------------------------------------------------------*
		 * DoRTFTn_fx()
		 *
		 *
		 *-----------------------------------------------------------------*/

		void DoRTFTn_fx(
		Word32 x, / i/o : real part of input and output data Q(x) */
		Word32 y, / i/o : imaginary part of input and output data Q(x) */
		#ifdef OPTIMIZE_FFT_STACK
		cmplx spec, / i/o : complex input and output data */
		#endif
		const Word16 n /* i : size of the FFT up to 1024 */
		)
		{

		Word16 i;
		Word32 z[2048], *pt;

		#ifdef OPTIMIZE_FFT_STACK
		IF( spec != NULL )
		{
		pt = z;
		FOR( i = 0; i < n; i++ )
		{
		*pt++ = spec[i].re;
		move16();
		*pt++ = spec[i].im;
		move16();
		}
		}
		ELSE
		{
		#endif
		pt = z;
		FOR( i = 0; i < n; i++ )
		{
		@@ -62,6 +84,9 @@ void DoRTFTn_fx(
		*pt++ = y[i];
		move16();
		}
		#ifdef OPTIMIZE_FFT_STACK
		}
		#endif

		IF( EQ_16( n, 16 ) )
		{
		@@ -92,6 +117,25 @@ void DoRTFTn_fx(
		assert( 0 );
		}

		#ifdef OPTIMIZE_FFT_STACK
		IF( spec != NULL )
		{
		spec[0].re = z[0];
		move16();
		spec[0].im = z[1];
		move16();
		pt = &z[2];
		FOR( i = n - 1; i >= 1; i-- )
		{
		spec[i].re = *pt++;
		move16();
		spec[i].im = *pt++;
		move16();
		}
		}
		ELSE
		{
		#endif
		x[0] = z[0];
		move16();
		y[0] = z[1];
		@@ -104,6 +148,9 @@ void DoRTFTn_fx(
		y[i] = *pt++;
		move16();
		}
		#ifdef OPTIMIZE_FFT_STACK
		}
		#endif

		return;
		}
		@@ -124,6 +171,8 @@ static void cdftForw_fx(

		/* Do FFT */
		cftfsub_fx( n, a, w );

		return;
		}

		/*-----------------------------------------------------------------*