OPTIMIZE_FFT_STACK (78c80a0c) · Commits · SA4 / Audio / IVAS BASOP

lib_com/edct_fx.c

+178 −15

Original line number	Diff line number	Diff line
		@@ -11,7 +11,9 @@
		#include "stl.h"
		#include "math_32.h"

		static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */

		static Word16 get_edxt_factor(
		const Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */
		{
		Word16 factor; /Q15/
		factor = 0;
		@@ -53,8 +55,12 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len
		}
		ELSE IF( EQ_16( length, 40 ) )
		{
		#ifdef OPTIMIZE_FFT_STACK
		assert( 0 );
		#else
		factor = 7327; /0.223 in Q15/
		move16();
		#endif
		}
		ELSE IF( EQ_16( length, 960 ) )
		{
		@@ -73,33 +79,57 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len
		}
		ELSE IF( EQ_16( length, 120 ) )
		{
		#ifdef OPTIMIZE_FFT_STACK
		assert( 0 );
		#else
		factor = 4230; /0.1290 in Q15/
		move16();
		#endif
		}
		ELSE IF( EQ_16( length, 1200 ) )
		{
		#ifdef OPTIMIZE_FFT_STACK
		assert( 0 );
		#else
		factor = 1338; /0.040 in Q15/
		move16();
		#endif
		}
		ELSE IF( EQ_16( length, 800 ) )
		{
		#ifdef OPTIMIZE_FFT_STACK
		assert( 0 );
		#else
		factor = 1638; /0.05 in Q15/
		move16();
		#endif
		}
		ELSE IF( EQ_16( length, 400 ) )
		{
		#ifdef OPTIMIZE_FFT_STACK
		assert( 0 );
		#else
		factor = 2317; /0.070 in Q15/
		move16();
		#endif
		}
		ELSE IF( EQ_16( length, 200 ) )
		{
		#ifdef OPTIMIZE_FFT_STACK
		assert( 0 );
		#else
		factor = 3277; /0.1 in Q15/
		move16();
		#endif
		}

		return factor; /Q15/
		}

		static Word16 const get_edct_table( Word16 length /Q0/, Word16 q )

		static Word16 const *get_edct_table(
		const Word16 length /Q0/,
		Word16 *q )
		{
		Word16 const *edct_table;
		edct_table = NULL;
		@@ -590,6 +620,7 @@ void edxt_fx(
		move16();
		cosPtr = NULL;
		sinPtr = NULL;

		IF( EQ_16( length, 512 ) )
		{
		cosPtr = cos_scale_tbl_512; /Q15/
		@@ -641,10 +672,14 @@ void edxt_fx(
		}
		ELSE IF( EQ_16( length, 40 ) )
		{
		#ifdef OPTIMIZE_FFT_STACK
		assert( 0 );
		#else
		cosPtr = cos_scale_tbl_640; /Q15/
		sinPtr = sin_scale_tbl_640; /Q15/
		n = 16;
		move16();
		#endif
		}
		ELSE IF( EQ_16( length, 960 ) )
		{
		@@ -669,38 +704,58 @@ void edxt_fx(
		}
		ELSE IF( EQ_16( length, 120 ) )
		{
		#ifdef OPTIMIZE_FFT_STACK
		assert( 0 );
		#else
		cosPtr = cos_scale_tbl_960; /Q15/
		sinPtr = sin_scale_tbl_960; /Q15/
		n = 8;
		move16();
		#endif
		}
		ELSE IF( EQ_16( length, 1200 ) )
		{
		#ifdef OPTIMIZE_FFT_STACK
		assert( 0 );
		#else
		cosPtr = cos_scale_tbl_1200; /Q15/
		sinPtr = sin_scale_tbl_1200; /Q15/
		n = 1;
		move16();
		#endif
		}
		ELSE IF( EQ_16( length, 800 ) )
		{
		#ifdef OPTIMIZE_FFT_STACK
		assert( 0 );
		#else
		cosPtr = cos_scale_tbl_800; /Q15/
		sinPtr = sin_scale_tbl_800; /Q15/
		n = 1;
		move16();
		#endif
		}
		ELSE IF( EQ_16( length, 400 ) )
		{
		#ifdef OPTIMIZE_FFT_STACK
		assert( 0 );
		#else
		cosPtr = cos_scale_tbl_800; /Q15/
		sinPtr = sin_scale_tbl_800; /Q15/
		n = 2;
		move16();
		#endif
		}
		ELSE IF( EQ_16( length, 200 ) )
		{
		#ifdef OPTIMIZE_FFT_STACK
		assert( 0 );
		#else
		cosPtr = cos_scale_tbl_800; /Q15/
		sinPtr = sin_scale_tbl_800; /Q15/
		n = 4;
		move16();
		#endif
		}

		test();
		@@ -708,16 +763,26 @@ void edxt_fx(
		{
		const Word16 Nm1 = sub( length, 1 );
		const Word16 xSign = sub( imult1616( 2, shr( kernelType, 1 ) ), 1 ); /Q0/
		#ifdef OPTIMIZE_FFT_STACK
		cmplx spec[L_FRAME_MAX];
		#else
		Word32 re[L_FRAME_PLUS];
		Word32 im[L_FRAME_PLUS];
		#endif

		IF( !synthesis )
		{
		FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* pre-modulation of audio input */
		{
		#ifdef OPTIMIZE_FFT_STACK
		spec[k].re = x[2 * k]; /Qx/
		spec[( Nm1 - k )].re = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /Qx/
		spec[k].im = spec[( Nm1 - k )].im = 0;
		#else
		re[k] = x[2 * k]; /Qx/
		re[( Nm1 - k )] = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /Qx/
		im[k] = im[( Nm1 - k )] = 0;
		#endif
		move32();
		move32();
		move32();
		@@ -726,26 +791,46 @@ void edxt_fx(
		IF( EQ_16( length, 512 ) )
		{
		/* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */
		#ifdef OPTIMIZE_FFT_STACK
		hdrm = L_norm_arr_cmplx( spec, 512 );
		#else
		hdrm = s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) );
		#endif
		IF( LT_16( hdrm, 4 ) )
		{
		tmp = sub( hdrm, 4 );
		#ifdef OPTIMIZE_FFT_STACK
		scale_sig32_cmplx( spec, 512, tmp );
		#else
		scale_sig32( re, 512, tmp );
		scale_sig32( im, 512, tmp );
		#endif
		}

		#ifdef OPTIMIZE_FFT_STACK
		DoRTFTn_fx( NULL, NULL, spec, 512 );
		#else
		DoRTFTn_fx( re, im, 512 );
		#endif

		IF( LT_16( hdrm, 4 ) )
		{
		tmp = negate( tmp );
		#ifdef OPTIMIZE_FFT_STACK
		scale_sig32_cmplx( spec, 512, tmp );
		#else
		scale_sig32( re, 512, tmp );
		scale_sig32( im, 512, tmp );
		#endif
		}
		}
		ELSE /* fft() doesn't support 512 */
		{
		#ifdef OPTIMIZE_FFT_STACK
		fft_cmplx_fx( spec, length );
		#else
		fft_fx( re, im, length, 1 );
		#endif
		}

		IF( shr( kernelType, 1 ) )
		@@ -757,12 +842,21 @@ void edxt_fx(
		const Word16 wRe = cosPtr[( k * n )]; /Q15/
		const Word16 wIm = sinPtr[( k * n )]; /Q15/

		#ifdef OPTIMIZE_FFT_STACK
		y[k] /pt 1/ = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /Qx/
		y[( length - k )] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /Qx/
		#else
		y[k] /pt 1/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /Qx/
		y[( length - k )] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /Qx/
		#endif
		move32();
		move32();
		}
		#ifdef OPTIMIZE_FFT_STACK
		y[( length / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /Qx/
		#else
		y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /Qx/
		#endif
		move32();
		}
		ELSE /* forw. DST-II */
		@@ -774,16 +868,29 @@ void edxt_fx(
		const Word16 wRe = cosPtr[( k * n )]; /Q15/
		const Word16 wIm = sinPtr[( k * n )]; /Q15/

		#ifdef OPTIMIZE_FFT_STACK
		y[( Nm1 - k )] = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /Qx/
		y[k - 1] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /Qx/
		#else
		y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /Qx/
		y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /Qx/
		#endif
		move32();
		move32();
		}
		#ifdef OPTIMIZE_FFT_STACK
		y[( Nm1 / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /Qx/
		#else
		y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /Qx/
		#endif
		move32();
		}

		#ifdef OPTIMIZE_FFT_STACK
		y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( spec[0].re, 1 ); /Qx/
		#else
		y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 ); /Qx/
		#endif
		move32();
		}
		ELSE /* inverse II = III */
		@@ -797,12 +904,21 @@ void edxt_fx(
		const Word16 wRe = shr( cosPtr[imult1616( k, n )], 1 );
		const Word16 wIm = shr( sinPtr[imult1616( k, n )], 1 );

		#ifdef OPTIMIZE_FFT_STACK
		spec[k].re = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /Qx/
		spec[k].im = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /Qx/
		#else
		re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /Qx/
		im[k] = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /Qx/
		#endif
		move32();
		move32();
		}
		#ifdef OPTIMIZE_FFT_STACK
		spec[( length / 2 )].re = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /Qx/
		#else
		re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /Qx/
		#endif
		move32();
		}
		ELSE /* DST type III */
		@@ -814,23 +930,42 @@ void edxt_fx(
		const Word16 wRe = shr( cosPtr[( k * n )], 1 ); /Q15/
		const Word16 wIm = shr( sinPtr[( k * n )], 1 ); /Q15/

		#ifdef OPTIMIZE_FFT_STACK
		spec[k].re = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /Qx/
		spec[k].im = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /Qx/
		#else
		re[k] = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /Qx/
		im[k] = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /Qx/
		#endif
		move32();
		move32();
		}
		#ifdef OPTIMIZE_FFT_STACK
		spec[( length / 2 )].re = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /Qx/
		#else
		re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /Qx/
		#endif
		move32();
		}

		#ifdef OPTIMIZE_FFT_STACK
		spec[0].re = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /Qx/
		spec[0].im = spec[( length / 2 )].im = 0;
		#else
		re[0] = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /Qx/
		im[0] = im[( length / 2 )] = 0;
		#endif
		move32();
		move32();
		FOR( k = ( Nm1 / 2 ); k > 0; k-- )
		{
		#ifdef OPTIMIZE_FFT_STACK
		spec[( length - k )].re = spec[k].re; /Qx/
		spec[( length - k )].im = L_negate( spec[k].im ); /Qx/
		#else
		re[( length - k )] = re[k]; /Qx/
		im[( length - k )] = L_negate( im[k] ); /Qx/
		#endif
		move32();
		move32();
		}
		@@ -838,35 +973,63 @@ void edxt_fx(
		IF( EQ_16( length, 512 ) )
		{
		/* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */
		#ifdef OPTIMIZE_FFT_STACK
		hdrm = L_norm_arr_cmplx( spec, 512 );
		#else
		hdrm = s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) );
		#endif
		IF( LT_16( hdrm, 4 ) )
		{
		tmp = sub( hdrm, 4 );
		#ifdef OPTIMIZE_FFT_STACK
		scale_sig32_cmplx( spec, 512, tmp );
		#else
		scale_sig32( re, 512, tmp );
		scale_sig32( im, 512, tmp );
		#endif
		}

		#ifdef OPTIMIZE_FFT_STACK
		DoRTFTn_fx( NULL, NULL, spec, 512 );
		#else
		DoRTFTn_fx( re, im, 512 );
		#endif

		IF( LT_16( hdrm, 4 ) )
		{
		tmp = negate( tmp );
		#ifdef OPTIMIZE_FFT_STACK
		scale_sig32_cmplx( spec, 512, tmp );
		#else
		scale_sig32( re, 512, tmp );
		scale_sig32( im, 512, tmp );
		#endif
		}
		}
		ELSE /* fft() doesn't support 512 */
		{
		#ifdef OPTIMIZE_FFT_STACK
		fft_cmplx_fx( spec, length );
		#else
		fft_fx( re, im, length, 1 );
		#endif
		}

		FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* post-modulation of FFT output */
		{
		#ifdef OPTIMIZE_FFT_STACK
		y[2 * k] = spec[k].re; /Qx/
		#else
		y[2 * k] = re[k]; /Qx/
		#endif
		move32();
		IF( xSign != 0 )
		{
		#ifdef OPTIMIZE_FFT_STACK
		y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( spec[( Nm1 - k )].re, shl_sat( xSign, 15 ) ); /Qx/
		#else
		y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( re[( Nm1 - k )], shl_sat( xSign, 15 ) ); /Qx/
		#endif
		}
		ELSE
		{

lib_com/fft_fx.c

+70 −0

Original line number	Diff line number	Diff line
		@@ -6932,6 +6932,7 @@ void fft_fx(
		const Word16 s /* i : sign */
		)
		{

		cmplx x[960];

		FOR( Word16 j = 0; j < length; j++ )
		@@ -7010,6 +7011,75 @@ void fft_fx(
		return;
		}


		#ifdef OPTIMIZE_FFT_STACK
		/*-----------------------------------------------------------------*
		 * fft_cmplx_fx()
		 *
		 * Complex FFT on an interleaved cmplx buffer. The buffer is both
		 * input and output. The transform length selects the kernel:
		 * length 20 uses fft_len20_fx(), every other supported length is
		 * forwarded to fft_lenN() with a length-specific rotation table
		 * and factor set. An unsupported length triggers the assert in
		 * the default case.
		 *-----------------------------------------------------------------*/
		void fft_cmplx_fx(
		    cmplx *x,           /* i/o: complex data  */
		    const Word16 length /* i  : length of fft */
		)
		{
		    SWITCH( length )
		    {
		        case 20:
		            fft_len20_fx( x );
		            BREAK;
		        case 40:
		            fft_lenN( x, FFT_RotVector_640_fx, 640, 5, 8, 8, 40 );
		            BREAK;
		        case 64:
		            fft_lenN( x, FFT_RotVector_256_fx, 256, 8, 8, 8, 64 );
		            BREAK;
		        case 80:
		            fft_lenN( x, FFT_RotVector_640_fx, 640, 10, 8, 4, 40 );
		            BREAK;
		        case 100:
		            fft_lenN( x, FFT_RotVector_400_fx, 400, 10, 10, 4, 40 );
		            BREAK;
		        case 120:
		            fft_lenN( x, FFT_RotVector_960_fx, 960, 15, 8, 4, 60 );
		            BREAK;
		        case 128:
		            fft_lenN( x, FFT_RotVector_256_fx, 256, 16, 8, 4, 64 );
		            BREAK;
		        case 160:
		            fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 8, 2, 40 );
		            BREAK;
		        case 200:
		            fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 10, 2, 40 );
		            BREAK;
		        case 240:
		            fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 8, 2, 60 );
		            BREAK;
		        case 256:
		            fft_lenN( x, FFT_RotVector_256_fx, 256, 32, 8, 2, 64 );
		            BREAK;
		        case 320:
		            fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 16, 2, 40 );
		            BREAK;
		        case 400:
		            fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 20, 2, 40 );
		            BREAK;
		        case 480:
		            fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 16, 2, 60 );
		            BREAK;
		        case 600:
		            fft_lenN( x, FFT_RotVector_600_fx, 600, 30, 20, 2, 60 );
		            BREAK;
		        case 640:
		            fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 32, 2, 40 );
		            BREAK;
		        case 960:
		            fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 32, 2, 60 );
		            BREAK;
		        default:
		            /* no kernel is registered for this length */
		            assert( !"fft length is not supported!" );
		    }

		    return;
		}
		#endif


		void rfft_fx(
		Word32 x, / i/o: values Qx */
		const Word16 w, / i : window Q15 */

lib_com/fft_fx_evs.c

+46 −18

Original line number	Diff line number	Diff line
		@@ -47,13 +47,30 @@ static void cftmdl_fx( Word16 n, Word16 l, Word32 a, const Word16 w );
		void DoRTFTn_fx(
		Word32 x, / i/o : real part of input and output data Q(x) */
		Word32 y, / i/o : imaginary part of input and output data Q(x) */
		#ifdef OPTIMIZE_FFT_STACK
		cmplx spec, / i/o : complex input and output data */
		#endif
		const Word16 n /* i : size of the FFT up to 1024 */
		)
		{

		Word16 i;
		Word32 z[2048], *pt;

		#ifdef OPTIMIZE_FFT_STACK
		IF( spec != NULL )
		{
		pt = z;
		FOR( i = 0; i < n; i++ )
		{
		*pt++ = spec[i].re;
		move16();
		*pt++ = spec[i].im;
		move16();
		}
		}
		ELSE
		{
		#endif
		pt = z;
		FOR( i = 0; i < n; i++ )
		{
		@@ -62,6 +79,9 @@ void DoRTFTn_fx(
		*pt++ = y[i];
		move16();
		}
		#ifdef OPTIMIZE_FFT_STACK
		}
		#endif

		IF( EQ_16( n, 16 ) )
		{
		@@ -92,6 +112,10 @@ void DoRTFTn_fx(
		assert( 0 );
		}

		#ifdef OPTIMIZE_FFT_STACK
		IF( spec == NULL )
		{
		#endif
		x[0] = z[0];
		move16();
		y[0] = z[1];
		@@ -104,7 +128,9 @@ void DoRTFTn_fx(
		y[i] = *pt++;
		move16();
		}

		#ifdef OPTIMIZE_FFT_STACK
		}
		#endif
		return;
		}

		@@ -124,6 +150,8 @@ static void cdftForw_fx(

		/* Do FFT */
		cftfsub_fx( n, a, w );

		return;
		}

		/-----------------------------------------------------------------

lib_com/options.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -95,6 +95,7 @@
		#define FIX_1525_UNINIT_FORMAT_SWITCHING_DEC /* VA: float issue 1525: fix reading of uninitialized memory in format switching at the decoder */
		#define HARMONIZE_2446_CON_TCX_FX /* FhG: basop issue: 2446 harmonization of function con_tcx_fx() */
		#define FIX_2433_ARITH_OVERFLOW_IN_QMETA_ENC /* Nokia: Fix to convert non-converted binary operations */
		#define OPTIMIZE_FFT_STACK /* use a single cmplx work buffer instead of separate re[]/im[] arrays in edxt_fx(); adds fft_cmplx_fx() and a cmplx argument to DoRTFTn_fx() */

		/* #################### End BE switches ################################## */

lib_com/prot_fx.h

+23 −2

Original line number	Diff line number	Diff line
		@@ -1303,6 +1303,14 @@ void scale_sig32(
		const Word16 exp0 /* i : exponent: x = round(x << exp) Qx xx exp */
		);

		#ifdef OPTIMIZE_FFT_STACK
		/* Scales a signal held in a cmplx buffer; cmplx-buffer variant of scale_sig32(). */
		/* NOTE(review): lg presumably counts cmplx elements (edxt_fx passes 512 for a 512-point buffer) - confirm against the definition. */
		void scale_sig32_cmplx(
		cmplx x[], /* i/o: signal to scale Qx */
		const Word16 lg, /* i : size of x[] Q0 */
		const Word16 exp0 /* i : exponent: x = round(x << exp) Qx exp */
		);

		#endif
		void Scale_sig64(
		Word64 x[], /* i/o: signal to scale Qx */
		Word16 len, /* i : size of x[] Q0 */
		@@ -4052,6 +4060,9 @@ void BASOP_rfft( Word32 x, Word16 sizeOfFft, Word16 scale, Word16 isign );
		void DoRTFTn_fx(
		Word32 x, / i/o : real part of i and output data */
		Word32 y, / i/o : imaginary part of i and output data */
		#ifdef OPTIMIZE_FFT_STACK
		cmplx spec, / i/o : complex input and output data */
		#endif
		const Word16 n /* i : size of the FFT up to 1024 */
		);

		@@ -4113,6 +4124,13 @@ void fft_fx(
		const Word16 s /* i : sign */
		);

		#ifdef OPTIMIZE_FFT_STACK
		/* Complex FFT on a cmplx buffer that is both input and output; the kernel is selected by length. */
		void fft_cmplx_fx(
		cmplx *spec, /* i/o: complex data */
		const Word16 length /* i : length of fft */
		);

		#endif
		void rfft_fx(
		Word32 x, / i/o: values */
		const Word16 w, / i : window */
		@@ -4130,6 +4148,9 @@ void DoRTFTn_fx_ivas(
		Word16 find_guarded_bits_fx( const Word32 n );

		Word16 L_norm_arr( const Word32 *arr, Word16 size );
		#ifdef OPTIMIZE_FFT_STACK
		/* NOTE(review): presumably the headroom over both re and im of arr[]; edxt_fx() uses it in place of s_min( L_norm_arr( re ), L_norm_arr( im ) ) - confirm against the definition. */
		Word16 L_norm_arr_cmplx( const cmplx *arr, Word16 size );
		#endif
		Word16 norm_arr( Word16 *arr, Word16 size );
		Word16 W_norm_arr( Word64 *arr, Word16 size );