Loading lib_com/edct_fx.c +178 −15 Original line number Diff line number Diff line Loading @@ -11,7 +11,9 @@ #include "stl.h" #include "math_32.h" static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */ static Word16 get_edxt_factor( const Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */ { Word16 factor; /*Q15*/ factor = 0; Loading Loading @@ -53,8 +55,12 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len } ELSE IF( EQ_16( length, 40 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else factor = 7327; /*0.223 in Q15*/ move16(); #endif } ELSE IF( EQ_16( length, 960 ) ) { Loading @@ -73,33 +79,57 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len } ELSE IF( EQ_16( length, 120 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else factor = 4230; /*0.1290 in Q15*/ move16(); #endif } ELSE IF( EQ_16( length, 1200 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else factor = 1338; /*0.040 in Q15*/ move16(); #endif } ELSE IF( EQ_16( length, 800 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else factor = 1638; /*0.05 in Q15*/ move16(); #endif } ELSE IF( EQ_16( length, 400 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else factor = 2317; /*0.070 in Q15*/ move16(); #endif } ELSE IF( EQ_16( length, 200 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else factor = 3277; /*0.1 in Q15*/ move16(); #endif } return factor; /*Q15*/ } static Word16 const *get_edct_table( Word16 length /*Q0*/, Word16 *q ) static Word16 const *get_edct_table( const Word16 length /*Q0*/, Word16 *q ) { Word16 const *edct_table; edct_table = NULL; Loading Loading @@ -590,6 +620,7 @@ void edxt_fx( move16(); cosPtr = NULL; sinPtr = NULL; IF( EQ_16( length, 512 ) ) { cosPtr = cos_scale_tbl_512; /*Q15*/ Loading Loading @@ -641,10 +672,14 @@ void edxt_fx( } ELSE IF( EQ_16( length, 40 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else cosPtr = cos_scale_tbl_640; /*Q15*/ sinPtr = sin_scale_tbl_640; /*Q15*/ 
n = 16; move16(); #endif } ELSE IF( EQ_16( length, 960 ) ) { Loading @@ -669,38 +704,58 @@ void edxt_fx( } ELSE IF( EQ_16( length, 120 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else cosPtr = cos_scale_tbl_960; /*Q15*/ sinPtr = sin_scale_tbl_960; /*Q15*/ n = 8; move16(); #endif } ELSE IF( EQ_16( length, 1200 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else cosPtr = cos_scale_tbl_1200; /*Q15*/ sinPtr = sin_scale_tbl_1200; /*Q15*/ n = 1; move16(); #endif } ELSE IF( EQ_16( length, 800 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 1; move16(); #endif } ELSE IF( EQ_16( length, 400 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 2; move16(); #endif } ELSE IF( EQ_16( length, 200 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 4; move16(); #endif } test(); Loading @@ -708,16 +763,26 @@ void edxt_fx( { const Word16 Nm1 = sub( length, 1 ); const Word16 xSign = sub( imult1616( 2, shr( kernelType, 1 ) ), 1 ); /*Q0*/ #ifdef OPTIMIZE_FFT_STACK cmplx spec[L_FRAME_MAX]; #else Word32 re[L_FRAME_PLUS]; Word32 im[L_FRAME_PLUS]; #endif IF( !synthesis ) { FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* pre-modulation of audio input */ { #ifdef OPTIMIZE_FFT_STACK spec[k].re = x[2 * k]; /*Qx*/ spec[( Nm1 - k )].re = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/ spec[k].im = spec[( Nm1 - k )].im = 0; #else re[k] = x[2 * k]; /*Qx*/ re[( Nm1 - k )] = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/ im[k] = im[( Nm1 - k )] = 0; #endif move32(); move32(); move32(); Loading @@ -726,26 +791,46 @@ void edxt_fx( IF( EQ_16( length, 512 ) ) { /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */ #ifdef OPTIMIZE_FFT_STACK hdrm = L_norm_arr_cmplx( spec, 512 ); #else hdrm = 
s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) ); #endif IF( LT_16( hdrm, 4 ) ) { tmp = sub( hdrm, 4 ); #ifdef OPTIMIZE_FFT_STACK scale_sig32_cmplx( spec, 512, tmp ); #else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); #endif } #ifdef OPTIMIZE_FFT_STACK DoRTFTn_fx( NULL, NULL, spec, 512 ); #else DoRTFTn_fx( re, im, 512 ); #endif IF( LT_16( hdrm, 4 ) ) { tmp = negate( tmp ); #ifdef OPTIMIZE_FFT_STACK scale_sig32_cmplx( spec, 512, tmp ); #else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); #endif } } ELSE /* fft() doesn't support 512 */ { #ifdef OPTIMIZE_FFT_STACK fft_cmplx_fx( spec, length ); #else fft_fx( re, im, length, 1 ); #endif } IF( shr( kernelType, 1 ) ) Loading @@ -757,12 +842,21 @@ void edxt_fx( const Word16 wRe = cosPtr[( k * n )]; /*Q15*/ const Word16 wIm = sinPtr[( k * n )]; /*Q15*/ #ifdef OPTIMIZE_FFT_STACK y[k] /*pt 1*/ = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ y[( length - k )] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ #else y[k] /*pt 1*/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ y[( length - k )] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ #endif move32(); move32(); } #ifdef OPTIMIZE_FFT_STACK y[( length / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ #else y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } ELSE /* forw. 
DST-II */ Loading @@ -774,16 +868,29 @@ void edxt_fx( const Word16 wRe = cosPtr[( k * n )]; /*Q15*/ const Word16 wIm = sinPtr[( k * n )]; /*Q15*/ #ifdef OPTIMIZE_FFT_STACK y[( Nm1 - k )] = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ y[k - 1] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ #else y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ #endif move32(); move32(); } #ifdef OPTIMIZE_FFT_STACK y[( Nm1 / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ #else y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } #ifdef OPTIMIZE_FFT_STACK y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( spec[0].re, 1 ); /*Qx*/ #else y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 ); /*Qx*/ #endif move32(); } ELSE /* inverse II = III */ Loading @@ -797,12 +904,21 @@ void edxt_fx( const Word16 wRe = shr( cosPtr[imult1616( k, n )], 1 ); const Word16 wIm = shr( sinPtr[imult1616( k, n )], 1 ); #ifdef OPTIMIZE_FFT_STACK spec[k].re = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ spec[k].im = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ #else re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ im[k] = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ #endif move32(); move32(); } #ifdef OPTIMIZE_FFT_STACK spec[( length / 2 )].re = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #else re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } ELSE /* DST type III */ Loading @@ -814,23 +930,42 @@ void edxt_fx( const Word16 wRe = shr( cosPtr[( k * n )], 1 ); /*Q15*/ const Word16 wIm = shr( sinPtr[( k * n )], 1 ); /*Q15*/ #ifdef OPTIMIZE_FFT_STACK 
spec[k].re = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/ spec[k].im = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/ #else re[k] = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/ im[k] = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/ #endif move32(); move32(); } #ifdef OPTIMIZE_FFT_STACK spec[( length / 2 )].re = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #else re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } #ifdef OPTIMIZE_FFT_STACK spec[0].re = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ spec[0].im = spec[( length / 2 )].im = 0; #else re[0] = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ im[0] = im[( length / 2 )] = 0; #endif move32(); move32(); FOR( k = ( Nm1 / 2 ); k > 0; k-- ) { #ifdef OPTIMIZE_FFT_STACK spec[( length - k )].re = spec[k].re; /*Qx*/ spec[( length - k )].im = L_negate( spec[k].im ); /*Qx*/ #else re[( length - k )] = re[k]; /*Qx*/ im[( length - k )] = L_negate( im[k] ); /*Qx*/ #endif move32(); move32(); } Loading @@ -838,35 +973,63 @@ void edxt_fx( IF( EQ_16( length, 512 ) ) { /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */ #ifdef OPTIMIZE_FFT_STACK hdrm = L_norm_arr_cmplx( spec, 512 ); #else hdrm = s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) ); #endif IF( LT_16( hdrm, 4 ) ) { tmp = sub( hdrm, 4 ); #ifdef OPTIMIZE_FFT_STACK scale_sig32_cmplx( spec, 512, tmp ); #else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); #endif } #ifdef OPTIMIZE_FFT_STACK DoRTFTn_fx( NULL, NULL, spec, 512 ); #else DoRTFTn_fx( re, im, 512 ); #endif IF( LT_16( hdrm, 4 ) ) { tmp = negate( tmp ); #ifdef OPTIMIZE_FFT_STACK scale_sig32_cmplx( spec, 512, tmp ); #else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); #endif } } ELSE /* fft() doesn't support 512 */ { 
#ifdef OPTIMIZE_FFT_STACK fft_cmplx_fx( spec, length ); #else fft_fx( re, im, length, 1 ); #endif } FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* post-modulation of FFT output */ { #ifdef OPTIMIZE_FFT_STACK y[2 * k] = spec[k].re; /*Qx*/ #else y[2 * k] = re[k]; /*Qx*/ #endif move32(); IF( xSign != 0 ) { #ifdef OPTIMIZE_FFT_STACK y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( spec[( Nm1 - k )].re, shl_sat( xSign, 15 ) ); /*Qx*/ #else y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( re[( Nm1 - k )], shl_sat( xSign, 15 ) ); /*Qx*/ #endif } ELSE { Loading lib_com/fft_fx.c +70 −0 Original line number Diff line number Diff line Loading @@ -6932,6 +6932,7 @@ void fft_fx( const Word16 s /* i : sign */ ) { cmplx x[960]; FOR( Word16 j = 0; j < length; j++ ) Loading Loading @@ -7010,6 +7011,75 @@ void fft_fx( return; } #ifdef OPTIMIZE_FFT_STACK /* fft_cmplx_fx(): complex FFT on a cmplx buffer x (marked i/o). Dispatches on length: 20 goes to fft_len20_fx(), the other handled lengths (40, 64, 80, 100, 120, 128, 160, 200, 240, 256, 320, 400, 480, 600, 640, 960) go to fft_lenN() with a matching FFT_RotVector_*_fx table. Any other length, including 512, reaches the assert in the default case. NOTE(review): the meaning of the trailing fft_lenN() arguments is not visible here - confirm against the fft_lenN() definition. */ void fft_cmplx_fx( cmplx *x, /* i/o: complex data */ const Word16 length /* i : length of fft */ ) { SWITCH( length ) { case 20: fft_len20_fx( x ); BREAK; case 40: fft_lenN( x, FFT_RotVector_640_fx, 640, 5, 8, 8, 40 ); BREAK; case 64: fft_lenN( x, FFT_RotVector_256_fx, 256, 8, 8, 8, 64 ); BREAK; case 80: fft_lenN( x, FFT_RotVector_640_fx, 640, 10, 8, 4, 40 ); BREAK; case 100: fft_lenN( x, FFT_RotVector_400_fx, 400, 10, 10, 4, 40 ); BREAK; case 120: fft_lenN( x, FFT_RotVector_960_fx, 960, 15, 8, 4, 60 ); BREAK; case 128: fft_lenN( x, FFT_RotVector_256_fx, 256, 16, 8, 4, 64 ); BREAK; case 160: fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 8, 2, 40 ); BREAK; case 200: fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 10, 2, 40 ); BREAK; case 240: fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 8, 2, 60 ); BREAK; case 256: fft_lenN( x, FFT_RotVector_256_fx, 256, 32, 8, 2, 64 ); BREAK; case 320: fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 16, 2, 40 ); BREAK; case 400: fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 20, 2, 40 ); BREAK; case 480: fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 16, 2, 60 ); BREAK; case 600: fft_lenN( x, FFT_RotVector_600_fx, 600, 30, 20, 2, 60 ); BREAK; case
640: fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 32, 2, 40 ); BREAK; case 960: fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 32, 2, 60 ); BREAK; /* unsupported length: caught only when asserts are enabled; no transform is performed */ default: assert( !"fft length is not supported!" ); } return; } #endif void rfft_fx( Word32 *x, /* i/o: values Qx */ const Word16 *w, /* i : window Q15 */ Loading lib_com/fft_fx_evs.c +46 −18 Original line number Diff line number Diff line Loading @@ -47,13 +47,30 @@ static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w ); void DoRTFTn_fx( Word32 *x, /* i/o : real part of input and output data Q(x) */ Word32 *y, /* i/o : imaginary part of input and output data Q(x) */ #ifdef OPTIMIZE_FFT_STACK cmplx *spec, /* i/o : complex input and output data */ #endif const Word16 n /* i : size of the FFT up to 1024 */ ) { Word16 i; Word32 z[2048], *pt; #ifdef OPTIMIZE_FFT_STACK /* a non-NULL spec is the input: its re/im members are copied alternately into z[]; otherwise the separate x/y arrays are read */ IF( spec != NULL ) { pt = z; FOR( i = 0; i < n; i++ ) { *pt++ = spec[i].re; move16(); *pt++ = spec[i].im; move16(); } } ELSE { #endif pt = z; FOR( i = 0; i < n; i++ ) { Loading @@ -62,6 +79,9 @@ void DoRTFTn_fx( *pt++ = y[i]; move16(); } #ifdef OPTIMIZE_FFT_STACK } #endif IF( EQ_16( n, 16 ) ) { Loading Loading @@ -92,6 +112,10 @@ void DoRTFTn_fx( assert( 0 ); } #ifdef OPTIMIZE_FFT_STACK /* NOTE(review): in the lines visible here the result in z[] is copied back only to x[]/y[] (spec == NULL); no write-back into spec[] is visible for the spec != NULL case - confirm it exists in the elided context, otherwise callers passing spec never receive the FFT output */ IF( spec == NULL ) { #endif x[0] = z[0]; move16(); y[0] = z[1]; Loading @@ -104,7 +128,9 @@ void DoRTFTn_fx( y[i] = *pt++; move16(); } #ifdef OPTIMIZE_FFT_STACK } #endif return; } Loading @@ -124,6 +150,8 @@ static void cdftForw_fx( /* Do FFT */ cftfsub_fx( n, a, w ); return; } /*-----------------------------------------------------------------* Loading lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -95,6 +95,7 @@ #define FIX_1525_UNINIT_FORMAT_SWITCHING_DEC /* VA: float issue 1525: fix reading of uninitialized memory in format switching at the decoder */ #define HARMONIZE_2446_CON_TCX_FX /* FhG: basop issue: 2446 harmonization of function con_tcx_fx() */ #define FIX_2433_ARITH_OVERFLOW_IN_QMETA_ENC /* Nokia: Fix to convert 
non-converted binary operations */ #define OPTIMIZE_FFT_STACK /* switch guarding the cmplx-buffer code paths: fft_cmplx_fx(), scale_sig32_cmplx(), L_norm_arr_cmplx() and the extra spec argument of DoRTFTn_fx() */ /* #################### End BE switches ################################## */ Loading lib_com/prot_fx.h +23 −2 Original line number Diff line number Diff line Loading @@ -1303,6 +1303,14 @@ void scale_sig32( const Word16 exp0 /* i : exponent: x = round(x << exp) Qx xx exp */ ); #ifdef OPTIMIZE_FFT_STACK void scale_sig32_cmplx( cmplx x[], /* i/o: signal to scale Qx */ const Word16 lg, /* i : size of x[] Q0 */ const Word16 exp0 /* i : exponent: x = round(x << exp) Qx exp */ ); #endif void Scale_sig64( Word64 x[], /* i/o: signal to scale Qx */ Word16 len, /* i : size of x[] Q0 */ Loading Loading @@ -4052,6 +4060,9 @@ void BASOP_rfft( Word32 *x, Word16 sizeOfFft, Word16 *scale, Word16 isign ); void DoRTFTn_fx( Word32 *x, /* i/o : real part of i and output data */ Word32 *y, /* i/o : imaginary part of i and output data */ #ifdef OPTIMIZE_FFT_STACK cmplx *spec, /* i/o : complex input and output data */ #endif const Word16 n /* i : size of the FFT up to 1024 */ ); Loading Loading @@ -4113,6 +4124,13 @@ void fft_fx( const Word16 s /* i : sign */ ); #ifdef OPTIMIZE_FFT_STACK void fft_cmplx_fx( cmplx *spec, /* i/o: complex data */ const Word16 length /* i : length of fft */ ); #endif void rfft_fx( Word32 *x, /* i/o: values */ const Word16 *w, /* i : window */ Loading @@ -4130,6 +4148,9 @@ void DoRTFTn_fx_ivas( Word16 find_guarded_bits_fx( const Word32 n ); Word16 L_norm_arr( const Word32 *arr, Word16 size ); #ifdef OPTIMIZE_FFT_STACK Word16 L_norm_arr_cmplx( const cmplx *arr, Word16 size ); #endif Word16 norm_arr( Word16 *arr, Word16 size ); Word16 W_norm_arr( Word64 *arr, Word16 size ); Loading Loading
lib_com/edct_fx.c +178 −15 Original line number Diff line number Diff line Loading @@ -11,7 +11,9 @@ #include "stl.h" #include "math_32.h" static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */ static Word16 get_edxt_factor( const Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */ { Word16 factor; /*Q15*/ factor = 0; Loading Loading @@ -53,8 +55,12 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len } ELSE IF( EQ_16( length, 40 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else factor = 7327; /*0.223 in Q15*/ move16(); #endif } ELSE IF( EQ_16( length, 960 ) ) { Loading @@ -73,33 +79,57 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len } ELSE IF( EQ_16( length, 120 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else factor = 4230; /*0.1290 in Q15*/ move16(); #endif } ELSE IF( EQ_16( length, 1200 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else factor = 1338; /*0.040 in Q15*/ move16(); #endif } ELSE IF( EQ_16( length, 800 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else factor = 1638; /*0.05 in Q15*/ move16(); #endif } ELSE IF( EQ_16( length, 400 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else factor = 2317; /*0.070 in Q15*/ move16(); #endif } ELSE IF( EQ_16( length, 200 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else factor = 3277; /*0.1 in Q15*/ move16(); #endif } return factor; /*Q15*/ } static Word16 const *get_edct_table( Word16 length /*Q0*/, Word16 *q ) static Word16 const *get_edct_table( const Word16 length /*Q0*/, Word16 *q ) { Word16 const *edct_table; edct_table = NULL; Loading Loading @@ -590,6 +620,7 @@ void edxt_fx( move16(); cosPtr = NULL; sinPtr = NULL; IF( EQ_16( length, 512 ) ) { cosPtr = cos_scale_tbl_512; /*Q15*/ Loading Loading @@ -641,10 +672,14 @@ void edxt_fx( } ELSE IF( EQ_16( length, 40 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else cosPtr = cos_scale_tbl_640; /*Q15*/ sinPtr = sin_scale_tbl_640; /*Q15*/ n = 16; 
move16(); #endif } ELSE IF( EQ_16( length, 960 ) ) { Loading @@ -669,38 +704,58 @@ void edxt_fx( } ELSE IF( EQ_16( length, 120 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else cosPtr = cos_scale_tbl_960; /*Q15*/ sinPtr = sin_scale_tbl_960; /*Q15*/ n = 8; move16(); #endif } ELSE IF( EQ_16( length, 1200 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else cosPtr = cos_scale_tbl_1200; /*Q15*/ sinPtr = sin_scale_tbl_1200; /*Q15*/ n = 1; move16(); #endif } ELSE IF( EQ_16( length, 800 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 1; move16(); #endif } ELSE IF( EQ_16( length, 400 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 2; move16(); #endif } ELSE IF( EQ_16( length, 200 ) ) { #ifdef OPTIMIZE_FFT_STACK assert( 0 ); #else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 4; move16(); #endif } test(); Loading @@ -708,16 +763,26 @@ void edxt_fx( { const Word16 Nm1 = sub( length, 1 ); const Word16 xSign = sub( imult1616( 2, shr( kernelType, 1 ) ), 1 ); /*Q0*/ #ifdef OPTIMIZE_FFT_STACK cmplx spec[L_FRAME_MAX]; #else Word32 re[L_FRAME_PLUS]; Word32 im[L_FRAME_PLUS]; #endif IF( !synthesis ) { FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* pre-modulation of audio input */ { #ifdef OPTIMIZE_FFT_STACK spec[k].re = x[2 * k]; /*Qx*/ spec[( Nm1 - k )].re = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/ spec[k].im = spec[( Nm1 - k )].im = 0; #else re[k] = x[2 * k]; /*Qx*/ re[( Nm1 - k )] = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/ im[k] = im[( Nm1 - k )] = 0; #endif move32(); move32(); move32(); Loading @@ -726,26 +791,46 @@ void edxt_fx( IF( EQ_16( length, 512 ) ) { /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */ #ifdef OPTIMIZE_FFT_STACK hdrm = L_norm_arr_cmplx( spec, 512 ); #else hdrm = s_min( 
L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) ); #endif IF( LT_16( hdrm, 4 ) ) { tmp = sub( hdrm, 4 ); #ifdef OPTIMIZE_FFT_STACK scale_sig32_cmplx( spec, 512, tmp ); #else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); #endif } #ifdef OPTIMIZE_FFT_STACK DoRTFTn_fx( NULL, NULL, spec, 512 ); #else DoRTFTn_fx( re, im, 512 ); #endif IF( LT_16( hdrm, 4 ) ) { tmp = negate( tmp ); #ifdef OPTIMIZE_FFT_STACK scale_sig32_cmplx( spec, 512, tmp ); #else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); #endif } } ELSE /* fft() doesn't support 512 */ { #ifdef OPTIMIZE_FFT_STACK fft_cmplx_fx( spec, length ); #else fft_fx( re, im, length, 1 ); #endif } IF( shr( kernelType, 1 ) ) Loading @@ -757,12 +842,21 @@ void edxt_fx( const Word16 wRe = cosPtr[( k * n )]; /*Q15*/ const Word16 wIm = sinPtr[( k * n )]; /*Q15*/ #ifdef OPTIMIZE_FFT_STACK y[k] /*pt 1*/ = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ y[( length - k )] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ #else y[k] /*pt 1*/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ y[( length - k )] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ #endif move32(); move32(); } #ifdef OPTIMIZE_FFT_STACK y[( length / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ #else y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } ELSE /* forw. 
DST-II */ Loading @@ -774,16 +868,29 @@ void edxt_fx( const Word16 wRe = cosPtr[( k * n )]; /*Q15*/ const Word16 wIm = sinPtr[( k * n )]; /*Q15*/ #ifdef OPTIMIZE_FFT_STACK y[( Nm1 - k )] = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ y[k - 1] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ #else y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ #endif move32(); move32(); } #ifdef OPTIMIZE_FFT_STACK y[( Nm1 / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ #else y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } #ifdef OPTIMIZE_FFT_STACK y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( spec[0].re, 1 ); /*Qx*/ #else y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 ); /*Qx*/ #endif move32(); } ELSE /* inverse II = III */ Loading @@ -797,12 +904,21 @@ void edxt_fx( const Word16 wRe = shr( cosPtr[imult1616( k, n )], 1 ); const Word16 wIm = shr( sinPtr[imult1616( k, n )], 1 ); #ifdef OPTIMIZE_FFT_STACK spec[k].re = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ spec[k].im = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ #else re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ im[k] = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ #endif move32(); move32(); } #ifdef OPTIMIZE_FFT_STACK spec[( length / 2 )].re = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #else re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } ELSE /* DST type III */ Loading @@ -814,23 +930,42 @@ void edxt_fx( const Word16 wRe = shr( cosPtr[( k * n )], 1 ); /*Q15*/ const Word16 wIm = shr( sinPtr[( k * n )], 1 ); /*Q15*/ #ifdef OPTIMIZE_FFT_STACK 
spec[k].re = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/ spec[k].im = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/ #else re[k] = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/ im[k] = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/ #endif move32(); move32(); } #ifdef OPTIMIZE_FFT_STACK spec[( length / 2 )].re = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #else re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } #ifdef OPTIMIZE_FFT_STACK spec[0].re = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ spec[0].im = spec[( length / 2 )].im = 0; #else re[0] = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ im[0] = im[( length / 2 )] = 0; #endif move32(); move32(); FOR( k = ( Nm1 / 2 ); k > 0; k-- ) { #ifdef OPTIMIZE_FFT_STACK spec[( length - k )].re = spec[k].re; /*Qx*/ spec[( length - k )].im = L_negate( spec[k].im ); /*Qx*/ #else re[( length - k )] = re[k]; /*Qx*/ im[( length - k )] = L_negate( im[k] ); /*Qx*/ #endif move32(); move32(); } Loading @@ -838,35 +973,63 @@ void edxt_fx( IF( EQ_16( length, 512 ) ) { /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */ #ifdef OPTIMIZE_FFT_STACK hdrm = L_norm_arr_cmplx( spec, 512 ); #else hdrm = s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) ); #endif IF( LT_16( hdrm, 4 ) ) { tmp = sub( hdrm, 4 ); #ifdef OPTIMIZE_FFT_STACK scale_sig32_cmplx( spec, 512, tmp ); #else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); #endif } #ifdef OPTIMIZE_FFT_STACK DoRTFTn_fx( NULL, NULL, spec, 512 ); #else DoRTFTn_fx( re, im, 512 ); #endif IF( LT_16( hdrm, 4 ) ) { tmp = negate( tmp ); #ifdef OPTIMIZE_FFT_STACK scale_sig32_cmplx( spec, 512, tmp ); #else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); #endif } } ELSE /* fft() doesn't support 512 */ { 
#ifdef OPTIMIZE_FFT_STACK fft_cmplx_fx( spec, length ); #else fft_fx( re, im, length, 1 ); #endif } FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* post-modulation of FFT output */ { #ifdef OPTIMIZE_FFT_STACK y[2 * k] = spec[k].re; /*Qx*/ #else y[2 * k] = re[k]; /*Qx*/ #endif move32(); IF( xSign != 0 ) { #ifdef OPTIMIZE_FFT_STACK y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( spec[( Nm1 - k )].re, shl_sat( xSign, 15 ) ); /*Qx*/ #else y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( re[( Nm1 - k )], shl_sat( xSign, 15 ) ); /*Qx*/ #endif } ELSE { Loading
lib_com/fft_fx.c +70 −0 Original line number Diff line number Diff line Loading @@ -6932,6 +6932,7 @@ void fft_fx( const Word16 s /* i : sign */ ) { cmplx x[960]; FOR( Word16 j = 0; j < length; j++ ) Loading Loading @@ -7010,6 +7011,75 @@ void fft_fx( return; } #ifdef OPTIMIZE_FFT_STACK /* fft_cmplx_fx(): complex FFT on a cmplx buffer x (marked i/o). Dispatches on length: 20 goes to fft_len20_fx(), the other handled lengths (40, 64, 80, 100, 120, 128, 160, 200, 240, 256, 320, 400, 480, 600, 640, 960) go to fft_lenN() with a matching FFT_RotVector_*_fx table. Any other length, including 512, reaches the assert in the default case. NOTE(review): the meaning of the trailing fft_lenN() arguments is not visible here - confirm against the fft_lenN() definition. */ void fft_cmplx_fx( cmplx *x, /* i/o: complex data */ const Word16 length /* i : length of fft */ ) { SWITCH( length ) { case 20: fft_len20_fx( x ); BREAK; case 40: fft_lenN( x, FFT_RotVector_640_fx, 640, 5, 8, 8, 40 ); BREAK; case 64: fft_lenN( x, FFT_RotVector_256_fx, 256, 8, 8, 8, 64 ); BREAK; case 80: fft_lenN( x, FFT_RotVector_640_fx, 640, 10, 8, 4, 40 ); BREAK; case 100: fft_lenN( x, FFT_RotVector_400_fx, 400, 10, 10, 4, 40 ); BREAK; case 120: fft_lenN( x, FFT_RotVector_960_fx, 960, 15, 8, 4, 60 ); BREAK; case 128: fft_lenN( x, FFT_RotVector_256_fx, 256, 16, 8, 4, 64 ); BREAK; case 160: fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 8, 2, 40 ); BREAK; case 200: fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 10, 2, 40 ); BREAK; case 240: fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 8, 2, 60 ); BREAK; case 256: fft_lenN( x, FFT_RotVector_256_fx, 256, 32, 8, 2, 64 ); BREAK; case 320: fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 16, 2, 40 ); BREAK; case 400: fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 20, 2, 40 ); BREAK; case 480: fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 16, 2, 60 ); BREAK; case 600: fft_lenN( x, FFT_RotVector_600_fx, 600, 30, 20, 2, 60 ); BREAK; case 640: fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 32, 2, 40 ); BREAK; case 960: fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 32, 2, 60 ); BREAK; /* unsupported length: caught only when asserts are enabled; no transform is performed */ default: assert( !"fft length is not supported!" ); } return; } #endif void rfft_fx( Word32 *x, /* i/o: values Qx */ const Word16 *w, /* i : window Q15 */ Loading
lib_com/fft_fx_evs.c +46 −18 Original line number Diff line number Diff line Loading @@ -47,13 +47,30 @@ static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w ); void DoRTFTn_fx( Word32 *x, /* i/o : real part of input and output data Q(x) */ Word32 *y, /* i/o : imaginary part of input and output data Q(x) */ #ifdef OPTIMIZE_FFT_STACK cmplx *spec, /* i/o : complex input and output data */ #endif const Word16 n /* i : size of the FFT up to 1024 */ ) { Word16 i; Word32 z[2048], *pt; #ifdef OPTIMIZE_FFT_STACK /* a non-NULL spec is the input: its re/im members are copied alternately into z[]; otherwise the separate x/y arrays are read */ IF( spec != NULL ) { pt = z; FOR( i = 0; i < n; i++ ) { *pt++ = spec[i].re; move16(); *pt++ = spec[i].im; move16(); } } ELSE { #endif pt = z; FOR( i = 0; i < n; i++ ) { Loading @@ -62,6 +79,9 @@ void DoRTFTn_fx( *pt++ = y[i]; move16(); } #ifdef OPTIMIZE_FFT_STACK } #endif IF( EQ_16( n, 16 ) ) { Loading Loading @@ -92,6 +112,10 @@ void DoRTFTn_fx( assert( 0 ); } #ifdef OPTIMIZE_FFT_STACK /* NOTE(review): in the lines visible here the result in z[] is copied back only to x[]/y[] (spec == NULL); no write-back into spec[] is visible for the spec != NULL case - confirm it exists in the elided context, otherwise callers passing spec never receive the FFT output */ IF( spec == NULL ) { #endif x[0] = z[0]; move16(); y[0] = z[1]; Loading @@ -104,7 +128,9 @@ void DoRTFTn_fx( y[i] = *pt++; move16(); } #ifdef OPTIMIZE_FFT_STACK } #endif return; } Loading @@ -124,6 +150,8 @@ static void cdftForw_fx( /* Do FFT */ cftfsub_fx( n, a, w ); return; } /*-----------------------------------------------------------------* Loading
lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -95,6 +95,7 @@ #define FIX_1525_UNINIT_FORMAT_SWITCHING_DEC /* VA: float issue 1525: fix reading of uninitialized memory in format switching at the decoder */ #define HARMONIZE_2446_CON_TCX_FX /* FhG: basop issue: 2446 harmonization of function con_tcx_fx() */ #define FIX_2433_ARITH_OVERFLOW_IN_QMETA_ENC /* Nokia: Fix to convert non-converted binary operations */ #define OPTIMIZE_FFT_STACK /* switch guarding the cmplx-buffer code paths: fft_cmplx_fx(), scale_sig32_cmplx(), L_norm_arr_cmplx() and the extra spec argument of DoRTFTn_fx() */ /* #################### End BE switches ################################## */ Loading
lib_com/prot_fx.h +23 −2 Original line number Diff line number Diff line Loading @@ -1303,6 +1303,14 @@ void scale_sig32( const Word16 exp0 /* i : exponent: x = round(x << exp) Qx xx exp */ ); #ifdef OPTIMIZE_FFT_STACK void scale_sig32_cmplx( cmplx x[], /* i/o: signal to scale Qx */ const Word16 lg, /* i : size of x[] Q0 */ const Word16 exp0 /* i : exponent: x = round(x << exp) Qx exp */ ); #endif void Scale_sig64( Word64 x[], /* i/o: signal to scale Qx */ Word16 len, /* i : size of x[] Q0 */ Loading Loading @@ -4052,6 +4060,9 @@ void BASOP_rfft( Word32 *x, Word16 sizeOfFft, Word16 *scale, Word16 isign ); void DoRTFTn_fx( Word32 *x, /* i/o : real part of i and output data */ Word32 *y, /* i/o : imaginary part of i and output data */ #ifdef OPTIMIZE_FFT_STACK cmplx *spec, /* i/o : complex input and output data */ #endif const Word16 n /* i : size of the FFT up to 1024 */ ); Loading Loading @@ -4113,6 +4124,13 @@ void fft_fx( const Word16 s /* i : sign */ ); #ifdef OPTIMIZE_FFT_STACK void fft_cmplx_fx( cmplx *spec, /* i/o: complex data */ const Word16 length /* i : length of fft */ ); #endif void rfft_fx( Word32 *x, /* i/o: values */ const Word16 *w, /* i : window */ Loading @@ -4130,6 +4148,9 @@ void DoRTFTn_fx_ivas( Word16 find_guarded_bits_fx( const Word32 n ); Word16 L_norm_arr( const Word32 *arr, Word16 size ); #ifdef OPTIMIZE_FFT_STACK Word16 L_norm_arr_cmplx( const cmplx *arr, Word16 size ); #endif Word16 norm_arr( Word16 *arr, Word16 size ); Word16 W_norm_arr( Word64 *arr, Word16 size ); Loading