diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 551dbdeef64247b414cf3cb292d5671aed170575..73581f3729db9eae7162a814e352532ad088a0bd 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -758,10 +758,69 @@ Word16 getScaleFactor32( /* o: measured headroom in range [ i = s_and( s_min( i_max, i_min ), 0x1F ); - return i; } +#ifdef OPTIMIZE_FFT_STACK +/* o: measured headroom in range [0..31] over real and imaginary parts, 0 if all x[i] == 0 */ +Word16 getScaleFactor32_cmplx( + cmplx *x, /* i: array containing 32-bit complex data */ + const Word16 len_x /* i: number of complex elements to scan */ +) +{ + Word16 i, i_min, i_max, i_re, i_im; + Word32 x_min_re, x_max_re, x_min_im, x_max_im; + + x_max_re = 0; + move32(); + x_min_re = 0; + move32(); + x_max_im = 0; + move32(); + x_min_im = 0; + move32(); + FOR( i = 0; i < len_x; i++ ) + { + if ( x[i].re >= 0 ) + x_max_re = L_max( x_max_re, x[i].re ); + if ( x[i].re < 0 ) + x_min_re = L_min( x_min_re, x[i].re ); + if ( x[i].im >= 0 ) + x_max_im = L_max( x_max_im, x[i].im ); + if ( x[i].im < 0 ) + x_min_im = L_min( x_min_im, x[i].im ); + } + + i_max = 0x20; + move16(); + i_min = 0x20; + move16(); + + if ( x_max_re != 0 ) + i_max = norm_l( x_max_re ); + + if ( x_min_re != 0 ) + i_min = norm_l( x_min_re ); + + i_re = s_min( i_max, i_min ); /* stays 0x20 when every real part is zero; masking is deferred */ + + i_max = 0x20; + move16(); + i_min = 0x20; + move16(); + + if ( x_max_im != 0 ) + i_max = norm_l( x_max_im ); + + if ( x_min_im != 0 ) + i_min = norm_l( x_min_im ); + + i_im = s_min( i_max, i_min ); /* stays 0x20 when every imaginary part is zero */ + + return s_and( s_min( i_re, i_im ), 0x1F ); /* mask once: 0x20 maps to 0 only if the whole array is zero */ +} +#endif + Word16 getScaleFactor32_copy( /* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ const Word32 *x, /* i: array containing 32-bit data */ const Word32 len_x ) /* i: length of the array to scan */ diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index 4265162488781d7effb4ca756968d6353e5f778c..b2290e4539c5eb5b5211abb78e50b8bd89fac6c5 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -235,6 +235,14 @@ Word16 
getScaleFactor32( const Word32 *x, /* i : array containing 32-bit data */ const Word16 len_x ); /* i : length of the array to scan */ +#ifdef OPTIMIZE_FFT_STACK +/* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ +Word16 getScaleFactor32_cmplx( + cmplx *x, /* i: array containing 32-bit data */ + const Word16 len_x /* i: length of the array to scan */ +); +#endif + Word16 getScaleFactor32_copy( /* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ const Word32 *x, /* i: array containing 32-bit data */ const Word32 len_x ); /* i: length of the array to scan */ diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index 55bc483e0fcc2558eeea1fb5a958b54318aefda6..84793d4fdd6fa5ae2cae8bde0c1a9f20e261a046 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -11,7 +11,9 @@ #include "stl.h" #include "math_32.h" -static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */ + +static Word16 get_edxt_factor( + const Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */ { Word16 factor; /*Q15*/ factor = 0; @@ -53,8 +55,12 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len } ELSE IF( EQ_16( length, 40 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 7327; /*0.223 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 960 ) ) { @@ -73,33 +79,57 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len } ELSE IF( EQ_16( length, 120 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 4230; /*0.1290 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 1200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 1338; /*0.040 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 800 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 1638; /*0.05 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 400 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 2317; /*0.070 in Q15*/ move16(); +#endif } ELSE 
IF( EQ_16( length, 200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 3277; /*0.1 in Q15*/ move16(); +#endif } + return factor; /*Q15*/ } -static Word16 const *get_edct_table( Word16 length /*Q0*/, Word16 *q ) + +static Word16 const *get_edct_table( + const Word16 length /*Q0*/, + Word16 *q ) { Word16 const *edct_table; edct_table = NULL; @@ -163,7 +193,7 @@ static Word16 const *get_edct_table( Word16 length /*Q0*/, Word16 *q ) /*-------------------------------------------------------------------------* * FUNCTION : edct_fx() * - * PURPOSE : DCT transform + * PURPOSE : DCT transform, 32-bit version * * INPUT ARGUMENTS : * _ (Word16) length : length @@ -177,14 +207,23 @@ void edct_fx( const Word32 *x, /* i : input signal Qq */ Word32 *y, /* o : output transform Qq */ Word16 length, /* i : length Q0*/ - Word16 *q /* i : Q value of input signal */ +#ifdef HARMONIZE_DCT + Word16 *q, /* i : Q value of input signal */ + const Word16 element_mode /* i : element mode */ +#else + Word16 *q /* i : Q value of input signal */ +#endif ) { Word16 i; Word32 re; Word32 im; const Word16 *edct_table; /*Q16 */ +#ifdef OPTIMIZE_FFT_STACK + cmplx spec[L_FRAME_PLUS / 2]; +#else Word32 complex_buf[2 * ( L_FRAME48k / 2 + 240 )]; +#endif Word32 L_tmp; Word16 tmp; Word16 len1; @@ -194,26 +233,64 @@ void edct_fx( /* Twiddling and Pre-rotate */ FOR( i = 0; i < len1; i++ ) { - L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Q(q+1) */ + L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Q(q+1) */ +#ifdef OPTIMIZE_FFT_STACK + spec[i].re = Madd_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#else complex_buf[2 * i] = Madd_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#endif move32(); L_tmp = Mult_32_16( x[( length - ( 1 + ( i * 2 ) ) )], edct_table[i] ); /*Q(q+1) */ - +#ifdef OPTIMIZE_FFT_STACK + spec[i].im = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); 
/*Q(q+1) */ +#else complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#endif move32(); } - *q = sub( 15, *q ); - move16(); +#ifdef HARMONIZE_DCT + IF( element_mode == EVS_MONO ) + { +#endif + *q = sub( 15, *q ); + move16(); +#ifdef OPTIMIZE_FFT_STACK + BASOP_cfft( spec, len1, q, y ); +#else BASOP_cfft( (cmplx *) complex_buf, len1, q, y ); +#endif + + tmp = div_s( 1, length ); /*Q15 */ + tmp = round_fx( L_shl( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ), 2 ) ); /*Q15 */ +#ifdef HARMONIZE_DCT + } + ELSE + { + *q = sub( 31, *q ); + move16(); + tmp = sub( getScaleFactor32_cmplx( spec, len1 ), find_guarded_bits_fx( len1 ) ); + scale_sig32_cmplx( spec, len1, tmp ); + + fft_fx( spec, len1 ); + *q = sub( *q, tmp ); + move16(); + + tmp = div_s( 4, length ); /*Q17 */ + tmp = round_fx( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ) ); /*Q15 */ + } +#endif - tmp = div_s( 1, length ); /*Q15 */ - tmp = round_fx( L_shl( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ), 2 ) ); /*Q15 */ FOR( i = 0; i < len1; i++ ) { - re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Q(q+1) */ - im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Q(q+1) */ +#ifdef OPTIMIZE_FFT_STACK + re = Msub_32_16( spec[i].re, spec[i].im, tmp ); /*Q(q+1) */ + im = Madd_32_16( spec[i].im, spec[i].re, tmp ); /*Q(q+1) */ +#else + re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Q(q+1) */ + im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Q(q+1) */ +#endif y[2 * i] = L_add( Mult_32_16( re, edct_table[i] ), Mult_32_16( im, edct_table[( len1 - ( 1 + i ) )] ) ); /*Q(q+2)*/ move32(); y[( length - ( 1 + ( i * 2 ) ) )] = L_sub( Mult_32_16( re, edct_table[( len1 - ( 1 + i ) )] ), Mult_32_16( im, edct_table[i] ) ); /*Q(q+2)*/ @@ -222,9 +299,18 @@ void edct_fx( *q = sub( 15 + 2, *q ); move16(); +#ifdef HARMONIZE_DCT + IF( element_mode != EVS_MONO ) + 
{ + *q = add( *q, Q16 ); + move16(); + } +#endif + return; } +#ifndef HARMONIZE_DCT void edct_ivas_fx( const Word32 *x, /* i : input signal Qq */ Word32 *y, /* o : output transform Qq */ @@ -284,8 +370,11 @@ void edct_ivas_fx( *q = sub( 31 + 2, *q ); move16(); + return; } +#endif + /*-------------------------------------------------------------------------* * FUNCTION : edst_fx() * @@ -310,7 +399,11 @@ void edst_fx( Word32 re; Word32 im; const Word16 *edct_table; /*Q16 */ +#ifdef OPTIMIZE_FFT_STACK + cmplx complex_buf[L_FRAME_PLUS / 2]; +#else Word32 complex_buf[2 * ( L_FRAME48k / 2 + 240 )]; +#endif Word32 L_tmp; Word16 tmp; Word16 len1; @@ -320,25 +413,42 @@ void edst_fx( /* Twiddling and Pre-rotate */ FOR( i = 0; i < len1; i++ ) { - L_tmp = Mult_32_16( x[( length - ( 1 + ( i * 2 ) ) )], edct_table[i] ); /*Qq+1*/ + L_tmp = Mult_32_16( x[( length - ( 1 + ( i * 2 ) ) )], edct_table[i] ); /*Qq+1*/ +#ifdef OPTIMIZE_FFT_STACK + complex_buf[i].re = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#else complex_buf[2 * i] = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#endif move32(); - L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Qq+1*/ + L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Qq+1*/ +#ifdef OPTIMIZE_FFT_STACK + complex_buf[i].im = Msub_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#else complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#endif move32(); } *q = sub( 15, *q ); move16(); +#ifdef OPTIMIZE_FFT_STACK + BASOP_cfft( complex_buf, len1, q, y ); +#else BASOP_cfft( (cmplx *) complex_buf, len1, q, y ); +#endif tmp = div_s( 1, length ); /*Q15 */ tmp = round_fx( L_shl( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ), 2 ) ); /*Q15 */ FOR( i = 0; i < len1; i++ ) { - re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Qq+1*/ - im = Madd_32_16( 
complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Qq+1*/ +#ifdef OPTIMIZE_FFT_STACK + re = Msub_32_16( complex_buf[i].re, complex_buf[i].im, tmp ); /*Qq+1*/ + im = Madd_32_16( complex_buf[i].im, complex_buf[i].re, tmp ); /*Qq+1*/ +#else + re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Qq+1*/ + im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Qq+1*/ +#endif y[2 * i] = L_add( Mult_32_16( re, edct_table[i] ), Mult_32_16( im, edct_table[( len1 - ( 1 + i ) )] ) ); /*Qq+2*/ move32(); y[( length - ( 1 + ( i * 2 ) ) )] = L_sub( Mult_32_16( im, edct_table[i] ), Mult_32_16( re, edct_table[( len1 - ( 1 + i ) )] ) ); /*Qq+2*/ @@ -350,10 +460,12 @@ void edst_fx( return; } + + /*========================================================================*/ -/* FUNCTION : edct_fx() */ +/* FUNCTION : edct_16fx() */ /*------------------------------------------------------------------------*/ -/* PURPOSE : DCT transform */ +/* PURPOSE : DCT transform, 16-bit version */ /*------------------------------------------------------------------------*/ /* INPUT ARGUMENTS : */ /* _ (Word16) length : length */ @@ -365,7 +477,6 @@ void edst_fx( /* OUTPUT ARGUMENTS : */ /* _ (Word16[]) y : output transform Qx */ /*------------------------------------------------------------------------*/ - /*------------------------------------------------------------------------*/ /* RETURN ARGUMENTS : */ /* _ None */ @@ -374,26 +485,35 @@ void edct_16fx( const Word16 *x, /* i : input signal Qx */ Word16 *y, /* o : output transform Qx */ Word16 length, /* i : length */ - Word16 bh, /* bit-headroom */ + Word16 bh /* bit-headroom */ +#ifndef HARMONIZE_DCT + , const Word16 element_mode - +#endif ) { Word16 i; Word16 re[L_FRAME48k / 2]; Word16 im[L_FRAME48k / 2]; const Word16 *edct_table = NULL; +#ifndef OPTIMIZE_FFT_STACK Word16 re2[L_FRAME48k / 2]; Word16 im2[L_FRAME48k / 2]; - +#endif Word32 L_tmp, Lacc, Lmax; +#ifdef OPTIMIZE_FFT_STACK + Word16 
tmp, tmp_re, fact; +#else Word16 tmp, fact; +#endif Word16 Q_edct; Word16 Len2, i2; const Word16 *px, *pt; Word16 *py; +#ifndef HARMONIZE_DCT (void) element_mode; /*COMPLETE: some eDCT sub function are missing */ +#endif IF( EQ_16( length, L_FRAME32k ) ) { @@ -447,28 +567,48 @@ void edct_16fx( { i2 = shl( i, 1 ); - L_tmp = L_mult( x[i2], edct_table[i] ); /*Q(Qx+16) */ - Lacc = L_mac_sat( L_tmp, *px, *pt ); /*Q(Qx+16) */ + L_tmp = L_mult( x[i2], edct_table[i] ); /*Q(Qx+16) */ + Lacc = L_mac_sat( L_tmp, *px, *pt ); /*Q(Qx+16) */ +#ifdef OPTIMIZE_FFT_STACK + re[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#else re2[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#endif move16(); - L_tmp = L_mult( *px, edct_table[i] ); /*Q(Qx+16) */ - Lacc = L_msu_sat( L_tmp, x[i2], *pt ); /*Q(Qx+16) */ + L_tmp = L_mult( *px, edct_table[i] ); /*Q(Qx+16) */ + Lacc = L_msu_sat( L_tmp, x[i2], *pt ); /*Q(Qx+16) */ +#ifdef OPTIMIZE_FFT_STACK + im[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#else im2[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#endif move16(); px -= 2; pt--; } IF( EQ_16( length, L_FRAME32k ) ) { +#ifdef OPTIMIZE_FFT_STACK + DoRTFT320_16fx( re, im ); +#else DoRTFT320_16fx( re2, im2 ); +#endif } ELSE IF( EQ_16( length, L_FRAME ) ) { +#ifdef OPTIMIZE_FFT_STACK + DoRTFT128_16fx( re, im ); +#else DoRTFT128_16fx( re2, im2 ); +#endif } ELSE IF( EQ_16( length, L_FRAME16k ) ) { +#ifdef OPTIMIZE_FFT_STACK + DoRTFT160_16fx( re, im ); +#else DoRTFT160_16fx( re2, im2 ); +#endif } ELSE { @@ -478,6 +618,18 @@ void edct_16fx( fact = round_fx( L_shl( L_tmp, 2 ) ); /*Q15 */ FOR( i = 0; i < shr( length, 1 ); i++ ) { +#ifdef OPTIMIZE_FFT_STACK + tmp = mult_r( im[i], fact ); /*Q(Qx+Q_edct) */ + tmp_re = sub_sat( re[i], tmp ); /*Q(Qx+Q_edct) */ + move16(); + + tmp = mult_r( re[i], fact ); /*Q(Qx+Q_edct) */ + im[i] = add_sat( im[i], tmp ); /*Q(Qx+Q_edct) */ + move16(); + + re[i] = tmp_re; + move16(); +#else 
tmp = mult_r( im2[i], fact ); /*Q(Qx+Q_edct) */ re[i] = sub_sat( re2[i], tmp ); /*Q(Qx+Q_edct) */ move16(); @@ -485,6 +637,7 @@ void edct_16fx( tmp = mult_r( re2[i], fact ); /*Q(Qx+Q_edct) */ im[i] = add_sat( im2[i], tmp ); /*Q(Qx+Q_edct) */ move16(); +#endif } /* Post-rotate and obtain the output data */ @@ -537,7 +690,11 @@ void iedct_short_fx( seg_len_div4 = shr( segment_length, 2 ); /*Q0*/ seg_len_3mul_div4 = add( seg_len_div2, seg_len_div4 ); +#ifdef HARMONIZE_DCT + edct_fx( in, alias, seg_len_div2, Q, EVS_MONO ); +#else edct_fx( in, alias, seg_len_div2, Q ); +#endif FOR( i = 0; i < seg_len_div2; i++ ) { IF( alias[i] != 0 ) @@ -590,6 +747,7 @@ void edxt_fx( move16(); cosPtr = NULL; sinPtr = NULL; + IF( EQ_16( length, 512 ) ) { cosPtr = cos_scale_tbl_512; /*Q15*/ @@ -641,10 +799,14 @@ void edxt_fx( } ELSE IF( EQ_16( length, 40 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_640; /*Q15*/ sinPtr = sin_scale_tbl_640; /*Q15*/ n = 16; move16(); +#endif } ELSE IF( EQ_16( length, 960 ) ) { @@ -669,38 +831,58 @@ void edxt_fx( } ELSE IF( EQ_16( length, 120 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_960; /*Q15*/ sinPtr = sin_scale_tbl_960; /*Q15*/ n = 8; move16(); +#endif } ELSE IF( EQ_16( length, 1200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_1200; /*Q15*/ sinPtr = sin_scale_tbl_1200; /*Q15*/ n = 1; move16(); +#endif } ELSE IF( EQ_16( length, 800 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 1; move16(); +#endif } ELSE IF( EQ_16( length, 400 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 2; move16(); +#endif } ELSE IF( EQ_16( length, 200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 4; move16(); +#endif } test(); @@ 
-708,16 +890,26 @@ void edxt_fx( { const Word16 Nm1 = sub( length, 1 ); const Word16 xSign = sub( imult1616( 2, shr( kernelType, 1 ) ), 1 ); /*Q0*/ +#ifdef OPTIMIZE_FFT_STACK + cmplx spec[L_FRAME_MAX]; +#else Word32 re[L_FRAME_PLUS]; Word32 im[L_FRAME_PLUS]; +#endif IF( !synthesis ) { FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* pre-modulation of audio input */ { +#ifdef OPTIMIZE_FFT_STACK + spec[k].re = x[2 * k]; /*Qx*/ + spec[( Nm1 - k )].re = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/ + spec[k].im = spec[( Nm1 - k )].im = 0; +#else re[k] = x[2 * k]; /*Qx*/ re[( Nm1 - k )] = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/ im[k] = im[( Nm1 - k )] = 0; +#endif move32(); move32(); move32(); @@ -726,26 +918,46 @@ void edxt_fx( IF( EQ_16( length, 512 ) ) { /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */ +#ifdef OPTIMIZE_FFT_STACK + hdrm = L_norm_arr_cmplx( spec, 512 ); +#else hdrm = s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = sub( hdrm, 4 ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } +#ifdef OPTIMIZE_FFT_STACK + DoRTFTn_fx( NULL, NULL, spec, 512 ); +#else DoRTFTn_fx( re, im, 512 ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = negate( tmp ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } } ELSE /* fft() doesn't support 512 */ { +#ifdef OPTIMIZE_FFT_STACK + fft_fx( spec, length ); +#else fft_fx( re, im, length, 1 ); +#endif } IF( shr( kernelType, 1 ) ) @@ -757,12 +969,21 @@ void edxt_fx( const Word16 wRe = cosPtr[( k * n )]; /*Q15*/ const Word16 wIm = sinPtr[( k * n )]; /*Q15*/ +#ifdef OPTIMIZE_FFT_STACK + y[k] /*pt 1*/ = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ + y[( length - k )] = L_sub( 
Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ +#else y[k] /*pt 1*/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ y[( length - k )] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ +#endif move32(); move32(); } +#ifdef OPTIMIZE_FFT_STACK + y[( length / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ +#else y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } ELSE /* forw. DST-II */ @@ -774,16 +995,29 @@ void edxt_fx( const Word16 wRe = cosPtr[( k * n )]; /*Q15*/ const Word16 wIm = sinPtr[( k * n )]; /*Q15*/ +#ifdef OPTIMIZE_FFT_STACK + y[( Nm1 - k )] = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ + y[k - 1] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ +#else y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ +#endif move32(); move32(); } +#ifdef OPTIMIZE_FFT_STACK + y[( Nm1 / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ +#else y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } +#ifdef OPTIMIZE_FFT_STACK + y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( spec[0].re, 1 ); /*Qx*/ +#else y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 ); /*Qx*/ +#endif move32(); } ELSE /* inverse II = III */ @@ -797,12 +1031,21 @@ void edxt_fx( const Word16 wRe = shr( cosPtr[imult1616( k, n )], 1 ); const Word16 wIm = shr( sinPtr[imult1616( k, n )], 1 ); +#ifdef OPTIMIZE_FFT_STACK + spec[k].re = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ + spec[k].im = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ +#else re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ im[k] = L_sub( 
Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ +#endif move32(); move32(); } +#ifdef OPTIMIZE_FFT_STACK + spec[( length / 2 )].re = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#else re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } ELSE /* DST type III */ @@ -814,23 +1057,42 @@ void edxt_fx( const Word16 wRe = shr( cosPtr[( k * n )], 1 ); /*Q15*/ const Word16 wIm = shr( sinPtr[( k * n )], 1 ); /*Q15*/ +#ifdef OPTIMIZE_FFT_STACK + spec[k].re = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/ + spec[k].im = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/ +#else re[k] = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/ im[k] = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/ +#endif move32(); move32(); } +#ifdef OPTIMIZE_FFT_STACK + spec[( length / 2 )].re = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#else re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } +#ifdef OPTIMIZE_FFT_STACK + spec[0].re = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ + spec[0].im = spec[( length / 2 )].im = 0; +#else re[0] = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ im[0] = im[( length / 2 )] = 0; +#endif move32(); move32(); FOR( k = ( Nm1 / 2 ); k > 0; k-- ) { +#ifdef OPTIMIZE_FFT_STACK + spec[( length - k )].re = spec[k].re; /*Qx*/ + spec[( length - k )].im = L_negate( spec[k].im ); /*Qx*/ +#else re[( length - k )] = re[k]; /*Qx*/ im[( length - k )] = L_negate( im[k] ); /*Qx*/ +#endif move32(); move32(); } @@ -838,35 +1100,63 @@ void edxt_fx( IF( EQ_16( length, 512 ) ) { /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */ +#ifdef OPTIMIZE_FFT_STACK + hdrm = L_norm_arr_cmplx( spec, 512 ); +#else hdrm = s_min( L_norm_arr( re, 512 ), 
L_norm_arr( im, 512 ) ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = sub( hdrm, 4 ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } +#ifdef OPTIMIZE_FFT_STACK + DoRTFTn_fx( NULL, NULL, spec, 512 ); +#else DoRTFTn_fx( re, im, 512 ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = negate( tmp ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } } ELSE /* fft() doesn't support 512 */ { +#ifdef OPTIMIZE_FFT_STACK + fft_fx( spec, length ); +#else fft_fx( re, im, length, 1 ); +#endif } FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* post-modulation of FFT output */ { +#ifdef OPTIMIZE_FFT_STACK + y[2 * k] = spec[k].re; /*Qx*/ +#else y[2 * k] = re[k]; /*Qx*/ +#endif move32(); IF( xSign != 0 ) { +#ifdef OPTIMIZE_FFT_STACK + y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( spec[( Nm1 - k )].re, shl_sat( xSign, 15 ) ); /*Qx*/ +#else y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( re[( Nm1 - k )], shl_sat( xSign, 15 ) ); /*Qx*/ +#endif } ELSE { diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 4fbb39a5e37a4aae074bc28eec326d1bdfcf7f34..18acad8651da2a3b5fd81a2b3eeddfceb357aa17 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -51,8 +51,6 @@ * Local constants *-----------------------------------------------------------------*/ -#define Mpy_32_xx Mpy_32_16_1 - #define FFTC( x ) WORD322WORD16( (Word32) x ) /* DCT related */ @@ -83,17 +81,19 @@ #define FFT_C165 ( FFTC( 0x30fbc54d ) ) /* FL2WORD32( 3.826834323650898e-1) COS_3PI_DIV8 Q15*/ #define FFT_C166 ( FFTC( 0xcf043ab3 ) ) /* FL2WORD32(-3.826834323650898e-1) -COS_3PI_DIV8 Q15*/ -#define SCALEFACTOR16 ( 0 ) -#define SCALEFACTOR20 ( 0 ) + /*-----------------------------------------------------------------* * Local function prototypes *-----------------------------------------------------------------*/ static void cdftForw( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w ); 
+#ifndef HARMONIZE_DoRTFTn static void bitrv2_SR( Word16 n, const Word16 *ip, Word32 *a ); +#endif static void cftfsub( Word16 n, Word32 *a, const Word16 *w ); static void cft1st( Word16 n, Word32 *a, const Word16 *w ); static void cftmdl( Word16 n, Word16 l, Word32 *a, const Word16 *w ); + static void fft16_ivas( Word32 *x, Word32 *y, const Word16 *Idx ); static void fft5_shift1( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx ); static void fft8( Word32 *x, Word32 *y, const Word16 *Idx ); @@ -109,11 +109,13 @@ static void fft5_8( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx ); static void fft4_5( Word32 *x, Word32 *y, const Word16 *Idx ); static void fft5_4( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx ); +#ifndef HARMONIZE_DCT void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data */ Word32 *y, /* i/o: imaginary part of input and output data */ const Word16 n /* i : size of the FFT n=(2^k) up to 1024 */ ); +#endif /*-----------------------------------------------------------------* * fft15_shift2() * 15-point FFT with 2-point circular shift @@ -1553,7 +1555,11 @@ static void fft5_4( * a low complexity 2-dimensional DFT of 80 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT80_fx( +#else void DoRTFT80_fx( +#endif Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: imaginary part of input and output data Qx */ ) @@ -1580,7 +1586,11 @@ void DoRTFT80_fx( * a low complexity 2-dimensional DFT of 120 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT120_fx( +#else void DoRTFT120_fx( +#endif Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: imaginary part of input and output data Qx */ ) @@ -1607,7 +1617,11 @@ void DoRTFT120_fx( * a low complexity 2-dimensional DFT of 160 points 
*-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT160_fx( +#else void DoRTFT160_fx( +#endif Word32 x[], /* i/o: real part of input and output data Qx */ Word32 y[] /* i/o: imaginary part of input and output data Qx */ ) @@ -1634,7 +1648,11 @@ void DoRTFT160_fx( * a low complexity 2-dimensional DFT of 320 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT320_fx( +#else void DoRTFT320_fx( +#endif Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: imaginary part of input and output data Qx */ ) @@ -1661,7 +1679,11 @@ void DoRTFT320_fx( * a low complexity 2-dimensional DFT of 480 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT480_fx( +#else void DoRTFT480_fx( +#endif Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: imaginary part of input and output data Qx */ ) @@ -1688,7 +1710,11 @@ void DoRTFT480_fx( * a low complexity 2-dimensional DFT of 40 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT40_fx( +#else void DoRTFT40_fx( +#endif Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: imaginary part of input and output data Qx */ ) @@ -1714,7 +1740,11 @@ void DoRTFT40_fx( * a low complexity 2-dimensional DFT of 20 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT20_fx( +#else void DoRTFT20_fx( +#endif Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: imaginary part of input and output data Qx */ ) @@ -1741,12 +1771,15 @@ void DoRTFT20_fx( * FFT with 128 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT128_fx( +#else void DoRTFT128_fx( +#endif 
Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: imaginary part of input and output data Qx */ ) { - Word16 i; Word32 z[256]; @@ -1775,25 +1808,34 @@ void DoRTFT128_fx( return; } + /*-----------------------------------------------------------------* * cdftForw() - * Main fuction of Complex Discrete Fourier Transform + * Main function of Complex Discrete Fourier Transform *-----------------------------------------------------------------*/ static void cdftForw( Word16 n, /* i : data length of real and imag Q0 */ Word32 *a, /* i/o: input/output data Qx */ const Word16 *ip, /* i : work area for bit reversal Qx */ - const Word16 *w /* i : cos/sin table Q15 */ + const Word16 *w /* i : cos/sin table Q15 */ ) { /* bit reversal */ +#ifdef HARMONIZE_DoRTFTn + bitrv2_SR_fx( n, ip + 2, a ); +#else bitrv2_SR( n, ip + 2, a ); +#endif /* Do FFT */ cftfsub( n, a, w ); + + return; } + +#ifndef HARMONIZE_DoRTFTn /*-----------------------------------------------------------------* * bitrv2_SR() * Bit reversal @@ -1995,6 +2037,7 @@ static void bitrv2_SR( return; } +#endif /*-----------------------------------------------------------------* * cftfsub() @@ -2438,6 +2481,7 @@ static void cftmdl( return; } +#ifndef HARMONIZE_DCT static void cftbsub( Word16 n, // Q0 Word32 *a, // Qx @@ -2733,14 +2777,14 @@ void edct2_fx_ivas( } } } - +#endif +#ifndef HARMONIZE_DoRTFTn void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y, /* i/o: imaginary part of input and output data Qx */ const Word16 n /* i : size of the FFT up to 1024 Q0*/ ) { - Word16 i; Word32 z[2048]; @@ -2790,7 +2834,7 @@ void DoRTFTn_fx_ivas( return; } - +#endif static void rfft_post( const Word16 *sine_table, // Q15 @@ -3776,7 +3820,11 @@ void DoFFT_fx( DoRTFT320_fx( re2, im2 ); BREAK; case 256: +#ifdef HARMONIZE_DoRTFTn + DoRTFTn_fx( re2, im2, NULL, 256 ); +#else DoRTFTn_fx_ivas( re2, im2, 256 ); +#endif BREAK; case 240: DoRTFT240( re2, im2 ); @@ -3800,7 
+3848,11 @@ void DoFFT_fx( DoRTFT80_fx( re2, im2 ); BREAK; case 64: +#ifdef HARMONIZE_DoRTFTn + DoRTFTn_fx( re2, im2, NULL, 64 ); +#else DoRTFTn_fx_ivas( re2, im2, 64 ); +#endif BREAK; case 40: DoRTFT40_fx( re2, im2 ); @@ -6427,6 +6479,7 @@ static void fft_lenN( * Complex-value FFT *-----------------------------------------------------------------*/ +#ifndef HARMONIZE_DCT void fft_fx( Word32 *re, /* i/o: real part Qx */ Word32 *im, /* i/o: imag part Qx */ @@ -6434,6 +6487,7 @@ void fft_fx( const Word16 s /* i : sign */ ) { + cmplx x[960]; FOR( Word16 j = 0; j < length; j++ ) @@ -6511,6 +6565,73 @@ void fft_fx( return; } +#else +void fft_fx( + cmplx *x, /* i/o: complex data */ + const Word16 length /* i : length of fft */ +) +{ + SWITCH( length ) + { + case 20: + fft_len20_fx( x ); + BREAK; + case 40: + fft_lenN( x, FFT_RotVector_640_fx, 640, 5, 8, 8, 40 ); + BREAK; + case 64: + fft_lenN( x, FFT_RotVector_256_fx, 256, 8, 8, 8, 64 ); + BREAK; + case 80: + fft_lenN( x, FFT_RotVector_640_fx, 640, 10, 8, 4, 40 ); + BREAK; + case 100: + fft_lenN( x, FFT_RotVector_400_fx, 400, 10, 10, 4, 40 ); + BREAK; + case 120: + fft_lenN( x, FFT_RotVector_960_fx, 960, 15, 8, 4, 60 ); + BREAK; + case 128: + fft_lenN( x, FFT_RotVector_256_fx, 256, 16, 8, 4, 64 ); + BREAK; + case 160: + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 8, 2, 40 ); + BREAK; + case 200: + fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 10, 2, 40 ); + BREAK; + case 240: + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 8, 2, 60 ); + BREAK; + case 256: + fft_lenN( x, FFT_RotVector_256_fx, 256, 32, 8, 2, 64 ); + BREAK; + case 320: + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 16, 2, 40 ); + BREAK; + case 400: + fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 20, 2, 40 ); + BREAK; + case 480: + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 16, 2, 60 ); + BREAK; + case 600: + fft_lenN( x, FFT_RotVector_600_fx, 600, 30, 20, 2, 60 ); + BREAK; + case 640: + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 32, 2, 40 ); + BREAK; + case 960: 
+ fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 32, 2, 60 ); + BREAK; + default: + assert( !"fft length is not supported!" ); + } + + return; +} +#endif + void rfft_fx( Word32 *x, /* i/o: values Qx */ @@ -6522,6 +6643,9 @@ void rfft_fx( Word16 i, sizeOfFft2, sizeOfFft4; Word32 tmp, t1, t2, t3, t4; Word16 s1, s2; +#ifdef HARMONIZE_DCT + cmplx spec[L_FRAME48k]; +#endif sizeOfFft2 = shr( length, 1 ); sizeOfFft4 = shr( length, 2 ); @@ -6592,10 +6716,30 @@ void rfft_fx( SWITCH( isign ) { - case -1: +#ifdef HARMONIZE_DCT + FOR( i = 0; i < sizeOfFft2; i++ ) /* copy the interleaved re/im pairs of x into the complex work buffer */ + { + spec[i].re = x[2 * i]; + move32(); + spec[i].im = x[2 * i + 1]; + move32(); + } + + fft_fx( spec, sizeOfFft2 ); + + FOR( i = 0; i < sizeOfFft2; i++ ) /* write the transform back into x as interleaved re/im pairs */ + { + x[2 * i] = spec[i].re; + move32(); + x[2 * i + 1] = spec[i].im; + move32(); + } +#else fft_fx( x, x + 1, sizeOfFft2, 2 ); +#endif + // Qx tmp = L_add( x[0], x[1] ); x[1] = L_sub( x[0], x[1] ); // Qx @@ -6651,7 +6795,27 @@ void rfft_fx( move32(); } +#ifdef HARMONIZE_DCT + FOR( i = 0; i < sizeOfFft2; i++ ) /* copy the interleaved re/im pairs of x into the complex work buffer */ + { + spec[i].re = x[2 * i]; + move32(); + spec[i].im = x[2 * i + 1]; + move32(); + } + + fft_fx( spec, sizeOfFft2 ); + + FOR( i = 0; i < sizeOfFft2; i++ ) /* write the transform back into x as interleaved re/im pairs */ + { + x[2 * i] = spec[i].re; + move32(); + x[2 * i + 1] = spec[i].im; + move32(); + } +#else fft_fx( x, x + 1, sizeOfFft2, 2 ); +#endif FOR( i = 0; i < length; i += 2 ) { diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index a7b2461cb18fe44c1b763ca4e52997a1ef2e49c3..82b47b73e97a520d4dd8c60f28797abed460ebd9 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -10,7 +10,7 @@ #include /*-----------------------------------------------------------------* - * Local functions + * Local constants 
*-----------------------------------------------------------------*/ #define FFT3_ONE_THIRD 21845 /* 1/3 in Q16 */ @@ -19,6 +19,10 @@ #define KP951056516_16FX 2042378325 /* EDCT & EMDCT constants Q31*/ #define KP587785252_16FX 1262259213 /* EDCT & EMDCT constants Q31*/ +/*-----------------------------------------------------------------* + * Local function prototypes + *-----------------------------------------------------------------*/ + static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx ); static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx ); static void fft32_5_16fx( Word16 *x, Word16 *y, const Word16 *Idx ); @@ -32,86 +36,156 @@ static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx ); static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx ); static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w ); -#include "math_32.h" - -/*-----------------------------------------------------------------* - * Local functions - *-----------------------------------------------------------------*/ static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w ); +#ifndef HARMONIZE_DoRTFTn static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a ); +#endif static void cftfsub_fx( Word16 n, Word32 *a, const Word16 *w ); static void cft1st_fx( Word16 n, Word32 *a, const Word16 *w ); static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w ); +/*-----------------------------------------------------------------* + * DoRTFTn_fx() + * + * + *-----------------------------------------------------------------*/ + void DoRTFTn_fx( - Word32 *x, /* i/o : real part of input and output data Q(x) */ - Word32 *y, /* i/o : imaginary part of input and output data Q(x) */ - const Word16 n /* i : size of the FFT up to 1024 */ + Word32 *x, /* i/o : real part of input and output data Q(x) */ + Word32 *y, /* i/o : imaginary part of input and output data Q(x) */ +#ifdef 
OPTIMIZE_FFT_STACK + cmplx *spec, /* i/o : complex input and output data */ +#endif + const Word16 n /* i : size of the FFT up to 1024 */ ) { - Word16 i; Word32 z[2048], *pt; - pt = z; - FOR( i = 0; i < n; i++ ) +#ifdef OPTIMIZE_FFT_STACK + IF( spec != NULL ) { - *pt++ = x[i]; - move16(); - *pt++ = y[i]; - move16(); + pt = z; + FOR( i = 0; i < n; i++ ) + { + *pt++ = spec[i].re; + move16(); + *pt++ = spec[i].im; + move16(); + } + } + ELSE + { +#endif + pt = z; + FOR( i = 0; i < n; i++ ) + { + *pt++ = x[i]; + move16(); + *pt++ = y[i]; + move16(); + } +#ifdef OPTIMIZE_FFT_STACK } +#endif IF( EQ_16( n, 16 ) ) { +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 2 * n, z, Ip_fft16, w_fft512_fx ); +#else cdftForw_fx( 2 * n, z, Ip_fft16, w_fft512_fx_evs ); +#endif } ELSE IF( EQ_16( n, 32 ) ) { +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 2 * n, z, Ip_fft32, w_fft512_fx ); +#else cdftForw_fx( 2 * n, z, Ip_fft32, w_fft512_fx_evs ); +#endif } ELSE IF( EQ_16( n, 64 ) ) { +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 2 * n, z, Ip_fft64, w_fft512_fx ); +#else cdftForw_fx( 2 * n, z, Ip_fft64, w_fft512_fx_evs ); +#endif } ELSE IF( EQ_16( n, 128 ) ) { +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 2 * n, z, Ip_fft128, w_fft512_fx ); +#else cdftForw_fx( 2 * n, z, Ip_fft128, w_fft512_fx_evs ); +#endif } ELSE IF( EQ_16( n, 256 ) ) { +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 2 * n, z, Ip_fft256, w_fft512_fx ); +#else cdftForw_fx( 2 * n, z, Ip_fft256, w_fft512_fx_evs ); +#endif } ELSE IF( EQ_16( n, 512 ) ) { +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 2 * n, z, Ip_fft512, w_fft512_fx ); +#else cdftForw_fx( 2 * n, z, Ip_fft512, w_fft512_fx_evs ); +#endif } ELSE { assert( 0 ); } - x[0] = z[0]; - move16(); - y[0] = z[1]; - move16(); - pt = &z[2]; - FOR( i = n - 1; i >= 1; i-- ) +#ifdef OPTIMIZE_FFT_STACK + IF( spec != NULL ) + { + spec[0].re = z[0]; + move16(); + spec[0].im = z[1]; + move16(); + pt = &z[2]; + FOR( i = n - 1; i >= 1; i-- ) + { + spec[i].re = *pt++; + move16(); + spec[i].im = *pt++; + move16(); + } + } 
+ ELSE { - x[i] = *pt++; +#endif + x[0] = z[0]; move16(); - y[i] = *pt++; + y[0] = z[1]; move16(); + pt = &z[2]; + FOR( i = n - 1; i >= 1; i-- ) + { + x[i] = *pt++; + move16(); + y[i] = *pt++; + move16(); + } +#ifdef OPTIMIZE_FFT_STACK } +#endif return; } /*-----------------------------------------------------------------* * cdftForw_fx() - * Main fuction of Complex Discrete Fourier Transform + * Main function of Complex Discrete Fourier Transform, 32-bit data *-----------------------------------------------------------------*/ + static void cdftForw_fx( Word16 n, /* i : data length of real and imag */ Word32 *a, /* i/o : input/output data Q(q)*/ @@ -124,13 +198,20 @@ static void cdftForw_fx( /* Do FFT */ cftfsub_fx( n, a, w ); + + return; } /*-----------------------------------------------------------------* * bitrv2_SR_fx() * Bit reversal *-----------------------------------------------------------------*/ + +#ifdef HARMONIZE_DoRTFTn +void bitrv2_SR_fx( +#else static void bitrv2_SR_fx( +#endif Word16 n, /* i : data length of real and imag */ const Word16 *ip, /* i/o : work area for bit reversal */ Word32 *a /* i/o : input/output data Q(q)*/ @@ -1463,9 +1544,11 @@ void DoRTFT128_16fx( return; } + + /*-----------------------------------------------------------------* * cdftForw() - * Main fuction of Complex Discrete Fourier Transform + * Main function of Complex Discrete Fourier Transform, 16-bit data *-----------------------------------------------------------------*/ static void cdftForw_16fx( Word16 n, /* i : data length of real and imag */ diff --git a/lib_com/gs_inact_switching_fx.c b/lib_com/gs_inact_switching_fx.c index d00b8e3c0346b63084b679e242a0d2ffec76062e..16e72cd585feedd7c5c46ca8bae3a4da28d23ec7 100644 --- a/lib_com/gs_inact_switching_fx.c +++ b/lib_com/gs_inact_switching_fx.c @@ -89,7 +89,11 @@ void Inac_switch_ematch_fx( ELSE IF( EQ_16( coder_type, VOICED ) || EQ_16( coder_type, GENERIC ) || EQ_16( coder_type, TRANSITION ) || ( last_core != 
ACELP_CORE ) || NE_16( last_codec_mode, MODE1 ) || ( ( element_mode > EVS_MONO ) && EQ_16( coder_type, UNVOICED ) ) ) { /* Find spectrum and energy per band for GC and VC frames */ +#ifdef HARMONIZE_DCT + edct_16fx( exc2, dct_exc_tmp, L_frame, 5 ); +#else edct_16fx( exc2, dct_exc_tmp, L_frame, 5, element_mode ); +#endif Ener_per_band_comp_fx( dct_exc_tmp, Ener_per_bd, Q_exc, MBANDS_GN, 1, L_frame ); @@ -103,7 +107,11 @@ void Inac_switch_ematch_fx( ELSE IF( ( coder_type == INACTIVE ) && inactive_coder_type_flag ) { /* Find spectrum and energy per band for inactive frames */ +#ifdef HARMONIZE_DCT + edct_16fx( exc2, dct_exc_tmp, L_frame, 5 ); +#else edct_16fx( exc2, dct_exc_tmp, L_frame, 5, element_mode ); +#endif Ener_per_band_comp_fx( dct_exc_tmp, Ener_per_bd, Q_exc, MBANDS_GN, 1, L_frame ); @@ -188,7 +196,11 @@ void Inac_switch_ematch_fx( Scale_sig( dct_exc_tmp, 240, 1 ); // Q_exc Scale_sig( exc2, 240, 1 ); // Q_exc } +#ifdef HARMONIZE_DCT + edct_16fx( dct_exc_tmp, exc2, L_frame, 5 ); +#else edct_16fx( dct_exc_tmp, exc2, L_frame, 5, element_mode ); +#endif } return; diff --git a/lib_com/ivas_mdct_imdct_fx.c b/lib_com/ivas_mdct_imdct_fx.c index fde3edaddd34d6fd40aa67a67ff0d726193d3b5e..304ed9fccc810d5997e135d93dcea9ebb2ce98b5 100644 --- a/lib_com/ivas_mdct_imdct_fx.c +++ b/lib_com/ivas_mdct_imdct_fx.c @@ -52,9 +52,17 @@ #define IVAS_MDCT_SCALING_GAIN_16k_Q31 0X00003193 /* 5.909703592235439e-06f */ #define IVAS_MDCT_SCALING_GAIN_16k_Q46 0x18C97EF4 - #define IVAS_ONE_BY_IMDCT_SCALING_GAIN_Q16 0x08432A51 /* 1 / 2115.165304808f */ +#ifdef HARMONIZE_DoRTFTn + +/*------------------------------------------------------------------------------------------* + * Local functions prototypes + *------------------------------------------------------------------------------------------*/ + +static void ivas_get_twid_factors_fx( const Word16 length, const Word16 **pTwid_re, const Word16 **pTwid_im ); + +#endif 
/*-----------------------------------------------------------------------------------------* * Function ivas_tda_fx() @@ -154,7 +162,11 @@ void ivas_mdct_fx( len_by_2 = shr( length, 1 ); ivas_mdct_scaling_gain = ivas_get_mdct_scaling_gain_fx( len_by_2 ); // Q46 +#ifdef HARMONIZE_DoRTFTn + ivas_get_twid_factors_fx( length, &pTwid_re, &pTwid_im ); +#else ivas_get_twid_factors_fx1( length, &pTwid_re, &pTwid_im ); +#endif FOR( i = 0; i < len_by_2; i++ ) { @@ -299,8 +311,11 @@ void ivas_imdct_fx( Word32 im[IVAS_480_PT_LEN]; len_by_2 = shr( length, 1 ); +#ifdef HARMONIZE_DoRTFTn + ivas_get_twid_factors_fx( length, &pTwid_re, &pTwid_im ); +#else ivas_get_twid_factors_fx1( length, &pTwid_re, &pTwid_im ); - +#endif FOR( i = 0; i < len_by_2; i++ ) { re[i] = L_add( Mpy_32_16_1( pIn[length - 2 * i - 1], pTwid_re[i] ), Mpy_32_16_1( pIn[2 * i], pTwid_im[i] ) ); /*stl_arr_index Q24*/ @@ -310,6 +325,7 @@ void ivas_imdct_fx( } ivas_ifft_cplx( &re[0], &im[0], len_by_2 ); + IF( len_by_2 > 0 ) { *q_out = sub( *q_out, Q15 ); @@ -351,11 +367,16 @@ void ivas_imdct_fx( /*-----------------------------------------------------------------------------------------* - * Function ivas_get_twid_factors_fx1() + * Function ivas_get_twid_factors_fx() * * Sets/Maps the fft twiddle tables based on fft length *-----------------------------------------------------------------------------------------*/ + +#ifdef HARMONIZE_DoRTFTn +static void ivas_get_twid_factors_fx( +#else void ivas_get_twid_factors_fx1( +#endif const Word16 length, // Q0 const Word16 **pTwid_re, // Q15 const Word16 **pTwid_im ) // Q15 @@ -375,11 +396,13 @@ void ivas_get_twid_factors_fx1( *pTwid_re = (const Word16 *) &ivas_cos_twiddle_160_fx[0]; // Q15 *pTwid_im = (const Word16 *) &ivas_sin_twiddle_160_fx[0]; // Q15 } +#ifndef HARMONIZE_DoRTFTn ELSE IF( EQ_16( length, 80 ) ) { *pTwid_re = (const Word16 *) &ivas_cos_twiddle_80_fx[0]; // Q15 *pTwid_im = (const Word16 *) &ivas_sin_twiddle_80_fx[0]; // Q15 } +#endif ELSE { assert( !"Not 
supported FFT length!" ); diff --git a/lib_com/ivas_mdft_imdft_fx.c b/lib_com/ivas_mdft_imdft_fx.c index 49d1cbbf11c01d8bd02dd538abad93162ee0fab9..19774e7126f009bc4ee5d16d273300632c86cc9c 100644 --- a/lib_com/ivas_mdft_imdft_fx.c +++ b/lib_com/ivas_mdft_imdft_fx.c @@ -214,7 +214,29 @@ static void ivas_ifft_cplx1_fx( move32(); } +#ifdef HARMONIZE_DCT + cmplx x[L_FRAME48k]; + + FOR( i = 0; i < length; i++ ) + { + x[i].re = re[i]; + move32(); + x[i].im = im[i]; + move32(); + } + + fft_fx( x, length ); + + FOR( i = 0; i < length; i++ ) + { + re[i] = x[i].re; + move32(); + im[i] = x[i].im; + move32(); + } +#else fft_fx( re, im, length, 1 ); +#endif return; } @@ -233,8 +255,12 @@ void ivas_mdft_fx( const Word16 mdft_length /* i : MDFT length */ ) { +#ifdef HARMONIZE_DCT + cmplx spec[L_FRAME48k]; +#else Word32 re[L_FRAME48k]; Word32 im[L_FRAME48k]; +#endif Word16 j, len_by_2; const Word32 *pTwid; // Q31 len_by_2 = shr( mdft_length, 1 ); @@ -244,23 +270,53 @@ void ivas_mdft_fx( { FOR( j = 0; j < mdft_length; j++ ) { +#ifdef HARMONIZE_DCT + spec[j].re = Mpy_32_32( pIn[j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); + spec[j].im = Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); +#else re[j] = Mpy_32_32( pIn[j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); im[j] = Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); +#endif } } ELSE { FOR( j = 0; j < mdft_length; j++ ) { +#ifdef HARMONIZE_DCT + spec[j].re = Msub_32_32( Mpy_32_32( pIn[j], pTwid[j] ), pIn[add( mdft_length, j )], pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); + spec[j].im = Msub_32_32( Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ), pIn[mdft_length + j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); +#else re[j] = Msub_32_32( Mpy_32_32( pIn[j], pTwid[j] ), pIn[add( mdft_length, j )], pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); im[j] = 
Msub_32_32( Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ), pIn[mdft_length + j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); +#endif } } +#ifdef HARMONIZE_DCT + fft_fx( spec, mdft_length ); + + FOR( j = 0; j < len_by_2; j++ ) + { + pOut_re[2 * j] = spec[j].re; + move32(); + pOut_re[2 * j + 1] = spec[mdft_length - j - 1].re; + move32(); + + pOut_im[2 * j] = spec[j].im; + move32(); + pOut_im[2 * j + 1] = L_negate( spec[mdft_length - j - 1].im ); + move32(); + } +#else fft_fx( re, im, mdft_length, 1 ); FOR( j = 0; j < len_by_2; j++ ) { @@ -274,6 +330,8 @@ void ivas_mdft_fx( pOut_im[2 * j + 1] = L_negate( im[mdft_length - j - 1] ); // Qin move32(); } +#endif + return; } diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 17d4ba062813a1586791e833f16142a798425d83..a28f20d875716f72f895bfdaae714347b2709ce1 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -216,12 +216,14 @@ void ivas_dct_windowing_fx( Word32 *pTemp_lfe ); +#ifndef HARMONIZE_DoRTFTn void ivas_get_twid_factors_fx1( const Word16 length, // Q0 const Word16 **pTwid_re, // Q15 const Word16 **pTwid_im ); +#endif Word32 ivas_get_mdct_scaling_gain_fx( const Word16 dct_len_by_2 ); diff --git a/lib_com/ivas_rom_com.h b/lib_com/ivas_rom_com.h index 6081e450affc3b2e778d31e2a3e89cbf9c2d220d..1ac8737ac7251f7890b24fd64e37034d9c99bd44 100644 --- a/lib_com/ivas_rom_com.h +++ b/lib_com/ivas_rom_com.h @@ -502,8 +502,10 @@ extern const Word16 ivas_sin_twiddle_320_fx[IVAS_320_PT_LEN >> 1]; extern const Word16 ivas_cos_twiddle_320_fx[IVAS_320_PT_LEN >> 1]; extern const Word16 ivas_sin_twiddle_160_fx[IVAS_160_PT_LEN >> 1]; extern const Word16 ivas_cos_twiddle_160_fx[IVAS_160_PT_LEN >> 1]; +#ifndef HARMONIZE_DoRTFTn extern const Word16 ivas_sin_twiddle_80_fx[IVAS_80_PT_LEN >> 1]; extern const Word16 ivas_cos_twiddle_80_fx[IVAS_80_PT_LEN >> 1]; +#endif extern const Word16 nf_tw_smoothing_coeffs_fx[N_LTP_GAIN_MEMS]; extern const Word32 dft_res_gains_q_fx[][2]; extern const Word16 
dft_res_cod_alpha_fx[STEREO_DFT_BAND_MAX]; diff --git a/lib_com/ivas_rom_com_fx.c b/lib_com/ivas_rom_com_fx.c index 4ef543df7eac85d5479913267c65404b9e25d60b..3aa42c5a6f453913202e9b6cdb855715fe74cf22 100644 --- a/lib_com/ivas_rom_com_fx.c +++ b/lib_com/ivas_rom_com_fx.c @@ -3044,6 +3044,7 @@ const Word16 ivas_cos_twiddle_160_fx[ IVAS_160_PT_LEN >> 1 ] = { SHC( 0x13b6 ), SHC( 0x1139 ), SHC( 0x0ebb ), SHC( 0x0c3b ), SHC( 0x09ba ), SHC( 0x0738 ), SHC( 0x04b6 ), SHC( 0x0232 ), }; +#ifndef HARMONIZE_DoRTFTn const Word16 ivas_sin_twiddle_80_fx[ IVAS_80_PT_LEN >> 1 ] = { SHC( 0xff60 ), SHC( 0xfa59 ), SHC( 0xf555 ), SHC( 0xf055 ), SHC( 0xeb5c ), SHC( 0xe66a ), SHC( 0xe183 ), SHC( 0xdca7 ), SHC( 0xd7da ), SHC( 0xd31c ), SHC( 0xce70 ), SHC( 0xc9d8 ), SHC( 0xc555 ), SHC( 0xc0e9 ), SHC( 0xbc96 ), SHC( 0xb85e ), @@ -3059,7 +3060,7 @@ const Word16 ivas_cos_twiddle_80_fx[ IVAS_80_PT_LEN >> 1 ] = { SHC( 0x4aba ), SHC( 0x4696 ), SHC( 0x4257 ), SHC( 0x3dfe ), SHC( 0x398c ), SHC( 0x3504 ), SHC( 0x3067 ), SHC( 0x2bb6 ), SHC( 0x26f4 ), SHC( 0x2223 ), SHC( 0x1d45 ), SHC( 0x185a ), SHC( 0x1367 ), SHC( 0x0e6b ), SHC( 0x096a ), SHC( 0x0465 ), }; - +#endif const Word16 nf_tw_smoothing_coeffs_fx[N_LTP_GAIN_MEMS] = { 13107, 6553, 6553, 6553 diff --git a/lib_com/options.h b/lib_com/options.h index 10649419f741d49c9a83d1b6616ef9696e5edebb..70845710c1cccd3319c964753231bbe8933ba14c 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -86,8 +86,10 @@ #define HARM_HQ_CORE_KEEP_BE /* hack to keep all BE after HQ core functions harmonization; pending resolving issues #2450, #2451, #2452 */ #define FIX_BASOP_2457_HARM_GEN /* FhG: harmonization of function generate_comfort_noise_dec_hf_ivas_fx()*/ #define FIX_BASOP_2478_HARM_ENC_PRM_HM /* FhG: basop issue 2478: harmonize enc_prm_hm() and enc_prm_hm_ivas_fx() */ -#define FIX_2455_HARMONIZE_generate_comfort_noise_enc /* FhG: harmonize generate_comfort_noise_enc and generate_comfort_noise_enc_ivas */ -#define FIX_2455_HARMONIZE_configureFdCngEnc /* 
FhG: harmonize generate_comfort_noise_enc and generate_comfort_noise_enc_ivas */ +#define FIX_2455_HARMONIZE_generate_comfort_noise_enc /* FhG: harmonize generate_comfort_noise_enc and generate_comfort_noise_enc_ivas */ +#define FIX_2455_HARMONIZE_configureFdCngEnc /* FhG: harmonize generate_comfort_noise_enc and generate_comfort_noise_enc_ivas */ +#define OPTIMIZE_FFT_STACK /* VA: removal of intermediate FFT buffers */ +#define HARMONIZE_DCT /* VA: removal of duplicated DCT functions */ /* #################### End BE switches ################################## */ @@ -96,6 +98,7 @@ /* any switch which is non-be wrt. TS 26.251 V3.0 */ #define FIX_2398_PRECISSION_ORIENTATION_TRACKING /* FhG: use refinement of Sqrt32 within certain functions*/ +#define HARMONIZE_DoRTFTn /* VA: harmonize functions DoRTFTn_fx() and DoRTFTn_fx_ivas() */ /* ##################### End NON-BE switches ########################### */ diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index de383d0a7a6919950b97eea9f34fc0025ffad88d..629c6fd35a1655321c32d4ca19f17a2cf2fa3385 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -1292,6 +1292,14 @@ void scale_sig32( const Word16 exp0 /* i : exponent: x = round(x << exp) Qx xx exp */ ); +#ifdef OPTIMIZE_FFT_STACK +void scale_sig32_cmplx( + cmplx x[], /* i/o: signal to scale Qx */ + const Word16 lg, /* i : size of x[] Q0 */ + const Word16 exp0 /* i : exponent: x = round(x << exp) Qx exp */ +); + +#endif void Scale_sig64( Word64 x[], /* i/o: signal to scale Qx */ Word16 len, /* i : size of x[] Q0 */ @@ -3982,16 +3990,22 @@ void edct_fx( const Word32 *x, /* i : i signal Qq */ Word32 *y, /* o : output transform Qq */ Word16 length, /* i : length */ - Word16 *q /* i : Q value of i signal */ +#ifdef HARMONIZE_DCT + Word16 *q, /* i : Q value of input signal */ + const Word16 element_mode /* i : element mode */ +#else + Word16 *q /* i : Q value of i signal */ +#endif ); +#ifndef HARMONIZE_DCT void edct_ivas_fx( const Word32 *x, /* i : input signal Qq */ 
Word32 *y, /* o : output transform Qq */ Word16 length, /* i : length Q0*/ Word16 *q /* i : Q value of input signal */ ); - +#endif void edst_fx( const Word32 *x, /* i : i signal Qq */ Word32 *y, /* o : output transform Qq */ @@ -4003,8 +4017,12 @@ void edct_16fx( const Word16 *x, /* i : i signal Qx */ Word16 *y, /* o : output transform Qx */ Word16 length, /* i : length */ - Word16 bh, /* bit-headroom */ - const Word16 element_mode ); + Word16 bh /* bit-headroom */ +#ifndef HARMONIZE_DCT + , + const Word16 element_mode +#endif +); void iedct_short_fx( const Word32 *in, /* i : i vector */ @@ -4025,12 +4043,24 @@ void fft16( Word32 *re, Word32 *im, Word16 s, Word16 bScale ); void BASOP_cfft( cmplx *pComplexBuf, Word16 sizeOfFft, Word16 *scale, Word32 workBuffer[2 * BASOP_CFFT_MAX_LENGTH] ); void BASOP_rfft( Word32 *x, Word16 sizeOfFft, Word16 *scale, Word16 isign ); +#ifdef HARMONIZE_DoRTFTn +void bitrv2_SR_fx( + Word16 n, /* i : data length of real and imag Q0 */ + const Word16 *ip, /* i/o: work area for bit reversal Q0 */ + Word32 *a /* i/o: input/output data Qx */ +); +#endif + void DoRTFTn_fx( - Word32 *x, /* i/o : real part of i and output data */ - Word32 *y, /* i/o : imaginary part of i and output data */ - const Word16 n /* i : size of the FFT up to 1024 */ + Word32 *x, /* i/o : real part of i and output data */ + Word32 *y, /* i/o : imaginary part of i and output data */ +#ifdef OPTIMIZE_FFT_STACK + cmplx *spec, /* i/o : complex input and output data */ +#endif + const Word16 n /* i : size of the FFT up to 1024 */ ); +#ifndef HARMONIZE_DoRTFTn void DoRTFT480_fx( Word32 *x, /* i/o: real part of input and output data */ Word32 *y /* i/o: imaginary part of input and output data */ @@ -4071,6 +4101,7 @@ void DoRTFT20_fx( Word32 *y /* i/o: imaginary part of input and output data */ ); +#endif Word16 RFFTN_fx( Word32 *data, const Word16 *sine_table, @@ -4083,10 +4114,15 @@ void DoFFT_fx( const Word16 length ); void fft_fx( +#ifndef HARMONIZE_DCT Word32 *re, /* 
i/o: real part */ Word32 *im, /* i/o: imag part */ const Word16 length, /* i : length of fft */ const Word16 s /* i : sign */ +#else + cmplx *spec, /* i/o: complex data */ + const Word16 length /* i : length of fft */ +#endif ); void rfft_fx( @@ -4096,16 +4132,19 @@ void rfft_fx( const Word16 isign /* i : sign */ ); +#ifndef HARMONIZE_DoRTFTn void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data */ Word32 *y, /* i/o: imaginary part of input and output data */ const Word16 n /* i : size of the FFT up to 1024 */ ); - - +#endif Word16 find_guarded_bits_fx( const Word32 n ); Word16 L_norm_arr( const Word32 *arr, Word16 size ); +#ifdef OPTIMIZE_FFT_STACK +Word16 L_norm_arr_cmplx( const cmplx *arr, Word16 size ); +#endif Word16 norm_arr( Word16 *arr, Word16 size ); Word16 W_norm_arr( Word64 *arr, Word16 size ); @@ -4115,6 +4154,7 @@ Flag is_zero_arr( Word32 *arr, Word16 size ); Flag is_zero_arr16( Word16 *arr, Word16 size ); Flag is_zero_arr64( Word64 *arr, Word16 size ); +#ifndef HARMONIZE_DCT void edct2_fx_ivas( const Word16 n, const Word16 isgn, @@ -4122,7 +4162,7 @@ void edct2_fx_ivas( Word32 *a, const Word16 *ip, const Word16 *w ); - +#endif void edct2_fx( Word16 n, Word16 isgn, diff --git a/lib_com/rom_com.h b/lib_com/rom_com.h index 5d5feebcbeeb8424eda7b769749f826c3e05d86a..f70325356ff7a2e30bbe3ae59a92b79e917c9779 100644 --- a/lib_com/rom_com.h +++ b/lib_com/rom_com.h @@ -1059,32 +1059,34 @@ extern const Word16 Gain_dic3_NB_fx[]; /*Q12 */ * FFT transform *------------------------------------------------------------------------------*/ -extern const Word16 Odx_fft64[64]; // Q0 -extern const Word16 Ip_fft64[6]; // Q0 -extern const Word16 Odx_fft32_15[32]; // Q0 -extern const Word32 w_fft32_16fx[16]; // Q30 -extern const Word16 Ip_fft32[6]; // Q0 -extern const Word16 Odx_fft32_5[32]; // Q0 -extern const Word16 Odx_fft16[16]; // Q0 -extern const Word16 Ip_fft16[6]; // Q0 -extern const Word16 Ip_fft8[6]; // Q0 -extern const Word16 
Idx_dortft80[80]; // Q0 -extern const Word16 Idx_dortft120[120]; // Q0 -extern const Word16 Idx_dortft160[160]; // Q0 -extern const Word16 Idx_dortft320[320]; // Q0 -extern const Word16 Idx_dortft480[480]; // Q0 -extern const Word16 Ip_fft128[10]; // Q0 -extern const Word32 w_fft128_16fx[64]; // Q30 -extern const Word16 Ip_fft256[10]; // Q0 -extern const Word16 Ip_fft512[18]; // Q0 +extern const Word16 Odx_fft64[64]; // Q0 +extern const Word16 Ip_fft64[6]; // Q0 +extern const Word16 Odx_fft32_15[32]; // Q0 +extern const Word32 w_fft32_16fx[16]; // Q30 +extern const Word16 Ip_fft32[6]; // Q0 +extern const Word16 Odx_fft32_5[32]; // Q0 +extern const Word16 Odx_fft16[16]; // Q0 +extern const Word16 Ip_fft16[6]; // Q0 +extern const Word16 Ip_fft8[6]; // Q0 +extern const Word16 Idx_dortft80[80]; // Q0 +extern const Word16 Idx_dortft120[120]; // Q0 +extern const Word16 Idx_dortft160[160]; // Q0 +extern const Word16 Idx_dortft320[320]; // Q0 +extern const Word16 Idx_dortft480[480]; // Q0 +extern const Word16 Ip_fft128[10]; // Q0 +extern const Word32 w_fft128_16fx[64]; // Q30 +extern const Word16 Ip_fft256[10]; // Q0 +extern const Word16 Ip_fft512[18]; // Q0 +#ifndef HARMONIZE_DoRTFTn extern const Word16 w_fft512_fx_evs[256]; // Q14 -extern const Word16 Idx_dortft40[40]; // Q0 -extern const Word16 Odx_fft8_5[8]; // Q0 -extern const Word16 ip_edct2_64[6]; // Q0 -extern const Word16 w_edct2_64_fx[80]; /*Q14 */ -extern const Word16 Idx_dortft20[20]; // Q0 -extern const Word16 Odx_fft4_5[4]; // Q0 -extern const Word16 Ip_fft4[6]; // Q0 +#endif +extern const Word16 Idx_dortft40[40]; // Q0 +extern const Word16 Odx_fft8_5[8]; // Q0 +extern const Word16 ip_edct2_64[6]; // Q0 +extern const Word16 w_edct2_64_fx[80]; /*Q14 */ +extern const Word16 Idx_dortft20[20]; // Q0 +extern const Word16 Odx_fft4_5[4]; // Q0 +extern const Word16 Ip_fft4[6]; // Q0 /*----------------------------------------------------------------------------------* * FEC for HQ core @@ -1550,16 +1552,18 @@ extern 
const Word16 ivas_sine_panning_tbl_fx[601]; // Q15 extern const Word16 ivas_sin_az_fx[361]; // Q15 // edct_fx.c -extern const Word16 sin_scale_tbl_960[960]; // Q15 -extern const Word16 cos_scale_tbl_960[960]; // Q15 -extern const Word16 cos_scale_tbl_640[640]; // Q15 -extern const Word16 sin_scale_tbl_640[640]; // Q15 -extern const Word16 sin_scale_tbl_512[512]; // Q15 -extern const Word16 cos_scale_tbl_512[512]; // Q15 +extern const Word16 sin_scale_tbl_960[960]; // Q15 +extern const Word16 cos_scale_tbl_960[960]; // Q15 +extern const Word16 cos_scale_tbl_640[640]; // Q15 +extern const Word16 sin_scale_tbl_640[640]; // Q15 +extern const Word16 sin_scale_tbl_512[512]; // Q15 +extern const Word16 cos_scale_tbl_512[512]; // Q15 +#ifndef OPTIMIZE_FFT_STACK extern const Word16 cos_scale_tbl_1200[1200]; // Q15 extern const Word16 sin_scale_tbl_1200[1200]; // Q15 extern const Word16 cos_scale_tbl_800[800]; // Q15 extern const Word16 sin_scale_tbl_800[800]; // Q15 +#endif extern const Word16 scales_ivas_fx[][MAX_NO_SCALES * 2]; // Q11 extern const Word16 scales_p_ivas_fx[][MAX_NO_SCALES * 2]; // Q11 diff --git a/lib_com/rom_com_fx.c b/lib_com/rom_com_fx.c index f73ab13f3fe3c17ccc95e9137c253542e6928634..6cea712586c615a215b5009b94201b97a26f1aeb 100644 --- a/lib_com/rom_com_fx.c +++ b/lib_com/rom_com_fx.c @@ -18420,7 +18420,11 @@ const Word16 Ip_fft256[10] = {128, 1, 0, 256, 128, 384, 64, 320,192, 448}; // const Word16 Ip_fft512[18] = {256, 1, 0, 512, 256, 768, 128, 640,384, 896, 64, 576, 320, 832, 192, 704,448, 960}; // Q0 +#ifdef HARMONIZE_DoRTFTn +const Word16 w_fft512_fx[256] =//Q14 +#else const Word16 w_fft512_fx_evs[256] =//Q14 +#endif { 16384, 0, 11585, 11585, 15137, 6270, 6270, 15137, 16069, 3196, 9102, 13623, 13623, 9102, 3196, 16069, @@ -27386,6 +27390,7 @@ const Word16 cos_scale_tbl_512[512] = /* Q15 */ 201, 100 }; +#ifndef OPTIMIZE_FFT_STACK const Word16 sin_scale_tbl_1200[1200] = { /* Q15 */ 0, 42, 85, 128, 171, 214, 257, 300, 343, 386, 428, 471, 514, 557, 600, 
643, @@ -27897,7 +27902,7 @@ const Word16 cos_scale_tbl_800[800] = { /* Q15 */ 32750, 32752, 32754, 32756, 32757, 32759, 32760, 32761, 32762, 32763, 32764, 32765, 32765, 32766, 32766, 32766 }; - +#endif const Word16 scales_ivas_fx[][MAX_NO_SCALES * 2] = /* 2 subvectors Q11*/ { { @@ -31443,6 +31448,7 @@ const Word16 w_fft256_fx[128] = { SHC( 0x7ff6 ), }; +#ifndef HARMONIZE_DoRTFTn const Word16 w_fft512_fx[256] = { // Q15 SHC( 0x7fff ), @@ -31703,6 +31709,7 @@ const Word16 w_fft512_fx[256] = { SHC( 0x7ffd ), }; +#endif const Word16 FFT_RotVector_960_fx[1860] = { // Q15 SHC( 0x7fff ), diff --git a/lib_com/scale_mem_fx.c b/lib_com/scale_mem_fx.c index 01c2442a8eb73da4686608a77743e259c61a995f..4f095703af811dd6c6c8fa2719b1e5b81bd8caf9 100644 --- a/lib_com/scale_mem_fx.c +++ b/lib_com/scale_mem_fx.c @@ -337,6 +337,37 @@ void scale_sig32_r( return; } +#ifdef OPTIMIZE_FFT_STACK +void scale_sig32_cmplx( + cmplx x[], /* i/o: signal to scale Qx */ + const Word16 lg, /* i : size of x[] Q0 */ + const Word16 exp0 /* i : exponent: x = round(x << exp) Qx exp */ +) +{ + Word16 i; + + FOR( i = 0; i < lg; i++ ) + { + /* saturation can occur here */ + x[i].re = L_shl( x[i].re, exp0 ); + move32(); + if ( 0 == exp0 ) + { + BREAK; + } + x[i].im = L_shl( x[i].im, exp0 ); + move32(); + if ( 0 == exp0 ) + { + BREAK; + } + } + + return; +} +#endif + + /*-------------------------------------------------------------------* * Rescale_mem: * diff --git a/lib_com/tcx_mdct_fx.c b/lib_com/tcx_mdct_fx.c index 4fd016729f51dde64c070c963f22a3d129a4e74f..d29a4ffede893df57886efed30e7b8ad05426d86 100644 --- a/lib_com/tcx_mdct_fx.c +++ b/lib_com/tcx_mdct_fx.c @@ -138,7 +138,11 @@ void TCX_MDCT( *y_e = sub( 15, *y_e ); move16(); +#ifdef HARMONIZE_DCT + edct_fx( y, y, l / 2 + m + r / 2, y_e, EVS_MONO ); +#else edct_fx( y, y, l / 2 + m + r / 2, y_e ); +#endif *y_e = sub( 15 - 1, *y_e ); move16(); return; @@ -220,7 +224,11 @@ void TCX_MDCT_Inverse( R2 = shr( r, 1 ); x_e = sub( 15, x_e ); +#ifdef HARMONIZE_DCT + 
edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e, EVS_MONO ); +#else edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e ); +#endif x_e = sub( 15, x_e ); fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); /* exp(fac_e) */ diff --git a/lib_com/tools_fx.c b/lib_com/tools_fx.c index fd0eceedb65d5b7218bed50924e9061984884c63..cf07a4e227e779755985ab5cda61b854e7b6035f 100644 --- a/lib_com/tools_fx.c +++ b/lib_com/tools_fx.c @@ -5378,6 +5378,35 @@ Word16 L_norm_arr( return q; } +#ifdef OPTIMIZE_FFT_STACK +Word16 L_norm_arr_cmplx( + const cmplx *arr, + Word16 size ) +{ + Word16 q = 31; + move16(); + + FOR( Word16 i = 0; i < size; i++ ) + { + Word16 q_tst; + + q_tst = norm_l( arr[i].re ); + if ( arr[i].re != 0 ) + { + q = s_min( q, q_tst ); + } + + q_tst = norm_l( arr[i].im ); + if ( arr[i].im != 0 ) + { + q = s_min( q, q_tst ); + } + } + + return q; +} +#endif + Word16 norm_arr( Word16 *arr, Word16 size ) diff --git a/lib_com/trans_direct_fx.c b/lib_com/trans_direct_fx.c index c84cd9efad4c495df32efee54db2a27c02a2faa0..fa0a27a9a43bf4deac452becb6bf57849418f095 100644 --- a/lib_com/trans_direct_fx.c +++ b/lib_com/trans_direct_fx.c @@ -103,7 +103,11 @@ void direct_transform_fx( Qs[0] = *Q; move16(); +#ifdef HARMONIZE_DCT + edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[0], EVS_MONO ); +#else edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[0] ); +#endif Qmin = s_min( Qs[0], Qmin ); iseg_fx = &in32_r16_fx[segment_length4]; @@ -136,7 +140,11 @@ void direct_transform_fx( Qs[seg] = *Q; move16(); +#ifdef HARMONIZE_DCT + edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[seg], EVS_MONO ); +#else edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[seg] ); +#endif Qmin = s_min( Qs[seg], Qmin ); iseg_fx += segment_length2; @@ -164,7 +172,11 @@ void direct_transform_fx( } Qs[NUM_TIME_SWITCHING_BLOCKS - 1] = *Q; move16(); +#ifdef HARMONIZE_DCT + edct_fx( dctin32_fx, oseg_fx, segment_length2, 
&Qs[NUM_TIME_SWITCHING_BLOCKS - 1], EVS_MONO ); +#else edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[NUM_TIME_SWITCHING_BLOCKS - 1] ); +#endif Qmin = s_min( Qs[NUM_TIME_SWITCHING_BLOCKS - 1], Qmin ); *Q = Qmin; @@ -183,7 +195,11 @@ void direct_transform_fx( } ELSE { +#ifdef HARMONIZE_DCT + edct_fx( in32_fx, out32_fx, L, Q, EVS_MONO ); +#else edct_fx( in32_fx, out32_fx, L, Q ); +#endif } return; diff --git a/lib_com/trans_inv_fx.c b/lib_com/trans_inv_fx.c index 34d424f26e0bcdebff96c44bcc3584ff2f2f725c..32e188f2d87b8b2d14aa1b4e38bd30d47155c0d0 100644 --- a/lib_com/trans_inv_fx.c +++ b/lib_com/trans_inv_fx.c @@ -1122,6 +1122,10 @@ void Inverse_Transform( } ELSE { +#ifdef HARMONIZE_DCT + edct_fx( in_mdct, out, L, Q, EVS_MONO ); +#else edct_fx( in_mdct, out, L, Q ); +#endif } } diff --git a/lib_dec/FEC_HQ_phase_ecu_fx.c b/lib_dec/FEC_HQ_phase_ecu_fx.c index b760376539aadbad2b5af27c37639a595505aef2..1120a889befaa4d4fcecbd37a81dc49aa9058bd0 100644 --- a/lib_dec/FEC_HQ_phase_ecu_fx.c +++ b/lib_dec/FEC_HQ_phase_ecu_fx.c @@ -2556,7 +2556,11 @@ static void fec_ecu_dft_fx( *exp = s_min( *exp, 15 ); } +#ifdef OPTIMIZE_FFT_STACK + DoRTFTn_fx( Tfr32, Tfi32, NULL, *Nfft ); +#else DoRTFTn_fx( Tfr32, Tfi32, *Nfft ); +#endif N_LP = shr( *Nfft, 1 ); L_tmp = L_deposit_l( 0 ); diff --git a/lib_dec/FEC_fx.c b/lib_dec/FEC_fx.c index fe2780a8600c7dc9457102ef016ee65929d384a1..9674ab9c7a52191baf2822939be8c0716a021e1d 100644 --- a/lib_dec/FEC_fx.c +++ b/lib_dec/FEC_fx.c @@ -5,15 +5,20 @@ #include #include "options.h" /* Compilation switches */ #include "cnst.h" /* Common constants */ -#include "rom_com.h" /* Common static table prototypes */ +#include "rom_com.h" /* Common static table prototypes */ #include "rom_dec.h" /* Decoder static table prototypes */ #include "prot_fx.h" /* Function prototypes */ #include "basop_util.h" + + /*-------------------------------------------------------------------* * Local function prototypes 
*-------------------------------------------------------------------*/ + static void pulseRes_preCalc( Word16 *cond1, Word16 *cond2, Word32 *cond3, Word16 new_pit, Word16 Tc, Word16 L_frame ); void gain_dec_bfi_fx( Word16 *past_qua_en ); + + /*======================================================================*/ /* FUNCTION : FEC_exc_estim_fx() */ /*----------------------------------------------------------------------*/ @@ -47,7 +52,6 @@ void gain_dec_bfi_fx( Word16 *past_qua_en ); /* _ (Word16[]) voice_factors_fx : frame error rate Q15 */ /* _ (Word16[]) FEC_pitch_fx(tmp_tc): FEC pitch Q6 */ /*-----------------------------------------------------------------------*/ - /* _ (Word16) st_fx->lp_gainp_fx : FEC -low-pass filtered pitch gain Q14 */ /* _ (Word16) st_fx->seed :FEC-seed for random generator for excitation*/ /* _ (Word16) st_fx->bfi_pitch_fx : LP filter coefficient */ @@ -57,7 +61,6 @@ void gain_dec_bfi_fx( Word16 *past_qua_en ); /* _ None */ /*=======================================================================*/ - void FEC_exc_estim_fx( Decoder_State *st_fx, /* i/o: Decoder static memory */ const Word16 L_frame, /* i : length of the frame */ @@ -73,7 +76,6 @@ void FEC_exc_estim_fx( Word16 *tmp_noise /* o : long-term noise energy Q0 */ ) { - Word16 exc2_buf[L_FRAME16k + MODE1_L_FIR_FER - 1]; Word16 gainCNG, new_pit /*Q0*/; /* Q3*/ Word16 exp; @@ -152,7 +154,6 @@ void FEC_exc_estim_fx( move16(); } - pitch_pred_linear_fit( st_fx->nbLostCmpt, st_fx->last_good, @@ -170,13 +171,11 @@ void FEC_exc_estim_fx( new_pit /*Q0 int*/ = shl( round_fx( predPitchLag ), 0 ); } - /*-----------------------------------------------------------------* * estimate subframe pitch values for the FEC frame *-----------------------------------------------------------------*/ /* initialize pitch to the long-term pitch */ - *tmp_tc = st_fx->bfi_pitch_fx; move16(); /*Q6*/ IF( EQ_16( L_frame, L_FRAME ) ) @@ -473,7 +472,11 @@ void FEC_exc_estim_fx( move16(); /* Transform to 
frequency domain */ +#ifdef HARMONIZE_DCT + edct_16fx( exc, exc_dct_in, st_fx->L_frame, 5 ); +#else edct_16fx( exc, exc_dct_in, st_fx->L_frame, 5, st_fx->element_mode ); +#endif /* Reset unvaluable part of the adaptive (pitch) excitation contribution */ max_len = sub( st_fx->L_frame, Diff_len ); @@ -498,6 +501,7 @@ void FEC_exc_estim_fx( /*-----------------------------------------------------------------* * Replicate the last spectrum in case the last good frame was coded by GSC *-----------------------------------------------------------------*/ + test(); test(); test(); @@ -514,7 +518,11 @@ void FEC_exc_estim_fx( *tmp_noise = shr_r( st_fx->lp_gainc_fx, 3 ); /*Q0*/ move16(); /* Transform back to time domain */ +#ifdef HARMONIZE_DCT + edct_16fx( exc_dct_in, exc, st_fx->L_frame, 5 ); +#else edct_16fx( exc_dct_in, exc, st_fx->L_frame, 5, st_fx->element_mode ); +#endif } ELSE { @@ -739,12 +747,19 @@ void FEC_exc_estim_fx( move16(); st_fx->bfi_pitch_frame = st_fx->L_frame; move16(); + return; } /*calculates some conditions for Pulse resynchronization to take place*/ -static void pulseRes_preCalc( Word16 *cond1, Word16 *cond2, Word32 *cond3, Word16 new_pit, Word16 Tc, Word16 L_frame ) +static void pulseRes_preCalc( + Word16 *cond1, + Word16 *cond2, + Word32 *cond3, + Word16 new_pit, + Word16 Tc, + Word16 L_frame ) { Word16 tmp_pit, tmp_pit_e, tmp_frame, tmp_frame_e; Word32 tmp_pit2; @@ -773,8 +788,11 @@ static void pulseRes_preCalc( Word16 *cond1, Word16 *cond2, Word32 *cond3, Word1 BASOP_SATURATE_WARNING_ON_EVS *cond3 = L_sub( L_mult0( -1, tmp_pit ), tmp_pit2 ); move32(); + + return; } + /*-------------------------------------------------------------------* * gain_dec_bfi() * diff --git a/lib_dec/LD_music_post_filter_fx.c b/lib_dec/LD_music_post_filter_fx.c index fc3a94a77f54dc0d326ea7198ece000da7f1a8c8..989a47e8da2dcb590071d8468291060ca850e210 100644 --- a/lib_dec/LD_music_post_filter_fx.c +++ b/lib_dec/LD_music_post_filter_fx.c @@ -877,7 +877,11 @@ void 
Prep_music_postP_fx( * EDCT and back to 16 bits *------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( exc16, dct_buffer_out, DCT_L_POST, 6 ); +#else edct_16fx( exc16, dct_buffer_out, DCT_L_POST, 6, EVS_MONO ); +#endif *qdct = Q_exc; move16(); @@ -957,7 +961,11 @@ void Post_music_postP_fx( * Go back to time domain *------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_buffer_in, exc16, DCT_L_POST, 6 ); +#else edct_16fx( dct_buffer_in, exc16, DCT_L_POST, 6, EVS_MONO ); +#endif Copy( exc16 + OFFSET2, exc2, L_FRAME ); diff --git a/lib_dec/core_switching_dec_fx.c b/lib_dec/core_switching_dec_fx.c index b4f34830d36dbdb6787ee69da6e1c63fa64f74cd..a0db01d31658acc6cc1b1449483163e460961930 100644 --- a/lib_dec/core_switching_dec_fx.c +++ b/lib_dec/core_switching_dec_fx.c @@ -111,7 +111,11 @@ void bw_switching_pre_proc_fx( * Calculate frequency energy of 0~3.2kHz and 3.2~6.4kHz the ACELP core synthesis *-------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( old_syn_12k8_16k_fx, syn_dct_fx, L_FRAME, 6 ); +#else edct_16fx( old_syn_12k8_16k_fx, syn_dct_fx, L_FRAME, 6, st_fx->element_mode ); +#endif L_tmp = L_deposit_l( 0 ); FOR( i = 0; i < L_FRAME / 2; i++ ) diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c index 65ad45e3230401be45af293a06986135108bb0de..f8cd015ea741030a2d82702c3c871251a28dbb28 100644 --- a/lib_dec/dec_tcx_fx.c +++ b/lib_dec/dec_tcx_fx.c @@ -2181,7 +2181,11 @@ void IMDCT_fx( Word32 *x, Word16 x_e, Word16 *old_syn_overl, Word16 *syn_Overl_T /* DCT */ Q = sub( 31, x_e ); +#ifdef HARMONIZE_DCT + edct_fx( x, tmp_buf, L_frame, &Q, EVS_MONO ); +#else edct_fx( x, tmp_buf, L_frame, &Q ); +#endif /* scale by sqrt(L / NORM_MDCT_FACTOR) */ tmp1 = mult_r( shl( L_frame, 4 ), 26214 /*128.f / NORM_MDCT_FACTOR Q15*/ ); /* 4Q11 */ @@ -2713,7 +2717,11 @@ static void TCX_MDCT_Inverse_qwin_fx( R2 = shr( r, 1 ); 
x_e = sub( 15, x_e ); +#ifdef HARMONIZE_DCT + edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e, EVS_MONO ); +#else edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e ); +#endif x_e = sub( 15, x_e ); fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); /* exp(fac_e) */ @@ -3305,7 +3313,11 @@ void IMDCT_ivas_fx( } ELSE { +#ifdef HARMONIZE_DCT + edct_fx( x_fx, xn_buf_fx_32 + add( shr( overlap, 1 ), nz ), L_frame, &q_xn_buf_fx_32, IVAS_SCE /* just cannot be EVS_MONO */ ); +#else edct_ivas_fx( x_fx, xn_buf_fx_32 + add( shr( overlap, 1 ), nz ), L_frame, &q_xn_buf_fx_32 ); +#endif Word16 res_m, res_e; res_e = 0; move16(); diff --git a/lib_dec/gs_dec_amr_wb_fx.c b/lib_dec/gs_dec_amr_wb_fx.c index 326c2f91976fd0087b8b6bdc60edd852091dd4d1..2670ebfef8896ac43888822603e0d906c9d6a14d 100644 --- a/lib_dec/gs_dec_amr_wb_fx.c +++ b/lib_dec/gs_dec_amr_wb_fx.c @@ -450,10 +450,21 @@ void improv_amr_wb_gs_fx( * Do the excitation modification according to the content * Go back to time domain -> Overwrite exctiation *------------------------------------------------------------*/ + +#ifdef HARMONIZE_DCT + edct_16fx( exc2_fx, dct_exc_in_fx, L_FRAME, 6 ); +#else edct_16fx( exc2_fx, dct_exc_in_fx, L_FRAME, 6, EVS_MONO ); +#endif + gs_dec_amr_wb_fx( core_brate, seed_tcx, dct_exc_in_fx, Q_exc2, dct_exc_out_fx, Q_exc2, pitch_buf_fx, lt_voice_fac_fx, clas, coder_type ); +#ifdef HARMONIZE_DCT + edct_16fx( dct_exc_out_fx, exc2_fx, L_FRAME, 6 ); +#else edct_16fx( dct_exc_out_fx, exc2_fx, L_FRAME, 6, EVS_MONO ); +#endif + /*------------------------------------------------------------* * Redo core synthesis at 12k8 Hz with the modified excitation *------------------------------------------------------------*/ diff --git a/lib_dec/gs_dec_fx.c b/lib_dec/gs_dec_fx.c index f50533b4b5c7f3bf7208cd0c9b1e67fe19ad3354..a24bb88dcc0b332fa826902788ab63bcc877ae12 100644 --- a/lib_dec/gs_dec_fx.c +++ b/lib_dec/gs_dec_fx.c @@ 
-358,7 +358,11 @@ void decod_audio_fx( * DCT transform *---------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( exc, dct_epit, st_fx->L_frame, 7 ); +#else edct_16fx( exc, dct_epit, st_fx->L_frame, 7, st_fx->element_mode ); +#endif /*---------------------------------------------------------------* * Reset unvaluable part of the adaptive (pitch) excitation contribution @@ -497,8 +501,13 @@ void decod_audio_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit, exc, st_fx->L_frame, 7 ); + edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7 ); +#else edct_16fx( dct_epit, exc, st_fx->L_frame, 7, st_fx->element_mode ); edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7, st_fx->element_mode ); +#endif /*----------------------------------------------------------------------* * Remove potential pre-echo in case an onset has been detected diff --git a/lib_dec/hf_synth_fx.c b/lib_dec/hf_synth_fx.c index 9c4807835e765b727e4b973535a819da59fd409d..32fcb3311c7c63a1adf9fc45d2ad4a56fbfb2975 100644 --- a/lib_dec/hf_synth_fx.c +++ b/lib_dec/hf_synth_fx.c @@ -716,7 +716,11 @@ void hf_synth_amr_wb_fx( Copy_Scale_sig_16_32_DEPREC( exc, exc32, L_FRAME, qdct ); /* Qexc + qdct */ qdct = add( qdct, Q_exc ); +#ifdef HARMONIZE_DCT + edct_fx( exc32, dct_exc32, L_FRAME, &qdct, EVS_MONO ); +#else edct_fx( exc32, dct_exc32, L_FRAME, &qdct ); +#endif q_tmp = Exp32Array( L_FRAME, dct_exc32 ); q_tmp = sub( q_tmp, 16 ); @@ -1006,7 +1010,11 @@ void hf_synth_amr_wb_fx( qhf = sub( q_tmp, 1 ); Copy_Scale_sig_16_32_DEPREC( dct_hb, dct_hb32, L_FRAME16k, qhf ); /* qhf + qdct */ qhf = add( qhf, qdct ); +#ifdef HARMONIZE_DCT + edct_fx( dct_hb32, exc16k32, L_FRAME16k, &qhf, EVS_MONO ); +#else edct_fx( dct_hb32, exc16k32, L_FRAME16k, &qhf ); +#endif q_tmp = Exp32Array( L_FRAME16k, exc16k32 ); q_tmp = sub( q_tmp, 16 ); Copy_Scale_sig_32_16( exc16k32, exc16k, L_FRAME16k, 
q_tmp ); /* qhf + qtmp */ diff --git a/lib_dec/ivas_td_low_rate_dec_fx.c b/lib_dec/ivas_td_low_rate_dec_fx.c index 916329e66cc4f890b44b393331d3d02465993943..200443dbc7a0a8363d517462b787005caa19c21b 100644 --- a/lib_dec/ivas_td_low_rate_dec_fx.c +++ b/lib_dec/ivas_td_low_rate_dec_fx.c @@ -134,9 +134,14 @@ void tdm_low_rate_dec_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit, exc, L_FRAME, find_guarded_bits_fx( L_FRAME ) ); + edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, find_guarded_bits_fx( L_FRAME ) ); +#else edct_16fx( dct_epit, exc, L_FRAME, find_guarded_bits_fx( L_FRAME ), IVAS_CPE_TD ); edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, find_guarded_bits_fx( L_FRAME ), IVAS_CPE_TD ); +#endif IF( bwe_exc != NULL ) { diff --git a/lib_enc/bw_detect_fx.c b/lib_enc/bw_detect_fx.c index 2e04b986ff3813d6ba305070632b388d41938a4b..200ff9098b699c165999646dc08c7b3ad0d65ec6 100644 --- a/lib_enc/bw_detect_fx.c +++ b/lib_enc/bw_detect_fx.c @@ -308,7 +308,11 @@ void bw_detect_fx( in_win32[i] = L_mult( *pt++, *pt1-- ); move32(); } +#ifdef HARMONIZE_DCT + edct_fx( in_win32, spect32, BWD_TOTAL_WIDTH, &Q_dct, EVS_MONO ); +#else edct_fx( in_win32, spect32, BWD_TOTAL_WIDTH, &Q_dct /*,st->element_mode*/ ); +#endif FOR( i = 0; i < BWD_TOTAL_WIDTH; i++ ) { diff --git a/lib_enc/cod_tcx_fx.c b/lib_enc/cod_tcx_fx.c index 7fae11949351474a433d43eab4d4fb491affb158..e56c306718401da940e1c354ebaf75e059fd72ba 100644 --- a/lib_enc/cod_tcx_fx.c +++ b/lib_enc/cod_tcx_fx.c @@ -2490,7 +2490,11 @@ void QuantizeSpectrum_fx( /* DCT */ Q = sub( 31, *spectrum_e ); +#ifdef HARMONIZE_DCT + edct_fx( spectrum, tmp_buf, L_frame, &Q, EVS_MONO ); +#else edct_fx( spectrum, tmp_buf, L_frame, &Q ); +#endif /* scale by sqrt(L / NORM_MDCT_FACTOR) */ tmp1 = mult_r( shl( L_frame, 4 ), 26214 /*128.f / NORM_MDCT_FACTOR Q15*/ ); /* 4Q11 */ @@ -3722,7 +3726,11 @@ void coder_tcx_fx( Q = sub( Q, tmp2 ); /* DCT */ 
+#ifdef HARMONIZE_DCT + edct_fx( tmp_buf, spectrum, L_frame, &Q, EVS_MONO ); +#else edct_fx( tmp_buf, spectrum, L_frame, &Q ); +#endif *spectrum_e = sub( 31, Q ); move16(); } @@ -4404,7 +4412,11 @@ void InternalTCXDecoder_fx( /* DCT */ Q = sub( 31, *spectrum_e ); +#ifdef HARMONIZE_DCT + edct_fx( spectrum_fx, tmp_buf, L_frame, &Q, IVAS_SCE /* just cannot be EVS_MONO */ ); +#else edct_ivas_fx( spectrum_fx, tmp_buf, L_frame, &Q ); +#endif /* scale by sqrt(L / NORM_MDCT_FACTOR) */ tmp1 = mult_r( shl( L_frame, 4 ), 26214 /*128.f / NORM_MDCT_FACTOR Q15*/ ); /* 4Q11 */ diff --git a/lib_enc/ext_sig_ana_fx.c b/lib_enc/ext_sig_ana_fx.c index f5b91249ff996820a63ea5fb9ce49eb484f9d713..e34cf4f996161c18755c977b09c4bcfe121e5c4e 100644 --- a/lib_enc/ext_sig_ana_fx.c +++ b/lib_enc/ext_sig_ana_fx.c @@ -372,7 +372,11 @@ void core_signal_analysis_high_bitrate_fx( Q = sub( Q, tmp2 ); /* DCT */ +#ifdef HARMONIZE_DCT + edct_fx( tmp_buf, spectrum[frameno], L_subframe, &Q, EVS_MONO ); +#else edct_fx( tmp_buf, spectrum[frameno], L_subframe, &Q ); +#endif *spectrum_e = sub( 31, Q ); } ELSE @@ -945,7 +949,11 @@ void core_signal_analysis_high_bitrate_ivas_fx( Word16 Q; Q = q_out_wtda; +#ifdef HARMONIZE_DCT + edct_fx( tcx20Win_32, hTcxEnc->spectrum_fx[frameno], L_subframe, &Q, st->element_mode ); +#else edct_ivas_fx( tcx20Win_32, hTcxEnc->spectrum_fx[frameno], L_subframe, &Q ); +#endif hTcxEnc->spectrum_e[frameno] = sub( 31, Q ); move16(); diff --git a/lib_enc/gs_enc_fx.c b/lib_enc/gs_enc_fx.c index 6c7a8b6c07422bf44396daf75a3c55847c36d3ff..7d2ac0208bada96f5a2886210cd10ab9d8b448fb 100644 --- a/lib_enc/gs_enc_fx.c +++ b/lib_enc/gs_enc_fx.c @@ -255,8 +255,13 @@ void encod_audio_fx( * DCT transform *---------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( exc, dct_epit, st_fx->L_frame, 7 ); + edct_16fx( res, dct_res, st_fx->L_frame, 7 ); +#else edct_16fx( exc, dct_epit, st_fx->L_frame, 7, st_fx->element_mode ); edct_16fx( res, dct_res, st_fx->L_frame, 7, 
st_fx->element_mode ); +#endif /*---------------------------------------------------------------* * Calculate energy dynamics @@ -372,8 +377,13 @@ void encod_audio_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit, exc, st_fx->L_frame, 7 ); + edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7 ); +#else edct_16fx( dct_epit, exc, st_fx->L_frame, 7, st_fx->element_mode ); edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7, st_fx->element_mode ); +#endif IF( NE_16( st_fx->element_mode, EVS_MONO ) ) { diff --git a/lib_enc/ivas_mdct_core_enc_fx.c b/lib_enc/ivas_mdct_core_enc_fx.c index 438c646dd094c0201e151e4d76b63973940b4ce9..cacd8a06edf0738fe06c3d290c3abc0f70035057 100644 --- a/lib_enc/ivas_mdct_core_enc_fx.c +++ b/lib_enc/ivas_mdct_core_enc_fx.c @@ -1109,6 +1109,55 @@ void enc_prm_igf_mdct( return; } +#ifdef OPTIMIZE_FFT_STACK +/*-------------------------------------------------------------------* + * compute_power_spec() + * + * + *-------------------------------------------------------------------*/ + +static void compute_power_spec( + TCX_ENC_HANDLE hTcxEnc, + Word32 *mdst_spectrum_fx[NB_DIV], + Word32 powerSpec_fx[N_MAX], + Word16 *q_pow, + const Word16 n, + const Word16 L_subframeTCX ) +{ + Word16 i; + Word64 powerSpec_fx64[N_MAX]; + + IF( hTcxEnc->fUseTns[n] ) + { + FOR( i = 0; i < L_subframeTCX; i++ ) + { + powerSpec_fx64[i] = W_mult_32_32( hTcxEnc->spectrum_fx[n][i], hTcxEnc->spectrum_fx[n][i] ); + move64(); + } + *q_pow = W_norm_arr( powerSpec_fx64, L_subframeTCX ); + } + ELSE + { + FOR( i = 0; i < L_subframeTCX; i++ ) + { + powerSpec_fx64[i] = W_mac_32_32( W_mult_32_32( mdst_spectrum_fx[n][i], mdst_spectrum_fx[n][i] ), hTcxEnc->spectrum_fx[n][i], hTcxEnc->spectrum_fx[n][i] ); + move64(); + } + *q_pow = W_norm_arr( powerSpec_fx64, L_subframeTCX ); + } + + FOR( i = 0; i < L_subframeTCX; i++ ) + { + powerSpec_fx64[i] = W_shl( powerSpec_fx64[i], *q_pow ); + 
move64(); + powerSpec_fx[i] = W_extract_h( powerSpec_fx64[i] ); + move32(); + } + + return; +} +#endif + /*-------------------------------------------------------------------* * ivas_mdct_core_whitening_enc() * @@ -1144,7 +1193,9 @@ void ivas_mdct_core_whitening_enc_fx( Word32 temp_buffer[15 * L_FRAME48k / 8]; Word32 *windowedSignal_fx[CPE_CHANNELS]; Word32 *powerSpec_fx = orig_spectrum_long[0]; +#ifndef OPTIMIZE_FFT_STACK Word64 powerSpec_fx64[N_MAX]; +#endif Word16 nrg_fx; /* Q15 */ Encoder_State *st, **sts; Word32 scf_fx[CPE_CHANNELS][NB_DIV][M]; @@ -1912,6 +1963,9 @@ void ivas_mdct_core_whitening_enc_fx( move16(); FOR( n = 0; n < nSubframes; n++ ) { +#ifdef OPTIMIZE_FFT_STACK + compute_power_spec( st->hTcxEnc, mdst_spectrum_fx[ch], powerSpec_fx, &q_pow, n, L_subframeTCX ); +#else IF( st->hTcxEnc->fUseTns[n] ) { FOR( i = 0; i < L_subframeTCX; i++ ) @@ -1938,6 +1992,7 @@ void ivas_mdct_core_whitening_enc_fx( powerSpec_fx[i] = W_extract_h( powerSpec_fx64[i] ); move32(); } +#endif IF( mct_on ) { FOR( i = 0; i < L_subframeTCX; i++ ) diff --git a/lib_enc/ivas_td_low_rate_enc_fx.c b/lib_enc/ivas_td_low_rate_enc_fx.c index 8ca5a4a5029a9986997db3831e7a3f96777e4464..17ca57065537275e0fd4ce110501a4e89a23466c 100644 --- a/lib_enc/ivas_td_low_rate_enc_fx.c +++ b/lib_enc/ivas_td_low_rate_enc_fx.c @@ -94,7 +94,11 @@ void tdm_low_rate_enc_fx( * DCT transform of the residual and create a subsample residual *---------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( res, dct_res_fx, L_FRAME, 7 ); +#else edct_16fx( res, dct_res_fx, L_FRAME, 7, st->element_mode ); +#endif /*--------------------------------------------------------------------------------------* * GSC encoder @@ -119,9 +123,14 @@ void tdm_low_rate_enc_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit_fx, exc_fx, L_FRAME, 7 ); + edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, 
L_FRAME, 7 ); +#else edct_16fx( dct_epit_fx, exc_fx, L_FRAME, 7, st->element_mode ); edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, 7, st->element_mode ); +#endif /*--------------------------------------------------------------------------------------* * Remove potential pre-echo in case an onset has been detected diff --git a/lib_rend/ivas_reverb_fft_filter_fx.c b/lib_rend/ivas_reverb_fft_filter_fx.c index dcf13c15345f125a0ee3f2fba9f8ca7f4c187e80..c8bd561dd47c4eaab653e2e9c1b02b470b7d2e2c 100644 --- a/lib_rend/ivas_reverb_fft_filter_fx.c +++ b/lib_rend/ivas_reverb_fft_filter_fx.c @@ -100,7 +100,12 @@ static void fft_wrapper_2ch_fx( Word16 k, mirror_k; Word32 left_re_fx, left_im_fx, right_re_fx, right_im_fx; +#ifdef HARMONIZE_DoRTFTn + DoRTFTn_fx( buffer_L_fx, buffer_R_fx, NULL, fft_size ); +#else DoRTFTn_fx_ivas( buffer_L_fx, buffer_R_fx, fft_size ); +#endif + /* separating left and right channel spectra */ buffer_L_fx[0] = L_shl( buffer_L_fx[0], 1 ); // Qx + 1 move32(); @@ -167,10 +172,16 @@ static void ifft_wrapper_2ch_fx( move32(); } +#ifdef HARMONIZE_DoRTFTn + DoRTFTn_fx( buffer_L, buffer_R, NULL, fft_size ); +#else DoRTFTn_fx_ivas( buffer_L, buffer_R, fft_size ); +#endif return; } + + /*-----------------------------------------------------------------------------------------* * Function ivas_reverb_t2f_f2t_init() * diff --git a/lib_rend/ivas_reverb_filter_design_fx.c b/lib_rend/ivas_reverb_filter_design_fx.c index 6d23b0053af7223a7c745b64214f868deb3164b5..c783d3e786ad0d315a95ad3650fc7be2e1bbaae9 100644 --- a/lib_rend/ivas_reverb_filter_design_fx.c +++ b/lib_rend/ivas_reverb_filter_design_fx.c @@ -206,7 +206,11 @@ static void calc_min_phase_fx( /* Convert back and isolate the phase. 
*/ IF( LE_16( fft_size, 512 ) ) /* for size <= 512 using complex-value FFT (more effecient, but available only up to 512 size) */ { +#ifdef HARMONIZE_DoRTFTn + DoRTFTn_fx( pFolded_cepstrum_re, pFolded_cepstrum_im, NULL, fft_size ); +#else DoRTFTn_fx_ivas( pFolded_cepstrum_re, pFolded_cepstrum_im, fft_size ); +#endif /* Copying the img part into the output */ FOR( idx = 1; idx < half_fft_size; idx++ )