diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index 55bc483e0fcc2558eeea1fb5a958b54318aefda6..c6925bf50d71258e440b6ea288888216a18d3c34 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -11,7 +11,9 @@ #include "stl.h" #include "math_32.h" -static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */ + +static Word16 get_edxt_factor( + const Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */ { Word16 factor; /*Q15*/ factor = 0; @@ -53,8 +55,12 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len } ELSE IF( EQ_16( length, 40 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 7327; /*0.223 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 960 ) ) { @@ -73,33 +79,57 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len } ELSE IF( EQ_16( length, 120 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 4230; /*0.1290 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 1200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 1338; /*0.040 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 800 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 1638; /*0.05 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 400 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 2317; /*0.070 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 3277; /*0.1 in Q15*/ move16(); +#endif } + return factor; /*Q15*/ } -static Word16 const *get_edct_table( Word16 length /*Q0*/, Word16 *q ) + +static Word16 const *get_edct_table( + const Word16 length /*Q0*/, + Word16 *q ) { Word16 const *edct_table; edct_table = NULL; @@ -163,7 +193,7 @@ static Word16 const *get_edct_table( Word16 length /*Q0*/, Word16 *q ) /*-------------------------------------------------------------------------* * FUNCTION : edct_fx() * - * PURPOSE : DCT transform + * PURPOSE 
: DCT transform, 32-bit version * * INPUT ARGUMENTS : * _ (Word16) length : length @@ -177,14 +207,23 @@ void edct_fx( const Word32 *x, /* i : input signal Qq */ Word32 *y, /* o : output transform Qq */ Word16 length, /* i : length Q0*/ - Word16 *q /* i : Q value of input signal */ +#ifdef HARMONIZE_DCT + Word16 *q, /* i : Q value of input signal */ + const Word16 element_mode /* i : element mode */ +#else + Word16 *q /* i : Q value of input signal */ +#endif ) { Word16 i; Word32 re; Word32 im; const Word16 *edct_table; /*Q16 */ +#ifdef OPTIMIZE_FFT_STACK + cmplx spec[L_FRAME_PLUS / 2]; +#else Word32 complex_buf[2 * ( L_FRAME48k / 2 + 240 )]; +#endif Word32 L_tmp; Word16 tmp; Word16 len1; @@ -194,26 +233,64 @@ void edct_fx( /* Twiddling and Pre-rotate */ FOR( i = 0; i < len1; i++ ) { - L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Q(q+1) */ + L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Q(q+1) */ +#ifdef OPTIMIZE_FFT_STACK + spec[i].re = Madd_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#else complex_buf[2 * i] = Madd_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#endif move32(); L_tmp = Mult_32_16( x[( length - ( 1 + ( i * 2 ) ) )], edct_table[i] ); /*Q(q+1) */ - +#ifdef OPTIMIZE_FFT_STACK + spec[i].im = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#else complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#endif move32(); } - *q = sub( 15, *q ); - move16(); +#ifdef HARMONIZE_DCT + IF( element_mode == EVS_MONO ) + { +#endif + *q = sub( 15, *q ); + move16(); +#ifdef OPTIMIZE_FFT_STACK + BASOP_cfft( spec, len1, q, y ); +#else BASOP_cfft( (cmplx *) complex_buf, len1, q, y ); +#endif + + tmp = div_s( 1, length ); /*Q15 */ + tmp = round_fx( L_shl( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ), 2 ) ); /*Q15 */ +#ifdef HARMONIZE_DCT + } + ELSE + { + *q = sub( 31, 
*q ); + move16(); + tmp = sub( getScaleFactor32( (Word32 *) spec, shl( len1, 1 ) ), find_guarded_bits_fx( len1 ) ); + scale_sig32_cmplx( spec, len1, tmp ); + + fft_fx( spec, len1 ); + *q = sub( *q, tmp ); + move16(); + + tmp = div_s( 4, length ); /*Q17 */ + tmp = round_fx( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ) ); /*Q15 */ + } +#endif - tmp = div_s( 1, length ); /*Q15 */ - tmp = round_fx( L_shl( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ), 2 ) ); /*Q15 */ FOR( i = 0; i < len1; i++ ) { - re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Q(q+1) */ - im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Q(q+1) */ +#ifdef OPTIMIZE_FFT_STACK + re = Msub_32_16( spec[i].re, spec[i].im, tmp ); /*Q(q+1) */ + im = Madd_32_16( spec[i].im, spec[i].re, tmp ); /*Q(q+1) */ +#else + re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Q(q+1) */ + im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Q(q+1) */ +#endif y[2 * i] = L_add( Mult_32_16( re, edct_table[i] ), Mult_32_16( im, edct_table[( len1 - ( 1 + i ) )] ) ); /*Q(q+2)*/ move32(); y[( length - ( 1 + ( i * 2 ) ) )] = L_sub( Mult_32_16( re, edct_table[( len1 - ( 1 + i ) )] ), Mult_32_16( im, edct_table[i] ) ); /*Q(q+2)*/ @@ -222,9 +299,18 @@ void edct_fx( *q = sub( 15 + 2, *q ); move16(); +#ifdef HARMONIZE_DCT + IF( element_mode != EVS_MONO ) + { + *q = add( *q, Q16 ); + move16(); + } +#endif + return; } +#ifndef HARMONIZE_DCT void edct_ivas_fx( const Word32 *x, /* i : input signal Qq */ Word32 *y, /* o : output transform Qq */ @@ -284,8 +370,11 @@ void edct_ivas_fx( *q = sub( 31 + 2, *q ); move16(); + return; } +#endif + /*-------------------------------------------------------------------------* * FUNCTION : edst_fx() * @@ -310,7 +399,11 @@ void edst_fx( Word32 re; Word32 im; const Word16 *edct_table; /*Q16 */ +#ifdef OPTIMIZE_FFT_STACK + cmplx complex_buf[L_FRAME_PLUS / 2]; +#else Word32 complex_buf[2 * ( 
L_FRAME48k / 2 + 240 )]; +#endif Word32 L_tmp; Word16 tmp; Word16 len1; @@ -320,25 +413,42 @@ void edst_fx( /* Twiddling and Pre-rotate */ FOR( i = 0; i < len1; i++ ) { - L_tmp = Mult_32_16( x[( length - ( 1 + ( i * 2 ) ) )], edct_table[i] ); /*Qq+1*/ + L_tmp = Mult_32_16( x[( length - ( 1 + ( i * 2 ) ) )], edct_table[i] ); /*Qq+1*/ +#ifdef OPTIMIZE_FFT_STACK + complex_buf[i].re = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#else complex_buf[2 * i] = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#endif move32(); - L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Qq+1*/ + L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Qq+1*/ +#ifdef OPTIMIZE_FFT_STACK + complex_buf[i].im = Msub_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#else complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#endif move32(); } *q = sub( 15, *q ); move16(); +#ifdef OPTIMIZE_FFT_STACK + BASOP_cfft( complex_buf, len1, q, y ); +#else BASOP_cfft( (cmplx *) complex_buf, len1, q, y ); +#endif tmp = div_s( 1, length ); /*Q15 */ tmp = round_fx( L_shl( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ), 2 ) ); /*Q15 */ FOR( i = 0; i < len1; i++ ) { - re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Qq+1*/ - im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Qq+1*/ +#ifdef OPTIMIZE_FFT_STACK + re = Msub_32_16( complex_buf[i].re, complex_buf[i].im, tmp ); /*Qq+1*/ + im = Madd_32_16( complex_buf[i].im, complex_buf[i].re, tmp ); /*Qq+1*/ +#else + re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Qq+1*/ + im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Qq+1*/ +#endif y[2 * i] = L_add( Mult_32_16( re, edct_table[i] ), Mult_32_16( im, edct_table[( len1 - ( 1 + i ) )] ) ); /*Qq+2*/ move32(); y[( length - ( 1 + ( i * 
2 ) ) )] = L_sub( Mult_32_16( im, edct_table[i] ), Mult_32_16( re, edct_table[( len1 - ( 1 + i ) )] ) ); /*Qq+2*/ @@ -350,10 +460,12 @@ void edst_fx( return; } + + /*========================================================================*/ -/* FUNCTION : edct_fx() */ +/* FUNCTION : edct_16fx() */ /*------------------------------------------------------------------------*/ -/* PURPOSE : DCT transform */ +/* PURPOSE : DCT transform, 16-bit version */ /*------------------------------------------------------------------------*/ /* INPUT ARGUMENTS : */ /* _ (Word16) length : length */ @@ -365,7 +477,6 @@ void edst_fx( /* OUTPUT ARGUMENTS : */ /* _ (Word16[]) y : output transform Qx */ /*------------------------------------------------------------------------*/ - /*------------------------------------------------------------------------*/ /* RETURN ARGUMENTS : */ /* _ None */ @@ -374,26 +485,35 @@ void edct_16fx( const Word16 *x, /* i : input signal Qx */ Word16 *y, /* o : output transform Qx */ Word16 length, /* i : length */ - Word16 bh, /* bit-headroom */ + Word16 bh /* bit-headroom */ +#ifndef HARMONIZE_DCT + , const Word16 element_mode - +#endif ) { Word16 i; Word16 re[L_FRAME48k / 2]; Word16 im[L_FRAME48k / 2]; const Word16 *edct_table = NULL; +#ifndef OPTIMIZE_FFT_STACK Word16 re2[L_FRAME48k / 2]; Word16 im2[L_FRAME48k / 2]; - +#endif Word32 L_tmp, Lacc, Lmax; +#ifdef OPTIMIZE_FFT_STACK + Word16 tmp, tmp_re, fact; +#else Word16 tmp, fact; +#endif Word16 Q_edct; Word16 Len2, i2; const Word16 *px, *pt; Word16 *py; +#ifndef HARMONIZE_DCT (void) element_mode; /*COMPLETE: some eDCT sub function are missing */ +#endif IF( EQ_16( length, L_FRAME32k ) ) { @@ -447,28 +567,48 @@ void edct_16fx( { i2 = shl( i, 1 ); - L_tmp = L_mult( x[i2], edct_table[i] ); /*Q(Qx+16) */ - Lacc = L_mac_sat( L_tmp, *px, *pt ); /*Q(Qx+16) */ + L_tmp = L_mult( x[i2], edct_table[i] ); /*Q(Qx+16) */ + Lacc = L_mac_sat( L_tmp, *px, *pt ); /*Q(Qx+16) */ +#ifdef OPTIMIZE_FFT_STACK + re[i] = 
round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#else re2[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#endif move16(); - L_tmp = L_mult( *px, edct_table[i] ); /*Q(Qx+16) */ - Lacc = L_msu_sat( L_tmp, x[i2], *pt ); /*Q(Qx+16) */ + L_tmp = L_mult( *px, edct_table[i] ); /*Q(Qx+16) */ + Lacc = L_msu_sat( L_tmp, x[i2], *pt ); /*Q(Qx+16) */ +#ifdef OPTIMIZE_FFT_STACK + im[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#else im2[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#endif move16(); px -= 2; pt--; } IF( EQ_16( length, L_FRAME32k ) ) { +#ifdef OPTIMIZE_FFT_STACK + DoRTFT320_16fx( re, im ); +#else DoRTFT320_16fx( re2, im2 ); +#endif } ELSE IF( EQ_16( length, L_FRAME ) ) { +#ifdef OPTIMIZE_FFT_STACK + DoRTFT128_16fx( re, im ); +#else DoRTFT128_16fx( re2, im2 ); +#endif } ELSE IF( EQ_16( length, L_FRAME16k ) ) { +#ifdef OPTIMIZE_FFT_STACK + DoRTFT160_16fx( re, im ); +#else DoRTFT160_16fx( re2, im2 ); +#endif } ELSE { @@ -478,6 +618,18 @@ void edct_16fx( fact = round_fx( L_shl( L_tmp, 2 ) ); /*Q15 */ FOR( i = 0; i < shr( length, 1 ); i++ ) { +#ifdef OPTIMIZE_FFT_STACK + tmp = mult_r( im[i], fact ); /*Q(Qx+Q_edct) */ + tmp_re = sub_sat( re[i], tmp ); /*Q(Qx+Q_edct) */ + move16(); + + tmp = mult_r( re[i], fact ); /*Q(Qx+Q_edct) */ + im[i] = add_sat( im[i], tmp ); /*Q(Qx+Q_edct) */ + move16(); + + re[i] = tmp_re; + move16(); +#else tmp = mult_r( im2[i], fact ); /*Q(Qx+Q_edct) */ re[i] = sub_sat( re2[i], tmp ); /*Q(Qx+Q_edct) */ move16(); @@ -485,6 +637,7 @@ void edct_16fx( tmp = mult_r( re2[i], fact ); /*Q(Qx+Q_edct) */ im[i] = add_sat( im2[i], tmp ); /*Q(Qx+Q_edct) */ move16(); +#endif } /* Post-rotate and obtain the output data */ @@ -537,7 +690,11 @@ void iedct_short_fx( seg_len_div4 = shr( segment_length, 2 ); /*Q0*/ seg_len_3mul_div4 = add( seg_len_div2, seg_len_div4 ); +#ifdef HARMONIZE_DCT + edct_fx( in, alias, seg_len_div2, Q, EVS_MONO ); +#else edct_fx( in, alias, seg_len_div2, Q ); 
+#endif FOR( i = 0; i < seg_len_div2; i++ ) { IF( alias[i] != 0 ) @@ -590,6 +747,7 @@ void edxt_fx( move16(); cosPtr = NULL; sinPtr = NULL; + IF( EQ_16( length, 512 ) ) { cosPtr = cos_scale_tbl_512; /*Q15*/ @@ -641,10 +799,14 @@ void edxt_fx( } ELSE IF( EQ_16( length, 40 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_640; /*Q15*/ sinPtr = sin_scale_tbl_640; /*Q15*/ n = 16; move16(); +#endif } ELSE IF( EQ_16( length, 960 ) ) { @@ -669,38 +831,58 @@ void edxt_fx( } ELSE IF( EQ_16( length, 120 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_960; /*Q15*/ sinPtr = sin_scale_tbl_960; /*Q15*/ n = 8; move16(); +#endif } ELSE IF( EQ_16( length, 1200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_1200; /*Q15*/ sinPtr = sin_scale_tbl_1200; /*Q15*/ n = 1; move16(); +#endif } ELSE IF( EQ_16( length, 800 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 1; move16(); +#endif } ELSE IF( EQ_16( length, 400 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 2; move16(); +#endif } ELSE IF( EQ_16( length, 200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 4; move16(); +#endif } test(); @@ -708,16 +890,26 @@ void edxt_fx( { const Word16 Nm1 = sub( length, 1 ); const Word16 xSign = sub( imult1616( 2, shr( kernelType, 1 ) ), 1 ); /*Q0*/ +#ifdef OPTIMIZE_FFT_STACK + cmplx spec[L_FRAME_MAX]; +#else Word32 re[L_FRAME_PLUS]; Word32 im[L_FRAME_PLUS]; +#endif IF( !synthesis ) { FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* pre-modulation of audio input */ { +#ifdef OPTIMIZE_FFT_STACK + spec[k].re = x[2 * k]; /*Qx*/ + spec[( Nm1 - k )].re = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/ + spec[k].im = spec[( Nm1 - k )].im = 0; +#else re[k] = x[2 * k]; 
/*Qx*/ re[( Nm1 - k )] = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/ im[k] = im[( Nm1 - k )] = 0; +#endif move32(); move32(); move32(); @@ -726,26 +918,46 @@ void edxt_fx( IF( EQ_16( length, 512 ) ) { /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */ +#ifdef OPTIMIZE_FFT_STACK + hdrm = L_norm_arr_cmplx( spec, 512 ); +#else hdrm = s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = sub( hdrm, 4 ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } +#ifdef OPTIMIZE_FFT_STACK + DoRTFTn_fx( NULL, NULL, spec, 512 ); +#else DoRTFTn_fx( re, im, 512 ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = negate( tmp ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } } ELSE /* fft() doesn't support 512 */ { +#ifdef OPTIMIZE_FFT_STACK + fft_fx( spec, length ); +#else fft_fx( re, im, length, 1 ); +#endif } IF( shr( kernelType, 1 ) ) @@ -757,12 +969,21 @@ void edxt_fx( const Word16 wRe = cosPtr[( k * n )]; /*Q15*/ const Word16 wIm = sinPtr[( k * n )]; /*Q15*/ +#ifdef OPTIMIZE_FFT_STACK + y[k] /*pt 1*/ = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ + y[( length - k )] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ +#else y[k] /*pt 1*/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ y[( length - k )] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ +#endif move32(); move32(); } +#ifdef OPTIMIZE_FFT_STACK + y[( length / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ +#else y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } ELSE /* forw. 
DST-II */ @@ -774,16 +995,29 @@ void edxt_fx( const Word16 wRe = cosPtr[( k * n )]; /*Q15*/ const Word16 wIm = sinPtr[( k * n )]; /*Q15*/ +#ifdef OPTIMIZE_FFT_STACK + y[( Nm1 - k )] = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ + y[k - 1] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ +#else y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ +#endif move32(); move32(); } +#ifdef OPTIMIZE_FFT_STACK + y[( Nm1 / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ +#else y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } +#ifdef OPTIMIZE_FFT_STACK + y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( spec[0].re, 1 ); /*Qx*/ +#else y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 ); /*Qx*/ +#endif move32(); } ELSE /* inverse II = III */ @@ -797,12 +1031,21 @@ void edxt_fx( const Word16 wRe = shr( cosPtr[imult1616( k, n )], 1 ); const Word16 wIm = shr( sinPtr[imult1616( k, n )], 1 ); +#ifdef OPTIMIZE_FFT_STACK + spec[k].re = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ + spec[k].im = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ +#else re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ im[k] = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ +#endif move32(); move32(); } +#ifdef OPTIMIZE_FFT_STACK + spec[( length / 2 )].re = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#else re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } ELSE /* DST type III */ @@ -814,23 +1057,42 @@ void edxt_fx( const Word16 wRe = shr( cosPtr[( k * n )], 1 ); /*Q15*/ const Word16 wIm = shr( sinPtr[( k * n )], 1 ); /*Q15*/ +#ifdef 
OPTIMIZE_FFT_STACK + spec[k].re = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/ + spec[k].im = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/ +#else re[k] = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/ im[k] = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/ +#endif move32(); move32(); } +#ifdef OPTIMIZE_FFT_STACK + spec[( length / 2 )].re = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#else re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } +#ifdef OPTIMIZE_FFT_STACK + spec[0].re = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ + spec[0].im = spec[( length / 2 )].im = 0; +#else re[0] = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ im[0] = im[( length / 2 )] = 0; +#endif move32(); move32(); FOR( k = ( Nm1 / 2 ); k > 0; k-- ) { +#ifdef OPTIMIZE_FFT_STACK + spec[( length - k )].re = spec[k].re; /*Qx*/ + spec[( length - k )].im = L_negate( spec[k].im ); /*Qx*/ +#else re[( length - k )] = re[k]; /*Qx*/ im[( length - k )] = L_negate( im[k] ); /*Qx*/ +#endif move32(); move32(); } @@ -838,35 +1100,63 @@ void edxt_fx( IF( EQ_16( length, 512 ) ) { /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */ +#ifdef OPTIMIZE_FFT_STACK + hdrm = L_norm_arr_cmplx( spec, 512 ); +#else hdrm = s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = sub( hdrm, 4 ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } +#ifdef OPTIMIZE_FFT_STACK + DoRTFTn_fx( NULL, NULL, spec, 512 ); +#else DoRTFTn_fx( re, im, 512 ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = negate( tmp ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp 
); +#endif } } ELSE /* fft() doesn't support 512 */ { +#ifdef OPTIMIZE_FFT_STACK + fft_fx( spec, length ); +#else fft_fx( re, im, length, 1 ); +#endif } FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* post-modulation of FFT output */ { +#ifdef OPTIMIZE_FFT_STACK + y[2 * k] = spec[k].re; /*Qx*/ +#else y[2 * k] = re[k]; /*Qx*/ +#endif move32(); IF( xSign != 0 ) { +#ifdef OPTIMIZE_FFT_STACK + y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( spec[( Nm1 - k )].re, shl_sat( xSign, 15 ) ); /*Qx*/ +#else y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( re[( Nm1 - k )], shl_sat( xSign, 15 ) ); /*Qx*/ +#endif } ELSE { diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 4fbb39a5e37a4aae074bc28eec326d1bdfcf7f34..299b45cc2b3d23660978d66fe402df8613fcefbe 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -109,11 +109,13 @@ static void fft5_8( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx ); static void fft4_5( Word32 *x, Word32 *y, const Word16 *Idx ); static void fft5_4( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx ); +#ifndef HARMONIZE_DCT void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data */ Word32 *y, /* i/o: imaginary part of input and output data */ const Word16 n /* i : size of the FFT n=(2^k) up to 1024 */ ); +#endif /*-----------------------------------------------------------------* * fft15_shift2() * 15-point FFT with 2-point circular shift @@ -2438,6 +2440,7 @@ static void cftmdl( return; } +#ifndef HARMONIZE_DCT static void cftbsub( Word16 n, // Q0 Word32 *a, // Qx @@ -2733,6 +2736,7 @@ void edct2_fx_ivas( } } } +#endif void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data Qx */ @@ -2740,7 +2744,6 @@ void DoRTFTn_fx_ivas( const Word16 n /* i : size of the FFT up to 1024 Q0*/ ) { - Word16 i; Word32 z[2048]; @@ -6427,6 +6430,7 @@ static void fft_lenN( * Complex-value FFT *-----------------------------------------------------------------*/ +#ifndef HARMONIZE_DCT void fft_fx( Word32 *re, /* i/o: real part Qx */ Word32 *im, /* i/o: 
imag part Qx */ @@ -6434,6 +6438,7 @@ void fft_fx( const Word16 s /* i : sign */ ) { + cmplx x[960]; FOR( Word16 j = 0; j < length; j++ ) @@ -6511,6 +6516,73 @@ void fft_fx( return; } +#else +void fft_fx( + cmplx *x, /* i/o: complex data */ + const Word16 length /* i : length of fft */ +) +{ + SWITCH( length ) + { + case 20: + fft_len20_fx( x ); + BREAK; + case 40: + fft_lenN( x, FFT_RotVector_640_fx, 640, 5, 8, 8, 40 ); + BREAK; + case 64: + fft_lenN( x, FFT_RotVector_256_fx, 256, 8, 8, 8, 64 ); + BREAK; + case 80: + fft_lenN( x, FFT_RotVector_640_fx, 640, 10, 8, 4, 40 ); + BREAK; + case 100: + fft_lenN( x, FFT_RotVector_400_fx, 400, 10, 10, 4, 40 ); + BREAK; + case 120: + fft_lenN( x, FFT_RotVector_960_fx, 960, 15, 8, 4, 60 ); + BREAK; + case 128: + fft_lenN( x, FFT_RotVector_256_fx, 256, 16, 8, 4, 64 ); + BREAK; + case 160: + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 8, 2, 40 ); + BREAK; + case 200: + fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 10, 2, 40 ); + BREAK; + case 240: + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 8, 2, 60 ); + BREAK; + case 256: + fft_lenN( x, FFT_RotVector_256_fx, 256, 32, 8, 2, 64 ); + BREAK; + case 320: + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 16, 2, 40 ); + BREAK; + case 400: + fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 20, 2, 40 ); + BREAK; + case 480: + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 16, 2, 60 ); + BREAK; + case 600: + fft_lenN( x, FFT_RotVector_600_fx, 600, 30, 20, 2, 60 ); + BREAK; + case 640: + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 32, 2, 40 ); + BREAK; + case 960: + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 32, 2, 60 ); + BREAK; + default: + assert( !"fft length is not supported!" 
); + } + + return; +} +#endif + void rfft_fx( Word32 *x, /* i/o: values Qx */ @@ -6522,6 +6594,9 @@ void rfft_fx( Word16 i, sizeOfFft2, sizeOfFft4; Word32 tmp, t1, t2, t3, t4; Word16 s1, s2; +#ifdef HARMONIZE_DCT + cmplx spec[L_FRAME48k]; +#endif sizeOfFft2 = shr( length, 1 ); sizeOfFft4 = shr( length, 2 ); @@ -6592,10 +6667,43 @@ void rfft_fx( SWITCH( isign ) { - case -1: +#ifdef HARMONIZE_DCT + FOR( i = 0; i < sizeOfFft2; i++ ) + { + spec[i].re = x[2 * i]; + move32(); + spec[i].im = x[2 * i + 1]; + move32(); + } + + fft_fx( spec, sizeOfFft2 ); + + FOR( i = 0; i < sizeOfFft4; i++ ) + { + x[2 * i] = spec[i].re; + move32(); + x[2 * i + 1] = spec[sizeOfFft2 - i - 1].re; + move32(); + + x[2 * i] = spec[i].im; + move32(); + x[2 * i + 1] = L_negate( spec[sizeOfFft2 - i - 1].im ); + move32(); + } + + FOR( i = 0; i < sizeOfFft2; i++ ) + { + x[2 * i] = spec[i].re; + move32(); + x[2 * i + 1] = spec[i].im; + move32(); + } +#else fft_fx( x, x + 1, sizeOfFft2, 2 ); +#endif + // Qx tmp = L_add( x[0], x[1] ); x[1] = L_sub( x[0], x[1] ); // Qx @@ -6651,7 +6759,27 @@ void rfft_fx( move32(); } +#ifdef HARMONIZE_DCT + FOR( i = 0; i < sizeOfFft2; i++ ) + { + spec[i].re = x[2 * i]; + move32(); + spec[i].im = x[2 * i + 1]; + move32(); + } + + fft_fx( spec, sizeOfFft2 ); + + FOR( i = 0; i < sizeOfFft2; i++ ) + { + x[2 * i] = spec[i].re; + move32(); + x[2 * i + 1] = spec[i].im; + move32(); + } +#else fft_fx( x, x + 1, sizeOfFft2, 2 ); +#endif FOR( i = 0; i < length; i += 2 ) { diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index a7b2461cb18fe44c1b763ca4e52997a1ef2e49c3..a17ff18325ac0bcd014c72b9106a42c1d31fac2d 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -10,7 +10,7 @@ #include /*-----------------------------------------------------------------* - * Local functions + * Local constants *-----------------------------------------------------------------*/ #define FFT3_ONE_THIRD 21845 /* 1/3 in Q16 */ @@ -19,6 +19,10 @@ #define KP951056516_16FX 2042378325 /* EDCT 
& EMDCT constants Q31*/ #define KP587785252_16FX 1262259213 /* EDCT & EMDCT constants Q31*/ +/*-----------------------------------------------------------------* + * Local function prototypes + *-----------------------------------------------------------------*/ + static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx ); static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx ); static void fft32_5_16fx( Word16 *x, Word16 *y, const Word16 *Idx ); @@ -32,11 +36,6 @@ static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx ); static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx ); static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w ); -#include "math_32.h" - -/*-----------------------------------------------------------------* - * Local functions - *-----------------------------------------------------------------*/ static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w ); static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a ); static void cftfsub_fx( Word16 n, Word32 *a, const Word16 *w ); @@ -44,24 +43,50 @@ static void cft1st_fx( Word16 n, Word32 *a, const Word16 *w ); static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w ); +/*-----------------------------------------------------------------* + * DoRTFTn_fx() + * + * + *-----------------------------------------------------------------*/ + void DoRTFTn_fx( - Word32 *x, /* i/o : real part of input and output data Q(x) */ - Word32 *y, /* i/o : imaginary part of input and output data Q(x) */ - const Word16 n /* i : size of the FFT up to 1024 */ + Word32 *x, /* i/o : real part of input and output data Q(x) */ + Word32 *y, /* i/o : imaginary part of input and output data Q(x) */ +#ifdef OPTIMIZE_FFT_STACK + cmplx *spec, /* i/o : complex input and output data */ +#endif + const Word16 n /* i : size of the FFT up to 1024 */ ) { - Word16 i; Word32 z[2048], *pt; - pt = z; - FOR( i 
= 0; i < n; i++ ) +#ifdef OPTIMIZE_FFT_STACK + IF( spec != NULL ) { - *pt++ = x[i]; - move16(); - *pt++ = y[i]; - move16(); + pt = z; + FOR( i = 0; i < n; i++ ) + { + *pt++ = spec[i].re; + move16(); + *pt++ = spec[i].im; + move16(); + } + } + ELSE + { +#endif + pt = z; + FOR( i = 0; i < n; i++ ) + { + *pt++ = x[i]; + move16(); + *pt++ = y[i]; + move16(); + } +#ifdef OPTIMIZE_FFT_STACK } +#endif IF( EQ_16( n, 16 ) ) { @@ -92,18 +117,40 @@ void DoRTFTn_fx( assert( 0 ); } - x[0] = z[0]; - move16(); - y[0] = z[1]; - move16(); - pt = &z[2]; - FOR( i = n - 1; i >= 1; i-- ) +#ifdef OPTIMIZE_FFT_STACK + IF( spec != NULL ) + { + spec[0].re = z[0]; + move16(); + spec[0].im = z[1]; + move16(); + pt = &z[2]; + FOR( i = n - 1; i >= 1; i-- ) + { + spec[i].re = *pt++; + move16(); + spec[i].im = *pt++; + move16(); + } + } + ELSE { - x[i] = *pt++; +#endif + x[0] = z[0]; move16(); - y[i] = *pt++; + y[0] = z[1]; move16(); + pt = &z[2]; + FOR( i = n - 1; i >= 1; i-- ) + { + x[i] = *pt++; + move16(); + y[i] = *pt++; + move16(); + } +#ifdef OPTIMIZE_FFT_STACK } +#endif return; } @@ -124,6 +171,8 @@ static void cdftForw_fx( /* Do FFT */ cftfsub_fx( n, a, w ); + + return; } /*-----------------------------------------------------------------* diff --git a/lib_com/gs_inact_switching_fx.c b/lib_com/gs_inact_switching_fx.c index d00b8e3c0346b63084b679e242a0d2ffec76062e..16e72cd585feedd7c5c46ca8bae3a4da28d23ec7 100644 --- a/lib_com/gs_inact_switching_fx.c +++ b/lib_com/gs_inact_switching_fx.c @@ -89,7 +89,11 @@ void Inac_switch_ematch_fx( ELSE IF( EQ_16( coder_type, VOICED ) || EQ_16( coder_type, GENERIC ) || EQ_16( coder_type, TRANSITION ) || ( last_core != ACELP_CORE ) || NE_16( last_codec_mode, MODE1 ) || ( ( element_mode > EVS_MONO ) && EQ_16( coder_type, UNVOICED ) ) ) { /* Find spectrum and energy per band for GC and VC frames */ +#ifdef HARMONIZE_DCT + edct_16fx( exc2, dct_exc_tmp, L_frame, 5 ); +#else edct_16fx( exc2, dct_exc_tmp, L_frame, 5, element_mode ); +#endif 
Ener_per_band_comp_fx( dct_exc_tmp, Ener_per_bd, Q_exc, MBANDS_GN, 1, L_frame ); @@ -103,7 +107,11 @@ void Inac_switch_ematch_fx( ELSE IF( ( coder_type == INACTIVE ) && inactive_coder_type_flag ) { /* Find spectrum and energy per band for inactive frames */ +#ifdef HARMONIZE_DCT + edct_16fx( exc2, dct_exc_tmp, L_frame, 5 ); +#else edct_16fx( exc2, dct_exc_tmp, L_frame, 5, element_mode ); +#endif Ener_per_band_comp_fx( dct_exc_tmp, Ener_per_bd, Q_exc, MBANDS_GN, 1, L_frame ); @@ -188,7 +196,11 @@ void Inac_switch_ematch_fx( Scale_sig( dct_exc_tmp, 240, 1 ); // Q_exc Scale_sig( exc2, 240, 1 ); // Q_exc } +#ifdef HARMONIZE_DCT + edct_16fx( dct_exc_tmp, exc2, L_frame, 5 ); +#else edct_16fx( dct_exc_tmp, exc2, L_frame, 5, element_mode ); +#endif } return; diff --git a/lib_com/ivas_mdft_imdft_fx.c b/lib_com/ivas_mdft_imdft_fx.c index 49d1cbbf11c01d8bd02dd538abad93162ee0fab9..19774e7126f009bc4ee5d16d273300632c86cc9c 100644 --- a/lib_com/ivas_mdft_imdft_fx.c +++ b/lib_com/ivas_mdft_imdft_fx.c @@ -214,7 +214,29 @@ static void ivas_ifft_cplx1_fx( move32(); } +#ifdef HARMONIZE_DCT + cmplx x[L_FRAME48k]; + + FOR( i = 0; i < length; i++ ) + { + x[i].re = re[i]; + move32(); + x[i].im = im[i]; + move32(); + } + + fft_fx( x, length ); + + FOR( i = 0; i < length; i++ ) + { + re[i] = x[i].re; + move32(); + im[i] = x[i].im; + move32(); + } +#else fft_fx( re, im, length, 1 ); +#endif return; } @@ -233,8 +255,12 @@ void ivas_mdft_fx( const Word16 mdft_length /* i : MDFT length */ ) { +#ifdef HARMONIZE_DCT + cmplx spec[L_FRAME48k]; +#else Word32 re[L_FRAME48k]; Word32 im[L_FRAME48k]; +#endif Word16 j, len_by_2; const Word32 *pTwid; // Q31 len_by_2 = shr( mdft_length, 1 ); @@ -244,23 +270,53 @@ void ivas_mdft_fx( { FOR( j = 0; j < mdft_length; j++ ) { +#ifdef HARMONIZE_DCT + spec[j].re = Mpy_32_32( pIn[j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); + spec[j].im = Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); +#else re[j] = 
Mpy_32_32( pIn[j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); im[j] = Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); +#endif } } ELSE { FOR( j = 0; j < mdft_length; j++ ) { +#ifdef HARMONIZE_DCT + spec[j].re = Msub_32_32( Mpy_32_32( pIn[j], pTwid[j] ), pIn[add( mdft_length, j )], pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); + spec[j].im = Msub_32_32( Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ), pIn[mdft_length + j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); +#else re[j] = Msub_32_32( Mpy_32_32( pIn[j], pTwid[j] ), pIn[add( mdft_length, j )], pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); im[j] = Msub_32_32( Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ), pIn[mdft_length + j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); +#endif } } +#ifdef HARMONIZE_DCT + fft_fx( spec, mdft_length ); + + FOR( j = 0; j < len_by_2; j++ ) + { + pOut_re[2 * j] = spec[j].re; + move32(); + pOut_re[2 * j + 1] = spec[mdft_length - j - 1].re; + move32(); + + pOut_im[2 * j] = spec[j].im; + move32(); + pOut_im[2 * j + 1] = L_negate( spec[mdft_length - j - 1].im ); + move32(); + } +#else fft_fx( re, im, mdft_length, 1 ); FOR( j = 0; j < len_by_2; j++ ) { @@ -274,6 +330,8 @@ void ivas_mdft_fx( pOut_im[2 * j + 1] = L_negate( im[mdft_length - j - 1] ); // Qin move32(); } +#endif + return; } diff --git a/lib_com/options.h b/lib_com/options.h index daede4c0d2584ef73b77d4b5f9700e8bceb2fe1c..408d65eec4c547bff735b705fe1317618d7cb0cf 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -86,10 +86,12 @@ #define HARM_HQ_CORE_KEEP_BE /* hack to keep all BE after HQ core functions harmonization; pending resolving issues #2450, #2451, #2452 */ #define FIX_BASOP_2457_HARM_GEN /* FhG: harmonization of function generate_comfort_noise_dec_hf_ivas_fx()*/ #define FIX_BASOP_2478_HARM_ENC_PRM_HM /* FhG: basop issue 2478: harmonize enc_prm_hm() and enc_prm_hm_ivas_fx() */ 
-#define FIX_2455_HARMONIZE_generate_comfort_noise_enc /* FhG: harmonize generate_comfort_noise_enc and generate_comfort_noise_enc_ivas */ -#define FIX_2455_HARMONIZE_configureFdCngEnc /* FhG: harmonize generate_comfort_noise_enc and generate_comfort_noise_enc_ivas */ +#define FIX_2455_HARMONIZE_generate_comfort_noise_enc /* FhG: harmonize generate_comfort_noise_enc and generate_comfort_noise_enc_ivas */ +#define FIX_2455_HARMONIZE_configureFdCngEnc /* FhG: harmonize generate_comfort_noise_enc and generate_comfort_noise_enc_ivas */ #define FIX_2463_EVS_BWE_LSF /* VA: basop issue 2463: harmonize calling of Quant_BWE_LSF_fx() */ #define FIX_2452_HQ_CORE_PEAQ_AVR_RATIO_HARM /* Eri: Basop issue 2453: Fix alignment of peak_avrg_ratio_fx */ +#define OPTIMIZE_FFT_STACK /* VA: removal of intermediate FFT buffers */ +#define HARMONIZE_DCT /* VA: removal of duplicated DCT functions */ /* #################### End BE switches ################################## */ diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 02cc7f06d9e2f601e777acb42cb5c3bdd8a63e79..b0dbc498e3275d7f0a4575c57fc295c52a5d1920 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -1292,6 +1292,14 @@ void scale_sig32( const Word16 exp0 /* i : exponent: x = round(x << exp) Qx xx exp */ ); +#ifdef OPTIMIZE_FFT_STACK +void scale_sig32_cmplx( + cmplx x[], /* i/o: signal to scale Qx */ + const Word16 lg, /* i : size of x[] Q0 */ + const Word16 exp0 /* i : exponent: x = round(x << exp) Qx exp */ +); + +#endif void Scale_sig64( Word64 x[], /* i/o: signal to scale Qx */ Word16 len, /* i : size of x[] Q0 */ @@ -3982,16 +3990,22 @@ void edct_fx( const Word32 *x, /* i : i signal Qq */ Word32 *y, /* o : output transform Qq */ Word16 length, /* i : length */ - Word16 *q /* i : Q value of i signal */ +#ifdef HARMONIZE_DCT + Word16 *q, /* i : Q value of input signal */ + const Word16 element_mode /* i : element mode */ +#else + Word16 *q /* i : Q value of i signal */ +#endif ); +#ifndef HARMONIZE_DCT void 
edct_ivas_fx( const Word32 *x, /* i : input signal Qq */ Word32 *y, /* o : output transform Qq */ Word16 length, /* i : length Q0*/ Word16 *q /* i : Q value of input signal */ ); - +#endif void edst_fx( const Word32 *x, /* i : i signal Qq */ Word32 *y, /* o : output transform Qq */ @@ -4003,8 +4017,12 @@ void edct_16fx( const Word16 *x, /* i : i signal Qx */ Word16 *y, /* o : output transform Qx */ Word16 length, /* i : length */ - Word16 bh, /* bit-headroom */ - const Word16 element_mode ); + Word16 bh /* bit-headroom */ +#ifndef HARMONIZE_DCT + , + const Word16 element_mode +#endif +); void iedct_short_fx( const Word32 *in, /* i : i vector */ @@ -4026,9 +4044,12 @@ void BASOP_cfft( cmplx *pComplexBuf, Word16 sizeOfFft, Word16 *scale, Word32 wor void BASOP_rfft( Word32 *x, Word16 sizeOfFft, Word16 *scale, Word16 isign ); void DoRTFTn_fx( - Word32 *x, /* i/o : real part of i and output data */ - Word32 *y, /* i/o : imaginary part of i and output data */ - const Word16 n /* i : size of the FFT up to 1024 */ + Word32 *x, /* i/o : real part of i and output data */ + Word32 *y, /* i/o : imaginary part of i and output data */ +#ifdef OPTIMIZE_FFT_STACK + cmplx *spec, /* i/o : complex input and output data */ +#endif + const Word16 n /* i : size of the FFT up to 1024 */ ); void DoRTFT480_fx( @@ -4083,10 +4104,15 @@ void DoFFT_fx( const Word16 length ); void fft_fx( +#ifndef HARMONIZE_DCT Word32 *re, /* i/o: real part */ Word32 *im, /* i/o: imag part */ const Word16 length, /* i : length of fft */ const Word16 s /* i : sign */ +#else + cmplx *spec, /* i/o: complex data */ + const Word16 length /* i : length of fft */ +#endif ); void rfft_fx( @@ -4102,10 +4128,12 @@ void DoRTFTn_fx_ivas( const Word16 n /* i : size of the FFT up to 1024 */ ); - Word16 find_guarded_bits_fx( const Word32 n ); Word16 L_norm_arr( const Word32 *arr, Word16 size ); +#ifdef OPTIMIZE_FFT_STACK +Word16 L_norm_arr_cmplx( const cmplx *arr, Word16 size ); +#endif Word16 norm_arr( Word16 *arr, Word16 
size ); Word16 W_norm_arr( Word64 *arr, Word16 size ); @@ -4115,6 +4143,7 @@ Flag is_zero_arr( Word32 *arr, Word16 size ); Flag is_zero_arr16( Word16 *arr, Word16 size ); Flag is_zero_arr64( Word64 *arr, Word16 size ); +#ifndef HARMONIZE_DCT void edct2_fx_ivas( const Word16 n, const Word16 isgn, @@ -4122,7 +4151,7 @@ void edct2_fx_ivas( Word32 *a, const Word16 *ip, const Word16 *w ); - +#endif void edct2_fx( Word16 n, Word16 isgn, diff --git a/lib_com/rom_com.h b/lib_com/rom_com.h index 5d5feebcbeeb8424eda7b769749f826c3e05d86a..dd73f7c50a3e5d8d6341d81e1c5cc9bf1a49cc5c 100644 --- a/lib_com/rom_com.h +++ b/lib_com/rom_com.h @@ -1550,16 +1550,18 @@ extern const Word16 ivas_sine_panning_tbl_fx[601]; // Q15 extern const Word16 ivas_sin_az_fx[361]; // Q15 // edct_fx.c -extern const Word16 sin_scale_tbl_960[960]; // Q15 -extern const Word16 cos_scale_tbl_960[960]; // Q15 -extern const Word16 cos_scale_tbl_640[640]; // Q15 -extern const Word16 sin_scale_tbl_640[640]; // Q15 -extern const Word16 sin_scale_tbl_512[512]; // Q15 -extern const Word16 cos_scale_tbl_512[512]; // Q15 +extern const Word16 sin_scale_tbl_960[960]; // Q15 +extern const Word16 cos_scale_tbl_960[960]; // Q15 +extern const Word16 cos_scale_tbl_640[640]; // Q15 +extern const Word16 sin_scale_tbl_640[640]; // Q15 +extern const Word16 sin_scale_tbl_512[512]; // Q15 +extern const Word16 cos_scale_tbl_512[512]; // Q15 +#ifndef OPTIMIZE_FFT_STACK extern const Word16 cos_scale_tbl_1200[1200]; // Q15 extern const Word16 sin_scale_tbl_1200[1200]; // Q15 extern const Word16 cos_scale_tbl_800[800]; // Q15 extern const Word16 sin_scale_tbl_800[800]; // Q15 +#endif extern const Word16 scales_ivas_fx[][MAX_NO_SCALES * 2]; // Q11 extern const Word16 scales_p_ivas_fx[][MAX_NO_SCALES * 2]; // Q11 diff --git a/lib_com/rom_com_fx.c b/lib_com/rom_com_fx.c index f73ab13f3fe3c17ccc95e9137c253542e6928634..14c6b97b88e61ee5d03ed020b60c9adbfa6c2adb 100644 --- a/lib_com/rom_com_fx.c +++ b/lib_com/rom_com_fx.c @@ -27386,6 +27386,7 
@@ const Word16 cos_scale_tbl_512[512] = /* Q15 */ 201, 100 }; +#ifndef OPTIMIZE_FFT_STACK const Word16 sin_scale_tbl_1200[1200] = { /* Q15 */ 0, 42, 85, 128, 171, 214, 257, 300, 343, 386, 428, 471, 514, 557, 600, 643, @@ -27897,7 +27898,7 @@ const Word16 cos_scale_tbl_800[800] = { /* Q15 */ 32750, 32752, 32754, 32756, 32757, 32759, 32760, 32761, 32762, 32763, 32764, 32765, 32765, 32766, 32766, 32766 }; - +#endif const Word16 scales_ivas_fx[][MAX_NO_SCALES * 2] = /* 2 subvectors Q11*/ { { diff --git a/lib_com/scale_mem_fx.c b/lib_com/scale_mem_fx.c index 01c2442a8eb73da4686608a77743e259c61a995f..4f095703af811dd6c6c8fa2719b1e5b81bd8caf9 100644 --- a/lib_com/scale_mem_fx.c +++ b/lib_com/scale_mem_fx.c @@ -337,6 +337,37 @@ void scale_sig32_r( return; } +#ifdef OPTIMIZE_FFT_STACK +void scale_sig32_cmplx( + cmplx x[], /* i/o: signal to scale Qx */ + const Word16 lg, /* i : size of x[] Q0 */ + const Word16 exp0 /* i : exponent: x = round(x << exp) Qx exp */ +) +{ + Word16 i; + + FOR( i = 0; i < lg; i++ ) + { + /* saturation can occur here */ + x[i].re = L_shl( x[i].re, exp0 ); + move32(); + if ( 0 == exp0 ) + { + BREAK; + } + x[i].im = L_shl( x[i].im, exp0 ); + move32(); + if ( 0 == exp0 ) + { + BREAK; + } + } + + return; +} +#endif + + /*-------------------------------------------------------------------* * Rescale_mem: * diff --git a/lib_com/tcx_mdct_fx.c b/lib_com/tcx_mdct_fx.c index 4fd016729f51dde64c070c963f22a3d129a4e74f..d29a4ffede893df57886efed30e7b8ad05426d86 100644 --- a/lib_com/tcx_mdct_fx.c +++ b/lib_com/tcx_mdct_fx.c @@ -138,7 +138,11 @@ void TCX_MDCT( *y_e = sub( 15, *y_e ); move16(); +#ifdef HARMONIZE_DCT + edct_fx( y, y, l / 2 + m + r / 2, y_e, EVS_MONO ); +#else edct_fx( y, y, l / 2 + m + r / 2, y_e ); +#endif *y_e = sub( 15 - 1, *y_e ); move16(); return; @@ -220,7 +224,11 @@ void TCX_MDCT_Inverse( R2 = shr( r, 1 ); x_e = sub( 15, x_e ); +#ifdef HARMONIZE_DCT + edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e, EVS_MONO ); +#else 
edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e ); +#endif x_e = sub( 15, x_e ); fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); /* exp(fac_e) */ diff --git a/lib_com/tools_fx.c b/lib_com/tools_fx.c index fd0eceedb65d5b7218bed50924e9061984884c63..cf07a4e227e779755985ab5cda61b854e7b6035f 100644 --- a/lib_com/tools_fx.c +++ b/lib_com/tools_fx.c @@ -5378,6 +5378,35 @@ Word16 L_norm_arr( return q; } +#ifdef OPTIMIZE_FFT_STACK +Word16 L_norm_arr_cmplx( + const cmplx *arr, + Word16 size ) +{ + Word16 q = 31; + move16(); + + FOR( Word16 i = 0; i < size; i++ ) + { + Word16 q_tst; + + q_tst = norm_l( arr[i].re ); + if ( arr[i].re != 0 ) + { + q = s_min( q, q_tst ); + } + + q_tst = norm_l( arr[i].im ); + if ( arr[i].im != 0 ) + { + q = s_min( q, q_tst ); + } + } + + return q; +} +#endif + Word16 norm_arr( Word16 *arr, Word16 size ) diff --git a/lib_com/trans_direct_fx.c b/lib_com/trans_direct_fx.c index c84cd9efad4c495df32efee54db2a27c02a2faa0..fa0a27a9a43bf4deac452becb6bf57849418f095 100644 --- a/lib_com/trans_direct_fx.c +++ b/lib_com/trans_direct_fx.c @@ -103,7 +103,11 @@ void direct_transform_fx( Qs[0] = *Q; move16(); +#ifdef HARMONIZE_DCT + edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[0], EVS_MONO ); +#else edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[0] ); +#endif Qmin = s_min( Qs[0], Qmin ); iseg_fx = &in32_r16_fx[segment_length4]; @@ -136,7 +140,11 @@ void direct_transform_fx( Qs[seg] = *Q; move16(); +#ifdef HARMONIZE_DCT + edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[seg], EVS_MONO ); +#else edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[seg] ); +#endif Qmin = s_min( Qs[seg], Qmin ); iseg_fx += segment_length2; @@ -164,7 +172,11 @@ void direct_transform_fx( } Qs[NUM_TIME_SWITCHING_BLOCKS - 1] = *Q; move16(); +#ifdef HARMONIZE_DCT + edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[NUM_TIME_SWITCHING_BLOCKS - 1], EVS_MONO ); +#else edct_fx( dctin32_fx, oseg_fx, 
segment_length2, &Qs[NUM_TIME_SWITCHING_BLOCKS - 1] ); +#endif Qmin = s_min( Qs[NUM_TIME_SWITCHING_BLOCKS - 1], Qmin ); *Q = Qmin; @@ -183,7 +195,11 @@ void direct_transform_fx( } ELSE { +#ifdef HARMONIZE_DCT + edct_fx( in32_fx, out32_fx, L, Q, EVS_MONO ); +#else edct_fx( in32_fx, out32_fx, L, Q ); +#endif } return; diff --git a/lib_com/trans_inv_fx.c b/lib_com/trans_inv_fx.c index 34d424f26e0bcdebff96c44bcc3584ff2f2f725c..32e188f2d87b8b2d14aa1b4e38bd30d47155c0d0 100644 --- a/lib_com/trans_inv_fx.c +++ b/lib_com/trans_inv_fx.c @@ -1122,6 +1122,10 @@ void Inverse_Transform( } ELSE { +#ifdef HARMONIZE_DCT + edct_fx( in_mdct, out, L, Q, EVS_MONO ); +#else edct_fx( in_mdct, out, L, Q ); +#endif } } diff --git a/lib_dec/FEC_HQ_phase_ecu_fx.c b/lib_dec/FEC_HQ_phase_ecu_fx.c index f1c3ab88bdb16db69c9981a6b50efd0ce491dac2..4e267a0d6b61e76c8d69bc2a948cabc13c842755 100644 --- a/lib_dec/FEC_HQ_phase_ecu_fx.c +++ b/lib_dec/FEC_HQ_phase_ecu_fx.c @@ -2556,7 +2556,11 @@ static void fec_ecu_dft_fx( *exp = s_min( *exp, 15 ); } +#ifdef OPTIMIZE_FFT_STACK + DoRTFTn_fx( Tfr32, Tfi32, NULL, *Nfft ); +#else DoRTFTn_fx( Tfr32, Tfi32, *Nfft ); +#endif N_LP = shr( *Nfft, 1 ); L_tmp = L_deposit_l( 0 ); diff --git a/lib_dec/FEC_fx.c b/lib_dec/FEC_fx.c index fe2780a8600c7dc9457102ef016ee65929d384a1..9674ab9c7a52191baf2822939be8c0716a021e1d 100644 --- a/lib_dec/FEC_fx.c +++ b/lib_dec/FEC_fx.c @@ -5,15 +5,20 @@ #include #include "options.h" /* Compilation switches */ #include "cnst.h" /* Common constants */ -#include "rom_com.h" /* Common static table prototypes */ +#include "rom_com.h" /* Common static table prototypes */ #include "rom_dec.h" /* Decoder static table prototypes */ #include "prot_fx.h" /* Function prototypes */ #include "basop_util.h" + + /*-------------------------------------------------------------------* * Local function prototypes *-------------------------------------------------------------------*/ + static void pulseRes_preCalc( Word16 *cond1, Word16 *cond2, Word32 
*cond3, Word16 new_pit, Word16 Tc, Word16 L_frame ); void gain_dec_bfi_fx( Word16 *past_qua_en ); + + /*======================================================================*/ /* FUNCTION : FEC_exc_estim_fx() */ /*----------------------------------------------------------------------*/ @@ -47,7 +52,6 @@ void gain_dec_bfi_fx( Word16 *past_qua_en ); /* _ (Word16[]) voice_factors_fx : frame error rate Q15 */ /* _ (Word16[]) FEC_pitch_fx(tmp_tc): FEC pitch Q6 */ /*-----------------------------------------------------------------------*/ - /* _ (Word16) st_fx->lp_gainp_fx : FEC -low-pass filtered pitch gain Q14 */ /* _ (Word16) st_fx->seed :FEC-seed for random generator for excitation*/ /* _ (Word16) st_fx->bfi_pitch_fx : LP filter coefficient */ @@ -57,7 +61,6 @@ void gain_dec_bfi_fx( Word16 *past_qua_en ); /* _ None */ /*=======================================================================*/ - void FEC_exc_estim_fx( Decoder_State *st_fx, /* i/o: Decoder static memory */ const Word16 L_frame, /* i : length of the frame */ @@ -73,7 +76,6 @@ void FEC_exc_estim_fx( Word16 *tmp_noise /* o : long-term noise energy Q0 */ ) { - Word16 exc2_buf[L_FRAME16k + MODE1_L_FIR_FER - 1]; Word16 gainCNG, new_pit /*Q0*/; /* Q3*/ Word16 exp; @@ -152,7 +154,6 @@ void FEC_exc_estim_fx( move16(); } - pitch_pred_linear_fit( st_fx->nbLostCmpt, st_fx->last_good, @@ -170,13 +171,11 @@ void FEC_exc_estim_fx( new_pit /*Q0 int*/ = shl( round_fx( predPitchLag ), 0 ); } - /*-----------------------------------------------------------------* * estimate subframe pitch values for the FEC frame *-----------------------------------------------------------------*/ /* initialize pitch to the long-term pitch */ - *tmp_tc = st_fx->bfi_pitch_fx; move16(); /*Q6*/ IF( EQ_16( L_frame, L_FRAME ) ) @@ -473,7 +472,11 @@ void FEC_exc_estim_fx( move16(); /* Transform to frequency domain */ +#ifdef HARMONIZE_DCT + edct_16fx( exc, exc_dct_in, st_fx->L_frame, 5 ); +#else edct_16fx( exc, exc_dct_in, st_fx->L_frame, 5, 
st_fx->element_mode ); +#endif /* Reset unvaluable part of the adaptive (pitch) excitation contribution */ max_len = sub( st_fx->L_frame, Diff_len ); @@ -498,6 +501,7 @@ void FEC_exc_estim_fx( /*-----------------------------------------------------------------* * Replicate the last spectrum in case the last good frame was coded by GSC *-----------------------------------------------------------------*/ + test(); test(); test(); @@ -514,7 +518,11 @@ void FEC_exc_estim_fx( *tmp_noise = shr_r( st_fx->lp_gainc_fx, 3 ); /*Q0*/ move16(); /* Transform back to time domain */ +#ifdef HARMONIZE_DCT + edct_16fx( exc_dct_in, exc, st_fx->L_frame, 5 ); +#else edct_16fx( exc_dct_in, exc, st_fx->L_frame, 5, st_fx->element_mode ); +#endif } ELSE { @@ -739,12 +747,19 @@ void FEC_exc_estim_fx( move16(); st_fx->bfi_pitch_frame = st_fx->L_frame; move16(); + return; } /*calculates some conditions for Pulse resynchronization to take place*/ -static void pulseRes_preCalc( Word16 *cond1, Word16 *cond2, Word32 *cond3, Word16 new_pit, Word16 Tc, Word16 L_frame ) +static void pulseRes_preCalc( + Word16 *cond1, + Word16 *cond2, + Word32 *cond3, + Word16 new_pit, + Word16 Tc, + Word16 L_frame ) { Word16 tmp_pit, tmp_pit_e, tmp_frame, tmp_frame_e; Word32 tmp_pit2; @@ -773,8 +788,11 @@ static void pulseRes_preCalc( Word16 *cond1, Word16 *cond2, Word32 *cond3, Word1 BASOP_SATURATE_WARNING_ON_EVS *cond3 = L_sub( L_mult0( -1, tmp_pit ), tmp_pit2 ); move32(); + + return; } + /*-------------------------------------------------------------------* * gain_dec_bfi() * diff --git a/lib_dec/LD_music_post_filter_fx.c b/lib_dec/LD_music_post_filter_fx.c index fc3a94a77f54dc0d326ea7198ece000da7f1a8c8..989a47e8da2dcb590071d8468291060ca850e210 100644 --- a/lib_dec/LD_music_post_filter_fx.c +++ b/lib_dec/LD_music_post_filter_fx.c @@ -877,7 +877,11 @@ void Prep_music_postP_fx( * EDCT and back to 16 bits *------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( exc16, 
dct_buffer_out, DCT_L_POST, 6 ); +#else edct_16fx( exc16, dct_buffer_out, DCT_L_POST, 6, EVS_MONO ); +#endif *qdct = Q_exc; move16(); @@ -957,7 +961,11 @@ void Post_music_postP_fx( * Go back to time domain *------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_buffer_in, exc16, DCT_L_POST, 6 ); +#else edct_16fx( dct_buffer_in, exc16, DCT_L_POST, 6, EVS_MONO ); +#endif Copy( exc16 + OFFSET2, exc2, L_FRAME ); diff --git a/lib_dec/core_switching_dec_fx.c b/lib_dec/core_switching_dec_fx.c index b4f34830d36dbdb6787ee69da6e1c63fa64f74cd..a0db01d31658acc6cc1b1449483163e460961930 100644 --- a/lib_dec/core_switching_dec_fx.c +++ b/lib_dec/core_switching_dec_fx.c @@ -111,7 +111,11 @@ void bw_switching_pre_proc_fx( * Calculate frequency energy of 0~3.2kHz and 3.2~6.4kHz the ACELP core synthesis *-------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( old_syn_12k8_16k_fx, syn_dct_fx, L_FRAME, 6 ); +#else edct_16fx( old_syn_12k8_16k_fx, syn_dct_fx, L_FRAME, 6, st_fx->element_mode ); +#endif L_tmp = L_deposit_l( 0 ); FOR( i = 0; i < L_FRAME / 2; i++ ) diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c index 65ad45e3230401be45af293a06986135108bb0de..f8cd015ea741030a2d82702c3c871251a28dbb28 100644 --- a/lib_dec/dec_tcx_fx.c +++ b/lib_dec/dec_tcx_fx.c @@ -2181,7 +2181,11 @@ void IMDCT_fx( Word32 *x, Word16 x_e, Word16 *old_syn_overl, Word16 *syn_Overl_T /* DCT */ Q = sub( 31, x_e ); +#ifdef HARMONIZE_DCT + edct_fx( x, tmp_buf, L_frame, &Q, EVS_MONO ); +#else edct_fx( x, tmp_buf, L_frame, &Q ); +#endif /* scale by sqrt(L / NORM_MDCT_FACTOR) */ tmp1 = mult_r( shl( L_frame, 4 ), 26214 /*128.f / NORM_MDCT_FACTOR Q15*/ ); /* 4Q11 */ @@ -2713,7 +2717,11 @@ static void TCX_MDCT_Inverse_qwin_fx( R2 = shr( r, 1 ); x_e = sub( 15, x_e ); +#ifdef HARMONIZE_DCT + edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e, EVS_MONO ); +#else edct_fx( x, 
tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e ); +#endif x_e = sub( 15, x_e ); fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); /* exp(fac_e) */ @@ -3305,7 +3313,11 @@ void IMDCT_ivas_fx( } ELSE { +#ifdef HARMONIZE_DCT + edct_fx( x_fx, xn_buf_fx_32 + add( shr( overlap, 1 ), nz ), L_frame, &q_xn_buf_fx_32, IVAS_SCE /* just cannot be EVS_MONO */ ); +#else edct_ivas_fx( x_fx, xn_buf_fx_32 + add( shr( overlap, 1 ), nz ), L_frame, &q_xn_buf_fx_32 ); +#endif Word16 res_m, res_e; res_e = 0; move16(); diff --git a/lib_dec/gs_dec_amr_wb_fx.c b/lib_dec/gs_dec_amr_wb_fx.c index 326c2f91976fd0087b8b6bdc60edd852091dd4d1..2670ebfef8896ac43888822603e0d906c9d6a14d 100644 --- a/lib_dec/gs_dec_amr_wb_fx.c +++ b/lib_dec/gs_dec_amr_wb_fx.c @@ -450,10 +450,21 @@ void improv_amr_wb_gs_fx( * Do the excitation modification according to the content * Go back to time domain -> Overwrite exctiation *------------------------------------------------------------*/ + +#ifdef HARMONIZE_DCT + edct_16fx( exc2_fx, dct_exc_in_fx, L_FRAME, 6 ); +#else edct_16fx( exc2_fx, dct_exc_in_fx, L_FRAME, 6, EVS_MONO ); +#endif + gs_dec_amr_wb_fx( core_brate, seed_tcx, dct_exc_in_fx, Q_exc2, dct_exc_out_fx, Q_exc2, pitch_buf_fx, lt_voice_fac_fx, clas, coder_type ); +#ifdef HARMONIZE_DCT + edct_16fx( dct_exc_out_fx, exc2_fx, L_FRAME, 6 ); +#else edct_16fx( dct_exc_out_fx, exc2_fx, L_FRAME, 6, EVS_MONO ); +#endif + /*------------------------------------------------------------* * Redo core synthesis at 12k8 Hz with the modified excitation *------------------------------------------------------------*/ diff --git a/lib_dec/gs_dec_fx.c b/lib_dec/gs_dec_fx.c index f50533b4b5c7f3bf7208cd0c9b1e67fe19ad3354..a24bb88dcc0b332fa826902788ab63bcc877ae12 100644 --- a/lib_dec/gs_dec_fx.c +++ b/lib_dec/gs_dec_fx.c @@ -358,7 +358,11 @@ void decod_audio_fx( * DCT transform *---------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + 
edct_16fx( exc, dct_epit, st_fx->L_frame, 7 ); +#else edct_16fx( exc, dct_epit, st_fx->L_frame, 7, st_fx->element_mode ); +#endif /*---------------------------------------------------------------* * Reset unvaluable part of the adaptive (pitch) excitation contribution @@ -497,8 +501,13 @@ void decod_audio_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit, exc, st_fx->L_frame, 7 ); + edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7 ); +#else edct_16fx( dct_epit, exc, st_fx->L_frame, 7, st_fx->element_mode ); edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7, st_fx->element_mode ); +#endif /*----------------------------------------------------------------------* * Remove potential pre-echo in case an onset has been detected diff --git a/lib_dec/hf_synth_fx.c b/lib_dec/hf_synth_fx.c index 9c4807835e765b727e4b973535a819da59fd409d..32fcb3311c7c63a1adf9fc45d2ad4a56fbfb2975 100644 --- a/lib_dec/hf_synth_fx.c +++ b/lib_dec/hf_synth_fx.c @@ -716,7 +716,11 @@ void hf_synth_amr_wb_fx( Copy_Scale_sig_16_32_DEPREC( exc, exc32, L_FRAME, qdct ); /* Qexc + qdct */ qdct = add( qdct, Q_exc ); +#ifdef HARMONIZE_DCT + edct_fx( exc32, dct_exc32, L_FRAME, &qdct, EVS_MONO ); +#else edct_fx( exc32, dct_exc32, L_FRAME, &qdct ); +#endif q_tmp = Exp32Array( L_FRAME, dct_exc32 ); q_tmp = sub( q_tmp, 16 ); @@ -1006,7 +1010,11 @@ void hf_synth_amr_wb_fx( qhf = sub( q_tmp, 1 ); Copy_Scale_sig_16_32_DEPREC( dct_hb, dct_hb32, L_FRAME16k, qhf ); /* qhf + qdct */ qhf = add( qhf, qdct ); +#ifdef HARMONIZE_DCT + edct_fx( dct_hb32, exc16k32, L_FRAME16k, &qhf, EVS_MONO ); +#else edct_fx( dct_hb32, exc16k32, L_FRAME16k, &qhf ); +#endif q_tmp = Exp32Array( L_FRAME16k, exc16k32 ); q_tmp = sub( q_tmp, 16 ); Copy_Scale_sig_32_16( exc16k32, exc16k, L_FRAME16k, q_tmp ); /* qhf + qtmp */ diff --git a/lib_dec/ivas_td_low_rate_dec_fx.c b/lib_dec/ivas_td_low_rate_dec_fx.c index 
916329e66cc4f890b44b393331d3d02465993943..200443dbc7a0a8363d517462b787005caa19c21b 100644 --- a/lib_dec/ivas_td_low_rate_dec_fx.c +++ b/lib_dec/ivas_td_low_rate_dec_fx.c @@ -134,9 +134,14 @@ void tdm_low_rate_dec_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit, exc, L_FRAME, find_guarded_bits_fx( L_FRAME ) ); + edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, find_guarded_bits_fx( L_FRAME ) ); +#else edct_16fx( dct_epit, exc, L_FRAME, find_guarded_bits_fx( L_FRAME ), IVAS_CPE_TD ); edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, find_guarded_bits_fx( L_FRAME ), IVAS_CPE_TD ); +#endif IF( bwe_exc != NULL ) { diff --git a/lib_enc/bw_detect_fx.c b/lib_enc/bw_detect_fx.c index 2e04b986ff3813d6ba305070632b388d41938a4b..200ff9098b699c165999646dc08c7b3ad0d65ec6 100644 --- a/lib_enc/bw_detect_fx.c +++ b/lib_enc/bw_detect_fx.c @@ -308,7 +308,11 @@ void bw_detect_fx( in_win32[i] = L_mult( *pt++, *pt1-- ); move32(); } +#ifdef HARMONIZE_DCT + edct_fx( in_win32, spect32, BWD_TOTAL_WIDTH, &Q_dct, EVS_MONO ); +#else edct_fx( in_win32, spect32, BWD_TOTAL_WIDTH, &Q_dct /*,st->element_mode*/ ); +#endif FOR( i = 0; i < BWD_TOTAL_WIDTH; i++ ) { diff --git a/lib_enc/cod_tcx_fx.c b/lib_enc/cod_tcx_fx.c index 7fae11949351474a433d43eab4d4fb491affb158..e56c306718401da940e1c354ebaf75e059fd72ba 100644 --- a/lib_enc/cod_tcx_fx.c +++ b/lib_enc/cod_tcx_fx.c @@ -2490,7 +2490,11 @@ void QuantizeSpectrum_fx( /* DCT */ Q = sub( 31, *spectrum_e ); +#ifdef HARMONIZE_DCT + edct_fx( spectrum, tmp_buf, L_frame, &Q, EVS_MONO ); +#else edct_fx( spectrum, tmp_buf, L_frame, &Q ); +#endif /* scale by sqrt(L / NORM_MDCT_FACTOR) */ tmp1 = mult_r( shl( L_frame, 4 ), 26214 /*128.f / NORM_MDCT_FACTOR Q15*/ ); /* 4Q11 */ @@ -3722,7 +3726,11 @@ void coder_tcx_fx( Q = sub( Q, tmp2 ); /* DCT */ +#ifdef HARMONIZE_DCT + edct_fx( tmp_buf, spectrum, L_frame, &Q, EVS_MONO ); +#else edct_fx( tmp_buf, spectrum, L_frame, 
&Q ); +#endif *spectrum_e = sub( 31, Q ); move16(); } @@ -4404,7 +4412,11 @@ void InternalTCXDecoder_fx( /* DCT */ Q = sub( 31, *spectrum_e ); +#ifdef HARMONIZE_DCT + edct_fx( spectrum_fx, tmp_buf, L_frame, &Q, IVAS_SCE /* just cannot be EVS_MONO */ ); +#else edct_ivas_fx( spectrum_fx, tmp_buf, L_frame, &Q ); +#endif /* scale by sqrt(L / NORM_MDCT_FACTOR) */ tmp1 = mult_r( shl( L_frame, 4 ), 26214 /*128.f / NORM_MDCT_FACTOR Q15*/ ); /* 4Q11 */ diff --git a/lib_enc/ext_sig_ana_fx.c b/lib_enc/ext_sig_ana_fx.c index f5b91249ff996820a63ea5fb9ce49eb484f9d713..e34cf4f996161c18755c977b09c4bcfe121e5c4e 100644 --- a/lib_enc/ext_sig_ana_fx.c +++ b/lib_enc/ext_sig_ana_fx.c @@ -372,7 +372,11 @@ void core_signal_analysis_high_bitrate_fx( Q = sub( Q, tmp2 ); /* DCT */ +#ifdef HARMONIZE_DCT + edct_fx( tmp_buf, spectrum[frameno], L_subframe, &Q, EVS_MONO ); +#else edct_fx( tmp_buf, spectrum[frameno], L_subframe, &Q ); +#endif *spectrum_e = sub( 31, Q ); } ELSE @@ -945,7 +949,11 @@ void core_signal_analysis_high_bitrate_ivas_fx( Word16 Q; Q = q_out_wtda; +#ifdef HARMONIZE_DCT + edct_fx( tcx20Win_32, hTcxEnc->spectrum_fx[frameno], L_subframe, &Q, st->element_mode ); +#else edct_ivas_fx( tcx20Win_32, hTcxEnc->spectrum_fx[frameno], L_subframe, &Q ); +#endif hTcxEnc->spectrum_e[frameno] = sub( 31, Q ); move16(); diff --git a/lib_enc/gs_enc_fx.c b/lib_enc/gs_enc_fx.c index 6c7a8b6c07422bf44396daf75a3c55847c36d3ff..7d2ac0208bada96f5a2886210cd10ab9d8b448fb 100644 --- a/lib_enc/gs_enc_fx.c +++ b/lib_enc/gs_enc_fx.c @@ -255,8 +255,13 @@ void encod_audio_fx( * DCT transform *---------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( exc, dct_epit, st_fx->L_frame, 7 ); + edct_16fx( res, dct_res, st_fx->L_frame, 7 ); +#else edct_16fx( exc, dct_epit, st_fx->L_frame, 7, st_fx->element_mode ); edct_16fx( res, dct_res, st_fx->L_frame, 7, st_fx->element_mode ); +#endif /*---------------------------------------------------------------* * Calculate energy 
dynamics @@ -372,8 +377,13 @@ void encod_audio_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit, exc, st_fx->L_frame, 7 ); + edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7 ); +#else edct_16fx( dct_epit, exc, st_fx->L_frame, 7, st_fx->element_mode ); edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7, st_fx->element_mode ); +#endif IF( NE_16( st_fx->element_mode, EVS_MONO ) ) { diff --git a/lib_enc/ivas_mdct_core_enc_fx.c b/lib_enc/ivas_mdct_core_enc_fx.c index b8098a62fee4ae8ee86aa054d61837f0814af121..c3b32f7e2a4d9f04daac3b8782b4fd45618bffe6 100644 --- a/lib_enc/ivas_mdct_core_enc_fx.c +++ b/lib_enc/ivas_mdct_core_enc_fx.c @@ -1109,6 +1109,55 @@ void enc_prm_igf_mdct( return; } +#ifdef OPTIMIZE_FFT_STACK +/*-------------------------------------------------------------------* + * compute_power_spec() + * + * + *-------------------------------------------------------------------*/ + +static void compute_power_spec( + TCX_ENC_HANDLE hTcxEnc, + Word32 *mdst_spectrum_fx[NB_DIV], + Word32 powerSpec_fx[N_MAX], + Word16 *q_pow, + const Word16 n, + const Word16 L_subframeTCX ) +{ + Word16 i; + Word64 powerSpec_fx64[N_MAX]; + + IF( hTcxEnc->fUseTns[n] ) + { + FOR( i = 0; i < L_subframeTCX; i++ ) + { + powerSpec_fx64[i] = W_mult_32_32( hTcxEnc->spectrum_fx[n][i], hTcxEnc->spectrum_fx[n][i] ); + move64(); + } + *q_pow = W_norm_arr( powerSpec_fx64, L_subframeTCX ); + } + ELSE + { + FOR( i = 0; i < L_subframeTCX; i++ ) + { + powerSpec_fx64[i] = W_mac_32_32( W_mult_32_32( mdst_spectrum_fx[n][i], mdst_spectrum_fx[n][i] ), hTcxEnc->spectrum_fx[n][i], hTcxEnc->spectrum_fx[n][i] ); + move64(); + } + *q_pow = W_norm_arr( powerSpec_fx64, L_subframeTCX ); + } + + FOR( i = 0; i < L_subframeTCX; i++ ) + { + powerSpec_fx64[i] = W_shl( powerSpec_fx64[i], *q_pow ); + move64(); + powerSpec_fx[i] = W_extract_h( powerSpec_fx64[i] ); + move32(); + } + + return; +} +#endif + 
/*-------------------------------------------------------------------* * ivas_mdct_core_whitening_enc() * @@ -1144,7 +1193,9 @@ void ivas_mdct_core_whitening_enc_fx( Word32 temp_buffer[15 * L_FRAME48k / 8]; Word32 *windowedSignal_fx[CPE_CHANNELS]; Word32 *powerSpec_fx = orig_spectrum_long[0]; +#ifndef OPTIMIZE_FFT_STACK Word64 powerSpec_fx64[N_MAX]; +#endif Word16 nrg_fx; /* Q15 */ Encoder_State *st, **sts; Word32 scf_fx[CPE_CHANNELS][NB_DIV][M]; @@ -1912,6 +1963,9 @@ void ivas_mdct_core_whitening_enc_fx( move16(); FOR( n = 0; n < nSubframes; n++ ) { +#ifdef OPTIMIZE_FFT_STACK + compute_power_spec( st->hTcxEnc, mdst_spectrum_fx[ch], powerSpec_fx, &q_pow, n, L_subframeTCX ); +#else IF( st->hTcxEnc->fUseTns[n] ) { FOR( i = 0; i < L_subframeTCX; i++ ) @@ -1938,6 +1992,7 @@ void ivas_mdct_core_whitening_enc_fx( powerSpec_fx[i] = W_extract_h( powerSpec_fx64[i] ); move32(); } +#endif IF( mct_on ) { FOR( i = 0; i < L_subframeTCX; i++ ) diff --git a/lib_enc/ivas_td_low_rate_enc_fx.c b/lib_enc/ivas_td_low_rate_enc_fx.c index 8ca5a4a5029a9986997db3831e7a3f96777e4464..17ca57065537275e0fd4ce110501a4e89a23466c 100644 --- a/lib_enc/ivas_td_low_rate_enc_fx.c +++ b/lib_enc/ivas_td_low_rate_enc_fx.c @@ -94,7 +94,11 @@ void tdm_low_rate_enc_fx( * DCT transform of the residual and create a subsample residual *---------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( res, dct_res_fx, L_FRAME, 7 ); +#else edct_16fx( res, dct_res_fx, L_FRAME, 7, st->element_mode ); +#endif /*--------------------------------------------------------------------------------------* * GSC encoder @@ -119,9 +123,14 @@ void tdm_low_rate_enc_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit_fx, exc_fx, L_FRAME, 7 ); + edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, 7 ); +#else edct_16fx( dct_epit_fx, exc_fx, L_FRAME, 7, st->element_mode ); edct_16fx( exc_wo_nf_fx, 
exc_wo_nf_fx, L_FRAME, 7, st->element_mode ); +#endif /*--------------------------------------------------------------------------------------* * Remove potential pre-echo in case an onset has been detected diff --git a/lib_rend/ivas_reverb_fft_filter_fx.c b/lib_rend/ivas_reverb_fft_filter_fx.c index dcf13c15345f125a0ee3f2fba9f8ca7f4c187e80..6f4e3052daffcb1c0808776707094080b78d246d 100644 --- a/lib_rend/ivas_reverb_fft_filter_fx.c +++ b/lib_rend/ivas_reverb_fft_filter_fx.c @@ -101,6 +101,7 @@ static void fft_wrapper_2ch_fx( Word32 left_re_fx, left_im_fx, right_re_fx, right_im_fx; DoRTFTn_fx_ivas( buffer_L_fx, buffer_R_fx, fft_size ); + /* separating left and right channel spectra */ buffer_L_fx[0] = L_shl( buffer_L_fx[0], 1 ); // Qx + 1 move32(); @@ -171,6 +172,8 @@ static void ifft_wrapper_2ch_fx( return; } + + /*-----------------------------------------------------------------------------------------* * Function ivas_reverb_t2f_f2t_init() *