From d31efd874cefe3766a5c0a9e10731f4b352f0cd0 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Thu, 11 Jul 2024 12:24:19 +0530 Subject: [PATCH 1/2] BASOP and instrumentation for some of the lib_com files --- lib_com/enr_1_az.c | 11 +- lib_com/env_adj.c | 4 +- lib_com/env_stab.c | 4 + lib_com/env_stab_trans.c | 3 +- lib_com/fft_fx.c | 2428 +++++++++++++++++++++++++++---- lib_com/tcq_position_arith_fx.c | 324 +++-- lib_com/tcx_ltp_fx.c | 122 +- lib_com/tcx_mdct_fx.c | 92 +- lib_com/tcx_mdct_window.c | 66 +- lib_com/tcx_utils_fx.c | 76 +- lib_com/tec_com.c | 58 +- lib_com/tns_base.c | 47 +- 12 files changed, 2636 insertions(+), 599 deletions(-) diff --git a/lib_com/enr_1_az.c b/lib_com/enr_1_az.c index f55d6917b..2f663deaf 100644 --- a/lib_com/enr_1_az.c +++ b/lib_com/enr_1_az.c @@ -84,6 +84,7 @@ Word16 Enr_1_Az_fx( /* o : impulse response energy Q3 Word32 L_tmp, L_tmp2; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif /* Find the impulse response */ @@ -99,12 +100,14 @@ Word16 Enr_1_Az_fx( /* o : impulse response energy Q3 /* h1_in Q11, h1_out Q10 */ L_tmp = L_mult( a0, 1 << 13 ); /* Q25 = L_mult(Q11,Q13) */ *y = round_fx( L_tmp ); /* Q25 to Q9 */ - L_tmp2 = L_mult( *y, *y ); /* Q19 = L_mult(Q9,Q9) */ + move16(); + L_tmp2 = L_mult( *y, *y ); /* Q19 = L_mult(Q9,Q9) */ y++; L_tmp = L_msu( 0, Aq[1], y[-1] ); /* Q23 = L_mult(Q14,Q9) */ L_tmp = L_shl( L_tmp, q ); - *y = round_fx( L_tmp ); /* Q25 to Q9 */ + *y = round_fx( L_tmp ); /* Q25 to Q9 */ + move16(); L_tmp2 = L_mac( L_tmp2, *y, *y ); /* Q19 = L_mult(Q9,Q9) */ y++; @@ -124,6 +127,7 @@ Word16 Enr_1_Az_fx( /* o : impulse response energy Q3 #ifdef BASOP_NOGLOB L_tmp = L_shl_o( L_tmp, q, &Overflow ); *y = round_fx_o( L_tmp, &Overflow ); + move16(); L_tmp2 = L_mac_o( L_tmp2, *y, *y, &Overflow ); #else L_tmp = L_shl( L_tmp, q ); @@ -148,6 +152,7 @@ Word16 Enr_1_Az_fx( /* o : impulse response energy Q3 #ifdef BASOP_NOGLOB L_tmp = L_shl_o( L_tmp, q, &Overflow ); *y = round_fx_o( L_tmp, &Overflow 
); + move16(); L_tmp2 = L_mac_o( L_tmp2, *y, *y, &Overflow ); #else L_tmp = L_shl( L_tmp, q ); @@ -158,6 +163,7 @@ Word16 Enr_1_Az_fx( /* o : impulse response energy Q3 } #ifdef BASOP_NOGLOB *Overflow_out = Overflow; + move32(); return round_fx_o( L_tmp2, Overflow_out ); /* Q19 to Q3 */ #else return round_fx( L_tmp2 ); /* Q19 to Q3 */ @@ -170,6 +176,7 @@ Word16 Enr_1_Az_fx( /* o : impulse response energy Q3 ) { Flag Overflow = 0; + move32(); return Enr_1_Az_fx_o( Aq, len, &Overflow ); } #endif diff --git a/lib_com/env_adj.c b/lib_com/env_adj.c index e6b6e57bd..874bf4aa8 100644 --- a/lib_com/env_adj.c +++ b/lib_com/env_adj.c @@ -259,7 +259,8 @@ void env_adj_fx( } ELSE { - adj[i] = MAX_16; /* Q15, 1.0f (saturated) */ + adj[i] = MAX_16; /* Q15, 1.0f (saturated) */ + move16(); IF( EQ_16( att_state, 1 ) ) /* End of attenuation region found */ { /* tmp = min(1, max(0, len-ENV_ADJ_START)*(1.0f/ENV_ADJ_INCL)); */ @@ -269,7 +270,6 @@ void env_adj_fx( tmp = round_fx( L_shl( L_mult0( s_max( 0, sub( len, ENV_ADJ_START_FX ) ), ENV_ADJ_INV_INCL_FX ), 16 ) ); /* Q15 (15+16-16) */ #endif tmp_diff = sub( MAX_16, tmp ); /* Q15 */ - move16(); FOR( j = start; j < i; j++ ) { /* adj[j] = max(tmp + (1-tmp)*adj[j],env_stab); */ diff --git a/lib_com/env_stab.c b/lib_com/env_stab.c index 4140a64dc..a05cb79ba 100644 --- a/lib_com/env_stab.c +++ b/lib_com/env_stab.c @@ -201,6 +201,7 @@ Word16 env_stability_fx( /* in Q15 */ Word16 inv_nb_sfm; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif IF( core_switching_flag ) @@ -377,6 +378,7 @@ Word16 env_stab_smo_fx( /* Q0 */ Word16 tmp, sum, exp; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif /* get previous state */ prev_state = maximum_fx( env_stab_state_p, NUM_ENV_STAB_PLC_STATES, &maxval ); @@ -415,8 +417,10 @@ Word16 env_stab_smo_fx( /* Q0 */ { #ifdef BASOP_NOGLOB env_stab_state_p[i] = round_fx_o( L_shl_o( L_mult_o( env_stab_state_p[i], tmp, &Overflow ), add( exp, 1 ), &Overflow ), &Overflow ); /* 
Q15 */ + move16(); #else env_stab_state_p[i] = round_fx( L_shl( L_mult( env_stab_state_p[i], tmp ), add( exp, 1 ) ) ); /* Q15 */ + move16(); #endif } diff --git a/lib_com/env_stab_trans.c b/lib_com/env_stab_trans.c index 2062f4e77..29473bd66 100644 --- a/lib_com/env_stab_trans.c +++ b/lib_com/env_stab_trans.c @@ -231,6 +231,7 @@ void env_stab_transient_detect_fx( FOR( blk = 0; blk < NUM_SUBFRAMES; blk++ ) { L_E_sub[blk] = L_deposit_l( 0 ); /* Q9 */ + move32(); FOR( i = 0; i < BANDS_PER_SUBFRAMES; i++ ) /* 9 times -> < 2^4 */ { @@ -291,7 +292,7 @@ void env_stab_transient_detect_fx( *no_att_hangover = ATT_LIM_HANGOVER; move16(); } - ELSE if ( *no_att_hangover > 0 ) + ELSE IF( *no_att_hangover > 0 ) { *no_att_hangover = sub( *no_att_hangover, 1 ); move16(); diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 7a0d153eb..4fe9aac60 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -188,27 +188,48 @@ static void fft15_shift2( Word32 f4o7, f4o8, f4o9, f4o10, f4o11, f4o12, f4o13, f4o14, f4o15, f4o16, f4o17, f4o18, f4o19; in0 = Idx[0]; + move16(); in8 = Idx[n1]; + move16(); in16 = Idx[n1 * 2]; - in24 = Idx[n1 * 3]; + move16(); + in24 = Idx[i_mult( n1, 3 )]; + move16(); in32 = Idx[n1 * 4]; - in1 = Idx[n1 * 5]; + move16(); + in1 = Idx[i_mult( n1, 5 )]; + move16(); in9 = Idx[n1 * 6]; - in17 = Idx[n1 * 7]; + move16(); + in17 = Idx[i_mult( n1, 7 )]; + move16(); in25 = Idx[n1 * 8]; - in33 = Idx[n1 * 9]; + move16(); + in33 = Idx[i_mult( n1, 9 )]; + move16(); in2 = Idx[n1 * 10]; - in10 = Idx[n1 * 11]; + move16(); + in10 = Idx[i_mult( n1, 11 )]; + move16(); in18 = Idx[n1 * 12]; - in26 = Idx[n1 * 13]; + move16(); + in26 = Idx[i_mult( n1, 13 )]; + move16(); in34 = Idx[n1 * 14]; + move16(); f2i13 = zRe[in0]; + move32(); f2i14 = zIm[in0]; + move32(); f2i21 = zRe[in1]; + move32(); f2i22 = zRe[in2]; + move32(); f2i23 = zIm[in1]; + move32(); f2i24 = zIm[in2]; + move32(); f2i15 = L_add( f2i21, f2i22 ); f2i16 = Mpy_32_16_1( L_sub( f2i22, f2i21 ), FFT_15PONIT_WNK4 ); @@ -225,29 
+246,41 @@ static void fft15_shift2( fi6 = L_sub( f2i20, f2i16 ); f3i1 = zRe[in9]; + move32(); f4i2 = zRe[in10]; + move32(); f4i3 = zRe[in8]; + move32(); f3i2 = L_add( f4i2, f4i3 ); f3i3 = L_sub( f3i1, L_shr( f3i2, 1 ) ); f3i4 = Mpy_32_16_1( L_sub( f4i3, f4i2 ), FFT_15PONIT_WNK4 ); f3i5 = zIm[in9]; + move32(); f4i4 = zIm[in10]; + move32(); f4i5 = zIm[in8]; + move32(); f3i6 = L_add( f4i4, f4i5 ); f3i7 = Mpy_32_16_1( L_sub( f4i4, f4i5 ), FFT_15PONIT_WNK4 ); f3i8 = L_sub( f3i5, L_shr( f3i6, 1 ) ); f3i9 = zRe[in33]; + move32(); f4i6 = zRe[in34]; + move32(); f4i7 = zRe[in32]; + move32(); f3i10 = L_add( f4i6, f4i7 ); f3i11 = L_sub( f3i9, L_shr( f3i10, 1 ) ); f3i12 = Mpy_32_16_1( L_sub( f4i7, f4i6 ), FFT_15PONIT_WNK4 ); f3i13 = zIm[in33]; + move32(); f4i8 = zIm[in34]; + move32(); f4i9 = zIm[in32]; + move32(); f3i14 = L_add( f4i8, f4i9 ); f3i15 = Mpy_32_16_1( L_sub( f4i8, f4i9 ), FFT_15PONIT_WNK4 ); f4i1 = L_sub( f3i13, L_shr( f3i14, 1 ) ); @@ -272,29 +305,41 @@ static void fft15_shift2( fi24 = L_add( fi22, fi23 ); f4i10 = zRe[in24]; + move32(); fo6 = zRe[in25]; + move32(); fo7 = zRe[in26]; + move32(); f4i11 = L_add( fo6, fo7 ); f4i12 = L_sub( f4i10, L_shr( f4i11, 1 ) ); f4i13 = Mpy_32_16_1( L_sub( fo7, fo6 ), FFT_15PONIT_WNK4 ); f4i14 = zIm[in24]; + move32(); fo8 = zIm[in25]; + move32(); fo9 = zIm[in26]; + move32(); f4i15 = L_add( fo8, fo9 ); f4i16 = Mpy_32_16_1( L_sub( fo8, fo9 ), FFT_15PONIT_WNK4 ); f4i17 = L_sub( f4i14, L_shr( f4i15, 1 ) ); f4i18 = zRe[in18]; + move32(); f2o10 = zRe[in16]; + move32(); f2o11 = zRe[in17]; + move32(); f4i19 = L_add( f2o10, f2o11 ); f4i20 = L_sub( f4i18, L_shr( f4i19, 1 ) ); fo1 = Mpy_32_16_1( L_sub( f2o11, f2o10 ), FFT_15PONIT_WNK4 ); fo2 = zIm[in18]; + move32(); f2o12 = zIm[in16]; + move32(); f2o13 = zIm[in17]; + move32(); fo3 = L_add( f2o12, f2o13 ); fo4 = Mpy_32_16_1( L_sub( f2o12, f2o13 ), FFT_15PONIT_WNK4 ); fo5 = L_sub( fo2, L_shr( fo3, 1 ) ); @@ -327,12 +372,17 @@ static void fft15_shift2( fo14 = L_add( Mpy_32_16_1( fo15, 
FFT_15PONIT_WNK3 ), Mpy_32_16_1( fo16, FFT_15PONIT_WNK2 ) ); zRe[in0] = L_add( fi1, fo11 ); + move32(); fo17 = L_add( fo10, fo12 ); zRe[in18] = L_sub( fo17, fo14 ); + move32(); zRe[in24] = L_add( fo17, fo14 ); + move32(); fo18 = L_sub( fo12, fo10 ); zRe[in9] = L_sub( fo18, fo13 ); + move32(); zRe[in33] = L_add( fo18, fo13 ); + move32(); f2o1 = Mpy_32_16_1( L_sub( f2i3, fi15 ), FFT_15PONIT_WNK1 ); f2o2 = L_add( f2i3, fi15 ); @@ -342,12 +392,17 @@ static void fft15_shift2( f2o4 = L_sub( Mpy_32_16_1( f2o6, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f2o7, FFT_15PONIT_WNK3 ) ); f2o5 = L_add( Mpy_32_16_1( f2o6, FFT_15PONIT_WNK3 ), Mpy_32_16_1( f2o7, FFT_15PONIT_WNK2 ) ); zIm[in0] = L_add( fi2, f2o2 ); + move32(); f2o8 = L_add( f2o1, f2o3 ); zIm[in24] = L_sub( f2o8, f2o5 ); + move32(); zIm[in18] = L_add( f2o5, f2o8 ); + move32(); f2o9 = L_sub( f2o3, f2o1 ); zIm[in33] = L_sub( f2o9, f2o4 ); + move32(); zIm[in9] = L_add( f2o4, f2o9 ); + move32(); f2o14 = Mpy_32_16_1( L_sub( fi30, fi12 ), FFT_15PONIT_WNK1 ); f2o15 = L_add( fi30, fi12 ); @@ -357,12 +412,17 @@ static void fft15_shift2( f3o2 = L_sub( Mpy_32_16_1( f3o4, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f3o5, FFT_15PONIT_WNK3 ) ); f3o3 = L_add( Mpy_32_16_1( f3o5, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f3o4, FFT_15PONIT_WNK3 ) ); zRe[in2] = L_add( fi3, f2o15 ); + move32(); f3o6 = L_add( f2o14, f3o1 ); zRe[in17] = L_sub( f3o6, f3o3 ); + move32(); zRe[in26] = L_add( f3o6, f3o3 ); + move32(); f3o7 = L_sub( f3o1, f2o14 ); zRe[in8] = L_sub( f3o7, f3o2 ); + move32(); zRe[in32] = L_add( f3o7, f3o2 ); + move32(); f3o8 = Mpy_32_16_1( L_sub( f2i6, fi18 ), FFT_15PONIT_WNK1 ); f3o9 = L_add( f2i6, fi18 ); @@ -372,12 +432,17 @@ static void fft15_shift2( f3o11 = L_sub( Mpy_32_16_1( f3o13, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f3o14, FFT_15PONIT_WNK3 ) ); f3o12 = L_add( Mpy_32_16_1( f3o14, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f3o13, FFT_15PONIT_WNK3 ) ); zIm[in2] = L_add( fi6, f3o9 ); + move32(); f3o15 = L_add( f3o8, f3o10 ); zIm[in26] = L_sub( f3o15, f3o12 ); + 
move32(); zIm[in17] = L_add( f3o12, f3o15 ); + move32(); f4o1 = L_sub( f3o10, f3o8 ); zIm[in8] = L_add( f3o11, f4o1 ); + move32(); zIm[in32] = L_sub( f4o1, f3o11 ); + move32(); f4o2 = Mpy_32_16_1( L_sub( f2i9, fi21 ), FFT_15PONIT_WNK1 ); f4o3 = L_add( f2i9, fi21 ); @@ -387,13 +452,18 @@ static void fft15_shift2( f4o5 = L_add( Mpy_32_16_1( f4o8, FFT_15PONIT_WNK3 ), Mpy_32_16_1( f4o7, FFT_15PONIT_WNK2 ) ); f4o6 = L_sub( Mpy_32_16_1( f4o8, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f4o7, FFT_15PONIT_WNK3 ) ); zIm[in1] = L_add( fi5, f4o3 ); + move32(); f4o9 = L_sub( f4o4, f4o2 ); f4o10 = L_add( f4o2, f4o4 ); zIm[in10] = L_add( f4o6, f4o9 ); + move32(); zIm[in34] = L_sub( f4o9, f4o6 ); + move32(); zIm[in25] = L_sub( f4o10, f4o5 ); + move32(); zIm[in16] = L_add( f4o5, f4o10 ); + move32(); f4o11 = Mpy_32_16_1( L_sub( f2i12, fi24 ), FFT_15PONIT_WNK1 ); f4o12 = L_add( f2i12, fi24 ); @@ -403,13 +473,18 @@ static void fft15_shift2( f4o14 = L_add( Mpy_32_16_1( f4o16, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f4o17, FFT_15PONIT_WNK3 ) ); f4o15 = L_sub( Mpy_32_16_1( f4o17, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f4o16, FFT_15PONIT_WNK3 ) ); zRe[in1] = L_add( fi4, f4o12 ); + move32(); f4o18 = L_sub( f4o13, f4o11 ); f4o19 = L_add( f4o11, f4o13 ); zRe[in10] = L_sub( f4o18, f4o15 ); + move32(); zRe[in34] = L_add( f4o18, f4o15 ); + move32(); zRe[in16] = L_sub( f4o19, f4o14 ); + move32(); zRe[in25] = L_add( f4o19, f4o14 ); + move32(); return; } @@ -441,20 +516,35 @@ static void fft15_shift8( Word32 f5o8, f5o9, f5o10, f5o11, f5o12, f5o13, f5o14, f5o15, f5o16, f5o17, f5o18, f5o19, f5o21, f5o22; in0 = Idx[0]; + move16(); in8 = Idx[n1]; + move16(); in16 = Idx[n1 * 2]; - in24 = Idx[n1 * 3]; + move16(); + in24 = Idx[i_mult( n1, 3 )]; + move16(); in32 = Idx[n1 * 4]; - in1 = Idx[n1 * 5]; + move16(); + in1 = Idx[i_mult( n1, 5 )]; + move16(); in9 = Idx[n1 * 6]; - in17 = Idx[n1 * 7]; + move16(); + in17 = Idx[i_mult( n1, 7 )]; + move16(); in25 = Idx[n1 * 8]; - in33 = Idx[n1 * 9]; + move16(); + in33 = Idx[i_mult( n1, 9 )]; 
+ move16(); in2 = Idx[n1 * 10]; - in10 = Idx[n1 * 11]; + move16(); + in10 = Idx[i_mult( n1, 11 )]; + move16(); in18 = Idx[n1 * 12]; - in26 = Idx[n1 * 13]; + move16(); + in26 = Idx[i_mult( n1, 13 )]; + move16(); in34 = Idx[n1 * 14]; + move16(); f2i13 = zRe[in0]; f2i14 = zIm[in0]; @@ -478,17 +568,29 @@ static void fft15_shift8( fi6 = L_sub( f3i5, f3i1 ); f3i10 = zRe[in9]; + move32(); f4i11 = zRe[in10]; + move32(); f4i12 = zRe[in8]; + move32(); f3i14 = zIm[in9]; + move32(); f4i13 = zIm[in10]; + move32(); f4i14 = zIm[in8]; + move32(); f4i3 = zRe[in33]; + move32(); f4i15 = zRe[in34]; + move32(); fo1 = zRe[in32]; + move32(); f4i7 = zIm[in33]; + move32(); fo2 = zIm[in34]; + move32(); fo3 = zIm[in32]; + move32(); f3i11 = L_add( f4i11, f4i12 ); @@ -524,17 +626,29 @@ static void fft15_shift8( fi24 = L_add( fi22, fi23 ); fo4 = zRe[in24]; + move32(); f2o5 = zRe[in25]; + move32(); f2o6 = zRe[in26]; + move32(); fo8 = zIm[in24]; + move32(); f2o7 = zIm[in25]; + move32(); f2o8 = zIm[in26]; + move32(); fo12 = zRe[in18]; + move32(); f2o9 = zRe[in16]; + move32(); f2o10 = zRe[in17]; + move32(); f2o1 = zIm[in18]; + move32(); f2o11 = zIm[in16]; + move32(); f2o12 = zIm[in17]; + move32(); fo5 = L_add( f2o5, f2o6 ); @@ -577,12 +691,17 @@ static void fft15_shift8( f3o1 = L_sub( Mpy_32_16_1( f3o3, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f3o4, FFT_15PONIT_WNK3 ) ); f3o2 = L_add( Mpy_32_16_1( f3o4, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f3o3, FFT_15PONIT_WNK3 ) ); zRe[in0] = L_add( fi1, f2o14 ); + move32(); f3o5 = L_add( f2o13, f2o15 ); zRe[in24] = L_sub( f3o5, f3o2 ); + move32(); zRe[in18] = L_add( f3o5, f3o2 ); + move32(); f3o6 = L_sub( f2o15, f2o13 ); zRe[in33] = L_sub( f3o6, f3o1 ); + move32(); zRe[in9] = L_add( f3o6, f3o1 ); + move32(); f3o7 = Mpy_32_16_1( L_sub( f2i3, fi15 ), FFT_15PONIT_WNK1 ); f3o8 = L_add( f2i3, fi15 ); @@ -592,12 +711,17 @@ static void fft15_shift8( f3o10 = L_sub( Mpy_32_16_1( f3o12, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f3o13, FFT_15PONIT_WNK3 ) ); f3o11 = L_add( Mpy_32_16_1( f3o13, 
FFT_15PONIT_WNK2 ), Mpy_32_16_1( f3o12, FFT_15PONIT_WNK3 ) ); zIm[in0] = L_add( fi2, f3o8 ); + move32(); f3o14 = L_add( f3o7, f3o9 ); zIm[in18] = L_sub( f3o14, f3o11 ); + move32(); zIm[in24] = L_add( f3o11, f3o14 ); + move32(); f3o15 = L_sub( f3o9, f3o7 ); zIm[in9] = L_sub( f3o15, f3o10 ); + move32(); zIm[in33] = L_add( f3o10, f3o15 ); + move32(); f4o1 = Mpy_32_16_1( L_sub( fi30, fi12 ), FFT_15PONIT_WNK1 ); f4o2 = L_add( fi30, fi12 ); @@ -607,12 +731,17 @@ static void fft15_shift8( f4o4 = L_sub( Mpy_32_16_1( f4o6, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f4o7, FFT_15PONIT_WNK3 ) ); f4o5 = L_add( Mpy_32_16_1( f4o7, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f4o6, FFT_15PONIT_WNK3 ) ); zRe[in2] = L_add( fi3, f4o2 ); + move32(); f4o8 = L_add( f4o1, f4o3 ); zRe[in26] = L_sub( f4o8, f4o5 ); + move32(); zRe[in17] = L_add( f4o8, f4o5 ); + move32(); f4o9 = L_sub( f4o3, f4o1 ); zRe[in32] = L_sub( f4o9, f4o4 ); + move32(); zRe[in8] = L_add( f4o9, f4o4 ); + move32(); f4o10 = Mpy_32_16_1( L_sub( f2i6, fi18 ), FFT_15PONIT_WNK1 ); f4o11 = L_add( f2i6, fi18 ); @@ -622,12 +751,17 @@ static void fft15_shift8( f4o13 = L_sub( Mpy_32_16_1( f4o15, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f5o1, FFT_15PONIT_WNK3 ) ); f4o14 = L_add( Mpy_32_16_1( f5o1, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f4o15, FFT_15PONIT_WNK3 ) ); zIm[in2] = L_add( fi6, f4o11 ); + move32(); f5o2 = L_add( f4o10, f4o12 ); zIm[in17] = L_sub( f5o2, f4o14 ); + move32(); zIm[in26] = L_add( f4o14, f5o2 ); + move32(); f5o3 = L_sub( f4o12, f4o10 ); zIm[in32] = L_add( f4o13, f5o3 ); + move32(); zIm[in8] = L_sub( f5o3, f4o13 ); + move32(); f5o4 = Mpy_32_16_1( L_sub( f2i9, fi21 ), FFT_15PONIT_WNK1 ); f5o5 = L_add( f2i9, fi21 ); @@ -637,13 +771,18 @@ static void fft15_shift8( f5o7 = L_add( Mpy_32_16_1( f5o9, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f5o10, FFT_15PONIT_WNK3 ) ); f5o8 = L_sub( Mpy_32_16_1( f5o10, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f5o9, FFT_15PONIT_WNK3 ) ); zIm[in1] = L_add( fi5, f5o5 ); + move32(); f5o11 = L_sub( f5o6, f5o4 ); f5o12 = L_add( f5o4, f5o6 ); 
zIm[in34] = L_add( f5o8, f5o11 ); + move32(); zIm[in10] = L_sub( f5o11, f5o8 ); + move32(); zIm[in16] = L_sub( f5o12, f5o7 ); + move32(); zIm[in25] = L_add( f5o7, f5o12 ); + move32(); f5o13 = Mpy_32_16_1( L_sub( f2i12, fi24 ), FFT_15PONIT_WNK1 ); f5o14 = L_add( f2i12, fi24 ); @@ -653,13 +792,18 @@ static void fft15_shift8( f5o16 = L_add( Mpy_32_16_1( f5o18, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f5o19, FFT_15PONIT_WNK3 ) ); f5o17 = L_sub( Mpy_32_16_1( f5o19, FFT_15PONIT_WNK2 ), Mpy_32_16_1( f5o18, FFT_15PONIT_WNK3 ) ); zRe[in1] = L_add( fi4, f5o14 ); + move32(); f5o21 = L_sub( f5o15, f5o13 ); f5o22 = L_add( f5o13, f5o15 ); zRe[in34] = L_sub( f5o21, f5o17 ); + move32(); zRe[in10] = L_add( f5o21, f5o17 ); + move32(); zRe[in25] = L_sub( f5o22, f5o16 ); + move32(); zRe[in16] = L_add( f5o22, f5o16 ); + move32(); return; } @@ -681,17 +825,28 @@ static void fft5_shift1( Word16 in1, in2, in3, in4, in5; in1 = Idx[0]; + move16(); in2 = Idx[n1]; + move16(); in3 = Idx[n1 * 2]; - in4 = Idx[n1 * 3]; + move16(); + in4 = Idx[i_mult( n1, 3 )]; + move16(); in5 = Idx[n1 * 4]; + move16(); fi1 = zRe[in1]; + move32(); fi2 = zIm[in1]; + move32(); fo3 = zRe[in2]; + move32(); fo4 = zRe[in5]; + move32(); fo6 = zRe[in3]; + move32(); fo7 = zRe[in4]; + move32(); fo5 = L_add( fo3, fo4 ); fo8 = L_add( fo6, fo7 ); @@ -701,9 +856,13 @@ static void fft5_shift1( fi6 = L_sub( fo3, fo4 ); fo3 = zIm[in2]; + move32(); fo4 = zIm[in5]; + move32(); fo6 = zIm[in3]; + move32(); fo7 = zIm[in4]; + move32(); fo5 = L_add( fo3, fo4 ); fo8 = L_add( fo6, fo7 ); @@ -713,7 +872,9 @@ static void fft5_shift1( fo2 = Mpy_32_16_1( L_sub( fo5, fo8 ), FFT_15PONIT_WNK1 ); zRe[in1] = L_add( fi1, fi3 ); + move32(); zIm[in1] = L_add( fi2, fi8 ); + move32(); fo3 = L_add( Mpy_32_16_1( fi7, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fo1, FFT_15PONIT_WNK3 ) ); fo4 = L_sub( Mpy_32_16_1( fo1, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi7, FFT_15PONIT_WNK3 ) ); @@ -722,9 +883,13 @@ static void fft5_shift1( fo6 = L_sub( fo7, fi5 ); zRe[in2] = L_add( fo5, fo3 
); + move32(); zRe[in3] = L_sub( fo6, fo4 ); + move32(); zRe[in4] = L_add( fo6, fo4 ); + move32(); zRe[in5] = L_sub( fo5, fo3 ); + move32(); fo3 = L_add( Mpy_32_16_1( fi6, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi4, FFT_15PONIT_WNK3 ) ); fo4 = L_sub( Mpy_32_16_1( fi4, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi6, FFT_15PONIT_WNK3 ) ); @@ -733,9 +898,13 @@ static void fft5_shift1( fo6 = L_sub( fo7, fo2 ); zIm[in2] = L_sub( fo5, fo3 ); + move32(); zIm[in3] = L_add( fo4, fo6 ); + move32(); zIm[in4] = L_sub( fo6, fo4 ); + move32(); zIm[in5] = L_add( fo3, fo5 ); + move32(); return; } @@ -757,17 +926,28 @@ static void fft5_shift4( Word16 in1, in2, in3, in4, in5; in1 = Idx[0]; + move16(); in2 = Idx[n1]; + move16(); in3 = Idx[n1 * 2]; - in4 = Idx[n1 * 3]; + move16(); + in4 = Idx[i_mult( n1, 3 )]; + move16(); in5 = Idx[n1 * 4]; + move16(); fi1 = zRe[in1]; + move32(); fi2 = zIm[in1]; + move32(); fo3 = zRe[in2]; + move32(); fo4 = zRe[in5]; + move32(); fo6 = zRe[in3]; + move32(); fo7 = zRe[in4]; + move32(); fo5 = L_add( fo3, fo4 ); fo8 = L_add( fo6, fo7 ); @@ -777,9 +957,13 @@ static void fft5_shift4( fi6 = L_sub( fo3, fo4 ); fo3 = zIm[in2]; + move32(); fo4 = zIm[in5]; + move32(); fo6 = zIm[in3]; + move32(); fo7 = zIm[in4]; + move32(); fo5 = L_add( fo3, fo4 ); fo8 = L_add( fo6, fo7 ); @@ -789,7 +973,9 @@ static void fft5_shift4( fo2 = Mpy_32_16_1( L_sub( fo5, fo8 ), FFT_15PONIT_WNK1 ); zRe[in1] = L_add( fi1, fi3 ); + move32(); zIm[in1] = L_add( fi2, fi8 ); + move32(); fo3 = L_add( Mpy_32_16_1( fi7, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fo1, FFT_15PONIT_WNK3 ) ); fo4 = L_sub( Mpy_32_16_1( fo1, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi7, FFT_15PONIT_WNK3 ) ); @@ -798,9 +984,13 @@ static void fft5_shift4( fo6 = L_sub( fo7, fi5 ); zRe[in2] = L_sub( fo5, fo3 ); + move32(); zRe[in4] = L_sub( fo6, fo4 ); + move32(); zRe[in3] = L_add( fo6, fo4 ); + move32(); zRe[in5] = L_add( fo5, fo3 ); + move32(); fo3 = L_add( Mpy_32_16_1( fi6, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi4, FFT_15PONIT_WNK3 ) ); fo4 = L_sub( 
Mpy_32_16_1( fi4, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi6, FFT_15PONIT_WNK3 ) ); @@ -809,9 +999,13 @@ static void fft5_shift4( fo6 = L_sub( fo7, fo2 ); zIm[in3] = L_sub( fo6, fo4 ); + move32(); zIm[in2] = L_add( fo3, fo5 ); + move32(); zIm[in4] = L_add( fo4, fo6 ); + move32(); zIm[in5] = L_sub( fo5, fo3 ); + move32(); return; } @@ -832,17 +1026,28 @@ static void fft5_32( Word16 in1, in2, in3, in4, in5; in1 = Idx[0]; + move16(); in2 = Idx[32]; + move16(); in3 = Idx[64]; + move16(); in4 = Idx[96]; + move16(); in5 = Idx[128]; + move16(); fi1 = zRe[in1]; + move32(); fi2 = zIm[in1]; + move32(); fo3 = zRe[in2]; + move32(); fo4 = zRe[in5]; + move32(); fo6 = zRe[in3]; + move32(); fo7 = zRe[in4]; + move32(); fo5 = L_add( fo3, fo4 ); fo8 = L_add( fo6, fo7 ); @@ -852,9 +1057,13 @@ static void fft5_32( fi6 = L_sub( fo3, fo4 ); fo3 = zIm[in2]; + move32(); fo4 = zIm[in5]; + move32(); fo6 = zIm[in3]; + move32(); fo7 = zIm[in4]; + move32(); fo5 = L_add( fo3, fo4 ); fo8 = L_add( fo6, fo7 ); @@ -864,7 +1073,9 @@ static void fft5_32( fo2 = Mpy_32_16_1( L_sub( fo5, fo8 ), FFT_15PONIT_WNK1 ); zRe[in1] = L_add( fi1, fi3 ); + move32(); zIm[in1] = L_add( fi2, fi8 ); + move32(); fo3 = L_add( Mpy_32_16_1( fi7, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fo1, FFT_15PONIT_WNK3 ) ); fo4 = L_sub( Mpy_32_16_1( fo1, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi7, FFT_15PONIT_WNK3 ) ); @@ -873,9 +1084,13 @@ static void fft5_32( fo6 = L_sub( fo7, fi5 ); zRe[in2] = L_add( fo6, fo4 ); + move32(); zRe[in3] = L_add( fo5, fo3 ); + move32(); zRe[in4] = L_sub( fo5, fo3 ); + move32(); zRe[in5] = L_sub( fo6, fo4 ); + move32(); fo3 = L_add( Mpy_32_16_1( fi6, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi4, FFT_15PONIT_WNK3 ) ); fo4 = L_sub( Mpy_32_16_1( fi4, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi6, FFT_15PONIT_WNK3 ) ); @@ -884,9 +1099,13 @@ static void fft5_32( fo6 = L_sub( fo7, fo2 ); zIm[in2] = L_sub( fo6, fo4 ); + move32(); zIm[in3] = L_sub( fo5, fo3 ); + move32(); zIm[in4] = L_add( fo3, fo5 ); + move32(); zIm[in5] = L_add( fo4, fo6 ); + 
move32(); return; } @@ -907,8 +1126,11 @@ static void fft64( FOR( i = 0; i < 64; i++ ) { id = Idx[i]; + move16(); z[2 * i] = x[id]; - z[2 * i + 1] = y[id]; + move32(); + z[add( shl( i, 1 ), 1 )] = y[id]; + move32(); } cdftForw( 128, z, Ip_fft64, w_fft64_fx ); @@ -916,9 +1138,13 @@ static void fft64( FOR( i = 0; i < 64; i++ ) { jd = Odx_fft64[i]; + move16(); id = Idx[jd]; + move16(); x[id] = z[2 * i]; - y[id] = z[2 * i + 1]; + move32(); + y[id] = z[add( shl( i, 1 ), 1 )]; + move32(); } return; @@ -942,8 +1168,11 @@ static void fft32_15( FOR( i = 0; i < 32; i++ ) { id = Idx[i]; + move16(); z[2 * i] = x[id]; - z[2 * i + 1] = y[id]; + move32(); + z[add( shl( i, 1 ), 1 )] = y[id]; + move32(); } cdftForw( 64, z, Ip_fft32, w_fft32_fx ); @@ -951,9 +1180,13 @@ static void fft32_15( FOR( i = 0; i < 32; i++ ) { jd = Odx_fft32_15[i]; + move16(); id = Idx[jd]; + move16(); x[id] = z[2 * i]; - y[id] = z[2 * i + 1]; + move32(); + y[id] = z[add( shl( i, 1 ), 1 )]; + move32(); } return; @@ -976,8 +1209,11 @@ static void fft32_5( FOR( i = 0; i < 32; i++ ) { id = Idx[i]; + move16(); z[2 * i] = x[id]; - z[2 * i + 1] = y[id]; + move32(); + z[add( shl( i, 1 ), 1 )] = y[id]; + move32(); } cdftForw( 64, z, Ip_fft32, w_fft32_fx ); @@ -985,9 +1221,13 @@ static void fft32_5( FOR( i = 0; i < 32; i++ ) { jd = Odx_fft32_5[i]; + move16(); id = Idx[jd]; + move16(); x[id] = z[2 * i]; - y[id] = z[2 * i + 1]; + move32(); + y[id] = z[add( shl( i, 1 ), 1 )]; + move32(); } return; @@ -1010,8 +1250,11 @@ static void fft16_ivas( FOR( i = 0; i < 16; i++ ) { id = Idx[i]; + move16(); z[2 * i] = x[id]; - z[2 * i + 1] = y[id]; + move32(); + z[add( shl( i, 1 ), 1 )] = y[id]; + move32(); } cdftForw( 32, z, Ip_fft16, w_fft16_fx ); @@ -1019,9 +1262,13 @@ static void fft16_ivas( FOR( i = 0; i < 16; i++ ) { jd = Odx_fft16[i]; + move16(); id = Idx[jd]; + move16(); x[id] = z[2 * i]; - y[id] = z[2 * i + 1]; + move32(); + y[id] = z[add( shl( i, 1 ), 1 )]; + move32(); } return; @@ -1044,8 +1291,11 @@ static void fft8( 
FOR( i = 0; i < 8; i++ ) { id = Idx[i]; + move16(); z[2 * i] = x[id]; - z[2 * i + 1] = y[id]; + move32(); + z[add( shl( i, 1 ), 1 )] = y[id]; + move32(); } cdftForw( 16, z, Ip_fft8, w_fft8_fx ); @@ -1053,8 +1303,11 @@ static void fft8( FOR( i = 0; i < 8; i++ ) { id = Idx[i]; + move16(); x[id] = z[2 * i]; - y[id] = z[2 * i + 1]; + move32(); + y[id] = z[add( shl( i, 1 ), 1 )]; + move32(); } return; @@ -1077,8 +1330,11 @@ static void fft8_5( FOR( i = 0; i < 8; i++ ) { id = Idx[i]; + move16(); z[2 * i] = x[id]; - z[2 * i + 1] = y[id]; + move32(); + z[add( shl( i, 1 ), 1 )] = y[id]; + move32(); } cdftForw( 16, z, Ip_fft8, w_fft8_fx ); @@ -1086,9 +1342,13 @@ static void fft8_5( FOR( i = 0; i < 8; i++ ) { jd = Odx_fft8_5[i]; + move16(); id = Idx[jd]; + move16(); x[id] = z[2 * i]; - y[id] = z[2 * i + 1]; + move32(); + y[id] = z[add( shl( i, 1 ), 1 )]; + move32(); } return; } @@ -1110,17 +1370,28 @@ static void fft5_8( Word16 in1, in2, in3, in4, in5; in1 = Idx[0]; + move16(); in2 = Idx[n1]; + move16(); in3 = Idx[n1 * 2]; - in4 = Idx[n1 * 3]; + move16(); + in4 = Idx[i_mult( n1, 3 )]; + move16(); in5 = Idx[n1 * 4]; + move16(); fi1 = zRe[in1]; + move32(); fi2 = zIm[in1]; + move32(); fo3 = zRe[in2]; + move32(); fo4 = zRe[in5]; + move32(); fo6 = zRe[in3]; + move32(); fo7 = zRe[in4]; + move32(); fo5 = L_add( fo3, fo4 ); fo8 = L_add( fo6, fo7 ); @@ -1130,9 +1401,13 @@ static void fft5_8( fi6 = L_sub( fo3, fo4 ); fo3 = zIm[in2]; + move32(); fo4 = zIm[in5]; + move32(); fo6 = zIm[in3]; + move32(); fo7 = zIm[in4]; + move32(); fo5 = L_add( fo3, fo4 ); fo8 = L_add( fo6, fo7 ); @@ -1142,7 +1417,9 @@ static void fft5_8( fo2 = Mpy_32_16_1( L_sub( fo5, fo8 ), FFT_15PONIT_WNK1 ); zRe[in1] = L_add( fi1, fi3 ); + move32(); zIm[in1] = L_add( fi2, fi8 ); + move32(); fo3 = L_add( Mpy_32_16_1( fi7, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fo1, FFT_15PONIT_WNK3 ) ); fo4 = L_sub( Mpy_32_16_1( fo1, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi7, FFT_15PONIT_WNK3 ) ); @@ -1151,9 +1428,13 @@ static void fft5_8( fo6 = 
L_sub( fo7, fi5 ); zRe[in2] = L_sub( fo6, fo4 ); + move32(); zRe[in3] = L_sub( fo5, fo3 ); + move32(); zRe[in5] = L_add( fo6, fo4 ); + move32(); zRe[in4] = L_add( fo5, fo3 ); + move32(); fo3 = L_add( Mpy_32_16_1( fi6, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi4, FFT_15PONIT_WNK3 ) ); fo4 = L_sub( Mpy_32_16_1( fi4, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi6, FFT_15PONIT_WNK3 ) ); @@ -1162,9 +1443,13 @@ static void fft5_8( fo6 = L_sub( fo7, fo2 ); zIm[in2] = L_add( fo4, fo6 ); + move32(); zIm[in3] = L_add( fo3, fo5 ); + move32(); zIm[in4] = L_sub( fo5, fo3 ); + move32(); zIm[in5] = L_sub( fo6, fo4 ); + move32(); return; } @@ -1186,8 +1471,11 @@ static void fft4_5( FOR( i = 0; i < 4; i++ ) { id = Idx[i]; + move16(); z[2 * i] = x[id]; - z[2 * i + 1] = y[id]; + move32(); + z[add( shl( i, 1 ), 1 )] = y[id]; + move32(); } cdftForw( 8, z, Ip_fft4, w_fft4_fx ); @@ -1195,9 +1483,13 @@ static void fft4_5( FOR( i = 0; i < 4; i++ ) { jd = Odx_fft4_5[i]; + move16(); id = Idx[jd]; + move16(); x[id] = z[2 * i]; - y[id] = z[2 * i + 1]; + move32(); + y[id] = z[add( shl( i, 1 ), 1 )]; + move32(); } return; } @@ -1218,17 +1510,28 @@ static void fft5_4( Word16 in1, in2, in3, in4, in5; in1 = Idx[0]; + move16(); in2 = Idx[n1]; + move16(); in3 = Idx[n1 * 2]; - in4 = Idx[n1 * 3]; + move16(); + in4 = Idx[i_mult( n1, 3 )]; + move16(); in5 = Idx[n1 * 4]; + move16(); fi1 = zRe[in1]; + move32(); fi2 = zIm[in1]; + move32(); fo3 = zRe[in2]; + move32(); fo4 = zRe[in5]; + move32(); fo6 = zRe[in3]; + move32(); fo7 = zRe[in4]; + move32(); fo5 = L_add( fo3, fo4 ); fo8 = L_add( fo6, fo7 ); @@ -1238,9 +1541,13 @@ static void fft5_4( fi6 = L_sub( fo3, fo4 ); fo3 = zIm[in2]; + move32(); fo4 = zIm[in5]; + move32(); fo6 = zIm[in3]; + move32(); fo7 = zIm[in4]; + move32(); fo5 = L_add( fo3, fo4 ); fo8 = L_add( fo6, fo7 ); @@ -1250,7 +1557,9 @@ static void fft5_4( fo2 = Mpy_32_16_1( L_sub( fo5, fo8 ), FFT_15PONIT_WNK1 ); zRe[in1] = L_add( fi1, fi3 ); + move32(); zIm[in1] = L_add( fi2, fi8 ); + move32(); fo3 = L_add( 
Mpy_32_16_1( fi7, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fo1, FFT_15PONIT_WNK3 ) ); fo4 = L_sub( Mpy_32_16_1( fo1, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi7, FFT_15PONIT_WNK3 ) ); @@ -1259,9 +1568,13 @@ static void fft5_4( fo6 = L_sub( fo7, fi5 ); zRe[in2] = L_sub( fo5, fo3 ); + move32(); zRe[in4] = L_sub( fo6, fo4 ); + move32(); zRe[in3] = L_add( fo6, fo4 ); + move32(); zRe[in5] = L_add( fo5, fo3 ); + move32(); fo3 = L_add( Mpy_32_16_1( fi6, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi4, FFT_15PONIT_WNK3 ) ); fo4 = L_sub( Mpy_32_16_1( fi4, FFT_15PONIT_WNK2 ), Mpy_32_16_1( fi6, FFT_15PONIT_WNK3 ) ); @@ -1270,9 +1583,13 @@ static void fft5_4( fo6 = L_sub( fo7, fo2 ); zIm[in2] = L_add( fo3, fo5 ); + move32(); zIm[in3] = L_sub( fo6, fo4 ); + move32(); zIm[in4] = L_add( fo4, fo6 ); + move32(); zIm[in5] = L_sub( fo5, fo3 ); + move32(); return; } @@ -1292,7 +1609,7 @@ void DoRTFT80_fx( /* Applying 16-point FFT for 5 times based on the address table Idx_dortft80 */ FOR( j = 0; j < 5; j++ ) { - fft16_ivas( x, y, Idx_dortft80 + 16 * j ); + fft16_ivas( x, y, Idx_dortft80 + shl( j, 4 ) ); } /* Applying 5-point FFT for 16 times based on the address table Idx_dortft80 */ @@ -1319,7 +1636,7 @@ void DoRTFT120_fx( /* Applying 8-point FFT for 15 times based on the address table Idx_dortft120 */ FOR( j = 0; j < 15; j++ ) { - fft8( x, y, Idx_dortft120 + 8 * j ); + fft8( x, y, Idx_dortft120 + shl( j, 3 ) ); } /* Applying 15-point FFT for 8 times based on the address table Idx_dortft120 */ @@ -1346,7 +1663,7 @@ void DoRTFT160_fx( /* Applying 32-point FFT for 5 times based on the address table Idx_dortft160 */ FOR( j = 0; j < 5; j++ ) { - fft32_5( x, y, Idx_dortft160 + 32 * j ); + fft32_5( x, y, Idx_dortft160 + shl( j, 5 ) ); } /* Applying 5-point FFT for 32 times based on the address table Idx_dortft160 */ @@ -1373,7 +1690,7 @@ void DoRTFT320_fx( /* Applying 64-point FFT for 5 times based on the address table Idx_dortft160 */ FOR( j = 0; j < 5; j++ ) { - fft64( x, y, Idx_dortft320 + 64 * j ); + fft64( x, 
y, Idx_dortft320 + shl( j, 6 ) ); } /* Applying 5-point FFT for 64 times based on the address table Idx_dortft160 */ @@ -1400,7 +1717,7 @@ void DoRTFT480_fx( /* Applying 32-point FFT for 15 times based on the address table Idx_dortft160 */ FOR( j = 0; j < 15; j++ ) { - fft32_15( x, y, Idx_dortft480 + 32 * j ); + fft32_15( x, y, Idx_dortft480 + shl( j, 5 ) ); } /* Applying 5-point FFT for 32 times based on the address table Idx_dortft160 */ @@ -1426,7 +1743,7 @@ void DoRTFT40_fx( /* Applying 8-point FFT for 5 times based on the address table Idx_dortft40 */ FOR( j = 0; j < 5; j++ ) { - fft8_5( x, y, Idx_dortft40 + 8 * j ); + fft8_5( x, y, Idx_dortft40 + shl( j, 3 ) ); } /* Applying 5-point FFT for 8 times based on the address table Idx_dortft40 */ @@ -1453,7 +1770,7 @@ void DoRTFT20_fx( /* Applying 4-point FFT for 5 times based on the address table Idx_dortft20 */ FOR( j = 0; j < 5; j++ ) { - fft4_5( x, y, Idx_dortft20 + 4 * j ); + fft4_5( x, y, Idx_dortft20 + shl( j, 2 ) ); } /* Applying 5-point FFT for 4 times based on the address table Idx_dortft20 */ @@ -1482,17 +1799,23 @@ void DoRTFT128_fx( FOR( i = 0; i < 128; i++ ) { z[2 * i] = x[i]; - z[2 * i + 1] = y[i]; + move32(); + z[add( shl( i, 1 ), 1 )] = y[i]; + move32(); } cdftForw( 256, z, Ip_fft128, w_fft128_fx ); x[0] = z[0]; + move32(); y[0] = z[1]; + move32(); FOR( i = 1; i < 128; i++ ) { x[128 - i] = z[2 * i]; - y[128 - i] = z[2 * i + 1]; + move32(); + y[128 - i] = z[add( shl( i, 1 ), 1 )]; + move32(); } return; @@ -1535,22 +1858,30 @@ static void bitrv2_SR( IF( EQ_16( n, 64 ) ) { m = 4; + move16(); l = -1; + move16(); } ELSE IF( EQ_16( n, 256 ) ) { m = 8; + move16(); l = -1; + move16(); } ELSE IF( EQ_16( n, 16 ) ) { m = 2; + move16(); l = -1; + move16(); } ELSE { l = n; + move16(); m = 1; + move16(); WHILE( shl( m, 3 ) < l ) { @@ -1562,7 +1893,7 @@ static void bitrv2_SR( m2 = shl( m, 1 ); - IF( EQ_16( l, 0 ) ) + IF( l == 0 ) { FOR( k = 0; k < m; k++ ) { @@ -1571,55 +1902,95 @@ static void bitrv2_SR( j1 = 
add( shl( j, 1 ), ip[k] ); k1 = add( shl( k, 1 ), ip[j] ); xr = a[j1]; + move32(); xi = a[j1 + 1]; + move32(); yr = a[k1]; + move32(); yi = a[k1 + 1]; + move32(); a[j1] = yr; + move32(); a[j1 + 1] = yi; + move32(); a[k1] = xr; + move32(); a[k1 + 1] = xi; - j1 += m2; - k1 += 2 * m2; + move32(); + j1 = add( j1, m2 ); + k1 = add( k1, 2 * m2 ); xr = a[j1]; + move32(); xi = a[j1 + 1]; + move32(); yr = a[k1]; + move32(); yi = a[k1 + 1]; + move32(); a[j1] = yr; + move32(); a[j1 + 1] = yi; + move32(); a[k1] = xr; + move32(); a[k1 + 1] = xi; + move32(); j1 = add( j1, m2 ); k1 = sub( k1, m2 ); xr = a[j1]; + move32(); xi = a[j1 + 1]; + move32(); yr = a[k1]; + move32(); yi = a[k1 + 1]; + move32(); a[j1] = yr; + move32(); a[j1 + 1] = yi; + move32(); a[k1] = xr; + move32(); a[k1 + 1] = xi; + move32(); j1 = add( j1, m2 ); k1 = add( k1, shl( m2, 1 ) ); xr = a[j1]; + move32(); xi = a[j1 + 1]; + move32(); yr = a[k1]; + move32(); yi = a[k1 + 1]; + move32(); a[j1] = yr; + move32(); a[j1 + 1] = yi; + move32(); a[k1] = xr; + move32(); a[k1 + 1] = xi; + move32(); } j1 = add( shl( k, 1 ), add( m2, ip[k] ) ); k1 = add( j1, m2 ); xr = a[j1]; + move32(); xi = a[j1 + 1]; + move32(); yr = a[k1]; + move32(); yi = a[k1 + 1]; + move32(); a[j1] = yr; + move32(); a[j1 + 1] = yi; + move32(); a[k1] = xr; + move32(); a[k1 + 1] = xi; + move32(); } } ELSE @@ -1631,23 +2002,39 @@ static void bitrv2_SR( j1 = add( shl( j, 1 ), ip[k] ); k1 = add( shl( k, 1 ), ip[j] ); xr = a[j1]; + move32(); xi = a[j1 + 1]; + move32(); yr = a[k1]; + move32(); yi = a[k1 + 1]; + move32(); a[j1] = yr; + move32(); a[j1 + 1] = yi; + move32(); a[k1] = xr; + move32(); a[k1 + 1] = xi; + move32(); j1 = add( j1, m2 ); k1 = add( k1, m2 ); xr = a[j1]; + move32(); xi = a[j1 + 1]; + move32(); yr = a[k1]; + move32(); yi = a[k1 + 1]; + move32(); a[j1] = yr; + move32(); a[j1 + 1] = yi; + move32(); a[k1] = xr; + move32(); a[k1 + 1] = xi; + move32(); } } } @@ -1670,12 +2057,14 @@ static void cftfsub( Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, 
x3i; l = 2; + move16(); IF( GT_16( n, 8 ) ) { cft1st( n, a, w ); l = 8; + move16(); WHILE( LT_16( shl( l, 2 ), n ) ) { @@ -1700,13 +2089,21 @@ static void cftfsub( x3r = L_sub( a[j2], a[j3] ); x3i = L_sub( a[j2 + 1], a[j3 + 1] ); a[j] = L_add( x0r, x2r ); + move32(); a[j + 1] = L_add( x0i, x2i ); + move32(); a[j2] = L_sub( x0r, x2r ); + move32(); a[j2 + 1] = L_sub( x0i, x2i ); + move32(); a[j1] = L_sub( x1r, x3i ); + move32(); a[j1 + 1] = L_add( x1i, x3r ); + move32(); a[j3] = L_add( x1r, x3i ); + move32(); a[j3 + 1] = L_sub( x1i, x3r ); + move32(); } } ELSE @@ -1717,9 +2114,13 @@ static void cftfsub( x0r = L_sub( a[j], a[j1] ); x0i = L_sub( a[j + 1], a[j1 + 1] ); a[j] = L_add( a[j], a[j1] ); + move32(); a[j + 1] = L_add( a[j + 1], a[j1 + 1] ); + move32(); a[j1] = x0r; + move32(); a[j1 + 1] = x0i; + move32(); } } @@ -1750,13 +2151,21 @@ static void cft1st( x3r = L_sub( a[4], a[6] ); x3i = L_sub( a[5], a[7] ); a[0] = L_add( x0r, x2r ); + move32(); a[1] = L_add( x0i, x2i ); + move32(); a[4] = L_sub( x0r, x2r ); + move32(); a[5] = L_sub( x0i, x2i ); + move32(); a[2] = L_sub( x1r, x3i ); + move32(); a[3] = L_add( x1i, x3r ); + move32(); a[6] = L_add( x1r, x3i ); + move32(); a[7] = L_sub( x1i, x3r ); + move32(); wk1r = w[2]; x0r = L_add( a[8], a[10] ); x0i = L_add( a[9], a[11] ); @@ -1767,27 +2176,40 @@ static void cft1st( x3r = L_sub( a[12], a[14] ); x3i = L_sub( a[13], a[15] ); a[8] = L_add( x0r, x2r ); + move32(); a[9] = L_add( x0i, x2i ); + move32(); a[12] = L_sub( x2i, x0i ); + move32(); a[13] = L_sub( x0r, x2r ); + move32(); x0r = L_sub( x1r, x3i ); x0i = L_add( x1i, x3r ); a[10] = Mpy_32_16_1( L_sub( x0r, x0i ), wk1r ); + move32(); a[11] = Mpy_32_16_1( L_add( x0r, x0i ), wk1r ); + move32(); x0r = L_add( x3i, x1r ); x0i = L_sub( x3r, x1i ); a[14] = Mpy_32_16_1( L_sub( x0i, x0r ), wk1r ); + move32(); a[15] = Mpy_32_16_1( L_add( x0i, x0r ), wk1r ); + move32(); k1 = 0; + move16(); FOR( j = 16; j < n; j += 16 ) { k1 = add( k1, 2 ); k2 = shl( k1, 1 ); wk2r = w[k1]; + 
move16(); wk2i = w[k1 + 1]; + move16(); wk1r = w[k2]; + move16(); wk1i = w[k2 + 1]; + move16(); wtmp = mult_r( wk2i, wk1i ); wk3r = sub( wk1r, wtmp ); wk3r = sub( wk3r, wtmp ); @@ -1803,21 +2225,31 @@ static void cft1st( x3r = L_sub( a[j + 4], a[j + 6] ); x3i = L_sub( a[j + 5], a[j + 7] ); a[j] = L_add( x0r, x2r ); + move32(); a[j + 1] = L_add( x0i, x2i ); + move32(); x0r = L_sub( x0r, x2r ); x0i = L_sub( x0i, x2i ); a[j + 4] = L_sub( Mpy_32_16_1( x0r, wk2r ), Mpy_32_16_1( x0i, wk2i ) ); + move32(); a[j + 5] = L_add( Mpy_32_16_1( x0i, wk2r ), Mpy_32_16_1( x0r, wk2i ) ); + move32(); x0r = L_sub( x1r, x3i ); x0i = L_add( x1i, x3r ); a[j + 2] = L_sub( Mpy_32_16_1( x0r, wk1r ), Mpy_32_16_1( x0i, wk1i ) ); + move32(); a[j + 3] = L_add( Mpy_32_16_1( x0i, wk1r ), Mpy_32_16_1( x0r, wk1i ) ); + move32(); x0r = L_add( x1r, x3i ); x0i = L_sub( x1i, x3r ); a[j + 6] = L_sub( Mpy_32_16_1( x0r, wk3r ), Mpy_32_16_1( x0i, wk3i ) ); + move32(); a[j + 7] = L_add( Mpy_32_16_1( x0i, wk3r ), Mpy_32_16_1( x0r, wk3i ) ); + move32(); wk1r = w[k2 + 2]; + move16(); wk1i = w[k2 + 3]; + move16(); wtmp = mult_r( wk2r, wk1i ); wk3r = sub( wk1r, wtmp ); wk3r = sub( wk3r, wtmp ); @@ -1833,19 +2265,26 @@ static void cft1st( x3r = L_sub( a[j + 12], a[j + 14] ); x3i = L_sub( a[j + 13], a[j + 15] ); a[j + 8] = L_add( x0r, x2r ); + move32(); a[j + 9] = L_add( x0i, x2i ); + move32(); x0r = L_sub( x0r, x2r ); x0i = L_sub( x0i, x2i ); a[j + 12] = L_negate( L_add( Mpy_32_16_1( x0r, wk2i ), Mpy_32_16_1( x0i, wk2r ) ) ); + move32(); a[j + 13] = L_sub( Mpy_32_16_1( x0r, wk2r ), Mpy_32_16_1( x0i, wk2i ) ); + move32(); x0r = L_sub( x1r, x3i ); x0i = L_add( x1i, x3r ); a[j + 10] = L_sub( Mpy_32_16_1( x0r, wk1r ), Mpy_32_16_1( x0i, wk1i ) ); + move32(); a[j + 11] = L_add( Mpy_32_16_1( x0i, wk1r ), Mpy_32_16_1( x0r, wk1i ) ); x0r = L_add( x1r, x3i ); x0i = L_sub( x1i, x3r ); a[j + 14] = L_sub( Mpy_32_16_1( x0r, wk3r ), Mpy_32_16_1( x0i, wk3i ) ); + move32(); a[j + 15] = L_add( Mpy_32_16_1( x0i, wk3r ), 
Mpy_32_16_1( x0r, wk3i ) ); + move32(); } return; @@ -1882,16 +2321,25 @@ static void cftmdl( x3r = L_sub( a[j2], a[j3] ); x3i = L_sub( a[j2 + 1], a[j3 + 1] ); a[j] = L_add( x0r, x2r ); + move32(); a[j + 1] = L_add( x0i, x2i ); + move32(); a[j2] = L_sub( x0r, x2r ); + move32(); a[j2 + 1] = L_sub( x0i, x2i ); + move32(); a[j1] = L_sub( x1r, x3i ); + move32(); a[j1 + 1] = L_add( x1i, x3r ); + move32(); a[j3] = L_add( x1r, x3i ); + move32(); a[j3 + 1] = L_sub( x1i, x3r ); + move32(); } wk1r = w[2]; + move16(); FOR( j = m; j < l + m; j += 2 ) { j1 = add( j, l ); @@ -1906,29 +2354,42 @@ static void cftmdl( x3r = L_sub( a[j2], a[j3] ); x3i = L_sub( a[j2 + 1], a[j3 + 1] ); a[j] = L_add( x0r, x2r ); + move32(); a[j + 1] = L_add( x0i, x2i ); + move32(); a[j2] = L_sub( x2i, x0i ); + move32(); a[j2 + 1] = L_sub( x0r, x2r ); + move32(); x0r = L_sub( x1r, x3i ); x0i = L_add( x1i, x3r ); a[j1] = Mpy_32_16_1( L_sub( x0r, x0i ), wk1r ); + move32(); a[j1 + 1] = Mpy_32_16_1( L_add( x0r, x0i ), wk1r ); + move32(); x0r = L_add( x3i, x1r ); x0i = L_sub( x3r, x1i ); a[j3] = Mpy_32_16_1( L_sub( x0i, x0r ), wk1r ); + move32(); a[j3 + 1] = Mpy_32_16_1( L_add( x0i, x0r ), wk1r ); + move32(); } k1 = 0; + move16(); m2 = shl( m, 1 ); FOR( k = m2; k < n; k += m2 ) { k1 = add( k1, 2 ); k2 = shl( k1, 1 ); wk2r = w[k1]; + move16(); wk2i = w[k1 + 1]; + move16(); wk1r = w[k2]; + move16(); wk1i = w[k2 + 1]; + move16(); wtmp = mult_r( wk2i, wk1i ); wk3r = sub( wk1r, wtmp ); wk3r = sub( wk3r, wtmp ); @@ -1949,23 +2410,33 @@ static void cftmdl( x3r = L_sub( a[j2], a[j3] ); x3i = L_sub( a[j2 + 1], a[j3 + 1] ); a[j] = L_add( x0r, x2r ); + move32(); a[j + 1] = L_add( x0i, x2i ); + move32(); x0r = L_sub( x0r, x2r ); x0i = L_sub( x0i, x2i ); a[j2] = L_sub( Mpy_32_16_1( x0r, wk2r ), Mpy_32_16_1( x0i, wk2i ) ); + move32(); a[j2 + 1] = L_add( Mpy_32_16_1( x0i, wk2r ), Mpy_32_16_1( x0r, wk2i ) ); + move32(); x0r = L_sub( x1r, x3i ); x0i = L_add( x1i, x3r ); a[j1] = L_sub( Mpy_32_16_1( x0r, wk1r ), Mpy_32_16_1( 
x0i, wk1i ) ); + move32(); a[j1 + 1] = L_add( Mpy_32_16_1( x0i, wk1r ), Mpy_32_16_1( x0r, wk1i ) ); + move32(); x0r = L_add( x1r, x3i ); x0i = L_sub( x1i, x3r ); a[j3] = L_sub( Mpy_32_16_1( x0r, wk3r ), Mpy_32_16_1( x0i, wk3i ) ); + move32(); a[j3 + 1] = L_add( Mpy_32_16_1( x0i, wk3r ), Mpy_32_16_1( x0r, wk3i ) ); + move32(); } wk1r = w[k2 + 2]; + move16(); wk1i = w[k2 + 3]; + move16(); wtmp = mult_r( wk2r, wk1i ); wk3r = sub( wk1r, wtmp ); wk3r = sub( wk3r, wtmp ); @@ -1986,19 +2457,27 @@ static void cftmdl( x3r = L_sub( a[j2], a[j3] ); x3i = L_sub( a[j2 + 1], a[j3 + 1] ); a[j] = L_add( x0r, x2r ); + move32(); a[j + 1] = L_add( x0i, x2i ); + move32(); x0r = L_sub( x0r, x2r ); x0i = L_sub( x0i, x2i ); a[j2] = L_negate( L_add( Mpy_32_16_1( x0r, wk2i ), Mpy_32_16_1( x0i, wk2r ) ) ); + move32(); a[j2 + 1] = L_sub( Mpy_32_16_1( x0r, wk2r ), Mpy_32_16_1( x0i, wk2i ) ); + move32(); x0r = L_sub( x1r, x3i ); x0i = L_add( x1i, x3r ); a[j1] = L_sub( Mpy_32_16_1( x0r, wk1r ), Mpy_32_16_1( x0i, wk1i ) ); + move32(); a[j1 + 1] = L_add( Mpy_32_16_1( x0i, wk1r ), Mpy_32_16_1( x0r, wk1i ) ); + move32(); x0r = L_add( x1r, x3i ); x0i = L_sub( x1i, x3r ); a[j3] = L_sub( Mpy_32_16_1( x0r, wk3r ), Mpy_32_16_1( x0i, wk3i ) ); + move32(); a[j3 + 1] = L_add( Mpy_32_16_1( x0i, wk3r ), Mpy_32_16_1( x0r, wk3i ) ); + move32(); } } @@ -2015,10 +2494,12 @@ static void cftbsub( Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 2; + move16(); IF( GT_16( n, 8 ) ) { cft1st( n, a, w ); l = 8; + move16(); WHILE( LT_16( shl( l, 2 ), n ) ) { @@ -2043,13 +2524,21 @@ static void cftbsub( x3r = L_sub( a[j2], a[j3] ); x3i = L_sub( a[j2 + 1], a[j3 + 1] ); a[j] = L_add( x0r, x2r ); + move32(); a[j + 1] = L_sub( x0i, x2i ); + move32(); a[j2] = L_sub( x0r, x2r ); + move32(); a[j2 + 1] = L_add( x0i, x2i ); + move32(); a[j1] = L_sub( x1r, x3i ); + move32(); a[j1 + 1] = L_sub( x1i, x3r ); + move32(); a[j3] = L_add( x1r, x3i ); + move32(); a[j3 + 1] = L_add( x1i, x3r ); + move32(); } } ELSE @@ -2060,9 +2549,13 @@ 
static void cftbsub( x0r = L_sub( a[j], a[j1] ); x0i = L_sub( a[j1 + 1], a[j + 1] ); a[j] = L_add( a[j], a[j1] ); + move32(); a[j + 1] = L_negate( L_add( a[j + 1], a[j1 + 1] ) ); + move32(); a[j1] = x0r; + move32(); a[j1 + 1] = x0i; + move32(); } } @@ -2080,22 +2573,28 @@ static void rftfsub( Word32 xr, xi, yr, yi; m = shr( n, 1 ); - ks = 2 * nc / m; + ks = idiv1616( shl( nc, 1 ), m ); kk = 0; + move16(); FOR( j = 2; j < m; j += 2 ) { k = sub( n, j ); kk = add( kk, ks ); - wkr = sub( (Word16) 0x4000, c[nc - kk] ); + wkr = sub( (Word16) 0x4000, c[sub( nc, kk )] ); wki = c[kk]; + move16(); xr = L_sub( a[j], a[k] ); xi = L_add( a[j + 1], a[k + 1] ); yr = L_sub( Mpy_32_16_1( xr, wkr ), Mpy_32_16_1( xi, wki ) ); yi = L_add( Mpy_32_16_1( xi, wkr ), Mpy_32_16_1( xr, wki ) ); a[j] = L_sub( a[j], yr ); + move32(); a[j + 1] = L_sub( a[j + 1], yi ); + move32(); a[k] = L_add( a[k], yr ); + move32(); a[k + 1] = L_sub( a[k + 1], yi ); + move32(); } return; @@ -2112,23 +2611,29 @@ static void rftbsub( Word16 wkr, wki; Word32 xr, xi, yr, yi; a[1] = L_negate( a[1] ); + move32(); m = shr( n, 1 ); - ks = 2 * nc / m; + ks = idiv1616( shl( nc, 1 ), m ); kk = 0; + move16(); FOR( j = 2; j < m; j += 2 ) { - k = n - j; - kk += ks; - wkr = sub( (Word16) 0x4000, c[nc - kk] ); + k = sub( n, j ); + kk = add( kk, ks ); + wkr = sub( (Word16) 0x4000, c[sub( nc, kk )] ); wki = c[kk]; xr = L_sub( a[j], a[k] ); xi = L_add( a[j + 1], a[k + 1] ); yr = L_add( Mpy_32_16_1( xr, wkr ), Mpy_32_16_1( xi, wki ) ); yi = L_sub( Mpy_32_16_1( xi, wkr ), Mpy_32_16_1( xr, wki ) ); a[j] = L_sub( a[j], yr ); + move32(); a[j + 1] = L_sub( yi, a[j + 1] ); + move32(); a[k] = L_add( a[k], yr ); + move32(); a[k + 1] = L_sub( yi, a[k + 1] ); + move32(); } a[m + 1] = L_negate( a[m + 1] ); @@ -2147,19 +2652,23 @@ static void dctsub( Word32 xr; m = shr( n, 1 ); - ks = nc / n; + ks = idiv1616( nc, n ); kk = 0; + move16(); FOR( j = 1; j < m; j++ ) { k = sub( n, j ); - kk += ks; - wkr = sub( c[kk], c[nc - kk] ); - wki = add( 
c[kk], c[nc - kk] ); + kk = add( kk, ks ); + wkr = sub( c[kk], c[sub( nc, kk )] ); + wki = add( c[kk], c[sub( nc, kk )] ); xr = L_sub( Mpy_32_16_1( a[j], wki ), Mpy_32_16_1( a[k], wkr ) ); a[j] = L_add( Mpy_32_16_1( a[j], wkr ), Mpy_32_16_1( a[k], wki ) ); + move32(); a[k] = xr; + move32(); } a[m] = Mpy_32_16_1( a[m], c[0] ); + move32(); return; } @@ -2185,26 +2694,34 @@ void edct2_fx_ivas( Copy32( in, a, n ); nw = ip[0]; - IF( GT_16( n, shl( nw, 2 ) ) ) + move16(); + if ( GT_16( n, shl( nw, 2 ) ) ) { nw = shr( n, 2 ); } nc = ip[1]; - IF( GT_16( n, nc ) ) + move16(); + if ( GT_16( n, nc ) ) { nc = n; + move16(); } - IF( LT_16( isgn, 0 ) ) + IF( isgn < 0 ) { xr = a[n - 1]; + move16(); FOR( j = n - 2; j >= 2; j -= 2 ) { a[j + 1] = L_sub( a[j], a[j - 1] ); + move32(); a[j] = L_add( a[j], a[j - 1] ); + move32(); } a[1] = L_sub( a[0], xr ); + move32(); a[0] = L_add( a[0], xr ); + move32(); IF( GT_16( n, 4 ) ) { @@ -2218,14 +2735,15 @@ void edct2_fx_ivas( } } - IF( GE_16( isgn, 0 ) ) + IF( isgn >= 0 ) { a[0] = L_shr( a[0], 1 ); + move32(); } dctsub( n, a, nc, w + nw ); - IF( GE_16( isgn, 0 ) ) + IF( isgn >= 0 ) { IF( GT_16( n, 4 ) ) { @@ -2239,16 +2757,21 @@ void edct2_fx_ivas( } xr = L_sub( a[0], a[1] ); a[0] = L_add( a[0], a[1] ); + move32(); FOR( j = 2; j < n; j += 2 ) { a[j - 1] = L_sub( a[j], a[j + 1] ); + move32(); a[j] = L_add( a[j], a[j + 1] ); + move32(); } a[n - 1] = xr; + move32(); FOR( j = 0; j < n; j++ ) { a[j] = L_shr( a[j], 5 ); + move32(); } } } @@ -2266,7 +2789,9 @@ void DoRTFTn_fx_ivas( FOR( i = 0; i < n; i++ ) { z[2 * i] = x[i]; - z[2 * i + 1] = y[i]; + move32(); + z[add( shl( i, 1 ), 1 )] = y[i]; + move32(); } SWITCH( n ) @@ -2294,11 +2819,15 @@ void DoRTFTn_fx_ivas( } x[0] = z[0]; + move32(); y[0] = z[1]; + move32(); FOR( i = 1; i < n; i++ ) { - x[n - i] = z[2 * i]; - y[n - i] = z[2 * i + 1]; + x[sub( n, i )] = z[2 * i]; + move32(); + y[sub( n, i )] = z[add( shl( i, 1 ), 1 )]; + move32(); } return; @@ -2322,22 +2851,31 @@ void fft3_fx_ivas( /* 
Determine the order of the transform, the length of decimated */ /* transforms m, and the step for the sine and cosine tables. */ - switch ( n ) + SWITCH( n ) { case 1536: order = 9; + move16(); m = 512; + move16(); step = 1; - break; + move16(); + BREAK; case 384: order = 7; + move16(); m = 128; + move16(); step = 4; - break; + move16(); + BREAK; default: order = 9; + move16(); m = 512; + move16(); step = 1; + move16(); } /* Compose decimated sequences X[3i], X[3i+1],X[3i+2] */ @@ -2352,8 +2890,11 @@ void fft3_fx_ivas( FOR( i = 0; i < n / 3; i++ ) { *z0++ = *x++; /* Z0[i] = X[3i]; */ + move32(); *z1++ = *x++; /* Z1[i] = X[3i+1]; */ + move32(); *z2++ = *x++; /* Z2[i] = X[3i+2]; */ + move32(); } fft_rel_fx32( &Z0[0], m, order ); @@ -2373,49 +2914,59 @@ void fft3_fx_ivas( c1_step = negate( step ); s1_step = step; + move16(); c2_step = negate( shl( step, 1 ) ); s2_step = shl( step, 1 ); c1_ind = add( T_SIN_PI_2, c1_step ); s1_ind = s1_step; + move16(); c2_ind = add( T_SIN_PI_2, c2_step ); s2_ind = s2_step; + move16(); /* special case: i = 0 */ RY[0] = L_add( RZ0[0], L_add( RZ1[0], RZ2[0] ) ); + move32(); /* first 3/12 */ - for ( i = 1; i < 3 * m / 8; i++, c1_ind = add( c1_ind, c1_step ), s1_ind = add( s1_ind, s1_step ), c2_ind = add( c2_ind, c2_step ), s2_ind = add( s2_ind, s2_step ) ) + FOR( i = 1; i < i_mult( 3, shr( m, 3 ) ); ( i++, c1_ind = add( c1_ind, c1_step ), s1_ind = add( s1_ind, s1_step ), c2_ind = add( c2_ind, c2_step ), s2_ind = add( s2_ind, s2_step ) ) ) { RY[i] = L_add( RZ0[i], L_add( Mpy_32_16_1( RZ1[i], t_sin[c1_ind] ), L_add( Mpy_32_16_1( IZ1[-i], t_sin[s1_ind] ), L_add( Mpy_32_16_1( RZ2[i], t_sin[c2_ind] ), Mpy_32_16_1( IZ2[-i], t_sin[s2_ind] ) ) ) ) ); + move32(); IY[-i] = L_sub( IZ0[-i], L_add( L_sub( Mpy_32_16_1( RZ1[i], t_sin[s1_ind] ), Mpy_32_16_1( IZ1[-i], t_sin[c1_ind] ) ), L_sub( Mpy_32_16_1( RZ2[i], t_sin[s2_ind] ), Mpy_32_16_1( IZ2[-i], t_sin[c2_ind] ) ) ) ); + move32(); } /* next 1/12 */ - for ( ; i < 4 * m / 8; i++, c1_ind = add( c1_ind, 
c1_step ), s1_ind = add( s1_ind, s1_step ), c2_ind = sub( c2_ind, c2_step ), s2_ind = sub( s2_ind, s2_step ) ) + FOR( ; i < 4 * m / 8; ( i++, c1_ind = add( c1_ind, c1_step ), s1_ind = add( s1_ind, s1_step ), c2_ind = sub( c2_ind, c2_step ), s2_ind = sub( s2_ind, s2_step ) ) ) { RY[i] = L_add( RZ0[i], L_sub( L_add( Mpy_32_16_1( RZ1[i], t_sin[c1_ind] ), Mpy_32_16_1( IZ1[-i], t_sin[s1_ind] ) ), L_sub( Mpy_32_16_1( RZ2[i], t_sin[c2_ind] ), Mpy_32_16_1( IZ2[-i], t_sin[s2_ind] ) ) ) ); + move32(); IY[-i] = L_sub( IZ0[-i], L_sub( Mpy_32_16_1( RZ1[i], t_sin[s1_ind] ), L_sub( Mpy_32_16_1( IZ1[-i], t_sin[c1_ind] ), L_add( Mpy_32_16_1( RZ2[i], t_sin[s2_ind] ), Mpy_32_16_1( IZ2[-i], t_sin[c2_ind] ) ) ) ) ); + move32(); } /* special case: i = m/2 i.e. 1/3 */ RY[i] = L_add( RZ0[i], L_sub( Mpy_32_16_1( RZ1[i], t_sin[c1_ind] ), Mpy_32_16_1( RZ2[i], t_sin[c2_ind] ) ) ); + move32(); IY[-i] = L_negate( L_add( Mpy_32_16_1( RZ1[i], t_sin[s1_ind] ), Mpy_32_16_1( RZ2[i], t_sin[s2_ind] ) ) ); - i++; + move32(); + i = add( i, 1 ); c1_ind = add( c1_ind, c1_step ); s1_ind = add( s1_ind, s1_step ); @@ -2423,69 +2974,81 @@ void fft3_fx_ivas( s2_ind = sub( s2_ind, s2_step ); /* next 2/12 */ - for ( j = i - 2; i < 6 * m / 8; i++, j--, c1_ind = add( c1_ind, c1_step ), s1_ind = add( s1_ind, s1_step ), c2_ind = sub( c2_ind, c2_step ), s2_ind = sub( s2_ind, s2_step ) ) + FOR( j = i - 2; i < 6 * m / 8; ( i++, j--, c1_ind = add( c1_ind, c1_step ), s1_ind = add( s1_ind, s1_step ), c2_ind = sub( c2_ind, c2_step ), s2_ind = sub( s2_ind, s2_step ) ) ) { RY[i] = L_add( RZ0[j], L_sub( Mpy_32_16_1( RZ1[j], t_sin[c1_ind] ), L_add( Mpy_32_16_1( IZ1[-j], t_sin[s1_ind] ), L_add( Mpy_32_16_1( RZ2[j], t_sin[c2_ind] ), Mpy_32_16_1( IZ2[-j], t_sin[s2_ind] ) ) ) ) ); + move32(); IY[-i] = L_negate( L_add( IZ0[-j], L_add( Mpy_32_16_1( RZ1[j], t_sin[s1_ind] ), L_add( Mpy_32_16_1( IZ1[-j], t_sin[c1_ind] ), L_sub( Mpy_32_16_1( RZ2[j], t_sin[s2_ind] ), Mpy_32_16_1( IZ2[-j], t_sin[c2_ind] ) ) ) ) ) ); + move32(); } 
/*--------------------------half--------------------------*/ /* next 2/12 */ - for ( ; i < 8 * m / 8; i++, j--, c1_ind = sub( c1_ind, c1_step ), s1_ind = sub( s1_ind, s1_step ), c2_ind = add( c2_ind, c2_step ), s2_ind = add( s2_ind, s2_step ) ) + FOR( ; i < 8 * m / 8; ( i++, j--, c1_ind = sub( c1_ind, c1_step ), s1_ind = sub( s1_ind, s1_step ), c2_ind = add( c2_ind, c2_step ), s2_ind = add( s2_ind, s2_step ) ) ) { RY[i] = L_sub( RZ0[j], L_add( Mpy_32_16_1( RZ1[j], t_sin[c1_ind] ), L_add( Mpy_32_16_1( IZ1[-j], t_sin[s1_ind] ), L_sub( Mpy_32_16_1( RZ2[j], t_sin[c2_ind] ), Mpy_32_16_1( IZ2[-j], t_sin[s2_ind] ) ) ) ) ); + move32(); IY[-i] = L_negate( L_add( IZ0[-j], L_sub( Mpy_32_16_1( RZ1[j], t_sin[s1_ind] ), L_add( Mpy_32_16_1( IZ1[-j], t_sin[c1_ind] ), L_add( Mpy_32_16_1( RZ2[j], t_sin[s2_ind] ), Mpy_32_16_1( IZ2[-j], t_sin[c2_ind] ) ) ) ) ) ); + move32(); } /* special case: i = m, i.e 2/3 */ RY[i] = L_sub( RZ0[j], L_add( Mpy_32_16_1( RZ1[j], t_sin[c1_ind] ), Mpy_32_16_1( RZ2[j], t_sin[c2_ind] ) ) ); - IY[-i++] = L_sub( Mpy_32_16_1( RZ2[j], t_sin[s2_ind] ), - Mpy_32_16_1( RZ1[j], t_sin[s1_ind] ) ); + move32(); + IY[-i] = L_sub( Mpy_32_16_1( RZ2[j], t_sin[s2_ind] ), + Mpy_32_16_1( RZ1[j], t_sin[s1_ind] ) ); + move32(); + i = add( i, 1 ); c1_ind = sub( c1_ind, c1_step ), s1_ind = sub( s1_ind, s1_step ), c2_ind = add( c2_ind, c2_step ), s2_ind = add( s2_ind, s2_step ); /* next 1/12 */ - for ( j = 1; i < 9 * m / 8; i++, j++, c1_ind = sub( c1_ind, c1_step ), s1_ind = sub( s1_ind, s1_step ), c2_ind = add( c2_ind, c2_step ), s2_ind = add( s2_ind, s2_step ) ) + FOR( j = 1; i < i_mult( 9, shr( m, 3 ) ); ( i++, j++, c1_ind = sub( c1_ind, c1_step ), s1_ind = sub( s1_ind, s1_step ), c2_ind = add( c2_ind, c2_step ), s2_ind = add( s2_ind, s2_step ) ) ) { RY[i] = L_sub( RZ0[j], L_sub( Mpy_32_16_1( RZ1[j], t_sin[c1_ind] ), L_sub( Mpy_32_16_1( IZ1[-j], t_sin[s1_ind] ), L_add( Mpy_32_16_1( RZ2[j], t_sin[c2_ind] ), Mpy_32_16_1( IZ2[-j], t_sin[s2_ind] ) ) ) ) ); + move32(); IY[-i] = 
L_sub( IZ0[-j], L_add( Mpy_32_16_1( RZ1[j], t_sin[s1_ind] ), L_sub( Mpy_32_16_1( IZ1[-j], t_sin[c1_ind] ), L_sub( Mpy_32_16_1( RZ2[j], t_sin[s2_ind] ), Mpy_32_16_1( IZ2[-j], t_sin[c2_ind] ) ) ) ) ); + move32(); } /* last 3/12 */ - for ( ; i < 12 * m / 8; i++, j++, c1_ind = sub( c1_ind, c1_step ), s1_ind = sub( s1_ind, s1_step ), c2_ind = sub( c2_ind, c2_step ), s2_ind = sub( s2_ind, s2_step ) ) + FOR( ; i < 12 * m / 8; ( i++, j++, c1_ind = sub( c1_ind, c1_step ), s1_ind = sub( s1_ind, s1_step ), c2_ind = sub( c2_ind, c2_step ), s2_ind = sub( s2_ind, s2_step ) ) ) { RY[i] = L_sub( RZ0[j], L_sub( L_sub( Mpy_32_16_1( RZ1[j], t_sin[c1_ind] ), Mpy_32_16_1( IZ1[-j], t_sin[s1_ind] ) ), L_sub( Mpy_32_16_1( RZ2[j], t_sin[c2_ind] ), Mpy_32_16_1( IZ2[-j], t_sin[s2_ind] ) ) ) ); + move32(); IY[-i] = L_sub( IZ0[-j], L_sub( L_add( Mpy_32_16_1( RZ1[j], t_sin[s1_ind] ), Mpy_32_16_1( IZ1[-j], t_sin[c1_ind] ) ), L_add( Mpy_32_16_1( RZ2[j], t_sin[s2_ind] ), Mpy_32_16_1( IZ2[-j], t_sin[c2_ind] ) ) ) ); + move32(); } /* special case: i = 3*m/2 */ RY[i] = L_sub( RZ0[j], L_sub( Mpy_32_16_1( RZ1[j], t_sin[c1_ind] ), Mpy_32_16_1( RZ2[j], t_sin[c2_ind] ) ) ); + move32(); return; } @@ -2506,22 +3069,31 @@ void ifft3_fx_ivas( /* Determine the order of the transform, the length of decimated */ /* transforms m, and the step for the sine and cosine tables. */ - switch ( n ) + SWITCH( n ) { case 1536: order = 9; + move16(); m = 512; + move16(); step = 1; - break; + move16(); + BREAK; case 384: order = 7; + move16(); m = 128; + move16(); step = 4; - break; + move16(); + BREAK; default: order = 9; + move16(); m = 512; + move16(); step = 1; + move16(); } /* pointer initialization */ @@ -2534,23 +3106,27 @@ void ifft3_fx_ivas( RZ0 = &Z[0]; RZ1 = RZ0 + m; - RZ2 = RZ0 + n / 2 - m / 2; + RZ2 = RZ0 + sub( shr( n, 1 ), shr( m, 1 ) ); IZ0 = &Z[n]; IZ1 = IZ0 - m; - IZ2 = IZ0 - n / 2 + m / 2; + IZ2 = IZ0 - sub( shr( n, 1 ), shr( m, 1 ) ); /* Inverse butterflies of order 3. 
*/ /* Construction of Y0 */ RY0[0] = L_add( RZ0[0], L_add( RZ1[0], RZ2[0] ) ); + move32(); FOR( i = 1; i < m / 2; i++ ) { RY0[i] = L_add( RZ0[i], L_add( RZ1[i], RZ2[-i] ) ); + move32(); IY0[-i] = L_add( IZ0[-i], L_sub( IZ1[-i], IZ2[i] ) ); + move32(); } /* m/2 */ RY0[i] = L_add( RZ0[i], L_add( RZ1[i], RZ2[-i] ) ); + move32(); /* Construction of Y1 */ c0_ind = T_SIN_PI_2; @@ -2565,6 +3141,7 @@ void ifft3_fx_ivas( L_add( Mpy_32_16_1( RZ2[0], t_sin[c2_ind] ), L_add( Mpy_32_16_1( IZ1[0], t_sin[s1_ind] ), Mpy_32_16_1( IZ2[0], t_sin[s2_ind] ) ) ) ) ); + move32(); c0_ind = sub( c0_ind, step ); s0_ind = add( s0_ind, step ); @@ -2572,7 +3149,7 @@ void ifft3_fx_ivas( s1_ind = sub( s1_ind, step ); c2_ind = sub( c2_ind, step ); s2_ind = add( s2_ind, step ); - for ( i = 1; i < m / 4; i++, c0_ind = sub( c0_ind, step ), s0_ind = add( s0_ind, step ), c1_ind = add( c1_ind, step ), s1_ind = sub( s1_ind, step ), c2_ind = sub( c2_ind, step ), s2_ind = add( s2_ind, step ) ) + FOR( i = 1; i < m / 4; ( i++, c0_ind = sub( c0_ind, step ), s0_ind = add( s0_ind, step ), c1_ind = add( c1_ind, step ), s1_ind = sub( s1_ind, step ), c2_ind = sub( c2_ind, step ), s2_ind = add( s2_ind, step ) ) ) { RY1[i] = L_sub( Mpy_32_16_1( RZ0[i], t_sin[c0_ind] ), L_add( Mpy_32_16_1( RZ1[i], t_sin[c1_ind] ), @@ -2580,15 +3157,17 @@ void ifft3_fx_ivas( L_add( Mpy_32_16_1( IZ0[-i], t_sin[s0_ind] ), L_add( Mpy_32_16_1( IZ1[-i], t_sin[s1_ind] ), Mpy_32_16_1( IZ2[i], t_sin[s2_ind] ) ) ) ) ) ); + move32(); IY1[-i] = L_add( L_sub( Mpy_32_16_1( IZ0[-i], t_sin[c0_ind] ), Mpy_32_16_1( IZ1[-i], t_sin[c1_ind] ) ), L_add( Mpy_32_16_1( IZ2[i], t_sin[c2_ind] ), L_add( Mpy_32_16_1( RZ0[i], t_sin[s0_ind] ), L_sub( Mpy_32_16_1( RZ1[i], t_sin[s1_ind] ), Mpy_32_16_1( RZ2[-i], t_sin[s2_ind] ) ) ) ) ); + move32(); } - for ( ; i < m / 2; i++, c0_ind = sub( c0_ind, step ), s0_ind = add( s0_ind, step ), c1_ind = add( c1_ind, step ), s1_ind = sub( s1_ind, step ), c2_ind = add( c2_ind, step ), s2_ind = sub( s2_ind, step ) ) + FOR( ; i < 
m / 2; ( i++, c0_ind = sub( c0_ind, step ), s0_ind = add( s0_ind, step ), c1_ind = add( c1_ind, step ), s1_ind = sub( s1_ind, step ), c2_ind = add( c2_ind, step ), s2_ind = sub( s2_ind, step ) ) ) { RY1[i] = L_sub( Mpy_32_16_1( RZ0[i], t_sin[c0_ind] ), L_add( L_sub( Mpy_32_16_1( RZ1[i], t_sin[c1_ind] ), @@ -2596,12 +3175,14 @@ void ifft3_fx_ivas( L_add( Mpy_32_16_1( IZ0[-i], t_sin[s0_ind] ), L_add( Mpy_32_16_1( IZ1[-i], t_sin[s1_ind] ), Mpy_32_16_1( IZ2[i], t_sin[s2_ind] ) ) ) ) ); + move32(); IY1[-i] = L_sub( Mpy_32_16_1( IZ0[-i], t_sin[c0_ind] ), L_sub( L_add( Mpy_32_16_1( IZ1[-i], t_sin[c1_ind] ), Mpy_32_16_1( IZ2[i], t_sin[c2_ind] ) ), L_add( Mpy_32_16_1( RZ0[i], t_sin[s0_ind] ), L_sub( Mpy_32_16_1( RZ1[i], t_sin[s1_ind] ), Mpy_32_16_1( RZ2[-i], t_sin[s2_ind] ) ) ) ) ); + move32(); } /* m/2 */ @@ -2611,20 +3192,28 @@ void ifft3_fx_ivas( L_add( Mpy_32_16_1( IZ0[-i], t_sin[s0_ind] ), L_add( Mpy_32_16_1( IZ1[-i], t_sin[s1_ind] ), Mpy_32_16_1( IZ2[i], t_sin[s2_ind] ) ) ) ) ); + move32(); /* Construction of Y2 */ c0_ind = T_SIN_PI_2; + move16(); s0_ind = 0; + move16(); c1_ind = T_SIN_PI_2 * 1 / 3; + move16(); s1_ind = T_SIN_PI_2 * 2 / 3; + move16(); c2_ind = T_SIN_PI_2 * 1 / 3; + move16(); s2_ind = T_SIN_PI_2 * 2 / 3; - step2 = 2 * step; + move16(); + step2 = shl( step, 1 ); RY2[0] = L_sub( Mpy_32_16_1( RZ0[0], t_sin[c0_ind] ), L_sub( L_add( Mpy_32_16_1( RZ1[0], t_sin[c1_ind] ), Mpy_32_16_1( RZ2[0], t_sin[c2_ind] ) ), L_add( Mpy_32_16_1( IZ1[0], t_sin[s1_ind] ), Mpy_32_16_1( IZ2[0], t_sin[s2_ind] ) ) ) ); + move32(); c0_ind = sub( c0_ind, step2 ); s0_ind = add( s0_ind, step2 ); @@ -2632,7 +3221,7 @@ void ifft3_fx_ivas( s1_ind = add( s1_ind, step2 ); c2_ind = add( c2_ind, step2 ); s2_ind = sub( s2_ind, step2 ); - for ( i = 1; i < m / 8; i++, c0_ind = sub( c0_ind, step2 ), s0_ind = add( s0_ind, step2 ), c1_ind = sub( c1_ind, step2 ), s1_ind = add( s1_ind, step2 ), c2_ind = add( c2_ind, step2 ), s2_ind = sub( s2_ind, step2 ) ) + FOR( i = 1; i < m / 8; ( i++, c0_ind = 
sub( c0_ind, step2 ), s0_ind = add( s0_ind, step2 ), c1_ind = sub( c1_ind, step2 ), s1_ind = add( s1_ind, step2 ), c2_ind = add( c2_ind, step2 ), s2_ind = sub( s2_ind, step2 ) ) ) { RY2[i] = L_sub( Mpy_32_16_1( RZ0[i], t_sin[c0_ind] ), L_add( L_add( Mpy_32_16_1( RZ1[i], t_sin[c1_ind] ), @@ -2640,15 +3229,17 @@ void ifft3_fx_ivas( L_sub( Mpy_32_16_1( IZ0[-i], t_sin[s0_ind] ), L_add( Mpy_32_16_1( IZ1[-i], t_sin[s1_ind] ), Mpy_32_16_1( IZ2[i], t_sin[s2_ind] ) ) ) ) ); + move32(); IY2[-i] = L_add( L_sub( Mpy_32_16_1( IZ0[-i], t_sin[c0_ind] ), Mpy_32_16_1( IZ1[-i], t_sin[c1_ind] ) ), L_add( Mpy_32_16_1( IZ2[i], t_sin[c2_ind] ), L_sub( Mpy_32_16_1( RZ0[i], t_sin[s0_ind] ), L_sub( Mpy_32_16_1( RZ1[i], t_sin[s1_ind] ), Mpy_32_16_1( RZ2[-i], t_sin[s2_ind] ) ) ) ) ); + move32(); } - for ( ; i < m / 4; i++, c0_ind = sub( c0_ind, step2 ), s0_ind = add( s0_ind, step2 ), c1_ind = add( c1_ind, step2 ), s1_ind = sub( s1_ind, step2 ), c2_ind = add( c2_ind, step2 ), s2_ind = sub( s2_ind, step2 ) ) + FOR( ; i < m / 4; ( i++, c0_ind = sub( c0_ind, step2 ), s0_ind = add( s0_ind, step2 ), c1_ind = add( c1_ind, step2 ), s1_ind = sub( s1_ind, step2 ), c2_ind = add( c2_ind, step2 ), s2_ind = sub( s2_ind, step2 ) ) ) { RY2[i] = L_add( Mpy_32_16_1( RZ0[i], t_sin[c0_ind] ), L_sub( Mpy_32_16_1( RZ1[i], t_sin[c1_ind] ), @@ -2656,15 +3247,17 @@ void ifft3_fx_ivas( Mpy_32_16_1( IZ0[-i], t_sin[s0_ind] ) ), L_add( Mpy_32_16_1( IZ1[-i], t_sin[s1_ind] ), Mpy_32_16_1( IZ2[i], t_sin[s2_ind] ) ) ) ) ); + move32(); IY2[-i] = L_add( Mpy_32_16_1( IZ0[-i], t_sin[c0_ind] ), L_add( Mpy_32_16_1( IZ1[-i], t_sin[c1_ind] ), L_add( Mpy_32_16_1( IZ2[i], t_sin[c2_ind] ), L_sub( Mpy_32_16_1( RZ0[i], t_sin[s0_ind] ), L_sub( Mpy_32_16_1( RZ1[i], t_sin[s1_ind] ), Mpy_32_16_1( RZ2[-i], t_sin[s2_ind] ) ) ) ) ) ); + move32(); } - for ( ; i < 3 * m / 8; i++, c0_ind = sub( c0_ind, step2 ), s0_ind = add( s0_ind, step2 ), c1_ind = add( c1_ind, step2 ), s1_ind = sub( s1_ind, step2 ), c2_ind = sub( c2_ind, step2 ), s2_ind = add( 
s2_ind, step2 ) ) + FOR( ; i < i_mult( 3, shr( m, 3 ) ); ( i++, c0_ind = sub( c0_ind, step2 ), s0_ind = add( s0_ind, step2 ), c1_ind = add( c1_ind, step2 ), s1_ind = sub( s1_ind, step2 ), c2_ind = sub( c2_ind, step2 ), s2_ind = add( s2_ind, step2 ) ) ) { RY2[i] = L_sub( L_add( Mpy_32_16_1( RZ0[i], t_sin[c0_ind] ), Mpy_32_16_1( RZ1[i], t_sin[c1_ind] ) ), @@ -2672,15 +3265,17 @@ void ifft3_fx_ivas( Mpy_32_16_1( IZ0[-i], t_sin[s0_ind] ) ), L_sub( Mpy_32_16_1( IZ1[-i], t_sin[s1_ind] ), Mpy_32_16_1( IZ2[i], t_sin[s2_ind] ) ) ) ); + move32(); IY2[-i] = L_sub( L_add( Mpy_32_16_1( IZ0[-i], t_sin[c0_ind] ), L_add( Mpy_32_16_1( IZ1[-i], t_sin[c1_ind] ), L_add( Mpy_32_16_1( IZ2[i], t_sin[c2_ind] ), Mpy_32_16_1( RZ0[i], t_sin[s0_ind] ) ) ) ), L_add( Mpy_32_16_1( RZ1[i], t_sin[s1_ind] ), Mpy_32_16_1( RZ2[-i], t_sin[s2_ind] ) ) ); + move32(); } - for ( ; i < m / 2; i++, c0_ind = add( c0_ind, step2 ), s0_ind = sub( s0_ind, step2 ), c1_ind = add( c1_ind, step2 ), s1_ind = sub( s1_ind, step2 ), c2_ind = sub( c2_ind, step2 ), s2_ind = add( s2_ind, step2 ) ) + FOR( ; i < m / 2; ( i++, c0_ind = add( c0_ind, step2 ), s0_ind = sub( s0_ind, step2 ), c1_ind = add( c1_ind, step2 ), s1_ind = sub( s1_ind, step2 ), c2_ind = sub( c2_ind, step2 ), s2_ind = add( s2_ind, step2 ) ) ) { RY2[i] = L_sub( L_sub( Mpy_32_16_1( RZ1[i], t_sin[c1_ind] ), Mpy_32_16_1( RZ0[i], t_sin[c0_ind] ) ), @@ -2688,12 +3283,14 @@ void ifft3_fx_ivas( Mpy_32_16_1( IZ0[-i], t_sin[s0_ind] ) ), L_sub( Mpy_32_16_1( IZ1[-i], t_sin[s1_ind] ), Mpy_32_16_1( IZ2[i], t_sin[s2_ind] ) ) ) ); + move32(); IY2[-i] = L_add( L_sub( Mpy_32_16_1( IZ1[-i], t_sin[c1_ind] ), Mpy_32_16_1( IZ0[-i], t_sin[c0_ind] ) ), L_sub( L_add( Mpy_32_16_1( IZ2[i], t_sin[c2_ind] ), Mpy_32_16_1( RZ0[i], t_sin[s0_ind] ) ), L_add( Mpy_32_16_1( RZ1[i], t_sin[s1_ind] ), Mpy_32_16_1( RZ2[-i], t_sin[s2_ind] ) ) ) ); + move32(); } /* m/2 */ @@ -2703,6 +3300,7 @@ void ifft3_fx_ivas( Mpy_32_16_1( IZ0[-i], t_sin[s0_ind] ) ), L_sub( Mpy_32_16_1( IZ1[-i], t_sin[s1_ind] 
), Mpy_32_16_1( IZ2[i], t_sin[s2_ind] ) ) ) ); + move32(); /* Compute the inverse FFT for all 3 blocks. */ ifft_rel_fx32( RY0, m, order ); @@ -2717,9 +3315,15 @@ void ifft3_fx_ivas( scale = (Word16) ( 0x2AAB ); FOR( i = 0; i < n; ) { - X[i++] = Mpy_32_16_1( ( *y0++ ), scale ); - X[i++] = Mpy_32_16_1( ( *y1++ ), scale ); - X[i++] = Mpy_32_16_1( ( *y2++ ), scale ); + X[i] = Mpy_32_16_1( ( *y0++ ), scale ); + move32(); + i = add( i, 1 ); + X[i] = Mpy_32_16_1( ( *y1++ ), scale ); + move32(); + i = add( i, 1 ); + X[i] = Mpy_32_16_1( ( *y2++ ), scale ); + move32(); + i = add( i, 1 ); } return; @@ -2734,27 +3338,36 @@ static void rfft_post( Word32 tmp1, tmp2, tmp3, tmp4; Word16 s, c; Word16 i = 0; + move16(); tmp1 = L_add( buf[0], buf[1] ); buf[1] = L_sub( buf[0], buf[1] ); + move32(); buf[0] = tmp1; + move32(); - FOR( i = 1; i <= ( len + 2 ) / 4; i++ ) + FOR( i = 1; i <= shr( add( len, 2 ), 2 ); i++ ) { - s = sine_table[i]; /* sin(pi*i/(len/2)) */ - c = sine_table[i + len / 4]; /* cos(pi*i/(len/2)) */ + s = sine_table[i]; /* sin(pi*i/(len/2)) */ + move16(); + c = sine_table[add( i, shr( len, 2 ) )]; /* cos(pi*i/(len/2)) */ + move16(); - tmp1 = L_sub( buf[2 * i], buf[len - 2 * i] ); - tmp2 = L_add( buf[2 * i + 1], buf[len - 2 * i + 1] ); + tmp1 = L_sub( buf[2 * i], buf[sub( len, shl( i, 1 ) )] ); + tmp2 = L_add( buf[add( shl( i, 1 ), 1 )], buf[add( sub( len, shl( i, 1 ) ), 1 )] ); tmp3 = L_sub( Mpy_32_16_1( tmp1, s ), Mpy_32_16_1( tmp2, c ) ); /* real part of j*W(k,N)*[T(k) - T'(N-k)] */ tmp4 = L_add( Mpy_32_16_1( tmp1, c ), Mpy_32_16_1( tmp2, s ) ); /* imag part of j*W(k,N)*[T(k) - T'(N-k)] */ - tmp1 = L_add( buf[2 * i], buf[len - 2 * i] ); - tmp2 = L_sub( buf[2 * i + 1], buf[len - 2 * i + 1] ); + tmp1 = L_add( buf[2 * i], buf[sub( len, shl( i, 1 ) )] ); + tmp2 = L_sub( buf[add( shl( i, 1 ), 1 )], buf[add( sub( len, shl( i, 1 ) ), 1 )] ); buf[2 * i] = L_shr( L_sub( tmp1, tmp3 ), 1 ); - buf[2 * i + 1] = L_shr( L_sub( tmp2, tmp4 ), 1 ); - buf[len - 2 * i] = L_shr( L_add( 
tmp1, tmp3 ), 1 ); - buf[len - 2 * i + 1] = L_negate( L_shr( L_add( tmp2, tmp4 ), 1 ) ); + move32(); + buf[add( shl( i, 1 ), 1 )] = L_shr( L_sub( tmp2, tmp4 ), 1 ); + move32(); + buf[sub( len, shl( i, 1 ) )] = L_shr( L_add( tmp1, tmp3 ), 1 ); + move32(); + buf[add( sub( len, shl( i, 1 ) ), 1 )] = L_negate( L_shr( L_add( tmp2, tmp4 ), 1 ) ); + move32(); } } @@ -2770,24 +3383,32 @@ static void rfft_pre( tmp1 = L_add( buf[0], buf[1] ); buf[1] = Mpy_32_16_1( L_sub( buf[0], buf[1] ), scale ); + move32(); buf[0] = Mpy_32_16_1( tmp1, scale ); + move32(); - FOR( i = 1; i <= ( len + 2 ) / 4; i++ ) + FOR( i = 1; i <= shr( add( len, 2 ), 2 ); i++ ) { - s = sine_table[i]; /* sin(pi*i/(len/2)) */ - c = sine_table[i + len / 4]; /* cos(pi*i/(len/2)) */ + s = sine_table[i]; /* sin(pi*i/(len/2)) */ + move16(); + c = sine_table[add( i, shr( len, 2 ) )]; /* cos(pi*i/(len/2)) */ + move16(); - tmp1 = L_sub( buf[2 * i], buf[len - 2 * i] ); - tmp2 = L_add( buf[2 * i + 1], buf[len - 2 * i + 1] ); + tmp1 = L_sub( buf[2 * i], buf[sub( len, shl( i, 1 ) )] ); + tmp2 = L_add( buf[add( shl( i, 1 ), 1 )], buf[add( sub( len, shl( i, 1 ) ), 1 )] ); tmp3 = L_add( Mpy_32_16_1( tmp1, s ), Mpy_32_16_1( tmp2, c ) ); /* real part of j*W(k,N)*[T(k) - T'(N-k)] */ tmp4 = L_sub( Mpy_32_16_1( tmp2, s ), Mpy_32_16_1( tmp1, c ) ); /* imag part of j*W(k,N)*[T(k) - T'(N-k)] */ - tmp1 = L_add( buf[2 * i], buf[len - 2 * i] ); - tmp2 = L_sub( buf[2 * i + 1], buf[len - 2 * i + 1] ); + tmp1 = L_add( buf[2 * i], buf[sub( len, shl( i, 1 ) )] ); + tmp2 = L_sub( buf[add( shl( i, 1 ), 1 )], buf[add( sub( len, shl( i, 1 ) ), 1 )] ); buf[2 * i] = Mpy_32_16_1( L_add( tmp1, tmp3 ), scale ); - buf[2 * i + 1] = L_negate( Mpy_32_16_1( L_add( tmp2, tmp4 ), scale ) ); - buf[len - 2 * i] = Mpy_32_16_1( L_sub( tmp1, tmp3 ), scale ); - buf[len - 2 * i + 1] = Mpy_32_16_1( L_sub( tmp2, tmp4 ), scale ); + move32(); + buf[add( shl( i, 1 ), 1 )] = L_negate( Mpy_32_16_1( L_add( tmp2, tmp4 ), scale ) ); + move32(); + buf[sub( len, shl( i, 1 
) )] = Mpy_32_16_1( L_sub( tmp1, tmp3 ), scale ); + move32(); + buf[add( sub( len, shl( i, 1 ) ), 1 )] = Mpy_32_16_1( L_sub( tmp2, tmp4 ), scale ); + move32(); } return; @@ -2814,13 +3435,17 @@ Word16 RFFTN_fx( FOR( i = 0; i < 320; i++ ) { x[i] = data[2 * i]; - y[i] = data[2 * i + 1]; + move32(); + y[i] = data[add( shl( i, 1 ), 1 )]; + move32(); } DoRTFT320_fx( x, y ); FOR( i = 0; i < 320; i++ ) { data[2 * i] = x[i]; - data[2 * i + 1] = y[i]; + move32(); + data[add( shl( i, 1 ), 1 )] = y[i]; + move32(); } IF( EQ_16( sign, -1 ) ) @@ -2834,27 +3459,36 @@ Word16 RFFTN_fx( { Word16 i; const Word16 log2 = 9; + move16(); Word32 reordered_data[512]; IF( EQ_16( sign, -1 ) ) { fft_rel_fx32( data, len, log2 ); reordered_data[0] = data[0]; + move32(); reordered_data[1] = data[len / 2]; + move32(); FOR( i = 1; i < len / 2; i++ ) { reordered_data[2 * i] = data[i]; - reordered_data[2 * i + 1] = data[len - i]; + move32(); + reordered_data[add( shl( i, 1 ), 1 )] = data[sub( len, i )]; + move32(); } } ELSE { reordered_data[0] = data[0]; + move32(); reordered_data[len / 2] = data[1]; + move32(); FOR( i = 1; i < len / 2; i++ ) { reordered_data[i] = data[2 * i]; - reordered_data[len - i] = data[2 * i + 1]; + move32(); + reordered_data[sub( len, i )] = data[add( shl( i, 1 ), 1 )]; + move32(); } ifft_rel_fx32( reordered_data, len, log2 ); } @@ -2876,7 +3510,9 @@ static void butterfly( Word32 *aMinusb ) { *aPlusb = L_add( a, b ); + move32(); *aMinusb = L_sub( a, b ); + move32(); return; } @@ -2892,14 +3528,22 @@ static void fft2( Word32 re2, im2; re1 = pInOut[0]; + move32(); im1 = pInOut[1]; + move32(); re2 = pInOut[2]; + move32(); im2 = pInOut[3]; + move32(); pInOut[0] = L_add( re1, re2 ); + move32(); pInOut[1] = L_add( im1, im2 ); + move32(); pInOut[2] = L_sub( re1, re2 ); + move32(); pInOut[3] = L_sub( im1, im2 ); + move32(); return; } @@ -2917,11 +3561,17 @@ static void fft3_2( Word32 *pInOut ) Word32 tmp3, tmp4; re1 = pInOut[0]; + move32(); im1 = pInOut[1]; + move32(); re2 = 
pInOut[2]; + move32(); im2 = pInOut[3]; + move32(); re3 = pInOut[4]; + move32(); im3 = pInOut[5]; + move32(); /* FFT MATRIX: 1.0000 1.0000 1.0000 @@ -2934,12 +3584,18 @@ static void fft3_2( Word32 *pInOut ) tmp2 = L_sub( re2, re3 ); tmp4 = L_sub( im2, im3 ); pInOut[0] = L_add( re1, tmp1 ); + move32(); pInOut[1] = L_add( im1, tmp3 ); + move32(); pInOut[2] = L_sub( re1, L_sub( Mpy_32_16_1( tmp1, C31 ), Mpy_32_16_1( tmp4, C32 ) ) ); + move32(); pInOut[4] = L_sub( re1, L_add( Mpy_32_16_1( tmp1, C31 ), Mpy_32_16_1( tmp4, C32 ) ) ); + move32(); pInOut[3] = L_sub( im1, L_add( Mpy_32_16_1( tmp2, C32 ), Mpy_32_16_1( tmp3, C31 ) ) ); + move32(); pInOut[5] = L_add( im1, L_sub( Mpy_32_16_1( tmp2, C32 ), Mpy_32_16_1( tmp3, C31 ) ) ); + move32(); } @@ -2957,13 +3613,21 @@ static void fft4( Word32 tmp7, tmp8; re1 = pInOut[0]; + move32(); im1 = pInOut[1]; + move32(); re2 = pInOut[2]; + move32(); im2 = pInOut[3]; + move32(); re3 = pInOut[4]; + move32(); im3 = pInOut[5]; + move32(); re4 = pInOut[6]; + move32(); im4 = pInOut[7]; + move32(); /* 1.0000 1.0000 1.0000 1.0000 @@ -2976,19 +3640,27 @@ static void fft4( tmp5 = L_add( im1, im3 ); tmp7 = L_add( im2, im4 ); pInOut[0] = L_add( tmp1, tmp3 ); + move32(); pInOut[4] = L_sub( tmp1, tmp3 ); + move32(); pInOut[1] = L_add( tmp5, tmp7 ); + move32(); pInOut[5] = L_sub( tmp5, tmp7 ); + move32(); tmp2 = L_sub( re1, re3 ); tmp4 = L_sub( re2, re4 ); tmp6 = L_sub( im1, im3 ); tmp8 = L_sub( im2, im4 ); pInOut[2] = L_add( tmp2, tmp8 ); + move32(); pInOut[6] = L_sub( tmp2, tmp8 ); + move32(); pInOut[3] = L_sub( tmp6, tmp4 ); + move32(); pInOut[7] = L_add( tmp4, tmp6 ); + move32(); return; } @@ -3005,10 +3677,15 @@ static void fft5( cmplx t[4]; x[0] = pInOut[0]; + move32(); x[1] = pInOut[1]; + move32(); x[2] = pInOut[2]; + move32(); x[3] = pInOut[3]; + move32(); x[4] = pInOut[4]; + move32(); /* 1.0000 1.0000 1.0000 1.0000 1.0000 @@ -3019,17 +3696,26 @@ static void fft5( 1.0000 0.3090 + 0.9511i -0.8090 + 0.5878i -0.8090 - 0.5878i 0.3090 - 0.9511i 
*/ t[0] = CL_add( x[1], x[4] ); + move32(); t[1] = CL_sub( x[1], x[4] ); + move32(); t[2] = CL_add( x[2], x[3] ); + move32(); t[3] = CL_sub( x[2], x[3] ); + move32(); pInOut[0] = CL_add( x[0], CL_add( t[0], t[2] ) ); + move32(); pInOut[1] = CL_add( CL_add( x[0], CL_sub( CL_scale( t[0], C51 ), CL_scale( t[2], C53 ) ) ), CL_add( CL_conjugate( CL_scale( CL_swap_real_imag( t[1] ), C52 ) ), CL_conjugate( CL_scale( CL_swap_real_imag( t[3] ), C54 ) ) ) ); + move32(); pInOut[4] = CL_add( x[0], CL_sub( CL_scale( t[0], C51 ), CL_add( CL_scale( t[2], C53 ), CL_add( CL_conjugate( CL_scale( CL_swap_real_imag( t[1] ), C52 ) ), CL_conjugate( CL_scale( CL_swap_real_imag( t[3] ), C54 ) ) ) ) ) ); + move32(); pInOut[2] = CL_add( CL_sub( x[0], CL_scale( t[0], C53 ) ), CL_add( CL_scale( t[2], C51 ), CL_sub( CL_conjugate( CL_scale( CL_swap_real_imag( t[1] ), C54 ) ), CL_conjugate( CL_scale( CL_swap_real_imag( t[3] ), C52 ) ) ) ) ); + move32(); pInOut[3] = CL_add( CL_sub( x[0], CL_scale( t[0], C53 ) ), CL_add( CL_sub( CL_scale( t[2], C51 ), CL_conjugate( CL_scale( CL_swap_real_imag( t[1] ), C54 ) ) ), CL_conjugate( CL_scale( CL_swap_real_imag( t[3] ), C52 ) ) ) ); + move32(); return; } @@ -3049,9 +3735,13 @@ static void fft8_2( Word32 im3_5p, im3_5m; re0 = pInOut[0]; + move32(); im0 = pInOut[1]; + move32(); re4 = pInOut[8]; + move32(); im4 = pInOut[9]; + move32(); butterfly( pInOut[1 * 2], pInOut[7 * 2], &re1_7p, &re1_7m ); butterfly( pInOut[1 * 2 + 1], pInOut[7 * 2 + 1], &im1_7p, &im1_7m ); butterfly( pInOut[2 * 2], pInOut[6 * 2], &re2_6p, &re2_6m ); @@ -3069,29 +3759,45 @@ static void fft8_2( 6: 1 + 0i - 0 + 1i -1 - 0i 0 - 1i 1 + 0i - 0 + 1i - 1 - 0i - 0 - 1i 7: 1 + 0i C81 + C81i -0 + 1i -C81 + C81i -1 - 0i -C81 - C81i - 0 - 1i C81 - C81i */ - pInOut[0] = re0 + re4 + re1_7p + re2_6p + re3_5p; - pInOut[1] = im0 + im4 + im1_7p + im2_6p + im3_5p; + pInOut[0] = L_add( L_add( L_add( L_add( re0, re4 ), re1_7p ), re2_6p ), re3_5p ); + move32(); + pInOut[1] = L_add( L_add( L_add( L_add( im0, 
im4 ), im1_7p ), im2_6p ), im3_5p ); + move32(); - pInOut[2] = re0 + Mpy_32_16_1( L_sub( re1_7p, re3_5p ), C81 ) - re4 + Mpy_32_16_1( L_add( im1_7m, im3_5m ), C81 ) + im2_6m; - pInOut[3] = im0 + Mpy_32_16_1( L_sub( im1_7p, im3_5p ), C81 ) - im4 - Mpy_32_16_1( L_add( re1_7m, re3_5m ), C81 ) - re2_6m; + pInOut[2] = L_add( L_add( L_sub( L_add( re0, Mpy_32_16_1( L_sub( re1_7p, re3_5p ), C81 ) ), re4 ), Mpy_32_16_1( L_add( im1_7m, im3_5m ), C81 ) ), im2_6m ); + move32(); + pInOut[3] = L_sub( L_sub( L_sub( L_add( im0, Mpy_32_16_1( L_sub( im1_7p, im3_5p ), C81 ) ), im4 ), Mpy_32_16_1( L_add( re1_7m, re3_5m ), C81 ) ), re2_6m ); + move32(); - pInOut[4] = re0 - re2_6p + re4 + im1_7m - im3_5m; - pInOut[5] = im0 - im2_6p + im4 - re1_7m + re3_5m; + pInOut[4] = L_sub( L_add( L_add( L_sub( re0, re2_6p ), re4 ), im1_7m ), im3_5m ); + move32(); + pInOut[5] = L_add( L_sub( L_add( L_sub( im0, im2_6p ), im4 ), re1_7m ), re3_5m ); + move32(); - pInOut[6] = re0 + Mpy_32_16_1( L_sub( re3_5p, re1_7p ), C81 ) - re4 + Mpy_32_16_1( L_add( im1_7m, im3_5m ), C81 ) - im2_6m; - pInOut[7] = im0 + Mpy_32_16_1( L_sub( im3_5p, im1_7p ), C81 ) - im4 - Mpy_32_16_1( L_add( re1_7m, re3_5m ), C81 ) + re2_6m; + pInOut[6] = L_sub( L_add( L_sub( L_add( re0, Mpy_32_16_1( L_sub( re3_5p, re1_7p ), C81 ) ), re4 ), Mpy_32_16_1( L_add( im1_7m, im3_5m ), C81 ) ), im2_6m ); + move32(); + pInOut[7] = L_add( L_sub( L_sub( L_add( im0, Mpy_32_16_1( L_sub( im3_5p, im1_7p ), C81 ) ), im4 ), Mpy_32_16_1( L_add( re1_7m, re3_5m ), C81 ) ), re2_6m ); + move32(); - pInOut[8] = re0 - re1_7p + re2_6p - re3_5p + re4; - pInOut[9] = im0 - im1_7p + im2_6p - im3_5p + im4; + pInOut[8] = L_add( L_sub( L_add( L_sub( re0, re1_7p ), re2_6p ), re3_5p ), re4 ); + move32(); + pInOut[9] = L_add( L_sub( L_add( L_sub( im0, im1_7p ), im2_6p ), im3_5p ), im4 ); + move32(); - pInOut[10] = re0 + Mpy_32_16_1( L_sub( re3_5p, re1_7p ), C81 ) - re4 - Mpy_32_16_1( L_add( im1_7m, im3_5m ), C81 ) + im2_6m; - pInOut[11] = im0 + Mpy_32_16_1( L_sub( 
im3_5p, im1_7p ), C81 ) - im4 + Mpy_32_16_1( L_add( re1_7m, re3_5m ), C81 ) - re2_6m; + pInOut[10] = L_add( L_sub( L_sub( L_add( re0, Mpy_32_16_1( L_sub( re3_5p, re1_7p ), C81 ) ), re4 ), Mpy_32_16_1( L_add( im1_7m, im3_5m ), C81 ) ), im2_6m ); + move32(); + pInOut[11] = L_sub( L_add( L_sub( L_add( im0, Mpy_32_16_1( L_sub( im3_5p, im1_7p ), C81 ) ), im4 ), Mpy_32_16_1( L_add( re1_7m, re3_5m ), C81 ) ), re2_6m ); + move32(); - pInOut[12] = re0 - re2_6p + re4 - im1_7m + im3_5m; - pInOut[13] = im0 - im2_6p + im4 + re1_7m - re3_5m; + pInOut[12] = L_add( L_sub( L_add( L_sub( re0, re2_6p ), re4 ), im1_7m ), im3_5m ); + move32(); + pInOut[13] = L_sub( L_add( L_add( L_sub( im0, im2_6p ), im4 ), re1_7m ), re3_5m ); + move32(); - pInOut[14] = re0 + Mpy_32_16_1( L_sub( re1_7p, re3_5p ), C81 ) - re4 - Mpy_32_16_1( L_add( im1_7m, im3_5m ), C81 ) - im2_6m; - pInOut[15] = im0 + Mpy_32_16_1( L_sub( im1_7p, im3_5p ), C81 ) - im4 + Mpy_32_16_1( L_add( re1_7m, re3_5m ), C81 ) + re2_6m; + pInOut[14] = L_sub( L_sub( L_sub( L_add( re0, Mpy_32_16_1( L_sub( re1_7p, re3_5p ), C81 ) ), re4 ), Mpy_32_16_1( L_add( im1_7m, im3_5m ), C81 ) ), im2_6m ); + move32(); + pInOut[15] = L_add( L_add( L_sub( L_add( im0, Mpy_32_16_1( L_sub( im1_7p, im3_5p ), C81 ) ), im4 ), Mpy_32_16_1( L_add( re1_7m, re3_5m ), C81 ) ), re2_6m ); + move32(); return; } @@ -3113,16 +3819,20 @@ static void nextFFT( fft4( x ); BREAK; case 5: - FOR( Word32 i = 0; i < 5; i++ ) + FOR( Word16 i = 0; i < 5; i++ ) { val[i].re = x[2 * i]; - val[i].im = x[2 * i + 1]; + move32(); + val[i].im = x[add( shl( i, 1 ), 1 )]; + move32(); } fft5( val ); - FOR( Word32 i = 0; i < 5; i++ ) + FOR( Word16 i = 0; i < 5; i++ ) { x[2 * i] = val[i].re; - x[2 * i + 1] = val[i].im; + move32(); + x[add( shl( i, 1 ), 1 )] = val[i].im; + move32(); } BREAK; case 8: @@ -3142,16 +3852,19 @@ static __inline Word16 findFactor( const Word16 length ) { Word16 i = 0; + move16(); Word16 factor = 0; + move16(); WHILE( CTFFTfactors[i] != 0 ) { - IF( EQ_16( 0, ( 
length % CTFFTfactors[i] ) ) ) + IF( ( length % CTFFTfactors[i] ) == 0 ) { factor = CTFFTfactors[i]; + move16(); BREAK; } - i++; + i = add( i, 1 ); } return factor; } @@ -3165,12 +3878,17 @@ static __inline void twiddle( Word16 i, ii; const Word16 *ptr_sin = &sin_twiddle_table_25_5_5[0]; const Word16 *ptr_cos = &cos_twiddle_table_25_5_5[0]; + + test(); + test(); + test(); + test(); IF( EQ_16( length, 16 ) && EQ_16( n1, 8 ) && EQ_16( n2, 2 ) ) { ptr_sin = &sin_twiddle_table_16_8_2[0]; ptr_cos = &cos_twiddle_table_16_8_2[0]; } - ELSE IF( length != 25 || n1 != 5 || n2 != 5 ) + ELSE IF( NE_16( length, 25 ) || NE_16( n1, 5 ) || NE_16( n2, 5 ) ) { assert( 0 ); } @@ -3180,10 +3898,12 @@ static __inline void twiddle( FOR( ii = 1; ii < n2; ii++ ) { Word32 xRe, xIm; - xRe = x[2 * ( i * n2 + ii )]; - xIm = x[2 * ( i * n2 + ii ) + 1]; - x[2 * ( i * n2 + ii )] = (Word32) L_sub( Mpy_32_16_1( xRe, ptr_cos[i * n2 + ii] ), Mpy_32_16_1( xIm, ptr_sin[i * n2 + ii] ) ); - x[2 * ( i * n2 + ii ) + 1] = (Word32) L_add( Mpy_32_16_1( xRe, ptr_sin[i * n2 + ii] ), Mpy_32_16_1( xIm, ptr_cos[i * n2 + ii] ) ); + xRe = x[shl( ( add( i_mult( i, n2 ), ii ) ), 1 )]; + xIm = x[add( shl( ( add( i_mult( i, n2 ), ii ) ), 1 ), 1 )]; + x[shl( ( add( i_mult( i, n2 ), ii ) ), 1 )] = L_sub( Mpy_32_16_1( xRe, ptr_cos[add( i_mult( i, n2 ), ii )] ), Mpy_32_16_1( xIm, ptr_sin[add( i_mult( i, n2 ), ii )] ) ); + move32(); + x[add( shl( ( add( i_mult( i, n2 ), ii ) ), 1 ), 1 )] = L_add( Mpy_32_16_1( xRe, ptr_sin[add( i_mult( i, n2 ), ii )] ), Mpy_32_16_1( xIm, ptr_cos[add( i_mult( i, n2 ), ii )] ) ); + move32(); } } return; @@ -3198,6 +3918,7 @@ static void cooleyTukeyFFT( Word16 i, ii; Word16 n1, n2; Word16 cnt = 0; + move16(); Word32 *src, *dest; cmplx val[5]; @@ -3218,13 +3939,17 @@ static void cooleyTukeyFFT( FOR( i = 0; i < 5; i++ ) { val[i].re = x[2 * i]; - val[i].im = x[2 * i + 1]; + move32(); + val[i].im = x[add( shl( i, 1 ), 1 )]; + move32(); } fft5( val ); FOR( i = 0; i < 5; i++ ) { x[2 * i] = val[i].re; 
- x[2 * i + 1] = val[i].im; + move32(); + x[add( shl( i, 1 ), 1 )] = val[i].im; + move32(); } BREAK; case 8: @@ -3234,10 +3959,11 @@ static void cooleyTukeyFFT( { factor = findFactor( length ); - IF( GT_16( factor, 0 ) && GT_16( length / factor, 1 ) ) + IF( GT_16( factor, 0 ) && GT_16( idiv1616( length, factor ), 1 ) ) { n1 = factor; - n2 = length / factor; + move16(); + n2 = idiv1616( length, factor ); /* DATA Resorting for stage1 */ dest = scratch; @@ -3249,7 +3975,9 @@ static void cooleyTukeyFFT( /* *dest++ = x[2*(i+ii*n1)]; */ /* *dest++ = x[2*(i+ii*n1)+1]; */ *dest++ = *src; + move32(); *dest++ = *( src + 1 ); + move32(); src += 2 * n1; } } @@ -3258,39 +3986,47 @@ static void cooleyTukeyFFT( FOR( i = 0; i < length; i++ ) { *dest++ = *src++; + move32(); *dest++ = *src++; + move32(); } /* perform n1 ffts of length n2 */ FOR( i = 0; i < n1; i++ ) { - cooleyTukeyFFT( x + 2 * i * n2, n2, scratch + 2 * i * n2 ); + cooleyTukeyFFT( x + shl( i_mult( i, n2 ), 1 ), n2, scratch + shl( i_mult( i, n2 ), 1 ) ); } /*data twiddeling */ twiddle( x, length, n1, n2 ); /* DATA Resorting for stage2 */ cnt = 0; + move16(); FOR( i = 0; i < n2; i++ ) { FOR( ii = 0; ii < n1; ii++ ) { - scratch[2 * cnt] = x[2 * ( i + ii * n2 )]; - scratch[2 * cnt + 1] = x[2 * ( i + ii * n2 ) + 1]; - cnt++; + scratch[2 * cnt] = x[shl( add( i, i_mult( ii, n2 ) ), 1 )]; + move32(); + scratch[add( shl( cnt, 1 ), 1 )] = x[add( shl( add( i, i_mult( ii, n2 ) ), 1 ), 1 )]; + move32(); + cnt = add( cnt, 1 ); } } /* perform n2 ffts of length n1 */ FOR( i = 0; i < n2; i++ ) { - nextFFT( scratch + 2 * i * n1, n1 ); + nextFFT( scratch + shl( i_mult( i, n1 ), 1 ), n1 ); } cnt = 0; + move16(); FOR( i = 0; i < n1; i++ ) { FOR( ii = 0; ii < n2; ii++ ) { - x[2 * cnt] = scratch[2 * ( i + ii * n1 )]; - x[2 * cnt + 1] = scratch[2 * ( i + ii * n1 ) + 1]; - cnt++; + x[2 * cnt] = scratch[shl( add( i, i_mult( ii, n1 ) ), 1 )]; + move32(); + x[add( shl( cnt, 1 ), 1 )] = scratch[add( shl( add( i, i_mult( ii, n1 ) ), 1 ), 1 )]; + 
move32(); + cnt = add( cnt, 1 ); } } } @@ -3318,38 +4054,52 @@ static void pfaDFT( { Word32 *tmp = scratch1; Word16 n1_inv = 1, n2_inv = 1; + move16(); + move16(); Word16 n2 = factor[0 /*idx*/]; - Word16 n1 = length / n2; + move16(); + Word16 n1 = idiv1616( length, n2 ); Word16 idx, incr; - WHILE( ( ( n1_inv * n1 ) % n2 ) != 1 ) + WHILE( ( i_mult( n1_inv, n1 ) % n2 ) != 1 ) { - n1_inv++; + n1_inv = add( n1_inv, 1 ); } - WHILE( ( ( n2_inv * n2 ) % n1 ) != 1 ) + WHILE( ( i_mult( n2_inv, n2 ) % n1 ) != 1 ) { - n2_inv++; + n2_inv = add( n2_inv, 1 ); } idx = 0; - incr = n1 * n1_inv; + move16(); + incr = i_mult( n1, n1_inv ); + move16(); cnt = 0; + move16(); FOR( i = 0; i < n1; i++ ) { FOR( ii = 0; ii < n2 - 1; ii++ ) { - tmp[cnt++] = x[2 * idx]; - tmp[cnt++] = x[2 * idx + 1]; + tmp[cnt] = x[2 * idx]; + move32(); + cnt = add( cnt, 1 ); + tmp[cnt] = x[add( shl( idx, 1 ), 1 )]; + move32(); + cnt = add( cnt, 1 ); - idx += incr; + idx = add( idx, incr ); IF( GT_16( idx, length ) ) { - idx -= length; + idx = sub( idx, length ); } } - tmp[cnt++] = x[2 * idx]; - tmp[cnt++] = x[2 * idx + 1]; - idx++; + tmp[cnt] = x[2 * idx]; + move32(); + cnt = add( cnt, 1 ); + tmp[cnt] = x[add( shl( idx, 1 ), 1 )]; + move32(); + cnt = add( cnt, 1 ); + idx = add( idx, 1 ); } FOR( cnt = 0; cnt < length; cnt += n2 ) { @@ -3359,35 +4109,45 @@ static void pfaDFT( { FOR( i = 0; i < n2; i++ ) { - x[2 * ( cnt + i * n1 )] = tmp[2 * ( cnt * n2 + i )]; - x[2 * ( cnt + i * n1 ) + 1] = tmp[2 * ( cnt * n2 + i ) + 1]; + x[shl( add( cnt, i_mult( i, n1 ) ), 1 )] = tmp[shl( add( i_mult( cnt, n2 ), i ), 1 )]; + move32(); + x[add( shl( add( cnt, i_mult( i, n1 ) ), 1 ), 1 )] = tmp[add( shl( add( i_mult( cnt, n2 ), i ), 1 ), 1 )]; + move32(); } } FOR( cnt = 0; cnt < length; cnt += n1 ) { - pfaDFT( x + 2 * cnt, n1, tmp, numFactors - 1, &factor[1] ); + pfaDFT( x + 2 * cnt, n1, tmp, sub( numFactors, 1 ), &factor[1] ); } idx = 0; + move16(); cnt = 0; + move16(); FOR( i = 0; i < n2; i++ ) { - idx = i * n1; + idx = 
i_mult( i, n1 ); FOR( ii = 0; ii < n1; ii++ ) { - tmp[2 * idx] = x[cnt++]; - tmp[2 * idx + 1] = x[cnt++]; - idx += n2; + tmp[2 * idx] = x[cnt]; + move32(); + cnt = add( cnt, 1 ); + tmp[add( shl( idx, 1 ), 1 )] = x[cnt]; + move32(); + cnt = add( cnt, 1 ); + idx = add( idx, n2 ); IF( GT_16( idx, length ) ) { - idx -= length; + idx = sub( idx, length ); } } } FOR( cnt = 0; cnt < length; cnt++ ) { x[2 * cnt] = tmp[2 * cnt]; - x[2 * cnt + 1] = tmp[2 * cnt + 1]; + move32(); + x[add( shl( cnt, 1 ), 1 )] = tmp[add( shl( cnt, 1 ), 1 )]; + move32(); } } ELSE @@ -3405,11 +4165,14 @@ static void fftf_interleave( const Word16 len ) { Word16 i = 0; + move16(); FOR( i = 0; i < len; i++ ) { *out++ = *re++; + move32(); *out++ = *im++; + move32(); } return; @@ -3422,11 +4185,14 @@ static void fftf_deinterleave( const Word16 len ) { Word16 i = 0; + move16(); FOR( i = 0; i < len; i++ ) { *re++ = *in++; + move32(); *im++ = *in++; + move32(); } return; @@ -3439,6 +4205,9 @@ static void DoRTFT600( { Word32 scratch[1200], cmplx[1200]; Word16 factors[3] = { 25, 8, 3 }; + move16(); + move16(); + move16(); fftf_interleave( x, y, cmplx, 600 ); pfaDFT( cmplx, 600, scratch, 3, factors ); @@ -3454,6 +4223,8 @@ static void DoRTFT400( { Word32 scratch[800], cmplx[800]; Word16 factors[2] = { 25, 16 }; + move16(); + move16(); fftf_interleave( x, y, cmplx, 400 ); pfaDFT( cmplx, 400, scratch, 2, factors ); @@ -3470,6 +4241,9 @@ static void DoRTFT240( { Word32 scratch[480], cmplx[480]; Word16 factors[3] = { 16, 5, 3 }; + move16(); + move16(); + move16(); fftf_interleave( x, y, cmplx, 240 ); pfaDFT( cmplx, 240, scratch, 3, factors ); @@ -3485,6 +4259,8 @@ static void DoRTFT200( { Word32 scratch[400], cmplx[400]; Word16 factors[2] = { 25, 8 }; + move16(); + move16(); fftf_interleave( x, y, cmplx, 200 ); pfaDFT( cmplx, 200, scratch, 2, factors ); @@ -3500,6 +4276,8 @@ static void DoRTFT100( { Word32 scratch[200], cmplx[200]; Word16 factors[2] = { 25, 4 }; + move16(); + move16(); fftf_interleave( x, y, 
cmplx, 100 ); pfaDFT( cmplx, 100, scratch, 2, factors ); @@ -3585,6 +4363,7 @@ static void fft_len5( t = CL_scale( CL_sub( y1, y3 ), FFT_C54 ); y1 = CL_add( y1, y3 ); x[0] = CL_add( x[0], y1 ); + move64(); y1 = CL_add( x[0], CL_shl( CL_scale( y1, FFT_C55 ), 1 ) ); y3 = CL_sub( y1, t ); @@ -3595,9 +4374,13 @@ static void fft_len5( y2 = CL_add( t, CL_scale( y2, FFT_C53 ) ); x[1] = CL_msu_j( y1, y2 ); + move64(); x[4] = CL_mac_j( y1, y2 ); + move64(); x[2] = CL_mac_j( y3, y4 ); + move64(); x[3] = CL_msu_j( y3, y4 ); + move64(); return; } @@ -3608,36 +4391,63 @@ static void fft_len8( cmplx t[8], s[8]; t[0] = CL_add( x[0], x[4] ); + move64(); t[1] = CL_sub( x[0], x[4] ); + move64(); t[2] = CL_add( x[1], x[5] ); + move64(); t[3] = CL_sub( x[1], x[5] ); + move64(); t[4] = CL_add( x[2], x[6] ); + move64(); t[5] = CL_sub( x[2], x[6] ); + move64(); t[6] = CL_add( x[3], x[7] ); + move64(); t[7] = CL_sub( x[3], x[7] ); + move64(); s[0] = CL_add( t[0], t[4] ); + move64(); s[2] = CL_sub( t[0], t[4] ); + move64(); s[4] = CL_mac_j( t[1], t[5] ); + move64(); s[5] = CL_msu_j( t[1], t[5] ); + move64(); s[1] = CL_add( t[2], t[6] ); + move64(); s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + move64(); t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + move64(); t[1] = CL_sub( t[3], t[7] ); + move64(); s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + move64(); s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + move64(); s[7] = CL_conjugate( s[7] ); + move64(); x[0] = CL_add( s[0], s[1] ); + move64(); x[4] = CL_sub( s[0], s[1] ); + move64(); x[2] = CL_sub( s[2], s[3] ); + move64(); x[6] = CL_add( s[2], s[3] ); + move64(); x[3] = CL_add( s[4], s[7] ); + move64(); x[7] = CL_sub( s[4], s[7] ); + move64(); x[1] = CL_add( s[5], s[6] ); + move64(); x[5] = CL_sub( s[5], s[6] ); + move64(); return; } @@ -3650,58 +4460,100 @@ static void fft_len10( cmplx y[10]; s[0] = CL_add( x[6], x[4] ); + move64(); s[3] = CL_sub( x[6], 
x[4] ); + move64(); s[2] = CL_add( x[2], x[8] ); + move64(); s[1] = CL_sub( x[2], x[8] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[0] = CL_add( x[0], s[0] ); + move64(); s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + move64(); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + move64(); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[2] = CL_msu_j( s[0], s[1] ); + move64(); y[8] = CL_mac_j( s[0], s[1] ); + move64(); y[4] = CL_mac_j( s[2], s[3] ); + move64(); y[6] = CL_msu_j( s[2], s[3] ); + move64(); s[0] = CL_add( x[1], x[9] ); + move64(); s[3] = CL_sub( x[1], x[9] ); + move64(); s[2] = CL_add( x[7], x[3] ); + move64(); s[1] = CL_sub( x[7], x[3] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[1] = CL_add( x[5], s[0] ); + move64(); s[0] = CL_add( y[1], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + move64(); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[3] = CL_msu_j( s[0], s[1] ); + move64(); y[9] = CL_mac_j( s[0], s[1] ); + move64(); + move64(); y[5] = CL_mac_j( s[2], s[3] ); + move64(); y[7] = CL_msu_j( s[2], s[3] ); + move64(); x[0] = CL_add( y[0], y[1] ); + move64(); x[5] = CL_sub( y[0], y[1] ); + move64(); x[2] = CL_add( y[2], y[3] ); + move64(); x[7] = CL_sub( y[2], y[3] ); + move64(); x[4] = CL_add( y[4], y[5] ); + move64(); x[9] = CL_sub( y[4], y[5] ); + move64(); x[6] = CL_add( y[6], y[7] ); + move64(); x[1] = CL_sub( y[6], y[7] ); + move64(); x[8] = CL_add( y[8], y[9] ); + move64(); x[3] = CL_sub( y[8], y[9] ); + move64(); return; } @@ -3714,101 +4566,176 @@ 
static void fft_len15( cmplx y[15]; s[0] = CL_add( x[3], x[12] ); + move64(); s[3] = CL_sub( x[3], x[12] ); + move64(); s[2] = CL_add( x[6], x[9] ); + move64(); s[1] = CL_sub( x[6], x[9] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[0] = CL_add( x[0], s[0] ); + move64(); s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + move64(); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[1] = CL_msu_j( s[0], s[1] ); + move64(); y[4] = CL_mac_j( s[0], s[1] ); + move64(); y[2] = CL_mac_j( s[2], s[3] ); + move64(); y[3] = CL_msu_j( s[2], s[3] ); + move64(); s[0] = CL_add( x[8], x[2] ); + move64(); s[3] = CL_sub( x[8], x[2] ); + move64(); s[2] = CL_add( x[11], x[14] ); + move64(); s[1] = CL_sub( x[11], x[14] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[5] = CL_add( x[5], s[0] ); + move64(); s[0] = CL_add( y[5], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + move64(); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[6] = CL_msu_j( s[0], s[1] ); + move64(); y[9] = CL_mac_j( s[0], s[1] ); + move64(); y[7] = CL_mac_j( s[2], s[3] ); + move64(); y[8] = CL_msu_j( s[2], s[3] ); + move64(); s[0] = CL_add( x[13], x[7] ); + move64(); s[3] = CL_sub( x[13], x[7] ); + move64(); s[2] = CL_add( x[1], x[4] ); + move64(); s[1] = CL_sub( x[1], x[4] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[10] = CL_add( x[10], s[0] ); + move64(); s[0] = CL_add( y[10], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + 
move64(); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[11] = CL_msu_j( s[0], s[1] ); + move64(); y[14] = CL_mac_j( s[0], s[1] ); + move64(); y[12] = CL_mac_j( s[2], s[3] ); + move64(); y[13] = CL_msu_j( s[2], s[3] ); + move64(); s[0] = CL_add( y[5], y[10] ); + move64(); s[1] = CL_scale( CL_sub( y[5], y[10] ), FFT_C31 ); + move64(); x[0] = CL_add( y[0], s[0] ); + move64(); s[0] = CL_sub( y[0], CL_shr( s[0], 1 ) ); + move64(); x[10] = CL_mac_j( s[0], s[1] ); + move64(); x[5] = CL_msu_j( s[0], s[1] ); + move64(); s[0] = CL_add( y[6], y[11] ); + move64(); s[1] = CL_scale( CL_sub( y[6], y[11] ), FFT_C31 ); + move64(); x[6] = CL_add( y[1], s[0] ); + move64(); s[0] = CL_sub( y[1], CL_shr( s[0], 1 ) ); + move64(); x[1] = CL_mac_j( s[0], s[1] ); + move64(); x[11] = CL_msu_j( s[0], s[1] ); + move64(); s[0] = CL_add( y[7], y[12] ); + move64(); s[1] = CL_scale( CL_sub( y[7], y[12] ), FFT_C31 ); + move64(); x[12] = CL_add( y[2], s[0] ); + move64(); s[0] = CL_sub( y[2], CL_shr( s[0], 1 ) ); + move64(); x[7] = CL_mac_j( s[0], s[1] ); + move64(); x[2] = CL_msu_j( s[0], s[1] ); + move64(); s[0] = CL_add( y[8], y[13] ); + move64(); s[1] = CL_scale( CL_sub( y[8], y[13] ), FFT_C31 ); + move64(); x[3] = CL_add( y[3], s[0] ); + move64(); s[0] = CL_sub( y[3], CL_shr( s[0], 1 ) ); + move64(); x[13] = CL_mac_j( s[0], s[1] ); + move64(); x[8] = CL_msu_j( s[0], s[1] ); + move64(); s[0] = CL_add( y[9], y[14] ); + move64(); s[1] = CL_scale( CL_sub( y[9], y[14] ), FFT_C31 ); + move64(); x[9] = CL_add( y[4], s[0] ); + move64(); s[0] = CL_sub( y[4], CL_shr( s[0], 1 ) ); + move64(); x[4] = CL_mac_j( s[0], s[1] ); + move64(); x[14] = CL_msu_j( s[0], s[1] ); + move64(); return; } @@ -3821,137 +4748,242 @@ static void fft_len16( cmplx y[16]; s[0] = CL_shr( x[0], SCALEFACTOR16 ); + move64(); 
s[1] = CL_shr( x[4], SCALEFACTOR16 ); + move64(); s[2] = CL_shr( x[8], SCALEFACTOR16 ); + move64(); s[3] = CL_shr( x[12], SCALEFACTOR16 ); + move64(); t[0] = CL_add( s[0], s[2] ); + move64(); t[1] = CL_sub( s[0], s[2] ); + move64(); t[2] = CL_add( s[1], s[3] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( s[1] ), CL_conjugate( s[3] ) ) ); + move64(); y[0] = CL_add( t[0], t[2] ); + move64(); y[1] = CL_sub( t[1], t[3] ); + move64(); y[2] = CL_sub( t[0], t[2] ); + move64(); y[3] = CL_add( t[1], t[3] ); + move64(); s[0] = CL_shr( x[1], SCALEFACTOR16 ); + move64(); s[1] = CL_shr( x[5], SCALEFACTOR16 ); + move64(); s[2] = CL_shr( x[9], SCALEFACTOR16 ); + move64(); s[3] = CL_shr( x[13], SCALEFACTOR16 ); + move64(); t[0] = CL_add( s[0], s[2] ); + move64(); t[1] = CL_sub( s[0], s[2] ); + move64(); t[2] = CL_add( s[1], s[3] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( s[1] ), CL_conjugate( s[3] ) ) ); + move64(); y[4] = CL_add( t[0], t[2] ); + move64(); y[5] = CL_sub( t[1], t[3] ); + move64(); y[6] = CL_sub( t[0], t[2] ); + move64(); y[7] = CL_add( t[1], t[3] ); + move64(); s[0] = CL_shr( x[2], SCALEFACTOR16 ); + move64(); s[1] = CL_shr( x[6], SCALEFACTOR16 ); + move64(); s[2] = CL_shr( x[10], SCALEFACTOR16 ); + move64(); s[3] = CL_shr( x[14], SCALEFACTOR16 ); + move64(); t[0] = CL_add( s[0], s[2] ); + move64(); t[1] = CL_sub( s[0], s[2] ); + move64(); t[2] = CL_add( s[1], s[3] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( s[1] ), CL_conjugate( s[3] ) ) ); + move64(); y[8] = CL_add( t[0], t[2] ); + move64(); y[9] = CL_sub( t[1], t[3] ); + move64(); y[10] = CL_swap_real_imag( CL_sub( t[0], t[2] ) ); + move64(); y[10] = CL_conjugate( y[10] ); + move64(); y[11] = CL_add( t[1], t[3] ); + move64(); s[0] = CL_shr( x[3], SCALEFACTOR16 ); + move64(); s[1] = CL_shr( x[7], SCALEFACTOR16 ); + move64(); s[2] = CL_shr( x[11], SCALEFACTOR16 ); + move64(); s[3] = CL_shr( x[15], SCALEFACTOR16 ); + move64(); t[0] = CL_add( s[0], s[2] ); + 
move64(); t[1] = CL_sub( s[0], s[2] ); + move64(); t[2] = CL_add( s[1], s[3] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( s[1] ), CL_conjugate( s[3] ) ) ); + move64(); y[12] = CL_add( t[0], t[2] ); + move64(); y[13] = CL_sub( t[1], t[3] ); + move64(); y[14] = CL_sub( t[0], t[2] ); + move64(); y[15] = CL_add( t[1], t[3] ); + move64(); s[0] = CL_scale( y[11], FFT_C162 ); + move64(); y[11] = CL_mac_j( s[0], s[0] ); + move64(); s[0] = CL_scale( y[14], FFT_C162 ); + move64(); y[14] = CL_mac_j( s[0], s[0] ); + move64(); s[0] = CL_scale( y[6], FFT_C161 ); + move64(); y[6] = CL_mac_j( s[0], s[0] ); + move64(); y[6] = CL_swap_real_imag( y[6] ); + move64(); y[6] = CL_conjugate( y[6] ); + move64(); s[0] = CL_scale( y[9], FFT_C161 ); + move64(); y[9] = CL_mac_j( s[0], s[0] ); + move64(); y[9] = CL_swap_real_imag( y[9] ); + move64(); y[9] = CL_conjugate( y[9] ); + move64(); s[0] = CL_scale( y[5], FFT_C163 ); + move64(); s[1] = CL_scale( y[5], FFT_C166 ); + move64(); y[5] = CL_mac_j( s[0], s[1] ); + move64(); s[0] = CL_scale( y[7], FFT_C165 ); + move64(); s[1] = CL_scale( y[7], FFT_C164 ); + move64(); y[7] = CL_mac_j( s[0], s[1] ); + move64(); s[0] = CL_scale( y[13], FFT_C165 ); + move64(); s[1] = CL_scale( y[13], FFT_C164 ); + move64(); y[13] = CL_mac_j( s[0], s[1] ); + move64(); s[0] = CL_scale( y[15], FFT_C164 ); + move64(); s[1] = CL_scale( y[15], FFT_C165 ); + move64(); y[15] = CL_mac_j( s[0], s[1] ); + move64(); t[0] = CL_add( y[0], y[8] ); + move64(); t[1] = CL_sub( y[0], y[8] ); + move64(); t[2] = CL_add( y[4], y[12] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[4] ), CL_conjugate( y[12] ) ) ); + move64(); x[0] = CL_add( t[0], t[2] ); + move64(); x[4] = CL_sub( t[1], t[3] ); + move64(); x[8] = CL_sub( t[0], t[2] ); + move64(); x[12] = CL_add( t[1], t[3] ); + move64(); t[0] = CL_add( y[1], y[9] ); + move64(); t[1] = CL_sub( y[1], y[9] ); + move64(); t[2] = CL_add( y[5], y[13] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( 
CL_conjugate( y[5] ), CL_conjugate( y[13] ) ) ); + move64(); x[1] = CL_add( t[0], t[2] ); + move64(); x[5] = CL_sub( t[1], t[3] ); + move64(); x[9] = CL_sub( t[0], t[2] ); + move64(); x[13] = CL_add( t[1], t[3] ); + move64(); t[0] = CL_add( y[2], y[10] ); + move64(); t[1] = CL_sub( y[2], y[10] ); + move64(); t[2] = CL_add( y[6], y[14] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[6] ), CL_conjugate( y[14] ) ) ); + move64(); x[2] = CL_add( t[0], t[2] ); + move64(); x[6] = CL_sub( t[1], t[3] ); + move64(); x[10] = CL_sub( t[0], t[2] ); + move64(); x[14] = CL_add( t[1], t[3] ); + move64(); t[0] = CL_add( y[3], y[11] ); + move64(); t[1] = CL_sub( y[3], y[11] ); + move64(); t[2] = CL_add( y[7], y[15] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[7] ), CL_conjugate( y[15] ) ) ); + move64(); x[3] = CL_add( t[0], t[2] ); + move64(); x[7] = CL_sub( t[1], t[3] ); + move64(); x[11] = CL_sub( t[0], t[2] ); + move64(); x[15] = CL_add( t[1], t[3] ); + move64(); return; } @@ -3966,154 +4998,274 @@ static void fft_len20_fx( cmplx y[20]; xx[0] = CL_shr( x[0], SCALEFACTOR20 ); + move64(); xx[1] = CL_shr( x[16], SCALEFACTOR20 ); + move64(); xx[2] = CL_shr( x[12], SCALEFACTOR20 ); + move64(); xx[3] = CL_shr( x[8], SCALEFACTOR20 ); + move64(); xx[4] = CL_shr( x[4], SCALEFACTOR20 ); + move64(); s[0] = CL_add( xx[1], xx[4] ); + move64(); s[3] = CL_sub( xx[1], xx[4] ); + move64(); s[2] = CL_add( xx[2], xx[3] ); + move64(); s[1] = CL_sub( xx[2], xx[3] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[0] = CL_add( xx[0], s[0] ); + move64(); s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + move64(); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[4] = CL_msu_j( s[0], s[1] ); + 
move64(); y[16] = CL_mac_j( s[0], s[1] ); + move64(); y[8] = CL_mac_j( s[2], s[3] ); + move64(); y[12] = CL_msu_j( s[2], s[3] ); + move64(); xx[0] = CL_shr( x[5], SCALEFACTOR20 ); + move64(); xx[1] = CL_shr( x[1], SCALEFACTOR20 ); + move64(); xx[2] = CL_shr( x[17], SCALEFACTOR20 ); + move64(); xx[3] = CL_shr( x[13], SCALEFACTOR20 ); + move64(); xx[4] = CL_shr( x[9], SCALEFACTOR20 ); + move64(); s[0] = CL_add( xx[1], xx[4] ); + move64(); s[3] = CL_sub( xx[1], xx[4] ); + move64(); s[2] = CL_add( xx[2], xx[3] ); + move64(); s[1] = CL_sub( xx[2], xx[3] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[1] = CL_add( xx[0], s[0] ); + move64(); s[0] = CL_add( y[1], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + move64(); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[5] = CL_msu_j( s[0], s[1] ); + move64(); y[17] = CL_mac_j( s[0], s[1] ); + move64(); y[9] = CL_mac_j( s[2], s[3] ); + move64(); y[13] = CL_msu_j( s[2], s[3] ); + move64(); xx[0] = CL_shr( x[10], SCALEFACTOR20 ); + move64(); xx[1] = CL_shr( x[6], SCALEFACTOR20 ); + move64(); xx[2] = CL_shr( x[2], SCALEFACTOR20 ); + move64(); xx[3] = CL_shr( x[18], SCALEFACTOR20 ); + move64(); xx[4] = CL_shr( x[14], SCALEFACTOR20 ); + move64(); s[0] = CL_add( xx[1], xx[4] ); + move64(); s[3] = CL_sub( xx[1], xx[4] ); + move64(); s[2] = CL_add( xx[2], xx[3] ); + move64(); s[1] = CL_sub( xx[2], xx[3] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[2] = CL_add( xx[0], s[0] ); + move64(); s[0] = CL_add( y[2], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + move64(); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( 
s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[6] = CL_msu_j( s[0], s[1] ); + move64(); y[18] = CL_mac_j( s[0], s[1] ); + move64(); y[10] = CL_mac_j( s[2], s[3] ); + move64(); y[14] = CL_msu_j( s[2], s[3] ); + move64(); xx[0] = CL_shr( x[15], SCALEFACTOR20 ); + move64(); xx[1] = CL_shr( x[11], SCALEFACTOR20 ); + move64(); xx[2] = CL_shr( x[7], SCALEFACTOR20 ); + move64(); xx[3] = CL_shr( x[3], SCALEFACTOR20 ); + move64(); xx[4] = CL_shr( x[19], SCALEFACTOR20 ); + move64(); s[0] = CL_add( xx[1], xx[4] ); + move64(); s[3] = CL_sub( xx[1], xx[4] ); + move64(); s[2] = CL_add( xx[2], xx[3] ); + move64(); s[1] = CL_sub( xx[2], xx[3] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[3] = CL_add( xx[0], s[0] ); + move64(); s[0] = CL_add( y[3], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + move64(); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[7] = CL_msu_j( s[0], s[1] ); + move64(); y[19] = CL_mac_j( s[0], s[1] ); + move64(); y[11] = CL_mac_j( s[2], s[3] ); + move64(); y[15] = CL_msu_j( s[2], s[3] ); + move64(); tt[0] = CL_add( y[0], y[2] ); + move64(); tt[1] = CL_sub( y[0], y[2] ); + move64(); tt[2] = CL_add( y[1], y[3] ); + move64(); tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[1], y[3] ) ) ); + move64(); x[0] = CL_add( tt[0], tt[2] ); + move64(); x[5] = CL_sub( tt[1], tt[3] ); + move64(); x[10] = CL_sub( tt[0], tt[2] ); + move64(); x[15] = CL_add( tt[1], tt[3] ); + move64(); tt[0] = CL_add( y[4], y[6] ); + move64(); tt[1] = CL_sub( y[4], y[6] ); + move64(); tt[2] = CL_add( y[5], y[7] ); + move64(); tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[5], y[7] ) ) ); + move64(); x[4] = CL_add( tt[0], tt[2] ); + move64(); x[9] = CL_sub( tt[1], tt[3] ); + 
move64(); x[14] = CL_sub( tt[0], tt[2] ); + move64(); x[19] = CL_add( tt[1], tt[3] ); + move64(); tt[0] = CL_add( y[8], y[10] ); + move64(); tt[1] = CL_sub( y[8], y[10] ); + move64(); tt[2] = CL_add( y[9], y[11] ); + move64(); tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[9], y[11] ) ) ); + move64(); x[8] = CL_add( tt[0], tt[2] ); + move64(); x[13] = CL_sub( tt[1], tt[3] ); + move64(); x[18] = CL_sub( tt[0], tt[2] ); + move64(); x[3] = CL_add( tt[1], tt[3] ); + move64(); tt[0] = CL_add( y[12], y[14] ); + move64(); tt[1] = CL_sub( y[12], y[14] ); + move64(); tt[2] = CL_add( y[13], y[15] ); + move64(); tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[13], y[15] ) ) ); + move64(); x[12] = CL_add( tt[0], tt[2] ); + move64(); x[17] = CL_sub( tt[1], tt[3] ); + move64(); x[2] = CL_sub( tt[0], tt[2] ); + move64(); x[7] = CL_add( tt[1], tt[3] ); + move64(); tt[0] = CL_add( y[16], y[18] ); + move64(); tt[1] = CL_sub( y[16], y[18] ); + move64(); tt[2] = CL_add( y[17], y[19] ); + move64(); tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[17], y[19] ) ) ); + move64(); x[16] = CL_add( tt[0], tt[2] ); + move64(); x[1] = CL_sub( tt[1], tt[3] ); + move64(); x[6] = CL_sub( tt[0], tt[2] ); + move64(); x[11] = CL_add( tt[1], tt[3] ); + move64(); return; } @@ -4132,323 +5284,561 @@ static void fft_len30( h = &x[15]; xx[0] = x[0]; + move64(); xx[1] = x[18]; + move64(); xx[2] = x[6]; + move64(); xx[3] = x[24]; + move64(); xx[4] = x[12]; + move64(); xx[5] = x[20]; + move64(); xx[6] = x[8]; + move64(); xx[7] = x[26]; + move64(); xx[8] = x[14]; + move64(); xx[9] = x[2]; + move64(); xx[10] = x[10]; + move64(); xx[11] = x[28]; + move64(); xx[12] = x[16]; + move64(); xx[13] = x[4]; + move64(); xx[14] = x[22]; + move64(); s[0] = CL_add( xx[1], xx[4] ); + move64(); s[3] = CL_sub( xx[1], xx[4] ); + move64(); s[2] = CL_add( xx[2], xx[3] ); + move64(); s[1] = CL_sub( xx[2], xx[3] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[0] = 
CL_add( xx[0], s[0] ); s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + move64(); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[1] = CL_msu_j( s[0], s[1] ); + move64(); y[4] = CL_mac_j( s[0], s[1] ); + move64(); y[2] = CL_mac_j( s[2], s[3] ); + move64(); y[3] = CL_msu_j( s[2], s[3] ); + move64(); s[0] = CL_add( xx[6], xx[9] ); + move64(); s[3] = CL_sub( xx[6], xx[9] ); + move64(); s[2] = CL_add( xx[7], xx[8] ); + move64(); s[1] = CL_sub( xx[7], xx[8] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[5] = CL_add( xx[5], s[0] ); + move64(); s[0] = CL_add( y[5], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + move64(); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[6] = CL_msu_j( s[0], s[1] ); + move64(); y[9] = CL_mac_j( s[0], s[1] ); + move64(); y[7] = CL_mac_j( s[2], s[3] ); + move64(); y[8] = CL_msu_j( s[2], s[3] ); + move64(); s[0] = CL_add( xx[11], xx[14] ); + move64(); s[3] = CL_sub( xx[11], xx[14] ); + move64(); s[2] = CL_add( xx[12], xx[13] ); + move64(); s[1] = CL_sub( xx[12], xx[13] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[10] = CL_add( xx[10], s[0] ); + move64(); s[0] = CL_add( y[10], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[11] = CL_msu_j( s[0], s[1] ); + 
move64(); y[14] = CL_mac_j( s[0], s[1] ); + move64(); y[12] = CL_mac_j( s[2], s[3] ); + move64(); y[13] = CL_msu_j( s[2], s[3] ); + move64(); s[0] = CL_add( y[5], y[10] ); + move64(); s[1] = CL_scale( CL_sub( y[5], y[10] ), FFT_C31 ); + move64(); z[0] = CL_add( y[0], s[0] ); + move64(); s[0] = CL_sub( y[0], CL_shr( s[0], 1 ) ); + move64(); z[10] = CL_mac_j( s[0], s[1] ); + move64(); z[5] = CL_msu_j( s[0], s[1] ); + move64(); s[0] = CL_add( y[6], y[11] ); + move64(); s[1] = CL_scale( CL_sub( y[6], y[11] ), FFT_C31 ); + move64(); z[6] = CL_add( y[1], s[0] ); + move64(); s[0] = CL_sub( y[1], CL_shr( s[0], 1 ) ); + move64(); z[1] = CL_mac_j( s[0], s[1] ); + move64(); z[11] = CL_msu_j( s[0], s[1] ); + move64(); s[0] = CL_add( y[7], y[12] ); + move64(); s[1] = CL_scale( CL_sub( y[7], y[12] ), FFT_C31 ); + move64(); z[12] = CL_add( y[2], s[0] ); + move64(); s[0] = CL_sub( y[2], CL_shr( s[0], 1 ) ); + move64(); z[7] = CL_mac_j( s[0], s[1] ); + move64(); z[2] = CL_msu_j( s[0], s[1] ); + move64(); s[0] = CL_add( y[8], y[13] ); + move64(); s[1] = CL_scale( CL_sub( y[8], y[13] ), FFT_C31 ); + move64(); z[3] = CL_add( y[3], s[0] ); + move64(); s[0] = CL_sub( y[3], CL_shr( s[0], 1 ) ); + move64(); z[13] = CL_mac_j( s[0], s[1] ); + move64(); z[8] = CL_msu_j( s[0], s[1] ); + move64(); s[0] = CL_add( y[9], y[14] ); + move64(); s[1] = CL_scale( CL_sub( y[9], y[14] ), FFT_C31 ); + move64(); z[9] = CL_add( y[4], s[0] ); + move64(); s[0] = CL_sub( y[4], CL_shr( s[0], 1 ) ); + move64(); z[4] = CL_mac_j( s[0], s[1] ); + move64(); z[14] = CL_msu_j( s[0], s[1] ); + move64(); xx[0] = x[15]; + move64(); xx[1] = x[3]; + move64(); xx[2] = x[21]; + move64(); xx[3] = x[9]; + move64(); xx[4] = x[27]; + move64(); xx[5] = x[5]; + move64(); xx[6] = x[23]; + move64(); xx[7] = x[11]; + move64(); xx[8] = x[29]; + move64(); xx[9] = x[17]; + move64(); xx[10] = x[25]; + move64(); xx[11] = x[13]; + move64(); xx[12] = x[1]; + move64(); xx[13] = x[19]; + move64(); xx[14] = x[7]; + move64(); s[0] = CL_add( 
xx[1], xx[4] ); + move64(); s[3] = CL_sub( xx[1], xx[4] ); + move64(); s[2] = CL_add( xx[2], xx[3] ); + move64(); s[1] = CL_sub( xx[2], xx[3] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[0] = CL_add( xx[0], s[0] ); + move64(); s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + move64(); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[1] = CL_msu_j( s[0], s[1] ); + move64(); y[4] = CL_mac_j( s[0], s[1] ); + move64(); y[2] = CL_mac_j( s[2], s[3] ); + move64(); y[3] = CL_msu_j( s[2], s[3] ); + move64(); s[0] = CL_add( xx[6], xx[9] ); + move64(); s[3] = CL_sub( xx[6], xx[9] ); + move64(); s[2] = CL_add( xx[7], xx[8] ); + move64(); s[1] = CL_sub( xx[7], xx[8] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[5] = CL_add( xx[5], s[0] ); + move64(); s[0] = CL_add( y[5], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + move64(); s[2] = CL_sub( s[0], t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[6] = CL_msu_j( s[0], s[1] ); + move64(); y[9] = CL_mac_j( s[0], s[1] ); + move64(); y[7] = CL_mac_j( s[2], s[3] ); + move64(); y[8] = CL_msu_j( s[2], s[3] ); + move64(); s[0] = CL_add( xx[11], xx[14] ); + move64(); s[3] = CL_sub( xx[11], xx[14] ); + move64(); s[2] = CL_add( xx[12], xx[13] ); + move64(); s[1] = CL_sub( xx[12], xx[13] ); + move64(); t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); s[0] = CL_add( s[0], s[2] ); + move64(); y[10] = CL_add( xx[10], s[0] ); + move64(); s[0] = CL_add( y[10], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + move64(); s[2] = CL_sub( s[0], 
t ); + move64(); s[0] = CL_add( s[0], t ); + move64(); t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + move64(); s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + move64(); y[11] = CL_msu_j( s[0], s[1] ); + move64(); y[14] = CL_mac_j( s[0], s[1] ); + move64(); y[12] = CL_mac_j( s[2], s[3] ); + move64(); y[13] = CL_msu_j( s[2], s[3] ); + move64(); s[0] = CL_add( y[5], y[10] ); + move64(); s[1] = CL_scale( CL_sub( y[5], y[10] ), FFT_C31 ); + move64(); z[15] = CL_add( y[0], s[0] ); + move64(); s[0] = CL_sub( y[0], CL_shr( s[0], 1 ) ); + move64(); z[25] = CL_mac_j( s[0], s[1] ); + move64(); z[20] = CL_msu_j( s[0], s[1] ); + move64(); s[0] = CL_add( y[6], y[11] ); + move64(); s[1] = CL_scale( CL_sub( y[6], y[11] ), FFT_C31 ); + move64(); z[21] = CL_add( y[1], s[0] ); + move64(); s[0] = CL_sub( y[1], CL_shr( s[0], 1 ) ); + move64(); z[16] = CL_mac_j( s[0], s[1] ); + move64(); z[26] = CL_msu_j( s[0], s[1] ); + move64(); s[0] = CL_add( y[7], y[12] ); + move64(); s[1] = CL_scale( CL_sub( y[7], y[12] ), FFT_C31 ); + move64(); z[27] = CL_add( y[2], s[0] ); + move64(); s[0] = CL_sub( y[2], CL_shr( s[0], 1 ) ); + move64(); z[22] = CL_mac_j( s[0], s[1] ); + move64(); z[17] = CL_msu_j( s[0], s[1] ); + move64(); s[0] = CL_add( y[8], y[13] ); + move64(); s[1] = CL_scale( CL_sub( y[8], y[13] ), FFT_C31 ); + move64(); z[18] = CL_add( y[3], s[0] ); + move64(); s[0] = CL_sub( y[3], CL_shr( s[0], 1 ) ); + move64(); z[28] = CL_mac_j( s[0], s[1] ); + move64(); z[23] = CL_msu_j( s[0], s[1] ); + move64(); s[0] = CL_add( y[9], y[14] ); + move64(); s[1] = CL_scale( CL_sub( y[9], y[14] ), FFT_C31 ); + move64(); z[24] = CL_add( y[4], s[0] ); + move64(); s[0] = CL_sub( y[4], CL_shr( s[0], 1 ) ); + move64(); z[19] = CL_mac_j( s[0], s[1] ); + move64(); z[29] = CL_msu_j( s[0], s[1] ); + move64(); s[0] = z[0]; + move64(); s[1] = z[15]; + move64(); *l = CL_add( s[0], s[1] ); + move64(); *h = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; 
s[0] = z[8]; + move64(); s[1] = z[23]; + move64(); *h = CL_add( s[0], s[1] ); + move64(); *l = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; s[0] = z[1]; + move64(); s[1] = z[16]; + move64(); *l = CL_add( s[0], s[1] ); + move64(); *h = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; s[0] = z[9]; + move64(); s[1] = z[24]; + move64(); *h = CL_add( s[0], s[1] ); + move64(); *l = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; s[0] = z[2]; + move64(); s[1] = z[17]; + move64(); *l = CL_add( s[0], s[1] ); + move64(); *h = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; s[0] = z[10]; + move64(); s[1] = z[25]; + move64(); *h = CL_add( s[0], s[1] ); + move64(); *l = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; s[0] = z[3]; + move64(); s[1] = z[18]; + move64(); *l = CL_add( s[0], s[1] ); + move64(); *h = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; s[0] = z[11]; + move64(); s[1] = z[26]; + move64(); *h = CL_add( s[0], s[1] ); + move64(); *l = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; s[0] = z[4]; + move64(); s[1] = z[19]; + move64(); *l = CL_add( s[0], s[1] ); + move64(); *h = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; s[0] = z[12]; + move64(); s[1] = z[27]; + move64(); *h = CL_add( s[0], s[1] ); + move64(); *l = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; s[0] = z[5]; + move64(); s[1] = z[20]; + move64(); *l = CL_add( s[0], s[1] ); + move64(); *h = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; s[0] = z[13]; + move64(); s[1] = z[28]; + move64(); *h = CL_add( s[0], s[1] ); + move64(); *l = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; s[0] = z[6]; + move64(); s[1] = z[21]; + move64(); *l = CL_add( s[0], s[1] ); + move64(); *h = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; s[0] = z[14]; + move64(); s[1] = z[29]; + move64(); *h = CL_add( s[0], s[1] ); + move64(); *l = CL_sub( s[0], s[1] ); + move64(); l += 1, h += 1; s[0] = z[7]; + move64(); s[1] = z[22]; + move64(); *l = CL_add( s[0], s[1] ); + move64(); *h = CL_sub( s[0], 
s[1] ); + move64(); l += 1, h += 1; return; @@ -4462,333 +5852,577 @@ static void fft_len32( cmplx ab; xx[0] = x[0]; + move64(); xx[1] = x[4]; + move64(); xx[2] = x[8]; + move64(); xx[3] = x[12]; + move64(); xx[4] = x[16]; + move64(); xx[5] = x[20]; + move64(); xx[6] = x[24]; + move64(); xx[7] = x[28]; + move64(); t[0] = CL_add( xx[0], xx[4] ); + move64(); t[1] = CL_sub( xx[0], xx[4] ); + move64(); t[2] = CL_add( xx[1], xx[5] ); + move64(); t[3] = CL_sub( xx[1], xx[5] ); + move64(); t[4] = CL_add( xx[2], xx[6] ); + move64(); t[5] = CL_sub( xx[2], xx[6] ); + move64(); t[6] = CL_add( xx[3], xx[7] ); + move64(); t[7] = CL_sub( xx[3], xx[7] ); + move64(); s[0] = CL_add( t[0], t[4] ); + move64(); s[2] = CL_sub( t[0], t[4] ); + move64(); s[4] = CL_mac_j( t[1], t[5] ); + move64(); s[5] = CL_msu_j( t[1], t[5] ); + move64(); s[1] = CL_add( t[2], t[6] ); + move64(); s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + move64(); t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + move64(); t[1] = CL_sub( t[3], t[7] ); + move64(); { s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + move64(); s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + move64(); s[7] = CL_conjugate( s[7] ); + move64(); }; y[0] = CL_add( s[0], s[1] ); + move64(); y[4] = CL_sub( s[0], s[1] ); + move64(); y[2] = CL_sub( s[2], s[3] ); + move64(); y[6] = CL_add( s[2], s[3] ); + move64(); y[3] = CL_add( s[4], s[7] ); + move64(); y[7] = CL_sub( s[4], s[7] ); + move64(); y[1] = CL_add( s[5], s[6] ); + move64(); y[5] = CL_sub( s[5], s[6] ); + move64(); xx[0] = x[1]; + move64(); xx[1] = x[5]; + move64(); xx[2] = x[9]; + move64(); xx[3] = x[13]; + move64(); xx[4] = x[17]; + move64(); xx[5] = x[21]; + move64(); xx[6] = x[25]; + move64(); xx[7] = x[29]; + move64(); t[0] = CL_add( xx[0], xx[4] ); + move64(); t[1] = CL_sub( xx[0], xx[4] ); + move64(); t[2] = CL_add( xx[1], xx[5] ); + move64(); t[3] = CL_sub( xx[1], xx[5] ); + move64(); t[4] = CL_add( 
xx[2], xx[6] ); + move64(); t[5] = CL_sub( xx[2], xx[6] ); + move64(); t[6] = CL_add( xx[3], xx[7] ); + move64(); t[7] = CL_sub( xx[3], xx[7] ); + move64(); s[0] = CL_add( t[0], t[4] ); + move64(); s[2] = CL_sub( t[0], t[4] ); + move64(); s[4] = CL_mac_j( t[1], t[5] ); + move64(); s[5] = CL_msu_j( t[1], t[5] ); + move64(); s[1] = CL_add( t[2], t[6] ); + move64(); s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + move64(); t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + move64(); t[1] = CL_sub( t[3], t[7] ); + move64(); { s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + move64(); s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + move64(); s[7] = CL_conjugate( s[7] ); + move64(); }; y[8] = CL_add( s[0], s[1] ); + move64(); y[12] = CL_sub( s[0], s[1] ); + move64(); y[10] = CL_sub( s[2], s[3] ); + move64(); y[14] = CL_add( s[2], s[3] ); + move64(); y[11] = CL_add( s[4], s[7] ); + move64(); y[15] = CL_sub( s[4], s[7] ); + move64(); y[9] = CL_add( s[5], s[6] ); + move64(); y[13] = CL_sub( s[5], s[6] ); + move64(); xx[0] = x[2]; + move64(); xx[1] = x[6]; + move64(); xx[2] = x[10]; + move64(); xx[3] = x[14]; + move64(); xx[4] = x[18]; + move64(); xx[5] = x[22]; + move64(); xx[6] = x[26]; + move64(); xx[7] = x[30]; + move64(); t[0] = CL_add( xx[0], xx[4] ); + move64(); t[1] = CL_sub( xx[0], xx[4] ); + move64(); t[2] = CL_add( xx[1], xx[5] ); + move64(); t[3] = CL_sub( xx[1], xx[5] ); + move64(); t[4] = CL_add( xx[2], xx[6] ); + move64(); t[5] = CL_sub( xx[2], xx[6] ); + move64(); t[6] = CL_add( xx[3], xx[7] ); + move64(); t[7] = CL_sub( xx[3], xx[7] ); + move64(); s[0] = CL_add( t[0], t[4] ); + move64(); s[2] = CL_sub( t[0], t[4] ); + move64(); s[4] = CL_mac_j( t[1], t[5] ); + move64(); s[5] = CL_msu_j( t[1], t[5] ); + move64(); s[1] = CL_add( t[2], t[6] ); + move64(); s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + move64(); t[0] = CL_swap_real_imag( CL_add( t[3], 
t[7] ) ); + move64(); t[1] = CL_sub( t[3], t[7] ); + move64(); { s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + move64(); s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + move64(); s[7] = CL_conjugate( s[7] ); + move64(); }; y[16] = CL_add( s[0], s[1] ); + move64(); y[20] = CL_sub( s[0], s[1] ); + move64(); y[18] = CL_sub( s[2], s[3] ); + move64(); y[22] = CL_add( s[2], s[3] ); + move64(); y[19] = CL_add( s[4], s[7] ); + move64(); y[23] = CL_sub( s[4], s[7] ); + move64(); y[17] = CL_add( s[5], s[6] ); + move64(); y[21] = CL_sub( s[5], s[6] ); + move64(); xx[0] = x[3]; + move64(); xx[1] = x[7]; + move64(); xx[2] = x[11]; + move64(); xx[3] = x[15]; + move64(); xx[4] = x[19]; + move64(); xx[5] = x[23]; + move64(); xx[6] = x[27]; + move64(); xx[7] = x[31]; + move64(); t[0] = CL_add( xx[0], xx[4] ); + move64(); t[1] = CL_sub( xx[0], xx[4] ); + move64(); t[2] = CL_add( xx[1], xx[5] ); + move64(); t[3] = CL_sub( xx[1], xx[5] ); + move64(); t[4] = CL_add( xx[2], xx[6] ); + move64(); t[5] = CL_sub( xx[2], xx[6] ); + move64(); t[6] = CL_add( xx[3], xx[7] ); + move64(); t[7] = CL_sub( xx[3], xx[7] ); + move64(); s[0] = CL_add( t[0], t[4] ); + move64(); s[2] = CL_sub( t[0], t[4] ); + move64(); s[4] = CL_mac_j( t[1], t[5] ); + move64(); s[5] = CL_msu_j( t[1], t[5] ); + move64(); s[1] = CL_add( t[2], t[6] ); + move64(); s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + move64(); t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + move64(); t[1] = CL_sub( t[3], t[7] ); + move64(); { s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + move64(); s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + move64(); s[7] = CL_conjugate( s[7] ); + move64(); }; y[24] = CL_add( s[0], s[1] ); + move64(); y[28] = CL_sub( s[0], s[1] ); + move64(); y[26] = CL_sub( s[2], s[3] ); + move64(); y[30] = CL_add( s[2], s[3] ); + move64(); y[27] = CL_add( s[4], s[7] ); + move64(); y[31] = CL_sub( s[4], 
s[7] ); + move64(); y[25] = CL_add( s[5], s[6] ); + move64(); y[29] = CL_sub( s[5], s[6] ); + move64(); { ab = y[9]; + move64(); y[9] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[0] ), CL_scale( ab, FFT_RotVector_32_fx[1] ) ); + move64(); }; { ab = y[10]; + move64(); y[10] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[2] ), CL_scale( ab, FFT_RotVector_32_fx[3] ) ); + move64(); }; { ab = y[11]; + move64(); y[11] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[4] ), CL_scale( ab, FFT_RotVector_32_fx[5] ) ); + move64(); }; { ab = y[12]; + move64(); y[12] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[6] ), CL_scale( ab, FFT_RotVector_32_fx[7] ) ); + move64(); }; { ab = y[13]; + move64(); y[13] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[8] ), CL_scale( ab, FFT_RotVector_32_fx[9] ) ); + move64(); }; { ab = y[14]; + move64(); y[14] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[10] ), CL_scale( ab, FFT_RotVector_32_fx[11] ) ); + move64(); }; { ab = y[15]; + move64(); y[15] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[12] ), CL_scale( ab, FFT_RotVector_32_fx[13] ) ); + move64(); }; { ab = y[17]; + move64(); y[17] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[14] ), CL_scale( ab, FFT_RotVector_32_fx[15] ) ); + move64(); }; { ab = y[18]; + move64(); y[18] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[16] ), CL_scale( ab, FFT_RotVector_32_fx[17] ) ); + move64(); }; { ab = y[19]; + move64(); y[19] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[18] ), CL_scale( ab, FFT_RotVector_32_fx[19] ) ); + move64(); }; { ab = y[21]; + move64(); y[21] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[20] ), CL_scale( ab, FFT_RotVector_32_fx[21] ) ); + move64(); }; { ab = y[22]; + move64(); y[22] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[22] ), CL_scale( ab, FFT_RotVector_32_fx[23] ) ); + move64(); }; { ab = y[23]; + move64(); y[23] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[24] ), CL_scale( ab, FFT_RotVector_32_fx[25] ) ); + move64(); }; { ab = y[25]; + move64(); y[25] = CL_mac_j( 
CL_scale( ab, FFT_RotVector_32_fx[26] ), CL_scale( ab, FFT_RotVector_32_fx[27] ) ); + move64(); }; { ab = y[26]; + move64(); y[26] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[28] ), CL_scale( ab, FFT_RotVector_32_fx[29] ) ); + move64(); }; { ab = y[27]; + move64(); y[27] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[30] ), CL_scale( ab, FFT_RotVector_32_fx[31] ) ); + move64(); }; { ab = y[28]; + move64(); y[28] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[32] ), CL_scale( ab, FFT_RotVector_32_fx[33] ) ); + move64(); }; { ab = y[29]; + move64(); y[29] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[34] ), CL_scale( ab, FFT_RotVector_32_fx[35] ) ); + move64(); }; { ab = y[30]; + move64(); y[30] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[36] ), CL_scale( ab, FFT_RotVector_32_fx[37] ) ); + move64(); }; { ab = y[31]; + move64(); y[31] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[38] ), CL_scale( ab, FFT_RotVector_32_fx[39] ) ); + move64(); }; t[0] = CL_add( y[0], y[16] ); + move64(); t[1] = CL_sub( y[0], y[16] ); + move64(); t[2] = CL_add( y[8], y[24] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[8] ), CL_conjugate( y[24] ) ) ); + move64(); x[0] = CL_add( t[0], t[2] ); + move64(); x[8] = CL_sub( t[1], t[3] ); + move64(); x[16] = CL_sub( t[0], t[2] ); + move64(); x[24] = CL_add( t[1], t[3] ); + move64(); t[0] = CL_add( y[1], y[17] ); + move64(); t[1] = CL_sub( y[1], y[17] ); + move64(); t[2] = CL_add( y[9], y[25] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[9] ), CL_conjugate( y[25] ) ) ); + move64(); x[1] = CL_add( t[0], t[2] ); + move64(); x[9] = CL_sub( t[1], t[3] ); + move64(); x[17] = CL_sub( t[0], t[2] ); + move64(); x[25] = CL_add( t[1], t[3] ); + move64(); t[0] = CL_add( y[2], y[18] ); + move64(); t[1] = CL_sub( y[2], y[18] ); + move64(); t[2] = CL_add( y[10], y[26] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[10] ), CL_conjugate( y[26] ) ) ); + move64(); x[2] = CL_add( t[0], t[2] ); + move64(); 
x[10] = CL_sub( t[1], t[3] ); + move64(); x[18] = CL_sub( t[0], t[2] ); + move64(); x[26] = CL_add( t[1], t[3] ); + move64(); t[0] = CL_add( y[3], y[19] ); + move64(); t[1] = CL_sub( y[3], y[19] ); + move64(); t[2] = CL_add( y[11], y[27] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[11] ), CL_conjugate( y[27] ) ) ); + move64(); x[3] = CL_add( t[0], t[2] ); + move64(); x[11] = CL_sub( t[1], t[3] ); + move64(); x[19] = CL_sub( t[0], t[2] ); + move64(); x[27] = CL_add( t[1], t[3] ); + move64(); t[0] = CL_msu_j( y[4], y[20] ); + move64(); t[1] = CL_mac_j( y[4], y[20] ); + move64(); t[2] = CL_add( y[12], y[28] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[12] ), CL_conjugate( y[28] ) ) ); + move64(); x[4] = CL_add( t[0], t[2] ); + move64(); x[12] = CL_sub( t[1], t[3] ); + move64(); x[20] = CL_sub( t[0], t[2] ); + move64(); x[28] = CL_add( t[1], t[3] ); + move64(); t[0] = CL_add( y[5], y[21] ); + move64(); t[1] = CL_sub( y[5], y[21] ); + move64(); t[2] = CL_add( y[13], y[29] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[13] ), CL_conjugate( y[29] ) ) ); + move64(); x[5] = CL_add( t[0], t[2] ); + move64(); x[13] = CL_sub( t[1], t[3] ); + move64(); x[21] = CL_sub( t[0], t[2] ); + move64(); x[29] = CL_add( t[1], t[3] ); + move64(); t[0] = CL_add( y[6], y[22] ); + move64(); t[1] = CL_sub( y[6], y[22] ); + move64(); t[2] = CL_add( y[14], y[30] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[14] ), CL_conjugate( y[30] ) ) ); + move64(); x[6] = CL_add( t[0], t[2] ); + move64(); x[14] = CL_sub( t[1], t[3] ); + move64(); x[22] = CL_sub( t[0], t[2] ); + move64(); x[30] = CL_add( t[1], t[3] ); + move64(); t[0] = CL_add( y[7], y[23] ); + move64(); t[1] = CL_sub( y[7], y[23] ); + move64(); t[2] = CL_add( y[15], y[31] ); + move64(); t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[15] ), CL_conjugate( y[31] ) ) ); + move64(); x[7] = CL_add( t[0], t[2] ); + move64(); x[15] = CL_sub( t[1], t[3] ); + move64(); 
x[23] = CL_sub( t[0], t[2] ); + move64(); x[31] = CL_add( t[1], t[3] ); + move64(); return; } @@ -4809,8 +6443,10 @@ static void fft_lenN( { FOR( j = 0; j < dim1; j++ ) { - xx[i * dim1 + j].re = x[i + j * dim2].re; - xx[i * dim1 + j].im = x[i + j * dim2].im; + xx[add( i_mult( i, dim1 ), j )].re = x[add( i, i_mult( j, dim2 ) )].re; + move64(); + xx[add( i_mult( i, dim1 ), j )].im = x[add( i, i_mult( j, dim2 ) )].im; + move64(); } } @@ -4819,56 +6455,56 @@ static void fft_lenN( case 5: FOR( i = 0; i < dim2; i++ ) { - fft_len5( &xx[i * dim1] ); + fft_len5( &xx[i_mult( i, dim1 )] ); } BREAK; case 8: FOR( i = 0; i < dim2; i++ ) { - fft_len8( &xx[i * dim1] ); + fft_len8( &xx[i_mult( i, dim1 )] ); } BREAK; case 10: FOR( i = 0; i < dim2; i++ ) { - fft_len10( &xx[i * dim1] ); + fft_len10( &xx[i_mult( i, dim1 )] ); } BREAK; case 15: FOR( i = 0; i < dim2; i++ ) { - fft_len15( &xx[i * dim1] ); + fft_len15( &xx[i_mult( i, dim1 )] ); } BREAK; case 16: FOR( i = 0; i < dim2; i++ ) { - fft_len16( &xx[i * dim1] ); + fft_len16( &xx[i_mult( i, dim1 )] ); } BREAK; case 20: FOR( i = 0; i < dim2; i++ ) { - fft_len20_fx( &xx[i * dim1] ); + fft_len20_fx( &xx[i_mult( i, dim1 )] ); } BREAK; case 30: FOR( i = 0; i < dim2; i++ ) { - fft_len30( &xx[i * dim1] ); + fft_len30( &xx[i_mult( i, dim1 )] ); } BREAK; case 32: FOR( i = 0; i < dim2; i++ ) { - fft_len32( &xx[i * dim1] ); + fft_len32( &xx[i_mult( i, dim1 )] ); } BREAK; } @@ -4882,100 +6518,145 @@ static void fft_lenN( cmplx s[8]; cmplx y[8]; + test(); + test(); + test(); + test(); IF( EQ_16( dim1, 30 ) || EQ_16( dim1, 20 ) || EQ_16( dim1, 15 ) || EQ_16( dim1, 10 ) || EQ_16( dim1, 5 ) ) { FOR( i = 0; i < dim1; i++ ) { { - y[0] = xx[i + 0 * dim1]; + y[0] = xx[i]; // y[0] = xx[i + 0 * dim1] }; - IF( EQ_16( i, 0 ) ) + IF( i == 0 ) { { - y[1] = xx[i + 1 * dim1]; + y[1] = xx[add( i, i_mult( 1, dim1 ) )]; + move64(); }; { - y[2] = xx[i + 2 * dim1]; + y[2] = xx[add( i, i_mult( 2, dim1 ) )]; + move64(); }; { - y[3] = xx[i + 3 * dim1]; + y[3] = 
xx[add( i, i_mult( 3, dim1 ) )]; + move64(); }; { - y[4] = xx[i + 4 * dim1]; + y[4] = xx[add( i, i_mult( 4, dim1 ) )]; + move64(); }; { - y[5] = xx[i + 5 * dim1]; + y[5] = xx[add( i, i_mult( 5, dim1 ) )]; + move64(); }; { - y[6] = xx[i + 6 * dim1]; + y[6] = xx[add( i, i_mult( 6, dim1 ) )]; + move64(); }; { - y[7] = xx[i + 7 * dim1]; + y[7] = xx[add( i, i_mult( 7, dim1 ) )]; + move64(); }; } ELSE { { - y[1] = CL_mac_j( CL_scale( xx[i + 1 * dim1], W[sc * i + sc * 1 * dim1 * 2 - Woff] ), - CL_scale( xx[i + 1 * dim1], W[sc * i + sc * 1 * dim1 * 2 + 1 - Woff] ) ); + y[1] = CL_mac_j( CL_scale( xx[add( i, i_mult( 1, dim1 ) )], W[sub( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 1 ), dim1 ), 1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( 1, dim1 ) )], W[sub( add( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 1 ), dim1 ), 1 ) ), 1 ), Woff )] ) ); + move64(); }; { - y[2] = CL_mac_j( CL_scale( xx[i + 2 * dim1], W[sc * i + sc * 2 * dim1 * 2 - Woff] ), - CL_scale( xx[i + 2 * dim1], W[sc * i + sc * 2 * dim1 * 2 + 1 - Woff] ) ); + y[2] = CL_mac_j( CL_scale( xx[add( i, i_mult( 2, dim1 ) )], W[sub( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 2 ), dim1 ), 1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( 2, dim1 ) )], W[sub( add( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 2 ), dim1 ), 1 ) ), 1 ), Woff )] ) ); + move64(); }; { - y[3] = CL_mac_j( CL_scale( xx[i + 3 * dim1], W[sc * i + sc * 3 * dim1 * 2 - Woff] ), - CL_scale( xx[i + 3 * dim1], W[sc * i + sc * 3 * dim1 * 2 + 1 - Woff] ) ); + y[3] = CL_mac_j( CL_scale( xx[add( i, i_mult( 3, dim1 ) )], W[sub( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 3 ), dim1 ), 1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( 3, dim1 ) )], W[sub( add( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 3 ), dim1 ), 1 ) ), 1 ), Woff )] ) ); + move64(); }; { - y[4] = CL_mac_j( CL_scale( xx[i + 4 * dim1], W[sc * i + sc * 4 * dim1 * 2 - Woff] ), - CL_scale( xx[i + 4 * dim1], W[sc * i + sc * 4 * dim1 * 2 + 1 - Woff] ) ); + y[4] = CL_mac_j( 
CL_scale( xx[add( i, i_mult( 4, dim1 ) )], W[sub( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 4 ), dim1 ), 1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( 4, dim1 ) )], W[sub( add( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 4 ), dim1 ), 1 ) ), 1 ), Woff )] ) ); + move64(); }; { - y[5] = CL_mac_j( CL_scale( xx[i + 5 * dim1], W[sc * i + sc * 5 * dim1 * 2 - Woff] ), - CL_scale( xx[i + 5 * dim1], W[sc * i + sc * 5 * dim1 * 2 + 1 - Woff] ) ); + y[5] = CL_mac_j( CL_scale( xx[add( i, i_mult( 5, dim1 ) )], W[sub( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 5 ), dim1 ), 1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( 5, dim1 ) )], W[sub( add( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 5 ), dim1 ), 1 ) ), 1 ), Woff )] ) ); + move64(); }; { - y[6] = CL_mac_j( CL_scale( xx[i + 6 * dim1], W[sc * i + sc * 6 * dim1 * 2 - Woff] ), - CL_scale( xx[i + 6 * dim1], W[sc * i + sc * 6 * dim1 * 2 + 1 - Woff] ) ); + y[6] = CL_mac_j( CL_scale( xx[add( i, i_mult( 6, dim1 ) )], W[sub( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 6 ), dim1 ), 1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( 6, dim1 ) )], W[sub( add( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 6 ), dim1 ), 1 ) ), 1 ), Woff )] ) ); + move64(); }; { - y[7] = CL_mac_j( CL_scale( xx[i + 7 * dim1], W[sc * i + sc * 7 * dim1 * 2 - Woff] ), - CL_scale( xx[i + 7 * dim1], W[sc * i + sc * 7 * dim1 * 2 + 1 - Woff] ) ); + y[7] = CL_mac_j( CL_scale( xx[add( i, i_mult( 7, dim1 ) )], W[sub( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 7 ), dim1 ), 1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( 7, dim1 ) )], W[sub( add( add( i_mult( sc, i ), shl( i_mult( i_mult( sc, 7 ), dim1 ), 1 ) ), 1 ), Woff )] ) ); + move64(); }; } t[0] = CL_add( y[0], y[4] ); + move64(); t[1] = CL_sub( y[0], y[4] ); + move64(); t[2] = CL_add( y[1], y[5] ); + move64(); t[3] = CL_sub( y[1], y[5] ); + move64(); t[4] = CL_add( y[2], y[6] ); + move64(); t[5] = CL_sub( y[2], y[6] ); + move64(); t[6] = CL_add( y[3], y[7] ); + move64(); t[7] = CL_sub( 
y[3], y[7] ); + move64(); s[0] = CL_add( t[0], t[4] ); + move64(); s[2] = CL_sub( t[0], t[4] ); + move64(); s[4] = CL_mac_j( t[1], t[5] ); + move64(); s[5] = CL_msu_j( t[1], t[5] ); + move64(); s[1] = CL_add( t[2], t[6] ); + move64(); s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + move64(); t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + move64(); t[1] = CL_sub( t[3], t[7] ); + move64(); s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + move64(); s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + move64(); s[7] = CL_conjugate( s[7] ); - - x[i + 0 * dim1] = CL_add( s[0], s[1] ); - x[i + 1 * dim1] = CL_add( s[5], s[6] ); - x[i + 2 * dim1] = CL_sub( s[2], s[3] ); - x[i + 3 * dim1] = CL_add( s[4], s[7] ); - x[i + 4 * dim1] = CL_sub( s[0], s[1] ); - x[i + 5 * dim1] = CL_sub( s[5], s[6] ); - x[i + 6 * dim1] = CL_add( s[2], s[3] ); - x[i + 7 * dim1] = CL_sub( s[4], s[7] ); + move64(); + + x[i] = CL_add( s[0], s[1] ); /*x[add(i + i_mult(0 , dim1)] = CL_add( s[0], s[1] )*/ + move64(); + x[add( i, i_mult( 1, dim1 ) )] = CL_add( s[5], s[6] ); + move64(); + x[add( i, i_mult( 2, dim1 ) )] = CL_sub( s[2], s[3] ); + move64(); + x[add( i, i_mult( 3, dim1 ) )] = CL_add( s[4], s[7] ); + move64(); + x[add( i, i_mult( 4, dim1 ) )] = CL_sub( s[0], s[1] ); + move64(); + x[add( i, i_mult( 5, dim1 ) )] = CL_sub( s[5], s[6] ); + move64(); + x[add( i, i_mult( 6, dim1 ) )] = CL_add( s[2], s[3] ); + move64(); + x[add( i, i_mult( 7, dim1 ) )] = CL_sub( s[4], s[7] ); + move64(); } } ELSE @@ -4983,95 +6664,137 @@ static void fft_lenN( FOR( i = 0; i < dim1; i++ ) { { - y[0] = xx[i + 0 * dim1]; + y[0] = xx[i]; /* y[0] = xx[i + 0 * dim1] */ + move64(); }; - IF( EQ_16( i, 0 ) ) + IF( i == 0 ) { { - y[1] = xx[i + 1 * dim1]; + y[1] = xx[add( i, i_mult( 1, dim1 ) )]; + move64(); }; { - y[2] = xx[i + 2 * dim1]; + y[2] = xx[add( i, i_mult( 2, dim1 ) )]; + move64(); }; { - y[3] = xx[i + 3 * dim1]; + y[3] = xx[add( i, i_mult( 3, 
dim1 ) )]; + move64(); }; { - y[4] = xx[i + 4 * dim1]; + y[4] = xx[add( i, i_mult( 4, dim1 ) )]; + move64(); }; { - y[5] = xx[i + 5 * dim1]; + y[5] = xx[add( i, i_mult( 5, dim1 ) )]; + move64(); }; { - y[6] = xx[i + 6 * dim1]; + y[6] = xx[add( i, i_mult( 6, dim1 ) )]; + move64(); }; { - y[7] = xx[i + 7 * dim1]; + y[7] = xx[add( i, i_mult( 7, dim1 ) )]; + move64(); }; } ELSE { { - y[1] = CL_mac_j( CL_scale( xx[i + 1 * dim1], W[sc * i + sc * 1 * dim1 - Woff] ), - CL_scale( xx[i + 1 * dim1], W[sc * i + sc * 1 * dim1 + 1 - Woff] ) ); + y[1] = CL_mac_j( CL_scale( xx[add( i, i_mult( 1, dim1 ) )], W[sub( add( i_mult( sc, i ), i_mult( i_mult( sc, 1 ), dim1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( 1, dim1 ) )], W[sub( add( add( i_mult( sc, i ), i_mult( i_mult( sc, 1 ), dim1 ) ), 1 ), Woff )] ) ); + move64(); }; { - y[2] = CL_mac_j( CL_scale( xx[i + 2 * dim1], W[sc * i + sc * 2 * dim1 - Woff] ), - CL_scale( xx[i + 2 * dim1], W[sc * i + sc * 2 * dim1 + 1 - Woff] ) ); + y[2] = CL_mac_j( CL_scale( xx[add( i, i_mult( 2, dim1 ) )], W[sub( add( i_mult( sc, i ), i_mult( i_mult( sc, 2 ), dim1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( 2, dim1 ) )], W[sub( add( add( i_mult( sc, i ), i_mult( i_mult( sc, 2 ), dim1 ) ), 1 ), Woff )] ) ); + move64(); }; { - y[3] = CL_mac_j( CL_scale( xx[i + 3 * dim1], W[sc * i + sc * 3 * dim1 - Woff] ), - CL_scale( xx[i + 3 * dim1], W[sc * i + sc * 3 * dim1 + 1 - Woff] ) ); + y[3] = CL_mac_j( CL_scale( xx[add( i, i_mult( 3, dim1 ) )], W[sub( add( i_mult( sc, i ), i_mult( i_mult( sc, 3 ), dim1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( 3, dim1 ) )], W[sub( add( add( i_mult( sc, i ), i_mult( i_mult( sc, 3 ), dim1 ) ), 1 ), Woff )] ) ); + move64(); }; { - y[4] = CL_mac_j( CL_scale( xx[i + 4 * dim1], W[sc * i + sc * 4 * dim1 - Woff] ), - CL_scale( xx[i + 4 * dim1], W[sc * i + sc * 4 * dim1 + 1 - Woff] ) ); + y[4] = CL_mac_j( CL_scale( xx[add( i, i_mult( 4, dim1 ) )], W[sub( add( i_mult( sc, i ), i_mult( i_mult( sc, 4 ), dim1 ) ), Woff )] ), + 
CL_scale( xx[add( i, i_mult( 4, dim1 ) )], W[sub( add( add( i_mult( sc, i ), i_mult( i_mult( sc, 4 ), dim1 ) ), 1 ), Woff )] ) ); + move64(); }; { - y[5] = CL_mac_j( CL_scale( xx[i + 5 * dim1], W[sc * i + sc * 5 * dim1 - Woff] ), - CL_scale( xx[i + 5 * dim1], W[sc * i + sc * 5 * dim1 + 1 - Woff] ) ); + y[5] = CL_mac_j( CL_scale( xx[add( i, i_mult( 5, dim1 ) )], W[sub( add( i_mult( sc, i ), i_mult( i_mult( sc, 5 ), dim1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( 5, dim1 ) )], W[sub( add( add( i_mult( sc, i ), i_mult( i_mult( sc, 5 ), dim1 ) ), 1 ), Woff )] ) ); + move64(); }; { - y[6] = CL_mac_j( CL_scale( xx[i + 6 * dim1], W[sc * i + sc * 6 * dim1 - Woff] ), - CL_scale( xx[i + 6 * dim1], W[sc * i + sc * 6 * dim1 + 1 - Woff] ) ); + y[6] = CL_mac_j( CL_scale( xx[add( i, i_mult( 6, dim1 ) )], W[sub( add( i_mult( sc, i ), i_mult( i_mult( sc, 6 ), dim1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( 6, dim1 ) )], W[sub( add( add( i_mult( sc, i ), i_mult( i_mult( sc, 6 ), dim1 ) ), 1 ), Woff )] ) ); + move64(); }; { - y[7] = CL_mac_j( CL_scale( xx[i + 7 * dim1], W[sc * i + sc * 7 * dim1 - Woff] ), - CL_scale( xx[i + 7 * dim1], W[sc * i + sc * 7 * dim1 + 1 - Woff] ) ); + y[7] = CL_mac_j( CL_scale( xx[add( i, i_mult( 7, dim1 ) )], W[sub( add( i_mult( sc, i ), i_mult( i_mult( sc, 7 ), dim1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( 7, dim1 ) )], W[sub( add( add( i_mult( sc, i ), i_mult( i_mult( sc, 7 ), dim1 ) ), 1 ), Woff )] ) ); + move64(); }; } t[0] = CL_add( y[0], y[4] ); + move64(); t[1] = CL_sub( y[0], y[4] ); + move64(); t[2] = CL_add( y[1], y[5] ); + move64(); t[3] = CL_sub( y[1], y[5] ); + move64(); t[4] = CL_add( y[2], y[6] ); + move64(); t[5] = CL_sub( y[2], y[6] ); + move64(); t[6] = CL_add( y[3], y[7] ); + move64(); t[7] = CL_sub( y[3], y[7] ); + move64(); s[0] = CL_add( t[0], t[4] ); + move64(); s[2] = CL_sub( t[0], t[4] ); + move64(); s[4] = CL_mac_j( t[1], t[5] ); + move64(); s[5] = CL_msu_j( t[1], t[5] ); + move64(); s[1] = CL_add( t[2], t[6] ); + 
move64(); s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + move64(); t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + move64(); t[1] = CL_sub( t[3], t[7] ); + move64(); s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + move64(); s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + move64(); s[7] = CL_conjugate( s[7] ); - - x[i + 0 * dim1] = CL_add( s[0], s[1] ); - x[i + 1 * dim1] = CL_add( s[5], s[6] ); - x[i + 2 * dim1] = CL_sub( s[2], s[3] ); - x[i + 3 * dim1] = CL_add( s[4], s[7] ); - x[i + 4 * dim1] = CL_sub( s[0], s[1] ); - x[i + 5 * dim1] = CL_sub( s[5], s[6] ); - x[i + 6 * dim1] = CL_add( s[2], s[3] ); - x[i + 7 * dim1] = CL_sub( s[4], s[7] ); + move64(); + + x[i] = CL_add( s[0], s[1] ); /*x[i + 0 * dim1] = CL_add( s[0], s[1] )*/ + move64(); + x[add( i, i_mult( 1, dim1 ) )] = CL_add( s[5], s[6] ); + move64(); + x[add( i, i_mult( 2, dim1 ) )] = CL_sub( s[2], s[3] ); + move64(); + x[add( i, i_mult( 3, dim1 ) )] = CL_add( s[4], s[7] ); + move64(); + x[add( i, i_mult( 4, dim1 ) )] = CL_sub( s[0], s[1] ); + move64(); + x[add( i, i_mult( 5, dim1 ) )] = CL_sub( s[5], s[6] ); + move64(); + x[add( i, i_mult( 6, dim1 ) )] = CL_add( s[2], s[3] ); + move64(); + x[add( i, i_mult( 7, dim1 ) )] = CL_sub( s[4], s[7] ); + move64(); } } BREAK; @@ -5083,32 +6806,35 @@ static void fft_lenN( FOR( j = 0; j < dim2; j++ ) { { - y[j] = xx[0 + j * dim1]; + y[j] = xx[0 + i_mult( j, dim1 )]; /*y[j] = xx[0 + j * dim1];*/ }; } fft_len10( &y[0] ); FOR( j = 0; j < dim2; j++ ) { - x[0 + j * dim1] = y[j]; + x[0 + i_mult( j, dim1 )] = y[j]; + move64(); } FOR( i = 1; i < dim1; i++ ) { { y[( 0 + 0 )] = xx[i + ( 0 + 0 ) * dim1]; + move64(); } FOR( j = 1; j < dim2; j++ ) { { - y[( j + 0 )] = CL_mac_j( CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 - Woff] ), - CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 + 1 - Woff] ) ); + y[( j + 0 )] = CL_mac_j( CL_scale( xx[add( i, i_mult( ( j + 0 ), dim1 ) )], 
W[sub( add( i_mult( sc, i ), i_mult( i_mult( sc, j ), dim1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( ( j + 0 ), dim1 ) )], W[sub( add( add( i_mult( sc, i ), i_mult( i_mult( sc, j ), dim1 ) ), 1 ), Woff )] ) ); + move64(); } } fft_len10( &y[0] ); FOR( j = 0; j < dim2; j++ ) { - x[i + j * dim1] = y[j]; + x[add( i, i_mult( j, dim1 ) )] = y[j]; } } BREAK; @@ -5120,14 +6846,15 @@ static void fft_lenN( FOR( j = 0; j < dim2; j++ ) { { - y[j] = xx[0 + j * dim1]; + y[j] = xx[0 + i_mult( j, dim1 )]; + move64(); }; } fft_len16( &y[0] ); FOR( j = 0; j < dim2; j++ ) { - x[0 + j * dim1] = y[j]; + x[0 + i_mult( j, dim1 )] = y[j]; } FOR( i = 1; i < dim1; i++ ) @@ -5139,14 +6866,16 @@ static void fft_lenN( FOR( j = 1; j < dim2; j++ ) { { - y[( j + 0 )] = CL_mac_j( CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 - Woff] ), - CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 + 1 - Woff] ) ); + y[( j + 0 )] = CL_mac_j( CL_scale( xx[add( i, i_mult( ( j + 0 ), dim1 ) )], W[sub( add( i_mult( sc, i ), i_mult( i_mult( sc, j ), dim1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( ( j + 0 ), dim1 ) )], W[sub( add( add( i_mult( sc, i ), i_mult( i_mult( sc, j ), dim1 ) ), 1 ), Woff )] ) ); + move64(); } } fft_len16( &y[0] ); FOR( j = 0; j < dim2; j++ ) { - x[i + j * dim1] = y[j]; + x[add( i, i_mult( j, dim1 ) )] = y[j]; + move64(); } } BREAK; @@ -5158,40 +6887,47 @@ static void fft_lenN( FOR( j = 0; j < dim2; j++ ) { { - y[j] = xx[0 + j * dim1]; + y[j] = xx[0 + i_mult( j, dim1 )]; + move64(); }; } fft_len20_fx( &y[0] ); FOR( j = 0; j < dim2; j++ ) { - x[0 + j * dim1] = y[j]; + x[0 + i_mult( j, dim1 )] = y[j]; + move64(); } FOR( i = 1; i < dim1; i++ ) { { - y[( 0 + 0 )] = xx[i + ( 0 + 0 ) * dim1]; + y[( 0 + 0 )] = xx[add( i, ( 0 + 0 ) * dim1 )]; + move64(); } { - y[( 0 + 1 )] = CL_mac_j( CL_scale( xx[i + ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 - Woff] ), - CL_scale( xx[i + ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 + 1 - Woff] ) ); + y[( 0 + 1 )] = CL_mac_j( 
CL_scale( xx[add( i, i_mult( ( 0 + 1 ), dim1 ) )], W[sub( add( len, i_mult( sc, i ) ) + 0 * dim1, Woff )] ), + CL_scale( xx[add( i, i_mult( ( 0 + 1 ), dim1 ) )], W[sub( add( add( len, i_mult( sc, i ) ) + 0 * dim1, 1 ), Woff )] ) ); + move64(); } FOR( j = 2; j < dim2; j = j + 2 ) { { - y[( j + 0 )] = CL_mac_j( CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + j * dim1 - Woff] ), - CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + j * dim1 + 1 - Woff] ) ); + y[( j + 0 )] = CL_mac_j( CL_scale( xx[add( i, i_mult( ( j + 0 ), dim1 ) )], W[sub( add( i_mult( sc, i ), i_mult( j, dim1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( ( j + 0 ), dim1 ) )], W[sub( add( add( i_mult( sc, i ), i_mult( j, dim1 ) ), 1 ), Woff )] ) ); + move64(); } { - y[( j + 1 )] = CL_mac_j( CL_scale( xx[i + ( j + 1 ) * dim1], W[len + sc * i + j * dim1 - Woff] ), - CL_scale( xx[i + ( j + 1 ) * dim1], W[len + sc * i + j * dim1 + 1 - Woff] ) ); + y[( j + 1 )] = CL_mac_j( CL_scale( xx[add( i, i_mult( ( j + 1 ), dim1 ) )], W[sub( add( add( len, i_mult( sc, i ) ), i_mult( j, dim1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( ( j + 1 ), dim1 ) )], W[sub( add( add( add( len, i_mult( sc, i ) ), i_mult( j, dim1 ) ), 1 ), Woff )] ) ); + move64(); } } fft_len20_fx( &y[0] ); FOR( j = 0; j < dim2; j++ ) { x[i + j * dim1] = y[j]; + move64(); } } BREAK; @@ -5204,39 +6940,46 @@ static void fft_lenN( { { y[j] = xx[0 + j * dim1]; + move64(); }; } fft_len32( &y[0] ); FOR( j = 0; j < dim2; j++ ) { x[0 + j * dim1] = y[j]; + move64(); } FOR( i = 1; i < dim1; i++ ) { { y[( 0 + 0 )] = xx[i + ( 0 + 0 ) * dim1]; + move64(); } { - y[( 0 + 1 )] = CL_mac_j( CL_scale( xx[i + ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 - Woff] ), - CL_scale( xx[i + ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 + 1 - Woff] ) ); + y[( 0 + 1 )] = CL_mac_j( CL_scale( xx[add( i, i_mult( ( 0 + 1 ), dim1 ) )], W[sub( add( len, i_mult( sc, i ) ) + 0 * dim1, Woff )] ), + CL_scale( xx[add( i, i_mult( ( 0 + 1 ), dim1 ) )], W[sub( add( add( len, i_mult( sc, i ) ) 
+ 0 * dim1, 1 ), Woff )] ) ); + move64(); } FOR( j = 2; j < dim2; j = j + 2 ) { { - y[( j + 0 )] = CL_mac_j( CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + j * dim1 - Woff] ), - CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + j * dim1 + 1 - Woff] ) ); + y[( j + 0 )] = CL_mac_j( CL_scale( xx[add( i, i_mult( ( j + 0 ), dim1 ) )], W[sub( add( i_mult( sc, i ), i_mult( j, dim1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( ( j + 0 ), dim1 ) )], W[sub( add( add( i_mult( sc, i ), i_mult( j, dim1 ) ), 1 ), Woff )] ) ); + move64(); } { - y[( j + 1 )] = CL_mac_j( CL_scale( xx[i + ( j + 1 ) * dim1], W[len + sc * i + j * dim1 - Woff] ), - CL_scale( xx[i + ( j + 1 ) * dim1], W[len + sc * i + j * dim1 + 1 - Woff] ) ); + y[( j + 1 )] = CL_mac_j( CL_scale( xx[add( i, i_mult( ( j + 1 ), dim1 ) )], W[sub( add( add( len, i_mult( sc, i ) ), i_mult( j, dim1 ) ), Woff )] ), + CL_scale( xx[add( i, i_mult( ( j + 1 ), dim1 ) )], W[sub( add( add( add( len, i_mult( sc, i ) ), i_mult( j, dim1 ) ), 1 ), Woff )] ) ); + move64(); } } fft_len32( &y[0] ); FOR( j = 0; j < dim2; j++ ) { - x[i + j * dim1] = y[j]; + x[add( i, i_mult( j, dim1 ) )] = y[j]; + move64(); } } BREAK; @@ -5260,10 +7003,12 @@ void fft_fx( { cmplx x[960]; - FOR( Word32 j = 0; j < length; j++ ) + FOR( Word16 j = 0; j < length; j++ ) { - x[j].re = re[s * j]; - x[j].im = im[s * j]; + x[j].re = re[i_mult( s, j )]; + move32(); + x[j].im = im[i_mult( s, j )]; + move32(); } SWITCH( length ) @@ -5323,10 +7068,12 @@ void fft_fx( assert( !"fft length is not supported!" 
); } - FOR( Word32 j = 0; j < length; j++ ) + FOR( Word16 j = 0; j < length; j++ ) { - re[s * j] = x[j].re; - im[s * j] = x[j].im; + re[i_mult( s, j )] = x[j].re; + move32(); + im[i_mult( s, j )] = x[j].im; + move32(); } return; @@ -5343,33 +7090,45 @@ void rfft_fx( Word32 tmp, t1, t2, t3, t4; Word16 s1, s2; - sizeOfFft2 = length >> 1; - sizeOfFft4 = length >> 2; + sizeOfFft2 = shr( length, 1 ); + sizeOfFft4 = shr( length, 2 ); SWITCH( sizeOfFft2 ) { case 80: s1 = 409; + move16(); s2 = -409; + move16(); BREAK; case 128: s1 = 256; + move16(); s2 = -256; + move16(); BREAK; case 160: s1 = 204; + move16(); s2 = -204; + move16(); BREAK; case 320: s1 = 102; + move16(); s2 = -102; + move16(); BREAK; case 480: s1 = 68; + move16(); s2 = -68; + move16(); BREAK; default: s1 = -1; + move16(); s2 = -1; + move16(); printf( "Configuration not supported" ); assert( 0 ); } @@ -5383,21 +7142,27 @@ void rfft_fx( tmp = L_add( x[0], x[1] ); x[1] = L_sub( x[0], x[1] ); + move32(); x[0] = tmp; + move32(); FOR( i = 1; i <= sizeOfFft4; i++ ) { - t1 = L_sub( x[2 * i], x[length - 2 * i] ); - t2 = L_add( x[2 * i + 1], x[length - 2 * i + 1] ); + t1 = L_sub( x[2 * i], x[sub( length, shl( i, 1 ) )] ); + t2 = L_add( x[add( shl( i, 1 ), 1 )], x[add( sub( length, shl( i, 1 ) ), 1 )] ); t3 = L_sub( Mpy_32_16_1( t1, w[i] ), Mpy_32_16_1( t2, w[i + sizeOfFft4] ) ); t4 = L_add( Mpy_32_16_1( t1, w[i + sizeOfFft4] ), Mpy_32_16_1( t2, w[i] ) ); - t1 = L_add( x[2 * i], x[length - 2 * i] ); - t2 = L_sub( x[2 * i + 1], x[length - 2 * i + 1] ); + t1 = L_add( x[2 * i], x[sub( length, shl( i, 1 ) )] ); + t2 = L_sub( x[add( shl( i, 1 ), 1 )], x[add( sub( length, shl( i, 1 ) ), 1 )] ); x[2 * i] = Mpy_32_16_1( L_sub( t1, t3 ), 16384 ); - x[2 * i + 1] = Mpy_32_16_1( L_sub( t2, t4 ), 16384 ); - x[length - 2 * i] = Mpy_32_16_1( L_add( t1, t3 ), 16384 ); - x[length - 2 * i + 1] = Mpy_32_16_1( L_negate( L_add( t2, t4 ) ), 16384 ); + move32(); + x[add( shl( i, 1 ), 1 )] = Mpy_32_16_1( L_sub( t2, t4 ), 16384 ); + 
move32(); + x[sub( length, shl( i, 1 ) )] = Mpy_32_16_1( L_add( t1, t3 ), 16384 ); + move32(); + x[add( sub( length, shl( i, 1 ) ), 1 )] = Mpy_32_16_1( L_negate( L_add( t2, t4 ) ), 16384 ); + move32(); } BREAK; @@ -5406,21 +7171,27 @@ void rfft_fx( tmp = Mpy_32_16_1( L_add( x[0], x[1] ), 16384 ); x[1] = Mpy_32_16_1( L_sub( x[1], x[0] ), 16384 ); + move32(); x[0] = tmp; + move32(); FOR( i = 1; i <= sizeOfFft4; i++ ) { - t1 = L_sub( x[2 * i], x[length - 2 * i] ); - t2 = L_add( x[2 * i + 1], x[length - 2 * i + 1] ); - t3 = L_add( Mpy_32_16_1( t1, w[i] ), Mpy_32_16_1( t2, w[i + sizeOfFft4] ) ); - t4 = L_sub( Mpy_32_16_1( t2, w[i] ), Mpy_32_16_1( t1, w[i + sizeOfFft4] ) ); - t1 = L_add( x[2 * i], x[length - 2 * i] ); - t2 = L_sub( x[2 * i + 1], x[length - 2 * i + 1] ); + t1 = L_sub( x[2 * i], x[sub( length, shl( i, 1 ) )] ); + t2 = L_add( x[add( shl( i, 1 ), 1 )], x[add( sub( length, shl( i, 1 ) ), 1 )] ); + t3 = L_add( Mpy_32_16_1( t1, w[i] ), Mpy_32_16_1( t2, w[add( i, sizeOfFft4 )] ) ); + t4 = L_sub( Mpy_32_16_1( t2, w[i] ), Mpy_32_16_1( t1, w[add( i, sizeOfFft4 )] ) ); + t1 = L_add( x[2 * i], x[sub( length, shl( i, 1 ) )] ); + t2 = L_sub( x[add( shl( i, 1 ), 1 )], x[add( sub( length, shl( i, 1 ) ), 1 )] ); x[2 * i] = Mpy_32_16_1( L_sub( t1, t3 ), 16384 ); - x[2 * i + 1] = Mpy_32_16_1( L_sub( t4, t2 ), 16384 ); - x[length - 2 * i] = Mpy_32_16_1( L_add( t1, t3 ), 16384 ); - x[length - 2 * i + 1] = Mpy_32_16_1( L_add( t2, t4 ), 16384 ); + move32(); + x[add( shl( i, 1 ), 1 )] = Mpy_32_16_1( L_sub( t4, t2 ), 16384 ); + move32(); + x[sub( length, shl( i, 1 ) )] = Mpy_32_16_1( L_add( t1, t3 ), 16384 ); + move32(); + x[add( sub( length, shl( i, 1 ) ), 1 )] = Mpy_32_16_1( L_add( t2, t4 ), 16384 ); + move32(); } fft_fx( x, x + 1, sizeOfFft2, 2 ); @@ -5428,7 +7199,9 @@ void rfft_fx( FOR( i = 0; i < length; i += 2 ) { x[i] = Mpy_32_16_1( x[i], s1 ); + move32(); x[i + 1] = Mpy_32_16_1( x[i + 1], s2 ); + move32(); } BREAK; @@ -5439,28 +7212,45 @@ void rfft_fx( Word16 
find_guarded_bits_fx( Word32 n ) { - return n <= 1 ? 0 : n <= 2 ? 1 - : n <= 4 ? 2 - : n <= 8 ? 3 - : n <= 16 ? 4 - : n <= 32 ? 5 - : n <= 64 ? 6 - : n <= 128 ? 7 - : n <= 256 ? 8 - : n <= 512 ? 9 - : n <= 1024 ? 10 - : n <= 2048 ? 11 - : n <= 4096 ? 12 - : n <= 8192 ? 13 - : n <= 16384 ? 14 - : 15; + // return n <= 1 ? 0 : n <= 2 ? 1 + // : n <= 4 ? 2 + // : n <= 8 ? 3 + // : n <= 16 ? 4 + // : n <= 32 ? 5 + // : n <= 64 ? 6 + // : n <= 128 ? 7 + // : n <= 256 ? 8 + // : n <= 512 ? 9 + // : n <= 1024 ? 10 + // : n <= 2048 ? 11 + // : n <= 4096 ? 12 + // : n <= 8192 ? 13 + // : n <= 16384 ? 14 + // : 15; + /*Word16 val = 0; + move32(); + test(); + WHILE( GT_32( n, L_shl( 1, val ) ) && LT_32( val, 16 ) ) + { + val = add( val, 1 ); + }*/ + IF( LE_32( n, 1 ) ) + { + return 0; + } + ELSE + { + + return sub( 31, norm_l( L_sub( n, 1 ) ) ); + } } Word16 L_norm_arr( Word32 *arr, Word16 size ) { Word16 q = 31; - FOR( int i = 0; i < size; i++ ) - if ( arr[i] != 0 ) + move16(); + FOR( Word16 i = 0; i < size; i++ ) + IF( arr[i] != 0 ) { q = s_min( q, norm_l( arr[i] ) ); } @@ -5470,15 +7260,17 @@ Word16 L_norm_arr( Word32 *arr, Word16 size ) Word16 get_min_scalefactor( Word32 x, Word32 y ) { Word16 scf = Q31; - IF( EQ_32( x, 0 ) && EQ_32( y, 0 ) ) + move16(); + test(); + IF( x == 0 && y == 0 ) { return 0; } - IF( NE_32( x, 0 ) ) + IF( x != 0 ) { scf = s_min( scf, norm_l( x ) ); } - IF( NE_32( y, 0 ) ) + IF( y != 0 ) { scf = s_min( scf, norm_l( y ) ); } @@ -5487,8 +7279,8 @@ Word16 get_min_scalefactor( Word32 x, Word32 y ) Flag is_zero_arr( Word32 *arr, Word16 size ) { - FOR( int i = 0; i < size; i++ ) - if ( arr[i] != 0 ) + FOR( Word16 i = 0; i < size; i++ ) + IF( arr[i] != 0 ) { return 0; } diff --git a/lib_com/tcq_position_arith_fx.c b/lib_com/tcq_position_arith_fx.c index 12ed38196..459c4d362 100644 --- a/lib_com/tcq_position_arith_fx.c +++ b/lib_com/tcq_position_arith_fx.c @@ -78,15 +78,17 @@ static void bitstream_save_bit( cur = (UWord8) ( cur | L_shl( bit, pBS->curPos ) 
); move16(); pBS->curPos = sub( pBS->curPos, 1 ); + move16(); pBS->buf[pBS->numByte] = cur; move16(); pBS->numbits = L_add( pBS->numbits, 1 ); - + move32(); IF( pBS->curPos < 0 ) { pBS->curPos = 7; move16(); pBS->numByte = L_add( pBS->numByte, 1 ); + move32(); } return; @@ -105,15 +107,13 @@ static UWord32 bitstream_load_bit( } curPos = &pBS->curPos; - move16(); - bit = ( ( pBS->buf[pBS->numByte] >> ( *curPos )-- ) & 0x00000001 ); - move16(); - move16(); - move16(); + bit = UL_and( UL_lshr( pBS->buf[pBS->numByte] , ( *curPos ) ) , 0x00000001 ); + *curPos = sub(*curPos, 1); IF( *curPos < 0 ) { pBS->numByte = L_add( pBS->numByte, 1 ); + move32(); *curPos = 7; move16(); } @@ -131,11 +131,13 @@ static void bitstream_rollback( pBS->curPos++; move16(); pBS->numbits = L_sub( pBS->numbits, 1 ); + move32(); IF( EQ_16( pBS->curPos, 8 ) ) { pBS->curPos = 0; move16(); pBS->numByte = L_sub( pBS->numByte, 1 ); + move32(); } } @@ -147,6 +149,7 @@ static void transmission_bits( { bitstream_save_bit( arInst->bsInst, bit ); arInst->num_bits = L_add( arInst->num_bits, 1 ); + move32(); bit = !bit; move32(); @@ -154,6 +157,7 @@ static void transmission_bits( { bitstream_save_bit( arInst->bsInst, bit ); arInst->num_bits = L_add( arInst->num_bits, 1 ); + move32(); } return; @@ -166,7 +170,9 @@ static Word32 ar_make_model_fx( { Word16 dist; Word32 sum = 0; + move32(); Word32 cum = 0; + move32(); Word16 i, tmp; FOR( i = 0; i < len; i++ ) @@ -183,7 +189,7 @@ static Word32 ar_make_model_fx( { /*model[i] = (short)( ( cum * MAX_AR_FREQ ) / sum ); */ model[i] = extract_l( ar_div( cum * MAX_AR_FREQ, sum ) ); - + move16(); if ( i ) cum = L_add( cum, freq[i - 1] ); } @@ -223,12 +229,14 @@ void ar_encoder_start_fx( move32(); arInst->low = L_deposit_l( 0 ); + move32(); arInst->high = AR_TOP; move32(); arInst->bits_to_follow = 0; move16(); arInst->num_bits = L_deposit_l( 0 ); + move32(); arInst->max_bits = max_bits; move32(); } @@ -246,8 +254,8 @@ static void ar_encode_fx( symbol = L_add( symbol, 1 
); range = L_add( L_sub( high, low ), 1 ); - high = L_sub( L_add( low, ar_div( range * model[symbol - 1], model[0] ) ), 1 ); - low = L_add( low, ar_div( range * model[symbol], model[0] ) ); + high = L_sub( L_add( low, ar_div( imult3216( range, model[symbol - 1] ), model[0] ) ), 1 ); + low = L_add( low, ar_div( imult3216( range, model[symbol] ), model[0] ) ); FOR( ;; ) { @@ -330,13 +338,15 @@ void ar_decoder_start_fx( move32(); arInst->low = L_deposit_l( 0 ); + move32(); arInst->high = AR_TOP; move32(); arInst->value = L_deposit_l( 0 ); - + move32(); FOR( i = 0; i < AR_BITS; i++ ) { arInst->value = L_add( L_shl( arInst->value, 1 ), bitstream_load_bit( arInst->bsInst ) ); + move32(); } return; @@ -356,7 +366,7 @@ static Word16 ar_decode_fx( range = L_add( L_sub( high, low ), 1 ); /*cum = (short)( ( ( (unsigned int)( arInst->value - arInst->low ) + 1 ) * model[0] - 1 ) / range ); */ - cum = extract_l( ar_div( L_sub( L_add( L_sub( value, low ), 1 ) * model[0], 1 ), range ) ); + cum = extract_l( ar_div( L_sub( imult3216( L_add( L_sub( value, low ), 1 ), model[0] ), 1 ), range ) ); symbol = 1; move16(); @@ -365,8 +375,8 @@ static Word16 ar_decode_fx( symbol = add( symbol, 1 ); } - high = L_sub( L_add( low, ar_div( range * model[symbol - 1], model[0] ) ), 1 ); - low = L_add( low, ar_div( range * model[symbol], model[0] ) ); + high = L_sub( L_add( low, ar_div( imult3216( range, model[symbol - 1] ), model[0] ) ), 1 ); + low = L_add( low, ar_div( imult3216( range, model[symbol] ), model[0] ) ); FOR( i = 0; i < 0x7FFF; i++ ) { @@ -396,7 +406,7 @@ static Word16 ar_decode_fx( arInst->value = value; move32(); - return ( symbol - 1 ); + return ( sub(symbol , 1) ); } void ar_decoder_done_fx( @@ -472,7 +482,7 @@ static Word32 GetBitsFromPulses_fx( exp1 = norm_l( pow_getbitsfrompulses_fx[temp_fx1] ); exp2 = norm_l( frac_fx32 ); frac_fx32 = Mult_32_32( L_shl( pow_getbitsfrompulses_fx[temp_fx1], exp1 ), L_shl( frac_fx32, exp2 ) ); /*21 + exp1 + 30 + exp2 - 31 */ - frac_fx32 = L_shr( 
frac_fx32, exp1 + exp2 ) + 1; /*20 */ + frac_fx32 = L_add( L_shr( frac_fx32, add( exp1, exp2 ) ), 1 ); /*20 */ IF( LT_16( exp, integer_fx ) ) { @@ -488,8 +498,7 @@ static Word32 GetBitsFromPulses_fx( } IF( GE_32( mantissa_fx, 0x200000 ) ) { - exp++; - move16(); + exp = add( exp, 1 ); mantissa_fx = L_shr( mantissa_fx, 1 ); } @@ -528,7 +537,7 @@ static void TCQnew_fx( { Word16 i, st, dminpos, position; Word16 pulsesnum, nzpos = 0; - + move16(); Word32 metric_fx[STATES][TCQ_MAX_BAND_SIZE]; Word16 path_fx[STATES][TCQ_MAX_BAND_SIZE]; Word16 quant_fx[STATES][TCQ_MAX_BAND_SIZE]; @@ -570,7 +579,7 @@ static void TCQnew_fx( exp = norm_l( scale_fx ); tmp32 = L_shl( scale_fx, exp ); tmp32 = Mult_32_16( v_fx[i], extract_h( tmp32 ) ); /*12 + 20 + exp - 16 - 15 + Qscale */ - exp1 = 26 - ( exp - 19 + Qscale ); + exp1 = sub( 26, add( sub( exp, 19 ), Qscale ) ); exp2 = norm_l( tmp32 ); IF( GE_16( exp2, exp1 ) ) { @@ -659,7 +668,7 @@ static void TCQnew_fx( move16(); if ( v_fx[i - 1] <= 0 ) { - vout_fx[i - 1] = -quant_fx[position][i]; + vout_fx[i - 1] = negate(quant_fx[position][i]); move16(); } position = path_fx[position][i]; @@ -720,10 +729,13 @@ Word32 GetISCScale_fx( Word16 SafeExp; Word32 magnbits_fx = 0, tcqmagnbits_fx /*, surplus_fx, bits_fx*/; + move32(); Word16 prob0_fx, prob1_fx, num, denum, quantum1_fx, quantum2_fx; Word32 dmin_fx, scale_fx32; Word32 actualt_fx; Word32 pt_fx, sxy_fx = 0, sy2_fx = 0; + move32(); + move32(); Word16 pn_fx, g_fx, scale_fx; Word16 tmp16, exp, exp1, exp2, Q_temp, hi, lo; Word32 m_fx, tmp32; @@ -733,8 +745,15 @@ Word32 GetISCScale_fx( Word16 Qscale; Word16 Qsxy = 4; - exp = 0; /* to avoid compilation warnings */ + move32(); + move32(); + move32(); + move16(); + + exp = 0; /* to avoid compilation warnings */ + move16(); Qscale = 0; /* to avoid compilation warnings */ + move16(); set32_fx( dist_fx, 0, size ); set16_fx( exp_dist, 31, size ); @@ -794,15 +813,16 @@ Word32 GetISCScale_fx( exp2 = norm_l( m_fx ); scale_fx = div_s( shl( pulsesnum, exp1 ), 
extract_h( L_shl( m_fx, exp2 ) ) ); /*15 + exp1 - (exp2 + 12 - 16) */ - exp = 15 + exp1 - ( exp2 + 12 - 16 ) + 2; + exp = add( sub( add( 15, exp1 ), sub( add( exp2, 12 ), 16 ) ), 2 ); move16(); t_fx = 0; move16(); FOR( i = 0; i < size; i++ ) { - tmp32 = Mult_32_16( aquants_fx[i], scale_fx ); /*12 + exp - 15 */ - tmp32 = L_shl( tmp32, sub( 16, 12 + exp - 15 ) ); /*16 */ + tmp32 = Mult_32_16( aquants_fx[i], scale_fx ); /*12 + exp - 15 */ + tmp32 = L_shl( tmp32, sub( 16, add( 12 - 15, exp ) ) ); /*16 */ magn_fx[i] = extract_h( L_add( 32768, tmp32 ) ); + move16(); t_fx = add( t_fx, magn_fx[i] ); } } @@ -837,8 +857,8 @@ Word32 GetISCScale_fx( { FOR( i = 0; i < size; i++ ) { - sxy_fx = L_add( sxy_fx, L_shl( Mult_32_16( aquants_fx[i], magn_fx[i] ), Qsxy + 3 ) ); /* 12+0-15 +9 -> 6 */ - sy2_fx = L_add( sy2_fx, L_mult0( magn_fx[i], magn_fx[i] ) ); /*0 */ + sxy_fx = L_add( sxy_fx, L_shl( Mult_32_16( aquants_fx[i], magn_fx[i] ), add( Qsxy, 3 ) ) ); /* 12+0-15 +9 -> 6 */ + sy2_fx = L_add( sy2_fx, L_mult0( magn_fx[i], magn_fx[i] ) ); /*0 */ } Q_temp = 32; move16(); @@ -876,20 +896,21 @@ Word32 GetISCScale_fx( { tmp16 = shl( tmp16, exp2 ); } - g_fx = div_s( extract_h( tmp32 ), tmp16 ); /*15 + 12 + exp1 - 16 - exp2; */ - exp = 15 + 12 + exp1 - 16 - exp2 - 2; + g_fx = div_s( extract_h( tmp32 ), tmp16 ); /*15 + 12 + exp1 - 16 - exp2; */ + exp = add( 15 + 12 - 16 - 2, sub( exp1, exp2 ) ); /*exp = 15 + 12 + exp1 - 16 - exp2 - 2*/ move16(); } IF( g_fx == 0 ) { dist_fx[i] = L_deposit_l( 0 ); + move32(); } ELSE { IF( direction > 0 ) { - tmp32 = L_add( sxy_fx, L_shr( aquants_fx[i], 12 - Qsxy ) ); /*Qsxy */ + tmp32 = L_add( sxy_fx, L_shr( aquants_fx[i], sub( 12, Qsxy ) ) ); /*Qsxy */ t32 = L_add( sy2_fx, L_add( 1, L_deposit_l( shl( magn_fx[i], 1 ) ) ) ); IF( LT_16( norm_l( t32 ), 15 ) ) @@ -906,7 +927,7 @@ Word32 GetISCScale_fx( } ELSE { - tmp32 = L_sub( sxy_fx, L_shr( aquants_fx[i], 12 - Qsxy ) ); /*Qsxy */ + tmp32 = L_sub( sxy_fx, L_shr( aquants_fx[i], sub( 12, Qsxy ) ) ); /*Qsxy */ t32 
= L_add( sy2_fx, L_sub( 1, L_deposit_l( shl( magn_fx[i], 1 ) ) ) ); SafeExp = norm_l( t32 ); @@ -923,7 +944,7 @@ Word32 GetISCScale_fx( } } #ifdef BASOP_NOGLOB - tmp32 = L_shl_sat( tmp32, 1 - SafeExp ); /* *2 */ + tmp32 = L_shl_sat( tmp32, sub( 1, SafeExp ) ); /* *2 */ tmp32 = L_sub_sat( L_shl_sat( L_mult0( g_fx, tmp16 ), sub( Qsxy, exp ) ), tmp32 ); /*Qsxy */ #else tmp32 = L_shl( tmp32, 1 - SafeExp ); /* *2 */ @@ -931,7 +952,7 @@ Word32 GetISCScale_fx( #endif dist_fx[i] = Mult_32_16( tmp32, g_fx ); /*Qsxy + exp - 15 */ move32(); - exp_dist[i] = add( Qsxy - 15, exp ); + exp_dist[i] = add( sub( Qsxy, 15 ), exp ); move16(); if ( LT_16( exp_dist[i], Q_temp ) ) { @@ -1020,7 +1041,7 @@ Word32 GetISCScale_fx( exp2 = sub( norm_l( pulsesnum ), 1 ); lo = L_Extract_lc( L_shl( actualt_fx, exp1 ), &hi ); scale_fx32 = Div_32( L_shl( pulsesnum, exp2 ), hi, lo ); /*31 + exp2 - exp1 - 12 */ - Qscale = 31 + exp2 - exp1 - 12 + 2; + Qscale = add( 31 - 12 + 2, sub( exp2, exp1 ) ); /*31 + exp2 - exp1 - 12 + 2*/ move16(); } @@ -1172,8 +1193,7 @@ Word32 GetISCScale_fx( exp1 = sub( norm_s( num ), 1 ); exp2 = norm_s( denum ); prob1_fx = div_s( shl( num, exp1 ), shl( denum, exp2 ) ); /*15 + exp1 - exp2 */ - exp = 15 + exp1 - exp2; - move16(); + exp = add( 15, sub( exp1, exp2 ) ); prob1_fx = shl( prob1_fx, sub( 15, exp ) ); prob0_fx = sub( MAX_16, prob1_fx ); } @@ -1209,8 +1229,7 @@ Word32 GetISCScale_fx( } } - leftnz--; - move16(); + leftnz = sub( leftnz, 1 ); leftp = sub( leftp, abs_s( magn_fx[i] ) ); } @@ -1218,9 +1237,9 @@ Word32 GetISCScale_fx( } /* Update actual occurred surplus */ - tcqmagnbits_fx = L_sub( L_sub( table_logcum_fx[pulsescurr], table_logcum_fx[nzposcurr] ), table_logcum_fx[pulsescurr - ( nzposcurr - 1 )] ); + tcqmagnbits_fx = L_sub( L_sub( table_logcum_fx[pulsescurr], table_logcum_fx[nzposcurr] ), table_logcum_fx[sub( pulsescurr, sub( nzposcurr, 1 ) )] ); *surplus_fx = L_add( *surplus_fx, L_sub( tcqmagnbits_fx, L_shl( magnbits_fx, 1 ) ) ); - + move32(); *nzpout = 
nzposcurr; move16(); } /*magnitude coding */ @@ -1236,7 +1255,7 @@ Word32 GetISCScale_fx( exp2 = sub( norm_l( pulsesnum ), 1 ); lo = L_Extract_lc( L_shl( actualt_fx, exp1 ), &hi ); scale_fx32 = Div_32( L_shl( pulsesnum, exp2 ), hi, lo ); /*31 + exp2 - exp1 - 12 */ - Qscale = 31 + exp2 - exp1 - 12; + Qscale = add( 31 - 12, sub( exp2, exp1 ) ); /*31 + exp2 - exp1 - 12*/ move16(); } test(); @@ -1247,7 +1266,7 @@ Word32 GetISCScale_fx( FOR( i = 0; i < size; i++ ) { - if ( abs_s( magn_fx[i] ) > 1 ) + if ( GT_16( abs_s( magn_fx[i] ), 1 ) ) { flag_g1 = 1; move16(); @@ -1270,14 +1289,15 @@ Word32 GetISCScale_fx( move16(); /*sbuffer[*bcount] = scale_fx32;*/ exp = norm_s( pulsesnum ); - tmp16 = div_l( actualt_fx, shl( pulsesnum, exp - 1 ) ); + tmp16 = div_l( actualt_fx, shl( pulsesnum, sub( exp, 1 ) ) ); tmp32 = L_shl( L_deposit_l( tmp16 ), exp ); /*sbuffer[*bcount] = 1/((float)tmp32/pow(2.0, 12));*/ sbuffer[*bcount] = tmp32; /*Q12*/ move32(); /*sbuffer[*bcount] = (float)scale_fx32 / pow(2.0, Qscale);*/ - ( *bcount )++; + ( *bcount ) = add( *bcount, 1 ); + move16(); } } } @@ -1297,6 +1317,7 @@ void TCQLSB_fx( { Word16 i, st, dminpos, position; Word16 q_fx = 6554; /*Q15*/ + move16(); Word32 dmin_fx, curdist1_fx, curdist2_fx, newdist1_fx, newdist2_fx; Word16 path[STATES_LSB][TCQ_LSB_SIZE]; Word16 quant[STATES_LSB][TCQ_LSB_SIZE]; @@ -1338,9 +1359,9 @@ void TCQLSB_fx( exp2 = norm_l( s2_fx ); s1_fx16 = extract_h( L_shl( s1_fx, exp1 ) ); /*12 + exp1 - 16*/ s2_fx16 = extract_h( L_shl( s2_fx, exp2 ) ); /*12 + exp2 - 16*/ - exp1 = 12 + exp1 - 16; + exp1 = add( 12 - 16, exp1 ); move16(); - exp2 = 12 + exp2 - 16; + exp2 = add( 12 - 16, exp2 ); move16(); a1_fx = L_add( abuffer_fx[i], 0 ); @@ -1407,36 +1428,36 @@ void TCQLSB_fx( /* decision */ IF( LT_32( L_add( curdist1_fx, newdist1_fx ), L_add( curdist2_fx, newdist2_fx ) ) ) { - path[st][i / 2 + 1] = step_LSB[st][0]; + path[st][add( shr( i, 1 ), 1 )] = step_LSB[st][0]; move16(); - metric_fx[st][i / 2 + 1] = L_add( curdist1_fx, 
newdist1_fx ); + metric_fx[st][add( shr( i, 1 ), 1 )] = L_add( curdist1_fx, newdist1_fx ); move32(); - quant[st][i / 2 + 1] = 0; + quant[st][add( shr( i, 1 ), 1 )] = 0; move16(); - dquant[st][i / 2 + 1] = dqnt_LSB[step_LSB[st][0]][st]; + dquant[st][add( shr( i, 1 ), 1 )] = dqnt_LSB[step_LSB[st][0]][st]; move16(); } ELSE { - path[st][i / 2 + 1] = step_LSB[st][1]; + path[st][add( shr( i, 1 ), 1 )] = step_LSB[st][1]; move16(); - metric_fx[st][i / 2 + 1] = L_add( curdist2_fx, newdist2_fx ); + metric_fx[st][add( shr( i, 1 ), 1 )] = L_add( curdist2_fx, newdist2_fx ); move32(); - quant[st][i / 2 + 1] = 1; + quant[st][add( shr( i, 1 ), 1 )] = 1; move16(); - dquant[st][i / 2 + 1] = dqnt_LSB[step_LSB[st][0]][st]; + dquant[st][add( shr( i, 1 ), 1 )] = dqnt_LSB[step_LSB[st][0]][st]; move16(); } - if ( GT_32( MaxPath, metric_fx[st][i / 2 + 1] ) ) + if ( GT_32( MaxPath, metric_fx[st][add( shr( i, 1 ), 1 )] ) ) { - MaxPath = L_add( metric_fx[st][i / 2 + 1], 0 ); + MaxPath = L_add( metric_fx[st][add( shr( i, 1 ), 1 )], 0 ); } } /* Metric renormalization to prevent overflow */ FOR( st = 0; st < 4; st++ ) { - metric_fx[st][i / 2 + 1] = L_sub( metric_fx[st][i / 2 + 1], MaxPath ); + metric_fx[st][add( shr( i, 1 ), 1 )] = L_sub( metric_fx[st][add( shr( i, 1 ), 1 )], MaxPath ); move32(); } } @@ -1461,9 +1482,9 @@ void TCQLSB_fx( FOR( ; i >= 0; i -= 2 ) { - qout[i / 2] = quant[position][i / 2 + 1]; + qout[i / 2] = quant[position][add( shr( i, 1 ), 1 )]; move16(); - dpath[i / 2] = dquant[position][i / 2 + 1]; + dpath[i / 2] = dquant[position][add( shr( i, 1 ), 1 )]; move16(); IF( s_and( denc_LSB[position][qout[i / 2]], 0x1 ) ) @@ -1487,7 +1508,7 @@ void TCQLSB_fx( move16(); } - position = path[position][i / 2 + 1]; + position = path[position][add( shr( i, 1 ), 1 )]; move16(); } @@ -1495,6 +1516,7 @@ void TCQLSB_fx( FOR( i = 0; i < bcount; i++ ) { mbuffer_fx[i] = add( add( mbuffer_fx[i], shl( mbuffer_fx[i], 2 ) ), dbuffer_fx[i] ); + move16(); } return; @@ -1506,8 +1528,9 @@ void 
TCQLSBdec_fx( { /*float q = QTCQ;*/ Word16 q = 1; /*x5*/ + move16(); Word16 i, tmp, state = 0; - + move16(); tmp = shr( bcount, 1 ); FOR( i = 0; i < tmp; i++ ) { @@ -1519,16 +1542,18 @@ void TCQLSBdec_fx( ELSE { mbuffer[2 * i] = negate( q ); + move16(); } IF( s_and( ddec_LSB[state][dpath[i]], 0x2 ) ) { - mbuffer[2 * i + 1] = q; + mbuffer[add( shl( i, 1 ), 1 )] = q; move16(); } ELSE { - mbuffer[2 * i + 1] = negate( q ); + mbuffer[add( shl( i, 1 ), 1 )] = negate( q ); + move16(); } state = dstep_LSB[state][dpath[i]]; @@ -1544,7 +1569,8 @@ void RestoreTCQ_fx( Word16 *mbuffer ) { Word16 i, nzpos = 0, flag_g1 = 0; - + move16(); + move16(); /* calculate actual nz positions */ nzpos = 0; move16(); @@ -1576,6 +1602,7 @@ void RestoreTCQ_fx( magn[i] = mbuffer[*bcount]; move16(); *bcount = add( *bcount, 1 ); + move16(); } } } @@ -1589,7 +1616,8 @@ void RestoreTCQdec_fx( Word16 *mbuffer ) { Word16 i, nzpos = 0, flag_g1 = 0; - + move16(); + move16(); /* calculate actual nz positions */ nzpos = 0; move16(); @@ -1604,6 +1632,7 @@ void RestoreTCQdec_fx( move16(); } magn[i] = extract_l( L_mult0( magn[i], 5 ) ); + move16(); } } @@ -1624,6 +1653,7 @@ void RestoreTCQdec_fx( magn[i] = mbuffer[*bcount]; move16(); *bcount = add( *bcount, 1 ); + move16(); } } } @@ -1661,6 +1691,7 @@ void LoadTCQdata_fx( FOR( i = 0; i < bcount; i++ ) { dpath[i] = ar_decode_fx( arInst, uniform_model ); + move16(); } return; @@ -1675,12 +1706,15 @@ Word32 encode_position_ari_fx( { Word16 i, tmp; Word16 nz = 0, pulses = 0; + move16(); + move16(); Word16 prob[TCQ_MAX_BAND_SIZE]; Word16 model_num_nz[TCQ_MAX_BAND_SIZE]; Word16 *cur_quants = quants; Word16 integer, frac; Word32 /*est_bits_frame_fx, */ btcq_fx = 0, bits_fx = 0, pnzp_fx; - + move16(); + move16(); Word32 cp, scp, fxone, fxp1; Word16 pos; @@ -1704,26 +1738,26 @@ Word32 encode_position_ari_fx( btcq_fx = GetBitsFromPulses_fx( pulses, size ); /* Estimate TCQ bits */ - bits_fx = L_sub( table_logcum_fx[size + 1], L_add( table_logcum_fx[nz + 1], 
table_logcum_fx[size - nz + 1] ) ) ); - bits_fx = L_add( bits_fx, L_sub( btcq_fx, L_sub( table_logcum_fx[size + 1], L_add( table_logcum_fx[nz + 1], table_logcum_fx[size - nz + 1] ) ) ) ); - bits_fx = L_sub( bits_fx, L_sub( table_logcum_fx[pulses], L_add( table_logcum_fx[nz], table_logcum_fx[pulses - ( nz - 1 )] ) ) ); + bits_fx = L_sub( table_logcum_fx[size + 1], L_add( table_logcum_fx[nz + 1], table_logcum_fx[add( sub( size, nz ), 1 )] ) ); + bits_fx = L_add( bits_fx, L_sub( btcq_fx, L_sub( table_logcum_fx[size + 1], L_add( table_logcum_fx[nz + 1], table_logcum_fx[add(sub( size, nz ) , 1)] ) ) ) ); + bits_fx = L_sub( bits_fx, L_sub( table_logcum_fx[pulses], L_add( table_logcum_fx[nz], table_logcum_fx[sub( pulses, sub( nz, 1 ) )] ) ) ); /*index is pulses - ( nz - 1 )*/ bits_fx = L_sub( bits_fx, nz ); *est_bits_frame_fx = L_add( *est_bits_frame_fx, bits_fx ); - + move32(); /*caculate the #nz probability */ tmp = s_min( pulses, size ); FOR( i = 0; i < tmp; i++ ) { pnzp_fx = L_sub( L_deposit_h( add( i, 1 ) ), btcq_fx ); - pnzp_fx = L_add( pnzp_fx, L_add( L_sub( table_logcum_fx[size + 1], L_add( table_logcum_fx[i + 2], table_logcum_fx[size - i] ) ), - L_sub( table_logcum_fx[pulses], L_add( table_logcum_fx[i + 1], table_logcum_fx[pulses - i] ) ) ) ); + pnzp_fx = L_add( pnzp_fx, L_add( L_sub( table_logcum_fx[size + 1], L_add( table_logcum_fx[i + 2], table_logcum_fx[sub( size, i )] ) ), + L_sub( table_logcum_fx[pulses], L_add( table_logcum_fx[i + 1], table_logcum_fx[sub( pulses, i )] ) ) ) ); pnzp_fx = L_add( pnzp_fx, 917498 ); /*16 */ IF( GT_32( pnzp_fx, 0 ) ) { integer = extract_h( pnzp_fx ); frac = extract_l( L_shr( L_sub( pnzp_fx, L_deposit_h( integer ) ), 1 ) ); /*15 */ prob[i] = extract_h( L_shl( Pow2( integer, frac ), 16 ) ); /*0 */ - + move16(); /*zero probability will incur problems in ar_make_model() */ if ( prob[i] == 0 ) { @@ -1742,7 +1776,7 @@ Word32 encode_position_ari_fx( IF( GT_16( nz, 1 ) ) { - ar_encode_fx( parenc, model_num_nz, nz - 1 ); /*encode #nz */ + ar_encode_fx( parenc, 
model_num_nz, sub( nz, 1 ) ); /*encode #nz */ scp = L_add( fxp1, 0 ); pos = 0; move16(); @@ -1759,11 +1793,11 @@ Word32 encode_position_ari_fx( } ELSE { - cp = L_sub( fxone, div_l( L_deposit_h( nz ), ( size - i ) ) ); + cp = L_sub( fxone, div_l( L_deposit_h( nz ), sub( size, i ) ) ); } scp = Mult_32_16( scp, extract_l( cp ) ); model_num_nz[pos + 1] = round_fx( L_shl( scp, 6 ) ); - + move16(); test(); test(); IF( ( model_num_nz[pos + 1] == 0 && scp > 0 ) || model_num_nz[pos] == model_num_nz[pos + 1] ) @@ -1806,8 +1840,8 @@ Word32 encode_position_ari_fx( move16(); FOR( i = 0; i < size; i++ ) { - model_num_nz[i + 1] = round_fx( L_shr( L_deposit_h( div_l( L_deposit_h( size - i - 1 ), size ) ), 1 ) ); - + model_num_nz[i + 1] = round_fx( L_shr( L_deposit_h( div_l( L_deposit_h( sub( sub( size, i ), 1 ) ), size ) ), 1 ) ); + move16(); if ( cur_quants[i] != 0 ) { pos = i; @@ -1839,7 +1873,7 @@ Word32 encode_magnitude_usq_fx( /*estimate fac bits */ - bits_fx = L_sub( table_logcum_fx[npulses], L_add( table_logcum_fx[nzpos], table_logcum_fx[npulses - nzpos + 1] ) ); + bits_fx = L_sub( table_logcum_fx[npulses], L_add( table_logcum_fx[nzpos], table_logcum_fx[add( sub( npulses, nzpos ), 1 )] ) ); *est_frame_bits_fx = L_add( *est_frame_bits_fx, bits_fx ); test(); @@ -1860,11 +1894,13 @@ Word32 encode_magnitude_usq_fx( tmp = sub( abs_s( magn_fx[i] ), 1 ); FOR( j = 0; j < tmp; j++ ) { - magn_position[k++] = 0; + magn_position[k] = 0; move16(); + k = add( k, 1 ); } - magn_position[k++] = 1; + magn_position[k] = 1; move16(); + k = add( k, 1 ); } } @@ -1892,7 +1928,7 @@ Word32 encode_magnitude_usq_fx( } scp = Mult_32_16( scp, extract_l( cp ) ); model_m[pos + 1] = round_fx( L_shl( scp, 6 ) ); - + move16(); test(); test(); IF( ( model_m[pos + 1] == 0 && scp > 0 ) || model_m[pos] == model_m[pos + 1] ) @@ -1903,8 +1939,7 @@ Word32 encode_magnitude_usq_fx( ar_encode_fx( parenc, model_m, pos ); pos = 0; move16(); - i--; - move16(); + i = sub( i, 1 ); scp = L_add( fxp1, 0 ); CONTINUE; } @@ 
-1914,17 +1949,16 @@ Word32 encode_magnitude_usq_fx( ar_encode_fx( parenc, model_m, pos ); pos = 0; move16(); - magnzp--; + magnzp = sub( magnzp, 1 ); move16(); scp = L_add( fxp1, 0 ); } ELSE { - pos++; - move16(); + pos = add( pos, 1 ); } - magnp--; + magnp = sub( magnp, 1 ); move16(); } return bits_fx; @@ -1945,6 +1979,9 @@ Word32 encode_magnitude_tcq_fx( Word16 i, j; Word32 st; Word16 magn_mode[3] = { MAX_AR_FREQ, 0, 0 }; + move16(); + move16(); + move16(); Word16 leftp = npulses; /*pulsesnum; */ Word16 leftnz = nzpos; /*nzpos; */ @@ -1952,8 +1989,9 @@ Word32 encode_magnitude_tcq_fx( move16(); bits_fx = L_deposit_l( 0 ); - tcq_bits_fx = L_sub( table_logcum_fx[npulses], L_add( table_logcum_fx[nzpos], table_logcum_fx[npulses - ( nzpos - 1 )] ) ); + tcq_bits_fx = L_sub( table_logcum_fx[npulses], L_add( table_logcum_fx[nzpos], table_logcum_fx[sub( npulses, sub( nzpos, 1 ) )] ) ); *est_frame_bits_fx = L_add( *est_frame_bits_fx, tcq_bits_fx ); + move32(); test(); IF( EQ_16( nzpos, npulses ) || EQ_16( nzpos, 1 ) ) @@ -1989,7 +2027,7 @@ Word32 encode_magnitude_tcq_fx( exp1 = sub( norm_s( num ), 1 ); exp2 = norm_s( denum ); prob1_fx = div_s( shl( num, exp1 ), shl( denum, exp2 ) ); /*15 + exp1 - exp2 */ - exp = 15 + exp1 - exp2; + exp = add( 15, sub( exp1, exp2 ) ); move16(); prob1_fx = shl( prob1_fx, sub( 15, exp ) ); prob0_fx = sub( MAX_16, prob1_fx ); @@ -2014,6 +2052,7 @@ Word32 encode_magnitude_tcq_fx( } magn_mode[1] = mult( prob1_fx, MAX_AR_FREQ ); + move16(); IF( LT_16( j, sub( abs_s( magn_fx[i] ), 1 ) ) ) { ar_encode_fx( parenc, magn_mode, 0 ); @@ -2046,6 +2085,7 @@ Word32 encode_signs_fx( Word32 i, sign; *est_frame_bits_fx = L_add( *est_frame_bits_fx, L_deposit_h( npos ) ); + move32(); FOR( i = 0; i < size; i++ ) { IF( magn[i] != 0 ) @@ -2076,6 +2116,8 @@ void decode_position_ari_fx( Word16 integer, frac; Word32 cp, scp, fxone, fxp1; Word16 stpos = 0, pos, ovrflag = 0, temppos, storepos; + move16(); + move16(); fxone = 32768; move32(); @@ -2104,14 +2146,15 @@ 
void decode_position_ari_fx( /*calculate the probability of #nz */ pnzp_fx = L_sub( L_deposit_h( add( i, 1 ) ), btcq_fx ); - pnzp_fx = L_add( pnzp_fx, L_add( L_sub( table_logcum_fx[size + 1], L_add( table_logcum_fx[i + 2], table_logcum_fx[size - i] ) ), - L_sub( table_logcum_fx[npulses], L_add( table_logcum_fx[i + 1], table_logcum_fx[npulses - i] ) ) ) ); + pnzp_fx = L_add( pnzp_fx, L_add( L_sub( table_logcum_fx[size + 1], L_add( table_logcum_fx[i + 2], table_logcum_fx[sub( size, i )] ) ), + L_sub( table_logcum_fx[npulses], L_add( table_logcum_fx[i + 1], table_logcum_fx[sub( npulses, i )] ) ) ) ); pnzp_fx = L_add( pnzp_fx, 917498 ); /*16 */ IF( GT_32( pnzp_fx, 0 ) ) { integer = extract_h( pnzp_fx ); frac = extract_l( L_shr( L_sub( pnzp_fx, L_deposit_h( integer ) ), 1 ) ); /*15 */ prob[i] = extract_h( L_shl( Pow2( integer, frac ), 16 ) ); /*0 */ + move16(); if ( prob[i] == 0 ) { prob[i] = 1; @@ -2127,6 +2170,7 @@ void decode_position_ari_fx( ar_make_model_fx( prob, mode_num_nz, s_min( npulses, size ) ); *nz = add( 1, ar_decode_fx( pardec, mode_num_nz ) ); /*get #nz */ + move16(); nzp = *nz; move16(); IF( nzp == 1 ) @@ -2135,7 +2179,8 @@ void decode_position_ari_fx( move16(); FOR( i = 0; i < size; i++ ) { - mode_num_nz[i + 1] = round_fx( L_shr( L_deposit_h( div_l( L_deposit_h( size - i - 1 ), size ) ), 1 ) ); + mode_num_nz[i + 1] = round_fx( L_shr( L_deposit_h( div_l( L_deposit_h( sub( sub( size, i ), 1 ) ), size ) ), 1 ) ); + move16(); } position[ar_decode_fx( pardec, mode_num_nz )] = 1; @@ -2159,34 +2204,32 @@ void decode_position_ari_fx( ovrflag = 0; move16(); - IF( nzp == ( size - i ) ) + IF( nzp == sub( size, i ) ) { cp = L_deposit_l( 0 ); } ELSE { - cp = L_sub( fxone, div_l( L_deposit_h( nzp ), ( size - i ) ) ); + cp = L_sub( fxone, div_l( L_deposit_h( nzp ), sub( size, i ) ) ); } scp = Mult_32_16( scp, extract_l( cp ) ); - mode_num_nz[i + 1 - storepos - stpos] = round_fx( L_shl( scp, 6 ) ); + mode_num_nz[sub( sub( add( i, 1 ), storepos ), stpos )] = round_fx( 
L_shl( scp, 6 ) ); test(); test(); - IF( ( mode_num_nz[i + 1 - storepos - stpos] == 0 && scp > 0 ) || mode_num_nz[i - storepos - stpos] == mode_num_nz[i + 1 - storepos - stpos] ) + IF( ( mode_num_nz[sub( sub( add( i, 1 ), storepos ), stpos )] == 0 && scp > 0 ) || EQ_16( mode_num_nz[sub( sub( i, storepos ), stpos )] , mode_num_nz[sub( sub( add( i, 1 ), storepos ), stpos )] ) ) { - mode_num_nz[i + 1 - storepos - stpos] = 0; + mode_num_nz[sub( sub( add( i, 1 ), storepos ), stpos )] = 0; move16(); ovrflag = 1; move16(); temppos = ar_decode_fx( pardec, mode_num_nz ); - move16(); - storepos += temppos; - move16(); + storepos = add( storepos, temppos ); scp = L_add( fxp1, 0 ); - IF( temppos == i - stpos ) /* esc transmitted */ + IF( temppos == sub( i, stpos ) ) /* esc transmitted */ { - i--; + i = sub( i, 1 ); move16(); } ELSE @@ -2197,7 +2240,7 @@ void decode_position_ari_fx( } IF( !ovrflag ) { - pos = ar_decode_fx( pardec, mode_num_nz ) + storepos; + pos = add( ar_decode_fx( pardec, mode_num_nz ), storepos ); move16(); } ELSE @@ -2206,10 +2249,9 @@ void decode_position_ari_fx( move16(); } - position[stpos + pos] = 1; - move16(); - stpos += pos + 1; + position[add( stpos, pos )] = 1; move16(); + stpos = add( stpos, add( pos, 1 ) ); } } } @@ -2223,7 +2265,8 @@ void decode_position_ari_fx( move16(); FOR( i = 0; i < size; i++ ) { - mode_num_nz[i + 1] = round_fx( L_shr( L_deposit_h( div_l( L_deposit_h( size - i - 1 ), size ) ), 1 ) ); + mode_num_nz[i + 1] = round_fx( L_shr( L_deposit_h( div_l( L_deposit_h( sub( sub( size, i ), 1 ) ), size ) ), 1 ) ); + move16(); } position[ar_decode_fx( pardec, mode_num_nz )] = 1; @@ -2247,7 +2290,10 @@ void decode_magnitude_usq_fx( { Word16 i, magnp, magnzp; Word16 magns[TCQ_MAX_BAND_SIZE], magncout = 0; + move16(); Word16 storemagn, ovrflag = 0, pos, tempmagn = 0, mmodel[MAX_PULSES + 2]; + move16(); + move16(); Word32 cp, scp, fxone, fxp1; fxone = 32768; @@ -2304,13 +2350,13 @@ void decode_magnitude_usq_fx( ovrflag = 0; move16(); - IF( 
magnzp == ( magnp - i ) ) + IF( magnzp == sub( magnp, i ) ) { cp = L_deposit_l( 0 ); } ELSE { - cp = L_sub( fxone, div_l( L_deposit_h( magnzp ), magnp - i ) ); + cp = L_sub( fxone, div_l( L_deposit_h( magnzp ), sub( magnp, i ) ) ); } IF( cp == fxone ) @@ -2319,18 +2365,17 @@ void decode_magnitude_usq_fx( } scp = Mult_32_16( scp, extract_l( cp ) ); - mmodel[i + 1 - storemagn] = round_fx( L_shl( scp, 6 ) ); - + mmodel[sub( add( i, 1 ), storemagn )] = round_fx( L_shl( scp, 6 ) ); + move16(); test(); test(); - IF( ( mmodel[i + 1 - storemagn] == 0 && scp > 0 ) || mmodel[i - storemagn] == mmodel[i + 1 - storemagn] ) + IF( ( mmodel[sub( add( i, 1 ), storemagn )] == 0 && scp > 0 ) || EQ_16( mmodel[sub( i, storemagn )], mmodel[sub( add( i, 1 ), storemagn )] ) ) { - mmodel[i + 1 - storemagn] = 0; + mmodel[sub( add( i, 1 ), storemagn )] = 0; move16(); /* read data */ tempmagn = ar_decode_fx( pardec, mmodel ); - storemagn += tempmagn; - move16(); + storemagn = add( storemagn, tempmagn ); IF( tempmagn < i ) { @@ -2343,8 +2388,7 @@ void decode_magnitude_usq_fx( { /* esc code */ scp = L_add( fxp1, 0 ); - i--; - move16(); + i = sub( i, 1 ); } } } @@ -2359,12 +2403,9 @@ void decode_magnitude_usq_fx( out[magncout] = ar_decode_fx( pardec, mmodel ) + storemagn + 1; move16(); } - magnp -= out[magncout]; - move16(); - magnzp--; - move16(); - magncout++; - move16(); + magnp = sub( magnp, out[magncout] ); + magnzp = sub( magnzp, 1 ); + magncout = add(magncout, 1 ); IF( magnzp == 0 ) /* last magnitude generation */ { @@ -2372,7 +2413,7 @@ void decode_magnitude_usq_fx( { IF( positions[pos] != 0 ) { - out[magncout] = magnp + 1; + out[magncout] = add( magnp, 1 ); move16(); return; } @@ -2380,7 +2421,7 @@ void decode_magnitude_usq_fx( { out[magncout] = 0; move16(); - magncout++; + magncout = add( magncout, 1 ); move16(); } } @@ -2391,8 +2432,7 @@ void decode_magnitude_usq_fx( { out[magncout] = positions[pos]; move16(); - magncout++; - move16(); + magncout = add( magncout, 1 ); } return; } @@ 
-2401,8 +2441,7 @@ void decode_magnitude_usq_fx( { out[magncout] = 0; move16(); - magncout++; - move16(); + magncout = add( magncout, 1 ); } } @@ -2424,11 +2463,16 @@ void decode_mangitude_tcq_fx( Word16 i, j, symbol, st; Word16 leftp = npulses; /*pulsesnum; */ - Word16 leftnz = nzpos; /*nzpos; */ + move16(); + Word16 leftnz = nzpos; /*nzpos; */ + move16(); Word16 magn_mode[3] = { MAX_AR_FREQ, 0, 0 }; + move16(); + move16(); + move16(); bits_fx = L_deposit_l( 0 ); - tcq_bits_fx = L_sub( table_logcum_fx[npulses], L_add( table_logcum_fx[nzpos], table_logcum_fx[npulses - ( nzpos - 1 )] ) ); + tcq_bits_fx = L_sub( table_logcum_fx[npulses], L_add( table_logcum_fx[nzpos], table_logcum_fx[sub( npulses, sub( nzpos, 1 ) )] ) ); IF( EQ_16( nzpos, npulses ) ) { @@ -2462,6 +2506,7 @@ void decode_mangitude_tcq_fx( } out[i] = positions[i]; + move16(); IF( positions[i] != 0 ) { /*generate the trellis path */ @@ -2483,7 +2528,7 @@ void decode_mangitude_tcq_fx( exp1 = sub( norm_s( num ), 1 ); exp2 = norm_s( denum ); prob1_fx = div_s( shl( num, exp1 ), shl( denum, exp2 ) ); /*15 + exp1 - exp2 */ - exp = 15 + exp1 - exp2; + exp = add(15 , sub(exp1 , exp2)); prob1_fx = shl( prob1_fx, sub( 15, exp ) ); prob0_fx = sub( MAX_16, prob1_fx ); } @@ -2514,7 +2559,7 @@ void decode_mangitude_tcq_fx( /*magn_mode[1] = (short)(prob1 * MAX_AR_FREQ); */ magn_mode[1] = mult( prob1_fx, MAX_AR_FREQ ); - + move16(); IF( ar_decode_fx( pardec, magn_mode ) ) { exp1 = norm_s( prob1_fx ); @@ -2564,6 +2609,7 @@ void decode_mangitude_tcq_fx( IF( positions[i] != 0 ) { out[i] = add( sub( leftp, leftnz ), 1 ); + move16(); } } @@ -2572,6 +2618,7 @@ void decode_mangitude_tcq_fx( { /*update the surplus */ *surplus_fx = L_add( *surplus_fx, L_sub( tcq_bits_fx, L_shl( bits_fx, 1 ) ) ); + move32(); } return; @@ -2594,6 +2641,7 @@ void decode_signs_fx( if ( tmp <= 0 ) { out[i] = negate( out[i] ); + move16(); } } } @@ -2608,18 +2656,19 @@ Word16 GetScale_fx( ) { Word16 pulses = MAX_PULSES, p_est, exp, exp1, exp2, 
magicnum; + move16(); Word32 t, a, b, ab, estbits_fx = 0; - + move32(); magicnum = 24773; move16(); /*Q17: 0.188992013101951f; */ t = L_shr( L_mult( magicnum, blen ), 2 ); exp = norm_l( t ); - a = L_shl( 14 - exp, 15 ) + Log2_norm_lc( L_shl( t, exp ) ); + a = L_add( L_shl( sub( 14, exp ), 15 ), Log2_norm_lc( L_shl( t, exp ) ) ); exp1 = sub( norm_l( bits_fx ), 1 ); - exp2 = norm_s( blen - 1 ); - b = L_shr( L_deposit_l( div_l( L_shl( bits_fx, exp1 ), shl( blen - 1, exp2 ) ) ), exp1 - exp2 ); + exp2 = norm_s( sub( blen, 1 ) ); + b = L_shr( L_deposit_l( div_l( L_shl( bits_fx, exp1 ), shl( sub( blen, 1 ), exp2 ) ) ), sub( exp1, exp2 ) ); ab = L_add( a, b ); @@ -2639,6 +2688,7 @@ Word16 GetScale_fx( IF( surplus_fx != 0 ) { *surplus_fx = L_add( *surplus_fx, L_sub( bits_fx, estbits_fx ) ); + move32(); /* result of L_add is stored: count a 32-bit move */ } return pulses; diff --git a/lib_com/tcx_ltp_fx.c b/lib_com/tcx_ltp_fx.c index 0c3190705..88481ad6f 100644 --- a/lib_com/tcx_ltp_fx.c +++ b/lib_com/tcx_ltp_fx.c @@ -402,6 +402,7 @@ void predict_signal( const Word16 *x0, *win; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif x0 = &excI[-T0 - 1]; frac = negate( frac ); @@ -453,6 +454,7 @@ static void tcx_ltp_synth_filter( const Word16 *v0, *v1; const Word16 *w0, *w1; Word16 alpha, step = 0; /* initialize just to avoid compiler warning */ + move16(); Word16 i, j, k, L; IF( gain > 0 ) @@ -663,7 +665,7 @@ static void tcx_ltp_synth_filter32( L_tmp2 = Mpy_32_16_r( L_tmp2, alpha ); #ifdef BASOP_NOGLOB L_tmp2 = L_add_sat( synth[j], L_tmp2 ); - IF( zir != NULL ) + if( zir != NULL ) { L_tmp2 = L_sub_sat( L_tmp2, zir[j] ); } @@ -726,12 +728,13 @@ static void tcx_ltp_synth_filter_10( assert( GE_16( filtIdx, 0 ) ); w0 = &tcxLtpFilters[filtIdx].filt[pitch_fr]; - w1 = &tcxLtpFilters[filtIdx].filt[pitch_res - pitch_fr]; + w1 = &tcxLtpFilters[filtIdx].filt[sub(pitch_res , pitch_fr)]; v0 = &tcxLtpFilters[filtIdx].filt[0]; v1 = &tcxLtpFilters[filtIdx].filt[pitch_res]; L = tcxLtpFilters[filtIdx].length; curr_gain = gain; + 
move16(); gain_step = negate( gain ) / length; for ( j = 0; j < length; j++ ) @@ -784,7 +787,7 @@ static void tcx_ltp_synth_filter_10_fx( y0 = in; y1 = y0 - 1; - assert( GE_16( filtIdx, 0 ) ); + assert( filtIdx >= 0 ); w0 = &tcxLtpFilters[filtIdx].filt[pitch_fr]; w1 = &tcxLtpFilters[filtIdx].filt[pitch_res - pitch_fr]; @@ -793,12 +796,15 @@ static void tcx_ltp_synth_filter_10_fx( L = tcxLtpFilters[filtIdx].length; curr_gain = gain; - gain_step = negate( gain ) / length; + move16(); + gain_step = idiv1616(negate( gain ) , length); // TODO for ( j = 0; j < length; j++ ) { s = 0; + move16(); s2 = 0; + move16(); for ( i = 0, k = 0; i < L; i++, k += pitch_res ) { @@ -810,7 +816,7 @@ static void tcx_ltp_synth_filter_10_fx( // out[j] = in[j] - curr_gain * s2 * ALPHA + curr_gain * s; out[j] = L_add_sat( in[j], Mpy_32_16_1( L_sub_sat( s, Mpy_32_16_1( s2, ALPHA ) ), curr_gain ) ); - + move32(); x0++; x1++; y0++; @@ -845,7 +851,7 @@ static void tcx_ltp_synth_filter_01( y0 = in; y1 = y0 - 1; - assert( GE_16( filtIdx, 0 ) ); + assert( filtIdx >= 0 ); w0 = &tcxLtpFilters[filtIdx].filt[pitch_fr]; w1 = &tcxLtpFilters[filtIdx].filt[pitch_res - pitch_fr]; @@ -854,12 +860,16 @@ static void tcx_ltp_synth_filter_01( L = tcxLtpFilters[filtIdx].length; curr_gain = 0; - gain_step = gain / length; + move16(); + //gain_step = gain / length; // TODO + gain_step = idiv1616(gain , length); // TODO for ( j = 0; j < length; j++ ) { s = 0; + move16(); s2 = 0; + move16(); for ( i = 0, k = 0; i < L; i++, k = add( k, pitch_res ) ) { @@ -871,7 +881,7 @@ static void tcx_ltp_synth_filter_01( // out[j] = in[j] - curr_gain * s2 * ALPHA + curr_gain * s; out[j] = add_sat( in[j], mult_r_sat( curr_gain, sub_sat( round_fx_sat( s ), mult_r_sat( round_fx_sat( s2 ), ALPHA ) ) ) ); - + move32(); x0++; x1++; @@ -907,7 +917,7 @@ static void tcx_ltp_synth_filter_01_fx( y0 = in; y1 = y0 - 1; - assert( GE_16( filtIdx, 0 ) ); + assert( filtIdx >= 0 ); w0 = &tcxLtpFilters[filtIdx].filt[pitch_fr]; w1 = 
&tcxLtpFilters[filtIdx].filt[pitch_res - pitch_fr]; @@ -916,12 +926,16 @@ static void tcx_ltp_synth_filter_01_fx( L = tcxLtpFilters[filtIdx].length; curr_gain = 0; - gain_step = gain / length; + move16(); + //gain_step = gain / length; // TODO + gain_step = idiv1616(gain , length); // TODO for ( j = 0; j < length; j++ ) { s = 0; + move16(); s2 = 0; + move16(); for ( i = 0, k = 0; i < L; i++, k = add( k, pitch_res ) ) { @@ -933,7 +947,7 @@ static void tcx_ltp_synth_filter_01_fx( // out[j] = in[j] - curr_gain * s2 * ALPHA + curr_gain * s; out[j] = L_add_sat( in[j], Mpy_32_16_r( L_sub_sat( s, Mpy_32_16_1( s2, ALPHA ) ), curr_gain ) ); - + move32(); x0++; x1++; @@ -992,7 +1006,7 @@ static void tcx_ltp_synth_filter_11_unequal_pitch( y0 = in; y1 = y0 - 1; - assert( GE_16( filtIdx, 0 ) && GE_16( prev_filtIdx, 0 ) ); + assert( ( filtIdx >= 0 ) && ( prev_filtIdx >= 0 ) ); w0 = &tcxLtpFilters[prev_filtIdx].filt[prev_pitch_fr]; w1 = &tcxLtpFilters[prev_filtIdx].filt[prev_pitch_res - prev_pitch_fr]; @@ -1000,6 +1014,7 @@ static void tcx_ltp_synth_filter_11_unequal_pitch( v1 = &tcxLtpFilters[prev_filtIdx].filt[prev_pitch_res]; prev_L = tcxLtpFilters[prev_filtIdx].length; + move16(); p0 = &tcxLtpFilters[filtIdx].filt[cur_pitch_fr]; p1 = &tcxLtpFilters[filtIdx].filt[pitch_res - cur_pitch_fr]; @@ -1007,15 +1022,20 @@ static void tcx_ltp_synth_filter_11_unequal_pitch( q1 = &tcxLtpFilters[filtIdx].filt[pitch_res]; L = tcxLtpFilters[filtIdx].length; + move16(); /* 1. decreasing gain filter. 
The first filter unit with the parameters associated to the previous interval and scaling towards 0 */ gain = prev_gain; - gain_step = negate( prev_gain ) / length; + move16(); + //gain_step = negate( prev_gain ) / length; // TODO + gain_step = idiv1616(negate( prev_gain ) , length); // TODO for ( j = 0; j < length; j++ ) { s = 0; + move16(); s2 = 0; + move16(); for ( i = 0, k = 0; i < prev_L; i++, k += prev_pitch_res ) { @@ -1047,12 +1067,15 @@ static void tcx_ltp_synth_filter_11_unequal_pitch( /* 2. increasing gain filter. The second filter unit with the parameters associated to the current interval and scaling from 0 towards current gain */ gain = 0; - gain_step = cur_gain / length; + move16(); + gain_step = cur_gain / length; // TODO for ( j = 0; j < length; j++ ) { s3 = 0; + move16(); s4 = 0; + move16(); for ( i = 0, k = 0; i < L; i++, k += pitch_res ) { @@ -1064,7 +1087,7 @@ static void tcx_ltp_synth_filter_11_unequal_pitch( // out[j] = *(temp_ptr + j) - gain * s4 * ALPHA + gain * s3; out[j] = add_sat( *( temp_ptr + j ), mult_r_sat( gain, sub_sat( round_fx_sat( s3 ), mult_r_sat( round_fx_sat( s4 ), ALPHA ) ) ) ); - + move32(); l0++; l1++; @@ -1120,6 +1143,7 @@ static void tcx_ltp_synth_filter_11_unequal_pitch_fx( v1 = &tcxLtpFilters[prev_filtIdx].filt[prev_pitch_res]; prev_L = tcxLtpFilters[prev_filtIdx].length; + move16(); p0 = &tcxLtpFilters[filtIdx].filt[cur_pitch_fr]; p1 = &tcxLtpFilters[filtIdx].filt[pitch_res - cur_pitch_fr]; @@ -1127,15 +1151,19 @@ static void tcx_ltp_synth_filter_11_unequal_pitch_fx( q1 = &tcxLtpFilters[filtIdx].filt[pitch_res]; L = tcxLtpFilters[filtIdx].length; + move16(); /* 1. decreasing gain filter. 
The first filter unit with the parameters associated to the previous interval and scaling towards 0 */ gain = prev_gain; - gain_step = negate( prev_gain ) / length; + move16(); + gain_step = idiv1616(negate( prev_gain ) , length); // TODOD for ( j = 0; j < length; j++ ) { s = 0; + move16(); s2 = 0; + move16(); for ( i = 0, k = 0; i < prev_L; i++, k += prev_pitch_res ) { @@ -1147,7 +1175,7 @@ static void tcx_ltp_synth_filter_11_unequal_pitch_fx( // out[j] = in[j] - gain * s2 * ALPHA + gain * s; out[j] = L_add_sat( in[j], Mpy_32_16_r( L_sub_sat( s, Mpy_32_16_1( s2, ALPHA ) ), gain ) ); - + move32(); x0++; x1++; y0++; @@ -1167,12 +1195,15 @@ static void tcx_ltp_synth_filter_11_unequal_pitch_fx( /* 2. increasing gain filter. The second filter unit with the parameters associated to the current interval and scaling from 0 towards current gain */ gain = 0; - gain_step = cur_gain / length; + move16(); + gain_step = cur_gain / length; // TODO for ( j = 0; j < length; j++ ) { s3 = 0; + move16(); s4 = 0; + move16(); for ( i = 0, k = 0; i < L; i++, k += pitch_res ) { @@ -1184,7 +1215,7 @@ static void tcx_ltp_synth_filter_11_unequal_pitch_fx( // out[j] = *(temp_ptr + j) - gain * s4 * ALPHA + gain * s3; out[j] = L_add_sat( *( temp_ptr + j ), Mpy_32_16_r( L_sub_sat( s3, Mpy_32_16_1( s4, ALPHA ) ), gain ) ); - + move32(); l0++; l1++; @@ -1309,9 +1340,10 @@ void tcx_ltp_post( } filtIdx = 0; /* just to avoid comilation warnings */ + move16(); tcx_buf_len = NS2SA( st->output_Fs, TCXLTP_DELAY_NS ); SideInfoOnly = 0; - + move16(); if ( GE_32( total_brate, HQ_96k ) ) { SideInfoOnly = 1; @@ -1327,16 +1359,17 @@ void tcx_ltp_post( move16(); } - IF( EQ_16( core, ACELP_CORE ) ) + IF( core == ACELP_CORE ) { bfi = 0; + move16(); pitch_int = 0; - pitch_fr = 0; - gain = 0; - L_frame_core = st->L_frame_past; move16(); + pitch_fr = 0; move16(); + gain = 0; move16(); + L_frame_core = st->L_frame_past; move16(); } ELSE @@ -1388,6 +1421,7 @@ void tcx_ltp_post( ELSE IF( bfi == 0 ) { /* LTP and good 
frame */ + test(); IF( EQ_16( st->element_mode, EVS_MONO ) ) /* hard tunings for EVS_MONO, HQ_48k is TCX only */ { IF( NE_16( output_frame, L_frame_core ) ) @@ -1544,29 +1578,36 @@ void tcx_ltp_post( test(); test(); test(); - //#ifdef IVAS_CODE_TCX_LTP + // #ifdef IVAS_CODE_TCX_LTP if ( st->element_mode != EVS_MONO ) - { - if ( gain == 0 && hTcxLtpDec->tcxltp_gain_post_prev == 0 ) + { + test(); + test(); + test(); + test(); + test(); + test(); + test(); + IF ( gain == 0 && hTcxLtpDec->tcxltp_gain_post_prev == 0 ) { /* The filtering is deactivated, just copy input to the output */ Copy( sig_in + delay, sig_out + delay, L_transition ); } - else if ( gain == 0 && hTcxLtpDec->tcxltp_gain_post_prev != 0 ) + ELSE IF ( gain == 0 && hTcxLtpDec->tcxltp_gain_post_prev != 0 ) { /* Filtering with the first filter unit */ tcx_ltp_synth_filter_10( sig_out + delay, sig_in + delay, L_transition, hTcxLtpDec->tcxltp_pitch_int_post_prev, hTcxLtpDec->tcxltp_pitch_fr_post_prev, hTcxLtpDec->tcxltp_gain_post_prev, st->pit_res_max_past, hTcxLtpDec->tcxltp_filt_idx_prev ); } - else if ( gain != 0 && hTcxLtpDec->tcxltp_gain_post_prev == 0 ) + ELSE IF( gain != 0 && hTcxLtpDec->tcxltp_gain_post_prev == 0 ) { /* Filtering with the second filter unit */ tcx_ltp_synth_filter_01( sig_out + delay, sig_in + delay, L_transition, pitch_int, pitch_fr, gain, st->pit_res_max, filtIdx ); } - else if ( gain == hTcxLtpDec->tcxltp_gain_post_prev && pitch_int == hTcxLtpDec->tcxltp_pitch_int_post_prev && pitch_fr == hTcxLtpDec->tcxltp_pitch_fr_post_prev && st->pit_res_max == st->pit_res_max_past && filtIdx == hTcxLtpDec->tcxltp_filt_idx_prev ) + ELSE IF( EQ_16(gain ,hTcxLtpDec->tcxltp_gain_post_prev )&& EQ_16(pitch_int , hTcxLtpDec->tcxltp_pitch_int_post_prev) && EQ_16(pitch_fr, hTcxLtpDec->tcxltp_pitch_fr_post_prev )&& EQ_16(st->pit_res_max , st->pit_res_max_past )&& EQ_16(filtIdx , hTcxLtpDec->tcxltp_filt_idx_prev )) { tcx_ltp_synth_filter( sig_out + delay, sig_in + delay, L_transition, pitch_int, pitch_fr, 
gain, st->pit_res_max, NULL, 0, filtIdx ); } - else + ELSE { /* Filtering with the first filter unit, followed by the filtering with the second filter unit */ tcx_ltp_synth_filter_11_unequal_pitch( sig_out + delay, sig_in + delay, L_transition, pitch_int, pitch_fr, gain, st->pit_res_max, filtIdx, @@ -1574,8 +1615,12 @@ void tcx_ltp_post( } } ELSE - //#endif + // #endif { + test(); + test(); + test(); + test(); IF( gain == 0 && hTcxLtpDec->tcxltp_gain_post_prev == 0 ) { Copy( sig_in + delay, sig_out + delay, L_transition ); @@ -1626,6 +1671,7 @@ void tcx_ltp_post( hTcxLtpDec->tcxltp_filt_idx_prev = filtIdx; move16(); st->pit_res_max_past = st->pit_res_max; + move16(); Copy( sig_out, hTcxLtpDec->tcxltp_mem_out, output_frame ); } @@ -1653,14 +1699,16 @@ void tcx_ltp_post32( move32(); IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) ) { - total_brate = st->bits_frame_nominal * FRAMES_PER_SEC; + total_brate = st->bits_frame_nominal * FRAMES_PER_SEC; // TDO move32(); } filtIdx = 0; /* just to avoid comilation warnings */ + move16(); tcx_buf_len = NS2SA_fx2( st->output_Fs, TCXLTP_DELAY_NS ); + move16(); SideInfoOnly = 0; - + move16(); if ( GE_32( total_brate, HQ_96k ) ) { SideInfoOnly = 1; @@ -1745,7 +1793,7 @@ void tcx_ltp_post32( pitch_int = idiv1616U( tmp, st->pit_res_max ); pitch_fr = sub( tmp, imult1616( pitch_int, st->pit_res_max ) ); } - IF( EQ_16( st->element_mode, EVS_MONO ) ) /* hard tunings for EVS_MONO, HQ_48k is TCX only */ + IF( st->element_mode == EVS_MONO ) /* hard tunings for EVS_MONO, HQ_48k is TCX only */ { test(); test(); @@ -1893,7 +1941,7 @@ void tcx_ltp_post32( test(); test(); test(); - //#ifdef IVAS_CODE_TCX_LTP + // #ifdef IVAS_CODE_TCX_LTP if ( st->element_mode != EVS_MONO ) { if ( gain == 0 && hTcxLtpDec->tcxltp_gain_post_prev == 0 ) @@ -1923,7 +1971,7 @@ void tcx_ltp_post32( } } ELSE - //#endif + // #endif { IF( gain == 0 && hTcxLtpDec->tcxltp_gain_post_prev == 0 ) { diff --git a/lib_com/tcx_mdct_fx.c b/lib_com/tcx_mdct_fx.c index 
76f8e414a..1281c027e 100644 --- a/lib_com/tcx_mdct_fx.c +++ b/lib_com/tcx_mdct_fx.c @@ -30,7 +30,7 @@ static Word16 TCX_MDCT_GetScaleFactor( *factor_e = 0; move16(); } - ELSE IF( EQ_16( L, 4 * NORM_MDCT_FACTOR ) ) + ELSE IF( EQ_16( L, shl( NORM_MDCT_FACTOR, 2 ) ) ) { factor = 16384; move16(); @@ -109,6 +109,7 @@ void TCX_MDCT( #endif #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif factor = TCX_MDCT_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &factor_e ); *y_e = add( *y_e, factor_e ); @@ -119,13 +120,13 @@ void TCX_MDCT( /* Init */ FOR( i = 0; i < m / 2; i++ ) { - y[m / 2 + r / 2 + i] = L_mult( x[l + m / 2 - 1 - i], neg_factor ); + y[add( add( shr( m, 1 ), shr( r, 1 ) ), i )] = L_mult( x[sub( sub( add( l, shr( m, 1 ) ), 1 ), i )], neg_factor ); move32(); } FOR( i = 0; i < l / 2; i++ ) { #ifdef BASOP_NOGLOB - y[m / 2 + r / 2 + m / 2 + i] = L_msu_o( L_mult( x[i], factor ), x[l - 1 - i], factor, &Overflow ); + y[add( add( add( shr( m, 1 ), shr( r, 1 ) ), shr( m, 1 ) ), i )] = L_msu_o( L_mult( x[i], factor ), x[sub(sub(l , 1) , i)], factor, &Overflow ); #else /* BASOP_NOGLOB */ y[m / 2 + r / 2 + m / 2 + i] = L_msu( L_mult( x[i], factor ), x[l - 1 - i], factor ); #endif /* BASOP_NOGLOB */ @@ -133,13 +134,13 @@ void TCX_MDCT( } FOR( i = 0; i < m / 2; i++ ) { - y[m / 2 + r / 2 - 1 - i] = L_mult( x[l + m / 2 + i], neg_factor ); + y[sub( sub( add( shr( m, 1 ), shr( r, 1 ) ), 1 ), i )] = L_mult( x[add(add(l , shr(m ,1)) , i)], neg_factor ); move32(); } FOR( i = 0; i < r / 2; i++ ) { #ifdef BASOP_NOGLOB - y[m / 2 + r / 2 - 1 - m / 2 - i] = L_mac_o( L_mult( x[l + m + i], neg_factor ), x[l + m + r - 1 - i], neg_factor, &Overflow ); + y[sub( sub( sub( add( shr( m, 1 ), shr( r, 1 ) ), 1 ), shr( m, 1 ) ), i )] = L_mac_o( L_mult( x[add( add( l, m ), i )], neg_factor ), x[sub(sub(add(add(l , m) , r) , 1) , i)], neg_factor, &Overflow ); #else /* BASOP_NOGLOB */ y[m / 2 + r / 2 - 1 - m / 2 - i] = L_mac( L_mult( x[l + m + i], neg_factor ), x[l + m 
+ r - 1 - i], neg_factor ); #endif /* BASOP_NOGLOB */ @@ -147,8 +148,10 @@ void TCX_MDCT( } *y_e = sub( 15, *y_e ); + move16(); edct_fx( y, y, l / 2 + m + r / 2, y_e ); *y_e = sub( 15 - 1, *y_e ); + move16(); return; } @@ -170,6 +173,7 @@ void TCX_MDST( #endif #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif factor = TCX_MDCT_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &factor_e ); *y_e = add( *y_e, factor_e ); @@ -179,13 +183,13 @@ void TCX_MDST( /* Init */ FOR( i = 0; i < m / 2; i++ ) { - y[m / 2 + r / 2 + i] = L_mult( x[l + m / 2 - 1 - i], neg_factor ); + y[add( add( shr( m, 1 ), shr( r, 1 ) ), i )] = L_mult( x[sub( sub( add( l, shr( m, 1 ) ), 1 ), i )], neg_factor ); move32(); } FOR( i = 0; i < l / 2; i++ ) { #ifdef BASOP_NOGLOB - y[m / 2 + r / 2 + m / 2 + i] = L_msu_o( L_mult( x[i], neg_factor ), x[l - 1 - i], factor, &Overflow ); + y[add( add( add( shr(m ,1), shr(r ,1) ), shr(m ,1) ), i )] = L_msu_o( L_mult( x[i], neg_factor ), x[sub(sub(l , 1) , i)], factor, &Overflow ); #else /* BASOP_NOGLOB */ y[m / 2 + r / 2 + m / 2 + i] = L_msu( L_mult( x[i], neg_factor ), x[l - 1 - i], factor ); #endif @@ -193,13 +197,13 @@ void TCX_MDST( } FOR( i = 0; i < m / 2; i++ ) { - y[m / 2 + r / 2 - 1 - i] = L_mult( x[l + m / 2 + i], neg_factor ); + y[sub( sub( add( shr( m, 1 ), shr( r, 1 ) ), 1 ), i )] = L_mult( x[add( add( l, shr( m, 1 ) ), i )], neg_factor ); move32(); } FOR( i = 0; i < r / 2; i++ ) { #ifdef BASOP_NOGLOB - y[m / 2 + r / 2 - 1 - m / 2 - i] = L_mac_sat( L_mult( x[l + m + i], neg_factor ), x[l + m + r - 1 - i], factor ); + y[sub( sub( sub( add( shr( m, 1 ), shr( r, 1 ) ), 1 ), m / 2 ), i )] = L_mac_sat( L_mult( x[add( add( l, m ), i )], neg_factor ), x[sub( sub( add( add( l, m ), r ), 1 ), i )], factor ); #else y[m / 2 + r / 2 - 1 - m / 2 - i] = L_mac( L_mult( x[l + m + i], neg_factor ), x[l + m + r - 1 - i], factor ); #endif @@ -207,7 +211,7 @@ void TCX_MDST( } *y_e = sub( 15, *y_e ); - edst_fx( y, y, l / 2 + m + r / 2, y_e 
); + edst_fx( y, y, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), y_e ); *y_e = sub( 15 - 1, *y_e ); return; } @@ -233,7 +237,7 @@ void TCX_MDCT_Inverse( R2 = shr( r, 1 ); x_e = sub( 15, x_e ); - edct_fx( x, tmp_buf + L2, l / 2 + m + r / 2, &x_e ); + edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e ); x_e = sub( 15, x_e ); fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); @@ -247,19 +251,22 @@ void TCX_MDCT_Inverse( FOR( i = 0; i < R2; i++ ) { #ifdef BASOP_NOGLOB - y[l + m + R2 + i] = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[L2 + i], negfac ), s ) ); /* fold out right end of DCT */ + y[add(add(add(l , m) , R2) , i)] = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[add(L2 , i)], negfac ), s ) ); /* fold out right end of DCT */ #else y[l + m + R2 + i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + i], negfac ), s ) ); /* fold out right end of DCT */ #endif + + move16(); } FOR( i = 0; i < L2; i++ ) { #ifdef BASOP_NOGLOB - y[i] = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[L2 + m + R2 + i], fac ), s ) ); /* negate, fold out left end of DCT */ + y[i] = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[add(add(add(L2 , m) , R2) , i)], fac ), s ) ); /* negate, fold out left end of DCT */ #else y[i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + m + R2 + i], fac ), s ) ); /* negate, fold out left end of DCT */ #endif + move16(); } FOR( i = 0; i < shr( add( L2, add( m, R2 ) ), 1 ); i++ ) @@ -267,14 +274,15 @@ void TCX_MDCT_Inverse( Word16 f; #ifdef BASOP_NOGLOB - f = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[L2 + i], negfac ), s ) ); - y[L2 + i] = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[l + m + R2 - 1 - i], negfac ), s ) ); /* time-reverse mid of DCT */ + f = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[add( L2, i )], negfac ), s ) ); + y[add( L2, i )] = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[sub( sub( add( add( l, m ), R2 ), 1 ), i )], negfac ), s ) ); /* time-reverse mid of DCT */ #else f = 
round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + i], negfac ), s ) ); y[L2 + i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[l + m + R2 - 1 - i], negfac ), s ) ); /* time-reverse mid of DCT */ #endif move16(); - y[l + m + R2 - 1 - i] = f; + y[sub( sub( add( add( l, m ), R2 ), 1 ), i )] = f; + move16(); } } @@ -290,6 +298,8 @@ void TCX_MDST_Inverse_fx( Word16 i, fac, negfac, s; Word16 L2 = l, R2 = r; + move16(); + move16(); Word32 tmp_buf[N_MAX + L_MDCT_OVLP_MAX / 2]; Word16 fac_e; @@ -297,7 +307,7 @@ void TCX_MDST_Inverse_fx( R2 = shr( r, 1 ); x_e = sub( 15, x_e ); - edst_fx( x, tmp_buf + L2, l / 2 + m + r / 2, &x_e ); + edst_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e ); x_e = sub( 15, x_e ); fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); @@ -310,23 +320,26 @@ void TCX_MDST_Inverse_fx( FOR( i = 0; i < R2; i++ ) { - y[l + m + R2 + i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + i], fac ), s ) ); /* fold out right end of DCT */ + y[add(add(add(l , m) , R2) , i)] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add(L2 , i)], fac ), s ) ); /* fold out right end of DCT */ + move16(); } FOR( i = 0; i < L2; i++ ) { - y[i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + m + R2 + i], negfac ), s ) ); /* negate, fold out left end of DCT */ + y[i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add( add( add( L2, m ), R2 ), i )], negfac ), s ) ); /* negate, fold out left end of DCT */ + move16(); } FOR( i = 0; i < shr( add( L2, add( m, R2 ) ), 1 ); i++ ) { Word16 f; - f = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + i], fac ), s ) ); + f = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add( L2, i )], fac ), s ) ); - y[L2 + i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[l + m + R2 - 1 - i], negfac ), s ) ); /* time-reverse mid of DCT */ + y[add(L2 , i)] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[sub( sub( add( add( l, m ), R2 ), 1 ), i )], negfac ), s ) ); /* time-reverse mid of DCT */ + move16(); + y[sub( sub( add( add( l, m ), R2 ), 1 ), i )] = negate( 
f ); move16(); - y[l + m + R2 - 1 - i] = negate( f ); } } @@ -354,37 +367,54 @@ void TCX_MDXT_Inverse_fx( set32_fx( tmp_buf, 0, N_MAX + L_MDCT_OVLP_MAX / 2 ); - edxt_fx( x, tmp_buf + L2, L2 + m + R2, kernel_type, TRUE ); + edxt_fx( x, tmp_buf + L2, add(add(L2 , m) , R2), kernel_type, TRUE ); fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); x_e = add( x_e, fac_e ); negfac = negate( fac ); - signLeft = ( kernel_type >= MDCT_II ? negfac : fac ); - signRight = ( kernel_type & 1 ? fac : negfac ); + IF( GE_16( kernel_type, MDCT_II ) ) + { + signLeft = negfac; + } + ELSE + { + signLeft = fac; + } + //signRight = ( kernel_type & 1 ? fac : negfac ); + IF ( L_and( kernel_type, 1 ) ) + { + signRight = fac; + } + ELSE + { + signRight = negfac; + } s = x_e; move16(); FOR( i = 0; i < L2; i++ ) { - y[i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + m + R2 + i], signLeft ), s ) ); /* fold out the left end */ + y[i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add(add(add(L2 , m) , R2) , i)], signLeft ), s ) ); /* fold out the left end */ } FOR( i = 0; i < R2; i++ ) { - y[l + m + R2 + i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + i], signRight ), s ) ); /* ...and right end */ + y[add(add(add(l , m) , R2) , i)] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add(L2 , i)], signRight ), s ) ); /* ...and right end */ + move16(); } - FOR( i = 0; i < ( ( L2 + m + R2 ) >> 1 ); i++ ) + FOR( i = 0; i < ( ( add(add(L2 , m) , R2) ) >> 1 ); i++ ) { - f = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + i], negfac ), s ) ); + f = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add(L2 , i)], negfac ), s ) ); - y[L2 + i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[l + m + R2 - 1 - i], negfac ), s ) ); /* time-reverse mid of DCT */ + y[add(L2 , i)] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[sub(sub(add(add(l , m) , R2) , 1) , i)], negfac ), s ) ); /* time-reverse mid of DCT */ + move16(); + y[sub(sub(add(add(l , m) , R2) , 1) , i)] = f; move16(); - y[l + m + R2 - 1 - i] = f; } return; diff 
--git a/lib_com/tcx_mdct_window.c b/lib_com/tcx_mdct_window.c index 87333ccd4..44c1755ae 100644 --- a/lib_com/tcx_mdct_window.c +++ b/lib_com/tcx_mdct_window.c @@ -55,13 +55,13 @@ const PWord16 *getSineWindowTable( Word16 length ); void mdct_window_sine_flt( float *window, const int32_t Fs, - const int16_t n, - const int16_t window_type, - const int16_t element_mode ) + const Word16 n, + const Word16 window_type, + const Word16 element_mode ) { if ( element_mode == EVS_MONO ) { - int16_t i; + Word16 i; float c; c = EVS_PI / ( 2.0f * (float) n ); @@ -74,26 +74,26 @@ void mdct_window_sine_flt( else { const float *window_table = 0; - int16_t buf_in_size = 0; + Word16 buf_in_size = 0; switch ( window_type ) { case FULL_OVERLAP: window_table = tcx_mdct_window_48; buf_in_size = 420; - break; + BREAK; case HALF_OVERLAP: window_table = tcx_mdct_window_half_48; buf_in_size = 180; - break; + BREAK; case TRANSITION_OVERLAP: case MIN_OVERLAP: window_table = tcx_mdct_window_trans_48; buf_in_size = 60; - break; + BREAK; default: assert( 0 && "Unsupported window type" ); - break; + BREAK; } if ( Fs == 48000 ) @@ -116,56 +116,64 @@ void mdct_window_sine_IVAS_updated( const Word16 window_type, const Word16 element_mode ) { - if ( element_mode == EVS_MONO ) + IF (EQ_16( element_mode , EVS_MONO )) { const PWord16 *table; table = getSineWindowTable( n ); - for ( int i = 0; i < n / 2; i++ ) + FOR ( Word32 i = 0; i < shr( n, 1 ); i++ ) { window[i].v.re = table[i].v.re; + move16(); window[i].v.im = table[i].v.im; + move16(); } // PMT("getSineWindowTable needs to be updated for IVAS") } - else + ELSE { const Word16 *window_table = 0; - int16_t buf_in_size = 0; - Word16 temp[420] = { 0 }; - switch ( window_type ) + Word16 buf_in_size = 0; + move16(); + Word16 temp[420]; + set16_fx(temp, 0, 420); + SWITCH ( window_type ) { case FULL_OVERLAP: window_table = tcx_mdct_window_48_fx; buf_in_size = 420; - break; + move16(); + BREAK; case HALF_OVERLAP: window_table = tcx_mdct_window_half_48_fx; 
buf_in_size = 180; - break; + BREAK; case TRANSITION_OVERLAP: case MIN_OVERLAP: window_table = tcx_mdct_window_trans_48_fx; buf_in_size = 60; - break; + move16(); + BREAK; default: assert( 0 && "Unsupported window type" ); - break; + BREAK; } - if ( Fs == 48000 ) + IF( EQ_32( Fs , 48000 )) { Copy( window_table, temp, n ); } - else + ELSE { lerp( window_table, temp, n, buf_in_size ); } - for ( int i = 0; i < n / 2; i++ ) + FOR ( Word32 i = 0; i < shr(n, 1); i++ ) { window[i].v.re = temp[n - 1 - i]; + move16(); window[i].v.im = temp[i]; + move16(); } } } @@ -179,10 +187,12 @@ void mdct_window_sine( PWord16 *window, Word16 n ) { const PWord16 *table; table = getSineWindowTable( n ); - for ( int i = 0; i < n / 2; i++ ) + FOR ( Word32 i = 0; i < shr(n, 1); i++ ) { window[i].v.re = table[i].v.re; + move16(); window[i].v.im = table[i].v.im; + move16(); } // PMT("getSineWindowTable needs to be updated for IVAS") } @@ -190,7 +200,7 @@ void mdct_window_sine( PWord16 *window, Word16 n ) else { const float *window_table = 0; - int16_t buf_in_size = 0; + Word16 buf_in_size = 0; switch ( window_type ) { case FULL_OVERLAP: @@ -233,9 +243,9 @@ void mdct_window_sine( PWord16 *window, Word16 n ) void mdct_window_aldo_flt( float *window1, float *window2, - const int16_t n ) + const Word16 n ) { - int16_t i, n1, n2, d; + Word16 i, n1, n2, d; const float *p1, *p2; /* set table pointers and decimation factor */ @@ -394,6 +404,7 @@ void mdct_window_aldo( p1 = window_48kHz_fx + 2; p2 = window_48kHz_fx + 1110 - 3; d = 6; + move16(); BREAK; case 512 / 2: p1 = window_256kHz; @@ -404,11 +415,13 @@ void mdct_window_aldo( p1 = window_48kHz_fx + 1; p2 = window_48kHz_fx + 1110 - 2; d = 3; + move16(); BREAK; case 1024 / 2: p1 = window_256kHz; p2 = window_256kHz + 592 - 1; d = 1; + move16(); BREAK; case 1280 / 2: p1 = window_48kHz_fx + 1; @@ -419,6 +432,7 @@ void mdct_window_aldo( p1 = window_48kHz_fx; p2 = window_48kHz_fx + 1110 - 1; d = 1; + move16(); BREAK; default: assert( 0 ); diff --git 
a/lib_com/tcx_utils_fx.c b/lib_com/tcx_utils_fx.c index 74149f2be..0191e2a7c 100644 --- a/lib_com/tcx_utils_fx.c +++ b/lib_com/tcx_utils_fx.c @@ -75,7 +75,6 @@ static void tcx_get_windows( *left_overlap = hTcxCfg->tcx_mdct_window_length; move16(); *left_win = hTcxCfg->tcx_aldo_window_1_trunc; - move16(); BREAK; default: assert( !"Not supported overlap" ); @@ -98,7 +97,6 @@ static void tcx_get_windows( *right_overlap = hTcxCfg->tcx_mdct_window_delay; move16(); *right_win = hTcxCfg->tcx_aldo_window_2; - move16(); BREAK; default: assert( !"Not supported overlap" ); @@ -131,10 +129,8 @@ static void tcx_get_windows( BREAK; case FULL_OVERLAP: *left_overlap = hTcxCfg->tcx_mdct_window_lengthFB; - *left_win = hTcxCfg->tcx_aldo_window_1_FB_trunc; - move16(); move16(); - + *left_win = hTcxCfg->tcx_aldo_window_1_FB_trunc; BREAK; default: assert( !"Not supported overlap" ); @@ -161,7 +157,6 @@ static void tcx_get_windows( case FULL_OVERLAP: *right_overlap = hTcxCfg->tcx_mdct_window_delayFB; move16(); - move16(); *right_win = hTcxCfg->tcx_aldo_window_2_FB; BREAK; default: @@ -366,8 +361,8 @@ void tcx_windowing_synthesis_current_frame( { /*signal[i] *= (float)(i)/(float)(acelp_zir_len); signal[i] += acelp_zir[i]*(float)(acelp_zir_len-i)/(float)(acelp_zir_len);*/ - move16(); signal[i] = add( mult_r( signal[i], div_s( i, acelp_zir_len ) ), mult_r( acelp_zir[i], div_s( sub( acelp_zir_len, i ), acelp_zir_len ) ) ); + move16(); } } /* Rectangular window (past-frame is ACELP) */ @@ -706,8 +701,8 @@ void lpc2mdct( Word16 *mdct_gains_exp, Word16 *mdct_inv_gains, Word16 *mdct_inv_gains_exp, - const int16_t length, - const int16_t noInverse ) + const Word16 length, + const Word16 noInverse ) { Word32 ComplexData[2 * FDNS_NPTS]; Word16 i, j, k, sizeN, step, scale, s, tmp16; @@ -739,7 +734,9 @@ void lpc2mdct( FOR( ; i < FDNS_NPTS; i++ ) { ComplexData[2 * i] = L_deposit_l( 0 ); + move32(); ComplexData[2 * i + 1] = L_deposit_l( 0 ); + move32(); } move16(); @@ -873,14 +870,18 @@ void 
lpc2mdct_2( FOR( i = 0; i < lpcOrder + 1; i++ ) { RealData_fx[i] = L_mult( lpcCoeffs[i], ptwiddle->v.re ); + move32(); ImagData_fx[i] = L_negate( L_mult( lpcCoeffs[i], ptwiddle->v.im ) ); + move32(); ptwiddle += step; } FOR( ; i < sizeN; i++ ) { RealData_fx[i] = L_deposit_l( 0 ); + move32(); ImagData_fx[i] = L_deposit_l( 0 ); + move32(); } /* half length FFT */ @@ -888,7 +889,8 @@ void lpc2mdct_2( BASOP_cfft_ivas( RealData_fx, ImagData_fx, 1, &scale ); /*Get amplitude*/ - j = FDNS_NPTS - 1; + j = sub(FDNS_NPTS , 1); + move16(); k = 0; move16(); @@ -1172,11 +1174,13 @@ void mdct_noiseShaping_ivas_fx( Word16 m, n, k1, k2; j = 0; + move16(); /* FDNS_NPTS = 64 */ k = shr( lg, 6 ); m = s_and( lg, 0x3F ); Word16 max_e = MIN16B; + move16(); FOR( i = 0; i < FDNS_NPTS; i++ ) { max_e = s_max( max_e, add( *x_e, gains_exp[i] ) ); @@ -1223,7 +1227,9 @@ void mdct_noiseShaping_ivas_fx( FOR( l = 0; l < k; l++ ) { x_fx[i] = Mpy_32_16_1( x_fx[i], gains_fx[j] ); + move32(); x_fx[i] = L_shr( x_fx[i], sub( max_e, add( *x_e, gains_exp[j] ) ) ); + move32(); i = add( i, 1 ); } j = add( j, 1 ); @@ -1236,7 +1242,9 @@ void mdct_noiseShaping_ivas_fx( FOR( l = 0; l < k; l++ ) { x_fx[i] = Mpy_32_16_1( x_fx[i], gains_fx[j] ); + move32(); x_fx[i] = L_shr( x_fx[i], sub( max_e, add( *x_e, gains_exp[j] ) ) ); + move32(); i = add( i, 1 ); } j = add( j, 1 ); @@ -1739,7 +1747,6 @@ void tcx_noise_filling( nrg = L_deposit_l( 1 ); win = 0; move16(); - move16(); FOR( ; i < lowpassLine; i++ ) { @@ -1882,11 +1889,11 @@ void tcx_noise_filling_with_shift( } i = add( i, 1 ); segmentOffset = i; + move16(); } nrg = L_deposit_l( 1 ); win = 0; move16(); - move16(); FOR( ; i < lowpassLine; i++ ) { @@ -1909,13 +1916,15 @@ void tcx_noise_filling_with_shift( FOR( m = segmentOffset; m < tmp2; m++ ) { Word16 nrm = 31; - + move16(); Q[m] = Mpy_32_16_1( Q[m], tmp1 ); + move32(); IF( Q[m] ) { nrm = norm_l( Q[m] ); } Q[m] = L_shl( Q[m], nrm ); + move32(); new_Q_e[m] = sub( add( new_Q_e[m], s ), nrm ); move32(); } @@ -1928,11 
+1937,13 @@ void tcx_noise_filling_with_shift( Word16 nrm = 31; Q[m] = Mpy_32_16_1( Q[m], tmp1 ); + move32(); IF( Q[m] ) { nrm = norm_l( Q[m] ); } Q[m] = L_shl( Q[m], nrm ); + move32(); new_Q_e[m] = sub( add( new_Q_e[m], s ), nrm ); move32(); win = sub( win, 1 ); @@ -1951,16 +1962,18 @@ void tcx_noise_filling_with_shift( } Word16 nrm = 31; + move16(); Random( &seed ); Q[i] = L_mult0( mult( seed, fac_ns ), win ); + move32(); IF( Q[i] ) { nrm = norm_l( Q[i] ); } Q[i] = L_shl( Q[i], nrm ); - new_Q_e[i] = 31 - nrm; move32(); + new_Q_e[i] = sub(31 , nrm); tmp1 = shr( seed, 4 ); nrg = L_mac0( nrg, tmp1, tmp1 ); /* sum up energy of current noise segment */ @@ -1985,24 +1998,27 @@ void tcx_noise_filling_with_shift( FOR( m = segmentOffset; m < lowpassLine; m++ ) { Word16 nrm = 31; - + move16(); /* at this point: - flt Q[m] = (Q[m] * 2^(new_Q_e[m] - 31)) / (nTransWidth*nTransWidth) - flt tmp1 = (tmp1 * 2^(s - 15)) * (nTransWidth*nTransWidth) */ Q[m] = Mpy_32_16_1( Q[m], tmp1 ); + move32(); IF( Q[m] ) { nrm = norm_l( Q[m] ); } Q[m] = L_shl( Q[m], nrm ); + move32(); new_Q_e[m] = add( new_Q_e[m], s - nrm ); move32(); } } Word16 max_e = 0; + move16(); FOR( i = 0; i < lowpassLine; i++ ) { max_e = s_max( max_e, new_Q_e[i] ); @@ -2011,6 +2027,7 @@ void tcx_noise_filling_with_shift( FOR( i = 0; i < lowpassLine; i++ ) { Q[i] = L_shr( Q[i], sub( max_e, new_Q_e[i] ) ); + move32(); } *Q_e = max_e; @@ -2028,8 +2045,8 @@ void InitTnsConfigs( STnsConfig tnsConfig[2][2], const Word16 igfStopFreq, const Word32 total_brate, - const int16_t element_mode, - const int16_t MCT_flag ) + const Word16 element_mode, + const Word16 MCT_flag ) { IF( GT_32( total_brate, ACELP_32k ) ) { @@ -2063,10 +2080,13 @@ void SetAllowTnsOnWhite( ) { tnsConfig[0][0].allowTnsOnWhite = allowTnsOnWhite; + move16(); tnsConfig[0][1].allowTnsOnWhite = allowTnsOnWhite; + move16(); tnsConfig[1][0].allowTnsOnWhite = allowTnsOnWhite; + move16(); tnsConfig[1][1].allowTnsOnWhite = allowTnsOnWhite; - + move16(); return; } #endif 
@@ -2216,27 +2236,31 @@ void init_TCX_config( { /* Initialize the TCX MDCT windows */ hTcxCfg->tcx_mdct_window_length = extract_l( L_shr( L_mult0( L_LOOK_12k8, fscale ), LD_FSCALE_DENOM ) ); + move16(); hTcxCfg->tcx_mdct_window_delay = hTcxCfg->tcx_mdct_window_length; move16(); hTcxCfg->tcx_mdct_window_half_length = extract_l( L_shr( L_mult0( L_LOOK_12k8 - NS2SA( 12800, 5000000L ), fscale ), LD_FSCALE_DENOM ) ); - - hTcxCfg->tcx_mdct_window_min_length = shr( L_frame, 4 ); /* 1.25ms */ + move16(); + hTcxCfg->tcx_mdct_window_min_length = shr( L_frame, 4 ); /* 1.25ms */ + move16(); hTcxCfg->tcx_mdct_window_trans_length = shr( L_frame, 4 ); /* 1.25ms */ - + move16(); hTcxCfg->tcx5Size = shr( L_frame, 2 ); /* 5ms */ - + move16(); hTcxCfg->tcx_mdct_window_lengthFB = extract_l( L_shr( L_mult0( L_LOOK_12k8, fscaleFB ), LD_FSCALE_DENOM ) ); + move16(); hTcxCfg->tcx_mdct_window_delayFB = hTcxCfg->tcx_mdct_window_lengthFB; move16(); hTcxCfg->tcx_mdct_window_half_lengthFB = extract_l( L_shr( L_mult0( L_LOOK_12k8 - NS2SA( 12800, 5000000L ), fscaleFB ), LD_FSCALE_DENOM ) ); - - hTcxCfg->tcx_mdct_window_min_lengthFB = shr( L_frameTCX, 4 ); /* 1.25ms */ + move16(); + hTcxCfg->tcx_mdct_window_min_lengthFB = shr( L_frameTCX, 4 ); /* 1.25ms */ + move16(); hTcxCfg->tcx_mdct_window_trans_lengthFB = shr( L_frameTCX, 4 ); /* 1.25ms */ - + move16(); hTcxCfg->tcx5SizeFB = shr( L_frameTCX, 2 ); /* 5ms */ - + move16(); mdct_window_sine( hTcxCfg->tcx_mdct_window, hTcxCfg->tcx_mdct_window_length ); mdct_window_sine( hTcxCfg->tcx_mdct_window_half, hTcxCfg->tcx_mdct_window_half_length ); mdct_window_sine( hTcxCfg->tcx_mdct_window_minimum, hTcxCfg->tcx_mdct_window_min_length ); diff --git a/lib_com/tec_com.c b/lib_com/tec_com.c index 5e32b842b..dd902ea4b 100644 --- a/lib_com/tec_com.c +++ b/lib_com/tec_com.c @@ -80,6 +80,7 @@ static Word32 calcVar_Fix( xx = L_deposit_l( 0 ); *x = L_deposit_l( 0 ); + move32(); FOR( i = 0; i < len; i++ ) { @@ -88,6 +89,7 @@ static Word32 calcVar_Fix( tmpX = L_shr( 
Mpy_32_32( L_shl( in[i], exp1 ), L_shl( in[i], exp1 ) ), r_sft ); xx = L_add( xx, tmpX ); *x = L_add( *x, in[i] ); + move32(); } ans = L_deposit_l( 0 ); @@ -213,11 +215,13 @@ static Word32 calcCorrelationCoefficient2_Fix( r_sft = sub( 8, sub( exp2, exp1 ) ); tmpCor = L_shr( tmpCor, r_sft ); ans = tmpCor; + move16(); } } ELSE { ans = 0; + move16(); } @@ -256,7 +260,6 @@ static void calcLoBufferEnc_Fx( li = TecLowBandTable[lb]; move16(); ui = sub( TecLowBandTable[lb + 1], 1 ); - move16(); assert( ( ui - li ) == 1 ); @@ -283,6 +286,7 @@ static void calcLoBufferEnc_Fx( /* 0.50171665944 = 10 * log10(2.0) / NbTecLowBand / 2.0 */ loBuffer[slot] = extract_h( L_shl( Mpy_32_16_1( tmp, 16440 /*0.50171665944 Q15*/ ), 1 ) ); + move16(); } } /*------------------------------------------------------------------- @@ -318,7 +322,6 @@ static void calcHiTempEnv_Fx( normFac = getNormReciprocalWord16( bwHigh ); scale = sub( scale, s1 ); - move16(); FOR( timeIndex = startPos; timeIndex < stopPos; timeIndex++ ) { @@ -350,6 +353,7 @@ static void calcHiTempEnv_Fx( /* 0.75257498916 = 10 * log10(2.0) / 4.0 */ hiTempEnv[timeIndex] = extract_h( L_shr( L_shl( Mpy_32_16_1( nrgLog, 24660 /*0.75257498916 Q15*/ ), 2 ), 1 ) ); + move16(); } } /*------------------------------------------------------------------- @@ -433,6 +437,7 @@ static void calcLoBufferDec_Fx( /* 0.50171665944 = 10 * log10(2.0) / NbTecLowBand / 2.0 */ loBuffer[slot] = extract_h( L_shl( Mpy_32_16_1( tmp, 16440 /*0.50171665944 Q15*/ ), 1 ) ); + move16(); } ELSE { @@ -468,6 +473,7 @@ static void calcLoTempEnv_Fx( } /* adjFac is scaled by factor 0.5 */ loTempEnv_Fx[slot] = extract_h( Mpy_32_16_1( accu, adjFac_Fx ) ); + move16(); } } /*------------------------------------------------------------------- @@ -506,6 +512,7 @@ static void calcLoTempEnv_TBE_Fx( Word16 slot; Word32 accu; Word16 delay = 1; + move16(); /* TecSC_Fx values are scaled by factor 2.0 */ FOR( slot = 0; slot < noCols; slot++ ) @@ -517,6 +524,7 @@ static void 
calcLoTempEnv_TBE_Fx( } /* adjFac is scaled by factor 0.5 */ loTempEnv_Fx[slot] = extract_h( Mpy_32_16_1( accu, adjFac_Fx ) ); + move16(); } } /*------------------------------------------------------------------- @@ -534,6 +542,7 @@ static void calcLoTempEnv_ns_TBE_Fx( Word16 slot; Word16 delay = 1; Word16 fac = 22938 /*1.4f * 0.5f Q15*/; + move16(); FOR( slot = 0; slot < noCols; slot++ ) { @@ -605,7 +614,9 @@ static void calcGainLinear_TBE_Fx( s2 = norm_l( tmp32 ); pGainTemp_m[slot] = extract_h( L_shl( tmp32, s2 ) ); + move16(); pGainTemp_e[slot] = sub( s, s2 ); + move16(); } } /*------------------------------------------------------------------- @@ -629,10 +640,11 @@ void calcGainTemp_TBE_Fx( Word16 loTempEnv_Fx[16]; const Word16 BW_LO = TecLowBandTable[NBTECLOWBAND]; + move16(); Word16 slot; - Word16 noCols = stopPos - startPos; - Word16 bandOffset = lowSubband - BW_LO; + Word16 noCols = sub( stopPos, startPos ); + Word16 bandOffset = sub( lowSubband, BW_LO ); assert( lowSubband >= BW_LO ); @@ -643,7 +655,7 @@ void calcGainTemp_TBE_Fx( startPos, stopPos, bandOffset, - 15 - cldfb_exp ); + sub(15 , cldfb_exp) ); IF( code > 0 ) { @@ -680,7 +692,9 @@ static void setSubfrConfig_Fix( const Word16 l_subfr ) { *n_subfr = sub( (Word16) N_TEC_TFA_SUBFR, i_offset ); + move16(); *k_offset = i_mult( i_offset, l_subfr ); + move16(); } /*------------------------------------------------------------------- * calcSum_Fx() @@ -699,7 +713,9 @@ static Word16 calcSum_Fx( Word16 sum16_e; /* Q0 */ *sum16_m = 0; + move16(); sum16_e = 0; + move16(); FOR( slot = 0; slot < len; slot++ ) { sum16_e = BASOP_Util_Add_MantExp( @@ -731,8 +747,10 @@ static Word16 calcSubfrNrg_Fx( Word16 s; Word16 sum16_e; Word16 s2 = 1; /* headroom for a summatoin of length l_subfr ( < 2^s2 = 64 ) */ + move16(); k = k_offset; + move16(); FOR( i = i_offset; i < N_TEC_TFA_SUBFR; i++ ) { Word32 nrg32; @@ -771,7 +789,9 @@ static Word16 calcSubfrNrg_Fx( } sum16_e = enr_e[i_offset]; + move16(); *sum16_m = 
enr_m[i_offset]; + move16(); FOR( i = i_offset + 1; i < N_TEC_TFA_SUBFR; i++ ) { @@ -868,6 +888,7 @@ static Word16 procTec_Fx( gain_ave_e = sub( gain_ave_e, 3 ); k = k_offset; + move16(); FOR( i = i_offset; i < N_TEC_TFA_SUBFR; i++ ) { IF( enr_m[i] > 0 ) @@ -943,7 +964,6 @@ static Word16 procTec_Fx( lower_limit_gain_m = min_curr_enr_m; move16(); lower_limit_gain_e = sub( min_curr_enr_e, 1 ); - move16(); } /* upper_limit_gain */ @@ -977,7 +997,9 @@ static Word16 procTec_Fx( } gain_m[i] = mult_r( gain_m[i], inv_curr_enr_m[i] ); + move16(); gain_e[i] = add( gain_e[i], inv_curr_enr_e[i] ); + move16(); s = norm_s( gain_m[i] ); gain_m[i] = shl( gain_m[i], s ); @@ -1009,8 +1031,7 @@ static Word16 procTec_Fx( s = norm_s( hb_synth_Fx[k] ); hb_synth_Fx[k] = mult_r( gain_m[i], shl( hb_synth_Fx[k], s ) ); move16(); - shift[k] = s - gain_e[i]; - move16(); + shift[k] = sub(s , gain_e[i]); if ( GT_16( min_shift, shift[k] ) ) { @@ -1040,6 +1061,7 @@ static Word16 procTec_Fx( min_shift = sub( s, exp_syn ); /* exp_syn(old) - exp_syn(new) */ k = k_offset; + move16(); FOR( i = i_offset; i < N_TEC_TFA_SUBFR; i++ ) { FOR( j = 0; j < l_subfr; j++ ) @@ -1203,6 +1225,7 @@ Word16 procTecTfa_TBE_Fx( Word16 code ) { Word16 i_offset = 0; + move16(); Word16 exp_syn_frame = sub( 15, hb_synth_fx_exp ); @@ -1252,8 +1275,11 @@ void calcHiEnvLoBuff_Fix( Word16 pCldfbPow_FixScale ) { const Word16 BW_LO = TecLowBandTable[NBTECLOWBAND]; + move16(); const Word16 lowSubband = pFreqBandTable[0]; + move16(); const Word16 highSubband = pFreqBandTable[nSfb]; + move16(); Word16 bandOffsetBottom; @@ -1300,7 +1326,9 @@ void calcLoEnvCheckCorrHiLo_Fix( ) { const Word16 BW_LO = TecLowBandTable[NBTECLOWBAND]; + move16(); const Word16 lowSubband = pFreqBandTable[0]; + move16(); Word16 i; Word16 bandOffsetBottom; @@ -1317,6 +1345,7 @@ void calcLoEnvCheckCorrHiLo_Fix( Word32 EQ4, EQ5, EQ6; Word16 code = 0; /* SET TENTATIVELY */ + move16(); Word32 loVar_ns_Fix; Word32 diff_hi_lo_sum_Fix; Word32 loSum_ns_Fix; @@ 
-1335,6 +1364,7 @@ void calcLoEnvCheckCorrHiLo_Fix( FOR( i = 0; i < noCols + DELAY_TEMP_ENV_BUFF_TEC; i++ ) { hiTempEnv32_Fix[i] = L_deposit_l( hiTempEnv[i] ); + move32(); } hiVar_Fix = calcVar_Fix( hiTempEnv32_Fix, (Word32) noCols, &hiSum_Fix ); @@ -1355,6 +1385,7 @@ void calcLoEnvCheckCorrHiLo_Fix( FOR( i = 0; i < noCols; i++ ) { loTempEnv32_ns_Fix[i] = L_deposit_l( loTempEnv_ns_Fix[i] ); + move32(); } loVar_ns_Fix = calcVar_Fix( loTempEnv32_ns_Fix, noCols, &loSum_ns_Fix ); @@ -1362,9 +1393,9 @@ void calcLoEnvCheckCorrHiLo_Fix( EQ4 = L_sub( L_shr( hiVar_Fix, 7 ), 800 ); EQ5 = L_sub( L_shr( loVar_ns_Fix, 7 ), 720 ); - ; + EQ6 = L_sub( L_shr( diff_hi_lo_sum_Fix, 7 ), 100 ); - ; + test(); test(); if ( EQ4 > 0 && EQ5 > 0 && EQ6 < 0 ) @@ -1393,6 +1424,7 @@ void calcLoEnvCheckCorrHiLo_Fix( move16(); maxPosHi = maxPosLo = 0; move16(); + move16(); FOR( i = 1; i < noCols; i++ ) { if ( LT_16( maxHiFix, hiTempEnv[i] ) ) @@ -1419,12 +1451,14 @@ void calcLoEnvCheckCorrHiLo_Fix( { Word16 feature_max_Fix = 0; + move16(); Word16 pos_feature_max = 0; + move16(); Word16 feature_Fix[16]; Word16 min_local_Fix, max_local_Fix; Word16 j; Word16 len_window = EXT_DELAY_HI_TEMP_ENV + 1; - + move16(); Word16 *curr_pos_Fix = hiTempEnv; move16(); @@ -1458,6 +1492,7 @@ void calcLoEnvCheckCorrHiLo_Fix( } } feature_Fix[i] = sub( max_local_Fix, min_local_Fix ); + move16(); if ( LT_16( feature_max_Fix, feature_Fix[i] ) ) { @@ -1492,6 +1527,7 @@ void calcLoEnvCheckCorrHiLo_Fix( FOR( i = 0; i < noCols; i++ ) { loTempEnv32_Fix[i] = L_deposit_l( loTempEnv_Fix[i] ); + move32(); } loVar_Fix = calcVar_Fix( loTempEnv32_Fix, noCols, &loSum_Fix ); /* = = */ diff --git a/lib_com/tns_base.c b/lib_com/tns_base.c index f89bf8e07..4e2ce9c96 100644 --- a/lib_com/tns_base.c +++ b/lib_com/tns_base.c @@ -90,6 +90,7 @@ void InitTnsConfiguration( const int16_t is_mct ) { Word16 iFilter = 0; + move16(); Word16 *startLineFilter; Word32 L_tmp; Word32 nSampleRate; @@ -105,7 +106,9 @@ void InitTnsConfiguration( /* Sanity 
checks */ assert( ( nSampleRate > 0 ) && ( frameLength > 0 ) && ( pTnsConfig != NULL ) ); - if ( ( nSampleRate <= 0 ) || ( frameLength <= 0 ) || ( pTnsConfig == NULL ) ) + test(); + test(); + IF ( ( nSampleRate <= 0 ) || ( frameLength <= 0 ) || ( pTnsConfig == NULL ) ) { return /*TNS_FATAL_ERROR*/; } @@ -250,8 +253,6 @@ void ApplyTnsFilter( Word16 parCoeff[TNS_MAX_FILTER_ORDER]; const STnsFilter *pFilter; - - move16(); move16(); move16(); pFilter = &pTnsData->filter[iFilter]; @@ -309,6 +310,7 @@ Word16 ITF_Detect_fx( Word16 spectrumLength; Word16 const nSubdivisions = MAX_SUBDIVISIONS; + move16(); Word16 iSubdivisions; Word16 iStartLine; Word16 iEndLine; @@ -323,6 +325,7 @@ Word16 ITF_Detect_fx( Word16 n, i; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif move16(); @@ -357,7 +360,7 @@ Word16 ITF_Detect_fx( /* Check threshold HLM_MIN_NRG */ BASOP_SATURATE_WARNING_OFF_EVS; #ifdef BASOP_NOGLOB - tmp32 = L_sub( L_shl_o( L_tmp, sub( shift, 24 - Q ), &Overflow ), 4194304l /*HLM_MIN_NRG Q7*/ ); + tmp32 = L_sub( L_shl_o( L_tmp, sub( shift, sub(24 , Q) ), &Overflow ), 4194304l /*HLM_MIN_NRG Q7*/ ); #else /* BASOP_NOGLOB */ tmp32 = L_sub( L_shl( L_tmp, sub( shift, 24 - Q ) ), 4194304l /*HLM_MIN_NRG Q7*/ ); #endif @@ -435,6 +438,7 @@ Word16 ITF_Detect_fx( FOR( i = 0; i < n; i++ ) { tmpbuf[i] = round_fx( L_shl( pSpectrum[iStartLine + i - IGF_START_MN], shift ) ); + move16(); } FOR( lag = 0; lag <= maxOrder; lag++ ) @@ -464,6 +468,7 @@ Word16 ITF_Detect_fx( ITF_GetFilterParameters_fx( rxx, s_min( maxOrder, shr( spectrumLength, 2 ) ), A, Q_A, predictionGain ); *curr_order = maxOrder; + move16(); } return 1; @@ -483,6 +488,7 @@ Word16 ITF_Detect_ivas_fx( Word16 spectrumLength; Word16 const nSubdivisions = MAX_SUBDIVISIONS; + move16(); Word16 iSubdivisions; Word16 iStartLine; Word16 iEndLine; @@ -497,6 +503,7 @@ Word16 ITF_Detect_ivas_fx( Word16 n, i; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif move16(); @@ -605,6 +612,7 @@ 
Word16 ITF_Detect_ivas_fx( FOR( i = 0; i < n; i++ ) { tmpbuf[i] = round_fx_o( L_shl( pSpectrum[iStartLine + i - IGF_START_MN], shift ), &Overflow ); + move16(); } FOR( lag = 0; lag <= maxOrder; lag++ ) @@ -634,6 +642,7 @@ Word16 ITF_Detect_ivas_fx( ITF_GetFilterParameters_fx( rxx, s_min( maxOrder, shr( spectrumLength, 2 ) ), A, Q_A, predictionGain ); *curr_order = maxOrder; + move16(); } return 1; @@ -672,6 +681,8 @@ static Word16 DecodeUsingTable( Decoder_State *st, Word16 *pValue, const Coding { Word16 code = 0; Word16 nBits = 0; + move16(); + move16(); Word16 valueIndex; assert( ( nSize >= 0 ) && ( nSize <= 256 ) ); @@ -688,7 +699,9 @@ static Word16 DecodeUsingTable( Decoder_State *st, Word16 *pValue, const Coding if ( nBits > nSize || nBits > 16 ) { st->BER_detect = 1; + move16(); *pValue = 0; + move16(); return -1; } @@ -705,12 +718,14 @@ static Word16 DecodeUsingTable( Decoder_State *st, Word16 *pValue, const Coding if ( valueIndex < nSize ) { *pValue = (Word16) codes[valueIndex].value; + move16(); } else { st->BER_detect = 1; + move16(); *pValue = 0; - + move16(); return -1; } @@ -723,12 +738,14 @@ static Word16 DecodeUsingTable( Decoder_State *st, Word16 *pValue, const Coding void const *GetTnsFilterCoeff( void const *p, const Word16 index, Word16 *pValue ) { *pValue = ( (Word16 const *) p )[index] + INDEX_SHIFT; + move16(); return NULL; } void *SetTnsFilterCoeff( void *p, const Word16 index, const Word16 value ) { ( (Word16 *) p )[index] = sub( value, INDEX_SHIFT ); + move16(); return NULL; } @@ -884,15 +901,17 @@ Word16 DecodeTnsFilterOrder( Decoder_State *st, const Word16 index, Word16 *pVal void const *GetNumOfTnsFilters( void const *p, const Word16 index, Word16 *pValue ) { + move16(); *pValue = ( (STnsData const *) p )[index].nFilters; - + move16(); return ( (STnsData const *) p )[index].filter; } void *SetNumOfTnsFilters( void *p, const Word16 index, Word16 value ) { + move16(); ( (STnsData *) p )[index].nFilters = value; - + move16(); return ( 
(STnsData *) p )[index].filter; } @@ -951,7 +970,9 @@ void const *GetTnsEnabledSingleFilter( void const *p, const Word16 index, Word16 void *SetTnsEnabledSingleFilter( void *p, const Word16 index, const Word16 value ) { + move16(); ( (STnsData *) p )[index].nFilters = value; + move16(); return ( (STnsData *) p )[index].filter; } @@ -966,13 +987,18 @@ void ResetTnsData( STnsData *pTnsData ) pTnsData->nFilters = 0; + move16(); pTnsData->tnsOnWhitenedSpectra = 0; + move16(); FOR( iFilter = 0; iFilter < (Word16) ( sizeof( pTnsData->filter ) / sizeof( pTnsData->filter[0] ) ); iFilter++ ) { STnsFilter *const pTnsFilter = &pTnsData->filter[iFilter]; pTnsFilter->spectrumLength = 0; + move16(); pTnsFilter->predictionGain = ONE_IN_Q7; + move16(); pTnsFilter->avgSqrCoef = 0; + move16(); /* TODO: remove float _flt dependencies */ pTnsFilter->predictionGain_flt = 1.0f; pTnsFilter->avgSqrCoef_flt = 0; @@ -990,6 +1016,7 @@ void ClearTnsFilterCoefficients( { move16(); pTnsFilter->order = 0; + move16(); assert( TNS_MAX_FILTER_ORDER == 8 ); move16(); move16(); @@ -1194,6 +1221,7 @@ static void ITF_GetFilterParameters_fx( L_tmp = E_LPC_schur( rxx, parCoeff, epsP, maxOrder ); BASOP_SATURATE_WARNING_OFF_EVS /* Allow saturation, this value is compared against a threshold. 
*/ *predictionGain = divide3232( L_shr( epsP[0], PRED_GAIN_E ), L_tmp ); + move16(); BASOP_SATURATE_WARNING_ON_EVS { @@ -1202,8 +1230,9 @@ static void ITF_GetFilterParameters_fx( /* Convert ParCor / reflection coefficients to LPC */ A32[0] = 134217728l /*1.0 Q27*/; - move16(); /* Q11+16 */ + move32(); /* Q11+16 */ A32[1] = L_shr( L_deposit_h( parCoeff[0] ), 4 ); /* Q11+16 */ + move32(); FOR( i = 1; i < maxOrder; i++ ) { @@ -1239,8 +1268,10 @@ static void ITF_GetFilterParameters_fx( FOR( i = 0; i < maxOrder; i++ ) { A[i] = round_fx( L_shl( A32[i], tmp ) ); /* Q11+tmp */ + move16(); } *Q_A = add( 11, tmp ); + move16(); } return; } -- GitLab From 434df91271ea1c7a72a8f8d329efa2d4acb63c81 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Thu, 11 Jul 2024 12:28:03 +0530 Subject: [PATCH 2/2] Clang formatting changes --- lib_com/tcq_position_arith_fx.c | 18 +++++------ lib_com/tcx_ltp_fx.c | 54 ++++++++++++++++----------------- lib_com/tcx_mdct_fx.c | 34 ++++++++++----------- lib_com/tcx_mdct_window.c | 14 ++++----- lib_com/tcx_utils_fx.c | 4 +-- lib_com/tec_com.c | 4 +-- lib_com/tns_base.c | 4 +-- 7 files changed, 66 insertions(+), 66 deletions(-) diff --git a/lib_com/tcq_position_arith_fx.c b/lib_com/tcq_position_arith_fx.c index 459c4d362..2ae9e8505 100644 --- a/lib_com/tcq_position_arith_fx.c +++ b/lib_com/tcq_position_arith_fx.c @@ -107,8 +107,8 @@ static UWord32 bitstream_load_bit( } curPos = &pBS->curPos; - bit = UL_and( UL_lshr( pBS->buf[pBS->numByte] , ( *curPos ) ) , 0x00000001 ); - *curPos = sub(*curPos, 1); + bit = UL_and( UL_lshr( pBS->buf[pBS->numByte], ( *curPos ) ), 0x00000001 ); + *curPos = sub( *curPos, 1 ); IF( *curPos < 0 ) { @@ -406,7 +406,7 @@ static Word16 ar_decode_fx( arInst->value = value; move32(); - return ( sub(symbol , 1) ); + return ( sub( symbol, 1 ) ); } void ar_decoder_done_fx( @@ -668,7 +668,7 @@ static void TCQnew_fx( move16(); if ( v_fx[i - 1] <= 0 ) { - vout_fx[i - 1] = negate(quant_fx[position][i]); + vout_fx[i - 1] = negate( 
quant_fx[position][i] ); move16(); } position = path_fx[position][i]; @@ -1739,7 +1739,7 @@ Word32 encode_position_ari_fx( btcq_fx = GetBitsFromPulses_fx( pulses, size ); /* Estimate TCQ bits */ bits_fx = L_sub( table_logcum_fx[size + 1], L_add( table_logcum_fx[nz + 1], table_logcum_fx[add( sub( size, nz ), 1 )] ) ); - bits_fx = L_add( bits_fx, L_sub( btcq_fx, L_sub( table_logcum_fx[size + 1], L_add( table_logcum_fx[nz + 1], table_logcum_fx[add(sub( size, nz ) , 1)] ) ) ) ); + bits_fx = L_add( bits_fx, L_sub( btcq_fx, L_sub( table_logcum_fx[size + 1], L_add( table_logcum_fx[nz + 1], table_logcum_fx[add( sub( size, nz ), 1 )] ) ) ) ); bits_fx = L_sub( bits_fx, L_sub( table_logcum_fx[pulses], L_add( table_logcum_fx[nz], table_logcum_fx[add( pulses, sub( nz, 1 ) )] ) ) ); bits_fx = L_sub( bits_fx, nz ); *est_bits_frame_fx = L_add( *est_bits_frame_fx, bits_fx ); @@ -2217,7 +2217,7 @@ void decode_position_ari_fx( test(); test(); - IF( ( mode_num_nz[sub( sub( add( i, 1 ), storepos ), stpos )] == 0 && scp > 0 ) || EQ_16( mode_num_nz[sub( sub( i, storepos ), stpos )] , mode_num_nz[sub( sub( add( i, 1 ), storepos ), stpos )] ) ) + IF( ( mode_num_nz[sub( sub( add( i, 1 ), storepos ), stpos )] == 0 && scp > 0 ) || EQ_16( mode_num_nz[sub( sub( i, storepos ), stpos )], mode_num_nz[sub( sub( add( i, 1 ), storepos ), stpos )] ) ) { mode_num_nz[sub( sub( add( i, 1 ), storepos ), stpos )] = 0; move16(); @@ -2405,7 +2405,7 @@ void decode_magnitude_usq_fx( } magnp = sub( magnp, out[magncout] ); magnzp = sub( magnzp, 1 ); - magncout = add(magncout, 1 ); + magncout = add( magncout, 1 ); IF( magnzp == 0 ) /* last magnitude generation */ { @@ -2528,7 +2528,7 @@ void decode_mangitude_tcq_fx( exp1 = sub( norm_s( num ), 1 ); exp2 = norm_s( denum ); prob1_fx = div_s( shl( num, exp1 ), shl( denum, exp2 ) ); /*15 + exp1 - exp2 */ - exp = add(15 , sub(exp1 , exp2)); + exp = add( 15, sub( exp1, exp2 ) ); prob1_fx = shl( prob1_fx, sub( 15, exp ) ); prob0_fx = sub( MAX_16, prob1_fx ); } @@ -2658,7 
+2658,7 @@ Word16 GetScale_fx( Word16 pulses = MAX_PULSES, p_est, exp, exp1, exp2, magicnum; move16(); Word32 t, a, b, ab, estbits_fx = 0; - move32(); + move32(); magicnum = 24773; move16(); /*Q17: 0.188992013101951f; */ diff --git a/lib_com/tcx_ltp_fx.c b/lib_com/tcx_ltp_fx.c index 88481ad6f..134e861af 100644 --- a/lib_com/tcx_ltp_fx.c +++ b/lib_com/tcx_ltp_fx.c @@ -665,7 +665,7 @@ static void tcx_ltp_synth_filter32( L_tmp2 = Mpy_32_16_r( L_tmp2, alpha ); #ifdef BASOP_NOGLOB L_tmp2 = L_add_sat( synth[j], L_tmp2 ); - if( zir != NULL ) + if ( zir != NULL ) { L_tmp2 = L_sub_sat( L_tmp2, zir[j] ); } @@ -728,7 +728,7 @@ static void tcx_ltp_synth_filter_10( assert( GE_16( filtIdx, 0 ) ); w0 = &tcxLtpFilters[filtIdx].filt[pitch_fr]; - w1 = &tcxLtpFilters[filtIdx].filt[sub(pitch_res , pitch_fr)]; + w1 = &tcxLtpFilters[filtIdx].filt[sub( pitch_res, pitch_fr )]; v0 = &tcxLtpFilters[filtIdx].filt[0]; v1 = &tcxLtpFilters[filtIdx].filt[pitch_res]; L = tcxLtpFilters[filtIdx].length; @@ -797,7 +797,7 @@ static void tcx_ltp_synth_filter_10_fx( curr_gain = gain; move16(); - gain_step = idiv1616(negate( gain ) , length); // TODO + gain_step = idiv1616( negate( gain ), length ); // TODO for ( j = 0; j < length; j++ ) { @@ -861,8 +861,8 @@ static void tcx_ltp_synth_filter_01( curr_gain = 0; move16(); - //gain_step = gain / length; // TODO - gain_step = idiv1616(gain , length); // TODO + // gain_step = gain / length; // TODO + gain_step = idiv1616( gain, length ); // TODO for ( j = 0; j < length; j++ ) { @@ -927,8 +927,8 @@ static void tcx_ltp_synth_filter_01_fx( curr_gain = 0; move16(); - //gain_step = gain / length; // TODO - gain_step = idiv1616(gain , length); // TODO + // gain_step = gain / length; // TODO + gain_step = idiv1616( gain, length ); // TODO for ( j = 0; j < length; j++ ) { @@ -1027,8 +1027,8 @@ static void tcx_ltp_synth_filter_11_unequal_pitch( /* 1. decreasing gain filter. 
The first filter unit with the parameters associated to the previous interval and scaling towards 0 */ gain = prev_gain; move16(); - //gain_step = negate( prev_gain ) / length; // TODO - gain_step = idiv1616(negate( prev_gain ) , length); // TODO + // gain_step = negate( prev_gain ) / length; // TODO + gain_step = idiv1616( negate( prev_gain ), length ); // TODO for ( j = 0; j < length; j++ ) { @@ -1156,7 +1156,7 @@ static void tcx_ltp_synth_filter_11_unequal_pitch_fx( /* 1. decreasing gain filter. The first filter unit with the parameters associated to the previous interval and scaling towards 0 */ gain = prev_gain; move16(); - gain_step = idiv1616(negate( prev_gain ) , length); // TODOD + gain_step = idiv1616( negate( prev_gain ), length ); // TODOD for ( j = 0; j < length; j++ ) { @@ -1359,7 +1359,7 @@ void tcx_ltp_post( move16(); } - IF( core == ACELP_CORE ) + IF( core == ACELP_CORE ) { bfi = 0; move16(); @@ -1421,7 +1421,7 @@ void tcx_ltp_post( ELSE IF( bfi == 0 ) { /* LTP and good frame */ - test(); + test(); IF( EQ_16( st->element_mode, EVS_MONO ) ) /* hard tunings for EVS_MONO, HQ_48k is TCX only */ { IF( NE_16( output_frame, L_frame_core ) ) @@ -1580,20 +1580,20 @@ void tcx_ltp_post( test(); // #ifdef IVAS_CODE_TCX_LTP if ( st->element_mode != EVS_MONO ) - { - test(); - test(); - test(); - test(); - test(); - test(); - test(); - IF ( gain == 0 && hTcxLtpDec->tcxltp_gain_post_prev == 0 ) + { + test(); + test(); + test(); + test(); + test(); + test(); + test(); + IF( gain == 0 && hTcxLtpDec->tcxltp_gain_post_prev == 0 ) { /* The filtering is deactivated, just copy input to the output */ Copy( sig_in + delay, sig_out + delay, L_transition ); } - ELSE IF ( gain == 0 && hTcxLtpDec->tcxltp_gain_post_prev != 0 ) + ELSE IF( gain == 0 && hTcxLtpDec->tcxltp_gain_post_prev != 0 ) { /* Filtering with the first filter unit */ tcx_ltp_synth_filter_10( sig_out + delay, sig_in + delay, L_transition, hTcxLtpDec->tcxltp_pitch_int_post_prev, 
hTcxLtpDec->tcxltp_pitch_fr_post_prev, hTcxLtpDec->tcxltp_gain_post_prev, st->pit_res_max_past, hTcxLtpDec->tcxltp_filt_idx_prev ); @@ -1603,7 +1603,7 @@ void tcx_ltp_post( /* Filtering with the second filter unit */ tcx_ltp_synth_filter_01( sig_out + delay, sig_in + delay, L_transition, pitch_int, pitch_fr, gain, st->pit_res_max, filtIdx ); } - ELSE IF( EQ_16(gain ,hTcxLtpDec->tcxltp_gain_post_prev )&& EQ_16(pitch_int , hTcxLtpDec->tcxltp_pitch_int_post_prev) && EQ_16(pitch_fr, hTcxLtpDec->tcxltp_pitch_fr_post_prev )&& EQ_16(st->pit_res_max , st->pit_res_max_past )&& EQ_16(filtIdx , hTcxLtpDec->tcxltp_filt_idx_prev )) + ELSE IF( EQ_16( gain, hTcxLtpDec->tcxltp_gain_post_prev ) && EQ_16( pitch_int, hTcxLtpDec->tcxltp_pitch_int_post_prev ) && EQ_16( pitch_fr, hTcxLtpDec->tcxltp_pitch_fr_post_prev ) && EQ_16( st->pit_res_max, st->pit_res_max_past ) && EQ_16( filtIdx, hTcxLtpDec->tcxltp_filt_idx_prev ) ) { tcx_ltp_synth_filter( sig_out + delay, sig_in + delay, L_transition, pitch_int, pitch_fr, gain, st->pit_res_max, NULL, 0, filtIdx ); } @@ -1617,10 +1617,10 @@ void tcx_ltp_post( ELSE // #endif { - test(); - test(); - test(); - test(); + test(); + test(); + test(); + test(); IF( gain == 0 && hTcxLtpDec->tcxltp_gain_post_prev == 0 ) { Copy( sig_in + delay, sig_out + delay, L_transition ); diff --git a/lib_com/tcx_mdct_fx.c b/lib_com/tcx_mdct_fx.c index 1281c027e..f66ea0d76 100644 --- a/lib_com/tcx_mdct_fx.c +++ b/lib_com/tcx_mdct_fx.c @@ -126,7 +126,7 @@ void TCX_MDCT( FOR( i = 0; i < l / 2; i++ ) { #ifdef BASOP_NOGLOB - y[add( add( add( shr( m, 1 ), shr( r, 1 ) ), shr( m, 1 ) ), i )] = L_msu_o( L_mult( x[i], factor ), x[sub(sub(l , 1) , i)], factor, &Overflow ); + y[add( add( add( shr( m, 1 ), shr( r, 1 ) ), shr( m, 1 ) ), i )] = L_msu_o( L_mult( x[i], factor ), x[sub( sub( l, 1 ), i )], factor, &Overflow ); #else /* BASOP_NOGLOB */ y[m / 2 + r / 2 + m / 2 + i] = L_msu( L_mult( x[i], factor ), x[l - 1 - i], factor ); #endif /* BASOP_NOGLOB */ @@ -134,13 +134,13 @@ 
void TCX_MDCT( } FOR( i = 0; i < m / 2; i++ ) { - y[sub( sub( add( shr( m, 1 ), shr( r, 1 ) ), 1 ), i )] = L_mult( x[add(add(l , shr(m ,1)) , i)], neg_factor ); + y[sub( sub( add( shr( m, 1 ), shr( r, 1 ) ), 1 ), i )] = L_mult( x[add( add( l, shr( m, 1 ) ), i )], neg_factor ); move32(); } FOR( i = 0; i < r / 2; i++ ) { #ifdef BASOP_NOGLOB - y[sub( sub( sub( add( shr( m, 1 ), shr( r, 1 ) ), 1 ), shr( m, 1 ) ), i )] = L_mac_o( L_mult( x[add( add( l, m ), i )], neg_factor ), x[sub(sub(add(add(l , m) , r) , 1) , i)], neg_factor, &Overflow ); + y[sub( sub( sub( add( shr( m, 1 ), shr( r, 1 ) ), 1 ), shr( m, 1 ) ), i )] = L_mac_o( L_mult( x[add( add( l, m ), i )], neg_factor ), x[sub( sub( add( add( l, m ), r ), 1 ), i )], neg_factor, &Overflow ); #else /* BASOP_NOGLOB */ y[m / 2 + r / 2 - 1 - m / 2 - i] = L_mac( L_mult( x[l + m + i], neg_factor ), x[l + m + r - 1 - i], neg_factor ); #endif /* BASOP_NOGLOB */ @@ -189,7 +189,7 @@ void TCX_MDST( FOR( i = 0; i < l / 2; i++ ) { #ifdef BASOP_NOGLOB - y[add( add( add( shr(m ,1), shr(r ,1) ), shr(m ,1) ), i )] = L_msu_o( L_mult( x[i], neg_factor ), x[sub(sub(l , 1) , i)], factor, &Overflow ); + y[add( add( add( shr( m, 1 ), shr( r, 1 ) ), shr( m, 1 ) ), i )] = L_msu_o( L_mult( x[i], neg_factor ), x[sub( sub( l, 1 ), i )], factor, &Overflow ); #else /* BASOP_NOGLOB */ y[m / 2 + r / 2 + m / 2 + i] = L_msu( L_mult( x[i], neg_factor ), x[l - 1 - i], factor ); #endif @@ -251,7 +251,7 @@ void TCX_MDCT_Inverse( FOR( i = 0; i < R2; i++ ) { #ifdef BASOP_NOGLOB - y[add(add(add(l , m) , R2) , i)] = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[add(L2 , i)], negfac ), s ) ); /* fold out right end of DCT */ + y[add( add( add( l, m ), R2 ), i )] = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[add( L2, i )], negfac ), s ) ); /* fold out right end of DCT */ #else y[l + m + R2 + i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + i], negfac ), s ) ); /* fold out right end of DCT */ #endif @@ -262,7 +262,7 @@ void TCX_MDCT_Inverse( FOR( i = 0; i < 
L2; i++ ) { #ifdef BASOP_NOGLOB - y[i] = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[add(add(add(L2 , m) , R2) , i)], fac ), s ) ); /* negate, fold out left end of DCT */ + y[i] = round_fx_sat( L_shl_sat( Mpy_32_16_1( tmp_buf[add( add( add( L2, m ), R2 ), i )], fac ), s ) ); /* negate, fold out left end of DCT */ #else y[i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[L2 + m + R2 + i], fac ), s ) ); /* negate, fold out left end of DCT */ #endif @@ -320,7 +320,7 @@ void TCX_MDST_Inverse_fx( FOR( i = 0; i < R2; i++ ) { - y[add(add(add(l , m) , R2) , i)] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add(L2 , i)], fac ), s ) ); /* fold out right end of DCT */ + y[add( add( add( l, m ), R2 ), i )] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add( L2, i )], fac ), s ) ); /* fold out right end of DCT */ move16(); } @@ -335,7 +335,7 @@ void TCX_MDST_Inverse_fx( Word16 f; f = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add( L2, i )], fac ), s ) ); - y[add(L2 , i)] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[sub( sub( add( add( l, m ), R2 ), 1 ), i )], negfac ), s ) ); /* time-reverse mid of DCT */ + y[add( L2, i )] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[sub( sub( add( add( l, m ), R2 ), 1 ), i )], negfac ), s ) ); /* time-reverse mid of DCT */ move16(); y[sub( sub( add( add( l, m ), R2 ), 1 ), i )] = negate( f ); @@ -367,7 +367,7 @@ void TCX_MDXT_Inverse_fx( set32_fx( tmp_buf, 0, N_MAX + L_MDCT_OVLP_MAX / 2 ); - edxt_fx( x, tmp_buf + L2, add(add(L2 , m) , R2), kernel_type, TRUE ); + edxt_fx( x, tmp_buf + L2, add( add( L2, m ), R2 ), kernel_type, TRUE ); fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); x_e = add( x_e, fac_e ); @@ -381,8 +381,8 @@ void TCX_MDXT_Inverse_fx( { signLeft = fac; } - //signRight = ( kernel_type & 1 ? fac : negfac ); - IF ( L_and( kernel_type, 1 ) ) + // signRight = ( kernel_type & 1 ? 
fac : negfac ); + IF( L_and( kernel_type, 1 ) ) { signRight = fac; } @@ -396,24 +396,24 @@ void TCX_MDXT_Inverse_fx( FOR( i = 0; i < L2; i++ ) { - y[i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add(add(add(L2 , m) , R2) , i)], signLeft ), s ) ); /* fold out the left end */ + y[i] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add( add( add( L2, m ), R2 ), i )], signLeft ), s ) ); /* fold out the left end */ } FOR( i = 0; i < R2; i++ ) { - y[add(add(add(l , m) , R2) , i)] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add(L2 , i)], signRight ), s ) ); /* ...and right end */ + y[add( add( add( l, m ), R2 ), i )] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add( L2, i )], signRight ), s ) ); /* ...and right end */ move16(); } - FOR( i = 0; i < ( ( add(add(L2 , m) , R2) ) >> 1 ); i++ ) + FOR( i = 0; i < ( ( add( add( L2, m ), R2 ) ) >> 1 ); i++ ) { - f = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add(L2 , i)], negfac ), s ) ); + f = round_fx( L_shl( Mpy_32_16_1( tmp_buf[add( L2, i )], negfac ), s ) ); - y[add(L2 , i)] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[sub(sub(add(add(l , m) , R2) , 1) , i)], negfac ), s ) ); /* time-reverse mid of DCT */ + y[add( L2, i )] = round_fx( L_shl( Mpy_32_16_1( tmp_buf[sub( sub( add( add( l, m ), R2 ), 1 ), i )], negfac ), s ) ); /* time-reverse mid of DCT */ move16(); - y[sub(sub(add(add(l , m) , R2) , 1) , i)] = f; + y[sub( sub( add( add( l, m ), R2 ), 1 ), i )] = f; move16(); } diff --git a/lib_com/tcx_mdct_window.c b/lib_com/tcx_mdct_window.c index 44c1755ae..8276f07d4 100644 --- a/lib_com/tcx_mdct_window.c +++ b/lib_com/tcx_mdct_window.c @@ -116,11 +116,11 @@ void mdct_window_sine_IVAS_updated( const Word16 window_type, const Word16 element_mode ) { - IF (EQ_16( element_mode , EVS_MONO )) + IF( EQ_16( element_mode, EVS_MONO ) ) { const PWord16 *table; table = getSineWindowTable( n ); - FOR ( Word32 i = 0; i < shr( n, 1 ); i++ ) + FOR( Word32 i = 0; i < shr( n, 1 ); i++ ) { window[i].v.re = table[i].v.re; move16(); @@ -135,8 +135,8 @@ void 
mdct_window_sine_IVAS_updated( Word16 buf_in_size = 0; move16(); Word16 temp[420]; - set16_fx(temp, 0, 420); - SWITCH ( window_type ) + set16_fx( temp, 0, 420 ); + SWITCH( window_type ) { case FULL_OVERLAP: window_table = tcx_mdct_window_48_fx; @@ -159,7 +159,7 @@ void mdct_window_sine_IVAS_updated( BREAK; } - IF( EQ_32( Fs , 48000 )) + IF( EQ_32( Fs, 48000 ) ) { Copy( window_table, temp, n ); } @@ -168,7 +168,7 @@ void mdct_window_sine_IVAS_updated( lerp( window_table, temp, n, buf_in_size ); } - FOR ( Word32 i = 0; i < shr(n, 1); i++ ) + FOR( Word32 i = 0; i < shr( n, 1 ); i++ ) { window[i].v.re = temp[n - 1 - i]; move16(); @@ -187,7 +187,7 @@ void mdct_window_sine( PWord16 *window, Word16 n ) { const PWord16 *table; table = getSineWindowTable( n ); - FOR ( Word32 i = 0; i < shr(n, 1); i++ ) + FOR( Word32 i = 0; i < shr( n, 1 ); i++ ) { window[i].v.re = table[i].v.re; move16(); diff --git a/lib_com/tcx_utils_fx.c b/lib_com/tcx_utils_fx.c index 0191e2a7c..e5d9f1478 100644 --- a/lib_com/tcx_utils_fx.c +++ b/lib_com/tcx_utils_fx.c @@ -889,7 +889,7 @@ void lpc2mdct_2( BASOP_cfft_ivas( RealData_fx, ImagData_fx, 1, &scale ); /*Get amplitude*/ - j = sub(FDNS_NPTS , 1); + j = sub( FDNS_NPTS, 1 ); move16(); k = 0; move16(); @@ -1973,7 +1973,7 @@ void tcx_noise_filling_with_shift( } Q[i] = L_shl( Q[i], nrm ); move32(); - new_Q_e[i] = sub(31 , nrm); + new_Q_e[i] = sub( 31, nrm ); tmp1 = shr( seed, 4 ); nrg = L_mac0( nrg, tmp1, tmp1 ); /* sum up energy of current noise segment */ diff --git a/lib_com/tec_com.c b/lib_com/tec_com.c index dd902ea4b..d702c4330 100644 --- a/lib_com/tec_com.c +++ b/lib_com/tec_com.c @@ -655,7 +655,7 @@ void calcGainTemp_TBE_Fx( startPos, stopPos, bandOffset, - sub(15 , cldfb_exp) ); + sub( 15, cldfb_exp ) ); IF( code > 0 ) { @@ -1031,7 +1031,7 @@ static Word16 procTec_Fx( s = norm_s( hb_synth_Fx[k] ); hb_synth_Fx[k] = mult_r( gain_m[i], shl( hb_synth_Fx[k], s ) ); move16(); - shift[k] = sub(s , gain_e[i]); + shift[k] = sub( s, gain_e[i] ); if ( 
GT_16( min_shift, shift[k] ) ) { diff --git a/lib_com/tns_base.c b/lib_com/tns_base.c index 4e2ce9c96..6c62b0a3c 100644 --- a/lib_com/tns_base.c +++ b/lib_com/tns_base.c @@ -108,7 +108,7 @@ void InitTnsConfiguration( assert( ( nSampleRate > 0 ) && ( frameLength > 0 ) && ( pTnsConfig != NULL ) ); test(); test(); - IF ( ( nSampleRate <= 0 ) || ( frameLength <= 0 ) || ( pTnsConfig == NULL ) ) + IF( ( nSampleRate <= 0 ) || ( frameLength <= 0 ) || ( pTnsConfig == NULL ) ) { return /*TNS_FATAL_ERROR*/; } @@ -360,7 +360,7 @@ Word16 ITF_Detect_fx( /* Check threshold HLM_MIN_NRG */ BASOP_SATURATE_WARNING_OFF_EVS; #ifdef BASOP_NOGLOB - tmp32 = L_sub( L_shl_o( L_tmp, sub( shift, sub(24 , Q) ), &Overflow ), 4194304l /*HLM_MIN_NRG Q7*/ ); + tmp32 = L_sub( L_shl_o( L_tmp, sub( shift, sub( 24, Q ) ), &Overflow ), 4194304l /*HLM_MIN_NRG Q7*/ ); #else /* BASOP_NOGLOB */ tmp32 = L_sub( L_shl( L_tmp, sub( shift, 24 - Q ) ), 4194304l /*HLM_MIN_NRG Q7*/ ); #endif -- GitLab