diff --git a/lib_com/cnst.h b/lib_com/cnst.h index 66b6dc9a15d2f74cffce30849e79c01572b3fc3f..a5030ac9c98bb3bf1f430798afe117443c43918a 100644 --- a/lib_com/cnst.h +++ b/lib_com/cnst.h @@ -1233,6 +1233,7 @@ enum #define SPC 0.0234952f #define SPC_plus SPC * 1.001f #define ALPHA_SQ ( ( 0.5f / PI2 ) * ( 0.5f / PI2 ) ) +#define ALPHA_SQ_Q30 (6799549) /* ALPHA_SQ, i.e. ( ( 0.5f / PI2 ) * ( 0.5f / PI2 ) ), in Q30 */ #define NC M / 2 #define LSF_GAP 50.0f diff --git a/lib_com/fft_rel.c b/lib_com/fft_rel.c index 85d0ae76a6e87bc28c3ea04e27d5d26d19a4d3c6..aa3b578bdb1a0df53f8fd8541e0ff198da03d704 100644 --- a/lib_com/fft_rel.c +++ b/lib_com/fft_rel.c @@ -301,6 +301,168 @@ void fft_rel( return; } +void fft_rel_16_32fx( /* FFT variant that runs its butterflies in a local 32-bit buffer and writes the result back to the 16-bit x[] with a block scaling reported in *q_x */ + Word16 x[], /* i/o: input/output vector Qx */ + Word16 *q_x, /* i/o: extra scaling of x[] on output; written when i_subfr == 0, otherwise read and lowered if the new data needs a smaller scaling */ + Word16 i_subfr, /* i : subframe index; 0 takes the branch that sets *q_x unconditionally */ + const Word16 n, /* i : vector length */ + const Word16 m /* i : log2 of vector length */ +) +{ + Word16 i, j, k, n1, n2, n4; + Word16 step; + Word32 xt, t1, t2; + Word32 *x0, *x1, *x2; + const Word16 *s, *c; + Word32 *xi2, *xi3, *xi4, *xi1; + + Word32 fft_bff32[L_FFT]; + Copy_Scale_sig_16_32_no_sat( x, fft_bff32, L_FFT, 0 ); // copy x to the 32-bit work buffer fft_bff32 with a shift of 0 (no scaling); NOTE(review): L_FFT samples are copied whatever n is, presumably n == L_FFT, confirm against callers + + /*-----------------------------------------------------------------* + * Digit reverse counter + *-----------------------------------------------------------------*/ + + j = 0; + move16(); + x0 = &fft_bff32[0]; // Qx + FOR( i = 0; i < n - 1; i++ ) + { + IF( LT_16( i, j ) ) + { + xt = fft_bff32[j]; // Qx + move32(); + fft_bff32[j] = *x0; // Qx + move32(); + *x0 = xt; // Qx + move32(); + } + x0++; + k = shr( n, 1 ); + WHILE( ( k <= j ) ) + { + j = sub( j, k ); + k = shr( k, 1 ); + } + j = add( j, k ); + } + + /*-----------------------------------------------------------------* + * Length two butterflies + *-----------------------------------------------------------------*/ + + x0 = &fft_bff32[0]; + x1 = &fft_bff32[1]; + FOR( i = 0; i < ( n >> 1 ); i++ ) + { + xt = *x0; + move32(); + *x0 = 
L_add( xt, *x1 ); + move32(); + *x1 = L_sub( xt, *x1 ); + move32(); + x0++; + x0++; + x1++; + x1++; + } + + /*-----------------------------------------------------------------* + * Other butterflies + * + * The implementation described in [1] has been changed by using + * table lookup for evaluating sine and cosine functions. The + * variable ind and its increment step are needed to access table + * entries. Note that this implementation assumes n4 to be so + * small that ind will never exceed the table. Thus the input + * argument n and the constant N_MAX_SAS must be set properly. + *-----------------------------------------------------------------*/ + + n2 = 1; + move16(); + /* step = N_MAX_SAS/4; */ + FOR( k = 2; k <= m; k++ ) + { + n4 = n2; + move16(); + n2 = shl( n4, 1 ); + n1 = shl( n2, 1 ); + + step = idiv1616( N_MAX_SAS, n1 ); + + x0 = fft_bff32; + x1 = fft_bff32 + n2; + x2 = fft_bff32 + add( n2, n4 ); + FOR( i = 0; i < n; i += n1 ) + { + xt = *x0; + move32(); /* xt = x[i]; */ + *x0 = L_add( xt, *x1 ); + move32(); /* x[i] = xt + x[i+n2]; */ + *x1 = L_sub( xt, *x1 ); + move32(); /* x[i+n2] = xt - x[i+n2]; */ + *x2 = L_negate( *x2 ); + move32(); /* x[i+n2+n4] = -x[i+n2+n4]; */ + + + s = sincos_t_fx + step; // Q15 + c = s + 64; // Q15 + xi1 = fft_bff32 + add( i, 1 ); + xi3 = xi1 + n2; + xi2 = xi3 - 2; + xi4 = xi1 + sub( n1, 2 ); + + FOR( j = 1; j < n4; j++ ) + { + t1 = L_add( Mpy_32_16_1( *xi3, *c ), Mpy_32_16_1( *xi4, *s ) ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx */ + t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx */ + *xi4 = L_sub( *xi2, t2 ); + move32(); + *xi3 = L_negate( L_add( *xi2, t2 ) ); + move32(); + *xi2 = L_sub( *xi1, t1 ); + move32(); + *xi1 = L_add( *xi1, t1 ); + move32(); + + xi4--; + xi2--; + xi3++; + xi1++; + c += step; + s += step; /* autoincrement by ar0 */ + } + + x0 += n1; + x1 += n1; + x2 += n1; + } + /* step = shr(step, 1); */ + } + Word16 norm = L_norm_arr( 
fft_bff32, L_FFT ); + IF( i_subfr == 0 ) + { + Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm ); + *q_x = sub( norm, 16 ); /* first call: the scaling of x[] is set to norm - 16 */ + move16(); + } + ELSE + { + IF( LT_16( sub( norm, 16 ), *q_x ) ) + { + scale_sig( x - L_FFT, L_FFT, sub( sub( norm, 16 ), *q_x ) ); /* new data needs a smaller scaling: the L_FFT samples stored just before x are shifted by the (negative) difference; presumably they are the previous call's output, confirm against caller */ + Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm ); + *q_x = sub( norm, 16 ); + move16(); + } + ELSE + { + Copy_Scale_sig32_16( fft_bff32, x, L_FFT, add( 16, *q_x ) ); /* the existing scaling *q_x is kept for the new data */ + } + } + + return; +} void fft_rel_fx( Word16 x[], /* i/o: input/output vector Qx */ diff --git a/lib_com/lsf_tools_fx.c b/lib_com/lsf_tools_fx.c index b99fc47538f9cd8cae2507f1a5d809175a1e00a3..0a8a86aee07b9f02c0ebdc231a74620231793d82 100644 --- a/lib_com/lsf_tools_fx.c +++ b/lib_com/lsf_tools_fx.c @@ -1505,6 +1505,80 @@ void lsp_weights_fx( move16(); } +void lsp_weights_ivas_fx( /* computes Order weights from the spacing of neighbouring input frequencies, all aligned to one common Q returned in *Qout */ + Word16 lsp_nq_fx[], /* i : input frequencies, Q15 per the annotations in the body */ + Word16 w[], /* o : weights, Q(*Qout) */ + Word16 Order, /* i : number of weights to compute; the local buffers hold at most 20 */ + Word16 *Qout ) /* o : Q of w[], set to q_min - 16 */ +{ + Word16 i; + Word16 q_weight[20]; + Word32 weight[20]; + Word16 delta1, delta2; + Word32 L_tmp; + Word16 q_min; + + delta1 = lsp_nq_fx[0]; // Q15 + move16(); + delta2 = sub( lsp_nq_fx[1], lsp_nq_fx[0] ); // Q15 + + L_tmp = L_mult0( delta1, delta2 ); // Q30 + L_tmp = root_a_over_b_ivas_fx( ALPHA_SQ_Q30, Q30, L_tmp, Q30, &q_weight[0] ); // q_weight[0] + + weight[0] = Mpy_32_16_1( L_tmp, 32000 /* 250 in Q7*/ ); // q_weight[0]-8 + q_weight[0] = sub( q_weight[0], 8 ); + move32(); + move16(); + + q_min = q_weight[0]; + move16(); + + FOR( i = 1; i < Order - 1; i++ ) + { + delta1 = sub( lsp_nq_fx[i], lsp_nq_fx[i - 1] ); // Q15 + delta2 = sub( lsp_nq_fx[i + 1], lsp_nq_fx[i] ); // Q15 + + L_tmp = L_mult0( delta1, delta2 ); // Q30 + L_tmp = root_a_over_b_ivas_fx( ALPHA_SQ_Q30, Q30, L_tmp, Q30, &q_weight[i] ); // q_weight[i] + + weight[i] = Mpy_32_16_1( L_tmp, 32000 /* 250 in Q7*/ ); // q_weight[i]-8 + q_weight[i] = sub( q_weight[i], 8 ); + move32(); + move16(); + + q_min = s_min( q_min, q_weight[i] ); + } + delta1 = sub( lsp_nq_fx[i], lsp_nq_fx[i - 1] ); // Q15; i == Order - 1 here when Order >= 2 + delta2 = sub( 16384 /* 0.5 in Q15*/, 
lsp_nq_fx[i] ); // Q15 + + L_tmp = L_mult0( delta1, delta2 ); // Q30 + L_tmp = root_a_over_b_ivas_fx( ALPHA_SQ_Q30, Q30, L_tmp, Q30, &q_weight[i] ); // q_weight[i] + + weight[i] = Mpy_32_16_1( L_tmp, 32000 /* 250 in Q7*/ ); // q_weight[i]-8 + q_weight[i] = sub( q_weight[i], 8 ); + move32(); + move16(); + + q_min = s_min( q_min, q_weight[i] ); + + FOR( i = 0; i < Order; i++ ) + { + w[i] = round_fx( L_shl( weight[i], sub( q_min, q_weight[i] ) ) ); /* every weight is aligned to the smallest Q found above; q_min-16 */ + move16(); + } + + IF( Order != LPC_SHB_ORDER_WB ) + { + w[3] = round_fx( L_shl( L_mult( w[3], 18022 ), 1 ) ); /* w[3] * 1.1 (18022 is 1.1 in Q14); q_min-16 */ + w[4] = round_fx( L_shl( L_mult( w[4], 18022 ), 1 ) ); /* w[4] * 1.1 (18022 is 1.1 in Q14); q_min-16 */ + move16(); + move16(); + } + + *Qout = sub( q_min, 16 ); + move16(); +} + /* * E_LPC_isf_isp_conversion * diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index daf2c80d5b546d3cbccfde7f9891cac059635b7d..fbd3b27f3d974c3bb7017a7d8eb7f172dd30f2dc 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -1084,6 +1084,12 @@ void lsp_weights_fx( Word16 Order, Word16 *Qout ); +void lsp_weights_ivas_fx( + Word16 lsp_nq_fx[], /* i : input frequencies */ + Word16 w[], /* o : weights, Q(*Qout) */ + Word16 Order, /* i : number of weights to compute */ + Word16 *Qout /* o : Q of w[] */ ); + void space_lsfs_fx( Word16 *lsfs, /* i/o: Line spectral frequencies */ const Word16 order /* i : order of LP analysis */ @@ -1472,6 +1478,13 @@ void fft_rel_fx( const Word16 n, /* i : vector length */ const Word16 m /* i : log2 of vector length */ ); +void fft_rel_16_32fx( + Word16 x[], /* i/o: input/output vector Qx */ + Word16 *q_x, /* i/o: extra scaling of x[] on output */ + Word16 i_subfr, /* i : subframe index */ + const Word16 n, /* i : vector length */ + const Word16 m /* i : log2 of vector length */ +); void fft_rel_fx32( Word32 x[], /* i/o: i /output vector */ const Word16 n, /* i : vector length */ @@ -10297,6 +10310,13 @@ Word32 root_a_over_b_fx( Word16 Q_b, Word16 *exp_out ); +Word32 root_a_over_b_ivas_fx( + Word32 a, /* Q(Q_a) */ + Word16 Q_a, /* i : Q of a */ + Word32 b, /* Q(Q_b) */ + Word16 Q_b, /* i : Q of b */ + Word16 *exp_out /* o : Q of the returned value (the definition in tools_fx.c names this parameter q_out) */ ); + void fir_fx( const Word16 x[], /* i : input vector Qx*/ 
const Word16 h[], /* i : impulse response of the FIR filter Q12*/ Word16 y[], /* o : output vector (result of filtering) Qx*/ diff --git a/lib_com/tools_fx.c b/lib_com/tools_fx.c index 704b44b8483a247d765a60d5c141cda7a6b93c34..063f49fc4e226c2b6dbc5bf7d53dfbcefe2c5bfc 100644 --- a/lib_com/tools_fx.c +++ b/lib_com/tools_fx.c @@ -2505,6 +2505,109 @@ Word32 root_a_over_b_fx( return L_tmp; } +Word32 root_a_over_b_ivas_fx( /* polynomial approximation built from a and b; returns the mantissa and reports its Q in *q_out; returns 0 with *q_out = 0 when a <= 0 or b <= 0 */ + Word32 a, /* i : Q(Q_a) */ + Word16 Q_a, /* i : Q of a */ + Word32 b, /* i : Q(Q_b) */ + Word16 Q_b, /* i : Q of b */ + Word16 *q_out ) /* o : Q of the returned value */ +{ + Word16 shift_a, shift_b, shift; + Word32 mod_a, mod_b, one_in_Q_a, one_in_Q_b, half_in_Q_a, half_in_Q_b; + Word32 a_sqr, b_sqr, p0, p1, p2, approx; + Word16 exp; + + test(); + IF( ( a <= 0 ) || ( b <= 0 ) ) + { + *q_out = 0; + move16(); + return 0; + } + + one_in_Q_a = L_shl( 1, Q_a ); // 1.0f in Q_a + one_in_Q_b = L_shl( 1, Q_b ); // 1.0f in Q_b + half_in_Q_a = L_shr( one_in_Q_a, 1 ); // 0.5f in Q_a + half_in_Q_b = L_shr( one_in_Q_b, 1 ); // 0.5f in Q_b + + a = L_add( a, one_in_Q_a ); /* NOTE(review): 1.0 is added to a, so everything below works on a + 1.0; confirm this offset is intended */ + b = L_add( b, one_in_Q_b ); /* NOTE(review): same 1.0 offset applied to b */ + + /* This next piece of code implements a "norm" function */ + /* and returns the shift needed to scale "a" to have a */ + /* 1 in the (MSB-1) position. This is equivalent to */ + /* giving a value between 0.5 & 1.0. */ + + mod_a = a; + move32(); + + shift_a = 0; + move16(); + WHILE( GT_32( mod_a, one_in_Q_a ) ) + { + mod_a = L_shr( mod_a, 1 ); + shift_a = sub( shift_a, 1 ); + } + + WHILE( LT_32( mod_a, half_in_Q_a ) ) + { + mod_a = L_shl( mod_a, 1 ); + shift_a = add( shift_a, 1 ); + } + + shift_a = s_and( shift_a, -2 ); /* clear bit 0: the shift is made even because ( shift_b - shift_a ) is halved below */ + mod_a = L_shl( a, shift_a ); // Q_a + + /* This next piece of code implements a "norm" function */ + /* and returns the shift needed to scale "b" to have a */ + /* 1 in the (MSB-1) position. This is equivalent to */ + /* giving a value between 0.5 & 1.0. 
*/ + mod_b = b; + move32(); + + shift_b = 0; + move16(); + WHILE( GT_32( mod_b, one_in_Q_b ) ) + { + mod_b = L_shr( mod_b, 1 ); + shift_b = sub( shift_b, 1 ); + } + + WHILE( LT_32( mod_b, half_in_Q_b ) ) + { + mod_b = L_shl( mod_b, 1 ); + shift_b = add( shift_b, 1 ); + } + + shift_b = s_and( shift_b, -2 ); /* clear bit 0: even shift, as for shift_a */ + mod_b = L_shl( b, shift_b ); // Q_b + + shift = shr( sub( shift_b, shift_a ), 1 ); + + a_sqr = W_extract_h( W_shl( W_mult0_32_32( mod_a, mod_a ), sub( 32, Q_a ) ) ); // Q_a + b_sqr = W_extract_h( W_shl( W_mult0_32_32( mod_b, mod_b ), sub( 32, Q_b ) ) ); // Q_b + + p2 = L_shl( -408505077 /* -0.7609f in Q29 */, sub( Q_b, 31 ) ); // Q_b-2 + p1 = L_shl( 1444612250 /* 2.6908f in Q29 */, sub( Q_b, 31 ) ); // Q_b-2 + p0 = L_shl( 385258566 /* 0.7176f in Q29 */, sub( Q_b, 31 ) ); // Q_b-2 + + p2 = Madd_32_32( Madd_32_32( p2, 501759554 /* 0.9346f in Q29*/, mod_b ), -252060893 /* -0.4695f in Q29 */, b_sqr ); // Q_b-2 + p1 = Madd_32_32( Madd_32_32( p1, -1774680487 /* -3.3056f in Q29 */, mod_b ), 891635211 /* 1.6608f in Q29 */, b_sqr ); // Q_b-2 + p0 = Madd_32_32( Madd_32_32( p0, -473251709 /* -0.8815f in Q29 */, mod_b ), 237780127 /* 0.4429f in Q29 */, b_sqr ); // Q_b-2 + + /* approx = p0 + p1 * mod_a + p2 * mod_a * mod_a; */ + approx = Madd_32_32( Mpy_32_32( p1, mod_a ), p2, a_sqr ); // Q_a+Q_b-33 + approx = L_add( approx, L_shl( p0, sub( Q_a, 31 ) ) ); // Q_a+Q_b-33 + + exp = sub( norm_l( approx ), 1 ); + approx = L_shl( approx, exp ); // Q_a+Q_b-33+exp + + *q_out = sub( add( sub( add( Q_a, Q_b ), 33 ), exp ), shift ); /* shift is not applied to approx; it is folded into the reported Q instead */ + move16(); + + return approx; +} + /*===================================================================*/ /* FUNCTION : fir_fx () */ /*-------------------------------------------------------------------*/ diff --git a/lib_enc/analy_sp_fx.c b/lib_enc/analy_sp_fx.c index f0d9405638fd411b9d271c6f2723815f950a2459..c0d1a8bcae8997c2e8e67c3ac7cb7464efad5dcc 100644 --- a/lib_enc/analy_sp_fx.c +++ b/lib_enc/analy_sp_fx.c @@ -510,9 +510,7 @@ void ivas_analy_sp_fx( 
} ELSE { - Word16 scale = norm_arr( speech + 3 * ( L_SUBFR / 2 ) - L_FFT / 2, L_FFT + 4 * ( L_SUBFR / 2 ) ); - scale = sub( scale, LOG2_L_FFT ); // guard_bits - *q_fft_buff = add( Q_new, scale ); + Word16 scale = 0; move16(); FOR( i_subfr = 0; i_subfr <= 1; i_subfr++ ) @@ -543,11 +541,25 @@ void ivas_analy_sp_fx( move16(); } - scale_sig( pt_fft, L_FFT, scale ); - /* compute the spectrum */ - fft_rel_fx( pt_fft, L_FFT, LOG2_L_FFT ); - + fft_rel_16_32fx( pt_fft, &scale, i_subfr, L_FFT, LOG2_L_FFT ); + *q_fft_buff = add( Q_new, scale ); // resultant q for fft_buff + move16(); + IF( EQ_16( i_subfr, 1 ) ) + { + Word16 new_q_lf_E = add( shl( *q_fft_buff, 1 ), 14 ); + Word16 new_q_bands = new_q_lf_E; + IF( GT_16( new_q_bands, 39 ) ) + { + new_q_bands = 39; + move16(); + } + scale_sig32( fr_bands, NB_BANDS, sub( new_q_bands, *q_fr_bands ) ); + scale_sig32( lf_E, VOIC_BINS, sub( new_q_lf_E, *q_lf_E ) ); + LEtot = W_shl( LEtot, sub( new_q_bands, *q_fr_bands ) ); + scale_sig32( Bin_E, L_FFT / 2, sub( new_q_lf_E, *q_lf_E ) ); + scale_sig32( band_energies, NB_BANDS, sub( new_q_bands, *q_fr_bands ) ); + } /* find energy per critical band */ ivas_find_enr( pt_fft, *q_fft_buff, pt_bands, q_fr_bands, lf_E + i_subfr * VOIC_BINS, q_lf_E, &LEtot, min_band, max_band, &Bin_E[i_subfr * L_FFT / 2], BIN, band_energies + i_subfr * NB_BANDS ); diff --git a/lib_enc/swb_tbe_enc_fx.c b/lib_enc/swb_tbe_enc_fx.c index 1366ab4c54123a1256a307374a28576449b898d2..23bd387d4db846e742a72044d90b9983c7131d32 100644 --- a/lib_enc/swb_tbe_enc_fx.c +++ b/lib_enc/swb_tbe_enc_fx.c @@ -3148,7 +3148,7 @@ void swb_tbe_enc_ivas_fx( test(); IF( st_fx->rf_mode || EQ_32( st_fx->extl_brate, SWB_TBE_0k95 ) || EQ_32( st_fx->extl_brate, SWB_TBE_1k10 ) ) { - lsp_weights_fx( lsf_shb_fx, weights_lsp, LPC_SHB_ORDER, &Q_out ); + lsp_weights_ivas_fx( lsf_shb_fx, weights_lsp, LPC_SHB_ORDER, &Q_out ); /* to compensate for the 1.1* weighting done inside the function lsp_weights */ /*weights_lsp[3]*=0.909091f; 
weights_lsp[4]*=0.909091f; */ diff --git a/lib_rend/lib_rend.c b/lib_rend/lib_rend.c index 9cab741826e79a08c82f80572417f6673cd95669..86eea321bc28387a7d23287cbd18943db2a10cfd 100644 --- a/lib_rend/lib_rend.c +++ b/lib_rend/lib_rend.c @@ -4564,7 +4564,7 @@ static void renderBufferChannelLerp_fx( { i = 0; Word32 tmp = Q31_BY_SUB_FRAME_240; - Word32 tmp1 = 239; + Word32 tmp1 = 239; /* L_SUBFRAME_48k - 1 */ move32(); move32(); move32(); @@ -4572,49 +4572,49 @@ static void renderBufferChannelLerp_fx( { case NUM_SAMPLES_960: tmp = Q31_BY_NUM_SAMPLES_960; - tmp1 = 959; + tmp1 = 959; /* NUM_SAMPLES_960 - 1 */ move32(); move32(); BREAK; case NUM_SAMPLES_720: tmp = Q31_BY_NUM_SAMPLES_720; - tmp1 = 719; + tmp1 = 719; /* NUM_SAMPLES_720 - 1 */ move32(); move32(); BREAK; case NUM_SAMPLES_320: tmp = Q31_BY_NUM_SAMPLES_320; - tmp1 = 319; + tmp1 = 319; /* NUM_SAMPLES_320 - 1 */ move32(); move32(); BREAK; case NUM_SAMPLES_160: tmp = Q31_BY_NUM_SAMPLES_160; - tmp1 = 159; + tmp1 = 159; /* NUM_SAMPLES_160 - 1 */ move32(); move32(); BREAK; case L_SUBFRAME_48k: tmp = Q31_BY_SUB_FRAME_240; - tmp1 = 239; + tmp1 = 239; /* L_SUBFRAME_48k - 1 */ move32(); move32(); BREAK; case L_SUBFRAME_32k: tmp = Q31_BY_SUB_FRAME_180; - tmp1 = 179; + tmp1 = 179; /* L_SUBFRAME_32k - 1 */ move32(); move32(); BREAK; case L_SUBFRAME_16k: tmp = Q31_BY_SUB_FRAME_80; - tmp1 = 79; + tmp1 = 79; /* L_SUBFRAME_16k - 1 */ move32(); move32(); BREAK; case L_SUBFRAME_8k: tmp = Q31_BY_SUB_FRAME_40; - tmp1 = 39; + tmp1 = 39; /* L_SUBFRAME_8k - 1 */ move32(); move32(); BREAK;