diff --git a/lib_com/ivas_cnst.h b/lib_com/ivas_cnst.h index 4cda51198fded4a364d2c0364120abcea8cb4b6c..d4beedca7474f0ba94140f5775ca6920b0548843 100644 --- a/lib_com/ivas_cnst.h +++ b/lib_com/ivas_cnst.h @@ -824,7 +824,9 @@ enum fea_names #define STEREO_BITS_TCA_CORRSTATS 5 /* target corrStats */ #define STEREO_BITS_TCA_GD 5 /* target gain */ #define STEREO_TCA_GDMIN -1.0f +#define STEREO_TCA_GDMIN_FX -32768 #define STEREO_TCA_GDSTEP 0.05f +#define STEREO_TCA_GDSTEP_FX 819 #define STEREO_BITS_TCA ( STEREO_BITS_TCA_CHAN + STEREO_BITS_TCA_CORRSTATS + STEREO_BITS_TCA_GD ) #define STEREO_ICBWE_MSFLAG_BITS 1 /* BWE Multi Source flag */ diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index d86aa894eda9a6187b351dc1a8f9b74fe16900fc..670917fdd7a25f4a638fe83ea97ba2350dead88c 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -317,13 +317,25 @@ ivas_error pre_proc_ivas( int16_t *Voicing_flag, /* o : voicing flag for HQ FEC */ const float old_wsp[], /* i : weighted input signal buffer */ const int16_t loc_harm, /* i : harmonicity flag */ +#ifndef IVAS_FLOAT_FIXED const float cor_map_sum, /* i : speech/music clasif. 
parameter */ +#endif const int16_t vad_flag_dtx, /* i : HE-SAD flag with additional DTX HO */ +#ifndef IVAS_FLOAT_FIXED /*const*/ float enerBuffer[CLDFB_NO_CHANNELS_MAX], /* i : energy buffer */ /*const*/ float fft_buff[2 * L_FFT], /* i : FFT buffer */ +#endif const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ const int16_t vad_hover_flag, /* i : VAD hangover flag */ const int16_t flag_16k_smc /* i : flag to indicate if the OL SMC is run at 16 kHz */ +#ifdef IVAS_FLOAT_FIXED + , + Word32 enerBuffer_fx[CLDFB_NO_CHANNELS_MAX], /* i : energy buffer, fixed-point mantissas */ + Word16 e_enerBuffer, /* i : exponent paired with enerBuffer_fx at the call site in ivas_core_enc() */ + Word16 fft_buff_fx[2 * L_FFT], /* i : FFT buffer, fixed-point; its Q is not passed in -- TODO confirm expected Q */ + Word32 cor_map_sum_fx, /* i : speech/music classif. parameter, fixed-point mantissa */ + Word16 exp_cor_map_sum /* i : exponent paired with cor_map_sum_fx at the call site */ +#endif ); ivas_error ivas_compute_core_buffers( @@ -2680,17 +2692,33 @@ void stereo_td_init_enc_fx( STEREO_TD_ENC_DATA_HANDLE hStereoTD, /* i/o: TD stereo encoder handle */ const Word16 last_element_mode /* i : last element mode */ ); - +#ifndef IVAS_FLOAT_FIXED ivas_error stereo_set_tdm( CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ const int16_t input_frame /* i : input frame length per channel */ ); - +#else +ivas_error stereo_set_tdm_fx( + CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ + const Word16 input_frame, /* i : input frame length per channel */ + Word16 input_q /* i : presumably the Q-format of the input signal -- TODO confirm */ +); +#endif +#ifndef IVAS_FLOAT_FIXED void stereo_tdm_prep_dwnmx ( CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ const float *input1, /* i : right channel input */ const int16_t input_frame /* i : frame lenght */ ); +#else +void stereo_tdm_prep_dwnmx_fx( + CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ + const Word32 *input1, /* i : right channel input */ + const Word16 input_frame, /* i : frame length */ + const Word16 input_q /* i : presumably the Q-format of input1 (previous comment was a copy-paste of "frame length") -- TODO confirm */ +); +#endif + #ifndef IVAS_FLOAT_FIXED int16_t stereo_tdm_ener_analysis( const int16_t ivas_format, /* i : IVAS format */ @@ -3638,6 +3666,11 @@ ivas_error front_vad_create_fx( void front_vad_destroy( FRONT_VAD_ENC_HANDLE *hFrontVad /* i/o: front-VAD handle */ ); +#ifdef 
IVAS_FLOAT_FIXED +void front_vad_destroy_fx( + FRONT_VAD_ENC_HANDLE *hFrontVad /* i/o: front-VAD handle */ +); +#endif ivas_error front_vad( CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure, nullable */ @@ -3658,23 +3691,25 @@ ivas_error front_vad( ); #ifdef IVAS_FLOAT_FIXED ivas_error front_vad_fx( - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure, nullable */ - Encoder_State *st, /* i/o: encoder state structure */ - const ENCODER_CONFIG_HANDLE hEncoderConfig, /* i : configuration structure */ - FRONT_VAD_ENC_HANDLE *hFrontVads, /* i/o: FrontVad handles */ - const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ - const int16_t input_frame, /* i : frame length */ - int16_t vad_flag_dtx[], /* o : HE-SAD flag with additional DTX HO */ - float fr_bands[][2 * NB_BANDS], /* i : energy in frequency bands */ - float Etot_LR[], /* o : total energy Left & Right channel */ - float lf_E[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels */ - int16_t localVAD_HE_SAD[], /* o : HE-SAD flag without hangover, LR channels */ - int16_t vad_hover_flag[], /* o : VAD hangover flag */ - float band_energies_LR[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN*/ - float *PS_out, /* o : energy spectrum */ - float *Bin_E_out, /* o : log-energy spectrum of the current frame */ - Word16 Q_inp , - Word16 Q_add ); + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure, nullable */ + Encoder_State * st, /* i/o: encoder state structure */ + const ENCODER_CONFIG_HANDLE hEncoderConfig, /* i : configuration structure */ + FRONT_VAD_ENC_HANDLE *hFrontVads, /* i/o: FrontVad handles */ + const Word16 MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ + const Word16 input_frame, /* i : frame length */ + Word16 vad_flag_dtx[], /* o : HE-SAD flag with additional DTX HO */ + Word32 fr_bands_fx[][2 * NB_BANDS], /* o : energy in frequency bands Q_buffer[n] + QSCALE + 2 */ + Word16 Etot_LR_fx[], /* o : total energy Left & Right channel 
Q8 */ + Word32 lf_E_fx[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels Q_buffer[n] + QSCALE */ + Word16 localVAD_HE_SAD[], /* o : HE-SAD flag without hangover, LR channels */ + Word16 vad_hover_flag[], /* o : VAD hangover flag */ + Word32 band_energies_LR_fx[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN Q_buffer[1] + QSCALE + 2 - band_ener_guardbits*/ + Word32 *PS_out_fx, /* o : energy spectrum Q_buffer + QSCALE */ + Word16 *Bin_E_out_fx, /* o : log-energy spectrum of the current frame Q7 */ + Word16 Q_inp, + Word16 *Q_buffer, + Word16 Q_add, + Word16 *front_create_flag ); #endif ivas_error front_vad_spar( SPAR_ENC_HANDLE hSpar, /* i/o: SPAR encoder structure */ @@ -4004,7 +4039,16 @@ Word32 check_bounds_l( const Word32 low, /* i : Low limit */ const Word32 high /* i : High limit */ ); +#ifdef IVAS_FLOAT_FIXED +ivas_error stereo_memory_enc_fx( + CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ + const int32_t input_Fs, /* i : input sampling rate */ + const int16_t max_bwidth, /* i : maximum audio bandwidth */ + const IVAS_FORMAT ivas_format, /* i : IVAS format */ + const int16_t nchan_transport /* i : number transport chans */ +); +#else ivas_error stereo_memory_enc( CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ const int32_t input_Fs, /* i : input sampling rate */ @@ -4014,6 +4058,7 @@ ivas_error stereo_memory_enc( const int16_t nchan_transport /* i : number transport chans */ ); +#endif ivas_error stereo_memory_dec( const int32_t ivas_total_brate, /* i : IVAS total bitrate */ @@ -4024,13 +4069,20 @@ ivas_error stereo_memory_dec( const MC_MODE mc_mode, /* i : MC mode */ const int16_t nchan_transport /* i : number of transport channels */ ); - +#ifdef IVAS_FLOAT_FIXED +void stereo_switching_enc_fx( + CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ + Word16 old_input_signal_pri[], /* i : old input signal of primary channel */ + const Word16 input_frame, /* i : input frame length */ + const Word16 
q_inp +); +#else void stereo_switching_enc( CPE_ENC_HANDLE hCPE, /* i : CPE structure */ float old_input_signal_pri[], /* i : old input signal of primary channel */ const int16_t input_frame /* i : input frame length */ ); - +#endif void stereo_switching_dec( CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */ const int32_t ivas_total_brate /* i : IVAS total bitrate */ diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index a87cca412105a7831fdf90bd802f28c049548898..adbe2b0fe7c2eacec832bdd1d5fda6c7945c45b1 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -3015,6 +3015,11 @@ Word16 transient_analysis_ivas_fx( const Word16 multi_harm_limit, /* i : multi harmonic threshold Q_multi_harm_limit = Qx */ Word16 Q_multi_harm_limit ); +void set_transient_stereo_fx( + CPE_ENC_HANDLE hCPE, /* i : CPE structure */ + Word16 currFlatness[] /* i/o: current flatness Q7*/ +); + void ivas_smc_mode_selection_fx( Encoder_State *st, /* i/o: encoder state structure */ const Word32 element_brate, /* i : element bitrate */ diff --git a/lib_enc/analy_sp_fx.c b/lib_enc/analy_sp_fx.c index 49affa6e6b07024793c970f418a785eb3cf360c3..82a77f60294300701605a70f1bb039e14b6f23c2 100644 --- a/lib_enc/analy_sp_fx.c +++ b/lib_enc/analy_sp_fx.c @@ -272,7 +272,7 @@ static void find_enr_dft_fx( /* for low frequency bins, save per bin energy for the use in find_tilt() */ freq = bin_freq; move32(); - for ( i = 0; i < NB_BANDS - 1; i++ ) /* up to maximum allowed voiced critical band */ + FOR( i = 0; i < NB_BANDS - 1; i++ ) /* up to maximum allowed voiced critical band */ { band_fx[i] = 0; move32(); @@ -317,7 +317,7 @@ static void find_enr_dft_fx( move32(); band_ener_fx[i] = band_fx[i]; /* per band energy without E_MIN */ move32(); - if ( band_fx[i] < L_shl( E_MIN_Q11_FX, sub( Qout, Q11 ) ) ) + if ( LT_32( band_fx[i], L_shl( E_MIN_Q11_FX, sub( Qout, Q11 ) ) ) ) { band_fx[i] = L_shl( E_MIN_Q11_FX, sub( Qout, Q11 ) ); move32(); @@ -381,7 +381,7 @@ static void find_enr_dft_fx( /* 
put bin energies from BinE into Bin_E[L_FFT/2-1] (interpolate 40 Hz bin values to fit into 50 Hz bins) */ /* Last value of Bin_E is handled outside this function*/ assert( bin_cnt == ( STEREO_DFT_N_12k8_ENC / 2 - 1 ) ); - Scale_sig32( BinE_fx, 256, sub( Qout, ( 2 * Q_inp_dmx - 32 ) ) ); + Scale_sig32( BinE_fx, 256, sub( Qout, sub( shl( Q_inp_dmx, 1 ), 32 ) ) ); BinE_fx[STEREO_DFT_N_12k8_ENC / 2 - 1] = BinE_fx[STEREO_DFT_N_12k8_ENC / 2 - 2]; move32(); L_lerp_fx( BinE_fx, Bin_E_fx, L_FFT / 2, STEREO_DFT_N_12k8_ENC / 2, &Qout ); diff --git a/lib_enc/cod4t64_fast.c b/lib_enc/cod4t64_fast.c index e778ca0827332571815bf6ab82d7f2df76e5bef4..85c5d157f5a98925a8736763778adac0ce821374 100644 --- a/lib_enc/cod4t64_fast.c +++ b/lib_enc/cod4t64_fast.c @@ -140,25 +140,25 @@ static Word16 find_best_pulse_fx( const Word16 L_subfr, const Word16 nb_tracks, const Word16 track, - const Word16 dn[], + const Word32 dn[], const Word16 sign[], Word16 *s ) { Word16 m, i; - Word16 temp, max_val; + Word32 temp, max_val; - max_val = MIN_16; - move16(); + max_val = MIN_32; + move32(); m = track; move16(); FOR( i = track; i < L_subfr; i += nb_tracks ) { - temp = i_mult( dn[i], sign[i] ); + temp = imult3216( dn[i], sign[i] ); - IF( GE_16( temp, max_val ) ) + IF( GE_32( temp, max_val ) ) { max_val = temp; - move16(); + move32(); m = i; move16(); } @@ -841,16 +841,23 @@ void acelp_fast_fx( Word16 m[MAX_PULSES_STEREO], s[MAX_PULSES_STEREO], m_max[MAX_PULSES_STEREO], s_max[MAX_PULSES_STEREO]; Word16 track_order[NB_TRACK_FCB_4T * MAX_NUM_INTER], m0_track[NB_TRACK_FCB_4T]; Word16 ind_stream[NPMAXPT * NB_TRACK_FCB_4T], idx; - Word16 G, G1, G2, G3, Gn, Gd, dn[L_SUBFR_MAX]; - Word16 tmpF, y_tmp[L_SUBFR_MAX]; - Word32 crit_num, crit_den, crit_num_max, crit_den_max; + Word16 G, G1, G2, G3, Gn, Gd; + Word16 y_tmp[L_SUBFR_MAX]; + Word32 dn[L_SUBFR_MAX]; + Word32 crit_num, crit_den, crit_num_max, crit_den_max, L_tmp1, L_tmp2; + Word16 q_crit_num, q_crit_den, q_crit_num_max, q_crit_den_max; Word16 h_buf[4 * 
L_SUBFR_MAX], *h, *h_inv, *p_hn, alp_buf[2 * L_SUBFR_MAX], *alp, *alp_pos0, *alp_pos1, *alp_pos2, *alp_pos3; Word32 dndn_fx, cncn_fx, cncn_track[NB_TRACK_FCB_4T]; Word16 dndn_e, cncn_e, cncn_track_e[NB_TRACK_FCB_4T]; - Word16 s_coef_fx, bn_orig_fx[L_SUBFR_MAX]; - Word16 max_val_fx, temp_fx, sign_fx[L_SUBFR_MAX], max_track[MAX_NUM_INTER]; + Word16 s_coef_fx; + Word32 bn_orig_fx[L_SUBFR_MAX]; + Word32 max_val_fx, temp_fx, max_track[MAX_NUM_INTER]; + Word16 sign_fx[L_SUBFR_MAX]; Word16 beta1_fx, beta2_fx; - Word16 exp; + Word16 exp, exp1, shift, q_H; + Word64 s64; + Word16 flag = 0; + move16(); /*-----------------------------------------------------------------* * Initialization @@ -993,6 +1000,8 @@ void acelp_fast_fx( dndn_fx = BASOP_Util_Add_Mant32Exp( dndn_fx, dndn_e, L_mult0( dn_orig[i], dn_orig[i] ), exp, &dndn_e ); // Q(dndn_e) } + exp1 = sub( Q31, shl( sub( Q_dn, 1 ), 1 ) ); + cncn_fx = 214748365 /* 0.1f in Q31 */; move32(); cncn_e = 0; @@ -1007,7 +1016,7 @@ void acelp_fast_fx( FOR( i = 0; i < L_subfr; i += nb_tracks ) { - cncn_track[q] = BASOP_Util_Add_Mant32Exp( cncn_track[q], cncn_track_e[q], L_mult0( cn[i + q], cn[i + q] ), exp, &cncn_track_e[q] ); // Q(cncn_track_e[q]) + cncn_track[q] = BASOP_Util_Add_Mant32Exp( cncn_track[q], cncn_track_e[q], L_mult0( cn[i + q], cn[i + q] ), exp1, &cncn_track_e[q] ); // Q(cncn_track_e[q]) move16(); } cncn_fx = BASOP_Util_Add_Mant32Exp( cncn_fx, cncn_e, cncn_track[q], cncn_track_e[q], &cncn_e ); // Q(cncn_e) @@ -1016,13 +1025,13 @@ void acelp_fast_fx( Word16 tmp; s_coef_fx = BASOP_Util_Divide3232_Scale( dndn_fx, cncn_fx, &tmp ); tmp = add( tmp, sub( dndn_e, cncn_e ) ); - s_coef_fx = Sqrt16( s_coef_fx, &tmp ); // Q(tmp) + s_coef_fx = Sqrt16( s_coef_fx, &tmp ); // Q(15 - tmp) FOR( i = 0; i < L_subfr; i++ ) { - bn_orig_fx[i] = add( mult( s_coef_fx, cn[i] ), shr( i_mult_sat( beta1_fx, dn_orig[i] ), tmp ) ); // Q_dn - tmp - move16(); + bn_orig_fx[i] = L_mac0( L_shr( L_mult( s_coef_fx, cn[i] ), sub( 15, tmp ) ), beta1_fx, 
dn_orig[i] ); // Q_dn + move32(); IF( bn_orig_fx[i] >= 0 ) { @@ -1061,20 +1070,23 @@ void acelp_fast_fx( /*-----------------------------------------------------------------* * Approximate FI[i][j] by alp[abs(i-j)] and compute buffer alp_buf[]. *-----------------------------------------------------------------*/ - Word16 shift = add( shl( add( norm_s( H[0] ), 1 ), 1 ), 1 ); + q_H = sub( 14, norm_s( H[0] ) ); + shift = sub( shl( q_H, 1 ), 6 ); + alp = alp_buf + L_subfr; FOR( i = 0; i < L_subfr; i++ ) { - *alp = 0; - move16(); + s64 = 0; + move64(); FOR( j = i; j < L_subfr; j++ ) { - *alp = shr( add( *alp, mult( H[j], H[j - i] ) ), 1 ); - move16(); + s64 = W_mac0_16_16( s64, H[j], H[j - i] ); /* Q = shift + 6*/ } - alp_buf[L_subfr - i] = *alp++; + *alp = extract_l( W_extract_l( W_shr( s64, shift ) ) ); /*Q6*/ + move16(); + alp_buf[L_subfr - i] = *alp++; /*Q6*/ move16(); } @@ -1083,16 +1095,16 @@ void acelp_fast_fx( FOR( q = 0; q < nb_tracks; q++ ) { max_track[q] = 0; - move16(); + move32(); FOR( i = q; i < L_subfr; i += nb_tracks ) { - temp_fx = i_mult( bn_orig_fx[i], sign_fx[i] ); // Q_dn - tmp + temp_fx = imult3216( bn_orig_fx[i], sign_fx[i] ); // Q_dn - IF( GE_16( temp_fx, shr( max_track[q], tmp ) ) ) + IF( GE_32( temp_fx, max_track[q] ) ) { max_track[q] = temp_fx; // Q_dn - move16(); + move32(); m0_track[q] = i; move16(); } @@ -1121,25 +1133,24 @@ void acelp_fast_fx( /* skip certain tracks if number of pulses is lower than number of tracks */ IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) ) { - max_track[NB_TRACK_FCB_4T - 3] = shl( -1, Q_dn ); // Q_dn - move16(); - max_track[NB_TRACK_FCB_4T - 1] = shl( -1, Q_dn ); // Q_dn - move16(); + max_track[NB_TRACK_FCB_4T - 3] = L_shl( -1, Q_dn ); // Q_dn + move32(); + max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dn ); // Q_dn + move32(); } ELSE IF( EQ_16( nb_pulse, 3 ) && EQ_16( codetrackpos, TRACKPOS_FIXED_FIRST ) ) { - max_track[NB_TRACK_FCB_4T - 1] = shl( -1, Q_dn ); // Q_dn - move16(); + 
max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dn ); // Q_dn + move32(); } FOR( q = 0; q < nb_tracks; q++ ) { - i = maximum_fx( max_track, nb_tracks, &tmpF ); - move16(); + i = maximum_32_fx( max_track, nb_tracks, &L_tmp1 ); track_order[q] = i; move16(); - max_track[i] = shl( -1, Q_dn ); // Q_dn - move16(); + max_track[i] = L_shl( -1, Q_dn ); // Q_dn + move32(); } track_order[4] = track_order[1]; // Q0 @@ -1206,9 +1217,13 @@ void acelp_fast_fx( * Main searching loop *-----------------------------------------------------------------*/ - crit_num_max = -32768; + crit_num_max = MIN_32; // Q31 + move32(); + q_crit_num_max = Q31; move16(); - crit_den_max = 32767; + crit_den_max = MAX_32; // Q31 + move32(); + q_crit_den_max = Q31; move16(); skip_track_max = -1; move16(); @@ -1232,12 +1247,12 @@ void acelp_fast_fx( IF( EQ_16( nb_tracks, NB_TRACK_FCB_2T ) ) { - Gn = i_mult( s[0], dn_orig[m[0]] ); // Q_dn + Gn = i_mult( s[0], shr( dn_orig[m[0]], 1 ) ); // Q_dn - 1 + Gd = alp[0]; // Q6 move16(); - Gd = alp[0]; // exp(shift) - move16(); - G = div_s( Gn, Gd ); // Q_dn - shift - G = i_mult( G, s[0] ); + G = BASOP_Util_Divide1616_Scale( Gn, Gd, &exp1 ); // Q_dn -1 - 6 + 15 - exp1 = Q_dn - 6 + 14 - exp1 + G = i_mult( G, s[0] ); // Q_dn - 6 + 14 - exp1 + shift = sub( 14, exp1 ); track = track_order[q * nb_tracks + 1]; // Q0 move16(); @@ -1245,8 +1260,8 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += NB_TRACK_FCB_2T ) { - dn[i] = sub( dn_orig[i], shl( mult( G, *alp_pos0 ), shift ) ); // Q_dn - move16(); + dn[i] = L_sub( L_deposit_l( dn_orig[i] ), L_shr( L_mult0( G, *alp_pos0 ), shift ) ); // Q_dn + move32(); alp_pos0 = alp_pos0 + NB_TRACK_FCB_2T; } @@ -1255,12 +1270,12 @@ void acelp_fast_fx( } ELSE { - Gn = i_mult( s[0], dn_orig[m[0]] ); // Q_dn - Gd = alp[0]; // exp(shift) + Gn = i_mult( s[0], shr( dn_orig[m[0]], 1 ) ); // Q_dn - 1 + Gd = alp[0]; // Q6 move16(); - G = Gn; // Q_dn + G = Gn; // Q_dn - 1 move16(); - G = i_mult( G, s[0] ); + G = i_mult( G, s[0] ); // Q_dn - 1 
track = track_order[q * nb_tracks + 1]; // Q0 move16(); @@ -1271,29 +1286,34 @@ void acelp_fast_fx( dndn_e = 0; move16(); + s64 = 0; + FOR( i = track; i < L_subfr; i += nb_tracks ) { - dn[i] = sub( mult( Gd, dn_orig[i] ), mult( G, *alp_pos0 ) ); // Q_dn - shift - move16(); + dn[i] = L_shr( L_msu( L_mult0( Gd, dn_orig[i] ), G, *alp_pos0 ), 6 ); // Q_dn + move32(); alp_pos0 += nb_tracks; - dndn_fx = BASOP_Util_Add_Mant32Exp( dndn_fx, dndn_e, L_mult0( dn[i], dn[i] ), add( exp, shl( shift, 1 ) ), &dndn_e ); // exp(dndn_e) + s64 = W_mac_32_32( s64, dn[i], dn[i] ); // 2 * Q_dn + 1 } - - s_coef_fx = BASOP_Util_Divide3232_Scale( dndn_fx, cncn_track[track], &tmp ); // exp(tmp) - tmp = add( tmp, sub( dndn_e, cncn_track_e[track] ) ); - s_coef_fx = Sqrt16( s_coef_fx, &tmp ); // exp(tmp) - tmp = sub( tmp, shift ); + exp1 = W_norm( s64 ); + dndn_fx = W_extract_h( W_shl( s64, exp1 ) ); // 2 * Q_dn + exp1 - 31 + dndn_e = sub( 31, sub( add( shl( Q_dn, 1 ), exp1 ), 31 ) ); // exponent = 31 - Q(dndn_fx); must use the W_norm() shift exp1 applied above, not the stale exp + + s_coef_fx = BASOP_Util_Divide3232_Scale( dndn_fx, cncn_track[track], &exp1 ); + exp1 = add( exp1, sub( dndn_e, cncn_track_e[track] ) ); + s_coef_fx = Sqrt16( s_coef_fx, &exp1 ); + exp1 = sub( exp1, shift ); - max_val_fx = MIN_16; - move16(); + max_val_fx = MIN_32; // max_val_fx/temp_fx are Word32 now, so start below any 32-bit candidate + move32(); m[1] = track; // Q0 move16(); FOR( i = track; i < L_subfr; i += nb_tracks ) { - dn[i] = add( shl( mult( s_coef_fx, cn[i] ), tmp ), i_mult( beta2_fx, shr( dn[i], 2 ) ) ); // Q_dn - move16(); - temp_fx = i_mult( dn[i], sign_fx[i] ); // Q_dn + dn[i] = L_add( L_shr( L_mult( s_coef_fx, cn[i] ), sub( 15, exp1 ) ), L_shr( imult3216( dn[i], beta2_fx ), 2 ) ); // Q_dn + move32(); + temp_fx = imult3216( dn[i], sign_fx[i] ); // Q_dn - IF( GE_16( temp_fx, max_val_fx ) ) + IF( GE_32( temp_fx, max_val_fx ) ) { max_val_fx = temp_fx; // Q_dn - move16(); + move32(); @@ -1312,12 +1332,12 @@ void acelp_fast_fx( IF( GE_16( nb_pulse, 3 ) ) { - Gn = add( Gn, i_mult( s[1], dn_orig[m[1]] ) ); // Q_dn - Gd = add( Gd, add( alp[0], i_mult( i_mult( i_mult( 2, s[0] ), s[1] ), alp[m[0] - m[1]] ) ) ); // exp(shift) - G = 
Gn; // Q_dn + Gn = add( Gn, i_mult( s[1], shr( dn_orig[m[1]], 1 ) ) ); // Q_dn -1 + Gd = add( Gd, add( alp[0], i_mult( i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ) ); // Q6 + G = Gn; // Q_dn - 1 move16(); - G1 = i_mult( G, s[1] ); // Q_dn - G = i_mult( G, s[0] ); // Q_dn + G1 = i_mult( G, s[1] ); // Q_dn-1 + G = i_mult( G, s[0] ); // Q_dn-1 track = track_order[q * nb_tracks + 2]; // Q0 move16(); @@ -1326,8 +1346,8 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += nb_tracks ) { - dn[i] = sub( sub( mult( Gd, dn_orig[i] ), mult( G, *alp_pos0 ) ), mult( G1, *alp_pos1 ) ); // Q_dn - shift - move16(); + dn[i] = L_shr( L_msu( L_msu0( L_mult0( Gd, dn_orig[i] ), G, *alp_pos0 ), G1, *alp_pos1 ), 6 ); // Q_dn + move32(); alp_pos0 += nb_tracks; alp_pos1 += nb_tracks; } @@ -1342,13 +1362,13 @@ void acelp_fast_fx( IF( GE_16( nb_pulse, 4 ) ) { - Gn = add( Gn, i_mult( s[2], dn_orig[m[2]] ) ); // Q_dn - Gd = add( Gd, add( add( alp[0], i_mult( i_mult( i_mult( 2, s[0] ), s[2] ), alp[m[0] - m[2]] ) ), i_mult( i_mult( i_mult( 2, s[1] ), s[2] ), alp[m[1] - m[2]] ) ) ); // exp(shift) - G = Gn; + Gn = add( Gn, i_mult( s[2], shr( dn_orig[m[2]], 1 ) ) ); // Q_dn-1 + Gd = add( Gd, add( add( alp[0], i_mult( i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ) ), i_mult( i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ) ) ); // Q6 + G = Gn; // Q_dn-1 move16(); - G1 = i_mult( G, s[1] ); - G2 = i_mult( G, s[2] ); - G = i_mult( G, s[0] ); + G1 = i_mult( G, s[1] ); // Q_dn-1 + G2 = i_mult( G, s[2] ); // Q_dn-1 + G = i_mult( G, s[0] ); // Q_dn-1 track = track_order[q * nb_tracks + 3]; move16(); @@ -1358,8 +1378,8 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += nb_tracks ) { - dn[i] = sub( sub( sub( mult( Gd, dn_orig[i] ), mult( G, *alp_pos0 ) ), mult( G1, *alp_pos1 ) ), mult( G2, *alp_pos2 ) ); // Q_dn - shift - move16(); + dn[i] = L_shr( L_msu( L_msu( L_msu( L_mult0( Gd, dn_orig[i] ), G, *alp_pos0 ), G1, *alp_pos1 ), G2, *alp_pos2 ), 6 ); // Q_dn + move32(); alp_pos0 += 
nb_tracks; alp_pos1 += nb_tracks; alp_pos2 += nb_tracks; @@ -1380,13 +1400,13 @@ void acelp_fast_fx( IF( GE_16( nb_pulse, 5 ) ) { - Gn = add( Gn, i_mult( s[3], dn_orig[m[3]] ) ); // Q_dn - Gd = add( Gd, add( add( add( alp[0], i_mult( i_mult( i_mult( 2, s[0] ), s[3] ), alp[m[0] - m[3]] ) ), i_mult( i_mult( i_mult( 2, s[1] ), s[3] ), alp[m[1] - m[3]] ) ), i_mult( i_mult( i_mult( 2, s[2] ), s[3] ), alp[m[2] - m[3]] ) ) ); // Q_dn - shift - G = Gn; - G1 = i_mult( G, s[1] ); // Q_dn - G2 = i_mult( G, s[2] ); // Q_dn - G3 = i_mult( G, s[3] ); // Q_dn - G = i_mult( G, s[0] ); // Q_dn + Gn = add( Gn, i_mult( s[3], shr( dn_orig[m[3]], 1 ) ) ); // Q_dn-1 + Gd = add( Gd, add( add( add( alp[0], i_mult( i_mult( shl( s[0], 1 ), s[3] ), alp[m[0] - m[3]] ) ), i_mult( i_mult( shl( s[1], 1 ), s[3] ), alp[m[1] - m[3]] ) ), i_mult( i_mult( shl( s[2], 1 ), s[3] ), alp[m[2] - m[3]] ) ) ); // Q6 + G = Gn; // Q_dn-1 + G1 = i_mult( G, s[1] ); // Q_dn-1 + G2 = i_mult( G, s[2] ); // Q_dn-1 + G3 = i_mult( G, s[3] ); // Q_dn-1 + G = i_mult( G, s[0] ); // Q_dn-1 IF( EQ_16( cdk_index, 6 ) ) { @@ -1400,8 +1420,8 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += nb_tracks ) { - dn[i] = sub( sub( sub( sub( mult( Gd, dn_orig[i] ), mult( G, *alp_pos0 ) ), mult( G1, *alp_pos1 ) ), mult( G2, *alp_pos2 ) ), mult( G3, *alp_pos3 ) ); // Q_dn - shift - move16(); + dn[i] = L_shr( L_msu( L_msu( L_msu( L_msu( L_mult0( Gd, dn_orig[i] ), G, *alp_pos0 ), G1, *alp_pos1 ), G2, *alp_pos2 ), G3, *alp_pos3 ), 6 ); // Q_dn + move32(); alp_pos0 += nb_tracks; alp_pos1 += nb_tracks; alp_pos2 += nb_tracks; @@ -1420,7 +1440,7 @@ void acelp_fast_fx( FOR( i = 0; i < L_subfr; i++ ) { - dn[i] = sub( sub( sub( sub( mult( Gd, dn_orig[i] ), mult( G, *alp_pos0 ) ), mult( G1, *alp_pos1 ) ), mult( G2, *alp_pos2 ) ), mult( G3, *alp_pos3 ) ); + dn[i] = L_shr( L_msu( L_msu( L_msu( L_msu( L_mult0( Gd, dn_orig[i] ), G, *alp_pos0 ), G1, *alp_pos1 ), G2, *alp_pos2 ), G3, *alp_pos3 ), 6 ); /*Q_dn*/ move16(); alp_pos0++; alp_pos1++; 
@@ -1428,8 +1448,7 @@ void acelp_fast_fx( alp_pos3++; } - Word32 L_tmp; - i = emaximum_fx( 0, dn, L_subfr, &L_tmp ); + i = emaximum_32fx( Q_dn, dn, L_subfr, &L_tmp1 ); track = i % nb_tracks; move16(); @@ -1462,22 +1481,72 @@ void acelp_fast_fx( FOR( i = 0; i < L_subfr; i++ ) { - y_tmp[i] = add( y_tmp[i], *p_hn++ ); // exp(shift) + y_tmp[i] = add( y_tmp[i], *p_hn++ ); // q_H move16(); } - crit_num = L_add( crit_num, L_mult0( s[j], dn_orig[m[j]] ) ); // Q_dn + crit_num = L_mac0( crit_num, s[j], dn_orig[m[j]] ); // Q_dn + } + + s64 = W_mult0_32_32( crit_num, crit_num ); // 2*Q_dn + exp = W_norm( s64 ); + crit_num = W_extract_h( W_shl( s64, exp ) ); // 2*Q_dn + exp - 32 + q_crit_num = add( shl( Q_dn, 1 ), sub( exp, 32 ) ); + + // crit_den = sum2_fx( y_tmp, L_subfr ); // 2*q_H + s64 = 0; + move64(); + FOR( i = 0; i < L_subfr; i++ ) + { + s64 = W_mac0_16_16( s64, y_tmp[i], y_tmp[i] ); + } + exp1 = W_norm( s64 ); + crit_den = W_extract_h( W_shl( s64, exp1 ) ); // 2*q_H + exp1 - 32 + q_crit_den = add( shl( q_H, 1 ), sub( exp1, 32 ) ); + + L_tmp1 = Mpy_32_32( crit_num, crit_den_max ); + exp = sub( add( q_crit_num, q_crit_den_max ), 31 ); + L_tmp2 = Mpy_32_32( crit_den, crit_num_max ); + exp1 = sub( add( q_crit_den, q_crit_num_max ), 31 ); + + IF( GT_16( exp, exp1 ) ) + { + IF( GE_32( L_shr( L_tmp1, sub( exp, exp1 ) ), L_tmp2 ) ) + { + flag = 1; + move16(); + } + ELSE + { + flag = 0; + move16(); + } + } + ELSE + { + IF( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) ) + { + flag = 1; + move16(); + } + ELSE + { + flag = 0; + move16(); + } } - crit_num = W_extract_l( W_mult_32_32( crit_num, crit_num ) ); // 2*Q_dn+1 - crit_den = sum2_fx( y_tmp, L_subfr ); // 2*exp(shift) - IF( GE_32( Mpy_32_32( crit_num, crit_den_max ), Mpy_32_32( crit_den, crit_num_max ) ) ) + IF( flag ) { crit_num_max = crit_num; move32(); + q_crit_num_max = q_crit_num; + move16(); crit_den_max = crit_den; move32(); + q_crit_den_max = q_crit_den; + move16(); FOR( j = 0; j < nb_pulse; j++ ) { @@ -1487,7 
+1556,7 @@ void acelp_fast_fx( move16(); } - Copy( y_tmp, y, L_subfr ); // exp(shift) + Copy( y_tmp, y, L_subfr ); // q_H skip_track_max = skip_track[q]; move16(); } @@ -1504,7 +1573,7 @@ void acelp_fast_fx( code[m_max[q]] = add( code[m_max[q]], s_max[q] ); // Q0 move16(); } - + test(); IF( EQ_16( bits, 12 ) || EQ_16( bits, 14 ) ) { /* 12 bits, 2 pulses, 2 tracks 11 used all tracks */ @@ -1606,7 +1675,7 @@ void acelp_fast_fx( IF( NE_16( ind_stream[j], -1 ) ) { idx = quant_1p_N1_L_subfr( nb_pos, ind_stream[j], bits_track ); - push_indice( hBstr, IND_ALG_CDBK_4T64, idx, bits_track + 1 ); + push_indice( hBstr, IND_ALG_CDBK_4T64, idx, add( bits_track, 1 ) ); } } } @@ -1618,12 +1687,12 @@ void acelp_fast_fx( IF( EQ_16( q, skip_track_max ) ) { idx = quant_2p_2N1( ind_stream[j], ind_stream[j + 1], bits_track ); - push_indice( hBstr, IND_ALG_CDBK_4T64, idx, ( 2 * bits_track ) + 1 ); + push_indice( hBstr, IND_ALG_CDBK_4T64, idx, add( shl( bits_track, 1 ), 1 ) ); } ELSE { idx = quant_1p_N1_L_subfr( nb_pos, ind_stream[j], bits_track ); - push_indice( hBstr, IND_ALG_CDBK_4T64, idx, bits_track + 1 ); + push_indice( hBstr, IND_ALG_CDBK_4T64, idx, add( bits_track, 1 ) ); } } } diff --git a/lib_enc/core_enc_init.c b/lib_enc/core_enc_init.c index efe5ecbdaae4eb8d6eee813c6193f71f9bd76a66..887fb759b425dc60845f80054fe88da166afcee2 100644 --- a/lib_enc/core_enc_init.c +++ b/lib_enc/core_enc_init.c @@ -710,7 +710,8 @@ static void init_tcx_ivas_fx( hTcxEnc->kernel_switch_corr_past = 0; hTcxEnc->kernel_symmetry_past = 0; /* MDCT_IV & 1 */ hTcxEnc->enc_ste_pre_corr_past = 0; - hTcxEnc->tfm_mem_fx = 12288 /*0.75 in Q14*/; + // hTcxEnc->tfm_mem_fx = 12288 /*0.75 in Q14*/; + hTcxEnc->tfm_mem_fx = 1610612736; #ifdef IVAS_FLOAT_FIXED_CONVERSIONS hTcxEnc->tfm_mem = 0.75f; #endif diff --git a/lib_enc/igf_enc.c b/lib_enc/igf_enc.c index 178b1196e19812574147cab5e3c5ba7a52f1f0a0..6a8f3f3f95119a56fbe37fb1753ffdb244600ad1 100644 --- a/lib_enc/igf_enc.c +++ b/lib_enc/igf_enc.c @@ -50,6 +50,7 @@ #define 
INV_Log2_10_Q15 9864 /*1/log2(10) in Q15*/ +#define INV_Log2_10_Q12 1233 /*1/log2(10) in Q12*/ #define INV_Log2_e_Q15 22713 /*1/log2(e) in Q15*/ /*-------------------------------------------------------------------* * IGF_write_bit() @@ -532,7 +533,7 @@ static Word32 IGF_getTNR_fx( tmp_e = add( tmp_e, sub( tonal_e, noise_e ) ); IF( GE_16( temp, 1 ) ) { - tonalToNoise = L_shr( imult3216( Mult_32_16( L_add( BASOP_Util_Log2( temp ), L_shl( add( 16, tmp_e ), Q25 ) ) /*Q25*/, INV_Log2_10_Q15 ) /*25+15-15*/, 20 ) /*25*/, 3 ); /*Q22*/ + tonalToNoise = imult3216( Mult_32_16( L_add( BASOP_Util_Log2( temp ), L_shl( add( 16, tmp_e ), Q25 ) ) /*Q25*/, INV_Log2_10_Q12 ) /*25+12-15*/, 20 ); /*Q22*/ } ELSE { diff --git a/lib_enc/init_enc.c b/lib_enc/init_enc.c index 9edf656240f2d920abd77aaa1d0a6767610a3334..5ea9b7b3c37e021efb8c4e21e1ffdc710c2ef73e 100644 --- a/lib_enc/init_enc.c +++ b/lib_enc/init_enc.c @@ -356,6 +356,8 @@ ivas_error init_encoder( set16_fx( st->mem_decim_fx, 0, 2 * L_FILT_MAX ); st->input_buff_fx = st->hSignalBuf->input_buff; st->input_buff32_fx = st->hSignalBuf->input_buff32; + set32_fx( st->hSignalBuf->input_buff32, 0, L_FRAME48k + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ); + set16_fx( st->hSignalBuf->input_buff, 0, L_FRAME48k + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ); #endif st->input_buff = st->hSignalBuf->input_buff_flt; set_zero( st->input_buff, L_FRAME48k + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ); @@ -1312,7 +1314,7 @@ ivas_error init_encoder_ivas_fx( return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for Noise estimation\n" ); } - noise_est_init_fx( st->hNoiseEst ); + noise_est_init_ivas_fx( st->hNoiseEst ); } ELSE { diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c index 5cb7de2f685f3b42a6e3d9dee47f9f91dee8ffbb..f75c8db80f5ba2ddc9986343c0d3428ea6b006a5 100644 --- a/lib_enc/ivas_core_enc.c +++ b/lib_enc/ivas_core_enc.c @@ -208,12 +208,127 @@ ivas_error ivas_core_enc( 
/*---------------------------------------------------------------------* * Pre-processing, incl. Decision matrix *---------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - if ( ( error = pre_proc_ivas( st, last_element_mode, element_brate, ivas_format == SBA_FORMAT ? last_element_brate : element_brate, input_frame, old_inp_12k8[n], old_inp_16k[n], &inp[n], &ener[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], new_inp_resamp16k[n], &Voicing_flag[n], old_wsp[n], loc_harm[n], cor_map_sum[n], vad_flag_dtx[n], enerBuffer[n], fft_buff[n], MCT_flag, vad_hover_flag[n], flag_16k_smc ) ) != IVAS_ERR_OK ) + Word32 enerBuffer_fx[CLDFB_NO_CHANNELS_MAX]; + Word16 fft_buff_fx[2 * L_FFT]; + Word32 cor_map_sum_fx = 0; + Word16 exp_cor_map_sum = 0; + Word16 q_fft_buff; + Word16 e_enerBuffer; + + f2me( cor_map_sum[n], &cor_map_sum_fx, &exp_cor_map_sum ); + + /*Decision_matrix enc*/ + q_fft_buff = Q_factor_arr( fft_buff[n], ( 2 * L_FFT ) ); + if ( q_fft_buff >= 2 ) + { + q_fft_buff -= 3; + } + else + { + q_fft_buff -= 4; + } + floatToFixed_arr( fft_buff[n], fft_buff_fx, q_fft_buff, ( 2 * L_FFT ) ); + f2me_buf( enerBuffer[n], enerBuffer_fx, &e_enerBuffer, (Word32) CLDFB_NO_CHANNELS_MAX ); + Word16 tmp_shift = find_guarded_bits_fx( 5 ); // Computing guraded bits necessary in the energyBuffer + scale_sig32( enerBuffer_fx, CLDFB_NO_CHANNELS_MAX, -tmp_shift ); // Computing the shift as per guarded bits in the energyBuffer + e_enerBuffer += tmp_shift; // Shifting the exponent of energyBuffer with the tmp_shift + floatToFixed_arrL( st->Bin_E_old, st->Bin_E_old_fx, Q_factor_arrL( st->Bin_E_old, 129 ), 129 ); + + if ( st->element_mode != IVAS_SCE && !st->low_rate_mode && !( st->total_brate > MAX_ACELP_BRATE ) && st->element_mode != IVAS_CPE_MDCT && st->coder_type != INACTIVE && st->sp_aud_decision1 != 0 && st->sp_aud_decision2 != 0 && st->sp_aud_decision1 != 1 && st->sp_aud_decision2 != 0 && !( st->element_mode == IVAS_CPE_TD || 
st->sp_aud_decision0 == 0 ) ) + { + if ( st->hTcxEnc != NULL ) + { + st->hTcxEnc->clas_sec_old_fx = float_to_fix16( st->hTcxEnc->clas_sec_old_flt, Q13 ); + } + } + + /*setModeidx*/ + IF( st->envWeighted && !st->enableTcxLpc ) + { + /* Unweight the envelope */ + floatToFixed_arr( st->lsp_old, st->lsp_old_fx, Q15, M ); + st->gamma = (Word16) floatToFixed( st->gamma_flt, Q14 ); + } + + /*setTcxmode*/ + IF( NE_16( st->element_mode, IVAS_CPE_MDCT ) ) + { + Word16 q_com = Q31; + q_com = L_get_q1( st->prevEnergyHF ); + q_com = s_min( q_com, L_get_q1( st->currEnergyHF ) ); + st->prevEnergyHF_fx = floatToFixed_32( st->prevEnergyHF, q_com ); + st->currEnergyHF_fx = floatToFixed_32( st->currEnergyHF, q_com ); + st->currEnergyHF_e_fx = sub( Q31, q_com ); + move16(); + move16(); + move16(); + IF( st->hTranDet ) + { + floatToFixed_arr( st->hTranDet->subblockEnergies.subblockNrgChange_flt, st->hTranDet->subblockEnergies.subblockNrgChange, Q15 - NRG_CHANGE_E, NSUBBLOCKS + MAX_TD_DELAY ); + } + IF( st->hTcxEnc ) + { + st->hTcxEnc->tfm_mem_fx = floatToFixed_32( st->hTcxEnc->tfm_mem, Q31 ); + move16(); + st->hTcxEnc->tcxltp_norm_corr_past = float_to_fix16( st->hTcxEnc->tcxltp_norm_corr_past_flt, Q15 ); + move16(); + } + } +#endif + if ( ( error = pre_proc_ivas( st, last_element_mode, element_brate, ivas_format == SBA_FORMAT ? 
last_element_brate : element_brate, input_frame, old_inp_12k8[n], old_inp_16k[n], + &inp[n], &ener[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], new_inp_resamp16k[n], &Voicing_flag[n], old_wsp[n], loc_harm[n], +#ifndef IVAS_FLOAT_FIXED + cor_map_sum[n], +#endif + vad_flag_dtx[n], +#ifndef IVAS_FLOAT_FIXED + enerBuffer[n], + fft_buff[n], +#endif + MCT_flag, vad_hover_flag[n], flag_16k_smc +#ifdef IVAS_FLOAT_FIXED + , + enerBuffer_fx, + e_enerBuffer, + fft_buff_fx, + cor_map_sum_fx, + exp_cor_map_sum +#endif + ) ) != IVAS_ERR_OK ) { return error; } +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + /*decision matrix*/ + if ( st->element_mode != IVAS_SCE && !st->low_rate_mode && !( st->total_brate > MAX_ACELP_BRATE ) && st->element_mode != IVAS_CPE_MDCT && st->coder_type != INACTIVE && st->sp_aud_decision1 != 0 && st->sp_aud_decision2 != 0 && st->sp_aud_decision1 != 1 && st->sp_aud_decision2 != 0 && !( st->element_mode == IVAS_CPE_TD || st->sp_aud_decision0 == 0 ) ) + { + if ( st->hTcxEnc != NULL ) + { + st->hTcxEnc->clas_sec_old_flt = fix16_to_float( st->hTcxEnc->clas_sec_old_fx, Q13 ); + } + } + + /*setmode_index*/ + IF( st->envWeighted && !st->enableTcxLpc ) + { + fixedToFloat_arr( st->lsp_old_fx, st->lsp_old, Q15, M ); + fixedToFloat_arr( st->lsf_old_fx, st->lsf_old, Q15, M ); + } + + /*SetTCXModeInfo_ivas_fx*/ + IF( NE_16( st->element_mode, IVAS_CPE_MDCT ) ) + { + if ( st->hTcxEnc ) + { + st->hTcxEnc->tfm_mem = fixedToFloat_32( st->hTcxEnc->tfm_mem_fx, Q31 ); + move16(); + } + } +#endif + if ( st->element_mode == IVAS_CPE_MDCT || st->element_mode == IVAS_SCE ) { st->enablePlcWaveadjust = 0; @@ -572,7 +687,7 @@ ivas_error ivas_core_enc( { FOR( i = 0; i < L_FRAME32k; i++ ) { - st->hTcxEnc->old_out_fx[i] = float_to_fix16( st->hTcxEnc->old_out[i], 0 ); + st->hTcxEnc->old_out_fx[i] = (Word16) st->hTcxEnc->old_out[i] * ( 1 << 0 ); } } @@ -1059,11 +1174,10 @@ ivas_error ivas_core_enc( fb_tbe_enc( st, st->input, fb_exc ); #else #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word16 
Q_input; Q_fb_exc = Q_factor_arr( fb_exc, L_FRAME16k ); floatToFixed_arr( fb_exc, fb_exc_fx, Q_fb_exc, L_FRAME16k ); // Q_input is being calculated inside already - Q_input = 0; + Word16 Q_input = 0; floatToFixed_arr( st->input_buff, st->input_buff_fx, Q_input, L_FRAME48k + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ); Word16 len_old_input_fhb_fx = NS2SA( 48000, ACELP_LOOK_NS + DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS ) - L_FRAME48k / 2; st->hBWE_TD->old_input_fhb_fx_Q = Q_factor_arr( st->hBWE_TD->old_input_fhb, len_old_input_fhb_fx ); diff --git a/lib_enc/ivas_core_pre_proc.c b/lib_enc/ivas_core_pre_proc.c index e0c293aa2180a9a4b1e4c46b7df9a896ac4d8af4..46062b0dfad478d0c2fb95e504b76dd8e97d6117 100644 --- a/lib_enc/ivas_core_pre_proc.c +++ b/lib_enc/ivas_core_pre_proc.c @@ -42,6 +42,7 @@ #include "prot_fx.h" #include "ivas_prot_fx.h" #include "prot_fx_enc.h" +#include "basop_util.h" /*-------------------------------------------------------------------* * pre_proc_ivas() @@ -51,31 +52,43 @@ *--------------------------------------------------------------------*/ ivas_error pre_proc_ivas( - Encoder_State *st, /* i/o: encoder state structure */ - const int16_t last_element_mode, /* i : last element mode */ - const int32_t element_brate, /* i : element bitrate */ - const int32_t last_element_brate, /* i : last element bitrate */ - const int16_t input_frame, /* i : frame length */ - float old_inp_12k8[], /* i/o: buffer of old input signal */ - float old_inp_16k[], /* i/o: buffer of old input signal @ 16kHz */ - float **inp, /* o : ptr. to inp. 
signal in the current frame*/ - float *ener, /* o : residual energy from Levinson-Durbin */ - float A[NB_SUBFR16k * ( M + 1 )], /* i/o: A(z) unquantized for the 4 subframes */ - float Aw[NB_SUBFR16k * ( M + 1 )], /* i/o: weighted A(z) unquantized for subframes */ - float epsP[M + 1], /* i/o: LP prediction errors */ - float lsp_new[M], /* i/o: LSPs at the end of the frame */ - float lsp_mid[M], /* i/o: LSPs in the middle of the frame */ - float *new_inp_resamp16k, /* o : new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */ - int16_t *Voicing_flag, /* o : voicing flag for HQ FEC */ - const float old_wsp[], /* i : weighted input signal buffer */ - const int16_t loc_harm, /* i : harmonicity flag */ - const float cor_map_sum, /* i : speech/music clasif. parameter */ - const int16_t vad_flag_dtx, /* i : HE-SAD flag with additional DTX HO */ + Encoder_State *st, /* i/o: encoder state structure */ + const int16_t last_element_mode, /* i : last element mode */ + const int32_t element_brate, /* i : element bitrate */ + const int32_t last_element_brate, /* i : last element bitrate */ + const int16_t input_frame, /* i : frame length */ + float old_inp_12k8[], /* i/o: buffer of old input signal */ + float old_inp_16k[], /* i/o: buffer of old input signal @ 16kHz */ + float **inp, /* o : ptr. to inp. 
signal in the current frame*/ + float *ener, /* o : residual energy from Levinson-Durbin */ + float A[NB_SUBFR16k * ( M + 1 )], /* i/o: A(z) unquantized for the 4 subframes */ + float Aw[NB_SUBFR16k * ( M + 1 )], /* i/o: weighted A(z) unquantized for subframes */ + float epsP[M + 1], /* i/o: LP prediction errors */ + float lsp_new[M], /* i/o: LSPs at the end of the frame */ + float lsp_mid[M], /* i/o: LSPs in the middle of the frame */ + float *new_inp_resamp16k, /* o : new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */ + int16_t *Voicing_flag, /* o : voicing flag for HQ FEC */ + const float old_wsp[], /* i : weighted input signal buffer */ + const int16_t loc_harm, /* i : harmonicity flag */ +#ifndef IVAS_FLOAT_FIXED + const float cor_map_sum, /* i : speech/music clasif. parameter */ +#endif + const int16_t vad_flag_dtx, /* i : HE-SAD flag with additional DTX HO */ +#ifndef IVAS_FLOAT_FIXED /*const*/ float enerBuffer[CLDFB_NO_CHANNELS_MAX], /* i : energy buffer */ /*const*/ float fft_buff[2 * L_FFT], /* i : FFT buffer */ - const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ - const int16_t vad_hover_flag, /* i : VAD hangover flag */ - const int16_t flag_16k_smc /* i : flag to indicate if the OL SMC is run at 16 kHz */ +#endif + const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ + const int16_t vad_hover_flag, /* i : VAD hangover flag */ + const int16_t flag_16k_smc /* i : flag to indicate if the OL SMC is run at 16 kHz */ +#ifdef IVAS_FLOAT_FIXED + , + Word32 enerBuffer_fx[CLDFB_NO_CHANNELS_MAX], + Word16 e_enerBuffer, + Word16 fft_buff_fx[2 * L_FFT], + Word32 cor_map_sum_fx, + Word16 exp_cor_map_sum +#endif ) { int16_t L_look, element_mode, lMemRecalc_12k8; @@ -94,6 +107,37 @@ ivas_error pre_proc_ivas( element_mode = st->element_mode; +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS1 + Word16 old_inp_16k_fx[L_INP_MEM]; + Word16 exp_old_inp_16k = 0; + Word16 old_inp_12k8_fx[496]; + Word16 exp_old_inp_12k8 = 0; + 
Word16 old_wsp_fx[L_WSP]; + Word16 exp_old_wsp = 0; + Word16 A_fx[NB_SUBFR16k * ( M + 1 )], exp_A = 0; + Word16 Aw_fx[NB_SUBFR16k * ( M + 1 )], exp_Aw = 0; + Word16 lsp_new_fx[M], exp_lsp_new = 0; + Word16 lsp_mid_fx[M], exp_lsp_mid = 0; + Word16 epsP_fx[M + 1], exp_epsP = 0; + Word32 enerBuffer_fx[CLDFB_NO_CHANNELS_MAX]; + Word16 fft_buff_fx[2 * L_FFT]; + Word32 cor_map_sum_fx = 0; + Word16 exp_cor_map_sum = 0; + Word16 q_fft_buff; + Word16 e_enerBuffer; + + /*input buffers*/ + f2me_buf_16( st->old_inp_16k, old_inp_16k_fx, &exp_old_inp_16k, L_INP_MEM ); + f2me_buf_16( old_inp_12k8, old_inp_12k8_fx, &exp_old_inp_12k8, 496 ); + f2me_buf_16( old_wsp, old_wsp_fx, &exp_old_wsp, L_WSP ); + f2me_buf_16( A, A_fx, &exp_A, NB_SUBFR16k * ( M + 1 ) ); + f2me_buf_16( Aw, Aw_fx, &exp_Aw, NB_SUBFR16k * ( M + 1 ) ); + f2me_buf_16( lsp_new, lsp_new_fx, &exp_lsp_new, M ); + f2me_buf_16( lsp_mid, lsp_mid_fx, &exp_lsp_mid, M ); + f2me_buf_16( epsP, epsP_fx, &exp_epsP, M + 1 ); + f2me( cor_map_sum, &cor_map_sum_fx, &exp_cor_map_sum ); +#endif + new_inp_12k8 = old_inp_12k8 + L_INP_MEM; /* pointer to new samples of the input signal in 12.8kHz core */ inp_12k8 = new_inp_12k8 - L_LOOK_12k8; @@ -109,11 +153,127 @@ ivas_error pre_proc_ivas( { lMemRecalc_12k8 = NS2SA( INT_FS_12k8, L_MEM_RECALC_NS ); } - /*----------------------------------------------------------------* * Selection of internal ACELP Fs (12.8 kHz or 16 kHz) *----------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + IF( EQ_32( st->core_brate, FRAME_NO_DATA ) ) + { + /* prevent "L_frame" changes in CNG segments */ + st->L_frame = st->last_L_frame; + move16(); + } + ELSE IF( EQ_32( st->core_brate, SID_2k40 ) && GE_16( st->bwidth, WB ) && st->hDtxEnc->first_CNG && ( st->hTdCngEnc != NULL && LT_16( st->hTdCngEnc->act_cnt2, MIN_ACT_CNG_UPD ) ) ) + { 
+ /* prevent "L_frame" changes in SID frame after short segment of active frames */ + st->L_frame = st->hDtxEnc->last_CNG_L_frame; + move16(); + } + ELSE IF( ( ( EQ_16( st->element_mode, IVAS_CPE_MDCT ) && GE_32( st->element_brate, IVAS_64k ) && GE_16( st->bwidth, SWB ) ) || ( EQ_16( element_mode, IVAS_SCE ) && GT_32( st->total_brate, MAX_ACELP_BRATE ) && GE_16( st->bwidth, SWB ) ) ) && NE_32( st->core_brate, SID_2k40 ) ) + { + st->L_frame = L_FRAME32k; + move16(); + } + ELSE IF( GE_16( st->bwidth, SWB ) && GT_32( st->total_brate, MAX_ACELP_BRATE_ISM ) && LE_32( st->total_brate, MAX_ACELP_BRATE ) && EQ_16( element_mode, IVAS_SCE ) && st->is_ism_format && st->tcxonly && NE_32( st->core_brate, SID_2k40 ) ) + { + st->L_frame = L_FRAME25_6k; + move16(); + } + ELSE IF( st->flag_ACELP16k ) + { + st->L_frame = L_FRAME16k; + move16(); + } + ELSE + { + st->L_frame = L_FRAME; + move16(); + } + + test(); + test(); + test(); + Word16 flag_1 = 0; + IF( EQ_16( st->L_frame, L_FRAME16k ) ) + { + flag_1 = ACELP_16k40; + move16(); + } + ELSE + { + flag_1 = ACELP_9k60; + move16(); + } + IF( st->hFdCngEnc != NULL && NE_16( st->element_mode, IVAS_CPE_MDCT ) && ( ( NE_16( st->hFdCngEnc->hFdCngCom->frameSize, st->L_frame ) ) || ( NE_16( st->hFdCngEnc->hFdCngCom->CngBandwidth, st->input_bwidth ) ) ) ) + { + configureFdCngEnc_ivas_fx( st->hFdCngEnc, max( st->input_bwidth, WB ), flag_1 ); + } + + IF( st->ini_frame == 0 ) + { + /* avoid switching of internal ACELP Fs in the very first frame */ + st->last_L_frame = st->L_frame; + move16(); + } + + IF( EQ_16( st->L_frame, L_FRAME ) ) + { +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + st->gamma_flt = GAMMA1_FLT; + st->preemph_fac_flt = PREEMPH_FAC_FLT; +#endif + st->gamma = GAMMA1; + st->preemph_fac = PREEMPH_FAC; + move16(); + move16(); + } + ELSE IF( EQ_16( st->L_frame, L_FRAME32k ) ) + { + +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + st->gamma_flt = GAMMA16k_FLT; + st->preemph_fac_flt = PREEMPH_FAC_SWB_FLT; +#endif + st->gamma = GAMMA16k; + 
st->preemph_fac = PREEMPH_FAC_SWB; + move16(); + move16(); + } + ELSE + { +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + st->gamma_flt = GAMMA16k_FLT; + st->preemph_fac_flt = PREEMPH_FAC_16k_FLT; +#endif + st->gamma = GAMMA16k; + st->preemph_fac = PREEMPH_FAC_16k; + move16(); + move16(); + } + + st->sr_core = L_mult0( st->L_frame, FRAMES_PER_SEC ); + st->encoderLookahead_enc = NS2SA_FX2( st->sr_core, ACELP_LOOK_NS ); + st->encoderPastSamples_enc = shr( ( imult1616( st->L_frame, 9 ) ), 4 ); + move32(); + move16(); + move16(); +#else if ( st->core_brate == FRAME_NO_DATA ) { /* prevent "L_frame" changes in CNG segments */ @@ -175,14 +335,92 @@ ivas_error pre_proc_ivas( st->sr_core = st->L_frame * FRAMES_PER_SEC; st->encoderLookahead_enc = NS2SA( st->sr_core, ACELP_LOOK_NS ); st->encoderPastSamples_enc = ( st->L_frame * 9 ) >> 4; - +#endif /*-----------------------------------------------------------------* * coder_type rewriting in case of switching * IC frames selection * enforce TC frames in case of switching *-----------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + /* enforce TRANSITION frames */ + IF( !( EQ_16( st->element_mode, IVAS_CPE_TD ) && EQ_16( st->idchan, 1 ) ) && NE_16( st->last_L_frame, st->L_frame ) && NE_32( st->core_brate, FRAME_NO_DATA ) && NE_32( st->core_brate, SID_2k40 ) && NE_32( st->last_core_brate, FRAME_NO_DATA ) && NE_32( st->last_core_brate, SID_2k40 ) && NE_16( st->coder_type_raw, VOICED ) ) + { + /* enforce TC frame in case of ACELP@12k8 <-> ACELP@16k core switching */ + st->coder_type = TRANSITION; + move16(); + } + ELSE IF( EQ_16( st->last_core, HQ_CORE ) && NE_16( st->coder_type_raw, VOICED ) ) + { + /* enforce TC frame in case of HQ -> ACELP core switching */ + st->coder_type = TRANSITION; + move16(); + } + ELSE IF( LE_32( 
st->last_core_brate, SID_2k40 ) && EQ_16( st->cng_type, FD_CNG ) && !( EQ_16( element_mode, IVAS_CPE_TD ) ) ) + { + /* enforce TC frame in case of FD_CNG -> ACELP switching (past excitation not available) */ + st->coder_type = TRANSITION; + move16(); + } + /* select INACTIVE frames */ + ELSE IF( LE_32( st->total_brate, MAX_GSC_INACTIVE_BRATE ) && st->vad_flag == 0 && NE_16( st->element_mode, IVAS_CPE_MDCT ) ) + { + /* inactive frames will be coded by GSC technology */ + /* except for the VBR mode. VBR mode uses NELP for that */ + test(); + test(); + test(); + IF( !( st->Opt_SC_VBR && vad_flag_dtx ) && ( st->idchan == 0 || NE_16( element_mode, IVAS_CPE_TD ) ) ) + { + st->coder_type = INACTIVE; + move16(); + st->hGSCEnc->noise_lev = NOISE_LEVEL_SP3; + move16(); + } + } + ELSE IF( GT_32( st->total_brate, MAX_GSC_INACTIVE_BRATE ) && ( ( st->vad_flag == 0 && GE_16( st->bwidth, SWB ) && GE_16( st->max_bwidth, SWB ) ) || ( st->localVAD == 0 && ( LE_16( st->bwidth, WB ) || LE_16( st->max_bwidth, WB ) ) ) ) ) + { + /* inactive frames will be coded by AVQ technology */ + st->coder_type = INACTIVE; + move16(); + } + + /*---------------------------------------------------------------------* + * Decision matrix (selection of technologies) + *---------------------------------------------------------------------*/ + + st->mdct_sw = MODE1; + st->mdct_sw_enable = MODE1; + move16(); + move16(); + test(); + test(); + test(); + IF( ( LE_32( st->total_brate, MIN_BRATE_GSC_NOISY_FLAG ) || LT_16( st->bwidth, SWB ) || st->flag_ACELP16k ) && st->GSC_IVAS_mode == 0 ) + { + st->GSC_noisy_speech = 0; + move16(); + } +#else /* enforce TRANSITION frames */ if ( !( st->element_mode == IVAS_CPE_TD && st->idchan == 1 ) && st->last_L_frame != st->L_frame && st->core_brate != FRAME_NO_DATA && st->core_brate != SID_2k40 && st->last_core_brate != FRAME_NO_DATA && st->last_core_brate != SID_2k40 && st->coder_type_raw != VOICED ) { @@ -227,74 +465,236 @@ ivas_error pre_proc_ivas( { st->GSC_noisy_speech 
= 0; } +#endif /* core selection */ #ifndef IVAS_FLOAT_FIXED ivas_decision_matrix_enc( st, element_brate, fft_buff, enerBuffer, last_element_mode ); #else + ivas_decision_matrix_enc_fx( st, element_brate, fft_buff_fx, enerBuffer_fx, e_enerBuffer, last_element_mode ); +#endif +#ifdef IVAS_FLOAT_FIXED + test(); + test(); + IF( EQ_16( st->L_frame, L_FRAME16k ) && ( EQ_16( st->coder_type, VOICED ) || EQ_16( st->coder_type, UNVOICED ) ) ) /* VOICED and UNVOICED are not supported in ACELP@16k */ + { + st->coder_type = GENERIC; + move16(); + } -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word16 fft_buff_fx[2 * L_FFT]; - Word32 enerBuffer_fx[CLDFB_NO_CHANNELS_MAX]; + test(); + IF( EQ_16( st->core, TCX_20_CORE ) || EQ_16( st->core, HQ_CORE ) ) + { + st->Nb_ACELP_frames = 0; + move16(); + /* Configure TCX with the same bitrate as given when (re-)initializing TCX */ + total_brate_tmp = st->total_brate; + move32(); + st->total_brate = L_mult0( st->bits_frame_nominal, FRAMES_PER_SEC ); + move32(); - Word16 q_fft_buff; + SetModeIndex_ivas_fx( st, st->last_bits_frame_nominal * FRAMES_PER_SEC, last_element_mode, MCT_flag ); + + st->sr_core = getCoreSamplerateMode2( element_mode, st->total_brate, st->bwidth, st->flag_ACELP16k, st->rf_mode, st->is_ism_format ); + move16(); + st->total_brate = total_brate_tmp; + move32(); + + st->L_frame = extract_l( Mpy_32_32( st->sr_core, ONE_BY_FRAMES_PER_SEC_Q31 ) ); + st->encoderLookahead_enc = NS2SA_FX2( st->sr_core, ACELP_LOOK_NS ); + st->encoderPastSamples_enc = shr( ( imult1616( st->L_frame, 9 ) ), 4 ); + move16(); + move16(); - q_fft_buff = Q_factor_arrL( fft_buff, ( 2 * L_FFT ) ); + IF( EQ_32( st->sr_core, INT_FS_12k8 ) ) + { +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + st->preemph_fac_flt = PREEMPH_FAC_FLT; + st->gamma_flt = GAMMA1_FLT; +#endif + st->preemph_fac = PREEMPH_FAC; + st->gamma = GAMMA1; + move16(); + move16(); + } + ELSE IF( EQ_32( st->sr_core, INT_FS_16k ) ) + { +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + st->preemph_fac_flt = 
PREEMPH_FAC_16k_FLT; + st->gamma_flt = GAMMA16k_FLT; +#endif + st->preemph_fac = PREEMPH_FAC_16k; + st->gamma = GAMMA16k; + move16(); + move16(); + } + ELSE /* st->sr_core >=25600 */ + { +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + st->preemph_fac_flt = PREEMPH_FAC_SWB_FLT; + st->gamma_flt = GAMMA16k_FLT; +#endif + st->preemph_fac = PREEMPH_FAC_SWB; + st->gamma = GAMMA16k; + move16(); + move16(); + } - if ( q_fft_buff <= 16 ) + IF( st->vad_flag == 0 ) + { + st->coder_type = INACTIVE; + move16(); + } + ELSE IF( GT_16( st->coder_type, GENERIC ) ) + { + st->coder_type = GENERIC; + move16(); + } + IF( NE_16( st->element_mode, IVAS_CPE_MDCT ) ) + { + SetTCXModeInfo_ivas_fx( st, st->hTranDet, &st->hTcxCfg->tcx_curr_overlap_mode ); + } + } + ELSE IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) ) { - q_fft_buff = 0; +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + st->hTcxEnc->tfm_mem = 0.75f; +#endif + st->hTcxEnc->tfm_mem_fx = 1610612736; /*0.75f in Q31*/ + move32(); } - else + ELSE IF( NE_32( element_brate, last_element_brate ) ) { - q_fft_buff = q_fft_buff - 16; + IF( NE_32( st->core_brate, FRAME_NO_DATA ) ) + { + SetModeIndex_ivas_fx( st, L_mult0( st->bits_frame_nominal, FRAMES_PER_SEC ), element_mode, MCT_flag ); + } + + test(); + test(); + IF( NE_16( st->extl, -1 ) && NE_16( st->extl, IGF_BWE ) && EQ_16( st->igf, 1 ) ) + { + st->igf = 0; + move16(); + } } - if ( q_fft_buff >= 2 ) + + /*-----------------------------------------------------------------* + * Update of ACELP harmonicity counter (used in ACELP transform codebook @32kbps) + *-----------------------------------------------------------------*/ + test(); + test(); + test(); + Word16 flag = BASOP_Util_Cmp_Mant32Exp( cor_map_sum_fx, exp_cor_map_sum, 50, 31 ); /* compare cor_map_sum against 50; result is -1/0/1 and only 1 means cor_map_sum > 50 */ + IF( EQ_16( loc_harm, 1 ) && EQ_16( flag, 1 ) && EQ_16( st->clas, VOICED_CLAS ) && EQ_16( st->coder_type, GENERIC ) ) { - q_fft_buff -= 3; + st->last_harm_flag_acelp = add( st->last_harm_flag_acelp, 1 ); + move16(); + + IF( GT_16( st->last_harm_flag_acelp, 10 ) ) + { +
st->last_harm_flag_acelp = 10; + move16(); + } } - else + ELSE { - q_fft_buff -= 4; + st->last_harm_flag_acelp = 0; + move16(); } - floatToFixed_arr( fft_buff, fft_buff_fx, q_fft_buff, ( 2 * L_FFT ) ); - - Word16 e_enerBuffer; + /*-----------------------------------------------------------------* + * Update audio frames counter (used for UV decision) + *-----------------------------------------------------------------*/ - f2me_buf( enerBuffer, enerBuffer_fx, &e_enerBuffer, (Word32) CLDFB_NO_CHANNELS_MAX ); + IF( EQ_16( st->coder_type, AUDIO ) ) + { + st->audio_frame_cnt = add( st->audio_frame_cnt, AUDIO_COUNTER_STEP ); + move16(); + } + ELSE IF( NE_16( st->coder_type, INACTIVE ) ) + { + st->audio_frame_cnt = sub( st->audio_frame_cnt, 1 ); + move16(); + } - Word16 tmp_shift = find_guarded_bits_fx( 5 ); // Computing guraded bits necessary in the energyBuffer + IF( GT_16( st->audio_frame_cnt, AUDIO_COUNTER_MAX ) ) + { + st->audio_frame_cnt = AUDIO_COUNTER_MAX; + move16(); + } - scale_sig32( enerBuffer_fx, CLDFB_NO_CHANNELS_MAX, -tmp_shift ); // Computing the shift as per guarded bits in the energyBuffer + IF( st->audio_frame_cnt < 0 ) + { + st->audio_frame_cnt = 0; + move16(); + } - e_enerBuffer += tmp_shift; // Shifting the exponent of energyBuffer with the tmp_shift + /*-----------------------------------------------------------------* + * Set formant sharpening flag + *-----------------------------------------------------------------*/ - floatToFixed_arrL( st->Bin_E_old, st->Bin_E_old_fx, Q_factor_arrL( st->Bin_E_old, 129 ), 129 ); + st->sharpFlag = 0; + move16(); - if ( st->element_mode != IVAS_SCE && !st->low_rate_mode && !( st->total_brate > MAX_ACELP_BRATE ) && st->element_mode != IVAS_CPE_MDCT && st->coder_type != INACTIVE && st->sp_aud_decision1 != 0 && st->sp_aud_decision2 != 0 && st->sp_aud_decision1 != 1 && st->sp_aud_decision2 != 0 && !( st->element_mode == IVAS_CPE_TD || st->sp_aud_decision0 == 0 ) ) + test(); + test(); + IF( EQ_16( st->coder_type, 
GENERIC ) || EQ_16( st->coder_type, VOICED ) || EQ_16( st->coder_type, TRANSITION ) ) { - if ( st->hTcxEnc != NULL ) + test(); + IF( GE_32( element_brate, FRMT_SHP_MIN_BRATE_IVAS ) && GT_16( st->lp_noise_fx, FORMANT_SHARPENING_NOISE_THRESHOLD_FX ) ) + { + st->sharpFlag = 0; + move16(); + } + ELSE { - st->hTcxEnc->clas_sec_old_fx = float_to_fix16( st->hTcxEnc->clas_sec_old_flt, Q13 ); + st->sharpFlag = 1; + move16(); } } -#endif - ivas_decision_matrix_enc_fx( st, element_brate, fft_buff_fx, enerBuffer_fx, e_enerBuffer, last_element_mode ); + /* channel-aware mode - due to lack of signaling bit, sharpFlag is 1 always in RF mode */ + test(); + test(); + IF( st->rf_mode && ( EQ_16( st->coder_type, VOICED ) || EQ_16( st->coder_type, GENERIC ) ) ) + { + st->sharpFlag = 1; + move16(); + } -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - if ( st->element_mode != IVAS_SCE && !st->low_rate_mode && !( st->total_brate > MAX_ACELP_BRATE ) && st->element_mode != IVAS_CPE_MDCT && st->coder_type != INACTIVE && st->sp_aud_decision1 != 0 && st->sp_aud_decision2 != 0 && st->sp_aud_decision1 != 1 && st->sp_aud_decision2 != 0 && !( st->element_mode == IVAS_CPE_TD || st->sp_aud_decision0 == 0 ) ) + /* TD stereo, secondary channel - due to lack of signaling bits, sharpFlag is always 1 */ + test(); + IF( EQ_16( element_mode, IVAS_CPE_TD ) && EQ_16( st->idchan, 1 ) ) { - if ( st->hTcxEnc != NULL ) + st->sharpFlag = 0; + move16(); + test(); + IF( EQ_16( st->coder_type, GENERIC ) || EQ_16( st->coder_type, VOICED ) ) { - st->hTcxEnc->clas_sec_old_flt = fix16_to_float( st->hTcxEnc->clas_sec_old_fx, Q13 ); + st->sharpFlag = 1; + move16(); } } -#endif -#endif + /*-----------------------------------------------------------------* + * Set voicing flag for HQ FEC + *-----------------------------------------------------------------*/ + + test(); + test(); + IF( st->sp_aud_decision1 == 0 && ( EQ_16( st->coder_type, VOICED ) || EQ_16( st->coder_type, GENERIC ) ) ) + { + *Voicing_flag = 1; + move16(); + } + ELSE 
+ { + *Voicing_flag = 0; + move16(); + } +#else if ( st->L_frame == L_FRAME16k && ( st->coder_type == VOICED || st->coder_type == UNVOICED ) ) /* VOICED and UNVOICED are not supported in ACELP@16k */ { st->coder_type = GENERIC; @@ -552,6 +952,7 @@ ivas_error pre_proc_ivas( { *Voicing_flag = 0; } +#endif /*-----------------------------------------------------------------* * Compute core-coder buffers at internal sampling rate @@ -621,6 +1022,31 @@ ivas_error pre_proc_ivas( } /* Update VAD hangover frame counter in active frames */ +#ifdef IVAS_FLOAT_FIXED + test(); + test(); + IF( !( EQ_32( st->core_brate, SID_2k40 ) || EQ_32( st->core_brate, FRAME_NO_DATA ) ) && st->tcxonly == 0 ) + { + test(); + test(); + test(); + IF( st->hTdCngEnc != NULL && st->Opt_DTX_ON && vad_hover_flag ) + { + st->hTdCngEnc->burst_ho_cnt = add( st->hTdCngEnc->burst_ho_cnt, 1 ); + move16(); + IF( GT_16( st->hTdCngEnc->burst_ho_cnt, HO_HIST_SIZE ) ) + { + st->hTdCngEnc->burst_ho_cnt = HO_HIST_SIZE; + move16(); + } + } + ELSE IF( st->hTdCngEnc != NULL && vad_flag_dtx ) + { + st->hTdCngEnc->burst_ho_cnt = 0; + move16(); + } + } +#else if ( !( st->core_brate == SID_2k40 || st->core_brate == FRAME_NO_DATA ) && st->tcxonly == 0 ) { if ( st->hTdCngEnc != NULL && st->Opt_DTX_ON && vad_hover_flag ) @@ -636,7 +1062,7 @@ ivas_error pre_proc_ivas( st->hTdCngEnc->burst_ho_cnt = 0; } } - +#endif pop_wmops(); return error; diff --git a/lib_enc/ivas_core_pre_proc_front.c b/lib_enc/ivas_core_pre_proc_front.c index 2f827ce81f0a34761530d94c09dc6b3bf37c136b..117bffd8e932002a7cbfbba8b4a00ccf09642fd6 100644 --- a/lib_enc/ivas_core_pre_proc_front.c +++ b/lib_enc/ivas_core_pre_proc_front.c @@ -2138,8 +2138,43 @@ ivas_error pre_proc_front_ivas_fx( old_pitch1 = st->pitch[1]; - pitch_ol( st->pitch, st->voicing, &st->old_pitch, &st->old_corr, corr_shift, &st->old_thres, &st->delta_pit, st->old_wsp2, wsp, st->mem_decim2, *relE, L_look, st->clas, st->input_bwidth, st->Opt_SC_VBR ); +#ifdef IVAS_FLOAT_FIXED + Word16 
exp_wsp = 0, Q_wsp = 0; + move16(); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + floatToFixed_arr( st->old_wsp, st->old_wsp_fx, Q_factor_arr( st->old_wsp, L_WSP_MEM ), L_WSP_MEM ); + Copy( st->old_wsp_fx, old_wsp_fx, L_WSP_MEM ); + wsp_fx = old_wsp_fx + L_WSP_MEM; + st->old_thres_fx = (Word16) floatToFixed( st->old_thres, Q15 ); + st->old_corr_fx = (Word16) floatToFixed( st->old_corr, Q15 ); + floatToFixed_arr( st->voicing, st->voicing_fx, Q15, 3 ); + corr_shift_fx = (Word16) floatToFixed( corr_shift, Q15 ); + f2me_buf_16( wsp, wsp_fx, &exp_wsp, L_WSP - L_WSP_MEM ); + Q_wsp = sub( 15, exp_wsp ); + + floatToFixed_arr( wsp, wsp_fx, Q_wsp, L_WSP - L_WSP_MEM ); + floatToFixed_arr( st->old_wsp2, st->old_wsp2_fx, Q_wsp, 115 ); + floatToFixed_arr( st->mem_decim2, st->mem_decim2_fx, Q_wsp, 3 ); + relE_fx = (Word16) floatToFixed( *relE, Q8 ); +#endif + + pitch_ol_ivas_fx( st->pitch, st->voicing_fx, &st->old_pitch, &st->old_corr_fx, corr_shift_fx, &st->old_thres_fx, + &st->delta_pit, st->old_wsp2_fx, wsp_fx, st->mem_decim2_fx, relE_fx, st->clas, st->input_bwidth, st->Opt_SC_VBR ); + +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + st->old_thres = fixedToFloat( st->old_thres_fx, Q15 ); + st->old_corr = fixedToFloat( st->old_corr_fx, Q15 ); + fixedToFloat_arr( st->voicing_fx, st->voicing, Q15, 3 ); + corr_shift = fixedToFloat( corr_shift_fx, Q15 ); + fixedToFloat_arr( st->old_wsp2_fx, st->old_wsp2, Q_wsp, 115 ); + fixedToFloat_arr( st->mem_decim2_fx, st->mem_decim2, Q_wsp, 3 ); + *relE = fixedToFloat( relE_fx, Q8 ); +#endif + +#else + pitch_ol( st->pitch, st->voicing, &st->old_pitch, &st->old_corr, corr_shift, &st->old_thres, &st->delta_pit, st->old_wsp2, wsp, st->mem_decim2, *relE, L_look, st->clas, st->input_bwidth, st->Opt_SC_VBR ); +#endif /* Updates for adaptive lag window memory */ st->old_pitch_la = st->pitch[2]; @@ -2596,7 +2631,27 @@ ivas_error pre_proc_front_ivas_fx( * TC frame selection *-----------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED 
+#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + Word16 oi12k8_fx[L_INP_12k8]; + floatToFixed_arr( old_inp_12k8, oi12k8_fx, 0, L_INP_12k8 ); // Q_fac doesn't matter as it is only being used for sign + Word16 *ni12k8_fx = oi12k8_fx + L_INP_MEM; + inp_12k8_fx = ni12k8_fx - L_look; + floatToFixed_arrL( ee, ee_fx, Q6, 2 ); + relE_fx = float_to_fix16( *relE, Q8 ); + floatToFixed_arr( st->voicing, st->voicing_fx, Q15, 3 ); + st->prev_fmerit = float_to_fix16( st->prev_fmerit_flt, Q15 ); +#endif + st->clas = signal_clas_fx( st, inp_12k8_fx, ee_fx, relE_fx, L_look, tdm_SM_last_clas ); + +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + fixedToFloat_arr( st->voicing_fx, st->voicing, Q15, 3 ); + st->prev_fmerit_flt = fix16_to_float( st->prev_fmerit, Q15 ); + st->fmerit_dt_flt = fix16_to_float( st->fmerit_dt, Q15 ); +#endif +#else st->clas = signal_clas( st, inp_12k8, ee, *relE, L_look, tdm_SM_last_clas ); +#endif #ifndef IVAS_FLOAT_FIXED select_TC( MODE1, st->tc_cnt, &st->coder_type, st->localVAD ); #else @@ -2610,12 +2665,15 @@ ivas_error pre_proc_front_ivas_fx( /*-----------------------------------------------------------------* * Collect stereo classifier features *-----------------------------------------------------------------*/ - +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + Word16 cor_map_sum_e; + f2me( *cor_map_sum, &cor_map_sum_fx, &cor_map_sum_e ); +#endif if ( hStereoClassif != NULL ) { #if 1 #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word16 cor_map_sum_e, non_staX_e, sp_div_e, epsP_e, max_e_Etot; + Word16 non_staX_e, sp_div_e, epsP_e, max_e_Etot; hStereoClassif->relE_buf_e = 0; f2me( hStereoClassif->ave_ener_L, &hStereoClassif->ave_ener_L_fx, &hStereoClassif->ave_ener_L_fx_e ); f2me( hStereoClassif->ave_ener_R, &hStereoClassif->ave_ener_R_fx, &hStereoClassif->ave_ener_R_fx_e ); @@ -2632,7 +2690,6 @@ ivas_error pre_proc_front_ivas_fx( floatToFixed_arr32( hStereoClassif->xtalk_fv, hStereoClassif->xtalk_fv_fx, Q15, 58 ); f2me_buf( hStereoClassif->voicing_ch1, hStereoClassif->voicing_ch1_fx, 
&hStereoClassif->voicing_ch1_e, 3 ); f2me_buf_16( st->voicing, st->voicing_fx, &st->voicing_e, 3 ); - f2me( *cor_map_sum, &cor_map_sum_fx, &cor_map_sum_e ); f2me( non_staX, &non_staX_fx, &non_staX_e ); f2me( hStereoClassif->nchar_ch1, &hStereoClassif->nchar_ch1_fx, &hStereoClassif->nchar_ch1_e ); f2me_buf( epsP, epsP_fx, &epsP_e, 17 ); @@ -2689,7 +2746,69 @@ ivas_error pre_proc_front_ivas_fx( * 1st stage speech/music classification (GMM model) *----------------------------------------------------------------*/ +#if 0 smc_dec = ivas_smc_gmm( st, hStereoClassif, localVAD_HE_SAD, Etot, lsp_new, *cor_map_sum, epsP, PS, non_staX, *relE, &high_lpn_flag, flag_spitch ); +#else + SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas; +#if 1 + Word16 lsp_new_fx[M]; + Word32 PS_fx[128]; + Word16 Q_esp; + Word16 non_sta_fx = float_to_fix16( non_staX, Q6 ); + Word16 Etot_fx_0 = float_to_fix16( Etot, Q8 ); + floatToFixed_arr( lsp_new, lsp_new_fx, Q15, M ); + hSpMusClas->wdlp_0_95_sp_fx = float_to_fix16( hSpMusClas->wdlp_0_95_sp, Q8 ); + hSpMusClas->wdlp_xtalk_fx = floatToFixed( hSpMusClas->wdlp_xtalk, Q19 ); + hSpMusClas->wrise_fx = float_to_fix16( hSpMusClas->wrise, 9 ); + relE_fx = float_to_fix16( *relE, 8 ); + floatToFixed_arr16( st->voicing, st->voicing_fx, 15, 3 ); + hSpMusClas->prev_relE_fx = float_to_fix16( hSpMusClas->prev_relE, 8 ); + hSpMusClas->relE_attack_sum_fx = float_to_fix16( hSpMusClas->relE_attack_sum, 8 ); + Word16 Qfact_PS = Q_factor_arrL( PS, 128 ); + floatToFixed_arr32( PS, PS_fx, Qfact_PS, 128 ); + Word16 e_esp; + f2me_buf( epsP, epsP_fx, &e_esp, M + 1 ); + Q_esp = sub( 31, e_esp ); + Word16 Qfact_PS_past = Q_factor_arrL( hSpMusClas->past_PS, 67 ); + floatToFixed_arr32( hSpMusClas->past_PS, hSpMusClas->past_PS_fx, Qfact_PS_past, 67 ); + hSpMusClas->dlp_var_LT_fx = float_to_fix( hSpMusClas->dlp_var_LT, Q19 ); + hSpMusClas->dlp_mean_LT_fx = float_to_fix( hSpMusClas->dlp_mean_LT, Q19 ); + hSpMusClas->dlp_mean_ST_fx = float_to_fix( hSpMusClas->dlp_mean_ST, Q19 ); + 
floatToFixed_arr32( hSpMusClas->past_dlp_mean_ST, hSpMusClas->past_dlp_mean_ST_fx, Q19, 7 ); + floatToFixed_arr32( hSpMusClas->prev_FV, hSpMusClas->prev_FV_fx, Q20, 15 ); + floatToFixed_arr32( hSpMusClas->FV_st, hSpMusClas->FV_st_fx, Q20, 15 ); +#endif + smc_dec = ivas_smc_gmm_fx( st, hStereoClassif, localVAD_HE_SAD, Etot_fx_0, lsp_new_fx, extract_l( L_shr( cor_map_sum_fx, sub( 23, cor_map_sum_e ) ) ) /*q8*/, epsP_fx, PS_fx, non_sta_fx, relE_fx, &high_lpn_flag, flag_spitch, Qfact_PS, Q_esp, Qfact_PS_past ); +#if 1 + fixedToFloat_arr( hSpMusClas->past_dlp_fx, hSpMusClas->past_dlp, Q9, HANG_LEN - 1 ); + hSpMusClas->lpm = fixedToFloat( hSpMusClas->lpm_fx, Q7 ); // Q7 + hSpMusClas->lps = fixedToFloat( hSpMusClas->lps_fx, Q7 ); // Q7 + hSpMusClas->lpn = fixedToFloat( hSpMusClas->lpn_fx, Q7 ); // Q7 + hSpMusClas->wdrop = fixedToFloat( hSpMusClas->wdrop_fx, Q9 ); // Q8 + hSpMusClas->wrise = fixedToFloat( hSpMusClas->wrise_fx, Q9 ); // Q8 + hSpMusClas->lt_dec_thres = fixedToFloat( hSpMusClas->lt_dec_thres_fx, Q9 ); // Q8 + hSpMusClas->wdlp_0_95_sp = fixedToFloat( hSpMusClas->wdlp_0_95_sp_fx, Q8 ); + hSpMusClas->dlp_mean_LT = fixedToFloat_32( hSpMusClas->dlp_mean_LT_fx, Q19 ); + hSpMusClas->wdlp_xtalk = fixedToFloat( hSpMusClas->wdlp_xtalk_fx, Q19 ); + hSpMusClas->dlp_var_LT = fixedToFloat_32( hSpMusClas->dlp_var_LT_fx, Q19 ); + hSpMusClas->prev_relE = fixedToFloat( hSpMusClas->prev_relE_fx, Q8 ); + hSpMusClas->prev_Etot = fixedToFloat( hSpMusClas->prev_Etot_fx, Q8 ); + fixedToFloat_arrL32( hSpMusClas->past_PS_fx, hSpMusClas->past_PS, Qfact_PS_past, 67 ); + hSpMusClas->relE_attack_sum = fixedToFloat( hSpMusClas->relE_attack_sum_fx, Q8 ); + fixedToFloat_arrL32( hSpMusClas->FV_st_fx, hSpMusClas->FV_st, Q20, 15 ); + fixedToFloat_arrL32( hSpMusClas->prev_FV_fx, hSpMusClas->prev_FV, Q20, 15 ); + fixedToFloat_arrL32( hSpMusClas->past_dlp_mean_ST_fx, hSpMusClas->past_dlp_mean_ST, Q19, 7 ); + hSpMusClas->dlp_mean_ST = fixedToFloat( hSpMusClas->dlp_mean_ST_fx, Q19 ); + + if ( 
hStereoClassif != NULL ) + { + hStereoClassif->ps_diff_ch1 = me2f( hStereoClassif->ps_diff_ch1_fx, hStereoClassif->ps_diff_ch1_e ); // Qfact_PS_past - 7 + hStereoClassif->ps_sta_ch1 = me2f( hStereoClassif->ps_sta_ch1_fx, hStereoClassif->ps_sta_ch1_e ); // logf( ps_sta + 1e-5f );Q25 + hStereoClassif->ps_diff_ch2 = me2f( hStereoClassif->ps_diff_ch2_fx, hStereoClassif->ps_diff_ch2_e ); + hStereoClassif->ps_sta_ch2 = me2f( hStereoClassif->ps_sta_ch2_fx, hStereoClassif->ps_sta_ch2_e ); + } +#endif +#endif /*----------------------------------------------------------------* diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index e35be41a86d1df7d3508acbdfc92d000e5b7c475..668b9f71c094dcc62a2571bb9519a2b2d44d7b62 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -54,9 +54,11 @@ /*--------------------------------------------------------------------------* * Local function prototypes *--------------------------------------------------------------------------*/ - +#ifdef IVAS_FLOAT_FIXED +static void stereo_mode_combined_format_enc_fx( const Encoder_Struct *st_ivas, CPE_ENC_HANDLE hCPE ); +#else static void stereo_mode_combined_format_enc( const Encoder_Struct *st_ivas, CPE_ENC_HANDLE hCPE ); - +#endif /*-------------------------------------------------------------------* * ivas_cpe_enc() @@ -115,14 +117,16 @@ ivas_error ivas_cpe_enc( float cor_map_sum[CPE_CHANNELS]; /* speech/music clasif. parameter */ int16_t vad_flag_dtx[CPE_CHANNELS]; /* HE-SAD flag with additional DTX HO */ float enerBuffer[CPE_CHANNELS][CLDFB_NO_CHANNELS_MAX]; /* energy buffer */ - float currFlatness[CPE_CHANNELS]; /* flatness parameter */ + float currFlatness[CPE_CHANNELS] = { 0 }; /* flatness parameter */ #ifdef IVAS_FLOAT_FIXED Word16 currFlatness_fx[CPE_CHANNELS]; /* flatness parameter Q7 */ #endif - float fft_buff[CPE_CHANNELS][2 * L_FFT]; /* FFT buffer */ - int16_t tdm_ratio_idx, tdm_ratio_idx_SM; /* temp. TD stereo parameters */ - int16_t tdm_SM_or_LRTD_Pri; /* temp. 
TD stereo parameters */ - float tdm_last_ratio; /* temp. TD stereo parameters */ + float fft_buff[CPE_CHANNELS][2 * L_FFT]; /* FFT buffer */ + int16_t tdm_ratio_idx, tdm_ratio_idx_SM; /* temp. TD stereo parameters */ + int16_t tdm_SM_or_LRTD_Pri; /* temp. TD stereo parameters */ +#ifndef IVAS_FLOAT_FIXED + float tdm_last_ratio; /* temp. TD stereo parameters */ +#endif int16_t nb_bits; /* number of DFT stereo side bits */ float fr_bands[CPE_CHANNELS][2 * NB_BANDS]; /* energy in frequency bands */ float Etot_LR[CPE_CHANNELS]; /* total energy */ @@ -161,7 +165,7 @@ ivas_error ivas_cpe_enc( tdm_SM_or_LRTD_Pri = 0; tdm_ratio_idx = -1; tdm_ratio_idx_SM = -1; - tdm_last_ratio = 0; + // tdm_last_ratio = 0; set16_fx( pitch_fr_fx[0], 0, NB_SUBFR ); set16_fx( pitch_fr_fx[1], 0, NB_SUBFR ); @@ -205,12 +209,61 @@ ivas_error ivas_cpe_enc( if ( sts[0]->ini_frame > 0 && st_ivas->hMCT == NULL ) { +#ifdef IVAS_FLOAT_FIXED +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + hCPE->hStereoClassif->is_speech_fx = floatToFixed_32( hCPE->hStereoClassif->is_speech, Q9 ); + hCPE->hStereoClassif->xtalk_wscore_fx = floatToFixed_32( hCPE->hStereoClassif->xtalk_wscore, Q31 ); + hCPE->hCoreCoder[0]->hSpMusClas->past_dlp_fx[0] = float_to_fix16( hCPE->hCoreCoder[0]->hSpMusClas->past_dlp[0], Q9 ); + hCPE->hCoreCoder[0]->hSpMusClas->wdlp_xtalk_fx = floatToFixed( hCPE->hCoreCoder[0]->hSpMusClas->wdlp_xtalk, Q19 ); +#endif + hCPE->element_mode = select_stereo_mode( hCPE, ivas_format ); - } +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + fixedToFloat_arrL( hCPE->hStereoClassif->unclr_fv_fx, hCPE->hStereoClassif->unclr_fv, Q15, SSC_MAX_NFEA ); + fixedToFloat_arrL( hCPE->hStereoClassif->xtalk_fv_fx, hCPE->hStereoClassif->xtalk_fv, Q15, SSC_MAX_NFEA ); + hCPE->hStereoClassif->is_speech = fixedToFloat_32( hCPE->hStereoClassif->is_speech_fx, Q9 ); +#endif +#else + hCPE->element_mode = select_stereo_mode( hCPE, ivas_format ); +#endif + } +#ifdef IVAS_FLOAT_FIXED + stereo_mode_combined_format_enc_fx( st_ivas, hCPE ); +#else 
stereo_mode_combined_format_enc( st_ivas, hCPE ); +#endif #ifdef IVAS_FLOAT_FIXED Word16 Q_inp = Q15; + move16(); + Word16 Q_buffer[2]; + Word32 band_energies_LR_fx[2 * NB_BANDS]; + Word16 Etot_LR_fx[CPE_CHANNELS]; + Word32 lf_E_fx[CPE_CHANNELS][2 * VOIC_BINS]; + Word32 fr_bands_fx[CPE_CHANNELS][2 * NB_BANDS]; + Word16 band_ener_guardbits = find_guarded_bits_fx( 2 * NB_BANDS ); + Word16 Q_add = 2; + move16(); + Word16 front_create_flag = 0; + move16(); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + sts[n]->lp_speech_fx = (Word16) floatToFixed( sts[n]->lp_speech, Q8 ); + sts[n]->lp_noise_fx = (Word16) floatToFixed( sts[n]->lp_noise, Q8 ); + sts[n]->flag_noisy_speech_snr_fx = (Word8) sts[n]->flag_noisy_speech_snr; + Q_buffer[n] = 15; + move16(); + } + if ( sts[0]->hFdCngEnc != NULL ) + { + sts[0]->last_totalNoise_fx = (Word16) float_to_fix16( sts[0]->last_totalNoise, Q8 ); + sts[0]->hNoiseEst->totalNoise_fx = (Word16) float_to_fix16( sts[0]->hNoiseEst->totalNoise, Q8 ); + for ( int i = 0; i < TOTALNOISE_HIST_SIZE - 1; i++ ) + { + sts[0]->totalNoise_increase_hist_fx[i] = (Word16) float_to_fix16( sts[0]->totalNoise_increase_hist[i], Q8 ); /* float -> Q8, one history entry per iteration */ + } + } if ( hCPE->hFrontVad[0] != NULL && hCPE->element_mode != IVAS_CPE_MDCT ) { @@ -226,12 +279,134 @@ ivas_error ivas_cpe_enc( // Q_inp = L_min( Q_inp, Q_factor_arr( sts[n]->input, L_FRAME48k ) ); floatToFixed_arr( sts[n]->input, sts[n]->input_fx, Q_inp, L_FRAME48k ); } + Word16 Qband = -1; + move16(); + Word16 Q_new_old = add( sub( Q_inp, Qband ), Q_add ); + + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + + floatToFixed_arr( hCPE->hFrontVad[n]->mem_decim, hCPE->hFrontVad[n]->mem_decim_fx, Q_inp, 90 ); + hCPE->hFrontVad[n]->mem_preemph_fx = (Word16) floatToFixed( hCPE->hFrontVad[n]->mem_preemph, Q_inp - 1 ); + Q_buffer[n] = Q_factor_arr( hCPE->hFrontVad[n]->buffer_12k8 + L_FFT, L_FFT / 2 ); + floatToFixed_arr( hCPE->hFrontVad[n]->buffer_12k8, hCPE->hFrontVad[n]->buffer_12k8_fx, Q_buffer[n], 384 ); +
hCPE->hFrontVad[n]->hNoiseEst->Etot_h_32fx = (Word32) ( hCPE->hFrontVad[n]->hNoiseEst->Etot_h * 16777216.0 ); + hCPE->hFrontVad[n]->hNoiseEst->Etot_l_32fx = (Word32) ( hCPE->hFrontVad[n]->hNoiseEst->Etot_l * 16777216.0 ); + hCPE->hFrontVad[n]->hNoiseEst->Etot_l_lp_32fx = (Word32) ( hCPE->hFrontVad[n]->hNoiseEst->Etot_l_lp * 16777216.0 ); + hCPE->hFrontVad[n]->hNoiseEst->Etot_last_32fx = (Word32) ( hCPE->hFrontVad[n]->hNoiseEst->Etot_last * 16777216.0 ); + hCPE->hFrontVad[n]->hNoiseEst->Etot_v_h2_32fx = (Word32) ( hCPE->hFrontVad[n]->hNoiseEst->Etot_v_h2 * 16777216.0 ); + hCPE->hFrontVad[n]->hNoiseEst->Etot_lp_32fx = (Word32) ( hCPE->hFrontVad[n]->hNoiseEst->Etot_lp * 16777216.0 ); + hCPE->hFrontVad[n]->hNoiseEst->sign_dyn_lp_32fx = (Word32) ( hCPE->hFrontVad[n]->hNoiseEst->sign_dyn_lp * 16777216.0 ); + hCPE->hFrontVad[n]->hNoiseEst->totalNoise_fx = (Word16) ( hCPE->hFrontVad[n]->hNoiseEst->totalNoise * ( 256.0 ) ); + + hCPE->hFrontVad[n]->lp_noise_fx = (Word16) floatToFixed( hCPE->hFrontVad[n]->lp_noise, Q8 ); + hCPE->hFrontVad[n]->lp_speech_fx = (Word16) floatToFixed( hCPE->hFrontVad[n]->lp_speech, Q8 ); + + floatToFixed_arrL( hCPE->hFrontVad[n]->hNoiseEst->bckr, hCPE->hFrontVad[n]->hNoiseEst->bckr_fx, Q_new_old + QSCALE, 20 ); + floatToFixed_arrL( hCPE->hFrontVad[n]->hNoiseEst->enrO, hCPE->hFrontVad[n]->hNoiseEst->enrO_fx, Q_new_old + QSCALE, 20 ); + + hCPE->hFrontVad[n]->hVAD->bcg_flux_fx = (Word16) ( hCPE->hFrontVad[n]->hVAD->bcg_flux * ( 1 << 4 ) ); /* scale by 1 << 4 before the cast so the fraction is kept */ + + + hCPE->hFrontVad[n]->hVAD->snr_sum_vad_fx = (Word16) ( hCPE->hFrontVad[n]->hVAD->snr_sum_vad * 32767 ); + hCPE->hFrontVad[n]->hVAD->prim_act_quick_fx = (Word16) ( hCPE->hFrontVad[n]->hVAD->prim_act_quick * 32767 ); + hCPE->hFrontVad[n]->hVAD->prim_act_slow_fx = (Word16) ( hCPE->hFrontVad[n]->hVAD->prim_act_slow * 32767 ); + hCPE->hFrontVad[n]->hVAD->prim_act_fx = (Word16) ( hCPE->hFrontVad[n]->hVAD->prim_act * 32767 ); + hCPE->hFrontVad[n]->hVAD->prim_act_quick_he_fx = (Word16) (
hCPE->hFrontVad[n]->hVAD->prim_act_quick_he * 32767 ); + hCPE->hFrontVad[n]->hVAD->prim_act_slow_he_fx = (Word16) ( hCPE->hFrontVad[n]->hVAD->prim_act_slow_he * 32767 ); + hCPE->hFrontVad[n]->hVAD->prim_act_he_fx = (Word16) ( hCPE->hFrontVad[n]->hVAD->prim_act_he * 32767 ); + } + floatToFixed_arrL( &band_energies_LR[0], &band_energies_LR_fx[0], Q_new_old + QSCALE + 2, 40 ); } - Word16 Q_add = 2; - if ( ( error = front_vad_fx( hCPE, NULL, hEncoderConfig, &hCPE->hFrontVad[0], st_ivas->hMCT != NULL, input_frame, vad_flag_dtx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, vad_hover_flag, band_energies_LR, NULL, NULL, Q_inp, Q_add ) ) != IVAS_ERR_OK ) +#endif + + if ( ( error = front_vad_fx( hCPE, NULL, hEncoderConfig, &hCPE->hFrontVad[0], st_ivas->hMCT != NULL, input_frame, vad_flag_dtx, fr_bands_fx, Etot_LR_fx, lf_E_fx, localVAD_HE_SAD, vad_hover_flag, band_energies_LR_fx, NULL, NULL, Q_inp, Q_buffer, Q_add, &front_create_flag ) ) != IVAS_ERR_OK ) { return error; } +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + if ( sts[0]->hFdCngEnc != NULL ) + { + sts[0]->last_totalNoise = fix16_to_float( sts[0]->last_totalNoise_fx, Q8 ); + sts[0]->hNoiseEst->totalNoise = fix16_to_float( sts[0]->hNoiseEst->totalNoise_fx, Q8 ); + for ( int i = 0; i < TOTALNOISE_HIST_SIZE - 1; i++ ) + { + sts[0]->totalNoise_increase_hist[i] = fix16_to_float( sts[0]->totalNoise_increase_hist_fx[i], Q8 ); /* Q8 -> float, one history entry per iteration */ + } + sts[0]->hFdCngEnc->hFdCngCom->init_old_flt = ( sts[0]->hFdCngEnc->hFdCngCom->init_old == 32767 ) ?
FLT_MAX : sts[0]->hFdCngEnc->hFdCngCom->init_old_flt; + } + if ( hCPE->hFrontVad[0] != NULL && hCPE->element_mode != IVAS_CPE_MDCT ) + { + Word16 Qband = -1; + move16(); + Word16 Q_new_old = add( sub( Q_inp, Qband ), Q_add ); + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + fixedToFloat_arr( hCPE->hFrontVad[n]->mem_decim_fx, hCPE->hFrontVad[n]->mem_decim, Q_inp, 90 ); + hCPE->hFrontVad[n]->mem_preemph = fixedToFloat( hCPE->hFrontVad[n]->mem_preemph_fx, Q_inp + Qband ); + fixedToFloat_arr( hCPE->hFrontVad[n]->buffer_12k8_fx, hCPE->hFrontVad[n]->buffer_12k8, Q_buffer[n], 384 ); + fixedToFloat_arrL( fr_bands_fx[n], fr_bands[n], Q_buffer[n] + QSCALE + 2, 40 ); + fixedToFloat_arrL( lf_E_fx[n], lf_E[n], Q_buffer[n] + QSCALE, 148 ); + + Etot_LR[n] = fixedToFloat( Etot_LR_fx[n], Q8 ); + fixedToFloat_arrL( hCPE->hFrontVad[n]->hNoiseEst->bckr_fx, hCPE->hFrontVad[n]->hNoiseEst->bckr, Q_new_old + QSCALE, 20 ); + fixedToFloat_arrL( hCPE->hFrontVad[n]->hNoiseEst->enrO_fx, hCPE->hFrontVad[n]->hNoiseEst->enrO, Q_new_old + QSCALE, 20 ); + hCPE->hFrontVad[n]->hNoiseEst->Etot_h = (float) ( hCPE->hFrontVad[n]->hNoiseEst->Etot_h_32fx / 16777216.0 ); + hCPE->hFrontVad[n]->hNoiseEst->Etot_l = (float) ( hCPE->hFrontVad[n]->hNoiseEst->Etot_l_32fx / 16777216.0 ); + hCPE->hFrontVad[n]->hNoiseEst->Etot_l_lp = (float) ( hCPE->hFrontVad[n]->hNoiseEst->Etot_l_lp_32fx / 16777216.0 ); + hCPE->hFrontVad[n]->hNoiseEst->Etot_last = (float) ( hCPE->hFrontVad[n]->hNoiseEst->Etot_last_32fx / 16777216.0 ); + // hCPE->hFrontVad[n]->hNoiseEst->Etot_v_h2 = (float) ( hCPE->hFrontVad[n]->hNoiseEst->Etot_v_h2_32fx / 16777216.0 ); + hCPE->hFrontVad[n]->hNoiseEst->Etot_lp = (float) ( hCPE->hFrontVad[n]->hNoiseEst->Etot_lp_32fx / 16777216.0 ); + // hCPE->hFrontVad[n]->hNoiseEst->sign_dyn_lp = (float) ( hCPE->hFrontVad[n]->hNoiseEst->sign_dyn_lp_32fx / 16777216.0 ); + hCPE->hFrontVad[n]->lp_noise = fixedToFloat( hCPE->hFrontVad[n]->lp_noise_fx, Q8 ); + hCPE->hFrontVad[n]->lp_speech = fixedToFloat( 
hCPE->hFrontVad[n]->lp_speech_fx, Q8 ); + hCPE->hFrontVad[n]->hNoiseEst->Etot_v_h2 = (float) ( hCPE->hFrontVad[n]->hNoiseEst->Etot_v_h2_fx / ( 256.0 ) ); + hCPE->hFrontVad[n]->hNoiseEst->totalNoise = (float) ( (float) hCPE->hFrontVad[n]->hNoiseEst->totalNoise_fx / ( 256.0 ) ); + hCPE->hFrontVad[n]->hNoiseEst->sign_dyn_lp = (float) ( hCPE->hFrontVad[n]->hNoiseEst->sign_dyn_lp_fx / ( 256.0 ) ); + hCPE->hFrontVad[n]->hVAD->bcg_flux = (float) ( hCPE->hFrontVad[n]->hVAD->bcg_flux_fx / ( 16.0 ) ); + hCPE->hFrontVad[n]->hVAD->snr_sum_vad = (float) ( hCPE->hFrontVad[n]->hVAD->snr_sum_vad_fx / 32767.0 ); + hCPE->hFrontVad[n]->hVAD->prim_act_quick = (float) ( hCPE->hFrontVad[n]->hVAD->prim_act_quick_fx / 32767.0 ); + hCPE->hFrontVad[n]->hVAD->prim_act_slow = (float) ( hCPE->hFrontVad[n]->hVAD->prim_act_slow_fx / 32767.0 ); + hCPE->hFrontVad[n]->hVAD->prim_act = (float) ( hCPE->hFrontVad[n]->hVAD->prim_act_fx / 32767.0 ); + hCPE->hFrontVad[n]->hVAD->prim_act_quick_he = (float) ( hCPE->hFrontVad[n]->hVAD->prim_act_quick_he_fx / 32767.0 ); + hCPE->hFrontVad[n]->hVAD->prim_act_slow_he = (float) ( hCPE->hFrontVad[n]->hVAD->prim_act_slow_he_fx / 32767.0 ); + hCPE->hFrontVad[n]->hVAD->prim_act_he = (float) ( hCPE->hFrontVad[n]->hVAD->prim_act_he_fx / 32767.0 ); + + hCPE->hFrontVad[n]->hVAD->running_avg = fix16_to_float( hCPE->hFrontVad[n]->hVAD->running_avg_fx, Q15 ); + hCPE->hFrontVad[n]->hVAD->ra_deltasum = fix16_to_float( hCPE->hFrontVad[n]->hVAD->ra_deltasum_fx, Q15 ); + fixedToFloat_arr( hCPE->hFrontVad[n]->hNoiseEst->old_S_fx, hCPE->hFrontVad[n]->hNoiseEst->old_S, Q7, 128 ); + fixedToFloat_arr( hCPE->hFrontVad[n]->hNoiseEst->cor_map_fx, hCPE->hFrontVad[n]->hNoiseEst->cor_map, Q15, 128 ); + hCPE->hFrontVad[n]->hNoiseEst->Etot_st_est = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->Etot_st_est_fx, Q8 ); + hCPE->hFrontVad[n]->hNoiseEst->Etot_sq_st_est = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->Etot_sq_st_est_fx, Q2 ); + hCPE->hFrontVad[n]->hNoiseEst->multi_harm_limit = 
fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->multi_harm_limit_fx, Q9 ); + hCPE->hFrontVad[n]->hNoiseEst->noise_char = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->noise_char_fx, Q11 ); + + hCPE->hFrontVad[n]->hNoiseEst->noise_char = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->noise_char_fx, Q11 ); + hCPE->hFrontVad[n]->hNoiseEst->epsP_0_2_lp = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->epsP_0_2_lp_fx, Q12 ); + hCPE->hFrontVad[n]->hNoiseEst->epsP_0_2_ad_lp = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->epsP_0_2_ad_lp_fx, Q12 ); + hCPE->hFrontVad[n]->hNoiseEst->epsP_2_16_lp = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->epsP_2_16_lp_fx, Q12 ); + hCPE->hFrontVad[n]->hNoiseEst->epsP_2_16_lp2 = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->epsP_2_16_lp2_fx, Q12 ); + hCPE->hFrontVad[n]->hNoiseEst->epsP_2_16_dlp_lp2 = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->epsP_2_16_dlp_lp2_fx, Q12 ); + hCPE->hFrontVad[n]->hNoiseEst->lt_tn_track = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->lt_tn_track_fx, Q15 ); + hCPE->hFrontVad[n]->hNoiseEst->lt_tn_dist = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->lt_tn_dist_fx, Q8 ); + hCPE->hFrontVad[n]->hNoiseEst->lt_haco_ev = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->lt_haco_ev_fx, Q15 ); + hCPE->hFrontVad[n]->hNoiseEst->lt_Ellp_dist = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->lt_Ellp_dist_fx, Q8 ); + hCPE->hFrontVad[n]->hNoiseEst->act_pred = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->act_pred_fx, Q15 ); + hCPE->hFrontVad[n]->hNoiseEst->lt_aEn_zero = fix16_to_float( hCPE->hFrontVad[n]->hNoiseEst->lt_aEn_zero_fx, Q15 ); + if ( front_create_flag ) + { + fixedToFloat_arrL( hCPE->hFrontVad[n]->hNoiseEst->fr_bands1_fx, hCPE->hFrontVad[n]->hNoiseEst->fr_bands1, Q17, NB_BANDS ); + fixedToFloat_arrL( hCPE->hFrontVad[n]->hNoiseEst->fr_bands2_fx, hCPE->hFrontVad[n]->hNoiseEst->fr_bands2, Q17, NB_BANDS ); + fixedToFloat_arrL( hCPE->hFrontVad[n]->hNoiseEst->ave_enr_fx, hCPE->hFrontVad[n]->hNoiseEst->ave_enr, Q8, 
NB_BANDS ); + fixedToFloat_arrL( hCPE->hFrontVad[n]->hNoiseEst->ave_enr2_fx, hCPE->hFrontVad[n]->hNoiseEst->ave_enr2, Q8, NB_BANDS ); + } + // floatToFixed_arr( hCPE->hFrontVad[n]->mem_decim, hCPE->hFrontVad[n]->mem_decim_fx, Q_inp, 90 ); + // hCPE->hFrontVad[n]->mem_preemph_fx = (Word16) floatToFixed( hCPE->hFrontVad[n]->mem_preemph, Q_inp ); + } + // fixedToFloat_arrL( band_energies_fx, band_energies, Q_new + QSCALE + 2, 40 ); + fixedToFloat_arrL( &band_energies_LR_fx[0], &band_energies_LR[0], Q_buffer[1] + QSCALE + 2 - band_ener_guardbits, 40 ); + } +#endif #else if ( ( error = front_vad( hCPE, NULL, hEncoderConfig, &hCPE->hFrontVad[0], st_ivas->hMCT != NULL, input_frame, vad_flag_dtx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, vad_hover_flag, band_energies_LR, NULL, NULL ) ) != IVAS_ERR_OK ) @@ -253,28 +428,76 @@ ivas_error ivas_cpe_enc( /*----------------------------------------------------------------* * dynamically allocate data structures depending on the actual stereo mode *----------------------------------------------------------------*/ - +#ifdef IVAS_FLOAT_FIXED + if ( ( error = stereo_memory_enc_fx( hCPE, input_Fs, max_bwidth, ivas_format, st_ivas->nchan_transport ) ) != IVAS_ERR_OK ) + { + return error; + } +#else if ( ( error = stereo_memory_enc( hCPE, input_Fs, max_bwidth, &tdm_last_ratio, ivas_format, st_ivas->nchan_transport ) ) != IVAS_ERR_OK ) { return error; } +#endif /*----------------------------------------------------------------* * Set TD stereo parameters *----------------------------------------------------------------*/ - +#ifdef IVAS_FLOAT_FIXED + Q_inp = min( Q_inp, Q_factor_arrL( sts[1]->input, input_frame ) ); + floatToFixed_arrL32( sts[1]->input, sts[1]->input32_fx, Q_inp, input_frame ); + if ( ( error = stereo_set_tdm_fx( hCPE, input_frame, Q_inp ) ) != IVAS_ERR_OK ) + { + return error; + } +#else if ( ( error = stereo_set_tdm( hCPE, input_frame ) ) != IVAS_ERR_OK ) { return error; } +#endif 
/*----------------------------------------------------------------* * Resets/updates in case of stereo switching *----------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + floatToFixed_arr( sts[0]->old_input_signal, sts[0]->old_input_signal_fx, 0, input_frame ); + Word16 q_inp = Q15; + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + q_inp = min( q_inp, Q_factor_arr( sts[n]->input_buff, 1965 ) ); + } + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + floatToFixed_arr( sts[n]->input_buff, sts[n]->input_buff_fx, q_inp, 1965 ); + } +#endif - stereo_switching_enc( hCPE, sts[0]->old_input_signal, input_frame ); + stereo_switching_enc_fx( hCPE, sts[0]->old_input_signal_fx, input_frame, q_inp ); + +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + + if ( hCPE->element_mode > IVAS_CPE_DFT && hCPE->input_mem[0] != NULL && hCPE->element_mode != IVAS_CPE_MDCT ) + { + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + fixedToFloat_arr( hCPE->input_mem_fx[n], hCPE->input_mem[n], q_inp, STEREO_DFT_OVL_MAX * input_frame / L_FRAME48k ); + } + } + if ( hCPE->element_mode == IVAS_CPE_DFT ) + { + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + fixedToFloat_arr( sts[n]->input_buff_fx, sts[n]->input_buff, q_inp, 1965 ); + } + } +#endif +#else + stereo_switching_enc( hCPE, sts[0]->old_input_signal, input_frame ); +#endif /*----------------------------------------------------------------* * Temporal inter-channel alignment, stereo adjustment *----------------------------------------------------------------*/ @@ -328,7 +551,25 @@ ivas_error ivas_cpe_enc( /* Synchonize detection for downmix-based stereo */ if ( hCPE->element_mode == IVAS_CPE_DFT || hCPE->element_mode == IVAS_CPE_TD ) { +#ifdef IVAS_FLOAT_FIXED +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + currFlatness_fx[0] = float_to_fix16( currFlatness[0], Q7 ); + currFlatness_fx[1] = float_to_fix16( currFlatness[1], Q7 ); +#endif + set_transient_stereo_fx( hCPE, currFlatness_fx ); +#ifdef 
IVAS_FLOAT_FIXED_CONVERSIONS + if ( hCPE->hStereoDft ) + { + hCPE->hStereoDft->hItd->currFlatness = fix16_to_float( hCPE->hStereoDft->hItd->currFlatness_fx, Q7 ); + } + if ( hCPE->hStereoMdct ) + { + hCPE->hStereoMdct->hItd->currFlatness = fix16_to_float( hCPE->hStereoMdct->hItd->currFlatness_fx, Q7 ); + } +#endif +#else set_transient_stereo( hCPE, currFlatness ); +#endif } /*----------------------------------------------------------------* @@ -501,7 +742,10 @@ ivas_error ivas_cpe_enc( floatToFixed_arrL( hCPE->hStereoClassif->unclr_fv, hCPE->hStereoClassif->unclr_fv_fx, 15, SSC_MAX_NFEA ); f2me( hCPE->hStereoClassif->ave_ener_L, &hCPE->hStereoClassif->ave_ener_L_fx, &hCPE->hStereoClassif->ave_ener_L_fx_e ); f2me( hCPE->hStereoClassif->ave_ener_R, &hCPE->hStereoClassif->ave_ener_R_fx, &hCPE->hStereoClassif->ave_ener_R_fx_e ); - f2me( hCPE->hStereoDft->hItd->currFlatness, &hCPE->hStereoDft->hItd->currFlatness_fx, &hCPE->hStereoDft->hItd->currFlatness_fx_e ); + if ( hCPE->hStereoMdct ) + { + hCPE->hStereoMdct->hItd->currFlatness_fx = float_to_fix16( hCPE->hStereoMdct->hItd->currFlatness, Q7 ); + } #ifndef MSAN_FIX hCPE->hStereoClassif->xtalk_score_fx = floatToFixed( hCPE->hStereoClassif->xtalk_score, 31 ); @@ -734,7 +978,7 @@ ivas_error ivas_cpe_enc( floatToFixed_arr( hCPE->hCoreCoder[0]->voicing, hCPE->hCoreCoder[0]->voicing_fx, 15, 3 ); f2me( hCPE->hStereoClassif->ave_ener_L, &hCPE->hStereoClassif->ave_ener_L_fx, &hCPE->hStereoClassif->ave_ener_L_fx_e ); f2me( hCPE->hStereoClassif->ave_ener_R, &hCPE->hStereoClassif->ave_ener_R_fx, &hCPE->hStereoClassif->ave_ener_R_fx_e ); - f2me( hCPE->hStereoMdct->hItd->currFlatness, &hCPE->hStereoMdct->hItd->currFlatness_fx, &hCPE->hStereoMdct->hItd->currFlatness_fx_e ); + hCPE->hStereoMdct->hItd->currFlatness_fx = float_to_fix16( hCPE->hStereoMdct->hItd->currFlatness, Q7 ); floatToFixed_arrL( hCPE->hStereoClassif->xtalk_score_buf, hCPE->hStereoClassif->xtalk_score_buf_fx, 31, XTALK_SCORE_BUF_LEN ); 
hCPE->hStereoClassif->xtalk_wscore_fx = floatToFixed( hCPE->hStereoClassif->xtalk_wscore, 31 ); hCPE->hStereoClassif->relE_0_1_fx = floatToFixed( hCPE->hStereoClassif->relE_0_1, 31 ); @@ -1441,7 +1685,9 @@ ivas_error ivas_cpe_enc( tdm_SM_or_LRTD_Pri = 0; tdm_ratio_idx = -1; tdm_ratio_idx_SM = -1; +#ifndef IVAS_FLOAT_FIXED tdm_last_ratio = 0; +#endif /*------------------------------------------------------------------* @@ -2811,7 +3057,75 @@ free( hCPE ); return; } +#ifdef IVAS_FLOAT_FIXED +/*------------------------------------------------------------------------- + * stereo_mode_combined_format_enc() + * + * Set stereo format in a combined format + *-------------------------------------------------------------------------*/ + +static void stereo_mode_combined_format_enc_fx( + const Encoder_Struct *st_ivas, /* i : encoder main structure */ + CPE_ENC_HANDLE hCPE /* i/o: CPE handle */ +) +{ + ENCODER_CONFIG_HANDLE hEncoderConfig; + Word32 element_brate_ref; + + hEncoderConfig = st_ivas->hEncoderConfig; + + IF( EQ_16( hEncoderConfig->ivas_format, MASA_ISM_FORMAT ) ) + { + element_brate_ref = hCPE->element_brate; + move32(); + + test(); + test(); + test(); + test(); + IF( EQ_16( st_ivas->ism_mode, ISM_MASA_MODE_DISC ) && + ( ( EQ_16( hEncoderConfig->nchan_ism, 3 ) && EQ_32( hEncoderConfig->ivas_total_brate, IVAS_96k ) ) || + ( EQ_16( hEncoderConfig->nchan_ism, 4 ) && EQ_32( hEncoderConfig->ivas_total_brate, IVAS_128k ) ) ) ) + { + IF( GT_32( L_add( hCPE->element_brate, hCPE->brate_surplus ), IVAS_64k ) ) + { + st_ivas->hMasa->data.hOmasaData->omasa_stereo_sw_cnt = 0; + move16(); + } + ELSE + { + st_ivas->hMasa->data.hOmasaData->omasa_stereo_sw_cnt = add( st_ivas->hMasa->data.hOmasaData->omasa_stereo_sw_cnt, 1 ); + move16(); + st_ivas->hMasa->data.hOmasaData->omasa_stereo_sw_cnt = s_min( st_ivas->hMasa->data.hOmasaData->omasa_stereo_sw_cnt, OMASA_STEREO_SW_CNT_MAX ); + move16(); + } + + IF( LT_16( st_ivas->hMasa->data.hOmasaData->omasa_stereo_sw_cnt, 
OMASA_STEREO_SW_CNT_MAX ) ) + { + hCPE->element_mode = IVAS_CPE_MDCT; + move16(); + hCPE->element_brate = IVAS_64k; + move32(); + hCPE->brate_surplus = L_sub( hCPE->brate_surplus, L_sub( hCPE->element_brate, element_brate_ref ) ); + move32(); + } + + /* write OMASA stereo mode signalling */ + IF( EQ_16( hCPE->element_mode, IVAS_CPE_MDCT ) ) + { + push_indice( hCPE->hCoreCoder[0]->hBstr, IND_SMODE_OMASA, 1, NBITS_ELEMENT_MODE ); + } + ELSE + { + push_indice( hCPE->hCoreCoder[0]->hBstr, IND_SMODE_OMASA, 0, NBITS_ELEMENT_MODE ); + } + } + } + + return; +} +#else /*------------------------------------------------------------------------- * stereo_mode_combined_format_enc() * @@ -2867,3 +3181,4 @@ static void stereo_mode_combined_format_enc( return; } +#endif diff --git a/lib_enc/ivas_front_vad.c b/lib_enc/ivas_front_vad.c index e466d2763ef90ce3c3fa7223b6281030a8cdd3e8..c3c181e12de42aca1b0175f94e1725861b68b133 100644 --- a/lib_enc/ivas_front_vad.c +++ b/lib_enc/ivas_front_vad.c @@ -125,7 +125,7 @@ ivas_error front_vad( { for ( n = 0; n < n_chan; n++ ) { - front_vad_destroy( &hFrontVads[n] ); + front_vad_destroy_fx( &hFrontVads[n] ); hFrontVads[n] = NULL; } } @@ -248,47 +248,38 @@ ivas_error front_vad_fx( Encoder_State *st, /* i/o: encoder state structure */ const ENCODER_CONFIG_HANDLE hEncoderConfig, /* i : configuration structure */ FRONT_VAD_ENC_HANDLE *hFrontVads, /* i/o: FrontVad handles */ - const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ - const int16_t input_frame, /* i : frame length */ - int16_t vad_flag_dtx[], /* o : HE-SAD flag with additional DTX HO */ - float fr_bands[][2 * NB_BANDS], /* i : energy in frequency bands */ - float Etot_LR[], /* o : total energy Left & Right channel */ - float lf_E[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels */ - int16_t localVAD_HE_SAD[], /* o : HE-SAD flag without hangover, LR channels */ - int16_t vad_hover_flag[], /* o : VAD hangover flag */ - float band_energies_LR[2 * 
NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN*/ - float *PS_out, /* o : energy spectrum */ - float *Bin_E_out, /* o : log-energy spectrum of the current frame */ + const Word16 MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ + const Word16 input_frame, /* i : frame length */ + Word16 vad_flag_dtx[], /* o : HE-SAD flag with additional DTX HO */ + Word32 fr_bands_fx[][2 * NB_BANDS], /* o : energy in frequency bands Q_buffer[n] + QSCALE + 2 */ + Word16 Etot_LR_fx[], /* o : total energy Left & Right channel Q8 */ + Word32 lf_E_fx[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels Q_buffer[n] + QSCALE */ + Word16 localVAD_HE_SAD[], /* o : HE-SAD flag without hangover, LR channels */ + Word16 vad_hover_flag[], /* o : VAD hangover flag */ + Word32 band_energies_LR_fx[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN Q_buffer[1] + QSCALE + 2 - band_ener_guardbits*/ + Word32 *PS_out_fx, /* o : energy spectrum Q_buffer + QSCALE */ + Word16 *Bin_E_out_fx, /* o : log-energy spectrum of the current frame Q7 */ Word16 Q_inp, - Word16 Q_add ) + Word16 *Q_buffer, + Word16 Q_add, + Word16 *front_create_flag ) { ENC_CORE_HANDLE *sts; - float band_energies[2 * NB_BANDS]; /* energy in critical bands without minimum noise floor E_MIN */ Word32 band_energies_fx[2 * NB_BANDS]; /* energy in critical bands without minimum noise floor E_MIN */ Word32 PS_fx[128]; - // Word32 *PS_out_fx; - // Word32 PS_out_buff_fx[128]; Word16 *lgBin_E_fx; - float PS[L_FRAME / 2]; /* speech/music clasif. 
parameters */ - // float snr_sum_he; /* HE SAD parameters */ Word16 snr_sum_he_fx; - float Bin_E[L_FFT]; /* per bin log energy spectrum for mid-frame */ - Word32 fr_bands_fx[2][2 * NB_BANDS] = { { 0 } }; // Word32 fr_bands_cp_fx[2 * NB_BANDS] = { 0 }; Word32 Bin_E_fx[L_FFT] = { 0 }; /* per bin log energy spectrum for mid-frame */ Word32 Bin_E_old_fx[L_FFT / 2] = { 0 }; /* old per bin log energy spectrum for mid-frame */ Word16 fft_buffLR_fx[2 * L_FFT]; /* fft buffer */ - Word32 lf_E_fx[2][2 * VOIC_BINS]; - Word16 Etot_LR_fx[2]; - Word32 band_energies_LR_fx[2 * NB_BANDS]; - int16_t n, n_chan, dummy; + // Word16 front_create_flag = 0; + Word16 n, n_chan, dummy; Word8 dummy_short; - int16_t element_mode, last_element_mode; + Word16 element_mode, last_element_mode; ivas_error error; - Word16 Q_buffer[2]; - Word16 Q_new = 0, band_ener_guardbits = 0; - Word16 Qband, mem_decim_size, Q_band[2]; + Word16 Qband, mem_decim_size; error = IVAS_ERR_OK; push_wmops( "front_vad" ); @@ -296,145 +287,116 @@ ivas_error front_vad_fx( if ( st != NULL ) { lgBin_E_fx = &st->lgBin_E_fx[0]; + move16(); } - if ( hCPE != NULL ) + IF( hCPE != NULL ) { n_chan = CPE_CHANNELS; sts = &hCPE->hCoreCoder[0]; element_mode = hCPE->element_mode; + move16(); last_element_mode = hCPE->last_element_mode; + move16(); } - else if ( st != NULL ) + ELSE IF( st != NULL ) { n_chan = 1; + move16(); sts = &st; element_mode = IVAS_SCE; + move16(); last_element_mode = IVAS_SCE; + move16(); } - else + ELSE { assert( 0 && "Either CPE or SCE must be given!" 
); return IVAS_ERR_INTERNAL_FATAL; } - // snr_sum_he = 0; - for ( n = 0; n < n_chan; n++ ) + FOR( n = 0; n < n_chan; n++ ) { localVAD_HE_SAD[n] = 0; + move16(); vad_hover_flag[n] = 0; + move16(); vad_flag_dtx[n] = 1; + move16(); } /*------------------------------------------------------------------* * Allocate/deallocate hFrontVad handles in case of element_mode change *-----------------------------------------------------------------*/ - - if ( sts[0]->ini_frame > 0 && MCT_flag == 0 && last_element_mode != element_mode ) + test(); + test(); + IF( sts[0]->ini_frame > 0 && MCT_flag == 0 && NE_16( last_element_mode, element_mode ) ) { - if ( element_mode == IVAS_CPE_MDCT ) + IF( EQ_16( element_mode, IVAS_CPE_MDCT ) ) { - if ( hFrontVads[0] != NULL ) + IF( hFrontVads[0] != NULL ) { - for ( n = 0; n < n_chan; n++ ) + FOR( n = 0; n < n_chan; n++ ) { - front_vad_destroy( &hFrontVads[n] ); + front_vad_destroy_fx( &hFrontVads[n] ); hFrontVads[n] = NULL; } } } - else + ELSE { - if ( sts[0]->Opt_DTX_ON && hFrontVads[0] == NULL ) + test(); + IF( sts[0]->Opt_DTX_ON && hFrontVads[0] == NULL ) { for ( n = 0; n < n_chan; n++ ) { - if ( ( error = front_vad_create( &hFrontVads[n], hEncoderConfig ) ) != IVAS_ERR_OK ) + *front_create_flag = 1; + move16(); + IF( NE_32( ( error = front_vad_create_fx( &hFrontVads[n], hEncoderConfig ) ), IVAS_ERR_OK ) ) { return error; } - /* if ( ( error = front_vad_create_fx( &hFrontVads[n], hEncoderConfig ) ) != IVAS_ERR_OK ) - { - return error; - }*/ } } } } - + test(); /* Only run VAD if DTX is on and TD stereo or unified stereo is selected */ - if ( hFrontVads[0] != NULL && element_mode != IVAS_CPE_MDCT ) + IF( hFrontVads[0] != NULL && NE_16( element_mode, IVAS_CPE_MDCT ) ) { /*------------------------------------------------------------------* * VAD *-----------------------------------------------------------------*/ - - set_zero( band_energies_LR, 2 * NB_BANDS ); - /* Q_new = Q15; - for ( n = 0; n < n_chan; n++ ) - { - Q_new =min(Q_new, 
Q_factor_arr( hFrontVads[n]->buffer_12k8, 384 )); - } - for ( n = 0; n < n_chan; n++ ) - { - floatToFixed_arr( hFrontVads[n]->mem_decim, hFrontVads[n]->mem_decim_fx, Q_inp, 90 ); - hFrontVads[n]->mem_preemph_fx = (Word16) floatToFixed( hFrontVads[n]->mem_preemph, Q_inp ); - }*/ + set_val_Word32( band_energies_LR_fx, 0, 2 * NB_BANDS ); Qband = -1; - Word16 Q_new_old = ( Q_inp - Qband ) + Q_add; - for ( n = 0; n < n_chan; n++ ) + move16(); + Word16 Q_new_old = add( sub( Q_inp, Qband ), Q_add ); + + IF( *front_create_flag ) { - floatToFixed_arr( hFrontVads[n]->mem_decim, hFrontVads[n]->mem_decim_fx, Q_inp, 90 ); - hFrontVads[n]->mem_preemph_fx = (Word16) floatToFixed( hFrontVads[n]->mem_preemph, Q_inp - 1 ); - Q_buffer[n] = Q_factor_arr( hFrontVads[n]->buffer_12k8 + L_FFT, L_FFT / 2 ); - floatToFixed_arr( hFrontVads[n]->buffer_12k8, hFrontVads[n]->buffer_12k8_fx, Q_buffer[n], 384 ); - hFrontVads[n]->hNoiseEst->Etot_h_32fx = (Word32) ( hFrontVads[n]->hNoiseEst->Etot_h * 16777216.0 ); - hFrontVads[n]->hNoiseEst->Etot_l_32fx = (Word32) ( hFrontVads[n]->hNoiseEst->Etot_l * 16777216.0 ); - hFrontVads[n]->hNoiseEst->Etot_l_lp_32fx = (Word32) ( hFrontVads[n]->hNoiseEst->Etot_l_lp * 16777216.0 ); - hFrontVads[n]->hNoiseEst->Etot_last_32fx = (Word32) ( hFrontVads[n]->hNoiseEst->Etot_last * 16777216.0 ); - hFrontVads[n]->hNoiseEst->Etot_v_h2_32fx = (Word32) ( hFrontVads[n]->hNoiseEst->Etot_v_h2 * 16777216.0 ); - hFrontVads[n]->hNoiseEst->Etot_lp_32fx = (Word32) ( hFrontVads[n]->hNoiseEst->Etot_lp * 16777216.0 ); - hFrontVads[n]->hNoiseEst->sign_dyn_lp_32fx = (Word32) ( hFrontVads[n]->hNoiseEst->sign_dyn_lp * 16777216.0 ); - - hFrontVads[n]->lp_noise_fx = (Word16) floatToFixed( hFrontVads[n]->lp_noise, Q8 ); - hFrontVads[n]->lp_speech_fx = (Word16) floatToFixed( hFrontVads[n]->lp_speech, Q8 ); - sts[n]->lp_speech_fx = (Word16) floatToFixed( sts[n]->lp_speech, Q8 ); - sts[n]->lp_noise_fx = (Word16) floatToFixed( sts[n]->lp_noise, Q8 ); - floatToFixed_arrL( 
hFrontVads[n]->hNoiseEst->bckr, hFrontVads[n]->hNoiseEst->bckr_fx, Q_new_old + QSCALE + 2, 20 ); - floatToFixed_arrL( hFrontVads[n]->hNoiseEst->enrO, hFrontVads[n]->hNoiseEst->enrO_fx, Q_new_old + QSCALE + 2, 20 ); - sts[n]->flag_noisy_speech_snr_fx = (Word8) sts[n]->flag_noisy_speech_snr; - hFrontVads[n]->hVAD->bcg_flux_fx = (Word16) hFrontVads[n]->hVAD->bcg_flux * ( 1 << 4 ); - - - hFrontVads[n]->hVAD->snr_sum_vad_fx = (Word16) ( hFrontVads[n]->hVAD->snr_sum_vad * 32767 ); - hFrontVads[n]->hVAD->prim_act_quick_fx = (Word16) ( hFrontVads[n]->hVAD->prim_act_quick * 32767 ); - hFrontVads[n]->hVAD->prim_act_slow_fx = (Word16) ( hFrontVads[n]->hVAD->prim_act_slow * 32767 ); - hFrontVads[n]->hVAD->prim_act_fx = (Word16) ( hFrontVads[n]->hVAD->prim_act * 32767 ); - hFrontVads[n]->hVAD->prim_act_quick_he_fx = (Word16) ( hFrontVads[n]->hVAD->prim_act_quick_he * 32767 ); - hFrontVads[n]->hVAD->prim_act_slow_he_fx = (Word16) ( hFrontVads[n]->hVAD->prim_act_slow_he * 32767 ); - hFrontVads[n]->hVAD->prim_act_he_fx = (Word16) ( hFrontVads[n]->hVAD->prim_act_he * 32767 ); + FOR( n = 0; n < n_chan; n++ ) + { + scale_sig32( hFrontVads[n]->hNoiseEst->bckr_fx, 20, sub( add( Q_new_old, QSCALE ), Q11 ) ); + scale_sig32( hFrontVads[n]->hNoiseEst->enrO_fx, 20, sub( add( Q_new_old, QSCALE ), Q11 ) ); + hFrontVads[n]->lp_speech_fx = shr( hFrontVads[n]->lp_speech_fx, 1 ); + move16(); + } } - floatToFixed_arrL( &band_energies_LR[0], &band_energies_LR_fx[0], Q_new_old + QSCALE + 2, 40 ); band_ener_guardbits = find_guarded_bits_fx( 2 * NB_BANDS ); - for ( n = 0; n < n_chan; n++ ) + FOR( n = 0; n < n_chan; n++ ) { FRONT_VAD_ENC_HANDLE hFrontVad; hFrontVad = hFrontVads[n]; - // hFrontVad->mem_preemph_fx = (Word16) floatToFixed( hFrontVad->mem_preemph, Q_inp ); + /* Move previous frame 12k8 signal */ MVR2R_WORD16( hFrontVad->buffer_12k8_fx + L_FFT, hFrontVad->buffer_12k8_fx, L_FFT / 2 ); - // mvr2r_Word16( hFrontVad->buffer_12k8_fx + L_FFT, hFrontVad->buffer_12k8_fx, L_FFT / 2 ); #if 1 - // 
floatToFixed_arr( hFrontVad->mem_decim, hFrontVad->mem_decim_fx, Q_inp, 90 ); - // modify_Fs( sts[n]->input, input_frame, sts[0]->input_Fs, hFrontVad->buffer_12k8 + L_FFT / 2, INT_FS_12k8, hFrontVad->mem_decim, ( sts[0]->max_bwidth == NB ) ); + /* Resample to 12k8 */ modify_Fs_fx( sts[n]->input_fx, input_frame, sts[0]->input_Fs, hFrontVad->buffer_12k8_fx + L_FFT / 2, INT_FS_12k8, hFrontVad->mem_decim_fx, ( sts[0]->max_bwidth == NB ), &Qband, &mem_decim_size ); - // fixedToFloat_arr( hFrontVad->buffer_12k8_fx + L_FFT / 2, hFrontVad->buffer_12k8 + L_FFT / 2, Q_inp + Qband, 384 - L_FFT / 2 ); - // fixedToFloat_arr( hFrontVad->mem_decim_fx, hFrontVad->mem_decim, Q_inp, 90 ); - // Scale_sig( hFrontVad->buffer_12k8_fx + L_FFT / 2, 384 - L_FFT / 2, Q9 - (Q_inp + Qband )); - Q_band[n] = Qband; + #else modify_Fs( sts[n]->input, input_frame, sts[0]->input_Fs, hFrontVad->buffer_12k8 + L_FFT / 2, INT_FS_12k8, hFrontVad->mem_decim, ( sts[0]->max_bwidth == NB ) ); @@ -444,191 +406,71 @@ ivas_error front_vad_fx( preemph( hFrontVad->buffer_12k8 + L_FFT / 2, PREEMPH_FAC_FLT, L_FRAME, &hFrontVad->mem_preemph ); #else /* Preemphasis */ - // hFrontVad->mem_preemph_fx = (Word16) floatToFixed( hFrontVad->mem_preemph, Q_inp + Qband ); hFrontVad->mem_preemph_fx = shl( hFrontVad->mem_preemph_fx, -1 - Qband ); - /*Word16 temp = (Word16) floatToFixed( hFrontVad->mem_preemph, Q_inp + Qband ); - if ( abs(temp - hFrontVad->mem_preemph_fx) > 2 ) - { - printf( "%d\n", temp ); - }*/ - // preemph( hFrontVad->buffer_12k8 + L_FFT / 2, PREEMPH_FAC_FLT, L_FRAME, &hFrontVad->mem_preemph ); PREEMPH_FX( hFrontVad->buffer_12k8_fx + L_FFT / 2, PREEMPH_FAC, L_FRAME, &hFrontVad->mem_preemph_fx ); - // hFrontVad->mem_preemph = fixedToFloat( hFrontVad->mem_preemph_fx, Q_inp + Qband ); - // fixedToFloat_arr( hFrontVad->buffer_12k8_fx + L_FFT / 2, hFrontVad->buffer_12k8 + L_FFT / 2, Q_inp + Qband, 384 - L_FFT / 2 ); #endif #if 0 analy_sp( IVAS_CPE_TD, hCPE, sts[0]->input_Fs, hFrontVad->buffer_12k8 + L_FFT / 2 - 3 
* ( L_SUBFR / 2 ), Bin_E, Bin_E_old, fr_bands[n], lf_E[n], &Etot_LR[n], sts[0]->min_band, sts[0]->max_band, band_energies, PS, fft_buffLR ); #else Word16 Scale_fac[2]; - Q_new = ( Q_inp - Qband ); - // if ( Q_buffer[n] < Q_inp + Qband ) - //{ - // Scale_sig( hFrontVad->buffer_12k8_fx, L_FFT / 2, Q_buffer[n] - Q_new ); - // //Scale_sig( hFrontVad->buffer_12k8_fx + L_FFT / 2, 384 - L_FFT / 2, Q_buffer[n] -( Q_inp + Qband) ); - // Q_new = Q_buffer[n]; - // // Scale_sig( hFrontVad->buffer_12k8_fx, L_FFT / 2, Q_inp + Qband - Q_buffer[n] ); - // } - // else - //{ - // Scale_sig( hFrontVad->buffer_12k8_fx + L_FFT / 2, 384 - L_FFT / 2, ( Q_inp + Qband ) - Q_buffer[n] ); - // Q_new = ( Q_inp + Qband ); - // } - - // Q_new = Q_factor_arr( hFrontVad->buffer_12k8, 384 ); + Q_new = add( sub( Q_inp, Qband ), Q_add ); Scale_sig( hFrontVad->buffer_12k8_fx, L_FFT / 2, Q_new - Q_buffer[n] ); - Scale_sig( hFrontVad->buffer_12k8_fx + L_FFT / 2, 384 - L_FFT / 2, Q_new - ( Q_inp + Qband ) ); + Scale_sig( hFrontVad->buffer_12k8_fx + L_FFT / 2, 384 - L_FFT / 2, Q_new - add( Q_inp, Qband ) ); Q_buffer[n] = Q_new; - // floatToFixed_arr( hFrontVad->buffer_12k8, hFrontVad->buffer_12k8_fx, Q_new, 384 ); Word32 Le_min_scaled = L_shr_r( L_add( L_shr( E_MIN_FXQ15, sub( 14, add( Q_new, QSCALE ) ) ), 1 ), 1 ); Le_min_scaled = L_shl( Le_min_scaled, 2 ); ivas_analy_sp_fx( IVAS_CPE_TD, hCPE, sts[0]->input_Fs, hFrontVad->buffer_12k8_fx + L_FFT / 2 - 3 * ( L_SUBFR / 2 ), Q_new, fr_bands_fx[n], lf_E_fx[n], &Etot_LR_fx[n], sts[0]->min_band, sts[0]->max_band, Le_min_scaled, Scale_fac, Bin_E_fx, Bin_E_old_fx, PS_fx, lgBin_E_fx, band_energies_fx, fft_buffLR_fx ); - // fixedToFloat_arrL( fr_bands_fx[n], fr_bands[n], Q_new + QSCALE + 2, 40 ); - // fixedToFloat_arrL( band_energies_fx, band_energies, Q_new + QSCALE + 2, 40 ); - /* fixedToFloat_arrL( lf_E_fx[n], lf_E[n], Q_new + QSCALE - 2, 148 ); - if ( lgBin_E_fx != NULL ) - { - fixedToFloat_arr( lgBin_E_fx, Bin_E, Q7, 128 ); - } - fixedToFloat_arrL( PS_fx, PS, 
Q_new + QSCALE, 128 ); - Etot_LR[n] = fixedToFloat( Etot_LR_fx[n], Q8 );*/ + #endif /* add up energies for later calculating average of channel energies */ - // v_add( &band_energies[0], &band_energies_LR[0], &band_energies_LR[0], 2 * NB_BANDS ); - // floatToFixed_arrL( &band_energies_LR[0], &band_energies_LR_fx[0], Q_new + QSCALE + 2, 40 ); - // Scale_sig32( &band_energies_LR_fx[0], ( Q_new + QSCALE + 2 ) - ( Q_new_old + QSCALE + 2 ) - band_ener_guardbits , 40 ); - // Scale_sig32( band_energies_fx,( Q_new + QSCALE + 2)-( Q_new_old + QSCALE + 2 ), 40 ); - + // Scale_sig32( &band_energies_LR_fx[0], ( Q_new + QSCALE + 2 ) - ( Q_new_old + QSCALE + 2 - band_ener_guardbits ), 40 ); + Q_new_old = Q_new; v_add_fixed( &band_energies_fx[0], &band_energies_LR_fx[0], &band_energies_LR_fx[0], 2 * NB_BANDS, band_ener_guardbits ); - // fixedToFloat_arrL( &band_energies_LR_fx[0], &band_energies_LR[0], Q_new + QSCALE - band_ener_guardbits, 40 ); #if 0 noise_est_pre( Etot_LR[n], hFrontVads[0]->ini_frame, hFrontVad->hNoiseEst, 0, 0, 0 ); #else - // Word32 Etot_fx = (Word32) ( Etot_LR[n] * ( 1 << 24 ) ); Word32 Etot_fx = L_deposit_h( Etot_LR_fx[n] ); - /* hFrontVad->hNoiseEst->Etot_h_32fx = (Word32) ( hFrontVad->hNoiseEst->Etot_h * 16777216.0 ); - hFrontVad->hNoiseEst->Etot_l_32fx = (Word32) ( hFrontVad->hNoiseEst->Etot_l * 16777216.0 ); - hFrontVad->hNoiseEst->Etot_l_lp_32fx = (Word32) ( hFrontVad->hNoiseEst->Etot_l_lp * 16777216.0 ); - hFrontVad->hNoiseEst->Etot_last_32fx = (Word32) ( hFrontVad->hNoiseEst->Etot_last * 16777216.0 ); - hFrontVad->hNoiseEst->Etot_v_h2_32fx = (Word32) ( hFrontVad->hNoiseEst->Etot_v_h2 * 16777216.0 ); - hFrontVad->hNoiseEst->Etot_lp_32fx = (Word32) ( hFrontVad->hNoiseEst->Etot_lp * 16777216.0 ); - hFrontVad->hNoiseEst->sign_dyn_lp_32fx = (Word32) ( hFrontVad->hNoiseEst->sign_dyn_lp * 16777216.0 );*/ noise_est_pre_32fx( Etot_fx, hFrontVads[0]->ini_frame, hFrontVad->hNoiseEst, 0, 0, 0 ); - /* hFrontVad->hNoiseEst->Etot_h = (float) ( 
hFrontVad->hNoiseEst->Etot_h_32fx / 16777216.0 ); - hFrontVad->hNoiseEst->Etot_l = (float) ( hFrontVad->hNoiseEst->Etot_l_32fx / 16777216.0 ); - hFrontVad->hNoiseEst->Etot_l_lp = (float) ( hFrontVad->hNoiseEst->Etot_l_lp_32fx / 16777216.0 ); - hFrontVad->hNoiseEst->Etot_last = (float) ( hFrontVad->hNoiseEst->Etot_last_32fx / 16777216.0 ); - hFrontVad->hNoiseEst->Etot_v_h2 = (float) ( hFrontVad->hNoiseEst->Etot_v_h2_32fx / 16777216.0 ); - hFrontVad->hNoiseEst->Etot_lp = (float) ( hFrontVad->hNoiseEst->Etot_lp_32fx / 16777216.0 ); - hFrontVad->hNoiseEst->sign_dyn_lp = (float) ( hFrontVad->hNoiseEst->sign_dyn_lp_32fx / 16777216.0 );*/ - // hFrontVad->hNoiseEst->Etot_v_h2 = (float) ( hFrontVad->hNoiseEst->Etot_v_h2_32fx / 16777216.0 ); - // hFrontVad->hNoiseEst->sign_dyn_lp = (float) ( hFrontVad->hNoiseEst->sign_dyn_lp_32fx / 16777216.0 ); #endif /* wb_vad */ #if 0 hFrontVad->hVAD->vad_flag = wb_vad( sts[n], fr_bands[n], &dummy, &dummy, &dummy, &snr_sum_he, &localVAD_HE_SAD[n], &dummy, hFrontVad->hVAD, hFrontVad->hNoiseEst, hFrontVad->lp_speech, hFrontVad->lp_noise ); #else - // floatToFixed_arrL( fr_bands[n], fr_bands_fx[n], Q_new + QSCALE, 40 ); - /*for ( int i = 0; i < 40; i++ ) - { - fr_bands_cp_fx[i] = L_shr_sat( fr_bands_fx[n][i], 2 ); - }*/ - // Scale_sig32( fr_bands_fx[n], -2, 40 ); Scale_sig32( hFrontVads[n]->hNoiseEst->bckr_fx, Q_new + QSCALE - ( Q_new_old + QSCALE ), 20 ); Scale_sig32( hFrontVads[n]->hNoiseEst->enrO_fx, Q_new + QSCALE - ( Q_new_old + QSCALE ), 20 ); - /* hFrontVad->lp_noise_fx = (Word16) floatToFixed( hFrontVad->lp_noise, Q8 ); - hFrontVad->lp_speech_fx = (Word16) floatToFixed( hFrontVad->lp_speech, Q8 ); - sts[n]->lp_speech_fx = (Word16) floatToFixed( sts[n]->lp_speech, Q8 ); - sts[n]->lp_noise_fx = (Word16) floatToFixed( sts[n]->lp_noise, Q8 ); - floatToFixed_arrL( hFrontVad->hNoiseEst->bckr, hFrontVad->hNoiseEst->bckr_fx, Q_new + QSCALE, 20 ); - floatToFixed_arrL( hFrontVad->hNoiseEst->enrO, hFrontVad->hNoiseEst->enrO_fx, Q_new + QSCALE, 
20 ); - sts[n]->flag_noisy_speech_snr_fx = (Word8) sts[n]->flag_noisy_speech_snr; - hFrontVad->hVAD->bcg_flux_fx = (Word16) hFrontVad->hVAD->bcg_flux * ( 1 << 4 ); - hFrontVad->hNoiseEst->Etot_v_h2_fx = (Word16) ( hFrontVad->hNoiseEst->Etot_v_h2 * ( 1 << 8 ) ); - hFrontVad->hNoiseEst->sign_dyn_lp_fx = (Word16) ( hFrontVad->hNoiseEst->sign_dyn_lp * ( 1 << 8 ) ); - hFrontVad->hVAD->snr_sum_vad_fx = (Word16) ( hFrontVad->hVAD->snr_sum_vad * 32767 ); - hFrontVad->hVAD->prim_act_quick_fx = (Word16) ( hFrontVad->hVAD->prim_act_quick * 32767 ); - hFrontVad->hVAD->prim_act_slow_fx = (Word16) ( hFrontVad->hVAD->prim_act_slow * 32767 ); - hFrontVad->hVAD->prim_act_fx = (Word16) ( hFrontVad->hVAD->prim_act * 32767 ); - hFrontVad->hVAD->prim_act_quick_he_fx = (Word16) ( hFrontVad->hVAD->prim_act_quick_he * 32767 ); - hFrontVad->hVAD->prim_act_slow_he_fx = (Word16) ( hFrontVad->hVAD->prim_act_slow_he * 32767 ); - hFrontVad->hVAD->prim_act_he_fx = (Word16) ( hFrontVad->hVAD->prim_act_he * 32767 );*/ hFrontVad->hNoiseEst->sign_dyn_lp_fx = extract_h( hFrontVad->hNoiseEst->sign_dyn_lp_32fx ); hFrontVad->hNoiseEst->Etot_v_h2_fx = extract_h( hFrontVad->hNoiseEst->Etot_v_h2_32fx ); hFrontVad->hVAD->vad_flag = wb_vad_ivas_fx( sts[n], fr_bands_fx[n], &dummy, &dummy, &dummy, &snr_sum_he_fx, &localVAD_HE_SAD[n], &dummy_short, Q_new, hFrontVad->hVAD, hFrontVad->hNoiseEst, hFrontVad->lp_speech_fx, hFrontVad->lp_noise_fx ); - // snr_sum_he = (float)(snr_sum_he_fx / ONE_IN_Q8 ); - /* hFrontVad->hNoiseEst->Etot_v_h2 = (float) ( hFrontVad->hNoiseEst->Etot_v_h2_fx / ( 256.0 ) ); - hFrontVad->hNoiseEst->sign_dyn_lp = (float) ( hFrontVad->hNoiseEst->sign_dyn_lp_fx / ( 256.0 ) ); - hFrontVad->hVAD->bcg_flux = (float) ( hFrontVad->hVAD->bcg_flux_fx / ( 16.0 ) ); - hFrontVad->hVAD->snr_sum_vad = (float) ( hFrontVad->hVAD->snr_sum_vad_fx / 32767.0 ); - hFrontVad->hVAD->prim_act_quick = (float) ( hFrontVad->hVAD->prim_act_quick_fx / 32767.0 ); - hFrontVad->hVAD->prim_act_slow = (float) ( 
hFrontVad->hVAD->prim_act_slow_fx / 32767.0 ); - hFrontVad->hVAD->prim_act = (float) ( hFrontVad->hVAD->prim_act_fx / 32767.0 ); - hFrontVad->hVAD->prim_act_quick_he = (float) ( hFrontVad->hVAD->prim_act_quick_he_fx / 32767.0 ); - hFrontVad->hVAD->prim_act_slow_he = (float) ( hFrontVad->hVAD->prim_act_slow_he_fx / 32767.0 ); - hFrontVad->hVAD->prim_act_he = (float) ( hFrontVad->hVAD->prim_act_he_fx / 32767.0 );*/ - Q_new_old = Q_new; #endif - if ( n == 0 && n_chan > 1 && last_element_mode == IVAS_CPE_DFT ) + test(); + test(); + if ( n == 0 && GT_16( n_chan, 1 ) && EQ_16( last_element_mode, IVAS_CPE_DFT ) ) { sts[1]->last_coder_type = sts[0]->last_coder_type; + move16(); } #if 0 /* DTX hangover addition */ vad_flag_dtx[n] = dtx_hangover_addition( sts[n], hFrontVad->hVAD->vad_flag, hFrontVad->lp_speech - hFrontVad->lp_noise, 0 /* <- no cldfb addition */, &vad_hover_flag[n], hFrontVad->hVAD, hFrontVad->hNoiseEst, &hFrontVads[n]->rem_dtx_ho ); #else vad_flag_dtx[n] = ivas_dtx_hangover_addition_fx( sts[n], hFrontVad->hVAD->vad_flag, hFrontVad->lp_speech_fx - hFrontVad->lp_noise_fx, 0 /* <- no cldfb addition */, &vad_hover_flag[n], hFrontVad->hVAD, hFrontVad->hNoiseEst, &hFrontVads[n]->rem_dtx_ho ); - + move16(); #endif - if ( n_chan == 1 ) + if ( EQ_16( n_chan, 1 ) ) { sts[n]->vad_flag = hFrontVad->hVAD->vad_flag; + move16(); } } - for ( n = 0; n < n_chan; n++ ) - { - fixedToFloat_arr( hFrontVads[n]->mem_decim_fx, hFrontVads[n]->mem_decim, Q_inp, 90 ); - hFrontVads[n]->mem_preemph = fixedToFloat( hFrontVads[n]->mem_preemph_fx, Q_inp + Q_band[n] ); - fixedToFloat_arr( hFrontVads[n]->buffer_12k8_fx, hFrontVads[n]->buffer_12k8, Q_buffer[n], 384 ); - fixedToFloat_arrL( fr_bands_fx[n], fr_bands[n], Q_buffer[n] + QSCALE + 2, 40 ); - fixedToFloat_arrL( lf_E_fx[n], lf_E[n], Q_buffer[n] + QSCALE, 148 ); - if ( lgBin_E_fx != NULL ) - { - fixedToFloat_arr( lgBin_E_fx, Bin_E, Q7, 128 ); - } - fixedToFloat_arrL( PS_fx, PS, Q_new + QSCALE, 128 ); - Etot_LR[n] = fixedToFloat( 
Etot_LR_fx[n], Q8 ); - - hFrontVads[n]->hNoiseEst->Etot_h = (float) ( hFrontVads[n]->hNoiseEst->Etot_h_32fx / 16777216.0 ); - hFrontVads[n]->hNoiseEst->Etot_l = (float) ( hFrontVads[n]->hNoiseEst->Etot_l_32fx / 16777216.0 ); - hFrontVads[n]->hNoiseEst->Etot_l_lp = (float) ( hFrontVads[n]->hNoiseEst->Etot_l_lp_32fx / 16777216.0 ); - hFrontVads[n]->hNoiseEst->Etot_last = (float) ( hFrontVads[n]->hNoiseEst->Etot_last_32fx / 16777216.0 ); - // hFrontVads[n]->hNoiseEst->Etot_v_h2 = (float) ( hFrontVads[n]->hNoiseEst->Etot_v_h2_32fx / 16777216.0 ); - hFrontVads[n]->hNoiseEst->Etot_lp = (float) ( hFrontVads[n]->hNoiseEst->Etot_lp_32fx / 16777216.0 ); - // hFrontVads[n]->hNoiseEst->sign_dyn_lp = (float) ( hFrontVads[n]->hNoiseEst->sign_dyn_lp_32fx / 16777216.0 ); - - hFrontVads[n]->hNoiseEst->Etot_v_h2 = (float) ( hFrontVads[n]->hNoiseEst->Etot_v_h2_fx / ( 256.0 ) ); - hFrontVads[n]->hNoiseEst->sign_dyn_lp = (float) ( hFrontVads[n]->hNoiseEst->sign_dyn_lp_fx / ( 256.0 ) ); - hFrontVads[n]->hVAD->bcg_flux = (float) ( hFrontVads[n]->hVAD->bcg_flux_fx / ( 16.0 ) ); - hFrontVads[n]->hVAD->snr_sum_vad = (float) ( hFrontVads[n]->hVAD->snr_sum_vad_fx / 32767.0 ); - hFrontVads[n]->hVAD->prim_act_quick = (float) ( hFrontVads[n]->hVAD->prim_act_quick_fx / 32767.0 ); - hFrontVads[n]->hVAD->prim_act_slow = (float) ( hFrontVads[n]->hVAD->prim_act_slow_fx / 32767.0 ); - hFrontVads[n]->hVAD->prim_act = (float) ( hFrontVads[n]->hVAD->prim_act_fx / 32767.0 ); - hFrontVads[n]->hVAD->prim_act_quick_he = (float) ( hFrontVads[n]->hVAD->prim_act_quick_he_fx / 32767.0 ); - hFrontVads[n]->hVAD->prim_act_slow_he = (float) ( hFrontVads[n]->hVAD->prim_act_slow_he_fx / 32767.0 ); - hFrontVads[n]->hVAD->prim_act_he = (float) ( hFrontVads[n]->hVAD->prim_act_he_fx / 32767.0 ); - // floatToFixed_arr( hFrontVads[n]->mem_decim, hFrontVads[n]->mem_decim_fx, Q_inp, 90 ); - // hFrontVads[n]->mem_preemph_fx = (Word16) floatToFixed( hFrontVads[n]->mem_preemph, Q_inp ); - } - fixedToFloat_arrL( band_energies_fx, 
band_energies, Q_new + QSCALE + 2, 40 ); - // fixedToFloat_arrL( &band_energies_LR_fx[0], &band_energies_LR[0], Q_new + QSCALE + 2 - band_ener_guardbits, 40 ); - if ( n_chan == CPE_CHANNELS ) + + IF( EQ_16( n_chan, CPE_CHANNELS ) ) { /* get average channel energies, adding up was already done, so only need to scale by number of channels */ #if 0 @@ -641,66 +483,51 @@ ivas_error front_vad_fx( /* Logical OR between L and R decisions */ vad_flag_dtx[0] = vad_flag_dtx[0] || vad_flag_dtx[1]; } - fixedToFloat_arrL( &band_energies_LR_fx[0], &band_energies_LR[0], Q_new + QSCALE + 2 - band_ener_guardbits, 40 ); - if ( sts[0]->hFdCngEnc != NULL ) + IF( sts[0]->hFdCngEnc != NULL ) { #if 0 resetFdCngEnc( sts[0] ); #else - sts[0]->last_totalNoise_fx = (Word16) float_to_fix16( sts[0]->last_totalNoise, Q8 ); - sts[0]->hNoiseEst->totalNoise_fx = (Word16) float_to_fix16( sts[0]->hNoiseEst->totalNoise, Q8 ); - for ( int i = 0; i < TOTALNOISE_HIST_SIZE - 1; i++ ) - { - sts[0]->totalNoise_increase_hist_fx[n] = (Word16) float_to_fix16( sts[0]->totalNoise_increase_hist[n], Q8 ); - } resetFdCngEnc_fx( sts[0] ); - sts[0]->last_totalNoise = fix16_to_float( sts[0]->last_totalNoise_fx, Q8 ); - sts[0]->hNoiseEst->totalNoise = fix16_to_float( sts[0]->hNoiseEst->totalNoise_fx, Q8 ); - for ( int i = 0; i < TOTALNOISE_HIST_SIZE - 1; i++ ) - { - sts[0]->totalNoise_increase_hist[n] = fix16_to_float( sts[0]->totalNoise_increase_hist_fx[n], Q8 ); - } - sts[0]->hFdCngEnc->hFdCngCom->init_old_flt = ( sts[0]->hFdCngEnc->hFdCngCom->init_old == 32767 ) ? 
FLT_MAX : sts[0]->hFdCngEnc->hFdCngCom->init_old_flt; #endif } - + test(); /* Part of DTX to decide if SID/NO_DATA */ - if ( vad_flag_dtx[0] == 0 && sts[0]->ini_frame > 2 ) /* CNG coding starts after 3 frames */ + IF( vad_flag_dtx[0] == 0 && GT_16( sts[0]->ini_frame, 2 ) ) /* CNG coding starts after 3 frames */ { - if ( sts[0]->fd_cng_reset_flag == 0 ) + IF( sts[0]->fd_cng_reset_flag == 0 ) { if ( hCPE != NULL ) { hCPE->element_mode = IVAS_CPE_DFT; + move16(); sts[1]->active_cnt = 0; + move16(); } } - else + ELSE { vad_flag_dtx[0] = 1; + move16(); } } - else + ELSE { vad_flag_dtx[0] = 1; + move16(); } } - if ( PS_out != NULL ) + IF( PS_out_fx != NULL ) { -#if 1 - mvr2r( PS, PS_out, L_FRAME / 2 ); -#else - mvr2r_Word32( PS_fx, PS_out_fx, L_FRAME / 2 ); - fixedToFloat_arrL( PS_out_fx, PS_out, Q_new + QSCALE, 128 ); -#endif + MVR2R_WORD32( PS_fx, PS_out_fx, L_FRAME / 2 ); } - if ( Bin_E_out != NULL ) + IF( Bin_E_out_fx != NULL ) { - mvr2r( Bin_E, Bin_E_out, L_FRAME ); + MVR2R_WORD16( lgBin_E_fx, Bin_E_out_fx, L_FRAME / 2 ); } pop_wmops(); @@ -767,7 +594,9 @@ ivas_error front_vad_create( } set_f( hFrontVad->delay_buf, 0, hFrontVad->delay_samples ); } - +#ifdef IVAS_FLOAT_FIXED + hFrontVad->delay_buf_fx = NULL; +#endif *hFrontVad_out = hFrontVad; return IVAS_ERR_OK; @@ -789,7 +618,7 @@ ivas_error front_vad_create_fx( { return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for Noise estimation\n" ) ); } - noise_est_init_fx( hFrontVad->hNoiseEst ); + noise_est_init_ivas_fx( hFrontVad->hNoiseEst ); IF( ( hFrontVad->hVAD = (VAD_HANDLE) malloc( sizeof( VAD_DATA ) ) ) == NULL ) { @@ -797,16 +626,22 @@ ivas_error front_vad_create_fx( } wb_vad_init_ivas_fx( hFrontVad->hVAD ); - hFrontVad->lp_speech_fx = 23040; /* Initialize the long-term active speech level in dB */ - hFrontVad->lp_noise_fx = 0; /* Initialize the long-term noise level in dB */ + hFrontVad->lp_speech_fx = 23040; // Q9/* Initialize the long-term active speech level in dB */ + move16(); + 
hFrontVad->lp_noise_fx = 0; /* Initialize the long-term noise level in dB */ + move16(); set16_fx( hFrontVad->mem_decim_fx, 0, shl( L_FILT_MAX, 1 ) ); set16_fx( hFrontVad->buffer_12k8_fx, 0, i_mult( 3, shr( L_FRAME, 1 ) ) ); hFrontVad->mem_preemph_fx = 0; + move16(); hFrontVad->ini_frame = 0; + move16(); hFrontVad->hVAD->vad_flag = 1; + move16(); /* allocate delay buffer to compensate for filterbank delay */ hFrontVad->delay_samples = NS2SA( hEncoderConfig->input_Fs, IVAS_FB_ENC_DELAY_NS ); + move16(); hFrontVad->delay_buf_fx = NULL; IF( GT_16( hFrontVad->delay_samples, 0 ) ) { @@ -817,6 +652,8 @@ ivas_error front_vad_create_fx( set16_fx( hFrontVad->delay_buf_fx, 0, hFrontVad->delay_samples ); } + hFrontVad->delay_buf = NULL; // Inorder to avoid issue in front_vad_distroy_fx call , Will have to be removed later + *hFrontVad_out = hFrontVad; return IVAS_ERR_OK; @@ -853,8 +690,36 @@ void front_vad_destroy( return; } +#ifdef IVAS_FLOAT_FIXED +void front_vad_destroy_fx( + FRONT_VAD_ENC_HANDLE *hFrontVad /* i/o: front-VAD handle */ +) +{ + IF( *hFrontVad != NULL ) + { + free( ( *hFrontVad )->hNoiseEst ); + ( *hFrontVad )->hNoiseEst = NULL; + free( ( *hFrontVad )->hVAD ); + ( *hFrontVad )->hVAD = NULL; + IF( ( *hFrontVad )->delay_buf_fx != NULL ) + { + free( ( *hFrontVad )->delay_buf_fx ); + ( *hFrontVad )->delay_buf_fx = NULL; + } + IF( ( *hFrontVad )->delay_buf != NULL ) + { + free( ( *hFrontVad )->delay_buf ); + ( *hFrontVad )->delay_buf = NULL; + } + free( *hFrontVad ); + *hFrontVad = NULL; + } + + return; +} +#endif /*-----------------------------------------------------------------------------------------* * Function front_vad_spar() * @@ -881,6 +746,7 @@ ivas_error front_vad_spar( int16_t localVAD_HE_SAD[1]; int16_t vad_hover_flag[1]; float band_energies[2 * NB_BANDS]; + Word32 band_energies_fx[2 * NB_BANDS]; int16_t high_lpn_flag; Encoder_State *st; float tmpN[NB_BANDS], tmpE[NB_BANDS]; @@ -942,6 +808,7 @@ ivas_error front_vad_spar( int16_t flag_spitch; float 
PS[L_FRAME / 2]; + Word32 PS_fx[L_FRAME / 2]; int16_t old_pitch; ivas_error error; #ifdef DUMP_VAD_SPAR @@ -1004,8 +871,18 @@ ivas_error front_vad_spar( delay_signal_float( st->input, input_frame, hFrontVad->delay_buf, hFrontVad->delay_samples ); Word16 Q_inp; Q_inp = Q_factor_arr( st->input, L_FRAME48k ); +#if 0 + FILE *fp = fopen( "input_before.txt", "ab+" ); + for ( int i = 0; i < L_FRAME48k; i++ ) + { + fprintf( fp, "%d\t", i ); + fprintf( fp, "%f\n", st->input[i] ); + } + fclose( fp ); +#endif floatToFixed_arr( st->input, st->input_fx, Q_inp, L_FRAME48k ); - + Word16 Q_add = 0; + move16(); /*------------------------------------------------------------------* * Front-VAD *-----------------------------------------------------------------*/ @@ -1015,11 +892,87 @@ ivas_error front_vad_spar( return error; } #else - Word16 Q_add = 0; - if ( ( error = front_vad_fx( NULL, st, hEncoderConfig, &hFrontVad, 0 /* MCT_flag */, input_frame, vad_flag_dtx, fr_bands, Etot, lf_E, localVAD_HE_SAD, vad_hover_flag, band_energies, &PS[0], &st->Bin_E[0], Q_inp, Q_add ) ) != IVAS_ERR_OK ) + // Word16 n_chan = 1; +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + Word16 front_create_flag = 0; + move16(); + Word16 Qband = -1; + move16(); + Word16 Q_new_old = add( sub( Q_inp, Qband ), Q_add ); + Word16 band_ener_guardbits = find_guarded_bits_fx( 2 * NB_BANDS ); + floatToFixed_arr( hFrontVad->mem_decim, hFrontVad->mem_decim_fx, Q_inp, 90 ); + hFrontVad->mem_preemph_fx = (Word16) floatToFixed( hFrontVad->mem_preemph, Q_inp - 1 ); + Word16 Q_buffer = Q_factor_arr( hFrontVad->buffer_12k8 + L_FFT, L_FFT / 2 ); + floatToFixed_arr( hFrontVad->buffer_12k8, hFrontVad->buffer_12k8_fx, Q_buffer, 384 ); + hFrontVad->hNoiseEst->Etot_h_32fx = (Word32) ( hFrontVad->hNoiseEst->Etot_h * 16777216.0 ); + hFrontVad->hNoiseEst->Etot_l_32fx = (Word32) ( hFrontVad->hNoiseEst->Etot_l * 16777216.0 ); + hFrontVad->hNoiseEst->Etot_l_lp_32fx = (Word32) ( hFrontVad->hNoiseEst->Etot_l_lp * 16777216.0 ); + 
hFrontVad->hNoiseEst->Etot_last_32fx = (Word32) ( hFrontVad->hNoiseEst->Etot_last * 16777216.0 ); + hFrontVad->hNoiseEst->Etot_v_h2_32fx = (Word32) ( hFrontVad->hNoiseEst->Etot_v_h2 * 16777216.0 ); + hFrontVad->hNoiseEst->Etot_lp_32fx = (Word32) ( hFrontVad->hNoiseEst->Etot_lp * 16777216.0 ); + hFrontVad->hNoiseEst->sign_dyn_lp_32fx = (Word32) ( hFrontVad->hNoiseEst->sign_dyn_lp * 16777216.0 ); + + hFrontVad->lp_noise_fx = (Word16) floatToFixed( hFrontVad->lp_noise, Q8 ); + hFrontVad->lp_speech_fx = (Word16) floatToFixed( hFrontVad->lp_speech, Q8 ); + st->lp_speech_fx = (Word16) floatToFixed( st->lp_speech, Q8 ); + st->lp_noise_fx = (Word16) floatToFixed( st->lp_noise, Q8 ); + floatToFixed_arrL( hFrontVad->hNoiseEst->bckr, hFrontVad->hNoiseEst->bckr_fx, Q_new_old + QSCALE + 2, 20 ); + floatToFixed_arrL( hFrontVad->hNoiseEst->enrO, hFrontVad->hNoiseEst->enrO_fx, Q_new_old + QSCALE + 2, 20 ); + st->flag_noisy_speech_snr_fx = (Word8) st->flag_noisy_speech_snr; + hFrontVad->hVAD->bcg_flux_fx = (Word16) hFrontVad->hVAD->bcg_flux * ( 1 << 4 ); + + + hFrontVad->hVAD->snr_sum_vad_fx = (Word16) ( hFrontVad->hVAD->snr_sum_vad * 32767 ); + hFrontVad->hVAD->prim_act_quick_fx = (Word16) ( hFrontVad->hVAD->prim_act_quick * 32767 ); + hFrontVad->hVAD->prim_act_slow_fx = (Word16) ( hFrontVad->hVAD->prim_act_slow * 32767 ); + hFrontVad->hVAD->prim_act_fx = (Word16) ( hFrontVad->hVAD->prim_act * 32767 ); + hFrontVad->hVAD->prim_act_quick_he_fx = (Word16) ( hFrontVad->hVAD->prim_act_quick_he * 32767 ); + hFrontVad->hVAD->prim_act_slow_he_fx = (Word16) ( hFrontVad->hVAD->prim_act_slow_he * 32767 ); + hFrontVad->hVAD->prim_act_he_fx = (Word16) ( hFrontVad->hVAD->prim_act_he * 32767 ); + floatToFixed_arrL( &band_energies[0], &band_energies_fx[0], Q_new_old + QSCALE + 2, 40 ); +#endif + if ( ( error = front_vad_fx( NULL, st, hEncoderConfig, &hFrontVad, 0 /* MCT_flag */, input_frame, vad_flag_dtx, fr_bands_fx, Etot_fx, lf_E_fx, localVAD_HE_SAD, vad_hover_flag, band_energies_fx, &PS_fx[0], 
&st->lgBin_E_fx[0], Q_inp, &Q_buffer, Q_add, &front_create_flag ) ) != IVAS_ERR_OK ) { return error; } +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + // fixedToFloat_arrL( &band_energies_LR_fx[0], &band_energies_LR[0], Q_new + QSCALE + 2 - band_ener_guardbits, 40 ); + fixedToFloat_arr( hFrontVad->mem_decim_fx, hFrontVad->mem_decim, Q_inp, 90 ); + hFrontVad->mem_preemph = fixedToFloat( hFrontVad->mem_preemph_fx, Q_inp + Qband ); + fixedToFloat_arr( hFrontVad->buffer_12k8_fx, hFrontVad->buffer_12k8, Q_buffer, 384 ); + fixedToFloat_arrL( fr_bands_fx[0], fr_bands[0], Q_buffer + QSCALE + 2, 40 ); + fixedToFloat_arrL( lf_E_fx[0], lf_E[0], Q_buffer + QSCALE, 148 ); + if ( st->lgBin_E_fx != NULL ) + { + fixedToFloat_arr( st->lgBin_E_fx, st->Bin_E, Q7, 128 ); + } + fixedToFloat_arrL( PS_fx, PS, Q_buffer + QSCALE, 128 ); + Etot[0] = fixedToFloat( Etot_fx[0], Q8 ); + + hFrontVad->hNoiseEst->Etot_h = (float) ( hFrontVad->hNoiseEst->Etot_h_32fx / 16777216.0 ); + hFrontVad->hNoiseEst->Etot_l = (float) ( hFrontVad->hNoiseEst->Etot_l_32fx / 16777216.0 ); + hFrontVad->hNoiseEst->Etot_l_lp = (float) ( hFrontVad->hNoiseEst->Etot_l_lp_32fx / 16777216.0 ); + hFrontVad->hNoiseEst->Etot_last = (float) ( hFrontVad->hNoiseEst->Etot_last_32fx / 16777216.0 ); + // hFrontVad->hNoiseEst->Etot_v_h2 = (float) ( hFrontVad->hNoiseEst->Etot_v_h2_32fx / 16777216.0 ); + hFrontVad->hNoiseEst->Etot_lp = (float) ( hFrontVad->hNoiseEst->Etot_lp_32fx / 16777216.0 ); + // hFrontVad->hNoiseEst->sign_dyn_lp = (float) ( hFrontVad->hNoiseEst->sign_dyn_lp_32fx / 16777216.0 ); + + hFrontVad->hNoiseEst->Etot_v_h2 = (float) ( hFrontVad->hNoiseEst->Etot_v_h2_fx / ( 256.0 ) ); + hFrontVad->hNoiseEst->sign_dyn_lp = (float) ( hFrontVad->hNoiseEst->sign_dyn_lp_fx / ( 256.0 ) ); + hFrontVad->hVAD->bcg_flux = (float) ( hFrontVad->hVAD->bcg_flux_fx / ( 16.0 ) ); + hFrontVad->hVAD->snr_sum_vad = (float) ( hFrontVad->hVAD->snr_sum_vad_fx / 32767.0 ); + hFrontVad->hVAD->prim_act_quick = (float) ( hFrontVad->hVAD->prim_act_quick_fx / 
32767.0 ); + hFrontVad->hVAD->prim_act_slow = (float) ( hFrontVad->hVAD->prim_act_slow_fx / 32767.0 ); + hFrontVad->hVAD->prim_act = (float) ( hFrontVad->hVAD->prim_act_fx / 32767.0 ); + hFrontVad->hVAD->prim_act_quick_he = (float) ( hFrontVad->hVAD->prim_act_quick_he_fx / 32767.0 ); + hFrontVad->hVAD->prim_act_slow_he = (float) ( hFrontVad->hVAD->prim_act_slow_he_fx / 32767.0 ); + hFrontVad->hVAD->prim_act_he = (float) ( hFrontVad->hVAD->prim_act_he_fx / 32767.0 ); + // floatToFixed_arr( hFrontVad->mem_decim, hFrontVad->mem_decim_fx, Q_inp, 90 ); + // hFrontVad->mem_preemph_fx = (Word16) floatToFixed( hFrontVad->mem_preemph, Q_inp ); + + // fixedToFloat_arrL( band_energies_fx, band_energies, Q_buffer + QSCALE + 2, 40 ); + fixedToFloat_arrL( &band_energies_fx[0], &band_energies[0], Q_buffer + QSCALE + 2 - band_ener_guardbits, 40 ); +#endif #endif Word32 e_min_scaled; @@ -1304,19 +1257,20 @@ ivas_error front_vad_spar( #if 1 // Word16 relE_fx; // Word16 lsp_new_fx[M]; - Word32 PS_fx[128]; + // Word32 PS_fx[128]; // Word32 epsP_fx[M + 1]; - cor_map_sum_fx = (Word16) floatToFixed( cor_map_sum, Q8 ); - Word16 non_sta_fx = (Word16) floatToFixed( non_staX, Q8 ); + cor_map_sum_fx = float_to_fix16( cor_map_sum, Q8 ); + Word16 non_sta_fx = float_to_fix16( non_staX, Q6 ); // Word32 epsP_fx[M + 1]; - Word16 Etot_fx_0 = (Word16) floatToFixed( Etot[0], Q8 ); + Word16 Etot_fx_0 = float_to_fix16( Etot[0], Q8 ); floatToFixed_arr( lsp_new, lsp_new_fx, Q15, M ); - hSpMusClas->wdlp_0_95_sp_fx = (Word16) floatToFixed( hSpMusClas->wdlp_0_95_sp, Q8 ); + hSpMusClas->wdlp_0_95_sp_fx = float_to_fix16( hSpMusClas->wdlp_0_95_sp, Q8 ); hSpMusClas->wdlp_xtalk_fx = floatToFixed( hSpMusClas->wdlp_xtalk, Q19 ); - hSpMusClas->wrise_fx = (Word16) ( hSpMusClas->wrise * ONE_IN_Q9 ); - relE_fx = (Word16) ( relE * ONE_IN_Q9 ); - hSpMusClas->prev_relE_fx = (Word16) ( hSpMusClas->prev_relE * ONE_IN_Q9 ); - hSpMusClas->relE_attack_sum_fx = (Word16) ( hSpMusClas->relE_attack_sum * ONE_IN_Q9 ); + 
hSpMusClas->wrise_fx = float_to_fix16( hSpMusClas->wrise, 9 ); + relE_fx = float_to_fix16( relE, 8 ); + floatToFixed_arr16( st->voicing, st->voicing_fx, 15, 3 ); + hSpMusClas->prev_relE_fx = float_to_fix16( hSpMusClas->prev_relE, 8 ); + hSpMusClas->relE_attack_sum_fx = float_to_fix16( hSpMusClas->relE_attack_sum, 8 ); Word16 Qfact_PS = Q_factor_arrL( PS, 128 ); floatToFixed_arr32( PS, PS_fx, Qfact_PS, 128 ); Word16 e_esp; @@ -1324,25 +1278,33 @@ ivas_error front_vad_spar( Q_esp = sub( 31, e_esp ); Word16 Qfact_PS_past = Q_factor_arrL( hSpMusClas->past_PS, 67 ); floatToFixed_arr32( hSpMusClas->past_PS, hSpMusClas->past_PS_fx, Qfact_PS_past, 67 ); + hSpMusClas->dlp_var_LT_fx = float_to_fix( hSpMusClas->dlp_var_LT, Q19 ); + hSpMusClas->dlp_mean_LT_fx = float_to_fix( hSpMusClas->dlp_mean_LT, Q19 ); + hSpMusClas->dlp_mean_ST_fx = float_to_fix( hSpMusClas->dlp_mean_ST, Q19 ); + floatToFixed_arr32( hSpMusClas->past_dlp_mean_ST, hSpMusClas->past_dlp_mean_ST_fx, Q19, 7 ); + floatToFixed_arr32( hSpMusClas->prev_FV, hSpMusClas->prev_FV_fx, Q20, 15 ); #endif ivas_smc_gmm_fx( st, NULL, localVAD_HE_SAD[0], Etot_fx_0, lsp_new_fx, cor_map_sum_fx, epsP_fx, PS_fx, non_sta_fx, relE_fx, &high_lpn_flag, flag_spitch, Qfact_PS, Q_esp, Qfact_PS_past ); #if 1 fixedToFloat_arr( hSpMusClas->past_dlp_fx, hSpMusClas->past_dlp, Q9, HANG_LEN - 1 ); - hSpMusClas->lpm = (Word16) fixedToFloat( hSpMusClas->lpm_fx, Q8 ); // Q8 - hSpMusClas->lps = (Word16) fixedToFloat( hSpMusClas->lps_fx, Q8 ); // Q8 - hSpMusClas->lpn = (Word16) fixedToFloat( hSpMusClas->lpn_fx, Q8 ); // Q8 - hSpMusClas->wdrop = (Word16) fixedToFloat( hSpMusClas->wdrop_fx, Q9 ); // Q8 - hSpMusClas->wrise = (Word16) fixedToFloat( hSpMusClas->wrise_fx, Q9 ); // Q8 - hSpMusClas->lt_dec_thres = (Word16) fixedToFloat( hSpMusClas->lt_dec_thres_fx, Q9 ); // Q8 + hSpMusClas->lpm = fixedToFloat( hSpMusClas->lpm_fx, Q7 ); // Q7 + hSpMusClas->lps = fixedToFloat( hSpMusClas->lps_fx, Q7 ); // Q7 + hSpMusClas->lpn = fixedToFloat( 
hSpMusClas->lpn_fx, Q7 ); // Q7 + hSpMusClas->wdrop = fixedToFloat( hSpMusClas->wdrop_fx, Q9 ); // Q8 + hSpMusClas->wrise = fixedToFloat( hSpMusClas->wrise_fx, Q9 ); // Q8 + hSpMusClas->lt_dec_thres = fixedToFloat( hSpMusClas->lt_dec_thres_fx, Q9 ); // Q8 hSpMusClas->wdlp_0_95_sp = fixedToFloat( hSpMusClas->wdlp_0_95_sp_fx, Q8 ); hSpMusClas->dlp_mean_LT = fixedToFloat_32( hSpMusClas->dlp_mean_LT_fx, Q19 ); hSpMusClas->wdlp_xtalk = fixedToFloat( hSpMusClas->wdlp_xtalk_fx, Q19 ); hSpMusClas->dlp_var_LT = fixedToFloat_32( hSpMusClas->dlp_var_LT_fx, Q19 ); - hSpMusClas->prev_relE = (Word16) fixedToFloat( hSpMusClas->prev_relE_fx, Q9 ); - hSpMusClas->prev_Etot = (Word16) fixedToFloat( hSpMusClas->prev_Etot_fx, Q8 ); + hSpMusClas->prev_relE = fixedToFloat( hSpMusClas->prev_relE_fx, Q8 ); + hSpMusClas->prev_Etot = fixedToFloat( hSpMusClas->prev_Etot_fx, Q8 ); fixedToFloat_arrL32( hSpMusClas->past_PS_fx, hSpMusClas->past_PS, Qfact_PS_past, 67 ); - hSpMusClas->relE_attack_sum = (Word16) fixedToFloat( hSpMusClas->relE_attack_sum_fx, Q9 ); + hSpMusClas->relE_attack_sum = fixedToFloat( hSpMusClas->relE_attack_sum_fx, Q8 ); fixedToFloat_arrL32( hSpMusClas->FV_st_fx, hSpMusClas->FV_st, Q20, 15 ); + hSpMusClas->dlp_mean_ST = fixedToFloat( hSpMusClas->dlp_mean_ST_fx, Q19 ); + fixedToFloat_arrL32( hSpMusClas->past_dlp_mean_ST_fx, hSpMusClas->past_dlp_mean_ST, Q19, 7 ); + fixedToFloat_arrL32( hSpMusClas->prev_FV_fx, hSpMusClas->prev_FV, Q20, 15 ); #endif #endif #if 0 diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index b87c9d058eea6528491a86fc458d026c0bebbf99..6d42df9331138e9bcf51673e4eff9267791dabbb 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -77,8 +77,7 @@ typedef struct stereo_itd_data_struct Word32 prev_avg_max_fx; Word16 prev_avg_max_fx_e; float currFlatness; - Word32 currFlatness_fx; - Word16 currFlatness_fx_e; + Word16 currFlatness_fx; /* Xtalk classifier */ float prev_m1; @@ -801,7 +800,7 @@ typedef struct 
ivas_stereo_classifier_data_structure Word32 xtalk_wscore_fx; // Q31 Word32 xtalk_score_fx; // Q31 Word32 xtalk_score_wrelE_fx; // Q31 - Word32 is_speech_fx; + Word32 is_speech_fx; // Q9 } STEREO_CLASSIF_DATA, *STEREO_CLASSIF_HANDLE; diff --git a/lib_enc/ivas_stereo_classifier.c b/lib_enc/ivas_stereo_classifier.c index 5621385e78c9e896d115c7e34b6051fc0bca02d1..adeeb1ff6ea1aaa3167d2207382fecac2da10f93 100644 --- a/lib_enc/ivas_stereo_classifier.c +++ b/lib_enc/ivas_stereo_classifier.c @@ -106,6 +106,179 @@ static float redge_detect( const float *inp, const int16_t len, const float inp_ *-------------------------------------------------------------------*/ /*! r: element mode */ +#ifdef IVAS_FLOAT_FIXED +Word16 select_stereo_mode( + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ + const IVAS_FORMAT ivas_format /* i : IVAS format */ +) +{ + Word16 element_mode; + STEREO_CLASSIF_HANDLE hStereoClassif; + Word16 is_speech; + Word16 stereo_switching_flag; + + /* initialization */ + element_mode = hCPE->element_mode; + hStereoClassif = hCPE->hStereoClassif; + + /* set binary flag to prevent LRTD mode on music */ + hStereoClassif->is_speech_fx = L_add( Mpy_32_32( hStereoClassif->is_speech_fx, 2083059139 ), Mpy_32_16_1( 64424509, hCPE->hCoreCoder[0]->hSpMusClas->past_dlp_fx[0] ) ); /* (((Q25*Q31) << 1) >> 32) -> Q25 + (((Q31*Q9) << 1) >> 16) -> Q25*/ + move32(); + is_speech = ( LT_32( hStereoClassif->is_speech_fx, ONE_IN_Q25 ) && hCPE->hCoreCoder[0]->hSpMusClas->wdlp_xtalk_fx < 0 ); + move16(); + + /* set binary flag indicating LRTD mode based on unclr/xtalk classifiers' decisions */ + hStereoClassif->prev_lrtd_mode = hStereoClassif->lrtd_mode; + move16(); + hStereoClassif->unclr_decision = ( hStereoClassif->unclr_decision && hCPE->hCoreCoder[0]->flag_noisy_speech_snr == 0 && GT_32( hCPE->element_brate, IVAS_16k4 ) ); + move16(); + hStereoClassif->lrtd_mode = ( ( hStereoClassif->unclr_decision | hStereoClassif->xtalk_decision ) && is_speech ); + move16(); + + 
stereo_switching_flag = 1; + move16(); + + test(); + test(); + IF( GE_32( hCPE->element_brate, MIN_BRATE_MDCT_STEREO ) || ( ( EQ_16( ivas_format, MASA_FORMAT ) || EQ_16( ivas_format, MASA_ISM_FORMAT ) ) && LT_32( hCPE->element_brate, MASA_STEREO_MIN_BITRATE ) ) ) + { + stereo_switching_flag = 0; + move16(); + } + test(); + IF( GE_32( hCPE->element_brate, MIN_BRATE_MDCT_STEREO ) ) + { + hStereoClassif->prev_lrtd_mode = 0; + move16(); + hStereoClassif->lrtd_mode = 0; + move16(); + element_mode = IVAS_CPE_MDCT; + move16(); + } + ELSE IF( LT_32( hCPE->element_brate, MIN_BRATE_MDCT_STEREO ) && EQ_16( hCPE->last_element_mode, IVAS_CPE_MDCT ) ) + { + hStereoClassif->lrtd_mode = 0; + move16(); + element_mode = IVAS_CPE_DFT; + move16(); + test(); + test(); + test(); + test(); + test(); + IF( EQ_16( stereo_switching_flag, 1 ) && GT_32( hCPE->element_brate, IVAS_13k2 ) && LT_16( hCPE->hCoreCoder[0]->hSpMusClas->past_dlp_fx[0], ONE_IN_Q9 ) && LT_32( hCPE->hCoreCoder[0]->hSpMusClas->wdlp_xtalk_fx, -5243 /* -0.01 in Q19*/ ) && EQ_16( hCPE->hCoreCoder[0]->vad_flag, 1 ) && ( hCPE->hStereoMdct->sw_uncorr || hStereoClassif->xtalk_decision ) ) + { + hStereoClassif->lrtd_mode = 1; + move16(); + element_mode = IVAS_CPE_TD; + move16(); + } + } + + /* set the element mode */ + test(); + IF( EQ_16( hStereoClassif->lrtd_mode, 1 ) && EQ_16( stereo_switching_flag, 1 ) ) + { + element_mode = IVAS_CPE_TD; + move16(); + } + ELSE IF( LT_16( element_mode, IVAS_CPE_MDCT ) ) + { + IF( EQ_16( stereo_switching_flag, 0 ) ) + { + test(); + test(); + IF( ( EQ_16( ivas_format, MASA_FORMAT ) || EQ_16( ivas_format, MASA_ISM_FORMAT ) ) && LT_32( hCPE->element_brate, MASA_STEREO_MIN_BITRATE ) ) + { + element_mode = IVAS_CPE_DFT; + move16(); + } + } + ELSE IF( EQ_16( element_mode, IVAS_CPE_TD ) ) + { + test(); + IF( hCPE->hStereoTD->prev_fr_LRTD_TD_dec > 0 && is_speech ) + { + /* if unclr_decision goes from 1->0 on active content, continue in LRTD mode */ + hStereoClassif->lrtd_mode = 1; + move16(); + } + 
ELSE IF( EQ_16( stereo_switching_flag, 1 ) ) + { + element_mode = IVAS_CPE_DFT; + move16(); + } + } + ELSE IF( EQ_16( stereo_switching_flag, 1 ) ) + { + element_mode = IVAS_CPE_DFT; + move16(); + } + } + + /* switch from LRTD to DFT when xtalk_decision goes from 0->1 (note: this special case is not handled in the xtalk classifier) */ + test(); + test(); + IF( EQ_16( hCPE->last_element_mode, IVAS_CPE_TD ) && EQ_16( element_mode, IVAS_CPE_TD ) && EQ_16( hStereoClassif->xtalk_decision, 1 ) ) + { + test(); + test(); + test(); + test(); + test(); + IF( hCPE->hStereoTD->prev_fr_LRTD_TD_dec == 0 && GT_16( hCPE->hStereoTD->tdm_FD2LRTD_SW_cnt, 15 ) && GT_16( hCPE->hStereoTD->tdm_last_LRTD_frame_cnt, 3 ) && LT_16( hCPE->hCoreCoder[0]->clas, VOICED_CLAS ) && ( GE_32( hCPE->element_brate, IVAS_16k4 ) || LT_32( hStereoClassif->xtalk_wscore_fx, 21474836 /*0.01f in Q31*/ ) ) ) + { + IF( EQ_16( stereo_switching_flag, 1 ) ) + { + element_mode = IVAS_CPE_DFT; + move16(); + } + hStereoClassif->xtalk_decision = 0; + move16(); + hStereoClassif->lrtd_mode = 0; + move16(); + } + } + + IF( NE_16( hCPE->last_element_mode, element_mode ) ) + { + test(); + IF( NE_16( hCPE->last_element_mode, IVAS_CPE_DFT ) && NE_16( hCPE->last_element_mode, IVAS_CPE_TD ) ) + { + Word16 lrtd_mode = hStereoClassif->lrtd_mode; + move16(); + + /* reset stereo classifier when switching from MDCT stereo to Unified stereo */ + stereo_classifier_init( hCPE->hStereoClassif ); +#ifdef IVAS_FLOAT_FIXED + stereo_classifier_init_fx( hCPE->hStereoClassif ); +#endif + hStereoClassif->lrtd_mode = lrtd_mode; + move16(); + } + ELSE + { + /* reset UNCLR classifier parameters */ + set32_fx( hStereoClassif->unclr_fv_fx, -MAX_16, SSC_MAX_NFEA ); + hStereoClassif->unclr_corrLagMax_prev = 0; + move16(); + + /* reset xtalk classifier parameters */ + set32_fx( hStereoClassif->xtalk_fv_fx, -MAX_16, SSC_MAX_NFEA ); + } + } + test(); + IF( EQ_16( element_mode, IVAS_CPE_TD ) && hCPE->hCoreCoder[0]->Opt_DTX_ON ) + { + 
hCPE->hStereoCng->td_active = 1; + move16(); + hCPE->hStereoCng->first_SID_after_TD = 1; + move16(); + } + + return ( element_mode ); +} +#else int16_t select_stereo_mode( CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ const IVAS_FORMAT ivas_format /* i : IVAS format */ @@ -233,7 +406,7 @@ int16_t select_stereo_mode( return ( element_mode ); } - +#endif /*-------------------------------------------------------------------* * Function stereo_classifier_init() diff --git a/lib_enc/ivas_stereo_dft_enc.c b/lib_enc/ivas_stereo_dft_enc.c index c0d84f5bed614538f1c84f0b050ef04eb9c2e311..e6b1449b63b129ec14f001b94f453759f2c80989 100644 --- a/lib_enc/ivas_stereo_dft_enc.c +++ b/lib_enc/ivas_stereo_dft_enc.c @@ -1324,7 +1324,6 @@ void stereo_enc_itd_init_fx( hItd->prev_avg_max_fx = 0; hItd->prev_avg_max_fx_e = 0; hItd->currFlatness_fx = 0; - hItd->currFlatness_fx_e = 0; /* Xtalk classifier */ hItd->prev_m1_fx = 0; diff --git a/lib_enc/ivas_stereo_dft_enc_itd.c b/lib_enc/ivas_stereo_dft_enc_itd.c index badadc4bba5e960397a4e157683430f78c649c5f..5dd6f6dbe74ccac033355631895536c214422cbd 100644 --- a/lib_enc/ivas_stereo_dft_enc_itd.c +++ b/lib_enc/ivas_stereo_dft_enc_itd.c @@ -1890,7 +1890,8 @@ void stereo_dft_enc_compute_itd_fx( L_temp_e = add( L_temp_e, L_temp2_e ); sfm_R = BASOP_Util_Divide3232_Scale_cadence( L_temp, sum_abs_R, &sfm_L_e ); sfm_R_e = add( sfm_L_e, sub( L_temp_e, sum_abs_R_e ) ); - sfm_R = L_shl_r( sfm_R, sfm_R_e ); // Q31 + // sfm_R = L_shl_r( sfm_R, sfm_R_e ); // Q31 + sfm_R = L_shl_sat( sfm_R, sfm_R_e ); // Q31 } if ( sfm_R > sfm_L ) @@ -2565,7 +2566,7 @@ void stereo_dft_enc_compute_itd_fx( test(); test(); test(); - IF( flag_noisy_speech_snr == 0 && hCPE->hCoreCoder[0]->vad_flag == 1 && hItd->detected_itd_flag == 0 && ( BASOP_Util_Cmp_Mant32Exp( hItd->currFlatness_fx, hItd->currFlatness_fx_e, 1610612736, 1 ) < 0 || hCPE->hCoreCoder[0]->sp_aud_decision0 == 1 ) ) + IF( flag_noisy_speech_snr == 0 && hCPE->hCoreCoder[0]->vad_flag == 1 && 
hItd->detected_itd_flag == 0 && ( LT_16( hItd->currFlatness_fx, 192 ) /* 1.5 in Q7*/ || hCPE->hCoreCoder[0]->sp_aud_decision0 == 1 ) ) { // hItd->itd_thres *= 1.5f; hItd->itd_thres_fx = L_shl_sat( Mpy_32_32( hItd->itd_thres_fx, 1610612736 ), 1 ); /* Saturation added to avoid assertions (this needs to be investigated) */ diff --git a/lib_enc/ivas_stereo_ica_enc.c b/lib_enc/ivas_stereo_ica_enc.c index 6f80d59c44d66e9e06c4115b081cb22865fdb023..f35a9a3f876732994500bb04637e69476a451a74 100644 --- a/lib_enc/ivas_stereo_ica_enc.c +++ b/lib_enc/ivas_stereo_ica_enc.c @@ -2416,7 +2416,7 @@ void stereo_tca_enc( #endif utilCrossCorr_fx( ptrChanL_fx, ptrChanL_q, ptrChanR_fx, ptrChanR_q, NULL, &tempF1_fx, &tempF1_exp, tempLag, input_frame, 0 ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - tempF1 = fixedToFloat_32( tempF1_fx, Q31 - tempF1_exp ); + tempF1 = fixedToFloat( tempF1_fx, Q31 - tempF1_exp ); #endif #else utilCrossCorr( ptrChanL, ptrChanR, NULL, &tempF1, tempLag, input_frame, 0 ); diff --git a/lib_enc/ivas_stereo_switching_enc.c b/lib_enc/ivas_stereo_switching_enc.c index c817a366b62acf3749e3ede3204bb8c526496040..da6aeb8c72503dbe0b8e864278840d652e6a9b78 100644 --- a/lib_enc/ivas_stereo_switching_enc.c +++ b/lib_enc/ivas_stereo_switching_enc.c @@ -36,13 +36,91 @@ #include "rom_com.h" #include "prot.h" #include "ivas_prot.h" +#ifdef IVAS_FLOAT_FIXED +#include "ivas_rom_com_fx.h" +#endif #include "ivas_rom_com.h" #include "assert.h" #include "wmc_auto.h" #ifdef IVAS_FLOAT_FIXED #include "prot_fx_enc.h" +#include "prot_fx.h" +#endif + +#ifdef IVAS_FLOAT_FIXED +/*-------------------------------------------------------------------* + * Function allocate_CoreCoder_enc() + * + * Allocate CoreCoder modules + *-------------------------------------------------------------------*/ + +static ivas_error allocate_CoreCoder_enc_fx( + ENC_CORE_HANDLE st /* i/o: Core encoder state structure */ +) +{ + IF( st->hLPDmem == NULL && st->element_mode != IVAS_CPE_MDCT ) + { + IF( ( st->hLPDmem = 
(LPD_state_HANDLE) malloc( sizeof( LPD_state ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for LPDmem\n" ) ); + } +#ifdef IVAS_FLOAT_FIXED + LPDmem_enc_init_fx( st->hLPDmem ); +#endif + LPDmem_enc_init( st->hLPDmem ); + } + + IF( st->hGSCEnc == NULL && st->element_mode != IVAS_CPE_MDCT ) + { + IF( ( st->hGSCEnc = (GSC_ENC_HANDLE) malloc( sizeof( GSC_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for GSC\n" ) ); + } +#ifdef IVAS_FLOAT_FIXED + GSC_enc_init_fx( st->hGSCEnc ); +#endif + GSC_enc_init( st->hGSCEnc ); + } + + IF( st->hNoiseEst == NULL ) + { + IF( ( st->hNoiseEst = (NOISE_EST_HANDLE) malloc( sizeof( NOISE_EST_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for Noise estimation\n" ) ); + } +#ifdef IVAS_FLOAT_FIXED + noise_est_init_ivas_fx( st->hNoiseEst ); +#endif + noise_est_init( st->hNoiseEst ); + } + + IF( st->hVAD == NULL ) + { + IF( ( st->hVAD = (VAD_HANDLE) malloc( sizeof( VAD_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for VAD\n" ) ); + } +#ifdef IVAS_FLOAT_FIXED + wb_vad_init_fx( st->hVAD ); +#endif + wb_vad_init( st->hVAD ); + } + + IF( st->hSpMusClas == NULL ) + { + IF( ( st->hSpMusClas = (SP_MUS_CLAS_HANDLE) malloc( sizeof( SP_MUS_CLAS_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for Speech/music classifier\n" ) ); + } +#ifdef IVAS_FLOAT_FIXED + speech_music_clas_init_fx( st->hSpMusClas ); #endif + speech_music_clas_init( st->hSpMusClas ); + } + return IVAS_ERR_OK; +} +#endif /*-------------------------------------------------------------------* * Function allocate_CoreCoder_enc() * @@ -101,7 +179,6 @@ static ivas_error allocate_CoreCoder_enc( return IVAS_ERR_OK; } - /*-------------------------------------------------------------------* * Function deallocate_CoreCoder_TCX_enc() * @@ -112,25 +189,25 @@ 
/*-------------------------------------------------------------------*
 * Function deallocate_CoreCoder_enc()
 *
 * Release the CoreCoder modules of one channel and clear their handles.
 * The noise estimator, VAD and speech/music classifier are kept while
 * the channel is in MDCT stereo; the TCX/IGF/HQ structures are likewise
 * only released when the channel is not in MDCT stereo.
 *-------------------------------------------------------------------*/

static void deallocate_CoreCoder_enc(
    ENC_CORE_HANDLE st /* i/o: Core encoder state structure     */
)
{
    /* --- ACELP related memories --- */
    IF( st->hLPDmem != NULL )
    {
        free( st->hLPDmem );
        st->hLPDmem = NULL;
    }

    IF( st->hGSCEnc != NULL )
    {
        free( st->hGSCEnc );
        st->hGSCEnc = NULL;
    }

    /* --- signal analysis modules (retained in MDCT stereo) --- */
    IF( st->hNoiseEst != NULL && NE_16( st->element_mode, IVAS_CPE_MDCT ) )
    {
        free( st->hNoiseEst );
        st->hNoiseEst = NULL;
    }

    IF( st->hVAD != NULL && NE_16( st->element_mode, IVAS_CPE_MDCT ) )
    {
        free( st->hVAD );
        st->hVAD = NULL;
    }

    IF( st->hSpMusClas != NULL && NE_16( st->element_mode, IVAS_CPE_MDCT ) )
    {
        free( st->hSpMusClas );
        st->hSpMusClas = NULL;
    }

    /* --- bandwidth extension encoders --- */
    IF( st->hBWE_TD != NULL )
    {
        free( st->hBWE_TD );
        st->hBWE_TD = NULL;
    }

    IF( st->hBWE_FD != NULL )
    {
        free( st->hBWE_FD );
        st->hBWE_FD = NULL;
    }

    /* --- CLDFB analysis / synthesis filter banks --- */
    IF( st->cldfbAnaEnc != NULL )
    {
        deleteCldfb_ivas( &st->cldfbAnaEnc );
    }

    IF( st->cldfbSynTd != NULL )
    {
        deleteCldfb_ivas( &st->cldfbSynTd );
    }

    /* --- TCX / IGF / HQ core structures --- */
    IF( st->element_mode != IVAS_CPE_MDCT )
    {
        deallocate_CoreCoder_TCX_enc( st );
    }

    return;
}
deallocate data structure of the previous CPE mode */ + IF( hCPE->hStereoTD != NULL ) + { + free( hCPE->hStereoTD ); + hCPE->hStereoTD = NULL; + } + + IF( hCPE->hStereoMdct != NULL ) + { + stereo_mdct_enc_destroy( &( hCPE->hStereoMdct ) ); + hCPE->hStereoMdct = NULL; + } + + /* deallocate CoreCoder secondary channel */ + deallocate_CoreCoder_enc( hCPE->hCoreCoder[1] ); + + /* allocate DFT stereo data structure */ + IF( NE_32( ( error = stereo_dft_enc_create( &( hCPE->hStereoDft ), input_Fs, max_bwidth ) ), IVAS_ERR_OK ) ) + { + return error; + } + + /* allocate ICBWE structure */ + IF( hCPE->hStereoICBWE == NULL ) + { + IF( ( hCPE->hStereoICBWE = (STEREO_ICBWE_ENC_HANDLE) malloc( sizeof( STEREO_ICBWE_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for Stereo ICBWE \n" ) ); + } + + stereo_icBWE_init_enc( hCPE->hStereoICBWE ); + } + + /* allocate HQ core in M channel */ + st = hCPE->hCoreCoder[0]; + IF( st->hHQ_core == NULL ) + { + IF( ( st->hHQ_core = (HQ_ENC_HANDLE) malloc( sizeof( HQ_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for HQ core\n" ) ); + } + + HQ_core_enc_init( st->hHQ_core ); + } + } + + /*--------------------------------------------------------------* + * switching CPE mode to TD stereo + *---------------------------------------------------------------*/ + + IF( EQ_16( hCPE->element_mode, IVAS_CPE_TD ) ) + { + /* deallocate data structure of the previous CPE mode */ + IF( hCPE->hStereoDft != NULL ) + { + stereo_dft_enc_destroy( &( hCPE->hStereoDft ) ); + hCPE->hStereoDft = NULL; + } + + IF( hCPE->hStereoMdct != NULL ) + { + stereo_mdct_enc_destroy( &( hCPE->hStereoMdct ) ); + hCPE->hStereoMdct = NULL; + } + + /* deallocated TCX/IGF structures for second channel */ + deallocate_CoreCoder_TCX_enc( hCPE->hCoreCoder[1] ); + + /* allocate TD stereo data structure */ + IF( hCPE->hStereoTD != NULL ) + { + return IVAS_ERROR( IVAS_ERR_INTERNAL_FATAL, 
"Error: TD Stereo memory already allocated\n" ); + } + + IF( ( hCPE->hStereoTD = (STEREO_TD_ENC_DATA_HANDLE) malloc( sizeof( STEREO_TD_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for TD Stereo\n" ) ); + } + + stereo_td_init_enc( hCPE->hStereoTD, hCPE->last_element_mode ); + /* allocate secondary channel */ +#ifdef IVAS_FLOAT_FIXED + IF( NE_32( ( error = allocate_CoreCoder_enc_fx( hCPE->hCoreCoder[1] ) ), IVAS_ERR_OK ) ) +#endif + IF( ( error = allocate_CoreCoder_enc( hCPE->hCoreCoder[1] ) ) != IVAS_ERR_OK ) + { + return error; + } + } + + /*--------------------------------------------------------------* + * allocate DFT/TD stereo structures after MDCT stereo frame + *---------------------------------------------------------------*/ + test(); + test(); + IF( EQ_16( hCPE->last_element_mode, IVAS_CPE_MDCT ) && ( EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) || EQ_16( hCPE->element_mode, IVAS_CPE_TD ) ) ) + { + /* Deallocate MDCT CNG structures */ + deleteCldfb_ivas( &hCPE->hCoreCoder[0]->cldfbAnaEnc ); + deleteCldfb_ivas( &hCPE->hCoreCoder[1]->cldfbAnaEnc ); + + IF( EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) ) + { + IF( hCPE->hCoreCoder[1]->hDtxEnc != NULL ) + { + free( hCPE->hCoreCoder[1]->hDtxEnc ); + hCPE->hCoreCoder[1]->hDtxEnc = NULL; + } + + IF( hCPE->hCoreCoder[1]->hFdCngEnc != NULL ) + { + deleteFdCngEnc( &hCPE->hCoreCoder[1]->hFdCngEnc ); + } + } + + IF( hCPE->hCoreCoder[0]->Opt_DTX_ON && hCPE->hCoreCoder[0]->hTdCngEnc == NULL ) + { + IF( ( hCPE->hCoreCoder[0]->hTdCngEnc = (TD_CNG_ENC_HANDLE) malloc( sizeof( TD_CNG_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DTX/TD CNG\n" ) ); + } + + td_cng_enc_init( hCPE->hCoreCoder[0]->hTdCngEnc, hCPE->hCoreCoder[0]->Opt_DTX_ON, hCPE->hCoreCoder[0]->max_bwidth ); + } + + /* allocate TCA data structure */ + IF( hCPE->hStereoTCA != NULL ) + { + return IVAS_ERROR( IVAS_ERR_INTERNAL_FATAL, "Error: TCA Stereo memory 
already allocated\n" ); + } + + IF( ( hCPE->hStereoTCA = (STEREO_TCA_ENC_HANDLE) malloc( sizeof( STEREO_TCA_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for Stereo TCA\n" ) ); + } + + stereo_tca_init_enc( hCPE->hStereoTCA, input_Fs ); + + st = hCPE->hCoreCoder[0]; + + /* allocate primary channel substructures */ + IF( NE_32( ( error = allocate_CoreCoder_enc( st ) ), IVAS_ERR_OK ) ) + { + return error; + } + + /* allocate CLDFB for primary channel */ + IF( st->cldfbAnaEnc == NULL ) + { + IF( NE_32( ( error = openCldfb_ivas_enc( &st->cldfbAnaEnc, CLDFB_ANALYSIS, input_Fs, CLDFB_PROTOTYPE_1_25MS ) ), IVAS_ERR_OK ) ) + { + return error; + } + } + + /* allocate BWEs for primary channel */ + IF( st->hBWE_TD == NULL ) + { + IF( ( st->hBWE_TD = (TD_BWE_ENC_HANDLE) malloc( sizeof( TD_BWE_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for TD BWE\n" ) ); + } + + IF( st->cldfbSynTd == NULL ) + { + IF( NE_32( ( error = openCldfb_ivas_enc( &st->cldfbSynTd, CLDFB_SYNTHESIS, 16000, CLDFB_PROTOTYPE_1_25MS ) ), IVAS_ERR_OK ) ) + { + return error; + } + } + + InitSWBencBuffer( st->hBWE_TD ); + InitSWBencBuffer_fx( st ); + ResetSHBbuffer_Enc( st->hBWE_TD ); + + IF( ( st->hBWE_FD = (FD_BWE_ENC_HANDLE) malloc( sizeof( FD_BWE_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for FD BWE\n" ) ); + } + + fd_bwe_enc_init( st->hBWE_FD ); + } + + /* allocate stereo CNG structure */ + IF( hCPE->hStereoCng == NULL ) + { + IF( ( hCPE->hStereoCng = (STEREO_CNG_ENC_HANDLE) malloc( sizeof( STEREO_CNG_ENC ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for Stereo Cng for Unified/TD\n" ) ); + } + + stereo_enc_cng_init( hCPE->hStereoCng ); + } + } + + /*--------------------------------------------------------------* + * switching CPE mode to MDCT stereo + 
*---------------------------------------------------------------*/ + + IF( EQ_16( hCPE->element_mode, IVAS_CPE_MDCT ) ) + { + Word16 i; + + /* deallocate data structure of the previous CPE mode */ + IF( hCPE->hStereoDft != NULL ) + { + stereo_dft_enc_destroy( &( hCPE->hStereoDft ) ); + hCPE->hStereoDft = NULL; + } + + IF( hCPE->hStereoTD != NULL ) + { + free( hCPE->hStereoTD ); + hCPE->hStereoTD = NULL; + } + + IF( hCPE->hStereoTCA != NULL ) + { + free( hCPE->hStereoTCA ); + hCPE->hStereoTCA = NULL; + } + + IF( hCPE->hStereoICBWE != NULL ) + { + free( hCPE->hStereoICBWE ); + hCPE->hStereoICBWE = NULL; + } + + FOR( i = 0; i < CPE_CHANNELS; i++ ) + { + /* deallocate core-coder substructures */ + deallocate_CoreCoder_enc( hCPE->hCoreCoder[i] ); + } + + IF( EQ_16( hCPE->last_element_mode, IVAS_CPE_DFT ) ) + { + /* allocate secondary channel */ + IF( NE_32( ( error = allocate_CoreCoder_enc( hCPE->hCoreCoder[1] ) ), IVAS_ERR_OK ) ) + { + return error; + } + } + + /* allocate TCX/IGF structures for second channel */ + st = hCPE->hCoreCoder[1]; + + IF( ( st->hTcxEnc = (TCX_ENC_HANDLE) malloc( sizeof( TCX_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for hTcxEnc\n" ) ); + } + +#ifdef IVAS_FLOAT_FIXED + st->hTcxEnc->spectrum_fx[0] = st->hTcxEnc->spectrum_long_fx; + st->hTcxEnc->spectrum_fx[1] = st->hTcxEnc->spectrum_long_fx + N_TCX10_MAX; +#endif // IVAS_FLOAT_FIXED + + st->hTcxEnc->spectrum[0] = st->hTcxEnc->spectrum_long; + st->hTcxEnc->spectrum[1] = st->hTcxEnc->spectrum_long + N_TCX10_MAX; +#ifdef IVAS_FLOAT_FIXED + st->hTcxEnc->spectrum_fx[0] = st->hTcxEnc->spectrum_long_fx; + st->hTcxEnc->spectrum_fx[1] = st->hTcxEnc->spectrum_long_fx + N_TCX10_MAX; +#endif + set_f( st->hTcxEnc->old_out, 0, L_FRAME32k ); + set_f( st->hTcxEnc->spectrum_long, 0, N_MAX ); + st->hTcxEnc->tfm_mem = 0.75f; + + IF( EQ_16( hCPE->last_element_mode, IVAS_CPE_DFT ) ) + { + st->last_core = ACELP_CORE; /* needed to set-up TCX core in 
SetTCXModeInfo() */ + move16(); + } + + IF( ( st->hTcxCfg = (TCX_CONFIG_HANDLE) malloc( sizeof( TCX_config ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for hTcxCfg\n" ) ); + } + + IF( ( st->hIGFEnc = (IGF_ENC_INSTANCE_HANDLE) malloc( sizeof( IGF_ENC_INSTANCE ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for hIGFEnc\n" ) ); + } + st->igf = getIgfPresent( st->element_mode, st->total_brate, st->bwidth, st->rf_mode ); + move16(); + + /* allocate and initialize MDCT stereo structure */ + IF( ( hCPE->hStereoMdct = (STEREO_MDCT_ENC_DATA_HANDLE) malloc( sizeof( STEREO_MDCT_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MDCT Stereo \n" ) ); + } -static void deallocate_CoreCoder_enc( - ENC_CORE_HANDLE st /* i/o: Core encoder state structure */ -) -{ - if ( st->hLPDmem != NULL ) - { - free( st->hLPDmem ); - st->hLPDmem = NULL; - } + initMdctStereoEncData_fx( hCPE->hStereoMdct, ivas_format, hCPE->element_mode, hCPE->element_brate, hCPE->hCoreCoder[0]->max_bwidth, 0, NULL, 1 ); - if ( st->hGSCEnc != NULL ) - { - free( st->hGSCEnc ); - st->hGSCEnc = NULL; - } + test(); + hCPE->hStereoMdct->isSBAStereoMode = ( EQ_32( ivas_format, SBA_FORMAT ) && EQ_16( nchan_transport, 2 ) ); - if ( st->hNoiseEst != NULL && st->element_mode != IVAS_CPE_MDCT ) - { - free( st->hNoiseEst ); - st->hNoiseEst = NULL; - } + test(); + test(); + IF( EQ_16( hCPE->element_mode, IVAS_CPE_MDCT ) && LE_32( hCPE->element_brate, MAX_MDCT_ITD_BRATE ) && EQ_16( ivas_format, STEREO_FORMAT ) ) + { + IF( NE_32( ( error = initMdctItdHandling( hCPE->hStereoMdct, input_Fs ) ), IVAS_ERR_OK ) ) + { + return error; + } + } - if ( st->hVAD != NULL && st->element_mode != IVAS_CPE_MDCT ) - { - free( st->hVAD ); - st->hVAD = NULL; - } + /* allocate/deallocate and initialize DTX/CNG structures */ + IF( hCPE->hCoreCoder[0]->Opt_DTX_ON ) + { + FOR( i = 0; i < CPE_CHANNELS; i++ ) + { + 
st = hCPE->hCoreCoder[i]; + IF( NE_32( ( error = openCldfb_ivas_enc( &st->cldfbAnaEnc, CLDFB_ANALYSIS, st->input_Fs, CLDFB_PROTOTYPE_1_25MS ) ), IVAS_ERR_OK ) ) + { + return error; + } - if ( st->hSpMusClas != NULL && st->element_mode != IVAS_CPE_MDCT ) - { - free( st->hSpMusClas ); - st->hSpMusClas = NULL; - } + st->currEnergyLookAhead = 6.1e-5f; - if ( st->cldfbAnaEnc != NULL ) - { - deleteCldfb_ivas( &st->cldfbAnaEnc ); - } + IF( st->hDtxEnc == NULL ) + { + IF( ( st->hDtxEnc = (DTX_ENC_HANDLE) malloc( sizeof( DTX_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DTX variables\n" ) ); + } + } + dtx_enc_init( st, 0, FIXED_SID_RATE ); - if ( st->hBWE_TD != NULL ) - { - free( st->hBWE_TD ); - st->hBWE_TD = NULL; - } + IF( st->hTdCngEnc != NULL ) + { + free( st->hTdCngEnc ); + st->hTdCngEnc = NULL; + } - if ( st->cldfbSynTd != NULL ) - { - deleteCldfb_ivas( &st->cldfbSynTd ); - } + IF( st->hFdCngEnc == NULL ) + { + IF( NE_32( ( error = createFdCngEnc( &st->hFdCngEnc ) ), IVAS_ERR_OK ) ) + { + return error; + } - if ( st->hBWE_FD != NULL ) - { - free( st->hBWE_FD ); - st->hBWE_FD = NULL; + initFdCngEnc( st->hFdCngEnc, st->input_Fs, st->cldfbAnaEnc->scale_flt ); + configureFdCngEnc( st->hFdCngEnc, st->bwidth, st->rf_mode && st->total_brate == ACELP_13k20 ? 
ACELP_9k60 : st->total_brate ); + } + } + } + } } - if ( st->element_mode != IVAS_CPE_MDCT ) + test(); + test(); + test(); + IF( EQ_16( ivas_format, STEREO_FORMAT ) && EQ_16( hCPE->element_mode, IVAS_CPE_MDCT ) && LE_32( hCPE->element_brate, MAX_MDCT_ITD_BRATE ) && GT_32( hCPE->last_element_brate, MAX_MDCT_ITD_BRATE ) ) { - deallocate_CoreCoder_TCX_enc( st ); + /* allocate MDCT stereo ITD handling structure */ + IF( NE_32( ( error = initMdctItdHandling( hCPE->hStereoMdct, input_Fs ) ), IVAS_ERR_OK ) ) + { + return error; + } } - return; + return error; } - +#else /*-------------------------------------------------------------------* * Function stereo_memory_enc() @@ -631,7 +1124,314 @@ ivas_error stereo_memory_enc( return error; } +#endif +#ifdef IVAS_FLOAT_FIXED + +static void v_multc_fixed_32_16( + const Word16 x[], /* i : Input vector */ + const Word32 c, /* i : Constant */ + Word32 y[], /* o : Output vector that contains c*x */ + const Word16 N /* i : Vector length */ +) +{ + Word16 i; + + FOR( i = 0; i < N; i++ ) + { + y[i] = Mpy_32_16_1( c, x[i] ); + move32(); + } + + return; +} +/*-------------------------------------------------------------------* + * Function stereo_switching_enc() + * + * Handling of memories in case of CPE modes switching + *-------------------------------------------------------------------*/ + +void stereo_switching_enc_fx( + CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ + Word16 old_input_signal_pri[], /* i : old input signal of primary channel */ + const Word16 input_frame, /* i : input frame length */ + const Word16 q_inp ) +{ + Word16 i, n, dft_ovl, offset; + Word16 tmp_fx; + move16(); + Encoder_State **sts; + + sts = hCPE->hCoreCoder; + dft_ovl = extract_l( Mpy_32_32( imult3216( input_frame, STEREO_DFT_OVL_MAX ), 2236963 ) ); // 1/L_FRAME48k = 2236963 (Q31) + + /* update DFT analysis overlap memory */ + IF( GT_16( hCPE->element_mode, IVAS_CPE_DFT ) && hCPE->input_mem_fx[0] != NULL && NE_16( hCPE->element_mode, IVAS_CPE_MDCT 
) ) + { + FOR( n = 0; n < CPE_CHANNELS; n++ ) + { + Copy( sts[n]->input_fx + input_frame - dft_ovl, hCPE->input_mem_fx[n], dft_ovl ); + } + } + + /* save original stereo input (MDCT overlap part) for both channels in unused old input of right channel for possible DFT->MDCT transition */ + IF( EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) ) + { + Copy( sts[0]->input_fx + sub( (Word16) Mpy_32_32( sts[0]->input_Fs, 42949673 /* 1/50 in Q31*/ ), sts[0]->encoderLookahead_FB ), sts[1]->input_fx - shl( sts[0]->encoderLookahead_FB, 1 ), sts[0]->encoderLookahead_FB ); + + Copy( sts[1]->input_fx + sub( (Word16) Mpy_32_32( sts[1]->input_Fs, 42949673 /* 1/50 in Q31*/ ), sts[1]->encoderLookahead_FB ), sts[1]->input_fx - sts[1]->encoderLookahead_FB, sts[1]->encoderLookahead_FB ); + } + + + /* TD/MDCT -> DFT stereo switching */ + test(); + IF( EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) && NE_16( hCPE->last_element_mode, IVAS_CPE_DFT ) ) + { + /* window DFT synthesis overlap memory @input_Fs, primary channel */ + FOR( i = 0; i < dft_ovl; i++ ) + { + hCPE->hStereoDft->output_mem_dmx_fx[i] = Mpy_32_16_r( hCPE->hStereoDft->win_fx[dft_ovl - 1 - i], old_input_signal_pri[input_frame - dft_ovl + i] ); + } + /* reset 48kHz BWE overlap memory */ + set32_fx( hCPE->hStereoDft->output_mem_dmx_32k_fx, 0, STEREO_DFT_OVL_32k ); + + stereo_dft_enc_reset_fx( hCPE->hStereoDft ); + + /* update ITD parameters */ + test(); + IF( EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) && EQ_16( hCPE->last_element_mode, IVAS_CPE_TD ) ) + { + set32_fx( hCPE->hStereoDft->hItd->itd_fx, hCPE->hStereoTCA->prevCorrLagStats[2], STEREO_DFT_ENC_DFT_NB ); + } + + /* Update the side_gain[] parameters */ + IF( hCPE->hStereoTCA != NULL && NE_16( hCPE->last_element_mode, IVAS_CPE_MDCT ) ) + { + tmp_fx = usdequant_fx( hCPE->hStereoTCA->indx_ica_gD, STEREO_TCA_GDMIN_FX, STEREO_TCA_GDSTEP_FX ); + FOR( i = 0; i < STEREO_DFT_BAND_MAX; i++ ) + { + hCPE->hStereoDft->side_gain_fx[STEREO_DFT_BAND_MAX + i] = L_deposit_h( tmp_fx ); + move32(); + } 
+ } + + /* do not allow differential coding of DFT side parameters */ + hCPE->hStereoDft->res_pred_counter = STEREO_DFT_FEC_THRESHOLD; + move16(); + + /* update DFT synthesis overlap memory @12.8kHz */ + FOR( i = 0; i < STEREO_DFT_OVL_12k8; i++ ) + { + hCPE->hStereoDft->output_mem_dmx_12k8_fx[i] = L_shr( Mpy_32_16_r( hCPE->hStereoDft->win_12k8_fx[STEREO_DFT_OVL_12k8 - 1 - i], sts[0]->buf_speech_enc[L_FRAME32k + L_FRAME - STEREO_DFT_OVL_12k8 + i] ), q_inp ); + move32(); + } + Word16 q_dmx = Q16; + move16(); + /* update DFT synthesis overlap memory @16kHz, primary channel only */ + L_lerp_fx( hCPE->hStereoDft->output_mem_dmx_fx, hCPE->hStereoDft->output_mem_dmx_16k_fx, STEREO_DFT_OVL_16k, dft_ovl, &q_dmx ); + FOR( i = 0; i < STEREO_DFT_OVL_16k; i++ ) + { + hCPE->hStereoDft->output_mem_dmx_16k_fx[i] = L_shl( hCPE->hStereoDft->output_mem_dmx_16k_fx[i], q_dmx - Q16 ); + move32(); + } + + /* reset DFT synthesis overlap memory @8kHz, secondary channel */ + set32_fx( hCPE->hStereoDft->output_mem_res_8k_fx, 0, STEREO_DFT_OVL_8k ); + + hCPE->hCoreCoder[1]->vad_flag = 0; + move16(); + } + + /* MDCT -> TD stereo switching */ + test(); + test(); + IF( EQ_16( hCPE->element_mode, IVAS_CPE_TD ) && EQ_16( hCPE->last_element_mode, IVAS_CPE_MDCT ) ) + { + hCPE->hStereoTD->tdm_last_ratio_idx = LRTD_STEREO_LEFT_IS_PRIM; + move16(); + hCPE->hStereoTD->tdm_last_ratio_idx_SM = LRTD_STEREO_LEFT_IS_PRIM; + move16(); + hCPE->hStereoTD->tdm_last_SM_flag = 0; + move16(); + hCPE->hStereoTD->tdm_last_inst_ratio_idx = LRTD_STEREO_MID_IS_PRIM; + move16(); + hCPE->hStereoTD->tdm_last_ratio_fx = tdm_ratio_tabl_fx[LRTD_STEREO_LEFT_IS_PRIM]; + move32(); + } + /* DFT -> TD stereo switching */ + ELSE IF( EQ_16( hCPE->element_mode, IVAS_CPE_TD ) && EQ_16( hCPE->last_element_mode, IVAS_CPE_DFT ) ) + { + hCPE->hStereoTD->tdm_last_ratio_idx = LRTD_STEREO_MID_IS_PRIM; + move16(); + hCPE->hStereoTD->tdm_last_ratio_idx_SM = LRTD_STEREO_MID_IS_PRIM; + move16(); + hCPE->hStereoTD->tdm_last_SM_flag = 0; + 
move16(); + hCPE->hStereoTD->tdm_last_inst_ratio_idx = LRTD_STEREO_MID_IS_PRIM; + move16(); + + /* First frame after DFT frame AND the content is uncorrelated or xtalk -> the primary channel is forced to left */ + IF( EQ_16( hCPE->hStereoClassif->lrtd_mode, 1 ) ) + { + set_zero( sts[1]->input - input_frame, input_frame ); + set_zero_fx( sts[1]->input32_fx - input_frame, input_frame ); + + hCPE->hStereoTD->tdm_last_ratio_fx = tdm_ratio_tabl_fx[LRTD_STEREO_LEFT_IS_PRIM]; + + hCPE->hStereoTD->tdm_last_ratio_idx = LRTD_STEREO_LEFT_IS_PRIM; + + IF( LT_32( hCPE->hStereoTCA->instTargetGain_fx, 26843546 ) && ( hCPE->hCoreCoder[0]->vad_flag || hCPE->hCoreCoder[1]->vad_flag ) ) /* but if there is no content in the L channel -> the primary channel is forced to right */ + { + hCPE->hStereoTD->tdm_last_ratio_fx = tdm_ratio_tabl_fx[LRTD_STEREO_RIGHT_IS_PRIM]; + hCPE->hStereoTD->tdm_last_ratio_idx = LRTD_STEREO_RIGHT_IS_PRIM; + move16(); + } + } + } + + /* no secondary channel in the previous frame -> memory resets */ + test(); + IF( GT_16( hCPE->element_mode, IVAS_CPE_DFT ) && EQ_16( hCPE->last_element_mode, IVAS_CPE_DFT ) ) + { + IF( sts[0]->cldfbAnaEnc != NULL ) + { + offset = sub( sts[0]->cldfbAnaEnc->p_filter_length, sts[0]->cldfbAnaEnc->no_channels ); + for ( i = 0; i < offset; i++ ) + { + sts[0]->cldfbAnaEnc->cldfb_state_fx[i] = L_deposit_h( old_input_signal_pri[input_frame - offset - NS2SA( input_frame * FRAMES_PER_SEC, L_MEM_RECALC_TBE_NS ) + i] ); + move32(); + } + sts[0]->cldfbAnaEnc->Q_cldfb_state = Q16; + move16(); + } + + IF( sts[0]->cldfbSynTd != NULL ) + { + cldfb_reset_memory_fx( sts[0]->cldfbSynTd ); + sts[0]->currEnergyLookAhead_fx = 130996; + move32(); + } + + IF( hCPE->hStereoICBWE == NULL && sts[1]->cldfbAnaEnc != NULL ) + { + offset = sub( sts[1]->cldfbAnaEnc->p_filter_length, sts[1]->cldfbAnaEnc->no_channels ); + + IF( hCPE->hStereoTD != NULL && EQ_16( hCPE->hStereoTD->tdm_last_ratio_idx, LRTD_STEREO_LEFT_IS_PRIM ) ) + { + v_multc_fixed_32_16( 
hCPE->hCoreCoder[1]->old_input_signal_fx + sub( input_frame, add( offset, NS2SA( input_frame * FRAMES_PER_SEC, L_MEM_RECALC_TBE_NS ) ) ), -MAX_32, sts[1]->cldfbAnaEnc->cldfb_state_fx, offset ); + } + ELSE + { + FOR( i = 0; i < offset; i++ ) + { + sts[1]->cldfbAnaEnc->cldfb_state_fx[i] = L_shr( L_deposit_h( hCPE->hCoreCoder[1]->old_input_signal_fx[input_frame - offset - NS2SA( input_frame * FRAMES_PER_SEC, L_MEM_RECALC_TBE_NS ) + i] ), 5 ); + move32(); + } + } + + IF( sts[1]->cldfbSynTd != NULL ) + { + cldfb_reset_memory_fx( sts[1]->cldfbSynTd ); + sts[1]->currEnergyLookAhead_fx = 130996; + move32(); + } + } + + sts[1]->last_extl = -1; + move16(); + + /* no secondary channel in the previous frame -> memory resets */ + set16_fx( sts[1]->old_inp_12k8_fx, 0, L_INP_MEM ); + set16_fx( sts[1]->mem_decim_fx, 0, 2 * L_FILT_MAX ); + + sts[1]->mem_preemph_fx = 0; + move16(); + + set16_fx( sts[1]->buf_speech_enc, 0, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); + set16_fx( sts[1]->buf_speech_enc_pe, 0, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); + + IF( sts[1]->hTcxEnc != NULL ) + { + set16_fx( sts[1]->hTcxEnc->buf_speech_ltp, 0, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k ); + } + set16_fx( sts[1]->buf_wspeech_enc, 0, L_FRAME16k + L_SUBFR + L_FRAME16k + L_NEXT_MAX_16k ); + set16_fx( sts[1]->buf_synth, 0, OLD_SYNTH_SIZE_ENC + L_FRAME32k ); + sts[1]->mem_wsp_fx = 0; + move16(); + sts[1]->mem_wsp_enc = 0; + move16(); + init_gp_clip_fx( sts[1]->clip_var_fx ); + + set32_fx( sts[1]->Bin_E_fx, 0, L_FFT ); + set32_fx( sts[1]->Bin_E_old_fx, 0, L_FFT / 2 ); + + /* sts[1]->hLPDmem reset already done in allocation of handles */ + + sts[1]->last_L_frame = sts[0]->last_L_frame; + move16(); + pitch_ol_init_fx( &sts[1]->old_thres_fx, &sts[1]->old_pitch, &sts[1]->delta_pit, &sts[1]->old_corr_fx ); + set16_fx( sts[1]->old_wsp_fx, 0, L_WSP_MEM ); + set16_fx( sts[1]->old_wsp2_fx, 0, ( L_WSP_MEM - L_INTERPOL ) / OPL_DECIM ); + set16_fx( sts[1]->mem_decim2_fx, 0, 3 ); + Copy( sts[0]->pitch, 
sts[1]->pitch, 3 ); + + sts[1]->Nb_ACELP_frames = 0; + move16(); + + /* populate PCh memories into the SCh */ + IF( sts[0]->hLPDmem != NULL ) + { + Copy( sts[0]->hLPDmem->old_exc, sts[1]->hLPDmem->old_exc, L_EXC_MEM ); + } + Copy( sts[0]->lsf_old_fx, sts[1]->lsf_old_fx, M ); + Copy( sts[0]->lsp_old_fx, sts[1]->lsp_old_fx, M ); + Copy( sts[0]->lsf_old1_fx, sts[1]->lsf_old1_fx, M ); + Copy( sts[0]->lsp_old1_fx, sts[1]->lsp_old1_fx, M ); + + sts[1]->GSC_noisy_speech = 0; + move16(); + IF( EQ_16( hCPE->element_mode, IVAS_CPE_MDCT ) ) + { + /* cross-fade overlap region of DFT Stereo downmix and original stereo channels */ + tmp_fx = div_s( 64, shl( sts[0]->encoderLookahead_FB, Q6 ) ); + FOR( i = 0; i < sts[0]->encoderLookahead_FB; i++ ) + { + sts[1]->input32_fx[-sts[0]->encoderLookahead_FB + i] = L_shr( Mpy_32_16_1( L_add( Mpy_32_16_1( sts[0]->input32_fx[-sts[0]->encoderLookahead_FB + i], sts[0]->encoderLookahead_FB - i ), Mpy_32_16_1( sts[1]->input32_fx[-sts[0]->encoderLookahead_FB + i], i ) ), tmp_fx ), 13 ); + move32(); + sts[0]->input32_fx[-sts[0]->encoderLookahead_FB + i] = L_shr( Mpy_32_16_1( L_add( Mpy_32_16_1( sts[0]->input32_fx[-sts[0]->encoderLookahead_FB + i], sts[0]->encoderLookahead_FB - i ), Mpy_32_16_1( sts[1]->input32_fx[-2 * sts[0]->encoderLookahead_FB + i], i ) ), tmp_fx ), 13 ); + move32(); + } + /* restore continuous signal in right channel (part of old_output was used to store original left channel) */ + Copy32( sts[0]->input32_fx - sts[0]->hTcxEnc->L_frameTCX, sts[1]->input32_fx - sts[0]->hTcxEnc->L_frameTCX, sts[0]->hTcxEnc->L_frameTCX - sts[0]->encoderLookahead_FB ); + + sts[1]->last_core = sts[0]->last_core; + move16(); + sts[1]->last_coder_type = sts[0]->last_coder_type; + move16(); + sts[1]->last_bwidth = sts[0]->last_bwidth; + move16(); + } + } + ELSE IF( EQ_16( hCPE->element_mode, IVAS_CPE_TD ) && EQ_16( hCPE->last_element_mode, IVAS_CPE_MDCT ) ) + { + set16_fx( sts[0]->hLPDmem->old_exc, 0, L_EXC_MEM ); + set16_fx( sts[1]->hLPDmem->old_exc, 
0, L_EXC_MEM ); + } + test(); + + /* TD/DFT -> MDCT stereo switching (there is no TCX in the TD stereo secondary channel, or DFT stereo) */ + test(); + IF( EQ_16( hCPE->element_mode, IVAS_CPE_MDCT ) && NE_16( hCPE->last_element_mode, IVAS_CPE_MDCT ) ) + { + sts[1]->hTcxCfg->last_aldo = sts[0]->hTcxCfg->last_aldo; + move16(); + sts[1]->hTcxCfg->tcx_curr_overlap_mode = sts[0]->hTcxCfg->tcx_curr_overlap_mode; + move16(); + } + + return; +} +#else /*-------------------------------------------------------------------* * Function stereo_switching_enc() * @@ -861,3 +1661,4 @@ void stereo_switching_enc( return; } +#endif diff --git a/lib_enc/ivas_stereo_td_enc.c b/lib_enc/ivas_stereo_td_enc.c index f97989cfb75cde11bda5ea6ae9f00cab5fa90f6b..1ff09e865d8acbb31c82c7caa7adebed559bf5c6 100644 --- a/lib_enc/ivas_stereo_td_enc.c +++ b/lib_enc/ivas_stereo_td_enc.c @@ -45,6 +45,7 @@ #ifdef IVAS_FLOAT_FIXED #include "prot_fx_enc.h" #include "ivas_prot_fx.h" +#include "prot_fx.h" #endif @@ -222,6 +223,184 @@ void stereo_td_init_enc_fx( return; } +#ifdef IVAS_FLOAT_FIXED +/*-------------------------------------------------------------------* + * stereo_set_tdm() + * + * Set TD stereo encoder parameters + *-------------------------------------------------------------------*/ + +ivas_error stereo_set_tdm_fx( + CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ + const Word16 input_frame, /* i : input frame length per channel */ + Word16 input_q ) +{ + Encoder_State **sts; + sts = hCPE->hCoreCoder; + ivas_error error; + + error = IVAS_ERR_OK; + + /* initialize TD stereo parameters */ + IF( hCPE->hStereoTD != NULL ) + { + hCPE->hStereoTD->tdm_lp_reuse_flag = 0; + move16(); + hCPE->hStereoTD->tdm_low_rate_mode = 0; + move16(); + hCPE->hStereoTD->tdm_Pitch_reuse_flag = 0; + move16(); + + IF( EQ_16( hCPE->hStereoClassif->lrtd_mode, 1 ) ) + { + /* initialize this flag when uncorrelated L&R channels have been detected in the previous frame */ + test(); + test(); + IF( EQ_16( 
hCPE->hStereoTD->prev_fr_LRTD_TD_dec, 1 ) || NE_16( hCPE->last_element_mode, IVAS_CPE_TD ) || LT_16( hCPE->hStereoTD->tdm_FD2LRTD_SW_cnt, 5 ) ) + { + hCPE->hStereoTD->tdm_LRTD_flag = 1; + move16(); + } + ELSE + { + hCPE->hStereoTD->tdm_LRTD_flag = 0; + move16(); + } + } + ELSE + { + hCPE->hStereoTD->tdm_LRTD_flag = hCPE->hStereoTD->prev_fr_LRTD_TD_dec; + move16(); + } + +#ifdef DEBUG_MODE_INFO + dbgwrite( &hCPE->hStereoTD->tdm_LRTD_flag, 2, 1, (int16_t) ( hCPE->hCoreCoder[0]->input_Fs / FRAMES_PER_SEC ), "res/tdm_LRTD_flag" ); +#endif + + + /* normal TD / LRTD switching */ + IF( EQ_16( hCPE->hStereoTD->tdm_LRTD_flag, 0 ) ) + { + Encoder_State *st; + st = hCPE->hCoreCoder[1]; + + /* deallocate CLDFB ana for secondary channel */ + IF( st->cldfbAnaEnc != NULL ) + { + deleteCldfb_ivas( &st->cldfbAnaEnc ); + } + + /* deallocate BWEs for secondary channel */ + IF( st->hBWE_TD != NULL ) + { + IF( st->hBWE_TD != NULL ) + { + free( st->hBWE_TD ); + st->hBWE_TD = NULL; + } + + deleteCldfb_ivas( &st->cldfbSynTd ); + + IF( st->hBWE_FD != NULL ) + { + free( st->hBWE_FD ); + st->hBWE_FD = NULL; + } + } + + /* allocate ICBWE structure */ + IF( hCPE->hStereoICBWE == NULL ) + { + IF( ( hCPE->hStereoICBWE = (STEREO_ICBWE_ENC_HANDLE) malloc( sizeof( STEREO_ICBWE_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for Stereo ICBWE \n" ) ); + } +#ifdef IVAS_FLOAT_FIXED + stereo_icBWE_init_enc_fx( hCPE->hStereoICBWE ); +#endif + stereo_icBWE_init_enc( hCPE->hStereoICBWE ); + } + } + ELSE /* tdm_LRTD_flag == 1 */ + { + Encoder_State *st; + st = hCPE->hCoreCoder[1]; + + /* deallocate ICBWE structure */ + IF( hCPE->hStereoICBWE != NULL ) + { + free( hCPE->hStereoICBWE ); + hCPE->hStereoICBWE = NULL; + } + + /* allocate CLDFB ana for secondary channel */ + IF( st->cldfbAnaEnc == NULL ) + { + IF( NE_32( ( error = openCldfb_ivas_enc( &st->cldfbAnaEnc, CLDFB_ANALYSIS, st->input_Fs, CLDFB_PROTOTYPE_1_25MS ) ), IVAS_ERR_OK ) ) + { + return error; + 
} + } + + /* allocate BWEs for secondary channel */ + IF( st->hBWE_TD == NULL ) + { + IF( ( st->hBWE_TD = (TD_BWE_ENC_HANDLE) malloc( sizeof( TD_BWE_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for TD BWE\n" ) ); + } + IF( NE_32( ( error = openCldfb_ivas_enc( &st->cldfbSynTd, CLDFB_SYNTHESIS, 16000, CLDFB_PROTOTYPE_1_25MS ) ), IVAS_ERR_OK ) ) + { + return error; + } + + InitSWBencBuffer( st->hBWE_TD ); +#ifdef IVAS_FLOAT_FIXED + InitSWBencBuffer_fx( st ); +#endif + ResetSHBbuffer_Enc( st->hBWE_TD ); +#ifdef IVAS_FLOAT_FIXED + ResetSHBbuffer_Enc_fx( st ); +#endif + + IF( ( st->hBWE_FD = (FD_BWE_ENC_HANDLE) malloc( sizeof( FD_BWE_ENC_DATA ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for FD BWE\n" ) ); + } + +#ifdef IVAS_FLOAT_FIXED + fd_bwe_enc_init_fx( st->hBWE_FD ); +#endif + fd_bwe_enc_init( st->hBWE_FD ); + } + } + + IF( hCPE->hStereoClassif->lrtd_mode == 0 ) + { + hCPE->hStereoTD->tdm_FD2LRTD_SW_cnt = 0; + move16(); + } + hCPE->hStereoTD->tdm_FD2LRTD_SW_cnt = s_min( 100, hCPE->hStereoTD->tdm_FD2LRTD_SW_cnt + 1 ); + move16(); + stereo_tdm_prep_dwnmx_fx( hCPE, sts[1]->input32_fx, input_frame, input_q ); + } + ELSE + { +#ifdef DEBUG_MODE_INFO + { + int16_t tmp = -2; + dbgwrite( &tmp, 2, 1, (int16_t) ( hCPE->hCoreCoder[0]->input_Fs / FRAMES_PER_SEC ), "res/tdm_LRTD_flag" ); + } +#endif + hCPE->hCoreCoder[0]->tdm_LRTD_flag = 0; + move16(); + hCPE->hCoreCoder[1]->tdm_LRTD_flag = 0; + move16(); + } + + return error; +} +#else /*-------------------------------------------------------------------* * stereo_set_tdm() * @@ -387,7 +566,7 @@ ivas_error stereo_set_tdm( return error; } - +#endif /*-------------------------------------------------------------------* * tdm_configure_enc() * @@ -1290,7 +1469,63 @@ void stereo_tdm_downmix( return; } +#ifdef IVAS_FLOAT_FIXED +/*-------------------------------------------------------------------* + * Function stereo_tdm_prep_dwnmx() 
+ * + * Reactivate downmixing after bitrate switching from MDCT to lower rate + *-------------------------------------------------------------------*/ +void stereo_tdm_prep_dwnmx_fx( + CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */ + const Word32 *input1, /* i : right channel input */ + const Word16 input_frame, /* i : frame lenght */ + const Word16 input_q /* i : frame lenght */ +) +{ +#define USER_ENER + Word32 mener; + int16_t i, sw_pos, enr_len; + Encoder_State **sts; + Word16 mener_e; + sts = hCPE->hCoreCoder; + + i = idiv1616( input_frame, L_FRAME16k ); + + sw_pos = i_mult( 22, i ); + enr_len = i_mult( 6, i ); + IF( EQ_16( hCPE->element_mode, IVAS_CPE_TD ) ) + { + IF( EQ_16( hCPE->hStereoTD->flag_skip_DMX, 1 ) ) /* hStereoTD is defined only if element mode == TD */ + { + IF( EQ_16( hCPE->last_element_mode, IVAS_CPE_TD ) ) + { + Word16 tmp_e; + mener_e = sub( 31, input_q ); + mener = L_add( sum2_32_fx( input1 + sub( input_frame, sw_pos ), enr_len, &mener_e ), EPSILON_FX ); + mener = BASOP_Util_Divide3232_Scale( mener, (Word32) enr_len, &tmp_e ); + mener_e = add( mener_e, sub( tmp_e, 15 ) ); + mener = Sqrt32( mener, &mener_e ); + test(); + test(); + IF( LT_32( mener, L_shl( 10, sub( 31, mener_e ) ) ) && ( sts[1]->vad_flag == 0 || EQ_16( sts[1]->coder_type_raw, UNVOICED ) ) ) + { + hCPE->hStereoTD->flag_skip_DMX = 0; /* Can start using the TD downmix whenever the right channel is sufficiently low energy to limit switching artefacts */ + move16(); + } + } + ELSE IF( EQ_16( hCPE->last_element_mode, IVAS_CPE_DFT ) || EQ_32( hCPE->last_element_brate, IVAS_13k2 ) ) /* Just security check, should not happened */ + { + hCPE->hStereoTD->flag_skip_DMX = 0; + move16(); + } + test(); + } + } + + return; +} +#else /*-------------------------------------------------------------------* * Function stereo_tdm_prep_dwnmx() * @@ -1335,3 +1570,4 @@ void stereo_tdm_prep_dwnmx( return; } +#endif diff --git a/lib_enc/multi_harm_fx.c b/lib_enc/multi_harm_fx.c index 
2277ad07e98be7f69bdc14b5af61ad4dc526f259..43e5ff5bb2c66584256cfd24fdb7d515b104ca2b 100644 --- a/lib_enc/multi_harm_fx.c +++ b/lib_enc/multi_harm_fx.c @@ -146,7 +146,7 @@ Word16 multi_harm_fx( /* o : frame multi-harmonicity } /* subtract the floor */ - S[i] = s_max( sub( Bin_E[i], flor ), 0 ); + S[i] = s_max( sub_sat( Bin_E[i], flor ), 0 ); move16(); /* update the floor */ diff --git a/lib_enc/nois_est_fx.c b/lib_enc/nois_est_fx.c index 045b6687951202d5a156d909cefa9ff11f0ccc61..d0476a4129ede75409a8c6fe6250ba1a12064e64 100644 --- a/lib_enc/nois_est_fx.c +++ b/lib_enc/nois_est_fx.c @@ -224,7 +224,129 @@ void noise_est_init_fx( move16(); hNoiseEst->lt_aEn_zero_fx = 0; move16(); + hNoiseEst->Etot_h_32fx = 0; + move32(); + hNoiseEst->Etot_l_32fx = 0; + move32(); + hNoiseEst->Etot_l_lp_32fx = 0; + move32(); + hNoiseEst->Etot_last_32fx = 0; + move32(); + hNoiseEst->Etot_v_h2_32fx = 0; + move32(); + hNoiseEst->Etot_lp_32fx = 0; + move32(); + hNoiseEst->sign_dyn_lp_32fx = 0; + move32(); + /* Tonal detector */ + FOR( i = 0; i < L_FFT / 2; i++ ) + { + hNoiseEst->old_S_fx[i] = 1; + move16(); + } + set16_fx( hNoiseEst->cor_map_fx, 0, L_FFT / 2 ); + hNoiseEst->act_pred_fx = 32767; + move16(); + hNoiseEst->noise_char_fx = 0; + move16(); + hNoiseEst->multi_harm_limit_fx = THR_CORR_INIT_FX; + hNoiseEst->Etot_lp_fx = 0; + hNoiseEst->Etot_h_fx = 0; + hNoiseEst->Etot_l_fx = 0; + hNoiseEst->Etot_l_lp_fx = 0; + hNoiseEst->Etot_last_fx = 0; + hNoiseEst->Etot_v_h2_fx = 0; + hNoiseEst->sign_dyn_lp_fx = 0; + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + + return; +} + +void noise_est_init_ivas_fx( + NOISE_EST_HANDLE hNoiseEst /* i/o: Noise estimation handle */ +) +{ + Word16 i; + + FOR( i = 0; i < NB_BANDS; i++ ) + { + hNoiseEst->fr_bands1_fx[i] = 1; + move32(); /*1e-5f; */ + hNoiseEst->fr_bands2_fx[i] = 1; + move32(); /*1e-5f; */ + hNoiseEst->ave_enr2_fx[i] = E_MIN_FX; + move32(); /*Q7//E_MIN; */ + hNoiseEst->enrO_fx[i] = E_MIN_Q11_FX; + move32(); + 
hNoiseEst->bckr_fx[i] = E_MIN_Q11_FX; + move32(); + hNoiseEst->ave_enr_fx[i] = E_MIN_FX; + move32(); + } + move16(); + hNoiseEst->totalNoise_fx = 0; + move16(); + hNoiseEst->first_noise_updt = 0; + // hNoiseEst->first_noise_updt_cnt_fx = 0; IVAS_CODE ?? + move16(); + + hNoiseEst->aEn = 6; + // hNoiseEst->aEn_inac_cnt = 0; IVAS_CODE + move16(); + + hNoiseEst->harm_cor_cnt = 0; + move16(); + hNoiseEst->bg_cnt = 0; + move16(); + + hNoiseEst->lt_tn_track_fx = 6554; /*.20 in Q15*/ + move16(); + hNoiseEst->lt_tn_dist_fx = 0; + move16(); + hNoiseEst->lt_Ellp_dist_fx = 0; + move16(); + hNoiseEst->lt_haco_ev_fx = 13107; /*.40 in Q15*/ + move16(); + hNoiseEst->low_tn_track_cnt = 0; + move16(); + + hNoiseEst->Etot_st_est_fx = 5120; /* 20.0f in Q8 */ + hNoiseEst->Etot_sq_st_est_fx = 1600; /* 400 in Q2 */ + //### + + hNoiseEst->epsP_0_2_lp_fx = 4096; /*1.0 Q12*/ + move16(); + hNoiseEst->epsP_0_2_ad_lp_fx = 0; + move16(); + hNoiseEst->epsP_2_16_lp_fx = 4096; + move16(); + hNoiseEst->epsP_2_16_lp2_fx = 4096; + move16(); + hNoiseEst->epsP_2_16_dlp_lp2_fx = 0; + move16(); + hNoiseEst->lt_aEn_zero_fx = 0; + move16(); + hNoiseEst->Etot_h_32fx = 0; + move32(); + hNoiseEst->Etot_l_32fx = 0; + move32(); + hNoiseEst->Etot_l_lp_32fx = 0; + move32(); + hNoiseEst->Etot_last_32fx = 0; + move32(); + hNoiseEst->Etot_v_h2_32fx = 0; + move32(); + hNoiseEst->Etot_lp_32fx = 0; + move32(); + hNoiseEst->sign_dyn_lp_32fx = 0; + move32(); /* Tonal detector */ FOR( i = 0; i < L_FFT / 2; i++ ) { diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index 7c58d50d345d6d08e57d2e0ef20a3533c9ce2015..02fbc8e7d438fec491cd7c67611f82df745045be 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -3938,6 +3938,10 @@ void noise_est_init_fx( NOISE_EST_HANDLE hNoiseEst /* i/o: Noise estimation handle */ ); +void noise_est_init_ivas_fx( + NOISE_EST_HANDLE hNoiseEst /* i/o: Noise estimation handle */ +); + void InitSWBencBuffer_fx( Encoder_State *st_fx /* i/o: SHB encoder structure */ ); diff --git 
a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c index 4e0d9badedfc68504c95feb0c6d43e6bf45fdfa4..53f942deb5e169ac9d61395c3ddec319451914b0 100644 --- a/lib_enc/speech_music_classif_fx.c +++ b/lib_enc/speech_music_classif_fx.c @@ -1580,7 +1580,7 @@ Word16 ivas_smc_gmm_fx( IF( localVAD_HE_SAD ) { test(); - IF( LT_16( relE_fx, -10240 ) ) + IF( LT_16( relE_fx, -5120 /*20 q8*/ ) ) { IF( hSpMusClas->sp_mus_state > 0 ) { @@ -1664,11 +1664,12 @@ Word16 ivas_smc_gmm_fx( /* detect attacks based on relE */ IF( GT_16( relE_fx, hSpMusClas->prev_relE_fx ) ) { - hSpMusClas->relE_attack_sum_fx = add_sat( sub_sat( relE_fx, hSpMusClas->prev_relE_fx ), hSpMusClas->relE_attack_sum_fx ); + hSpMusClas->relE_attack_sum_fx = add_sat( sub_sat( relE_fx, hSpMusClas->prev_relE_fx ), hSpMusClas->relE_attack_sum_fx ); /*q8*/ + move16(); } ELSE { - hSpMusClas->relE_attack_sum_fx = 0; + hSpMusClas->relE_attack_sum_fx = 0; /*q8*/ move16(); } hSpMusClas->prev_relE_fx = relE_fx; @@ -1695,7 +1696,7 @@ Word16 ivas_smc_gmm_fx( move16(); test(); test(); - IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) && GT_16( hSpMusClas->relE_attack_sum_fx, 2560 ) ) + IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) && GT_16( hSpMusClas->relE_attack_sum_fx, 1280 /*q8*/ ) ) { hSpMusClas->relE_attack_cnt = add( hSpMusClas->relE_attack_cnt, 1 ); @@ -1719,6 +1720,7 @@ Word16 ivas_smc_gmm_fx( } hSpMusClas->prev_Etot_fx = Etot_fx; + move16(); /*------------------------------------------------------------------* * Preparation of the feature vector @@ -1737,18 +1739,21 @@ Word16 ivas_smc_gmm_fx( // *pFV_fx++ = (float) ( st->pitch[0] + st->pitch[1] + st->pitch[2] ) / 3.0f; *pFV_fx++ = Mpy_32_32( L_shl( add( add( st->pitch[0], st->pitch[1] ), st->pitch[2] ), Q20 ), 715827883 ); } + move32(); + test(); test(); /* [1] voicing */ IF( relE_attack_flag || EQ_16( st->tc_cnt, 1 ) || EQ_16( st->tc_cnt, 2 ) ) { - *pFV_fx++ = st->voicing_fx[2]; + 
*pFV_fx++ = L_shl( st->voicing_fx[2], 5 ); /*q20*/ } ELSE { // *pFV++ = ( st->voicing[0] + st->voicing[1] + st->voicing[2] ) / 3.0f; - *pFV_fx++ = Mpy_32_32( L_shl( L_add( L_add( st->voicing_fx[0], st->voicing_fx[1] ), st->voicing_fx[2] ), Q5 ), 715827883 ); + *pFV_fx++ = Mpy_32_32( L_shl( L_add( L_add( st->voicing_fx[0], st->voicing_fx[1] ), st->voicing_fx[2] ), Q5 ), 715827883 ); /*q20*/ } + move32(); temp_exp = 1; move16(); @@ -1800,22 +1805,25 @@ Word16 ivas_smc_gmm_fx( /* [7] cor_map_sum */ *pFV_fx++ = L_shl( cor_map_sum_fx, Q12 ); + move32(); /* [8] non_sta */ - *pFV_fx++ = L_shl( non_sta_fx, Q12 ); + *pFV_fx++ = L_shl( non_sta_fx, Q14 ); /*scaling from 6 to 20*/ + move32(); /* [9] epsP */ - temp32 = epsP_fx[14]; + temp32 = L_add( epsP_fx[14], L_shr( 21474, sub( 31, Q_esp ) ) ); move32(); temp32_log = L_add( BASOP_Util_Log2( temp32 ), L_shl( sub( Q31, Q_esp ), Q25 ) ); temp32_log1 = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/ - temp32 = epsP_fx[0]; + temp32 = L_add( epsP_fx[0], L_shr( 21474, sub( 31, Q_esp ) ) ); move32(); temp32_log = L_add( BASOP_Util_Log2( temp32 ), L_shl( sub( Q31, Q_esp ), Q25 ) ); temp32_log2 = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/ - *pFV_fx++ = L_shr( L_add( temp32_log1, temp32_log2 ), Q5 ); + *pFV_fx++ = L_shr( L_sub( temp32_log1, temp32_log2 ), Q5 ); + move32(); //*pFV++ = logf( epsP[14] + 1e-5f ) - logf( epsP[0] + 1e-5f ); /* [10,11,12] MFCCs */ @@ -1876,6 +1884,7 @@ Word16 ivas_smc_gmm_fx( } *pFV_fx++ = L_shr( ps_diff_fx, sub( sub( Qfact_PS_past, Q7 ), Q20 ) ); /// ps_diff; + move32(); /* [14] ps_sta (spectral stationarity) */ ps_sta_fx = 0; @@ -1900,6 +1909,7 @@ Word16 ivas_smc_gmm_fx( temp32_log = L_add( BASOP_Util_Log2( ps_sta_fx ), L_shl( ps_sta_exp, Q25 ) ); temp32_log = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/ *pFV_fx++ = L_shr( temp32_log, Q5 ); // logf( ps_sta + 1e-5f ); + move32(); MVR2R_WORD32( &PS_norm_fx[LOWEST_FBIN], hSpMusClas->past_PS_fx, 
HIGHEST_FBIN - LOWEST_FBIN ); /* save ps_diff and ps_sta features for XTALK and UNCLR classifier */ @@ -1907,16 +1917,22 @@ Word16 ivas_smc_gmm_fx( { IF( st->idchan == 0 ) { - hStereoClassif->ps_diff_ch1_fx = ps_diff_fx; // Qfact_PS_past - 7 - hStereoClassif->ps_sta_ch1_fx = temp32_log; // logf( ps_sta + 1e-5f );Q25 + hStereoClassif->ps_diff_ch1_fx = ps_diff_fx; // Qfact_PS_past - 7 + hStereoClassif->ps_diff_ch1_e = sub( 38, Qfact_PS_past ); // Qfact_PS_past - 7 + hStereoClassif->ps_sta_ch1_fx = temp32_log; // logf( ps_sta + 1e-5f );Q25 + hStereoClassif->ps_sta_ch1_e = 6; // logf( ps_sta + 1e-5f );Q25 } ELSE { hStereoClassif->ps_diff_ch2_fx = ps_diff_fx; + hStereoClassif->ps_diff_ch2_e = sub( 38, Qfact_PS_past ); hStereoClassif->ps_sta_ch2_fx = temp32_log; // logf( ps_sta + 1e-5f );Q25 + hStereoClassif->ps_sta_ch2_e = 6; // logf( ps_sta + 1e-5f );Q25 } move32(); move32(); + move16(); + move16(); } /*------------------------------------------------------------------* @@ -2065,25 +2081,41 @@ Word16 ivas_smc_gmm_fx( move32(); FOR( m = 0; m < N_SMC_MIXTURES; m++ ) { - v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); + Word32 temp[N_PCA_COEF]; + FOR( Word16 ind = 0; ind < N_PCA_COEF; ind++ ) + { + temp[ind] = L_shr( means_speech_fx[m * N_PCA_COEF + ind], sub( 27, Qfact_FV ) ); + move32(); + } + v_sub32_fx( FV_fx, temp, fvm_fx, N_PCA_COEF ); fvm_exp = sub( 31, Qfact_FV ); lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); - ps_fx[m] = L_sub( log_weights_speech_compute[m], L_shr( L_shl_sat( lprob_fx, sub( Q19, sub( Q31, lprob_exp ) ) ), 1 ) ); + ps_fx[m] = L_sub( log_weights_speech_compute[m], L_shl( lprob_fx, sub( Q19 - 1, sub( Q31, lprob_exp ) ) ) ); move32(); - v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); + FOR( Word16 ind = 0; ind < N_PCA_COEF; ind++ ) + { + temp[ind] = L_shr( 
means_music_fx[m * N_PCA_COEF + ind], sub( 27, Qfact_FV ) ); + move32(); + } + v_sub32_fx( FV_fx, temp, fvm_fx, N_PCA_COEF ); lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); - pm_fx[m] = L_sub( log_weights_music_compute[m], L_shr( L_shl_sat( lprob_fx, sub( Q19, sub( Q31, lprob_exp ) ) ), 1 ) ); + pm_fx[m] = L_sub( log_weights_music_compute[m], L_shl( lprob_fx, sub( Q19 - 1, sub( Q31, lprob_exp ) ) ) ); move32(); - v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF ); + FOR( Word16 ind = 0; ind < N_PCA_COEF; ind++ ) + { + temp[ind] = L_shr( means_noise_fx[m * N_PCA_COEF + ind], sub( 27, Qfact_FV ) ); + move32(); + } + v_sub32_fx( FV_fx, temp, fvm_fx, N_PCA_COEF ); lprob_exp = 0; move16(); lprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF, fvm_exp, 31 - 28, &lprob_exp ); - pn_fx[m] = L_sub( log_weights_music_compute[m], L_shr( L_shl_sat( lprob_fx, sub( Q19, sub( Q31, lprob_exp ) ) ), 1 ) ); + pn_fx[m] = L_sub( log_weights_noise_compute[m], L_shl( lprob_fx, sub( Q19 - 1, sub( Q31, lprob_exp ) ) ) ); move32(); } @@ -2098,9 +2130,9 @@ Word16 ivas_smc_gmm_fx( move32(); } - hSpMusClas->lpm_fx = extract_l( L_shr( lpm_fx, 11 ) ); // Q8 - hSpMusClas->lps_fx = extract_l( L_shr( lps_fx, 11 ) ); // Q8 - hSpMusClas->lpn_fx = extract_l( L_shr( lpn_fx, 11 ) ); // Q8 + hSpMusClas->lpm_fx = extract_l( L_shr( lpm_fx, 12 ) ); // Q7 + hSpMusClas->lps_fx = extract_l( L_shr( lps_fx, 12 ) ); // Q7 + hSpMusClas->lpn_fx = extract_l( L_shr( lpn_fx, 12 ) ); // Q7 /* determine HQ Generic speech class */ IF( st->hHQ_core != NULL ) @@ -2149,13 +2181,13 @@ Word16 ivas_smc_gmm_fx( dec = (Word16) GT_32( dlp_fx, 0 ); /* calculate weight based on relE (higher relE -> lower weight, lower relE -> higher weight) */ - Word16 Qio = Q25; + Word16 Qio = Q24; move16(); - 
wrelE_fx = lin_interp32_fx( L_deposit_h( relE_fx ), 503316480, 30198989, -503316480, 33218888, 1, &Qio ); // Q25 + wrelE_fx = lin_interp32_fx( L_deposit_h( relE_fx ), 15 << 24, 15099494 /*0.9 q24*/, -( 15 << 24 ), 16609443 /*0.99 q24*/, 1, &Qio ); // Q25 wrelE_fx = L_shr( wrelE_fx, sub( Qio, 25 ) ); /* calculate weight based on drops of dlp (close to 1 during sudden drops of dlp, close to 0 otherwise) */ // hSpMusClas->dlp_mean_ST = 0.8f * hSpMusClas->dlp_mean_ST + 0.2f * dlp; - hSpMusClas->dlp_mean_ST_fx = L_add( Mpy_32_32( 419430, hSpMusClas->dlp_mean_ST_fx ), Mpy_32_32( 104858, dlp_fx ) ); + hSpMusClas->dlp_mean_ST_fx = L_add( Mpy_32_32( 1717986918, hSpMusClas->dlp_mean_ST_fx ), Mpy_32_32( 429496729, dlp_fx ) ); hSpMusClas->lt_dec_thres_fx = extract_l( L_shr( hSpMusClas->dlp_mean_ST_fx, 10 ) ); test(); IF( dlp_fx < 0 && LT_32( dlp_fx, hSpMusClas->dlp_mean_ST_fx ) ) @@ -2208,7 +2240,7 @@ Word16 ivas_smc_gmm_fx( wght_fx = Mpy_32_32( Mpy_32_32( wrelE_fx, wdrop_fx ), wrise_fx ); // Q13 test(); /* ratio of delta means vs. 
delta variances */ - if ( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) ) + IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) ) { hSpMusClas->dlp_mean_LT_fx = dlp_fx; @@ -2220,6 +2252,7 @@ Word16 ivas_smc_gmm_fx( hSpMusClas->dlp_mean_LT_fx = L_add( Mpy_32_32( 1932735283, hSpMusClas->dlp_mean_LT_fx ), Mpy_32_32( 214748365, dlp_fx ) ); // Q19 temp32 = L_sub( dlp_fx, hSpMusClas->dlp_mean_LT_fx ); + temp32 = W_extract_l( W_shr( W_mult0_32_32( temp32, temp32 ), 19 ) ); /*q19*/ hSpMusClas->dlp_var_LT_fx = L_add( Mpy_32_32( 1932735283, hSpMusClas->dlp_var_LT_fx ), Mpy_32_32( 214748365, temp32 ) ); test(); @@ -2235,13 +2268,27 @@ Word16 ivas_smc_gmm_fx( temp_exp = sub( Q31, Q19 ); Word16 div_e = 0; move16(); - temp32 = Sqrt32( L_abs( hSpMusClas->dlp_var_LT_fx ), &temp_exp ); - temp_sqrt = L_add( Sqrt32( L_abs( hSpMusClas->dlp_var_LT_fx ), &temp_exp ), 1 ); + temp_sqrt = Sqrt32( L_abs( hSpMusClas->dlp_var_LT_fx ), &temp_exp ); + IF( temp_exp < 0 ) + { + temp_sqrt = L_shl( temp_sqrt, temp_exp ); + temp_exp = 0; + move16(); + } + temp_sqrt = L_shr( temp_sqrt, 1 ); /*adding 1 as guard bit to avoid overflow in addition*/ + temp_exp = add( temp_exp, 1 ); + temp_sqrt = L_add( temp_sqrt, L_shl( 1, sub( 31, temp_exp ) ) ); dlp_mean2var_fx = BASOP_Util_Divide3232_Scale( L_abs( hSpMusClas->dlp_mean_LT_fx ), temp_sqrt, &div_e ); dlp_mean2var_q = sub( add( Q3, temp_exp ), div_e ); // 15-div_e+Q19 -(31-temp_exp) + IF( GT_16( dlp_mean2var_q, 26 ) ) + { + dlp_mean2var_fx = shl( dlp_mean2var_fx, sub( 26, dlp_mean2var_q ) ); + dlp_mean2var_q = 26; + move16(); + } } - if ( GT_32( L_deposit_l( dlp_mean2var_fx ), L_shl( 15, dlp_mean2var_q ) ) ) + IF( GT_32( L_deposit_l( dlp_mean2var_fx ), L_shl( 15, dlp_mean2var_q ) ) ) { /* decrease the weight little bit when the classifier indicates "strong speech" or "strong music" */ // wght *= 0.9f; @@ -2266,8 +2313,8 @@ Word16 ivas_smc_gmm_fx( /* calculate weighted decision */ // 
hSpMusClas->wdlp_0_95_sp = wght * hSpMusClas->wdlp_0_95_sp + ( 1 - wght ) * dlp; - - hSpMusClas->wdlp_0_95_sp_fx = (Word16) L_add( L_shl( Mpy_32_16_1( wght_fx, hSpMusClas->wdlp_0_95_sp_fx ), Q2 ), L_shl( Mpy_32_32( L_sub( ONE_IN_Q13, wght_fx ), dlp_fx ), Q5 ) ); // Q8 + hSpMusClas->wdlp_0_95_sp_fx = extract_l( L_add( L_shl( Mpy_32_16_1( wght_fx, hSpMusClas->wdlp_0_95_sp_fx ), Q2 ), Mpy_32_32( L_shl( L_sub( ONE_IN_Q13, wght_fx ), Q7 ), dlp_fx ) ) ); // Q8 + move16(); /* xtalk classifier: apply long hysteresis to prevent LRTD on music */ diff --git a/lib_enc/stat_enc.h b/lib_enc/stat_enc.h index a8ae5ecac00fba9f05f5d1b92a1f11c6d26ffac2..82aaebc17793743eabeca9ef76f737ca7b0f47d1 100644 --- a/lib_enc/stat_enc.h +++ b/lib_enc/stat_enc.h @@ -228,25 +228,25 @@ typedef struct vad_structure int16_t hangover_cnt_music; float bcg_flux; - Word16 bcg_flux_fx; + Word16 bcg_flux_fx; // Q4 int16_t soft_hangover; int16_t voiced_burst; int16_t bcg_flux_init; int16_t nb_active_frames_he1; int16_t hangover_cnt_he1; - float prim_act_quick; /* Noise estimator - primary activity quick */ - float prim_act_slow; /* Noise estimator - primary activity slow */ - float prim_act; /* Noise estimator - primary activity slow rise quick fall */ - float prim_act_quick_he; /* Noise estimator - primary activity quick */ - float prim_act_slow_he; /* Noise estimator - primary activity slow */ - float prim_act_he; /* Noise estimator - primary activity slow rise quick fall */ - Word16 prim_act_quick_fx; /* Noise estimator - primary activity quick */ - Word16 prim_act_slow_fx; /* Noise estimator - primary activity slow */ - Word16 prim_act_fx; /* Noise estimator - primary activity slow rise quick fall */ - Word16 prim_act_quick_he_fx; /* Noise estimator - primary activity quick */ - Word16 prim_act_slow_he_fx; /* Noise estimator - primary activity slow */ - Word16 prim_act_he_fx; /* Q15 Noise estimator - primary activity slow rise quick fall */ + float prim_act_quick; /* Noise estimator - primary activity 
quick */ + float prim_act_slow; /* Noise estimator - primary activity slow */ + float prim_act; /* Noise estimator - primary activity slow rise quick fall */ + float prim_act_quick_he; /* Noise estimator - primary activity quick */ + float prim_act_slow_he; /* Noise estimator - primary activity slow */ + float prim_act_he; /* Noise estimator - primary activity slow rise quick fall */ + Word16 prim_act_quick_fx; /*Q15 */ /* Noise estimator - primary activity quick */ + Word16 prim_act_slow_fx; /*Q15 */ /* Noise estimator - primary activity slow */ + Word16 prim_act_fx; /*Q15 */ /* Noise estimator - primary activity slow rise quick fall */ + Word16 prim_act_quick_he_fx; /*Q15 */ /* Noise estimator - primary activity quick */ + Word16 prim_act_slow_he_fx; /*Q15 */ /* Noise estimator - primary activity slow */ + Word16 prim_act_he_fx; /*Q15 */ /* Q15 Noise estimator - primary activity slow rise quick fall */ int16_t spectral_tilt_reset; int16_t consec_inactive; @@ -255,7 +255,7 @@ typedef struct vad_structure int16_t trigger_SID; float running_avg; float snr_sum_vad; - Word16 snr_sum_vad_fx; + Word16 snr_sum_vad_fx; /*Q15 */ Word16 running_avg_fx; /*Q15 */ Word32 L_snr_sum_vad_fx; /*Q4*/ @@ -807,11 +807,11 @@ typedef struct noise_estimation_structure Word16 Etot_last_fx; /*Q8*/ Word16 Etot_lp_fx; /* Q8 Noise estimator - Filtered input energy */ - Word32 Etot_l_32fx; /* Q8 Noise estimator - Track energy from below */ - Word32 Etot_h_32fx; /* Q8 Noise estimator - Track energy from above */ - Word32 Etot_l_lp_32fx; /* Q8 Noise estimator - Smoothed low energy */ - Word32 Etot_last_32fx; /*Q8*/ - Word32 Etot_lp_32fx; /* Q8 Noise estimator - Filtered input energy */ + Word32 Etot_l_32fx; /* Q24 Noise estimator - Track energy from below */ + Word32 Etot_h_32fx; /* Q24 Noise estimator - Track energy from above */ + Word32 Etot_l_lp_32fx; /* Q24 Noise estimator - Smoothed low energy */ + Word32 Etot_last_32fx; /*Q24*/ + Word32 Etot_lp_32fx; /* Q24 Noise estimator - Filtered 
input energy */ Word16 lt_tn_track_fx; /* Q15 */ Word16 lt_tn_dist_fx; /* Q8*/ @@ -2270,7 +2270,7 @@ typedef struct enc_core_structure float preemph_fac_flt; /* Preemphasis factor */ float gamma_flt; - Word16 preemph_fac; /*Preemphasis factor*/ + Word16 preemph_fac; /*Preemphasis factor Q15*/ Word16 gamma; // Q15 Word16 inv_gamma; @@ -2991,4 +2991,5 @@ typedef struct context_rc_mem_struct } RC_CONTEXT_MEM, *HANDLE_RC_CONTEXT_MEM; + #endif diff --git a/lib_enc/swb_tbe_enc_fx.c b/lib_enc/swb_tbe_enc_fx.c index dc8b0870435524beeb065101f8b3bd3166e5c075..b00f385cf3a3b2248336b890f1957d5e83ea2129 100644 --- a/lib_enc/swb_tbe_enc_fx.c +++ b/lib_enc/swb_tbe_enc_fx.c @@ -3582,7 +3582,7 @@ void swb_tbe_enc_ivas_fx( FOR( i = 0; i < LPC_SHB_ORDER; i++ ) { - hBWE_TD->mem_stp_swb_fx[i] = shl( hBWE_TD->mem_stp_swb_fx[i], sub( Q_bwe_exc, st_fx->prev_Q_bwe_syn ) ); + hBWE_TD->mem_stp_swb_fx[i] = shl_sat( hBWE_TD->mem_stp_swb_fx[i], sub( Q_bwe_exc, st_fx->prev_Q_bwe_syn ) ); move16(); } diff --git a/lib_enc/transient_detection.c b/lib_enc/transient_detection.c index 39d33b7b0e7bc413c0198ff4c757d7bb50120054..fd80f29e6041569cef1a582400d9a675fb2c9934 100644 --- a/lib_enc/transient_detection.c +++ b/lib_enc/transient_detection.c @@ -45,6 +45,7 @@ #ifdef IVAS_FLOAT_FIXED #include "prot_fx.h" +#include "ivas_prot_fx.h" #endif /*---------------------------------------------------------------* * Local constants @@ -767,7 +768,73 @@ static void CalculateSubblockEnergies( * * *-------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +void set_transient_stereo_fx( + CPE_ENC_HANDLE hCPE, /* i : CPE structure */ + Word16 currFlatness[] /* i/o: current flatness */ +) +{ + Word16 n, attackIsPresent; + Word16 currFlatnessMax; + Encoder_State **sts; + + sts = hCPE->hCoreCoder; + + /* for DFT/TD based stereo ,map avg. 
flatness to individual stereo channels (M/S or X/Y) */ + maximum_fx( currFlatness, CPE_CHANNELS, &currFlatnessMax ); + attackIsPresent = 0; + move16(); + + FOR( n = 0; n < CPE_CHANNELS; n++ ) + { + attackIsPresent = s_max( attackIsPresent, sts[n]->hTranDet->transientDetector.bIsAttackPresent ); + } + + set16_fx( currFlatness, currFlatnessMax, CPE_CHANNELS ); + + FOR( n = 0; n < CPE_CHANNELS; n++ ) + { + sts[n]->hTranDet->transientDetector.bIsAttackPresent = attackIsPresent; + move16(); + } + + IF( hCPE->hStereoDft != NULL ) + { + IF( hCPE->hStereoDft->attackPresent ) + { + hCPE->hStereoDft->wasTransient = 1; + move16(); + } + ELSE IF( hCPE->hStereoDft->wasTransient ) + { + hCPE->hStereoDft->wasTransient = 0; + move16(); + } + + hCPE->hStereoDft->attackPresent = attackIsPresent; + move16(); + + hCPE->hStereoDft->hItd->currFlatness_fx = 0; + move16(); + FOR( n = 0; n < CPE_CHANNELS; n++ ) + { + hCPE->hStereoDft->hItd->currFlatness_fx = s_max( hCPE->hStereoDft->hItd->currFlatness_fx, currFlatness[n] ); + } + } + + IF( hCPE->hStereoMdct != NULL ) + { + hCPE->hStereoMdct->hItd->currFlatness_fx = 0; + move16(); + FOR( n = 0; n < CPE_CHANNELS; n++ ) + { + hCPE->hStereoMdct->hItd->currFlatness_fx = s_max( hCPE->hStereoMdct->hItd->currFlatness_fx, currFlatness[n] ); + } + } + return; +} +#else void set_transient_stereo( CPE_ENC_HANDLE hCPE, /* i : CPE structure */ float currFlatness[] /* i/o: current flatness */ @@ -826,7 +893,7 @@ void set_transient_stereo( return; } - +#endif /*-------------------------------------------------------------------* * transient_analysis() * diff --git a/lib_enc/vad_fx.c b/lib_enc/vad_fx.c index 427dfa2e78af38ced22961ac9b69a13c0661fbcf..fbb3f5161831f9732c1519c3ca67bdc3eeab5d9a 100644 --- a/lib_enc/vad_fx.c +++ b/lib_enc/vad_fx.c @@ -532,7 +532,6 @@ Word16 ivas_dtx_hangover_addition_fx( hangover_short_dtx = sub( hangover_short_dtx, cldfb_subtraction ); hangover_short_dtx = s_max( hangover_short_dtx, 0 ); } - IF( vad_flag != 0 ) /* Speech 
present */ { flag_dtx = 1; @@ -619,6 +618,7 @@ Word16 ivas_dtx_hangover_addition_fx( test(); + IF( flag_dtx != 0 && st_fx->localVAD == 0 ) { *vad_hover_flag_ptr = 1;