From 6100594a951a3c9a4c877e2c1d1328d00f824e4f Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Mon, 30 Dec 2024 12:44:03 +0530 Subject: [PATCH] Integration of ivas_analy_sp_fx in front_vad_fx, Bug fixes and Q-documentation --- Workspace_msvc/lib_enc.vcxproj | 2 +- Workspace_msvc/lib_enc.vcxproj.filters | 6 +- lib_com/cnst.h | 1 + lib_com/ivas_prot_fx.h | 12 +- lib_com/prot.h | 6 - lib_com/prot_fx.h | 18 +- lib_com/tools_fx.c | 15 + lib_enc/analy_sp_fx.c | 682 ++---------------- lib_enc/cod_uv_fx.c | 8 +- lib_enc/comvad_decision_fx.c | 62 +- lib_enc/core_enc_2div_fx.c | 14 +- lib_enc/core_enc_init.c | 239 +----- lib_enc/core_enc_init_fx.c | 62 +- lib_enc/core_enc_ol_fx.c | 135 ++-- lib_enc/core_enc_updt.c | 2 +- lib_enc/ext_sig_ana_fx.c | 2 - lib_enc/igf_enc.c | 9 +- lib_enc/ivas_core_enc.c | 11 - lib_enc/ivas_core_pre_proc_front.c | 69 +- lib_enc/ivas_cpe_enc.c | 25 +- lib_enc/ivas_enc.c | 7 +- lib_enc/ivas_front_vad.c | 112 ++- lib_enc/ivas_init_enc.c | 3 + lib_enc/ivas_mct_enc.c | 12 - lib_enc/ivas_mct_enc_mct.c | 7 +- lib_enc/ivas_rom_enc.h | 19 +- lib_enc/{ivas_rom_enc.c => ivas_rom_enc_fx.c} | 59 +- lib_enc/ivas_stat_enc.h | 189 +++-- lib_enc/ivas_stereo_mdct_core_enc.c | 2 +- lib_enc/nois_est_fx.c | 110 +++ lib_enc/prot_fx_enc.h | 49 +- lib_enc/tcx_utils_enc.c | 14 +- lib_enc/tcx_utils_enc_fx.c | 2 +- lib_enc/vad_fx.c | 48 +- 34 files changed, 701 insertions(+), 1312 deletions(-) rename lib_enc/{ivas_rom_enc.c => ivas_rom_enc_fx.c} (98%) diff --git a/Workspace_msvc/lib_enc.vcxproj b/Workspace_msvc/lib_enc.vcxproj index 384fb8bb1..6a96e575a 100644 --- a/Workspace_msvc/lib_enc.vcxproj +++ b/Workspace_msvc/lib_enc.vcxproj @@ -229,6 +229,7 @@ + @@ -331,7 +332,6 @@ - diff --git a/Workspace_msvc/lib_enc.vcxproj.filters b/Workspace_msvc/lib_enc.vcxproj.filters index 2476cdb6c..73bd7c7bb 100644 --- a/Workspace_msvc/lib_enc.vcxproj.filters +++ b/Workspace_msvc/lib_enc.vcxproj.filters @@ -436,9 +436,6 @@ enc_evs_c - - enc_ivas_c - enc_ivas_c @@ -1013,6 +1010,9 @@ enc_evs_c + + enc_ivas_c + diff --git a/lib_com/cnst.h b/lib_com/cnst.h index bf6e305d1..fe16db670 100644 --- a/lib_com/cnst.h +++ b/lib_com/cnst.h @@ -2774,6 +2774,7 @@ enum #define MU_MA_FX 10923 /* original prediction factor for the AMR WB tables (Q15) */ #define E_MIN_FXQ15 115 /* Q15*/ +#define E_MIN_FXQ31 7516193 /* 0.0035d in Q31*/ #define MAX_DYNAMIC_FX (82*128) #define MIN_DYNAMIC_FX (50*128) diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 7882f0482..8726e13d8 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -4564,13 +4564,17 @@ ivas_error front_vad_fx( const Word16 MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ const Word16 input_frame, /* i : frame length */ Word16 vad_flag_dtx[], /* o : HE-SAD flag with additional DTX HO */ - Word32 fr_bands_fx[][2 * NB_BANDS], /* o : energy in frequency bands Q_buffer[n] + QSCALE + 2 */ + Word32 fr_bands_fx[][2 * NB_BANDS], /* o : energy in frequency bands q_fr_bands_fx */ + Word16 q_fr_bands[], /* o : Q of fr_bands_fx Q0 */ Word16 Etot_LR_fx[], /* o : total energy Left & Right channel Q8 */ - Word32 lf_E_fx[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels Q_buffer[n] + QSCALE */ + Word32 lf_E_fx[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels q_lf_E */ + Word16 q_lf_E[], /* o : Q of lf_E_fx */ Word16 localVAD_HE_SAD[], /* o : HE-SAD flag without hangover, LR channels */ Word16 vad_hover_flag[], /* o : VAD hangover flag */ - Word32 band_energies_LR_fx[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN Q_buffer[1] + QSCALE + 2 - band_ener_guardbits*/ - Word32 *PS_out_fx, /* o : energy spectrum Q_buffer + QSCALE */ + Word32 band_energies_LR_fx[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN q_band_energies_LR */ + Word16 *q_band_energies_LR, /* o : Q of band_energies_LR_fx */ + Word32 *PS_out_fx, /* o : energy spectrum q_PS_out */ + Word16 *q_PS_out, /* o : Q of PS_out_fx Q0 */ Word16 *Bin_E_out_fx, /* o : log-energy spectrum of the current frame Q7 */ Word16 Q_inp, Word16 *Q_buffer, diff --git a/lib_com/prot.h b/lib_com/prot.h index c55786ae4..5cfa2cd29 100644 --- a/lib_com/prot.h +++ b/lib_com/prot.h @@ -5666,12 +5666,6 @@ void adapt_lag_wind_fx( const int32_t sr_core /* i : core sampling rate */ ); -void init_coder_ace_plus( - Encoder_State *st, /* i : Encoder state handle */ - const int32_t last_total_brate, /* i : last total bitrate */ - const int16_t MCT_flag /* i : hMCT handle allocated (1) or not (0)*/ -); - void core_coder_reconfig( Encoder_State *st, /* i/o: encoder state structure */ const int32_t last_total_brate /* i : last total bitrate */ diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 75b01b759..715c49ae1 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -206,6 +206,12 @@ void Scale_sig( const Word16 exp0 /* i : exponent: x = round(x << exp) Qx ?exp */ ); +void scale_sig( + Word16 x[], /* i/o: signal to scale Qx */ + const Word16 lg, /* i : size of x[] Q0 */ + const Word16 exp0 /* i : exponent: x = round(x << exp) Qx ?exp */ +); + // tools.c Word32 sum2_fx( /* o : sum of all squared vector elements Q(2x+1)*/ const Word16 *vec, /* i : i vector Qx*/ @@ -10785,9 +10791,11 @@ Word32 sum2_32_fx( Word16 *e ); void ProcessStereoIGF_fx( STEREO_MDCT_ENC_DATA_HANDLE hStereoMdct, - Encoder_State *sts[CPE_CHANNELS], /* i : Encoder state */ - Word16 ms_mask[2][MAX_SFB], /* i : bandwise MS mask */ - Word32 *pITFMDCTSpectrum_fx[CPE_CHANNELS][NB_DIV], /* i : MDCT spectrum fir ITF */ + Encoder_State *sts[CPE_CHANNELS], /* i : Encoder state */ + Word16 ms_mask[2][MAX_SFB], /* i : bandwise MS mask */ + Word32 *pITFMDCTSpectrum_fx[CPE_CHANNELS][NB_DIV], /* i : MDCT spectrum fir ITF */ + Word16 q_pITFMDCTSpectrum_1, + Word16 q_pITFMDCTSpectrum_2, Word32 *pPowerSpectrum_fx[CPE_CHANNELS], /* i/o: MDCT^2 + MDST^2 spectrum, or estimate */ Word32 *pPowerSpectrumMsInv_fx[CPE_CHANNELS][NB_DIV], /* i : inverse power spectrum */ Word32 *inv_spectrum_fx[CPE_CHANNELS][NB_DIV], /* i : inverse spectrum */ @@ -10813,8 +10821,8 @@ void IGFEncApplyStereo_fx( void IGFSaveSpectrumForITF_ivas_fx( IGF_ENC_INSTANCE_HANDLE hIGFEnc, /* i/o: instance handle of IGF Encoder */ const Word16 igfGridIdx, /* i : IGF grid index */ - const Word32 *pITFSpectrum /* i : MDCT spectrum */ -); + const Word32 *pITFSpectrum, /* i : MDCT spectrum */ + Word16 exp_pITFSpectrum ); Word16 IGFEncWriteBitstream_ivas_fx( const IGF_ENC_INSTANCE_HANDLE hIGFEnc, /* i : instance handle of IGF Encoder */ BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ diff --git a/lib_com/tools_fx.c b/lib_com/tools_fx.c index 62766e3b7..dbef25ccb 100644 --- a/lib_com/tools_fx.c +++ b/lib_com/tools_fx.c @@ -1021,6 +1021,21 @@ void Scale_sig( } } +void scale_sig( + Word16 x[], /* i/o: signal to scale Qx */ + const Word16 lg, /* i : size of x[] Q0 */ + const Word16 exp0 /* i : exponent: x = round(x << exp) Qx ?exp */ +) +{ + Word16 i; + + FOR( i = 0; i < lg; i++ ) + { + x[i] = shl( x[i], exp0 ); + move16(); + } +} + /*---------------------------------------------------------------------* * mean() * diff --git a/lib_enc/analy_sp_fx.c b/lib_enc/analy_sp_fx.c index 8ad417eee..c07d8ba6d 100644 --- a/lib_enc/analy_sp_fx.c +++ b/lib_enc/analy_sp_fx.c @@ -18,8 +18,7 @@ *-------------------------------------------------------------------*/ static void find_enr( Word16 data[], Word32 band[], Word32 *ptE, Word32 *LEtot, const Word16 min_band, const Word16 max_band, const Word16 Q_new2, const Word32 e_min, Word32 *Bin_E, Word16 BIN_FREQ_FX, Word32 *band_energies ); -static void ivas_find_enr( Word16 data[], Word32 band[], Word32 *ptE, Word32 *LEtot, const Word16 min_band, const Word16 max_band, const Word16 Q_new2, const Word32 e_min, Word32 *Bin_E, Word16 BIN_FREQ_FX, Word32 *band_energies ); -static void ivas_find_enr1( Word16 *data, Word16 q_data, Word32 *band, Word16 *q_band, Word32 *ptE, Word16 *q_ptE, Word64 *LEtot, const Word16 min_band, const Word16 max_band, const Word16 Q_new, const Word32 e_min, Word32 *Bin_E, Word16 BIN_FREQ_FX, Word32 *band_energies ); +static void ivas_find_enr( Word16 *data, Word16 q_data, Word32 *band, Word16 *q_band, Word32 *ptE, Word16 *q_ptE, Word64 *LEtot, const Word16 min_band, const Word16 max_band, Word32 *Bin_E, Word16 BIN_FREQ_FX, Word32 *band_energies ); #ifdef IVAS_CODE_CPE static void find_enr_dft( CPE_ENC_HANDLE hCPE, const int32_t input_Fs, float DFT_past_DMX[], float band[], float *ptE, float *Etot, const int16_t min_band, const int16_t max_band, float *Bin_E, float *band_ener ); #endif @@ -200,220 +199,6 @@ void analy_sp_fx( * * find input signal energy for each critical band using the DFT buffers *------------------------------------------------------------------------*/ - -static void find_enr_dft_fx( - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ - const Word32 input_Fs, /* i : input sampling rate */ - Word32 DFT_past_DMX_fx[], /* i:input DFT_Dmx (Q_inp_dmx ) */ - Word32 band_fx[], /* o : per band energy */ - Word32 *ptE_fx, /* o : per bin energy for low frequencies */ - Word32 *Etot_fx, /* i/o: total energy (Q8) */ - const Word16 min_band, /* i : minimum critical band */ - const Word16 max_band, /* i : maximum critical band */ - Word32 *Bin_E_fx, /* o : Per bin energy (Q7) */ - Word32 *band_ener_fx, /* o : per band energy without E_MIN (Qout) */ - Word16 Q_inp_dmx, - Word16 *Qout ) -{ - Word16 i, cnt; - Word32 tmp_fx; - Word32 freq; - const Word32 *ptR_fx, *ptI_fx; - Word32 norm_val_fx; - Word16 bin_cnt = 0; - move16(); - // Word32 band_ener_fx[2 * NB_BANDS]; - // Etot_fx[0] = Etot[0] * ONE_IN_Q8; - Word32 c_fx, s_fx; - /* One window - 40ms*12.8kHz = 512 samples */ - Word32 c_1_fx = 2147321984; // cosf( PI2 / STEREO_DFT_N_12k8_ENC ) * ONE_IN_Q31; - Word32 s_1_fx = 26352928; // sinf( PI2 / STEREO_DFT_N_12k8_ENC ) * ONE_IN_Q31; - Word32 g_1_fx = 1570240000; // ( 1.f + 0.68f * 0.68f ) * ONE_IN_Q30; - Word32 g_2_fx = 1460288896; // 2 * 0.68f * ONE_IN_Q30; - move32(); - move32(); - move32(); - move32(); - - Word32 g_fx; - Word32 scaleWin_fx; - - Word32 BinE_fx[STEREO_DFT_N_12k8_ENC / 2]; /* NB_BANDS = 20 (= 6350Hz) = highest band available for SR 12.8 -> bin_cnt = 158 */ - Word16 tmp16, scale = 0; - tmp16 = BASOP_Util_Divide3216_Scale( input_Fs, hCPE->hStereoDft->NFFT, &scale ); - tmp16 = shr( tmp16, negate( add( 1, scale ) ) ); - Word16 bin_freq = tmp16; // input_Fs / (float) hCPE->hStereoDft->NFFT; /* adaptive frequency bin width */ - - // scaleWin = 1 / ( 2 * hCPE->hStereoDft->win_ana_energy ); - // scaleWin_fx = ONE_IN_Q31/ ( hCPE->hStereoDft->win_ana_energy_fx ); - scaleWin_fx = BASOP_Util_Divide3216_Scale( ONE_IN_Q31, hCPE->hStereoDft->win_ana_energy_fx, &scale ); // q = 16-scale - Word16 scalewin_q = sub( 16, scale ); - tmp16 = BASOP_Util_Divide3216_Scale( BIN, bin_freq, &scale ); - tmp16 = shr( tmp16, negate( add( 1, scale ) ) ); - Word16 x_fx = tmp16; - move16(); - scaleWin_fx = L_mult0( x_fx, extract_l( scaleWin_fx ) ); // scalewin_q - // scaleWin *= (float) BIN / bin_freq; - // norm_val = scaleWin * 4.0f / ( hCPE->hStereoDft->NFFT * hCPE->hStereoDft->NFFT ); - norm_val_fx = L_deposit_l( BASOP_Util_Divide3232_Scale( L_shl( scaleWin_fx, Q2 ), L_mult0( hCPE->hStereoDft->NFFT, hCPE->hStereoDft->NFFT ), &scale ) ); - norm_val_fx = L_shl( norm_val_fx, Q31 - ( scalewin_q + 15 - scale ) ); // q31 - - ptR_fx = &DFT_past_DMX_fx[2]; /* first real */ - ptI_fx = &DFT_past_DMX_fx[3]; /* first imaginary */ - move32(); - move32(); - c_fx = c_1_fx; - s_fx = s_1_fx; - move32(); - move32(); - - /* for low frequency bins, save per bin energy for the use in find_tilt() */ - freq = bin_freq; - move32(); - FOR( i = 0; i < NB_BANDS - 1; i++ ) /* up to maximum allowed voiced critical band */ - { - band_fx[i] = 0; - move32(); - cnt = 0; - move16(); - /* bins up to crit_band 17 (<= 3700 Hz): - * bin_cnt old (bin_width 50 Hz): 74 (74 * FRAMES_PER_SEC = 3700) - * bin_cnt new (bin_width 40 Hz): 92 (92 * 40 = 3680) - */ - tmp16 = BASOP_Util_Divide3216_Scale( L_sub( crit_bands_fx[i], freq ), bin_freq, &scale ); - tmp16 = shr( tmp16, negate( add( 1, scale ) ) ); - Word32 freq_diff = L_deposit_l( tmp16 ); - Word16 guarded_bits; - guarded_bits = find_guarded_bits_fx( freq_diff ); - s_fx = L_shr( s_fx, guarded_bits ); - c_fx = L_shr( c_fx, guarded_bits ); - WHILE( LE_32( freq, crit_bands_fx[i] ) ) - { - Word64 te = ( W_add( W_mult0_32_32( *ptR_fx, *ptR_fx ), W_mult0_32_32( *ptI_fx, *ptI_fx ) ) ); - Word16 te_exp = W_norm( te ); - te = W_shl( te, te_exp ); // 2 * Q_inp_dmx + te_exp - BinE_fx[bin_cnt] = W_extract_h( te ); // 2 * Q_inp_dmx + te_exp - 32 - move32(); - g_fx = L_sub( L_shr( g_1_fx, guarded_bits ), Mpy_32_32( g_2_fx, c_fx ) ); // 30 - guarded_bits - tmp_fx = L_sub( Mpy_32_32( c_fx, c_1_fx ), Mpy_32_32( s_fx, s_1_fx ) ); // 31 - guarded_bits - s_fx = L_add( Mpy_32_32( s_fx, c_1_fx ), Mpy_32_32( c_fx, s_1_fx ) ); // 31 - guarded_bits - c_fx = tmp_fx; // 31 - guarded_bits - move32(); - - Word64 ngmult = W_mult0_32_32( norm_val_fx, g_fx ); // Q31 + Q30 - gaurded_bits - Word16 ngmult_exp = W_norm( ngmult ); - ngmult = W_shl( ngmult, ngmult_exp ); // Q31 + Q30 - gaurded_bits + ngmult_exp - BinE_fx[bin_cnt] = Mpy_32_32( W_extract_h( ngmult ), BinE_fx[bin_cnt] ); //(Q31 + Q31 - gaurded_bits + ngmult_exp) - Q32 + (2 * Q_inp_dmx + te_exp - 32) - 31 - move32(); - // To Be Checked - BinE_fx[bin_cnt] = L_shl_sat( BinE_fx[bin_cnt], sub( *Qout, sub( sub( add( add( shl( Q_inp_dmx, 1 ), te_exp ), ngmult_exp ), 34 ), guarded_bits ) ) ); - move32(); - // To Be Checked - band_fx[i] = L_add_sat( BinE_fx[bin_cnt], band_fx[i] ); - move32(); - ptR_fx += 2; - ptI_fx += 2; - freq = L_add( freq, L_deposit_l( bin_freq ) ); - cnt = add( cnt, 1 ); - bin_cnt = add( bin_cnt, 1 ); - } - s_fx = L_shl( s_fx, guarded_bits ); - c_fx = L_shl( c_fx, guarded_bits ); - band_fx[i] = Mpy_32_16_r( band_fx[i], inv_tbl_fx[cnt] ); /* normalization per frequency bin */ - move32(); - band_ener_fx[i] = band_fx[i]; /* per band energy without E_MIN */ - move32(); - IF( LT_32( band_fx[i], L_shl( E_MIN_Q11_FX, sub( *Qout, Q11 ) ) ) ) - { - band_fx[i] = L_shl( E_MIN_Q11_FX, sub( *Qout, Q11 ) ); - move32(); - } - } - - /* continue computing the energy per critical band for higher frequencies */ - - /* old version, FFT 256 @ SR12.8 (-> bin_width = 50 Hz): - NB_BANDS = 20 (= 6350Hz) = highest band available for SR 12.8 -> bin_cnt = 127 = L_FFT/2-1*/ - - /* new version: DFT (1200/800/400) @ input SR (48/32/16) (-> bin_width = 40 Hz): - * - */ - /* NB_BANDS = 20 (= 6350Hz) = highest band available for SR 12.8 -> bin_cnt = 158 */ - /* NB_BANDS = 21 (= 7700Hz) = highest band available for SR 16 -> bin_cnt = 192 */ - /* NB_BANDS = 24 (= 15500Hz) = highest band available for SR 32 -> bin_cnt = 387 */ - /* NB_BANDS = 24 (= 15500Hz) = highest band available for SR 48 -> bin_cnt = 387 */ - - for ( ; i < NB_BANDS; i++ ) - { - band_fx[i] = 0; - move32(); - cnt = 0; - move16(); - tmp16 = BASOP_Util_Divide3216_Scale( L_sub( 6399, freq ), bin_freq, &scale ); - tmp16 = shr( tmp16, negate( add( 1, scale ) ) ); - Word32 freq_diff = L_deposit_l( tmp16 ); - Word16 guarded_bits; - guarded_bits = find_guarded_bits_fx( freq_diff ); - c_fx = L_shr( c_fx, guarded_bits ); - WHILE( LT_32( freq, 6399 ) ) - { - Word64 te = ( W_add( W_mult0_32_32( *ptR_fx, *ptR_fx ), W_mult0_32_32( *ptI_fx, *ptI_fx ) ) ); - Word16 te_exp = W_norm( te ); - te = W_shl( te, te_exp ); // 2 * Q_inp_dmx + te_exp - BinE_fx[bin_cnt] = W_extract_h( te ); // 2 * Q_inp_dmx + te_exp - 32 - move32(); - g_fx = L_sub( L_shr( g_1_fx, guarded_bits ), Mpy_32_32( g_2_fx, c_fx ) ); - - Word64 ngmult = W_mult0_32_32( norm_val_fx, g_fx ); // Q31 + Q31 - gaurded_bits - Word16 ngmult_exp = W_norm( ngmult ); - ngmult = W_shl( ngmult, ngmult_exp ); // Q31 + Q31 - gaurded_bits + ngmult_exp - BinE_fx[bin_cnt] = Mpy_32_32( W_extract_h( ngmult ), BinE_fx[bin_cnt] ); //(Q31 + Q31 - gaurded_bits + ngmult_exp) - Q32 + (2 * Q_inp_dmx + te_exp - 32) - 31 - move32(); - // To Be Checked - BinE_fx[bin_cnt] = L_shl_sat( BinE_fx[bin_cnt], sub( *Qout, sub( sub( add( add( shl( Q_inp_dmx, 1 ), te_exp ), ngmult_exp ), 34 ), guarded_bits ) ) ); - move32(); - // To Be Checked - band_fx[i] = L_add_sat( BinE_fx[bin_cnt], band_fx[i] ); - move32(); - ptR_fx += 2; - ptI_fx += 2; - freq = L_add( freq, L_deposit_l( bin_freq ) ); - cnt = add( cnt, 1 ); - bin_cnt = add( bin_cnt, 1 ); - } - c_fx = L_shl( c_fx, guarded_bits ); - band_fx[i] = Mpy_32_16_r( band_fx[i], inv_tbl_fx[cnt] ); - move32(); - band_ener_fx[i] = band_fx[i]; - move32(); - IF( LT_32( band_fx[i], L_shl( E_MIN_Q11_FX, sub( *Qout, Q11 ) ) ) ) - { - band_fx[i] = L_shl( E_MIN_Q11_FX, sub( *Qout, Q11 ) ); - move32(); - } - } - - /* put bin energies from BinE into Bin_E[L_FFT/2-1] (interpolate 40 Hz bin values to fit into 50 Hz bins) */ - /* Last value of Bin_E is handled outside this function*/ - assert( bin_cnt == ( STEREO_DFT_N_12k8_ENC / 2 - 1 ) ); - BinE_fx[STEREO_DFT_N_12k8_ENC / 2 - 1] = BinE_fx[STEREO_DFT_N_12k8_ENC / 2 - 2]; - move32(); - L_lerp_fx( BinE_fx, Bin_E_fx, L_FFT / 2, STEREO_DFT_N_12k8_ENC / 2, Qout ); - MVR2R_WORD32( Bin_E_fx, ptE_fx, VOIC_BINS ); // Qout - - /* find the total log energy */ - tmp_fx = *Etot_fx; - move32(); - FOR( i = min_band; i <= max_band; i++ ) - { - tmp_fx = L_add( tmp_fx, band_fx[i] ); // Qout - } - *Etot_fx = tmp_fx; - move32(); - - return; -} - static void find_enr_dft_ivas_fx( CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ const Word32 input_Fs, /* i : input sampling rate */ @@ -502,12 +287,12 @@ static void find_enr_dft_ivas_fx( IF( GT_16( *q_band, 39 ) ) { + shift = sub( *q_band, 39 ); *q_band = 39; move16(); - shift = sub( *q_band, 39 ); } - min_ener = L_shl( 7516193 /* 0.0035 in Q31 */, sub( *q_band, 31 ) ); + min_ener = L_shl( E_MIN_FXQ31 /* 0.0035 in Q31 */, sub( *q_band, 31 ) ); FOR( i = 0; i < NB_BANDS - 1; i++ ) /* up to maximum allowed voiced critical band */ { @@ -648,152 +433,7 @@ static void find_enr_dft_ivas_fx( * * Spectral analysis of 12.8kHz input *-------------------------------------------------------------------*/ - void ivas_analy_sp_fx( - const Word16 element_mode, /* i : element mode */ - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ - const Word32 input_Fs, /* i : input sampling rate */ - Word16 *speech, /* i : speech buffer Q_new - preemph_bits */ - const Word16 Q_new, /* i : current scaling exp Q0 */ - Word32 *fr_bands, /* o : energy in critical frequency bands Q_new + QSCALE */ - Word32 *lf_E, /* o : per bin E for first... Q_new + QSCALE - 2*/ - Word16 *Etot, /* o : total input energy Q8 */ - const Word16 min_band, /* i : minimum critical band Q0 */ - const Word16 max_band, /* i : maximum critical band Q0 */ - const Word32 e_min_scaled, /* i : minimum energy scaled Q_new + QSCALE */ - Word16 Scale_fac[2], /* o : FFT scales factors (2 values by frame) Q0 */ - Word32 *Bin_E, /* o : per-bin energy spectrum Q7 */ - Word32 *Bin_E_old, /* o : per-bin energy spectrum of the previous frame Q7 */ - Word32 *PS, /* o : per-bin energy spectrum Q_new + QSCALE */ - Word16 *EspecdB, /* o : per-bin log energy spectrum (with f=0) Q7 */ - Word32 *band_energies, /* o : energy in critical frequency bands without minimum noise floor MODE2_E_MIN (Q_new + QSCALE + 2)*/ - Word16 *fft_buff /* o : FFT coefficients (Q_new + QSCALE + 2) */ -) -{ - Word16 *pt; - Word16 i_subfr, i; - Word32 *pt_bands; - Word32 Ltmp, LEtot; - Word16 *pt_fft; - Word16 Min_val, Max_val; - Word16 Scale_fac2; - Word16 fft_temp[L_FFT]; - - (void) input_Fs; - /*-----------------------------------------------------------------* - * Compute spectrum - * find energy per critical frequency band and total energy in dB - *-----------------------------------------------------------------*/ - - pt_bands = fr_bands; - pt_fft = fft_buff; - LEtot = L_deposit_l( 0 ); - IF( NE_16( element_mode, IVAS_CPE_DFT ) ) - { - FOR( i_subfr = 0; i_subfr <= 1; i_subfr++ ) - { - pt = speech + 3 * ( L_SUBFR / 2 ) - L_FFT / 2; - IF( i_subfr != 0 ) - { - pt = speech + 7 * ( L_SUBFR / 2 ) - L_FFT / 2; - } - - /* Clear 1st value of 1st part, copy 1st value of 2nd part */ - fft_temp[0] = 0; - move16(); - fft_temp[L_FFT / 2] = pt[L_FFT / 2]; - move16(); - Max_val = s_max( fft_temp[0], fft_temp[L_FFT / 2] ); - Min_val = s_min( fft_temp[0], fft_temp[L_FFT / 2] ); - - FOR( i = 1; i < L_FFT / 2; i++ ) - { - /* 1st windowed part */ - fft_temp[i] = mult_r( pt[i], sqrt_han_window_fx[i] ); - move16(); - if ( fft_temp[i] > 0 ) - Max_val = s_max( Max_val, fft_temp[i] ); - if ( fft_temp[i] < 0 ) - Min_val = s_min( Min_val, fft_temp[i] ); - - /* 2nd windowed part */ - fft_temp[L_FFT - i] = mult_r( pt[L_FFT - i], sqrt_han_window_fx[i] ); - move16(); - if ( fft_temp[L_FFT - i] > 0 ) - Max_val = s_max( Max_val, fft_temp[L_FFT - i] ); - if ( fft_temp[L_FFT - i] < 0 ) - Min_val = s_min( Min_val, fft_temp[L_FFT - i] ); - } - - /* Combine -Min_val and Max_val into one */ - Max_val = s_max( negate( Min_val ), Max_val ); - - Scale_fac[i_subfr] = s_min( sub( norm_s( Max_val ), 1 ), 6 ); - move16(); - Scale_fac2 = shl( Scale_fac[i_subfr], 1 ); - Scale_sig( fft_temp, L_FRAME_12k8, Scale_fac[i_subfr] ); - - r_fft_fx_lc( FFT_W128, SIZE_256, SIZE2_256, NUM_STAGE_256, fft_temp, pt_fft, 1 ); - Scale_sig( pt_fft, L_FFT, -1 ); // Q(-1) - /*e_min_scaled = Q_new + QSCALE + 2*/ - ivas_find_enr( pt_fft, pt_bands, lf_E + i_subfr * VOIC_BINS, &LEtot, min_band, max_band, - add( Q_new, Scale_fac2 ), e_min_scaled, &Bin_E[i_subfr * L_FFT / 2], BIN, band_energies + i_subfr * NB_BANDS ); - pt_bands += NB_BANDS; - pt_fft += L_FFT; - } - LEtot = L_shl_sat( LEtot, 2 ); // Q_new + Q_SCALE - 2 - } - ELSE - { - Word16 Qout = add( Q_new, QSCALE - 2 ); - find_enr_dft_fx( hCPE, input_Fs, hCPE->hStereoDft->DFT_fx[0], pt_bands, lf_E, &LEtot, min_band, max_band, Bin_E, band_energies, sub( Q31, hCPE->hStereoDft->DFT_fx_e[0] ), &Qout ); - MVR2R_WORD32( lf_E, lf_E + VOIC_BINS, VOIC_BINS ); - MVR2R_WORD32( Bin_E, Bin_E + ( L_FFT / 2 ), L_FFT / 2 ); - MVR2R_WORD32( band_energies, band_energies + NB_BANDS, NB_BANDS ); - MVR2R_WORD32( pt_bands, pt_bands + NB_BANDS, NB_BANDS ); - LEtot = L_shl( LEtot, 1 ); - } - Word32 temp32_log; - temp32_log = L_add( BASOP_Util_Log2( L_shr( LEtot, 1 ) ), L_shl( sub( Q31, add( Q_new, QSCALE - Q2 ) ), Q25 ) ); - temp32_log = Mpy_32_32( temp32_log, 1616142483 ); // log10(x) = log2(x)/log2(10) - /* 10.0 * log10( (float) tmp )*/ - /* 10.0/log2(10) in Q29 = 1616142483*/ - *Etot = extract_l( L_shr( temp32_log, 23 - 8 ) ); // Q8 - - Bin_E[L_FFT / 2 - 1] = Bin_E[L_FFT / 2 - 2]; - move32(); - Bin_E[L_FFT - 1] = Bin_E[L_FFT - 2]; - move32(); - - /* Per-bin log-energy spectrum */ - - FOR( i = 0; i < L_FFT / 2; i++ ) - { - Bin_E_old[i] = Bin_E[i]; - move32(); - /* tmp = (input[i] + input[i+Len]+0.001f)/2.0f */ - Ltmp = L_max( L_shr( 21474, sub( Q31, add( Q_new, QSCALE ) ) ), L_add( L_shr( Bin_E[i], 1 ), L_shr( Bin_E[i + L_FFT / 2], 1 ) ) ); - if ( PS != NULL ) - { - PS[i] = Ltmp; - move32(); - } - if ( EspecdB != NULL ) - { - temp32_log = L_add( BASOP_Util_Log2( Ltmp ), L_shl( sub( Q31, Q_new + QSCALE ), Q25 ) ); - EspecdB[i] = extract_l( L_shr( Mpy_32_32( temp32_log, 1860652798 ), Q22 - Q7 ) ); /*log(x) = log2(x)*logf(2)*/ - /* 10.0*log((float)tmp) */ - /* 10.0*logf(2) in Q28 = 1860652798 */ - - move16(); - } - } - - - return; -} - -void ivas_analy_sp_fx_front( const Word16 element_mode, /* i : element mode */ CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ const Word32 input_Fs, /* i : input sampling rate */ @@ -806,8 +446,6 @@ void ivas_analy_sp_fx_front( Word16 *Etot, /* o : total input energy Q8 */ const Word16 min_band, /* i : minimum critical band Q0 */ const Word16 max_band, /* i : maximum critical band Q0 */ - const Word32 e_min_scaled, /* i : minimum energy scaled Q_new + QSCALE - 2 */ - Word16 Scale_fac[2], /* o : FFT scales factors (2 values by frame) Q0 */ Word32 *Bin_E, /* o : per-bin energy spectrum Q7 */ Word16 *q_Bin_E, /* o : per-bin energy spectrum Q7 */ Word32 *Bin_E_old, /* o : per-bin energy spectrum of the previous frame Q7 */ @@ -815,9 +453,10 @@ void ivas_analy_sp_fx_front( Word32 *PS, /* o : per-bin energy spectrum Q_new + QSCALE - 2 */ Word16 *q_PS, /* o : per-bin energy spectrum Q_new + QSCALE - 2 */ Word16 *EspecdB, /* o : per-bin log energy spectrum (with f=0) Q7 */ - Word32 *band_energies, /* o : energy in critical frequency bands without minimum noise floor MODE2_E_MIN (band_energies_exp)*/ - Word16 *band_energies_exp, /* o : exponent of energy in critical frequency bands without minimum noise floor MODE2_E_MIN */ - Word16 *fft_buff /* o : FFT coefficients (Q_new + Scale_fac[i_subfr]) */ + Word32 *band_energies, /* o : energy in critical frequency bands without minimum noise floor MODE2_E_MIN (q_band_energies)*/ + Word16 *q_band_energies, /* o : Q of energy in critical frequency bands without minimum noise floor MODE2_E_MIN */ + Word16 *fft_buff, /* o : FFT coefficients (q_fft_buff) */ + Word16 *q_fft_buff /* o : Q of FFT coefficients Q0 */ ) { Word16 *pt; @@ -842,18 +481,16 @@ void ivas_analy_sp_fx_front( IF( is_zero_arr16( speech + 3 * ( L_SUBFR / 2 ) - L_FFT / 2, L_FFT + 4 * ( L_SUBFR / 2 ) ) ) { set16_fx( pt_fft, 0, 2 * L_FFT ); - Scale_fac[0] = 0; - Scale_fac[1] = 0; - move16(); + *q_fft_buff = Q15; move16(); set32_fx( Bin_E, 0, L_FFT ); set32_fx( lf_E, 0, 2 * VOIC_BINS ); set32_fx( band_energies, 0, 2 * NB_BANDS ); - set32_fx( fr_bands, e_min_scaled, 2 * NB_BANDS ); + set32_fx( fr_bands, E_MIN_FXQ31, 2 * NB_BANDS ); // Q31 (*q_fr_bands) - LEtot = W_shl( W_mult_32_16( e_min_scaled, add( sub( max_band, min_band ), 1 ) ), 1 ); // Q_new + QSCALE - 1 - *q_fr_bands = add( Q_new, Q_SCALE - 2 ); + LEtot = W_shl( W_mult_32_16( E_MIN_FXQ31, add( sub( max_band, min_band ), 1 ) ), 1 ); // Q32 (*q_fr_bands+1) + *q_fr_bands = Q31; *q_lf_E = *q_fr_bands; move16(); move16(); @@ -862,6 +499,9 @@ void ivas_analy_sp_fx_front( { Word16 scale = norm_arr( speech + 3 * ( L_SUBFR / 2 ) - L_FFT / 2, L_FFT + 4 * ( L_SUBFR / 2 ) ); scale = sub( scale, LOG2_L_FFT ); // guard_bits + *q_fft_buff = add( Q_new, scale ); + move16(); + FOR( i_subfr = 0; i_subfr <= 1; i_subfr++ ) { /* set pointer to the beginning of the signal for spectral analysis */ @@ -890,17 +530,14 @@ void ivas_analy_sp_fx_front( move16(); } - Scale_fac[i_subfr] = scale; - move16(); - - Scale_sig( pt_fft, L_FFT, Scale_fac[i_subfr] ); + scale_sig( pt_fft, L_FFT, scale ); /* compute the spectrum */ fft_rel_fx( pt_fft, L_FFT, LOG2_L_FFT ); /* find energy per critical band */ - ivas_find_enr1( pt_fft, add( Q_new, Scale_fac[i_subfr] ), pt_bands, q_fr_bands, lf_E + i_subfr * VOIC_BINS, q_lf_E, &LEtot, min_band, max_band, - Q_new, e_min_scaled, &Bin_E[i_subfr * L_FFT / 2], BIN, band_energies + i_subfr * NB_BANDS ); + ivas_find_enr( pt_fft, *q_fft_buff, pt_bands, q_fr_bands, lf_E + i_subfr * VOIC_BINS, q_lf_E, &LEtot, min_band, max_band, + &Bin_E[i_subfr * L_FFT / 2], BIN, band_energies + i_subfr * NB_BANDS ); pt_bands += NB_BANDS; pt_fft += L_FFT; @@ -932,10 +569,10 @@ void ivas_analy_sp_fx_front( set32_fx( Bin_E, 0, L_FFT ); set32_fx( lf_E, 0, 2 * VOIC_BINS ); set32_fx( band_energies, 0, 2 * NB_BANDS ); - set32_fx( fr_bands, e_min_scaled, 2 * NB_BANDS ); + set32_fx( fr_bands, E_MIN_FXQ31, 2 * NB_BANDS ); // Q31 (*q_fr_bands) - LEtot = W_shl( W_mult_32_16( e_min_scaled, add( sub( max_band, min_band ), 1 ) ), 1 ); // Q_new + QSCALE - 1 - *q_fr_bands = add( Q_new, Q_SCALE - 2 ); + LEtot = W_shl( W_mult_32_16( E_MIN_FXQ31, add( sub( max_band, min_band ), 1 ) ), 1 ); // Q32 (*q_fr_bands+1) + *q_fr_bands = Q31; *q_lf_E = *q_fr_bands; move16(); move16(); @@ -967,7 +604,7 @@ void ivas_analy_sp_fx_front( } } - *band_energies_exp = sub( 31, *q_fr_bands ); + *q_band_energies = *q_fr_bands; move16(); exp = sub( getScaleFactor32( fr_bands, 2 * NB_BANDS ), 1 ); @@ -977,7 +614,7 @@ void ivas_analy_sp_fx_front( exp = sub( getScaleFactor32( band_energies, 2 * NB_BANDS ), 1 ); scale_sig32( band_energies, 2 * NB_BANDS, exp ); - *band_energies_exp = sub( *band_energies_exp, exp ); + *q_band_energies = add( *q_band_energies, exp ); move16(); *q_Bin_E_old = *q_Bin_E; @@ -1024,8 +661,11 @@ void ivas_analy_sp_fx_front( } } - *q_PS = *q_Bin_E; - move16(); + if ( q_PS != NULL ) + { + *q_PS = *q_Bin_E; + move16(); + } exp = L_norm_arr( Bin_E, L_FFT / 2 ); IF( GE_16( exp, sub( *q_Bin_E, Q22 ) ) ) @@ -1048,252 +688,8 @@ void ivas_analy_sp_fx_front( * find input signal energy for each critical band and first 74 LF bins * The energy is normalized by the number of frequency bins in a channel *------------------------------------------------------------------------*/ - -static void ivas_find_enr( - Word16 data[], /* i : fft result */ - Word32 band[], /* o : per band energy Q_new + QSCALE */ - Word32 *ptE, /* o : per bin energy for low frequencies Q_new + QSCALE-2 */ - Word32 *LEtot, /* o : total energy Q_new + QSCALE */ - const Word16 min_band, /* i : minimum critical band Q0 */ - const Word16 max_band, /* i : maximum critical band Q0 */ - const Word16 Q_new2, /* i : scaling factor Q0 */ - const Word32 e_min, /* i : minimum energy scaled Q_new + QSCALE */ - Word32 *Bin_E, /* o : Per bin energy Q_new + QSCALE-2 */ - Word16 BIN_FREQ_FX, /* i : Number of frequency bins */ - Word32 *band_energies /* o : per band energy without MODE2_E_MIN */ -) -{ - Word16 i, cnt, shift_to_norm; - Word16 freq, wtmp; - Word16 *ptR, *ptI, diff_scaleP1, diff_scaleM2; - Word16 exp_band; - Word32 Ltmp, Ltmp1; - Word16 voic_band; - Word32 etot; - Word16 exp_etot; - Word32 *tmpptr; -#ifdef BASOP_NOGLOB_DECLARE_LOCAL - Flag Overflow = 0; - move16(); -#endif - - - ptR = &data[1]; /* first real */ - ptI = &data[L_FFT - 1]; /* first imaginary */ - - /*-----------------------------------------------------------------------------------* - * Scaling needed by band and ptE output - * Wants all energies scaled by Q_new + QSCALE to maintain maximum - * precision on bckr noise in clean speech - * First shift left by Q_new + QSCALE than shift right by 2*Q_new-1 - * shift left (Q_new + QSCALE - (2*Q_new -1)) - * shift left (QSCALE - Q_new + 1) == shift left by (QSCALE+1) - Q_new - *-----------------------------------------------------------------------------------*/ - - diff_scaleP1 = sub( QSCALE + 1 + 1, Q_new2 ); - diff_scaleM2 = sub( QSCALE + 1 - 2, Q_new2 ); - - voic_band = VOIC_BAND_8k; - move16(); - assert( VOIC_BAND == VOIC_BAND_8k ); - - etot = L_deposit_l( 0 ); - exp_etot = 0; - move16(); - - /*-----------------------------------------------------------------* - * For low frequency bins, save per bin energy for the use - * in NS and find_tilt() - *-----------------------------------------------------------------*/ - - freq = BIN_FREQ_FX; - move16(); - FOR( i = 0; i < voic_band; i++ ) /* up to maximum allowed voiced critical band */ - { - tmpptr = Bin_E; - move16(); - Ltmp1 = L_deposit_l( 0 ); - - FOR( ; freq <= crit_bands_fx[i]; freq += BIN_FREQ_FX ) - { - /*ptE = *ptR * *ptR + *ptI * *ptI */ /* energy */ - Word64 te = ( W_add( W_mult0_32_32( *ptR, *ptR ), W_mult0_32_32( *ptI, *ptI ) ) ); // 2 * Qfft - Word16 te_exp = W_norm( te ); - te = W_shl( te, te_exp ); // 2 * Qfft + te_exp - Ltmp = W_extract_h( te ); // 2 * Qfft + te_exp - 32 - - Ltmp = L_shr( Ltmp, sub( te_exp, 32 ) ); - /* *ptE *= 4.0 / (L_FFT*L_FFT) */ - /* normalization - corresponds to FFT normalization by 2/L_FFT */ - BASOP_SATURATE_WARNING_OFF_EVS; /* saturation seems to have no effect (tested by simulation) */ -#ifdef BASOP_NOGLOB - *ptE = L_shl_o( Ltmp, diff_scaleM2, &Overflow ); -#else /* BASOP_NOGLOB */ - *ptE = L_shl( Ltmp, diff_scaleM2 ); -#endif /* BASOP_NOGLOB */ - move32(); /* scaled by Q_new + QSCALE - 2 */ - BASOP_SATURATE_WARNING_ON_EVS; - /*band[i] += *ptE++;*/ - *Bin_E = *ptE; - move32(); - Bin_E++; - Ltmp1 = L_add( Ltmp1, Ltmp ); - - ptE++; - ptR++; - ptI--; - } - - exp_band = sub( norm_l( Ltmp1 ), 1 ); /* divide by 2 to ensure band < cnt */ - wtmp = round_fx( L_shl( Ltmp1, exp_band ) ); - - /* band[i] /= cnt */ /* normalization per frequency bin */ - cnt = (Word16) ( Bin_E - tmpptr ); - shift_to_norm = norm_s( cnt ); - wtmp = div_s( wtmp, shl( cnt, shift_to_norm ) ); - Ltmp1 = L_deposit_l( wtmp ); - - exp_band = sub( exp_band, shift_to_norm ); - exp_band = sub( diff_scaleP1, exp_band ); - BASOP_SATURATE_WARNING_OFF_EVS; /* saturation seems to have no effect (tested by simulation) */ -#ifdef BASOP_NOGLOB - band[i] = L_shl_o( Ltmp1, exp_band, &Overflow ); -#else /* BASOP_NOGLOB */ - band[i] = L_shl( Ltmp1, exp_band ); -#endif /* BASOP_NOGLOB */ - move32(); /* band scaled by Q_new + QSCALE */ - BASOP_SATURATE_WARNING_ON_EVS; - - test(); - IF( GE_16( i, min_band ) && LE_16( i, max_band ) ) - { - IF( LT_32( band[i], e_min ) ) - { - Ltmp1 = L_shl( e_min, 0 ); - exp_band = 0; - move16(); - } - - wtmp = sub( exp_band, exp_etot ); - if ( wtmp > 0 ) - { - etot = L_shr( etot, wtmp ); - } - exp_etot = s_max( exp_etot, exp_band ); - etot = L_add( etot, L_shl( Ltmp1, sub( exp_band, exp_etot ) ) ); - } - - band_energies[i] = band[i]; - move32(); - - band[i] = L_max( band[i], e_min ); - move32(); - } - - IF( EQ_16( BIN_FREQ_FX, 50 ) ) - { - /*-----------------------------------------------------------------* - * Continue compute the E per critical band for high frequencies - *-----------------------------------------------------------------*/ - - FOR( i = voic_band; i < NB_BANDS; i++ ) - { - tmpptr = Bin_E; - move16(); - Ltmp1 = L_deposit_l( 0 ); - - FOR( ; freq <= crit_bands_fx[i]; freq += BIN_FREQ_FX ) - { - /* *ptE = *ptR * *ptR + *ptI * *ptI */ - Word64 te = ( W_add( W_mult0_32_32( *ptR, *ptR ), W_mult0_32_32( *ptI, *ptI ) ) ); - Word16 te_exp = W_norm( te ); - te = W_shl( te, te_exp ); - Ltmp = W_extract_h( te ); - Ltmp = L_shr( Ltmp, sub( te_exp, 32 ) ); - - /* *ptE *= 4.0 / (L_FFT*L_FFT) */ - /* normalization - corresponds to FFT normalization by 2/L_FFT */ - BASOP_SATURATE_WARNING_OFF_EVS; /* saturation seems to have no effect (tested by simulation) */ -#ifdef BASOP_NOGLOB - *Bin_E = L_shl_o( Ltmp, diff_scaleM2, &Overflow ); -#else /* BASOP_NOGLOB */ - *Bin_E = L_shl( Ltmp, diff_scaleM2 ); -#endif /* BASOP_NOGLOB */ - move32(); /* scaled by Q_new + QSCALE - 2 */ - BASOP_SATURATE_WARNING_ON_EVS; - Bin_E++; - Ltmp1 = L_add( Ltmp1, Ltmp ); - - ptR++; - ptI--; - } - - exp_band = sub( norm_l( Ltmp1 ), 1 ); /* divide by 2 to ensure band < cnt */ - wtmp = round_fx( L_shl( Ltmp1, exp_band ) ); - - /* band[i] /= cnt */ /* normalization per frequency bin */ - cnt = (Word16) ( Bin_E - tmpptr ); - shift_to_norm = norm_s( cnt ); - wtmp = div_s( wtmp, shl( cnt, shift_to_norm ) ); - Ltmp1 = L_deposit_l( wtmp ); - - exp_band = sub( exp_band, shift_to_norm ); - exp_band = sub( diff_scaleP1, exp_band ); - BASOP_SATURATE_WARNING_OFF_EVS; /* saturation seems to have no effect (tested by simulation) */ -#ifdef BASOP_NOGLOB - band[i] = L_shl_o( Ltmp1, exp_band, &Overflow ); -#else /* BASOP_NOGLOB */ - band[i] = L_shl( Ltmp1, exp_band ); -#endif - move32(); /* band scaled by Q_new + QSCALE */ - BASOP_SATURATE_WARNING_ON_EVS; - - test(); - IF( GE_16( i, min_band ) && LE_16( i, max_band ) ) - { - IF( LT_32( band[i], e_min ) ) - { - Ltmp1 = L_shl( e_min, 0 ); - exp_band = 0; - move16(); - } - - wtmp = sub( exp_band, exp_etot ); - if ( wtmp > 0 ) - { - etot = L_shr( etot, wtmp ); - } - exp_etot = s_max( exp_etot, exp_band ); - - etot = L_add( etot, L_shl( Ltmp1, sub( exp_band, exp_etot ) ) ); - } - - band_energies[i] = band[i]; - move32(); - - band[i] = L_max( band[i], e_min ); - move32(); - } - } - - /*-----------------------------------------------------------------* - * Find the total energy over the input bandwidth - *-----------------------------------------------------------------*/ - -#ifdef BASOP_NOGLOB - etot = L_add_sat( *LEtot, L_shl_sat( etot, sub( exp_etot, 4 ) ) ); -#else - etot = L_add( *LEtot, L_shl( etot, sub( exp_etot, 4 ) ) ); -#endif - *LEtot = etot; - move32(); - - - return; -} - /* Merge with ivas_find_enr function once analy_sp is unified */ -static void ivas_find_enr1( +static void ivas_find_enr( Word16 data[], /* i : fft result */ Word16 q_data, /* i : Q of fft result */ Word32 band[], /* o : per band energy q_band */ @@ -1303,8 +699,6 @@ static void ivas_find_enr1( Word64 *LEtot, /* o : total energy q_band+1 */ const Word16 min_band, /* i : minimum critical band Q0 */ const Word16 max_band, /* i : maximum critical band Q0 */ - const Word16 Q_new, /* i : scaling factor Q0 */ - const Word32 e_min, /* i : minimum energy scaled Q_new+QSCALE-2 */ Word32 *Bin_E, /* o : Per bin energy q_ptE */ Word16 BIN_FREQ_FX, /* i : Number of frequency bins Q0 */ Word32 *band_energies /* o : per band energy without MODE2_E_MIN q_band */ @@ -1317,6 +711,8 @@ static void ivas_find_enr1( Word64 etot, band_ener; Word16 start_freq; Word32 min_ener; + Word16 shift = 0; + move16(); ptR = &data[1]; /* first real */ ptI = &data[L_FFT - 1]; /* first imaginary */ @@ -1335,8 +731,14 @@ static void ivas_find_enr1( *q_band = add( shl( q_data, 1 ), 14 ); move16(); - /* Only used for comparison purpose, so saturation is added. It doesn't effect the outcome */ - min_ener = L_shl( e_min, sub( *q_band, add( Q_new, QSCALE - 2 ) ) ); // *q_band + IF( GT_16( *q_band, 39 ) ) + { + shift = sub( *q_band, 39 ); + *q_band = 39; + move16(); + } + + min_ener = L_shl( E_MIN_FXQ31, sub( *q_band, Q31 ) ); // *q_band freq = BIN_FREQ_FX; move16(); @@ -1372,14 +774,14 @@ static void ivas_find_enr1( band[i] += *ptE++; band[i] *= inv_tbl[cnt]; // normalization per frequency bin */ - band_ener = W_mac_32_16( band_ener, *ptE, inv_tbl_fx[( ( crit_bands_fx[i] - start_freq ) / BIN ) + 1] ); // *q_ptE+16 + band_ener = W_mac_32_16( band_ener, *ptE, inv_tbl_fx[( ( crit_bands_fx[i] - start_freq ) / BIN ) + 1] ); // *q_band+16+shift ptR++; ptI--; ptE++; freq = add( freq, BIN_FREQ_FX ); } - band[i] = W_extract_h( W_shl( band_ener, Q16 ) ); // *q_ptE+16+16-32 = *q_ptE = *q_band + band[i] = W_extract_h( W_shl( band_ener, sub( Q16, shift ) ) ); // *q_band move32(); band_energies[i] = band[i]; /* per band energy without E_MIN */ // *q_band @@ -1427,7 +829,7 @@ static void ivas_find_enr1( band[i] += *ptE++; band[i] *= inv_tbl[cnt]; // normalization per frequency bin */ - band_ener = W_mac_32_16( band_ener, *Bin_E, inv_tbl_fx[( ( crit_bands_fx[i] - start_freq ) / BIN ) + 1] ); // q_ptE+16 + band_ener = W_mac_32_16( band_ener, *Bin_E, inv_tbl_fx[( ( crit_bands_fx[i] - start_freq ) / BIN ) + 1] ); // *q_band+16+shift ptR++; ptI--; Bin_E++; @@ -1435,7 +837,7 @@ static void ivas_find_enr1( freq = add( freq, BIN_FREQ_FX ); } - band[i] = W_extract_h( W_shl_nosat( band_ener, Q16 ) ); // *q_ptE+16+16-32 = *q_ptE = *q_band + band[i] = W_extract_h( W_shl_nosat( band_ener, sub( Q16, shift ) ) ); // *q_band move32(); band_energies[i] = band[i]; /* per band energy without E_MIN */ // *q_band diff --git a/lib_enc/cod_uv_fx.c b/lib_enc/cod_uv_fx.c index ca17c0f25..22b515417 100644 --- a/lib_enc/cod_uv_fx.c +++ b/lib_enc/cod_uv_fx.c @@ -64,7 +64,7 @@ void gauss_L2_fx( E_UTIL_f_convolve( code, h, y11, L_SUBFR ); /* y11: Q8+shift */ Scale_sig( y11, L_SUBFR, sub( 1, shift ) ); /* Q9 */ *gain = L_deposit_l( 0 ); - + move32(); /*Update correlations for gains coding */ tmp32 = L_shr( 21474836l /*0.01f Q31*/, 31 - 18 ); /* Q18 */ tmp32_2 = L_shr( 21474836l /*0.01f Q31*/, 31 - 18 ); /* Q18 */ @@ -83,6 +83,7 @@ void gauss_L2_fx( #endif g_corr->y1y1_e = sub( 31 - 18, tmp16 ); move16(); + move16(); tmp16 = norm_l( tmp32_2 ); #ifdef BASOP_NOGLOB @@ -92,6 +93,7 @@ void gauss_L2_fx( #endif g_corr->y1y2_e = sub( 31 - 18, tmp16 ); move16(); + move16(); } void gauss_L2_ivas_fx( @@ -139,7 +141,7 @@ void gauss_L2_ivas_fx( E_UTIL_f_convolve( code, h, y11, L_SUBFR ); /* y11: Q8+shift */ Scale_sig( y11, L_SUBFR, sub( 1, shift ) ); /* Q9 */ *gain = L_deposit_l( 0 ); - + move32(); /*Update correlations for gains coding */ tmp32 = L_shr( 21474836l /*0.01f Q31*/, 31 - 18 ); /* Q18 */ tmp32_2 = L_shr( 21474836l /*0.01f Q31*/, 31 - 18 ); /* Q18 */ @@ -159,9 +161,11 @@ void gauss_L2_ivas_fx( #endif g_corr->y1y1_e = sub( 31 - 18, tmp16 ); move16(); + move16(); tmp16 = norm_l( tmp32_2 ); g_corr->y1y2 = round_fx_sat( L_shl( tmp32_2, tmp16 ) ); g_corr->y1y2_e = sub( 31 - 18, tmp16 ); move16(); + move16(); } diff --git a/lib_enc/comvad_decision_fx.c b/lib_enc/comvad_decision_fx.c index 505e18326..1499bbfce 100644 --- a/lib_enc/comvad_decision_fx.c +++ b/lib_enc/comvad_decision_fx.c @@ -24,10 +24,10 @@ /*#define CLDFB_VAD*/ /* test on the CLDFB-VAD */ static Word16 comvad_hangover( - const Word32 lt_snr_org, /* i : original long time SNR*/ - const Word32 snr, /* i : frequency domain SNR */ - const Word32 l_snr, /* i : long time frequency domain SNR calculated by l_speech_snr and l_silence_snr*/ - const Word32 snr_flux, /* i : average tsnr*/ + const Word32 lt_snr_org, /* i : original long time SNR Q25*/ + const Word32 snr, /* i : frequency domain SNR Q25*/ + const Word32 l_snr, /* i : long time frequency domain SNR calculated by l_speech_snr and l_silence_snr Q25*/ + const Word32 snr_flux, /* i : average tsnr Q25*/ const Word16 bw_index, /* i : band width index*/ const Word16 vad_flag, const Word16 pre_res_hang_num, /* i : residual amount of previous hangover */ @@ -379,7 +379,7 @@ static Word16 comvad_hangover( { IF( ( NE_16( noisy_type, SILENCE ) ) ) { - speech_flag--; + speech_flag = sub( speech_flag, 1 ); } ELSE { @@ -395,13 +395,13 @@ static Word16 comvad_hangover( Word16 comvad_decision_fx( VAD_CLDFB_HANDLE hVAD_CLDFB, /* i/o: CLDFB VAD state */ - const Word32 l_snr, /* i : long time frequency domain*/ - const Word32 lt_snr_org, /* i : original long time SNR*/ - const Word32 lt_snr, /* i : long time SNR calculated by fg_energy and bg_energy*/ - const Word32 snr_flux, /* i : average tsnr of several frames*/ - const Word32 snr, /* i : frequency domain SNR */ - Word32 tsnr, /* i : time domain SNR */ - const Word32 frame_energy, /* i : current frame energy */ + const Word32 l_snr, /* i : long time frequency domain Q25*/ + const Word32 lt_snr_org, /* i : original long time SNR Q25*/ + const Word32 lt_snr, /* i : long time SNR calculated by fg_energy and bg_energy Q25*/ + const Word32 snr_flux, /* i : average tsnr of several frames Q25*/ + const Word32 snr, /* i : frequency domain SNR Q25 */ + Word32 tsnr, /* i : time domain SNR Q25*/ + const Word32 frame_energy, /* i : current frame energy Q(frame_energy_Q)*/ const Word16 music_backgound_f, /* i : background music flag*/ const Word16 frame_energy_Q, /* i : the Scaling of current frame energy*/ Word16 *cldfb_addition, /* o : adjust the harmonized hangover */ @@ -443,7 +443,7 @@ Word16 comvad_decision_fx( move16(); test(); - IF( GT_32( hVAD_CLDFB->lf_snr_smooth, LS_MIN_SILENCE_SNR[hVAD_CLDFB->bw_index - CLDFBVAD_NB_ID] ) && GT_32( lt_snr_org, LT_MIN_SILENCE_SNR_FX[hVAD_CLDFB->bw_index - CLDFBVAD_NB_ID] ) ) + if ( GT_32( hVAD_CLDFB->lf_snr_smooth, LS_MIN_SILENCE_SNR[hVAD_CLDFB->bw_index - CLDFBVAD_NB_ID] ) && GT_32( lt_snr_org, LT_MIN_SILENCE_SNR_FX[hVAD_CLDFB->bw_index - CLDFBVAD_NB_ID] ) ) { noisy_type = SILENCE; move16(); @@ -477,7 +477,7 @@ Word16 comvad_decision_fx( IF( EQ_16( hVAD_CLDFB->fg_energy_count, 512 ) ) { - fg_energy = MUL_F( fg_energy, 0x6000 ); + fg_energy = MUL_F( fg_energy, 0x6000 /*.75f in Q15*/ ); hVAD_CLDFB->fg_energy_count = 384; move16(); } @@ -527,7 +527,7 @@ Word16 comvad_decision_fx( l_silence_snr_count = L_shl( hVAD_CLDFB->l_silence_snr_count, Qnorm_silence_count ); snr_div_fix = div_l( l_silence_snr, extract_h( l_silence_snr_count ) ); } - snr_sub = L_sub( snr, 0x3000000 ); + snr_sub = L_sub( snr, 0x3000000 /*1.5f in Q25*/ ); snr_div_fix32 = L_deposit_l( snr_div_fix ); snr_div_fix32 = L_shr( snr_div_fix32, add( 6, sub( Qnorm_silence, Qnorm_silence_count ) ) ); @@ -536,7 +536,7 @@ Word16 comvad_decision_fx( { IF( EQ_32( hVAD_CLDFB->l_speech_snr_count, 512 ) ) { - hVAD_CLDFB->l_speech_snr = L_add( MUL_F( hVAD_CLDFB->l_speech_snr, 0x6000 ), L_shr( snr, 9 ) ); + hVAD_CLDFB->l_speech_snr = L_add( MUL_F( hVAD_CLDFB->l_speech_snr, 0x6000 /*.75f in Q15*/ ), L_shr( snr, 9 ) ); move32(); hVAD_CLDFB->l_speech_snr_count = L_deposit_l( 384 + 1 ); move32(); @@ -557,7 +557,7 @@ Word16 comvad_decision_fx( { Word32 lt_snr_add; - lt_snr_add = L_add( 0x03cccccd, MUL_F( lt_snr, 0x23d7 ) ); + lt_snr_add = L_add( 0x03cccccd /*1.9 in Q25*/, MUL_F( lt_snr, 0x23d7 ) ); if ( GT_32( snr_flux, lt_snr_add ) ) { @@ -593,7 +593,7 @@ Word16 comvad_decision_fx( { Word32 lt_snr_add; - lt_snr_add = L_add( 0x04333333, MUL_F( lt_snr, 0x1eb8 ) ); + lt_snr_add = L_add( 0x04333333 /*2.1 in Q25*/, MUL_F( lt_snr, 0x1eb8 ) ); if ( GT_32( snr_flux, lt_snr_add ) ) { @@ -629,7 +629,7 @@ Word16 comvad_decision_fx( { Word32 lt_snr_add; - lt_snr_add = L_add( 0x04333333, MUL_F( lt_snr, 0x28f5 ) ); + lt_snr_add = L_add( 0x04333333 /*2.1 Q25*/, MUL_F( lt_snr, 0x28f5 ) ); if ( ( GT_32( snr_flux, lt_snr_add ) ) ) { @@ -671,7 +671,8 @@ Word16 comvad_decision_fx( test(); if ( GT_16( hVAD_CLDFB->ltd_stable_rate[0], 2621 /* 0.08 Q15 */ ) && EQ_16( vad_flag, 1 ) && ( tmpout > 0 ) ) { - hVAD_CLDFB->fg_energy_est_start = L_deposit_l( 1 ); + hVAD_CLDFB->fg_energy_est_start = 1; + move32(); } } @@ -694,7 +695,7 @@ Word16 comvad_decision_fx( move16(); } vadb_flag = vad_flag; - + move16(); IF( EQ_16( hVAD_CLDFB->bw_index, CLDFBVAD_SWB_ID ) ) { test(); @@ -710,13 +711,14 @@ Word16 comvad_decision_fx( test(); test(); test(); - IF( GT_32( snr_flux, 83886078 /* 2.5 Q25 */ ) || ( hVAD_CLDFB->continuous_speech_num2 > 40 && GT_32( snr_flux, 67108862 /* 2.0 Q25 */ ) ) || music_backgound_f == 1 ) + IF( GT_32( snr_flux, 83886078 /* 2.5 Q25 */ ) || ( hVAD_CLDFB->continuous_speech_num2 > 40 && GT_32( snr_flux, 67108862 /* 2.0 Q25 */ ) ) || EQ_16( music_backgound_f, 1 ) ) { vad_flag = s_or( vad_flag, vada_flag ); } ELSE IF( EQ_16( SILENCE, noisy_type ) ) { vad_flag = vada_flag; + move16(); } } } @@ -735,13 +737,14 @@ Word16 comvad_decision_fx( test(); test(); test(); - IF( GT_32( snr_flux, 73819748 /* 2.2 Q25 */ ) || ( hVAD_CLDFB->continuous_speech_num2 > 40 && GT_32( snr_flux, 57042532 /* 1.7 Q25 */ ) ) || music_backgound_f == 1 ) + IF( GT_32( snr_flux, 73819748 /* 2.2 Q25 */ ) || ( hVAD_CLDFB->continuous_speech_num2 > 40 && GT_32( snr_flux, 57042532 /* 1.7 Q25 */ ) ) || EQ_16( music_backgound_f, 1 ) ) { vad_flag = s_or( vad_flag, vada_flag ); } ELSE IF( EQ_16( SILENCE, noisy_type ) ) { vad_flag = vada_flag; + move16(); } } } @@ -753,6 +756,7 @@ Word16 comvad_decision_fx( IF( GT_32( hVAD_CLDFB->lf_snr_smooth, 419430388 /* 12.5 Q25 */ ) && music_backgound_f == 0 ) { vad_flag = vada_flag; + move16(); } } ELSE @@ -760,7 +764,7 @@ Word16 comvad_decision_fx( test(); test(); test(); - IF( GT_32( snr_flux, 67108862 /* 2.0 Q25 */ ) || ( hVAD_CLDFB->continuous_speech_num2 > 30 && GT_32( snr_flux, 50331647 /* 1.5 Q25 */ ) ) || music_backgound_f == 1 ) + IF( GT_32( snr_flux, 67108862 /* 2.0 Q25 */ ) || ( GT_16( hVAD_CLDFB->continuous_speech_num2, 30 ) && GT_32( snr_flux, 50331647 /* 1.5 Q25 */ ) ) || EQ_16( music_backgound_f, 1 ) ) { vad_flag = s_or( vad_flag, vada_flag ); } @@ -771,7 +775,7 @@ Word16 comvad_decision_fx( { IF( EQ_32( hVAD_CLDFB->l_silence_snr_count, 512 ) ) { - hVAD_CLDFB->l_silence_snr = L_add( MUL_F( hVAD_CLDFB->l_silence_snr, 0x6000 ), L_shr( snr, 9 ) ); + hVAD_CLDFB->l_silence_snr = L_add( MUL_F( hVAD_CLDFB->l_silence_snr, 0x6000 /*.75f Q15*/ ), L_shr( snr, 9 ) ); move32(); hVAD_CLDFB->l_silence_snr_count = L_deposit_l( 384 + 1 ); move32(); @@ -789,7 +793,7 @@ Word16 comvad_decision_fx( { IF( EQ_16( hVAD_CLDFB->bg_energy_count, 512 ) ) { - bg_energy = MUL_F( bg_energy, 0x6000 ); + bg_energy = MUL_F( bg_energy, 0x6000 /*.75f Q15*/ ); hVAD_CLDFB->bg_energy_count = 384; move16(); } @@ -804,9 +808,11 @@ Word16 comvad_decision_fx( test(); hVAD_CLDFB->vad_flag_for_bk_update = vad_flag; + move16(); IF( LT_16( hVAD_CLDFB->update_count, 12 ) && EQ_16( vadb_flag, 1 ) ) { hVAD_CLDFB->warm_hang_num = s_max( 20, speech_flag ); + move16(); } test(); IF( vad_flag == 0 && hVAD_CLDFB->warm_hang_num > 0 ) @@ -814,6 +820,7 @@ Word16 comvad_decision_fx( hVAD_CLDFB->warm_hang_num = sub( hVAD_CLDFB->warm_hang_num, 1 ); vad_flag = 1; move16(); + move16(); } @@ -827,15 +834,16 @@ Word16 comvad_decision_fx( hVAD_CLDFB->speech_flag = speech_flag; move16(); - move16(); test(); IF( EQ_16( noisy_type, SILENCE ) && NE_16( hVAD_CLDFB->bw_index, CLDFBVAD_NB_ID ) ) { *cldfb_addition = 2; + move16(); } ELSE { *cldfb_addition = 0; + move16(); if ( EQ_16( hVAD_CLDFB->bw_index, CLDFBVAD_WB_ID ) ) { diff --git a/lib_enc/core_enc_2div_fx.c b/lib_enc/core_enc_2div_fx.c index 151561b09..fa74ec6b5 100644 --- a/lib_enc/core_enc_2div_fx.c +++ b/lib_enc/core_enc_2div_fx.c @@ -22,7 +22,7 @@ *-------------------------------------------------------------------*/ void core_encode_twodiv_fx( - const Word16 new_samples[], /* i : new samples */ + const Word16 new_samples[], /* i : new samples */ Encoder_State *st, /* i/o: coder memory state */ const Word16 coder_type, /* i : coding type */ Word16 Aw[NB_SUBFR16k * ( M + 1 )], /* i : weighted A(z) unquant. for subframes*/ @@ -60,9 +60,7 @@ void core_encode_twodiv_fx( spectrum[1] = spectrum_buf + N_TCX10_MAX; hm_cfg[0].indexBuffer = &indexBuffer[0]; - move16(); hm_cfg[1].indexBuffer = &indexBuffer[N_MAX / 2 + 1]; - move16(); move16(); move16(); @@ -203,19 +201,19 @@ void core_encode_twodiv_fx( /* calculate noise-filling over whole spectrum for TCX10 frames */ move16(); - hTcxEnc->measuredBwRatio = 0x4000; + hTcxEnc->measuredBwRatio = 0x4000; // 1 in Q14 FOR( n = 0; n < 2; n++ ) { target_bits = sub( shr( sub( add( bitsAvailable, 1 ), n ), 1 ), tnsBits[n] ); - if ( n == 0 ) + IF( n == 0 ) { target_bits = sub( target_bits, ltpBits ); } test(); - if ( st->enablePlcWaveadjust && n ) + IF( st->enablePlcWaveadjust && n ) { target_bits = sub( target_bits, 1 ); } @@ -285,7 +283,7 @@ void core_encode_twodiv_fx( /* subtract bits for TCX overlap mode (1 bit: full, 2 bits: half or no overlap) */ target_bits = sub( target_bits, 1 ); test(); - if ( EQ_16( st->hTcxCfg->tcx_curr_overlap_mode, HALF_OVERLAP ) || EQ_16( st->hTcxCfg->tcx_curr_overlap_mode, MIN_OVERLAP ) ) + IF( EQ_16( st->hTcxCfg->tcx_curr_overlap_mode, HALF_OVERLAP ) || EQ_16( st->hTcxCfg->tcx_curr_overlap_mode, MIN_OVERLAP ) ) { target_bits = sub( target_bits, 1 ); } @@ -293,7 +291,7 @@ void core_encode_twodiv_fx( target_bits = sub( target_bits, st->hIGFEnc->infoTotalBitsPerFrameWritten ); - if ( st->enablePlcWaveadjust ) + IF( st->enablePlcWaveadjust ) { target_bits = sub( target_bits, 1 ); } diff --git a/lib_enc/core_enc_init.c b/lib_enc/core_enc_init.c index c378731ad..2ed302c31 100644 --- a/lib_enc/core_enc_init.c +++ b/lib_enc/core_enc_init.c @@ -52,7 +52,7 @@ *-----------------------------------------------------------------------*/ static void init_tcx_ivas_fx( Encoder_State *st, const Word16 L_frame_old, const Word32 total_brate, const Word32 last_total_brate, const Word16 MCT_flag ); static void init_core_sig_ana_ivas_fx( Encoder_State *st ); -static void init_modes_ivas_fx( Encoder_State *st, const int32_t last_total_brate ); +static void init_modes_ivas_fx( Encoder_State *st, const Word32 last_total_brate ); static void init_sig_buffers_ivas_fx( Encoder_State *st, const Word16 L_frame_old, const Word16 L_subfr ); static void init_acelp_ivas_fx( Encoder_State *st, Word16 L_frame_old, Word16 shift ); /*-----------------------------------------------------------------------* @@ -121,7 +121,7 @@ void init_coder_ace_plus_ivas_fx( test(); test(); test(); - IF( ( EQ_16( st->element_mode, EVS_MONO ) && EQ_32( st->L_frame, L_FRAME16k ) && LE_32( st->total_brate, ACELP_32k ) ) || ( GT_16( st->element_mode, EVS_MONO ) && EQ_32( st->L_frame, L_FRAME16k ) && LE_32( st->total_brate, MAX_ACELP_BRATE ) ) ) + IF( ( st->element_mode == EVS_MONO && EQ_32( st->L_frame, L_FRAME16k ) && LE_32( st->total_brate, ACELP_32k ) ) || ( st->element_mode > EVS_MONO && EQ_32( st->L_frame, L_FRAME16k ) && LE_32( st->total_brate, MAX_ACELP_BRATE ) ) ) { st->nb_subfr = NB_SUBFR16k; move16(); @@ -221,7 +221,7 @@ void init_coder_ace_plus_ivas_fx( init_acelp_ivas_fx( st, L_frame_old, 0 ); - IF( st->ini_frame == 0 ) + if ( st->ini_frame == 0 ) { st->tec_tfa = 0; move16(); @@ -235,7 +235,7 @@ void init_coder_ace_plus_ivas_fx( test(); test(); test(); - IF( EQ_16( st->bwidth, SWB ) && ( EQ_32( st->total_brate, ACELP_16k40 ) || EQ_32( st->total_brate, ACELP_24k40 ) ) && EQ_16( st->element_mode, EVS_MONO ) ) + IF( EQ_16( st->bwidth, SWB ) && ( EQ_32( st->total_brate, ACELP_16k40 ) || EQ_32( st->total_brate, ACELP_24k40 ) ) && st->element_mode == EVS_MONO ) { st->tec_tfa = 1; move16(); @@ -254,7 +254,7 @@ void init_coder_ace_plus_ivas_fx( test(); test(); test(); - IF( ( EQ_32( st->total_brate, ACELP_9k60 ) || EQ_32( st->total_brate, ACELP_16k40 ) || EQ_32( st->total_brate, ACELP_24k40 ) ) && EQ_16( st->element_mode, EVS_MONO ) ) + IF( ( EQ_32( st->total_brate, ACELP_9k60 ) || EQ_32( st->total_brate, ACELP_16k40 ) || EQ_32( st->total_brate, ACELP_24k40 ) ) && st->element_mode == EVS_MONO ) { st->glr = 1; move16(); @@ -315,213 +315,6 @@ void init_coder_ace_plus_ivas_fx( return; } -void init_coder_ace_plus( - Encoder_State *st, /* i : Encoder state */ - const int32_t last_total_brate, /* i : last total bitrate */ - const int16_t MCT_flag /* i : hMCT handle allocated (1) or not (0) */ -) -{ - TCX_ENC_HANDLE hTcxEnc = st->hTcxEnc; - int16_t L_frame_old; /*keep old frame size for switching */ - int16_t L_subfr; - - /* Bitrate */ - st->tcxonly = getTcxonly_ivas_fx( st->element_mode, st->total_brate, MCT_flag, st->is_ism_format ); - - /* Core Sampling Rate */ - st->sr_core = getCoreSamplerateMode2_flt( st->element_mode, st->total_brate, st->bwidth, st->flag_ACELP16k, st->rf_mode, st->is_ism_format ); - st->fscale = sr2fscale( st->sr_core ); - - /* Narrowband? */ - st->narrowBand = ( st->bwidth == NB ) ? 1 : 0; - - /* Core Framing */ - L_frame_old = st->last_L_frame; - st->L_frame = (int16_t) ( st->sr_core / FRAMES_PER_SEC ); - st->L_frame_past = -1; - - if ( hTcxEnc != NULL ) - { - hTcxEnc->L_frameTCX = (int16_t) ( st->input_Fs / FRAMES_PER_SEC ); - - if ( st->ini_frame == 0 ) - { - hTcxEnc->tcx_target_bits_fac = ONE_IN_Q14; - - set16_fx( hTcxEnc->Txnq, 0, L_FRAME32k / 2 + 64 ); - hTcxEnc->acelp_zir = hTcxEnc->Txnq + L_FRAME / 2; - hTcxEnc->q_Txnq = Q15; - move16(); - } - } - - if ( ( st->element_mode == EVS_MONO && st->L_frame == L_FRAME16k && st->total_brate <= ACELP_32k ) || ( st->element_mode > EVS_MONO && st->L_frame == L_FRAME16k && st->total_brate <= MAX_ACELP_BRATE ) ) - { - st->nb_subfr = NB_SUBFR16k; - } - else - { - st->nb_subfr = NB_SUBFR; - } - L_subfr = st->L_frame / st->nb_subfr; - - /* Core Lookahead */ - st->encoderLookahead_enc = NS2SA( st->sr_core, ACELP_LOOK_NS ); - st->encoderLookahead_FB = NS2SA( st->input_Fs, ACELP_LOOK_NS ); - - if ( st->ini_frame == 0 ) - { - st->acelpFramesCount = 0; -#ifdef MSAN_FIX - st->prevTempFlatness_fx = 128; // Q7 -#endif - } - - /* Initialize TBE */ - if ( st->hBWE_TD != NULL ) - { - st->hBWE_TD->prev_coder_type = GENERIC; - set16_fx( st->hBWE_TD->prev_lsf_diff_fx, 16384, LPC_SHB_ORDER - 2 ); - st->hBWE_TD->prev_tilt_para_fx = 0; - set16_fx( st->hBWE_TD->cur_sub_Aq_fx, 0, M + 1 ); - move16(); - } - - st->currEnergyHF_fx = 0; - move32(); - - /* Initialize LPC analysis/quantization */ - if ( st->sr_core <= INT_FS_16k && st->tcxonly == 0 ) - { - st->lpcQuantization = 1; - } - else - { - st->lpcQuantization = 0; - } - - st->next_force_safety_net = 0; - if ( ( st->last_L_frame != st->L_frame ) || ( st->last_core == AMR_WB_CORE ) || ( st->last_core == HQ_CORE ) ) - { - set16_fx( st->mem_MA_fx, 0, M ); - Copy( GEWB_Ave_fx, st->mem_AR_fx, M ); - } - - /* Initialize IGF */ - if ( st->hIGFEnc != NULL ) - { - st->hIGFEnc->infoStopFrequency = -1; - } - - if ( st->igf && st->hIGFEnc != NULL ) - { - IGFEncSetMode_ivas_fx( st->hIGFEnc, st->total_brate, st->bwidth, st->element_mode, st->rf_mode ); - } - else if ( st->hIGFEnc != NULL ) - { - st->hIGFEnc->infoTotalBitsWritten = 0; - st->hIGFEnc->infoTotalBitsPerFrameWritten = 0; - } - - /* Initialize Core Signal Analysis Module */ - init_core_sig_ana_ivas_fx( st ); - - /* Initialize TCX */ - if ( hTcxEnc != NULL ) - { - init_tcx_ivas_fx( st, L_frame_old, st->total_brate, last_total_brate, MCT_flag ); - } - - /* Initialize Signal Buffers */ - init_sig_buffers_ivas_fx( st, L_frame_old, L_subfr ); - - /* Initialize ACELP */ - init_acelp_ivas_fx( st, L_frame_old, 0 ); - - if ( st->ini_frame == 0 ) - { - st->tec_tfa = 0; - } - - if ( st->hTECEnc != NULL ) - { - resetTecEnc_Fx( st->hTECEnc, st->tec_tfa ); - } - - if ( st->bwidth == SWB && ( st->total_brate == ACELP_16k40 || st->total_brate == ACELP_24k40 ) && st->element_mode == EVS_MONO ) - { - st->tec_tfa = 1; - } - else - { - st->tec_tfa = 0; - } - - st->tec_flag = 0; - st->tfa_flag = 0; - - - if ( ( st->total_brate == ACELP_9k60 || st->total_brate == ACELP_16k40 || st->total_brate == ACELP_24k40 ) && st->element_mode == EVS_MONO ) - { - st->glr = 1; - } - else - { - st->glr = 0; - } - - st->glr_reset = 0; - - /* Initialize ACELP/TCX Modes */ - init_modes_ivas_fx( st, last_total_brate ); - - /* Adaptive BPF */ - if ( st->total_brate >= HQ_48k ) - { - st->enablePlcWaveadjust = 1; - } - else - { - st->enablePlcWaveadjust = 0; - } - - set32_fx( st->mem_bpf_fx1, 0, 2 * L_FILT16k ); - set32_fx( st->mem_error_bpf_fx, 0, 2 * L_FILT16k ); - - IF( st->ini_frame == 0 ) - { - st->Q_max_enc[0] = 15; - move16(); - st->Q_max_enc[1] = 15; - move16(); - } - - IF( st->total_brate >= HQ_48k ) - { - st->enablePlcWaveadjust = 1; - move16(); - } - ELSE - { - st->enablePlcWaveadjust = 0; - move16(); - } - - if ( st->hPlcExt ) - { - init_PLC_enc_fx( st->hPlcExt, st->sr_core ); - } - - st->glr_idx[0] = 0; - st->glr_idx[1] = 0; - st->mean_gc[0] = 0; - st->mean_gc[1] = 0; - st->prev_lsf4_mean = 0; - st->last_stab_fac = 0; - - return; -} - /*-----------------------------------------------------------------------* * init_tcx() * @@ -688,8 +481,8 @@ static void init_sig_buffers_ivas_fx( Encoder_State *st, const Word16 L_frame_ol IF( NE_16( st->last_core, TCX_20_CORE ) && NE_16( st->last_core, TCX_10_CORE ) ) /* condition should be checked again */ { Copy( st->buf_speech_enc, hTcxEnc->buf_speech_ltp, st->L_frame ); - Scale_sig( hTcxEnc->buf_speech_ltp, st->L_frame, sub( st->exp_buf_speech_enc, s_max( st->exp_buf_speech_enc, hTcxEnc->exp_buf_speech_ltp ) ) ); - Scale_sig( hTcxEnc->buf_speech_ltp + st->L_frame, sub( L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k, st->L_frame ), sub( hTcxEnc->exp_buf_speech_ltp, s_max( st->exp_buf_speech_enc, hTcxEnc->exp_buf_speech_ltp ) ) ); + Scale_sig( hTcxEnc->buf_speech_ltp, st->L_frame, sub( st->exp_buf_speech_enc, s_max( st->exp_buf_speech_enc, hTcxEnc->exp_buf_speech_ltp ) ) ); // Q(15-max(st->exp_buf_speech_enc, hTcxEnc->exp_buf_speech_ltp)) + Scale_sig( hTcxEnc->buf_speech_ltp + st->L_frame, sub( L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k, st->L_frame ), sub( hTcxEnc->exp_buf_speech_ltp, s_max( st->exp_buf_speech_enc, hTcxEnc->exp_buf_speech_ltp ) ) ); // Q(15-max(st->exp_buf_speech_enc, hTcxEnc->exp_buf_speech_ltp)) hTcxEnc->exp_buf_speech_ltp = s_max( hTcxEnc->exp_buf_speech_ltp, st->exp_buf_speech_enc ); move16(); } @@ -707,9 +500,9 @@ static void init_sig_buffers_ivas_fx( Encoder_State *st, const Word16 L_frame_ol // Copy_Scale_sig( st->old_inp_12k8_fx, st->buf_speech_enc_pe + st->L_frame - L_INP_MEM, L_INP_MEM, sub( st->prev_Q_new, st->prev_Q_old ) ); Copy( st->old_inp_12k8_fx, st->buf_speech_enc_pe + st->L_frame - L_INP_MEM, L_INP_MEM ); /* SCaling to common exponent*/ - Scale_sig( st->buf_speech_enc_pe + sub( st->L_frame, L_INP_MEM ), L_INP_MEM, sub( st->exp_old_inp_12k8, s_max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ) ) ); - Scale_sig( st->buf_speech_enc_pe, sub( st->L_frame, L_INP_MEM ), sub( st->exp_buf_speech_enc_pe, s_max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ) ) ); - Scale_sig( st->buf_speech_enc_pe + st->L_frame, sub( L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k, st->L_frame ), sub( st->exp_buf_speech_enc_pe, s_max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ) ) ); + Scale_sig( st->buf_speech_enc_pe + sub( st->L_frame, L_INP_MEM ), L_INP_MEM, sub( st->exp_old_inp_12k8, s_max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ) ) ); // Q(15-max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe )) + Scale_sig( st->buf_speech_enc_pe, sub( st->L_frame, L_INP_MEM ), sub( st->exp_buf_speech_enc_pe, s_max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ) ) ); // Q(15-max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe )) + Scale_sig( st->buf_speech_enc_pe + st->L_frame, sub( L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k, st->L_frame ), sub( st->exp_buf_speech_enc_pe, s_max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ) ) ); // Q(15-max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe )) st->exp_buf_speech_enc_pe = s_max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ); move16(); } @@ -758,7 +551,7 @@ static void init_sig_buffers_ivas_fx( Encoder_State *st, const Word16 L_frame_ol hTcxEnc->new_speech_ltp = hTcxEnc->buf_speech_ltp + st->encoderPastSamples_enc + st->encoderLookahead_enc; } - if ( st->hTcxEnc != NULL ) + IF( st->hTcxEnc != NULL ) { st->hTcxEnc->new_speech_TCX = st->input_buff_fx + Mpy_32_32( st->input_Fs, ONE_BY_FRAMES_PER_SEC_Q31 ); st->hTcxEnc->speech_TCX = st->hTcxEnc->new_speech_TCX - st->encoderLookahead_FB; @@ -771,7 +564,7 @@ static void init_sig_buffers_ivas_fx( Encoder_State *st, const Word16 L_frame_ol hTcxEnc->speech_ltp = hTcxEnc->buf_speech_ltp + st->encoderPastSamples_enc; } - IF( GT_16( st->element_mode, EVS_MONO ) ) + IF( st->element_mode > EVS_MONO ) { st->wspeech_enc = st->buf_wspeech_enc + st->L_frame + L_SUBFR; } @@ -889,7 +682,7 @@ static void init_acelp_ivas_fx( Encoder_State *st, Word16 L_frame_old, Word16 sh } ELSE /*Rate switching*/ { - IF( EQ_16( st->last_core, ACELP_CORE ) ) + IF( st->last_core == ACELP_CORE ) { lerp( hTcxEnc->Txnq, hTcxEnc->Txnq, shr( st->L_frame, 1 ), shr( L_frame_old, 1 ) ); } @@ -900,7 +693,7 @@ static void init_acelp_ivas_fx( Encoder_State *st, Word16 L_frame_old, Word16 sh hTcxEnc->acelp_zir = hTcxEnc->Txnq + shr( st->L_frame, 1 ); /* Rate switching */ - IF( EQ_16( st->last_codec_mode, MODE1 ) && EQ_16( st->element_mode, EVS_MONO ) ) + IF( EQ_16( st->last_codec_mode, MODE1 ) && st->element_mode == EVS_MONO ) { IF( hLPDmem != NULL ) { @@ -1142,9 +935,9 @@ static void init_acelp_ivas_fx( Encoder_State *st, Word16 L_frame_old, Word16 sh move32(); st->pst_lp_ener_fx = 0; move16(); - if ( EQ_16( st->last_codec_mode, MODE1 ) ) + IF( EQ_16( st->last_codec_mode, MODE1 ) ) { - st->mem_bpf_fx.lp_error = L_deposit_l( 0 ); + st->mem_bpf_fx.lp_error = 0; move32(); st->pst_mem_deemp_err_fx = 0; move16(); diff --git a/lib_enc/core_enc_init_fx.c b/lib_enc/core_enc_init_fx.c index d35a443d8..6638b4c33 100644 --- a/lib_enc/core_enc_init_fx.c +++ b/lib_enc/core_enc_init_fx.c @@ -63,6 +63,7 @@ void init_coder_ace_plus_fx( /* Core Sampling Rate */ st->sr_core = getCoreSamplerateMode2( st->element_mode, st->total_brate, st->bwidth, st->flag_ACELP16k, st->rf_mode, st->is_ism_format ); st->fscale = sr2fscale( st->sr_core ); + move32(); move16(); /* Narrowband? */ @@ -77,12 +78,13 @@ void init_coder_ace_plus_fx( /* Core Framing */ L_frame_old = st->last_L_frame; move16(); - st->L_frame = extract_l( Mult_32_16( st->sr_core, 0x0290 ) ); + st->L_frame = extract_l( Mult_32_16( st->sr_core, 0x0290 /* 1/FRAMES_PER_SEC in Q15*/ ) ); st->L_frame_past = -1; move16(); + move16(); - hTcxEnc->L_frameTCX = extract_l( Mult_32_16( st->input_Fs, 0x0290 ) ); - + hTcxEnc->L_frameTCX = extract_l( Mult_32_16( st->input_Fs, 0x0290 /* 1/FRAMES_PER_SEC in Q15*/ ) ); + move16(); st->nb_subfr = NB_SUBFR; move16(); L_subfr = shr( st->L_frame, 2 ); @@ -112,7 +114,7 @@ void init_coder_ace_plus_fx( /* Initialize TBE */ hBWE_TD->prev_coder_type = GENERIC; move16(); - set16_fx( hBWE_TD->prev_lsf_diff_fx, 16384, LPC_SHB_ORDER - 2 ); + set16_fx( hBWE_TD->prev_lsf_diff_fx, 16384 /*.5f Q15*/, LPC_SHB_ORDER - 2 ); hBWE_TD->prev_tilt_para_fx = 0; move16(); set16_fx( hBWE_TD->cur_sub_Aq_fx, 0, M + 1 ); @@ -149,8 +151,9 @@ void init_coder_ace_plus_fx( if ( st->hIGFEnc != NULL ) { st->hIGFEnc->infoStopFrequency = -1; + move16(); } - move16(); + IF( st->igf ) { IGFEncSetMode_fx( st->hIGFEnc, st->total_brate, st->bwidth, st->element_mode, st->rf_mode ); @@ -158,9 +161,11 @@ void init_coder_ace_plus_fx( ELSE IF( st->hIGFEnc != NULL ) { st->hIGFEnc->infoTotalBitsWritten = 0; + move16(); if ( EQ_16( MCT_flag, -10 ) ) { st->hIGFEnc->infoTotalBitsPerFrameWritten = 0; + move16(); } /* the line above is needed for some transition, but when going from low rate to high rate, ex 7k2 to 96k, EVS fix point doesn't do the reset (or it does somewhere else */ } @@ -209,7 +214,6 @@ void init_coder_ace_plus_fx( /* Initialize DTX */ IF( st->ini_frame == 0 ) { - vad_init_fx( &st->vad_st ); } @@ -269,8 +273,8 @@ void init_coder_ace_plus_fx( move16(); move16(); move16(); /* casts */ - st->mean_gc[0] = L_deposit_h( 0 ); - st->mean_gc[1] = L_deposit_h( 0 ); + st->mean_gc[0] = 0; + st->mean_gc[1] = 0; st->prev_lsf4_mean = 0; move16(); @@ -314,7 +318,7 @@ static void init_tcx_fx( st->hTcxCfg->lfacNext = sub( st->hTcxCfg->tcx_offset, shr( st->L_frame, 2 ) ); move16(); st->hTcxCfg->lfacNextFB = sub( st->hTcxCfg->tcx_offsetFB, shr( hTcxEnc->L_frameTCX, 2 ) ); - + move16(); IF( st->ini_frame == 0 ) { st->hTcxCfg->tcx_curr_overlap_mode = st->hTcxCfg->tcx_last_overlap_mode = ALDO_WINDOW; @@ -375,7 +379,7 @@ static void init_tcx_fx( /* TCX-LTP */ hTcxEnc->tcxltp = getTcxLtp( st->sr_core ); - + move16(); test(); test(); test(); @@ -392,7 +396,7 @@ static void init_tcx_fx( hTcxEnc->tcxltp_norm_corr_past = 0; move16(); } - ELSE IF( NE_16( st->L_frame, L_frame_old ) && !( ( st->total_brate == 16400 || st->total_brate == 24400 ) && ( st->total_brate == st->last_total_brate ) && ( st->last_bwidth == st->bwidth ) ) ) + ELSE IF( NE_16( st->L_frame, L_frame_old ) && !( ( EQ_32( st->total_brate, 16400 ) || EQ_32( st->total_brate, 24400 ) ) && ( EQ_32( st->total_brate, st->last_total_brate ) ) && ( EQ_32( st->last_bwidth, st->bwidth ) ) ) ) { Word16 pitres, pitres_old; Word16 pit, pit_old; @@ -427,7 +431,6 @@ static void init_tcx_fx( hTcxEnc->tcxltp_pitch_int_past = pit; move16(); - move16(); hTcxEnc->tcxltp_pitch_fr_past = i_mult2( sub( pit, hTcxEnc->tcxltp_pitch_int_past ), pitres ); move16(); } @@ -444,8 +447,9 @@ static void init_tcx_fx( } st->hTcxCfg->ctx_hm = getCtxHm( st->element_mode, st->total_brate, st->rf_mode ); - + move16(); hTcxEnc->tcx_lpc_shaped_ari = getTcxLpcShapedAri( st->total_brate, st->rf_mode, st->element_mode ); + move16(); } void init_sig_buffers_fx( Encoder_State *st, const Word16 L_frame_old, const Word16 L_subfr ) @@ -485,14 +489,14 @@ void init_sig_buffers_fx( Encoder_State *st, const Word16 L_frame_old, const Wor Copy( st->buf_speech_enc, hTcxEnc->buf_speech_ltp, st->L_frame ); } - Copy_Scale_sig( st->old_wsp_fx, st->buf_wspeech_enc + st->L_frame + L_SUBFR - L_WSP_MEM, L_WSP_MEM, sub( st->prev_Q_new, st->prev_Q_old ) ); + Copy_Scale_sig( st->old_wsp_fx, st->buf_wspeech_enc + st->L_frame + L_SUBFR - L_WSP_MEM, L_WSP_MEM, sub( st->prev_Q_new, st->prev_Q_old ) ); // Q(prev_Q_new) /*Resamp buffers needed only for ACELP*/ test(); test(); IF( EQ_16( st->L_frame, L_FRAME ) && !st->tcxonly ) { - Copy_Scale_sig( st->old_inp_12k8_fx, st->buf_speech_enc_pe + st->L_frame - L_INP_MEM, L_INP_MEM, sub( st->prev_Q_new, st->prev_Q_old ) ); + Copy_Scale_sig( st->old_inp_12k8_fx, st->buf_speech_enc_pe + st->L_frame - L_INP_MEM, L_INP_MEM, sub( st->prev_Q_new, st->prev_Q_old ) ); // Q(prev_Q_new) } ELSE IF( EQ_16( st->L_frame, L_FRAME16k ) && !st->tcxonly ) { @@ -509,7 +513,7 @@ void init_sig_buffers_fx( Encoder_State *st, const Word16 L_frame_old, const Wor ELSE IF( !st->tcxonly && GE_32( st->last_total_brate, ACELP_32k ) ) { - Copy_Scale_sig( st->old_wsp_fx, st->buf_wspeech_enc + st->L_frame + L_SUBFR - L_WSP_MEM, L_WSP_MEM, sub( st->prev_Q_new, st->prev_Q_old ) ); + Copy_Scale_sig( st->old_wsp_fx, st->buf_wspeech_enc + st->L_frame + L_SUBFR - L_WSP_MEM, L_WSP_MEM, sub( st->prev_Q_new, st->prev_Q_old ) ); // Q(prev_Q_new) /*Resamp buffers needed only for ACELP*/ IF( EQ_16( st->L_frame, L_FRAME16k ) ) @@ -608,7 +612,7 @@ static void init_acelp_fx( Encoder_State *st, Word16 L_frame_old, const Word16 s /* Init pitch lag */ st->pit_res_max = initPitchLagParameters( st->sr_core, &st->pit_min, &st->pit_fr1, &st->pit_fr1b, &st->pit_fr2, &st->pit_max ); - + move16(); /* Init LPDmem */ IF( st->ini_frame == 0 ) @@ -671,6 +675,8 @@ static void init_acelp_fx( Encoder_State *st, Word16 L_frame_old, const Word16 s /* unquantized LPC*/ test(); + test(); + test(); IF( !( ( EQ_32( st->total_brate, ACELP_16k40 ) || EQ_32( st->total_brate, ACELP_24k40 ) ) && ( EQ_32( st->total_brate, st->last_total_brate ) ) && ( EQ_16( st->last_bwidth, st->bwidth ) ) ) ) { Copy( st->lsp_old1_fx, st->lspold_enc_fx, M ); /*lsp old @12.8kHz*/ @@ -693,6 +699,8 @@ static void init_acelp_fx( Encoder_State *st, Word16 L_frame_old, const Word16 s st->hTcxCfg->last_aldo = 1; /*It was previously ALDO*/ st->hTcxCfg->tcx_curr_overlap_mode = ALDO_WINDOW; + move16(); + move16(); /*ALDO overlap windowed past: also used in MODE1 but for other MDCT-FB*/ set16_fx( hTcxEnc->old_out_fx, 0, st->L_frame ); } @@ -704,6 +712,7 @@ static void init_acelp_fx( Encoder_State *st, Word16 L_frame_old, const Word16 s { /* convert quantized LSP vector */ st->rate_switching_reset = lsp_convert_poly_fx( st->lsp_old_fx, st->L_frame, 0 ); + move16(); IF( st->tcxonly == 0 ) { lsp2lsf_fx( st->lsp_old_fx, st->lsf_old_fx, M, st->sr_core ); @@ -742,6 +751,7 @@ static void init_acelp_fx( Encoder_State *st, Word16 L_frame_old, const Word16 s #else hLPDmem->mem_w0 = sub( shr( st->wspeech_enc[-1], shift ), tmp ); #endif + move16(); } } ELSE IF( ( NE_16( st->L_frame, L_frame_old ) ) ) @@ -804,7 +814,8 @@ static void init_acelp_fx( Encoder_State *st, Word16 L_frame_old, const Word16 s } /* Post-processing */ - hLPDmem->dm_fx.prev_gain_code = L_deposit_l( 0 ); + hLPDmem->dm_fx.prev_gain_code = 0; + move32(); set16_fx( hLPDmem->dm_fx.prev_gain_pit, 0, 6 ); hLPDmem->dm_fx.prev_state = 0; @@ -815,7 +826,8 @@ static void init_acelp_fx( Encoder_State *st, Word16 L_frame_old, const Word16 s /* Pulse Search configuration */ st->acelp_autocorr = 1; move16(); - + test(); + test(); /*Use for 12.8 kHz sampling rate and low bitrates, the conventional pulse search->better SNR*/ if ( ( LE_32( st->total_brate, ACELP_9k60 ) || st->rf_mode != 0 ) && ( EQ_32( st->sr_core, 12800 ) ) ) { @@ -827,10 +839,12 @@ static void init_acelp_fx( Encoder_State *st, Word16 L_frame_old, const Word16 s /*BPF parameters for adjusting gain in function of background noise*/ IF( EQ_16( st->codec_mode, MODE2 ) ) { - st->mem_bpf_fx.lp_error_ener = L_deposit_l( 0 ); - if ( st->last_codec_mode == MODE1 ) + st->mem_bpf_fx.lp_error_ener = 0; + move32(); + if ( EQ_16( st->last_codec_mode, MODE1 ) ) { - st->mem_bpf_fx.lp_error = L_deposit_l( 0 ); + st->mem_bpf_fx.lp_error = 0; + move32(); } } @@ -889,13 +903,15 @@ static void init_modes_fx( assert( st->fscale == 2 * st->L_frame ); /* this assumption is true if operated in 20ms frames with FSCALE_DENOM == 512, which is the current default */ tmp32 = L_shl( st->total_brate, 1 ); /* (float)st->L_frame/(float)st->fscale * (float)FSCALE_DENOM/128.0f * (float)st->bitrate */ st->bits_frame_nominal = extract_l( L_shr( Mpy_32_16_1( tmp32, 20972 ), 6 ) ); /* 20972 = 0.01 * 64 * 32768 */ - + move16(); IF( st->Opt_AMR_WB ) { st->bits_frame = st->bits_frame_nominal; st->bits_frame_core = st->bits_frame_nominal; st->frame_size_index = 0; move16(); + move16(); + move16(); } ELSE { diff --git a/lib_enc/core_enc_ol_fx.c b/lib_enc/core_enc_ol_fx.c index 89bd8b524..ee347543b 100644 --- a/lib_enc/core_enc_ol_fx.c +++ b/lib_enc/core_enc_ol_fx.c @@ -33,12 +33,12 @@ static void BITS_ALLOC_TCX_config_rf( Word16 *rf_frame_type, Word16 *rf_target_b void core_encode_openloop_fx( Encoder_State *st, /* i/o: encoder state structure */ const Word16 coder_type, /* i : coding type */ - const Word16 Aw[NB_SUBFR16k * ( M + 1 )], /* i : weighted A(z) unquant. for subframes*/ - const Word16 *lsp_new, /* i : LSPs at the end of the frame */ - const Word16 *lsp_mid, /* i : LSPs at the middle of the frame */ - Word16 *pitch_buf, /* i/o: floating pitch values for each subfr*/ - Word16 *voice_factors, /* o : voicing factors */ - Word16 *ptr_bwe_exc, /* o : excitation for SWB TBE */ + const Word16 Aw[NB_SUBFR16k * ( M + 1 )], /* i : weighted A(z) unquant. for subframes */ + const Word16 *lsp_new, /* i : LSPs at the end of the frame Q15*/ + const Word16 *lsp_mid, /* i : LSPs at the middle of the frame Q15*/ + Word16 *pitch_buf, /* i/o: floating pitch values for each subfr Q6*/ + Word16 *voice_factors, /* o : voicing factors Q15*/ + Word16 *ptr_bwe_exc, /* o : excitation for SWB TBE Qexc*/ const Word16 vad_hover_flag, const Word16 vad_flag_dtx, Word16 Q_new, @@ -86,6 +86,7 @@ void core_encode_openloop_fx( #endif #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif TCX_ENC_HANDLE hTcxEnc = st->hTcxEnc; RF_ENC_HANDLE hRF = st->hRF; @@ -123,23 +124,26 @@ void core_encode_openloop_fx( /* back up LPD mem_w0 target generation memory */ hRF->rf_mem_w0 = hLPDmem->mem_w0; - + move16(); /* back up clip gain memory */ Copy( st->clip_var_fx, hRF->rf_clip_var, 6 ); /* back up tilt code */ hRF->rf_tilt_code = hLPDmem->tilt_code; - + move16(); /* back up dispMem */ hRF->rf_dm_fx.prev_state = hLPDmem->dm_fx.prev_state; + move16(); hRF->rf_dm_fx.prev_gain_code = hLPDmem->dm_fx.prev_gain_code; + move32(); FOR( i = 0; i < 6; i++ ) { hRF->rf_dm_fx.prev_gain_pit[i] = hLPDmem->dm_fx.prev_gain_pit[i]; + move16(); } /* back up gc_threshold for noise addition */ hRF->rf_gc_threshold = hLPDmem->gc_threshold; - + move32(); /* initialization */ tcx_lpc_cdk = 0; @@ -233,7 +237,7 @@ void core_encode_openloop_fx( /* calculate target bits for core coding */ target_bits = sub( target_bits, st->rf_target_bits_write ); } - IF( st->igf != 0 ) + if ( st->igf != 0 ) { target_bits = sub( target_bits, get_tbe_bits_fx( st->total_brate, st->bwidth, st->rf_mode ) ); } @@ -249,7 +253,7 @@ void core_encode_openloop_fx( } /* reset TBE buffers previous frame frame wasn't ACELP*/ - IF( NE_16( st->last_core, ACELP_CORE ) ) + IF( st->last_core != ACELP_CORE ) { TBEreset_enc_fx( st, st->bwidth ); } @@ -261,6 +265,7 @@ void core_encode_openloop_fx( IF( st->hPlcExt ) { st->glr_idx[0] = encSideSpecPowDiffuseDetector_fx( st->hPlcExt->last_lsf_ref, st->hPlcExt->last_lsf_con, st->last_sr_core, &( st->prev_lsf4_mean ), (Word8) st->glr, coder_type ); + move16(); Copy( lsf_q, st->hPlcExt->last_lsf_ref, M ); Copy( st->hPlcExt->lsf_con, st->hPlcExt->last_lsf_con, M ); @@ -272,12 +277,11 @@ void core_encode_openloop_fx( st->glr_idx[0] = 0; } move16(); - move16(); st->hPlcExt->LPDmem = hLPDmem; encoderSideLossSimulation_fx( st, st->hPlcExt, lsf_q, stab_fac, st->hPlcExt->calcOnlylsf, st->L_frame ); } st->last_stab_fac = stab_fac; - + move16(); hTcxEnc->tcxltp_norm_corr_past = st->voicing_fx[1]; move16(); @@ -355,7 +359,7 @@ void core_encode_openloop_fx( /* Account for core signaling bits difference: bandwidth and ACELP/TCX signaling bit are replaced */ target_bits = add( target_bits, sub( add( FrameSizeConfig[st->frame_size_index].bandwidth_bits, 1 ), signalling_mode1_tcx20_enc_fx( st, 0 ) ) ); } - ELSE if ( EQ_16( st->mdct_sw_enable, MODE2 ) ) + ELSE IF( EQ_16( st->mdct_sw_enable, MODE2 ) ) { target_bits = sub( target_bits, 1 ); } @@ -396,7 +400,6 @@ void core_encode_openloop_fx( coder_tcx_post_fx( st, hLPDmem, st->hTcxCfg, st->synth, A_q, Aw, st->wspeech_enc, Q_new, shift ); - move16(); IF( st->hPlcExt ) { st->hPlcExt->LPDmem = hLPDmem; @@ -430,7 +433,9 @@ void core_encode_openloop_fx( lsf_update_memory( st->narrowBand, st->lsf_old_fx, st->mem_MA_fx, st->mem_MA_fx, M ); st->pstreaklen = 0; - st->streaklimit_fx = 32767; + st->streaklimit_fx = 32767; /*1 Q15*/ + move16(); + move16(); /* check resonance for pitch clipping algorithm */ gp_clip_test_lsf_fx( st->element_mode, st->lsf_old_fx, st->clip_var_fx, 0 ); Copy( st->lsf_old_fx, st->mem_AR_fx, M ); @@ -447,6 +452,8 @@ void core_encode_openloop_fx( *--------------------------------------------------------------*/ IF( st->Opt_DTX_ON != 0 ) { + test(); + test(); /* update CNG parameters in active frames */ IF( EQ_16( st->bwidth, NB ) && st->enableTcxLpc && st->core != ACELP_CORE ) { @@ -454,6 +461,7 @@ void core_encode_openloop_fx( assert( st->L_frame == L_FRAME ); Copy( st->synth + L_FRAME - L_LP, buf, L_LP ); tmp = st->synth[L_FRAME - L_LP - 1]; + move16(); E_UTIL_f_preemph2( Q_new - 1, buf, st->preemph_fac, L_LP, &tmp ); autocorr_fx( buf, M, r_h, r_l, &Q_r, L_LP, Assym_window_W16fx, 0, 0 ); lag_wind( r_h, r_l, M, INT_FS_FX, LAGW_WEAK ); @@ -501,12 +509,13 @@ void core_encode_openloop_fx( *---------------------------------------------------------------*/ test(); - IF( GT_16( st->core, ACELP_CORE ) || ( st->rate_switching_reset != 0 ) ) + IF( ( st->core > ACELP_CORE ) || ( st->rate_switching_reset != 0 ) ) { /*TCX mode: copy values*/ set16_fx( st->mem_bpf_fx.noise_buf, 0, 2 * L_FILT16k ); /*TCX->no gain*/ set16_fx( st->mem_bpf_fx.error_buf, 0, L_FILT16k ); /*TCX->no gain*/ st->bpf_gain_param = 0; + move16(); } ELSE IF( st->acelp_cfg.bpf_mode > 0 ) { @@ -532,7 +541,7 @@ void core_encode_openloop_fx( IF( EQ_16( st->rf_mode, 1 ) ) { set16_fx( lsf_q_1st_rf, 0, M ); - IF( EQ_16( st->core, ACELP_CORE ) ) + IF( st->core == ACELP_CORE ) { /* convert LSPs to LP coefficients */ lsp2lsf_fx( lsp_new, lsf_uq_rf, M, st->sr_core ); @@ -541,6 +550,7 @@ void core_encode_openloop_fx( /* first stage VQ, 8 bits; reuse TCX high rate codebook */ hRF->rf_indx_lsf[0][0] = vlpc_1st_cod_fx( lsf_uq_rf, lsf_q_1st_rf, w_rf, st->rf_mode ); + move16(); /*v_sub(lsf_uq_rf, lsf_q_1st_rf, lsf_q_d_rf, M);*/ FOR( i = 0; i < M; i++ ) { @@ -549,6 +559,7 @@ void core_encode_openloop_fx( #else lsf_q_d_rf[i] = shl( mult_r( sub( lsf_uq_rf[i], lsf_q_1st_rf[i] ), 25600 ), 5 ); #endif + move16(); /*input value is in Qx2.56, convert to Q6 to match table, quantizer table kept at Q6 to avoid losing precision */ /*Assume this difference data max range can be represented by Q6*/ } @@ -567,6 +578,7 @@ void core_encode_openloop_fx( tmp = lsf_q_diff_cb_8b_rf[i + M * hRF->rf_indx_lsf[0][1]]; /*tmp = quantized lsf_q_d_rf in Q6*/ tmp = shr( mult_r( tmp, 20972 ), 4 ); /* bring lsf_q_d_rf to Qx2.56 for addition */ lsf_q_rf[i] = add( lsf_q_1st_rf[i], tmp ); + move16(); } v_sort( lsf_q_rf, 0, M - 1 ); @@ -584,9 +596,12 @@ void core_encode_openloop_fx( hRF->rf_indx_lsf[0][0] = param_lpc[1]; hRF->rf_indx_lsf[0][1] = param_lpc[2]; hRF->rf_indx_lsf[0][2] = param_lpc[3]; + move16(); + move16(); + move16(); } - IF( EQ_16( st->core, ACELP_CORE ) ) + IF( st->core == ACELP_CORE ) { /* current n-th ACELP frame and its corresponding partial copy */ @@ -608,8 +623,9 @@ void core_encode_openloop_fx( /* RF frame type in the buffer */ hRF->rf_indx_frametype[0] = hRF->rf_frame_type; hRF->rf_targetbits_buff[0] = hRF->rf_target_bits; - - IF( NE_16( hRF->rf_frame_type, RF_NO_DATA ) ) + move16(); + move16(); + IF( hRF->rf_frame_type != RF_NO_DATA ) { /* coder_acelp_rf_fx does the partial copy encoding based on the rf frame type chosen for the RF encoding */ coder_acelp_rf_fx( &( hRF->acelp_cfg_rf ), coder_type, Aw, Aq_rf, st->speech_enc_pe, st->voicing_fx, st->pitch, @@ -651,7 +667,7 @@ void core_encode_openloop_fx( test(); test(); test(); - IF( + if ( ( EQ_16( st->core, TCX_20_CORE ) ) /*(st->core == TCX_20_CORE)*/ && ( EQ_16( st->last_core, TCX_20_CORE ) ) /*&&(st->last_core == TCX_20_CORE)*/ && ( EQ_16( hRF->rf_second_last_core, TCX_20_CORE ) ) /*&& (st->rf_second_last_core == TCX_20_CORE)*/ @@ -687,10 +703,12 @@ void core_encode_openloop_fx( LT_16( hRF->rf_gain_tcx[0], mult_r( hRF->rf_gain_tcx[1], 29491 /*0.90f Q15*/ ) ) ) ) { TD_mode = 0; + move16(); } ELSE { TD_mode = 1; + move16(); } /* updates */ @@ -743,6 +761,7 @@ static void closest_centroid_rf( Word32 L_tmp, best_werr, werr; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif ind_vec[0] = 0; @@ -782,12 +801,12 @@ static void closest_centroid_rf( void core_acelp_tcx20_switching_fx( Encoder_State *st, /* i/o: encoder state structure */ - Word16 non_staX, /* i : unbound non-stationarity for sp/mu clas */ - Word16 *pitch_fr, /* i/o: fraction pitch values */ - Word16 *voicing_fr, /* i/o: fractional voicing values */ - const Word16 currFlatness, /* i : flatness */ - const Word16 lsp_mid[M], /* i : LSPs at the middle of the frame */ - const Word16 stab_fac, /* i : LP filter stability */ + Word16 non_staX, /* i : unbound non-stationarity for sp/mu clas Q8*/ + Word16 *pitch_fr, /* i/o: fraction pitch values Q6*/ + Word16 *voicing_fr, /* i/o: fractional voicing values Q15*/ + const Word16 currFlatness, /* i : flatness Q7*/ + const Word16 lsp_mid[M], /* i : LSPs at the middle of the frame Q15*/ + const Word16 stab_fac, /* i : LP filter stability Q15*/ Word16 Q_new, Word16 shift ) { @@ -818,6 +837,7 @@ void core_acelp_tcx20_switching_fx( SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif /* Check minimum pitch for quantization */ @@ -920,7 +940,7 @@ void core_acelp_tcx20_switching_fx( Copy( hTcxEnc->speech_ltp + sub( tcx_offset, shr( overlap, 1 ) ), xn_buf, add( L_frame, overlap ) ); tmp16 = shr( overlap, 1 ); - IF( EQ_16( st->last_core, ACELP_CORE ) ) + IF( st->last_core == ACELP_CORE ) { IF( tcx_offset < 0 ) { @@ -958,7 +978,7 @@ void core_acelp_tcx20_switching_fx( tmp16 = mult_r( shl( L_frame, 5 ), 29309 /*16*0.0559017 Q15*/ ); /* L_frame / sqrt(2*NORM_MDCT_FACTOR); Q9 */ FOR( i = 0; i < L_frame; i++ ) { - x[i] = Mpy_32_16_1( x[i], tmp16 ); + x[i] = Mpy_32_16_1( x[i], tmp16 ); // Q(31-(x_e+6)) move32(); } x_e = add( x_e, 6 ); @@ -971,7 +991,7 @@ void core_acelp_tcx20_switching_fx( IF( st->narrowBand ) { - j = mult( L_frame, 20480 ); + j = mult( L_frame, 20480 /* .625 Q15*/ ); set32_fx( &x[j], 0, sub( L_frame, j ) ); } @@ -1220,7 +1240,7 @@ void core_acelp_tcx20_switching_fx( if ( ( GT_16( snr_acelp, snr_tcx ) ) && ( LT_16( snr_acelp, add( snr_tcx, 512 /*2.0f Q8*/ ) ) ) && #ifdef BASOP_NOGLOB - ( LT_16( add_o( st->prevTempFlatness_fx, currFlatness, &Overflow ), 416 /*3.25f Q7*/ ) || EQ_16( stab_fac, 0x7fff ) || + ( LT_16( add_o( st->prevTempFlatness_fx, currFlatness, &Overflow ), 416 /*3.25f Q7*/ ) || EQ_16( stab_fac, 0x7fff /*1 Q15*/ ) || #else /* BASOP_NOGLOB */ ( LT_16( add( st->prevTempFlatness_fx, currFlatness ), 416 /*3.25f Q7*/ ) || EQ_16( stab_fac, 0x7fff ) || #endif @@ -1269,7 +1289,7 @@ void core_acelp_tcx20_switching_fx( test(); test(); test(); - if ( EQ_32( st->sr_core, INT_FS_12k8 ) && ( offset_tcx < 0x18950F ) && GT_16( non_staX, 1280 /*5.0f Q8*/ ) && ( snr_acelp >= snr_tcx - 1024 /*4.0f in Q8*/ ) && GE_16( st->acelpFramesCount, 1 ) && ( ( GT_16( hSpMusClas->lps_fx, hSpMusClas->lpm_fx ) && GE_16( tmp16, 9830 ) ) || ( GE_16( st->acelpFramesCount, 6 ) && ( hSpMusClas->lps_fx > hSpMusClas->lpm_fx - 768 ) ) ) && ( st->sp_aud_decision0 == 0 ) && st->vad_flag != 0 ) + if ( EQ_32( st->sr_core, INT_FS_12k8 ) && ( offset_tcx < 0x18950F ) && GT_16( non_staX, 1280 /*5.0f Q8*/ ) && ( GE_16( snr_acelp, sub( snr_tcx, 1024 /*4.0f in Q8*/ ) ) ) && GE_16( st->acelpFramesCount, 1 ) && ( ( GT_16( hSpMusClas->lps_fx, hSpMusClas->lpm_fx ) && GE_16( tmp16, 9830 ) ) || ( GE_16( st->acelpFramesCount, 6 ) && ( GT_16( hSpMusClas->lps_fx, sub( hSpMusClas->lpm_fx, 768 ) ) ) ) ) && ( st->sp_aud_decision0 == 0 ) && st->vad_flag != 0 ) { /* Fine tuned across various databases based on various metrics to detect TCX frames in speech.*/ dsnr = 1024; @@ -1296,15 +1316,17 @@ void core_acelp_tcx20_switching_fx( test(); test(); if ( EQ_32( st->sr_core, INT_FS_12k8 ) && ( LT_16( non_staX, 512 /*2.0f Q8*/ ) || ( st->flag_noisy_speech_snr == 0 && EQ_16( st->vad_flag, 1 ) && ( offset_tcx == L_add( 0xFFD57AB5, 0 ) ) && GE_16( st->acelpFramesCount, 6 ) ) ) && - ( st->last_core == ACELP_CORE || st->last_core == TCX_20_CORE ) ) + ( st->last_core == ACELP_CORE || EQ_16( st->last_core, TCX_20_CORE ) ) ) { st->core = st->last_core; + move16(); } ELSE IF( GT_16( add( snr_acelp, dsnr ), snr_tcx ) ) { st->core = ACELP_CORE; move16(); st->acelpFramesCount = s_min( 32767 - 1, add( st->acelpFramesCount, 1 ) ); + move16(); } ELSE { @@ -1356,12 +1378,13 @@ void core_acelp_tcx20_switching_fx( *-------------------------------------------------------------------*/ static void BITS_ALLOC_ACELP_config_rf( const Word16 coder_type, - Word16 *tilt_code, + Word16 *tilt_code, // Q15 Word16 *rf_frame_type, Word16 *rf_target_bits, Word16 nb_subfr, Word16 rf_fec_indicator, - Word16 *pitch_buf ) + Word16 *pitch_buf // Q6 +) { Word16 mean_tc, min_tilt_code, max_tilt_code; Word16 nrgMode, ltfMode, ltpMode, gainsMode; @@ -1417,6 +1440,10 @@ static void BITS_ALLOC_ACELP_config_rf( IF( EQ_16( rf_fec_indicator, 1 ) ) { + test(); + test(); + test(); + test(); test(); test(); test(); @@ -1434,6 +1461,10 @@ static void BITS_ALLOC_ACELP_config_rf( } ELSE { + test(); + test(); + test(); + test(); test(); test(); test(); @@ -1456,41 +1487,52 @@ static void BITS_ALLOC_ACELP_config_rf( /* rf_mode, 1 bit */ *rf_target_bits = add( *rf_target_bits, 1 ); - + move16(); /* rf_fec_offset 2 bits */ *rf_target_bits = add( *rf_target_bits, 2 ); - + move16(); /* rf_frame_type, 3 bits */ *rf_target_bits = add( *rf_target_bits, 3 ); - + move16(); /* LSF bits 8 + 8 bits */ *rf_target_bits = add( *rf_target_bits, 16 ); - + move16(); /* Intialize the RF mode frame type to all-pred */ *rf_frame_type = RF_ALLPRED; - + move16(); + test(); + test(); + test(); test(); - IF( EQ_16( coder_type, INACTIVE ) || en_partial_red == 0 ) + IF( coder_type == INACTIVE || en_partial_red == 0 ) { *rf_frame_type = RF_NO_DATA; + move16(); } - ELSE IF( EQ_16( coder_type, UNVOICED ) || EQ_16( coder_type, INACTIVE ) ) + ELSE IF( EQ_16( coder_type, UNVOICED ) || ( coder_type == INACTIVE ) ) { *rf_frame_type = RF_NELP; + move16(); } ELSE IF( EQ_16( coder_type, GENERIC ) && LT_16( max_tilt_code, 1638 /*0.05f Q15*/ ) ) { *rf_frame_type = RF_NOPRED; + move16(); } ELSE IF( EQ_16( coder_type, GENERIC ) && LT_16( mean_tc, 9830 /*0.3f Q15*/ ) ) { *rf_frame_type = RF_GENPRED; + move16(); } nrgMode = ACELP_NRG_MODE[1][1][*rf_frame_type]; ltfMode = ACELP_LTF_MODE[1][1][*rf_frame_type]; ltpMode = ACELP_LTP_MODE[1][1][*rf_frame_type]; gainsMode = ACELP_GAINS_MODE[1][1][*rf_frame_type]; + move16(); + move16(); + move16(); + move16(); /* Number of RF bits for different RF coder types */ SWITCH( *rf_frame_type ) @@ -1499,6 +1541,7 @@ static void BITS_ALLOC_ACELP_config_rf( /* Es_pred bits 3 bits, LTF: 1, pitch: 8,5,5,5, FCB: 0, gain: 7,0,7,0, Diff GFr: 4*/ *rf_target_bits += ( ACELP_NRG_BITS[nrgMode] + ACELP_LTF_BITS[ltfMode] + ACELP_LTP_BITS_SFR[ltpMode][0] + ACELP_LTP_BITS_SFR[ltpMode][1] + ACELP_LTP_BITS_SFR[ltpMode][2] + ACELP_LTP_BITS_SFR[ltpMode][3] + ACELP_GAINS_BITS[gainsMode] + ACELP_GAINS_BITS[gainsMode] + 2 /*2 bits for PartialCopy GainFrame*/ ); + move16(); BREAK; case RF_NOPRED: @@ -1506,6 +1549,7 @@ static void BITS_ALLOC_ACELP_config_rf( /*bits += (3 + 0 + 0 + 28 + 12 + 2); */ /* 64 rf bits */ *rf_target_bits += ( ACELP_NRG_BITS[nrgMode] + ACELP_LTF_BITS[ltfMode] + 28 + ACELP_GAINS_BITS[gainsMode] + ACELP_GAINS_BITS[gainsMode] + 2 /*2 bits for PartialCopy GainFrame*/ ); + move16(); BREAK; case RF_GENPRED: @@ -1513,16 +1557,19 @@ static void BITS_ALLOC_ACELP_config_rf( /*bits += (3 + 0 + 16 + 23 + 10 + 0); */ /* 72 rf bits */ *rf_target_bits += ( ACELP_NRG_BITS[nrgMode] + ACELP_LTF_BITS[ltfMode] + ACELP_LTP_BITS_SFR[ltpMode][0] + ACELP_LTP_BITS_SFR[ltpMode][1] + ACELP_LTP_BITS_SFR[ltpMode][2] + ACELP_LTP_BITS_SFR[ltpMode][3] + 14 + ACELP_GAINS_BITS[gainsMode] + ACELP_GAINS_BITS[gainsMode] + 2 /*2 bits for PartialCopy GainFrame*/ ); + move16(); BREAK; case RF_NELP: /* gain: 19, Diff GFr: 5 */ /*bits += (19 + 5); */ *rf_target_bits += ( 19 + NUM_BITS_SHB_FRAMEGAIN ); + move16(); BREAK; case RF_NO_DATA: *rf_target_bits = 6; + move16(); BREAK; default: @@ -1553,7 +1600,7 @@ static void BITS_ALLOC_TCX_config_rf( move16(); test(); - IF( EQ_16( coder_type, INACTIVE ) || EQ_16( last_core, ACELP_CORE ) ) + IF( ( coder_type == INACTIVE ) || ( last_core == ACELP_CORE ) ) { *rf_frame_type = RF_NO_DATA; move16(); diff --git a/lib_enc/core_enc_updt.c b/lib_enc/core_enc_updt.c index 92a2a7b9e..95f1c67b6 100644 --- a/lib_enc/core_enc_updt.c +++ b/lib_enc/core_enc_updt.c @@ -74,7 +74,7 @@ void core_encode_update_ivas_fx( test(); IF( ( st->core == ACELP_CORE ) || EQ_16( st->core, AMR_WB_CORE ) || EQ_32( st->core_brate, SID_2k40 ) || ( st->core_brate == FRAME_NO_DATA ) ) { - Word16 max_e = s_max( st->hTcxEnc->exp_buf_speech_ltp, hTcxEnc->exp_buf_speech_ltp ); + Word16 max_e = s_max( st->hTcxEnc->exp_buf_speech_ltp, st->exp_buf_speech_enc ); Scale_sig( hTcxEnc->buf_speech_ltp, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k, negate( sub( max_e, st->hTcxEnc->exp_buf_speech_ltp ) ) ); // Q(31-max_e) Copy_Scale_sig( st->buf_speech_enc + st->L_frame, hTcxEnc->buf_speech_ltp + st->L_frame, st->L_frame, negate( sub( max_e, st->exp_buf_speech_enc ) ) ); // Q(31-max_e) st->hTcxEnc->exp_buf_speech_ltp = max_e; diff --git a/lib_enc/ext_sig_ana_fx.c b/lib_enc/ext_sig_ana_fx.c index a53e0199c..ae88362c0 100644 --- a/lib_enc/ext_sig_ana_fx.c +++ b/lib_enc/ext_sig_ana_fx.c @@ -1395,8 +1395,6 @@ void core_signal_analysis_high_bitrate_ivas_fx( { Word16 q_spectrum = sub( Q31, hTcxEnc->spectrum_e[frameno] ); Word16 q_powerSpec = sub( Q31, powerSpec_e ); - st->hIGFEnc->spec_be_igf_e = hTcxEnc->spectrum_e[frameno]; - move16(); ProcessIGF_ivas_fx( st, hTcxEnc->spectrum_fx[frameno], hTcxEnc->spectrum_fx[frameno], &q_spectrum, powerSpec, &q_powerSpec, transform_type[frameno] == TCX_20, frameno, 0, vad_hover_flag ); } } diff --git a/lib_enc/igf_enc.c b/lib_enc/igf_enc.c index 4fa98f20b..b7a218b63 100644 --- a/lib_enc/igf_enc.c +++ b/lib_enc/igf_enc.c @@ -2661,13 +2661,18 @@ void IGFEncApplyStereo_fx( void IGFSaveSpectrumForITF_ivas_fx( IGF_ENC_INSTANCE_HANDLE hIGFEnc, /* i/o: instance handle of IGF Encoder */ const Word16 igfGridIdx, /* i : IGF grid index */ - const Word32 *pITFSpectrum /* i : MDCT spectrum */ -) + const Word32 *pITFSpectrum, /* i : MDCT spectrum */ + Word16 exp_pITFSpectrum ) { IGF_UpdateInfo( hIGFEnc, igfGridIdx ); Copy32( pITFSpectrum + IGF_START_MN, hIGFEnc->spec_be_igf, sub( hIGFEnc->infoStopLine, IGF_START_MN ) ); + scale_sig32( hIGFEnc->spec_be_igf, sub( hIGFEnc->infoStopLine, IGF_START_MN ), sub( exp_pITFSpectrum, s_max( exp_pITFSpectrum, hIGFEnc->spec_be_igf_e ) ) ); + scale_sig32( hIGFEnc->spec_be_igf + sub( hIGFEnc->infoStopLine, IGF_START_MN ), sub( N_MAX_TCX - IGF_START_MN, sub( hIGFEnc->infoStopLine, IGF_START_MN ) ), sub( hIGFEnc->spec_be_igf_e, s_max( exp_pITFSpectrum, hIGFEnc->spec_be_igf_e ) ) ); + hIGFEnc->spec_be_igf_e = s_max( exp_pITFSpectrum, hIGFEnc->spec_be_igf_e ); + move16(); + return; } diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c index c7daec818..a68f0ad46 100644 --- a/lib_enc/ivas_core_enc.c +++ b/lib_enc/ivas_core_enc.c @@ -366,7 +366,6 @@ ivas_error ivas_core_enc( Scale_sig( st->hTcxEnc->Txnq, L_FRAME32k / 2 + 64, sub( -1, st->hTcxEnc->q_Txnq ) ); st->hTcxEnc->q_Txnq = -Q1; move16(); - Q_spec_old = hTcxEnc->spectrum_long_e; move16(); /* TCX core encoder */ @@ -645,12 +644,6 @@ ivas_error ivas_core_enc( st->q_old_inp = st->q_inp = 0; move16(); move16(); - - if ( st->hTcxEnc != NULL ) - { - st->hTcxEnc->exp_buf_speech_ltp = 31; // Q0 - move16(); - } IF( st->hBWE_FD != NULL ) { Scale_sig( st->hBWE_FD->L_old_wtda_swb_fx, L_FRAME48k, sub( -1, st->Q_old_wtda ) ); @@ -842,10 +835,6 @@ ivas_error ivas_core_enc( st->hBWE_FD->prev_Q_input_lp = sub( Q_new[n], 1 ); move16(); } - IF( st->hTcxEnc != NULL ) - { - Scale_sig( st->hTcxEnc->buf_speech_ltp, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k, negate( st->hTcxEnc->exp_buf_speech_ltp ) ); // Q0 - } } /*------------------------------------------------------------------* diff --git a/lib_enc/ivas_core_pre_proc_front.c b/lib_enc/ivas_core_pre_proc_front.c index e53515637..6c5abdb13 100644 --- a/lib_enc/ivas_core_pre_proc_front.c +++ b/lib_enc/ivas_core_pre_proc_front.c @@ -247,7 +247,7 @@ ivas_error pre_proc_front_ivas_fx( Word16 realBuffer16[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX]; Word16 imagBuffer16[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX]; Word16 res_cod_SNR_M_fx_e[STEREO_DFT_BAND_MAX]; - Word16 band_energies_fx_exp, Qfact_PS, q_lf_E_fx; + Word16 Qfact_PS, q_lf_E_fx; #ifdef DEBUG_MODE_INFO Word32 *in_buff_temp; Word16 in_q_temp; @@ -755,45 +755,21 @@ ivas_error pre_proc_front_ivas_fx( Word16 *lgBin_E_fx; Word32 PS_fx[128]; Word32 band_energies_fx[2 * NB_BANDS]; + Word16 q_band_energies; lgBin_E_fx = NULL; if ( st != NULL ) { lgBin_E_fx = &st->lgBin_E_fx[0]; } - Word16 Scale_fac[2] = { 0 }; - move16(); - move16(); - - Word32 Le_min_scaled = L_shr_r( L_add( L_shr( E_MIN_FXQ15, sub( 14, add( *Q_new, QSCALE - 2 ) ) ), 1 ), 1 ); - ivas_analy_sp_fx_front( element_mode, hCPE, input_Fs, inp_12k8_fx /*old_inp_12k8_e*/, *Q_new, fr_bands_fx, &fr_bands_fx_q, lf_E_fx, &q_lf_E_fx, &Etot_fx, st->min_band, st->max_band, Le_min_scaled, Scale_fac, st->Bin_E_fx, &st->q_Bin_E, st->Bin_E_old_fx, &st->q_Bin_E_old, PS_fx, &Qfact_PS, lgBin_E_fx, band_energies_fx, &band_energies_fx_exp, fft_buff_fx /*, Q_inp_dmx*/ ); - - Word16 min_q = MAX_16, fft_q[2]; - Word16 i_sbfr; - FOR( i_sbfr = 0; i_sbfr < 2; i_sbfr++ ) - { - fft_q[i_sbfr] = add( *Q_new, Scale_fac[i_sbfr] ); - move16(); - min_q = s_min( min_q, fft_q[i_sbfr] ); - } - FOR( i_sbfr = 0; i_sbfr < 2; i_sbfr++ ) - { - Scale_sig( fft_buff_fx + i_sbfr * L_FFT, L_FFT, sub( min_q, fft_q[i_sbfr] ) ); - } - *fft_buff_fx_q = min_q; - move16(); + ivas_analy_sp_fx( element_mode, hCPE, input_Fs, inp_12k8_fx /*old_inp_12k8_e*/, *Q_new, fr_bands_fx, &fr_bands_fx_q, lf_E_fx, &q_lf_E_fx, + &Etot_fx, st->min_band, st->max_band, st->Bin_E_fx, &st->q_Bin_E, st->Bin_E_old_fx, &st->q_Bin_E_old, PS_fx, &Qfact_PS, + lgBin_E_fx, band_energies_fx, &q_band_energies, fft_buff_fx, fft_buff_fx_q ); Word16 Q_bands0 = 0, Q_bands1 = 0; move16(); move16(); - Scale_sig32( st->hNoiseEst->bckr_fx, NB_BANDS, sub( add( *Q_new, QSCALE ), st->hNoiseEst->q_bckr ) ); - st->hNoiseEst->q_bckr = add( *Q_new, QSCALE ); - move16(); - Scale_sig32( st->hNoiseEst->enrO_fx, NB_BANDS, sub( add( *Q_new, QSCALE ), st->hNoiseEst->q_enrO ) ); - st->hNoiseEst->q_enrO = add( *Q_new, QSCALE ); - move16(); - IF( hStereoClassif != NULL ) { IF( GT_32( sub( st->lp_speech_fx, Etot_fx ), 25 << Q8 ) ) /*Q8*/ @@ -832,7 +808,24 @@ ivas_error pre_proc_front_ivas_fx( move16(); } - st->vad_flag = wb_vad_ivas_fx( st, fr_bands_fx, &i, &i, &i, &snr_sum_he_fx, &localVAD_HE_SAD, &( st->flag_noisy_speech_snr ), *Q_new, NULL, NULL, -MAX_16, -MAX_16 ); //-100000f == max 16bit float + Word16 scale = add( L_norm_arr( st->hNoiseEst->bckr_fx, NB_BANDS ), st->hNoiseEst->q_bckr ); + scale = s_min( scale, add( L_norm_arr( st->hNoiseEst->enrO_fx, NB_BANDS ), st->hNoiseEst->q_enrO ) ); + scale = s_min( scale, fr_bands_fx_q ); + + scale_sig32( st->hNoiseEst->bckr_fx, NB_BANDS, sub( scale, st->hNoiseEst->q_bckr ) ); + st->hNoiseEst->q_bckr = scale; + move16(); + + scale_sig32( st->hNoiseEst->enrO_fx, NB_BANDS, sub( scale, st->hNoiseEst->q_enrO ) ); + st->hNoiseEst->q_enrO = scale; + move16(); + + scale_sig32( fr_bands_fx, 2 * NB_BANDS, sub( scale, fr_bands_fx_q ) ); + fr_bands_fx_q = scale; + move16(); + + st->vad_flag = wb_vad_ivas_fx( st, fr_bands_fx, fr_bands_fx_q, &i, &i, &i, &snr_sum_he_fx, &localVAD_HE_SAD, + &( st->flag_noisy_speech_snr ), NULL, NULL, -MAX_16, -MAX_16 ); //-100000f == max 16bit float move16(); test(); @@ -918,10 +911,11 @@ ivas_error pre_proc_front_ivas_fx( * Correlation correction as a function of total noise level *----------------------------------------------------------------*/ - Le_min_scaled = L_shr_r( L_add( L_shr( E_MIN_FXQ15, sub( 14, add( *Q_new, QSCALE ) ) ), 1 ), 1 ); - - noise_est_down_fx( fr_bands_fx, st->hNoiseEst->bckr_fx, tmpN_fx, tmpE_fx, st->min_band, st->max_band, &st->hNoiseEst->totalNoise_fx, Etot_fx, &st->hNoiseEst->Etot_last_fx, &st->hNoiseEst->Etot_v_h2_fx, *Q_new, Le_min_scaled ); - st->hNoiseEst->q_bckr = add( *Q_new, QSCALE ); + noise_est_down_ivas_fx( fr_bands_fx, fr_bands_fx_q, st->hNoiseEst->bckr_fx, tmpN_fx, tmpE_fx, st->min_band, st->max_band, + &st->hNoiseEst->totalNoise_fx, Etot_fx, &st->hNoiseEst->Etot_last_fx, &st->hNoiseEst->Etot_v_h2_fx ); + q_tmpN = fr_bands_fx_q; + q_tmpE = fr_bands_fx_q; + move16(); move16(); test(); @@ -950,8 +944,7 @@ ivas_error pre_proc_front_ivas_fx( move16(); move16(); } - q_tmpN = q_tmpE = add( *Q_new, QSCALE ); - move16(); + *relE_fx = sub( Etot_fx, st->lp_speech_fx ); move16(); @@ -1006,7 +999,7 @@ ivas_error pre_proc_front_ivas_fx( move16(); shift = getScaleFactor32( band_energies_fx, 2 * NB_BANDS ); scale_sig32( band_energies_fx, 2 * NB_BANDS, shift ); - band_energies_fx_exp = sub( band_energies_fx_exp, shift ); + q_band_energies = add( q_band_energies, shift ); zero_flag = get_zero_flag( st->hFdCngEnc->msPeriodog_fx, NPART ); IF( zero_flag ) @@ -1040,7 +1033,7 @@ ivas_error pre_proc_front_ivas_fx( st->hFdCngEnc->hFdCngCom->exp_cldfb_periodog = sub( 31, normmsperiodog ); move16(); - perform_noise_estimation_enc_ivas_fx( band_energies_fx, band_energies_fx_exp, enerBuffer_fx, *enerBuffer_fx_exp, st->hFdCngEnc, input_Fs, hCPE ); + perform_noise_estimation_enc_ivas_fx( band_energies_fx, sub( Q31, q_band_energies ), enerBuffer_fx, *enerBuffer_fx_exp, st->hFdCngEnc, input_Fs, hCPE ); } } } diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index be6223e23..bb3babe74 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -245,7 +245,7 @@ ivas_error ivas_cpe_enc_fx( Word16 Q_inp = 0; move16(); - Word16 band_ener_guardbits = find_guarded_bits_fx( 2 * NB_BANDS ); + Word16 Q_add = 2; move16(); Word16 front_create_flag = 0; @@ -270,8 +270,12 @@ ivas_error ivas_cpe_enc_fx( } } - - IF( NE_32( ( error = front_vad_fx( hCPE, NULL, hEncoderConfig, &hCPE->hFrontVad[0], st_ivas->hMCT != NULL, input_frame, vad_flag_dtx, fr_bands_fx, Etot_LR_fx, lf_E_fx, localVAD_HE_SAD, vad_hover_flag, band_energies_LR_fx, NULL, NULL, Q_inp, Q_buffer, Q_add, &front_create_flag ) ), IVAS_ERR_OK ) ) + Word16 q_band_energies_LR; + Word16 q_fr_bands[2], q_lf_E[2]; + IF( NE_32( ( error = front_vad_fx( hCPE, NULL, hEncoderConfig, &hCPE->hFrontVad[0], st_ivas->hMCT != NULL, input_frame, vad_flag_dtx, + fr_bands_fx, q_fr_bands, Etot_LR_fx, lf_E_fx, q_lf_E, localVAD_HE_SAD, vad_hover_flag, band_energies_LR_fx, + &q_band_energies_LR, NULL, NULL, NULL, Q_inp, Q_buffer, Q_add, &front_create_flag ) ), + IVAS_ERR_OK ) ) { return error; } @@ -370,7 +374,8 @@ ivas_error ivas_cpe_enc_fx( sts[0]->q_old_inp = q_inp; move16(); Scale_sig( sts[0]->buf_speech_enc, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k, sub( q_inp, sub( Q15, sts[0]->exp_buf_speech_enc ) ) ); - + sts[0]->exp_buf_speech_enc = sub( Q15, q_inp ); + move16(); stereo_switching_enc_fx( hCPE, sts[0]->old_input_signal_fx, input_frame, q_inp ); /*----------------------------------------------------------------* @@ -400,17 +405,15 @@ ivas_error ivas_cpe_enc_fx( FOR( n = 0; n < CPE_CHANNELS; n++ ) { - Scale_sig( sts[n]->input_fx, input_frame, sub( -1, sts[n]->q_inp ) ); - sts[n]->q_inp = -1; - move16(); Copy( sts[n]->input_fx, orig_input_fx[n], input_frame ); - Q_orig_inp[n] = sts[n]->q_inp; + Scale_sig( orig_input_fx[n], input_frame, sub( -1, sts[n]->q_inp ) ); + Q_orig_inp[n] = -1; move16(); IF( hCPE->hStereoICBWE != NULL ) { hCPE->hStereoICBWE->dataChan_fx[n] = &orig_input_fx[n][0]; - hCPE->hStereoICBWE->q_dataChan_fx = sts[n]->q_inp; + hCPE->hStereoICBWE->q_dataChan_fx = Q_orig_inp[n]; move16(); } } @@ -803,8 +806,8 @@ ivas_error ivas_cpe_enc_fx( error = pre_proc_front_ivas_fx( NULL, hCPE, hCPE->element_brate, nb_bits_metadata, input_frame, n, old_inp_12k8_16fx[n], old_inp_16k_16fx[n], &ener_fx[n], &relE_fx[n], A_fx[n], Aw_fx[n], epsP_fx[n], &epsP_fx_q[n], lsp_new_fx[n], lsp_mid_fx[n], &vad_hover_flag[n], &attack_flag[n], realBuffer_fx[n], imagBuffer_fx[n], &q_re_im_buf[n], old_wsp_fx[n], &q_old_wsp, pitch_fr_fx[n], voicing_fr_fx[n], &loc_harm[n], &cor_map_sum_fx[n], &vad_flag_dtx[n], enerBuffer_fx[n], &enerBuffer_fx_exp[n], - fft_buff_fx[n], &fft_buff_fx_q[n], A_fx[0], lsp_new_fx[0], currFlatness_fx[n], tdm_ratio_idx, fr_bands_fx, add( Q_buffer[n], QSCALE ), Etot_LR_fx, lf_E_fx, add( Q_buffer[n], QSCALE - 2 ), localVAD_HE_SAD, - band_energies_LR_fx, sub( add( Q_buffer[n], QSCALE ), band_ener_guardbits ), 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, 0, 0, ivas_format, st_ivas->hMCT != NULL, ivas_total_brate, &Q_new[n] + fft_buff_fx[n], &fft_buff_fx_q[n], A_fx[0], lsp_new_fx[0], currFlatness_fx[n], tdm_ratio_idx, fr_bands_fx, q_fr_bands[n], Etot_LR_fx, lf_E_fx, q_lf_E[n], localVAD_HE_SAD, + band_energies_LR_fx, q_band_energies_LR, 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, 0, 0, ivas_format, st_ivas->hMCT != NULL, ivas_total_brate, &Q_new[n] #ifdef DEBUG_MODE_INFO , ( st_ivas->nSCE + ( cpe_id * CPE_CHANNELS ) + n ) diff --git a/lib_enc/ivas_enc.c b/lib_enc/ivas_enc.c index 1c715d516..e7c693329 100644 --- a/lib_enc/ivas_enc.c +++ b/lib_enc/ivas_enc.c @@ -106,9 +106,12 @@ ivas_error ivas_enc_fx( FOR( n = 0; n < MAX_INPUT_CHANNELS + MAX_NUM_OBJECTS; n++ ) { data_fx[n] = st_ivas->p_data_fx[n]; + IF( data_fx[n] ) + { + Scale_sig32( data_fx[n], input_frame, sub( Q11, st_ivas->q_data_fx ) ); + } } - - st_ivas->q_data_fx = 11; // Q-factor of the input buffer + st_ivas->q_data_fx = Q11; move16(); n = 0; move16(); diff --git a/lib_enc/ivas_front_vad.c b/lib_enc/ivas_front_vad.c index 16e1d5008..e89aac118 100644 --- a/lib_enc/ivas_front_vad.c +++ b/lib_enc/ivas_front_vad.c @@ -59,13 +59,17 @@ ivas_error front_vad_fx( const Word16 MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ const Word16 input_frame, /* i : frame length */ Word16 vad_flag_dtx[], /* o : HE-SAD flag with additional DTX HO */ - Word32 fr_bands_fx[][2 * NB_BANDS], /* o : energy in frequency bands Q_buffer[n] + QSCALE + 2 */ + Word32 fr_bands_fx[][2 * NB_BANDS], /* o : energy in frequency bands q_fr_bands_fx */ + Word16 q_fr_bands[], /* o : Q of fr_bands_fx Q0 */ Word16 Etot_LR_fx[], /* o : total energy Left & Right channel Q8 */ - Word32 lf_E_fx[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels Q_buffer[n] + QSCALE */ + Word32 lf_E_fx[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels q_lf_E */ + Word16 q_lf_E[], /* o : Q of lf_E_fx */ Word16 localVAD_HE_SAD[], /* o : HE-SAD flag without hangover, LR channels */ Word16 vad_hover_flag[], /* o : VAD hangover flag */ - Word32 band_energies_LR_fx[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN Q_buffer[1] + QSCALE + 2 - band_ener_guardbits*/ - Word32 *PS_out_fx, /* o : energy spectrum Q_buffer + QSCALE */ + Word32 band_energies_LR_fx[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN q_band_energies_LR */ + Word16 *q_band_energies_LR, /* o : Q of band_energies_LR_fx */ + Word32 *PS_out_fx, /* o : energy spectrum q_PS_out */ + Word16 *q_PS_out, /* o : Q of PS_out_fx Q0 */ Word16 *Bin_E_out_fx, /* o : log-energy spectrum of the current frame Q7 */ Word16 Q_inp, Word16 *Q_buffer, @@ -86,11 +90,14 @@ ivas_error front_vad_fx( Word16 dummy_short; Word16 element_mode, last_element_mode; ivas_error error; - Word16 Q_new = 0, band_ener_guardbits = 0; + Word16 Q_new; Word16 Qband, mem_decim_size; error = IVAS_ERR_OK; push_wmops( "front_vad" ); + Q_new = 0; + move16(); + lgBin_E_fx = NULL; if ( st != NULL ) { @@ -198,7 +205,10 @@ ivas_error front_vad_fx( move16(); } } - band_ener_guardbits = find_guarded_bits_fx( 2 * NB_BANDS ); + + Word16 q_Bin_E, q_Bin_E_old; + Word16 q_band_energies; + Word16 q_fft_buffLR; FOR( n = 0; n < n_chan; n++ ) { FRONT_VAD_ENC_HANDLE hFrontVad; @@ -219,33 +229,75 @@ ivas_error front_vad_fx( PREEMPH_FX( hFrontVad->buffer_12k8_fx + L_FFT / 2, PREEMPH_FAC, L_FRAME, &hFrontVad->mem_preemph_fx ); - Word16 Scale_fac[2]; Q_new = add( sub( Q_inp, Qband ), Q_add ); Scale_sig( hFrontVad->buffer_12k8_fx, L_FFT / 2, Q_new - Q_buffer[n] ); Scale_sig( hFrontVad->buffer_12k8_fx + L_FFT / 2, 384 - L_FFT / 2, Q_new - add( Q_inp, Qband ) ); Q_buffer[n] = Q_new; - Word32 Le_min_scaled = L_shr_r( L_add( L_shr( E_MIN_FXQ15, sub( 14, add( Q_new, QSCALE ) ) ), 1 ), 1 ); - Le_min_scaled = L_shl( Le_min_scaled, 2 ); - ivas_analy_sp_fx( IVAS_CPE_TD, hCPE, sts[0]->input_Fs, hFrontVad->buffer_12k8_fx + L_FFT / 2 - 3 * ( L_SUBFR / 2 ), Q_new, fr_bands_fx[n], lf_E_fx[n], &Etot_LR_fx[n], sts[0]->min_band, sts[0]->max_band, Le_min_scaled, Scale_fac, Bin_E_fx, Bin_E_old_fx, PS_fx, lgBin_E_fx, band_energies_fx, fft_buffLR_fx ); + move16(); + + ivas_analy_sp_fx( IVAS_CPE_TD, hCPE, sts[0]->input_Fs, hFrontVad->buffer_12k8_fx + L_FFT / 2 - 3 * ( L_SUBFR / 2 ), Q_new, fr_bands_fx[n], + &q_fr_bands[n], lf_E_fx[n], &q_lf_E[n], &Etot_LR_fx[n], sts[0]->min_band, sts[0]->max_band, Bin_E_fx, &q_Bin_E, Bin_E_old_fx, + &q_Bin_E_old, PS_fx, q_PS_out, lgBin_E_fx, band_energies_fx, &q_band_energies, fft_buffLR_fx, &q_fft_buffLR ); + if ( n == 0 ) + { + *q_band_energies_LR = q_band_energies; + move16(); + } + + /* v_add( &band_energies[0], &band_energies_LR[0], &band_energies_LR[0], 2 * NB_BANDS ); */ + IF( EQ_16( n, 1 ) ) + { + IF( LT_16( *q_band_energies_LR, q_band_energies ) ) + { + scale_sig32( band_energies_fx, 2 * NB_BANDS, sub( *q_band_energies_LR, q_band_energies ) ); // q_band_energies_LR + } + ELSE + { + scale_sig32( band_energies_LR_fx, 2 * NB_BANDS, sub( q_band_energies, *q_band_energies_LR ) ); // q_band_energies_LR + *q_band_energies_LR = q_band_energies; + move16(); + } + } + + v_add_fx( &band_energies_fx[0], &band_energies_LR_fx[0], &band_energies_LR_fx[0], 2 * NB_BANDS ); // q_band_energies_LR /* add up energies for later calculating average of channel energies */ - // Scale_sig32( &band_energies_LR_fx[0], ( Q_new + QSCALE + 2 ) - ( Q_new_old + QSCALE + 2 - band_ener_guardbits ), 40 ); Q_new_old = Q_new; - v_add_fixed( &band_energies_fx[0], &band_energies_LR_fx[0], &band_energies_LR_fx[0], 2 * NB_BANDS, band_ener_guardbits ); + move16(); Word32 Etot_fx = L_deposit_h( Etot_LR_fx[n] ); noise_est_pre_32fx( Etot_fx, hFrontVads[0]->ini_frame, hFrontVad->hNoiseEst, 0, 0, 0 ); /* wb_vad */ - Scale_sig32( hFrontVads[n]->hNoiseEst->bckr_fx, NB_BANDS, sub( add( Q_new, QSCALE ), hFrontVads[n]->hNoiseEst->q_bckr ) ); - hFrontVads[n]->hNoiseEst->q_bckr = add( Q_new, QSCALE ); + Word16 scale = getScaleFactor32( hFrontVads[n]->hNoiseEst->bckr_fx, NB_BANDS ); + scale_sig32( hFrontVads[n]->hNoiseEst->bckr_fx, NB_BANDS, scale ); + hFrontVads[n]->hNoiseEst->q_bckr = add( hFrontVads[n]->hNoiseEst->q_bckr, scale ); + move16(); + + scale = getScaleFactor32( hFrontVads[n]->hNoiseEst->enrO_fx, NB_BANDS ); + scale_sig32( hFrontVads[n]->hNoiseEst->enrO_fx, NB_BANDS, scale ); + hFrontVads[n]->hNoiseEst->q_enrO = add( hFrontVads[n]->hNoiseEst->q_enrO, scale ); + move16(); + + scale = s_min( q_fr_bands[n], s_min( hFrontVads[n]->hNoiseEst->q_bckr, hFrontVads[n]->hNoiseEst->q_enrO ) ); + + scale_sig32( hFrontVads[n]->hNoiseEst->bckr_fx, NB_BANDS, sub( scale, hFrontVads[n]->hNoiseEst->q_bckr ) ); + hFrontVads[n]->hNoiseEst->q_bckr = scale; + move16(); + + scale_sig32( hFrontVads[n]->hNoiseEst->enrO_fx, NB_BANDS, sub( scale, hFrontVads[n]->hNoiseEst->q_enrO ) ); + hFrontVads[n]->hNoiseEst->q_enrO = scale; move16(); - Scale_sig32( hFrontVads[n]->hNoiseEst->enrO_fx, NB_BANDS, sub( add( Q_new, QSCALE ), hFrontVads[n]->hNoiseEst->q_enrO ) ); - hFrontVads[n]->hNoiseEst->q_enrO = add( Q_new, QSCALE ); + + scale_sig32( fr_bands_fx[n], 2 * NB_BANDS, sub( scale, q_fr_bands[n] ) ); + q_fr_bands[n] = scale; move16(); - hFrontVad->hVAD->vad_flag = wb_vad_ivas_fx( sts[n], fr_bands_fx[n], &dummy, &dummy, &dummy, &snr_sum_he_fx, &localVAD_HE_SAD[n], &dummy_short, Q_new, hFrontVad->hVAD, hFrontVad->hNoiseEst, hFrontVad->lp_speech_fx, hFrontVad->lp_noise_fx ); + + hFrontVad->hVAD->vad_flag = wb_vad_ivas_fx( sts[n], fr_bands_fx[n], q_fr_bands[n], &dummy, &dummy, &dummy, &snr_sum_he_fx, + &localVAD_HE_SAD[n], &dummy_short, hFrontVad->hVAD, hFrontVad->hNoiseEst, + hFrontVad->lp_speech_fx, hFrontVad->lp_noise_fx ); test(); test(); @@ -267,10 +319,12 @@ ivas_error front_vad_fx( IF( EQ_16( n_chan, CPE_CHANNELS ) ) { /* get average channel energies, adding up was already done, so only need to scale by number of channels */ - v_multc_fixed( &band_energies_LR_fx[0], ONE_IN_Q30, &band_energies_LR_fx[0], 2 * NB_BANDS ); + v_multc_fixed( &band_energies_LR_fx[0], ONE_IN_Q30 /* 0.5f in Q31 */, &band_energies_LR_fx[0], 2 * NB_BANDS ); /* Logical OR between L and R decisions */ + test(); vad_flag_dtx[0] = vad_flag_dtx[0] || vad_flag_dtx[1]; + move16(); } IF( sts[0]->hFdCngEnc != NULL ) { @@ -492,6 +546,7 @@ ivas_error front_vad_spar_fx( Word16 tmp; Word16 old_pitch; ivas_error error; + Word16 Qfact_PS; push_wmops( "front_vad_SPAR" ); error = IVAS_ERR_OK; @@ -585,7 +640,12 @@ ivas_error front_vad_spar_fx( Scale_sig( hFrontVad->mem_decim_fx, 2 * L_FILT_MAX, sub( Q_inp, hFrontVad->q_mem_decim ) ); hFrontVad->q_mem_decim = Q_inp; move16(); - IF( NE_32( ( error = front_vad_fx( NULL, st, hEncoderConfig, &hFrontVad, 0 /* MCT_flag */, input_frame, vad_flag_dtx, fr_bands_fx, Etot_fx, lf_E_fx, localVAD_HE_SAD, vad_hover_flag, band_energies_fx, &PS_fx[0], &st->lgBin_E_fx[0], Q_inp, &Q_buffer, Q_add, &front_create_flag ) ), IVAS_ERR_OK ) ) + Word16 q_band_energies; + Word16 q_fr_bands[2], q_lf_E[2]; + IF( NE_32( ( error = front_vad_fx( NULL, st, hEncoderConfig, &hFrontVad, 0 /* MCT_flag */, input_frame, vad_flag_dtx, fr_bands_fx, + q_fr_bands, Etot_fx, lf_E_fx, q_lf_E, localVAD_HE_SAD, vad_hover_flag, band_energies_fx, + &q_band_energies, PS_fx, &Qfact_PS, &st->lgBin_E_fx[0], Q_inp, &Q_buffer, Q_add, &front_create_flag ) ), + IVAS_ERR_OK ) ) { return error; } @@ -595,11 +655,9 @@ ivas_error front_vad_spar_fx( Q_inp_12k8 = hFrontVad->q_buffer_12k8; move16(); - IF( st->lgBin_E_fx != NULL ) - { - Copy_Scale_sig_16_32( st->lgBin_E_fx, st->Bin_E_fx, L_FFT / 2, sub( st->q_Bin_E, Q7 ) ); - } - Scale_sig32( fr_bands_fx[0], 40, sub( ( Q_bands + QSCALE ), add( Q_buffer, QSCALE + 2 ) ) ); + Scale_sig32( fr_bands_fx[0], 40, sub( add( Q_bands, QSCALE ), q_fr_bands[0] ) ); // Q_bands+QSCALE + q_fr_bands[0] = add( Q_bands, QSCALE ); + move16(); Word32 e_min_scaled; e_min_scaled = L_shr_r( L_add( L_shr( E_MIN_FXQ15, sub( 14, add( Q_bands, QSCALE ) ) ), 1 ), 1 ); @@ -677,7 +735,9 @@ ivas_error front_vad_spar_fx( } Q_bands = Q9; move16(); - Scale_sig32( lf_E_fx[0], 148, ( Q_bands + QSCALE - 2 ) - ( Q_buffer + QSCALE ) ); + Scale_sig32( lf_E_fx[0], 148, sub( add( Q_bands, QSCALE - 2 ), q_lf_E[0] ) ); // Q_bands+QSCALE-2 + q_lf_E[0] = add( Q_bands, QSCALE - 2 ); + move16(); noise_est_ivas_fx( st, old_pitch, tmpN_fx, epsP_h, epsP_l, Etot_fx[0], Etot_fx[0] - hFrontVad->lp_speech_fx, corr_shift_fx, tmpE_fx, fr_bands_fx[0], &cor_map_sum_fx, NULL, &sp_div_fx, &Q_sp_div, &non_staX_fx, &loc_harm, lf_E_fx[0], &hFrontVad->hNoiseEst->harm_cor_cnt, hFrontVad->hNoiseEst->Etot_l_lp_fx, hFrontVad->hNoiseEst->Etot_v_h2_fx, &hFrontVad->hNoiseEst->bg_cnt, st->lgBin_E_fx, Q_bands, e_min_scaled, &sp_floor, S_map_fx, NULL, hFrontVad, hFrontVad->ini_frame ); MVR2R_WORD16( st->pitch, st->pitch, 3 ); @@ -689,7 +749,7 @@ ivas_error front_vad_spar_fx( Word16 Etot_fx_0 = Etot_fx[0]; move16(); scale = getScaleFactor32( PS_fx, 128 ); - Word16 Qfact_PS = add( add( Q_buffer, QSCALE ), scale ); + Qfact_PS = add( Qfact_PS, scale ); Scale_sig32( PS_fx, 128, scale ); ivas_smc_gmm_fx( st, NULL, localVAD_HE_SAD[0], Etot_fx_0, lsp_new_fx, cor_map_sum_fx, epsP_fx, PS_fx, non_sta_fx, relE_fx, &high_lpn_flag, flag_spitch, Qfact_PS, Q_esp, hSpMusClas->past_PS_Q ); diff --git a/lib_enc/ivas_init_enc.c b/lib_enc/ivas_init_enc.c index 2a516ea28..16052d5e5 100644 --- a/lib_enc/ivas_init_enc.c +++ b/lib_enc/ivas_init_enc.c @@ -507,7 +507,10 @@ ivas_error ivas_init_encoder( { return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for floating-point input audio buffer!\n" ) ); } + set32_fx( st_ivas->p_data_fx[n], 0, extract_l( Mpy_32_16_1( input_Fs, INV_FRAME_PER_SEC_Q15 ) ) ); } + st_ivas->q_data_fx = Q11; + move16(); FOR( ; n < MAX_INPUT_CHANNELS + MAX_NUM_OBJECTS; n++ ) { st_ivas->p_data_fx[n] = NULL; diff --git a/lib_enc/ivas_mct_enc.c b/lib_enc/ivas_mct_enc.c index a43be5c22..4d032cee8 100644 --- a/lib_enc/ivas_mct_enc.c +++ b/lib_enc/ivas_mct_enc.c @@ -551,19 +551,7 @@ ivas_error ivas_mct_enc_fx( /* common encoder updates */ st = hCPE->hCoreCoder[n]; - IF( st->hTcxEnc != NULL ) - { - st->hTcxEnc->exp_buf_speech_ltp = 31; /*Q0*/ - } - - updt_enc_common_ivas_fx( st, Q_new_out[cpe_id][n] ); - - - IF( st->hTcxEnc != NULL ) - { - Scale_sig( st->hTcxEnc->buf_speech_ltp, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k, st->hTcxEnc->exp_buf_speech_ltp ); // st->hTcxEnc->exp_buf_speech_ltp - } } } diff --git a/lib_enc/ivas_mct_enc_mct.c b/lib_enc/ivas_mct_enc_mct.c index 30c45b5ef..ea916c673 100644 --- a/lib_enc/ivas_mct_enc_mct.c +++ b/lib_enc/ivas_mct_enc_mct.c @@ -1085,13 +1085,8 @@ void mctStereoIGF_enc_fx( move16(); } - p_st[0]->hIGFEnc->spec_be_igf_e = sub( 31, q_origSpec ); - p_st[1]->hIGFEnc->spec_be_igf_e = sub( 31, q_origSpec ); - move16(); - move16(); - ProcessStereoIGF_fx( hMCT->hBlockData[b]->hStereoMdct, p_st, hMCT->hBlockData[b]->mask, - p_orig_spectrum_fx, + p_orig_spectrum_fx, q_origSpec, q_origSpec, p_powerSpec_fx, p_powerSpecMsInv_fx, p_inv_spectrum_fx, n, sp_aud_decision0[ch1], p_st[0]->total_brate ); } ELSE diff --git a/lib_enc/ivas_rom_enc.h b/lib_enc/ivas_rom_enc.h index e859dc5e6..501965bfa 100644 --- a/lib_enc/ivas_rom_enc.h +++ b/lib_enc/ivas_rom_enc.h @@ -48,16 +48,16 @@ extern const Word32 unclr_mean_td[]; extern const Word32 unclr_scale_td[]; extern const Word32 unclr_coef_td[]; -extern const int16_t xtalk_isel_td[]; +extern const Word16 xtalk_isel_td[]; extern const Word32 xtalk_mean_td[]; extern const Word32 xtalk_scale_td[]; extern const Word32 xtalk_coef_td[]; -extern const int16_t xtalk_isel_dft[]; +extern const Word16 xtalk_isel_dft[]; extern const Word32 xtalk_mean_dft_q15[]; extern const Word32 xtalk_scale_dft_q15[]; extern const Word32 xtalk_coef_dft_q30[]; -extern const int16_t unclr_isel_dft[]; +extern const Word16 unclr_isel_dft[]; extern const Word32 unclr_mean_dft_Q15[]; extern const Word32 unclr_scale_dft_Q15[]; extern const Word32 unclr_coef_dft_Q28[]; @@ -75,9 +75,8 @@ extern const Word32 icbwe_regressionValuesDFT_fx[]; * DFT stereo ROM tables *----------------------------------------------------------------------------------*/ -extern const int16_t itd_vad_band_tbl[]; +extern const Word16 itd_vad_band_tbl[]; extern const Word32 ild_q_Q25[16]; -extern const int16_t ild_q[]; extern const Word32 Wn_table_fx[50]; extern const Word16 win_ana_8k_fx[STEREO_DFT_OVL_8k]; @@ -101,12 +100,12 @@ extern const Word32 ari_bit_estimate_s17_LC_fx[RANGE_N_CONTEXT][RANGE_N_SYMBOLS] /*----------------------------------------------------------------------------------* * ECLVQ Stereo ROM tables *----------------------------------------------------------------------------------*/ -extern const int16_t log2_1px_table[65]; +extern const Word16 log2_1px_table[65]; extern const Word32 log2TB_Q31[ECSQ_log2TB_SIZE]; extern const Word16 ECSQ_log2_fact_Q10[1 + ECSQ_SEGMENT_SIZE]; -extern const uint16_t ECSQ_tab_param[ECSQ_CONFIG_COUNT][1 + ECSQ_PARAM_COUNT]; -extern const uint16_t *const ECSQ_tab_abs_lsbs[1 + 4]; -extern const uint16_t ECSQ_tab_vals[ECSQ_PARAM_COUNT - 1][1 + ECSQ_TAB_VALS_SIZE]; +extern const UWord16 ECSQ_tab_param[ECSQ_CONFIG_COUNT][1 + ECSQ_PARAM_COUNT]; +extern const UWord16 *const ECSQ_tab_abs_lsbs[1 + 4]; +extern const UWord16 ECSQ_tab_vals[ECSQ_PARAM_COUNT - 1][1 + ECSQ_TAB_VALS_SIZE]; /*----------------------------------------------------------------------------------* @@ -123,7 +122,7 @@ extern const Word32 Stereo_dmx_wnd_coef_48k_fx[L_FRAME48k]; extern const HUFF_TABLE huff_alpha_table; extern const HUFF_TABLE huff_beta_table; -extern const int16_t mc_paramupmix_fb_remix_order[4]; +extern const Word16 mc_paramupmix_fb_remix_order[4]; /*----------------------------------------------------------------------------------* * ParamMC ROM tables diff --git a/lib_enc/ivas_rom_enc.c b/lib_enc/ivas_rom_enc_fx.c similarity index 98% rename from lib_enc/ivas_rom_enc.c rename to lib_enc/ivas_rom_enc_fx.c index 2326c5c8c..e4361b933 100644 --- a/lib_enc/ivas_rom_enc.c +++ b/lib_enc/ivas_rom_enc_fx.c @@ -67,7 +67,7 @@ const Word32 unclr_coef_td[SIZE_UNCLR_ISEL_TD] = /*Q15*/ }; /* UNCLR classifier in DFT stereo: list of selected features */ -const int16_t unclr_isel_dft[SIZE_UNCLR_ISEL_DFT] = +const Word16 unclr_isel_dft[SIZE_UNCLR_ISEL_DFT] = { E_gainILD, E_gainIPD, E_angle_rot, E_g_pred, E_cohSNR, E_d_prodL_prodR, E_sum_xcorr, E_xcorr_itd_value }; @@ -90,7 +90,7 @@ const Word32 unclr_coef_dft_Q28[SIZE_UNCLR_ISEL_DFT] = }; /* xtalk clasifier in TD stereo: list of selected features */ -const int16_t xtalk_isel_td[SIZE_XTALK_ISEL_TD] = +const Word16 xtalk_isel_td[SIZE_XTALK_ISEL_TD] = { E_d_clas, E_d_voicing, E_sum_d_LSF, E_d_lepsP_13, E_d_cor_map_sum, E_d_nchar, E_d_non_sta, E_d_sp_div, E_sum_prod, E_tdm_es_em, E_m_corrL_corrR, E_corrEst0, E_corrEst_ncorr, E_corrLagStats0, E_ica_corr_value0, E_diff_corrLM_corrRM, E_tdm_LT_es_em }; @@ -108,7 +108,7 @@ const Word32 xtalk_coef_td[SIZE_XTALK_ISEL_TD] = /*Q15*/ { 1841, 4353, 3322, -5411, 1061, 2716, -2453, 1046, 45199, -51474, -2431, -2245, 2194, -542, -135853, 99, 18138 }; -const int16_t xtalk_isel_dft[SIZE_XTALK_ISEL_DFT] = +const Word16 xtalk_isel_dft[SIZE_XTALK_ISEL_DFT] = { E_clas, E_gainILD, E_gainIPD, E_angle_rot, E_g_pred, E_d_prodL_prodR, E_sum_xcorr, E_xcorr_itd_value, E_gphat_d_itd2, E_gphat_ratio_m1_m2, E_gphat_m2_m2 }; @@ -127,8 +127,6 @@ const Word32 xtalk_coef_dft_q30[SIZE_XTALK_ISEL_DFT] = /*----------------------------------------------------------------------------------* * Stereo IC-BWE ROM tables *----------------------------------------------------------------------------------*/ -#define FLOAT_2_FIX_Q30(a) ((Word32)(a * 1024.0 * 1024.0 * 1024.0)) -#define FLOAT_2_FIX_Q31(a) ((Word32)(a * 1024.0 * 1024.0 * 1024.0 * 2.0)) /* Q30Value >> Q5 */ const Word32 icbwe_thr_TDM_fx[7] = {-1318855552 >> 5, -1667671424 >> 5, -1072553216 >> 5, 920192448 >> 5, 897910144 >> 5, 936654016 >> 5, @@ -148,8 +146,8 @@ const Word32 icbwe_regressionValuesDFT_fx[8] = {65221224, 333933696, 1127643648, * DFT stereo ROM tables *----------------------------------------------------------------------------------*/ -/* DFT stereo ITD ROM table */ -const int16_t itd_vad_band_tbl[STEREO_DFT_ITD_VAD_BAND_NUM+1] = +/* DFT stereo ITD ROM table Q0*/ +const Word16 itd_vad_band_tbl[STEREO_DFT_ITD_VAD_BAND_NUM+1] = { 5, 8, 11, 16, 21, 26, 30, 37, 43, 51, 59, 69, 80, 93, 107, 126, 147, 176, 211, 254, 320 @@ -159,10 +157,6 @@ const Word32 ild_q_Q25[16] = { 0, 67108864, 134217728, 201326592, 268435456, 335544320, 436207616, 536870912, 637534208, 738197504, 838860800, 1006632960, 1174405120, 1342177280, 1509949440, 1677721600 }; -const int16_t ild_q[16] = -{ - 0,2,4,6,8,10,13,16,19,22,25,30,35,40,45,50 -}; /* table of values of the analysis window cross-correlation function at 32kHz (stride 8) */ const Word32 Wn_table_fx[50] = /*Q31*/ @@ -176,14 +170,14 @@ const Word32 Wn_table_fx[50] = /*Q31*/ 1208316416, 1184339328 }; - +//Q15 const Word16 win_ana_8k_fx[STEREO_DFT_OVL_8k] = { 3471, 6011, 7759, 9178, 10404, 11497, 12492, 13411, 14268, 15072, 15832, 16554, 17241, 17898, 18527, 19131, 19711, 20271, 20810, 21331, 21834, 22320, 22790, 23245, 23686, 24113, 24526, 24927, 25315, 25691, 26055, 26407, 26749, 27079, 27399, 27708, 28007, 28295, 28574, 28843, 29102, 29352, 29592, 29823, 30045, 30257, 30461, 30656, 30841, 31018, 31186, 31345, 31496, 31638, 31772, 31897, 32013, 32121, 32221, 32312, 32395, 32470, 32536, 32594, 32643, 32684, 32717, 32742, 32759, 32767 }; - +//Q15 const Word16 win_ana_12k8_fx[STEREO_DFT_OVL_12k8] = { 2744, 4753, 6135, 7259, 8229, 9096, 9887, 10618, 11300, 11943, 12552, 13131, 13685, 14216, 14726, 15218, 15693, 16152, 16598, 17030, 17449, 17857, 18255, 18642, 19019, 19388, 19747, 20098, 20441, 20777, 21105, 21426, 21741, 22048, 22350, @@ -193,7 +187,7 @@ const Word16 win_ana_12k8_fx[STEREO_DFT_OVL_12k8] = { 31714, 31796, 31874, 31949, 32020, 32088, 32153, 32215, 32273, 32328, 32380, 32429, 32474, 32516, 32555, 32590, 32622, 32652, 32677, 32700, 32719, 32735, 32748, 32758, 32764, 32767 }; - +//Q15 const Word16 win_ana_16k_fx[STEREO_DFT_OVL_16k] = { 2454, 4251, 5488, 6493, 7361, 8137, 8845, 9500, 10112, 10688, 11234, 11754, 12251, 12729, 13188, 13631, 14059, 14473, 14875, 15266, 15646, 16016, 16377, 16729, 17072, 17408, 17736, 18057, 18372, 18680, 18982, 19278, 19568, 19853, 20133, @@ -205,7 +199,7 @@ const Word16 win_ana_16k_fx[STEREO_DFT_OVL_16k] = { 32375, 32414, 32452, 32487, 32520, 32551, 32580, 32607, 32632, 32654, 32675, 32693, 32710, 32724, 32737, 32747, 32755, 32762, 32766, 32767 }; - +//Q15 const Word16 win_ana_32k_fx[STEREO_DFT_OVL_32k] = { 1735, 3006, 3881, 4591, 5206, 5755, 6257, 6720, 7154, 7563, 7951, 8320, 8674, 9013, 9341, 9657, 9962, 10259, 10547, 10827, 11100, 11366, 11626, 11881, 12129, 12373, 12611, 12845, 13075, 13300, 13521, 13739, 13953, 14164, 14371, 14575, @@ -225,7 +219,7 @@ const Word16 win_ana_32k_fx[STEREO_DFT_OVL_32k] = { 32637, 32649, 32660, 32670, 32680, 32689, 32698, 32706, 32714, 32721, 32728, 32734, 32740, 32745, 32749, 32753, 32757, 32760, 32763, 32765, 32766, 32767, 32767, }; - +//Q15 const Word16 win_ana_48k_fx[STEREO_DFT_OVL_MAX] = { 1417, 2454, 3168, 3749, 4251, 4699, 5109, 5488, 5842, 6176, 6493, 6795, 7084, 7361, 7629, 7887, 8137, 8380, 8616, 8845, 9069, 9287, 9500, 9708, 9912, 10112, 10307, 10500, 10688, 10873, 11055, 11234, 11410, 11583, 11754, 11922, 12088, 12251, @@ -253,6 +247,7 @@ const Word16 win_ana_48k_fx[STEREO_DFT_OVL_MAX] = { 32699, 32705, 32710, 32715, 32720, 32724, 32729, 32733, 32737, 32740, 32744, 32747, 32750, 32753, 32755, 32758, 32760, 32762, 32763, 32765, 32766, 32767, 32767, 32767, 32767 }; +//Q31 const Word32 win_syn_8k_fx[STEREO_DFT_OVL_8k] = { 2552069, 13257920, 28512140, 47194820, 68734296, 92758192, 118992928, 147223104, 177270272, 208980864, 242218768, 276861760, 312796896, 349919776, 388131456, 427338304, 467450272, 508381120, 550047232, 592367232, 635262144, 678654848, 722469312, 766631488, 811068480, 855708672, 900481344, 945317568, 990149120, @@ -261,7 +256,7 @@ const Word32 win_syn_8k_fx[STEREO_DFT_OVL_8k] = { 1980701440, 2002483840, 2022827776, 2041707136, 2059098624, 2074979712, 2089330176, 2102132224, 2113369344, 2123027840, 2131095296, 2137561472, 2142418176, 2145659392, 2147280896 }; - +//Q31 const Word32 win_syn_12k8_fx[STEREO_DFT_OVL_12k8] = { 1261002, 6551972, 14094794, 23341214, 34014852, 45938972, 58986220, 73058896, 88077968, 103977504, 120700824, 138198304, 156425504, 175342256, 194911840, 215100336, 235876160, 257209056, 279070880, 301434752, 324274976, 347566784, 371285952, 395409728, 419915296, 444781024, 469985376, 495507360, 521326560, @@ -273,7 +268,7 @@ const Word32 win_syn_12k8_fx[STEREO_DFT_OVL_12k8] = { 2062192000, 2072117888, 2081447424, 2090175872, 2098299008, 2105812608, 2112713344, 2118997504, 2124662144, 2129704192, 2134121728, 2137912064, 2141073664, 2143604608, 2145504128, 2146770944, 2147404416 }; - +//Q31 const Word32 win_syn_16k_fx[STEREO_DFT_OVL_16k] = { 902372, 4688386, 10086516, 16705275, 24347740, 32888712, 42238640, 52328808, 63104236, 74519400, 86535432, 99118824, 112239520, 125870888, 139988880, 154570928, 169597088, 185047600, 200904832, 217152048, 233772928, 250752224, 268075536, 285728512, 303697792, 321970304, 340533344, 359374528, 378481760, 397843456, @@ -287,7 +282,7 @@ const Word32 win_syn_16k_fx[STEREO_DFT_OVL_16k] = { 2092676480, 2099077632, 2105088640, 2110707584, 2115932544, 2120762112, 2125194240, 2129228160, 2132862336, 2136095360, 2138926592, 2141355136, 2143379968, 2145000704, 2146216576, 2147027584, 2147432960 }; - +//Q31 const Word32 win_syn_32k_fx[STEREO_DFT_OVL_32k] = { 319116, 1657642, 3566755, 5907942, 8612268, 11636355, 14948419, 18525698, 22348862, 26402882, 30674656, 35153448, 39829380, 44694072, 49740016, 54960336, 60348800, 65899616, 71607840, 77468328, 83476984, 89629312, 95921440, 102349712, 108910496, 115600336, 122416448, 129355608, 136415040, 143591712, 150883056, 158286720, @@ -311,7 +306,7 @@ const Word32 win_syn_32k_fx[STEREO_DFT_OVL_32k] = { 2126240128, 2128257280, 2130174336, 2131991296, 2133708160, 2135324800, 2136840960, 2138256512, 2139571712, 2140785920, 2141899136, 2142911616, 2143823104, 2144633472, 2145342592, 2145950592, 2146457344, 2146862848, 2147166848, 2147369600, 2147470976 }; - +//Q31 const Word32 win_syn_48k_fx[STEREO_DFT_OVL_MAX] = { 173731, 902372, 1941540, 3216071, 4688386, 6334862, 8138533, 10086516, 12168931, 14377618, 16705275, 19146106, 21695168, 24347740, 27099956, 29947948, 32888712, 35919240, 39036744, 42238640, 45522788, 48886820, 52328808, 55846816, 59439128, 63104236, 66840212, 70645768, 74519400, 78459600, 82465520, 86535432, 90668480, @@ -345,7 +340,7 @@ const Word32 win_syn_48k_fx[STEREO_DFT_OVL_MAX] = { 2133984512, 2135062400, 2136095360, 2137083776, 2138027648, 2138926592, 2139780992, 2140590464, 2141355136, 2142075008, 2142749952, 2143379968, 2143965184, 2144505344, 2145000704, 2145451008, 2145856256, 2146216576, 2146531840, 2146802304, 2147027584, 2147207680, 2147342720, 2147432960, 2147478016 }; - +//Q31 const Word32 win_mdct_8k_fx[STEREO_DFT_OVL_8k] = { 24094122, 72270552, 120410264, 168489632, 216483968, 264369408, 312121728, 359716832, 407130912, 454339904, 501320192, 548047936, 594499712, 640652160, 686482048, 731966336, 777081984, 821806336, 866117120, 909991488, 953407808, 996343936, 1038778432, 1080689792, 1122057216, 1162859392, 1203076224, 1242687104, 1281672320, @@ -360,7 +355,7 @@ const Word32 win_mdct_8k_fx[STEREO_DFT_OVL_8k] = { *----------------------------------------------------------------------------------*/ /* approximation table for log2(1 + x) in Q10 format, with x in [0, 1] in Q6 format */ -const int16_t log2_1px_table[65] = +const Word16 log2_1px_table[65] = { 0, 23, 45, 68, 90, 111, 132, 153, 174, 194, 214, 234, 254, 273, 292, 311, 330, 348, 366, 384, 402, 419, 436, 454, 470, 487, 504, 520, 536, 552, 568, 584, @@ -376,7 +371,8 @@ const Word16 ECSQ_log2_fact_Q10[1 + ECSQ_SEGMENT_SIZE] = { 0, 0, 1024, 2647, 4695, 7072, 9719, 12594, 15666 }; -const uint16_t ECSQ_tab_param[ECSQ_CONFIG_COUNT][1 + ECSQ_PARAM_COUNT] = +//Q0 +const UWord16 ECSQ_tab_param[ECSQ_CONFIG_COUNT][1 + ECSQ_PARAM_COUNT] = { { 16384, 15360, 14336, 13312, 12288, 11264, 10240, 9216, 8192, 7168, 6144, 5120, 4096, 3072, 2048, 1024, 0 }, /* un-optimized (reserved) */ { 16384, 7090, 365, 171, 73, 38, 21, 13, 9, 7, 6, 5, 4, 3, 2, 1, 0 }, /* 34 kbps target SNR */ @@ -386,8 +382,8 @@ const uint16_t ECSQ_tab_param[ECSQ_CONFIG_COUNT][1 + ECSQ_PARAM_COUNT] = { 16384, 6940, 633, 186, 74, 38, 21, 13, 9, 7, 6, 5, 4, 3, 2, 1, 0 }, /* 56 kbps target SNR */ { 16384, 10548, 1936, 774, 117, 41, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 } /* 56 kbps target bits */ }; - -const uint16_t ECSQ_tab_vals[ECSQ_PARAM_COUNT - 1][1 + ECSQ_TAB_VALS_SIZE] = +//Q0 +const UWord16 ECSQ_tab_vals[ECSQ_PARAM_COUNT - 1][1 + ECSQ_TAB_VALS_SIZE] = { { 16384, 9939, 3659, 1349, 499, 186, 71, 29, 14, 8, 6, 5, 4, 3, 2, 1, 0 }, /* param = 0 */ { 16384, 12760, 7739, 4694, 2847, 1727, 1048, 636, 386, 234, 142, 86, 52, 32, 20, 12, 0 }, /* param = 1 */ @@ -407,31 +403,32 @@ const uint16_t ECSQ_tab_vals[ECSQ_PARAM_COUNT - 1][1 + ECSQ_TAB_VALS_SIZE] = }; /* table for uniform coding of absolute values in {0, +-1} */ -static const uint16_t ECSQ_tab_abs_1bit[1 + 2] = +//Q0 +static const UWord16 ECSQ_tab_abs_1bit[1 + 2] = { 16384, 10922, 0 }; /* table for uniform coding of absolute values in {0, +-1, +-2, +-3} */ -static const uint16_t ECSQ_tab_abs_2bit[1 + 4] = +static const UWord16 ECSQ_tab_abs_2bit[1 + 4] = { 16384, 14046, 9364, 4682, 0 }; /* table for uniform coding of absolute values in {0, +-1, ..., +-7} */ -static const uint16_t ECSQ_tab_abs_3bit[1 + 8] = +static const UWord16 ECSQ_tab_abs_3bit[1 + 8] = { 16384, 15288, 13104, 10920, 8736, 6552, 4368, 2184, 0 }; /* table for uniform coding of absolute values in {0, +-1, ..., +-15} */ -static const uint16_t ECSQ_tab_abs_4bit[1 + 16] = +static const UWord16 ECSQ_tab_abs_4bit[1 + 16] = { 16384, 15870, 14812, 13754, 12696, 11638, 10580, 9522, 8464, 7406, 6348, 5290, 4232, 3174, 2116, 1058, 0 }; /* array of tables for uniform coding of absolute values */ -const uint16_t * const ECSQ_tab_abs_lsbs[1 + 4] = +const UWord16 * const ECSQ_tab_abs_lsbs[1 + 4] = { NULL, ECSQ_tab_abs_1bit, ECSQ_tab_abs_2bit, ECSQ_tab_abs_3bit, ECSQ_tab_abs_4bit }; @@ -747,7 +744,7 @@ const HUFF_TABLE huff_beta_table = } }; -const int16_t mc_paramupmix_fb_remix_order[4] = {0, 1, 2, 3}; +const Word16 mc_paramupmix_fb_remix_order[4] = {0, 1, 2, 3}; /*----------------------------------------------------------------------------------* * ParamMC ROM tables diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index 64338184e..7bb5a28e0 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -116,15 +116,15 @@ typedef struct dft_ana_struct /* State of the range encoder */ typedef struct { - uint32_t rc_low; - uint32_t rc_range; - int16_t rc_cache; - int16_t rc_carry; - int16_t rc_carry_count; + UWord32 rc_low; + UWord32 rc_range; + Word16 rc_cache; + Word16 rc_carry; + Word16 rc_carry_count; - uint8_t byte_buffer[RANGE_UNI_BUFFER_BYTES_MAX]; - int16_t byte_count; - int16_t last_byte_bit_count; + UWord8 byte_buffer[RANGE_UNI_BUFFER_BYTES_MAX]; + Word16 byte_count; + Word16 last_byte_bit_count; } RangeUniEncState; @@ -313,23 +313,23 @@ typedef struct stereo_mdct_enc_data_structure STEREO_MDCT_BAND_PARAMETERS stbParamsTCX20afterACELP; /* stereo frequency band parameters for transition frames */ /* only intraframe */ - int16_t mdct_stereo_mode[2]; /* mdct stereo mode: LR, MS, band-wise MS */ + Word16 mdct_stereo_mode[2]; /* mdct stereo mode: LR, MS, band-wise MS */ #ifdef DEBUGGING - int16_t mdct_stereo_mode_cmdl; /* MDCT stereo mode from command-line */ - int16_t fDualMono; /* force dual mono in MDCT stereo mode */ - int16_t fMSstereo; /* force full-band MS in MDCT stereo mode */ + Word16 mdct_stereo_mode_cmdl; /* MDCT stereo mode from command-line */ + Word16 fDualMono; /* force dual mono in MDCT stereo mode */ + Word16 fMSstereo; /* force full-band MS in MDCT stereo mode */ #endif - int16_t global_ild[2]; /* Quantized ILD for the whole spectrum */ - int16_t split_ratio; /* Ratio of bitrate (1 to 7), split_ratio = 8 * 1st chn bitrate / (1st + 2nd chn bitrate) */ + Word16 global_ild[2]; /* Quantized ILD for the whole spectrum */ + Word16 split_ratio; /* Ratio of bitrate (1 to 7), split_ratio = 8 * 1st chn bitrate / (1st + 2nd chn bitrate) */ - int16_t IGFStereoMode[2]; /* MDCT stereo mode for IGF */ + Word16 IGFStereoMode[2]; /* MDCT stereo mode for IGF */ ITD_DATA_HANDLE hItd; DFT_ANA_HANDLE hDft_ana; - int16_t sw_uncorr; + Word16 sw_uncorr; - int16_t isSBAStereoMode; + Word16 isSBAStereoMode; } STEREO_MDCT_ENC_DATA, *STEREO_MDCT_ENC_DATA_HANDLE; @@ -395,17 +395,16 @@ typedef struct stereo_td_enc_data_structure Word16 tdm_SM_modi_flag; /* Flag that indicates to modify ratio */ Word16 tdm_SM_reset_flag; /* Flag that indicates to reset the parameters for SM mode */ - Word16 tdm_FD2LRTD_SW_cnt; /* Count the number of frames following a FD to LRTD switching */ - Word16 tdm_LRTD_flag; /* LRTD stereo mode flag */ - Word16 prev_fr_LRTD_TD_dec; /* At the beginning of a frame, contains the previous LRTD decision that might have been modified during last frame */ - Word16 tdm_inst_ratio_idx; /* Instantaneous correlation ratio index */ - Word16 tdm_last_inst_ratio_idx; /* previous frame instantaneous correlation ratio index */ - Word16 tdm_vad_hangover_cnt; /* Count the number of frames where hangover_cnt >= 5 in both primary and secondary channel */ - Word16 tdm_ini_frame_cnt; /* Count the number of frame to decide how to evaluate the local VAD of primary and secondary channel */ - Word16 tdm_last_LRTD_frame_cnt; /* Count the number of frame since the last LRTD frame */ - Word16 tdm_last_LRTD_PriCh_cnt; /* Count the number of frame since the primary channel changed */ - Word16 flag_skip_DMX; /* flag that indicates whether the TD downmixing is skipped */ - // Word32 tdm_Pri_pitch_buf_fx[NB_SUBFR]; + Word16 tdm_FD2LRTD_SW_cnt; /* Count the number of frames following a FD to LRTD switching */ + Word16 tdm_LRTD_flag; /* LRTD stereo mode flag */ + Word16 prev_fr_LRTD_TD_dec; /* At the beginning of a frame, contains the previous LRTD decision that might have been modified during last frame */ + Word16 tdm_inst_ratio_idx; /* Instantaneous correlation ratio index */ + Word16 tdm_last_inst_ratio_idx; /* previous frame instantaneous correlation ratio index */ + Word16 tdm_vad_hangover_cnt; /* Count the number of frames where hangover_cnt >= 5 in both primary and secondary channel */ + Word16 tdm_ini_frame_cnt; /* Count the number of frame to decide how to evaluate the local VAD of primary and secondary channel */ + Word16 tdm_last_LRTD_frame_cnt; /* Count the number of frame since the last LRTD frame */ + Word16 tdm_last_LRTD_PriCh_cnt; /* Count the number of frame since the primary channel changed */ + Word16 flag_skip_DMX; /* flag that indicates whether the TD downmixing is skipped */ Word16 tdm_Pri_pitch_buf_fx[NB_SUBFR]; // Q6 } STEREO_TD_ENC_DATA, *STEREO_TD_ENC_DATA_HANDLE; @@ -452,7 +451,6 @@ typedef struct stereo_tca_enc_data_structure Word32 C_mem_fx[2 * L_NCSHIFT_DS + 1]; Word16 C_mem_exp[2 * L_NCSHIFT_DS + 1]; Word32 E1_mem_fx, E2_mem_fx; - // Word16 E1_E2_mem_exp; Word16 E1_mem_exp; Word16 E2_mem_exp; Word32 delay_0_mem_fx[MAX_DELAYREGLEN]; @@ -645,7 +643,6 @@ typedef struct front_vad_enc Word16 buffer_12k8_fx[3 * L_FRAME / 2]; Word16 q_mem_decim; Word16 q_buffer_12k8; - // Word32 buffer_12k8_fx[3 * L_FRAME / 2]; /* 12k8 signal buffer */ } FRONT_VAD_ENC, *FRONT_VAD_ENC_HANDLE; @@ -729,9 +726,9 @@ typedef struct ivas_enc_cov_handler_state_t { ivas_cov_smooth_state_t *pCov_state; ivas_cov_smooth_state_t *pCov_dtx_state; - int16_t num_bins; - int16_t prior_dtx_present; - int16_t prior_var_flag; + Word16 num_bins; + Word16 prior_dtx_present; + Word16 prior_var_flag; Word32 bb_var_lt_fx[FOA_CHANNELS]; } ivas_enc_cov_handler_state_t; @@ -785,10 +782,6 @@ typedef struct ivas_spar_enc_lib_t Word32 core_nominal_brate; /* Nominal bitrate for core coding */ FRONT_VAD_ENC_HANDLE hFrontVad; /* front-VAD handle */ ENC_CORE_HANDLE hCoreCoderVAD; /* core-coder handle for front-VAD module */ -#ifndef EVS_FLOAT_ENC - // ENC_CORE_HANDLE_FX hCoreCoderVAD_fx; /* core coder handle */ -#endif - Word16 spar_reconfig_flag; Word16 front_vad_flag; Word16 front_vad_dtx_flag; @@ -859,23 +852,23 @@ typedef struct ivas_omasa_enc_state_structure Word16 broadband_energy_prev_e; /*exponent for broadband_energy_prev_fx*/ Word16 fade_out_gain_fx[L_FRAME48k]; /*q15*/ Word16 fade_in_gain_fx[L_FRAME48k]; /*q15*/ - uint8_t nbands; - uint8_t nCodingBands; - uint8_t nSubframes; + UWord8 nbands; + UWord8 nCodingBands; + UWord8 nSubframes; /* CLDFB analysis */ - int16_t num_Cldfb_instances; + Word16 num_Cldfb_instances; HANDLE_CLDFB_FILTER_BANK cldfbAnaEnc[MAX_NUM_OBJECTS]; /* DirAC parameter estimation */ - int16_t band_grouping[MASA_FREQUENCY_BANDS + 1]; - int16_t block_grouping[5]; + Word16 band_grouping[MASA_FREQUENCY_BANDS + 1]; + Word16 block_grouping[5]; /* diffuseness */ - int16_t index_buffer_intensity; + Word16 index_buffer_intensity; - int16_t prev_selected_object; - uint8_t changing_object; + Word16 prev_selected_object; + UWord8 changing_object; } OMASA_ENC_STATE, *OMASA_ENC_HANDLE; @@ -912,8 +905,8 @@ typedef struct ivas_masa_dir_align_struct typedef struct ivas_masa_sync_struct { MASA_METADATA_FRAME previous_metadata; - uint8_t prev_sim_stop; - uint8_t prev_offset; + UWord8 prev_sim_stop; + UWord8 prev_offset; MASA_FRAME_MODE frame_mode; } MASA_SYNC_STATE, *MASA_SYNC_HANDLE; @@ -961,15 +954,15 @@ typedef struct ivas_masa_encoder_struct typedef struct ivas_mcmasa_enc_data_structure { - int16_t nbands; - int16_t nCodingBands; + Word16 nbands; + Word16 nCodingBands; /* delay compensation */ Word32 *delay_buffer_lfe[2]; /* Delay buffer for LFE estimation */ - int16_t num_samples_delay_comp; - int16_t num_slots_delay_comp; - int16_t offset_comp; + Word16 num_samples_delay_comp; + Word16 num_slots_delay_comp; + Word16 offset_comp; IVAS_FB_MIXER_HANDLE hFbMixer; IVAS_FB_MIXER_HANDLE hFbMixerLfe; @@ -977,12 +970,12 @@ typedef struct ivas_mcmasa_enc_data_structure /* DirAC parameter estimation */ Word32 **direction_vector_m_fx[DIRAC_NUM_DIMS]; /* Average direction vector */ Word16 **direction_vector_e[DIRAC_NUM_DIMS]; /* Average direction vector */ - int16_t band_grouping[MASA_FREQUENCY_BANDS + 1]; - int16_t block_grouping[5]; + Word16 band_grouping[MASA_FREQUENCY_BANDS + 1]; + Word16 block_grouping[5]; /* diffuseness */ - int16_t index_buffer_intensity; - int8_t no_col_avg_diff; + Word16 index_buffer_intensity; + Word8 no_col_avg_diff; Word32 **buffer_intensity_real_fx[DIRAC_NUM_DIMS]; Word16 buffer_intensity_real_q[DIRAC_NO_COL_AVG_DIFF]; Word32 **buffer_intensity_real_vert_fx; @@ -993,11 +986,11 @@ typedef struct ivas_mcmasa_enc_data_structure Word32 chnlToFoaEvenMtx_fx[FOA_CHANNELS][MCMASA_MAX_ANA_CHANS]; Word32 ls_azimuth_fx[MCMASA_MAX_ANA_CHANS]; - int16_t leftNearest[MCMASA_MAX_ANA_CHANS]; - int16_t rightNearest[MCMASA_MAX_ANA_CHANS]; - int16_t numHorizontalChannels; - uint8_t isHorizontalSetup; - uint8_t combineRatios; + Word16 leftNearest[MCMASA_MAX_ANA_CHANS]; + Word16 rightNearest[MCMASA_MAX_ANA_CHANS]; + Word16 numHorizontalChannels; + UWord8 isHorizontalSetup; + UWord8 combineRatios; Word32 prevMultiChEne_fx; Word16 prevMultiChEne_e; @@ -1007,8 +1000,8 @@ typedef struct ivas_mcmasa_enc_data_structure Word16 prevEQ_e; Word16 interpolator_fx[L_FRAME48k]; - uint8_t separateChannelEnabled; - int16_t separateChannelIndex; + UWord8 separateChannelEnabled; + Word16 separateChannelIndex; /* LFE coding */ Word32 lfeLfEne[MAX_PARAM_SPATIAL_SUBFRAMES]; @@ -1016,9 +1009,9 @@ typedef struct ivas_mcmasa_enc_data_structure Word32 totalLfEne[MAX_PARAM_SPATIAL_SUBFRAMES]; Word16 totalLfEne_e[MAX_PARAM_SPATIAL_SUBFRAMES]; Word32 *lfeAnaRingBuffer[2]; - int16_t ringBufferPointer; + Word16 ringBufferPointer; Word32 lowpassSum[2]; - int16_t ringBufferSize; + Word16 ringBufferSize; } MCMASA_ENC_DATA, *MCMASA_ENC_HANDLE; @@ -1033,7 +1026,7 @@ typedef struct ivas_osba_enc_data_structure Word32 interpolator_fx[L_FRAME48k]; Word32 prev_object_dm_gains_fx[MAX_NUM_OBJECTS][MAX_INPUT_CHANNELS]; - int16_t nchan_ism; + Word16 nchan_ism; Word32 *input_data_mem_fx[MAX_NUM_OBJECTS]; } OSBA_ENC_DATA, *OSBA_ENC_HANDLE; @@ -1101,14 +1094,12 @@ typedef struct cpe_enc_data_structure STEREO_CNG_ENC_HANDLE hStereoCng; /* Stereo CNG data structure */ FRONT_VAD_ENC_HANDLE hFrontVad[CPE_CHANNELS]; - // float *input_mem[CPE_CHANNELS]; /* input channels buffers memory; needed to be up-to-date for TD->DFT stereo switching */ - Word32 brate_surplus; /* bitrate surplus for bitrate adaptation in combined format coding */ Word16 *input_mem_fx[CPE_CHANNELS]; /* input channels buffers memory; needed to be up-to-date for TD->DFT stereo switching */ Word16 q_input_mem[CPE_CHANNELS]; #ifdef DEBUGGING - int16_t stereo_mode_cmdl; /* stereo mode forced from the commaand-line */ + Word16 stereo_mode_cmdl; /* stereo mode forced from the commaand-line */ #endif } CPE_ENC_DATA, *CPE_ENC_HANDLE; @@ -1120,9 +1111,9 @@ typedef struct cpe_enc_data_structure typedef struct mct_block_data_struct { - int16_t isActive; - int16_t ch1, ch2; - int16_t mask[2][MAX_SFB]; + Word16 isActive; + Word16 ch1, ch2; + Word16 mask[2][MAX_SFB]; STEREO_MDCT_ENC_DATA_HANDLE hStereoMdct; /* MDCT stereo data handle */ } MCT_BLOCK_DATA, *MCT_BLOCK_DATA_HANDLE; @@ -1131,16 +1122,16 @@ typedef struct mct_enc_data_structure { BSTR_ENC_HANDLE hBstr; /* bitstream handle for side bits - in MCT, side bits are written at the beginning of the bitstream */ - int16_t nchan_out_woLFE; /* number of active channels within multi-channel configuration */ - int16_t currBlockDataCnt; - int16_t bitsChannelPairIndex; /* bits needed to code channel pair index, depends on number of active channels */ + Word16 nchan_out_woLFE; /* number of active channels within multi-channel configuration */ + Word16 currBlockDataCnt; + Word16 bitsChannelPairIndex; /* bits needed to code channel pair index, depends on number of active channels */ MCT_BLOCK_DATA_HANDLE hBlockData[MCT_MAX_BLOCKS]; Word32 lastxCorrMatrix_fx[MCT_MAX_CHANNELS][MCT_MAX_CHANNELS]; Word16 lastxCorrMatrix_e; - int16_t lowE_ch[MCT_MAX_CHANNELS]; - uint16_t mc_global_ild[MCT_MAX_CHANNELS]; - int16_t nBitsMCT; /* number of bits spent on mct side info */ + Word16 lowE_ch[MCT_MAX_CHANNELS]; + UWord16 mc_global_ild[MCT_MAX_CHANNELS]; + Word16 nBitsMCT; /* number of bits spent on mct side info */ /* pointers to local buffers */ Word32 *p_mdst_spectrum_long_fx[MCT_MAX_BLOCKS][CPE_CHANNELS]; Word32 *p_orig_spectrum_long_fx[MCT_MAX_BLOCKS][CPE_CHANNELS]; @@ -1148,11 +1139,11 @@ typedef struct mct_enc_data_structure Word16 q_orig_spectrum_long_com; Word16 q_mdst_spectrum_long_fx[MCT_MAX_BLOCKS][CPE_CHANNELS][NB_DIV]; - int16_t tnsBits[MCT_MAX_BLOCKS][CPE_CHANNELS][NB_DIV]; /* number of tns bits in the frame */ - int16_t tnsSize[MCT_MAX_BLOCKS][CPE_CHANNELS][NB_DIV]; /* number of tns parameters put into prm */ - int16_t p_param[MCT_MAX_BLOCKS][CPE_CHANNELS][NB_DIV]; + Word16 tnsBits[MCT_MAX_BLOCKS][CPE_CHANNELS][NB_DIV]; /* number of tns bits in the frame */ + Word16 tnsSize[MCT_MAX_BLOCKS][CPE_CHANNELS][NB_DIV]; /* number of tns parameters put into prm */ + Word16 p_param[MCT_MAX_BLOCKS][CPE_CHANNELS][NB_DIV]; - int16_t hbr_mct; + Word16 hbr_mct; } MCT_ENC_DATA, *MCT_ENC_HANDLE; @@ -1169,9 +1160,9 @@ typedef struct stereo_dmx_evs_phase_only_correlation_structure Word32 peak_width_fx[CPE_CHANNELS]; // Q16 Word32 confidence_fx; // Q31 - int16_t ispeak[CPE_CHANNELS]; - int16_t itdLR[CPE_CHANNELS]; - int16_t shift_limit; + Word16 ispeak[CPE_CHANNELS]; + Word16 itdLR[CPE_CHANNELS]; + Word16 shift_limit; const Word32 *wnd_fx; @@ -1182,7 +1173,7 @@ typedef struct stereo_dmx_evs_phase_only_correlation_structure typedef struct stereo_dmx_evs_correlation_filter_structure { - int16_t init_frmCntr; + Word16 init_frmCntr; Word16 isd_rate_s_fx; // Q15 Word32 iccr_s_fx; // Q31 @@ -1191,8 +1182,8 @@ typedef struct stereo_dmx_evs_correlation_filter_structure Word32 Pi_fx[STEREO_DMX_EVS_NB_SUBBAND_MAX]; // Q31 Word16 rfft_ipd_coef_fx[L_FRAME48k / 2 + 1]; - int16_t pha_len; - int16_t fad_len; + Word16 pha_len; + Word16 fad_len; Word16 win_fx[STEREO_DMX_EVS_PHA_LEN_MAX]; // Q14 @@ -1204,14 +1195,14 @@ typedef struct stereo_dmx_evs_correlation_filter_structure STEREO_DMX_EVS_PHA curr_pha; STEREO_DMX_EVS_PHA prev_pha; - int16_t pha_hys_cnt; + Word16 pha_hys_cnt; - int16_t prc_thres; + Word16 prc_thres; STEREO_DMX_EVS_PRC curr_prc; STEREO_DMX_EVS_PRC prev_prc; - int16_t prc_hys_cnt; + Word16 prc_hys_cnt; Word32 fad_g_prc_fx[L_FRAME48k]; // Q31 - int16_t fad_len_prc; + Word16 fad_len_prc; Word32 trns_aux_energy_fx[CPE_CHANNELS]; @@ -1248,9 +1239,9 @@ typedef struct ivas_lfe_enc_data_structure { LFE_WINDOW_HANDLE pWindow_state; BSTR_ENC_HANDLE hBstr; /* pointer to encoder bitstream handle */ - const uint16_t *cum_freq_models[IVAS_MAX_NUM_QUANT_STRATS][IVAS_MAX_NUM_DCT_COEF_GROUPS]; - int16_t lfe_enc_indices_coeffs_tbl[IVAS_MAX_NUM_QUANT_STRATS][IVAS_MAX_NUM_DCT_COEF_GROUPS]; - int16_t lfe_bits; + const UWord16 *cum_freq_models[IVAS_MAX_NUM_QUANT_STRATS][IVAS_MAX_NUM_DCT_COEF_GROUPS]; + Word16 lfe_enc_indices_coeffs_tbl[IVAS_MAX_NUM_QUANT_STRATS][IVAS_MAX_NUM_DCT_COEF_GROUPS]; + Word16 lfe_bits; Word32 *old_wtda_audio_fx; Word16 q_old_wtda_audio; @@ -1298,9 +1289,9 @@ typedef struct encoder_config_structure #ifdef DEBUGGING /* debugging options */ - int16_t stereo_mode_cmdl; /* stereo mode forced from the command-line */ - int16_t force; /* parameter to force specific "core" of the Core-Coder*/ - int16_t mdct_stereo_mode_cmdl; /* mdct stereo mode forced from command-line, employed only when DEBUG_FORCE_MDCT_STEREO_MODE is activated */ + Word16 stereo_mode_cmdl; /* stereo mode forced from the command-line */ + Word16 force; /* parameter to force specific "core" of the Core-Coder*/ + Word16 mdct_stereo_mode_cmdl; /* mdct stereo mode forced from command-line, employed only when DEBUG_FORCE_MDCT_STEREO_MODE is activated */ #endif diff --git a/lib_enc/ivas_stereo_mdct_core_enc.c b/lib_enc/ivas_stereo_mdct_core_enc.c index bf5496096..8ebcbf570 100644 --- a/lib_enc/ivas_stereo_mdct_core_enc.c +++ b/lib_enc/ivas_stereo_mdct_core_enc.c @@ -628,7 +628,7 @@ void stereo_mdct_core_enc_fx( hIGFEnc[1]->spec_be_igf_e = p_orig_spectrum_e[1]; move16(); ProcessStereoIGF_fx( hStereoMdct, sts, ms_mask, - orig_spectrum_fx, + orig_spectrum_fx, sub( Q31, p_orig_spectrum_e[0] ), sub( Q31, p_orig_spectrum_e[1] ), p_powerSpec_fx, powerSpecMsInv_fx, inv_spectrum_fx, n, hCPE->hCoreCoder[0]->sp_aud_decision0, hCPE->hCoreCoder[0]->element_brate ); } ELSE diff --git a/lib_enc/nois_est_fx.c b/lib_enc/nois_est_fx.c index 6ef980166..b01121f1f 100644 --- a/lib_enc/nois_est_fx.c +++ b/lib_enc/nois_est_fx.c @@ -748,6 +748,116 @@ void noise_est_down_fx( return; } +void noise_est_down_ivas_fx( + const Word32 fr_bands[], /* i : per band input energy (contains 2 vectors) q_fr_bands */ + const Word16 q_fr_bands, /* i : Q of fr_bands */ + Word32 bckr[], /* i/o: per band background noise energy estimate q_fr_bands */ + Word32 tmpN[], /* o : temporary noise update q_fr_bands */ + Word32 enr[], /* o : averaged energy over both subframes */ + const Word16 min_band, /* i : minimum critical band */ + const Word16 max_band, /* i : maximum critical band */ + Word16 *totalNoise, /* o : noise estimate over all critical bands */ + Word16 Etot, /* i : Energy of current frame */ + Word16 *Etot_last, /* i/o: Energy of last frame Q8 */ + Word16 *Etot_v_h2 /* i/o: Energy variations of noise frames Q8 */ +) + +{ + Word32 Ltmp, L_tmp; + const Word32 *pt1, *pt2; + Word16 i; + Word16 e_Noise, f_Noise; + Word32 e_min; + Word32 totalNoise_temp; + Word32 L_Etot, L_Etot_last, L_Etot_v_h2, L_Etot_v; + Word64 sum; + + e_min = L_shl( 7516193 /* 0.0035f in Q31 */, sub( q_fr_bands, Q31 ) ); // q_fr_bands + + L_Etot = L_shl( Etot, 16 ); /*Q24 for later AR1 computations*/ + L_Etot_last = L_shl( *Etot_last, 16 ); + L_Etot_v_h2 = L_shl( *Etot_v_h2, 16 ); + + /*-----------------------------------------------------------------* + * Estimate total noise energy + *-----------------------------------------------------------------*/ + + totalNoise_temp = L_deposit_l( 0 ); + sum = 0; + move64(); + FOR( i = min_band; i <= max_band; i++ ) + { + sum = W_mac_32_32( sum, bckr[i], 1 ); // q_fr_bands+1 + } + if ( sum == 0 ) + { + sum = 1; /* make sure log2_norm_lc does not cause table reading out of bounds */ + move64(); + } + e_Noise = W_norm( sum ); + totalNoise_temp = W_extract_h( W_shl( sum, e_Noise ) ); // q_fr_bands+e_Noise-31 + e_Noise = sub( 62, add( e_Noise, q_fr_bands ) ); // 31-(q_fr_bands+e_Noise-31) + + /*totalNoise = 10.0f * (float)log10( *totalNoise );*/ + f_Noise = Log2_norm_lc( totalNoise_temp ); // exponent of log => 30-0 = 30 + e_Noise = sub( e_Noise, 1 ); // 30-(31-e_Noise) = e_Noise-1 + Ltmp = L_mac( L_deposit_h( e_Noise ), f_Noise, 1 ); // Q16 + Ltmp = Mpy_32_16_1( Ltmp, LG10 ); // Q14 (16+13-15) + Ltmp = L_shl( Ltmp, 10 ); // Q26 + *totalNoise = round_fx( Ltmp ); /*Q8*/ + move16(); + + /*-----------------------------------------------------------------* + * Average energy per frame for each frequency band + *-----------------------------------------------------------------*/ + + pt1 = fr_bands; + pt2 = fr_bands + NB_BANDS; + + FOR( i = 0; i < NB_BANDS; i++ ) + { + /* enr[i] = 0.5f * ( *pt1++ + *pt2++ ); */ + enr[i] = W_extract_h( W_mac_32_32( W_mult_32_32( *pt1, ONE_IN_Q30 ), *pt2, ONE_IN_Q30 ) ); // q_fr_bands+30+1-32+1 = q_fr_bands + move32(); + pt1++; + pt2++; + } + + /*-----------------------------------------------------------------* + * Background noise energy update + *-----------------------------------------------------------------*/ + + FOR( i = 0; i < NB_BANDS; i++ ) + { + /* tmpN[i] = (1-ALPHA) * bckr[i] + ALPHA * enr[i]; */ + /* handle div by zero in find_tilt_fx */ + tmpN[i] = L_max( Madd_32_16( Mpy_32_16_1( bckr[i], ALPHAM1_FX ), enr[i], ALPHA_FX ), e_min ); // q_fr_bands + move32(); + + /* if( tmpN[i] < bckr[i] ) { bckr[i] = tmpN[i]; }*/ + /* Defend to increase noise estimate: keep as it is or decrease */ + bckr[i] = L_max( L_min( bckr[i], tmpN[i] ), e_min ); // q_fr_bands + move32(); + } + + /*------------------------------------------------------------------* + * Energy variation update + *------------------------------------------------------------------*/ + /*Etot_v = (float) fabs(*Etot_last - Etot);*/ + L_Etot_v = L_abs( L_sub( L_Etot_last, L_Etot ) ); /* Q24 */ + + /* *Etot_v_h2 = (1.0f-0.02f) * *Etot_v_h2 + 0.02f * min(3.0f, Etot_v); */ + L_tmp = L_min( 50331648 /* 3.0f in Q24 */, L_Etot_v ); // Q24 + L_tmp = Mult_32_16( L_tmp, 655 /*.02 in Q15 */ ); // Q24 + L_Etot_v_h2 = Madd_32_16( L_tmp, L_Etot_v_h2, 32113 /* 0.98 in Q15 */ ); // Q24 + + /* if (*Etot_v_h2 < 0.1f) { *Etot_v_h2 = 0.1f; } */ + *Etot_v_h2 = s_max( round_fx( L_Etot_v_h2 ), 26 /* 0.1 in Q8*/ ); // Q8 + move16(); + + return; +} + /*-----------------------------------------------------------------* * noise_est_fx() * diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index c587b96a6..900e4960c 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -353,6 +353,20 @@ void noise_est_down_fx( const Word32 e_min /* i : minimum energy scaled Q_new + QSCALE */ ); +void noise_est_down_ivas_fx( + const Word32 fr_bands[], /* i : per band i energy (contains 2 vectors) */ + const Word16 q_fr_bands, /* i : Q of fr_bands */ + Word32 bckr[], /* i/o: per band background noise energy estimate */ + Word32 tmpN[], /* o : temporary noise update */ + Word32 enr[], /* o : averaged energy over both subframes */ + const Word16 min_band, /* i : minimum critical band */ + const Word16 max_band, /* i : maximum critical band */ + Word16 *totalNoise, /* o : noise estimate over all critical bands */ + Word16 Etot, /* i : Energy of current frame */ + Word16 *Etot_last, /* i/o: Energy of last frame Q8 */ + Word16 *Etot_v_h2 /* i/o: Energy variations of noise frames Q8 */ +); + void noise_est_fx( Encoder_State *st_fx, /* i/o: state structure */ const Word16 old_pitch1, /* i : previous frame OL pitch[1] */ @@ -783,14 +797,14 @@ Word16 wb_vad_fx( Word16 wb_vad_ivas_fx( Encoder_State *st_fx, /* i/o: encoder state structure */ - const Word32 fr_bands[], /* i : per band i energy (contains 2 vectors) Q_new+QSCALE*/ + const Word32 fr_bands[], /* i : per band i energy (contains 2 vectors) q_fr_bands*/ + const Word16 q_fr_bands, /* i : Q of fr_bands */ Word16 *noisy_speech_HO, /* o : SC-VBR noisy speech HO flag */ Word16 *clean_speech_HO, /* o : SC-VBR clean speech HO flag */ Word16 *NB_speech_HO, /* o : SC-VBR NB speech HO flag */ Word16 *snr_sum_he, /* o : Output snr_sum as weighted spectral measure*/ Word16 *localVAD_HE_SAD, Word16 *flag_noisy_speech_snr, /* o : */ - const Word16 Q_new, /* i : scaling factor Q0 */ VAD_HANDLE hVAD, /* i/o: VAD data handle */ NOISE_EST_HANDLE hNoiseEst, /* i : Noise estimation handle */ Word16 lp_speech, /* i : long term active speech energy average */ @@ -2160,28 +2174,8 @@ void analy_sp_fx( Word32 *band_energies, /* o : energy in critical frequency bands without minimum noise floor MODE2_E_MIN */ Word16 *fft_buff /* o : FFT coefficients */ ); -void ivas_analy_sp_fx( - const Word16 element_mode, /* i : element mode */ - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ - const Word32 input_Fs, /* i : input sampling rate */ - Word16 *speech, /* i : speech buffer Q_new - preemph_bits */ - const Word16 Q_new, /* i : current scaling exp Q0 */ - Word32 *fr_bands, /* o : energy in critical frequency bands Q_new + QSCALE */ - Word32 *lf_E, /* o : per bin E for first... Q_new + QSCALE - 2*/ - Word16 *Etot, /* o : total input energy Q8 */ - const Word16 min_band, /* i : minimum critical band Q0 */ - const Word16 max_band, /* i : maximum critical band Q0 */ - const Word32 e_min_scaled, /* i : minimum energy scaled Q_new + QSCALE */ - Word16 Scale_fac[2], /* o : FFT scales factors (2 values by frame) Q0 */ - Word32 *Bin_E, /* o : per-bin energy spectrum Q7 */ - Word32 *Bin_E_old, /* o : per-bin energy spectrum of the previous frame Q7 */ - Word32 *PS, /* o : per-bin energy spectrum Q_new + QSCALE */ - Word16 *EspecdB, /* o : per-bin log energy spectrum (with f=0) Q7 */ - Word32 *band_energies, /* o : energy in critical frequency bands without minimum noise floor MODE2_E_MIN (Q_new + QSCALE + 2)*/ - Word16 *fft_buff /* o : FFT coefficients (Q_new + QSCALE + 2) */ -); -void ivas_analy_sp_fx_front( +void ivas_analy_sp_fx( const Word16 element_mode, /* i : element mode */ CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ const Word32 input_Fs, /* i : input sampling rate */ @@ -2194,8 +2188,6 @@ void ivas_analy_sp_fx_front( Word16 *Etot, /* o : total input energy Q8 */ const Word16 min_band, /* i : minimum critical band Q0 */ const Word16 max_band, /* i : maximum critical band Q0 */ - const Word32 e_min_scaled, /* i : minimum energy scaled Q_new + QSCALE */ - Word16 Scale_fac[2], /* o : FFT scales factors (2 values by frame) Q0 */ Word32 *Bin_E, /* o : per-bin energy spectrum q_Bin_E */ Word16 *q_Bin_E, /* o : Q of per-bin energy spectrum Q0 */ Word32 *Bin_E_old, /* o : per-bin energy spectrum of the previous frame q_Bin_E_old */ @@ -2203,9 +2195,10 @@ void ivas_analy_sp_fx_front( Word32 *PS, /* o : per-bin energy spectrum q_PS */ Word16 *q_PS, /* o : Q of per-bin energy spectrum Q0 */ Word16 *EspecdB, /* o : per-bin log energy spectrum (with f=0) Q7 */ - Word32 *band_energies, /* o : energy in critical frequency bands without minimum noise floor MODE2_E_MIN (band_energies_exp)*/ - Word16 *band_energies_exp, /* o : exponent of energy in critical frequency bands without minimum noise floor MODE2_E_MIN */ - Word16 *fft_buff /* o : FFT coefficients (Q_new + Scale_fac[i_subfr]) */ + Word32 *band_energies, /* o : energy in critical frequency bands without minimum noise floor MODE2_E_MIN (q_band_energies)*/ + Word16 *q_band_energies, /* o : Q of energy in critical frequency bands without minimum noise floor MODE2_E_MIN */ + Word16 *fft_buff, /* o : FFT coefficients (q_fft_buff) */ + Word16 *q_fft_buff /* o : Q of FFT coefficients Q0 */ ); void find_wsp_fx( const Word16 Az[], diff --git a/lib_enc/tcx_utils_enc.c b/lib_enc/tcx_utils_enc.c index 2db3ce73a..4ea9a52fc 100644 --- a/lib_enc/tcx_utils_enc.c +++ b/lib_enc/tcx_utils_enc.c @@ -108,7 +108,7 @@ void ProcessIGF_ivas_fx( } } - IGFSaveSpectrumForITF_ivas_fx( hIGFEnc, igfGridIdx, pITFMDCTSpectrum ); + IGFSaveSpectrumForITF_ivas_fx( hIGFEnc, igfGridIdx, pITFMDCTSpectrum, sub( Q31, *q_spectrum ) ); IGFEncApplyMono_ivas_fx( st, igfGridIdx, pMDCTSpectrum, sub( Q31, *q_spectrum ), pPowerSpectrum, sub( Q31, *q_powerSpec ), isTCX20, st->hTcxEnc->fUseTns[frameno], sp_aud_decision0, vad_hover_flag ); @@ -163,9 +163,11 @@ void ProcessIGF_ivas_fx( void ProcessStereoIGF_fx( STEREO_MDCT_ENC_DATA_HANDLE hStereoMdct, - Encoder_State *sts[CPE_CHANNELS], /* i : Encoder state */ - Word16 ms_mask[2][MAX_SFB], /* i : bandwise MS mask */ - Word32 *pITFMDCTSpectrum_fx[CPE_CHANNELS][NB_DIV], /* i : MDCT spectrum fir ITF */ + Encoder_State *sts[CPE_CHANNELS], /* i : Encoder state */ + Word16 ms_mask[2][MAX_SFB], /* i : bandwise MS mask */ + Word32 *pITFMDCTSpectrum_fx[CPE_CHANNELS][NB_DIV], /* i : MDCT spectrum fir ITF */ + Word16 q_pITFMDCTSpectrum_1, + Word16 q_pITFMDCTSpectrum_2, Word32 *pPowerSpectrum_fx[CPE_CHANNELS], /* i/o: MDCT^2 + MDST^2 spectrum, or estimate */ Word32 *pPowerSpectrumMsInv_fx[CPE_CHANNELS][NB_DIV], /* i : inverse power spectrum */ Word32 *inv_spectrum_fx[CPE_CHANNELS][NB_DIV], /* i : inverse spectrum */ @@ -207,9 +209,9 @@ void ProcessStereoIGF_fx( } move16(); - IGFSaveSpectrumForITF_ivas_fx( hIGFEnc[0], igfGridIdx, pITFMDCTSpectrum_fx[0][frameno] ); + IGFSaveSpectrumForITF_ivas_fx( hIGFEnc[0], igfGridIdx, pITFMDCTSpectrum_fx[0][frameno], sub( Q31, q_pITFMDCTSpectrum_1 ) ); - IGFSaveSpectrumForITF_ivas_fx( hIGFEnc[1], igfGridIdx, pITFMDCTSpectrum_fx[1][frameno] ); + IGFSaveSpectrumForITF_ivas_fx( hIGFEnc[1], igfGridIdx, pITFMDCTSpectrum_fx[1][frameno], sub( Q31, q_pITFMDCTSpectrum_2 ) ); IGFEncApplyStereo_fx( hStereoMdct, ms_mask, hIGFEnc, igfGridIdx, sts, pPowerSpectrum_fx, pPowerSpectrumMsInv_fx, inv_spectrum_fx, frameno, sp_aud_decision0, element_brate ); diff --git a/lib_enc/tcx_utils_enc_fx.c b/lib_enc/tcx_utils_enc_fx.c index a5889e401..0d7720f62 100644 --- a/lib_enc/tcx_utils_enc_fx.c +++ b/lib_enc/tcx_utils_enc_fx.c @@ -751,7 +751,7 @@ Word16 SQ_gain_ivas_fx( /* output: SQ gain */ s = shl( sub( x_e, s ), 1 ); /* log */ - IF( EQ_16( ener, 1 ) ) + IF( EQ_32( ener, 1 ) ) { en[i] = -131072; /* log10(0.01) in Q16 */ move32(); diff --git a/lib_enc/vad_fx.c b/lib_enc/vad_fx.c index 21bf16087..6ab97954b 100644 --- a/lib_enc/vad_fx.c +++ b/lib_enc/vad_fx.c @@ -1904,14 +1904,14 @@ Word16 wb_vad_fx( Word16 wb_vad_ivas_fx( Encoder_State *st_fx, /* i/o: encoder state structure */ - const Word32 fr_bands[], /* i : per band input energy (contains 2 vectors) Q_new+QSCALE*/ + const Word32 fr_bands[], /* i : per band i energy (contains 2 vectors) q_fr_bands*/ + const Word16 q_fr_bands, /* i : Q of fr_bands */ Word16 *noisy_speech_HO, /* o : SC-VBR noisy speech HO flag */ Word16 *clean_speech_HO, /* o : SC-VBR clean speech HO flag */ Word16 *NB_speech_HO, /* o : SC-VBR NB speech HO flag */ Word16 *snr_sum_he, /* o : Output snr_sum as weighted spectral measure*/ Word16 *localVAD_HE_SAD, Word16 *flag_noisy_speech_snr, /* o : */ - const Word16 Q_new, /* i : scaling factor Q0 */ VAD_HANDLE hVAD, /* i/o: VAD data handle */ NOISE_EST_HANDLE hNoiseEst, /* i : Noise estimation handle */ Word16 lp_speech_fx, /* i : long term active speech energy average */ @@ -2257,17 +2257,7 @@ Word16 wb_vad_ivas_fx( } ELSE { - e_num = norm_l( L_tmp1 ); - m_num = extract_h( L_shl( L_tmp1, e_num ) ); - - /* if bckr[i] == 0; approx. L_snr */ - e_noise = add( 30 + 1, abs_s( Q_new ) ); - - m_num = shr( m_num, 1 ); - shift_snr = add( sub( e_num, e_noise ), 15 - 4 ); - - snr_tmp = div_s( m_num, 32767 ); - L_snr = L_shr_o( snr_tmp, shift_snr, &Overflow ); /*L_snr in Q4*/ + L_snr = L_shr_o( L_tmp1, sub( Q3, q_fr_bands ), &Overflow ); // q_fr_bands+1 -> Q4 } } ELSE @@ -2294,17 +2284,7 @@ Word16 wb_vad_ivas_fx( } ELSE { - e_num = norm_l( L_tmp2 ); - m_num = extract_h( L_shl( L_tmp2, e_num ) ); - - /* if bckr[i] == 0; approx. L_snr */ - e_noise = add( 30 + 1, abs_s( Q_new ) ); - - m_num = shr( m_num, 1 ); - shift_snr = add( sub( e_num, e_noise ), 15 - 4 ); - - snr_tmp = div_s( m_num, 32767 ); - L_snr = L_shr_o( snr_tmp, shift_snr, &Overflow ); /*L_snr in Q4*/ + L_snr = L_shr_o( L_tmp2, sub( Q3, q_fr_bands ), &Overflow ); // q_fr_bands+1 -> Q4 } } @@ -2436,17 +2416,7 @@ Word16 wb_vad_ivas_fx( } ELSE { - e_num = norm_l( L_tmp1 ); - m_num = extract_h( L_shl( L_tmp1, e_num ) ); - - /* if bckr[i] == 0; approx. L_snr */ - e_noise = add( 30 + 1, abs_s( Q_new ) ); - - m_num = shr( m_num, 1 ); - shift_snr = add( sub( e_num, e_noise ), 15 - 4 ); - - snr_tmp = div_s( m_num, 32767 ); - L_snr = L_shr_o( snr_tmp, shift_snr, &Overflow ); /*L_snr in Q4*/ + L_snr = L_shr_o( L_tmp1, sub( Q3, q_fr_bands ), &Overflow ); // q_fr_bands+1 -> Q4 } @@ -2471,17 +2441,17 @@ Word16 wb_vad_ivas_fx( IF( LT_16( i, 3 ) ) { #ifdef BASOP_NOGLOB - L_accum_ener_L = L_add_o( L_accum_ener_L, hNoiseEst->bckr_fx[i], &Overflow ); /*Q_new+QSCALE */ + L_accum_ener_L = L_add_o( L_accum_ener_L, hNoiseEst->bckr_fx[i], &Overflow ); /* hNoiseEst->q_bckr */ #else - L_accum_ener_L = L_add( L_accum_ener_L, hNoiseEst->bckr_fx[i] ); /*Q_new+QSCALE */ + L_accum_ener_L = L_add( L_accum_ener_L, hNoiseEst->bckr_fx[i] ); /*hNoiseEst->q_bckr */ #endif } ELSE { #ifdef BASOP_NOGLOB - L_accum_ener_H = L_add_o( L_accum_ener_H, hNoiseEst->bckr_fx[i], &Overflow ); /*Q_new+QSCALE */ + L_accum_ener_H = L_add_o( L_accum_ener_H, hNoiseEst->bckr_fx[i], &Overflow ); /*hNoiseEst->q_bckr */ #else - L_accum_ener_H = L_add( L_accum_ener_H, hNoiseEst->bckr_fx[i] ); /*Q_new+QSCALE */ + L_accum_ener_H = L_add( L_accum_ener_H, hNoiseEst->bckr_fx[i] ); /*hNoiseEst->q_bckr */ #endif } -- GitLab